Я использовал VGG16 с TensorFlow для набора данных SVHN (Street View House Numbers — номера домов с уличных снимков) и получил крайне низкую точность (~18%). Это задача многоклассовой классификации (10 классов).
Я использовал готовый код, найденный в интернете, но, к сожалению, не получил той же точности, что и его автор.
Я пробовал менять оптимизатор, скорость обучения, момент (momentum), число эпох и размер батча.
Код прилагается. Кто-нибудь может помочь?
import numpy as np
import scipy.io
import keras
import keras.layers
import keras.preprocessing
import cv2
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, MaxPool2D
from keras.layers import Conv2D, MaxPooling2D, Dropout, Input, ZeroPadding2D, Convolution2D
from keras.utils import np_utils
from keras.optimizers import SGD, Adam
from keras.callbacks import History
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model
from keras.applications import VGG16
def vgg16Model(X_train, y_train, X_test, y_test, input_shape):
    """Train a VGG16-style network from scratch and evaluate it.

    The network is built by ``_build_vgg16``, compiled with SGD and
    categorical cross-entropy, trained for up to 20 epochs on augmented
    batches, saved to ``models/vgg16_<epochs>_<batch_size>.h`` and finally
    evaluated on the test split.

    Args:
        X_train: Training images. Assumed to be a float array shaped
            ``(N,) + input_shape`` -- confirm against the caller.
        y_train: One-hot training labels with 10 columns (the output layer
            has 10 softmax units).
        X_test: Test images, same layout as ``X_train``. Not modified.
        y_test: One-hot test labels, same layout as ``y_train``.
        input_shape: Shape of a single image, e.g. ``(32, 32, 3)``. The
            network halves the spatial size five times, so each spatial
            dimension must be at least 32.

    Returns:
        A tuple ``(model, history, score)`` where ``history`` is the object
        returned by ``fit_generator`` and ``score`` is the result of
        ``model.evaluate`` (ordered as ``model.metrics_names``).
    """
    import os  # local import: only needed to create the output directory

    epochs = 20
    batch_size = 64
    steps_per_epoch = X_train.shape[0] // batch_size

    # A step size of 0.1 with momentum 0.9 is very aggressive for a deep
    # network without batch normalisation; training tends to collapse to a
    # constant prediction. 0.01 is a conventional starting point for SGD.
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

    # Honour the caller-supplied input shape instead of overriding it.
    vgg16model = _build_vgg16(input_shape)
    vgg16model.summary()
    vgg16model.compile(optimizer=sgd,
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])

    history = History()
    # NOTE(review): 'acc' is the training-accuracy key in older Keras
    # releases; newer ones log it as 'accuracy' -- confirm for the installed
    # version, otherwise early stopping never triggers.
    early_stop = keras.callbacks.EarlyStopping(
        monitor='acc', min_delta=0, patience=2, verbose=0, mode='auto')
    callbacks = [history, early_stop]

    # Digits are not invariant to mirroring or large rotations (a flipped or
    # upside-down digit is a different or invalid digit), so only mild,
    # label-preserving augmentation is used.
    datagen = ImageDataGenerator(
        featurewise_center=True,
        featurewise_std_normalization=True,
        rotation_range=10,
        width_shift_range=0.2,
        height_shift_range=0.2)
    datagen.fit(X_train)

    model_history = vgg16model.fit_generator(
        datagen.flow(X_train, y_train, batch_size=batch_size),
        steps_per_epoch=steps_per_epoch,
        verbose=1,
        epochs=epochs,
        callbacks=callbacks,
        shuffle=True)

    filename = 'models/vgg16_' + str(epochs) + '_' + str(batch_size)+'.h'
    # Make sure the target directory exists so a long training run is not
    # lost to a missing-folder error at save time.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    vgg16model.save(filename)

    # The model was trained on featurewise-standardised inputs, so the test
    # images must receive the same statistics. standardize() works in place,
    # hence the copy to leave the caller's array untouched.
    X_test_std = datagen.standardize(np.array(X_test, dtype='float32', copy=True))

    print('SCORE')
    score = vgg16model.evaluate(X_test_std, y_test, batch_size=64)
    print(vgg16model.metrics_names)
    print(score)
    return vgg16model, model_history, score


def _build_vgg16(input_shape, num_classes=10):
    """Build the uncompiled VGG16 graph: five conv blocks plus three dense layers."""
    # (number of 3x3 conv layers, number of filters) for blocks 1..5
    block_specs = [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)]

    img_input = Input(shape=input_shape, name='img_input')
    x = img_input
    for block_idx, (n_convs, n_filters) in enumerate(block_specs, start=1):
        for conv_idx in range(1, n_convs + 1):
            x = Conv2D(n_filters, (3, 3), padding='same', activation='relu',
                       name='block%d_conv%d' % (block_idx, conv_idx))(x)
        x = MaxPool2D((2, 2), strides=(2, 2),
                      name='block%d_pool' % block_idx)(x)

    x = Flatten(name='flatten')(x)
    x = Dense(4096, activation='relu', name='fc1')(x)
    x = Dense(4096, activation='relu', name='fc2')(x)
    x = Dense(num_classes, activation='softmax', name='predictions')(x)
    return Model(inputs=img_input, outputs=x, name='vgg16-funcapi')
def preprocess():
    """Load the 32x32 train/test ``.mat`` files and prepare them for training.

    For each split the image array under key ``'X'`` is converted to
    float32, divided by 255 and its last axis is moved to the front
    (presumably turning a (rows, cols, channels, samples) layout into
    samples-first -- confirm against the dataset). The labels under key
    ``'y'`` are flattened, label value 10 is remapped to 0, and the result
    is one-hot encoded into 10 classes.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test)``.
    """
    def _load_split(mat_path):
        # Read one split and return (images, one-hot labels).
        contents = scipy.io.loadmat(mat_path)

        images = contents['X'].astype(np.float32)
        images /= 255.
        images = np.moveaxis(images, 3, 0)

        labels = contents['y'].flatten()
        labels[labels == 10] = 0
        one_hot = np_utils.to_categorical(labels, num_classes=10)
        return images, one_hot

    X_train, y_train = _load_split('data/train_32x32.mat')
    X_test, y_test = _load_split('data/test_32x32.mat')
    return (X_train, y_train, X_test, y_test)
if __name__ == '__main__':
    # Script entry point: prepare the data, train/evaluate the network,
    # then dump the training history and the final test score.
    X_train, y_train, X_test, y_test = preprocess()
    vgg16model, vgg16model_history, vgg16score = vgg16Model(
        X_train, y_train, X_test, y_test, input_shape=(32, 32, 3))

    separator = '-' * 100
    report = (
        ('This is the history of the vgg16 model, trained from scratch',
         vgg16model_history.history),
        ('This is the score', vgg16score),
    )
    for heading, payload in report:
        print(separator)
        print(heading)
        print(payload)