Я строю сеть распознавания действий на основе VGG + LSTM. Я использую VGG, предварительно обученную на ImageNet, извлекаю из неё признаки и сохраняю их в файлах .npy.
Мои генераторы наборов данных
def bring_data_from_directory():
    """Build the training and validation image generators.

    Both generators read images from ``dataset/train`` and
    ``dataset/validate`` respectively, resize them to 224x224, rescale pixel
    values by 1/255 and shuffle the samples. The batch size comes from the
    module-level ``batch_size``.

    Returns:
        tuple: ``(train_generator, validation_generator)``.
    """
    datagen = ImageDataGenerator(rescale=1. / 255)

    # Options shared by both splits; only the source directory differs.
    flow_options = dict(
        target_size=(224, 224),
        batch_size=batch_size,
        # 'categorical' makes the generator yield one-hot encoded labels
        # together with every batch of images.
        class_mode='categorical',
        shuffle=True,
        # The explicit list restricts the generators to these five class
        # sub-directories and keeps the same label order for both splits.
        classes=['PlayingPiano', 'HorseRiding', 'Skiing', 'Basketball', 'BaseballPitch'],
    )

    train_generator = datagen.flow_from_directory('dataset/train', **flow_options)
    validation_generator = datagen.flow_from_directory('dataset/validate', **flow_options)
    return train_generator, validation_generator
Функция извлечения признаков из VGG:
def _collect_features(generator, base_model, num_batches, progress_label):
    """Run ``base_model`` on the first ``num_batches`` batches of ``generator``.

    Returns:
        tuple: ``(features, labels)``, each stacked along axis 0.

    Raises:
        ValueError: if no batch was processed.
    """
    feature_batches = []
    label_batches = []
    for batch_index, (x, y) in enumerate(generator):
        # The loop is ended explicitly after ``num_batches`` batches instead of
        # relying on the generator to stop on its own.
        if batch_index == num_batches:
            break
        print(progress_label, batch_index)
        feature_batches.append(base_model.predict_on_batch(x))
        label_batches.append(y)

    if not feature_batches:
        raise ValueError(
            "no batches were processed; check the sample count and batch_size")

    # One concatenation at the end avoids re-copying the whole buffer on
    # every batch, which is what repeated np.append does.
    return (np.concatenate(feature_batches, axis=0),
            np.concatenate(label_batches, axis=0))


def extract_features_and_store(train_generator, validation_generator, base_model,
                               save_features, train_samples=56021,
                               validation_samples=3974):
    """Extract ``base_model`` features, cache them as .npy files and load them.

    When ``save_features`` is true, the model is run over
    ``train_samples // batch_size`` training batches and
    ``validation_samples // batch_size`` validation batches. The shuffled
    features and labels are written to ``ROOT_DIR + '/npy_lstm'``. The cached
    files are then loaded in either case, shuffled again and reshaped.

    Args:
        train_generator: iterable yielding ``(images, labels)`` training batches.
        validation_generator: iterable yielding ``(images, labels)`` validation
            batches.
        base_model: model exposing ``predict_on_batch``; its output must be
            4-dimensional, since the result is reshaped from
            ``(N, d1, d2, d3)`` to ``(N, d1 * d2, d3)``.
        save_features: if false, extraction is skipped and the previously
            saved files are loaded.
        train_samples: number of training samples used to derive the batch
            count (defaults to the previously hard-coded 56021).
        validation_samples: number of validation samples used to derive the
            batch count (defaults to the previously hard-coded 3974).

    Returns:
        tuple: ``(train_data, train_labels, validation_data, validation_labels)``.

    Raises:
        ValueError: if ``save_features`` is true and a split yields no batches.
    """
    features_dir = ROOT_DIR + '/npy_lstm'
    train_x_path = os.path.join(features_dir, 'video_x_VGG16.npy')
    train_y_path = os.path.join(features_dir, 'video_y_VGG16.npy')
    validation_x_path = os.path.join(features_dir, 'video_x_validate_VGG16.npy')
    validation_y_path = os.path.join(features_dir, 'video_y_validate_VGG16.npy')

    if save_features:
        features, labels = _collect_features(
            train_generator, base_model,
            train_samples // batch_size, "predict on batch:")
        features, labels = shuffle(features, labels)
        # Passing a path lets NumPy open and close the file itself, so no
        # file handle is left open.
        np.save(train_x_path, features)
        np.save(train_y_path, labels)
        # Release the training arrays before the validation pass allocates.
        del features, labels

        features, labels = _collect_features(
            validation_generator, base_model,
            validation_samples // batch_size, "predict on batch validate:")
        features, labels = shuffle(features, labels)
        np.save(validation_x_path, features)
        np.save(validation_y_path, labels)
        del features, labels

    train_data = np.load(train_x_path)
    train_labels = np.load(train_y_path)
    train_data, train_labels = shuffle(train_data, train_labels)

    validation_data = np.load(validation_x_path)
    validation_labels = np.load(validation_y_path)
    validation_data, validation_labels = shuffle(validation_data, validation_labels)

    # Merge the two middle axes so each sample becomes a 2-D sequence.
    train_data = train_data.reshape(train_data.shape[0],
                                    train_data.shape[1] * train_data.shape[2],
                                    train_data.shape[3])
    validation_data = validation_data.reshape(validation_data.shape[0],
                                              validation_data.shape[1] * validation_data.shape[2],
                                              validation_data.shape[3])
    return train_data, train_labels, validation_data, validation_labels
Это модель, которую я обучаю; на вход она получает bottleneck-признаки, извлечённые из VGG:
train_data = train_data.reshape(-1,7,7,512)
validation_data = validation_data.reshape(-1,7,7,512)
model = Sequential()
model.add(Reshape((49,512), input_shape=(7,7,512)))
model.add(LSTM(512,dropout=0.2, return_sequences=True, input_shape=(49,
512)))
model.add(LSTM(256, dropout=0.2,return_sequences=True ))
model.add(LSTM(64, dropout=0.2))
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(5, activation='softmax'))
sgd = SGD(lr=0.009, decay = 1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
#model.load_weights('video_1_LSTM_1_512.h5')
print(model.summary())
trained_models_path = './weights_lstm/' + 'LSTM_1024'
model_names = trained_models_path + '.{epoch:02d}-{val_acc:.2f}.h5'
reduce_lr = ReduceLROnPlateau('val_loss', factor=0.1, verbose=1, min_delta=0.0001, min_lr=0, patience = 10)
early_stop = EarlyStopping(monitor='val_loss', patience=50, verbose=0)
model_check_point = ModelCheckpoint(model_names, monitor='val_loss', save_best_only=True, verbose=1)
callbacks = [early_stop, model_check_point, reduce_lr]
nb_epoch = 500
model.fit(train_data,train_labels, validation_data=(validation_data,validation_labels),batch_size=batch_size,nb_epoch=nb_epoch,callbacks=callbacks,shuffle=True,verbose=1)
return model
А это логи обучения. Как видите, val_loss застревает на 0,43 и дальше не улучшается. Кроме того, эти значения точности не отражают реального качества модели: иногда она ошибается, например, на классе HorseRiding.
Epoch 00010: val_loss did not improve from 0.43106
Epoch 11/500
55798/55798 [==============================] - 48s 865us/step - loss: 0.0322 - acc: 0.9921 - val_loss: 0.6460 - val_acc: 0.8745
Epoch 00011: val_loss did not improve from 0.43106
Epoch 12/500
55798/55798 [==============================] - 48s 866us/step - loss: 0.0284 - acc: 0.9933 - val_loss: 0.7122 - val_acc: 0.8596
Epoch 00012: val_loss did not improve from 0.43106
Epoch 13/500
55798/55798 [==============================] - 48s 869us/step - loss: 0.0295 - acc: 0.9927 - val_loss: 0.4936 - val_acc: 0.8891
Epoch 00013: val_loss did not improve from 0.43106
Epoch 14/500
55798/55798 [==============================] - 49s 869us/step - loss: 0.0294 - acc: 0.9932 - val_loss: 0.6888 - val_acc: 0.8725
Epoch 00014: val_loss did not improve from 0.43106
Epoch 15/500
55798/55798 [==============================] - 48s 869us/step - loss: 0.0232 - acc: 0.9943 - val_loss: 0.7516 - val_acc: 0.8692
Epoch 00015: val_loss did not improve from 0.43106
Epoch 16/500
55798/55798 [==============================] - 48s 869us/step - loss: 0.0206 - acc: 0.9950 - val_loss: 0.6544 - val_acc: 0.8823
Epoch 00016: val_loss did not improve from 0.43106
Epoch 17/500
55798/55798 [==============================] - 49s 869us/step - loss: 0.0185 - acc: 0.9955 - val_loss: 0.7565 - val_acc: 0.8775
Epoch 00017: val_loss did not improve from 0.43106
Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.000899999961256981.
Epoch 18/500
55798/55798 [==============================] - 49s 870us/step - loss: 0.0093 - acc: 0.9976 - val_loss: 0.6456 - val_acc: 0.8803
Epoch 00018: val_loss did not improve from 0.43106
Epoch 19/500
55798/55798 [==============================] - 48s 869us/step - loss: 0.0055 - acc: 0.9988 - val_loss: 0.7091 - val_acc: 0.8773
Epoch 00019: val_loss did not improve from 0.43106
Epoch 20/500
55798/55798 [==============================] - 49s 870us/step - loss: 0.0062 - acc: 0.9985 - val_loss: 0.6342 - val_acc: 0.8803
Epoch 00020: val_loss did not improve from 0.43106
Epoch 21/500
55798/55798 [==============================] - 48s 869us/step - loss: 0.0046 - acc: 0.9991 - val_loss: 0.6921 - val_acc: 0.8810
Epoch 00021: val_loss did not improve from 0.43106
Epoch 22/500
55798/55798 [==============================] - 48s 869us/step - loss: 0.0049 - acc: 0.9987 - val_loss: 0.6954 - val_acc: 0.8760
Epoch 00022: val_loss did not improve from 0.43106
Epoch 23/500
55798/55798 [==============================] - 48s 869us/step - loss: 0.0050 - acc: 0.9989 - val_loss: 0.7276 - val_acc: 0.8727
Epoch 00023: val_loss did not improve from 0.43106
Epoch 24/500
55798/55798 [==============================] - 48s 867us/step - loss: 0.0043 - acc: 0.9989 - val_loss: 0.7749 - val_acc: 0.8765
Epoch 00024: val_loss did not improve from 0.43106
Epoch 25/500
55798/55798 [==============================] - 49s 870us/step - loss: 0.0049 - acc: 0.9989 - val_loss: 0.6937 - val_acc: 0.8813
Epoch 00025: val_loss did not improve from 0.43106
Epoch 26/500
55798/55798 [==============================] - 48s 868us/step - loss: 0.0035 - acc: 0.9991 - val_loss: 0.8164 - val_acc: 0.8727
Epoch 00026: val_loss did not improve from 0.43106
Epoch 27/500
55798/55798 [==============================] - 48s 868us/step - loss: 0.0043 - acc: 0.9991 - val_loss: 0.7467 - val_acc: 0.8790
Затем я выполняю тонкую настройку (fine-tuning) модели, и здесь всё становится ещё хуже: модель вообще не улучшается.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224,224,3))
print('Model loaded.')
top_model = Sequential()
top_model.add(Reshape((49,512), input_shape=(7,7,512)))
top_model.add(LSTM(512,dropout=0.2, return_sequences=True, input_shape=(49,
512)))
top_model.add(LSTM(256, dropout=0.2,return_sequences=True ))
top_model.add(LSTM(64, dropout=0.2))
top_model.add(Dense(1024, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(512, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(512, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(64, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(5, activation='softmax'))
top_model.load_weights(top_model_weights_path)
model = Model(inputs= base_model.input, outputs= top_model(base_model.output))
for layer in model.layers[:25]:
layer.trainable = False
sgd = SGD(lr=0.009, decay = 1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
#model.load_weights('video_1_LSTM_1_512.h5')
trained_models_path = './fine_tunning/' + 'VGG_LSTM_1024'
model_names = trained_models_path + '.{epoch:02d}-{val_acc:.2f}.h5'
reduce_lr = ReduceLROnPlateau('val_loss', factor=0.1, verbose=1, min_delta=0.0001, min_lr=0, patience = 10)
early_stop = EarlyStopping(monitor='val_loss', patience=50, verbose=0)
model_check_point = ModelCheckpoint(model_names, monitor='val_loss', save_best_only=True, verbose=1)
callbacks = [early_stop, model_check_point, reduce_lr]
nb_epoch = 500
model.fit_generator(train_generator,validation_data=validation_generator, steps_per_epoch=100, epochs=nb_epoch,callbacks=callbacks,shuffle=True,verbose=1)
return model
Вот логи тонкой настройки
Epoch 1/500
100/100 [==============================] - 60s 597ms/step - loss: 0.1379 - acc: 0.9649 - val_loss: 0.4705 - val_acc: 0.8683
Epoch 00001: val_loss improved from inf to 0.47049, saving model to ./fine_tunning/VGG_LSTM_1024.01-0.87.h5
Epoch 2/500
100/100 [==============================] - 56s 556ms/step - loss: 0.1396 - acc: 0.9647 - val_loss: 0.4705 - val_acc: 0.8683
Epoch 00002: val_loss did not improve from 0.47049
Epoch 3/500
100/100 [==============================] - 56s 555ms/step - loss: 0.1307 - acc: 0.9660 - val_loss: 0.4705 - val_acc: 0.8683
Epoch 00003: val_loss did not improve from 0.47049
Epoch 4/500
100/100 [==============================] - 55s 549ms/step - loss: 0.1277 - acc: 0.9683 - val_loss: 0.4705 - val_acc: 0.8683
Epoch 00004: val_loss did not improve from 0.47049
Epoch 5/500
100/100 [==============================] - 56s 560ms/step - loss: 0.1422 - acc: 0.9651 - val_loss: 0.4705 - val_acc: 0.8683
Epoch 00005: val_loss did not improve from 0.47049
Epoch 6/500
100/100 [==============================] - 55s 553ms/step - loss: 0.1365 - acc: 0.9646 - val_loss: 0.4705 - val_acc: 0.8683
Epoch 00006: val_loss did not improve from 0.47049
Epoch 7/500
100/100 [==============================] - 56s 556ms/step - loss: 0.1365 - acc: 0.9681 - val_loss: 0.4705 - val_acc: 0.8683
Epoch 00007: val_loss did not improve from 0.47049
Epoch 8/500
100/100 [==============================] - 54s 542ms/step - loss: 0.1371 - acc: 0.9665 - val_loss: 0.4705 - val_acc: 0.8683
Epoch 00008: val_loss improved from 0.47049 to 0.47049, saving model to ./fine_tunning/VGG_LSTM_1024.08-0.87.h5
Epoch 9/500
100/100 [==============================] - 53s 526ms/step - loss: 0.1355 - acc: 0.9656 - val_loss: 0.4705 - val_acc: 0.8683
Epoch 00009: val_loss did not improve from 0.47049
Epoch 10/500
100/100 [==============================] - 54s 539ms/step - loss: 0.1351 - acc: 0.9658 - val_loss: 0.4705 - val_acc: 0.8683
Как видите, модель вообще не улучшается.
Итак, резюме таково:
1) Почему при обучении на bottleneck-признаках val_loss так высок, а точность (acc) при этом тоже высока, но не отражает реального качества модели — она всё равно допускает ошибки?
2) При тонкой настройке модель вообще не улучшается. Почему?
С уважением, Дмитрий