VGG + LSTM - особенности узкого места в обучении распознаванию активности и тонкой настройке, val_loss слишком высок - PullRequest
0 голосов
/ 29 марта 2019

Я строю сеть распознавания активности на основе VGG + LSTM. Я использую VGG, предобученный на ImageNet, извлекаю из него признаки (features) и сохраняю их в .npy-файлах.

Мои генераторы наборов данных

def bring_data_from_directory():
  """Build the training and validation image generators.

  Both generators rescale pixel values to [0, 1] and read 224x224 frames
  from class-named subdirectories.  Relies on the module-level
  ``batch_size``.

  Returns:
      Tuple ``(train_generator, validation_generator)`` of Keras
      directory iterators yielding ``(image_batch, one_hot_labels)``.
  """
  # Single source of truth for the class list (was duplicated verbatim
  # in both calls below).
  classes = ['PlayingPiano', 'HorseRiding', 'Skiing', 'Basketball', 'BaseballPitch']
  datagen = ImageDataGenerator(rescale=1. / 255)
  train_generator = datagen.flow_from_directory(
          'dataset/train',
          target_size=(224, 224),
          batch_size=batch_size,
          # NOTE: 'categorical' yields one-hot label batches along with the
          # images; the old comment claiming "no labels" described
          # class_mode=None, not this setting.
          class_mode='categorical',
          shuffle=True,
          classes=classes)

  validation_generator = datagen.flow_from_directory(
          'dataset/validate',
          target_size=(224, 224),
          batch_size=batch_size,
          class_mode='categorical',
          shuffle=True,
          classes=classes)
  return train_generator, validation_generator

Функция извлечения из VGG

def extract_features_and_store(train_generator, validation_generator, base_model,
                               save_features, num_train=56021, num_val=3974):
  """Run the frozen CNN over both generators and cache bottleneck features.

  When ``save_features`` is true, ``base_model`` is predicted over roughly
  ``num_train`` training and ``num_val`` validation images; the resulting
  feature/label arrays are shuffled and written as .npy files under
  ``ROOT_DIR + '/npy_lstm'``.  The cached files are then (re)loaded,
  shuffled again and returned with each 4-D feature map flattened from
  (N, H, W, C) to a sequence shape (N, H*W, C) for the downstream LSTM.

  Args:
      train_generator: Keras iterator yielding training ``(x, y)`` batches.
      validation_generator: Keras iterator yielding validation batches.
      base_model: frozen feature extractor exposing ``predict_on_batch``.
      save_features: if true, recompute and overwrite the cached files.
      num_train: approximate number of training images (default keeps the
          previously hard-coded dataset size).
      num_val: approximate number of validation images.

  Returns:
      ``(train_data, train_labels, validation_data, validation_labels)``.
  """
  def _predict_all(generator, num_samples, tag):
    # Accumulate per-batch outputs in Python lists and concatenate once:
    # the old np.append-per-batch copied the whole array every iteration,
    # i.e. O(n^2) total work.
    features, labels = [], []
    for batch_idx, (x, y) in enumerate(generator):
      if batch_idx == num_samples // batch_size:
        break
      print("predict on batch %s:" % tag, batch_idx)
      features.append(base_model.predict_on_batch(x))
      labels.append(y)
    return np.concatenate(features, axis=0), np.concatenate(labels, axis=0)

  feature_dir = ROOT_DIR + '/npy_lstm'
  if save_features:
    x_train, y_train = _predict_all(train_generator, num_train, 'train')
    x_train, y_train = shuffle(x_train, y_train)
    # np.save/np.load accept a path directly; the previous bare open()
    # calls leaked file handles.
    np.save(os.path.join(feature_dir, 'video_x_VGG16.npy'), x_train)
    np.save(os.path.join(feature_dir, 'video_y_VGG16.npy'), y_train)

    x_val, y_val = _predict_all(validation_generator, num_val, 'validate')
    x_val, y_val = shuffle(x_val, y_val)
    np.save(os.path.join(feature_dir, 'video_x_validate_VGG16.npy'), x_val)
    np.save(os.path.join(feature_dir, 'video_y_validate_VGG16.npy'), y_val)

  train_data = np.load(os.path.join(feature_dir, 'video_x_VGG16.npy'))
  train_labels = np.load(os.path.join(feature_dir, 'video_y_VGG16.npy'))
  train_data, train_labels = shuffle(train_data, train_labels)
  validation_data = np.load(os.path.join(feature_dir, 'video_x_validate_VGG16.npy'))
  validation_labels = np.load(os.path.join(feature_dir, 'video_y_validate_VGG16.npy'))
  validation_data, validation_labels = shuffle(validation_data, validation_labels)

  # Flatten the spatial grid of each CNN feature map into a sequence:
  # (N, H, W, C) -> (N, H*W, C).
  train_data = train_data.reshape(train_data.shape[0],
                                  train_data.shape[1] * train_data.shape[2],
                                  train_data.shape[3])
  validation_data = validation_data.reshape(validation_data.shape[0],
                                            validation_data.shape[1] * validation_data.shape[2],
                                            validation_data.shape[3])

  return train_data, train_labels, validation_data, validation_labels

Это модель, которую я использую для обучения; на вход ей подаются признаки «узкого места» (bottleneck features), извлечённые из VGG.

train_data = train_data.reshape(-1,7,7,512)
  validation_data = validation_data.reshape(-1,7,7,512)
  model = Sequential()
  model.add(Reshape((49,512), input_shape=(7,7,512)))
  model.add(LSTM(512,dropout=0.2, return_sequences=True, input_shape=(49,
                     512)))
  model.add(LSTM(256, dropout=0.2,return_sequences=True ))
  model.add(LSTM(64,  dropout=0.2))
  model.add(Dense(1024, activation='relu'))
  model.add(Dropout(0.5))
  model.add(Dense(512, activation='relu'))
  model.add(Dropout(0.5))
  model.add(Dense(512, activation='relu'))
  model.add(Dropout(0.5))
  model.add(Dense(64, activation='relu'))
  model.add(Dropout(0.5))
  model.add(Dense(5, activation='softmax'))
  sgd = SGD(lr=0.009, decay = 1e-6, momentum=0.9, nesterov=True)
  model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
  #model.load_weights('video_1_LSTM_1_512.h5')

  print(model.summary())
  trained_models_path = './weights_lstm/' + 'LSTM_1024'
  model_names = trained_models_path + '.{epoch:02d}-{val_acc:.2f}.h5'
  reduce_lr = ReduceLROnPlateau('val_loss', factor=0.1, verbose=1, min_delta=0.0001, min_lr=0, patience = 10)
  early_stop = EarlyStopping(monitor='val_loss', patience=50, verbose=0)
  model_check_point = ModelCheckpoint(model_names, monitor='val_loss', save_best_only=True, verbose=1)
  callbacks = [early_stop, model_check_point, reduce_lr]
  nb_epoch = 500
  model.fit(train_data,train_labels, validation_data=(validation_data,validation_labels),batch_size=batch_size,nb_epoch=nb_epoch,callbacks=callbacks,shuffle=True,verbose=1)
  return model

А это логи тренировок, так как вы можете видеть, что val_loss зависает на 0,43 и не двигается дальше, все эти цифры точности не показывают реальную производительность модели, иногда она допускает ошибки, например, в HorseRiding

Epoch 00010: val_loss did not improve from 0.43106
Epoch 11/500
55798/55798 [==============================] - 48s 865us/step - loss: 0.0322 - acc: 0.9921 - val_loss: 0.6460 - val_acc: 0.8745

Epoch 00011: val_loss did not improve from 0.43106
Epoch 12/500
55798/55798 [==============================] - 48s 866us/step - loss: 0.0284 - acc: 0.9933 - val_loss: 0.7122 - val_acc: 0.8596

Epoch 00012: val_loss did not improve from 0.43106
Epoch 13/500
55798/55798 [==============================] - 48s 869us/step - loss: 0.0295 - acc: 0.9927 - val_loss: 0.4936 - val_acc: 0.8891

Epoch 00013: val_loss did not improve from 0.43106
Epoch 14/500
55798/55798 [==============================] - 49s 869us/step - loss: 0.0294 - acc: 0.9932 - val_loss: 0.6888 - val_acc: 0.8725

Epoch 00014: val_loss did not improve from 0.43106
Epoch 15/500
55798/55798 [==============================] - 48s 869us/step - loss: 0.0232 - acc: 0.9943 - val_loss: 0.7516 - val_acc: 0.8692

Epoch 00015: val_loss did not improve from 0.43106
Epoch 16/500
55798/55798 [==============================] - 48s 869us/step - loss: 0.0206 - acc: 0.9950 - val_loss: 0.6544 - val_acc: 0.8823

Epoch 00016: val_loss did not improve from 0.43106
Epoch 17/500
55798/55798 [==============================] - 49s 869us/step - loss: 0.0185 - acc: 0.9955 - val_loss: 0.7565 - val_acc: 0.8775

Epoch 00017: val_loss did not improve from 0.43106

Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.000899999961256981.
Epoch 18/500
55798/55798 [==============================] - 49s 870us/step - loss: 0.0093 - acc: 0.9976 - val_loss: 0.6456 - val_acc: 0.8803

Epoch 00018: val_loss did not improve from 0.43106
Epoch 19/500
55798/55798 [==============================] - 48s 869us/step - loss: 0.0055 - acc: 0.9988 - val_loss: 0.7091 - val_acc: 0.8773

Epoch 00019: val_loss did not improve from 0.43106
Epoch 20/500
55798/55798 [==============================] - 49s 870us/step - loss: 0.0062 - acc: 0.9985 - val_loss: 0.6342 - val_acc: 0.8803

Epoch 00020: val_loss did not improve from 0.43106
Epoch 21/500
55798/55798 [==============================] - 48s 869us/step - loss: 0.0046 - acc: 0.9991 - val_loss: 0.6921 - val_acc: 0.8810

Epoch 00021: val_loss did not improve from 0.43106
Epoch 22/500
55798/55798 [==============================] - 48s 869us/step - loss: 0.0049 - acc: 0.9987 - val_loss: 0.6954 - val_acc: 0.8760

Epoch 00022: val_loss did not improve from 0.43106
Epoch 23/500
55798/55798 [==============================] - 48s 869us/step - loss: 0.0050 - acc: 0.9989 - val_loss: 0.7276 - val_acc: 0.8727

Epoch 00023: val_loss did not improve from 0.43106
Epoch 24/500
55798/55798 [==============================] - 48s 867us/step - loss: 0.0043 - acc: 0.9989 - val_loss: 0.7749 - val_acc: 0.8765

Epoch 00024: val_loss did not improve from 0.43106
Epoch 25/500
55798/55798 [==============================] - 49s 870us/step - loss: 0.0049 - acc: 0.9989 - val_loss: 0.6937 - val_acc: 0.8813

Epoch 00025: val_loss did not improve from 0.43106
Epoch 26/500
55798/55798 [==============================] - 48s 868us/step - loss: 0.0035 - acc: 0.9991 - val_loss: 0.8164 - val_acc: 0.8727

Epoch 00026: val_loss did not improve from 0.43106
Epoch 27/500
55798/55798 [==============================] - 48s 868us/step - loss: 0.0043 - acc: 0.9991 - val_loss: 0.7467 - val_acc: 0.8790

Затем я настраиваю свою модель, и здесь все становится еще хуже, поэтому модель вообще не улучшается.

    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224,224,3))
    print('Model loaded.')

    top_model = Sequential()
    top_model.add(Reshape((49,512), input_shape=(7,7,512)))
    top_model.add(LSTM(512,dropout=0.2, return_sequences=True, input_shape=(49,
                        512)))
    top_model.add(LSTM(256, dropout=0.2,return_sequences=True ))
    top_model.add(LSTM(64,  dropout=0.2))
    top_model.add(Dense(1024, activation='relu'))
    top_model.add(Dropout(0.5))
    top_model.add(Dense(512, activation='relu'))
    top_model.add(Dropout(0.5))
    top_model.add(Dense(512, activation='relu'))
    top_model.add(Dropout(0.5))
    top_model.add(Dense(64, activation='relu'))
    top_model.add(Dropout(0.5))
    top_model.add(Dense(5, activation='softmax'))

    top_model.load_weights(top_model_weights_path)

    model = Model(inputs= base_model.input, outputs= top_model(base_model.output))

    for layer in model.layers[:25]:
        layer.trainable = False

    sgd = SGD(lr=0.009, decay = 1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
    #model.load_weights('video_1_LSTM_1_512.h5')
    trained_models_path = './fine_tunning/' + 'VGG_LSTM_1024'
    model_names = trained_models_path + '.{epoch:02d}-{val_acc:.2f}.h5'
    reduce_lr = ReduceLROnPlateau('val_loss', factor=0.1, verbose=1, min_delta=0.0001, min_lr=0, patience = 10)
    early_stop = EarlyStopping(monitor='val_loss', patience=50, verbose=0)
    model_check_point = ModelCheckpoint(model_names, monitor='val_loss', save_best_only=True, verbose=1)
    callbacks = [early_stop, model_check_point, reduce_lr]
    nb_epoch = 500
    model.fit_generator(train_generator,validation_data=validation_generator, steps_per_epoch=100, epochs=nb_epoch,callbacks=callbacks,shuffle=True,verbose=1)
    return model


Вот логи тонкой настройки

Epoch 1/500
100/100 [==============================] - 60s 597ms/step - loss: 0.1379 - acc: 0.9649 - val_loss: 0.4705 - val_acc: 0.8683

Epoch 00001: val_loss improved from inf to 0.47049, saving model to ./fine_tunning/VGG_LSTM_1024.01-0.87.h5
Epoch 2/500
100/100 [==============================] - 56s 556ms/step - loss: 0.1396 - acc: 0.9647 - val_loss: 0.4705 - val_acc: 0.8683

Epoch 00002: val_loss did not improve from 0.47049
Epoch 3/500
100/100 [==============================] - 56s 555ms/step - loss: 0.1307 - acc: 0.9660 - val_loss: 0.4705 - val_acc: 0.8683

Epoch 00003: val_loss did not improve from 0.47049
Epoch 4/500
100/100 [==============================] - 55s 549ms/step - loss: 0.1277 - acc: 0.9683 - val_loss: 0.4705 - val_acc: 0.8683

Epoch 00004: val_loss did not improve from 0.47049
Epoch 5/500
100/100 [==============================] - 56s 560ms/step - loss: 0.1422 - acc: 0.9651 - val_loss: 0.4705 - val_acc: 0.8683

Epoch 00005: val_loss did not improve from 0.47049
Epoch 6/500
100/100 [==============================] - 55s 553ms/step - loss: 0.1365 - acc: 0.9646 - val_loss: 0.4705 - val_acc: 0.8683

Epoch 00006: val_loss did not improve from 0.47049
Epoch 7/500
100/100 [==============================] - 56s 556ms/step - loss: 0.1365 - acc: 0.9681 - val_loss: 0.4705 - val_acc: 0.8683

Epoch 00007: val_loss did not improve from 0.47049
Epoch 8/500
100/100 [==============================] - 54s 542ms/step - loss: 0.1371 - acc: 0.9665 - val_loss: 0.4705 - val_acc: 0.8683

Epoch 00008: val_loss improved from 0.47049 to 0.47049, saving model to ./fine_tunning/VGG_LSTM_1024.08-0.87.h5
Epoch 9/500
100/100 [==============================] - 53s 526ms/step - loss: 0.1355 - acc: 0.9656 - val_loss: 0.4705 - val_acc: 0.8683

Epoch 00009: val_loss did not improve from 0.47049
Epoch 10/500
100/100 [==============================] - 54s 539ms/step - loss: 0.1351 - acc: 0.9658 - val_loss: 0.4705 - val_acc: 0.8683


Как видите, модель вообще не улучшается. Итак, резюме таково: 1) Почему при узком месте тренировка val_loss так высока и акк высока, но она не показывает реальную производительность модели, она все еще допускает ошибки? 2) При тонкой настройке модель вообще не улучшается. Почему?

С уважением, Дмитрий

...