Как заморозить определённый слой или стадию (stage) в ResNet50? - PullRequest
1 голос
/ 23 февраля 2020

Буду очень благодарен за помощь, я новичок в Python. Я пытаюсь классифицировать порнографические изображения по 3 классам и использую для этого архитектуру ResNet-50. Сейчас у меня проблема: низкая точность на обучающей, валидационной и тестовой выборках (результаты прилагаются). Кроме того, потери на валидации не уменьшаются, из-за чего обучение останавливается досрочно (early stopping) примерно через 40 эпох.

Я не знаю, в чём причина: в плохом наборе данных (у меня всего 6267 изображений, разбиение 80:20 для train:test и 50:50 для train:val) или в ошибках в моём коде. Сейчас я ищу решение и недавно узнал, что заморозка определённых слоёв или стадий ResNet может помочь улучшить результат.

(спасибо pskrunner14 на GitHub за код ResNet; я лишь добавил немного кода под свои задачи)

Вопрос: как заморозить определённый слой или стадию в моей модели ResNet50 (см. ниже), чтобы слой не обучался и его веса не обновлялись? (пример: я хочу заморозить всё до 4-й стадии включительно и оставить обучаемой 5-ю стадию)

Спасибо за любую помощь или совет; я открыт и для других решений.

Вот мой код модели:

import keras
import numpy as np

# Alternative, seeded initializer kept for reference (not active):
# initializer = keras.initializers.glorot_uniform(seed=0)

# Weight initializer shared by every Conv2D and Dense layer built in this module.
initializer = keras.initializers.glorot_normal()


def create_model(input_shape=(64, 64, 3), classes=3):
    """Create a Residual Network with 50 layers.

    Args:
        input_shape: Shape of a single input image, passed to
            ``keras.layers.Input``.
        classes: Number of units of the final softmax layer; it also forms
            the name of that layer (``'fc<classes>'``).

    Returns:
        The uncompiled ``keras.models.Model`` named ``'resnet50'``.
    """
    layers = keras.layers

    # One row per residual stage: (stage number, filters of the three
    # convolutions, stride of the first block, block letters).  Block 'a' is
    # a convolutional block, every following letter an identity block.
    stage_table = (
        (2, [64, 64, 256], 1, 'abc'),
        (3, [128, 128, 512], 2, 'abcd'),
        (4, [256, 256, 1024], 2, 'abcdef'),
        (5, [512, 512, 2048], 2, 'abc'),
    )

    # Input tensor followed by the stage-1 stem:
    # zero-padding -> 7x7 convolution -> batch norm -> ReLU -> max pooling.
    X_input = layers.Input(input_shape)
    X = layers.ZeroPadding2D((3, 3))(X_input)
    X = layers.Conv2D(
        64, (7, 7), strides=(2, 2), name='conv1',
        kernel_initializer=initializer,
    )(X)
    X = layers.BatchNormalization(axis=3, name='bn_conv1')(X)
    X = layers.Activation('relu')(X)
    X = layers.MaxPooling2D((3, 3), strides=(2, 2))(X)

    # Stages 2-5
    for stage, filters, stride, letters in stage_table:
        X = convolutional_block(
            X, f=3, filters=filters, stage=stage, block=letters[0], s=stride,
        )
        for letter in letters[1:]:
            X = identity_block(X, 3, filters, stage=stage, block=letter)

    # Average pooling, then the flattened softmax classifier.
    X = layers.AveragePooling2D(pool_size=(2, 2))(X)
    X = layers.Flatten()(X)
    X = layers.Dense(
        classes, activation='softmax', name='fc{}'.format(classes),
        kernel_initializer=initializer,
    )(X)

    return keras.models.Model(inputs=X_input, outputs=X, name='resnet50')

def identity_block(X, f, filters, stage, block):
    """Identity block of ResNet: three convolutions plus an unchanged shortcut.

    Args:
        X: Input tensor; it is also added, unmodified, to the block output.
        f: Kernel size of the middle (f x f) convolution.
        filters: Three filter counts, one per convolution of the main path.
        stage: Stage number, used to build the layer names.
        block: Block letter (string), used to build the layer names.

    Returns:
        The tensor produced by ``relu(main_path(X) + X)``.
    """
    # Layer names look like 'res<stage><block>_branch2a' / 'bn<stage><block>_branch2a'.
    tag = str(stage) + block + '_branch'
    conv_prefix = 'res' + tag
    bn_prefix = 'bn' + tag

    n_first, n_middle, n_last = filters

    # Kept aside so it can be added back to the main path at the end.
    shortcut = X

    def conv_bn(tensor, n_filters, kernel, padding, suffix):
        """Apply a stride-1 convolution followed by batch normalisation."""
        tensor = keras.layers.Conv2D(
            filters=n_filters, kernel_size=kernel, strides=(1, 1),
            padding=padding, name=conv_prefix + suffix,
            kernel_initializer=initializer,
        )(tensor)
        return keras.layers.BatchNormalization(
            axis=3, name=bn_prefix + suffix,
        )(tensor)

    # First and second components: conv -> BN -> ReLU -> dropout.
    leading_components = (
        (n_first, (1, 1), 'valid', '2a'),
        (n_middle, (f, f), 'same', '2b'),
    )
    for n_filters, kernel, padding, suffix in leading_components:
        X = conv_bn(X, n_filters, kernel, padding, suffix)
        X = keras.layers.Activation('relu')(X)
        X = keras.layers.Dropout(0.5)(X)

    # Third component: conv -> BN only; the ReLU comes after the addition.
    X = conv_bn(X, n_last, (1, 1), 'valid', '2c')

    merged = keras.layers.Add()([X, shortcut])
    return keras.layers.Activation('relu')(merged)

def convolutional_block(X, f, filters, stage, block, s=2):
    """Convolutional block of ResNet: main path plus a convolved shortcut.

    Args:
        X: Input tensor.
        f: Kernel size of the middle (f x f) convolution.
        filters: Three filter counts, one per convolution of the main path;
            the last one is also used by the shortcut convolution.
        stage: Stage number, used to build the layer names.
        block: Block letter (string), used to build the layer names.
        s: Stride of the first main-path convolution and of the shortcut
            convolution.

    Returns:
        The tensor produced by ``relu(main_path(X) + shortcut(X))``.
    """
    L = keras.layers

    # Layer names look like 'res<stage><block>_branch2a' / 'bn<stage><block>_branch2a'.
    tag = str(stage) + block + '_branch'
    conv_prefix, bn_prefix = 'res' + tag, 'bn' + tag

    n_first, n_middle, n_last = filters
    block_input = X

    # Main path, first component: strided 1x1 convolution.
    main = L.Conv2D(n_first, (1, 1), strides=(s, s), name=conv_prefix + '2a',
                    padding='valid', kernel_initializer=initializer)(block_input)
    main = L.BatchNormalization(axis=3, name=bn_prefix + '2a')(main)
    main = L.Activation('relu')(main)
    main = L.Dropout(0.5)(main)

    # Main path, second component: f x f convolution.
    main = L.Conv2D(n_middle, (f, f), strides=(1, 1), name=conv_prefix + '2b',
                    padding='same', kernel_initializer=initializer)(main)
    main = L.BatchNormalization(axis=3, name=bn_prefix + '2b')(main)
    main = L.Activation('relu')(main)
    main = L.Dropout(0.5)(main)

    # Main path, third component: 1x1 convolution, no activation yet.
    main = L.Conv2D(n_last, (1, 1), strides=(1, 1), name=conv_prefix + '2c',
                    padding='valid', kernel_initializer=initializer)(main)
    main = L.BatchNormalization(axis=3, name=bn_prefix + '2c')(main)

    # Shortcut path: strided 1x1 convolution with the same filter count as
    # the last main-path convolution, so both branches can be added.
    skip = L.Conv2D(n_last, (1, 1), strides=(s, s), name=conv_prefix + '1',
                    padding='valid', kernel_initializer=initializer)(block_input)
    skip = L.BatchNormalization(axis=3, name=bn_prefix + '1')(skip)

    # Merge both branches, then apply the final ReLU.
    return L.Activation('relu')(L.Add()([main, skip]))

и вот мой код обучения:

import os
import click
import logging

import keras
import numpy as np
import keras.backend as K


from model import create_model
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, multilabel_confusion_matrix
from sklearn import metrics
import pandas as pd 
import seaborn as sn

# Use the channels-last image layout; the model's BatchNormalization layers
# are built with axis=3, which is the channel axis in this layout.
K.set_image_data_format('channels_last')

def _plot_history(history, metric, title, ylabel, filename):
    """Plot a training curve next to its validation curve, save and show it."""
    plt.plot(history[metric])
    plt.plot(history['val_' + metric])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.savefig(filename)
    plt.show()


def _export_table(frame, stem):
    """Print a DataFrame and write it to ``<stem>.xlsx`` and ``<stem>.csv``."""
    print(frame)
    frame.to_excel(stem + '.xlsx', index=True)
    frame.to_csv(stem + '.csv', index=True)


@click.command(name='Training Configuration')
@click.option(
    '-lr',
    '--learning-rate',
    default=0.001,
    help='Learning rate for minimizing loss during training'
)
@click.option(
    '-bz',
    '--batch-size',
    default=32,
    help='Batch size of minibatches to use during training'
)
@click.option(
    '-ne',
    '--num-epochs',
    default=100,
    help='Number of epochs for training model'
)
@click.option(
    '-se',
    '--save-every',
    default=1,
    help='Epoch interval to save model checkpoints during training'
)
@click.option(
    '-tb',
    '--tensorboard-vis',
    is_flag=True,
    help='Flag for TensorBoard Visualization'
)
@click.option(
    '-ps',
    '--print-summary',
    is_flag=True,
    help='Flag for printing summary of the model'
)
def train(learning_rate, batch_size, num_epochs, save_every, tensorboard_vis, print_summary):
    """Train the ResNet-50 model, then evaluate it and export the reports."""
    # All parameters come from the click options declared above.
    setup_paths()

    datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

    def get_gen(subset, shuffle=True):
        """Return a directory iterator over one dataset split."""
        return datagen.flow_from_directory(
            'datasets/caltech_101/{}'.format(subset),
            target_size=(64, 64),
            batch_size=batch_size,
            class_mode='categorical',
            shuffle=shuffle
        )

    # generator objects
    train_generator = get_gen('train')
    val_generator = get_gen('val')
    # The test split must not be shuffled: the reports below compare the
    # predictions with `test_generator.classes`, which is in directory order.
    test_generator = get_gen('test', shuffle=False)

    if os.path.exists('models/resnet50.h5'):
        # load model
        logging.info('loading pre-trained model')
        resnet50 = keras.models.load_model('models/resnet50.h5')
    else:
        # create model
        logging.info('creating model')
        resnet50 = create_model(input_shape=(64, 64, 3), classes=3)

    optimizer = keras.optimizers.Adam(learning_rate)
    resnet50.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    if print_summary:
        resnet50.summary()

    callbacks = configure_callbacks(save_every, tensorboard_vis)

    # train model
    # Step counts are taken from the generators (number of batches per pass)
    # instead of hard-coded sample counts, so they follow the dataset and
    # the chosen batch size.
    logging.info('training model')
    archi = resnet50.fit_generator(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=num_epochs,
        verbose=1,
        validation_data=val_generator,
        validation_steps=len(val_generator),
        shuffle=True,
        callbacks=callbacks
    )
    # save model
    logging.info('Saving trained model to `models/resnet50.h5`')
    resnet50.save('models/resnet50.h5')

    # evaluate model
    logging.info('evaluating model')
    preds = resnet50.evaluate_generator(
        test_generator,
        steps=len(test_generator),
        verbose=1
    )
    logging.info('test loss: {:.4f} - test acc: {:.4f}'.format(preds[0], preds[1]))

    keras.utils.plot_model(resnet50, to_file='models/resnet50.png')

    # Visualize training vs. validation accuracy and loss.
    history = archi.history
    # Older Keras releases log the metric as 'acc', newer ones as 'accuracy'.
    acc_key = 'acc' if 'acc' in history else 'accuracy'
    _plot_history(history, acc_key, 'model accuracy', 'acc', 'train vs val acc.png')
    _plot_history(history, 'loss', 'model loss', 'loss', 'train vs val loss.png')

    # Predict the whole test split exactly once, starting from its first batch.
    test_generator.reset()
    Y_pred = resnet50.predict_generator(test_generator, steps=len(test_generator))
    y_pred = np.argmax(Y_pred, axis=1)
    y_true = test_generator.classes

    # NOTE(review): assumes the class folders sort in this order, since the
    # generator assigns class indices itself -- confirm against the dataset.
    class_names = ['npd', 'npe', 'porn']

    # make a confusion matrix
    clf_cm = confusion_matrix(y_true, y_pred)
    print('Confusion Matrix')
    print(clf_cm)
    print('Classification Report')
    print(classification_report(y_true, y_pred, target_names=class_names))

    # Per-class precision / recall / f1 / support plus an "avg/total" row:
    # mean for the three scores, sum for the support column.
    precision, recall, f1_score, support = metrics.precision_recall_fscore_support(y_true, y_pred)
    out_df = pd.DataFrame(
        {
            "precision": precision.round(2),
            "recall": recall.round(2),
            "f1-score": f1_score.round(2),
            "support": support,
        },
        index=class_names
    )
    avg_tot = out_df.apply(
        lambda col: round(col.mean(), 2) if col.name != "support" else round(col.sum(), 2)
    ).to_frame().T
    avg_tot.index = ["avg/total"]
    # pd.concat replaces DataFrame.append, which newer pandas no longer has.
    out_df = pd.concat([out_df, avg_tot])
    _export_table(out_df, 'Precision recall fscore support')

    # Overall accuracy as a one-cell table.
    clf_acc = metrics.accuracy_score(y_true, y_pred)
    out_df_acc = pd.DataFrame({"": [clf_acc]}, index=['acc'])
    _export_table(out_df_acc, 'acc')

    # Confusion matrix as a table: rows are true classes, columns predictions.
    out_df_cm = pd.DataFrame(
        clf_cm,
        index=class_names,
        columns=['predicted as  npd', 'predicted as npe', 'predicted as porn']
    )
    _export_table(out_df_cm, 'conf matrix')





def configure_callbacks(save_every=1, tensorboard_vis=False):
    """Configure the callbacks used for training.

    Args:
        save_every: Epoch interval handed to ``ModelCheckpoint`` as ``period``.
        tensorboard_vis: When true, a ``TensorBoard`` callback writing to
            ``./logs`` is appended.

    Returns:
        A list with the checkpoint, learning-rate and early-stopping
        callbacks, plus the TensorBoard callback if requested.
    """
    cb = keras.callbacks

    callbacks = [
        # checkpoint models only when `val_loss` improves
        cb.ModelCheckpoint(
            'models/ckpts/model.ckpt',
            monitor='val_loss',
            save_best_only=True,
            period=save_every,
            verbose=1,
        ),
        # reduce LR when `val_loss` plateaus
        cb.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.1,
            patience=5,
            verbose=1,
            min_lr=1e-10,
        ),
        # early stopping when `val_loss` stops improving
        cb.EarlyStopping(
            monitor='val_loss',
            min_delta=0,
            patience=10,
            verbose=1,
        ),
    ]

    if not tensorboard_vis:
        return callbacks

    # tensorboard visualization callback
    callbacks.append(
        cb.TensorBoard(log_dir='./logs', write_graph=True, write_images=True)
    )
    return callbacks

def setup_paths():
    """Create the ``models/ckpts`` directory (and ``models``) if missing."""
    # makedirs creates the parent as needed and, with exist_ok, does not
    # fail when the directories already exist.
    os.makedirs('models/ckpts', exist_ok=True)

def main():
    """Set up INFO-level logging and run the training command.

    A ``KeyboardInterrupt`` raised out of ``train()`` is caught and answered
    by printing ``EXIT``.
    """
    logging.basicConfig(level='INFO', format='%(levelname)s %(message)s')

    try:
        train()
    except KeyboardInterrupt:
        print('EXIT')


if __name__ == '__main__':
    main()

Вот результат на данный момент :

Train vs. val accuracy (точность на обучении и валидации)

Train vs. val loss (потери на обучении и валидации)

Матрица ошибок (confusion matrix) и отчёт о классификации

Матрица ошибок

1 Ответ

2 голосов
/ 23 февраля 2020

Вы можете сделать что-то вроде этого:

# Freeze the first ten layers and keep every later layer trainable.
for position, layer in enumerate(model.layers):
    layer.trainable = position >= 10

Просмотрите model.layers и решите, какие именно слои вы хотите заморозить.

В вашем случае вы можете попробовать это:

# Freeze everything up to stage 4 and train only stage 5 plus the classifier.
# The model in the question names its layers 'res<stage><block>_branch...',
# 'bn<stage><block>_branch...' and 'fc<classes>'; no name contains 'conv5'
# and no layer is called 'probs', so the match uses the real prefixes.
trainable_prefixes = ('res5', 'bn5', 'fc')
for layer in model.layers:
    # Auto-named stage-5 layers (Activation, Dropout, Add) end up frozen as
    # well; that is harmless because they hold no weights.
    layer.trainable = layer.name.startswith(trainable_prefixes)
# Compile the model after changing `trainable` so the change takes effect.
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...