Question

Я разработал глубокую свёрточную нейронную сеть для многоклассовой классификации изображений на Keras с TensorFlow в качестве бэкенда. Я обнаружил, что моя модель работает лучше, когда я использую набор данных из 1000 изображений, чем когда я использую набор из 5000–25 000 изображений. Я знаю, что глубокое обучение обычно работает лучше на больших наборах данных, но в моём случае это не так. Буду благодарен за любую подсказку или помощь. Ниже мой код:

    from keras.utils import to_categorical
    from keras.preprocessing import image
    import numpy as np
    import pandas as pd
    from tqdm import tqdm

    from sklearn.model_selection import train_test_split

    # Importing Keras packages
    from keras.models import Sequential
    from keras.layers import Convolution2D, Conv2D
    from keras.layers import MaxPooling2D
    from keras.layers import AveragePooling2D
    from keras.layers import Flatten
    from keras.layers import Dense, Dropout, BatchNormalization
    from matplotlib import pyplot as plt

    # demonstration of calculating metrics for a neural network model using sklearn
    from sklearn.metrics import matthews_corrcoef
    from sklearn.metrics import precision_score
    from sklearn.metrics import multilabel_confusion_matrix
    from sklearn.metrics import roc_auc_score, average_precision_score,f1_score,recall_score
    from sklearn.metrics import confusion_matrix, balanced_accuracy_score

    # Read the ground-truth table. The 'Original' column holds the image file
    # stem; every column that is not dropped below is used as a label column.
    train_csv = pd.read_csv('dataset/ISIC_2019_Training_GroundTruth1k.csv')

    # Load each listed image, resize it to 32x32, convert it to an array and
    # divide the pixel values by 255.
    train_image = []
    for image_id in tqdm(train_csv['Original']):
        # target_size is documented as (height, width); the channel count is
        # not part of it, so it is no longer passed here.
        img = image.load_img('dataset/data/' + image_id + '.jpg', target_size=(32, 32))
        img = image.img_to_array(img)
        img = img/255
        train_image.append(img)
    X = np.array(train_image)

    X.shape

    # Target matrix: all CSV columns except the three file-name columns.
    y = np.array(train_csv.drop(['Original', 'Filtered', 'Segmented'], axis=1))
    y.shape

    # Hold out 20% of the samples; the fixed seed keeps the split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

    # Initialising the CNN
    classifier = Sequential()

    # First convolution stage: two 3x3 convolutions with 32 filters each.
    # The kernel size is passed as a tuple on purpose: in the Keras 2 signature
    # the third positional argument is `strides`, so the Keras 1 spelling
    # `Convolution2D(32, 3, 3)` only means "3x3 kernel" where the legacy
    # compatibility shim is present and otherwise builds a stride-3 convolution.
    classifier.add(Conv2D(32, (3, 3), input_shape=(32, 32, 3), activation="relu"))
    classifier.add(Conv2D(32, (3, 3), activation="relu"))
    classifier.add(Dropout(0.25))
    classifier.add(MaxPooling2D(pool_size=(2, 2)))

    # Second convolution stage: same layout, followed by average pooling.
    classifier.add(Conv2D(32, (3, 3), activation="relu"))
    classifier.add(Conv2D(32, (3, 3), activation="relu"))
    classifier.add(Dropout(0.25))
    classifier.add(AveragePooling2D(pool_size=(2, 2)))

    # Flattening the feature maps for the dense head
    classifier.add(Flatten())


    # Dense head: 128 hidden units, dropout, then a 9-way softmax output.
    classifier.add(Dense(128, activation="relu"))
    classifier.add(Dropout(0.5))
    classifier.add(Dense(9, activation="softmax"))

    classifier.summary()

    # Configure training: Adam optimiser, categorical cross-entropy loss,
    # accuracy tracked as the only metric.
    classifier.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    # Train for 60 epochs, using the held-out split as validation data;
    # the returned History object keeps the logged per-epoch values.
    history = classifier.fit(
        X_train,
        y_train,
        epochs=60,
        validation_data=(X_test, y_test),
    )

    # Evaluate the trained model on both splits. With a single compiled metric,
    # evaluate() returns [loss, accuracy]; both values are now kept and reported
    # (previously they were computed and then discarded).
    train_loss, train_acc = classifier.evaluate(X_train, y_train, verbose=0)
    test_loss, test_acc = classifier.evaluate(X_test, y_test, verbose=0)

    # Report the final model's numbers rather than the best epoch found in
    # `history`: these match the model used for every metric computed later,
    # and do not depend on the 'acc' vs 'accuracy' history key naming.
    print('Train Loss: %.3f, Test Loss: %.3f' % (train_loss, test_loss))

    print('Train Accuracy: %.3f, Test Accuracy: %.3f' % (train_acc, test_acc))

    # Per-class probabilities predicted for the test set
    yhat_probs = classifier.predict(X_test, verbose=0)
    yhat_probs[1]

    # Predicted class index = position of the largest probability.
    # (Sequential.predict_classes is not available in recent Keras releases;
    # for a softmax output it computed exactly this argmax.)
    yhat_classes = np.argmax(yhat_probs, axis=1)
    yhat_classes[1]

    # Convert the ground-truth rows to 1D class indices
    rounded_labels = np.argmax(y_test, axis=1)
    rounded_labels[1]

    balanced_accuracy = balanced_accuracy_score(rounded_labels, yhat_classes)
    print('Balanced Accuracy: %f' % balanced_accuracy)

    matthews = matthews_corrcoef(rounded_labels, yhat_classes)
    print('Matthews: %f' % matthews)

    # confusion matrix
    matrix = confusion_matrix(rounded_labels, yhat_classes)
    print("Confusion Matrix: %s" % matrix)

    multilabel_matrix = multilabel_confusion_matrix(rounded_labels, yhat_classes)
    print("Multilabel Matrix: %s" % multilabel_matrix)

    # The target is multiclass, so an averaging mode must be given explicitly:
    # the default average='binary' raises ValueError for more than two classes.
    # 'macro' weights every class equally, in line with the balanced accuracy above.
    precision = precision_score(rounded_labels, yhat_classes, average='macro')
    print("Precision %f" % precision)

    f1 = f1_score(rounded_labels, yhat_classes, average='macro')
    print("F1: %f" % f1)

    recall = recall_score(rounded_labels, yhat_classes, average='macro')
    print("Recall: %f" % recall)

    # Ranking metrics need scores, not hard class indices: feed the indicator
    # matrix and the predicted probabilities instead.
    # NOTE(review): assumes y_test rows are 0/1 class indicators - confirm against the CSV.
    # Classes with no positive (or no negative) sample in the test split are
    # left out, because these scores are undefined for them.
    positives_per_class = y_test.sum(axis=0)
    scorable = (positives_per_class > 0) & (positives_per_class < y_test.shape[0])

    average_precision = average_precision_score(y_test[:, scorable], yhat_probs[:, scorable], average='macro')
    print("Average Precision: %f" % average_precision)

    # ROC AUC
    roc_auc = roc_auc_score(y_test[:, scorable], yhat_probs[:, scorable], average='macro')
    print('ROC AUC: %f' % roc_auc)

    # Keras before 2.3 logs accuracy under 'acc'/'val_acc'; later releases use
    # 'accuracy'/'val_accuracy'. Pick whichever key this run actually produced.
    acc_key = 'acc' if 'acc' in history.history else 'accuracy'

    # plot loss during training
    plt.subplot(211)
    plt.title('Loss')
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='test')
    plt.legend()
    # plot accuracy during training
    plt.subplot(212)
    plt.title('Accuracy')
    plt.plot(history.history[acc_key], label='train')
    plt.plot(history.history['val_' + acc_key], label='test')
    plt.legend()
    # Keep the lower subplot's title from overlapping the upper subplot's axis
    plt.tight_layout()
    plt.show()

Result Screenshot with 1000 dataset Result Screenshot with 5000 dataset

Antonio Paladini · Answer 1 · 30 января 2020

Было бы полезно построить график изменения потерь на обучении и валидации за все 60 эпох. Это можно сделать следующим образом:

import matplotlib.pyplot as plt 

# Draw the training and validation loss curves from the fit history
for curve_name in ('loss', 'val_loss'):
    plt.plot(history.history[curve_name])

# Axis labels, title and legend (legend entries follow the plotting order)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('model loss')
plt.legend(['train', 'val_loss'], loc='upper right')

Возможно, при большем числе образцов нужно использовать больше эпох — это можно понять, посмотрев, как меняется потеря на обучении. Также следует проверить распределение данных, которые вы используете. Если 5000 используемых образцов распределены иначе, чем первые 1000, это может быть причиной различия в результатах. Попробуйте использовать больше данных, включив оба набора.

Harsh Gupta · Answer 2 · 30 января 2020

При использовании большого набора данных стоит попробовать следующие приёмы:

Попробуйте использовать трансферное обучение на предварительно обученных моделях https://keras.io/applications/
Попробуйте использовать аугментацию данных: https://keras.io/preprocessing/image/
Вы также можете попробовать K-блочную перекрёстную проверку (K-fold cross-validation) для разбиения на обучающую и тестовую выборки: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html

Почему моя глубокая свёрточная нейронная сеть лучше работает с небольшим набором данных?

Пожалуйста, войдите или зарегистрируйтесь, чтобы ответить на этот вопрос.

Ответы [ 2 ]

Пожалуйста, войдите или зарегистрируйтесь, чтобы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь, чтобы добавить комментарий.

Почему моя глубокая свёрточная нейронная сеть лучше работает с небольшим набором данных?

Пожалуйста, войдите или зарегистрируйтесь, чтобы ответить на этот вопрос.

Ответы [ 2 ]

Пожалуйста, войдите или зарегистрируйтесь, чтобы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь, чтобы добавить комментарий.

Похожие темы