Я разработал глубокую свёрточную нейронную сеть для многоклассовой классификации изображений на Keras с TensorFlow в качестве бэкенда. Я обнаружил, что модель работает лучше, когда я обучаю её на наборе из 1000 изображений, чем на наборах из 5000–25 000 изображений. Я знаю, что глубокое обучение обычно работает лучше на больших наборах данных, но в моём случае это не так. Буду благодарен за любую подсказку или помощь. Ниже мой код:
from keras.utils import to_categorical
from keras.preprocessing import image
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
# Importing Keras packages
from keras.models import Sequential
from keras.layers import Convolution2D, Conv2D
from keras.layers import MaxPooling2D
from keras.layers import AveragePooling2D
from keras.layers import Flatten
from keras.layers import Dense, Dropout, BatchNormalization
from matplotlib import pyplot as plt
# demonstration of calculating metrics for a neural network model using sklearn
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import precision_score
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import roc_auc_score, average_precision_score,f1_score,recall_score
from sklearn.metrics import confusion_matrix, balanced_accuracy_score
# Load the ground-truth table. The 'Original' column holds the image file stems;
# every column left after dropping 'Original', 'Filtered' and 'Segmented' is
# used as a label column below.
train_csv = pd.read_csv('dataset/ISIC_2019_Training_GroundTruth1k.csv')

# Read every image listed in the CSV, resize it to 32x32 and divide the pixel
# values by 255.
train_image = []
for name in tqdm(train_csv['Original']):
    # target_size is (height, width); the channel count does not belong here.
    img = image.load_img('dataset/data/' + name + '.jpg', target_size=(32, 32))
    img = image.img_to_array(img)
    img = img / 255
    train_image.append(img)
X = np.array(train_image)
X.shape  # notebook-style echo; has no effect when run as a script
y = np.array(train_csv.drop(['Original', 'Filtered', 'Segmented'], axis=1))
y.shape  # notebook-style echo; has no effect when run as a script
# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)
# Build, compile and train the CNN.
# The input shape and the number of output units are read from the training
# arrays so the network always matches the data it is fitted on.
input_shape = X_train.shape[1:]
num_classes = y_train.shape[1]

# Initializing the CNN
classifier = Sequential()

# First convolution stage: two 3x3 convolutions, dropout, then max pooling.
# The kernel size is passed as a tuple: with the Keras 2 Conv2D signature a
# third positional integer is the stride, not the kernel width.
classifier.add(Conv2D(32, (3, 3), input_shape=input_shape, activation="relu"))
classifier.add(Conv2D(32, (3, 3), activation="relu"))
classifier.add(Dropout(0.25))
classifier.add(MaxPooling2D(pool_size=(2, 2)))

# Second convolution stage: two 3x3 convolutions, dropout, then average pooling.
classifier.add(Conv2D(32, (3, 3), activation="relu"))
classifier.add(Conv2D(32, (3, 3), activation="relu"))
classifier.add(Dropout(0.25))
classifier.add(AveragePooling2D(pool_size=(2, 2)))

# Flatten the feature maps and classify with a small dense head.
classifier.add(Flatten())
classifier.add(Dense(128, activation="relu"))
classifier.add(Dropout(0.5))
# One softmax unit per label column.
classifier.add(Dense(num_classes, activation="softmax"))
classifier.summary()

# Compiling the CNN
classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# NOTE: the held-out split is passed as validation data, so the validation
# curves recorded in `history` are computed on X_test / y_test.
history = classifier.fit(X_train, y_train, epochs=60, validation_data=(X_test, y_test))
# evaluate the model
_, train_acc = classifier.evaluate(X_train, y_train, verbose=0)
_, test_acc = classifier.evaluate(X_test, y_test, verbose=0)

# Keras records the accuracy metric as 'acc' in some releases and as
# 'accuracy' in others; use whichever key this run produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
val_acc_key = 'val_' + acc_key

# These lines report the best value seen over all epochs (min loss, max
# accuracy), not the final-epoch values returned by evaluate() above.
print('Train Loss: %.3f, Test Loss: %.3f' % (np.amin(history.history['loss']), np.amin(history.history['val_loss'])))
print('Train Accuracy: %.3f, Test Accuracy: %.3f' % (np.amax(history.history[acc_key]), np.amax(history.history[val_acc_key])))

# Per-class probabilities for the held-out samples.
yhat_probs = classifier.predict(X_test, verbose=0)
yhat_probs[1]  # notebook-style echo; has no effect when run as a script
# Predicted class index = position of the largest probability. This replaces
# Sequential.predict_classes, which is not available in recent Keras releases.
yhat_classes = np.argmax(yhat_probs, axis=1)
yhat_classes[1]  # notebook-style echo; has no effect when run as a script
# Convert the label rows to 1D class indices
rounded_labels = np.argmax(y_test, axis=1)
rounded_labels[1]  # notebook-style echo; has no effect when run as a script

balanced_accuracy = balanced_accuracy_score(rounded_labels, yhat_classes)
print('Balanced Accuracy: %f' % balanced_accuracy)
matthews = matthews_corrcoef(rounded_labels, yhat_classes)
print('Matthews: %f' % matthews)
# confusion matrix
matrix = confusion_matrix(rounded_labels, yhat_classes)
print("Confusion Matrix: %s" % matrix)
multilabel_matrix = multilabel_confusion_matrix(rounded_labels, yhat_classes)
print("Multilabel Matrix: %s" % multilabel_matrix)

# precision/recall/F1 default to average='binary', which raises for
# multiclass targets; macro averaging weights every class equally.
precision = precision_score(rounded_labels, yhat_classes, average='macro')
print("Precision %f" % precision)
f1 = f1_score(rounded_labels, yhat_classes, average='macro')
print("F1: %f" % f1)
recall = recall_score(rounded_labels, yhat_classes, average='macro')
print("Recall: %f" % recall)

# Ranking metrics need scores rather than hard class indices, so they are
# computed per label column from the label matrix and the predicted
# probabilities. Columns whose held-out labels are all 0 or all 1 are skipped
# because these metrics are undefined without both outcomes present.
# NOTE(review): assumes y_test is a 0/1 indicator matrix - confirm against the CSV.
positives = y_test.sum(axis=0)
scorable = (positives > 0) & (positives < y_test.shape[0])
average_precision = average_precision_score(y_test[:, scorable], yhat_probs[:, scorable], average='macro')
print("Average Precision: %f" % average_precision)
# ROC AUC
roc_auc = roc_auc_score(y_test[:, scorable], yhat_probs[:, scorable], average='macro')
print('ROC AUC: %f' % roc_auc)
# Plot the training curves recorded by fit(): loss on top, accuracy below.
# Keras records the accuracy metric as 'acc' in some releases and as
# 'accuracy' in others; use whichever key this run produced.
plot_acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# plot loss during training
plt.subplot(211)
plt.title('Loss')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
# plot accuracy during training
plt.subplot(212)
plt.title('Accuracy')
plt.plot(history.history[plot_acc_key], label='train')
plt.plot(history.history['val_' + plot_acc_key], label='test')
plt.legend()
# Keep the lower subplot's title from overlapping the upper subplot's axis.
plt.tight_layout()
plt.show()