Я использую модель Inception v3 для определения заболевания по рентгенограмме грудной клетки. Для обучения я использую набор данных NIH Chest X-ray Dataset. В наборе данных представлено 14 различных классов заболеваний; кроме того, я уменьшил исходное разрешение изображений, чтобы сократить размер набора данных на диске. Поскольку у меня нет графического процессора, я обучаю модель в Google Colab и беру только по 300 изображений на класс для всех миноритарных классов и 400 изображений для класса «No Finding» («нет находок», мажоритарный класс). Пожалуйста, укажите на ошибки в моём коде, если они есть, и предложите другие подходы, которые помогут добиться более высокой точности.
# Reproducibility setup (Keras FAQ recipe for the TF1 backend): seed every
# random number generator and hand Keras a single-threaded session *before*
# any model or generator is created.
import numpy as np
import tensorflow as tf
import random as rn
import os

# NOTE: PYTHONHASHSEED is read at interpreter start-up, so also export it in
# the environment if hash ordering must be fixed for this very process.
os.environ['PYTHONHASHSEED'] = '0'

# Seed NumPy (used by Keras for weight init / shuffling) and the stdlib RNG.
np.random.seed(42)
rn.seed(12345)

# Single-threaded op execution removes one source of run-to-run variation.
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)

from keras import backend as K

# Seed TensorFlow's graph-level RNG, then register the session with Keras;
# without set_session() the configuration above is never actually used.
tf.set_random_seed(1234)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
# from keras import backend as K
from keras.callbacks import ModelCheckpoint
from keras.callbacks import TensorBoard
from keras.layers.core import Flatten, Dense, Dropout, Reshape, Lambda
from keras.layers.normalization import BatchNormalization
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
# import os.path
'''F1 score calculation class'''
# import numpy as np
# from keras.callbacks import Callback
# from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
# class Metrics(Callback):
# def on_train_begin(self, logs={}):
# self.val_f1s = []
# self.val_recalls = []
# self.val_precisions = []
# def on_epoch_end(self, epoch, logs={}):
# val_predict = (np.asarray(self.model.predict(self.model.validation_data[0]))).round()
# val_targ = self.model.validation_data[1]
# _val_f1 = f1_score(val_targ, val_predict)
# _val_recall = recall_score(val_targ, val_predict)
# _val_precision = precision_score(val_targ, val_predict)
# self.val_f1s.append(_val_f1)
# self.val_recalls.append(_val_recall)
# self.val_precisions.append(_val_precision)
# print(" — val_f1: %f — val_precision: %f — val_recall %f" % (_val_f1, _val_precision, _val_recall))
# return
# metrics = Metrics()
# ---- Run configuration ------------------------------------------------------
# InceptionV3's native input resolution.
img_width = img_height = 299

# Checkpoint / weight files written during the two training phases.
top_layers_checkpoint_path = 'cp.top.best.hdf5'
fine_tuned_checkpoint_path = 'cp.fine_tuned.best.hdf5'
new_extended_inception_weights = 'final_weights.hdf5'

# Dataset location (one sub-directory per class) and its size.
train_data_dir = 'drive/My Drive/Colab Notebooks/Sample-300-XRay-Dataset/train'
validation_data_dir = 'drive/My Drive/Colab Notebooks/Sample-300-XRay-Dataset/test'
nb_train_samples = 3528
nb_validation_samples = 896

# Epoch budget for each phase and the shared mini-batch size.
top_epochs = 50
fit_epochs = 50
batch_size = 24

# ---- Network ----------------------------------------------------------------
# ImageNet-pretrained convolutional trunk, without its original classifier.
base_model = InceptionV3(include_top=False, weights='imagenet')

# New classification head: global average pooling followed by two
# Dense(ReLU) + BatchNorm stages (dropout is currently disabled).
x = GlobalAveragePooling2D()(base_model.output)
for hidden_units in (1024, 512):
    x = Dense(hidden_units, activation='relu')(x)
    x = BatchNormalization()(x)

# Softmax over the 15 classes (14 diseases + the "no finding" class).
predictions = Dense(15, activation='softmax')(x)

# The full model that gets trained: pretrained trunk + new head.
model = Model(inputs=base_model.input, outputs=predictions)
# Resume phase 1 from a previous run if its best checkpoint is on disk.
# The message used to be printed without the weights ever being loaded.
if os.path.exists(top_layers_checkpoint_path):
    model.load_weights(top_layers_checkpoint_path)
    print("Checkpoint '" + top_layers_checkpoint_path + "' loaded.")

# Phase 1: train only the new, randomly initialised head, i.e. freeze every
# convolutional InceptionV3 layer so the pretrained features stay intact.
for layer in base_model.layers:
    layer.trainable = False

# Compile *after* changing `trainable`; the flags are captured at compile time.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
# Input pipeline: stream images from the class-per-folder directories.
# The ImageNet InceptionV3 weights expect pixels scaled to [-1, 1] by
# inception_v3.preprocess_input; a plain 1/255 rescale feeds the frozen
# trunk a value range it was not trained on.
from keras.applications.inception_v3 import preprocess_input

# NOTE(review): no augmentation options are configured here; add any that
# are appropriate for chest X-rays to the training generator only.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# One-hot ('categorical') labels match the softmax / categorical_crossentropy
# head; class indices are derived from the sub-directory names.
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')
# Keep only the best phase-1 model (by validation accuracy) on disk.
# NOTE(review): the metric key is 'val_acc' on Keras <= 2.2.x; newer releases
# report 'val_accuracy' -- confirm against the installed version.
mc_top = ModelCheckpoint(top_layers_checkpoint_path, monitor='val_acc', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)

# TensorBoard logs shared by both training phases.
tb = TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=True)

# Phase 1 training: only the new head is trainable at this point.
# steps_per_epoch / validation_steps count *batches*, which is what the
# `samples // batch_size` expressions compute; the Keras-1 names
# samples_per_epoch / nb_val_samples counted samples and are deprecated.
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=top_epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size,
    callbacks=[mc_top, tb])
# Phase 2: the new head is trained, so the upper convolutional layers of
# InceptionV3 can now be fine-tuned together with it. The lower layers stay
# frozen.
#
# To choose the cut-off, list layer indices and names:
# for i, layer in enumerate(base_model.layers):
#     print(i, layer.name)

# Keep only the best fine-tuned model (by validation accuracy) on disk.
mc_fit = ModelCheckpoint(fine_tuned_checkpoint_path, monitor='val_acc', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)

# Resume from an earlier fine-tuning run if its checkpoint exists. The
# message used to be printed without the weights ever being loaded.
if os.path.exists(fine_tuned_checkpoint_path):
    model.load_weights(fine_tuned_checkpoint_path)
    print("Checkpoint '" + fine_tuned_checkpoint_path + "' loaded.")

# Freeze the first 172 layers and unfreeze everything above them (including
# the new Dense/BatchNorm head).
# NOTE(review): the Keras applications guide uses 249 as the cut-off for
# "the top 2 inception blocks" in Keras 2; 172 appears to come from the
# older layer numbering -- confirm with the listing above.
for index, layer in enumerate(model.layers):
    layer.trainable = index >= 172

# Recompile so the new `trainable` flags take effect. SGD with a small
# learning rate keeps the pretrained weights from being wrecked.
from keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

# Fine-tune the unfrozen convolutional layers alongside the Dense head.
# steps_per_epoch / validation_steps count batches (Keras 2 names).
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=fit_epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size,
    callbacks=[mc_fit, tb])