Мой CNN Keras не предсказывает должным образом, и я не знаю, что делать - PullRequest
1 голос
/ 20 октября 2019

Мне нужно сделать CNN для диагностики диабетической ретинопатии на 4-й стадии (двоичная классификация - или 0 (non4thStage - nonPdr), или 1 (4thStage - pdr)). Я использую vgg16 и gaussianBlur для лучшей классификации. У меня есть 1400 тестовых изображений (по 700 в каждом классе), и это мой train.py:

#import tensorflow as tf
import cv2
import os
import numpy as np

from keras.layers.core import Flatten, Dense, Dropout, Reshape
from keras.models import Model
from keras.layers import Input, ZeroPadding2D, Dropout
from keras import optimizers
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping

from keras.applications.vgg16 import VGG16

# Root folder of the training images; ReadImages expects one sub-folder per class.
TRAIN_DIR = 'train/'
# Root folder of the images passed as validation data during training.
TEST_DIR = 'test/'
# Not referenced anywhere in this script; predict.py reads its images from the same 'v/' path.
v = 'v/'
# Mini-batch size; the training call below feeds batches of 32.
BATCH_SIZE = 32
# NOTE(review): the training call below passes epochs=50 directly, so this
# constant does not control the number of epochs -- confirm which is intended.
NUM_EPOCHS = 5


def ReadImages(Path):
    """Load, resize and contrast-enhance every image below ``Path``.

    ``Path`` must contain one sub-folder per class, named ``nonPdr`` or
    ``pdr`` (hidden entries starting with ``.`` are ignored). Every file in
    a class folder is read with OpenCV, resized to 224x224 and filtered with
    ``4 * img - 4 * GaussianBlur(img, sigma=10) + 128``.

    Args:
        Path: directory holding the class sub-folders.

    Returns:
        A tuple ``(ImageCV, LabelList)``: the list of preprocessed images and
        the parallel list of integer labels (0 for ``nonPdr``, 1 for ``pdr``).

    Raises:
        ValueError: if a non-empty sub-folder is not named after a known class.
    """
    LabelList = list()
    ImageCV = list()
    classes = ["nonPdr", "pdr"]

    # Get all subdirectories
    FolderList = [f for f in os.listdir(Path) if not f.startswith('.')]

    # Loop over each directory
    for File in FolderList:
        FolderPath = os.path.join(Path, File)
        for Image in os.listdir(FolderPath):
            Label = classes.index(os.path.splitext(File)[0])

            ImagePath = os.path.join(FolderPath, Image)
            Raw = cv2.imread(ImagePath)
            if Raw is None:
                # cv2.imread returns None for files it cannot decode.
                print("Skipping unreadable image >>>", ImagePath)
                continue

            Resized = cv2.resize(Raw, (224, 224))
            # Filter the image *before* storing it. Indexing the shared list
            # with a per-folder counter would hit images of the first folder
            # again and leave every later folder unfiltered.
            Enhanced = cv2.addWeighted(
                Resized, 4, cv2.GaussianBlur(Resized, (0, 0), 10), -4, 128)

            ImageCV.append(Enhanced)
            LabelList.append(Label)
    return ImageCV, LabelList


# Load both splits: each call returns (list of images, list of 0/1 labels).
data, labels = ReadImages(TRAIN_DIR)
valid, vlabels = ReadImages(TEST_DIR)

vgg16_model = VGG16(weights="imagenet", include_top=True)

# (2) remove the top layer: keep the network up to the last pooling layer
base_model = Model(inputs=vgg16_model.input,
                   outputs=vgg16_model.get_layer("block5_pool").output)

# (3) attach a new top layer
base_out = base_model.output
base_out = Reshape((25088,))(base_out)
top_fc1 = Dense(64, activation="relu")(base_out)
# Dropout has to consume the Dense output; feeding it base_out again would
# silently leave the 64-unit layer out of the model.
top_fc1 = Dropout(0.50)(top_fc1)
# output layer: (None, 1) -- a single sigmoid unit for the binary decision
top_preds = Dense(1, activation="sigmoid")(top_fc1)

# (4) freeze weights until the last but one convolution layer (block4_pool)
for layer in base_model.layers[0:14]:
    layer.trainable = False

# (5) create new hybrid model
model = Model(inputs=base_model.input, outputs=top_preds)

# (6) compile and train the model
sgd = SGD(lr=0.000001, momentum=0.9)
model.compile(optimizer=sgd, loss="binary_crossentropy", metrics=["accuracy"])

data = np.asarray(data)
valid = np.asarray(valid)

data = data.astype('float32')
valid = valid.astype('float32')

# Scale pixel values to [0, 1] before the featurewise statistics are computed.
data /= 255
valid /= 255
labels = np.array(labels)
vlabels = np.array(vlabels)

datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

# compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
datagen.fit(data)
mean = datagen.mean
std = datagen.std

# These are the statistics any inference script has to reuse.
print(mean, "mean")
print(std, "std")

# The validation arrays do not pass through datagen.flow, so apply the same
# featurewise centering/scaling here; otherwise val_loss/val_acc are measured
# on inputs scaled differently from the training batches.
valid = datagen.standardize(valid)

es = EarlyStopping(monitor='val_loss', verbose=1)

# fits the model on batches with real-time data augmentation:
# steps_per_epoch must be a whole number of batches (the last one may be
# partial). The Keras 1 keyword nb_val_samples is dropped: validation data is
# given as arrays and is evaluated in full.
model.fit_generator(datagen.flow(data, labels, batch_size=BATCH_SIZE),
                    steps_per_epoch=int(np.ceil(len(data) / float(BATCH_SIZE))),
                    epochs=50,
                    validation_data=(valid, vlabels),
                    callbacks=[es])


model.save('model.h5')

Обучение выводит следующее:

Epoch 1/50
44/43 [==============================] - 475s 11s/step - loss: 0.9671 - acc: 0.4789 - val_loss: 0.6808 - val_acc: 0.6389
Epoch 2/50
44/43 [==============================] - 467s 11s/step - loss: 0.8427 - acc: 0.5007 - val_loss: 0.6364 - val_acc: 0.6389
Epoch 3/50
44/43 [==============================] - 468s 11s/step - loss: 0.7703 - acc: 0.5204 - val_loss: 0.6136 - val_acc: 0.6806
Epoch 4/50
44/43 [==============================] - 466s 11s/step - loss: 0.7324 - acc: 0.5512 - val_loss: 0.5941 - val_acc: 0.7500
Epoch 5/50
44/43 [==============================] - 466s 11s/step - loss: 0.7074 - acc: 0.5679 - val_loss: 0.5758 - val_acc: 0.7639
Epoch 6/50
44/43 [==============================] - 461s 10s/step - loss: 0.6640 - acc: 0.6146 - val_loss: 0.5584 - val_acc: 0.8194
Epoch 7/50
44/43 [==============================] - 455s 10s/step - loss: 0.6562 - acc: 0.6077 - val_loss: 0.5418 - val_acc: 0.8333
Epoch 8/50
44/43 [==============================] - 458s 10s/step - loss: 0.6076 - acc: 0.6700 - val_loss: 0.5263 - val_acc: 0.8889
Epoch 9/50
44/43 [==============================] - 456s 10s/step - loss: 0.5743 - acc: 0.7005 - val_loss: 0.5119 - val_acc: 0.9167
Epoch 10/50
44/43 [==============================] - 457s 10s/step - loss: 0.5649 - acc: 0.7041 - val_loss: 0.4981 - val_acc: 0.9306
Epoch 11/50
44/43 [==============================] - 452s 10s/step - loss: 0.5654 - acc: 0.7088 - val_loss: 0.4855 - val_acc: 0.9444
Epoch 12/50
44/43 [==============================] - 458s 10s/step - loss: 0.5046 - acc: 0.7616 - val_loss: 0.4740 - val_acc: 0.9444
Epoch 13/50
44/43 [==============================] - 465s 11s/step - loss: 0.5002 - acc: 0.7808 - val_loss: 0.4633 - val_acc: 0.9444
Epoch 14/50
44/43 [==============================] - 459s 10s/step - loss: 0.4694 - acc: 0.7924 - val_loss: 0.4514 - val_acc: 0.9583
Epoch 15/50
44/43 [==============================] - 463s 11s/step - loss: 0.4482 - acc: 0.8184 - val_loss: 0.4432 - val_acc: 0.9444
Epoch 16/50
44/43 [==============================] - 456s 10s/step - loss: 0.4326 - acc: 0.8343 - val_loss: 0.4330 - val_acc: 0.9583
Epoch 17/50
44/43 [==============================] - 454s 10s/step - loss: 0.4291 - acc: 0.8303 - val_loss: 0.4233 - val_acc: 0.9583
Epoch 18/50
44/43 [==============================] - 457s 10s/step - loss: 0.4060 - acc: 0.8376 - val_loss: 0.4145 - val_acc: 0.9583
Epoch 19/50
44/43 [==============================] - 457s 10s/step - loss: 0.3933 - acc: 0.8686 - val_loss: 0.4069 - val_acc: 0.9583
Epoch 20/50
44/43 [==============================] - 455s 10s/step - loss: 0.3786 - acc: 0.8684 - val_loss: 0.3985 - val_acc: 0.9583
Epoch 21/50
44/43 [==============================] - 456s 10s/step - loss: 0.3661 - acc: 0.8774 - val_loss: 0.3902 - val_acc: 0.9583
Epoch 22/50
44/43 [==============================] - 454s 10s/step - loss: 0.3493 - acc: 0.8956 - val_loss: 0.3833 - val_acc: 0.9583
Epoch 23/50
44/43 [==============================] - 456s 10s/step - loss: 0.3355 - acc: 0.9065 - val_loss: 0.3765 - val_acc: 0.9444
Epoch 24/50
44/43 [==============================] - 456s 10s/step - loss: 0.3332 - acc: 0.9053 - val_loss: 0.3680 - val_acc: 0.9583
Epoch 25/50
44/43 [==============================] - 457s 10s/step - loss: 0.3236 - acc: 0.9160 - val_loss: 0.3625 - val_acc: 0.9444
Epoch 26/50
44/43 [==============================] - 458s 10s/step - loss: 0.3097 - acc: 0.9181 - val_loss: 0.3559 - val_acc: 0.9583
Epoch 27/50
44/43 [==============================] - 469s 11s/step - loss: 0.2915 - acc: 0.9242 - val_loss: 0.3517 - val_acc: 0.9444
Epoch 28/50
44/43 [==============================] - 473s 11s/step - loss: 0.2832 - acc: 0.9368 - val_loss: 0.3454 - val_acc: 0.9583
Epoch 29/50
44/43 [==============================] - 468s 11s/step - loss: 0.2747 - acc: 0.9418 - val_loss: 0.3416 - val_acc: 0.9583
Epoch 30/50
44/43 [==============================] - 470s 11s/step - loss: 0.2627 - acc: 0.9508 - val_loss: 0.3350 - val_acc: 0.9722
Epoch 31/50
44/43 [==============================] - 469s 11s/step - loss: 0.2517 - acc: 0.9638 - val_loss: 0.3311 - val_acc: 0.9722
Epoch 32/50
44/43 [==============================] - 470s 11s/step - loss: 0.2517 - acc: 0.9484 - val_loss: 0.3266 - val_acc: 0.9722
Epoch 33/50
44/43 [==============================] - 490s 11s/step - loss: 0.2348 - acc: 0.9560 - val_loss: 0.3211 - val_acc: 0.9722
Epoch 34/50
44/43 [==============================] - 461s 10s/step - loss: 0.2427 - acc: 0.9517 - val_loss: 0.3158 - val_acc: 0.9722
Epoch 35/50
44/43 [==============================] - 467s 11s/step - loss: 0.2260 - acc: 0.9616 - val_loss: 0.3109 - val_acc: 0.9722
Epoch 36/50
44/43 [==============================] - 459s 10s/step - loss: 0.2243 - acc: 0.9706 - val_loss: 0.3064 - val_acc: 0.9722
Epoch 37/50
44/43 [==============================] - 456s 10s/step - loss: 0.2099 - acc: 0.9687 - val_loss: 0.3029 - val_acc: 0.9722
Epoch 38/50
44/43 [==============================] - 457s 10s/step - loss: 0.2094 - acc: 0.9733 - val_loss: 0.2994 - val_acc: 0.9722
Epoch 39/50
44/43 [==============================] - 465s 11s/step - loss: 0.2014 - acc: 0.9744 - val_loss: 0.2941 - val_acc: 0.9722
Epoch 40/50
44/43 [==============================] - 465s 11s/step - loss: 0.1924 - acc: 0.9709 - val_loss: 0.2915 - val_acc: 0.9722
Epoch 41/50
44/43 [==============================] - 457s 10s/step - loss: 0.1908 - acc: 0.9735 - val_loss: 0.2897 - val_acc: 0.9722
Epoch 42/50
44/43 [==============================] - 463s 11s/step - loss: 0.1864 - acc: 0.9709 - val_loss: 0.2861 - val_acc: 0.9722
Epoch 43/50
44/43 [==============================] - 464s 11s/step - loss: 0.1787 - acc: 0.9773 - val_loss: 0.2822 - val_acc: 0.9722
Epoch 44/50
44/43 [==============================] - 468s 11s/step - loss: 0.1820 - acc: 0.9744 - val_loss: 0.2794 - val_acc: 0.9722
Epoch 45/50
44/43 [==============================] - 469s 11s/step - loss: 0.1646 - acc: 0.9818 - val_loss: 0.2763 - val_acc: 0.9722
Epoch 46/50
44/43 [==============================] - 469s 11s/step - loss: 0.1689 - acc: 0.9820 - val_loss: 0.2730 - val_acc: 0.9722
Epoch 47/50
44/43 [==============================] - 471s 11s/step - loss: 0.1495 - acc: 0.9879 - val_loss: 0.2711 - val_acc: 0.9722
Epoch 48/50
44/43 [==============================] - 469s 11s/step - loss: 0.1578 - acc: 0.9858 - val_loss: 0.2676 - val_acc: 0.9722
Epoch 49/50
44/43 [==============================] - 462s 10s/step - loss: 0.1557 - acc: 0.9858 - val_loss: 0.2643 - val_acc: 0.9722
Epoch 50/50
44/43 [==============================] - 454s 10s/step - loss: 0.1501 - acc: 0.9794 - val_loss: 0.2612 - val_acc: 0.9722

Это мой predict.py:

from keras.models import load_model
import cv2
import os
import numpy as np
from keras.preprocessing import image

# Folder holding the images to classify.
TEST_DIR = 'v/'
# Running counts of images classified as PDR / non-PDR by the loop below.
pdr = 0
nonPdr = 0

# Load the network written by the training script (model.save('model.h5')).
model = load_model('model.h5')

def normalize(x, mean, std):
    """Standardize channels 0, 1 and 2 of ``x`` in place and return ``x``.

    For every channel index ``c`` in 0..2 (last axis of ``x``), the values
    are shifted by ``mean[c]`` and then divided by ``std[c]``.

    Args:
        x: array whose last axis has at least three channels; it is modified.
        mean: per-channel offsets, indexable by 0, 1, 2.
        std: per-channel divisors, indexable by 0, 1, 2.

    Returns:
        The same object that was passed in as ``x``.
    """
    for channel in range(3):
        x[..., channel] -= mean[channel]
        x[..., channel] /= std[channel]
    return x

# Classify every image file found in TEST_DIR and tally the predicted classes.
for filename in os.listdir(TEST_DIR):
    # str.endswith accepts a tuple; lower() also admits upper-case extensions.
    if not filename.lower().endswith((".jpg", ".ppm", ".jpeg", ".png")):
        continue

    # Build the path with os.path.join so it does not depend on TEST_DIR
    # ending with a separator.
    raw = cv2.imread(os.path.join(TEST_DIR, filename))
    if raw is None:
        # cv2.imread returns None for files it cannot decode.
        print("Skipping unreadable image >>>", filename)
        continue

    # Same preprocessing chain as in training: resize, blur-subtract, scale.
    ImageCV = cv2.resize(raw, (224, 224))
    ImageCV = cv2.addWeighted(ImageCV, 4, cv2.GaussianBlur(ImageCV, (0, 0), 10), -4, 128)
    ImageCV = np.asarray(ImageCV)

    ImageCV = ImageCV.astype('float32')

    ImageCV /= 255
    x = ImageCV

    # Add the batch axis expected by model.predict.
    x = np.expand_dims(x, axis=0)
    # NOTE(review): these statistics must be the mean/std printed by the
    # training script for the data the loaded model was trained on. The three
    # identical mean values look suspicious -- confirm against that output.
    x = normalize(x, [0.23883381, 0.23883381, 0.23883381], [0.24483591, 0.24579705, 0.2510857])

    prob = model.predict(x)
    # prob holds one sigmoid output for the single image in the batch;
    # compare that scalar explicitly with the decision threshold.
    # Author's tuning notes: .75 = 80% | .70=79% >>>> .70 = 82% | .75 = 79%
    if prob[0][0] <= 0.75:
        print("nonPDR >>>", filename)
        nonPdr += 1
    else:
        print("PDR >>>", filename)
        pdr += 1
    print(prob)
print("Number of retinas with PDR: ",pdr)
print("Number of retinas without PDR: ",nonPdr)

Проблема в том, что, хотя обучение показало точность около 97%, все мои прогнозы оказались неверными ... например, эти 3 изображения должны быть PDR (класс 1):

nonPDR >>> 16_left.jpeg
[[0.07062916]]
nonPDR >>> 16_right.jpeg
[[0.09434311]]
nonPDR >>> 217_left.jpeg
[[0.14126943]]

Если я протестирую те же изображения, что использовались при обучении, модель тоже предсказывает неправильно ...

Я уже пытался тренироваться без gaussianBlur, но точность была очень низкой.

Что я делаю не так? Пожалуйста, я ценю вашу помощь !!

1 Ответ

1 голос
/ 20 октября 2019

Несколько вещей, которые стоит попробовать. Я бы предложил не использовать аугментацию данных, пока вы не убедитесь, что процесс обучения работает, даже если поначалу качество будет невысоким. Для дополнительной проверки можно сделать предсказание сразу после model.fit на обучающих данных — просто чтобы убедиться, что точность получается такой же, как во время обучения. Возможно, тестовые данные обрабатываются немного иначе, чем обучающие, и из-за этого сеть ведёт себя плохо; поэтому хорошим первым шагом будет убедиться, что с обучающей частью всё в порядке, а затем сосредоточиться на тестовой. Надеюсь, это поможет.

...