Keras validation accuracy differs when using subclassed syntax compared to Functional or Sequential - PullRequest
0 votes
/ 21 February 2020

I have reimplemented the Keras MNIST CNN example using the Sequential, Functional and Subclass syntaxes.

Everything compiles and runs fine, but I noticed a significant difference in validation accuracy when using the subclass syntax (35%) compared to the Sequential / Functional syntax (75%). The model architecture should be the same, which is what confuses me.

ClassCNN.py

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten

# BUG: ClassCNN accuracy is only 36% compared to 75% for SequentialCNN / FunctionalCNN
# SequentialCNN   validation: | loss: 1.3756675141198293 | accuracy: 0.7430952
# FunctionalCNN   validation: | loss: 1.4285654685610816 | accuracy: 0.7835714
# ClassCNN        validation: | loss: 1.9851970995040167 | accuracy: 0.36214286
class ClassCNN(tf.keras.Model):

    def __init__(self, input_shape, output_shape, **kwargs):
        super(ClassCNN, self).__init__()
        self._input_shape  = input_shape   # = (28, 28, 1)
        self._output_shape = output_shape  # = 10

        self.conv1      = Conv2D(32, kernel_size=(3, 3), activation=tf.nn.relu)
        self.conv2      = Conv2D(64, kernel_size=(3, 3), activation=tf.nn.relu)
        self.maxpool    = MaxPooling2D(pool_size=(2, 2))
        self.dropout1   = Dropout(0.25, name='dropout1')
        self.flatten    = Flatten()
        self.dense1     = Dense(128, activation=tf.nn.relu)
        self.dropout2   = Dropout(0.5, name='dropout2')
        self.activation = Dense(self._output_shape, activation=tf.nn.relu)

        self.conv1.build(     (None,) + input_shape )
        self.conv2.build(     (None,) + tuple(np.subtract(input_shape[:-1],2)) + (32,) )
        self.maxpool.build(   (None,) + tuple(np.subtract(input_shape[:-1],4)) + (64,) )
        self.dropout1.build( tuple(np.floor_divide(np.subtract(input_shape[:-1],4),2)) + (64,) )
        self.dropout2.build( 128 )
        self.build(           (None,) + input_shape)


    def call(self, x, training=False, **kwargs):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.maxpool(x)
        if training:  x = self.dropout1(x)
        x = self.flatten(x)
        x = self.dense1(x)
        if training:  x = self.dropout2(x)
        x = self.activation(x)
        return x
FunctionalCNN.py

import os

from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.utils import plot_model


def FunctionalCNN(input_shape, output_shape):
    inputs = Input(shape=input_shape)
    x = Conv2D(32, kernel_size=(3, 3), activation='relu')(inputs)
    x = Conv2D(64, kernel_size=(3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(output_shape, activation='softmax')(x)

    model = Model(inputs, x, name="FunctionalCNN")
    plot_model(model, to_file=os.path.join(os.path.dirname(__file__), "FunctionalCNN.png"))
    return model
SequentialCNN.py

import os

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.utils import plot_model


def SequentialCNN(input_shape, output_shape):
    model = Sequential()
    model.add( Conv2D(32, kernel_size=(3, 3),
                     activation='relu',
                     input_shape=input_shape) )
    model.add( Conv2D(64, (3, 3), activation='relu') )
    model.add( MaxPooling2D(pool_size=(2, 2)) )
    model.add( Dropout(0.25) )
    model.add( Flatten() )
    model.add( Dense(128, activation='relu') )
    model.add( Dropout(0.5) )
    model.add( Dense(output_shape, activation='softmax') )

    plot_model(model, to_file=os.path.join(os.path.dirname(__file__), "SequentialCNN.png"))
    return model

main.py

#!/usr/bin/env python3
import multiprocessing
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'  # 0, 1, 2, 3  # Disable TensorFlow logging
os.chdir( os.path.dirname( os.path.abspath(__file__) ) )

import tensorflow as tf
import tensorflow.keras as keras
import time

from src.dataset import DataSet
from src.keras.examples.ClassCNN import ClassCNN
from src.keras.examples.ClassNN import ClassNN
from src.keras.examples.FunctionalCNN import FunctionalCNN
from src.keras.examples.SequentialCNN import SequentialCNN
from src.utils.csv import predict_to_csv

tf.random.set_seed(42)

timer_start = time.time()

dataset = DataSet()
config = {
    "verbose":      False,
    "epochs":       12,
    "batch_size":   128,
    "input_shape":  dataset.input_shape(),
    "output_shape": dataset.output_shape(),
}
print("config", config)

# BUG: ClassCNN accuracy is only 36% compared to 75% for SequentialCNN / FunctionalCNN
# SequentialCNN   validation: | loss: 1.3756675141198293 | accuracy: 0.7430952
# FunctionalCNN   validation: | loss: 1.4285654685610816 | accuracy: 0.7835714
# ClassCNN        validation: | loss: 1.9851970995040167 | accuracy: 0.36214286
models = {
    "SequentialCNN": SequentialCNN(
        input_shape=dataset.input_shape(),
        output_shape=dataset.output_shape()
    ),
    "FunctionalCNN": FunctionalCNN(
        input_shape=dataset.input_shape(),
        output_shape=dataset.output_shape()
    ),
    "ClassCNN": ClassCNN(
        input_shape=dataset.input_shape(),
        output_shape=dataset.output_shape()
    ),
}


for model_name, model in models.items():
    print(model_name)

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])

    model.summary()

    model.fit(
        dataset.data['train_X'], dataset.data['train_Y'],
        batch_size = config["batch_size"],
        epochs     = config["epochs"],
        verbose    = config["verbose"],
        validation_data = (dataset.data["valid_X"], dataset.data["valid_Y"]),
        use_multiprocessing = True, workers = multiprocessing.cpu_count()
    )

for model_name, model in models.items():
    score = model.evaluate(dataset.data['valid_X'], dataset.data['valid_Y'], verbose=config["verbose"])
    print(model_name.ljust(15), "validation:", '| loss:', score[0], '| accuracy:', score[1])

for model_name, model in models.items():
    predict_to_csv( model.predict(dataset.data['test_X']), f'../../../submissions/keras-examples/keras-examples-{model_name}.csv')

print("time:", int(time.time() - timer_start), "s")

Output:

./src/keras/examples/main.py 
config {'verbose': False, 'epochs': 12, 'batch_size': 128, 'input_shape': (28, 28, 1), 'output_shape': 10}
SequentialCNN
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 12, 12, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 9216)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               1179776   
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
=================================================================
Total params: 1,199,882
Trainable params: 1,199,882
Non-trainable params: 0
_________________________________________________________________
FunctionalCNN
Model: "FunctionalCNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               1179776   
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1290      
=================================================================
Total params: 1,199,882
Trainable params: 1,199,882
Non-trainable params: 0
_________________________________________________________________
ClassCNN
Model: "class_cnn"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_4 (Conv2D)            multiple                  320       
_________________________________________________________________
conv2d_5 (Conv2D)            multiple                  18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 multiple                  0         
_________________________________________________________________
dropout1 (Dropout)           multiple                  0         
_________________________________________________________________
flatten_2 (Flatten)          multiple                  0         
_________________________________________________________________
dense_4 (Dense)              multiple                  1179776   
_________________________________________________________________
dropout2 (Dropout)           multiple                  0         
_________________________________________________________________
dense_5 (Dense)              multiple                  1290      
=================================================================
Total params: 1,199,882
Trainable params: 1,199,882
Non-trainable params: 0
_________________________________________________________________
SequentialCNN   validation: | loss: 1.370523907570612  | accuracy: 0.74964285
FunctionalCNN   validation: | loss: 1.4270000725700742 | accuracy: 0.78511906
ClassCNN        validation: | loss: 2.028766530354818  | accuracy: 0.35630953

SequentialCNN and FunctionalCNN both give roughly the same accuracy (75%), but something is fundamentally different about the ClassCNN accuracy (35%). Visually the models look identical.
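One way to compare the models more rigorously than by eye would be something like the sketch below (the print_layer_activations helper is hypothetical, not part of my project code); it assumes the models dict from main.py and dumps the activation of every layer that has one:

# Hypothetical helper: print the activation function of every layer that has one,
# so the three models can be compared side by side after they are built.
def print_layer_activations(model):
    for layer in model.layers:
        config = layer.get_config()
        if 'activation' in config:
            print(f"{model.name:15s} {layer.name:20s} activation={config['activation']}")

for model_name, model in models.items():
    print_layer_activations(model)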

Can anyone explain this?

1 Answer

1 vote
/ 22 February 2020

I think the activation on the last layer of ClassCNN is 'relu', but it should be 'softmax', as in the other models. It's just a human error. Thanks.
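A minimal sketch of the fix, assuming the rest of ClassCNN stays unchanged: replace the activation of the final Dense layer in __init__ so it matches the Sequential and Functional versions.

# In ClassCNN.__init__: the output layer should use softmax, like the other two models
self.activation = Dense(self._output_shape, activation=tf.nn.softmax)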
