I have a deep CNN based on ResNet and a dataset of shape (10000, 50, 50, 1) for digit classification. When I run it and training starts, the accuracy just stops at some value and fluctuates slightly (around 0.2). I'm wondering whether the model is overfitting or there is some other problem.
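To tell overfitting from underfitting, one option is to keep the History object returned by model.fit (see update 1 below) and compare the training and validation curves. A minimal sketch, assuming matplotlib is installed and X_train/Y_train are the arrays from the fit call in update 1:

import matplotlib.pyplot as plt

# capture the History object returned by model.fit
history = model.fit(X_train, Y_train, validation_split=0.2, epochs=10)

# overfitting: train accuracy keeps climbing while val accuracy stalls or drops;
# underfitting: both curves flatten at a low value, as with the ~0.2 described here
plt.plot(history.history['accuracy'], label='train accuracy')
plt.plot(history.history['val_accuracy'], label='val accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()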
Here is the identity block:
from tensorflow.keras.layers import (Input, Conv2D, BatchNormalization, Activation, Add,
                                     ZeroPadding2D, MaxPooling2D, AveragePooling2D,
                                     Flatten, Dense)
from tensorflow.keras.models import Model
from tensorflow.keras import initializers

def identity_block(X, f, filters, stage, block):
    # define the name bases for the layers in this block
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    # retrieve filters
    F1, F2, F3 = filters
    # save the input for the shortcut connection
    X_shortcut = X
    # first component: 1x1 convolution
    X = Conv2D(filters=F1, kernel_size=(1, 1), strides=(1, 1), padding='valid', name=conv_name_base + '2a',
               kernel_initializer=initializers.glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2a')(X)
    X = Activation('relu')(X)
    # second component: f x f convolution
    X = Conv2D(filters=F2, kernel_size=(f, f), strides=(1, 1), padding='same', name=conv_name_base + '2b',
               kernel_initializer=initializers.glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2b')(X)
    X = Activation('relu')(X)
    # third component: 1x1 convolution, no activation before the add
    X = Conv2D(filters=F3, kernel_size=(1, 1), strides=(1, 1), padding='valid', name=conv_name_base + '2c',
               kernel_initializer=initializers.glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2c')(X)
    # final step: add the shortcut and apply ReLU
    X = Add()([X, X_shortcut])
    X = Activation('relu')(X)
    return X
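A quick smoke test helps confirm the block preserves the tensor shape (hypothetical dummy input; the stage/block values are arbitrary):

# identity blocks must not change the shape, so input and output should match
dummy = Input(shape=(12, 12, 32))
out = identity_block(dummy, f=3, filters=[32, 32, 32], stage=9, block='t')
print(out.shape)  # expected: (None, 12, 12, 32)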
and the convolutional block:
def conv_block(X, f, filters, stage, block, s=2):
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    # retrieve filters
    F1, F2, F3 = filters
    # save the input for the shortcut connection
    X_shortcut = X
    # first component: 1x1 convolution with stride s (downsamples when s > 1)
    X = Conv2D(F1, kernel_size=(1, 1), strides=(s, s), name=conv_name_base + '2a',
               kernel_initializer=initializers.glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2a')(X)
    X = Activation('relu')(X)
    # second component: f x f convolution
    X = Conv2D(F2, kernel_size=(f, f), strides=(1, 1), padding='same', name=conv_name_base + '2b',
               kernel_initializer=initializers.glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2b')(X)
    X = Activation('relu')(X)
    # third component: 1x1 convolution
    X = Conv2D(F3, kernel_size=(1, 1), strides=(1, 1), name=conv_name_base + '2c', padding='valid',
               kernel_initializer=initializers.glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2c')(X)
    # projection shortcut: 1x1 convolution so the shapes match before the add
    X_shortcut = Conv2D(F3, kernel_size=(1, 1), strides=(s, s), name=conv_name_base + '1',
                        kernel_initializer=initializers.glorot_uniform(seed=0))(X_shortcut)
    X_shortcut = BatchNormalization(axis=3, name=bn_name_base + '1')(X_shortcut)
    # finally: add the shortcut and apply ReLU
    X = Add()([X, X_shortcut])
    X = Activation('relu')(X)
    return X
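The same kind of smoke test for the convolutional block (hypothetical input; with s=2 the spatial size should halve and the channel count should become F3):

# the projection shortcut lets the block change both spatial size and channels
dummy = Input(shape=(12, 12, 32))
out = conv_block(dummy, f=3, filters=[32, 32, 64], stage=9, block='u', s=2)
print(out.shape)  # expected: (None, 6, 6, 64)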
and finally the ResNet itself:
def ResNet(input_shape=(50, 50, 1), classes=10):
    inp = Input(shape=input_shape)
    # zero padding
    X = ZeroPadding2D((3, 3), name='pad0')(inp)
    # stage 1
    X = Conv2D(32, (5, 5), name='conv1',
               kernel_initializer=initializers.glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn1')(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((2, 2), name='pool1')(X)
    # stage 2
    stage2_filtersize = 32
    X = conv_block(X, 3, filters=[stage2_filtersize, stage2_filtersize, stage2_filtersize], stage=2, block='a', s=1)
    X = identity_block(X, 3, [stage2_filtersize, stage2_filtersize, stage2_filtersize], stage=2, block='b')
    X = identity_block(X, 3, [stage2_filtersize, stage2_filtersize, stage2_filtersize], stage=2, block='c')
    # stage 3
    stage3_filtersize = 64
    X = conv_block(X, 3, filters=[stage3_filtersize, stage3_filtersize, stage3_filtersize], stage=3, block='a', s=1)
    X = identity_block(X, 3, [stage3_filtersize, stage3_filtersize, stage3_filtersize], stage=3, block='b')
    X = identity_block(X, 3, [stage3_filtersize, stage3_filtersize, stage3_filtersize], stage=3, block='c')
    # stage 4
    stage4_filtersize = 128
    X = conv_block(X, 3, filters=[stage4_filtersize, stage4_filtersize, stage4_filtersize], stage=4, block='a', s=1)
    X = identity_block(X, 3, [stage4_filtersize, stage4_filtersize, stage4_filtersize], stage=4, block='b')
    X = identity_block(X, 3, [stage4_filtersize, stage4_filtersize, stage4_filtersize], stage=4, block='c')
    # final pooling
    X = AveragePooling2D((2, 2), padding='same', name='Pool0')(X)
    # fully connected classifier
    X = Flatten(name='D0')(X)
    X = Dense(classes, activation='softmax', kernel_initializer=initializers.glorot_uniform(seed=0), name='D2')(X)
    # create the model
    model = Model(inputs=inp, outputs=X)
    return model
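Instantiating the network and printing a summary is a cheap way to verify that the shapes flow through the stages as intended:

model = ResNet(input_shape=(50, 50, 1), classes=10)
model.summary()  # inspect per-layer output shapes and the parameter count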
Update 1: here are the compile and fit calls:
import tensorflow
from tensorflow.keras.callbacks import EarlyStopping

model.compile(optimizer='adam',
              loss=tensorflow.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
print("model compiled settings imported successfully")
early_stopping = EarlyStopping(monitor='val_loss', patience=2)
model.fit(X_train, Y_train, validation_split=0.2, callbacks=[early_stopping], epochs=10)
test_loss, test_acc = model.evaluate(X_test, Y_test, verbose=2)
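The question does not show how X_train is prepared; if the pixels are still raw 0..255 integers, that alone can keep accuracy stuck near a low value. A hedged sanity-check sketch (X_train, Y_train, X_test are assumed to be the NumPy arrays fed to the calls above):

import numpy as np

# assumed names: X_train, Y_train, X_test come from the asker's pipeline
print(X_train.shape, X_train.dtype, X_train.min(), X_train.max())  # expect (N, 50, 50, 1)
print(np.unique(Y_train))  # SparseCategoricalCrossentropy expects integer labels 0..9

# if pixels are raw 0..255, scaling to [0, 1] is a common first fix
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0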