I'm relatively new to Keras / CNNs. I've built a model that concatenates the outputs of three separate sequential CNN branches with some additional metadata in a final dense network. The outputs of the three individual branches are reasonable and train fine, but the final output, once combined with the metadata, performs worse and doesn't seem to learn from that information, even though some of the metadata should be very useful for prediction. The labels are one-hot encoded classification targets (4 different classes). I'm somewhat puzzled as to why the final concatenated model does so much worse than its individual parts, and would appreciate any insight into what I might be doing wrong. Thanks!
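For reference, the label / output-head setup looks roughly like this (a minimal sketch; op_units, op_activation and the raw_* arrays as written here are placeholders, not copied from the code below):

from keras.utils import to_categorical

# sketch of the assumed setup: 4 classes, one-hot labels, softmax output heads
# (raw_training_labels / raw_val_labels are hypothetical integer-label arrays)
op_units = 4
op_activation = 'softmax'
training_labels = to_categorical(raw_training_labels, num_classes=op_units)
val_labels = to_categorical(raw_val_labels, num_classes=op_units)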
# imports (assuming standalone Keras plus the keras_tqdm package for the progress-bar callback)
import keras
from keras.models import Model
from keras.layers import Input, Embedding, Conv1D, MaxPooling1D, GlobalAveragePooling1D, Dense
from keras.optimizers import SGD
from keras_tqdm import TQDMNotebookCallback

# create first conv layers
first_input = Input(shape=input_shape, dtype='int32', name='first_input')
x = Embedding(input_dim=num_features, output_dim=embedding_dim,
              input_length=input_shape[0])(first_input)
#x = Dropout(rate=dropout_rate)(x)
x = Conv1D(filters=filters,
           kernel_size=kernel_size,
           strides=1,
           activation='relu',
           bias_initializer='random_uniform',
           padding='same')(x)
x = Conv1D(filters=filters,
           kernel_size=kernel_size,
           strides=1,
           activation='relu',
           bias_initializer='random_uniform',
           padding='same')(x)
x = MaxPooling1D(pool_size=pool_size)(x)
x = Conv1D(filters=filters * 2,
           kernel_size=kernel_size,
           strides=1,
           activation='relu',
           bias_initializer='random_uniform',
           padding='same')(x)
x = Conv1D(filters=filters * 2,
           kernel_size=kernel_size,
           strides=1,
           activation='relu',
           bias_initializer='random_uniform',
           padding='same')(x)
x = GlobalAveragePooling1D()(x)
aux_predictions = Dense(op_units, activation=op_activation)(x)
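# aux_predictions is used both as an auxiliary training output (loss weight 0.2 below)
# and as the feature vector fed into the final concatenation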
# now create a convolutional model for the second input
second_input = Input(shape=input_shape, dtype='int32', name='second_input')
x = Embedding(input_dim=num_features, output_dim=embedding_dim,
              input_length=input_shape[0])(second_input)
#x = Dropout(rate=dropout_rate)(x)
x = Conv1D(filters=filters,
           kernel_size=kernel_size,
           strides=1,
           activation='relu',
           bias_initializer='random_uniform',
           padding='same')(x)
x = Conv1D(filters=filters,
           kernel_size=kernel_size,
           strides=1,
           activation='relu',
           bias_initializer='random_uniform',
           padding='same')(x)
x = MaxPooling1D(pool_size=pool_size)(x)
x = Conv1D(filters=filters * 2,
           kernel_size=kernel_size,
           strides=1,
           activation='relu',
           bias_initializer='random_uniform',
           padding='same')(x)
x = Conv1D(filters=filters * 2,
           kernel_size=kernel_size,
           strides=1,
           activation='relu',
           bias_initializer='random_uniform',
           padding='same')(x)
x = GlobalAveragePooling1D()(x)
aux_predictions2 = Dense(op_units, activation=op_activation)(x)
# now create a convolutional model for the third input
third_input = Input(shape=input_shape, dtype='int32', name='third_input')
x = Embedding(input_dim=num_features, output_dim=embedding_dim,
              input_length=input_shape[0])(third_input)
#x = Dropout(rate=dropout_rate)(x)
x = Conv1D(filters=filters,
           kernel_size=kernel_size,
           strides=1,
           activation='relu',
           bias_initializer='random_uniform',
           padding='same')(x)
x = Conv1D(filters=filters,
           kernel_size=kernel_size,
           strides=1,
           activation='relu',
           bias_initializer='random_uniform',
           padding='same')(x)
x = MaxPooling1D(pool_size=pool_size)(x)
x = Conv1D(filters=filters * 2,
           kernel_size=kernel_size,
           strides=1,
           activation='relu',
           bias_initializer='random_uniform',
           padding='same')(x)
x = Conv1D(filters=filters * 2,
           kernel_size=kernel_size,
           strides=1,
           activation='relu',
           bias_initializer='random_uniform',
           padding='same')(x)
x = GlobalAveragePooling1D()(x)
aux_predictions3 = Dense(op_units, activation=op_activation)(x)
# Now combine three CNN layers with metadata
auxiliary_input = Input(shape=metadata_dim, name='aux_input')
x = keras.layers.concatenate([aux_predictions, aux_predictions2, aux_predictions3, auxiliary_input ])
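# the concatenated feature vector has 3 * op_units values from the CNN heads plus metadata_dim metadata features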
#x = Dropout(rate = dropout_rate)(x)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
model_output = Dense(op_units, activation=op_activation, name='model_output')(x)
opt = SGD(lr=0.01)
fullmetamodel = Model(inputs=[first_input, second_input, third_input, auxiliary_input],
                      outputs=[aux_predictions, aux_predictions2, aux_predictions3, model_output])
fullmetamodel.compile(
    metrics=['categorical_accuracy'], loss='categorical_crossentropy',
    loss_weights=[0.2, 0.2, 0.2, 1.], optimizer=opt)
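# with list-form loss_weights, the order follows the Model's outputs list:
# [aux_predictions, aux_predictions2, aux_predictions3, model_output],
# so the final merged output gets weight 1.0 and each auxiliary head gets 0.2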
callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', patience=2),
             TQDMNotebookCallback(leave_inner=False, leave_outer=True)]
fullmetamodel.fit(x=[first_x_train, second_x_train, third_x_train, training_meta],
                  y=[training_labels, training_labels, training_labels, training_labels],
                  batch_size=32, epochs=40,
                  validation_data=([first_x_val, second_x_val, third_x_val, val_meta],
                                   [val_labels, val_labels, val_labels, val_labels]),
                  verbose=0, callbacks=callbacks)  # starts training
# Output: the three conv branches are doing OK, but the concatenated model output performs poorly
Training
50% 20/40 [2:49:34<2:49:23, 508.20s/it]
Epoch 20
[loss: 8.002, dense_118_loss: 0.749, dense_119_loss: 0.769, dense_120_loss: 0.876, model_output_loss: 7.523,
 dense_118_categorical_accuracy: 0.686, dense_119_categorical_accuracy: 0.626, dense_120_categorical_accuracy: 0.620,
 model_output_categorical_accuracy: 0.532]
66% 265184/400000 [05:13<02:40, 840.90it/s]