Мой вопрос такой же, как и «Как я могу извлечь x_train и y_train из train_generator?» — об использовании train_generator при обучении ансамблей. Вот ссылка https://www.kaggle.com/kmader/gpu-pretrained-vgg16-hratten-tuberculosis на пример кода из Kaggle. В моём случае я использовал 2 модели CNN, т.е. VGG16 и VGG19. Обе модели дают выходные данные (прогноз, точность, AUC и др.). Однако я хочу создать ансамбль (любого вида). Но из-за train_generator я не знаю, как извлечь обучающие и тестовые значения. А также, поскольку мы использовали предварительно обученные модели, — как составить структуру ансамбля моделей? Извините, если вопрос бессмысленный: я изучал это по видео и учебникам, и у меня очень ограниченные знания. Спасибо за любую помощь.
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.imagenet_utils import preprocess_input
from PIL import Image


def ppi(img):
    """Run ImageNet-style preprocessing on a PIL image and return a PIL image."""
    return Image.fromarray(preprocess_input(np.array(img).astype(np.float32)))


IMG_SIZE = (224, 224)  # slightly smaller than vgg16 normally expects

# Shared augmentation pipeline used by every generator below: mild geometric
# jitter plus horizontal flips; no per-sample normalisation.
core_idg = ImageDataGenerator(
    samplewise_center=False,
    samplewise_std_normalization=False,
    horizontal_flip=True,
    vertical_flip=False,
    height_shift_range=0.15,
    width_shift_range=0.15,
    rotation_range=5,
    shear_range=0.01,
    fill_mode='nearest',
    zoom_range=0.2,
)
def flow_from_dataframe(img_data_gen, in_df, path_col, y_col, **dflow_args):
    """Create a keras iterator fed by a dataframe of full file paths.

    Builds an ordinary ``flow_from_directory`` iterator, then overwrites its
    internals so that it yields exactly the files listed in ``in_df[path_col]``
    with labels taken from ``in_df[y_col]``.
    """
    # Keras needs *some* directory to scan; use the parent of the first path.
    base_dir = os.path.dirname(in_df[path_col].values[0])
    print('## Ignore next message from keras, values are replaced anyways')
    flow = img_data_gen.flow_from_directory(base_dir,
                                            class_mode='sparse',
                                            **dflow_args)
    # Replace what keras discovered on disk with the dataframe's contents.
    flow.filenames = in_df[path_col].values
    flow.classes = np.stack(in_df[y_col].values)
    flow.samples = in_df.shape[0]
    flow.n = in_df.shape[0]
    flow._set_index_array()  # rebuild shuffling index for the new sample count
    flow.directory = ''  # since we have the full path
    print('Reinserting dataframe: {} images'.format(in_df.shape[0]))
    return flow
# All VGG16 generators read the same columns with the same image settings.
_vgg16_flow_opts = dict(
    path_col='png',
    y_col='pulm_state',
    target_size=IMG_SIZE,
    color_mode='rgb',
)
# Augmented training stream: small batches for gradient updates.
train_gen_VGG16 = flow_from_dataframe(core_idg, train_df,
                                      batch_size=64,
                                      **_vgg16_flow_opts)
# we can use much larger batches for evaluation
valid_gen_VGG16 = flow_from_dataframe(core_idg, valid_df,
                                      batch_size=512,
                                      **_vgg16_flow_opts)
# used a fixed dataset for evaluating the algorithm: draw one big batch
# from the validation dataframe and keep it as in-memory arrays.
test_X_VGG16, test_Y_VGG16 = next(
    flow_from_dataframe(core_idg, valid_df,
                        batch_size=1024,
                        **_vgg16_flow_opts))
# NOTE(review): the original file repeated the generator-construction block
# above verbatim (same names, same arguments). The duplicate has been removed:
# train_gen_VGG16, valid_gen_VGG16 and (test_X_VGG16, test_Y_VGG16) are
# already defined by the preceding block, and re-running it only re-drew an
# equivalent evaluation batch.
# Pull one augmented training batch and preview the first 8 images.
t_x_VGG16, t_y_VGG16 = next(train_gen_VGG16)
fig, m_axs = plt.subplots(2, 4, figsize=(16, 8))
for img, label, ax in zip(t_x_VGG16, t_y_VGG16, m_axs.flatten()):
    ax.imshow(img[:, :, 0], cmap='bone', vmin=0, vmax=255)
    title = 'Pulmonary Abnormality' if label > 0.5 else 'Healthy'
    ax.set_title('%s' % title)
    ax.axis('off')
from keras.applications.vgg16 import VGG16
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, Input, Conv2D, multiply, LocallyConnected2D, Lambda
from keras.layers import BatchNormalization
from keras.models import Model

name = 'VGG16'
in_lay = Input(t_x_VGG16.shape[1:])

# Frozen ImageNet-pretrained VGG16 backbone, classifier head removed.
base_pretrained_model_VGG16 = VGG16(input_shape=t_x_VGG16.shape[1:],
                                    include_top=False,
                                    weights='imagenet')
base_pretrained_model_VGG16.trainable = False
pt_depth = base_pretrained_model_VGG16.get_output_shape_at(0)[-1]  # feature channels

bn_features = BatchNormalization()(base_pretrained_model_VGG16(in_lay))

# Attention head: stack of 1x1 convs ending in a single-channel sigmoid mask
# that turns spatial positions of the GAP on and off.
attn_layer = Conv2D(64, kernel_size=(1, 1), padding='same', activation='relu')(bn_features)
attn_layer = Conv2D(16, kernel_size=(1, 1), padding='same', activation='relu')(attn_layer)
attn_layer = Conv2D(1, kernel_size=(1, 1), padding='valid', activation='sigmoid')(attn_layer)

# Fan the 1-channel mask out to all feature channels with a fixed
# (non-trainable) all-ones 1x1 convolution.
up_c2_w = np.ones((1, 1, 1, pt_depth))
up_c2 = Conv2D(pt_depth, kernel_size=(1, 1), padding='same',
               activation='linear', use_bias=False, weights=[up_c2_w])
up_c2.trainable = False
attn_layer = up_c2(attn_layer)

mask_features = multiply([attn_layer, bn_features])
gap_features = GlobalAveragePooling2D()(mask_features)
gap_mask = GlobalAveragePooling2D()(attn_layer)
# Rescale the masked GAP by the mask's own mean to account for the
# positions the attention zeroed out.
gap = Lambda(lambda x: x[0] / x[1], name='RescaleGAP')([gap_features, gap_mask])

gap_dr = Dropout(0.5)(gap)
dr_steps = Dropout(0.25)(Dense(128, activation='elu')(gap_dr))
out_layer = Dense(1, activation='sigmoid')(dr_steps)

tb_model_VGG16 = Model(inputs=[in_lay], outputs=[out_layer], name=name)
tb_model_VGG16.compile(optimizer='adam', loss='binary_crossentropy',
                       metrics=['binary_accuracy'])
tb_model_VGG16.summary()
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau#,TensorBoard

# Where the best (lowest val_loss) weights are checkpointed.
weight_path_VGG16="{}_weights.VGG-16.hdf5".format('tb_detector')
checkpoint = ModelCheckpoint(weight_path_VGG16, monitor='val_loss', verbose=1,
                             save_best_only=True, mode='min', save_weights_only = True)
# FIX: the `epsilon` keyword of ReduceLROnPlateau was renamed `min_delta`
# (Keras 2.1.6+); passing `epsilon` fails on current versions.
# NOTE(review): patience=10 here exceeds EarlyStopping's patience=5, so the
# LR reduction can never fire before training stops -- confirm intent.
reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=10,
                                   verbose=1, mode='auto', min_delta=0.0001,
                                   cooldown=5, min_lr=0.0001)
early = EarlyStopping(monitor="val_loss",
                      mode="min",
                      patience=5) # probably needs to be more patient, but kaggle time is limited
# tensorboard = TensorBoard()
callbacks_list = [checkpoint, early, reduceLROnPlat]
# NOTE(review): `%%time` is an IPython/Jupyter cell magic; it is only valid as
# the first line of a notebook cell and is a syntax error in a plain .py file.
%%time
# Train the VGG16 attention model. steps_per_epoch=3 with epochs=4 is a very
# short demo run; validation uses the fixed (test_X_VGG16, test_Y_VGG16)
# batch drawn from valid_df above, not a generator.
History_VGG16 = tb_model_VGG16.fit_generator(train_gen_VGG16,
steps_per_epoch = 3,
validation_data = (test_X_VGG16, test_Y_VGG16),
epochs = 4,
callbacks = callbacks_list)
For VGG19, the same code is used with only a few changes:
# All VGG19 generators read the same columns with the same image settings.
_vgg19_flow_opts = dict(
    path_col='png',
    y_col='pulm_state',
    target_size=IMG_SIZE,
    color_mode='rgb',
)
# Augmented training stream: small batches for gradient updates.
train_gen_VGG19 = flow_from_dataframe(core_idg, train_df,
                                      batch_size=32,
                                      **_vgg19_flow_opts)
# we can use much larger batches for evaluation
valid_gen_VGG19 = flow_from_dataframe(core_idg, valid_df,
                                      batch_size=256,
                                      **_vgg19_flow_opts)
# used a fixed dataset for evaluating the algorithm: draw one big batch
# from the validation dataframe and keep it as in-memory arrays.
test_X_VGG19, test_Y_VGG19 = next(
    flow_from_dataframe(core_idg, valid_df,
                        batch_size=1024,
                        **_vgg19_flow_opts))
# Pull one augmented training batch and preview the first 8 images.
t_x_VGG19, t_y_VGG19 = next(train_gen_VGG19)
fig, m_axs = plt.subplots(2, 4, figsize=(16, 8))
for img, label, ax in zip(t_x_VGG19, t_y_VGG19, m_axs.flatten()):
    ax.imshow(img[:, :, 0], cmap='bone', vmin=0, vmax=255)
    title = 'Pulmonary Abnormality' if label > 0.5 else 'Healthy'
    ax.set_title('%s' % title)
    ax.axis('off')
from keras.applications.vgg19 import VGG19
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, Input, Conv2D, multiply, LocallyConnected2D, Lambda
from keras.layers import BatchNormalization
from keras.models import Model

name = 'VGG19'
in_lay = Input(t_x_VGG19.shape[1:])

# Frozen ImageNet-pretrained VGG19 backbone, classifier head removed.
base_pretrained_model_VGG19 = VGG19(input_shape=t_x_VGG19.shape[1:],
                                    include_top=False,
                                    weights='imagenet')
base_pretrained_model_VGG19.trainable = False
pt_depth = base_pretrained_model_VGG19.get_output_shape_at(0)[-1]  # feature channels

bn_features = BatchNormalization()(base_pretrained_model_VGG19(in_lay))

# Attention head: stack of 1x1 convs ending in a single-channel sigmoid mask
# that turns spatial positions of the GAP on and off.
attn_layer = Conv2D(64, kernel_size=(1, 1), padding='same', activation='relu')(bn_features)
attn_layer = Conv2D(16, kernel_size=(1, 1), padding='same', activation='relu')(attn_layer)
attn_layer = Conv2D(1, kernel_size=(1, 1), padding='valid', activation='sigmoid')(attn_layer)

# Fan the 1-channel mask out to all feature channels with a fixed
# (non-trainable) all-ones 1x1 convolution.
up_c2_w = np.ones((1, 1, 1, pt_depth))
up_c2 = Conv2D(pt_depth, kernel_size=(1, 1), padding='same',
               activation='linear', use_bias=False, weights=[up_c2_w])
up_c2.trainable = False
attn_layer = up_c2(attn_layer)

mask_features = multiply([attn_layer, bn_features])
gap_features = GlobalAveragePooling2D()(mask_features)
gap_mask = GlobalAveragePooling2D()(attn_layer)
# Rescale the masked GAP by the mask's own mean to account for the
# positions the attention zeroed out.
gap = Lambda(lambda x: x[0] / x[1], name='RescaleGAP')([gap_features, gap_mask])

gap_dr = Dropout(0.5)(gap)
dr_steps = Dropout(0.25)(Dense(128, activation='elu')(gap_dr))
out_layer = Dense(1, activation='sigmoid')(dr_steps)

tb_model_VGG19 = Model(inputs=[in_lay], outputs=[out_layer], name=name)
tb_model_VGG19.compile(optimizer='adam', loss='binary_crossentropy',
                       metrics=['binary_accuracy'])
tb_model_VGG19.summary()
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau

# Where the best (lowest val_loss) weights are checkpointed.
weight_path_VGG19="{}_weights.VGG-19.hdf5".format('tb_detector')
checkpoint = ModelCheckpoint(weight_path_VGG19, monitor='val_loss', verbose=1,
                             save_best_only=True, mode='min', save_weights_only = True)
# FIX: the `epsilon` keyword of ReduceLROnPlateau was renamed `min_delta`
# (Keras 2.1.6+); passing `epsilon` fails on current versions.
# NOTE(review): patience=10 here exceeds EarlyStopping's patience=5, so the
# LR reduction can never fire before training stops -- confirm intent.
reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=10,
                                   verbose=1, mode='auto', min_delta=0.0001,
                                   cooldown=5, min_lr=0.0001)
early = EarlyStopping(monitor="val_loss",
                      mode="min",
                      patience=5) # probably needs to be more patient, but kaggle time is limited
callbacks_list = [checkpoint, early, reduceLROnPlat]
# NOTE(review): `%%time` is an IPython/Jupyter cell magic; it is only valid as
# the first line of a notebook cell and is a syntax error in a plain .py file.
%%time
# Train the VGG19 attention model. steps_per_epoch=3 with epochs=4 is a very
# short demo run; validation uses the fixed (test_X_VGG19, test_Y_VGG19)
# batch drawn from valid_df above, not a generator.
History_VGG19 = tb_model_VGG19.fit_generator(train_gen_VGG19,
steps_per_epoch = 3,
validation_data = (test_X_VGG19, test_Y_VGG19),
epochs = 4,
callbacks = callbacks_list)
Позже с помощью sklearn построены матрица ошибок (confusion matrix) и ROC-кривые.
Вот ROC-кривые как для VGG16, так и для VGG19 на одном графике. Я хочу, чтобы ансамблевая кривая тоже отображалась вместе с остальными.