Настройка KerasClassifier через GridSearchCV с обратными вызовами (callbacks) завершается ошибкой: ValueError: Найдены входные переменные с несогласованным количеством выборок - PullRequest
0 голосов
/ 21 мая 2018

Я использую sklearn.GridSearchCV для тонкой настройки гиперпараметров модели в Keras. Кроме того, я добавляю обратные вызовы (callbacks).

Форма входных данных: (1500, 3, 10, 10)

Форма выходных данных: (1500,)

Код поиска по сетке:

def Grid_Search_Training(model):
    """Return an unfitted ``GridSearchCV`` wrapped around ``model``.

    The search covers every combination of the ``activation``, ``epochs``,
    ``batch_size`` and ``L2_lambda`` values listed below, scored by accuracy
    with 5-fold cross-validation. The caller is responsible for calling
    ``fit`` on the returned object.
    """
    # Hyper-parameter candidates, keyed by the estimator parameter name.
    search_space = {
        'activation': ['relu', 'tanh'],
        'epochs': [300],
        'batch_size': [16, 32, 64, 128],
        'L2_lambda': [0.01, 0.001, 0.0001],
    }
    return GridSearchCV(
        estimator=model,
        param_grid=search_space,
        scoring='accuracy',
        cv=5,
    )

def run(grid_search=True):
    """Build the model, configure the training callbacks and train it.

    Reads ``X_train`` and ``y_train`` from module scope.

    Args:
        grid_search: if true, wrap the model in a ``KerasClassifier``, fit the
            search object returned by ``Grid_Search_Training`` and print the
            best score plus the mean/std score of every candidate. Otherwise
            fit the model once with 100 epochs, batch size 64 and a 20%
            validation split.

    Returns:
        None. Results are only printed.
    """
    model = Model()
    plot_model(model, to_file='Model_plot.png', show_shapes=True, show_layer_names=True)

    # Collect the names of all 'tower_' layers so they can be handed to the
    # TensorBoard callback as the layers to visualise.
    embeddings_all_layer_names = set(layer.name for layer in model.layers if layer.name.startswith('tower_'))

    # Path handed to the checkpoint callback for the model weights.
    Model_weights_path = 'Model_weights.h5'

    checkpointer = ModelCheckpoint(Model_weights_path, monitor='val_loss', verbose=1, save_best_only=True)

    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0000001)

    # A fresh, timestamped log directory per run keeps TensorBoard runs apart.
    tensorboard_log_dir = 'ModelLogs/{}'.format(time.time())
    tensorboard = TensorBoard(log_dir=tensorboard_log_dir, histogram_freq=1,
                              write_graph=True, write_images=True, embeddings_freq=1,
                              embeddings_layer_names=embeddings_all_layer_names, embeddings_metadata=None)

    callbacks_list = [checkpointer, reduce_lr, tensorboard]
    fit_params = dict(callbacks=callbacks_list)

    if grid_search:

        t0 = time.time()
        print(incepModel().summary())
        # NOTE(review): build_fn receives an already-built model instance here.
        # The Keras scikit-learn wrapper documents build_fn as a callable that
        # builds and returns the model, and the grid's 'activation' and
        # 'L2_lambda' entries presumably have to be arguments of that
        # callable - confirm against the real model-building function.
        model = KerasClassifier(build_fn=model, verbose=1)

        grid = Grid_Search_Training(model)
        print('Start Training the model......')
        # The callbacks must be passed as keyword arguments: a third
        # positional argument to GridSearchCV.fit is interpreted as `groups`
        # and length-checked against X and y.
        # NOTE(review): the callbacks above monitor 'val_loss' and TensorBoard
        # uses histogram_freq=1, but no validation data is passed to this fit
        # call - confirm that validation data reaches the underlying model.
        grid_result = grid.fit(X_train, y_train, **fit_params)
        print("Best acc Score: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

        t1 = time.time()
        t = t1 - t0

        print('The GirdSearch on CNN took %.2f mins.' % (round(t / 60., 2)))
        means = grid_result.cv_results_['mean_test_score']
        stds = grid_result.cv_results_['std_test_score']
        params = grid_result.cv_results_['params']
        for mean, stdev, param in zip(means, stds, params):
            print("%f (%f) with: %r" % (mean, stdev, param))

    else:
        # Single training run; labels are one-hot encoded for the plain model.
        model.fit(X_train, to_categorical(y_train), epochs=100, batch_size=64, validation_split=0.2, callbacks=callbacks_list)

# Split the data set (``data`` is defined outside this excerpt) into train and
# test parts. run() reads X_train and y_train from module scope, so they must
# be assigned before it is called.
X_train, X_test, y_train, y_test = read_split(data)

# Start training with the hyper-parameter grid search enabled.
run(grid_search=True)

Ошибка:

    grid_result = grid.fit(X_train, y_train, fit_params)
  File "/Users/jd/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_search.py", line 615, in fit
    X, y, groups = indexable(X, y, groups)
  File "/Users/jd/anaconda2/lib/python2.7/site-packages/sklearn/utils/validation.py", line 229, in indexable
    check_consistent_length(*result)
  File "/Users/jd/anaconda2/lib/python2.7/site-packages/sklearn/utils/validation.py", line 204, in check_consistent_length
    " samples: %r" % [int(l) for l in lengths])
ValueError: Found input variables with inconsistent numbers of samples: [1500, 1500, 1]

Код хорошо работает без callbacks, т. е. без fit_params в вызове grid_result = grid.fit(X_train, y_train, fit_params). В этом случае ошибки нет.

Что вызывает такую ошибку?

...