I am running GridSearchCV to find the best hyperparameters for my ANN regression. However, I am having trouble interpreting the test_score and train_score values and understanding why they differ so unreasonably.
The ANN is built with Keras, and MSE is used as the loss.
If I set cv=10 the results are reasonable, but with cv=3 they are not, and they are not comparable to what I get from a regular train/test split run manually (the MSE is then much smaller).
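For reference, the scores in the dumps below are negated MSE values: since I pass no scoring argument, GridSearchCV falls back to the estimator's score(), and KerasRegressor.score() returns the negative of the loss. A minimal sketch of reading them back (using grid_result from the script further down):

#sklearn scorers are "higher is better", so the MSE shows up with a flipped sign;
#mean_test_score = -80.59 therefore means a cross-validated test MSE of about 80.59
cv_mse = -grid_result.cv_results_['mean_test_score'][0]
train_mse = -grid_result.cv_results_['mean_train_score'][0]
print("test MSE: %.2f, train MSE: %.2f" % (cv_mse, train_mse))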
Results for cv=10:
{'mean_fit_time': array([ 45.64931359]),
'mean_score_time': array([ 0.88778265]),
'mean_test_score': array([-80.58573808]),
'mean_train_score': array([-54.32416867]),
'param_batch_size': masked_array(data = [25],
mask = [False],
fill_value = ?),
'param_epochs': masked_array(data = [500],
mask = [False],
fill_value = ?),
'params': [{'batch_size': 25, 'epochs': 500}],
'rank_test_score': array([1]),
'split0_test_score': array([-79.8304968]),
'split0_train_score': array([-54.1543676]),
'split1_test_score': array([-88.66814966]),
'split1_train_score': array([-52.53867387]),
'split2_test_score': array([-72.68088307]),
'split2_train_score': array([-54.22193575]),
'split3_test_score': array([-65.62303455]),
'split3_train_score': array([-55.11768506]),
'split4_test_score': array([-91.78870671]),
'split4_train_score': array([-53.60385677]),
'split5_test_score': array([-69.70716374]),
'split5_train_score': array([-54.90494821]),
'split6_test_score': array([-72.369327]),
'split6_train_score': array([-55.06327332]),
'split7_test_score': array([-63.47190503]),
'split7_train_score': array([-57.14306102]),
'split8_test_score': array([-61.61434507]),
'split8_train_score': array([-56.35855999]),
'split9_test_score': array([-139.97252682]),
'split9_train_score': array([-50.13532509]),
'std_fit_time': array([ 4.63885948]),
'std_score_time': array([ 0.36701861]),
'std_test_score': array([ 21.97077353]),
'std_train_score': array([ 1.86790488])}
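The summary keys are just aggregates of the per-split scores. Pulling them out (a sketch against grid_result from the script further down) shows that even with cv=10 the last fold is a clear outlier:

import numpy as np
res = grid_result.cv_results_
fold_mse = -np.array([res['split%d_test_score' % i][0] for i in range(10)])
#mean ~80.59 and std ~21.97 reproduce mean_test_score / std_test_score above
print(fold_mse.mean(), fold_mse.std())
#split9 stands out at ~139.97, roughly double the other folds
print(fold_mse.argmax(), fold_mse.max())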
If I instead use cv=3, I get very large numbers for test_score, but not for train_score:
'params': [{'batch_size': 25, 'epochs': 500}],
'rank_test_score': array([1]),
'split0_test_score': array([-9366.9518699]),
'split0_train_score': array([-47.92415035]),
'split1_test_score': array([-9379.17264271]),
'split1_train_score': array([-51.44866994]),
'split2_test_score': array([-9065.89809108]),
'split2_train_score': array([-49.70452372]),
'std_fit_time': array([ 1.89022517]),
'std_score_time': array([ 0.08712083]),
'std_test_score': array([ 144.81240151]),
'std_train_score': array([ 1.43890443])}
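One detail that may be relevant: when cv is an integer, scikit-learn uses KFold without shuffling for regressors, so if the rows are date-ordered, each cv=3 test fold is one contiguous third of the timeline. A sketch of forcing shuffled folds instead (random_state=42 is an arbitrary choice; model and param_grid come from the script below):

from sklearn.model_selection import KFold
#An integer cv means KFold(n_splits=cv) with shuffle=False for regressors;
#shuffling makes cv=3 closer to a random train/test split
cv3 = KFold(n_splits=3, shuffle=True, random_state=42)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=cv3)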
The full script:
#Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import random
#Importing the dataset
dataset = pd.read_csv('dataset.csv', encoding="latin-1", sep=",")
#Features: every column except the target and a few unused ones
X = dataset.loc[:, ~dataset.columns.isin(
    ['Visitors', 'X', 'Unnamed: 0', 'Empty Spot', 'Date'])].values
#Target: every column except the feature columns
Y = dataset.loc[:, ~dataset.columns.isin(
    ['X', 'Empty spot', 'Logi.total', 'Rain.mm', 'Lufttryck', 'Unnamed: 0',
     'Day', 'Month', 'Week.day', 'Week.number', 'Year', 'Temp18', 'Rain',
     'Cloud', 'utan_regn', 'Vindstyrka', 'med_regn', 'SMA', 'Date',
     'Temp06'])].values
#Encoding the categorical features
from sklearn.preprocessing import LabelEncoder
labelencoder_X = LabelEncoder()
X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
X[:, 8] = labelencoder_X.fit_transform(X[:, 8])
from sklearn.preprocessing import OneHotEncoder
onehotencoder = OneHotEncoder(categorical_features = [0, 1, 2, 3, 4, 8])
X = onehotencoder.fit_transform(X).toarray()
#Feature scaling
from sklearn.preprocessing import StandardScaler
Scaler_X = StandardScaler()
X = Scaler_X.fit_transform(X)
#Building the ANN
from keras.models import Sequential
from sklearn.model_selection import GridSearchCV
from keras.layers import Dense, Activation
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
def HO():
    model = Sequential()
    model.add(Dense(73, kernel_initializer='normal', activation='linear',
                    input_dim=73))
    model.add(Dense(35, kernel_initializer='normal', activation='linear'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mse', optimizer='adam')
    return model
model = KerasRegressor(build_fn = HO, verbose = 1)
# define the grid search parameters
batch_size = [25]
epochs = [500]
param_grid = dict(batch_size = batch_size, epochs = epochs)
# cv=3 or cv=10 in my examples
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_,
grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
print("%f (%f) with: %r" % (mean, stdev, param))
Why is test_score computed so differently from train_score when I use fewer folds? I also get an unreasonably high error with cv=2, cv=5, etc., while the manual split sketched below gives a much lower MSE.
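For completeness, this is the kind of manual comparison I mean (a minimal sketch; test_size=0.2 and random_state=0 are arbitrary choices):

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
#A single random 80/20 split instead of cross-validation
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
manual_model = KerasRegressor(build_fn=HO, batch_size=25, epochs=500, verbose=0)
manual_model.fit(X_train, Y_train)
Y_pred = manual_model.predict(X_test)
print("Manual test MSE: %.2f" % mean_squared_error(Y_test, Y_pred))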