Я не понимаю, почему LightGBM не сохраняет лучшую модель, когда я использую раннюю остановку (early stopping).
Вот мой код:
# Hyperparameters for an LGBMClassifier; f_lgboost unpacks its `params`
# argument into the classifier constructor.
# 'class_weight' used to be listed twice in this literal; a repeated key is
# silently overridden by its last occurrence, so it is kept only once here.
params = {
    'num_leaves': 31,
    'class_weight': 'balanced',
    'max_depth': -1,
    'learning_rate': 0.1,
    'n_estimators': 1000,
    'subsample_for_bin': 200000,
    'objective': 'binary',
    'min_split_gain': 0.0,
    'min_child_weight': 0.001,
    'min_child_samples': 20,
    'subsample': 1.0,
    'subsample_freq': 0,
    'colsample_bytree': 0.7,
    'reg_alpha': 0.2,
    'reg_lambda': 10.0,
    'random_state': 7,
    'n_jobs': -1,
    'silent': True,
    'importance_type': 'split',
}
def f_lgboost(data, params):
    """Fit an LGBMClassifier with early stopping and collect dev-set metrics.

    Args:
        data: Mapping with the keys 'X_train', 'y_train', 'X_dev', 'y_dev'
            and 'X_test'. The X_* entries must expose ``.columns`` and
            support column assignment (presumably pandas DataFrames --
            confirm with the caller) and must contain the columns
            'Ticker_code' and 'Category_code'. Those columns are converted
            to the 'category' dtype by assignment into the objects taken
            from ``data``, so the caller's objects are modified.
        params: Keyword arguments forwarded to ``lgb.LGBMClassifier``.

    Returns:
        dict holding the inputs (``params``, ``data``), the fitted
        classifier under 'lg_boost_model', the predicted positive-class
        probabilities for train/dev/test, the train/dev ROC AUC, and the
        dev-set precision/recall/F-beta (beta=0.5)/support computed at a
        0.5 probability threshold.
    """
    from sklearn.metrics import precision_recall_fscore_support, roc_auc_score

    model = lgb.LGBMClassifier(**params)

    X_train = data['X_train']
    y_train = data['y_train']
    X_dev = data['X_dev']
    y_dev = data['y_dev']
    X_test = data['X_test']

    categorical_feature = ['Ticker_code', 'Category_code']
    X_train[categorical_feature] = X_train[categorical_feature].astype('category')
    X_dev[categorical_feature] = X_dev[categorical_feature].astype('category')
    X_test[categorical_feature] = X_test[categorical_feature].astype('category')
    feature_name = X_train.columns.to_list()

    # NOTE(review): newer LightGBM releases configure early stopping through
    # `callbacks=[lgb.early_stopping(...)]` instead of the
    # `early_stopping_rounds` argument -- confirm against the installed version.
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_dev, y_dev)],
        eval_metric='auc',
        early_stopping_rounds=20,
        categorical_feature=categorical_feature,
        feature_name=feature_name,
    )

    # NOTE(review): per the LightGBM documentation, predictions from an
    # early-stopped estimator default to its best iteration
    # (`model.best_iteration_`) -- confirm for the installed version.
    y_pred_train = model.predict_proba(X_train)[:, 1].ravel()
    y_pred_dev = model.predict_proba(X_dev)[:, 1].ravel()
    y_pred_test = model.predict_proba(X_test)[:, 1].ravel()

    auc_train = roc_auc_score(y_train, y_pred_train)
    auc_dev = roc_auc_score(y_dev, y_pred_dev)

    # Hard labels for the dev set are obtained with a 0.5 probability cut-off.
    precision, recall, fscore, support = precision_recall_fscore_support(
        y_dev, (y_pred_dev > 0.5).astype(int), beta=0.5
    )

    print(f'auc_train: {auc_train}, auc_dev : {auc_dev}, precision : {precision}, recall: {recall}, fscore : {fscore}')

    Results = {
        'params': params,
        'data': data,
        # The original referenced an undefined name `bst` here, which raised
        # NameError; the fitted estimator is `model`.
        'lg_boost_model': model,
        'y_pred_train': y_pred_train,
        'y_pred_dev': y_pred_dev,
        'y_pred_test': y_pred_test,
        'auc_train': auc_train,
        'auc_dev': auc_dev,
        'precision_dev': precision,
        'recall_dev': recall,
        'fscore_dev': fscore,
        'support_dev': support,
    }
    return Results
Как вы это объясните и что бы вы мне посоветовали?