# The environment is the same as before.
# Feature preprocessing: numeric columns are mean-imputed and standardized;
# categorical columns are imputed with a 'missing' placeholder and one-hot
# encoded (unknown categories at transform time are ignored).
numeric_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ])
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ])
# Split columns by dtype: int64/float64 -> numeric, object -> categorical.
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns
categorical_features = X.select_dtypes(include=['object']).columns
# Apply each sub-pipeline to its own column subset.
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])
# Hyperparameter search space for the XGBoost regressor.
# Number of boosting rounds: 10 evenly spaced values in [50, 1000].
n_estimators = [int(v) for v in np.linspace(start=50, stop=1000, num=10)]
# Maximum tree depth: every integer from 1 to 32.
max_depth = list(range(1, 33))
# Booster variants to try.
booster = ['gbtree', 'gblinear', 'dart']
# Minimum loss reduction required to make a split: 0.0 .. 0.4.
gamma = [step / 10.0 for step in range(5)]
# Step-size shrinkage: 15 values between 0.01 and 0.2.
learning_rate = np.linspace(0.01, 0.2, 15)
# Evaluation metric
# eval_metric = ['rmse','mae']
# L1 / L2 regularization strengths.
reg_alpha = [1e-5, 1e-2, 0.1, 1, 100]
reg_lambda = [1e-5, 1e-2, 0.1, 1, 100]
# Minimum child weight.
min_child_weight = [1, 3, 5]
# Row / column subsampling ratios: 0.6 .. 0.9.
subsample = [frac / 10.0 for frac in range(6, 10)]
colsample_bytree = [frac / 10.0 for frac in range(6, 10)]
# Assemble the distributions dict for the randomized search.
random_grid = {
    'n_estimators': n_estimators,
    'max_depth': max_depth,
    'booster': booster,
    'gamma': gamma,
    'learning_rate': learning_rate,
    # 'eval_metric' : eval_metric,
    'reg_alpha': reg_alpha,
    'reg_lambda': reg_lambda,
    'min_child_weight': min_child_weight,
    'subsample': subsample,
    'colsample_bytree': colsample_bytree,
}
# Use the random grid to search for good hyperparameters.
# Base estimator to tune: squared-error objective, 4 worker threads.
rf = xgboost.XGBRegressor(objective='reg:squarederror', n_jobs=4)
# Randomized search: 100 sampled parameter combinations, 3-fold CV,
# fixed seed for reproducibility, 4 parallel jobs.
search_kwargs = dict(
    estimator=rf,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=0,
    random_state=42,
    n_jobs=4,
)
rf_random = RandomizedSearchCV(**search_kwargs)
# Chain preprocessing with the tuned regressor and fit on the full data.
pipe = Pipeline(steps=[('preprocessor', preprocessor),
                       ('regressor', rf_random)])
pipe.fit(X, y)