В последней версии (март 2020 г.) «Машинного обучения с Python и H2O» (http://docs.h2o.ai/h2o/latest-stable/h2o-docs/booklets/PythonBooklet.pdf) на стр. 36 приведен пример интеграции H2O с scikit-learn. Я не могу заставить его работать.
Я использую h2o версии '3.28.0.3' и scikit-learn версии '0.22.1'.
Мой код:
# Fetch the public iris CSV through h2o.import_file and bind the result to
# iris_df (presumably an H2OFrame — confirm against the H2O docs).
iris_data_path = "http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris.csv" # load demonstration data
iris_df = h2o.import_file(path=iris_data_path)
# from sklearn.grid_search import RandomizedSearchCV
from sklearn.model_selection import RandomizedSearchCV
from h2o.cross_validation import H2OKFold
from h2o.model.regression import h2o_r2_score
# from sklearn.metrics.scorer import make_scorer
from sklearn.metrics import make_scorer
from h2o.estimators import H2OPrincipalComponentAnalysisEstimator, H2OGradientBoostingEstimator
# Hyper-parameter candidates, keyed as "<pipeline step>__<parameter>".
params = dict(
    standardize__center=[True, False],
    standardize__scale=[True, False],
    pca__k=[2, 3],
    gbm__ntrees=[10, 20],
    gbm__max_depth=[1, 2, 3],
    gbm__learn_rate=[0.1, 0.2],
)

# Cross-validation splitter built on the H2O frame: 5 folds, fixed seed.
custom_cv = H2OKFold(iris_df, n_folds=5, seed=42)

# Three-stage pipeline: scaling -> PCA -> gradient boosting.
# (Variant tried without .init_for_pipeline():
#  ("pca", H2OPrincipalComponentAnalysisEstimator(k=2)))
pipeline_steps = [
    ("standardize", H2OScaler()),
    ("pca", H2OPrincipalComponentAnalysisEstimator(k=2).init_for_pipeline()),
    ("gbm", H2OGradientBoostingEstimator(distribution="gaussian")),
]
pipeline = Pipeline(steps=pipeline_steps)

# Wrap the H2O R^2 metric so scikit-learn can use it as a scorer.
r2_scorer = make_scorer(h2o_r2_score)

# Randomized search: 5 sampled parameter settings, single job, fixed seed.
random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=params,
    n_iter=5,
    scoring=r2_scorer,
    cv=custom_cv,
    random_state=42,
    n_jobs=1,
)

# Fitting the bare pipeline runs OK:
#     pipeline.fit(iris_df[1:], iris_df[0])
# Fitting through the randomized search fails (ValueError raised inside
# scikit-learn):
random_search.fit(iris_df[1:], iris_df[0])
Вызов pipeline.fit() работает. Но когда я запускаю random_search.fit(), я получаю приведённые ниже сообщения об ошибке. Есть предложения, как это исправить?
ValueError Traceback (most recent call last)
<ipython-input-62-936508540549> in <module>()
37
38 # Fails:
---> 39 random_search.fit(iris_df[1:], iris_df[0])
---------------------------------------------------------------------------------------------
/usr/local/lib/python3.6/dist-packages/sklearn/utils/__init__.py in _determine_key_type(key, accept_slice)
286 except KeyError:
287 raise ValueError(err_msg)
--> 288 raise ValueError(err_msg)
289
290
ValueError: No valid specification of the columns. Only a scalar, list or slice of all integers or all strings, or boolean mask is allowed