Пример RandomizedSearchCV из руководства «Машинное обучение с Python и H2O» не работает - PullRequest
1 голос
/ 16 февраля 2020

Я немного озадачен: у меня не работает последний пример из руководства «Машинное обучение с Python и H2O» (стр. 36).

Вот код:

"""Randomized hyper-parameter search over an H2O pipeline with sklearn.

Reproduces the final example of the "Machine Learning with Python and H2O"
booklet (p. 36): an H2OScaler + H2OGradientBoostingEstimator pipeline tuned
with sklearn's RandomizedSearchCV on the iris dataset.
"""
import h2o

h2o.init()

from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.transforms.preprocessing import H2OScaler
from h2o.cross_validation import H2OKFold
from h2o.model.regression import h2o_r2_score

from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV
# Fix: `sklearn.metrics.scorer` is a deprecated private module (it emits a
# FutureWarning in 0.22 and was removed in 0.24). `make_scorer` is public in
# `sklearn.metrics`.
from sklearn.metrics import make_scorer

# Silence H2O's progress bars so the search output stays readable.
h2o.__PROGRESS_BAR__ = False
h2o.no_progress()

# Load the demonstration data (iris) from H2O's public S3 bucket.
iris_data_path = "http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris.csv"
iris_df = h2o.import_file(path=iris_data_path)

# Search space: keys follow sklearn's "<pipeline step>__<estimator param>"
# convention so RandomizedSearchCV can route each value to the right step.
params = {"standardize__center":    [True, False],
          "standardize__scale":     [True, False],
          "gbm__ntrees":            [10, 20],
          "gbm__max_depth":         [1, 2, 3],
          "gbm__learn_rate":        [0.1, 0.2]}

# H2O-aware K-fold splitter over the H2OFrame (5 folds, fixed seed).
custom_cv = H2OKFold(iris_df, n_folds=5, seed=42)

pipeline = Pipeline([("standardize", H2OScaler()),
                     ("gbm", H2OGradientBoostingEstimator(distribution="gaussian"))])

# NOTE(review): the traceback in this question comes from sklearn >= 0.22
# validating CV indices via `_safe_indexing`, which does not accept H2OFrame
# row selections — hence "No valid specification of the columns". The H2O /
# sklearn bridge used by this booklet example appears to require
# sklearn < 0.22 (e.g. 0.21.x) — confirm against the h2o release notes.
random_search = RandomizedSearchCV(pipeline, params, n_iter=5,
                                   scoring=make_scorer(h2o_r2_score),
                                   cv=custom_cv, random_state=42, n_jobs=1)

# Columns 1..end are the features, column 0 is the response.
random_search.fit(iris_df[1:], iris_df[0])

Возвращается ошибка ValueError: «No valid specification of the columns. Only a scalar, list or slice of all integers or all strings, or boolean mask is allowed».

Полное сообщение терминала:

Traceback (most recent call last):

  File "untitled-Copy1.py", line 34, in <module>
    random_search.fit(iris_df[1:], iris_df[0])
  File "/department/jupyter-dev/anaconda3/envs/python36/lib/python3.6/site-packages/sklearn/model_selection/_search.py", line 710, in fit
    self._run_search(evaluate_candidates)
  File "/department/jupyter-dev/anaconda3/envs/python36/lib/python3.6/site-packages/sklearn/model_selection/_search.py", line 1484, in _run_search
    random_state=self.random_state))
  File "/department/jupyter-dev/anaconda3/envs/python36/lib/python3.6/site-packages/sklearn/model_selection/_search.py", line 689, in evaluate_candidates
    cv.split(X, y, groups)))
  File "/department/jupyter-dev/anaconda3/envs/python36/lib/python3.6/site-packages/joblib/parallel.py", line 1004, in __call__
    if self.dispatch_one_batch(iterator):
  File "/department/jupyter-dev/anaconda3/envs/python36/lib/python3.6/site-packages/joblib/parallel.py", line 835, in dispatch_one_batch
    self._dispatch(tasks)
  File "/department/jupyter-dev/anaconda3/envs/python36/lib/python3.6/site-packages/joblib/parallel.py", line 754, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "/department/jupyter-dev/anaconda3/envs/python36/lib/python3.6/site-packages/joblib/_parallel_backends.py", line 209, in apply_async
    result = ImmediateResult(func)
  File "/department/jupyter-dev/anaconda3/envs/python36/lib/python3.6/site-packages/joblib/_parallel_backends.py", line 590, in __init__
    self.results = batch()
  File "/department/jupyter-dev/anaconda3/envs/python36/lib/python3.6/site-packages/joblib/parallel.py", line 256, in __call__
    for func, args, kwargs in self.items]
  File "/department/jupyter-dev/anaconda3/envs/python36/lib/python3.6/site-packages/joblib/parallel.py", line 256, in <listcomp>
    for func, args, kwargs in self.items]
  File "/department/jupyter-dev/anaconda3/envs/python36/lib/python3.6/site-packages/sklearn/model_selection/_validation.py", line 508, in _fit_and_score
    X_train, y_train = _safe_split(estimator, X, y, train)
  File "/department/jupyter-dev/anaconda3/envs/python36/lib/python3.6/site-packages/sklearn/utils/metaestimators.py", line 201, in _safe_split
    X_subset = _safe_indexing(X, indices)
  File "/department/jupyter-dev/anaconda3/envs/python36/lib/python3.6/site-packages/sklearn/utils/__init__.py", line 390, in _safe_indexing
    indices_dtype = _determine_key_type(indices)
  File "/department/jupyter-dev/anaconda3/envs/python36/lib/python3.6/site-packages/sklearn/utils/__init__.py", line 288, in _determine_key_type
    raise ValueError(err_msg)
ValueError: No valid specification of the columns. Only a scalar, list or slice of all integers or all strings, or boolean mask is allowed
Closing connection _sid_b8c1 at exit
H2O session _sid_b8c1 closed.

Я использую python 3.6.10 с sklearn 0.22.1 и h2o 3.28.0.3.

Что я делаю не так? Любая помощь приветствуется!

Хорошего дня:)

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...