Grid search cross-validation error when trying to fit X, y with sklearn GridSearchCV

Python scikit-learn KNN grid search cross-validation error

I am trying to recreate a KNN model for predicting a car's destination: https://github.com/carlosbkm/car-destination-prediction. The code breaks at the grid search cross-validation step: https://github.com/carlosbkm/car-destination-prediction/blob/master/k-nearest-model.ipynb. At first geodash did not work, so I switched it to geodash2 and that caused no problems. When I try to fit the model I get: TypeError: unsupported operand type(s) for /: 'str' and 'int'.

When I try to fit X and y for the grid search cross-validation, I get an error. The problem comes from:

from sklearn.model_selection import GridSearchCV

def cv_optimize(clf, parameters, X, y, n_jobs=1, n_folds=5, score_func=None):
    # Build the grid search, with a custom scoring function if one is given
    if score_func:
        gs = GridSearchCV(clf, param_grid=parameters, cv=n_folds, n_jobs=n_jobs, scoring=score_func)
    else:
        gs = GridSearchCV(clf, param_grid=parameters, n_jobs=n_jobs, cv=n_folds)
    # Fit the grid search on the training data -- this is the line that fails
    gs.fit(X, y)
    print ("BEST", gs.best_params_, gs.best_score_, gs.cv_results_)
    best = gs.best_estimator_
    return best
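To rule out the function itself, I think a sanity check on purely synthetic numeric data should run cleanly, which would mean the problem is in my X_train/y_train rather than in cv_optimize (a rough sketch; X_demo and y_demo are just made-up names, not my real data):

import numpy as np
from sklearn.neighbors import KNeighborsRegressor

# Purely numeric synthetic data, only to check cv_optimize itself
X_demo = np.random.rand(100, 2)   # two numeric features
y_demo = np.random.rand(100)      # numeric target

demo_best = cv_optimize(KNeighborsRegressor(),
                        {"n_neighbors": [1, 2, 5]},
                        X_demo, y_demo,
                        score_func='neg_mean_squared_error')

If this fits without errors, the data types in my real X and y are the likely culprit.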

I cannot fit the model to X and y: gs.fit(X, y)

I tried converting X and y to floats, but nothing changed.
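For reference, this is roughly the kind of conversion I mean (a sketch, assuming X_train and y_train are pandas objects; my real column layout may differ):

import pandas as pd

# See which columns are still stored as strings (dtype 'object')
print(X_train.dtypes)
print(y_train.dtypes if hasattr(y_train, 'dtypes') else y_train.dtype)

# Coerce everything to numeric; values that cannot be parsed become NaN
X_train = X_train.apply(pd.to_numeric, errors='coerce')
y_train = y_train.apply(pd.to_numeric, errors='coerce')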

When I run this:

from sklearn.neighbors import KNeighborsRegressor

# Create a k-Nearest Neighbors regression estimator
knn_estimator = KNeighborsRegressor()
#knn_parameters = {"n_neighbors": [1,2,5,10,20,50,100]}
knn_parameters = {"n_neighbors": [1,2,5]}
knn_best = cv_optimize(knn_estimator, knn_parameters, X_train, y_train, score_func='neg_mean_squared_error')

I get:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-124-34b56429c6b5> in <module>()
      4 #knn_parameters = {"n_neighbors": [1,2,5,10,20,50,100]}
      5 knn_parameters = {"n_neighbors": [1,2,5]}
----> 6 knn_best = cv_optimize(knn_estimator, knn_parameters, X_train, y_train, score_func='neg_mean_squared_error')

<ipython-input-116-1a00f84f1047> in cv_optimize(clf, parameters, X, y, n_jobs, n_folds, score_func)
      6     else:
      7         gs = GridSearchCV(clf, param_grid=parameters, n_jobs=n_jobs, cv=n_folds)
----> 8     gs.fit(X, y)
      9     print ("BEST", gs.best_params_, gs.best_score_, gs.cv_results_)
     10     best = gs.best_estimator_

~/anaconda3/envs/datascience/lib/python3.6/site-packages/sklearn/model_selection/_search.py in fit(self, X, y, groups)
    943             train/test set.
    944         """
--> 945         return self._fit(X, y, groups, ParameterGrid(self.param_grid))
    946 
    947 

~/anaconda3/envs/datascience/lib/python3.6/site-packages/sklearn/model_selection/_search.py in _fit(self, X, y, groups, parameter_iterable)
    562                                   return_times=True, return_parameters=True,
    563                                   error_score=self.error_score)
--> 564           for parameters in parameter_iterable
    565           for train, test in cv_iter)
    566 

~/anaconda3/envs/datascience/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
    756             # was dispatched. In particular this covers the edge
    757             # case of Parallel used with an exhausted iterator.
--> 758             while self.dispatch_one_batch(iterator):
    759                 self._iterating = True
    760             else:

~/anaconda3/envs/datascience/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
    606                 return False
    607             else:
--> 608                 self._dispatch(tasks)
    609                 return True
    610 

~/anaconda3/envs/datascience/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
    569         dispatch_timestamp = time.time()
    570         cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 571         job = self._backend.apply_async(batch, callback=cb)
    572         self._jobs.append(job)
    573 

~/anaconda3/envs/datascience/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
    107     def apply_async(self, func, callback=None):
    108         """Schedule a func to be run"""
--> 109         result = ImmediateResult(func)
    110         if callback:
    111             callback(result)

~/anaconda3/envs/datascience/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
    324         # Don't delay the application, to avoid keeping the input
    325         # arguments in memory
--> 326         self.results = batch()
    327 
    328     def get(self):

~/anaconda3/envs/datascience/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

~/anaconda3/envs/datascience/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

~/anaconda3/envs/datascience/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
    258     else:
    259         fit_time = time.time() - start_time
--> 260         test_score = _score(estimator, X_test, y_test, scorer)
    261         score_time = time.time() - start_time - fit_time
    262         if return_train_score:

~/anaconda3/envs/datascience/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in _score(estimator, X_test, y_test, scorer)
    286         score = scorer(estimator, X_test)
    287     else:
--> 288         score = scorer(estimator, X_test, y_test)
    289     if hasattr(score, 'item'):
    290         try:

~/anaconda3/envs/datascience/lib/python3.6/site-packages/sklearn/metrics/scorer.py in __call__(self, estimator, X, y_true, sample_weight)
     89         super(_PredictScorer, self).__call__(estimator, X, y_true,
     90                                              sample_weight=sample_weight)
---> 91         y_pred = estimator.predict(X)
     92         if sample_weight is not None:
     93             return self._sign * self._score_func(y_true, y_pred,

~/anaconda3/envs/datascience/lib/python3.6/site-packages/sklearn/neighbors/regression.py in predict(self, X)
    151 
    152         if weights is None:
--> 153             y_pred = np.mean(_y[neigh_ind], axis=1)
    154         else:
    155             y_pred = np.empty((X.shape[0], _y.shape[1]), dtype=np.float64)

~/anaconda3/envs/datascience/lib/python3.6/site-packages/numpy/core/fromnumeric.py in mean(a, axis, dtype, out, keepdims)
   2907 
   2908     return _methods._mean(a, axis=axis, dtype=dtype,
-> 2909                           out=out, **kwargs)
   2910 
   2911 

~/anaconda3/envs/datascience/lib/python3.6/site-packages/numpy/core/_methods.py in _mean(a, axis, dtype, out, keepdims)
     71     if isinstance(ret, mu.ndarray):
     72         ret = um.true_divide(
---> 73                 ret, rcount, out=ret, casting='unsafe', subok=False)
     74         if is_float16_result and out is None:
     75             ret = arr.dtype.type(ret)

TypeError: unsupported operand type(s) for /: 'str' and 'int'
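The last frames point at KNeighborsRegressor.predict, where NumPy averages the stored training targets (_y[neigh_ind]). That makes me think y_train still contains strings even after my conversion attempt. A quick check along these lines (a sketch, assuming y_train is what I passed to cv_optimize) should show it:

import numpy as np

# If this prints dtype('O') or a string dtype, the targets are still
# strings, and np.mean over them raises exactly this TypeError
print(np.asarray(y_train).dtype)
print(np.asarray(y_train)[:3])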