ValueError: входные данные содержат NaN при использовании GridSearchCV с конвейером - PullRequest
0 голосов
/ 26 мая 2019

Я пытаюсь использовать GridSearchCV, чтобы подобрать лучшие параметры для алгоритма регрессии. В этом примере я использую KNN-регрессор. Сам по себе конвейер работает, но когда я помещаю его в GridSearchCV, возникает ошибка «ValueError: Input contains NaN» (входные данные содержат NaN).

Я перепробовал множество других алгоритмов и менял шаги конвейера, но проблема остаётся той же.

При этом я уверен, что в DataFrame нет пропущенных значений (null) после того, как я преобразовал данные с помощью конвейера 'PIPE_FULL_FLOW'.

# Preprocessing pipeline: missing-value imputation followed by encoding.
PIPE_FULL_FLOW = Pipeline([('FEATUNION_MISSING_IMPUTATION',FEATUNION_MISSING_IMPUTATION),('FEATUNION_STEP3',FEATUNION_STEP3)])

# Per the author, this reports no nulls in any column. Note that the check is
# run on the whole of DF_FULL_DATA with the already-fitted pipeline, not on a
# cross-validation training fold.
PIPE_FULL_FLOW.transform(DF_FULL_DATA).isnull().sum()

# Clone the pipeline into a new one so KNN can be added as the final estimator.
# `steps.copy()` is a shallow list copy: the new pipeline references the same
# transformer objects as PIPE_FULL_FLOW.
PIPE_FULL_FLOW_KNN = Pipeline(PIPE_FULL_FLOW.steps.copy())

# Add the KNN regressor as the last step.
PIPE_FULL_FLOW_KNN.steps.append(('PREDICT_KNN',KNeighborsRegressor()))

# The pipeline fits normally without GridSearchCV:
#PIPE_FULL_FLOW_KNN.fit(DF_FULL_DATA,DF_FULL_DATA[['SalePrice']])

# Parameter grid: n_neighbors values 3..10 for the 'PREDICT_KNN' step.
DICT_GRID_KNN = {'PREDICT_KNN__n_neighbors':[i for i in range(3,11)]}

# 5-fold cross-validated grid search. error_score='raise' makes an exception
# raised during a fold's fit propagate instead of being recorded as a score.
GRID_KNN = GridSearchCV(PIPE_FULL_FLOW_KNN,param_grid=DICT_GRID_KNN,cv=5,error_score='raise')

# Fitting the grid search raises the ValueError shown below.
# X is the whole DF_FULL_DATA (the same frame 'SalePrice' is taken from) and
# y is a one-column DataFrame rather than a Series.
GRID_KNN.fit(DF_FULL_DATA,DF_FULL_DATA[['SalePrice']])

Полное сообщение об ошибке:

ValueError                                Traceback (most recent call last)
<ipython-input-66-9ca843c67586> in <module>
----> 1 GRID_KNN.fit(DF_FULL_DATA,DF_FULL_DATA[['SalePrice']])

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
    720                 return results_container[0]
    721 
--> 722             self._run_search(evaluate_candidates)
    723 
    724         results = results_container[0]

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self, evaluate_candidates)
   1189     def _run_search(self, evaluate_candidates):
   1190         """Search all candidates in param_grid"""
-> 1191         evaluate_candidates(ParameterGrid(self.param_grid))
   1192 
   1193 

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params)
    709                                for parameters, (train, test)
    710                                in product(candidate_params,
--> 711                                           cv.split(X, y, groups)))
    712 
    713                 all_candidate_params.extend(candidate_params)

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
    915             # remaining jobs.
    916             self._iterating = False
--> 917             if self.dispatch_one_batch(iterator):
    918                 self._iterating = self._original_iterator is not None
    919 

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
    757                 return False
    758             else:
--> 759                 self._dispatch(tasks)
    760                 return True
    761 

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
    714         with self._lock:
    715             job_idx = len(self._jobs)
--> 716             job = self._backend.apply_async(batch, callback=cb)
    717             # A job can complete so quickly than its callback is
    718             # called before we get here, causing self._jobs to

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
    180     def apply_async(self, func, callback=None):
    181         """Schedule a func to be run"""
--> 182         result = ImmediateResult(func)
    183         if callback:
    184             callback(result)

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
    547         # Don't delay the application, to avoid keeping the input
    548         # arguments in memory
--> 549         self.results = batch()
    550 
    551     def get(self):

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
    526             estimator.fit(X_train, **fit_params)
    527         else:
--> 528             estimator.fit(X_train, y_train, **fit_params)
    529 
    530     except Exception as e:

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\pipeline.py in fit(self, X, y, **fit_params)
    263             This estimator
    264         """
--> 265         Xt, fit_params = self._fit(X, y, **fit_params)
    266         if self._final_estimator is not None:
    267             self._final_estimator.fit(Xt, y, **fit_params)

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\pipeline.py in _fit(self, X, y, **fit_params)
    228                 Xt, fitted_transformer = fit_transform_one_cached(
    229                     cloned_transformer, Xt, y, None,
--> 230                     **fit_params_steps[name])
    231                 # Replace the transformer of the step with the fitted
    232                 # transformer. This is necessary when loading the transformer

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\memory.py in __call__(self, *args, **kwargs)
    340 
    341     def __call__(self, *args, **kwargs):
--> 342         return self.func(*args, **kwargs)
    343 
    344     def call_and_shelve(self, *args, **kwargs):

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\pipeline.py in _fit_transform_one(transformer, X, y, weight, **fit_params)
    612 def _fit_transform_one(transformer, X, y, weight, **fit_params):
    613     if hasattr(transformer, 'fit_transform'):
--> 614         res = transformer.fit_transform(X, y, **fit_params)
    615     else:
    616         res = transformer.fit(X, y, **fit_params).transform(X)

<ipython-input-5-ec6c2a2a481e> in fit_transform(self, X, y, **fit_params)
     14                 weight=weight,
     15                 **fit_params)
---> 16             for name, trans, weight in self._iter())
     17 
     18         if not result:

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
    918                 self._iterating = self._original_iterator is not None
    919 
--> 920             while self.dispatch_one_batch(iterator):
    921                 pass
    922 

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
    757                 return False
    758             else:
--> 759                 self._dispatch(tasks)
    760                 return True
    761 

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
    714         with self._lock:
    715             job_idx = len(self._jobs)
--> 716             job = self._backend.apply_async(batch, callback=cb)
    717             # A job can complete so quickly than its callback is
    718             # called before we get here, causing self._jobs to

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
    180     def apply_async(self, func, callback=None):
    181         """Schedule a func to be run"""
--> 182         result = ImmediateResult(func)
    183         if callback:
    184             callback(result)

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
    547         # Don't delay the application, to avoid keeping the input
    548         # arguments in memory
--> 549         self.results = batch()
    550 
    551     def get(self):

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\pipeline.py in _fit_transform_one(transformer, X, y, weight, **fit_params)
    612 def _fit_transform_one(transformer, X, y, weight, **fit_params):
    613     if hasattr(transformer, 'fit_transform'):
--> 614         res = transformer.fit_transform(X, y, **fit_params)
    615     else:
    616         res = transformer.fit(X, y, **fit_params).transform(X)

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\pipeline.py in fit_transform(self, X, y, **fit_params)
    298         Xt, fit_params = self._fit(X, y, **fit_params)
    299         if hasattr(last_step, 'fit_transform'):
--> 300             return last_step.fit_transform(Xt, y, **fit_params)
    301         elif last_step is None:
    302             return Xt

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in fit_transform(self, X, y)
    514                 self._categorical_features, copy=True)
    515         else:
--> 516             return self.fit(X).transform(X)
    517 
    518     def _legacy_transform(self, X):

<ipython-input-7-87152cbc4d01> in fit(self, X, y, sep)
      6 
      7     def fit(self, X, y=None,sep='_'):
----> 8         super(MyOneHotEncoder,self).fit(X)
      9 
     10         self.LIST_FEATURES_CATEGORIES = []

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in fit(self, X, y)
    425             return self
    426         else:
--> 427             self._fit(X, handle_unknown=self.handle_unknown)
    428             return self
    429 

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in _fit(self, X, handle_unknown)
     59 
     60     def _fit(self, X, handle_unknown='error'):
---> 61         X = self._check_X(X)
     62 
     63         n_samples, n_features = X.shape

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in _check_X(self, X)
     54             if not _get_config()['assume_finite']:
     55                 if _object_dtype_isnan(X).any():
---> 56                     raise ValueError("Input contains NaN")
     57 
     58         return X

ValueError: Input contains NaN
...