Я пытаюсь приспособить модель Knearest Neigbors к моим данным. но я получаю эту ошибку:
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
Вот код моего алгоритма Knearest Neigbhors:
def knn_train_test_new(training_col, target_col, df):
np.random.seed(1)
df = df.loc[np.random.permutation(len(df))]
# shuffled_index = np.random.permutation(df.index)
# df = df.reindex(shuffled_index)
train_df = df.iloc[0:150] #training set has 75% of the data
test_df = df.iloc[150:] #test set has 25% of the data
k = [5]
rmse = {}
for k_val in k:
model = KNeighborsRegressor(n_neighbors = k_val)
model.fit(train_df[training_col], train_df[target_col])
predictions = model.predict(test_df[training_col])
mse = mean_squared_error(test_df[target_col], predictions)
rmse[k_val] = (mse ** 0.5)
return rmse
two_features = ["width", "wheel-base"]
rmse_val = knn_train_test(two_features, 'price', numeric_cars)
И первые пять строк моего кадра данных:
numeric_cars.head()
![enter image description here](https://i.stack.imgur.com/K0Wwu.png)
Я не получаю эту ошибку, когда использую shuffled_index (который я прокомментировал) вместо np.random.permutation. Я не совсем понимаю разницу между ними.
Полный трек ошибок:
ValueError Traceback (most recent call last)
<ipython-input-13-03e0dd7acfd0> in <module>()
25
26 two_features = ["width", "wheel-base"]
---> 27 rmse_val = knn_train_test_new(two_features, 'price', numeric_cars)
28
29 #rmse_results = {}
<ipython-input-13-03e0dd7acfd0> in knn_train_test_new(training_col, target_col, df)
14
15 model = KNeighborsRegressor(n_neighbors = k_val)
---> 16 model.fit(train_df[training_col], train_df[target_col])
17
18 predictions = model.predict(test_df[training_col])
~\Anaconda3\lib\site-packages\sklearn\neighbors\base.py in fit(self, X, y)
743 """
744 if not isinstance(X, (KDTree, BallTree)):
--> 745 X, y = check_X_y(X, y, "csr", multi_output=True)
746 self._y = y
747 return self._fit(X)
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
571 X = check_array(X, accept_sparse, dtype, order, copy, force_all_finite,
572 ensure_2d, allow_nd, ensure_min_samples,
--> 573 ensure_min_features, warn_on_dtype, estimator)
574 if multi_output:
575 y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
451 % (array.ndim, estimator_name))
452 if force_all_finite:
--> 453 _assert_all_finite(array)
454
455 shape_repr = _shape_repr(array.shape)
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in _assert_all_finite(X)
42 and not np.isfinite(X).all()):
43 raise ValueError("Input contains NaN, infinity"
---> 44 " or a value too large for %r." % X.dtype)
45
46
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').