Я тестирую приведенный ниже код и получаю сообщение об ошибке в самой последней строке.
dataset = df[['Rate', 'Weights', 'Change', 'Price', 'CategoryOne']].copy() #
dataset.shape
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 4].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
#Import knearest neighbors Classifier model
from sklearn.neighbors import KNeighborsClassifier
#Create KNN Classifier
knn = KNeighborsClassifier(n_neighbors=5)
#Train the model using the training sets
knn.fit(X_train, y_train)
#Predict the response for test dataset
y_pred = knn.predict(X_test)
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
#Import knearest neighbors Classifier model
from sklearn.neighbors import KNeighborsClassifier
#Create KNN Classifier
knn = KNeighborsClassifier(n_neighbors=7)
#Train the model using the training sets
knn.fit(X_train, y_train)
В этой самой последней строке, когда я пытаюсь установить X_train и y_train, я получаю эта ошибка:
TypeError: '<' not supported between instances of 'int' and 'str'
Данные в поле CategoryOne выглядят так: '2a', '1', '2a'
. Может ли это быть проблема? Я знаю, что целевая переменная не обязательно должна быть цифрой c. Я просто хочу увидеть связь между независимыми переменными и зависимой переменной (CategoryOne).
Вот StackTrace:
Traceback (most recent call last):
File "<ipython-input-108-36266936f0ca>", line 29, in <module>
knn.fit(X_train, y_train)
File "C:\Users\rs\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\neighbors\base.py", line 906, in fit
check_classification_targets(y)
File "C:\Users\rs\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\utils\multiclass.py", line 166, in check_classification_targets
y_type = type_of_target(y)
File "C:\Users\rs\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\utils\multiclass.py", line 287, in type_of_target
if (len(np.unique(y)) > 2) or (y.ndim >= 2 and len(y[0]) > 1):
File "C:\Users\rs\AppData\Local\Continuum\anaconda3\lib\site-packages\numpy\lib\arraysetops.py", line 264, in unique
ret = _unique1d(ar, return_index, return_inverse, return_counts)
File "C:\Users\rs\AppData\Local\Continuum\anaconda3\lib\site-packages\numpy\lib\arraysetops.py", line 312, in _unique1d
ar.sort()
TypeError: '<' not supported between instances of 'int' and 'str'