Я пытаюсь использовать повторяющиеся k-кратные с перекрестной проверкой для набора данных, но получаю ошибку, связанную с типами.
Я попытался удалить пробел из значений данных, но ошибка все еще остается
recipes = pd.read_csv('epi_r.csv')
keep_col = ['rating','calories','protein','fat','sodium']
recipes = recipes[keep_col]
recipes = recipes.dropna()
bins = (-1, 3.5, 5)
group_names = ['bad','good']
recipes['rating'] = pd.cut(recipes['rating'].dropna(), bins = bins, labels = group_names)
recipes['rating'].unique()
label_rating = LabelEncoder()
recipes['rating'] = label_rating.fit_transform(recipes['rating'].astype(str))
rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=50)
for train, test in rkf.split(recipes.astype(str)):
print("%s %s" % (train, test))
xtrain, xtest = x[train], x[test]
ytrain, ytest = y[train], y[test]
Поезд и тестовая распечатка, но впоследствии я получаю сообщение об ошибке
KeyError Traceback (most recent call last)
<ipython-input-19-102e0c6c5a53> in <module>
2 for train, test in rkf.split(recipes.astype(str)):
3 print("%s %s" % (train, test))
----> 4 xtrain, xtest = x[train], x[test]
5 ytrain, ytest = y[train], y[test]
6
/usr/local/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
2932 key = list(key)
2933 indexer = self.loc._convert_to_indexer(key, axis=1,
-> 2934 raise_missing=True)
2935
2936 # take() does not accept boolean indexers
/usr/local/lib/python3.7/site-packages/pandas/core/indexing.py in _convert_to_indexer(self, obj, axis, is_setter, raise_missing)
1352 kwargs = {'raise_missing': True if is_setter else
1353 raise_missing}
-> 1354 return self._get_listlike_indexer(obj, axis, **kwargs)[1]
1355 else:
1356 try:
/usr/local/lib/python3.7/site-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis, raise_missing)
1159 self._validate_read_indexer(keyarr, indexer,
1160 o._get_axis_number(axis),
-> 1161 raise_missing=raise_missing)
1162 return keyarr, indexer
1163
/usr/local/lib/python3.7/site-packages/pandas/core/indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing)
1244 raise KeyError(
1245 u"None of [{key}] are in the [{axis}]".format(
-> 1246 key=key, axis=self.obj._get_axis_name(axis)))
1247
1248 # We (temporarily) allow for some missing keys with .loc, except in
KeyError: "None of [Int64Index([ 0, 2, 4, 7, 10, 11, 12, 13, 14,\n 15,\n ...\n 15844, 15846, 15847, 15850, 15853, 15854, 15857, 15859, 15860,\n 15861],\n dtype='int64', length=7932)] are in the [columns]"