from sklearn.base import clone
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import StratifiedKFold

# Baseline classifier fitted on the full training set. The cross-validation
# loop below works on unfitted clones, so this fit does not leak into the folds.
sgd_clf = SGDClassifier(random_state=42)
sgd_clf.fit(X_train, y_train)


def _take_rows(data, positions):
    """Return the rows of *data* located at the integer *positions*.

    ``StratifiedKFold.split`` yields positional indices. On a pandas
    DataFrame, ``data[positions]`` is interpreted as a *column label* lookup
    and raises ``KeyError: "None of [Int64Index(...)] are in the [columns]"``;
    on a Series it is a label lookup that can silently pick the wrong rows
    when the index is not 0..n-1. ``.iloc`` is always positional. NumPy
    arrays have no ``.iloc`` and are indexed directly.
    """
    if hasattr(data, "iloc"):
        return data.iloc[positions]
    return data[positions]


# random_state only has an effect when shuffle=True; recent scikit-learn
# releases raise ValueError if random_state is set while shuffle is False.
skfolds = StratifiedKFold(n_splits=4, shuffle=True, random_state=42)

for train_index, test_index in skfolds.split(X_train, y_train):
    # Fresh, unfitted copy with the same hyperparameters for every fold.
    clone_clf = clone(sgd_clf)

    X_train_folds = _take_rows(X_train, train_index)
    y_train_folds = _take_rows(y_train, train_index)
    X_test_fold = _take_rows(X_train, test_index)
    y_test_fold = _take_rows(y_train, test_index)

    clone_clf.fit(X_train_folds, y_train_folds)
    y_pred = clone_clf.predict(X_test_fold)

    # Fold accuracy: share of test-fold samples predicted correctly.
    n_correct = (y_pred == y_test_fold).sum()
    print(n_correct / len(y_pred))
Контекст: класс StratifiedKFold выполняет стратифицированную выборку, чтобы получить блоки (folds), содержащие репрезентативное соотношение каждого класса. На каждой итерации код создаёт клон классификатора, обучает этот клон на обучающих блоках и делает прогнозы на тестовом блоке. Затем он подсчитывает количество правильных прогнозов и выводит долю правильных прогнозов.
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-34-3c1dd4b2ed7e> in <module>
4 for train_index, cv_index in skfolds.split(X_train, y_train):
5 clone_clf = clone(sgd_clf)
6 X_train_folds = X_train[train_index]
7 y_train_folds = (y_train[train_index])
8 X_cv_fold = X_train[cv_index]
~/.local/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2999 if is_iterator(key):
3000 key = list(key)
3001 indexer = self.loc._convert_to_indexer(key, axis=1, raise_missing=True)
3002
3003 # take() does not accept boolean indexers
~/.local/lib/python3.6/site-packages/pandas/core/indexing.py in _convert_to_indexer(self,
obj, axis, is_setter, raise_missing)
1283 # When setting, missing keys are not allowed, even with .loc:
1284 kwargs = {"raise_missing": True if is_setter else raise_missing}
1285 return self._get_listlike_indexer(obj, axis, **kwargs)[1]
1286 else:
1287 try:
~/.local/lib/python3.6/site-packages/pandas/core/indexing.py in _get_listlike_indexer(self,
key, axis, raise_missing)
1090
1091 self._validate_read_indexer(
1092 keyarr, indexer, o._get_axis_number(axis), raise_missing=raise_missing
1093 )
1094 return keyarr, indexer
~/.local/lib/python3.6/site-packages/pandas/core/indexing.py in _validate_read_indexer(self,
key, indexer, axis, raise_missing)
1175 raise KeyError(
1176 "None of [{key}] are in the [{axis}]".format(
1177 key=key, axis=self.obj._get_axis_name(axis)
1178 )
1179 )
KeyError: "None of [Int64Index([ 767, 789, 791, 793, 798, 799, 803, 805, 810,
811,\n ...\n 7990, 7991, 7992, 7993, 7994, 7995, 7996, 7997, 7998,
7999],\n dtype='int64', length=7111)] are in the [columns]