Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=1/3, random_state=85)
models = [
RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42),
LinearSVC(),
MultinomialNB(),
LogisticRegression(random_state=0),
]
# 5 Cross-validation
CV = 5
cv_df = pd.DataFrame(index=range(CV * len(models)))
entries = []
for model in models:
model_name = model.__class__.__name__
accuracies = cross_val_score(model, features, labels, scoring='accuracy', cv=CV)
for fold_idx, accuracy in enumerate(accuracies):
entries.append((model_name, fold_idx, accuracy))
cv_df = pd.DataFrame(entries, columns=['model_name', 'fold_idx', 'accuracy'])
ошибка:
UserWarning: наименее заполненный класс в y имеет только 1 член, что меньше n_splits = 5.