Я пытаюсь обучить и оценить модели прогнозирования на наборе данных, который нашёл на Kaggle, но точность (precision) для класса 0 равна 0, и я хотел бы понять, что делаю не так.
Код работает для модели случайного леса, но не для SVM и не для нейронных сетей.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
%matplotlib inline
# Read the recipes CSV, keep only the target column ('rating') and the four
# numeric columns used as features, and discard any row with a missing value.
keep_col = ['rating', 'calories', 'protein', 'fat', 'sodium']
recipes = pd.read_csv('epi_r.csv')
recipes = recipes[keep_col].dropna()
# Turn the numeric rating into a two-level label:
# the interval (-1, 4] becomes 'bad' and (4, 5] becomes 'good'.
bins = (-1, 4, 5)
group_names = ['bad', 'good']
recipes['rating'] = pd.cut(
    recipes['rating'].dropna(),
    bins=bins,
    labels=group_names,
)
# Notebook-style inspection of the distinct labels (no effect when run as a script).
recipes['rating'].unique()
# Encode the string labels as integers; classes are sorted, so bad=0 and good=1.
label_rating = LabelEncoder().fit(recipes['rating'].astype(str))
recipes['rating'] = label_rating.transform(recipes['rating'].astype(str))
# Feature matrix = every column except the encoded rating; response = the rating.
x = recipes.drop('rating', axis=1)
y = recipes['rating']
# Hold out 20% of the rows for evaluation; the seed is fixed so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=42,
)
# Standardise the features. The scaler is fitted on the training rows only and
# the same fitted scaler is then applied to both splits, so the test rows do not
# influence the scaling parameters.
sc = StandardScaler()
sc.fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)
# Fit a support vector classifier with its default settings on the scaled
# training split, then predict labels for the held-out split.
# NOTE(review): the output pasted after this script shows every test row being
# predicted as class 1. Nothing here tunes the classifier (no C / gamma /
# class_weight), so it is worth checking the features for extreme outliers and
# trying class_weight or a parameter search before trusting this result.
clf = SVC()
pred_clf = clf.fit(x_train, y_train).predict(x_test)
# Per-class precision/recall/F1, followed by the raw confusion matrix.
print(classification_report(y_test, pred_clf))
print(confusion_matrix(y_test, pred_clf))
              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1465
           1       0.54      1.00      0.70      1708

   micro avg       0.54      0.54      0.54      3173
   macro avg       0.27      0.50      0.35      3173
weighted avg       0.29      0.54      0.38      3173

[[   0 1465]
 [   0 1708]]
/usr/local/lib/python3.7/site-packages/sklearn/metrics/classification.py:1143: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.
'precision', 'predicted', average, warn_for)
Это результат, который я получаю: модель ни разу не предсказывает класс 0 — все объекты тестовой выборки отнесены к классу 1.