Я тренирую 3 модели. Они предскажут, что такое мошенническая транзакция (бинарная классификация). Логистическая регрессия, кнн и манекен. Но каждая метрика, которую я вычисляю для logreg, равна 1! Я не верю этому. И я едва начал искать ошибки. Но:
Итак, я попытался гуглить. Но не нашел ничего подобного моему делу. Вот код:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, precision_score, \
recall_score, f1_score, roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
df = pd.read_csv("creditcard.csv").drop('Time', axis=1) # read the dataset except of the column time(when transaction was committed), because this column is not valuable.
x = df.iloc[:,0:30].values
y = df.Class.values
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.1, rando
m_state=437)
logistic_regression_model = LogisticRegression()
logistic_regression_model.fit(x_train, y_train)
knn_model = KNeighborsClassifier()
knn_model.fit(x_train, y_train)
dummy_model = DummyClassifier(constant=0)
dummy_model.fit(x_train, y_train)
y_logreg_pred = logistic_regression_model.predict(x_test)
y_knn_pred = knn_model.predict(x_test)
y_dummy_pred = dummy_model.predict(x_test)
print("ACCURACY")
print("\tlogreg:", accuracy_score(y_logreg_pred, y_test))
print("\tkngg:", accuracy_score(y_knn_pred, y_test))
print("\tdummy:", accuracy_score(y_dummy_pred, y_test))
print("PRESICION")
print("\tlogreg:", precision_score(y_logreg_pred, y_test))
print("\tknn:", precision_score(y_knn_pred, y_test))
print("\tdummy:", precision_score(y_dummy_pred, y_test))
print("RECALL")
print("\tlogreg:", recall_score(y_logreg_pred, y_test))
print("\tknn:", recall_score(y_knn_pred, y_test))
print("\tdummy:", recall_score(y_dummy_pred, y_test))
print("F1")
print("\tlogreg:", f1_score(y_logreg_pred, y_test))
print("\tknn:", f1_score(y_knn_pred, y_test))
print("\tdummy:", f1_score(y_dummy_pred, y_test))
print("ROC AUC")
print("\tlogreg:", roc_auc_score(y_logreg_pred, y_test))
print("\tknn:", roc_auc_score(y_knn_pred, y_test))
print("\tdummy:", roc_auc_score(y_dummy_pred, y_test))
logreg_y_pred_proba = logistic_regression_model.predict_proba(x_test)[:, 1]
knn_y_pred_proba = knn_model.predict_proba(x_test)[:, 1]
dummy_y_pred_proba = dummy_model.predict_proba(x_test)[:, 1]
fpr, tpr, tresholds = roc_curve(y_test, logreg_y_pred_proba)
plt.figure(figsize=(7, 7))
plt.plot(fpr, tpr)
И вывод:
ACCURACY
logreg: 1.0
kngg: 0.9992275552122467
dummy: 0.9967346652154068
PRESICION
logreg: 1.0
knn: 0.5957446808510638
dummy: 0.0
RECALL
logreg: 1.0
knn: 0.9032258064516129
dummy: 0.0
F1
logreg: 1.0
knn: 0.7179487179487178
dummy: 0.0
ROC AUC
logreg: 1.0
knn: 0.9512789840693917
dummy: 0.4991735537190083
Что я делаю не так?