У меня есть один CSV-файл, и я попытался построить кривую RO C без использования каких-либо предопределенных библиотек для построения кривой. Я использовал numpy и pandas для моего кода. Может кто-нибудь сказать, пожалуйста, где я иду не так? RO C Кривая
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('5_b.csv')
#adding a new column to the data
df['Y_pred'] = np.where(df['proba']<=0.5, 0, 1)
#printing the top 5 values in data
#df.head(5)
#sorting dataframe
df=df.sort_values(by =['proba'])
df.head(5)
#confusion matrix
TP_Main = len(df[(df['y'] == 1) & (df['Y_pred'] == 1)])
FP_Main = len(df[(df['y'] == 0) & (df['Y_pred'] == 1)])
FN_Main = len(df[(df['y'] == 1) & (df['Y_pred'] == 0)])
TN_Main = len(df[(df['y'] == 0) & (df['Y_pred'] == 0)])
print("TN_Main : {0},FN_Main : {1}".format(TN_Main,FN_Main))
print("FP_Main : {0},TP_Main : {1}".format(FP_Main,TP_Main))
#F1score
precision = TP_Main/(TP_Main+FP_Main)
recall = TP_Main/(TP_Main+FN_Main)
F1score = ((precision*recall)/(precision+recall))*2
print("precision : {0},recall : {1}".format(precision,recall))
print("F1score : ",F1score)
#df.sort_values(by =['proba'], inplace = True, ascending = False)
tprList = []
fprList = []
for i in range(len(df)):
df['Y_pred'] =np.where(df['proba']<=df.iloc[i][1],0,1)
TP = len(df[(df['y'] == 1) & (df['Y_pred'] == 1)])
FP = len(df[(df['y'] == 0) & (df['Y_pred'] == 1)])
FN = len(df[(df['y'] == 1) & (df['Y_pred'] == 0)])
TN = len(df[(df['y'] == 0) & (df['Y_pred'] == 0)])
TPR = TP/(FN+TP)
FPR = TN/(FP+TN)
tprList.append(TPR)
fprList.append(FPR)
tpr_array = np.array(tprList)
fpr_array = np.array(fprList)
#Accuracy score
AccScore = (TN_Main+TP_Main)/len(df)
print("Accuracy Score =", AccScore)
AUCScore = np.trapz(tpr_array,fpr_array)
print("AUC Score :",AUCScore)
plt.plot(tpr_array,fpr_array)