Я использую библиотеку Yellowbrick и пытаюсь построить кривую RO C. Мне нужно построить четыре набора данных на одном рисунке. Вот что я попробовал.
from yellowbrick.classifier import PrecisionRecallCurve
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from yellowbrick.classifier import ROCAUC
# dataset
_V1 = V1
_V2 = V2
_V3 = V3
_V4 = V4
lable = V1
#### Split data
x_train1,x_test1,y_train1,y_test1 =train_test_split(_V1,lable,test_size=0.30,random_state=40)
x_train2,x_test2,y_train2,y_test2 =train_test_split(_V2,lable,test_size=0.30,random_state=40)
x_train3,x_test3,y_train3,y_test3 =train_test_split(_V3,lable,test_size=0.30,random_state=40)
x_train4,x_test4,y_train4,y_test4 =train_test_split(_V4,lable,test_size=0.30,random_state=40)
##### PLot sub
model = KNeighborsClassifier(n_neighbors=3)
###### ROC plot
import matplotlib.pyplot as plt
fig, axes = plt.subplots(2,2)
viz_V11 = ROCAUC(model,ax=axes[0][0],title = 'V1')
viz_V22 = ROCAUC(model,ax=axes[0][1],title = 'V2')
viz_V33 = ROCAUC(model,ax=axes[1][0],title = 'V3')
viz_V44 = ROCAUC(model,ax=axes[1][1],title = 'V4')
viz_V11.fit(x_train1, y_train1)
viz_V11.score(x_test1, y_test1)
viz_V11.finalize()
viz_V22.fit(x_train2, y_train2)
viz_V22.score(x_test2, y_test2)
viz_V22.finalize()
viz_V33.fit(x_train3, y_train3)
viz_V33.score(x_test3, y_test3)
viz_V33.finalize()
viz_V44.fit(x_train4, y_train4)
viz_V44.score(x_test4, y_test4)
viz_V44.finalize()
plt.show()
Это дает мне следующие ошибки:
viz_V22.score(x_test2, y_test2)
File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\yellowbrick\classifier\rocauc.py", line 216, in score
super(ROCAUC, self).score(X, y)
File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\yellowbrick\classifier\base.py", line 236, in score
self.score_ = self.estimator.score(X, y)
File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\base.py", line 288, in score
return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\neighbors\classification.py", line 149, in predict
neigh_dist, neigh_ind = self.kneighbors(X)
File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\neighbors\base.py", line 455, in kneighbors
for s in gen_even_slices(X.shape[0], n_jobs)
File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\externals\joblib\parallel.py", line 917, in __call__
if self.dispatch_one_batch(iterator):
File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\externals\joblib\parallel.py", line 759, in dispatch_one_batch
self._dispatch(tasks)
File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\externals\joblib\parallel.py", line 716, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py", line 182, in apply_async
result = ImmediateResult(func)
File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py", line 549, in __init__
self.results = batch()
File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\externals\joblib\parallel.py", line 225, in __call__
for func, args, kwargs in self.items]
File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\externals\joblib\parallel.py", line 225, in <listcomp>
for func, args, kwargs in self.items]
File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\neighbors\base.py", line 292, in _tree_query_parallel_helper
return tree.query(data, n_neighbors, return_distance)
File "sklearn\neighbors\binary_tree.pxi", line 1305, in sklearn.neighbors.kd_tree.BinaryTree.query
ValueError: query data dimension must match training data dimension