ROC-кривая с использованием библиотеки Yellowbrick для Python - PullRequest
1 голос
/ 13 апреля 2020

Я использую библиотеку Yellowbrick и пытаюсь построить ROC-кривые. Мне нужно отобразить результаты для четырёх наборов данных на одном рисунке (в виде четырёх подграфиков). Вот что я попробовал.

from yellowbrick.classifier import PrecisionRecallCurve
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from yellowbrick.classifier import ROCAUC


# Plot one ROC/AUC chart per feature set (V1..V4) on a shared 2x2 figure.
#
# V1..V4 are expected to be defined before this point; they are not created
# in this snippet.

# dataset
_V1 = V1
_V2 = V2
_V3 = V3
_V4 = V4

# NOTE(review): the labels are aliased to the V1 feature set itself, which
# looks like a placeholder -- confirm this is the real target vector.
lable = V1


#### Split data
# The same random_state is used for every split, so all four feature sets
# are partitioned with identical settings.
x_train1, x_test1, y_train1, y_test1 = train_test_split(
    _V1, lable, test_size=0.30, random_state=40)
x_train2, x_test2, y_train2, y_test2 = train_test_split(
    _V2, lable, test_size=0.30, random_state=40)
x_train3, x_test3, y_train3, y_test3 = train_test_split(
    _V3, lable, test_size=0.30, random_state=40)
x_train4, x_test4, y_train4, y_test4 = train_test_split(
    _V4, lable, test_size=0.30, random_state=40)

##### Estimators
# Every visualizer needs its OWN unfitted estimator.  Sharing one instance
# made the second visualizer score V2 with a model still fitted on V1
# ("query data dimension must match training data dimension"): Yellowbrick
# does not refit an estimator that it detects as already fitted.
model1, model2, model3, model4 = (
    KNeighborsClassifier(n_neighbors=3) for _ in range(4)
)
model = model1  # original name retained for any later code that uses it

###### ROC plot
fig, axes = plt.subplots(2, 2)

viz_V11 = ROCAUC(model1, ax=axes[0][0], title='V1')
viz_V22 = ROCAUC(model2, ax=axes[0][1], title='V2')
viz_V33 = ROCAUC(model3, ax=axes[1][0], title='V3')
viz_V44 = ROCAUC(model4, ax=axes[1][1], title='V4')

# Fit on the training split, score on the test split, then finalize each
# subplot.  finalize() (rather than show()) is used so that all four axes
# are rendered together by the single plt.show() call below.
_runs = (
    (viz_V11, x_train1, x_test1, y_train1, y_test1),
    (viz_V22, x_train2, x_test2, y_train2, y_test2),
    (viz_V33, x_train3, x_test3, y_train3, y_test3),
    (viz_V44, x_train4, x_test4, y_train4, y_test4),
)
for _viz, _x_train, _x_test, _y_train, _y_test in _runs:
    _viz.fit(_x_train, _y_train)
    _viz.score(_x_test, _y_test)
    _viz.finalize()

plt.show()

Это дает мне следующие ошибки:

viz_V22.score(x_test2, y_test2)
  File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\yellowbrick\classifier\rocauc.py", line 216, in score
    super(ROCAUC, self).score(X, y)
  File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\yellowbrick\classifier\base.py", line 236, in score
    self.score_ = self.estimator.score(X, y)
  File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\base.py", line 288, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\neighbors\classification.py", line 149, in predict
    neigh_dist, neigh_ind = self.kneighbors(X)
  File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\neighbors\base.py", line 455, in kneighbors
    for s in gen_even_slices(X.shape[0], n_jobs)
  File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\externals\joblib\parallel.py", line 917, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\externals\joblib\parallel.py", line 759, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\externals\joblib\parallel.py", line 716, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py", line 182, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py", line 549, in __init__
    self.results = batch()
  File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\externals\joblib\parallel.py", line 225, in __call__
    for func, args, kwargs in self.items]
  File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\externals\joblib\parallel.py", line 225, in <listcomp>
    for func, args, kwargs in self.items]
  File "C:\Users\PC\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\neighbors\base.py", line 292, in _tree_query_parallel_helper
    return tree.query(data, n_neighbors, return_distance)
  File "sklearn\neighbors\binary_tree.pxi", line 1305, in sklearn.neighbors.kd_tree.BinaryTree.query
ValueError: query data dimension must match training data dimension
...