Мультиклассовый формат не поддерживается - PullRequest
0 голосов
/ 21 января 2020

У меня проблема с kmeans иification_report, потому что у меня мультикласс. Я пытаюсь проверить алгоритм Кеманса на лимфографии набора данных, но в классификационном отчете также приводится класс 0, которого нет в наборе данных сайта: https://archive.ics.uci.edu/ml/datasets/Lymphography

    feature = ["class", "lymphatics", "block of affere", "bl. of lymph. c", "bl. of lymph. s", "by pass", "extravasates", "regeneration of", "early uptake in",
           "lym.nodes dimin","lym.nodes enlar","changes in lym","defect in node","changes in node","changes in stru","special forms","dislocation of","exclusion of no","no. of nodes in"]
feature_dummied = ["lymphatics", "block of affere", "bl. of lymph. c", "bl. of lymph. s", "by pass", "extravasates", "regeneration of", "early uptake in",
           "lym.nodes dimin","lym.nodes enlar","changes in lym","defect in node","changes in node","changes in stru","special forms","dislocation of","exclusion of no","no. of nodes in"]
dataset = pd.read_csv("lymphography.csv", sep=",", names=feature,
                      dtype={'class': np.int32, 'lymphatics': np.int32, 'block of affere': np.int32, 'bl. of lymph. c':np.int32,
                             'bl. of lymph. s': np.int32, 'by pass': np.int32, 'extravasates': np.int32, 'regeneration of': np.int32,
                             'early uptake in': np.int32, 'lym.nodes dimin': np.int32, 'lym.nodes enlar': np.int32, 'changes in lym': np.int32, 
                             'defect in node': np.int32, 'changes in node': np.int32, 'changes in stru': np.int32, 'special forms':np.int32,
                             'dislocation of': np.int32, 'exclusion of no': np.int32, 'no. of nodes in': np.int32})


data_dummies = pd.get_dummies(dataset, columns=feature_dummied)
X = data_dummies.drop(["class"], axis=1)

y = data_dummies['class']

kmeans = KMeans(n_clusters=8, init='k-means++', max_iter=2, n_init=9, random_state=0)
y_kmeans = kmeans.fit_predict(X)

centroids = kmeans.cluster_centers_
print("\nEtichette:")
print(kmeans.labels_)
#target_names = ['class 0', 'class 1', 'class 2','class 3'] target_names=target_names
print('\nClasification report:\n', classification_report(y, y_kmeans,labels=[1, 2, 3, 4] ))
print('\nConfussion matrix:\n', confusion_matrix(y, y_kmeans,labels=[1, 2, 3, 4]))

confusion_matrix = confusion_matrix(y, y_kmeans,labels=[1, 2, 3, 4])
df_cm = pd.DataFrame(confusion_matrix, index=[i for i in "1234"], columns=[i for i in "1234"])
plt.figure(figsize=(10, 7))
sns.set()
sns.heatmap(df_cm, annot=True)
plt.show()

average_precision = average_precision_score(y, y_kmeans )
precision, recall, _ = precision_recall_curve(y, y_kmeans )

# In matplotlib < 1.5, plt.fill_between does not have a 'step' argument
step_kwargs = ({'step': 'post'}
               if 'step' in signature(plt.fill_between).parameters
               else {})
plt.step(recall, precision, color='b', alpha=0.2,
         where='post')
plt.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs)  # Riempi l'area tra due curve orizzontali.

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(average_precision))
plt.show()

ошибка

 Traceback (most recent call last):
  File "c:/Users/Administrator/Desktop/progetto ic/KMeans1.py", line 56, in <module>
    average_precision = average_precision_score(y, y_kmeans )
  File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\metrics\_ranking.py", line 215, in average_precision_score
    average, sample_weight=sample_weight)
  File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\metrics\_base.py", line 74, in _average_binary_score
    raise ValueError("{0} format is not supported".format(y_type))
ValueError: multiclass format is not supported
...