У меня проблема с kmeans иification_report, потому что у меня мультикласс. Я пытаюсь проверить алгоритм Кеманса на лимфографии набора данных, но в классификационном отчете также приводится класс 0, которого нет в наборе данных сайта: https://archive.ics.uci.edu/ml/datasets/Lymphography
feature = ["class", "lymphatics", "block of affere", "bl. of lymph. c", "bl. of lymph. s", "by pass", "extravasates", "regeneration of", "early uptake in",
"lym.nodes dimin","lym.nodes enlar","changes in lym","defect in node","changes in node","changes in stru","special forms","dislocation of","exclusion of no","no. of nodes in"]
feature_dummied = ["lymphatics", "block of affere", "bl. of lymph. c", "bl. of lymph. s", "by pass", "extravasates", "regeneration of", "early uptake in",
"lym.nodes dimin","lym.nodes enlar","changes in lym","defect in node","changes in node","changes in stru","special forms","dislocation of","exclusion of no","no. of nodes in"]
dataset = pd.read_csv("lymphography.csv", sep=",", names=feature,
dtype={'class': np.int32, 'lymphatics': np.int32, 'block of affere': np.int32, 'bl. of lymph. c':np.int32,
'bl. of lymph. s': np.int32, 'by pass': np.int32, 'extravasates': np.int32, 'regeneration of': np.int32,
'early uptake in': np.int32, 'lym.nodes dimin': np.int32, 'lym.nodes enlar': np.int32, 'changes in lym': np.int32,
'defect in node': np.int32, 'changes in node': np.int32, 'changes in stru': np.int32, 'special forms':np.int32,
'dislocation of': np.int32, 'exclusion of no': np.int32, 'no. of nodes in': np.int32})
data_dummies = pd.get_dummies(dataset, columns=feature_dummied)
X = data_dummies.drop(["class"], axis=1)
y = data_dummies['class']
kmeans = KMeans(n_clusters=8, init='k-means++', max_iter=2, n_init=9, random_state=0)
y_kmeans = kmeans.fit_predict(X)
centroids = kmeans.cluster_centers_
print("\nEtichette:")
print(kmeans.labels_)
#target_names = ['class 0', 'class 1', 'class 2','class 3'] target_names=target_names
print('\nClasification report:\n', classification_report(y, y_kmeans,labels=[1, 2, 3, 4] ))
print('\nConfussion matrix:\n', confusion_matrix(y, y_kmeans,labels=[1, 2, 3, 4]))
confusion_matrix = confusion_matrix(y, y_kmeans,labels=[1, 2, 3, 4])
df_cm = pd.DataFrame(confusion_matrix, index=[i for i in "1234"], columns=[i for i in "1234"])
plt.figure(figsize=(10, 7))
sns.set()
sns.heatmap(df_cm, annot=True)
plt.show()
average_precision = average_precision_score(y, y_kmeans )
precision, recall, _ = precision_recall_curve(y, y_kmeans )
# In matplotlib < 1.5, plt.fill_between does not have a 'step' argument
step_kwargs = ({'step': 'post'}
if 'step' in signature(plt.fill_between).parameters
else {})
plt.step(recall, precision, color='b', alpha=0.2,
where='post')
plt.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs) # Riempi l'area tra due curve orizzontali.
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(average_precision))
plt.show()
ошибка
Traceback (most recent call last):
File "c:/Users/Administrator/Desktop/progetto ic/KMeans1.py", line 56, in <module>
average_precision = average_precision_score(y, y_kmeans )
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\metrics\_ranking.py", line 215, in average_precision_score
average, sample_weight=sample_weight)
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\metrics\_base.py", line 74, in _average_binary_score
raise ValueError("{0} format is not supported".format(y_type))
ValueError: multiclass format is not supported