Я пытаюсь запустить приведённый ниже код: обучить классификатор на тренировочных данных, применить его к тестовым данным и оценить его качество. Функция `run_model` обучает и запускает наивный байесовский классификатор, а `get_metrics` выводит точность (accuracy) и отчёт о классификации.
# Training sentences, one per example.
training_corpus = [
    "The Limmat flows out of the lake.",
    "The bears are in the bear pit near the river.",
    "The Rhône flows out of Lake Geneva.",
]
# City label for each training sentence, aligned by position.
training_labels = [
    "zurich",
    "bern",
    "geneva",
]
# Fit the vectorizer on the training sentences and encode them in one step.
bow_training_features = vectorizer.fit_transform(training_corpus)
# Held-out sentences used to evaluate the classifier.
test_corpus = [
    'We saw the bears there.',
    'We crossed the Rhône.',
    'There is no lake.',
]
# Gold label for each test sentence, aligned by position.
test_labels = [
    'bern',
    'geneva',
    'bern',
]
# Encode the test sentences with the vocabulary already learned from the
# training corpus. Calling fit_transform() here would refit the vectorizer
# on the test sentences, so the test matrix would have a different number
# of columns than the training matrix and predict() would fail with
# "ValueError: dimension mismatch".
# NOTE(review): assumes `vectorizer` is a scikit-learn style vectorizer
# (e.g. CountVectorizer) that exposes transform() - confirm at its definition.
bow_test_features = vectorizer.transform(test_corpus)
def show_overview(test_corpus, test_labels, predicted_labels):
    """Print a table of test texts with their true and predicted labels.

    Args:
        test_corpus: Values for the ``test_corpus`` column.
        test_labels: Values for the ``test_labels`` column.
        predicted_labels: Values for the ``predicted_labels`` column.
    """
    named_columns = (
        ('test_corpus', test_corpus),
        ('test_labels', test_labels),
        ('predicted_labels', predicted_labels),
    )
    overview = pd.DataFrame()
    # Add the columns one at a time, in display order.
    for column_name, column_values in named_columns:
        overview[column_name] = column_values
    print(overview.to_string())
def run_model(classifier,
              training_data, training_labels,
              test_data, test_labels):
    """Fit a classifier, predict on the test data and report its metrics.

    Args:
        classifier: Object exposing ``fit(X, y)`` and ``predict(X)``.
        training_data: Features passed to ``classifier.fit``.
        training_labels: Labels passed to ``classifier.fit``.
        test_data: Features passed to ``classifier.predict``.
        test_labels: True labels the predictions are compared against.

    Returns:
        The predictions for ``test_data`` as a plain list.
    """
    # Train the model on the training set.
    classifier.fit(training_data, training_labels)

    # Label the test samples with the fitted model.
    predicted = classifier.predict(test_data)

    # Report how the predictions compare with the true labels.
    get_metrics(true_labels=test_labels, predicted_labels=predicted)

    return list(predicted)
def get_metrics(true_labels, predicted_labels):
    """Print the accuracy score and the classification report.

    Args:
        true_labels: Ground-truth labels.
        predicted_labels: Labels produced by the model, in the same order.
    """
    # Imports are kept local to the function, as in the original code.
    import numpy as np
    from sklearn import metrics

    accuracy = metrics.accuracy_score(true_labels, predicted_labels)
    print('Accuracy:', np.round(accuracy, 3))

    report = metrics.classification_report(true_labels, predicted_labels)
    print(report)
from sklearn.naive_bayes import MultinomialNB
# Train a multinomial naive Bayes model on the bag-of-words features and
# collect its predictions for the test sentences.
mnb = MultinomialNB()
predicted_labels = run_model(
    classifier=mnb,
    training_data=bow_training_features,
    training_labels=training_labels,
    test_data=bow_test_features,
    test_labels=test_labels,
)

# Show each test sentence next to its true and predicted label.
show_overview(test_corpus, test_labels, predicted_labels)
После выполнения я получаю следующую ошибку:
ValueError Traceback (most recent call last)
<ipython-input-59-a5a2d1fb46c6> in <module>
37 training_labels = training_labels, \
38 test_data = bow_test_features, \
---> 39 test_labels = test_labels)
40
41 show_overview(test_corpus, test_labels, predicted_labels)
ValueError: dimension mismatch
Я ожидаю получить такой вывод:
Accuracy: 0.667
precision recall f1-score support
bern 1.00 0.50 0.67 2
geneva 1.00 1.00 1.00 1
zurich 0.00 0.00 0.00 0
accuracy 0.67 3
macro avg 0.67 0.50 0.56 3
weighted avg 1.00 0.67 0.78 3
test_corpus test_labels predicted_labels
0 We saw the bears there. bern bern
1 We crossed the Rhône. geneva geneva
2 There is no lake. bern zurich
Пожалуйста, помогите мне