Это код, над которым я работаю.
Ниже — код для классификации коротких обзоров фильмов.
# Labelled reviews as (text, label) pairs; label is "pos" or "neg".
documents = []
# Lower-cased words that passed the POS filter; rebound to a FreqDist below.
all_words = []
# First letters of the POS tags whose words are kept for the vocabulary.
# NOTE(review): "J" presumably selects the adjective tags (JJ, JJR, JJS) of
# the default NLTK tagger -- confirm against the tagset in use.
allowed_words_types = ['J']

# Both corpora are processed identically; only the label differs, so a single
# loop replaces the two copy-pasted ones.
for corpus, label in ((short_pos, "pos"), (short_neg, "neg")):
    for p in corpus.split('\n'):
        documents.append((p, label))
        words = word_tokenize(p)
        pos = nltk.pos_tag(words)
        for w in pos:
            # w[0] is the token and w[1] its tag; slicing with [:1] instead of
            # indexing [0] keeps an empty tag from raising IndexError.
            if w[1][:1] in allowed_words_types:
                all_words.append(w[0].lower())

all_words = nltk.FreqDist(all_words)
# Keep the 5000 most frequent words. Slicing list(all_words.keys()) instead
# returns the first 5000 distinct words in insertion order (FreqDist is a
# Counter subclass in NLTK 3), which is not a frequency ranking.
words_features = [word for word, _count in all_words.most_common(5000)]
def find_features(document):
    """Build the bag-of-words feature dict for one review.

    Args:
        document: Raw review text; it is tokenized with ``word_tokenize``.

    Returns:
        A dict with one key per entry of the module-level ``words_features``.
        The value is True when that word occurs among the document's tokens
        (compared lower-cased) and False otherwise.
    """
    # The vocabulary is built from lower-cased words, so the tokens must be
    # lower-cased too; otherwise capitalised occurrences ("Great") never match.
    # A set makes each of the per-vocabulary-word membership tests O(1).
    tokens = {token.lower() for token in word_tokenize(document)}
    return {w: (w in tokens) for w in words_features}
# Turn every labelled review into a (feature dict, label) pair.
featuresets = []
for rev, category in documents:
    featuresets.append((find_features(rev), category))

# Shuffle in place so the split below does not follow the order in which the
# documents were collected.
random.shuffle(featuresets)
print(len(featuresets))

# The first 100 shuffled examples are used for training, the rest for testing.
training_set, testing_set = featuresets[:100], featuresets[100:]

classifier = nltk.NaiveBayesClassifier.train(training_set)
Здесь я хочу вычислить матрицу ошибок (confusion matrix) и ROC-кривую.
Пока что я вычисляю только точность, но не могу получить ROC-кривую и матрицу ошибок. Любая помощь будет очень полезна. Помогите мне, пожалуйста. Спасибо.
# Accuracy of the NLTK Naive Bayes classifier on testing_set, as a percentage.
nb_accuracy = nltk.classify.accuracy(classifier, testing_set)
print(" Original Naive Bayes Algo accuracy percent : ", nb_accuracy * 100)

# Train a MultinomialNB model through the SklearnClassifier wrapper on the
# same training data and report its accuracy the same way.
MNB_classifier = SklearnClassifier(MultinomialNB())
MNB_classifier.train(training_set)
mnb_accuracy = nltk.classify.accuracy(MNB_classifier, testing_set)
print("MNB_classifier accuracy percent : ", mnb_accuracy * 100)

# Combine both trained classifiers.
# NOTE(review): VoteClassifier is defined outside this view; presumably it
# takes a majority vote over its members -- confirm.
voted_classifier = VoteClassifier(classifier, MNB_classifier)
def sentiment(text):
    """Classify *text* with the module-level ``voted_classifier``.

    Args:
        text: Raw text; it is converted to features by ``find_features``.

    Returns:
        A ``(label, confidence)`` tuple holding the results of
        ``voted_classifier.classify`` and ``voted_classifier.confidence``
        for the extracted features.
    """
    features = find_features(text)
    label = voted_classifier.classify(features)
    confidence = voted_classifier.confidence(features)
    return label, confidence