Я совсем новичок в Python и машинном обучении. Ниже приведён мой код на Python 3; я пишу и запускаю его в Jupyter Notebook.
import random
def splitDataset(dataset, splitRatio):
    """Randomly partition ``dataset`` into a training part and a test part.

    Args:
        dataset: Sequence of items to split. It is copied first, so the
            caller's object is never modified.
        splitRatio: Fraction of the items that go into the training part.
            The training size is ``int(len(dataset) * splitRatio)``.

    Returns:
        A two-element list ``[trainSet, testSet]``. ``trainSet`` holds the
        randomly drawn items; ``testSet`` holds every item that was not
        drawn, in its original relative order.

    Raises:
        ValueError: If ``splitRatio`` asks for more items than the dataset
            contains (``random.randrange`` is called on an empty range).
    """
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    remaining = list(dataset)  # private copy; popping must not touch the input
    while len(trainSet) < trainSize:
        index = random.randrange(len(remaining))
        trainSet.append(remaining.pop(index))
    # Everything that was not drawn forms the test set. The previous code
    # returned an undefined name here instead of the leftover items.
    return [trainSet, remaining]
import csv
import sys
from langdetect import detect
import random
import math
def loadCsv(filename, textColumn=14):
    """Read a CSV file and return the English texts found in one column.

    Args:
        filename: Path of the CSV file; it is decoded as latin1.
        textColumn: Zero-based index of the column that holds the text.
            Defaults to 14, the column the original code read.

    Returns:
        A list of the cell values from ``textColumn`` for which
        ``langdetect.detect`` reports ``"en"``, in file order.

    Rows that are too short to contain ``textColumn`` (for example blank
    lines) and cells whose language cannot be detected are skipped.
    """
    # Local import keeps this block self-contained; langdetect is already a
    # dependency of this file through ``detect``.
    from langdetect.lang_detect_exception import LangDetectException

    myList = []
    # ``with`` closes the file even if reading fails; ``newline=''`` is what
    # the csv module expects so quoted fields with line breaks parse correctly.
    with open(filename, "r", encoding='latin1', newline='') as handle:
        for line in csv.reader(handle):
            if len(line) <= textColumn:
                continue  # short or empty row: there is no text cell to read
            t = line[textColumn]
            try:
                language = detect(t)
            except LangDetectException:
                # Raised when the text has no usable features (e.g. it is
                # empty or only digits); such cells are simply not English.
                continue
            if language == "en":
                myList.append(t)
    return myList
import nltk.classify.util
from nltk.classify import NaiveBayesClassifier
filename = 'F:\\Study\\Text Mining (GIT)\\sources\\Data.csv'
splitRatio = 0.8
# Keep the list that loadCsv returns; previously the result was discarded and
# the next line relied on a name that only existed inside the function.
myList = loadCsv(filename)
trainingSet, testSet = splitDataset(myList, splitRatio)
# NOTE(review): NaiveBayesClassifier.train unpacks every element as
# ``featureset, label`` (see the traceback), i.e. it needs a list of
# (dict_of_features, label) pairs. trainingSet/testSet are lists of raw review
# strings here, which is what raises "too many values to unpack (expected 2)".
# loadCsv does not load any label, so each text still has to be turned into a
# feature dict and paired with its class label before this call can succeed.
# TODO: confirm which CSV column carries the label.
classifier = nltk.NaiveBayesClassifier.train(trainingSet)
print(nltk.classify.util.accuracy(classifier, testSet))
classifier.show_most_informative_features()
После запуска приведённого выше кода я получаю следующую ошибку:
ValueError Traceback (most recent call last)
<ipython-input-206-75c0ffc409d5> in <module>()
10 print(len(testSet))
11
---> 12 classifier = nltk.NaiveBayesClassifier.train(trainingSet)
13 print (nltk.classify.util.accuracy(classifier, testSet))
14
f:\python\lib\site-packages\nltk\classify\naivebayes.py in train(cls,
labeled_featuresets, estimator)
195 # Count up how many times each feature value occurred, given
196 # the label and featurename.
--> 197 for featureset, label in labeled_featuresets:
198 label_freqdist[label] += 1
199 for fname, fval in featureset.items():
ValueError: too many values to unpack (expected 2)
trainingSet=[ "Pleasant 10 min walk along the sea front to the Water Bus. restaurants etc. Hotel was comfortable breakfast was good - quite a variety. Room aircon didn't work very well. Take mosquito repelant!", "Really lovely hotel. Stayed on the very top floor and were surprised by a Jacuzzi bath we didn't know we were getting! Staff were friendly and helpful and the included breakfast was great! Great location and great value for money. Didn't want to leave!", 'We stayed here for four nights in October. The hotel staff were welcoming, friendly and helpful. Assisted in booking tickets for the opera. The rooms were clean and comfortable- good shower, light and airy rooms with windows you could open wide. Beds were comfortable. Plenty of choice for breakfast.Spa at hotel nearby which we used while we were there.', 'We stayed here for four nights in October. The hotel staff were welcoming, friendly and helpful. Assisted in booking tickets for the opera. The rooms were clean and comfortable- good shower, light and airy rooms with windows you could open wide. Beds were comfortable. Plenty of choice for breakfast.Spa at hotel nearby which we used while we were there.',.....]
Я просмотрел следующие страницы в поисках решения, но так и не нашёл его: ValueError: слишком много значений для распаковки (классификатор NLTK)
NLTK ValueError: слишком много значений для распаковки (ожидается 2)
http://www.solutionscan.org/220106-python
ValueError: слишком много значений для распаковки (ожидается 2)
Точность NLTK: «Ошибка значения: слишком много значений для распаковки»