Я пытаюсь обучить модель обнаружению фальшивых новостей и пытаюсь сделать модель из множества слов. Однако, когда я пытаюсь соответствовать своей модели, я получаю эту ошибку:
Traceback (most recent call last):
File "/Users/amanpuranik/PycharmProjects/covid/fake news 2.py", line 89, in <module>
headline_bow.fit(lower)
File "/Users/amanpuranik/PycharmProjects/covid/venv/lib/python3.7/site-packages/sklearn/feature_extraction/text.py", line 1186, in fit
self.fit_transform(raw_documents)
File "/Users/amanpuranik/PycharmProjects/covid/venv/lib/python3.7/site-packages/sklearn/feature_extraction/text.py", line 1220, in fit_transform
self.fixed_vocabulary_)
File "/Users/amanpuranik/PycharmProjects/covid/venv/lib/python3.7/site-packages/sklearn/feature_extraction/text.py", line 1131, in _count_vocab
for feature in analyze(doc):
File "/Users/amanpuranik/PycharmProjects/covid/venv/lib/python3.7/site-packages/sklearn/feature_extraction/text.py", line 103, in _analyze
doc = preprocessor(doc)
File "/Users/amanpuranik/PycharmProjects/covid/venv/lib/python3.7/site-packages/sklearn/feature_extraction/text.py", line 68, in _preprocess
doc = doc.lower()
AttributeError: 'list' object has no attribute 'lower'
Я не уверен, почему я получаю эту ошибку. Это набор данных, который я пытаюсь уместить:
[['four', 'way', 'bob', 'corker', 'skewer', 'donald', 'trump'], ['linklat', "'s", 'war', 'veteran', 'comedi', 'speak', 'modern', 'america', ',', 'say', 'star'], ['trump', '’', 'fight', 'with', 'corker', 'jeopard', 'his', 'legisl', 'agenda']]
Вот остаток моего кода:
data = pd.read_csv("/Users/amanpuranik/Desktop/fake-news-detection/data.csv")
data = data[['Headline', "Label"]]
x = np.array(data['Headline'])
print(x[0])
y = np.array(data["Label"])
# tokenization of the data here'
headline_vector = []
for headline in x:
headline_vector.append(word_tokenize(headline))
print(headline_vector)
stopwords = set(stopwords.words('english'))
#removing stopwords at this part
filtered = [[word for word in sentence if word not in stopwords]
for sentence in headline_vector]
#print(filtered)
stemmed2 = [[stem(word) for word in headline] for headline in filtered]
#print(stemmed2)
#lowercase
lower = [[word.lower() for word in headline] for headline in stemmed2] #start here
#organising
articles = []
for headline in lower:
articles.append(headline)
#creating the bag of words model
headline_bow = CountVectorizer()
headline_bow.fit(lower)
a = headline_bow.transform(lower)
Почему это происходит и что я могу сделать, чтобы это исправить?