Я работаю над проблемой анализа настроений в твиттере. Моя цель - собрать данные в CSV-файл, в три столбца в зависимости от настроения. После некоторых попыток скрипт работает (частично). Кажется, что настроение смотрит только на 1 символ за раз
Кроме того, после некоторого цикла оно прерывается и выдает сообщение об ошибке
import tweepy
from textblob import TextBlob
import pandas as pd
from plotly import __version__
import cufflinks as cf
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
init_notebook_mode(connected=True)
cf.go_offline()
import matplotlib.pyplot as plt
from tweepy import Stream,StreamListener
import json,re,csv
consumer_key = 'xxxxxxxxxxxxxxxx'
consumer_key_secret = 'xxxxxxxxxxxxx'
access_token = 'xxxxxxxxxxxxxxxxxxx'
access_token_secret = 'xxxxxxxxxxxxxx'
auth = tweepy.OAuthHandler(consumer_key, consumer_key_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
pos1, neg1, neu1 = 0, 0, 0
header=['Positive','Negative','Neutral']
#Create an empty csv file which has three headers: 'Positive','Negative','Neutra'
with open('List.csv','w') as file:
write=csv.DictWriter(file,fieldnames=header)
write.writeheader()
class Listener(StreamListener):
def on_data(self, data):
raw_t=json.loads(data)
data=raw_t['text']
#four lines below will clear the tweets by removing: mentions, hash tag etc.
data = re.sub('@[A-Za-z0–9]+', '',data) #Removing @mentions
data = re.sub('#', '', data) # Removing '#' hash tag
data = re.sub('RT[\s]+', '', data) # Removing RT
data = re.sub('https?:\/\/\S+', '', data) # Removing hyperlink
global pos1
global neg1
global neu1
pos, neg, neu = 0, 0, 0
for tweet in data:
print(tweet)
analysis = TextBlob(tweet)
print(analysis.sentiment)
#the below if statement will count the number of tweets based on their sentiment('Positive','Negative','Neutra')
if analysis.sentiment[0]>0:
pos+=1
elif analysis.sentiment[0]<0:
neg+=1
else:
neu+=1
pos1=pos1+pos
neg1=neg1+neg
neu1=neu1+neu
#write the result from counting to the csv file "List.csv"
with open('List.csv', 'a') as file:
writer = csv.DictWriter(file, fieldnames=header)
info={
'Positive':pos1,
'Negative':neg1,
'Neutral':neu1
}
writer.writerow(info)
print(data)
return True
def on_error(self, status):
print(status)
l = Listener()
stream = Stream(auth, l)
stream.filter(track=['trump'])
:
<ipython-input-3-607cdfdcdd9b> in on_data(self, data)
12 raw_t=json.loads(data)
13
---> 14 data=raw_t['text']
15 #four lines below will clear the tweets by removing: metions, has tag etc.
16 data = re.sub('@[A-Za-z0–9]+', '',data) #Removing @mentions
KeyError: 'text'