Для l oop неправильно добавляются данные в CSV-файл - PullRequest
0 голосов
/ 06 февраля 2020

Я работаю над проблемой анализа настроений в твиттере. Моя цель - собрать данные в CSV-файл, в три столбца в зависимости от настроения. После некоторых попыток скрипт работает (частично). Кажется, что настроение смотрит только на 1 символ за раз

Кроме того, после некоторого цикла оно прерывается и выдает сообщение об ошибке

import tweepy
from textblob import TextBlob
import pandas as pd
from plotly import __version__
import cufflinks as cf
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
init_notebook_mode(connected=True)
cf.go_offline()
import matplotlib.pyplot as plt
from tweepy import Stream,StreamListener
import json,re,csv

consumer_key = 'xxxxxxxxxxxxxxxx'
consumer_key_secret = 'xxxxxxxxxxxxx'

access_token = 'xxxxxxxxxxxxxxxxxxx'
access_token_secret = 'xxxxxxxxxxxxxx'

auth = tweepy.OAuthHandler(consumer_key, consumer_key_secret)

auth.set_access_token(access_token, access_token_secret)

api = tweepy.API(auth)

    pos1, neg1, neu1 = 0, 0, 0
    header=['Positive','Negative','Neutral']
    #Create an empty csv file which has three headers: 'Positive','Negative','Neutra'
    with open('List.csv','w') as file:
        write=csv.DictWriter(file,fieldnames=header)
        write.writeheader()


    class Listener(StreamListener):

        def on_data(self, data):
            raw_t=json.loads(data)

            data=raw_t['text']
            #four lines below will clear the tweets by removing: mentions, hash tag etc.
            data = re.sub('@[A-Za-z0–9]+', '',data) #Removing @mentions
            data = re.sub('#', '', data) # Removing '#' hash tag
            data = re.sub('RT[\s]+', '', data) # Removing RT
            data = re.sub('https?:\/\/\S+', '', data) # Removing hyperlink

            global pos1 
            global neg1 
            global neu1
            pos, neg, neu = 0, 0, 0

            for tweet in data:
                print(tweet)
                analysis = TextBlob(tweet)
                print(analysis.sentiment)

                #the below if statement will count the number of tweets based on their sentiment('Positive','Negative','Neutra')
                if analysis.sentiment[0]>0:
                    pos+=1
                elif analysis.sentiment[0]<0:
                    neg+=1
                else:
                    neu+=1
            pos1=pos1+pos
            neg1=neg1+neg
            neu1=neu1+neu

            #write the result from counting to the csv file "List.csv"
            with open('List.csv', 'a') as file:
                writer = csv.DictWriter(file, fieldnames=header)
                info={
                    'Positive':pos1,
                    'Negative':neg1,
                    'Neutral':neu1
                      }
                writer.writerow(info)

            print(data)
            return True


        def on_error(self, status):
            print(status)


    l = Listener()
        stream = Stream(auth, l)
        stream.filter(track=['trump'])
:
 <ipython-input-3-607cdfdcdd9b> in on_data(self, data)
         12         raw_t=json.loads(data)
         13 
    ---> 14         data=raw_t['text']
         15         #four lines below will clear the tweets by removing: metions, has tag etc.
         16         data = re.sub('@[A-Za-z0–9]+', '',data) #Removing @mentions

    KeyError: 'text'
...