How do I get tweets to load into an SQLite database with Python / Tweepy?

Hi, so I have code that streams tweets from the Twitter API and runs sentiment analysis on them, writing the results to a CSV file. I want the tweets to be loaded into an SQLite database before the sentiment analysis is run. I've had a decent go at this and just can't figure it out. I've used the documentation https://docs.python.org/3/library/sqlite3.html and looked through plenty of Stack posts (tweepy stream to sqlite database - invalid syntax, Python tweepy writing to sqlite3 db, tweepy stream to sqlite database - invalid synatx, Tweepy to sqlite3), but I still can't get it to do what I need. I've also looked at the tutorials here (https://tech.marksblogg.com/sqlite3-tutorial-and-guide.html) (https://recycledrobot.co.uk/words/?sqlite) and still can't formulate the INSERT statement correctly. The CREATE TABLE works and creates the SQLite file, but I can't save my tweets. I also run the CREATE TABLE statement only once and then comment it out. The full code is below. Any help is much appreciated!
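From the sqlite3 docs, my understanding is that the pattern I'm aiming for looks roughly like the sketch below (same table and column names as the CREATE TABLE in my code further down; the inserted values are just dummies), but I haven't managed to apply it to the tweets themselves:

import sqlite3 as lite

con = lite.connect("tweets.db")
cur = con.cursor()
# IF NOT EXISTS means the statement can stay in the script instead of being commented out after the first run
cur.execute("CREATE TABLE IF NOT EXISTS tweets(created_at text, id_str text, text text)")
# the column list and the VALUES placeholders are separate; each ? is filled from the tuple passed as the second argument
cur.execute("INSERT INTO tweets(created_at, id_str, text) VALUES (?, ?, ?)",
            ("2020-03-20 12:00:00", "1240000000000000000", "dummy tweet text"))
con.commit()
con.close()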

import csv
import re
import sys
import tweepy
import matplotlib.pyplot as plt
from textblob import TextBlob
import sqlite3 as lite
# import models.py
global db
global cursor


class SentimentAnalysis:

    def __init__(self):
        self.tweets = [] # initialising empty list objects
        self.tweetText = []
        # self.api = tweepy.API(self.auth)

    # Change access details below to point to own application
    def download_data(self):
        # authenticating
        consumerKey = '***'
        consumerSecret = '***'
        accessToken = '***'
        accessTokenSecret = '***'
        auth = tweepy.OAuthHandler(consumerKey, consumerSecret)
        auth.set_access_token(accessToken, accessTokenSecret)
        api = tweepy.API(auth)

        # input for term to be searched and how many tweets to search
        searchTerm = input("Enter Keyword/Tag to search about: ")
        NoOfTerms = int(input("Enter how many tweets to search: "))

        # searching for tweets
        self.tweets = tweepy.Cursor(api.search, q=searchTerm, lang="en").items(NoOfTerms)

        # Open/create a file to append data to
        csvFile = open('result.csv', 'a')

        # Use csv writer
        csvWriter = csv.writer(csvFile)

        # creating variables to store info
        polarity = 0
        positive = 0
        wpositive = 0
        spositive = 0
        negative = 0
        wnegative = 0
        snegative = 0
        neutral = 0

        # iterating through tweets fetched
        for tweet in self.tweets:
            # Append to temp so that we can store in csv later. I use encode UTF-8
            self.tweetText.append(self.clean_tweet(tweet.text).encode('utf-8'))
            # print (tweet.text.translate(non_bmp_map))    #print tweet's text
            analysis = TextBlob(tweet.text)
            # print(analysis.sentiment)  # print tweet's polarity
            polarity += analysis.sentiment.polarity  # adding up polarities to find the average later

            if analysis.sentiment.polarity == 0:  # adding reaction of how people are reacting to find average later
                neutral += 1
            elif 0 < analysis.sentiment.polarity <= 0.3:
                wpositive += 1
            elif 0.3 < analysis.sentiment.polarity <= 0.6:
                positive += 1
            elif 0.6 < analysis.sentiment.polarity <= 1:
                spositive += 1
            elif -0.3 < analysis.sentiment.polarity <= 0:
                wnegative += 1
            elif -0.6 < analysis.sentiment.polarity <= -0.3:
                negative += 1
            elif -1 < analysis.sentiment.polarity <= -0.6:
                snegative += 1

        # Write to csv and close csv file
        csvWriter.writerow(self.tweetText)
        # csvFile.close()  # delete this for the insert SQL code bit

        con = lite.connect(r"C:\Users\Student User\PycharmProjects\DataScienceLabs\tweets.db")
        cur = con.cursor()
        cur.execute('''CREATE TABLE tweets(created_at text, id_str text, text text)''')
        # commented out as models.py creates database

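        # this is the part I can't get right: created_at, id_str and text are not defined
        # at this point, and the column list / VALUES section of the statement is malformed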
        cur.execute("INSERT INTO tweets(?, ?, ?)", (created_at,
                                                    id_str,
                                                    text))

        con.commit()

        # finding average of how people are reacting
        positive = self.percentage(positive, NoOfTerms)
        wpositive = self.percentage(wpositive, NoOfTerms)
        spositive = self.percentage(spositive, NoOfTerms)
        negative = self.percentage(negative, NoOfTerms)
        wnegative = self.percentage(wnegative, NoOfTerms)
        snegative = self.percentage(snegative, NoOfTerms)
        neutral = self.percentage(neutral, NoOfTerms)

        # finding average reaction
        polarity = polarity / NoOfTerms

        # printing out data
        print("How people are reacting on " + searchTerm + " by analyzing " + str(NoOfTerms) + " tweets.")
        print()
        print("General Report: ")

        if polarity == 0:
            print("Neutral")
        elif 0 < polarity <= 0.3:
            print("Weakly Positive")
        elif 0.3 < polarity <= 0.6:
            print("Positive")
        elif 0.6 < polarity <= 1:
            print("Strongly Positive")
        elif -0.3 < polarity <= 0:
            print("Weakly Negative")
        elif -0.6 < polarity <= -0.3:
            print("Negative")
        elif -1 < polarity <= -0.6:
            print("Strongly Negative")

        print()
        print("Detailed Report: ")
        print(str(positive) + "% people thought it was positive")
        print(str(wpositive) + "% people thought it was weakly positive")
        print(str(spositive) + "% people thought it was strongly positive")
        print(str(negative) + "% people thought it was negative")
        print(str(wnegative) + "% people thought it was weakly negative")
        print(str(snegative) + "% people thought it was strongly negative")
        print(str(neutral) + "% people thought it was neutral")

        self.plotPieChart(positive, wpositive, spositive, negative, wnegative, snegative, neutral, searchTerm,
                          NoOfTerms)

    def clean_tweet(self, tweet):
        # Remove links, special characters, etc., from tweet
        return ' '.join(re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())

    # function to calculate percentage
    def percentage(self, part, whole):
        temp = 100 * float(part) / float(whole)
        return format(temp, '.2f')

    def plotPieChart(self, positive, wpositive, spositive, negative, wnegative, snegative, neutral, searchTerm, noOfSearchTerms):
        labels = ['Positive [' + str(positive) + '%]', 'Weakly Positive [' + str(wpositive) + '%]','Strongly Positive [' + str(spositive) + '%]', 'Neutral [' + str(neutral) + '%]',
                  'Negative [' + str(negative) + '%]', 'Weakly Negative [' + str(wnegative) + '%]', 'Strongly Negative [' + str(snegative) + '%]']
        sizes = [positive, wpositive, spositive, neutral, negative, wnegative, snegative]
        colors = ['yellowgreen','lightgreen','darkgreen', 'gold', 'red','lightsalmon','darkred']
        patches, texts = plt.pie(sizes, colors=colors, startangle=90)
        plt.legend(patches, labels, loc="best")
        plt.title('How people are reacting on ' + searchTerm + ' by analyzing ' + str(noOfSearchTerms) + ' Tweets.')
        plt.axis('equal')
        plt.tight_layout()
        plt.show()

    # def plt_histogram


if __name__ == "__main__":
    # db_init()
    sa = SentimentAnalysis()
    sa.download_data()
...
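If it helps, this is the direction I've been trying to take it in: a hypothetical save_tweets helper (just a sketch based on the sqlite3 docs, not working code from my project) that collects one (created_at, id_str, text) row per tweet while iterating, since the tweepy Cursor results can only be looped over once, and then inserts them all with executemany:

import sqlite3 as lite

def save_tweets(tweets, db_path="tweets.db"):
    # tweets: any iterable of tweepy Status objects, e.g. the result of tweepy.Cursor(...).items(n)
    con = lite.connect(db_path)
    cur = con.cursor()
    cur.execute("CREATE TABLE IF NOT EXISTS tweets(created_at text, id_str text, text text)")
    # build the rows while iterating, because the Cursor iterator is consumed by a single pass
    rows = [(str(tweet.created_at), tweet.id_str, tweet.text) for tweet in tweets]
    cur.executemany("INSERT INTO tweets(created_at, id_str, text) VALUES (?, ?, ?)", rows)
    con.commit()
    con.close()
    return rows  # returned so the tweet text can still be fed to the sentiment analysis afterwards

The idea would be to call rows = save_tweets(self.tweets) right after the tweepy.Cursor(...) line in download_data and then run the sentiment loop over rows (the text is the third element of each tuple) instead of over self.tweets, but I'm not sure this is the right way to structure it.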