Привет, поэтому у меня есть код, который транслирует твиты из API Twitter и выполняет анализ настроений на них в виде файла CSV. Я хочу, чтобы твиты загружались в базу данных SQLite перед выполнением анализа настроений. Я неплохо справился с работой go и просто не могу понять это. Я использовал документацию https://docs.python.org/3/library/sqlite3.html и просмотрел множество сообщений в стеке ( твип-поток в базу данных sqlite - недопустимый синтаксис , Python твип-запись в sqlite3 db , tweepy-поток в базу данных sqlite - недопустимый synatx , Tweepy-to-sqlite3 ), но я все еще не могу заставить его делать то, что мне нужно. Я также посмотрел учебники здесь (https://tech.marksblogg.com/sqlite3-tutorial-and-guide.html) (https://recycledrobot.co.uk/words/?sqlite) и до сих пор не могу правильно сформулировать утверждение INSERT. CREATE TABLE работает и создает файл SQLite, но я не могу сохранить свои твиты. Я также запускаю оператор CREATE TABLE только один раз, затем закомментирую его. Весь код ниже. Любая помощь высоко ценится!
import csv
import re
import sys
import tweepy
import matplotlib.pyplot as plt
from textblob import TextBlob
import sqlite3 as lite
# import models.py
global db
global cursor
class SentimentAnalysis:
def __init__(self):
self.tweets = [] # initialising empty list objects
self.tweetText = []
# self.api = tweepy.API(self.auth)
# Change access details below to point to own application
def download_data(self):
# authenticating
consumerKey = '***'
consumerSecret = '***'
accessToken = '***'
accessTokenSecret = '***'
auth = tweepy.OAuthHandler(consumerKey, consumerSecret)
auth.set_access_token(accessToken, accessTokenSecret)
api = tweepy.API(auth)
# input for term to be searched and how many tweets to search
searchTerm = input("Enter Keyword/Tag to search about: ")
NoOfTerms = int(input("Enter how many tweets to search: "))
# searching for tweets
self.tweets = tweepy.Cursor(api.search, q=searchTerm, lang="en").items(NoOfTerms)
# Open/create a file to append data to
csvFile = open('result.csv', 'a')
# Use csv writer
csvWriter = csv.writer(csvFile)
# creating variables to store info
polarity = 0
positive = 0
wpositive = 0
spositive = 0
negative = 0
wnegative = 0
snegative = 0
neutral = 0
# iterating through tweets fetched
for tweet in self.tweets:
# Append to temp so that we can store in csv later. I use encode UTF-8
self.tweetText.append(self.clean_tweet(tweet.text).encode('utf-8'))
# print (tweet.text.translate(non_bmp_map)) #print tweet's text
analysis = TextBlob(tweet.text)
# print(analysis.sentiment) # print tweet's polarity
polarity += analysis.sentiment.polarity # adding up polarities to find the average later
if analysis.sentiment.polarity == 0: # adding reaction of how people are reacting to find average later
neutral += 1
elif 0 < analysis.sentiment.polarity <= 0.3:
wpositive += 1
elif 0.3 < analysis.sentiment.polarity <= 0.6:
positive += 1
elif 0.6 < analysis.sentiment.polarity <= 1:
spositive += 1
elif -0.3 < analysis.sentiment.polarity <= 0:
wnegative += 1
elif -0.6 < analysis.sentiment.polarity <= -0.3:
negative += 1
elif -1 < analysis.sentiment.polarity <= -0.6:
snegative += 1
# Write to csv and close csv file
csvWriter.writerow(self.tweetText)
# csvFile.close() # delete this for the insert SQL code bit
con = lite.connect(r"C:\\Users\\Student User\\PycharmProjects\\DataScienceLabs\\tweets.db")
cur = con.cursor()
cur.execute('''CREATE TABLE tweets(created_at text, id_str text, text text)''')
# commented out as models.py creates database
cur.execute("INSERT INTO tweets(?, ?, ?)", (created_at,
id_str,
text))
con.commit()
# finding average of how people are reacting
positive = self.percentage(positive, NoOfTerms)
wpositive = self.percentage(wpositive, NoOfTerms)
spositive = self.percentage(spositive, NoOfTerms)
negative = self.percentage(negative, NoOfTerms)
wnegative = self.percentage(wnegative, NoOfTerms)
snegative = self.percentage(snegative, NoOfTerms)
neutral = self.percentage(neutral, NoOfTerms)
# finding average reaction
polarity = polarity / NoOfTerms
# printing out data
print("How people are reacting on " + searchTerm + " by analyzing " + str(NoOfTerms) + " tweets.")
print()
print("General Report: ")
if polarity == 0:
print("Neutral")
elif 0 < polarity <= 0.3:
print("Weakly Positive")
elif 0.3 < polarity <= 0.6:
print("Positive")
elif 0.6 < polarity <= 1:
print("Strongly Positive")
elif -0.3 < polarity <= 0:
print("Weakly Negative")
elif -0.6 < polarity <= -0.3:
print("Negative")
elif -1 < polarity <= -0.6:
print("Strongly Negative")
print()
print("Detailed Report: ")
print(str(positive) + "% people thought it was positive")
print(str(wpositive) + "% people thought it was weakly positive")
print(str(spositive) + "% people thought it was strongly positive")
print(str(negative) + "% people thought it was negative")
print(str(wnegative) + "% people thought it was weakly negative")
print(str(snegative) + "% people thought it was strongly negative")
print(str(neutral) + "% people thought it was neutral")
self.plotPieChart(positive, wpositive, spositive, negative, wnegative, snegative, neutral, searchTerm,
NoOfTerms)
def clean_tweet(self, tweet):
# Remove links, special characters, etc., from tweet
return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t]) | (\w +:\ / \ / \S +)", " ", tweet).split())
# function to calculate percentage
def percentage(self, part, whole):
temp = 100 * float(part) / float(whole)
return format(temp, '.2f')
def plotPieChart(self, positive, wpositive, spositive, negative, wnegative, snegative, neutral, searchTerm, noOfSearchTerms):
labels = ['Positive [' + str(positive) + '%]', 'Weakly Positive [' + str(wpositive) + '%]','Strongly Positive [' + str(spositive) + '%]', 'Neutral [' + str(neutral) + '%]',
'Negative [' + str(negative) + '%]', 'Weakly Negative [' + str(wnegative) + '%]', 'Strongly Negative [' + str(snegative) + '%]']
sizes = [positive, wpositive, spositive, neutral, negative, wnegative, snegative]
colors = ['yellowgreen','lightgreen','darkgreen', 'gold', 'red','lightsalmon','darkred']
patches, texts = plt.pie(sizes, colors=colors, startangle=90)
plt.legend(patches, labels, loc="best")
plt.title('How people are reacting on ' + searchTerm + ' by analyzing ' + str(noOfSearchTerms) + ' Tweets.')
plt.axis('equal')
plt.tight_layout()
plt.show()
# def plt_histogram
if __name__== "__main__":
# db_init()
sa = SentimentAnalysis()
sa.download_data()