У меня есть код для отслеживания различных хэштегов в Твиттере: когда появляется новый твит, я записываю его в файл CSV. Поскольку поток твитов (stream) основан на событиях, я использую функцию хранения, чтобы контролировать его из другого потока (thread). Мне нужно следить за лентой, находить новую строку и выводить сообщение, когда она появляется. Сейчас, когда я использую таймер, это не работает: файл блокируется. Ниже приведён мой код.
import twittercredentials
import csv
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import time
# Hashtags passed to the stream filter.
hashtag1, hashtag2, hashtag3 = "#a", "#b", "#c"

# Open (or create) the CSV file in append mode and wrap it in a csv writer.
# Earlier target, kept for reference: open('result.csv', 'a')
csvFile = open('C:/code/IntelligentSuit/tweet.csv', 'a')
csvWriter = csv.writer(csvFile)
class listener(StreamListener):
    """Stream listener that prints each tweet and appends it to the CSV file."""

    # def on_data(self, data):
    #     tweet = data.split(',"text":"')[1].split('","source')[0]
    #     if(tweet):
    #         #replace with custom tag 1
    #         if ('#ToinasMakerRegresa' in tweet):
    #             print(tweet+"\n")
    #             time.sleep(5)
    #         #replace with custom tag 2
    #         elif(hashtag2 in tweet):
    #             print(tweet+"\n")
    #             time.sleep(5)
    #         #replace with custom tag 3
    #         elif(hashtag3 in tweet):
    #             print(tweet+"\n")
    #             time.sleep(5)
    #     return(True)

    def on_status(self, status):
        """Print the tweet text and append one row to the CSV file.

        The row is ``[status.created_at, utf-8 encoded status.text]``.

        Returns:
            True, so the stream keeps listening.

        Raises:
            IOError: re-raised after a message is printed when the file
                cannot be opened or written.
        """
        print('Tweet text:' + status.text)
        try:
            # Write through a writer bound to the handle opened here, so the
            # row reaches disk when the ``with`` block closes the file. The
            # module-level csvWriter wraps a different handle that this class
            # never flushes or closes.
            with open('C:/code/IntelligentSuit/tweet.csv', 'a') as csvFile:
                row = [status.created_at, status.text.encode('utf-8')]
                csv.writer(csvFile).writerow(row)
        except IOError:
            print("file is already open")
            raise
        return True

    def on_error(self, status_code):
        """Print the error status code and return True to keep listening."""
        print('Got an error with status code: ' + str(status_code))
        return True  # To continue listening

    def on_timeout(self):
        """Print a timeout notice and return True to keep listening."""
        print('Timeout...')
        return True  # To continue listening

    # def on_error(self, status):
    #     if status == 420:
    #         #returning False on_data method in case rate limit occurs
    #         return False
    #     print(status)
if __name__ == '__main__':
    # Build the OAuth handler from the keys stored in twittercredentials.
    auth = OAuthHandler(
        twittercredentials.CONSUMER_KEY,
        twittercredentials.CONSUMER_SECRET,
    )
    auth.set_access_token(
        twittercredentials.ACCESS_TOKEN,
        twittercredentials.ACCESS_TOKEN_SECRET,
    )
    twitterStream = Stream(auth, listener())

    # replace hashtags with custom tags below. ALSO: replace tags in lines #30, #48, and #66
    tracked = [hashtag1, hashtag2, hashtag3]
    print("tracking: " + ",".join(tracked))
    twitterStream.filter(track=tracked)
    print("does it ever go back here?")
Файл TwitterPoll
import time
import csv
import pandas as pd
import numpy
import threading
from IPython.display import display as ds
# CSV file that is polled for new tweets.
filename = 'C:/code/IntelligentSuit/tweet.csv'

# Baseline frame used for comparison; stays an empty DataFrame when the
# initial read fails.
df_in = pd.DataFrame()
try:
    # read_csv opens and closes the file itself, so no separate open() handle
    # is held on the file while it is being read.
    df_in = pd.read_csv(filename)
except Exception as exc:
    # Best effort: report the cause and continue with the empty baseline.
    print("error: {!r}".format(exc))
else:
    print("old frame is:\n{}".format(df_in))
    print("executing trhead")
def printdiffs():
    """Run one comparison pass and remember its result as the new baseline.

    The frame returned by ``find_diffs`` is stored back into the module-level
    ``df_in`` so that the next call compares against it instead of against
    the frame loaded at start-up.

    Returns:
        True, always.
    """
    global df_in
    df_in = find_diffs(df_in)
    return True
def find_diffs(df_in):
    """Re-read the CSV file and report rows that differ from ``df_in``.

    Args:
        df_in: the baseline DataFrame to compare the file's contents against.

    Returns:
        The freshly read DataFrame when any difference was found, otherwise
        ``df_in`` unchanged.
    """
    df_latest = pd.read_csv(filename)
    print("newdata frame is:\n{}".format(df_latest))

    if len(df_in.columns) == 0:
        # A column-less baseline (the fallback when the initial read failed)
        # shares no columns with the file, so pd.merge would raise
        # MergeError. Treat every row of the file as new instead.
        diff_df = df_latest
    else:
        merged = pd.merge(df_latest, df_in, how='outer', indicator='Exist')
        # Keep only rows that are not present in both frames.
        diff_df = merged.loc[merged['Exist'] != 'both']

    # Reports the size of the baseline frame that was passed in.
    print('new dataframe size is' + str(df_in.size))
    if diff_df.size >= 1:
        print('diff found' + str(diff_df.size))
        print(str(diff_df.size))
        # Only print the last row when the latest frame has one; indexing
        # [-1] on a frame with no rows would raise IndexError.
        if len(df_latest.index) > 0:
            print(df_latest.values[-1].tolist())
        df_in = df_latest
    return df_in
if __name__ == '__main__':
    # Run ten comparison passes, sleeping ten seconds after each one.
    for _ in range(10):
        printdiffs()
        time.sleep(10)
    print("finished")
#threading.Timer(5.0, printdiffs()).start()
# i=0
# j=0
# last_tweet=""
# tweet_date=""
# tweet_text=""
# with open(filename, 'r') as f:
# for row in reversed(list(csv.reader(f))):
# if i==0:
# print(', '.join(row))
# i+=1
# if row == last_tweet:
# break
# else:
# for column in row:
# if j==0:
# if tweet_date=="":
# tweet_date=column
# print tweet_date
# if j==1:
# tweet_text=column
# print tweet_text
# j=+1
# if last_tweet == "":
# last_tweet = row
# print last_tweet
Этот код должен работать постоянно.