Как с помощью Python собрать неограниченное количество самых старых твитов по множеству хэштегов (более 20 хэштегов) и сохранить для каждого твита реальную дату его публикации?
Я пробовал приведённый ниже код, но он возвращает только самые новые твиты, а дата публикации оказывается неверной (записывается текущее время сохранения твитов!).
import datetime
import json
import time
import tweepy
from google.cloud import pubsub_v1
from tweepy.streaming import StreamListener
# Pub/Sub publisher client. It has to exist before ``topic_path`` is built
# and before ``write_to_pubsub`` runs; without this line the script stops
# with ``NameError: name 'publisher' is not defined``.
publisher = pubsub_v1.PublisherClient()

# Fully qualified topic name built from (project id, topic id).
# The two values are blank placeholders and must be filled in.
topic_path = publisher.topic_path(" ", " ")

# Twitter API credentials (placeholders) and the REST client.
auth = tweepy.OAuthHandler("consumer_key", "consumer_secret")
auth.set_access_token("access_token", "access_token_secret")
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=False)

# Hashtags of interest; the empty strings are unfilled placeholders.
lst_hashtags = ["", "", "", "", "#Funny", "#Deeplearning"]
def write_to_pubsub(data):
    """Publish one reformatted tweet to the Pub/Sub topic.

    Only tweets whose ``lang`` is ``"en"`` are published; any other tweet
    is skipped and the function returns without sending anything.

    Args:
        data: Flat tweet dict. Must contain ``lang``, every key listed in
            ``forwarded_keys`` below, and ``created_at`` as epoch seconds.

    Raises:
        KeyError: If ``data`` lacks one of the required keys.
    """
    if data["lang"] != "en":
        return

    # Fields copied into the message unchanged, in the order they are
    # serialized.
    forwarded_keys = (
        "id_str",
        "id",
        "retweeted_id",
        "favorite_count",
        "retweet_count",
        "coordinates_latitude",
        "coordinates_longitude",
        "place",
        "user_id",
        "user_name",
        "user_location",
        "user_url",
        "user_description",
        "text",
    )
    message = {key: data[key] for key in forwarded_keys}

    # NOTE(review): fromtimestamp() renders the epoch in the machine's local
    # timezone as a naive datetime - confirm this matches the convention used
    # by whoever produced ``created_at``.
    message["posted_at"] = datetime.datetime.fromtimestamp(
        data["created_at"]
    ).strftime('%Y-%m-%d %H:%M:%S')

    # The tweet id is also attached as a message attribute. Errors are not
    # caught here; they propagate to the caller exactly as they did through
    # the previous re-raise-only ``try/except``.
    publisher.publish(
        topic_path,
        data=json.dumps(message).encode("utf-8"),
        tweet_id=str(data["id"]).encode("utf-8"),
    )
def reformat_tweet(tweet):
    """Flatten a raw tweet dict into the document that gets published.

    Args:
        tweet: Tweet as a dict; the code reads ``id``, ``id_str``, ``lang``,
            ``user``, ``entities``, ``created_at`` and one of
            ``extended_tweet`` / ``full_text`` / ``text``, plus the optional
            ``retweeted_status``, ``favorite_count``, ``retweet_count``,
            ``coordinates`` and ``place``.

    Returns:
        dict: Flat document with ids, counters, geo fields, user fields,
        ``created_at`` (epoch seconds as a float), ``hashtags``,
        ``usermentions`` and ``text``.

    Raises:
        KeyError: If a required field is missing from ``tweet``.
        ValueError: If ``created_at`` does not match the expected
            ``"%a %b %d %H:%M:%S +0000 %Y"`` layout.
    """
    x = tweet
    user = x["user"]
    entities = x["entities"]

    # The ``coordinates`` field is a GeoJSON point, i.e. [longitude, latitude].
    # Latitude therefore lives at index 1 (it used to read index 0, which
    # duplicated the longitude).
    geo = x.get("coordinates")
    longitude, latitude = (geo["coordinates"][0], geo["coordinates"][1]) if geo else (0, 0)

    place = x.get("place")

    processed_doc = {
        "id_str": x["id_str"],
        "id": x["id"],
        "lang": x["lang"],
        "retweeted_id": x["retweeted_status"]["id"] if "retweeted_status" in x else None,
        "favorite_count": x.get("favorite_count", 0),
        "retweet_count": x.get("retweet_count", 0),
        # Tweets without a geo tag keep the historical default of 0 / 0.
        "coordinates_latitude": latitude,
        "coordinates_longitude": longitude,
        "place": place["country_code"] if place else None,
        "user_id": user["id"],
        "user_name": user["name"],
        "user_screen_name": user["screen_name"],
        "user_location": user["location"],
        "user_url": user["url"],
        "user_description": user["description"],
        # NOTE(review): mktime() interprets the parsed (UTC) fields as local
        # time; the value is only self-consistent when read back with a
        # local-time conversion such as datetime.fromtimestamp().
        "created_at": time.mktime(time.strptime(x["created_at"], "%a %b %d %H:%M:%S +0000 %Y")),
    }

    # An empty or missing-valued entity list simply yields [].
    processed_doc["hashtags"] = [
        {"text": y["text"], "startindex": y["indices"][0]}
        for y in (entities["hashtags"] or [])
    ]
    processed_doc["usermentions"] = [
        {"screen_name": y["screen_name"], "startindex": y["indices"][0]}
        for y in (entities["user_mentions"] or [])
    ]

    # Prefer the untruncated text when the payload carries it.
    if "extended_tweet" in x:
        processed_doc["text"] = x["extended_tweet"]["full_text"]
    elif "full_text" in x:
        processed_doc["text"] = x["full_text"]
    else:
        processed_doc["text"] = x["text"]
    return processed_doc
class StdOutListener(StreamListener):
    """Stream listener that reformats each incoming status and publishes it."""

    def __init__(self):
        super(StdOutListener, self).__init__()
        # Number of statuses successfully handed to write_to_pubsub so far.
        self._counter = 0

    def on_status(self, data):
        """Handle one status object delivered by the stream.

        The status' raw ``_json`` dict is flattened with ``reformat_tweet``
        and passed to ``write_to_pubsub``; the counter is advanced only
        after that call returns. Always returns ``True``.
        """
        document = reformat_tweet(data._json)
        write_to_pubsub(document)
        self._counter = self._counter + 1
        return True
Кто-нибудь может мне помочь, пожалуйста?
Примечание: я использую Google Cloud Platform и запускаю код из командной строки по SSH.