#Stuff needed to run
import requests
import urllib.request
import io
from bs4 import BeautifulSoup as soup
# Fetch the front page of old.reddit.com/r/all with a custom User-Agent
# (reddit rejects the default urllib agent), then parse it.
my_url = 'https://old.reddit.com/r/all/'
request = urllib.request.Request(my_url, headers={'User-Agent': 'your bot 0.1'})
response = urllib.request.urlopen(request)
page_html = response.read()
page_soup = soup(page_html, "html.parser")

# Collect every post container and every author tagline; the two lists
# are parallel, so zip() pairs each title with its author below.
posts = page_soup.findAll("div", {"class": "top-matter"})
authors = page_soup.findAll("p", {"class": "tagline"})

filename = "redditAll.csv"
# Open the output file ONCE, outside the loop. The original code reopened
# the file with mode "w" on every iteration, which truncates it each time
# and leaves only the header plus the final row in the CSV.
with open(filename, "w", encoding="utf-8") as f:
    f.write("Title of the post, Author of the post\n")
    for post, author in zip(posts, authors):
        # Commas inside a title would split it across CSV columns,
        # so replace them before writing.
        post_text = post.p.a.text.replace(",", " -")
        username = author.a.text
        f.write(post_text + "," + username + "\n")
# No explicit f.close() needed: the with-block closes the file.
После запуска этого кода и открытия csv-файла в нём оказываются только две ячейки.
Их должно быть больше двух, так как на reddit.com/r/all больше двух сообщений.
Изменено это:

    for post, author in zip(posts, authors):
        post_text = post.p.a.text.replace(",", " -")
        username = author.a.text
        with open(filename, "w", encoding="utf-8") as f:
            f.write(post_text + "," + username + "\n")
На это:

    with open(filename, "w", encoding="utf-8") as f:
        for post, author in zip(posts, authors):
            post_text = post.p.a.text.replace(",", " -")
            username = author.a.text
            f.write(post_text + "," + username + "\n")