Никак не могу разобраться, в какое место моего веб-скрапера поместить код отправки электронной почты, чтобы он присылал мне CSV-файл, созданный из собранных данных. (Я во всём этом ещё новичок и пока только учусь тому, как и где встраивать один код в другой.)
Я пробовал запускать его после цикла for, но, похоже, это неправильно. Буду очень благодарен за любую помощь.
import requests
from bs4 import BeautifulSoup
from csv import writer
from time import sleep
### email modules ###
import smtplib,ssl
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email.utils import formatdate
from email import encoders
all_names = []  # Not used by the code visible here; kept in case other code relies on it.
base_url = "https://www.brownells.com/"
# Site root that every (relative) page link is appended to.
url = "/optics-mounting/index.htm?f_a=1"
# Relative URL of the first listing page to scrape.
CSV_PATH = "Brownells_Optics.csv"
# Single source of truth for the file that is written by the scraper and
# then attached to the email.


def scrape_to_csv(start_url=url, csv_path=CSV_PATH):
    """Scrape every listing page and write the results to a CSV file.

    Starting at ``start_url``, each page is fetched, one row
    (title, price, item URL, image URL) is written per
    ``div.media.listing`` element, and the "next" link is followed until
    a page has none.

    Args:
        start_url: Relative URL of the first page (appended to ``base_url``).
        csv_path: Path of the CSV file to create or overwrite.
    """
    page_url = start_url
    # newline="" is what the csv module expects; without it Windows gets a
    # blank line after every row. UTF-8 keeps non-ASCII product titles from
    # raising UnicodeEncodeError on platforms with a narrow default encoding.
    with open(csv_path, "w", newline="", encoding="utf-8") as csv_file:
        csv_writer = writer(csv_file)
        csv_writer.writerow(["Title", "Price", "Item Url", "Image"])

        # Pagination loop: runs until there is no "next" link.
        while page_url:
            res = requests.get(f"{base_url}{page_url}")
            # Progress feedback while the scraper is running.
            print(f"Now Scraping {base_url}{page_url}...")
            soup = BeautifulSoup(res.text, "html.parser")

            # Each listing element holds one product.
            for name in soup.findAll("div", {"class": "media listing"}):
                title = name.find(itemprop="name").get_text()
                price = name.find(itemprop="lowPrice").text.strip()
                item_url = name.find("a").get("href")
                img = name.find("img").get("src")
                csv_writer.writerow([title, price, item_url, img])

            # On the last page there is no "next" element and find() returns
            # None, so look the element up first and only then read its
            # href; a missing link ends the loop instead of raising.
            next_btn = soup.find(class_="next")
            page_url = next_btn.get("href") if next_btn else None


def build_message(csv_path=CSV_PATH, sender='from email',
                  recipient='who to send email to',
                  subject="Put subject here"):
    """Build a multipart email with the file at ``csv_path`` attached.

    Args:
        csv_path: Path of the file to attach; its base name is used as the
            attachment filename.
        sender: Value of the ``From`` header.
        recipient: Value of the ``To`` header.
        subject: Value of the ``Subject`` header.

    Returns:
        The assembled ``MIMEMultipart`` message.

    Raises:
        OSError: If ``csv_path`` cannot be opened for reading.
    """
    from os.path import basename  # local import: not in the file's import block

    msg = MIMEMultipart()
    msg['Subject'] = subject
    msg['From'] = sender
    msg['To'] = recipient
    msg.preamble = "test "

    part = MIMEBase('application', "octet-stream")
    # Context manager so the file handle is closed right after reading.
    with open(csv_path, "rb") as attachment:
        part.set_payload(attachment.read())
    encoders.encode_base64(part)
    part.add_header('Content-Disposition', 'attachment',
                    filename=basename(csv_path))
    msg.attach(part)
    return msg


### Function to send the email ###
def send_an_email(csv_path=CSV_PATH):
    """Email the scraped CSV file as an attachment through Gmail SMTP.

    Call this once, after scraping has finished and the CSV file has been
    closed, so that the complete file is attached.

    Args:
        csv_path: Path of the CSV file to attach.

    SMTP-level failures are reported on stdout rather than raised.
    """
    toaddr = 'who to send email to'
    me = 'from email'
    msg = build_message(csv_path, me, toaddr, "Put subject here")

    try:
        # The context manager sends QUIT and closes the socket even when
        # login or sendmail fails.
        with smtplib.SMTP('smtp.gmail.com', 587) as s:
            s.ehlo()
            # Verify the server certificate when upgrading to TLS.
            s.starttls(context=ssl.create_default_context())
            s.ehlo()
            # NOTE: placeholders; do not commit real credentials to source.
            s.login(user='PUT EMAIL HERE', password='PUT EMAIL PASSWORD HERE')
            s.sendmail(me, toaddr, msg.as_string())
    except smtplib.SMTPException as error:
        # SMTPException lives in smtplib; the bare name was never imported.
        print(f"Error: {error}")


def main():
    """Scrape all pages first, then send the finished CSV exactly once."""
    scrape_to_csv()
    send_an_email()


if __name__ == "__main__":
    main()