Я написал веб-скрейпер, который выводит на экран цену, имя продавца и штрих-код товара.
Однако я не могу понять, как записать полученные значения в Excel.
from selenium import webdriver
import openpyxl
import time
import os
# Input workbook and the local chromedriver binary used by this script.
WORKBOOK_PATH = 'test.xlsx'
CHROMEDRIVER_PATH = 'C:\\Users\\NAME\\Desktop\\Webscraper\\chromedriver.exe'

# The active sheet of the workbook is what the rest of the script reads from.
wb = openpyxl.load_workbook(WORKBOOK_PATH)
sheet = wb.active
cell = sheet['A1']

# Single shared browser session used by every helper below.
driver = webdriver.Chrome(CHROMEDRIVER_PATH)
#---------------------Base Bones----------------------------------------------
def start_browser():
    """Open the Google Shopping landing page in the shared browser.

    Uses the module-level ``driver`` and then pauses for one second
    (presumably to let the page finish loading before it is used).
    """
    landing_url = 'https://www.google.com/shopping?hl=en'
    driver.get(landing_url)
    time.sleep(1)
def sku_selector(barcode):
    """Type ``barcode`` into the search box and submit the search.

    The search box is looked up by the XPath ``//*[@id="lst-ib"]`` on the
    page currently shown by the module-level ``driver``.
    """
    query_field = driver.find_element_by_xpath('//*[@id="lst-ib"]')
    query_field.send_keys(barcode)
    query_field.submit()
def get_url():
    """Print the URL of the page the shared browser is currently on."""
    current_page = driver.current_url
    print(current_page)
def repeat():
    """Send the shared browser back to the Google Shopping landing page."""
    shopping_home = 'https://www.google.com/shopping?hl=en'
    driver.get(shopping_home)
#----------------------------------------------------------------------------
#----------------------Main Scraper---------------------------------------------
def scraper():
    """Scrape price, seller, GTIN and URL for the first search result.

    Works on the results page currently shown by the module-level
    ``driver``. Each stage has a fallback that is tried when the primary
    element lookup fails. Everything that is scraped is printed, as before,
    and is now also returned so that a caller can store it.

    Returns:
        dict: ``{'price': str, 'seller': str, 'gtin': str or None,
        'url': str}``. ``gtin`` is ``None`` when the GTIN element could not
        be found.

    Raises:
        Exception: whatever the driver raises when a fallback lookup fails
            as well (opening the product page, or reading price/seller).
    """
    # Step 1: select the first product after the search.
    try:
        time.sleep(2)
        print('Looking for product....')
        product_select = driver.find_element_by_css_selector('#rso > div.sh-sr__shop-result-group.BXIkFb > div > div:nth-child(1) > div > div.ZGFjDb > div > div.eIuuYe > a')
        product_select.click()
        print('Product found! Clicking it')
        # Step 2: click "Compare Prices" to open the Google Shopping
        # product page.
        time.sleep(2)
        print('Trying to open product page...')
        compare_shops = driver.find_element_by_css_selector('#rso > div > div > div.pspo-popout.pspo-lpop > div > div > div > div.pspo-content > div.pspo-fade > div._-mc > div._-eo > div > a > div')
        compare_shops.click()
        print('Success! Opening the product')
    except Exception:
        # Only Exception is caught (not a bare except) so that Ctrl-C and
        # SystemExit still stop the script instead of starting the fallback.
        # Fallback for step 1: close the open product pop-out and select the
        # product from the second result group (needs more work).
        time.sleep(2)
        print('Looking for product again')
        product_close = driver.find_element_by_css_selector('#rso > div.sh-sr__shop-result-group.BXIkFb > div.sh-pr__product-results > div.pspo-popout.pspo-lpop > div > div > div > a')
        product_close.click()
        print('closing Product')
        time.sleep(2)
        product_select_2 = driver.find_element_by_css_selector('#rso > div:nth-child(2) > div.sh-pr__product-results > div.sh-dlr__list-result > div > div.ZGFjDb > div > div.eIuuYe > a')
        product_select_2.click()
        print('Product found! Clicking it')
        # Step 2.1: click "Compare Prices" to open the Google Shopping
        # product page.
        time.sleep(2)
        print('Trying to open product page...')
        compare_shops_2 = driver.find_element_by_css_selector('#rso > div:nth-child(2) > div.sh-pr__product-results > div.pspo-popout.pspo-lpop > div > div > div > div.pspo-content > div.pspo-fade > div._-mc > div._-eo > div > a')
        compare_shops_2.click()
        print('Success! Opening the product, using method 2')

    try:
        # Step 3: click "Base Price" to sort by cheapest price.
        print('Finding the lowest price')
        time.sleep(2)
        base_price = driver.find_element_by_css_selector('#os-price-col-txt')
        base_price.click()
        # Step 4: read the first price in the sellers table and print it.
        time.sleep(2)
        price = driver.find_element_by_xpath('//*[@id="os-sellers-table"]/tbody/tr[2]/td[4]/span').text
        print(price)
        # Step 5: read the first seller name and print it.
        seller = driver.find_element_by_xpath('//*[@id="os-sellers-table"]/tbody/tr[2]/td[1]/span/a').text
        print(seller)
    except Exception:
        time.sleep(3)
        print('Could not find lowest price. Using data in the top right corner')
        # Step 4.1: read the price from the top-right offer list.
        price = driver.find_element_by_css_selector('#bb-os-list > li:nth-child(1) > div.bb-price').text
        print(price)
        # Step 5.1: read the seller name from the top-right offer list.
        seller = driver.find_element_by_css_selector('#bb-os-list > li:nth-child(1) > div.bb-seller').text
        print(seller)

    # Step 5: read the GTIN and print it; a missing GTIN is not fatal.
    gtin = None
    try:
        time.sleep(1)
        print('Trying to get GTIN')
        gtin = driver.find_element_by_css_selector('#specs > div > div > div:nth-child(3) > span.gaBVed').text
        print('Barcode:')
        print(gtin)
    except Exception:
        print('Unable to find GTIN')

    # Step 6: read the URL of the product page and print it.
    time.sleep(1)
    print('URL:')
    url = driver.current_url
    print(url)

    return {'price': price, 'seller': seller, 'gtin': gtin, 'url': url}
#Starting Script------------------------------------------------------------------------------------------------------
start_browser()
# Walk column A of the sheet top to bottom; each non-empty cell is one SKU.
for row in range(1, sheet.max_row + 1):
    sku = sheet['A' + str(row)].value
    if sku is None:
        # Empty cell: there is nothing to search for on this row.
        continue
    # Numeric cells come back as numbers, so convert once up front;
    # concatenating the raw value into the message raised TypeError and
    # aborted the SKU after its search had already been submitted.
    sku_text = str(sku)
    try:
        sku_selector(sku_text)
        print('Looking for SKU ' + sku_text)
        scraper()
        repeat()
        print('Success! Using next SKU')
    except Exception:
        # Best effort per SKU: report the failure and carry on with the next
        # row. Only Exception is caught so Ctrl-C still stops the run.
        repeat()
        get_url()
        print('-------------------------------------------------')
        print('| Criticall Error! Trying next sku |')
        print('-------------------------------------------------')
print('Done! All skus have been scraped')
Если у кого-то есть идея, как добавить в этот код запись данных в Excel, пожалуйста, дайте мне знать.