Как записать значения функции в Excel - PullRequest
1 голос
/ 05 июля 2019

Я сделал веб-скрапер, который печатает цену, имя продавца и штрихкод продукта.

Однако я не могу понять, как записать эти значения в Excel.

from selenium import webdriver
import openpyxl
import time
import os  

# Input workbook: the main loop further down reads the SKUs from column A of
# its active sheet.
wb = openpyxl.load_workbook('test.xlsx')
sheet = wb.active
# NOTE(review): `cell` is not referenced anywhere else in the visible script -
# confirm it is unused before removing it.
cell = sheet['A1']
# One Chrome session shared by every helper below; the chromedriver path is
# specific to the author's machine.
driver = webdriver.Chrome('C:\\Users\\NAME\\Desktop\\Webscraper\\chromedriver.exe')


#---------------------Base Bones----------------------------------------------
def start_browser():
    """Load the Google Shopping start page, then wait one second."""
    shopping_home = 'https://www.google.com/shopping?hl=en'
    driver.get(shopping_home)
    time.sleep(1)

def sku_selector(barcode):
    """Type *barcode* into the search field and submit the search.

    The search field is located through its element id ``lst-ib``.
    """
    query_field = driver.find_element_by_xpath('//*[@id="lst-ib"]')
    query_field.send_keys(barcode)
    query_field.submit()

def get_url():
    """Print the URL the shared browser session is currently showing."""
    current = driver.current_url
    print(current)

def repeat():
    """Navigate back to the Google Shopping start page for the next search."""
    shopping_home = 'https://www.google.com/shopping?hl=en'
    driver.get(shopping_home)

#----------------------------------------------------------------------------



#----------------------Main Scraper---------------------------------------------


def _open_product_page():
    """Click from the search results through to the product's offers page.

    If the primary selectors fail, the open product pop-out is closed and a
    second set of selectors is tried. Errors raised by that second attempt
    propagate to the caller.
    """
    try:
        # Step 1: select the first product after the search
        time.sleep(2)
        print('Looking for product....')
        product_select = driver.find_element_by_css_selector('#rso > div.sh-sr__shop-result-group.BXIkFb > div > div:nth-child(1) > div > div.ZGFjDb > div > div.eIuuYe > a')
        product_select.click()
        print('Product found! Clicking it')

        # Step 2: click "Compare Prices" to open the Google Shopping product page
        time.sleep(2)
        print('Trying to open product page...')
        compare_shops = driver.find_element_by_css_selector('#rso > div > div > div.pspo-popout.pspo-lpop > div > div > div > div.pspo-content > div.pspo-fade > div._-mc > div._-eo > div > a > div')
        compare_shops.click()
        print('Success! Opening the product')

    # `Exception` rather than a bare `except` so Ctrl+C / SystemExit still stop the run.
    except Exception:
        # Step 1 (fallback): select the first product after the search - needs more work
        time.sleep(2)
        print('Looking for product again')

        product_close = driver.find_element_by_css_selector('#rso > div.sh-sr__shop-result-group.BXIkFb > div.sh-pr__product-results > div.pspo-popout.pspo-lpop > div > div > div > a')
        product_close.click()
        print('closing Product')

        time.sleep(2)
        product_select_2 = driver.find_element_by_css_selector('#rso > div:nth-child(2) > div.sh-pr__product-results > div.sh-dlr__list-result > div > div.ZGFjDb > div > div.eIuuYe > a')
        product_select_2.click()
        print('Product found! Clicking it')

        # Step 2.1: click "Compare Prices" to open the Google Shopping product page
        time.sleep(2)
        print('Trying to open product page...')
        compare_shops_2 = driver.find_element_by_css_selector('#rso > div:nth-child(2) > div.sh-pr__product-results > div.pspo-popout.pspo-lpop > div > div > div > div.pspo-content > div.pspo-fade > div._-mc > div._-eo > div > a')
        compare_shops_2.click()
        print('Success! Opening the product, using method 2')


def _read_cheapest_offer():
    """Return ``(price, seller)`` as text, preferring the price-sorted sellers table.

    Falls back to the first entry of the ``#bb-os-list`` box when the table
    cannot be read. Errors raised by the fallback propagate to the caller.
    """
    try:
        # Step 3: click "Base Price" to sort by cheapest price
        print('Finding the lowest price')
        time.sleep(2)
        base_price = driver.find_element_by_css_selector('#os-price-col-txt')
        base_price.click()

        # Step 4: read the first price and print it
        time.sleep(2)
        price = driver.find_element_by_xpath('//*[@id="os-sellers-table"]/tbody/tr[2]/td[4]/span').text
        print(price)

        # Step 5: read the first seller name and print it
        seller = driver.find_element_by_xpath('//*[@id="os-sellers-table"]/tbody/tr[2]/td[1]/span/a').text
        print(seller)

    except Exception:
        time.sleep(3)
        print('Could not find lowest price. Using data in the top right corner')
        # Step 4.1: read the price from the top right
        price = driver.find_element_by_css_selector('#bb-os-list > li:nth-child(1) > div.bb-price').text
        print(price)
        # Step 5.1: read the seller name from the top right
        seller = driver.find_element_by_css_selector('#bb-os-list > li:nth-child(1) > div.bb-seller').text
        print(seller)

    return price, seller


def _read_gtin():
    """Return the GTIN text from the specs section, or ``None`` if it is not found."""
    try:
        time.sleep(1)
        print('Trying to get GTIN')
        gtin = driver.find_element_by_css_selector('#specs > div > div > div:nth-child(3) > span.gaBVed').text
        print('Barcode:')
        print(gtin)
        return gtin
    except Exception:
        # Best effort: a missing GTIN must not abort the rest of the scrape.
        print('Unable to find GTIN')
        return None


def scraper():
    """Scrape the product currently shown in the search results.

    Opens the product's offers page, reads the cheapest price with its seller,
    the GTIN and the page URL, printing each value as it is found.

    Returns:
        dict with the keys ``'gtin'`` (``None`` when not found), ``'price'``,
        ``'seller'`` and ``'url'``, so the caller can store the values (for
        example in a spreadsheet row). Callers that ignore the return value
        keep working as before.

    Raises:
        Any exception from the fallback lookups when neither the primary nor
        the fallback selectors match.
    """
    _open_product_page()
    price, seller = _read_cheapest_offer()

    # Step 6: read the GTIN
    gtin = _read_gtin()

    # Step 7: read the URL and print it
    time.sleep(1)
    print('URL:')
    url = driver.current_url
    print(url)

    return {'gtin': gtin, 'price': price, 'seller': seller, 'url': url}





#Starting Script------------------------------------------------------------------------------------------------------



# Walk column A of the input sheet and scrape every SKU found there.
start_browser()

for row in range(1, sheet.max_row + 1):
    sku = sheet['A' + str(row)].value
    if sku is None:
        # Blank cell: there is nothing to search for.
        continue

    try:
        sku_selector(sku)
        # Numeric cells come back as int/float, so convert before concatenating;
        # otherwise the TypeError would send every numeric SKU to the error path.
        print('Looking for SKU ' + str(sku))

        scraper()
        repeat()

        print('Success! Using next SKU')

    # `Exception` rather than a bare `except` so Ctrl+C can still stop the run.
    except Exception:
        # Report the failing page first; repeat() navigates away from it.
        get_url()
        repeat()
        print('-------------------------------------------------')
        print('|     Criticall Error! Trying next sku          |')
        print('-------------------------------------------------')


print('Done! All skus have been scraped')

Если у кого-то есть идея, как добавить в этот код запись в Excel, пожалуйста, дайте мне знать.

Ответы [ 2 ]

0 голосов
/ 05 июля 2019

Попробуйте использовать xlsxwriter

import xlsxwriter

# Create the Excel workbook that will receive the exported data.
workbook = xlsxwriter.Workbook('C:/Users/Myuser/Desktop/DataExcel.xlsx')  # replace with your desired path
excel_hashtag = workbook.add_worksheet('Sheet 1')  # replace with your sheet name
# write(row, column, data): pass the row and column number of the target cell.
excel_hashtag.write(0, 0, "Your data to be printed")
# The file is only written to disk when the workbook is closed.
workbook.close()

Дайте мне знать, если понадобится какое-либо разъяснение по этому вопросу

0 голосов
/ 05 июля 2019

Чтобы сохранить ваши результаты в CSV-файле:

import csv

# Append one result row; newline='' lets the csv module control line endings.
with open('results.csv', "a", newline='') as fp:
    wr = csv.writer(fp, dialect='excel')
    # Example values - replace with the gtin, price and seller that were scraped.
    wr.writerow(['SKU1-14', '60$', 'Amazon'])

Не забудьте вернуть список результатов из функции scraper():

def scraper():
    # Sketch only: the existing scraping steps go here and must assign
    # gtin, price and seller before the list below is built.
    ...
    results = [gtin, price, seller]  # add any further values you collect
    return results
...