Как распараллелить краулер на Selenium? - PullRequest
0 голосов
/ 03 мая 2019

Я не могу распараллелить этот код.

Это краулер для YouTube. Вызовы driver.find_element работают медленно, поэтому я попытался распараллелить эти строки, но у меня не получилось. Есть ли другой возможный способ распараллелить этот код (или применить многопроцессорную обработку)?

import time
from concurrent.futures import ProcessPoolExecutor as PoolExecutor

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# Load each address in the shared browser and scroll the page once.
for add in addresses:
    # NOTE(review): the result list is re-created for every address, following
    # the original statement order — confirm it is not meant to be shared
    # across all pages.
    informations = []
    driver.get(add)
    time.sleep(1)  # fixed pause after navigation — presumably lets the page render
    body = driver.find_element_by_tag_name("body")
    # Presumably scrolling makes the page load its lower sections (comments) —
    # TODO confirm.
    body.send_keys(Keys.PAGE_DOWN)
    time.sleep(2)
def get_informations():  # I want to parallelize this function
    """Scrape one row of statistics from the page loaded in ``driver``.

    Looks up the comment count, view count, upload date, like label and
    dislike label through XPath queries on the module-level ``driver`` and
    appends ``[num, viewcount, date, commentcount, likes, dislikes]`` to the
    module-level ``informations`` list. ``num`` is the 1-based position of
    the appended row.

    Returns:
        None. The row is recorded by appending to ``informations``.

    Raises:
        Exception: whatever ``driver`` raises when the fallback comment-count
            lookup or any of the remaining lookups fails; nothing is appended
            in that case.
    """
    try:
        # First choice: the message renderer inside the comments container
        # (presumably shown when there is no regular comment counter —
        # TODO confirm).
        commentcount = driver.find_element_by_xpath("//*[@id='contents']/ytd-message-renderer").text
    except Exception:
        # Deliberately broad, as before: any failure of the first lookup falls
        # back to the regular comment counter. A failure here propagates.
        commentcount = driver.find_element_by_xpath("//*[@id='count']/yt-formatted-string").text

    # The remaining fields are read the same way regardless of which
    # comment-count lookup succeeded, so they live outside the try block.
    num = len(informations) + 1
    viewcount = driver.find_element_by_xpath("//*[@id='count']/yt-view-count-renderer/span[1]").text
    date = driver.find_element_by_xpath("//*[@id='upload-info']/span").text
    # Likes and dislikes are taken from the aria-label attribute rather than
    # from the visible text of the button.
    likes = driver.find_element_by_xpath("//*[@id='top-level-buttons']/ytd-toggle-button-renderer[1]/a//*[@id='text']").get_attribute("aria-label")
    dislikes = driver.find_element_by_xpath("//*[@id='top-level-buttons']/ytd-toggle-button-renderer[2]/a//*[@id='text']").get_attribute("aria-label")

    informations.append([num, viewcount, date, commentcount, likes, dislikes])

# Intended fan-out: run get_informations on a process pool of up to 4 workers
# and discard whatever the calls return.
# NOTE(review): the `?` argument is an unresolved placeholder, so this statement
# does not parse as written. executor.map() passes one item of the iterable to
# each call, while get_informations is defined without parameters — TODO decide
# what is mapped over (presumably the page addresses) and give the function a
# matching parameter.
# NOTE(review): the workers are separate processes; verify that rows appended
# to `informations` inside a worker actually reach the parent process.
with PoolExecutor(max_workers=4) as executor:
    for _ in executor.map(get_informations, ?): # open question: which iterable belongs in place of `?`
        pass
...