#scrape.py
import threading
from selenium import webdriver
from selenium.common.exceptions import *
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
# Per-thread storage: get_driver() caches the calling thread's browser on it
# under the attribute name 'browser'.
threadLocal = threading.local()
def get_driver():
    """Return the calling thread's Chrome driver, creating it on first use.

    The driver is cached on ``threadLocal`` so that each thread reuses a
    single browser across calls. When a new driver is created, its ``quit``
    method is registered with ``atexit`` so the browser is shut down at
    normal interpreter exit instead of being left running.

    Returns:
        The ``webdriver.Chrome`` instance belonging to the current thread.
    """
    browser = getattr(threadLocal, 'browser', None)
    if browser is not None:
        return browser

    # Function-scope import: keeps this block self-contained without
    # touching the file-level import list.
    import atexit

    chrome_options = Options()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument("--headless")
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument("--lang=en")
    chrome_options.add_argument("--start-maximized")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36")
    chrome_options.binary_location = "/usr/bin/google-chrome"

    # NOTE(review): `executable_path` is deprecated in Selenium 4 in favour of
    # a Service object -- confirm against the Selenium version in use.
    browser = webdriver.Chrome(executable_path=r'/usr/local/bin/chromedriver', options=chrome_options)

    # Nothing else ever quits the per-thread browsers, so make sure each one
    # is closed when the interpreter exits.
    atexit.register(browser.quit)

    threadLocal.browser = browser
    return browser
def run_scrape(link):
    """Open ``link`` in the calling thread's browser and scrape it.

    Args:
        link: URL to load in the browser returned by ``get_driver()``.

    Returns:
        None. The scraping and error-handling steps are placeholders that
        still have to be filled in.
    """
    browser = get_driver()
    # Navigate to the link handed to this worker.
    browser.get(link)
    try:
        pass  # scrape process
    except Exception:
        # `except Exception` instead of a bare `except` so that
        # KeyboardInterrupt and SystemExit still propagate.
        pass  # other stuffs
#multiprocess.py
from scrape import run_scrape
from multiprocessing.pool import ThreadPool
if __name__ == '__main__':
    # `time` is used below but is not imported at module level.
    import time

    start_time = time.time()
    # Placeholder: fill with the list of links to be scraped.
    links = []
    # The context manager tears the pool down once map() has returned,
    # so worker threads are not left behind.
    with ThreadPool(20) as pool:
        results = pool.map(run_scrape, links)
    elapsed = time.time() - start_time
    print("Total Time Processed: " + "--- %s seconds ---" % (elapsed,))
ChromeDriver завершает работу после того, как рабочие потоки закончили выполнение. Теперь мой вопрос: остаются ли экземпляры браузера Chrome, которые не были закрыты? Есть ли способ хранить их также в локальном хранилище потока (thread-local)?