Я новичок в параллельных фьючерсах и мне действительно нужна помощь в правильной настройке. Я пытаюсь очистить страницы бейсбольной статистики и хотел бы ускорить процесс. У меня есть код, который работает без ошибок; однако кажется, что работает только один экземпляр, и он не совсем использует многопоточность, поскольку время обработки вообще не изменилось с обычного парсинга на многопоточность. Кроме того, я также вижу, что открыт только один браузер (не уверен, что это должно измениться или как это работает в фоновом режиме). Любая помощь будет принята с благодарностью!
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import time
import requests
import concurrent.futures
MAX_THREADS=30
t0 = time.time()
browser = webdriver.Chrome()
player_urls = ['http://www.fangraphs.com/statsplits.aspx?playerid=5677&position=C&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=5677&position=C&season=0&split=0.5',
'http://www.fangraphs.com/statsplits.aspx?playerid=7077&position=C&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=7077&position=C&season=0&split=0.5',
'http://www.fangraphs.com/statsplits.aspx?playerid=9256&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=9256&position=OF&season=0&split=0.5',
'http://www.fangraphs.com/statsplits.aspx?playerid=11270&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=11270&position=OF&season=0&split=0.5',
'http://www.fangraphs.com/statsplits.aspx?playerid=4087&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=4087&position=OF&season=0&split=0.5',
'http://www.fangraphs.com/statsplits.aspx?playerid=5297&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=5297&position=OF&season=0&split=0.5',
'http://www.fangraphs.com/statsplits.aspx?playerid=15640&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=15640&position=OF&season=0&split=0.5',
'http://www.fangraphs.com/statsplits.aspx?playerid=15496&position=SS&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=15496&position=SS&season=0&split=0.5',
'http://www.fangraphs.com/statsplits.aspx?playerid=19844&position=3B&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=19844&position=3B&season=0&split=0.5',
'http://www.fangraphs.com/statsplits.aspx?playerid=13769&position=2B/SS&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=13769&position=2B/SS&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=10950&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=10950&position=OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=15082&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=15082&position=OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=19878&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=19878&position=OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=1904&position=1B&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=1904&position=1B&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=8027&position=1B/DH&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=8027&position=1B/DH&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=9362&position=C&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=9362&position=C&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=9682&position=2B/3B/SS&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=9682&position=2B/3B/SS&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=14106&position=SS&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=14106&position=SS&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=19287&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=19287&position=OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=639&position=3B&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=639&position=3B&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=10294&position=C&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=10294&position=C&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=11338&position=2B/3B&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=11338&position=2B/3B&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=16246&position=1B/DH&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=16246&position=1B/DH&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=14109&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=14109&position=OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=3336&position=2B/3B&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=3336&position=2B/3B&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=7125&position=C&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=7125&position=C&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=6310&position=SS&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=6310&position=SS&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=15937&position=SS&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=15937&position=SS&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=3371&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=3371&position=OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=7476&position=C&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=7476&position=C&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=17678&position=3B&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=17678&position=3B&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=7223&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=7223&position=OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=12649&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=12649&position=OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=15670&position=3B/OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=15670&position=3B/OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=17276&position=C&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=17276&position=C&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=5411&position=3B&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=5411&position=3B&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=13877&position=2B/3B/SS&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=13877&position=2B/3B/SS&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=5305&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=5305&position=OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=2090&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=2090&position=OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=17027&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=17027&position=OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=5133&position=SS&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=5133&position=SS&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=5248&position=2B&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=5248&position=2B&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=14267&position=C&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=14267&position=C&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=8848&position=C&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=8848&position=C&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=15722&position=SS/OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=15722&position=SS/OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=15518&position=SS&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=15518&position=SS&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=6265&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=6265&position=OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=10847&position=SS&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=10847&position=SS&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=1488&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=1488&position=OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=17901&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=17901&position=OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=14942&position=C&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=14942&position=C&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=5386&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=5386&position=OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=1159&position=3B/SS&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=1159&position=3B/SS&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=13132&position=C&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=13132&position=C&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=13414&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=13414&position=OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=14196&position=2B/3B/OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=14196&position=2B/3B/OF&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=12779&position=3B&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=12779&position=3B&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=6444&position=3B&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=6444&position=3B&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=3972&position=SS&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=3972&position=SS&season=0&split=0.5', 'http://www.fangraphs.com/statsplits.aspx?playerid=2918&position=OF&season=0&split=0.6', 'http://www.fangraphs.com/statsplits.aspx?playerid=2918&position=OF&season=0&split=0.5']
def get_stats(url):
browser.get(url)
standard = browser.find_element_by_css_selector("#standard").get_attribute('outerHTML')
soup = BeautifulSoup(standard, 'html.parser')
dfStandard = pd.read_html(str(soup))[0]
advanced = browser.find_element_by_css_selector("#advanced").get_attribute('outerHTML')
soup = BeautifulSoup(advanced, 'html.parser')
dfAdvanced = pd.read_html(str(soup))[0]
battedBall = browser.find_element_by_css_selector("#batted-ball").get_attribute('outerHTML')
soup = BeautifulSoup(battedBall, 'html.parser')
dfbattedBall = pd.read_html(str(soup))[0]
dfinitial = pd.merge(dfStandard, dfAdvanced, on=['Season', 'Handedness'], how='inner')
final = pd.merge(dfinitial, dfbattedBall, on=['Season', 'Handedness'], how='inner')
final.drop(['AVG_y'], axis=1, inplace=True)
def get_pages(urls):
threads = min(MAX_THREADS, len(player_urls))
with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
executor.map(get_stats, urls)
def main(urls):
t0 = time.time()
get_pages(player_urls)
t1 = time.time()
print(f"{t1-t0} seconds to download data.")
main(player_urls)
t1 = time.time()
print(t1-t0)
browser.quit()