Я видел много вопросов по этой теме, но ни в одном из них нет решения, которое сработало бы у меня. Вот мой полный код:
import pandas as pd
import requests
import time
# Widen pandas' console output so printed frames are not truncated.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Game Id
game_id = '0021900001'

# Headers for API Request
header_data = {
    'Host': 'stats.nba.com',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    # Referer carries a URI, so send the full URL including the scheme.
    'Referer': 'https://stats.nba.com/',
    # Advertise only encodings that requests always decodes. 'br' (Brotli)
    # is decoded only when an optional Brotli package is installed; without
    # it the body stays compressed and r.json() fails.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
}
###
### Calculate Players on court at the start of each period
###
# Build advanced boxscore url
def advanced_boxscore_url(game_id, start, end):
    """Return the boxscoretraditionalv2 URL for one game and time range.

    Args:
        game_id: Game identifier string, e.g. '0021900001'.
        start: Value placed in the ``startRange`` query parameter.
        end: Value placed in the ``endRange`` query parameter.

    Returns:
        The request URL as a string. ``startPeriod``, ``endPeriod`` and
        ``rangeType`` are fixed at 0, 14 and 2 respectively.
    """
    return (
        'https://stats.nba.com/stats/boxscoretraditionalv2/'
        '?gameId={0}&startPeriod=0&endPeriod=14'
        '&startRange={1}&endRange={2}&rangeType=2'
    ).format(game_id, start, end)
# Helper functions
def calculate_time_at_period(period):
    """Return the elapsed game time at the start of ``period``.

    Periods 1-4 are treated as 720 units long and every later period as
    300 units long; the total is multiplied by 10. This looks like
    seconds converted to tenths of a second (12-minute quarters,
    5-minute overtimes) -- confirm against the API's range units.

    Args:
        period: 1-based period number (5 and above are overtime periods).

    Returns:
        Elapsed time as an int, e.g. 0 for period 1 and 28800 for period 5.
    """
    regulation_period = 720
    overtime_period = 5 * 60
    scale = 10

    if period > 5:
        # Four full regulation periods plus the overtimes already played.
        elapsed = regulation_period * 4 + (period - 5) * overtime_period
    else:
        elapsed = regulation_period * (period - 1)
    return elapsed * scale
def split_subs(df, tag):
    """Extract one side of the substitution events as a tidy frame.

    Args:
        df: DataFrame with columns ``tag``, 'PERIOD' and 'EVENTNUM'.
        tag: Name of the player column to extract (the script passes
            'IN' or 'OUT'); it is also stored as the 'SUB' value.

    Returns:
        A new DataFrame with columns
        ['PLAYER_ID', 'PERIOD', 'EVENTNUM', 'SUB']. ``df`` is not modified.
    """
    # Copy explicitly: adding a column to a bare slice of ``df`` triggers
    # SettingWithCopyWarning and leaves it unclear which frame is written.
    subs = df[[tag, 'PERIOD', 'EVENTNUM']].copy()
    subs['SUB'] = tag
    subs.columns = ['PLAYER_ID', 'PERIOD', 'EVENTNUM', 'SUB']
    return subs
def frame_to_row(df):
    """Collapse a per-player frame into one two-team row.

    Args:
        df: DataFrame with 'TEAM_ID' and 'PLAYER_ID' columns covering
            two teams.

    Returns:
        ``[team1, players1, team2, players2]`` where the teams are the
        first two distinct 'TEAM_ID' values in order of appearance and
        each players entry is that team's sorted list of 'PLAYER_ID's.

    Raises:
        IndexError: If ``df`` has fewer than two distinct 'TEAM_ID' values.
    """
    team_ids = df['TEAM_ID'].unique()
    team1, team2 = team_ids[0], team_ids[1]

    players1 = sorted(df.loc[df['TEAM_ID'] == team1, 'PLAYER_ID'].tolist())
    players2 = sorted(df.loc[df['TEAM_ID'] == team2, 'PLAYER_ID'].tolist())

    return [team1, players1, team2, players2]
# extracts data from api response
def extract_data(url, timeout=30):
    """Fetch ``url`` and return its first result set as a DataFrame.

    Args:
        url: Endpoint to request; sent with the module-level
            ``header_data`` headers.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, requests waits indefinitely and a stalled
            connection only ends when the OS aborts the socket.

    Returns:
        DataFrame built from ``resultSets[0]`` of the JSON response, with
        ``headers`` as the column names and ``rowSet`` as the rows.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
        requests.exceptions.HTTPError: If the server returns an error status.
    """
    print(url)
    response = requests.get(url, headers=header_data, timeout=timeout)
    # Fail with the HTTP status rather than a confusing JSON/KeyError later.
    response.raise_for_status()

    results = response.json()['resultSets'][0]
    # Passing columns to the constructor also works when rowSet is empty;
    # assigning .columns afterwards raises a length-mismatch ValueError.
    return pd.DataFrame(results['rowSet'], columns=results['headers'])
def _players_on_court_for_game(game_id, play_by_play):
    """Work out who was on court at the start of each period of one game.

    For every period, a boxscore restricted to that period's time range
    is requested, and players whose first substitution event in the
    period was coming IN are removed from it; the remainder is taken as
    the lineup that started the period.

    Args:
        game_id: Game identifier used to build the boxscore URLs.
        play_by_play: Play-by-play DataFrame with 'EVENTMSGTYPE', 'PERIOD',
            'EVENTNUM', 'PLAYER1_ID' and 'PLAYER2_ID' columns.

    Returns:
        DataFrame with columns ['TEAM_ID_1', 'TEAM_1_PLAYERS', 'TEAM_ID_2',
        'TEAM_2_PLAYERS', 'PERIOD'], one row per processed period.
    """
    # EVENTMSGTYPE 8 rows are the substitution events; PLAYER1_ID is the
    # player going out and PLAYER2_ID the player coming in.
    substitutions = play_by_play.loc[
        play_by_play['EVENTMSGTYPE'] == 8,
        ['PERIOD', 'EVENTNUM', 'PLAYER1_ID', 'PLAYER2_ID'],
    ].copy()
    substitutions.columns = ['PERIOD', 'EVENTNUM', 'OUT', 'IN']

    subs_in = split_subs(substitutions, 'IN')
    subs_out = split_subs(substitutions, 'OUT')
    full_subs = pd.concat([subs_out, subs_in], axis=0, ignore_index=True)[
        ['PLAYER_ID', 'PERIOD', 'EVENTNUM', 'SUB']]

    # Earliest substitution event of each player within each period.
    first_event_of_period = full_subs.loc[
        full_subs.groupby(by=['PERIOD', 'PLAYER_ID'])['EVENTNUM'].idxmin()]
    # A player whose first event is 'IN' cannot have started the period.
    subbed_in = first_event_of_period.loc[
        first_event_of_period['SUB'] == 'IN', ['PLAYER_ID', 'PERIOD', 'SUB']]

    rows = []
    for period in subbed_in['PERIOD'].drop_duplicates().tolist():
        # Stay 5 units inside the period on both ends.
        low = calculate_time_at_period(period) + 5
        high = calculate_time_at_period(period + 1) - 5
        boxscore = advanced_boxscore_url(game_id, low, high)

        # Copy before adding a column to avoid SettingWithCopyWarning.
        boxscore_players = extract_data(boxscore)[
            ['PLAYER_NAME', 'PLAYER_ID', 'TEAM_ID']].copy()
        boxscore_players['PERIOD'] = period

        subbed_in_this_period = subbed_in[subbed_in['PERIOD'] == period]
        joined_players = pd.merge(boxscore_players, subbed_in_this_period,
                                  on=['PLAYER_ID', 'PERIOD'], how='left')
        # SUB is null for players with no matching sub-in record.
        starters = joined_players.loc[
            pd.isnull(joined_players['SUB']),
            ['PLAYER_NAME', 'PLAYER_ID', 'TEAM_ID', 'PERIOD']]

        row = frame_to_row(starters)
        row.append(period)
        rows.append(row)

    cols = ['TEAM_ID_1', 'TEAM_1_PLAYERS', 'TEAM_ID_2', 'TEAM_2_PLAYERS', 'PERIOD']
    return pd.DataFrame(rows, columns=cols)


# First game: uses the module-level game_id and its own play-by-play file.
play_by_play = pd.read_csv('pbp_v3_0210.csv')
per_game_frames = [_players_on_court_for_game(game_id, play_by_play)]

for x in range(2, 10):
    if x == 707:
        continue
    # Pause between games to avoid hammering the API.
    time.sleep(2)
    # Game ids are the "00219" prefix plus a zero-padded 5-digit number.
    game_id = "00219" + str(x).zfill(5)
    # NOTE(review): the same file is read for every game id, so every game
    # reuses identical substitution data -- confirm whether the path should
    # depend on game_id.
    holder_play_by_play = pd.read_csv('pbp_for_parsing')
    per_game_frames.append(
        _players_on_court_for_game(game_id, holder_play_by_play))

# concat is a pandas module-level function, not a DataFrame method.
# Concatenating once at the end avoids re-copying the frame per game.
players_on_court_at_start_of_period = pd.concat(
    per_game_frames, axis=0, ignore_index=True)
players_on_court_at_start_of_period.to_csv("onoff0210.csv", index=False)
Полное сообщение об ошибке, которое я получаю:
Traceback (most recent call last):
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\connectionpool.py", line 672, in urlopen
chunked=chunked,
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\connectionpool.py", line 416, in _make_request
httplib_response = conn.getresponse()
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\http\client.py", line 1344, in getresponse
response.begin()
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\http\client.py", line 306, in begin
version, status, reason = self._read_status()
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\http\client.py", line 267, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\socket.py", line 589, in readinto
return self._sock.recv_into(b)
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\contrib\pyopenssl.py", line 318, in recv_into
raise SocketError(str(e))
OSError: (10060, 'WSAETIMEDOUT')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\requests\adapters.py", line 449, in send
timeout=timeout
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\connectionpool.py", line 720, in urlopen
method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\util\retry.py", line 400, in increment
raise six.reraise(type(error), error, _stacktrace)
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\packages\six.py", line 734, in reraise
raise value.with_traceback(tb)
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\connectionpool.py", line 672, in urlopen
chunked=chunked,
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\connectionpool.py", line 416, in _make_request
httplib_response = conn.getresponse()
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\http\client.py", line 1344, in getresponse
response.begin()
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\http\client.py", line 306, in begin
version, status, reason = self._read_status()
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\http\client.py", line 267, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\socket.py", line 589, in readinto
return self._sock.recv_into(b)
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\contrib\pyopenssl.py", line 318, in recv_into
raise SocketError(str(e))
urllib3.exceptions.ProtocolError: ('Connection aborted.', OSError("(10060, 'WSAETIMEDOUT')"))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/xxxxx/PycharmProjects/NBAdata/V.3/On Off V3.py", line 100, in <module>
boxscore_players = extract_data(boxscore)[['PLAYER_NAME', 'PLAYER_ID', 'TEAM_ID']]
File "C:/Users/xxxxx/PycharmProjects/NBAdata/V.3/On Off V3.py", line 69, in extract_data
r = requests.get(url, headers=header_data)
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\requests\api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\requests\sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\requests\sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\requests\adapters.py", line 498, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', OSError("(10060, 'WSAETIMEDOUT')"))
Я не слишком опытен в таком коде, поэтому пришёл на Stack Overflow в поисках решения. Я уже пробовал несколько вариантов: задать User-Agent (это я уже сделал); изменить расширенные настройки LAN в панели управления (я даже не смог их найти — возможно, их убрали из Windows); использовать онлайн-IDE (но ни одна из найденных не позволяет импортировать csv и сохранить результат работы кода в csv); добавить тайм-аут в вызов requests.get (это лишь вызвало новые ошибки) и, возможно, что-то ещё, о чём я сейчас забыл. Я также написал множество других файлов с похожей структурой и похожими целевыми URL, и они работали нормально. И да, моё интернет-соединение в полном порядке: всё остальное работает без сбоев, включая другие файлы Python.
Это серьёзно затормозило мой текущий проект: я не могу двигаться дальше, пока не решу эту проблему, так что если у кого-нибудь найдётся волшебное решение, это было бы фантастикой. Ожидаемый результат этого кода: цикл for проходит все итерации вплоть до максимального значения параметра, после чего выводится csv, который затем будет использоваться как датафрейм со всеми данными, которые я только что собрал (scraped).