Я сталкиваюсь с несколькими ошибками, которые могут быть связаны с одним конкретным файлом .csv.
Cannot cast array data from dtype('complex128') to dtype('float64') according to the rule 'safe'
Ошибка с complex128 исправлена.
Это столбцы home_score и away_score.
'utf-8' codec can't decode byte 0xe9 in position 5: invalid continuation byte
Другая ошибка связана с буквой «é» в названии Montréal Canadiens.
Ниже приведён скрипт. Я никогда не сталкивался с этой ошибкой раньше, хотя уже использовал данные с этих сайтов.
Интересно, придется ли мне экспортировать этот .csv другим способом, чтобы избавиться от двух ошибок.
import requests
from pprint import pprint as pp
import pandas as pd
import datetime
import pendulum
# Collects one summary row per NHL game for a range of dates, using the
# statsapi.web.nhl.com schedule and live-feed endpoints, and writes the rows
# to TEST_OUTCOME_error.csv.

# Both endpoints are reached over HTTPS.
API_BASE = 'https://statsapi.web.nhl.com/api/v1'

# Seconds to wait for the server before requests raises a Timeout instead of
# letting the script hang forever.
REQUEST_TIMEOUT = 30

# Games whose gamePk is at or above this value are skipped.
# NOTE(review): the reason for this cutoff is not visible here - confirm.
MAX_GAME_ID = 2017021722

# linescore['currentPeriod'] values that set the ot_flag / shootout_flag
# columns respectively.
OVERTIME_PERIOD = 4
SHOOTOUT_PERIOD = 5

# seconds_in_ot recorded for a game that reached the shootout period.
# NOTE(review): presumably a full 5-minute overtime - confirm.
SHOOTOUT_SECONDS_IN_OT = 300

# Column order of every row produced by build_outcome().
OUTCOME_COLUMNS = [
    "game_id", "game_type", "season", "game_date",
    "home_team_id", "home_team", "home_abbrev",
    "home_score", "away_team_id", "away_team",
    "away_abbrev", "away_score", "ot_flag",
    "shootout_flag", "seconds_in_ot",
    "home_win",
]

# Module-level accumulators kept under their original names.
# gamepk_list is never populated by this script.
gamepk_list = []
list_of_outcomes = []


def overtime_seconds(pbp_dict, period):
    """Return the value for the seconds_in_ot column.

    Args:
        pbp_dict: Decoded live-feed JSON for one game.
        period: The game's linescore 'currentPeriod' value.

    Returns:
        300 when the game reached the shootout period; the 'MM:SS'
        periodTime of the current play converted to seconds when the game
        ended in the overtime period (0 if that field is missing);
        0 otherwise.
    """
    if period == SHOOTOUT_PERIOD:
        return SHOOTOUT_SECONDS_IN_OT
    if period != OVERTIME_PERIOD:
        return 0
    try:
        clock = pbp_dict['liveData']['plays']['currentPlay']['about']['periodTime']
    except KeyError:
        return 0
    parts = clock.split(':')
    return int(parts[0]) * 60 + int(parts[1])


def build_outcome(pbp_dict, the_date):
    """Build one output row, ordered like OUTCOME_COLUMNS.

    Args:
        pbp_dict: Decoded live-feed JSON for one game.
        the_date: Schedule date string ('YYYY-MM-DD') the game was listed on.

    Returns:
        A list with one value per entry of OUTCOME_COLUMNS.

    Raises:
        KeyError: If an expected key is missing from the feed.
        TypeError: If an expected mapping in the feed is not subscriptable.
    """
    game_info = pbp_dict['gameData']['game']
    linescore = pbp_dict['liveData']['linescore']
    home = linescore['teams']['home']
    away = linescore['teams']['away']
    period = linescore['currentPeriod']
    home_goals = home['goals']
    away_goals = away['goals']
    return [
        pbp_dict['gamePk'],
        game_info['type'],
        game_info['season'],
        the_date,
        home['team']['id'],
        home['team']['name'],
        home['team']['abbreviation'],
        home_goals,
        away['team']['id'],
        away['team']['name'],
        away['team']['abbreviation'],
        away_goals,
        1 if period == OVERTIME_PERIOD else 0,
        1 if period == SHOOTOUT_PERIOD else 0,
        overtime_seconds(pbp_dict, period),
        # home_win: strictly more goals than the away team.
        1 if home_goals > away_goals else 0,
    ]


def fetch_json(url):
    """GET *url* with a timeout and return the decoded JSON body."""
    return requests.get(url, timeout=REQUEST_TIMEOUT).json()


def collect_outcomes(the_date):
    """Return the outcome rows for every eligible game scheduled on *the_date*.

    A malformed schedule skips the whole date and a malformed game feed skips
    only that game; both are reported on stdout instead of being silently
    swallowed.
    """
    outcomes = []
    try:
        schedule = fetch_json(API_BASE + '/schedule?date=' + the_date)
        game_ids = [
            game['gamePk']
            for day in schedule['dates']
            for game in day['games']
        ]
    except (KeyError, TypeError) as err:
        print('Skipping schedule for {}: {!r}'.format(the_date, err))
        return outcomes

    for game_id in game_ids:
        if game_id >= MAX_GAME_ID:
            continue
        print(game_id)
        try:
            feed = fetch_json('{}/game/{}/feed/live'.format(API_BASE, game_id))
            outcomes.append(build_outcome(feed, the_date))
        except (KeyError, TypeError) as err:
            # One bad feed must not discard the remaining games of the day.
            print('Skipping game {}: {!r}'.format(game_id, err))
    return outcomes


if __name__ == '__main__':
    start = pendulum.datetime(2017, 10, 4)
    end = pendulum.datetime(2017, 10, 8)
    dates = pendulum.period(start, end)
    for date in dates.range('days'):
        the_date = date.format('YYYY-MM-DD')
        list_of_outcomes.extend(collect_outcomes(the_date))

    # Passing columns= here (rather than assigning df.columns afterwards)
    # keeps the header valid even when no rows were collected.
    df = pd.DataFrame(list_of_outcomes, columns=OUTCOME_COLUMNS)

    # The encoding is stated explicitly because team names may contain
    # non-ASCII characters (e.g. "Montr\u00e9al Canadiens"); read the file back
    # with the same encoding.
    df.to_csv('TEST_OUTCOME_error.csv', sep=',', float_format='%g',
              index=False, encoding='utf-8')