Когда я печатаю df_goalie_per.head (), я вижу, что у меня есть дубликаты по какой-то причине, не понимаю почему, какие-либо предложения?
Я также проверил другие DataFrames и обнаружил, что ТОЛЬКО df_goalie_это влияет на данные.Это связано с циклом?Пытался изменить порядок зацикливания, но безуспешно.
Код указан ниже:
#Importing Libraries
import numpy as np
import pandas as pd
import requests
import json
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder
#Create Empty lists
player_id = {}
goalie_id = {}
person = []
position = []
skaterstats = []
goalie_person=[]
goalie_position=[]
goalie_stats=[]
team = []
team_goals = []
matchid = []
#Connect to NHL-API
for game_id in range(2017020001, 2017020002, 1):
url = 'https://statsapi.web.nhl.com/api/v1/game/{}/feed/live'.format(game_id)
r = requests.get(url)
game_data = r.json()
#PLAYER SCRAPING
for homeaway in ['home','away']:
player_dict = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('skaters')
player_id[homeaway] = player_dict
#Get PlayerStats/TeamStats
for homeaway in player_id:
for playerID in player_id[homeaway]:
play_dict_teamname = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('team').get('name')
play_dict_teamgoals = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('teamStats').get('teamSkaterStats').get('goals')
play_dict_gameid = game_data.get('gamePk')
play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('person')
play_dict_position = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('position')
play_dict_skaterstats = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('stats').get('skaterStats')
#Append TeamStats to Empty list
team.append(play_dict_teamname)
team_goals.append(play_dict_teamgoals)
matchid.append(play_dict_gameid)
#Append PlayerStats to Empty list
person.append(play_dict_person)
position.append(play_dict_position)
if play_dict_skaterstats:
skaterstats.append(play_dict_skaterstats)
#GOALIE SCRAPING
for homeaway in ['home','away']:
goalie_dict = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('goalies')
goalie_id[homeaway] = goalie_dict
#Get GoalieStats
for homeaway in goalie_id:
for goalieID in goalie_id[homeaway]:
goal_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('person')
goal_dict_position = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('position')
goal_dict_stats = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('stats').get('goalieStats')
#Append GoalieStats to Empty list
goalie_person.append(goal_dict_person)
goalie_position.append(goal_dict_position)
if goal_dict_stats:
goalie_stats.append(goal_dict_stats)
#Create DataFrames for all lists
df_person = pd.DataFrame(person)
df_position = pd.DataFrame(position)
df_skaterstats = pd.DataFrame(skaterstats)
df_team = pd.DataFrame(team)
df_teamgoals = pd.DataFrame(team_goals)
df_gameID = pd.DataFrame(matchid)
df_goalie_per = pd.DataFrame(goalie_person)
df_goalie_pos = pd.DataFrame(goalie_position)
df_goalie_stats = pd.DataFrame(goalie_stats)
df_goalie_per.head()
fullName id link rosterStatus shootsCatches
0 Steve Mason 8473461 /api/v1/people/8473461 Y R
1 Connor Hellebuyck 8476945 /api/v1/people/8476945 Y L
2 Steve Mason 8473461 /api/v1/people/8473461 Y R
3 Connor Hellebuyck 8476945 /api/v1/people/8476945 Y L
4 Frederik Andersen 8475883 /api/v1/people/8475883 Y L