Почему процесс очистки создает дубликаты? - PullRequest
0 голосов
/ 02 января 2019

Когда я печатаю df_goalie_per.head (), я вижу, что у меня есть дубликаты по какой-то причине, не понимаю почему, какие-либо предложения?

Я также проверил другие DataFrames и обнаружил, что ТОЛЬКО df_goalie_это влияет на данные.Это связано с циклом?Пытался изменить порядок зацикливания, но безуспешно.

Код указан ниже:

#Importing Libraries 
import numpy as np
import pandas as pd
import requests
import json
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder


#Create Empty lists
player_id = {}
goalie_id = {}

person = []
position = []
skaterstats = []

goalie_person=[]
goalie_position=[]
goalie_stats=[]

team = []
team_goals = []
matchid = []

#Connect to NHL-API
for game_id in range(2017020001, 2017020002, 1):
    url = 'https://statsapi.web.nhl.com/api/v1/game/{}/feed/live'.format(game_id)
    r = requests.get(url)
    game_data = r.json()

#PLAYER SCRAPING
for homeaway in ['home','away']:
    player_dict = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('skaters')
    player_id[homeaway] = player_dict


    #Get PlayerStats/TeamStats
    for homeaway in player_id:
        for playerID in player_id[homeaway]:
            play_dict_teamname = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('team').get('name')
            play_dict_teamgoals = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('teamStats').get('teamSkaterStats').get('goals')
            play_dict_gameid = game_data.get('gamePk')

            play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('person')
            play_dict_position = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('position')
            play_dict_skaterstats = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('stats').get('skaterStats')

    #Append TeamStats to Empty list
            team.append(play_dict_teamname)
            team_goals.append(play_dict_teamgoals)
            matchid.append(play_dict_gameid)

    #Append PlayerStats to Empty list
            person.append(play_dict_person)
            position.append(play_dict_position)
            if play_dict_skaterstats: 
                skaterstats.append(play_dict_skaterstats)

#GOALIE SCRAPING
    for homeaway in ['home','away']:
        goalie_dict = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('goalies')
        goalie_id[homeaway] = goalie_dict

        #Get GoalieStats
        for homeaway in goalie_id:
            for goalieID in goalie_id[homeaway]:
                goal_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('person')
                goal_dict_position = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('position')
                goal_dict_stats = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('stats').get('goalieStats')


        #Append GoalieStats to Empty list
                goalie_person.append(goal_dict_person)
                goalie_position.append(goal_dict_position)
                if goal_dict_stats: 
                    goalie_stats.append(goal_dict_stats)


#Create DataFrames for all lists
df_person = pd.DataFrame(person)
df_position = pd.DataFrame(position)
df_skaterstats = pd.DataFrame(skaterstats)

df_team = pd.DataFrame(team)
df_teamgoals = pd.DataFrame(team_goals)
df_gameID = pd.DataFrame(matchid)

df_goalie_per = pd.DataFrame(goalie_person)
df_goalie_pos = pd.DataFrame(goalie_position)
df_goalie_stats = pd.DataFrame(goalie_stats)   

df_goalie_per.head()    

    fullName           id        link               rosterStatus    shootsCatches
0   Steve Mason     8473461 /api/v1/people/8473461  Y   R
1   Connor Hellebuyck   8476945 /api/v1/people/8476945  Y   L
2   Steve Mason     8473461 /api/v1/people/8473461  Y   R
3   Connor Hellebuyck  8476945  /api/v1/people/8476945  Y   L
4   Frederik Andersen  8475883  /api/v1/people/8475883  Y   L  
...