Я думаю, что вы ищете это. Надеюсь, это поможет.
import pandas as pd
# Sample data: three teams listed in rotation (1, 2, 3, 1, 2, 3, ...).
# The first 8 rows belong to season 2018 and the remaining 13 to 2019.
# NOTE(review): the 8/13 split leaves team 3 with only two 2018 rows --
# confirm that a 9/12 split was not intended.
data = {
    'T1_TeamID': [1, 2, 3] * 7,
    'score': [50, 60, 70, 60, 70, 80, 70, 80, 90, 60, 70,
              80, 70, 80, 90, 80, 90, 100, 90, 100, 110],
    'Season': [2018] * 8 + [2019] * 13,
}
df = pd.DataFrame(data)
print('df-----')
print(df)

# Mean score per (team, season). The result has a single 'score' column
# and a (T1_TeamID, Season) MultiIndex.
df_gr = df.groupby(['T1_TeamID', 'Season']).agg({'score': 'mean'})
print('after group by-----')
print(df_gr)

# Example lookup: select the row for one team in one season by building
# a boolean mask over the two index levels.
team_id = 2
year = 2019
team_level = df_gr.index.get_level_values('T1_TeamID')
season_level = df_gr.index.get_level_values('Season')
is_match = (team_level == team_id) & (season_level == year)
to_search = df_gr.loc[is_match]
print('the row to search-----')
print(to_search)

# The mask yields a one-row frame; take its first 'score' as a scalar.
print('the value-----')
print(to_search['score'].iloc[0])
Вывод
df-----
T1_TeamID score Season
0 1 50 2018
1 2 60 2018
2 3 70 2018
3 1 60 2018
4 2 70 2018
5 3 80 2018
6 1 70 2018
7 2 80 2018
8 3 90 2019
9 1 60 2019
10 2 70 2019
11 3 80 2019
12 1 70 2019
13 2 80 2019
14 3 90 2019
15 1 80 2019
16 2 90 2019
17 3 100 2019
18 1 90 2019
19 2 100 2019
20 3 110 2019
after group by-----
score
T1_TeamID Season
1 2018 60
2019 75
2 2018 70
2019 85
3 2018 75
2019 94
the row to search-----
score
T1_TeamID Season
2 2019 85
the value-----
85
Edit2: Новое решение — после того, как я понял ваш комментарий. Вам нужно накопительное (running) среднее баллов каждой команды. Для этого сначала отсортируйте фрейм данных, как это сделано ниже.
import pandas as pd
from numpy import nan as NaN
# Sample data: three teams listed in rotation. 'avg_score' is the
# hand-computed column supplied with the question; 'Season' and 'DayNum'
# are stored as strings.
data = dict()
data['T1_TeamID'] = [1,2,3,1,2,3,1,2,3,1,2,3,1,2,3,1,2,3,1,2,3]
data['score'] = [50,60,70,60,70,80,70,80,90,60,70,80,70,80,90,80,90,100,90,100,110]
data['avg_score'] = [NaN,NaN,NaN,50,60,70,55,65,75,NaN,NaN,NaN,60,70,80,65,75,85,70,80,90]
# NOTE(review): row 8 (team 3, DayNum '3') is labelled '2019' while rows 6-7
# with the same DayNum are '2018' -- possibly a typo; confirm.
data['Season'] = ['2018','2018','2018','2018','2018','2018','2018','2018','2019','2019','2019','2019','2019','2019','2019','2019','2019','2019','2019','2019','2019']
data['DayNum'] = ['1','1','1','2','2','2','3','3','3','1','1','1','2','2','2','3','3','3','4','4','4']
#create dataframe
df = pd.DataFrame(data)
# Chronological order within each team is required for a running mean.
# NOTE: 'DayNum' holds strings, so the sort is lexicographic; that is only
# chronological while every day number has a single digit.
df = df.sort_values(by=['T1_TeamID','Season','DayNum'])
print('df-----')
print(df)
#groupby: running (expanding) mean of 'score' within each team.
# The result is indexed by (T1_TeamID, original row label).
df_gr = df.groupby(['T1_TeamID'])['score'].expanding().mean()
print('after group by-----')
print(df_gr)
#tidying the output
print('final dataframe-----')
# Drop only the group level so the original row labels remain as the index.
# Column assignment aligns on labels: a full reset_index() would produce a
# fresh 0..n-1 index and attach the values to the wrong rows of the sorted
# frame.
df['running_avg'] = df_gr.droplevel(0)
print(df)
Вывод
df-----
T1_TeamID score avg_score Season DayNum
0 1 50 NaN 2018 1
3 1 60 50.0 2018 2
6 1 70 55.0 2018 3
9 1 60 NaN 2019 1
12 1 70 60.0 2019 2
15 1 80 65.0 2019 3
18 1 90 70.0 2019 4
1 2 60 NaN 2018 1
4 2 70 60.0 2018 2
7 2 80 65.0 2018 3
10 2 70 NaN 2019 1
13 2 80 70.0 2019 2
16 2 90 75.0 2019 3
19 2 100 80.0 2019 4
2 3 70 NaN 2018 1
5 3 80 70.0 2018 2
11 3 80 NaN 2019 1
14 3 90 80.0 2019 2
8 3 90 75.0 2019 3
17 3 100 85.0 2019 3
20 3 110 90.0 2019 4
after group by-----
T1_TeamID
1 0 50.000000
3 55.000000
6 60.000000
9 60.000000
12 62.000000
15 65.000000
18 68.571429
2 1 60.000000
4 65.000000
7 70.000000
10 70.000000
13 72.000000
16 75.000000
19 78.571429
3 2 70.000000
5 75.000000
11 76.666667
14 80.000000
8 82.000000
17 85.000000
20 88.571429
Name: score, dtype: float64
final dataframe-----
T1_TeamID score avg_score Season DayNum running_avg
0 1 50 NaN 2018 1 50.000000
3 1 60 50.0 2018 2 60.000000
6 1 70 55.0 2018 3 68.571429
9 1 60 NaN 2019 1 70.000000
12 1 70 60.0 2019 2 75.000000
15 1 80 65.0 2019 3 75.000000
18 1 90 70.0 2019 4 82.000000
1 2 60 NaN 2018 1 55.000000
4 2 70 60.0 2018 2 62.000000
7 2 80 65.0 2018 3 60.000000
10 2 70 NaN 2019 1 70.000000
13 2 80 70.0 2019 2 78.571429
16 2 90 75.0 2019 3 76.666667
19 2 100 80.0 2019 4 85.000000
2 3 70 NaN 2018 1 60.000000
5 3 80 70.0 2018 2 65.000000
11 3 80 NaN 2019 1 72.000000
14 3 90 80.0 2019 2 70.000000
8 3 90 75.0 2019 3 65.000000
17 3 100 85.0 2019 3 80.000000
20 3 110 90.0 2019 4 88.571429
Edit3: Вот как можно быстро создать ваш столбец avg_score. Я также исправил ваши неправильные вычисления avg_score для индексов 8 и 17.
import pandas as pd
from numpy import nan as NaN
# Sample data: three teams listed in rotation, with 'Season' and 'DayNum'
# stored as strings. The first 8 rows are '2018', the remaining 13 '2019'.
# NOTE(review): row 8 (team 3, DayNum '3') therefore lands in '2019' while
# rows 6-7 with the same DayNum are '2018' -- possibly a typo; confirm.
data = {
    'T1_TeamID': [1, 2, 3] * 7,
    'score': [50, 60, 70, 60, 70, 80, 70, 80, 90, 60, 70,
              80, 70, 80, 90, 80, 90, 100, 90, 100, 110],
    'Season': ['2018'] * 8 + ['2019'] * 13,
    # Days 1-3 (first run) then days 1-4 (second run), three rows per day.
    'DayNum': [day for day in '1231234' for _ in range(3)],
}

# Build the frame and put each team's games in order within each season.
# NOTE: 'DayNum' holds strings, so the ordering is lexicographic.
df = pd.DataFrame(data)
df = df.sort_values(by=['T1_TeamID', 'Season', 'DayNum'])

# Expanding mean of 'score' inside every (team, season) group ...
group_keys = ['T1_TeamID', 'Season']
running_mean = df.groupby(group_keys)['score'].expanding().mean()
# ... shifted down one row per group, so each row sees only the mean of
# the games before it (the first game of a group becomes NaN).
previous_mean = running_mean.groupby(group_keys).shift()

# Strip the group levels so only the original row labels remain; the
# column assignment below then aligns on those labels.
temp = previous_mean.droplevel(group_keys)
df['avg_score'] = temp
print(df)
Вывод
T1_TeamID score Season DayNum avg_score
0 1 50 2018 1 NaN
3 1 60 2018 2 50.000000
6 1 70 2018 3 55.000000
9 1 60 2019 1 NaN
12 1 70 2019 2 60.000000
15 1 80 2019 3 65.000000
18 1 90 2019 4 70.000000
1 2 60 2018 1 NaN
4 2 70 2018 2 60.000000
7 2 80 2018 3 65.000000
10 2 70 2019 1 NaN
13 2 80 2019 2 70.000000
16 2 90 2019 3 75.000000
19 2 100 2019 4 80.000000
2 3 70 2018 1 NaN
5 3 80 2018 2 70.000000
11 3 80 2019 1 NaN
14 3 90 2019 2 80.000000
8 3 90 2019 3 85.000000
17 3 100 2019 3 86.666667
20 3 110 2019 4 90.000000