Поскольку другой ответ не сохраняет мультииндекс, для его сохранения при вычислении суммы используйте следующий код:
# Show the input frame; both its rows (Region, Prod_code) and its columns
# (Region, Ind_code) are two-level MultiIndexes.
print(df_in)
Region EU ROW
Ind_code i01.a i01.b i02.1.a i03 i01.a i01.b i02.1.a i03
Region Prod_code
EU p01.a 5 9 6 3 4 7 7 3
p01.b 9 6 4 6 9 2 6 4
p02.1.a 7 1 4 6 4 2 7 4
p02.1.b 7 3 3 8 1 6 6 8
p02.1.c 2 4 7 8 9 5 5 3
p03 7 7 6 3 5 7 8 1
ROW p01.a 3 3 3 7 5 7 4 4
p01.b 8 8 1 4 4 3 3 3
p02.1.a 8 5 3 6 6 4 4 3
p02.1.b 8 1 3 5 5 5 6 5
p02.1.c 1 7 1 4 9 3 6 3
p03 3 6 1 5 1 8 4 1
КОД И ВЫВОД:
# Sum the rows whose Prod_code shares the same 5-character prefix
# (e.g. 'p02.1.a', 'p02.1.b', 'p02.1.c') within each Region, while keeping
# the (Region, Prod_code) row MultiIndex. The merged row is labelled with the
# first full Prod_code of its group.

# Full level-1 labels of the row MultiIndex, one per row, before truncation.
Prod_code = df_in.index.get_level_values(1)
# Assign these values to a helper `Prod_code` column so they survive the
# index change below.
df_in['Prod_code'] = Prod_code
# Truncate the level-1 labels to their first 5 characters, which deliberately
# creates duplicate labels ('p02.1' three times). The slice is applied to the
# level's own label list, so every existing code keeps pointing at the right
# entry regardless of row order. `set_levels` returns a new index
# (`inplace=True` was removed in pandas 2.0), so the result is assigned back.
df_in.index = df_in.index.set_levels(
    df_in.index.levels[1].str.slice(start=0, stop=5, step=1),
    level=1, verify_integrity=False)
# Group on both levels of the (now partly duplicated) row MultiIndex.
level_0 = df_in.index.get_level_values(0)
level_1 = df_in.index.get_level_values(1)
# First full Prod_code of every (Region, truncated Prod_code) group.
valuestoset = df_in.groupby([level_0, level_1])['Prod_code'].first()
# Sum the groups; numeric_only=True keeps the helper string column
# `Prod_code` out of the result instead of concatenating its strings.
df_out = df_in.groupby([level_0, level_1]).sum(numeric_only=True)
# Finally restore a full label for every truncated one. The labels are looked
# up per truncated code and aligned to the order of df_out's level 1, so the
# result does not depend on the row order of `valuestoset`.
restored_labels = (valuestoset.groupby(level=1).first()
                   .reindex(df_out.index.levels[1]))
df_out.index = df_out.index.set_levels(
    restored_labels.to_numpy(), level=1, verify_integrity=False)
print(df_out)
Region EU ROW
Ind_code i01.a i01.b i02.1.a i03 i01.a i01.b i02.1.a i03
Region Prod_code
EU p01.a 5 9 6 3 4 7 7 3
p01.b 9 6 4 6 9 2 6 4
p02.1.a 16 8 14 22 14 13 18 15
p03 7 7 6 3 5 7 8 1
ROW p01.a 3 3 3 7 5 7 4 4
p01.b 8 8 1 4 4 3 3 3
p02.1.a 17 13 7 15 20 12 16 11
p03 3 6 1 5 1 8 4 1
Объяснение:
# Level-1 (Prod_code) label of every row, before the labels are truncated.
print(df_in.index.get_level_values(1))
Index(['p01.a', 'p01.b', 'p02.1.a', 'p02.1.b', 'p02.1.c', 'p03', 'p01.a',
'p01.b', 'p02.1.a', 'p02.1.b', 'p02.1.c', 'p03'],
dtype='object', name='Prod_code')
# Keep the full level-1 labels in a helper column, then truncate the labels
# in the index to their first 5 characters.
Prod_code = df_in.index.get_level_values(1)
# The helper column must receive `Prod_code`; `index_col` is not defined.
df_in['Prod_code'] = Prod_code
# `set_levels` returns a new index (`inplace=True` was removed in
# pandas 2.0), so assign it back. Slicing the level's own labels keeps every
# existing code pointing at the right entry.
df_in.index = df_in.index.set_levels(
    df_in.index.levels[1].str.slice(start=0, stop=5, step=1),
    level=1, verify_integrity=False)
# df_in now has truncated level-1 labels in the index and the full labels in
# the helper `Prod_code` column.
print(df_in)
Region EU ROW Prod_code
Ind_code i01.a i01.b i02.1.a i03 i01.a i01.b i02.1.a i03
Region Prod_code
EU p01.a 5 9 6 3 4 7 7 3 p01.a
p01.b 9 6 4 6 9 2 6 4 p01.b
p02.1 7 1 4 6 4 2 7 4 p02.1.a
p02.1 7 3 3 8 1 6 6 8 p02.1.b
p02.1 2 4 7 8 9 5 5 3 p02.1.c
p03 7 7 6 3 5 7 8 1 p03
ROW p01.a 3 3 3 7 5 7 4 4 p01.a
p01.b 8 8 1 4 4 3 3 3 p01.b
p02.1 8 5 3 6 6 4 4 3 p02.1.a
p02.1 8 1 3 5 5 5 6 5 p02.1.b
p02.1 1 7 1 4 9 3 6 3 p02.1.c
p03 3 6 1 5 1 8 4 1 p03
# First full Prod_code within every (Region, truncated Prod_code) group.
df_in.groupby([level_0,level_1])['Prod_code'].first()
Region Prod_code
EU p01.a p01.a
p01.b p01.b
p02.1 p02.1.a
p03 p03
ROW p01.a p01.a
p01.b p01.b
p02.1 p02.1.a
p03 p03
Name: Prod_code, dtype: object
# Sum every (Region, truncated Prod_code) group; numeric_only=True leaves the
# helper string column `Prod_code` out of the result.
df_in.groupby([level_0,level_1]).sum(numeric_only=True)
Region EU ROW
Ind_code i01.a i01.b i02.1.a i03 i01.a i01.b i02.1.a i03
Region Prod_code
EU p01.a 5 9 6 3 4 7 7 3
p01.b 9 6 4 6 9 2 6 4
p02.1 16 8 14 22 14 13 18 15
p03 7 7 6 3 5 7 8 1
ROW p01.a 3 3 3 7 5 7 4 4
p01.b 8 8 1 4 4 3 3 3
p02.1 17 13 7 15 20 12 16 11
p03 3 6 1 5 1 8 4 1
# Replace the truncated level-1 labels with the first full Prod_code of each
# truncated code. `set_levels` returns a new index (`inplace=True` was removed
# in pandas 2.0), so assign it back; the labels are aligned to the order of
# df_out's level 1 before being set. df_out then looks as shown below.
df_out.index = df_out.index.set_levels(
    valuestoset.groupby(level=1).first().reindex(df_out.index.levels[1]).to_numpy(),
    level=1, verify_integrity=False)
Region EU ROW
Ind_code i01.a i01.b i02.1.a i03 i01.a i01.b i02.1.a i03
Region Prod_code
EU p01.a 5 9 6 3 4 7 7 3
p01.b 9 6 4 6 9 2 6 4
p02.1.a 16 8 14 22 14 13 18 15
p03 7 7 6 3 5 7 8 1
ROW p01.a 3 3 3 7 5 7 4 4
p01.b 8 8 1 4 4 3 3 3
p02.1.a 17 13 7 15 20 12 16 11
p03 3 6 1 5 1 8 4 1