Вам нужно использовать join вместе с to_frame:
# Count the rows in every (FeatureID, region) group ...
a = input_df.groupby(["FeatureID", "region"])["Target"].size()
# ... and turn the resulting Series into a one-column frame named 'New'.
a = a.to_frame('New')
# Attach each group's count to its rows by matching on the key columns.
input_df = input_df.join(a, on=["FeatureID", "region"])
Пример:
import pandas as pd
from dask import dataframe as dd
# Sample data: six rows with two feature IDs spread over two regions.
input_df = pd.DataFrame(
    {
        'FeatureID': [4, 5, 4, 5, 5, 4],
        'region': ['a', 'a', 'a', 'b', 'b', 'b'],
        'Target': [7, 8, 9, 4, 2, 3],
    }
)
print(input_df)
FeatureID region Target
0 4 a 7
1 5 a 8
2 4 a 9
3 5 b 4
4 5 b 2
5 4 b 3
# Wrap the pandas frame in a Dask DataFrame split into 3 partitions.
sd = dd.from_pandas(input_df, npartitions=3)
# Printing a Dask DataFrame shows its lazy structure (dtypes, divisions), not the data.
print (sd)
FeatureID region Target
npartitions=3
0 int64 object int64
2 ... ... ...
4 ... ... ...
5 ... ... ...
Dask Name: from_pandas, 3 tasks
# Count the rows in every (FeatureID, region) group of the Dask frame ...
a = sd.groupby(["FeatureID", "region"])["Target"].size()
# ... and expose the counts as a one-column frame named 'New'.
a = a.to_frame('New')
# Join the counts back onto the rows by the key columns, then materialise.
out = sd.join(a, on=["FeatureID", "region"])
out = out.compute()
print(out)
FeatureID region Target New
0 4 a 7 2
1 5 a 8 1
2 4 a 9 2
3 5 b 4 2
4 5 b 2 2
5 4 b 3 1