Например, пусть у вас есть DataFrame df, как показано ниже:
# Toy data: a grouping key column and a numeric column that contains one
# missing value (the NaN sits in group 'b').
records = {
    'key1': ['a', 'a', 'b', 'b', 'a'],
    'data1': [1, 1, np.nan, 1, 2],
}
df = pd.DataFrame(records)

# Group the 'data1' values by the label found in the same row of 'key1'.
grouped = df['data1'].groupby(by=df['key1'])
grouped.size()# number of rows in each group; NaN values are counted too
Out[413]:
key1
a 3
b 2
Name: data1, dtype: int64
grouped.count()# number of non-NaN values in each group; the np.nan in group 'b' is ignored
Out[414]:
key1
a 3
b 1
Name: data1, dtype: int64
grouped.nunique() # number of distinct values per group, NaN excluded: 'a' holds 1 and 2, so 2; 'b' holds NaN and 1, so 1
Out[415]:
key1
a 2
b 1
Name: data1, dtype: int64