Подход № 1
Вот векторизованное решение на NumPy:
def groupby_mean(a):
    """Average the rows of a 2-D array grouped by the value in column 0.

    Parameters
    ----------
    a : array_like
        2-D data. Column 0 holds the group key; every other column holds
        values to be averaged within each group.

    Returns
    -------
    numpy.ndarray
        One row per distinct key, ordered by ascending key. Column 0 is
        the key and the remaining columns are the per-group means. For
        real-valued input the result is floating point. An input with
        zero rows yields an empty ``(0, n_columns)`` float array.
    """
    a = np.asarray(a)

    # reduceat cannot handle an empty operand (the start index 0 would be
    # out of range), so short-circuit the zero-row case.
    if a.ndim == 2 and a.shape[0] == 0:
        return np.empty((0, a.shape[1]), dtype=float)

    # Sort rows by the key column. A stable sort keeps the original row
    # order inside each group, which makes float summation reproducible.
    order = a[:, 0].argsort(kind="stable")
    b = a[order]
    keys = b[:, 0]

    # Group boundaries in the sorted keys: position 0, every position where
    # the key changes, and a sentinel at the end (== number of rows).
    idx = np.flatnonzero(np.r_[True, keys[:-1] != keys[1:], True])
    starts = idx[:-1]

    # Group sizes are the gaps between consecutive boundaries.
    counts = np.diff(idx)

    # Sum each contiguous run of rows, then divide by the run length.
    sums = np.add.reduceat(b[:, 1:], starts, axis=0)
    avg = sums / counts.astype(float)[:, None]

    # Prepend the key of each group to its averaged values.
    return np.c_[keys[starts], avg]
Подход № 2
Ещё одно векторизованное решение, использующее матричное умножение:
def groupby_mean_matmul(a):
    """Average rows grouped by column 0 using a membership-matrix product.

    Parameters
    ----------
    a : array_like
        2-D data. Column 0 holds the group key; every other column holds
        values to be averaged within each group.

    Returns
    -------
    numpy.ndarray
        One row per distinct key, in the sorted order produced by
        ``np.unique``. Column 0 is the key and the remaining columns are
        the per-group means.

    Notes
    -----
    Builds a boolean mask of shape ``(n_rows, n_groups)``, so memory use
    grows with the product of the row count and the number of groups.
    """
    a = np.asarray(a)
    keys = a[:, 0]
    unq = np.unique(keys)

    # membership[i, j] is True when row i belongs to group j.
    membership = keys[:, None] == unq

    # Multiplying the transposed mask by the value columns sums the rows
    # of each group; the mask's column sums are the group sizes.
    sums = membership.T.dot(a[:, 1:])
    counts = membership.sum(0)[:, None].astype(float)

    return np.c_[unq, sums / counts]
Пробный запуск:
In [51]: a
Out[51]:
array([[1, 2, 3, 4],
[2, 2, 3, 4],
[1, 4, 5, 6],
[3, 2, 3, 4]])
In [52]: groupby_mean(a)
Out[52]:
array([[1., 3., 4., 5.],
[2., 2., 3., 4.],
[3., 2., 3., 4.]])