Добавьте DataFrame.dropna после melt, чтобы удалить строки с пропущенными значениями в столбце value:
# Long format: stack the 'Article' and 'Article_2' columns under a single
# 'value' column, then drop the rows whose 'value' is missing.
dfn = pd.melt(
    df1,
    id_vars='ID',
    value_vars=['Article', 'Article_2'],
).dropna(subset=['value'])

# 1-based running position of every remaining row inside its ID group;
# these positions become the column labels of the wide table.
position = dfn.groupby('ID')['value'].cumcount().add(1)

# Wide format: one row per ID, one 'Article_<n>' column per position, and
# the index name cleared so it is not printed as a header.
dfn = (
    dfn.pivot_table(
        index='ID',
        columns=position,
        values='value',
        aggfunc='first',
    )
    .add_prefix('Article_')
    .rename_axis(None, axis='index')
)
print(dfn)
Article_1 Article_2 Article_3 Article_4 Article_5
1 Banana Apple Pineapple Coconut Tropical
2 Apple Banana Coconut NaN NaN
3 Tomatoe Apple Coconut Pineapple NaN
4 Apple Coconut NaN NaN NaN
5 Apple Coconut NaN NaN NaN
Если нужны все столбцы, используйте немного изменённую функцию justify:
# Long format: stack the 'Article' and 'Article_2' columns under a single
# 'value' column. Missing values are kept here on purpose (no dropna), so
# they still take part in the per-ID numbering below.
dfn = pd.melt(
    df1,
    id_vars='ID',
    value_vars=['Article', 'Article_2'],
)

# 1-based running position of every row inside its ID group.
position = dfn.groupby('ID')['value'].cumcount().add(1)

# Wide format: one row per ID, one 'Article_<n>' column per position, and
# the index name cleared so it is not printed as a header.
dfn = (
    dfn.pivot_table(
        index='ID',
        columns=position,
        values='value',
        aggfunc='first',
    )
    .add_prefix('Article_')
    .rename_axis(None, axis='index')
)
#https://stackoverflow.com/a/44559180/2901002
def justify(a, invalid_val=0, axis=1, side='left'):
    """
    Justify a 2D array by pushing its valid entries to one side.

    Parameters
    ----------
    a : ndarray
        Input 2D array to be justified.
    invalid_val : scalar
        Value that marks an empty cell. It is also used to fill the cells
        vacated by the shift. When it is a float NaN (``np.nan``,
        ``float('nan')``, ``np.float64('nan')``, ...), cells are tested with
        ``pd.notna`` instead of ``!=``.
    axis : int
        Axis along which justification is to be made.
    side : str
        Direction of justification. It could be 'left', 'right', 'up', 'down'.
        It should be 'left' or 'right' for axis=1 and 'up' or 'down' for
        axis=0.

    Returns
    -------
    ndarray
        Object-dtype array with the same shape as ``a``, with the valid
        entries packed towards ``side`` and the rest set to ``invalid_val``.
    """
    # NaN never compares equal to anything (itself included), so
    # `a != invalid_val` would flag every cell as valid. Detect NaN by value
    # rather than by identity so that any NaN object is recognised, not only
    # the `np.nan` singleton.
    fill_is_nan = (
        isinstance(invalid_val, (float, np.floating)) and np.isnan(invalid_val)
    )
    if fill_is_nan:
        mask = pd.notna(a)
    else:
        mask = a != invalid_val

    # Sorting booleans moves the True entries to the end of each lane
    # (right/down); flipping moves them to the start (left/up).
    justified_mask = np.sort(mask, axis=axis)
    if side in ('up', 'left'):
        justified_mask = np.flip(justified_mask, axis=axis)

    out = np.full(a.shape, invalid_val, dtype=object)
    if axis == 1:
        # Boolean indexing walks row by row, so the order inside each row
        # is preserved.
        out[justified_mask] = a[mask]
    else:
        # Work on the transposed views so that the walk goes column by
        # column, preserving the order inside each column.
        out.T[justified_mask.T] = a.T[mask.T]
    return out
# Shift the non-missing values of every row to the left, then rebuild the
# frame around the justified array with the original row and column labels.
left_packed = justify(dfn.values, invalid_val=np.nan, axis=1, side='left')
dfn = pd.DataFrame(left_packed, index=dfn.index, columns=dfn.columns)
print(dfn)
Article_1 Article_2 Article_3 Article_4 Article_5 Article_6
1 Banana Apple Pineapple Coconut Tropical NaN
2 Apple Banana Coconut NaN NaN NaN
3 Tomatoe Apple Coconut Pineapple NaN NaN
4 Apple Coconut NaN NaN NaN NaN
5 Apple Coconut NaN NaN NaN NaN