In [68]: import string
...: import numpy as np
...: import pandas as pd
In [69]: M, N = 100, 100  # M rows, N feature values per row
...: labels = np.random.choice(['good', 'bad'], size=M)
...: ids = np.random.choice(list(string.ascii_lowercase), size=M)
...: # 1-D object array of length M: each element will hold one Python list of N floats.
...: features = np.empty((M,), dtype=object)
...: features[:] = list(map(list, np.random.randn(M, N)))
...: # The three arrays are passed as rows and then transposed, giving one row per
...: # sample with the columns 'label', 'id' and 'features'.
...: df = pd.DataFrame([labels, ids, features], index=['label', 'id', 'features']).T
...: df1 = df.copy()  # the timed cell mutates df1 (pop), so df itself stays untouched
In [70]: %%time
...: # Expand the list-valued 'features' column into N columns named f0000, f0001, ...
...: columns = [f"f{i:04d}" for i in range(N)]
...: # pop() removes 'features' from df1; every stored list becomes one row of the new frame.
...: # index=df.index lines up with df1 only because df1 was created as a copy of df.
...: features = pd.DataFrame(list(map(np.asarray, df1.pop('features').to_numpy())), index=df.index, columns=columns)
...: df1 = pd.concat([df1, features], axis=1)
Wall time: 13.9 ms
In [71]: M, N = 1000, 1000  # M rows, N feature values per row
...: labels = np.random.choice(['good', 'bad'], size=M)
...: ids = np.random.choice(list(string.ascii_lowercase), size=M)
...: # 1-D object array of length M: each element will hold one Python list of N floats.
...: features = np.empty((M,), dtype=object)
...: features[:] = list(map(list, np.random.randn(M, N)))
...: # The three arrays are passed as rows and then transposed, giving one row per
...: # sample with the columns 'label', 'id' and 'features'.
...: df = pd.DataFrame([labels, ids, features], index=['label', 'id', 'features']).T
...: df1 = df.copy()  # the timed cell mutates df1 (pop), so df itself stays untouched
In [72]: %%time
...: # Expand the list-valued 'features' column into N columns named f0000, f0001, ...
...: columns = [f"f{i:04d}" for i in range(N)]
...: # pop() removes 'features' from df1; every stored list becomes one row of the new frame.
...: # index=df.index lines up with df1 only because df1 was created as a copy of df.
...: features = pd.DataFrame(list(map(np.asarray, df1.pop('features').to_numpy())), index=df.index, columns=columns)
...: df1 = pd.concat([df1, features], axis=1)
Wall time: 627 ms
In [73]: df1.shape  # expected (M, N + 2): N feature columns plus 'label' and 'id'
Out[73]: (1000, 1002)
Правка: это примерно в 2 раза быстрее исходного варианта (он приведён ниже для сравнения):
In [79]: df2 = df.copy()
In [80]: %%time
...: features = df2.pop('features')
...: for i in range(N):
...: df2[f'f{i:04d}'] = features.map(lambda x: x[i])
...:
Wall time: 1.46 s
In [81]: df1.equals(df2)  # checks that the bulk expansion and the per-column loop give the same frame
Out[81]: True
Правка 2: более быстрый способ построения DataFrame даёт примерно 8-кратное ускорение по сравнению с исходным вариантом:
In [22]: df1 = df.copy()
In [23]: %%time
...: features = pd.DataFrame({f"f{i:04d}": np.asarray(row) for i, row in enumerate(df1.pop('features').to_numpy())})
...: df1 = pd.concat([df1, features], axis=1)
Wall time: 165 ms