Цикл for
import pandas as pd
import numpy as np
# Sample frame: 1000 rows of random integers in [0, 10) across three columns.
df = pd.DataFrame(
    np.random.randint(0, 10, (1000, 3)),
    columns=['col_A', 'col_B', 'col_C'],
)
# For each column, the row label up to which values are overwritten.
s = pd.Series([20, 0, 300], index=['col_A', 'col_B', 'col_C'])

# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
for col, idx in s.items():
    # .loc slices by label and includes the end label, so rows 0..idx are set.
    df.loc[:idx, col] = -1
df
col_A col_B col_C
0 -1 -1 -1
1 -1 3 -1
2 -1 3 -1
3 -1 2 -1
4 -1 1 -1
.. ... ... ...
995 2 6 9
996 1 9 5
997 2 6 4
998 4 0 1
999 9 2 8
Pandas apply
def f(c, s):
    """Return a copy of column ``c`` with its leading rows set to -1.

    Parameters
    ----------
    c : pandas.Series
        A column; ``c.name`` is the key used to look up the cutoff in ``s``.
    s : pandas.Series
        Maps column names to the number of leading rows to overwrite.

    Returns
    -------
    pandas.Series
        A new Series in which the first ``s[c.name]`` rows (by position,
        end-exclusive) are -1 and the remaining rows are unchanged.
    """
    # Work on a copy: DataFrame.apply does not support functions that
    # mutate the object passed to them.
    c = c.copy()
    # .iloc makes the slice explicitly positional regardless of the index.
    c.iloc[:s[c.name]] = -1
    return c
# Run f over every column, forwarding s as its second positional argument.
df = df.apply(f, args=(s,))
df
col_A col_B col_C
0 -1 6 -1
1 -1 1 -1
2 -1 6 -1
3 -1 1 -1
4 -1 6 -1
.. ... ... ...
995 2 4 3
996 2 0 0
997 8 5 7
998 3 5 5
999 5 7 7
Производительность. На моей локальной машине при N = 1000 цикл for немного быстрее. При увеличении N до 1M быстрее
оказывается метод apply:
def for_loop(N):
    """Build an N-row random frame and overwrite leading rows with a loop.

    Parameters
    ----------
    N : int
        Number of rows in the generated frame.

    Returns
    -------
    pandas.DataFrame
        Frame with columns col_A, col_B and col_C in which, for every
        column named in the module-level Series ``s``, the rows labelled
        0 through ``s[col]`` (inclusive) are set to -1.
    """
    df = pd.DataFrame(
        np.random.randint(0, 10, (N, 3)),
        columns=['col_A', 'col_B', 'col_C'],
    )
    # Series.items() is used because Series.iteritems() was removed in
    # pandas 2.0.
    for col, idx in s.items():
        # .loc slices by label and includes the end label.
        df.loc[:idx, col] = -1
    return df
def apply_method(N):
    """Build an N-row random frame and overwrite leading rows via apply.

    Parameters
    ----------
    N : int
        Number of rows in the generated frame.

    Returns
    -------
    pandas.DataFrame
        Frame with columns col_A, col_B and col_C in which, for every
        column, the first ``s[col]`` rows (by position, end-exclusive) are
        set to -1; ``s`` is the module-level Series of cutoffs.
    """
    # Build the frame here so that N is actually honoured; operating on a
    # global frame would time (and modify) whatever data happens to exist.
    df = pd.DataFrame(
        np.random.randint(0, 10, (N, 3)),
        columns=['col_A', 'col_B', 'col_C'],
    )

    def f(c, s):
        # Copy first: DataFrame.apply does not support functions that
        # mutate the object passed to them.
        c = c.copy()
        # Explicitly positional slice of the leading rows.
        c.iloc[:s[c.name]] = -1
        return c

    return df.apply(lambda c: f(c, s))
%timeit for_loop(1000)
1.19 ms ± 58.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%timeit apply_method(1000)
185 ms ± 44.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit for_loop(1000_000)
303 ms ± 25.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit apply_method(1000_000)
162 ms ± 8.84 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)