Я предлагаю решение без циклов: использовать mask
и объединить булевы маски в numpy
с помощью широковещания (broadcasting):
# Sample data: 'close' is a constant 100, the 'open'/'high'/'low' columns
# contain some NaN values, and 'col' is entirely NaN.
df = pd.DataFrame({'close':[100] * 6,
'open':[4,5,4,5,np.nan,4],
'high':[np.nan,8,9,4,2,3],
'low':[1,3,5,7,np.nan,np.nan],
'change':[0,3,6,9,0,4],
'col':[np.nan]*6})
print (df)
change close col high low open
0 0 100 NaN NaN 1.0 4.0
1 3 100 NaN 8.0 3.0 5.0
2 6 100 NaN 9.0 5.0 4.0
3 9 100 NaN 4.0 7.0 5.0
4 0 100 NaN 2.0 NaN NaN
5 4 100 NaN 3.0 NaN 4.0
# Columns in which missing values may be replaced.
cols = ['open', 'high', 'low']
# Element-wise mask: True where the cell is NaN AND the row's 'change' is 0.
# [:, None] reshapes the per-row condition to N x 1 so that it broadcasts
# across the columns of the null mask.
m = df[cols].isnull().values & (df['change'] == 0).values[:, None]
# Where the mask is True, take the value from 'close', aligned by row (axis=0).
df[cols] = df[cols].mask(m, df['close'], axis=0)
# NumPy alternative:
#df[cols] = np.where(m, df['close'].values[:, None], df[cols])
print (df)
change close col high low open
0 0 100 NaN 100.0 1.0 4.0
1 3 100 NaN 8.0 3.0 5.0
2 6 100 NaN 9.0 5.0 4.0
3 9 100 NaN 4.0 7.0 5.0
4 0 100 NaN 2.0 100.0 100.0
5 4 100 NaN 3.0 NaN 4.0
Пояснение:
При объединении булева DataFrame
с булевым Series возникает проблема —
получаем ошибку:
m = df[cols].isnull() & (df['change'] == 0)
ValueError: operands could not be broadcast together with shapes (18,) (3,)
Решение — широковещание (broadcasting) в NumPy:
print (df[cols].isnull().values)
[[False True False]
[False False False]
[False False False]
[False False False]
[ True False True]
[False False True]]
print ((df['change'] == 0).values)
[ True False False False True False]
Так что необходимо создать массив N x 1:
print ((df['change'] == 0).values[:, None])
[[ True]
[False]
[False]
[False]
[ True]
[False]]
m = df[cols].isnull().values & (df['change'] == 0).values[:, None]
print (m)
[[False True False]
[False False False]
[False False False]
[False False False]
[ True False True]
[False False False]]