У меня есть эта функция, которая сканирует фрейм данных pandas, но она очень медленная, я хотел бы получить некоторую помощь в том, как ее ускорить. Функции для каждой строки набора данных просматривают имя каждого столбца в списке (Candle_rankings), значение этой строки и столбца может быть от 0 до 100 или 100, а затем будут искать в списке dict для "силы" этого столбца, и он добавит это усиление к общему индексу, и я помещу метку самого сильного столбца в новый столбец.
def add_candle_rankings(_data):
candle_names = talib.get_function_groups()['Pattern Recognition']
from itertools import compress
_data['candlestick_pattern'] = np.nan
_data['candlestick_match_count'] = np.nan
_data['candlestick_match_count_bull'] = 0
_data['candlestick_match_count_bear'] = 0
_data['candlestick_points'] = 0
printProgressBar(0, len(_data)-1, prefix = 'Calculating Candle Rankings:', suffix = 'Complete', length = 50)
_progress=-1
for index, row in _data.iterrows():
_progress+=1
printProgressBar(_progress, len(_data)-1, prefix = 'Calculating Candle Rankings:', suffix = 'Complete', length = 50)
# no pattern found
if len(row[candle_names]) - sum(row[candle_names] == 0) == 0:
_data.loc[index,'candlestick_pattern'] = "" #"NO_PATTERN"
_data.loc[index, 'candlestick_match_count'] = 0
#_data['candlestick_match_count_bull'] = 0
#_data['candlestick_match_count_bear'] = 0
# single pattern found
elif len(row[candle_names]) - sum(row[candle_names] == 0) == 1:
# bull pattern 100 or 200
if any(row[candle_names].values > 0):
pattern = list(compress(row[candle_names].keys(), row[candle_names].values != 0))[0] + '_Bull'
_data.loc[index, 'candlestick_pattern'] = pattern
if candle_rankings.get(pattern):
_data.loc[index, 'candlestick_points'] = (len(candle_rankings)+1 - candle_rankings.get(pattern))
else:
pass
_data.loc[index, 'candlestick_match_count'] = 1
_data.loc[index, 'candlestick_match_count_bull'] = 1
# bear pattern -100 or -200
else:
pattern = list(compress(row[candle_names].keys(), row[candle_names].values != 0))[0] + '_Bear'
_data.loc[index, 'candlestick_pattern'] = pattern
if candle_rankings.get(pattern):
_data.loc[index, 'candlestick_points'] = -(len(candle_rankings)+1 - candle_rankings.get(pattern))
else:
pass
_data.loc[index, 'candlestick_match_count'] = 1
_data.loc[index, 'candlestick_match_count_bear'] = 1
# multiple patterns matched -- select best performance
else:
# filter out pattern names from bool list of values
patterns = list(compress(row[candle_names].keys(), row[candle_names].values != 0))
container = []
for pattern in patterns:
if row[pattern] > 0:
container.append(pattern + '_Bull')
_data.loc[index, 'candlestick_match_count_bull'] += 1
if candle_rankings.get(pattern + '_Bull'):
_data.loc[index, 'candlestick_points'] += (len(candle_rankings)+1 - candle_rankings.get(pattern + '_Bull'))
else:
pass
else:
container.append(pattern + '_Bear')
_data.loc[index, 'candlestick_match_count_bear'] += 1
if candle_rankings.get(pattern + '_Bear'):
_data.loc[index, 'candlestick_points'] += -(len(candle_rankings)+1 - candle_rankings.get(pattern + '_Bear'))
else:
pass
rank_list = [candle_rankings[p] for p in container]
if len(rank_list) == len(container):
rank_index_best = rank_list.index(min(rank_list))
_data.loc[index, 'candlestick_pattern'] = container[rank_index_best]
_data.loc[index, 'candlestick_match_count'] = len(container)
# clean up candle columns
_data.drop(candle_names, axis = 1, inplace = True)
_data['candlestick_index'] = _data['candlestick_match_count_bull'] -_data['candlestick_match_count_bear']
candle_rankings = {
"CDL3LINESTRIKE_Bull": 1,
"CDL3LINESTRIKE_Bear": 2,
.....
"CDLKICKINGBYLENGTH_Bull":108,
"CDLKICKINGBYLENGTH_Bear":108,
}