Python ускорение функции - PullRequest
0 голосов
/ 28 мая 2020

У меня есть эта функция, которая сканирует фрейм данных pandas, но она очень медленная, я хотел бы получить некоторую помощь в том, как ее ускорить. Функции для каждой строки набора данных просматривают имя каждого столбца в списке (Candle_rankings), значение этой строки и столбца может быть от 0 до 100 или 100, а затем будут искать в списке dict для "силы" этого столбца, и он добавит это усиление к общему индексу, и я помещу метку самого сильного столбца в новый столбец.

def add_candle_rankings(_data):

candle_names = talib.get_function_groups()['Pattern Recognition']
from itertools import compress
_data['candlestick_pattern'] = np.nan
_data['candlestick_match_count'] = np.nan
_data['candlestick_match_count_bull'] = 0
_data['candlestick_match_count_bear'] = 0
_data['candlestick_points'] = 0
printProgressBar(0, len(_data)-1, prefix = 'Calculating Candle Rankings:', suffix = 'Complete', length = 50)
_progress=-1
for index, row in _data.iterrows():
    _progress+=1
    printProgressBar(_progress, len(_data)-1, prefix = 'Calculating Candle Rankings:', suffix = 'Complete', length = 50)
    # no pattern found
    if len(row[candle_names]) - sum(row[candle_names] == 0) == 0:
        _data.loc[index,'candlestick_pattern'] = "" #"NO_PATTERN"
        _data.loc[index, 'candlestick_match_count'] = 0
        #_data['candlestick_match_count_bull'] = 0
        #_data['candlestick_match_count_bear'] = 0
    # single pattern found
    elif len(row[candle_names]) - sum(row[candle_names] == 0) == 1:
        # bull pattern 100 or 200
        if any(row[candle_names].values > 0):
            pattern = list(compress(row[candle_names].keys(), row[candle_names].values != 0))[0] + '_Bull'
            _data.loc[index, 'candlestick_pattern'] = pattern
            if candle_rankings.get(pattern):                    
                _data.loc[index, 'candlestick_points'] = (len(candle_rankings)+1 - candle_rankings.get(pattern))
            else:
                pass
            _data.loc[index, 'candlestick_match_count'] = 1
            _data.loc[index, 'candlestick_match_count_bull'] = 1
        # bear pattern -100 or -200
        else:
            pattern = list(compress(row[candle_names].keys(), row[candle_names].values != 0))[0] + '_Bear'
            _data.loc[index, 'candlestick_pattern'] = pattern
            if candle_rankings.get(pattern):                    
                _data.loc[index, 'candlestick_points'] = -(len(candle_rankings)+1 - candle_rankings.get(pattern))
            else:
                pass
            _data.loc[index, 'candlestick_match_count'] = 1
            _data.loc[index, 'candlestick_match_count_bear'] = 1
    # multiple patterns matched -- select best performance
    else:
        # filter out pattern names from bool list of values
        patterns = list(compress(row[candle_names].keys(), row[candle_names].values != 0))
        container = []
        for pattern in patterns:
            if row[pattern] > 0:
                container.append(pattern + '_Bull')
                _data.loc[index, 'candlestick_match_count_bull'] += 1
                if candle_rankings.get(pattern + '_Bull'):
                    _data.loc[index, 'candlestick_points'] += (len(candle_rankings)+1 - candle_rankings.get(pattern + '_Bull'))
                else:
                    pass
            else:
                container.append(pattern + '_Bear')
                _data.loc[index, 'candlestick_match_count_bear'] += 1
                if candle_rankings.get(pattern + '_Bear'):
                    _data.loc[index, 'candlestick_points'] += -(len(candle_rankings)+1 - candle_rankings.get(pattern + '_Bear'))
                else:
                    pass

        rank_list = [candle_rankings[p] for p in container]
        if len(rank_list) == len(container):
            rank_index_best = rank_list.index(min(rank_list))
            _data.loc[index, 'candlestick_pattern'] = container[rank_index_best]
            _data.loc[index, 'candlestick_match_count'] = len(container)
# clean up candle columns
_data.drop(candle_names, axis = 1, inplace = True)
_data['candlestick_index'] = _data['candlestick_match_count_bull'] -_data['candlestick_match_count_bear']

candle_rankings = {
    "CDL3LINESTRIKE_Bull": 1,
    "CDL3LINESTRIKE_Bear": 2,
    .....
    "CDLKICKINGBYLENGTH_Bull":108,
    "CDLKICKINGBYLENGTH_Bear":108,
}
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...