Python: ошибки импорта XGBoost при запуске пула multiprocessing - PullRequest
0 голосов
/ 03 мая 2020

Я пытаюсь обработать pandas DataFrame с помощью пула процессов (multiprocessing.Pool), чтобы обновить часть данных серией параллельных запросов к API. Я сталкиваюсь с ошибками импорта XGBoost (одной из библиотек, которые я использую), причём только при запуске пула процессов.

Вот выдержка из ошибки:

2020-05-02 20:52:58,338 - WARNING - Traceback (most recent call last):
2020-05-02 20:52:58,338 - WARNING -   File "<string>", line 1, in <module>
2020-05-02 20:52:58,339 - WARNING -   File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\multiprocessing\spawn.py", line 105, in spawn_main
2020-05-02 20:52:58,339 - WARNING - exitcode = _main(fd)
2020-05-02 20:52:58,339 - WARNING -   File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\multiprocessing\spawn.py", line 114, in _main
2020-05-02 20:52:58,340 - WARNING - prepare(preparation_data)
2020-05-02 20:52:58,340 - WARNING -   File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\multiprocessing\spawn.py", line 225, in prepare
2020-05-02 20:52:58,340 - WARNING - _fixup_main_from_path(data['init_main_from_path'])
2020-05-02 20:52:58,340 - WARNING -   File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\multiprocessing\spawn.py", line 277, in _fixup_main_from_path
2020-05-02 20:52:58,341 - WARNING - run_name="__mp_main__")
2020-05-02 20:52:58,341 - WARNING -   File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\runpy.py", line 263, in run_path
2020-05-02 20:52:58,341 - WARNING - pkg_name=pkg_name, script_name=fname)
2020-05-02 20:52:58,342 - WARNING -   File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\runpy.py", line 96, in _run_module_code
2020-05-02 20:52:58,342 - WARNING - mod_name, mod_spec, pkg_name, script_name)
2020-05-02 20:52:58,342 - WARNING - Traceback (most recent call last):
2020-05-02 20:52:58,342 - WARNING -   File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\runpy.py", line 85, in _run_code
2020-05-02 20:52:58,342 - WARNING -   File "<string>", line 1, in <module>
2020-05-02 20:52:58,342 - WARNING - exec(code, run_globals)
2020-05-02 20:52:58,343 - WARNING -   File "D:\GitSpace\Financial\Stock-Analysis-ML\equity_finder\main.py", line 13, in <module>
2020-05-02 20:52:58,343 - WARNING -   File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\multiprocessing\spawn.py", line 105, in spawn_main
2020-05-02 20:52:58,343 - WARNING - from equity_finder.utils.model_loading_utils import save_model, load_model
2020-05-02 20:52:58,343 - WARNING -   File "D:\GitSpace\Financial\Stock-Analysis-ML\equity_finder\utils\model_loading_utils.py", line 5, in <module>
2020-05-02 20:52:58,343 - WARNING - exitcode = _main(fd)
2020-05-02 20:52:58,343 - WARNING -   File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\multiprocessing\spawn.py", line 114, in _main
2020-05-02 20:52:58,343 - WARNING - from equity_finder.modelgenerator.model_container import ModelContainer
2020-05-02 20:52:58,344 - WARNING -   File "D:\GitSpace\Financial\Stock-Analysis-ML\equity_finder\modelgenerator\model_container.py", line 9, in <module>
2020-05-02 20:52:58,344 - WARNING - prepare(preparation_data)
2020-05-02 20:52:58,344 - WARNING - from xgboost import XGBClassifier
2020-05-02 20:52:58,344 - WARNING -   File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\multiprocessing\spawn.py", line 225, in prepare
2020-05-02 20:52:58,344 - WARNING -   File "D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\__init__.py", line 11, in <module>
2020-05-02 20:52:58,344 - WARNING - _fixup_main_from_path(data['init_main_from_path'])
2020-05-02 20:52:58,344 - WARNING - from .core import DMatrix, Booster
2020-05-02 20:52:58,344 - WARNING -   File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\multiprocessing\spawn.py", line 277, in _fixup_main_from_path
2020-05-02 20:52:58,344 - WARNING -   File "D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\core.py", line 174, in <module>
2020-05-02 20:52:58,345 - WARNING - run_name="__mp_main__")
2020-05-02 20:52:58,345 - WARNING - _LIB = _load_lib()
2020-05-02 20:52:58,345 - WARNING -   File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\runpy.py", line 263, in run_path
2020-05-02 20:52:58,345 - WARNING -   File "D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\core.py", line 134, in _load_lib
2020-05-02 20:52:58,345 - WARNING - lib_paths = find_lib_path()
2020-05-02 20:52:58,345 - WARNING - pkg_name=pkg_name, script_name=fname)
2020-05-02 20:52:58,345 - WARNING -   File "D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\libpath.py", line 50, in find_lib_path
2020-05-02 20:52:58,346 - WARNING -   File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\runpy.py", line 96, in _run_module_code
2020-05-02 20:52:58,346 - WARNING - 'List of candidates:\n' + ('\n'.join(dll_path)))
2020-05-02 20:52:58,346 - WARNING - mod_name, mod_spec, pkg_name, script_name)
2020-05-02 20:52:58,346 - WARNING - xgboost.libpath
2020-05-02 20:52:58,346 - WARNING -   File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\runpy.py", line 85, in _run_code
2020-05-02 20:52:58,346 - WARNING - .
2020-05-02 20:52:58,346 - WARNING - XGBoostLibraryNotFound
2020-05-02 20:52:58,346 - WARNING - :
2020-05-02 20:52:58,346 - WARNING - exec(code, run_globals)
2020-05-02 20:52:58,346 - WARNING - Cannot find XGBoost Library in the candidate path, did you install compilers and run build.sh in root path?
2020-05-02 20:52:58,347 - WARNING -   File "D:\GitSpace\Financial\Stock-Analysis-ML\equity_finder\main.py", line 13, in <module>
2020-05-02 20:52:58,347 - WARNING - D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\xgboost.dll
2020-05-02 20:52:58,347 - WARNING - D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\../../lib/xgboost.dll
2020-05-02 20:52:58,347 - WARNING - from equity_finder.utils.model_loading_utils import save_model, load_model
2020-05-02 20:52:58,347 - WARNING - D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\./lib/xgboost.dll
2020-05-02 20:52:58,347 - WARNING -   File "D:\GitSpace\Financial\Stock-Analysis-ML\equity_finder\utils\model_loading_utils.py", line 5, in <module>
2020-05-02 20:52:58,347 - WARNING - C:\Users\Garett\AppData\Local\Programs\Python\Python37\xgboost\xgboost.dll
2020-05-02 20:52:58,347 - WARNING - D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\../../windows/x64/Release/xgboost.dll
2020-05-02 20:52:58,347 - WARNING - from equity_finder.modelgenerator.model_container import ModelContainer
2020-05-02 20:52:58,347 - WARNING - D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\./windows/x64/Release/xgboost.dll
2020-05-02 20:52:58,348 - WARNING -   File "D:\GitSpace\Financial\Stock-Analysis-ML\equity_finder\modelgenerator\model_container.py", line 9, in <module>
2020-05-02 20:52:58,348 - WARNING - from xgboost import XGBClassifier
2020-05-02 20:52:58,348 - WARNING -   File "D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\__init__.py", line 11, in <module>
2020-05-02 20:52:58,348 - WARNING - from .core import DMatrix, Booster
2020-05-02 20:52:58,348 - WARNING -   File "D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\core.py", line 174, in <module>
2020-05-02 20:52:58,349 - WARNING - _LIB = _load_lib()
2020-05-02 20:52:58,349 - WARNING -   File "D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\core.py", line 134, in _load_lib
2020-05-02 20:52:58,349 - WARNING - lib_paths = find_lib_path()
2020-05-02 20:52:58,349 - WARNING -   File "D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\libpath.py", line 50, in find_lib_path
2020-05-02 20:52:58,350 - WARNING - 'List of candidates:\n' + ('\n'.join(dll_path)))
2020-05-02 20:52:58,350 - WARNING - xgboost.libpath
2020-05-02 20:52:58,350 - WARNING - .

Ошибки повторяются бесконечно, после чего мой логгер начинает давать сбои из-за переполнения. Думаю, происходит следующее: каждый процесс в пуле завершается сбоем, затем запускается следующий, который снова завершается с ошибкой.

Точка входа моей программы защищена if __name__ == '__main__':

if __name__ == '__main__':
    # This guard matters for multiprocessing: spawned workers re-import this
    # module under the name "__mp_main__", so the entry point below must not
    # run again inside them.
    equity_finder: EquityFinder = EquityFinder()
    equity_finder.equity_finder(sys.argv[1:], configuration.get())

Вот функция, которая выполняет распараллеливание. Она импортируется как подмодуль:

from itertools import repeat

from multiprocessing import Pool, cpu_count

from typing import List, Callable

import numpy as np
import pandas as pd
from pandas import DataFrame


def parallelize_df_func(df: DataFrame, func: Callable, args: List):
    """
    Apply ``func`` to row chunks of ``df`` in a process pool and concatenate the results.

    Args:
        df (DataFrame): Frame to process. It is split positionally into
            contiguous row chunks of near-equal size.
        func (Callable): Invoked in a worker process as ``func(chunk, *args)``.
            It must be picklable (e.g. defined at module level) and return an
            object that ``pd.concat`` accepts.
        args (List): Extra positional arguments passed unchanged to every call.

    Returns:
        The ``pd.concat`` of the per-chunk results, in chunk order.
    """
    # Leaving one core free to not freeze the machine, but never going below
    # one worker (cpu_count() - 1 is 0 on a single-core machine).
    num_cores = max(1, cpu_count() - 1)
    # Never create more chunks than rows, so no worker receives an empty frame.
    num_chunks = max(1, min(num_cores, len(df)))
    # Split row positions rather than the frame itself: np.array_split on a
    # DataFrame relies on the deprecated DataFrame.swapaxes.
    row_groups = np.array_split(np.arange(len(df)), num_chunks)
    df_split = [df.iloc[rows] for rows in row_groups]
    # Size the pool to the work actually available instead of every core.
    with Pool(num_chunks) as pool:
        df = pd.concat(pool.starmap(func, zip(df_split, *[repeat(arg) for arg in args])))
    return df

Вот функция, которую я передаю в parallelize_df_func:

def _replace_price_with_current(df: DataFrame, valid_price_dates: str) -> DataFrame:
    """
    Look up a replacement price for every row of ``df``.

    Args:
        df (DataFrame): Must provide the ``'ticker'`` and ``'price'`` columns
            read below.
        valid_price_dates (str): Passed through ``str()`` and handed to every
            per-row lookup as ``dates``.

    Returns:
        The result of the vectorized per-row lookup.
        NOTE(review): ``np.vectorize`` normally yields an ndarray, not a
        DataFrame — confirm the return annotation and that the caller's
        ``pd.concat`` accepts this value.
    """
    def _vectorized_replacement(ticker: str, current_price: float, dates: str) -> float:
        # The statement below is a redacted placeholder, not valid Python;
        # presumably it fetches price data for ``ticker``/``dates`` — TODO confirm.
        data = // API call redacted (I left API call out for privacy)
        # No data came back: keep the price the row already has.
        if data.empty:
            return current_price
        # Otherwise use the ``close`` value of the first returned row.
        return data.iloc[0].close
    # np.vectorize calls the helper once per (ticker, price) pair; the single
    # dates string is broadcast to every call.
    return np.vectorize(_vectorized_replacement)(df['ticker'],
                                                 df['price'],
                                                 str(valid_price_dates))

Кто-нибудь сталкивался с этой проблемой раньше? Буду очень признателен за помощь. Спасибо!

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...