Я пытаюсь обработать pandas DataFrame с помощью пула процессов (multiprocessing.Pool), чтобы обновить часть данных серией параллельных запросов к API. При этом возникает ошибка импорта XGBoost (одной из библиотек, которые я использую) — но только при запуске пула процессов.
Вот выдержка из ошибки:
2020-05-02 20:52:58,338 - WARNING - Traceback (most recent call last):
2020-05-02 20:52:58,338 - WARNING - File "<string>", line 1, in <module>
2020-05-02 20:52:58,339 - WARNING - File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\multiprocessing\spawn.py", line 105, in spawn_main
2020-05-02 20:52:58,339 - WARNING - exitcode = _main(fd)
2020-05-02 20:52:58,339 - WARNING - File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\multiprocessing\spawn.py", line 114, in _main
2020-05-02 20:52:58,340 - WARNING - prepare(preparation_data)
2020-05-02 20:52:58,340 - WARNING - File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\multiprocessing\spawn.py", line 225, in prepare
2020-05-02 20:52:58,340 - WARNING - _fixup_main_from_path(data['init_main_from_path'])
2020-05-02 20:52:58,340 - WARNING - File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\multiprocessing\spawn.py", line 277, in _fixup_main_from_path
2020-05-02 20:52:58,341 - WARNING - run_name="__mp_main__")
2020-05-02 20:52:58,341 - WARNING - File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\runpy.py", line 263, in run_path
2020-05-02 20:52:58,341 - WARNING - pkg_name=pkg_name, script_name=fname)
2020-05-02 20:52:58,342 - WARNING - File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\runpy.py", line 96, in _run_module_code
2020-05-02 20:52:58,342 - WARNING - mod_name, mod_spec, pkg_name, script_name)
2020-05-02 20:52:58,342 - WARNING - Traceback (most recent call last):
2020-05-02 20:52:58,342 - WARNING - File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\runpy.py", line 85, in _run_code
2020-05-02 20:52:58,342 - WARNING - File "<string>", line 1, in <module>
2020-05-02 20:52:58,342 - WARNING - exec(code, run_globals)
2020-05-02 20:52:58,343 - WARNING - File "D:\GitSpace\Financial\Stock-Analysis-ML\equity_finder\main.py", line 13, in <module>
2020-05-02 20:52:58,343 - WARNING - File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\multiprocessing\spawn.py", line 105, in spawn_main
2020-05-02 20:52:58,343 - WARNING - from equity_finder.utils.model_loading_utils import save_model, load_model
2020-05-02 20:52:58,343 - WARNING - File "D:\GitSpace\Financial\Stock-Analysis-ML\equity_finder\utils\model_loading_utils.py", line 5, in <module>
2020-05-02 20:52:58,343 - WARNING - exitcode = _main(fd)
2020-05-02 20:52:58,343 - WARNING - File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\multiprocessing\spawn.py", line 114, in _main
2020-05-02 20:52:58,343 - WARNING - from equity_finder.modelgenerator.model_container import ModelContainer
2020-05-02 20:52:58,344 - WARNING - File "D:\GitSpace\Financial\Stock-Analysis-ML\equity_finder\modelgenerator\model_container.py", line 9, in <module>
2020-05-02 20:52:58,344 - WARNING - prepare(preparation_data)
2020-05-02 20:52:58,344 - WARNING - from xgboost import XGBClassifier
2020-05-02 20:52:58,344 - WARNING - File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\multiprocessing\spawn.py", line 225, in prepare
2020-05-02 20:52:58,344 - WARNING - File "D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\__init__.py", line 11, in <module>
2020-05-02 20:52:58,344 - WARNING - _fixup_main_from_path(data['init_main_from_path'])
2020-05-02 20:52:58,344 - WARNING - from .core import DMatrix, Booster
2020-05-02 20:52:58,344 - WARNING - File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\multiprocessing\spawn.py", line 277, in _fixup_main_from_path
2020-05-02 20:52:58,344 - WARNING - File "D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\core.py", line 174, in <module>
2020-05-02 20:52:58,345 - WARNING - run_name="__mp_main__")
2020-05-02 20:52:58,345 - WARNING - _LIB = _load_lib()
2020-05-02 20:52:58,345 - WARNING - File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\runpy.py", line 263, in run_path
2020-05-02 20:52:58,345 - WARNING - File "D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\core.py", line 134, in _load_lib
2020-05-02 20:52:58,345 - WARNING - lib_paths = find_lib_path()
2020-05-02 20:52:58,345 - WARNING - pkg_name=pkg_name, script_name=fname)
2020-05-02 20:52:58,345 - WARNING - File "D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\libpath.py", line 50, in find_lib_path
2020-05-02 20:52:58,346 - WARNING - File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\runpy.py", line 96, in _run_module_code
2020-05-02 20:52:58,346 - WARNING - 'List of candidates:\n' + ('\n'.join(dll_path)))
2020-05-02 20:52:58,346 - WARNING - mod_name, mod_spec, pkg_name, script_name)
2020-05-02 20:52:58,346 - WARNING - xgboost.libpath
2020-05-02 20:52:58,346 - WARNING - File "C:\Users\Garett\AppData\Local\Programs\Python\Python37\lib\runpy.py", line 85, in _run_code
2020-05-02 20:52:58,346 - WARNING - .
2020-05-02 20:52:58,346 - WARNING - XGBoostLibraryNotFound
2020-05-02 20:52:58,346 - WARNING - :
2020-05-02 20:52:58,346 - WARNING - exec(code, run_globals)
2020-05-02 20:52:58,346 - WARNING - Cannot find XGBoost Library in the candidate path, did you install compilers and run build.sh in root path?
2020-05-02 20:52:58,347 - WARNING - File "D:\GitSpace\Financial\Stock-Analysis-ML\equity_finder\main.py", line 13, in <module>
2020-05-02 20:52:58,347 - WARNING - D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\xgboost.dll
2020-05-02 20:52:58,347 - WARNING - D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\../../lib/xgboost.dll
2020-05-02 20:52:58,347 - WARNING - from equity_finder.utils.model_loading_utils import save_model, load_model
2020-05-02 20:52:58,347 - WARNING - D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\./lib/xgboost.dll
2020-05-02 20:52:58,347 - WARNING - File "D:\GitSpace\Financial\Stock-Analysis-ML\equity_finder\utils\model_loading_utils.py", line 5, in <module>
2020-05-02 20:52:58,347 - WARNING - C:\Users\Garett\AppData\Local\Programs\Python\Python37\xgboost\xgboost.dll
2020-05-02 20:52:58,347 - WARNING - D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\../../windows/x64/Release/xgboost.dll
2020-05-02 20:52:58,347 - WARNING - from equity_finder.modelgenerator.model_container import ModelContainer
2020-05-02 20:52:58,347 - WARNING - D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\./windows/x64/Release/xgboost.dll
2020-05-02 20:52:58,348 - WARNING - File "D:\GitSpace\Financial\Stock-Analysis-ML\equity_finder\modelgenerator\model_container.py", line 9, in <module>
2020-05-02 20:52:58,348 - WARNING - from xgboost import XGBClassifier
2020-05-02 20:52:58,348 - WARNING - File "D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\__init__.py", line 11, in <module>
2020-05-02 20:52:58,348 - WARNING - from .core import DMatrix, Booster
2020-05-02 20:52:58,348 - WARNING - File "D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\core.py", line 174, in <module>
2020-05-02 20:52:58,349 - WARNING - _LIB = _load_lib()
2020-05-02 20:52:58,349 - WARNING - File "D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\core.py", line 134, in _load_lib
2020-05-02 20:52:58,349 - WARNING - lib_paths = find_lib_path()
2020-05-02 20:52:58,349 - WARNING - File "D:\GitSpace\Financial\Stock-Analysis-ML\.env\lib\site-packages\xgboost\libpath.py", line 50, in find_lib_path
2020-05-02 20:52:58,350 - WARNING - 'List of candidates:\n' + ('\n'.join(dll_path)))
2020-05-02 20:52:58,350 - WARNING - xgboost.libpath
2020-05-02 20:52:58,350 - WARNING - .
Ошибки повторяются бесконечно, после чего мой логгер начинает сбоить из-за переполнения. Насколько я понимаю, каждый процесс в пуле завершается сбоем, затем запускается следующий, который снова завершается с той же ошибкой.
Точка входа моей программы защищена условием if __name__ == '__main__':
if __name__ == '__main__':
    # Run the program only when this file is executed as a script.
    # multiprocessing workers started with the spawn method re-run this file
    # under the name "__mp_main__", so this guard keeps them from starting
    # the program again.
    equity_finder: EquityFinder = EquityFinder()
    equity_finder.equity_finder(
        sys.argv[1:],
        configuration.get(),
    )
Вот функция, которая выполняет распараллеливание. Она импортируется из подмодуля:
from itertools import repeat
from multiprocessing import Pool, cpu_count
from typing import List, Callable
import numpy as np
import pandas as pd
from pandas import DataFrame
def parallelize_df_func(df: DataFrame, func: Callable, args: List):
    """
    Apply ``func`` to row-wise chunks of a DataFrame in a pool of worker processes.

    Args:
        df (DataFrame): Frame to process; it is split by rows into consecutive chunks.
        func (Callable): Called as ``func(chunk, *args)`` in a worker process. It must be
            picklable (e.g. defined at module level) and return an object that
            ``pd.concat`` accepts.
        args (List): Extra positional arguments passed unchanged to every ``func`` call.

    Returns:
        The per-chunk results concatenated with ``pd.concat`` in the original chunk order.
    """
    # Leaving one core free to not freeze the machine, but never dropping below one
    # worker: cpu_count() - 1 is 0 on a single-core machine, which can neither be used
    # as a number of chunks nor as a pool size.
    num_cores = max(1, cpu_count() - 1)
    # No more chunks than rows, so workers are not handed empty chunks.
    num_chunks = max(1, min(num_cores, len(df)))
    # Split by row position instead of passing the DataFrame itself to np.array_split,
    # which depends on DataFrame.swapaxes (deprecated in recent pandas versions).
    row_groups = np.array_split(np.arange(len(df)), num_chunks)
    df_split = [df.iloc[rows] for rows in row_groups]
    # The pool is sized from num_chunks (<= num_cores), so the spare core stays free.
    with Pool(num_chunks) as pool:
        df = pd.concat(pool.starmap(func, zip(df_split, *[repeat(arg) for arg in args])))
    return df
Вот функция, которую я передаю в parallelize_df_func:
def _replace_price_with_current(df: DataFrame, valid_price_dates: str) -> DataFrame:
    """
    Look up a price for every row of ``df`` and return the per-row results.

    Args:
        df (DataFrame): Must provide the 'ticker' and 'price' columns, which are read here.
        valid_price_dates (str): Converted with ``str()`` and handed to every per-row lookup.

    Returns:
        The result of the ``np.vectorize`` call, one value per row.
        NOTE(review): np.vectorize appears to yield a NumPy array rather than the
        annotated DataFrame - confirm what callers (e.g. pd.concat) expect.
    """
    def _vectorized_replacement(ticker: str, current_price: float, dates: str) -> float:
        # Per-row lookup; the actual API call was redacted by the author.
        data = // API call redacted (I left API call out for privacy)
        # Keep the price already held when the lookup returns no data.
        if data.empty:
            return current_price
        # Otherwise use the `close` attribute of the first returned row.
        return data.iloc[0].close
    # Apply the per-row lookup element-wise over the two columns; the date string
    # is a single scalar shared by every row.
    return np.vectorize(_vectorized_replacement)(df['ticker'],
                                                 df['price'],
                                                 str(valid_price_dates))
Кто-нибудь сталкивался с этой проблемой раньше? Буду очень признателен за помощь. Спасибо!