Я пытаюсь сохранить большой объект (~7 ГБ) в двоичный файл pickle из каждого параллельного процесса joblib. Тем не менее, joblib вызывает MemoryError.
У меня достаточно оперативной памяти (256 ГБ) и дискового пространства (4 ТБ). Я назначил joblib 12 ядер. Я проверил память во время работы — всё в порядке (более половины всей памяти остаётся свободной).
Код просто структурирован как
import pickle
from joblib import Parallel, delayed


def do_something(arg1, arg2):
    """Process one job item and pickle the result to disk.

    Runs inside a joblib worker process. Returns 1 as a success flag.
    """
    ...
    # Use a context manager so the file handle is always closed, even if
    # pickling raises; the bare open() in the original leaked the handle.
    # protocol=-1 selects the highest pickle protocol available.
    # NOTE(review): save_something is a placeholder — it must be defined
    # by the real work done in place of `...` above.
    with open('somefile.p', 'wb') as fh:
        pickle.dump(save_something, fh, protocol=-1)
    return 1


JobList = ['a1', 'b1', 'c1', 'd1',
           'a2', 'b2', 'c2', 'd2',
           'a3', 'b3', 'c3', 'd3']
arg2 = 'sth'
# Fixed NameError: the callable was misspelled `do_somthing`.
# Pass the arg2 variable instead of repeating the 'sth' literal.
Parallel(n_jobs=12)(delayed(do_something)(i, arg2) for i in JobList)
Я хочу, чтобы программа нормально завершила работу, но я не знаю, как выделить (или разрешить) joblib использовать больше памяти.
++. Среда ОС: Ubuntu 18.04.2 (64-разрядная версия) Python: Python 3.6.8 (GCC 7.3.0)
joblib.externals.loky.process_executor._RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/yjw0510/anaconda3/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py", line 418, in _process_worker
r = call_item()
File "/home/yjw0510/anaconda3/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py", line 272, in __call__
return self.fn(*self.args, **self.kwargs)
File "/home/yjw0510/anaconda3/lib/python3.6/site-packages/joblib/_parallel_backends.py", line 567, in __call__
return self.func(*args, **kwargs)
File "/home/yjw0510/anaconda3/lib/python3.6/site-packages/joblib/parallel.py", line 225, in __call__
for func, args, kwargs in self.items]
File "/home/yjw0510/anaconda3/lib/python3.6/site-packages/joblib/parallel.py", line 225, in <listcomp>
for func, args, kwargs in self.items]
File "/home/yjw0510/anaconda3/lib/python3.6/site-packages/joblib/memory.py", line 568, in __call__
File "/home/yjw0510/anaconda3/lib/python3.6/site-packages/joblib/memory.py", line 534, in _cached_call
out, metadata = self.call(*args, **kwargs)
File "/home/yjw0510/anaconda3/lib/python3.6/site-packages/joblib/memory.py", line 734, in call
output = self.func(*args, **kwargs)
File "02_trj_ConvertToPickle.py", line 65, in to_pickle
configArray = np.zeros((nAtoms,9))
MemoryError
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "02_trj_ConvertToPickle.py", line 106, in <module>
res = Parallel(n_jobs=numCPUcores,verbose=32)(delayed(to_pickle)(i, directoryBufferProcessing) for i in fileList)
File "/home/yjw0510/anaconda3/lib/python3.6/site-packages/joblib/parallel.py", line 934, in __call__
self.retrieve()
File "/home/yjw0510/anaconda3/lib/python3.6/site-packages/joblib/parallel.py", line 833, in retrieve
self._output.extend(job.get(timeout=self.timeout))
File "/home/yjw0510/anaconda3/lib/python3.6/site-packages/joblib/_parallel_backends.py", line 521, in wrap_future_result
return future.result(timeout=timeout)
File "/home/yjw0510/anaconda3/lib/python3.6/concurrent/futures/_base.py", line 425, in result
return self.__get_result()
File "/home/yjw0510/anaconda3/lib/python3.6/concurrent/futures/_base.py", line 384, in __get_result
raise self._exception
MemoryError