Я обычно отслеживаю ход выполнения заданий ipyparallel, и хотел бы сделать то же самое с модулем multiprocessing —
в частности, мне нужен какой-нибудь способ печатать обновление о том, сколько процессов завершено,
обновляющееся каждые X секунд. Я привык выполнять команды параллельно в ноутбуке Jupyter примерно так:
import time
from ipyparallel import Client
from IPython.display import clear_output
def get_client(profile='default') -> tuple:
    """Connect to a running ipcluster and return (lview, dview).

    Builds a load-balanced view and a direct view on the cluster
    identified by *profile*, printing the engine count seen by each.
    """
    client = Client(profile=profile)
    direct_view = client[:]
    balanced_view = client.load_balanced_view()
    print(len(balanced_view), len(direct_view))
    return balanced_view, direct_view
def update(args: list) -> None:
    """For a Jupyter notebook: clear the previous printout and print new lines.

    Good for for-loops etc., where you want a single refreshed status
    display instead of an ever-growing log.

    Parameters
    ----------
    args : list
        Items to print, one per line.
    """
    # wait=True defers the clear until new output arrives, reducing flicker.
    clear_output(wait=True)
    # Plain loop instead of the original `[print(x) for x in args]`:
    # a comprehension used only for side effects builds and discards
    # a throwaway list of Nones.
    for item in args:
        print(item)
def watch_async(jobs: list, phase=None) -> None:
    """Block until all async jobs finish, printing progress every 5 seconds.

    Parameters
    ----------
    jobs : list
        AsyncResult-like objects exposing a ``.ready()`` method
        (e.g. from ``lview.apply_async``).
    phase : optional
        Label printed above the counts to identify the current stage.
    """
    # Hoist the invariant job count out of the polling loop
    # (the original recomputed len(jobs) every iteration and in both branches).
    total = len(jobs)
    print(total)
    done = 0
    while done != total:
        time.sleep(5)
        # One-pass count of finished jobs replaces the manual counter loop.
        done = sum(1 for job in jobs if job.ready())
        if phase is not None:
            update([phase, done, total])
        else:
            update([done, total])
def mytask(myarg):
    # Placeholder for the real workhorse function; the body was elided
    # in the question and is not runnable as-is.
    ...
    # NOTE(review): `result` is never defined here — illustrative
    # pseudocode only; calling this raises NameError.
    return result
# Driver: connect to the cluster, fan out mytask over args, watch progress.
lview, dview = get_client()
# NOTE(review): the original line read `args = # some list of args ...`,
# which is a SyntaxError; an explicit placeholder keeps the example parseable.
args = []  # some list of args to pass to mytask()
jobs = [lview.apply_async(mytask, arg) for arg in args]
# watch progress of jobs (this is the part I'd like to replicate with multiprocessing)
# here it just clears the last print in a jupyter notebook
# what I'd like to do is just get a count of finished jobs
# so I can then use a progress bar from tqdm, which would be better for
# a command line script IMO.
watch_async(jobs, phase='mytask')
print("done!")