Как соответствовать пользовательскому распределению смеси в pymc3? - PullRequest
0 голосов
/ 12 января 2020

Я собираюсь разместить смесь пользовательских распределений. Для начала я сначала хочу научиться делать это с помощью обычной гауссовой смеси. Сначала я генерирую некоторые фиктивные данные:

import numpy as np
import pymc3 as pm
import theano.tensor as tt
import theano
from copy import copy 

import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse

from theano.compile.ops import as_op

import scipy.integrate as integrate

# Sample parameters
nsamples = 2000
mu1_true = 0.0
mu2_true = 1.5
sig1_true = 0.5
sig2_true = 0.5
p_true = 0.2

# Samples generation
np.random.seed(3)
s1 = distributions.norm.rvs(mu1_true, sig1_true, size=round(p_true*nsamples))
s2 = distributions.norm.rvs(mu2_true, sig2_true, size=round((1-p_true)*nsamples))
samples = np.hstack([s1, s2])

plt.hist(samples,bins=30,density=True);

enter image description here

Вот что я дошел до того, что касается подгонки:

u1=samples

def gauss(x, mu, sigma):
    return 1/np.sqrt(2.*np.pi*np.power(sigma,2)) * np.exp(-np.power(x-mu,2)/(2.*np.power(sigma,2)))

def likelihood(x):
    return p*gauss(x, mu1, sigma1) + (1-p)*gauss(x, mu2, sigma2)

with pm.Model() as model:
    p = pm.Uniform('p', lower=0, upper=1)
    mu1 = pm.Uniform('mu1', lower=-0.5, upper=0.5)
    mu2 = pm.Uniform('mu2', lower=1, upper=2)
    sigma1 = pm.Uniform('sigma1', lower=0, upper=1)
    sigma2 = pm.Uniform('sigma2', lower=0, upper=1)

with model:
    like=pm.DensityDist('like', likelihood, observed=u1)
    step = pm.Metropolis()
    trace = pm.sample(1000, step=step)

Это с треском проваливается:

---------------------------------------------------------------------------
BrokenPipeError                           Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pymc3\parallel_sampling.py in __init__(self, draws, tune, step_method, chain, seed, start)
    241         try:
--> 242             self._process.start()
    243         except IOError as e:

~\Anaconda3\lib\multiprocessing\process.py in start(self)
    111         _cleanup()
--> 112         self._popen = self._Popen(self)
    113         self._sentinel = self._popen.sentinel

~\Anaconda3\lib\multiprocessing\context.py in _Popen(process_obj)
    222     def _Popen(process_obj):
--> 223         return _default_context.get_context().Process._Popen(process_obj)
    224 

~\Anaconda3\lib\multiprocessing\context.py in _Popen(process_obj)
    321             from .popen_spawn_win32 import Popen
--> 322             return Popen(process_obj)
    323 

~\Anaconda3\lib\multiprocessing\popen_spawn_win32.py in __init__(self, process_obj)
     64                 reduction.dump(prep_data, to_child)
---> 65                 reduction.dump(process_obj, to_child)
     66             finally:

~\Anaconda3\lib\multiprocessing\reduction.py in dump(obj, file, protocol)
     59     '''Replacement for pickle.dump() using ForkingPickler.'''
---> 60     ForkingPickler(file, protocol).dump(obj)
     61 

BrokenPipeError: [Errno 32] Broken pipe

During handling of the above exception, another exception occurred:

RuntimeError                              Traceback (most recent call last)
<ipython-input-33-8398f6d24814> in <module>
     17     like=pm.DensityDist('like', likelihood, observed=u1)
     18     step = pm.Metropolis()
---> 19     trace = pm.sample(1000, step=step)

~\Anaconda3\lib\site-packages\pymc3\sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, **kwargs)
    435             _print_step_hierarchy(step)
    436             try:
--> 437                 trace = _mp_sample(**sample_args)
    438             except pickle.PickleError:
    439                 _log.warning("Could not pickle model, sampling singlethreaded.")

~\Anaconda3\lib\site-packages\pymc3\sampling.py in _mp_sample(draws, tune, step, chains, cores, chain, random_seed, start, progressbar, trace, model, **kwargs)
    963     sampler = ps.ParallelSampler(
    964         draws, tune, chains, cores, random_seed, start, step,
--> 965         chain, progressbar)
    966     try:
    967         try:

~\Anaconda3\lib\site-packages\pymc3\parallel_sampling.py in __init__(self, draws, tune, chains, cores, seeds, start_points, step_method, start_chain_num, progressbar)
    359                 draws, tune, step_method, chain + start_chain_num, seed, start
    360             )
--> 361             for chain, seed, start in zip(range(chains), seeds, start_points)
    362         ]
    363 

~\Anaconda3\lib\site-packages\pymc3\parallel_sampling.py in <listcomp>(.0)
    359                 draws, tune, step_method, chain + start_chain_num, seed, start
    360             )
--> 361             for chain, seed, start in zip(range(chains), seeds, start_points)
    362         ]
    363 

~\Anaconda3\lib\site-packages\pymc3\parallel_sampling.py in __init__(self, draws, tune, step_method, chain, seed, start)
    249                     # all its error message
    250                     time.sleep(0.2)
--> 251                     raise exc
    252             raise
    253 

RuntimeError: The communication pipe between the main process and its spawned children is broken.
In Windows OS, this usually means that the child process raised an exception while it was being spawned, before it was setup to communicate to the main process.
The exceptions raised by the child process while spawning cannot be caught or handled from the main process, and when running from an IPython or jupyter notebook interactive kernel, the child's exception and traceback appears to be lost.
A known way to see the child's error, and try to fix or handle it, is to run the problematic code as a batch script from a system's Command Prompt. The child's exception will be printed to the Command Promt's stderr, and it should be visible above this error and traceback.
Note that if running a jupyter notebook that was invoked from a Command Prompt, the child's exception should have been printed to the Command Prompt on which the notebook is running.

Как это сделать? Это правильное направление? (Конечная цель состоит в подборе смеси некоторых обобщенных косо-нормальных распределений.)

...