Проблема с подгонкой распределения Пуассона с избытком нулей (zero-inflated Poisson) в R - PullRequest
1 голос
/ 05 мая 2020

У меня 755 строк данных, из которых ~ 87% — нули. Мне не удаётся подогнать к этим данным регрессию Пуассона с избытком нулей (zero-inflated Poisson), отрицательную биномиальную (или любую другую) регрессию. Я попробовал 4 разных способа, и ни один из них не работает. Я даже не совсем уверен, следует ли мне использовать именно эти регрессии. Любая помощь приветствуется. Я также не очень хорошо умею программировать, что, я уверен, будет очевидно.

Я знаю, что это длинно, но вот мои фактические данные:

c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0.134916351861846, 
0, 0.149907057624273, 0, 0, 0, 0, 0, 0, 0, 0, 0.269832703723691, 
0.134916351861846, 0.134916351861846, 0, 0, 0.269832703723691, 
0, 0.269832703723691, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.112430293218205, 0, 0, 0, 
0, 0, 0, 0.367953686895943, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0.122651228965314, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.337290879654614, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0.578212936550767, 0, 0, 0.404749055585537, 0, 0, 0, 0.269832703723691, 
0.269832703723691, 0, 0, 0.299814115248546, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0.404749055585537, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.192737645516922, 
0.192737645516922, 0, 0, 0, 0.134916351861846, 0, 0.134916351861846, 
0, 0, 0, 0, 0, 0.404749055585537, 0.134916351861846, 0.134916351861846, 
0.337290879654614, 0, 0, 0, 0, 0.674581759309228, 0, 0.134916351861846, 
0, 0.299814115248546, 0.168645439827307, 0.449721172872819, 0, 
0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0.134916351861846, 
0, 0, 0, 0.122651228965314, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0.134916351861846, 0, 0.134916351861846, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0.122651228965314, 0, 0, 0.134916351861846, 
0, 0, 0.149907057624273, 0, 0, 0, 0, 0.269832703723691, 0, 0, 
0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0.449721172872819, 0, 0, 0, 0, 0, 0, 0, 0.112430293218205, 
0, 0, 0.134916351861846, 0.539665407447383, 0.134916351861846, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0.134916351861846, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0.134916351861846, 0.134916351861846, 0, 0, 0, 0, 0, 0.134916351861846, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 
0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0, 0, 0.404749055585537, 
0, 0, 0.674581759309228, 0.269832703723691, 0, 0, 0, 0, 0, 0, 
0.134916351861846, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0.269832703723691, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 
0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0.269832703723691, 
0.269832703723691, 0.134916351861846, 0, 0.404749055585537, 0.809498111171074, 
0, 0.134916351861846, 0.134916351861846, 1.07933081489477, 0.134916351861846, 
0, 0.269832703723691, 0, 0.94441446303292, 0.245302457930628, 
0, 0, 0, 0, 0, 0.245302457930628, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)

Вот 4 способа, которые я попробовал сегодня.

> hog.cpue <- hogA$hog.cpue
> fitg <- fitdist(hog.cpue, "ZIP")
Error in computing default starting values.
Error in manageparam(start.arg = start, fix.arg = fix.arg, obs = data,  : 
  Error in start.arg.default(obs, distname) : 
  Unknown starting values for distribution ZIP.
> fit_zip2 <- fitdist(hogA$hog.cpue, 'nbinom', start = list(mu = 0.293, size = 0.1)) 
<simpleError in optim(par = vstart, fn = fnobj, fix.arg = fix.arg, obs = data,     gr = gradient, ddistnam = ddistname, hessian = TRUE, method = meth,     lower = lower, upper = upper, ...): function cannot be evaluated at initial parameters>
Error in fitdist(hogA$hog.cpue, "nbinom", start = list(mu = 0.293, size = 0.1)) : 
  the function mle failed to estimate the parameters, 
                with the error code 100
> fitzip <- fitdist(hogA$hog.cpue, "ZIP", start = list(mu = 0.293, sigma = 0.1), discrete = TRUE,
+                   optim.method = "L-BFGS-B", lower = c(0, 0), upper = c(Inf, 1))
<simpleError in dZIP(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0.134916351861846, 0, 0.149907057624273, 0, 0, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0.134916351861846, 0.134916351861846, 0, 0, 0.269832703723691, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.112430293218205, 0, 0, 0, 0, 0, 0, 0.367953686895943, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.122651228965314, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.337290879654614, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.578212936550767, 0, 0, 0.404749055585537, 0, 0, 0, 0.269832703723691, 0.269832703723691, 0, 0, 0.299814115248546, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.192737645516922, 0.192737645516922, 0, 0, 0, 0.134916351861846, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0.404749055585537, 0.134916351861846, 0.134916351861846, 0.337290879654614, 0, 0, 0, 0, 0.674581759309228, 0, 0.134916351861846, 0, 0.299814115248546, 0.168645439827307, 0.449721172872819, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0.122651228965314, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.122651228965314, 0, 0, 0.134916351861846, 0, 0, 0.149907057624273, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.449721172872819, 0, 0, 0, 0, 0, 0, 0, 0.112430293218205, 0, 0, 0.134916351861846, 0.539665407447383, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0.134916351861846, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0, 0, 0.404749055585537, 0, 0, 0.674581759309228, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0.269832703723691, 0.269832703723691, 0.134916351861846, 0, 0.404749055585537, 0.809498111171074, 0, 0.134916351861846, 0.134916351861846, 1.07933081489477, 0.134916351861846, 0, 0.269832703723691, 0, 0.94441446303292, 0.245302457930628, 0, 0, 0, 0, 0, 0.245302457930628, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),     mu = 0, sigma = 1, log = TRUE): mu must be greater than 0 
 >
Error in fitdist(hogA$hog.cpue, "ZIP", start = list(mu = 0.293, sigma = 0.1),  : 
  the function mle failed to estimate the parameters, 
                with the error code 100
In addition: Warning messages:
1: In fitdist(hogA$hog.cpue, "ZIP", start = list(mu = 0.293, sigma = 0.1),  :
  The dZIP function should return a zero-length vector when input has length zero
2: In fitdist(hogA$hog.cpue, "ZIP", start = list(mu = 0.293, sigma = 0.1),  :
  The pZIP function should return a zero-length vector when input has length zero
> fpoisZI <- fitdist(hogA$hog.cpue, "ZIP", start=list(sigma=sum(hogA$hog.cpue == 0)/length(hogA$hog.cpue), mu=mean(hogA$hog.cpue)))
<simpleError in dZIP(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0.134916351861846, 0, 0.149907057624273, 0, 0, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0.134916351861846, 0.134916351861846, 0, 0, 0.269832703723691, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.112430293218205, 0, 0, 0, 0, 0, 0, 0.367953686895943, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.122651228965314, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.337290879654614, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.578212936550767, 0, 0, 0.404749055585537, 0, 0, 0, 0.269832703723691, 0.269832703723691, 0, 0, 0.299814115248546, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.192737645516922, 0.192737645516922, 0, 0, 0, 0.134916351861846, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0.404749055585537, 0.134916351861846, 0.134916351861846, 0.337290879654614, 0, 0, 0, 0, 0.674581759309228, 0, 0.134916351861846, 0, 0.299814115248546, 0.168645439827307, 0.449721172872819, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0.122651228965314, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.122651228965314, 0, 0, 0.134916351861846, 0, 0, 0.149907057624273, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.449721172872819, 0, 0, 0, 0, 0, 0, 0, 0.112430293218205, 0, 0, 0.134916351861846, 0.539665407447383, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0.134916351861846, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0, 0, 0.404749055585537, 0, 0, 0.674581759309228, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0.269832703723691, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.404749055585537, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0.269832703723691, 0.269832703723691, 0.134916351861846, 0, 0.404749055585537, 0.809498111171074, 0, 0.134916351861846, 0.134916351861846, 1.07933081489477, 0.134916351861846, 0, 0.269832703723691, 0, 0.94441446303292, 0.245302457930628, 0, 0, 0, 0, 0, 0.245302457930628, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.134916351861846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),     sigma = 0.426547699594046, mu = -0.020557328452897, log = TRUE): mu must be greater than 0 
 >
Error in fitdist(hogA$hog.cpue, "ZIP", start = list(sigma = sum(hogA$hog.cpue ==  : 
  the function mle failed to estimate the parameters, 
                with the error code 100
In addition: Warning messages:
1: In fitdist(hogA$hog.cpue, "ZIP", start = list(sigma = sum(hogA$hog.cpue ==  :
  The dZIP function should return a zero-length vector when input has length zero
2: In fitdist(hogA$hog.cpue, "ZIP", start = list(sigma = sum(hogA$hog.cpue ==  :
  The pZIP function should return a zero-length vector when input has length zero

1 Ответ

2 голоса
/ 05 мая 2020

Для распределения Пуассона, с избытком нулей или без, значения должны быть неотрицательными и дискретными, то есть целыми числами без дробной части. Я не уверен, что дробные значения в ваших данных — это то, что задумывалось. Разобравшись со значениями, вы можете использовать метод на основе glm:

library(pscl)

x = rpois(1000,20)
x[sample(length(x),200)] = 0
# fits intercept only model
fit = zeroinfl(x ~ 1,dist="poisson")

estimated_mean = exp(coefficients(fit)["count_(Intercept)"])
count_(Intercept) 
         20.14875

estimated_missing = coefficients(fit)["zero_(Intercept)"]
# it's a logit you need to convert to prob
estimated_missing = exp(estimated_missing)/(1+exp(estimated_missing))
zero_(Intercept) 
             0.2 

Если у вас дробные числа потому, что это интенсивность (rate), то вам нужно смещение (offset). Предположим, что истинная средняя интенсивность равна 0,5, а доля избыточных нулей — 0,2:

n = rep(1000 * 1:5 , each=100)
x = rpois(length(n),0.5*n)
x[sample(length(x),0.2*length(n))] = 0
# fits intercept only model
fit = zeroinfl(x ~ 1,dist="poisson",offset=log(n))

Затем вы повторяете описанные выше шаги извлечения коэффициентов и получаете обратно 0,5 как оценку интенсивности и 0,2 как оценку доли избыточных нулей.

...