Модель логистической регрессии с непрерывными и категориальными переменными - PullRequest
0 голосов
/ 28 мая 2020

У меня есть набор данных «ex» с категориальными и непрерывными переменными. Переменная отклика - A16. Я хочу построить модель логистической регрессии, но получаю сообщение:

glm.fit: fitted probabilities numerically 0 or 1 occurred (подобранные вероятности численно равны 0 или 1)

Я подозреваю, что что-то не так со структурой набора данных, хотя, насколько я понимаю, факторы не должны быть проблемой для моделей GLM в R. Вот набор данных:

   A1    A2     A3 A4 A5 A6 A7     A8 A9 A10 A11 A12 A13      A14  A15 A16
68  b 19.42  6.500  u  g  w  h  1.460  t   t   7   f   g  80.0000 2954   1
69  b 35.17 25.125  u  g  x  h  1.625  t   t   1   t   g 515.0000  500   1
70  b 32.33  7.500  u  g  e bb  1.585  t   f   0   t   s 420.0000    0   0
71  b 34.83  4.000  u  g  d bb 12.500  t   f   0   t   g 280.2858    0   0
72  a 38.58  5.000  u  g cc  v 13.500  t   f   0   t   g 980.0000    0   0
73  b 44.25  0.500  u  g  m  v 10.750  t   f   0   f   s 400.0000    0   0
74  b 44.83  7.000  y  p  c  v  1.625  f   f   0   f   g 160.0000    2   0
75  b 20.67  5.290  u  g  q  v  0.375  t   t   1   f   g 160.0000    0   0
76  b 34.08  6.500  u  g aa  v  0.125  t   f   0   t   g 443.0000    0   0
77  a 19.17  0.585  y  p aa  v  0.585  t   f   0   t   g 160.0000    0   0
78  b 21.67  1.165  y  p  k  v  2.500  t   t   1   f   g 180.0000   20   0
'data.frame':    11 obs. of  16 variables:
 $ A1 : Factor w/ 2 levels "a","b": 2 2 2 2 1 2 2 2 2 1 ...
 $ A2 : num  19.4 35.2 32.3 34.8 38.6 ...
 $ A3 : num  6.5 25.1 7.5 4 5 ...
 $ A4 : Factor w/ 3 levels "l","u","y": 2 2 2 2 2 2 3 2 2 3 ...
 $ A5 : Factor w/ 3 levels "g","gg","p": 1 1 1 1 1 1 3 1 1 3 ...
 $ A6 : Factor w/ 14 levels "aa","c","cc",..: 13 14 5 4 3 10 2 11 1 1 ...
 $ A7 : Factor w/ 9 levels "bb","dd","ff",..: 4 4 1 1 8 8 8 8 8 8 ...
 $ A8 : num  1.46 1.62 1.58 12.5 13.5 ...
 $ A9 : Factor w/ 2 levels "f","t": 2 2 2 2 2 2 1 2 2 2 ...
 $ A10: Factor w/ 2 levels "f","t": 2 2 1 1 1 1 1 2 1 1 ...
 $ A11: int  7 1 0 0 0 0 0 1 0 0 ...
 $ A12: Factor w/ 2 levels "f","t": 1 2 2 2 2 1 1 1 2 2 ...
 $ A13: Factor w/ 3 levels "g","p","s": 1 1 3 1 1 3 1 1 1 1 ...
 $ A14: num  80 515 420 280 980 ...
 $ A15: int  2954 500 0 0 0 0 2 0 0 0 ...
 $ A16: num  1 1 0 0 0 0 0 0 0 0 ...

dput(ex)
structure(list(A1 = structure(c(2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 
2L, 1L, 2L), .Label = c("a", "b"), class = "factor"), A2 = c(19.42, 
35.17, 32.33, 34.83, 38.58, 44.25, 44.83, 20.67, 34.08, 19.17, 
21.67), A3 = c(6.5, 25.125, 7.5, 4, 5, 0.5, 7, 5.29, 6.5, 0.585, 
1.165), A4 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 
3L, 3L), .Label = c("l", "u", "y"), class = "factor"), A5 = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 3L), .Label = c("g", "gg", 
"p"), class = "factor"), A6 = structure(c(13L, 14L, 5L, 4L, 3L, 
10L, 2L, 11L, 1L, 1L, 9L), .Label = c("aa", "c", "cc", "d", "e", 
"ff", "i", "j", "k", "m", "q", "r", "w", "x"), class = "factor"), 
    A7 = structure(c(4L, 4L, 1L, 1L, 8L, 8L, 8L, 8L, 8L, 8L, 
    8L), .Label = c("bb", "dd", "ff", "h", "j", "n", "o", "v", 
    "z"), class = "factor"), A8 = c(1.46, 1.625, 1.585, 12.5, 
    13.5, 10.75, 1.625, 0.375, 0.125, 0.585, 2.5), A9 = structure(c(2L, 
    2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L), .Label = c("f", 
    "t"), class = "factor"), A10 = structure(c(2L, 2L, 1L, 1L, 
    1L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("f", "t"), class = "factor"), 
    A11 = c(7L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L), A12 = structure(c(1L, 
    2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L), .Label = c("f", 
    "t"), class = "factor"), A13 = structure(c(1L, 1L, 3L, 1L, 
    1L, 3L, 1L, 1L, 1L, 1L, 1L), .Label = c("g", "p", "s"), class = "factor"), 
    A14 = c(80, 515, 420, 280.285801010101, 980, 400, 160, 160, 
    443, 160, 180), A15 = c(2954L, 500L, 0L, 0L, 0L, 0L, 2L, 
    0L, 0L, 0L, 20L), A16 = c(1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0
    )), row.names = 68:78, class = "data.frame")

А вот код модели:

fitex1 <- glm(A16~., data=ex_train, family=binomial)
fitex0 <- glm(A16~1, data=ex_train, family=binomial)
...