Я обучаю модель PLR с помощью функции train из пакета caret в R. По какой-то причине знаки коэффициентов получаются противоположными ожидаемым — см., например, код ниже. При логистической регрессии через glm знаки коэффициентов верны, а при PLR они обратные.
Исходя из теории и знания данных, я знаю, что коэффициенты при переменных "P_D" и "SHP" должны быть отрицательными (как видно из вывода summary для glm), а свободный член (Intercept) и коэффициент при "SFB" должны быть положительными.
library(caret)
library(stepPlr)
# Convert the 0/1 response to a factor before handing it to caret.
df$y <- as.factor(df$y)

# Fix the RNG seed so the resampling is reproducible.
set.seed(88)

# Fit the "plr" model through caret using 10-fold cross-validation
# repeated 3 times; tuneLength = 3 controls the size of the tuning grid.
# NOTE(review): the sign flip versus glm() is presumably caused by which
# factor level caret's "plr" wrapper codes as the event (glm() models the
# second level) -- confirm against the caret model code.
modelFit <- train(
  y ~ P_D + SFB + SHP,
  data = df,
  method = "plr",
  tuneLength = 3,
  trControl = trainControl(
    method = "repeatedcv",
    number = 10,
    repeats = 3
  )
)
summary(modelFit)
stepPlr::plr(x = x, y = y, weights = if (!is.null(wts)) wts else rep(1,
length(y)), lambda = param$lambda, cp = as.character(param$cp))
Coefficients:
Estimate Std.Error z value Pr(>|z|)
Intercept -5.03481 2.60165 -1.935 0.053
P_D 3635.74505 1808.31782 2.011 0.044
SFB -1372.89625 790.51458 -1.737 0.082
SHP 109.73106 575.97467 0.191 0.849
Null deviance: 68.99 on 49 degrees of freedom
Residual deviance: 0.64 on 46 degrees of freedom
Score: deviance + 3.9 * df = 16.29
GLM
# Reference fit: plain logistic regression via glm() on the same formula
# and data, used to compare coefficient signs with the PLR fit.
model_test <- glm(
  formula = y ~ P_D + SFB + SHP,
  family = "binomial",
  data = df
)
summary(model_test)
glm(formula = y ~ P_D + SFB + SHP, family = "binomial", data = df)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.567e-04 -2.100e-08 -2.100e-08 5.099e-05 5.099e-05
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 20.46 3507.67 0.006 0.995
P_D -18954.07 2647341.00 -0.007 0.994
SFB 7503.18 1261245.16 0.006 0.995
SHP -506.06 192973.16 -0.003 0.998
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 6.8994e+01 on 49 degrees of freedom
Residual deviance: 1.0331e-07 on 46 degrees of freedom
AIC: 8
Number of Fisher Scoring iterations: 25
# Reproducible example data: a 50-row tibble (class "tbl_df") holding the
# integer 0/1 response `y` and the predictors used in the models above
# (`P_D`, `SFB`, `SHP`), plus the columns `SFB1`, `P` and `SH`, which the
# model formulas do not reference.
df <- structure(list(y = c(1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L,
0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L), SFB1 = c(8,
10, 96, 6.33333333333333, 10, 95, 95, 9, 95, 10, 93, 90, 9, 89,
98, 96, 99, 98, 93, 9.8, 95, 88, 10, 10, 93, 93, 88, 83, 78,
10, 96, 99, 92, 90, 86, 89, 90, 96, 97, 10, 97, 98, 10, 90, 87,
87, 76, 94, 88, 98), SFB = c(0,
0, 0, -0.0869565217391304, 0, 0.00531914893617021, 0, 0, 0,
0.0263157894736842,
0, -0.00819672131147541, 0, -0.0136612021857923, 0.0077720207253886,
0.0161290322580645, 0, 0, 0, -0.00505050505050503, 0,
-0.0164835164835165,
0.0263157894736842, 0, -0.00267379679144385, 0.0166666666666667,
-0.0294117647058824, -0.0284090909090909, 0, 0.0263157894736842,
-0.00515463917525773, 0.00769230769230769, 0, -0.00549450549450549,
-0.0326086956521739, 0, 0, 0, -0.00256410256410256, 0, 0,
-0.00253807106598985,
0, -0.00549450549450549, -0.0271739130434783, -0.032258064516129,
0, 0, -0.0138121546961326, 0), P = c(116, 1910, 31.97, 164,
20, 116.95, 50.6, 9, 110.24, 8, 257.05, 62.85, 21, 989.99, 13.29,
29.63, 73.65, 9.2, 209.13, 22.6, 8.7, 8.42, 7, 15, 77.77, 510.72,
49.95, 18.48, 100.66, 88, 9.98, 24.4, 246.6, 46.24, 21.35, 65.44,
411.14, 18.16, 16.94, 10, 0.61, 15.99, 2020, 26.85, 50.44, 32.64,
8.45, 12.41, 30.8, 11.69), P_D = c(0, 0, 0, 0.148221343873518,
0, 0.0285875706214689, 0, 0, 0, 0.0714285714285714, 0,
0.00360576923076922,
0, 0.0208746573504575, 0.00513112884834663, 0.0202809482001756,
0, 0, 0, 0.0707070707070707, 0, 0.0400898011545863, 0, 0,
0.125764402961056,
0.0157120930608288, 0.0263435194942044, 0.172978878368536, 0,
0.0333333333333333, 0, 0.0729044376614228, 0, 0.0615739616225407,
0.184294871794872, 0, 0, 0, 0.0257603972687772, 0, 0.0126050420168067,
0.041299932295193, 0, 0.127776478840309, 0.0987654320987654,
0.000536727495782858, 0, 0, 0.0236314178850731, 0), SH = c(36,
24, 24, 20, 36, 36, 24, 36, 60, 0, 24, 24, 36, 108, 36, 36, 24,
24, 36, 175.2, 24, 60, 1560, 36, 36, 36, 36, 24, 36, 0, 24, 84,
36, 24, 36, 36, 24, 36, 36, 36, 24, 24, 840, 24, 108, 84, 36,
36, 108, 36), SHP = c(0, 0, 0, -0.34375, 0, -0.125,
0, 0, 0, -0.5, 0, 0, 0, 0.142857142857143, 0, 0, 0, 0, 0,
0.329545454545455,
0, 0.214285714285714, 0.477443609022556, 0, 0, 0.1, 0, 0, 0,
-0.5, 0, 0.2, 0, -0.1, 0, 0, 0, 0, 0, 0, -0.1, -0.1, 0.472222222222222,
0, 0.25, -0.0625, 0, 0, 0.25, 0)), row.names = c(NA, -50L), class =
c("tbl_df",
"tbl", "data.frame"))