Как точно смоделировать полиномиальную регрессию с ковариатами в R? - PullRequest
0 голосов
/ 29 января 2020

У меня есть следующий набор данных (в коде ниже он называется Fakedata):

structure(list(Mean_MD_LEFT = c(0.00088825, 0.001259, 0.001235957, 
0.001280247, 0.001430563, 0.001324122, 0.001494777, 0.001462501, 
0.001216678, 0.001221862, 0.001203147, 0.001215058, 0.001108018, 
0.001503618, 0.001095351, 0.001349668, 0.001276019, 0.001165876, 
0.00130888, 0.001068913, 0.001240759, 0.001226056, 0.001527899, 
0.00116013, 0.00134519, 0.001006155, 0.001231465, 0.001288484, 
0.00143473, 0.00143622, 0.001243509, 0.001264716, 0.001203803, 
0.001307169, 0.00121279, 0.001232703, 0.001401195, 0.001322438, 
0.001298628, 0.001249505, 0.001050123, 0.001105836, 0.00117939, 
0.001326636, 0.001347434, 0.001067201, 0.001319122, 0.001211844, 
0.001197964, 0.001258269, 0.001045118, 0.001277756, 0.001435313, 
0.001321847, 0.001236232, 0.001165788, 0.00116973, 0.00149342, 
0.001027366, 0.001094613, 0.001069547, 0.001306566, 0.001259494, 
0.001240423, 0.001033707, 0.001197759, 0.001394555, 0.001170185, 
0.001260466, 0.001158902, 0.001377764, 0.001309146, 0.001179443, 
0.001383694, 0.001215307, 0.00114434, 0.001431507, 0.001421201, 
0.001064307, 0.001025391, 0.001233471, 0.001170882, 0.001365245, 
0.00136893, 0.001499212, 0.001090375, 0.001427458, 0.001085357, 
0.00103689, 0.001093251, 0.001422171, 0.00119575, 0.001391897, 
0.001210912, 0.001168271, 0.001324225, 0.001284984, 0.001033035, 
0.001166252, 0.001222974, 0.001245109, 0.001178002, 0.00116386, 
0.001324646, 0.001451479, 0.001457251, 0.001219186, 0.001230473, 
0.001450659, 0.00125125, 0.001264565, 0.001349405, 0.001083656, 
0.00112727, 0.001346319, 0.001252156, 0.001476877, 0.001169471, 
0.001259253, 0.001012222, 0.001057799, 0.001122175, 0.001108255, 
0.001358312, 0.001331997, 0.001283446, 0.001203632, 0.001236934, 
0.001347706, 0.001146588, 0.001022434, 0.001138765, 0.001397827, 
0.001414755, 0.001403063, 0.001014438, 0.001331307, 0.001237988, 
0.001362939, 0.001082224, 0.00133866, 0.001468182, 0.001363511, 
0.001388554, 0.001532314, 0.00126091, 0.001182895, 0.001202614, 
0.001303636, 0.001102782, 0.001086897, 0.001496149, 0.001501578, 
0.001155195, 0.001257773, 0.001316343, 0.001229209, 0.001426947, 
0.001181559, 0.001374065, 0.00146704, 0.001166356, 0.001307215, 
0.001292058, 0.001435635, 0.001173174, 0.001330012, 0.001446601, 
0.001159955, 0.001143457, 0.000992727, 0.001461219, 0.001225041, 
0.00128361, 0.001234469, 0.00147448, 0.001100211, 0.001385283, 
0.001035872, 0.001182171, 0.001230223, 0.001402662, 0.001328903, 
0.001472754, 0.001282768, 0.001014782, 0.001053086, 0.001335282, 
0.001243229, 0.001452911, 0.001282477, 0.001290732, 0.00125048, 
0.001112043, 0.001126495, 0.001252536, 0.001200341, 0.00131363, 
0.001175371, 0.001161454, 0.001312941, 0.001473375, 0.001187962, 
0.001314401, 0.001169353, 0.00134934, 0.001227139, 0.00136958, 
0.00136663, 0.001079438, 0.001116415, 0.001297459, 0.001094325, 
0.001048047, 0.00114134, 0.001056977, 0.00120083, 0.001306271, 
0.001164866, 0.001231917, 0.001140059, 0.001276569, 0.000944486, 
0.000992018, 0.001203498, 0.001039085, 0.001205807, 0.001233297, 
0.00125329, 0.001148577, 0.001412452, 0.001014066, 0.001107054, 
0.001362156, 0.001040276, 0.001097155, 0.001372316, 0.001289548, 
0.00107317, 0.001292969, 0.001313911, 0.001152729, 0.001279696, 
0.001029458, 0.001048689, 0.001050842, 0.00107934, 0.001118004, 
0.001043398, 0.00138693, 0.001400837, 0.001431492, 0.001558817, 
0.001265295, 0.001282091, 0.00125957, 0.001286478, 0.001142287, 
0.001225016, 0.001171392, 0.001105721, 0.001255527, 0.001278233, 
0.001279594, 0.001262035, 0.001200033, 0.001536636, 0.00128721, 
0.001413856, 0.001492463, 0.001424841, 0.001116566, 0.001503886, 
0.001302865, 0.001494661, 0.001578798, 0.001289593, 0.00110231, 
0.001552079, 0.001168193, 0.001198898, 0.001297516, 0.001471236, 
0.001394322, 0.001067478, 0.001300658, 0.001485742, 0.001519234, 
0.001205528, 0.001454817, 0.001156602, 0.001474134, 0.001378713, 
0.00115438, 0.001153431, 0.001229611, 0.001254038, 0.001146266, 
0.001366854, 0.001317427, 0.001300542, 0.00141882, 0.001391517, 
0.001292336), SexCoded = c("Male", "Male", "Female", "Male", 
NA, "Male", "Male", "Male", NA, "Female", "Female", NA, "Female", 
NA, "Female", NA, "Female", "Male", "Male", "Female", "Male", 
"Male", "Male", "Female", "Male", "Female", "Male", NA, "Male", 
"Male", NA, "Male", "Male", "Male", "Female", "Female", NA, "Male", 
NA, NA, "Female", "Female", "Male", "Male", "Female", "Male", 
"Male", "Female", "Male", "Male", "Male", "Female", "Female", 
"Female", "Female", "Male", "Male", "Male", "Male", "Male", "Female", 
"Female", "Male", "Male", "Male", "Female", "Female", NA, "Male", 
NA, "Male", "Female", "Male", NA, "Female", "Female", "Female", 
"Male", "Male", "Male", "Female", "Male", "Male", NA, "Male", 
"Male", NA, "Female", "Male", "Female", "Male", "Male", "Male", 
NA, "Male", "Male", "Male", "Female", "Male", "Female", "Male", 
"Female", "Female", "Male", "Male", "Female", NA, "Male", NA, 
"Female", "Male", "Female", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Female", "Male", "Female", "Male", "Male", "Male", "Female", 
"Male", "Male", "Male", "Female", "Female", NA, "Male", "Female", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Female", 
"Female", "Female", "Female", "Male", "Male", "Female", "Male", 
"Female", "Male", "Male", "Male", "Male", "Female", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Female", "Female", 
"Male", "Female", "Male", "Male", "Male", "Male", "Male", "Female", 
"Male", "Male", "Female", "Female", "Male", "Male", "Female", 
NA, "Female", "Female", "Male", "Female", "Female", "Male", "Male", 
"Male", "Male", "Female", "Male", "Male", "Female", "Female", 
"Female", "Female", "Male", "Male", "Male", "Female", "Male", 
"Male", "Male", "Female", "Female", "Female", NA, "Male", "Male", 
"Male", "Male", "Male", "Male", "Female", "Female", "Female", 
"Female", "Male", "Male", "Female", "Male", "Female", "Male", 
"Female", "Male", "Male", "Male", "Female", "Male", "Male", "Male", 
"Male", "Female", "Male", "Female", "Female", "Male", "Female", 
"Male", "Male", "Male", NA, "Female", "Male", "Female", "Male", 
"Male", "Male", "Female", "Male", "Female", "Male", "Male", "Female", 
"Female", "Female", NA, "Male", "Female", "Male", "Male", "Male", 
NA, "Male", "Male", "Male", NA, "Male", "Male", "Female", "Female", 
"Male", "Female", "Female", NA, "Male", "Male", "Male", "Male", 
"Male", "Female", "Male", "Male", "Male", "Male", "Male", "Female", 
"Male", "Male", "Female", NA, "Male", "Female", "Male", "Male", 
"Male", "Female"), AgeGroup = structure(c(2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 
2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 
2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 
1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 
2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 
2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 
2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Young", 
"Old"), class = "factor"), DS116 = c(50, 19, 47, 42, 30, 49, 
24, 37, 31, 36, 39, 45, 49, 53, 35, 30, 39, 75, 53, 62, 75, 35, 
24, 43, 57, 72, 35, NA, 63, 57, 39, 33, 36, 45, 34, 50, 36, 36, 
65, 46, 47, 28, 30, 57, 42, 71, 30, 45, 46, 38, 66, 25, 50, 36, 
41, 24, 82, 46, 70, 49, 41, 60, 56, 26, 64, 66, 36, 32, 31, NA, 
25, 49, 49, 36, 75, 44, 49, 34, 52, 65, 34, 39, 46, 37, 40, 66, 
51, 54, 56, 69, 50, 56, 47, 62, 38, 25, 42, 55, 47, 63, 45, 45, 
50, 50, 35, 40, 57, 51, 47, 44, 42, 34, 56, 56, 25, 38, 37, 65, 
58, 76, 63, 62, 67, 40, 50, 67, 44, 60, 42, 67, 50, 49, 45, 26, 
33, 67, 46, 66, 30, 58, 57, 32, 34, 38, 46, 72, 41, 46, 40, 45, 
41, 35, 43, 54, 46, 38, 40, 39, 38, 31, 42, 44, 31, 42, 43, 49, 
25, 39, 33, 43, 69, 47, 59, 48, 48, 42, 66, 22, 74, 43, 39, 54, 
63, 40, 67, 84, 71, 31, 44, 30, 76, 38, 39, 60, 46, 39, 60, 39, 
49, 63, 41, 37, 53, NA, 49, 54, 52, 35, 45, 69, 77, 73, 65, 74, 
43, 84, 64, 33, 47, 57, 67, 57, 62, 81, 52, 61, 37, 26, 39, 53, 
38, 82, 32, 46, 72, 57, 38, 22, 55, 43, 43, 75, 78, 70, 79, 59, 
43, 64, 62, 41, 37, 53, 48, 65, 46, 35, 19, 65, 33, 76, 44, 30, 
36, 41, 52, 46, 37, 58, 56, 28, 40, 47, 58, 34, 52, 32, 49, 60, 
38, 62, 68, 61, 30, 41, 72, 44, 31, 65, 31, 58, 49, 38, 50, 41, 
41, 40, 51, 46, 38, 53, 46, 40, 39, 59)), row.names = c(NA, -304L
), class = c("tbl_df", "tbl", "data.frame"))

Его первые строки выглядят следующим образом:

# A tibble: 6 x 4
  Mean_MD_LEFT SexCoded AgeGroup DS116
         <dbl> <chr>    <fct>    <dbl>
1     0.000888 Male     Old         50
2     0.00126  Male     Old         19
3     0.00124  Female   Old         47
4     0.00128  Male     Old         42
5     0.00143  NA       Old         30
6     0.00132  Male     Old         49

Когда я строю график этих данных (x = Mean_MD_LEFT, y = DS116, цвет точек по AgeGroup), кажется, что зависимость слегка искривлена, и это заставляет меня задаться вопросом, не подойдёт ли лучше полином 2-го порядка.

Вот моя простая линейная модель:

# Baseline model: DS116 as a linear function of Mean_MD_LEFT, with AgeGroup
# and SexCoded as additional covariates.
lm1 <- lm(
  DS116 ~ Mean_MD_LEFT + AgeGroup + SexCoded,
  data = Fakedata
)

Я не понимаю, как правильно задать полиномиальную модель. Должен ли полиномом быть только интересующий меня предиктор (Mean_MD_LEFT), или AgeGroup и SexCoded тоже нужно включать в виде полиномиальных членов?

Например, корректно ли так моделировать и визуализировать данные?

# Same covariates as the plain linear model, but Mean_MD_LEFT enters through
# poly(..., degree = 2), i.e. a linear and a quadratic term.
lm2 <- lm(
  DS116 ~ poly(Mean_MD_LEFT, degree = 2) + AgeGroup + SexCoded,
  data = Fakedata
)

# Scatter plot of DS116 against Mean_MD_LEFT with points coloured by AgeGroup,
# overlaid with a second-degree polynomial least-squares fit drawn in dark
# slate grey and without a confidence band.
# Note: the smoother's formula is y ~ poly(x, 2) only, so the drawn curve does
# not include the SexCoded term.
p1 <- ggplot(Fakedata, aes(x = Mean_MD_LEFT, y = DS116, color = AgeGroup)) +
  geom_point() +
  stat_smooth(
    method = "lm",
    formula = y ~ poly(x, 2),
    col = "darkslategrey",
    se = FALSE  # spelled out: `F` is an ordinary variable and can be reassigned
  ) +
  xlab("MD") +
  ylab("DS")

Наконец, означает ли эта сводка (summary) модели lm2, что полиномиальный член НЕ значим и использовать его не стоит?

Call:
lm(formula = DS116 ~ poly(Mean_MD_LEFT, 2) + AgeGroup + SexCoded, 
    data = Fakedata)

Residuals:
     Min       1Q   Median       3Q      Max 
-28.4208  -7.4362  -0.5791   6.1186  29.9390 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)              67.212      2.169  30.987  < 2e-16 ***
poly(Mean_MD_LEFT, 2)1  -26.194     14.579  -1.797 0.073512 .  
poly(Mean_MD_LEFT, 2)2    4.159     11.967   0.348 0.728463
AgeGroupOld             -19.628      2.177  -9.016  < 2e-16 ***
SexCodedMale             -5.252      1.373  -3.826 0.000162 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 10.57 on 271 degrees of freedom
  (28 observations deleted due to missingness)
Multiple R-squared:  0.4425,    Adjusted R-squared:  0.4343 
F-statistic: 53.77 on 4 and 271 DF,  p-value: < 2.2e-16
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...