Вот проблема:
Когда я использовал расстояние Кука для поиска влиятельных наблюдений в простой линейной регрессии (SLR), я попробовал два метода.
Первый:
plot(mortality.model, which = 4)
Этот метод дает мне правильный ответ. Второй:
plot(cooks.distance(mortality.model), type = 'p')
identify(cooks.distance(mortality.model))
Этот метод дает мне неправильный ответ, хотя и очень близкий к правильному.
Чтение набора данных:
df.mortality <- read.csv("mortality.csv", header = TRUE)
Построение модели:
mortality.model <- lm(log(infant) ~ log(income))
Кстати, в наборе данных есть пропущенные значения (NA). Если вы хотите увидеть набор данных, я могу отправить его вам по электронной почте.
Результат dput:
structure(list(X = structure(c(4L, 5L, 7L, 15L, 23L, 29L, 30L, 101L,
41L,43L, 46L, 61L, 62L, 66L, 73L, 79L, 86L, 87L, 10L, 97L, 2L, 25L, 38L,
39L, 40L, 52L, 65L, 75L, 100L, 3L, 9L, 18L, 19L, 21L, 24L, 32L, 33L, 42L,
45L, 50L, 55L, 58L, 63L, 68L, 71L, 77L, 83L, 89L, 93L, 94L, 99L, 103L,
105L, 8L, 14L, 20L, 26L, 27L, 31L, 36L, 44L, 47L, 80L, 51L, 59L, 69L, 70L,
72L, 88L, 91L, 95L, 81L, 1L, 6L,11L, 12L, 13L, 16L, 17L, 22L, 28L, 34L,
35L, 37L, 48L, 49L, 53L, 54L, 56L, 57L, 60L, 64L, 67L, 74L, 76L, 78L, 84L,
85L, 90L, 92L, 96L, 98L, 82L, 102L, 104L), .Label = c("Afganistan",
"Algeria", "Argentina", "Australia", "Austria", "Bangladesh","Belgium",
"Bolivia", "Brazil", "Britain", "Burma","Burundi","Cambodia","Cameroon",
"Canada", "Central.African.Republic", "Chad","Chile", "Colombia","Congo",
"Costa.Rica", "Dahomey", "Denmark", "Dominican.Republic", "Ecuador",
"Egypt", "El.Salvador", "Ethiopia", "Finland", "France", "Ghana",
"Greece", "Guatemala", "Guinea", "Haiti", "Honduras", "India",
"Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy", "Ivory.Coast",
"Jamaica", "Japan", "Jordan", "Kenya", "Laos", "Lebanon", "Liberia",
"Libya", "Madagascar", "Malawi", "Malaysia", "Mali", "Mauritania",
"Mexico", "Moroco", "Nepal", "Netherlands", "New.Zealand", "Nicaragua",
"Niger", "Nigeria", "Norway", "Pakistan", "Panama", "Papua.New.Guinea",
"Paraguay", "Peru", "Philippines", "Portugal", "Rwanda", "Saudi.Arabia",
"Sierra.Leone", "Singapore", "Somalia", "South.Africa", "South.Korea",
"South.Vietnam", "Southern.Yemen", "Spain", "Sri.Lanka", "Sudan",
"Sweden", "Switzerland", "Syria", "Taiwan", "Tanzania", "Thailand",
"Togo", "Trinidad.and.Tobago", "Tunisia", "Turkey", "Uganda",
"United.States", "Upper.Volta", "Uruguay", "Venezuela", "West.Germany",
"Yemen", "Yugoslavia", "Zaire", "Zambia"), class = "factor"),
income = c(3426L, 3350L, 3346L, 4751L, 5029L, 3312L, 3403L,
5040L, 2009L, 2298L, 3292L, 4103L, 3723L, 4102L, 956L, 1000L,
5596L, 2963L, 2503L, 5523L, 400L, 250L, 110L, 1280L, 560L,
3010L, 220L, 1530L, 1240L, 1191L, 425L, 590L, 426L, 725L,
406L, 1760L, 302L, 2526L, 727L, 631L, 295L, 684L, 507L, 754L,
335L, 1268L, 1256L, 261L, 732L, 434L, 799L, 406L, 310L, 200L,
100L, 281L, 210L, 319L, 217L, 284L, 387L, 334L, 344L, 197L,
279L, 477L, 347L, 230L, 334L, 210L, 435L, 130L, 75L, 100L,
73L, 68L, 123L, 122L, 70L, 81L, 79L, 79L, 100L, 93L, 169L,
71L, 120L, 130L, 50L, 174L, 90L, 70L, 102L, 61L, 148L, 85L,
162L, 125L, 120L, 160L, 134L, 82L, 96L, 77L, 118L), infant = c(26.7,
23.7, 17, 16.8, 13.5, 10.1, 12.9, 20.4, 17.8, 25.7, 11.7,
11.6, 16.2, 11.3, 44.8, 71.5, 9.6, 12.8, 17.5, 17.6, 86.3,
78.5, 125, NA, 28.1, 300, 58, 650, 51.7, 59.6, 170, 78, 62.8,
54.4, 48.8, 27.8, 79.1, 22.1, 26.2, 13.6, 32, 60.9, 46, 34.1,
65.1, 20.4, 15.1, 19.1, 26.2, 76.3, 40.4, 43.3, 259, 60.4,
137, 180, 114, 58.2, 63.7, 39.3, 138, 21.3, 58, 159.2, 149,
10.2, 38.6, 67.9, 21.7, 27, 153, 100, 400, 124.3, 200, 150,
100, 190, 160, 109.6, 84.2, 216, NA, 60.6, 55, NA, 102, 148.3,
120, 187, NA, 200, 124.3, 132.9, 170, 158, 45.1, 129.4, 162.5,
127, 160, 180, 80, 50, 104), region = structure(c(3L, 4L,
4L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 1L, 4L,
4L, 4L, 2L, 1L, 2L, 3L, 3L, 3L, 1L, 1L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 4L,
3L, 2L, 1L, 2L, 4L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 3L,
3L, 1L, 1L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L,
1L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L,
3L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L), .Label = c("Africa",
"Americas", "Asia", "Europe"), class = "factor"), oil = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("no",
"yes"), class = "factor")), class = "data.frame", row.names = c(NA,
-105L))
Спасибо!
Вот результаты: правильный ответ (первый метод) и неправильный ответ (второй метод).
Может ли кто-нибудь объяснить, почему это произошло?