Прогнозирование конкретного значения x после регрессии LOESS для каждой группы с помощью dplyr::group_by() - PullRequest
1 голос
/ 04 августа 2020

Я хотел бы использовать функцию predict, чтобы предсказать значение x по заданному постоянному значению y. В этом примере я измерял distance для групп cat и dog в зависимости от времени (time). На данный момент я построил модель для каждой группы и извлёк подогнанные (fitted) значения в наблюдаемые моменты времени. Затруднение в том, как с помощью этих моделей найти x для постоянного значения y (measurement), равного 70, для которого измерение в точности не проводилось. Иными словами, я хочу узнать, сколько времени (time) потребовалось и cat, и dog, чтобы достичь measurement == 70.

Код на данный момент:

library(dplyr)
library(tidyr)
library(purrr)

# Fit one loess model (measurement as a function of time) per sample and keep
# each model's fitted values alongside the data it was fitted to.
#
# Rows with a missing key or model variable are dropped *before* nesting:
# under R's default `na.action` option (`na.omit`) loess() silently omits such
# rows, which would leave `fitted` shorter than `data` and break a later
# unnest() of both columns. Calling drop_na() after nest() cannot remove them.
model <- df %>%
  drop_na(sample, time, measurement) %>%
  # Named form; the unnamed `nest(-sample)` is deprecated since tidyr 1.0.0.
  nest(data = -sample) %>%
  # No group_by() is needed: map() already iterates over the list-column,
  # and the result is left ungrouped.
  mutate(
    # Each nested tibble is passed positionally; because `formula` is named,
    # it is matched to loess()'s `data` argument.
    # A small span keeps each local fit close to the neighbouring observations.
    m = purrr::map(data, loess, formula = measurement ~ time, span = 0.1),
    # Pull the fitted values out of every model object.
    fitted = purrr::map(m, `[[`, "fitted")
  )

# Drop the model objects, then expand every sample's observations and fitted
# values back out to one row per observation.
results <- tidyr::unnest(
  dplyr::select(model, -m),
  cols = c(data, fitted)
)

Воспроизводимый код:

df <- structure(list(time = c(5.4919, 5.9919, 6.4919, 6.9919, 7.4919, 
7.9919, 8.4919, 8.9919, 9.4919, 9.9919, 10.4919, 10.9919, 11.4919, 
11.9919, 12.4919, 12.9919, 13.4919, 13.9919, 14.4919, 14.9919, 
15.4919, 15.9919, 16.4919, 16.9919, 17.4919, 17.9919, 18.4919, 
18.9919, 19.4919, 19.9919, 20.4919, 20.9919, 21.4919, 21.9919, 
22.4919, 22.9919, 23.4919, 23.9919, 24.4919, 24.9919, 25.4919, 
25.9919, 26.4919, 26.9919, 27.4919, 27.9919, 28.4919, 28.9919, 
29.4919, 29.9919, 30.4919, 30.9919, 31.4919, 31.9919, 32.4919, 
32.9919, 33.4919, 33.9919, 34.4919, 34.9919, 35.4919, 35.9919, 
36.4919, 36.9919, 37.4919, 37.9919, 38.4919, 38.9919, 39.4919, 
39.9919, 40.4919, 40.9919, 41.4919, 41.9919, 42.4919, 42.9919, 
43.4919, 43.9919, 44.4919, 44.9919, 45.4919, 45.9919, 46.4919, 
46.9919, 47.4919, 47.9919, 48.4919, 48.9919, 49.4919, 49.9919, 
50.4919, 50.9919, 51.4919, 51.9919, 52.4919, 52.9919, 53.4919, 
53.9919, 54.4919, 54.9919, 55.4919, 55.9919, 56.4919, 56.9919, 
57.4919, 57.9919, 58.4919, 58.9919, 59.4919, 59.9919, 60.4919, 
60.9919, 61.4919, 61.9919, 62.4919, 62.9919, 63.4919, 63.9919, 
64.4919, 64.9919, 65.4919, 65.9919, 66.4919, 66.9919, 67.4919, 
67.9919, 68.4919, 68.9919, 69.4919, 69.9919, 70.4919, 70.9919, 
71.4919, 71.9919, 5.4919, 5.9919, 6.4919, 6.9919, 7.4919, 7.9919, 
8.4919, 8.9919, 9.4919, 9.9919, 10.4919, 10.9919, 11.4919, 11.9919, 
12.4919, 12.9919, 13.4919, 13.9919, 14.4919, 14.9919, 15.4919, 
15.9919, 16.4919, 16.9919, 17.4919, 17.9919, 18.4919, 18.9919, 
19.4919, 19.9919, 20.4919, 20.9919, 21.4919, 21.9919, 22.4919, 
22.9919, 23.4919, 23.9919, 24.4919, 24.9919, 25.4919, 25.9919, 
26.4919, 26.9919, 27.4919, 27.9919, 28.4919, 28.9919, 29.4919, 
29.9919, 30.4919, 30.9919, 31.4919, 31.9919, 32.4919, 32.9919, 
33.4919, 33.9919, 34.4919, 34.9919, 35.4919, 35.9919, 36.4919, 
36.9919, 37.4919, 37.9919, 38.4919, 38.9919, 39.4919, 39.9919, 
40.4919, 40.9919, 41.4919, 41.9919, 42.4919, 42.9919, 43.4919, 
43.9919, 44.4919, 44.9919, 45.4919, 45.9919, 46.4919, 46.9919, 
47.4919, 47.9919, 48.4919, 48.9919, 49.4919, 49.9919, 50.4919, 
50.9919, 51.4919, 51.9919, 52.4919, 52.9919, 53.4919, 53.9919, 
54.4919, 54.9919, 55.4919, 55.9919, 56.4919, 56.9919, 57.4919, 
57.9919, 58.4919, 58.9919, 59.4919, 59.9919, 60.4919, 60.9919, 
61.4919, 61.9919, 62.4919, 62.9919, 63.4919, 63.9919, 64.4919, 
64.9919, 65.4919, 65.9919, 66.4919, 66.9919, 67.4919, 67.9919, 
68.4919, 68.9919, 69.4919, 69.9919, 70.4919, 70.9919, 71.4919, 
71.9919), measurement_type = structure(c(1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "distance", class = "factor"), 
    measurement = c(27.3, 27.7, 28.3, 29.1, 30, 31.1, 32.3, 33.6, 
    34.8, 36.2, 37.6, 39.2, 40.9, 42.6, 44.5, 46.4, 48.6, 50.7, 
    53.1, 55.6, 58.2, 60.9, 63.5, 66.4, 69.1, 72, 74.7, 77.2, 
    79.5, 82.3, 85, 87.4, 89.6, 91.8, 91.7, 92.5, 92.5, 92.7, 
    92.5, 92.2, 91.9, 91.7, 91.5, 91.2, 91, 90.8, 90.7, 90.6, 
    90.4, 90.4, 90.3, 90.2, 90.2, 90.2, 90.1, 90.1, 90.1, 90.1, 
    90.1, 90.1, 90.1, 90.1, 90.1, 90.1, 90.1, 90.1, 90.1, 90.1, 
    90.2, 90.2, 90.2, 90.2, 90.2, 90.2, 90.2, 90.2, 90.3, 90.3, 
    90.3, 90.2, 90.3, 90.3, 90.3, 90.3, 90.3, 90.3, 90.3, 90.3, 
    90.3, 90.3, 90.3, 90.2, 90.2, 90.2, 90.2, 90.2, 90.2, 90.1, 
    90.1, 90.1, 90.1, 90.1, 90, 90, 90, 89.9, 89.9, 89.8, 89.8, 
    89.7, 89.7, 89.7, 89.6, 89.5, 89.5, 89.4, 89.4, 89.4, 89.3, 
    89.2, 89.2, 89.1, 89.1, 89, 88.9, 88.9, 88.9, 88.7, 88.7, 
    88.7, 88.6, 88.6, 88.5, 88.5, 29.6, 31.5, 33.5, 35.8, 38.3, 
    40.8, 43.2, 45.5, 47.8, 50, 52.1, 54.3, 56.3, 58.3, 60.3, 
    62.2, 64, 66, 67.8, 69.7, 71.4, 73.3, 74.9, 76.6, 78.3, 79.7, 
    81.2, 82.6, 83.9, 85.2, 86.4, 87.6, 88.7, 89.9, 90.7, 91.7, 
    92.5, 93.2, 93.9, 94.4, 94.9, 95.2, 95.5, 95.7, 95.7, 95.7, 
    95.7, 95.6, 95.6, 95.6, 95.5, 95.6, 95.5, 95.5, 95.5, 95.5, 
    95.6, 95.6, 95.6, 95.7, 95.7, 95.7, 95.8, 95.8, 95.8, 95.8, 
    95.8, 95.9, 95.9, 95.9, 95.9, 96, 96, 96, 96.1, 96, 96, 96, 
    96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 
    95.9, 96, 95.9, 95.9, 95.8, 95.8, 95.8, 95.8, 95.8, 95.9, 
    95.7, 95.7, 95.6, 95.6, 95.6, 95.5, 95.6, 95.4, 95.4, 95.4, 
    95.3, 95.2, 95.3, 95.2, 95.2, 95.1, 95.1, 95.1, 95, 95, 94.9, 
    94.9, 94.9, 94.9, 94.8, 94.7, 94.6, 94.6, 94.6, 94.5, 94.6
    ), sample = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L), .Label = c("cat", "dog"), class = "factor")), row.names = c(NA, 
-268L), class = "data.frame")

Я дошёл до этого, следуя этому вопросу на SO: регрессия LOESS для каждой группы с помощью dplyr::group_by()

1 Ответ

1 голос
/ 04 августа 2020

Возможно, я слишком упрощаю, но разве вам не нужно просто...

# Evaluate every stored model at a new predictor value of 70. For models
# fitted as `measurement ~ time` this is the measurement predicted at
# time = 70, not the time at which measurement reaches 70.
purrr::map(model$m, function(fit) predict(fit, newdata = 70))
[[1]]
[1] 88.66499

[[2]]
[1] 94.66321

Поскольку зависимость двумерная, можно поменять направление предсказания на противоположное, то есть моделировать time как функцию measurement:

library(dplyr)
library(purrr)
library(tidyr)
# Fit the inverse relationship (time as a function of measurement) per sample,
# so that predict() can be asked for the time at a given measurement.
#
# Rows with a missing key or model variable are dropped *before* nesting:
# under R's default `na.action` option (`na.omit`) loess() silently omits such
# rows, which would leave `fitted` shorter than `data`. Calling drop_na()
# after nest() cannot remove them.
model <- df %>%
  drop_na(sample, time, measurement) %>%
  # Named form; the unnamed `nest(-sample)` is deprecated since tidyr 1.0.0.
  nest(data = -sample) %>%
  # No group_by() is needed: map() already iterates over the list-column,
  # and the result is left ungrouped.
  mutate(
    # Each nested tibble is passed positionally; because `formula` is named,
    # it is matched to loess()'s `data` argument.
    # NOTE(review): measurement repeats many values on its plateau, which is
    # presumably why loess() warns about a pseudoinverse; the inverse fit is
    # only meaningful where measurement changes monotonically with time.
    m = purrr::map(data, loess, formula = time ~ measurement, span = 0.25),
    # Pull the fitted values out of every model object.
    fitted = purrr::map(m, `[[`, "fitted")
  )
#> Warning: Problem with `mutate()` input `m`.
#> x pseudoinverse used at 90.2
#> ℹ Input `m` is `purrr::map(data, loess, formula = time ~ measurement, span = 0.25)`.


# Label each model with its sample so the predictions print by name.
names(model$m) <- as.character(model$sample)
# Evaluate every model at a new predictor value of 70.
purrr::map(model$m, function(fit) predict(fit, newdata = 70))
#> $cat
#> [1] 17.08772
#> 
#> $dog
#> [1] 15.03579
...