Вот полное решение, начиная с df
, воспроизводимый пример:
# load libraries
load_pkgs <- c("forecast", "zoo", "timetk", "tidyverse")
sapply(load_pkgs, function(x) suppressPackageStartupMessages(library(x, character.only = T)))
Шаг 1: Предварительная обработка
# perform split by group
ld <- split(df[, -1], df$group)
# Tidy-up the splits
ld <- lapply(ld, function(x) {
x %>%
gather(key, value, -year) %>%
unite(date, year, key, sep = "-") %>%
mutate(date = paste0(date, "-01")) %>%
mutate(date = as.Date(date, format = "%Y-%b-%d"))
})
dput
первые ts:
structure(list(date = structure(c(1096, 1461, 1826, 2191, 2557,
2922, 1127, 1492, 1857, 2222, 2588, 2953, 1155, 1520, 1885, 2251,
2616, 2981, 1186, 1551, 1916, 2282, 2647, 3012, 1216, 1581, 1946,
2312, 2677, 3042, 1247, 1612, 1977, 2343, 2708, 3073, 1277, 1642,
2007, 2373, 2738, 3103, 1308, 1673, 2038, 2404, 2769, 3134, 1339,
1704, 2069, 2435, 2800, 3165, 1369, 1734, 2099, 2465, 2830, 3195,
1400, 1765, 2130, 2496, 2861, 3226, 1430, 1795, 2160, 2526, 2891,
3256), class = "Date"), value = c(9007L, 7750L, 8162L, 7717L,
7792L, 7836L, 8106L, 6981L, 7306L, 7461L, 6957L, 6892L, 8928L,
8038L, 8124L, 7767L, 7726L, 7791L, 9137L, 8422L, 7870L, 7925L,
8106L, 8192L, 10017L, 8714L, 9387L, 8623L, 8890L, 9115L, 10826L,
9512L, 9556L, 8945L, 9299L, 9434L, 11317L, 10120L, 10093L, 10078L,
10625L, 10484L, 10744L, 9823L, 9620L, 9179L, 9302L, 9827L, 9713L,
8743L, 8285L, 8037L, 8314L, 9110L, 9938L, 9129L, 8466L, 8488L,
8850L, 9070L, 9161L, 8710L, 8160L, 7874L, 8265L, 8633L, 8927L,
8680L, 8034L, 8647L, 8796L, 9240L)), class = "data.frame", row.names = c(NA,
-72L))
Затем
# Transform time series to ts objects
ld <- lapply(ld, function(x) {
yr <- lubridate::year(min(x$date))
mth <- lubridate::month(min(x$date))
timetk::tk_ts(data = x, select = value, frequency = 12,
start = c(yr, mth))
})
dput
первые ts:
structure(c(9007L, 8106L, 8928L, 9137L, 10017L, 10826L, 11317L,
10744L, 9713L, 9938L, 9161L, 8927L, 7750L, 6981L, 8038L, 8422L,
8714L, 9512L, 10120L, 9823L, 8743L, 9129L, 8710L, 8680L, 8162L,
7306L, 8124L, 7870L, 9387L, 9556L, 10093L, 9620L, 8285L, 8466L,
8160L, 8034L, 7717L, 7461L, 7767L, 7925L, 8623L, 8945L, 10078L,
9179L, 8037L, 8488L, 7874L, 8647L, 7792L, 6957L, 7726L, 8106L,
8890L, 9299L, 10625L, 9302L, 8314L, 8850L, 8265L, 8796L, 7836L,
6892L, 7791L, 8192L, 9115L, 9434L, 10484L, 9827L, 9110L, 9070L,
8633L, 9240L), .Dim = c(72L, 1L), .Dimnames = list(NULL, "value"), index = structure(c(94694400,
97372800, 99792000, 102470400, 105062400, 107740800, 110332800,
113011200, 115689600, 118281600, 120960000, 123552000, 126230400,
128908800, 131328000, 134006400, 136598400, 139276800, 141868800,
144547200, 147225600, 149817600, 152496000, 155088000, 157766400,
160444800, 162864000, 165542400, 168134400, 170812800, 173404800,
176083200, 178761600, 181353600, 184032000, 186624000, 189302400,
191980800, 194486400, 197164800, 199756800, 202435200, 205027200,
207705600, 210384000, 212976000, 215654400, 218246400, 220924800,
223603200, 226022400, 228700800, 231292800, 233971200, 236563200,
239241600, 241920000, 244512000, 247190400, 249782400, 252460800,
255139200, 257558400, 260236800, 262828800, 265507200, 268099200,
270777600, 273456000, 276048000, 278726400, 281318400), tzone = "UTC", tclass = "Date"), .indexCLASS = "Date", tclass = "Date", .indexTZ = "UTC", tzone = "UTC", class = "ts", .Tsp = c(1973,
1978.91666666667, 12))
Шаг 2: Поезд и прогноз с ets
Youнужна вспомогательная функция для преобразования выходных данных во фрейм данных:
# helping function
make_df <- function(ts_obj) {
ts_df <- timetk::tk_tbl(preserve_index = TRUE, ts_obj) %>%
mutate(index = zoo::as.Date(x = .$index, frac = 0)) %>%
dplyr::rename(date = index)
return(ts_df)
}
Следующая функция обучает ets
и прогнозирует следующие 12 месяцев;затем он подготавливает таблицы с установленными и прогнозируемыми значениями:
lts <- lapply(ld, function(ts_obj) {
# train ets model and get fitted results
res_model <- ets(ts_obj, model = "ZZZ")
res_fit <- ts(as.numeric(res_model$fitted), start = start(ts_obj), frequency = 12)
# add extra metrics you may be interested in
model <- res_model[["method"]]
mse <- res_model[["mse"]]
# get forecasts for the next 12 months
res_fct <- forecast(res_model, h = 12)
res_fcst <- ts(res_fct$mean, start = end(ts_obj) + 1/12, frequency = 12)
# transform results to tbl
# for fitted output we keep the residuals and the 95% CI
res_fit_tbl <- make_df(res_fit) %>%
mutate(residuals = as.numeric(res_model[["residuals"]])) %>%
mutate(CI95_upper = value + 1.96*sqrt(res_model$sigma2),
CI95_lower = value - 1.96*sqrt(res_model$sigma2))
# the forecast output does not have residuals
res_fcst_tbl <- make_df(res_fcst)
return(list(res_fit_tbl = res_fit_tbl, res_fcst_tbl = res_fcst_tbl, model = model, mse = mse)) # don't forget to pass the extra metrics as output
})
Шаг 3: Объедините подготовленные и прогнозируемые результаты по различным группам
# add groups back + other metrics of interest
lts_all <- lapply(names(lts), function(x, lts) {
output_fit <- lts[[x]][["res_fit_tbl"]] %>%
mutate(group = x,
model = lts[[x]][["model"]],
mse = lts[[x]][["mse"]])
output_fcst <- lts[[x]][["res_fcst_tbl"]] %>%
mutate(group = x)
return(list(output_fit=output_fit, output_fcst=output_fcst))
}, lts)
Пример выходных данных:
> lts_all[[1]][["output_fit"]]
# A tibble: 72 x 6
date value residuals CI95_upper CI95_lower group
<date> <dbl> <dbl> <dbl> <dbl> <chr>
1 1973-01-01 8509. 498. 9083. 7936. 1
2 1973-02-01 8006. 99.5 8580. 7433. 1
3 1973-03-01 8864. 64.4 9437. 8290. 1
4 1973-04-01 9152. -15.2 9726. 8579. 1
5 1973-05-01 9939. 77.9 10513. 9365. 1
6 1973-06-01 10435. 391. 11009. 9861. 1
7 1973-07-01 11595. -278. 12168. 11021. 1
8 1973-08-01 10717. 26.9 11291. 10143. 1
9 1973-09-01 9641. 72.4 10214. 9067. 1
10 1973-10-01 10024. -85.7 10597. 9450. 1
# ... with 62 more rows
> lts_all[[2]][["output_fit"]]
# A tibble: 72 x 6
date value residuals CI95_upper CI95_lower group
<date> <dbl> <dbl> <dbl> <dbl> <chr>
1 1973-01-01 8509. 498. 9083. 7936. 2
2 1973-02-01 8006. 99.5 8580. 7433. 2
3 1973-03-01 8864. 64.4 9437. 8290. 2
4 1973-04-01 9152. -15.2 9726. 8579. 2
5 1973-05-01 9939. 77.9 10513. 9365. 2
6 1973-06-01 10435. 391. 11009. 9861. 2
7 1973-07-01 11595. -278. 12168. 11021. 2
8 1973-08-01 10717. 26.9 11291. 10143. 2
9 1973-09-01 9641. 72.4 10214. 9067. 2
10 1973-10-01 10024. -85.7 10597. 9450. 2
# ... with 62 more rows
Тогда
# bring together the fitted respectively forecasting results
output_fit_all <- lapply(lts_all, function(x) x[[1]])
output_fit_all <- bind_rows(output_fit_all)
output_fcst_all <- lapply(lts_all, function(x) x[[2]])
output_fcst_all <- bind_rows(output_fcst_all)