Прогноз методом Хольта — Уинтерса в RStudio считается очень долго - PullRequest
0 голосов
/ 04 ноября 2018

Я использую функцию `HoltWinters()` в R для прогнозирования по большому набору данных.

RStudio требуется более часа, чтобы выдать 10 000 точек прогноза.

Как изменить код на R, чтобы получать прогноз методом Хольта — Уинтерса быстро, а не ждать результата так долго?

Я пробовал прогнозировать 10 000 точек с помощью ETS. С ETS проблем нет: результат получается за 30 минут или быстрее. А прогноз методом Хольта — Уинтерса считается очень долго.

 ``` r

# Loading Libraries -------------------------------------------------------
library(zoo)
#> Warning: package 'zoo' was built under R version 3.5.1
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
library(forecast)
#> Warning: package 'forecast' was built under R version 3.5.1
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 3.5.1
library(xts)
#> Warning: package 'xts' was built under R version 3.5.1
library(fpp2)
#> Warning: package 'fpp2' was built under R version 3.5.1
#> Loading required package: fma
#> Warning: package 'fma' was built under R version 3.5.1
#> Loading required package: expsmooth
#> Warning: package 'expsmooth' was built under R version 3.5.1
library(tidyverse) 
#> Warning: package 'tidyverse' was built under R version 3.5.1
#> Error: package or namespace load failed for 'tidyverse' in loadNamespace(j <- i[[1L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]):
#>  there is no package called 'glue'
library(data.table) 
#> 
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:xts':
#> 
#>     first, last
library(dplyr)
#> Error: package or namespace load failed for 'dplyr' in loadNamespace(j <- i[[1L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]):
#>  there is no package called 'glue'
library(readxl)
library(googlesheets)
#> Warning: package 'googlesheets' was built under R version 3.5.1
#> Error: package or namespace load failed for 'googlesheets' in loadNamespace(j <- i[[1L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]):
#>  there is no package called 'glue'

# URLs of the two input CSV files (Google Sheets links with output=csv).
D1103orgdata <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vSDZwdXrLWmtglt-audp83HEDBvIqbJYOuk4FcsKLYCGCY7s3666xIWcsTd3crpmkh1zMN9jBDHpInr/pub?gid=1123672661&single=true&output=csv"

D1103interpolated <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vSUI423Hyq6Ulg7a2huyyWw10r73OFm4ybUMRBvn6zxTThtijD1wf9FF8qRmtsZrdPR5_VMoc13DPu0/pub?gid=522057281&single=true&output=csv"

# Truncate timestamps to whole minutes (the series uses a 1-minute step).
# `x` is anything as.POSIXct() accepts; numeric input is read as seconds
# counted from 1970-01-01. Returns a POSIXct vector with the seconds dropped.
to.minute <- function(x) {
  stamp <- as.POSIXct(x, origin = "1970-01-01")
  # trunc() yields POSIXlt, so convert back to POSIXct for use as an index.
  as.POSIXct(trunc(stamp, units = "mins"))
}

# Read the raw readings into a zoo series indexed by minute (via to.minute);
# readings that share a minute are collapsed to the last one.
z <- read.csv.zoo(
  url(D1103orgdata),
  FUN = to.minute,
  aggregate = function(x) tail(x, 1)
)

# Round-trip through ts and fill the NA values with na.approx().
# NOTE(review): presumably as.ts() is what exposes the missing minutes as NA
# so that they can be interpolated - confirm.
zz <- na.approx(as.zoo(as.ts(z)))

# Put both series back on a POSIXct index.
time(zz) <- as.POSIXct(time(zz), origin = "1970-01-01")
time(z) <- as.POSIXct(time(z), origin = "1970-01-01")

# Data-frame view of the interpolated series (merged with an empty zoo that
# carries the same index).
z.interpolated <- as.data.frame(
  merge(zz, zoo(, time(zz)))
)

# Second input: the interpolated CSV, read as a data.table.
df3 <- data.table::fread(D1103interpolated, blank.lines.skip = TRUE)

# Training series for the train/test forecast on D1103 ----------------------
# One observation per minute from start_time to end_time (inclusive).
start_time <- as.POSIXct("2015-10-27 19:50")
end_time   <- as.POSIXct("2015-12-31 23:59")

mytsTT2 <- ts(
  zoo(
    z.interpolated$meter_value,
    order.by = seq.POSIXt(start_time, end_time, by = "mins"),
    # 10080 = 7 * 24 * 60, i.e. the number of minutes in one week.
    # NOTE(review): frequency is passed to zoo() here, not to ts(), so the
    # resulting ts may not carry a weekly frequency - confirm this is intended.
    frequency = 10080
  )
)



# Observed data for plotting and for the accuracy test ----------------------
# Rows 1..171651 of column 3 of df3: all existing data, used for plotting.
ActualData2Plot <- ts(df3[1:171651, 3])

# Hold-out part for the accuracy test: rows 93851..171651 of the same column.
# Per the original note, row 93851 is 1/1/2016 0:00 (row number in the CSV).
ActualData <- ts(df3[93851:171651, 3])

# Holt-Winters fit on the training series ------------------------------------
# gamma = FALSE asks for a model without a seasonal component.
# NOTE(review): beta = TRUE appears to be coerced to 1, which would pin the
# trend smoothing parameter instead of estimating it (beta = NULL) - confirm
# that this is intended.
hw <- HoltWinters(
  x = mytsTT2,
  beta = TRUE,
  gamma = FALSE
)

# The forecast call is left disabled: per the author, forecasting with the
# Holt-Winters model takes about 10 hours to produce the output.
# fcHW <- forecast(hw, 77801)

# The output of the (disabled) fcHW forecast was saved earlier and can be
# read from this published CSV file:
D1103Read <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vSuJLKwLeqC2z-ha0AtD_Q6_WlXuOeJ2LWcqxZAHkDyDuSNE5S7fwgSP45mth9JoTia3_CWNqCXafzG/pub?output=csv"
D1103fcHW <- data.table::fread(D1103Read, blank.lines.skip = TRUE)
D1103fcHW
#>            V1 Point.Forecast    Lo.80    Hi.80     Lo.95    Hi.95
#>     1:  93851       187283.0 187283.0 187283.1 187283.04 187283.1
#>     2:  93852       187283.3 187283.3 187283.3 187283.28 187283.3
#>     3:  93853       187283.6 187283.5 187283.6 187283.53 187283.6
#>     4:  93854       187283.8 187283.8 187283.9 187283.77 187283.9
#>     5:  93855       187284.1 187284.0 187284.1 187284.00 187284.2
#>    ---                                                           
#> 77797: 171647       207468.0 117082.5 297853.4  69235.30 345700.6
#> 77798: 171648       207468.2 117081.0 297855.4  69232.89 345703.5
#> 77799: 171649       207468.5 117079.5 297857.4  69230.49 345706.5
#> 77800: 171650       207468.7 117078.0 297859.4  69228.08 345709.4
#> 77801: 171651       207469.0 117076.5 297861.4  69225.68 345712.3
# View() opens a data viewer window, so only call it in an interactive
# session; a non-interactive run (Rscript, reprex) skips it.
if (interactive()) {
  View(D1103fcHW)
}

# Plotting-forecasting with HoltWinter for test & train data ------------------------------
#fcHW %>% autoplot()+autolayer(ActualData2)+scale_y_continuous(labels = scales::format_format(scientific = FALSE)) +ylab("Meter_value")

#accuracy(as.numeric(fcHW$mean),as.numeric(ActualData))
# ME     RMSE      MAE       MPE    MAPE
# Test set -3889.856 4890.119 3890.256 -1.989017 1.98923
```

<sup>Created on 2018-11-06 by the [reprex package](https://reprex.tidyverse.org) (v0.2.1.9000)</sup>
...