Скользящее процентное изменение по группам с помощью dplyr / rollapply - PullRequest
1 голос
/ 28 февраля 2020

У меня есть некоторые данные, которые выглядят следующим образом:

# A tibble: 208 x 3
# Groups:   ID [2]
   ID    date       sales
   <chr> <date>     <dbl>
 1 KIM   2019-01-02  13.8
 2 KIM   2019-01-03  13.7
 3 KIM   2019-01-04  14.0
 4 KIM   2019-01-07  14.5

Я бы хотел group_by ID и создать новый столбец, в котором процентное соотношение sales превышало бы 10% последние 50 дней.

У меня есть следующий код:

d %>% 
  mutate(
  pct_change = (sales - Lag(sales)) / Lag(sales),
  pct_change_100 = (pct_change * 100),
  Moved_morethan_10perc = ifelse(pct_change > 10, 1, 0)
)

Однако это учитывает весь период. Я хотел бы использовать rollapply (или что-то подобное) для расчета на скользящей основе дней, в которые процентное изменение превышает 10% (скользящий период 50 дней)

Данные:

d <- structure(list(ID = c("KIM", "KIM", "KIM", "KIM", "KIM", "KIM", 
"KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", 
"KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", 
"KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", 
"KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", 
"KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", 
"KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", 
"KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", 
"KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", 
"KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", 
"KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", 
"KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "KIM", "LMT", 
"LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", 
"LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", 
"LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", 
"LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", 
"LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", 
"LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", 
"LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", 
"LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", 
"LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", 
"LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", 
"LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", "LMT", 
"LMT", "LMT", "LMT", "LMT"), date = structure(c(17898, 17899, 
17900, 17903, 17904, 17905, 17906, 17907, 17910, 17911, 17912, 
17913, 17914, 17918, 17919, 17920, 17921, 17924, 17925, 17926, 
17927, 17928, 17931, 17932, 17933, 17934, 17935, 17938, 17939, 
17940, 17941, 17942, 17946, 17947, 17948, 17949, 17952, 17953, 
17954, 17955, 17956, 17959, 17960, 17961, 17962, 17963, 17966, 
17967, 17968, 17969, 17970, 17973, 17974, 17975, 17976, 17977, 
17980, 17981, 17982, 17983, 17984, 17987, 17988, 17989, 17990, 
17991, 17994, 17995, 17996, 17997, 17998, 18001, 18002, 18003, 
18004, 18008, 18009, 18010, 18011, 18012, 18015, 18016, 18017, 
18018, 18019, 18022, 18023, 18024, 18025, 18026, 18029, 18030, 
18031, 18032, 18033, 18036, 18037, 18038, 18039, 18040, 18044, 
18045, 18046, 18047, 17898, 17899, 17900, 17903, 17904, 17905, 
17906, 17907, 17910, 17911, 17912, 17913, 17914, 17918, 17919, 
17920, 17921, 17924, 17925, 17926, 17927, 17928, 17931, 17932, 
17933, 17934, 17935, 17938, 17939, 17940, 17941, 17942, 17946, 
17947, 17948, 17949, 17952, 17953, 17954, 17955, 17956, 17959, 
17960, 17961, 17962, 17963, 17966, 17967, 17968, 17969, 17970, 
17973, 17974, 17975, 17976, 17977, 17980, 17981, 17982, 17983, 
17984, 17987, 17988, 17989, 17990, 17991, 17994, 17995, 17996, 
17997, 17998, 18001, 18002, 18003, 18004, 18008, 18009, 18010, 
18011, 18012, 18015, 18016, 18017, 18018, 18019, 18022, 18023, 
18024, 18025, 18026, 18029, 18030, 18031, 18032, 18033, 18036, 
18037, 18038, 18039, 18040, 18044, 18045, 18046, 18047), class = "Date"), 
    sales = c(13.764036, 13.735715, 13.990604, 14.481503, 14.783594, 
    14.859117, 14.915759, 14.981841, 14.991282, 15.255612, 15.425539, 
    15.633225, 15.623785, 15.529382, 15.368896, 15.482181, 15.765392, 
    16.067482, 16.275169, 16.369576, 16.058041, 16.199646, 16.303492, 
    16.454536, 16.407335, 16.633905, 16.832151, 16.813271, 16.737747, 
    16.813271, 16.690546, 16.766069, 16.784948, 16.577259, 16.643343, 
    16.737747, 16.699986, 16.662222, 16.50174, 16.605583, 16.360134, 
    16.275169, 16.567822, 16.275169, 16.416773, 16.350691, 16.567822, 
    16.690546, 16.813271, 16.888794, 16.671665, 16.548941, 16.624464, 
    16.709427, 17.153122, 17.11536, 17.134243, 17.530737, 17.389133, 
    17.596819, 17.464655, 17.426313, 17.560509, 17.426313, 17.416725, 
    17.68512, 17.541336, 17.407143, 17.694704, 17.512581, 17.483824, 
    17.148335, 16.640308, 16.506111, 16.803261, 16.371916, 16.764917, 
    17.014139, 17.004555, 17.071651, 16.601963, 16.669062, 16.67865, 
    17.244188, 17.445482, 17.522167, 17.215433, 17.244188, 17.263359, 
    17.579679, 17.340044, 17.455067, 17.359215, 17.38797, 17.340044, 
    17.215433, 17.483824, 17.38797, 17.263359, 17.426313, 17.042896, 
    16.582794, 16.697819, 16.67865, 256.459991, 250.017685, 256.760315, 
    259.705292, 261.439392, 262.08847, 268.831024, 269.053833, 
    268.840729, 267.377899, 263.880707, 270.090424, 274.033295, 
    270.855774, 274.65329, 276.338928, 278.072998, 278.877075, 
    282.926483, 282.558411, 280.640228, 281.502411, 289.436554, 
    290.105042, 290.46344, 290.754089, 292.081299, 290.666901, 
    294.609741, 294.910065, 291.596924, 297.632263, 295.646301, 
    297.603241, 294.997284, 297.535431, 296.411652, 296.324493, 
    297.971344, 301.903625, 301.962128, 297.298126, 298.342163, 
    295.961334, 295.658875, 293.648834, 297.015137, 294.595306, 
    296.088196, 291.268036, 288.965271, 290.419159, 289.404358, 
    288.799408, 291.75589, 286.877228, 286.984528, 287.765106, 
    286.818665, 288.42865, 292.878021, 296.907837, 295.746674, 
    290.058105, 292.399872, 295.239258, 298.244568, 295.336853, 
    291.863251, 298.17627, 302.186584, 301.220581, 302.537811, 
    302.967163, 306.635925, 307.611664, 325.01886, 322.111176, 
    320.891479, 320.364563, 320.618256, 325.243256, 323.799225, 
    324.482178, 325.965332, 327.575317, 322.872223, 326.072693, 
    331.126984, 333.068726, 327.233795, 328.014374, 326.043365, 
    331.156281, 329.790222, 333.712738, 335.605621, 333.019897, 
    331.819763, 330.561066, 329.692657, 331.107483, 333.66391, 
    332.46579)), row.names = c(NA, -208L), groups = structure(list(
    ID = c("KIM", "LMT"), .rows = structure(list(1:104, 105:208), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr"))), row.names = 1:2, class = c("tbl_df", "tbl", 
"data.frame"), .drop = FALSE), class = c("grouped_df", "tbl_df", 
"tbl", "data.frame"))

1 Ответ

2 голосов
/ 28 февраля 2020

После группировки по «ID» примените rollapplyr и передайте Lag в пределах FUN и выполните расчет

library(dplyr)
library(quantmod)
d %>% 
    group_by(ID) %>% 
    mutate(Moved_morethan_10perc  = replace_na(+(zoo::rollapplyr(sales, 
      width = 50,  partial = TRUE,  FUN = function(x) 
          (100 * (x - Lag(x))/Lag(x)) > 10)), 0))
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...