У меня есть некоторые данные, которые я хотел бы нормализовать, используя функцию прокрутки. Моя цель - сделать временные ряды трех переменных var1
, var2
и var3
сопоставимыми между собой.
Мой текущий код только масштабирует каждую переменную индивидуально на основе ее значений c min
и max
, однако я хотел бы использовать значения min
и max
из других переменных.
То есть, учитывая эту очень маленькую "мини" выборку данных, мой текущий метод будет принимать для var1
значение min
как 75.9
и значение max
как 76.8
и вычислите весы (согласно функции в коде). Для var2
будет использоваться значение min
в качестве 13
и значение max
в качестве 44
, а для var3
будет приниматься значение min
в качестве 24
и значение max
в качестве 59
.
# A tibble: 6 x 4
date var1 var2 var3
<date> <dbl> <dbl> <dbl>
1 2010-01-01 76.8 13 24
2 2010-01-02 76.4 31 46
3 2010-01-03 75.9 44 59
Однако разные переменные имеют разную величину, но я хочу иметь возможность сравнивать по переменным. Поэтому я хотел бы принять значение min
в качестве 13
и значение max
в качестве 76.8
и масштабировать все три переменные по этим числам. Я хотел бы делать это по мере поступления, обновляя ежедневно.
Используя приведенный ниже код, я хотел бы найти значения min
и max
в течение заданного 90
дневного периода, а затем вычислить нормализованный результат, продвиньтесь на 1 день и вычислите следующий нормализованный результат на 90
день на основе min
и max
этого периода.
Код:
normaliseFunction <- function(x){
(x - min(x) / max(x) - min(x))
}
library(dplyr)
normaliseWindowSize = 90
normalisedData <- data %>%
mutate(
var1Normed = zoo::rollapply(
var1, # normalise variable 1
width = normaliseWindowSize,
by = 1,
FUN = normaliseFunction,
align = "left",
fill = NA
),
var2Normed = zoo::rollapply(
var2, # normalise variable 2
width = normaliseWindowSize,
by = 1,
FUN = normaliseFunction,
align = "left",
fill = NA
),
var3Normed = zoo::rollapply(
var3, # normalise variable 3
width = normaliseWindowSize,
by = 1,
FUN = normaliseFunction,
align = "left",
fill = NA
)
)
Данные:
data <- structure(list(date = structure(c(14610, 14611, 14612, 14613,
14614, 14615, 14616, 14617, 14618, 14619, 14620, 14621, 14622,
14623, 14624, 14625, 14626, 14627, 14628, 14629, 14630, 14631,
14632, 14633, 14634, 14635, 14636, 14637, 14638, 14639, 14640,
14641, 14642, 14643, 14644, 14645, 14646, 14647, 14648, 14649,
14650, 14651, 14652, 14653, 14654, 14655, 14656, 14657, 14658,
14659, 14660, 14661, 14662, 14663, 14664, 14665, 14666, 14667,
14668, 14669, 14670, 14671, 14672, 14673, 14674, 14675, 14676,
14677, 14678, 14679, 14680, 14681, 14682, 14683, 14684, 14685,
14686, 14687, 14688, 14689, 14690, 14691, 14692, 14693, 14694,
14695, 14696, 14697, 14698, 14699, 14700, 14701, 14702, 14703,
14704, 14705, 14706, 14707, 14708, 14709, 14710, 14711, 14712,
14713, 14714, 14715, 14716, 14717, 14718, 14719, 14720, 14721,
14722, 14723, 14724, 14725, 14726, 14727, 14728, 14729, 14730,
14731, 14732, 14733, 14734, 14735, 14736, 14737, 14738, 14739,
14740, 14741, 14742, 14743, 14744, 14745, 14746, 14747, 14748,
14749, 14750, 14751, 14752, 14753, 14754, 14755, 14756, 14757,
14758, 14759, 14760, 14761, 14762, 14763, 14764, 14765, 14766,
14767, 14768, 14769, 14770, 14771, 14772, 14773, 14774, 14775,
14776, 14777, 14778, 14779, 14780, 14781, 14782, 14783, 14784,
14785, 14786, 14787, 14788, 14789, 14790, 14791, 14792, 14793,
14794, 14795, 14796, 14797, 14798, 14799, 14800, 14801, 14802,
14803, 14804, 14805, 14806, 14807, 14808, 14809, 14810, 14811,
14812, 14813, 14814, 14815, 14816, 14817, 14818, 14819, 14820,
14821, 14822, 14823, 14824, 14825, 14826, 14827, 14828, 14829,
14830, 14831, 14832, 14833, 14834, 14835, 14836, 14837, 14838,
14839, 14840, 14841, 14842, 14843, 14844, 14845, 14846, 14847,
14848, 14849, 14850, 14851, 14852, 14853, 14854, 14855, 14856,
14857, 14858, 14859, 14860, 14861, 14862, 14863, 14864, 14865,
14866, 14867, 14868, 14869, 14870, 14871, 14872, 14873, 14874,
14875, 14876, 14877, 14878, 14879, 14880, 14881, 14882, 14883,
14884, 14885, 14886, 14887, 14888, 14889, 14890, 14891, 14892,
14893, 14894, 14895, 14896, 14897, 14898, 14899, 14900, 14901,
14902, 14903, 14904, 14905, 14906, 14907, 14908, 14909, 14910,
14911, 14912, 14913, 14914, 14915, 14916, 14917, 14918, 14919,
14920, 14921, 14922, 14923, 14924, 14925, 14926, 14927, 14928,
14929, 14930, 14931, 14932, 14933, 14934, 14935, 14936, 14937,
14938, 14939, 14940, 14941, 14942, 14943, 14944, 14945, 14946,
14947, 14948, 14949, 14950, 14951, 14952, 14953, 14954, 14955,
14956, 14957, 14958, 14959, 14960, 14961, 14962, 14963, 14964,
14965, 14966, 14967, 14968, 14969, 14970, 14971, 14972, 14973,
14974), class = "Date"), var1 = c(76.7710873995529, 76.3531164480543,
75.935145496561, 75.5171745450677, 75.0992035935744, 74.6812326420812,
74.2632616905879, 73.8452907390946, 73.4273197876013, 73.009348836108,
72.5913778846148, 72.1734069331215, 71.7554359816282, 71.337465030135,
77, 67, 51, 56, 52, 39, 82, 81, 46, 38, 25, 30, 33, 70, 79, 23,
22, 43, 78, 93, 75, 31, 28, 65, 53, 33, 55, 28, 46, 25, 39, 50,
45, 32, 47, 54, 30, 45, 43, 33, 21, 33, 42, 40, 24, 27, 35, 35,
43, 32, 18, 26, 17, 30, 32.1911583173478, 35.8040286890933, 44,
37, 24, 67, 68, 59, 75, 62, 42, 40, 54, 58, 41, 32, 21, 23, 38,
36, 19, 19, 17, 24, 20, 24, 28, 59, 33, 30, 29, 30, 40, 25, 30,
33, 42, 45, 38, 41, 45, 52, 57, 31, 52, 43, 42, 47, 52, 42, 42,
41, 21, 18, 18, 15, 23, 33, 38, 20, 17, 24, 38, 27, 26, 26, 15,
13, 25, 33, 26, 24, 42, 41, 29, 31, 25, 25, 32, 23, 20, 25, 26,
27, 25, 22, 29, 33, 24, 28, 24, 33, 19, 20, 24, 12, 23, 17, 35,
42, 40, 16, 9, 29, 39, 42, 39, 38, 21, 20, 26, 28, 37, 43, 54,
22, 22, 25, 36, 45, 49, 36, 26, 35, 39, 37, 24, 29.4381452808882,
29.5843833126788, 29.7306213444694, 29.87685937626, 30.0230974080506,
30.1693354398412, 30.3155734716318, 30.4618115034224, 30.608049535213,
30.7542875670036, 30.9005255987942, 31.0467636305848, 31.1930016623754,
31.339239694166, 31.4854777259566, 31.6317157577472, 31.7779537895378,
31.9241918213284, 32.070429853119, 32.2166678849096, 32.3629059167003,
32.5091439484909, 32.6553819802815, 32.8016200120721, 32.9478580438627,
33.0940960756533, 33.2403341074439, 33.3865721392345, 33.5328101710251,
33.6790482028157, 33.8252862346063, 33.9715242663969, 34.1177622981875,
33, 38, 34, 37, 32, 22, 29, 28, 47, 56, 28, 15, 13, 30, 39, 32,
44, 44, 49, 49, 56, 19, 21, 37, 47, 48, 36, 37, 64, 61, 50, 35,
24, 30, 55, 63, 60, 54, 22, 13, 21, 43, 53, 47, 61, 64, 57, 28,
27, 54, 68, 53, 54, 37, 26, 55, 24, 50, 55, 55, 32, 15, 45, 78,
87, 76, 88, 74, 47, 21, 73, 92, 108, 76, 24, 16, 15, 48, 83,
61, 73, 64, 39, 23, 15, 36, 42, 72, 57, 47, 30, 65, 36, 46, 68,
29, 23, 47, 84, 67, 59, 45, 41, 50, 56, 58, 36, 43, 50, 64, 53,
36, 45, 34, 53, 63, 50, 55, 89, 52, 28, 65, 95, 81, 28, 57, 63,
60, 38, 39, 41, 65, 90, 90, 76, 67, 57), var2 = c(13, 31, 44,
50, 36, 22, 30, 22, 21, 37, 57, 55, 54, 19, 53, 46, 33, 44, 44,
27, 60, 71, 44, 30, 25, 29, 30, 58, 49, 19, 18, 35, 46, 62, 56,
23, 25, 47, 40, 27, 33, 25, 37, 22, 24, 50, 55, 31, 43, 41, 23,
41, 35, 33, 23, 31, 30, 35, 13, 21, 35, 32, 30, 38, 31, 27, 21,
34, 35, 35, 33, 34, 23, 59, 46, 35, 52, 44, 25, 23, 33, 37, 21,
23, 18, 14, 28, 30, 16, 13, 9, 14, 16, 17, 22, 31, 21, 23, 20,
17, 19, 23, 28, 34, 33, 33, 27, 18, 28, 35, 40, 23, 32, 30, 29,
32, 39, 22, 22, 28, 13, 13, 12, 11, 17, 27, 27, 16, 12, 14, 24,
14, 19, 19, 10, 12, 23, 33, 20, 20, 31, 26, 16, 19, 12, 18, 30,
28, 25, 33, 35, 32, 36, 25, 33, 33, 24, 23, 24, 28, 21, 21, 30,
20, 35, 21, 34, 39, 46, 21, 14, 29, 39, 47, 34, 36, 24, 26, 30,
25, 25, 25, 25, 24, 23, 22, 22, 23, 22, 22, 28, 35, 37, 32, 24,
34, 53, 24, 21, 47, 32, 25, 24, 21, 19, 15, 22, 33, 52, 32, 29,
30, 27, 28, 19, 25, 21, 28, 27, 38, 36, 42, 34, 36, 15, 15, 15,
17, 27, 36, 37, 37, 44, 23, 27, 26, 47, 54, 28, 20, 20, 37, 36,
30, 46, 36, 43, 47, 54, 18, 20, 35, 45, 50, 47, 38, 55, 55, 43,
40, 26, 29, 38, 30, 52, 51, 25, 20, 24, 44, 49, 42, 58, 67, 61,
32, 24, 49, 67, 44, 43, 34, 27, 46, 24, 42, 53, 53, 38, 22, 44,
77, 79, 69, 81, 65, 57, 29, 64, 75, 94, 60, 27, 20, 21, 45, 78,
59, 56, 57, 33, 27, 18, 34, 45, 62, 52, 44, 31, 51, 41, 45, 59,
32, 26, 38, 56, 60, 59, 48, 49, 52, 62, 53, 37, 39, 43, 60, 61,
33, 51, 35, 52, 51, 44, 52, 75, 51, 35, 53, 85, 69, 38, 50, 63,
54, 40, 35, 47, 57, 74, 75, 69, 60, 50), var3 = c(24, 46, 59,
63, 54, 35, 43, 39, 40, 47, 68, 69, 68, 38, 74, 66, 56, 62, 57,
44, 80, 93, 68, 52, 44, 45, 52, 80, 76, 46, 39, 62, 74, 93, 75,
36, 48, 66, 54, 52, 54, 54, 63, 44, 53, 64, 68, 38, 62, 61, 40,
54, 45, 43, 29, 44, 46, 51, 31, 32, 53, 44, 46, 54, 51, 42, 43,
56, 64, 63, 57, 57, 48, 86, 72, 66, 83, 65, 48, 44, 48, 56, 33,
32, 28, 33, 52, 41, 26, 30, 33, 30, 32, 38, 48, 56, 49, 50, 50,
48, 52, 43, 47, 48, 53, 48, 43, 33, 42, 59, 68, 33, 55, 49, 47,
51, 68, 48, 40, 51, 29, 34, 29, 31, 39, 44, 48, 28, 25, 25, 40,
25, 34, 39, 24, 29, 49, 59, 49, 50, 65, 55, 40, 39, 24, 30, 42,
38, 39, 41, 44, 45, 45, 41, 55, 51, 31, 31, 25, 28, 21, 23, 36,
33, 44, 33, 46, 56, 62, 40, 33, 39, 52, 61, 54, 56, 46, 47, 49,
49, 61, 61, 77, 37, 54, 56, 48, 62, 76, 48, 50, 46, 44, 39, 30,
47, 63, 50, 34, 59, 43, 33, 29, 38, 34, 30, 40, 49, 71, 57, 53,
52, 39, 41, 38, 41, 38, 49, 46, 54, 45, 59, 60, 53, 31, 28, 29,
27, 35, 40, 47, 55, 59, 39, 29, 29, 60, 64, 45, 45, 35, 56, 48,
42, 55, 50, 65, 64, 63, 20, 34, 49, 64, 65, 67, 54, 74, 74, 48,
51, 43, 40, 56, 56, 62, 60, 42, 39, 36, 52, 66, 53, 67, 73, 75,
39, 33, 58, 77, 56, 54, 44, 29, 58, 40, 55, 66, 67, 58, 35, 57,
84, 94, 87, 92, 81, 65, 44, 70, 94, 111, 66, 33, 31, 37, 59,
85, 78, 71, 64, 44, 29, 23, 44, 50, 74, 64, 50, 44, 58, 45, 54,
66, 40, 31, 48, 66, 75, 71, 64, 67, 65, 68, 58, 38, 50, 55, 73,
73, 54, 57, 45, 66, 66, 64, 74, 97, 68, 54, 68, 94, 79, 45, 55,
70, 62, 51, 50, 52, 69, 88, 95, 92, 75, 59)), row.names = c(NA,
-365L), class = c("tbl_df", "tbl", "data.frame"))