с большой помощью, ребята, я смог создать следующий рабочий процесс:
results_2018 <- list_of_objects %>%
map(~.x[, seq(from=1, to=length(unique(names(.x))))]) %>%
map(~dplyr::arrange(.x, desc(Germany))) %>%
map(~dplyr::top_n(.x, 10, Germany)) %>%
map(~rename(.x, "Answers" = "Answer.Options"))
results_2019 <- list_of_objects_2 %>%
map(~.x[, seq(from=1, to=length(unique(names(.x))))]) %>%
map(~dplyr::arrange(.x, desc(Germany))) %>%
map(~dplyr::top_n(.x, 10, Germany)) %>%
map(~rename(.x, "Answers" = "Data.Points"))
Получить два года, каждый из которых содержит список фреймов данных, сделать некоторые манипуляции с данными.
map2(results_2018, results_2019, ~
full_join(.x %>% select(Answers, Austria),
.y %>% select(Answers, Austria),
by = "Answers") %>%
mutate(Difference = Austria.y - Austria.x) %>%
rename_at(vars(contains(".x")),
~str_replace(., ".x", "_2018")) %>%
rename_at(vars(contains(".y")),
~str_replace(., ".y", "_2019")) %>%
set_names(c("Answers", "Austria_2018", "Austria_2019"
,"Difference")))
Объедините их всех вместе, чтобы получить новый набор данных. Это работает, но было бы действительно полезно вставить это в один код purrr
и dplyr
.
purrr::map(list_obj, ~map(.x, function(x)
x[, seq(from=1, to=length(unique(names(x))))] %>%
dplyr::arrange(desc(Germany)) %>%
dplyr::top_n(10, Germany) %>%
dplyr::rename(Answers = 1) %>%
map2(.x, .y, ~full_join(.x %>% select(Answers, Austria),
.y %>% select(Answers, Austria),
by = "Answers")
%>%
mutate(Difference = Austria.y - Austria.x) %>%
rename_at(vars(contains(".x")),
~str_replace(., ".x", "_2018")) %>%
rename_at(vars(contains(".y")),
~str_replace(., ".y", "_2019")) %>%
set_names(c("Answers", "Austria_2018", "Austria_2019"
,"Difference")))
))
Это моя попытка, но map2
внутри функции map
кажется не работать таким образом. Есть ли способ получить доступ к results_2018
и results_2019
, не сохраняя их в переменной?
Пример Данные, созданные с помощью следующего кода:
results_2019 <- list_of_objects_2 %>%
map(~.x[, seq(from=1, to=length(unique(names(.x))))]) %>%
map(~dplyr::arrange(.x, desc(Germany))) %>%
map(~dplyr::top_n(.x, 2, Germany)) %>%
map(~dplyr::rename(.x, Answers = 1))
list(df2_A = structure(list(Answers = c("45 to 54", "35 to 44"
), Austria = c(23.4, 20.7), Belgium = c(21.6, 21.4), Denmark = c(22.6,
20.3), France = c(20.9, 22.5), Germany = c(24.2, 21.9), Italy = c(19.1,
24.2), Netherlands = c(22.3, 21), Poland = c(16.9, 22.2), Romania = c(18.7,
24.1), Russia = c(20, 23.9), Spain = c(20.9, 26.9), Sweden = c(20.6,
20), Switzerland = c(23.6, 20.8), UK = c(21.3, 22.2), USA = c(20.6,
20.4)), row.names = c(NA, -2L), class = "data.frame"), df2_B = structure(list(
Answers = c("PC / Laptop", "Smartphone"), Austria = c(88.8,
94.7), Belgium = c(87.9, 82.5), Denmark = c(76.8, 93.5),
France = c(88.9, 83.3), Germany = c(91.5, 86.7), Italy = c(82.2,
91), Netherlands = c(88.5, 85.7), Poland = c(89.8, 87.3),
Romania = c(88, 92.7), Russia = c(89.2, 85.8), Spain = c(88.4,
94), Sweden = c(83.5, 89.8), Switzerland = c(86.7, 94.2),
UK = c(86.6, 87.3), USA = c(84.8, 84.9)), row.names = c(NA,
-2L), class = "data.frame"), df2_C = structure(list(Answers = c("Personal PC / Laptop",
"Smartphone"), Austria = c(84.8, 88.1), Belgium = c(86.3, 72.5
), Denmark = c(78, 85.1), France = c(90.6, 61.4), Germany = c(91.8,
64.4), Italy = c(87.3, 74.5), Netherlands = c(88.5, 65.6), Poland = c(91.9,
68.8), Romania = c(86.5, 86.8), Russia = c(88.3, 68.3), Spain = c(89.7,
78.5), Sweden = c(86.1, 77.6), Switzerland = c(83.8, 85.7), UK = c(88.8,
70.8), USA = c(88.5, 67.6)), row.names = c(NA, -2L), class = "data.frame"),
df2_D = structure(list(Answers = c("Schooling until age 18",
"University degree"), Austria = c(15.1, 30.3), Belgium = c(31.6,
28.1), Denmark = c(22.1, 24), France = c(40.8, 25.3), Germany = c(41.5,
23.8), Italy = c(53.9, 19.8), Netherlands = c(16.1, 28.3),
Poland = c(31.1, 33.7), Romania = c(42.8, 16.6), Russia = c(9.8,
52.6), Spain = c(21.6, 32.6), Sweden = c(41, 31.4), Switzerland = c(10.1,
29.4), UK = c(24.1, 29.9), USA = c(25.2, 29.7)), row.names = c(NA,
-2L), class = "data.frame"), df2_E = structure(list(Answers = c("Male",
"Female"), Austria = c(50.6, 49.4), Belgium = c(50.2, 49.8
), Denmark = c(50.3, 49.7), France = c(49.5, 50.5), Germany = c(51.3,
48.7), Italy = c(52.7, 47.3), Netherlands = c(49.8, 50.2),
Poland = c(49.3, 50.7), Romania = c(50.9, 49.1), Russia = c(51.9,
48.1), Spain = c(50.5, 49.5), Sweden = c(49.2, 50.8),
Switzerland = c(50.4, 49.6), UK = c(49.4, 50.6), USA = c(48.8,
51.2)), row.names = c(NA, -2L), class = "data.frame"),
df2_F = structure(list(Answers = c("Android", "iOS (for iPhone)"
), Austria = c(67.7, 27.6), Belgium = c(51.3, 24.4), Denmark = c(47.3,
47.1), France = c(46.1, 17.7), Germany = c(51.9, 16.9), Italy = c(58.2,
16.9), Netherlands = c(47.2, 19.8), Poland = c(58.4, 6.9),
Romania = c(82.7, 13.7), Russia = c(55.9, 11.4), Spain = c(67.5,
13.5), Sweden = c(44.1, 33.8), Switzerland = c(52.9,
42.5), UK = c(40.6, 30), USA = c(39.4, 33.3)), row.names = c(NA,
-2L), class = "data.frame"), df2_G = structure(list(Answers = c("Clothing",
"Book(s)"), Austria = c(25.8, 21.9), Belgium = c(24, 13.2
), Denmark = c(20.5, 10.3), France = c(22.9, 13.8), Germany = c(27.2,
18.2), Italy = c(22.7, 19.5), Netherlands = c(24, 11), Poland = c(29.3,
20.5), Romania = c(19.9, 13.6), Russia = c(15.4, 8.1), Spain = c(24.3,
16.8), Sweden = c(24.7, 11.1), Switzerland = c(26.5, 16.6
), UK = c(22.9, 15.1), USA = c(20.6, 11.8)), row.names = c(NA,
-2L), class = "data.frame"), df2_H = structure(list(Answers = c("Free delivery",
"Easy returns policy"), Austria = c(72.5, 48.2), Belgium = c(71.4,
37.2), Denmark = c(67.4, 45.9), France = c(71.7, 29.7), Germany = c(68.9,
47.1), Italy = c(66, 31.6), Netherlands = c(69.7, 37.4),
Poland = c(62.5, 29.7), Romania = c(66.4, 39), Russia = c(67.6,
39.4), Spain = c(70.2, 39), Sweden = c(68.4, 32.3), Switzerland = c(70.2,
40.8), UK = c(71.8, 40.4), USA = c(69.6, 40.5)), row.names = c(NA,
-2L), class = "data.frame"), df2_I = structure(list(Answers = c("Clothing",
"Book(s)"), Austria = c(16.5, 15.2), Belgium = c(14.7, 8.7
), Denmark = c(14.1, 7.9), France = c(16.8, 9.6), Germany = c(17.1,
11.7), Italy = c(17.6, 13.5), Netherlands = c(16.6, 8.1),
Poland = c(15.8, 12.4), Romania = c(15.9, 10.8), Russia = c(12.2,
6.9), Spain = c(21, 14.6), Sweden = c(16.3, 8), Switzerland = c(17.1,
12.3), UK = c(10.8, 8.2), USA = c(10.8, 6.6)), row.names = c(NA,
-2L), class = "data.frame"), df2_J = structure(list(Answers = c("YouTube",
"WhatsApp"), Austria = c(88, 79.4), Belgium = c(80.4, 52.8
), Denmark = c(80.9, 20.6), France = c(79, 31.8), Germany = c(77.9,
74.7), Italy = c(88, 82), Netherlands = c(78.8, 80.9), Poland = c(90.3,
34.9), Romania = c(93.7, 70.7), Russia = c(88.1, 59.5), Spain = c(89.5,
85.4), Sweden = c(86.8, 29.1), Switzerland = c(86.3, 81.1
), UK = c(80.5, 58.1), USA = c(81.3, 17.2)), row.names = c(NA,
-2L), class = "data.frame"), df2_K = structure(list(Answers = c("Less than 30 minutes",
"30 minutes to 1 hour"), Austria = c(28.9, 24.6), Belgium = c(25.2,
21.6), Denmark = c(24.4, 23.6), France = c(27.6, 18.6), Germany = c(29.7,
21.2), Italy = c(22.1, 22.8), Netherlands = c(30.6, 23.2),
Poland = c(25.4, 23.9), Romania = c(15.4, 20.9), Russia = c(18.2,
22.6), Spain = c(25.3, 25.9), Sweden = c(25.2, 22.5),
Switzerland = c(30.9, 23.1), UK = c(23.9, 20.8), USA = c(20.9,
19.9)), row.names = c(NA, -2L), class = "data.frame"))