Purrr: обход двух списков фреймов данных в одном рабочем процессе - PullRequest
0 голосов
/ 13 февраля 2020

С вашей большой помощью, ребята, я смог создать следующий рабочий процесс:

# Prepare every 2018 data frame in a single pass: keep the leading columns
# (as many as there are unique column names), sort by Germany in descending
# order, keep the top 10 by Germany, and rename the label column to "Answers".
results_2018 <- map(list_of_objects, function(df) {
    n_unique_cols <- length(unique(names(df)))
    trimmed <- df[, seq(from = 1, to = n_unique_cols)]
    ordered <- dplyr::arrange(trimmed, desc(Germany))
    top_rows <- dplyr::top_n(ordered, 10, Germany)
    rename(top_rows, "Answers" = "Answer.Options")
})

# Prepare every 2019 data frame in a single pass: keep the leading columns
# (as many as there are unique column names), sort by Germany in descending
# order, keep the top 10 by Germany, and rename the label column to "Answers".
results_2019 <- map(list_of_objects_2, function(df) {
    n_unique_cols <- length(unique(names(df)))
    trimmed <- df[, seq(from = 1, to = n_unique_cols)]
    ordered <- dplyr::arrange(trimmed, desc(Germany))
    top_rows <- dplyr::top_n(ordered, 10, Germany)
    rename(top_rows, "Answers" = "Data.Points")
})

Этот код берёт данные за два года, каждый из которых представлен списком фреймов данных, и выполняет над ними некоторые преобразования.

# Pair the 2018 and 2019 data frames position by position and, for each pair,
# compare the Austria values per answer.
# Each element of the result has the columns:
#   Answers, Austria_2018, Austria_2019, Difference
map2(results_2018, results_2019, ~
         full_join(.x %>% select(Answers, Austria),
                   .y %>% select(Answers, Austria),
                   by = "Answers",
                   # Label the two Austria columns by year at join time. This
                   # replaces the former rename_at()/str_replace() steps
                   # (whose ".x"/".y" patterns were unescaped regexes) and the
                   # final set_names() call that overwrote every name anyway.
                   suffix = c("_2018", "_2019")) %>%
         mutate(Difference = Austria_2019 - Austria_2018))

Затем я объединяю их, чтобы получить новый набор данных. Это работает, но было бы действительно полезно уместить всё это в одну цепочку purrr и dplyr.

# Attempt to do the per-year preparation and the 2018/2019 comparison in one
# nested call: the outer map() walks `list_obj`, the inner map() prepares each
# data frame (trim columns, sort by Germany, keep the top 10, rename the first
# column to "Answers").
# NOTE(review): `list_obj` is not defined in the visible code; presumably it is
# a list holding both yearly lists -- confirm.
purrr::map(list_obj, ~map(.x, function(x)
        x[, seq(from=1, to=length(unique(names(x))))] %>%
            dplyr::arrange(desc(Germany)) %>%
            dplyr::top_n(10, Germany) %>%
            dplyr::rename(Answers = 1) %>%
            # NOTE(review): the pipe passes the single prepared data frame as
            # the first argument of map2(), so `.x` and `.y` below land in
            # later argument positions instead of being the two yearly lists
            # to pair up. The outer lambda is also called with one argument
            # only, so its `.y` is presumably unavailable here. This looks
            # like the reason the attempt fails.
            map2(.x, .y, ~full_join(.x %>% select(Answers, Austria),
                                    .y %>% select(Answers, Austria),
                                     by = "Answers")

            %>%
            mutate(Difference = Austria.y - Austria.x) %>%
            rename_at(vars(contains(".x")),
                      ~str_replace(., ".x", "_2018")) %>%
            rename_at(vars(contains(".y")),
                      ~str_replace(., ".y", "_2019")) %>%
            set_names(c("Answers", "Austria_2018", "Austria_2019"
                        ,"Difference")))
    ))

Это моя попытка, но map2 внутри функции map, похоже, так не работает. Есть ли способ получить доступ к results_2018 и results_2019, не сохраняя их в переменных?

Пример данных, созданных с помощью следующего кода:

# Build the small sample shown below: same preparation as the main workflow,
# but keeping only the top 2 by Germany and renaming the first column (by
# position) to "Answers".
results_2019 <- map(list_of_objects_2, function(df) {
    n_unique_cols <- length(unique(names(df)))
    trimmed <- df[, seq(from = 1, to = n_unique_cols)]
    ordered <- dplyr::arrange(trimmed, desc(Germany))
    top_rows <- dplyr::top_n(ordered, 2, Germany)
    dplyr::rename(top_rows, Answers = 1)
})

# Sample data (presumably dput() output of the `results_2019` object built
# above -- confirm). A named list of 11 data frames, df2_A through df2_K.
# Each data frame has 2 rows, a character `Answers` column, and one numeric
# column per country (Austria, Belgium, Denmark, France, Germany, Italy,
# Netherlands, Poland, Romania, Russia, Spain, Sweden, Switzerland, UK, USA).
list(df2_A = structure(list(Answers = c("45 to 54", "35 to 44"
), Austria = c(23.4, 20.7), Belgium = c(21.6, 21.4), Denmark = c(22.6, 
20.3), France = c(20.9, 22.5), Germany = c(24.2, 21.9), Italy = c(19.1, 
24.2), Netherlands = c(22.3, 21), Poland = c(16.9, 22.2), Romania = c(18.7, 
24.1), Russia = c(20, 23.9), Spain = c(20.9, 26.9), Sweden = c(20.6, 
20), Switzerland = c(23.6, 20.8), UK = c(21.3, 22.2), USA = c(20.6, 
20.4)), row.names = c(NA, -2L), class = "data.frame"), df2_B = structure(list(
    Answers = c("PC / Laptop", "Smartphone"), Austria = c(88.8, 
    94.7), Belgium = c(87.9, 82.5), Denmark = c(76.8, 93.5), 
    France = c(88.9, 83.3), Germany = c(91.5, 86.7), Italy = c(82.2, 
    91), Netherlands = c(88.5, 85.7), Poland = c(89.8, 87.3), 
    Romania = c(88, 92.7), Russia = c(89.2, 85.8), Spain = c(88.4, 
    94), Sweden = c(83.5, 89.8), Switzerland = c(86.7, 94.2), 
    UK = c(86.6, 87.3), USA = c(84.8, 84.9)), row.names = c(NA, 
-2L), class = "data.frame"), df2_C = structure(list(Answers = c("Personal PC / Laptop", 
"Smartphone"), Austria = c(84.8, 88.1), Belgium = c(86.3, 72.5
), Denmark = c(78, 85.1), France = c(90.6, 61.4), Germany = c(91.8, 
64.4), Italy = c(87.3, 74.5), Netherlands = c(88.5, 65.6), Poland = c(91.9, 
68.8), Romania = c(86.5, 86.8), Russia = c(88.3, 68.3), Spain = c(89.7, 
78.5), Sweden = c(86.1, 77.6), Switzerland = c(83.8, 85.7), UK = c(88.8, 
70.8), USA = c(88.5, 67.6)), row.names = c(NA, -2L), class = "data.frame"), 
    df2_D = structure(list(Answers = c("Schooling until age 18", 
    "University degree"), Austria = c(15.1, 30.3), Belgium = c(31.6, 
    28.1), Denmark = c(22.1, 24), France = c(40.8, 25.3), Germany = c(41.5, 
    23.8), Italy = c(53.9, 19.8), Netherlands = c(16.1, 28.3), 
        Poland = c(31.1, 33.7), Romania = c(42.8, 16.6), Russia = c(9.8, 
        52.6), Spain = c(21.6, 32.6), Sweden = c(41, 31.4), Switzerland = c(10.1, 
        29.4), UK = c(24.1, 29.9), USA = c(25.2, 29.7)), row.names = c(NA, 
    -2L), class = "data.frame"), df2_E = structure(list(Answers = c("Male", 
    "Female"), Austria = c(50.6, 49.4), Belgium = c(50.2, 49.8
    ), Denmark = c(50.3, 49.7), France = c(49.5, 50.5), Germany = c(51.3, 
    48.7), Italy = c(52.7, 47.3), Netherlands = c(49.8, 50.2), 
        Poland = c(49.3, 50.7), Romania = c(50.9, 49.1), Russia = c(51.9, 
        48.1), Spain = c(50.5, 49.5), Sweden = c(49.2, 50.8), 
        Switzerland = c(50.4, 49.6), UK = c(49.4, 50.6), USA = c(48.8, 
        51.2)), row.names = c(NA, -2L), class = "data.frame"), 
    df2_F = structure(list(Answers = c("Android", "iOS (for iPhone)"
    ), Austria = c(67.7, 27.6), Belgium = c(51.3, 24.4), Denmark = c(47.3, 
    47.1), France = c(46.1, 17.7), Germany = c(51.9, 16.9), Italy = c(58.2, 
    16.9), Netherlands = c(47.2, 19.8), Poland = c(58.4, 6.9), 
        Romania = c(82.7, 13.7), Russia = c(55.9, 11.4), Spain = c(67.5, 
        13.5), Sweden = c(44.1, 33.8), Switzerland = c(52.9, 
        42.5), UK = c(40.6, 30), USA = c(39.4, 33.3)), row.names = c(NA, 
    -2L), class = "data.frame"), df2_G = structure(list(Answers = c("Clothing", 
    "Book(s)"), Austria = c(25.8, 21.9), Belgium = c(24, 13.2
    ), Denmark = c(20.5, 10.3), France = c(22.9, 13.8), Germany = c(27.2, 
    18.2), Italy = c(22.7, 19.5), Netherlands = c(24, 11), Poland = c(29.3, 
    20.5), Romania = c(19.9, 13.6), Russia = c(15.4, 8.1), Spain = c(24.3, 
    16.8), Sweden = c(24.7, 11.1), Switzerland = c(26.5, 16.6
    ), UK = c(22.9, 15.1), USA = c(20.6, 11.8)), row.names = c(NA, 
    -2L), class = "data.frame"), df2_H = structure(list(Answers = c("Free delivery", 
    "Easy returns policy"), Austria = c(72.5, 48.2), Belgium = c(71.4, 
    37.2), Denmark = c(67.4, 45.9), France = c(71.7, 29.7), Germany = c(68.9, 
    47.1), Italy = c(66, 31.6), Netherlands = c(69.7, 37.4), 
        Poland = c(62.5, 29.7), Romania = c(66.4, 39), Russia = c(67.6, 
        39.4), Spain = c(70.2, 39), Sweden = c(68.4, 32.3), Switzerland = c(70.2, 
        40.8), UK = c(71.8, 40.4), USA = c(69.6, 40.5)), row.names = c(NA, 
    -2L), class = "data.frame"), df2_I = structure(list(Answers = c("Clothing", 
    "Book(s)"), Austria = c(16.5, 15.2), Belgium = c(14.7, 8.7
    ), Denmark = c(14.1, 7.9), France = c(16.8, 9.6), Germany = c(17.1, 
    11.7), Italy = c(17.6, 13.5), Netherlands = c(16.6, 8.1), 
        Poland = c(15.8, 12.4), Romania = c(15.9, 10.8), Russia = c(12.2, 
        6.9), Spain = c(21, 14.6), Sweden = c(16.3, 8), Switzerland = c(17.1, 
        12.3), UK = c(10.8, 8.2), USA = c(10.8, 6.6)), row.names = c(NA, 
    -2L), class = "data.frame"), df2_J = structure(list(Answers = c("YouTube", 
    "WhatsApp"), Austria = c(88, 79.4), Belgium = c(80.4, 52.8
    ), Denmark = c(80.9, 20.6), France = c(79, 31.8), Germany = c(77.9, 
    74.7), Italy = c(88, 82), Netherlands = c(78.8, 80.9), Poland = c(90.3, 
    34.9), Romania = c(93.7, 70.7), Russia = c(88.1, 59.5), Spain = c(89.5, 
    85.4), Sweden = c(86.8, 29.1), Switzerland = c(86.3, 81.1
    ), UK = c(80.5, 58.1), USA = c(81.3, 17.2)), row.names = c(NA, 
    -2L), class = "data.frame"), df2_K = structure(list(Answers = c("Less than 30 minutes", 
    "30 minutes to 1 hour"), Austria = c(28.9, 24.6), Belgium = c(25.2, 
    21.6), Denmark = c(24.4, 23.6), France = c(27.6, 18.6), Germany = c(29.7, 
    21.2), Italy = c(22.1, 22.8), Netherlands = c(30.6, 23.2), 
        Poland = c(25.4, 23.9), Romania = c(15.4, 20.9), Russia = c(18.2, 
        22.6), Spain = c(25.3, 25.9), Sweden = c(25.2, 22.5), 
        Switzerland = c(30.9, 23.1), UK = c(23.9, 20.8), USA = c(20.9, 
        19.9)), row.names = c(NA, -2L), class = "data.frame"))
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...