Unnesting списки разной глубины в тибле - PullRequest
0 голосов
/ 08 мая 2018

У меня есть тиббл с вложенными списками разной глубины в разных столбцах. Каждый список имеет только одно значение или имеет значение NULL. Как мне извлечь их в обычный tibble / dataframe с одиночными значениями строки и столбца.

Я не могу заставить purrr:map_* работать из-за вложенной глубины списков.

Я пытался использовать map2_*, чтобы удалить список, но он не учитывает нули.

Любая помощь будет оценена.

Тиббл

data <- structure(list(Day = list("1", "1", "1", "1", "2", "2", "4", 
"4", "3", "5", "2", "3", "2", "1", "3", "3", "5", "4", "2", 
"4", "4", "4", "5", "3", "1", "3", "4", "4", "1", "3", "3", 
"3", "2", "2", "1", "4", "5", "2", "3", "3", "4"), TimeOfDay = list(
10L, 11L, 13L, 14L, 16L, 15L, 8L, 20L, 7L, 13L, 8L, 14L, 
12L, 12L, 15L, 9L, 11L, 12L, 13L, 15L, 13L, 11L, 9L, 17L, 
15L, 13L, 14L, 7L, 4L, 8L, 16L, 11L, 9L, 11L, 9L, 9L, 10L, 
10L, 10L, 12L, 10L), team = list("pupil", "pupil", "pupil", 
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil", 
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil", 
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil", 
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil", 
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil", 
"pupil", "pupil", "pupil"), median = list(605.1044, 56.6954, 
221.6688, 424.6239, 95.7121, 389.2422, 236.3484, 56.1632, 
103.9477, 44.6205, 68.6362, 158.2934, 52.6557, 77.3802, 111.1602, 
211.4475, 396.8566, 79.3398, 94.1856, 0.381, 28.8757, 5766.7778, 
319.767, 304.6234, 224.6323, 47.9941, 236.2954, 161.6516, 
69.3141, 0.3363, 297.0771, 8109.1642, 494.5835, 72.0297, 
14.389, 228.7122, 209.5832, 28.0984, 91.9362, 36.2796, 156.4385), 
Eightyperc = list(784.9551, 7639.3023, 1158.3115, 424.6239, 
    169.3091, 1131.0486, 514.5908, 56.1632, 103.9477, 68.9684, 
    70.2621, 636.0393, 262.0507, 963.6554, 310.2544, 581.9811, 
    8747.5797, 79.3398, 479.218, 0.381, 20195.5093, 5766.7778, 
    242308.6155, 304.6234, 866.1944, 68.8479, 1180.9717, 
    161.6516, 69.3141, 0.3363, 1370.3561, 8493.6893, 4425.4103, 
    1127.0802, 240.395, 30630.0465, 974.4312, 508.2495, 91.9362, 
    36.2796, 754.9999), avg_duration = list(467.740033333333, 
    3847.99885, 615.63206, 424.6239, 132.5106, 577.7318, 
    393.14646, 56.1632, 103.9477, 41.4517333333333, 69.44915, 
    1293.8016, 157.3532, 726.271625, 220.406616666667, 296.056622222222, 
    4572.21815, 79.3398, 206.1527, 0.381, 5162.30205, 5766.7778, 
    80979.7336333333, 304.6234, 1021.4285, 57.4844857142857, 
    934.53983, 161.6516, 69.3141, 0.3363, 994.15694, 5539.36966666667, 
    2396.68795714286, 367.990935714286, 127.392, 8105.9991, 
    1146.64592, 418.222866666667, 91.9362, 36.2796, 377.54555), 
Purpose = list("Target", "Target", "Target", "Target", "Target", 
    "Target", "Target", "Target", "Target", "Target", "Target", 
    "Target", "Target", "Target", "Target", "Target", "Target", 
    "Target", "Target", "Target", "Target", "Target", "Target", 
    "Target", "Target", "Target", "Target", "Target", "Target", 
    "Target", "Target", "Target", "Target", "Target", "Target", 
    "Target", "Target", "Target", "Target", "Target", "Target"), 
HourOfWeek = list(34L, 35L, 37L, 38L, 64L, 63L, 104L, 116L, 
    79L, 133L, 56L, 86L, 60L, 36L, 87L, 81L, 131L, 108L, 
    61L, 111L, 109L, 107L, 129L, 89L, 39L, 85L, 110L, 103L, 
    28L, 80L, 88L, 83L, 57L, 59L, 33L, 105L, 130L, 58L, 82L, 
    84L, 106L), Day1 = list("1", "1", "1", "1", NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL), TimeOfDay1 = list(
    10L, 11L, 13L, 14L, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL), team1 = list("pupil", "pupil", 
    "pupil", "pupil", NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL), median1 = list(874.537, 1070.7642, 
    1.3133, 502.7748, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL), Eightyperc1 = list(2287.3256, 
    2114.1153, 1.3133, 502.7748, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL), avg_duration1 = list(
    1441.39973333333, 1129.34656666667, 1.3133, 502.7748, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL), Purpose1 = list("Actual", "Actual", "Actual", 
    "Actual", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL), HourOfWeek1 = list(34L, 35L, 37L, 38L, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)), .Names = ("Day","TimeOfDay", "team", "median", "Eightyperc", "avg_duration", "Purpose", "HourOfWeek", "Day1", "TimeOfDay1", "team1", "median1", "Eightyperc1", "avg_duration1", "Purpose1", "HourOfWeek1"), row.names = c(NA, -41L),class =c("tbl_df", "tbl", "data.frame"))

1 Ответ

0 голосов
/ 08 мая 2018

Проблема в том, что у вас есть NULL записей в столбце list s, которые будут сброшены при unlist.

.

Обходной путь - заново преобразовать столбцы as.character, которые превращают NULL в "NULL" записей, а затем делают as.numeric для всех числовых столбцов. Это выдаст некоторые предупреждения, которые могут быть проигнорированы и которые происходят из-за неудачного преобразования "NULL" в numeric записей, что приводит к NA s.

data %>%
    mutate_all(as.character) %>%
    mutate_at(vars(-team, -Purpose, -team1, -Purpose1), as.numeric);
## A tibble: 41 x 16
#     Day TimeOfDay team  median Eightyperc avg_duration Purpose HourOfWeek
#   <dbl>     <dbl> <chr>  <dbl>      <dbl>        <dbl> <chr>        <dbl>
# 1    1.       10. pupil  605.       785.         468.  Target         34.
# 2    1.       11. pupil   56.7     7639.        3848.  Target         35.
# 3    1.       13. pupil  222.      1158.         616.  Target         37.
# 4    1.       14. pupil  425.       425.         425.  Target         38.
# 5    2.       16. pupil   95.7      169.         133.  Target         64.
# 6    2.       15. pupil  389.      1131.         578.  Target         63.
# 7    4.        8. pupil  236.       515.         393.  Target        104.
# 8    4.       20. pupil   56.2       56.2         56.2 Target        116.
# 9    3.        7. pupil  104.       104.         104.  Target         79.
#10    5.       13. pupil   44.6       69.0         41.5 Target        133.
## ... with 31 more rows, and 8 more variables: Day1 <dbl>, TimeOfDay1 <dbl>,
##   team1 <chr>, median1 <dbl>, Eightyperc1 <dbl>, avg_duration1 <dbl>,
##   Purpose1 <chr>, HourOfWeek1 <dbl>   

PS. Приведенные вами примеры данных выдают ошибку из-за отсутствия c(...): .Names = ("Day", ... должно быть .Names = c("Day", ....

...