Преобразовать список во фрейм данных, свернув один столбец и оставив остальные без изменений, в R - PullRequest
0 голосов
/ 06 апреля 2020

У меня есть список, состоящий из 12 элементов, каждый из которых представляет собой фрейм данных. Каждый df содержит три столбца: два столбца, общих для всех элементов, и один, который в каждом элементе свой.

Два общих столбца:

  • coche_OEM
  • dia_hora_OEM

Третий столбец, который отличается в каждом элементе, можно свернуть в один общий столбец при преобразовании списка во фрейм данных. Например, столбец U0073 в одном из элементов содержит одно значение, совпадающее с именем столбца, а столбец B1182 в другом элементе — значение, совпадающее с именем этой переменной.

Проблема в том, что я хотел бы преобразовать этот список во фрейм данных с тремя столбцами (переменными):

  • coche_OEM
  • dia_hora_OEM
  • DTC: столбец со всеми значениями (кодами), присутствующими в отличающихся столбцах каждого элемента.

Вот этот список:

# Reproducible input for the question (looks like dput() output): a list of
# 12 grouped tibbles. Each element has exactly three columns:
#   * one factor column named after the code it holds (B1182, B124D, P2000,
#     U3003, B1D01, U0155, C1B00, P037D, P0616, P0562, U0073, P0138);
#   * coche_OEM    - factor with the same six levels in every element;
#   * dia_hora_OEM - POSIXct timestamp with tzone "UTC".
# Some code values are deparsed vectors stored as text, e.g.
# "c(\"P2000\", \"P2000\", \"P2000\")", and every element carries a "groups"
# attribute (class grouped_df) built on the code column and coche_OEM.
listdf <- list(structure(list(B1182 = structure(1L, .Label = c("B1182", 
"NULL"), class = "factor"), coche_OEM = structure(3L, .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1577774413, class = c("POSIXct", 
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), groups = structure(list(B1182 = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("B1182", 
"NULL"), class = "factor"), coche_OEM = structure(c(1L, 2L, 3L, 
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), .rows = list(integer(0), 
    integer(0), 1L, integer(0), integer(0), integer(0), integer(0), 
    integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("B1182", 
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = FALSE), .Names = c("B1182", "coche_OEM", 
"dia_hora_OEM")), structure(list(B124D = structure(1L, .Label = c("B124D", 
"NULL"), class = "factor"), coche_OEM = structure(3L, .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1577774413, class = c("POSIXct", 
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), groups = structure(list(B124D = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("B124D", 
"NULL"), class = "factor"), coche_OEM = structure(c(1L, 2L, 3L, 
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), .rows = list(integer(0), 
    integer(0), 1L, integer(0), integer(0), integer(0), integer(0), 
    integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("B124D", 
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = FALSE), .Names = c("B124D", "coche_OEM", 
"dia_hora_OEM")), structure(list(P2000 = structure(1L, .Label = c("c(\"P2000\", \"P2000\", \"P2000\")", 
"NULL"), class = "factor"), coche_OEM = structure(5L, .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1577793330, class = c("POSIXct", 
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), groups = structure(list(P2000 = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("c(\"P2000\", \"P2000\", \"P2000\")", 
"NULL"), class = "factor"), coche_OEM = structure(c(1L, 2L, 3L, 
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), .rows = list(integer(0), 
    integer(0), integer(0), integer(0), 1L, integer(0), integer(0), 
    integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("P2000", 
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = FALSE), .Names = c("P2000", "coche_OEM", 
"dia_hora_OEM")), structure(list(U3003 = structure(c(2L, 2L), .Label = c("NULL", 
"U3003"), class = "factor"), coche_OEM = structure(c(5L, 1L), .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), dia_hora_OEM = structure(c(1577793330, 
1582648789), class = c("POSIXct", "POSIXt"), tzone = "UTC")), row.names = c(NA, 
-2L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), groups = structure(list(
    U3003 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
    2L, 2L), .Label = c("NULL", "U3003"), class = "factor"), 
    coche_OEM = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 
    4L, 5L, 6L), .Label = c("356232050832996", "356232050836666", 
    "356232050880755", "356232050882736", "356232050899078", 
    "356232050905933"), class = "factor"), .rows = list(integer(0), 
        integer(0), integer(0), integer(0), integer(0), integer(0), 
        2L, integer(0), integer(0), integer(0), 1L, integer(0))), .Names = c("U3003", 
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = FALSE), .Names = c("U3003", "coche_OEM", 
"dia_hora_OEM")), structure(list(B1D01 = structure(c(1L, 1L, 
2L), .Label = c("B1D01", "c(\"B1D01\", \"B1D01\")", "NULL"), class = "factor"), 
    coche_OEM = structure(c(2L, 1L, 1L), .Label = c("356232050832996", 
    "356232050836666", "356232050880755", "356232050882736", 
    "356232050899078", "356232050905933"), class = "factor"), 
    dia_hora_OEM = structure(c(1581690876, 1582648789, 1582651926
    ), class = c("POSIXct", "POSIXt"), tzone = "UTC")), row.names = c(NA, 
-3L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), groups = structure(list(
    B1D01 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
    2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("B1D01", "c(\"B1D01\", \"B1D01\")", 
    "NULL"), class = "factor"), coche_OEM = structure(c(1L, 2L, 
    3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 
    6L), .Label = c("356232050832996", "356232050836666", "356232050880755", 
    "356232050882736", "356232050899078", "356232050905933"), class = "factor"), 
    .rows = list(2L, 1L, integer(0), integer(0), integer(0), 
        integer(0), 3L, integer(0), integer(0), integer(0), integer(0), 
        integer(0), integer(0), integer(0), integer(0), integer(0), 
        integer(0), integer(0))), .Names = c("B1D01", "coche_OEM", 
".rows"), row.names = c(NA, -18L), class = c("tbl_df", "tbl", 
"data.frame"), .drop = FALSE), .Names = c("B1D01", "coche_OEM", 
"dia_hora_OEM")), structure(list(U0155 = structure(2L, .Label = c("NULL", 
"U0155"), class = "factor"), coche_OEM = structure(1L, .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1582648789, class = c("POSIXct", 
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), groups = structure(list(U0155 = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NULL", 
"U0155"), class = "factor"), coche_OEM = structure(c(1L, 2L, 
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), .rows = list(integer(0), 
    integer(0), integer(0), integer(0), integer(0), integer(0), 
    1L, integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("U0155", 
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = FALSE), .Names = c("U0155", "coche_OEM", 
"dia_hora_OEM")), structure(list(C1B00 = structure(1L, .Label = c("C1B00", 
"NULL"), class = "factor"), coche_OEM = structure(1L, .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1582648789, class = c("POSIXct", 
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), groups = structure(list(C1B00 = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("C1B00", 
"NULL"), class = "factor"), coche_OEM = structure(c(1L, 2L, 3L, 
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), .rows = list(1L, integer(0), 
    integer(0), integer(0), integer(0), integer(0), integer(0), 
    integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("C1B00", 
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = FALSE), .Names = c("C1B00", "coche_OEM", 
"dia_hora_OEM")), structure(list(P037D = structure(2L, .Label = c("NULL", 
"P037D"), class = "factor"), coche_OEM = structure(1L, .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1582648789, class = c("POSIXct", 
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), groups = structure(list(P037D = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NULL", 
"P037D"), class = "factor"), coche_OEM = structure(c(1L, 2L, 
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), .rows = list(integer(0), 
    integer(0), integer(0), integer(0), integer(0), integer(0), 
    1L, integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("P037D", 
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = FALSE), .Names = c("P037D", "coche_OEM", 
"dia_hora_OEM")), structure(list(P0616 = structure(2L, .Label = c("NULL", 
"P0616"), class = "factor"), coche_OEM = structure(1L, .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1582648789, class = c("POSIXct", 
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), groups = structure(list(P0616 = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NULL", 
"P0616"), class = "factor"), coche_OEM = structure(c(1L, 2L, 
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), .rows = list(integer(0), 
    integer(0), integer(0), integer(0), integer(0), integer(0), 
    1L, integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("P0616", 
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = FALSE), .Names = c("P0616", "coche_OEM", 
"dia_hora_OEM")), structure(list(P0562 = structure(2L, .Label = c("NULL", 
"P0562"), class = "factor"), coche_OEM = structure(1L, .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1582648789, class = c("POSIXct", 
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), groups = structure(list(P0562 = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NULL", 
"P0562"), class = "factor"), coche_OEM = structure(c(1L, 2L, 
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), .rows = list(integer(0), 
    integer(0), integer(0), integer(0), integer(0), integer(0), 
    1L, integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("P0562", 
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = FALSE), .Names = c("P0562", "coche_OEM", 
"dia_hora_OEM")), structure(list(U0073 = structure(2L, .Label = c("NULL", 
"U0073"), class = "factor"), coche_OEM = structure(1L, .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1582648789, class = c("POSIXct", 
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), groups = structure(list(U0073 = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NULL", 
"U0073"), class = "factor"), coche_OEM = structure(c(1L, 2L, 
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), .rows = list(integer(0), 
    integer(0), integer(0), integer(0), integer(0), integer(0), 
    1L, integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("U0073", 
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = FALSE), .Names = c("U0073", "coche_OEM", 
"dia_hora_OEM")), structure(list(P0138 = structure(1L, .Label = c("c(\"P0138\", \"P0138\", \"P0138\")", 
"NULL"), class = "factor"), coche_OEM = structure(5L, .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1583391111, class = c("POSIXct", 
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), groups = structure(list(P0138 = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("c(\"P0138\", \"P0138\", \"P0138\")", 
"NULL"), class = "factor"), coche_OEM = structure(c(1L, 2L, 3L, 
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996", 
"356232050836666", "356232050880755", "356232050882736", "356232050899078", 
"356232050905933"), class = "factor"), .rows = list(integer(0), 
    integer(0), integer(0), integer(0), 1L, integer(0), integer(0), 
    integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("P0138", 
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = FALSE), .Names = c("P0138", "coche_OEM", 
"dia_hora_OEM")))

Итак, как я могу преобразовать этот список во фрейм данных в соответствии с моими требованиями?

1 Ответ

1 голос
/ 06 апреля 2020

Мы можем переименовать в каждом элементе списка все столбцы, кроме «coche_OEM» и «dia_hora_OEM», в заранее заданное имя (здесь «id»), после чего map_df объединит результаты по строкам в один фрейм данных:

# For every element of listdf, rename each column other than the two shared
# ones (coche_OEM, dia_hora_OEM) to "id"; map_df() then row-binds the renamed
# data frames into a single tibble, which is printed at top level.
map_df(
  listdf,
  function(df) {
    rename_at(
      df,
      vars(-c("coche_OEM", "dia_hora_OEM")),
      function(old_name) "id"
    )
  }
)
# A tibble: 15 x 3
# Groups:   id, coche_OEM [78]
   id                                   coche_OEM       dia_hora_OEM       
   <chr>                                <fct>           <dttm>             
 1 "B1182"                              356232050880755 2019-12-31 06:40:13
 2 "B124D"                              356232050880755 2019-12-31 06:40:13
 3 "c(\"P2000\", \"P2000\", \"P2000\")" 356232050899078 2019-12-31 11:55:30
 4 "U3003"                              356232050899078 2019-12-31 11:55:30
 5 "U3003"                              356232050832996 2020-02-25 16:39:49
 6 "B1D01"                              356232050836666 2020-02-14 14:34:36
 7 "B1D01"                              356232050832996 2020-02-25 16:39:49
 8 "c(\"B1D01\", \"B1D01\")"            356232050832996 2020-02-25 17:32:06
 9 "U0155"                              356232050832996 2020-02-25 16:39:49
10 "C1B00"                              356232050832996 2020-02-25 16:39:49
11 "P037D"                              356232050832996 2020-02-25 16:39:49
12 "P0616"                              356232050832996 2020-02-25 16:39:49
13 "P0562"                              356232050832996 2020-02-25 16:39:49
14 "U0073"                              356232050832996 2020-02-25 16:39:49
15 "c(\"P0138\", \"P0138\", \"P0138\")" 356232050899078 2020-03-05 06:51:51
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...