Я пытаюсь разбить список на основе некоторых данных категории в R.
У меня есть эти данные:
# A tibble: 5 x 2
category to_split
<chr> <chr>
1 cat12 c(1, 5)
2 cat22 c(2, 5, 1)
3 cat33 3
4 cat43 4
5 cat51 c(5, 2)
Здесь c(1, 5) соответствует строке 1 и строке 5 этих данных, а c(2, 5, 1) — строке 2, строке 5 и строке 1.
У меня также есть второй, больший фрейм данных, который выглядит следующим образом:
# A tibble: 100 x 4
# Groups: station_location [5]
category var1 var2 var3
<chr> <dbl> <dbl> <dbl>
1 cat12 7 0.4 10
2 cat12 20 1.1 155
3 cat12 12 0.4 3
4 cat12 4 0.3 38
5 cat12 13 0.4 40
6 cat12 7 0.3 17
7 cat12 9 0.4 45
8 cat12 3 0.3 17
9 cat12 8 0.5 84
10 cat12 32 2.6 378
# ... with 90 more rows
Я хотел бы создать новые списки так, чтобы c(1, 5) извлекал данные cat12 и cat51 из большого фрейма данных. Аналогично, c(2, 5, 1) должен извлекать данные cat22, cat51 и cat12 и сохранять их во фрейме данных (внутри списка).
Я хотел бы получить структуру списка следующим образом:
list(
c(1, 5) - a data frame containing the two corresponding categories of data
c(2, 5, 1) - a data frame containing the three corresponding categories of data
3
4
c(5, 2)
)
(Мне не важно, чтобы элементы списка назывались c(1, 5) и т. д.) Я назову их по категории, которой соответствует строка в маленьком фрейме данных, т. е.:
list(
cat12 - data frame containing the two corresponding categories
cat22
cat33
cat43
cat51
)
Я пытаюсь извлечь соответствующие данные из большего фрейма данных с помощью сопоставлений, заданных в меньшем фрейме данных.
Данные:
# Lookup table (5 rows): one row per category. `to_split` holds, as text,
# the row numbers of this same table whose categories should be pulled
# together (e.g. "c(1, 5)" refers to rows 1 and 5).
data_join <- structure(
  .Data = list(
    category = c("cat12", "cat22", "cat33", "cat43", "cat51"),
    to_split = c("c(1, 5)", "c(2, 5, 1)", "3", "4", "c(5, 2)")
  ),
  row.names = c(NA_integer_, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)
# Full data set: 100 rows x 4 columns (`category`, `var1`, `var2`, `var3`),
# stored as a grouped tibble with 20 consecutive rows per category.
#
# The `groups` attribute records, for each grouping key, the row indices
# that belong to it. Its key column must be a column of the data itself.
# The original dump named it `station_location`, which is not a column
# here, so it is named `category` to match the values and row ranges
# (cat12 -> 1:20, cat22 -> 21:40, cat33 -> 41:60, cat43 -> 61:80,
# cat51 -> 81:100).
full_data <- structure(list(category = c("cat12", "cat12", "cat12", "cat12",
"cat12", "cat12", "cat12", "cat12", "cat12", "cat12", "cat12",
"cat12", "cat12", "cat12", "cat12", "cat12", "cat12", "cat12",
"cat12", "cat12", "cat22", "cat22", "cat22", "cat22", "cat22",
"cat22", "cat22", "cat22", "cat22", "cat22", "cat22", "cat22",
"cat22", "cat22", "cat22", "cat22", "cat22", "cat22", "cat22",
"cat22", "cat33", "cat33", "cat33", "cat33", "cat33", "cat33",
"cat33", "cat33", "cat33", "cat33", "cat33", "cat33", "cat33",
"cat33", "cat33", "cat33", "cat33", "cat33", "cat33", "cat33",
"cat43", "cat43", "cat43", "cat43", "cat43", "cat43", "cat43",
"cat43", "cat43", "cat43", "cat43", "cat43", "cat43", "cat43",
"cat43", "cat43", "cat43", "cat43", "cat43", "cat43", "cat51",
"cat51", "cat51", "cat51", "cat51", "cat51", "cat51", "cat51",
"cat51", "cat51", "cat51", "cat51", "cat51", "cat51", "cat51",
"cat51", "cat51", "cat51", "cat51", "cat51"), var1 = c(7, 20,
12, 4, 13, 7, 9, 3, 8, 32, 5, 2, 14, 7, 11, 9, 25, 5, 6, 18,
14, 12, 11, 11, 5, 7, 12, 2, 7, 7, 5, 28, 6, 8, 4, 9, 4, 11,
6, 5, NA, NA, 24, 6, 6, 29, NA, 11, NA, NA, NA, 9, NA, 8, 7,
NA, 17, 6, NA, 6, NA, NA, NA, NA, NA, NA, NA, NA, 13, NA, NA,
NA, NA, 16, 7, 8, NA, NA, 10, 19, 6, 10, 3, 12, 2, 2, 7, 11,
5, 5, 6, 3, 6, 9, 11, 11, 12, 5, 14, 5), var2 = c(0.4, 1.1, 0.4,
0.3, 0.4, 0.3, 0.4, 0.3, 0.5, 2.6, 0.6, 0.3, 0.5, 0.4, 0.4, 0.7,
0.5, 0.3, 0.4, 0.6, 0.5, 0.3, 0.4, 0.2, 0.4, 0.5, 0.5, 0.3, 0.4,
0.3, 0.4, 1.1, 0.4, 0.5, 0.2, 0.5, 0.4, 0.5, 0.6, 0.6, NA, NA,
0.7, 0.1, 0.3, 0.5, NA, 0.7, NA, NA, NA, 0.2, NA, 0.3, 0.2, NA,
0.3, 0.3, NA, 0.1, 0.2, 0.2, 0.5, 0.4, 0.3, 0.4, 0.2, 0.4, 0.3,
0.3, 0.2, 0.3, 0.2, 0.4, 0.2, 0.2, 0.3, 0.3, 0.5, 0.5, 0.4, 0.2,
0.3, 0.7, 0.3, 0.1, 0.3, 0.3, 0.4, 0.6, 0.3, 0.2, 0.4, 0.6, 0.2,
0.7, 0.6, 0.4, 0.6, 0.5), var3 = c(10, 155, 3, 38, 40, 17, 45,
17, 84, 378, 44, 14, 36, 20, 17, 76, 25, 4, 22, 63, 42, 23, 12,
10, 15, 29, 26, 7, 18, 5, 23, 204, 24, 56, 7, 35, 23, 55, 28,
65, 10, 13, 54, 13, 22, 45, 29, 58, 49, 14, 2, 9, 15, 38, 41,
63, 11, 9, 7, 20, 3, 5, 52, 7, 18, 25, 2, 30, 10, 3, 3, 13, 1,
12, 7, 5, 5, 9, 13, 4, 14, 9, 8, 147, 5, 7, 2, 10, 6, 66, 2,
8, 6, 3, 8, 5, 45, 6, 20, 27)), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), row.names = c(NA, -100L), groups = structure(list(
    category = c("cat12", "cat22", "cat33", "cat43",
    "cat51"), .rows = list(1:20, 21:40, 41:60, 61:80, 81:100)), row.names = c(NA,
-5L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))