Один из подходов — использовать `tidyr`, чтобы преобразовать данные в длинный формат и разделить имена столбцов на `Variable`, `MainType` и `SubType`.
Тогда их будет легко фильтровать.
library(tidyr)
library(dplyr)
library(stringr)
# Reshape the wide table into long form (one row per original cell) and
# decompose each former column name into its parts:
#   Variable - the first two characters of the name,
#   Type     - the remainder of the name,
#   MainType - the part of Type matched by the pattern below,
#   SubType  - whatever is left of Type once MainType is removed
#              (NA when nothing is left).
data <- data %>%
  pivot_longer(cols = everything(), names_to = "name", values_to = "Value") %>%
  separate(col = name, into = c("Variable", "Type"), sep = 2) %>%
  mutate(
    MainType = str_extract(Type, "[0-9]BP?[0-9]*"),
    # An empty remainder means there is no sub-type; store it as NA.
    SubType = na_if(str_remove(Type, MainType), "")
  )
data
## A tibble: 88 x 5
# Variable Type Value MainType SubType
# <chr> <chr> <int> <chr> <chr>
# 1 XL 1B 1 1B NA
# 2 YL 1B 10 1B NA
# 3 ZL 11B 100 1B 1
# 4 ZL 21B 200 1B 2
# 5 ZL 31B 300 1B 3
# 6 ZL 41B 400 1B 4
# 7 XL 1BP2 1 1BP2 NA
# 8 YL 1BP2 10 1BP2 NA
# 9 ZL 11BP2 100 1BP2 1
#10 ZL 21BP2 200 1BP2 2
## … with 78 more rows
Тогда относительно легко написать функцию, которая создает матрицы:
library(purrr)
# Main type for which the matrices are built; the result is a named list
# with one matrix per Variable (columns = SubType, rows = original rows).
mytype <- "1BP2"
data %>%
  # The long table has no row identifier, so pivot_wider() cannot line the
  # values back up. pivot_longer() emits cells row by row, hence the n-th
  # occurrence of a Variable/Type pair belongs to the n-th original row.
  # Number them BEFORE filtering so dropped NA values do not shift the index.
  group_by(Variable, Type) %>%
  mutate(Row = row_number()) %>%
  ungroup() %>%
  dplyr::filter(!is.na(Value) & MainType == mytype) %>%
  split(., .$Variable) %>%
  map(~ .x %>%
    # Only Row identifies an output row; SubType supplies the column names
    # and therefore must not be listed in id_cols as well.
    pivot_wider(id_cols = Row,
                names_from = SubType,
                values_from = Value) %>%
    # Drop the helper Row column, keeping only the value columns.
    {as.matrix(.[, -1])}
  )
$XL
1
[1,] 1
[2,] 2
[3,] 3
[4,] 4
[5,] 5
[6,] 6
[7,] 7
$YL
1
[1,] 10
[2,] 20
[3,] 30
$ZL
1 2 3
[1,] 100 200 300
[2,] 200 300 400
[3,] 300 600 1000
[4,] 400 900 1600
[5,] 500 1200 2200
[6,] 600 1500 2800
[7,] 700 1800 3400
Данные
# Example input: 8 rows, 11 integer columns. Column names encode the
# variable (XL / YL / ZL), an optional sub-type digit, and the main type
# (1B or 1BP2). Trailing NA values mark series shorter than 8 rows.
data <- data.frame(
  XL1B    = 1:8,
  YL1B    = c(10L, 20L, 30L, 40L, rep(NA_integer_, 4L)),
  ZL11B   = (1:8) * 100L,
  ZL21B   = c(200L, 300L, 600L, 900L, 1200L, 1500L, 1800L, 2100L),
  ZL31B   = c(300L, 400L, 1000L, 1600L, 2200L, 2800L, 3400L, 4000L),
  ZL41B   = c(400L, 500L, 1700L, 2900L, 4100L, 5300L, 6500L, 7700L),
  XL1BP2  = c(1:7, NA_integer_),
  YL1BP2  = c(10L, 20L, 30L, rep(NA_integer_, 5L)),
  ZL11BP2 = c((1:7) * 100L, NA_integer_),
  ZL21BP2 = c(200L, 300L, 600L, 900L, 1200L, 1500L, 1800L, NA_integer_),
  ZL31BP2 = c(300L, 400L, 1000L, 1600L, 2200L, 2800L, 3400L, NA_integer_)
)