Обновление: подход с использованием dplyr может быть таким:
library(dplyr)
# Build one row per pathway listing which genes of interest occur in it.
# Reads `df` (columns `path_name`, `genes`) and the character vector
# `mutated_genes`, both defined outside this block; writes `df1`.
df1 <- df %>%
  # rowwise() makes `genes` a single string inside mutate(), so the
  # comma-separated list can be split one row at a time.
  rowwise() %>%
  mutate(
    # `%in%` is already vectorised over `mutated_genes`, so no
    # lapply()/unlist() is needed and the row's list is split only once.
    # Order (and any duplicates) of `mutated_genes` are preserved; a row
    # with no hits yields "".
    mutated = paste(
      mutated_genes[
        mutated_genes %in% strsplit(genes, ",", fixed = TRUE)[[1]]
      ],
      collapse = ","
    ),
    # Drop everything from the first underscore onward.
    path_name = sub("_.*$", "", path_name)
  ) %>%
  # Leave row-wise mode explicitly before handing back a plain data frame.
  ungroup() %>%
  select(-genes) %>%
  as.data.frame()
Результат:
df1
path_name mutated
1 hsa04010 MAP4K4,TRAF2
2 hsa04014 MAP4K4
Пример данных:
# Example input: two rows, with `path_name` holding an identifier plus a
# "__<number>" suffix and `genes` holding a comma-separated gene list
# (repeated entries are kept as they are).
df <- data.frame(
  path_name = c("hsa04010__117", "hsa04014__118"),
  genes = c(
    "MAP4K4,DUSP10*,DUSP10*,DUSP10*,DUSP10*,DUSP10*,DUSP10*,DUSP10*,DUSP10*,DUSP10*,DUSP10*,DUSP3*,DUSP3*,DUSP3*,DUSP3*,PPM1A,AKT3,AKT3,AKT3,ZAK,MAP3K12,MAP3K13,TRAF2,CASP3,IL1R1,IL1R1,TNFRSF1A,IL1A,IL1A,TNF,RAC1,RAC1,RAC1,RAC1,MAP2K7,MAPK8,MAPK8,MAPK8,MECOM,HSPA1A,HSPA1A,HSPA1A,HSPA1A,HSPA1A,HSPA1A,MAP4K3,MAPK8IP2,MAP4K1",
    "MAP4K4,DUSP10*,DUSP10*,DUSP10*,DUSP10*,DUSP10*"
  ),
  stringsAsFactors = FALSE
)

# Genes to look for inside each row's `genes` list.
mutated_genes <- c("MAP4K4", "TRAF2", "CACNG3")