После full_join можно сгруппировать данные по 'rssnp1'
и с помощью filter
оставить только те группы, в которых хотя бы одно (any)
значение в 'Type' равно 'Sentinel'
library(dplyr)
# Join the two tables on gene, group the result by rssnp1, and keep
# only the groups in which at least one row has Type "Sentinel".
data1 %>%
  full_join(data2, by = "gene") %>%
  group_by(rssnp1) %>%
  filter("Sentinel" %in% Type)
# Equivalent spelling of the last step:
# filter(any(Type == "Sentinel"))
# A tibble: 2 x 6
# Groups: rssnp1 [1]
# Chr Start End rssnp1 Type gene
# <int> <int> <int> <chr> <chr> <chr>
#1 1 1244733 1244734 rs2286773 LD_SNP ACE
#2 1 1252336 1252336 rs2286773 Sentinel CPEB4
Или можно взять код автора вопроса и расширить его с помощью ave:
# Per-row logical mask: TRUE for every row whose rssnp1 group holds at
# least one "Sentinel" in Type (ave() spreads any() over each group).
i1 <- ave(merged$Type %in% "Sentinel", merged$rssnp1, FUN = any)
# Keep only the flagged rows.
merged[i1, ]
Данные:
# Example input table: ten rows with columns Chr, Start, End, rssnp1,
# Type and gene. Integer columns use the L suffix; strings stay character.
data1 <- data.frame(
  Chr = rep(1L, 10L),
  Start = c(
    1244733L, 1257536L, 1252336L, 1252343L, 1254841L,
    1256703L, 1269246L, 1370168L, 1371824L, 1372591L
  ),
  End = c(
    1244734L, 1257436L, 1252336L, 1252343L, 1254841L,
    1267404L, 1269246L, 1370168L, 1371824L, 1372591L
  ),
  rssnp1 = c("rs2286773", "rs301159", "rs2286773", rep("rs301159", 7L)),
  Type = c("LD_SNP", "LD_SNP", "Sentinel", rep("LD_SNP", 7L)),
  gene = c("ACE", rep("CPEB4", 6L), rep("GLUPA1", 3L)),
  stringsAsFactors = FALSE
)
# Example lookup table: a single character column `gene` with seven rows.
data2 <- data.frame(
  gene = c("CPEB4", "GML", "TBX2", "PNKD", "JMJD1C", "SKI", "MYH11"),
  stringsAsFactors = FALSE
)