Другой вариант — переименовать столбцы, по которым нужно выполнить объединение, а затем использовать `purrr::reduce` с `dplyr::left_join` (или, в базовом R, `Reduce` с `merge`):
# Rename the key columns so that each pair of consecutive tables shares
# exactly one column name; left_join() then uses that shared column as its
# natural join key.
names(df2) <- c("Symbol", "Description/Aliases")
names(df3) <- c("Description/Aliases", "OMIM", "Aliases")
# Fold the list of tables with successive left joins, then drop the extra
# `Aliases` column. Written as a nested call instead of `%>%`: all calls here
# are namespace-qualified, so the pipe operator would be undefined unless
# magrittr or dplyr had been attached beforehand.
dplyr::select(
  purrr::reduce(list(df1, df2, df3), dplyr::left_join),
  -Aliases
)
# Symbol Description/Aliases OMIM
#1 MCL1 MCL1, BCL2 family apoptosis regulator 159552
#2 ABCB1 ATP binding cassette subfamily B member 1 171050
#3 BAX <NA> NA
#4 IKZF1 <NA> NA
#5 WWOX WW domain containing oxidoreductase 605131
#6 BCL2L1 RB transcriptional corepressor 1 NA
#7 BCL2L11 <NA> NA
#8 CCND1 <NA> NA
#9 TNFSF10 <NA> NA
или в базовом R:
# Base R equivalent: fold the tables with successive left outer merges
# (all.x = TRUE keeps every row of the left table). merge() joins on the
# column names the two tables have in common, so df2 and df3 must already
# carry the renamed key columns. Unlike the dplyr variant, the `Aliases`
# column is not dropped here.
Reduce(function(x, y) merge(x, y, all.x = TRUE), list(df1, df2, df3))
Пример данных
# Sample data: the gene symbols to annotate (left-most table of the join).
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
df1 <- read.table(text =
"Symbol
MCL1
ABCB1
BAX
IKZF1
WWOX
BCL2L1
BCL2L11
CCND1
TNFSF10", header = TRUE)
# Sample data: symbol -> description lookup table. The single-quoted fields
# keep descriptions that contain spaces together as one column value.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
df2 <- read.table(text =
"Symbol2 Aliases
MCL1 'MCL1, BCL2 family apoptosis regulator'
ABCB1 'ATP binding cassette subfamily B member 1'
WWOX 'WW domain containing oxidoreductase'
BCL2L1 'RB transcriptional corepressor 1'
BOK 'peroxisome proliferator activated receptor gamma'
RHOA 'ras homolog family member A'
ABCC1 'C-X-C motif chemokine ligand 12'
PARP1 'poly(ADP-ribose) polymerase 1'
BAK1 'BRCA1, DNA repair associated'", header = TRUE)
# Sample data: description -> OMIM id lookup table, plus an additional
# `Aliases` column. The single-quoted fields keep multi-word values together.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
df3 <- read.table(text =
"description OMIM Aliases
'MCL1, BCL2 family apoptosis regulator' 159552 'G protein subunit alpha 12'
'ATP binding cassette subfamily B member 1' 171050 'matrix metallopeptidase 9'
'BCL2 associated X, apoptosis regulator' 600040 'cadherin 1'
'IKAROS family zinc finger 1' 603023 'Janus kinase 2'
'WW domain containing oxidoreductase' 605131 'ataxin 3'
'BCL2 like 1' 600039 'RB transcriptional corepressor 1'
'BCL2 like 11' 603827 'transferrin receptor'
'cyclin D1' 168461 'C-C motif chemokine ligand 2'
'TNF superfamily member 10' 603598 'prostaglandin-endoperoxide synthase 2'", header = TRUE)