В качестве решения вашей проблемы вы могли бы написать функцию, которая
построчно ищет совпадения с помощью `match`, например такую:
#' Attach the columns of a lookup table to each row of a data frame
#'
#' For every row of `df1`, finds the first row of `df2` whose `by` value is
#' equal (using `match()`) and appends the remaining columns of that row.
#' Rows of `df1` without a counterpart in `df2` are kept and filled with `NA`.
#'
#' @param df1 Data frame whose rows are kept, in their original order.
#' @param df2 Lookup data frame. When a key occurs several times, only its
#'   first occurrence is used.
#' @param by Name (a single string) of the key column present in both inputs.
#' @return A data frame with the columns of `df1` followed by the columns of
#'   `df2` other than `by`, with default row names. Column types of both
#'   inputs are preserved.
matchRows <- function(df1, df2, by) {
  stopifnot(
    is.data.frame(df1), is.data.frame(df2),
    is.character(by), length(by) == 1L,
    by %in% names(df1), by %in% names(df2)
  )
  # match() is vectorised, so no row-wise apply() is needed. apply() would
  # coerce df1 to a character matrix and turn numeric columns into strings.
  idx <- match(df1[[by]], df2[[by]])
  # drop = FALSE keeps a data frame (and the column name) even when df2
  # contributes only a single non-key column.
  extra <- df2[idx, names(df2) != by, drop = FALSE]
  out <- cbind(df1, extra)
  rownames(out) <- NULL
  out
}
# Example: look up each genus of df_gen in df_tax and print the combined table.
matchRows(
  df1 = df_gen,
  df2 = df_tax,
  by = "Genus"
)
# Genus mean_RA Kingdom Phylum Class Order Family
# 1 Unclassified 0.1357401738 <NA> <NA> <NA> <NA> <NA>
# 2 Lactobacillus 0.0003825068 Bacteria Firmicutes Bacilli Lactobacillales Lactobacillaceae
# 3 Prevotella9 0.0009573787 Bacteria Bacteroidetes Bacteroidia Bacteroidales Prevotellaceae
# 4 Anaerovibrio 0.0049035545 Bacteria Firmicutes Negativicutes Selenomonadales Veillonellaceae
# 5 Roseburia 0.0026672558 Bacteria Firmicutes Clostridia Clostridiales Lachnospiraceae
Данные:
# Example input: one row per genus together with its mean_RA value.
df_gen <- data.frame(
  Genus = c(
    "Unclassified", "Lactobacillus", "Prevotella9", "Anaerovibrio", "Roseburia"
  ),
  mean_RA = c(
    0.1357401738, 0.0003825068, 0.0009573787, 0.0049035545, 0.0026672558
  ),
  stringsAsFactors = FALSE
)
# Example taxonomy lookup table with a Genus key column.
# Genus values repeat, so a lookup by genus can hit several rows.
df_tax <- data.frame(
  Kingdom = rep("Bacteria", times = 8L),
  Phylum = rep(c("Bacteroidetes", "Firmicutes"), times = c(3L, 5L)),
  Class = c(
    "Bacteroidia", "Bacteroidia", "Bacteroidia", "Bacilli",
    "Negativicutes", "Negativicutes", "Bacilli", "Clostridia"
  ),
  Order = c(
    "Bacteroidales", "Bacteroidales", "Bacteroidales", "Lactobacillales",
    "Selenomonadales", "Selenomonadales", "Lactobacillales", "Clostridiales"
  ),
  Family = c(
    "Prevotellaceae", "Prevotellaceae", "Prevotellaceae", "Lactobacillaceae",
    "Veillonellaceae", "Veillonellaceae", "Lactobacillaceae", "Lachnospiraceae"
  ),
  Genus = c(
    "Prevotella9", "Prevotella9", "Prevotella9", "Lactobacillus",
    "Anaerovibrio", "Anaerovibrio", "Lactobacillus", "Roseburia"
  ),
  stringsAsFactors = FALSE
)