Мы можем создать новый столбец путем извлечения подстроки в 'df1' и сделать left
объединение с 'df2'
library(dplyr)
library(stringr)
df1 %>%
mutate(Section2 = str_extract(Section, "S\\d+")) %>%
left_join(df2, by = c("Section2" = "Section")) %>%
select(-Section2)
# Section A B Names
#1 ./100-12-S00.xlsx 0.6032591 0.4554364 Baseline
#2 ./108-15-S01.xlsx 0.7109148 0.5367121 Sample1
#3 ./92-12-S00.xlsx 0.6032591 0.4554364 Baseline
#4 ./124-9-S05.xlsx 0.7109148 0.5367121 Sample5
#5 ./116-15-S11.xlsx 0.6032591 0.4554364 AASHTO
#6 ./108-9-S06.xlsx 0.7109148 0.5367121 Sample6
#7 ./84-12-S08.xlsx 0.6032591 0.4554364 Sample8
#8 ./124-15-S11.xlsx 0.7109148 0.5367121 AASHTO
#9 ./92-12-S00.xlsx 0.6032591 0.4554364 Baseline
#10 ./116-15-S03.xlsx 0.7109148 0.5367121 Sample3
#11 ./100-12-S02.xlsx 0.6032591 0.4554364 Sample2
#12 ./84-9-S05.xlsx 0.7109148 0.5367121 Sample5
Или без особых усилий, используйте regex_left_join
из fuzzyjoin
library(fuzzyjoin)
regex_left_join(df1, df2, by = 'Section') %>%
select(Section = Section.x, A, B, Names)
# A tibble: 12 x 4
# Section A B Names
# <chr> <dbl> <dbl> <chr>
# 1 ./100-12-S00.xlsx 0.603 0.455 Baseline
# 2 ./108-15-S01.xlsx 0.711 0.537 Sample1
# 3 ./92-12-S00.xlsx 0.603 0.455 Baseline
# 4 ./124-9-S05.xlsx 0.711 0.537 Sample5
# 5 ./116-15-S11.xlsx 0.603 0.455 AASHTO
# 6 ./108-9-S06.xlsx 0.711 0.537 Sample6
# 7 ./84-12-S08.xlsx 0.603 0.455 Sample8
# 8 ./124-15-S11.xlsx 0.711 0.537 AASHTO
# 9 ./92-12-S00.xlsx 0.603 0.455 Baseline
#10 ./116-15-S03.xlsx 0.711 0.537 Sample3
#11 ./100-12-S02.xlsx 0.603 0.455 Sample2
#12 ./84-9-S05.xlsx 0.711 0.537 Sample5
data
df1 <- structure(list(Section = c("./100-12-S00.xlsx", "./108-15-S01.xlsx",
"./92-12-S00.xlsx", "./124-9-S05.xlsx", "./116-15-S11.xlsx",
"./108-9-S06.xlsx", "./84-12-S08.xlsx", "./124-15-S11.xlsx",
"./92-12-S00.xlsx", "./116-15-S03.xlsx", "./100-12-S02.xlsx",
"./84-9-S05.xlsx"), A = c(0.6032591, 0.7109148, 0.6032591, 0.7109148,
0.6032591, 0.7109148, 0.6032591, 0.7109148, 0.6032591, 0.7109148,
0.6032591, 0.7109148), B = c(0.4554364, 0.5367121, 0.4554364,
0.5367121, 0.4554364, 0.5367121, 0.4554364, 0.5367121, 0.4554364,
0.5367121, 0.4554364, 0.5367121)), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"))
df2 <- structure(list(Section = c("S00", "S01", "S02", "S03", "S04",
"S05", "S06", "S07", "S08", "S09", "S10", "S11"), Names = c("Baseline",
"Sample1", "Sample2", "Sample3", "Sample4", "Sample5", "Sample6",
"Sample7", "Sample8", "Sample9", "Sample10", "AASHTO")),
class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"))