Я предлагаю следующее:
# Example input: two tables of free-text vehicle descriptions that share
# an ID column and are compared row by row further down.
df1 <- data.frame(
  ID = seq_len(6L),
  Vehicle_1 = c(
    "FIAT PUNTO 1.4 EVO",
    "FIESTA 1.6 120CV",
    "FIESTA 1.6 120CV",
    "FIESTA 1.6 120CV",
    "SEAT IBIZA 1.4 85CV",
    "SEAT IBIZA 85CV 1.4"
  )
)
# Some of these descriptions carry a trailing blank on purpose, to mirror
# the raw data.
df2 <- data.frame(
  ID = seq_len(6L),
  Vehicle_2 = c(
    "FIAT PUNTO EVO 1.3 MJT 95CV DPF",
    "FIAT PUNTO EVO 1.3/16V MJT 75CV",
    "FIAT PUNTO EVO 1.4 77CV",
    "FIAT PUNTO EVO 1.4 77CV ",
    "FIAT PUNTO EVO 1.4 77CV BENZ+GPL",
    "FIAT PUNTO EVO 1.4 M.AIR 105CV "
  )
)
# Print both tables for inspection.
head(df1)
head(df2)
ID Vehicle_1
1 1 FIAT PUNTO 1.4 EVO
2 2 FIESTA 1.6 120CV
3 3 FIESTA 1.6 120CV
4 4 FIESTA 1.6 120CV
5 5 SEAT IBIZA 1.4 85CV
6 6 SEAT IBIZA 85CV 1.4
ID Vehicle_2
1 1 FIAT PUNTO EVO 1.3 MJT 95CV DPF
2 2 FIAT PUNTO EVO 1.3/16V MJT 75CV
3 3 FIAT PUNTO EVO 1.4 77CV
4 4 FIAT PUNTO EVO 1.4 77CV
5 5 FIAT PUNTO EVO 1.4 77CV BENZ+GPL
6 6 FIAT PUNTO EVO 1.4 M.AIR 105CV
# Convert both inputs to data.table. setDT() converts by reference; the
# assignment only keeps the original variable names bound to the result.
library(data.table)
df1 <- data.table::setDT(df1)
df2 <- data.table::setDT(df2)

# Use tstrsplit() to break each description into one token per column.
# Rows with fewer tokens than the longest row are right-padded with NA.
df3 <- df1[, tstrsplit(Vehicle_1, split = " ")]
df4 <- df2[, tstrsplit(Vehicle_2, split = " ")]
ncol1 <- ncol(df3)

# Row i holds the Vehicle_1 tokens in the first `ncol1` columns and the
# Vehicle_2 tokens in the remaining ones.
df_to_check <- cbind(df3, df4)
token_matrix <- as.matrix(df_to_check)

# Tokens shared by both descriptions of one row, in Vehicle_1 order.
# NA padding and empty tokens are dropped before comparing so they can
# never be reported as a match; a row without any shared token yields
# NA_character_.
common_tokens <- function(x) {
  left <- x[seq_len(ncol1)]
  # Negative indexing selects everything after the first `ncol1` entries.
  # The earlier `x[ncol1+1:length(x)]` parsed as `ncol1 + (1:length(x))`
  # and therefore indexed past the end of the row.
  right <- x[-seq_len(ncol1)]
  left <- left[!is.na(left) & nzchar(left)]
  right <- right[!is.na(right) & nzchar(right)]
  shared <- intersect(left, right)
  if (length(shared) == 0L) NA_character_ else shared
}

# lapply() always returns a list with one element per row. apply() would
# collapse to a vector or matrix whenever every row produced the same
# number of matches.
list_result <- lapply(
  seq_len(nrow(token_matrix)),
  function(i) common_tokens(token_matrix[i, ])
)

# Wrap in list() so `:=` stores the result as a single list column,
# whatever the number of rows.
df1[, list_match := list(list_result)]
head(df1)
И вот результат, который я добавил к df1:
ID Vehicle_1 list_match
1: 1 FIAT PUNTO 1.4 EVO FIAT,PUNTO,EVO
2: 2 FIESTA 1.6 120CV NA
3: 3 FIESTA 1.6 120CV NA
4: 4 FIESTA 1.6 120CV NA
5: 5 SEAT IBIZA 1.4 85CV 1.4
6: 6 SEAT IBIZA 85CV 1.4 1.4