Можно воспользоваться пакетом fuzzyjoin:
library(dplyr)
library(stringr)
library(fuzzyjoin)
# Sum the population of every df2 country mentioned in each df1 sentence,
# per zone.
#
# Join rules (one match function per `by` pair, in the same order):
#   * zone vs zonal_region    -> exact equality, so zone "M" cannot be joined
#     to a longer label such as "M1" the way a regex search would allow;
#   * country_name vs country -> literal substring search via fixed(), so
#     country names containing regex metacharacters (e.g. "U.S.",
#     "Korea (South)") are matched as plain text.
# Factors are converted to character first because the match functions
# operate on strings.
# NOTE: a df1 row with no matching country keeps an NA population after the
# left join, so its total is NA (sum() is called without na.rm).
df1 %>%
  mutate_if(is.factor, as.character) %>%
  fuzzy_left_join(
    df2 %>% mutate_if(is.factor, as.character),
    by = c("zone" = "zonal_region", "country_name" = "country"),
    match_fun = list(
      `==`,
      function(text, name) str_detect(text, fixed(name))
    )
  ) %>%
  group_by(zone, country_name) %>%
  summarise(total_population = sum(population)) %>%
  # Drop the leftover `zone` grouping before converting to a plain data frame.
  ungroup() %>%
  as.data.frame()
Вывод:
zone country_name total_population
1 M The USA, Canada & Mexico are part of North America 487.19
2 N Canada like Australia is a Commonwealth member 60.42
3 O The UK is still finalizing its exit plans from the EU 65.64
Пример данных:
# Sample input: one free-text sentence per zone. Both columns are factors;
# levels are listed explicitly so their order does not depend on the locale.
df1 <- data.frame(
  zone = factor(c("M", "N", "O"), levels = c("M", "N", "O")),
  country_name = factor(
    c(
      "The USA, Canada & Mexico are part of North America",
      "Canada like Australia is a Commonwealth member",
      "The UK is still finalizing its exit plans from the EU"
    ),
    levels = c(
      "Canada like Australia is a Commonwealth member",
      "The UK is still finalizing its exit plans from the EU",
      "The USA, Canada & Mexico are part of North America"
    )
  )
)
# Sample lookup table: three countries per zonal region with their
# population figures. The two label columns are factors with explicit levels.
df2 <- data.frame(
  zonal_region = factor(
    rep(c("M", "N", "O"), each = 3L),
    levels = c("M", "N", "O")
  ),
  country = factor(
    c(
      "USA", "Canada", "Mexico",
      "Canada", "Australia", "UK",
      "Australia", "UK", "Canada"
    ),
    levels = c("Australia", "Canada", "Mexico", "UK", "USA")
  ),
  population = c(
    323.4, 36.29, 127.5,
    36.29, 24.13, 65.64,
    24.13, 65.64, 36.29
  )
)