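Мы можем использовать `regex_left_join` из пакета `fuzzyjoin`, а затем сгруппировать строки (`group_by`) и объединить найденные ключевые слова в одну строку (через `toString`, то есть `paste` с `collapse = ", "`).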
library(fuzzyjoin)
library(dplyr)
# Regex-join every DESCRIPTION in df1 against the keyword patterns in df2
# (case-insensitive), then collapse the keywords matched for each df1 row into
# a single lower-cased, comma-separated string.
df1 %>%
  regex_left_join(df2, by = c("DESCRIPTION" = "Keywords"),
                  ignore_case = TRUE) %>%
  group_by(ID, NAME, Month, DESCRIPTION) %>%
  # A left join keeps unmatched df1 rows with Keywords = NA. Drop those NAs
  # before collapsing so an unmatched description yields "" instead of the
  # literal string "NA".
  summarise(
    Keywords = toString(unique(tolower(Keywords[!is.na(Keywords)])))
  ) %>%
  # summarise() only removes the last grouping level; ungroup explicitly so
  # later verbs do not silently keep operating per (ID, NAME, Month) group.
  ungroup()
# A tibble: 3 x 5
#      ID NAME  Month DESCRIPTION        Keywords
#   <int> <chr> <chr> <chr>              <chr>
# 1    12 x1    Jan   funding recived    fund
# 2    14 x3    Mar   new year event     new, event
# 3    23 x2    Feb   author of the book author, book
Данные:
# Example input: one row per record, with a free-text DESCRIPTION column that
# is searched for keywords.
df1 <- data.frame(
  ID = c(12L, 23L, 14L),
  NAME = c("x1", "x2", "x3"),
  Month = c("Jan", "Feb", "Mar"),
  DESCRIPTION = c("funding recived", "author of the book", "new year event"),
  stringsAsFactors = FALSE
)
# Keyword lookup table: each entry is used as a pattern matched against
# DESCRIPTION (mixed case on purpose; matching is done case-insensitively).
df2 <- data.frame(
  Keywords = c("New", "FUND", "EVENT", "Author", "book"),
  stringsAsFactors = FALSE
)