Я использовал `dplyr` и `grep`, чтобы получить желаемый результат.
Ниже приведен код:
library(dplyr)

# Tag every row of `df` with the search patterns found anywhere in its group.
#
# Result: `out` has the columns of `df` plus
#   Match_pattern - comma-separated patterns found in the row's group
#                   (NA when no pattern matched the group)
#   N             - 1 if at least one pattern matched the group, otherwise 0

# Search terms; matching below is case-insensitive and whole-word (\\b).
pattern <- c("Beach", "sand", "warm")

df <- data.frame(
  group_id = c(1, 1, 1, 1, 2, 1, 2, 3, 4),
  words = c(
    "beach", "sand", "trip", "warm", "travel",
    "water", "beach", "sand", "trees"
  ),
  ID = c(
    "vacation", "vacation", "vacation", "vacation", "meeting",
    "vacation", "meeting", "onduty", "hiking"
  )
)

# One row per group, with all of the group's words in a single string.
x <- df %>%
  group_by(group_id) %>%
  summarise(words = paste(words, collapse = " "))

# For each pattern, collect the group ids whose text contains it.
# grep() returns row positions within `x`, so they are translated through
# x$group_id; using the positions directly is only correct when the ids
# happen to be 1..n. lapply() always returns a list, whereas sapply() may
# simplify to a vector or matrix and lose the pattern names.
y <- lapply(
  setNames(pattern, pattern),
  function(d) {
    hits <- grep(paste0("\\b", d, "\\b"), x$words, ignore.case = TRUE)
    x$group_id[hits]
  }
)

# Long format: one (pattern, group) pair per row.
y <- data.frame(
  Match_pattern = rep(names(y), lengths(y)),
  group_id = unlist(y, use.names = FALSE)
)

# Collapse back to one row per group, listing every matched pattern.
y <- y %>%
  group_by(group_id) %>%
  summarise(Match_pattern = paste(Match_pattern, collapse = ", "))

# Left join so that groups without any match are kept (Match_pattern = NA).
out <- merge(df, y, by = "group_id", all.x = TRUE)
out$N <- ifelse(is.na(out$Match_pattern), 0, 1)
> out
group_id words ID Match_pattern N
1 1 sand vacation Beach, sand, warm 1
2 1 trip vacation Beach, sand, warm 1
3 1 warm vacation Beach, sand, warm 1
4 1 beach vacation Beach, sand, warm 1
5 1 water vacation Beach, sand, warm 1
6 2 beach meeting Beach 1
7 2 travel meeting Beach 1
8 3 sand onduty sand 1
9 4 trees hiking <NA> 0