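Мы можем разбить разделённые запятыми значения столбца Ingredients
на отдельные строки, привести значения всех столбцов к длинному формату,
сгруппировать данные (group_by) по каждой исходной строке (row)
и объединить (paste)
уникальные (unique) значения
в отсортированном порядке.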
library(dplyr)
library(tidyr)
# Build one de-duplicated, alphabetically sorted ingredient string per row.
# Expects `df` to hold a comma-separated `Ingredients` column plus marker
# columns named after an ingredient, where the value "x" means "present".
# Returns a one-column tibble (`Ingredients`) with one row per input row.
df %>%
  # Tag every input row so its pieces can be regrouped after reshaping.
  mutate(row = row_number()) %>%
  # Use this if you have blank values instead of NA:
  # mutate_all(~na_if(., "")) %>%
  # Split on commas and swallow surrounding whitespace, so "PB, Jam" and
  # "PB,Jam" produce the same pieces.
  separate_rows(Ingredients, sep = "\\s*,\\s*") %>%
  # Stack every column except `row` into name/value pairs. Values are forced
  # to character so factor, logical and character columns combine safely.
  pivot_longer(
    cols = -row,
    values_drop_na = TRUE,
    values_transform = list(value = as.character)
  ) %>%
  # An "x" marker stands for the ingredient named by its column.
  mutate(value = if_else(value == "x", name, value)) %>%
  group_by(row) %>%
  # summarise() drops the single grouping level, so the result is ungrouped.
  summarise(Ingredients = toString(sort(unique(value)))) %>%
  select(-row)
# A tibble: 4 x 1
# Ingredients
# <chr>
#1 Bread, Jam, PB
#2 Bread, Jam, PB
#3 Bread
#4 Bread, Jam
Данные:
# Example input: a comma-separated `Ingredients` factor plus one marker
# column per ingredient ("x" = present, NA = not marked). `PB` is entirely NA.
# Row names are kept as the character strings "1".."4".
df <- structure(
  list(
    Ingredients = factor(
      c("Bread,PB,Jam", "PB,Jam", "Bread", "Bread"),
      levels = c("Bread", "Bread,PB,Jam", "PB,Jam")
    ),
    Bread = factor(c(NA, "x", "x", NA), levels = "x"),
    PB = rep(NA, 4),
    Jam = factor(c(NA, NA, NA, "x"), levels = "x")
  ),
  class = "data.frame",
  row.names = c("1", "2", "3", "4")
)