Мы можем использовать комбинацию `unnest` и `pivot_wider` из пакета `tidyr`.
library(dplyr)
library(tidyr)
library(stringr)
# Tag every row with a key so the indicator columns built below can be joined
# back onto the original rows. seq_len() is used instead of 1:nrow() so that a
# zero-row data frame produces an empty ID column rather than c(1, 0).
data <- data %>% mutate(ID = seq_len(nrow(data)))

# One-hot encode the comma-separated Cardiac.Comorbidity.Types column:
# one integer 0/1 column per distinct comorbidity, original column kept.
data %>%
  # Split each entry on a comma optionally followed by a single space.
  mutate(
    Cardiac.Comorbidity.Types = str_split(Cardiac.Comorbidity.Types, ", ?")
  ) %>%
  # One row per (ID, comorbidity) pair.
  unnest(Cardiac.Comorbidity.Types) %>%
  # A trailing comma leaves an empty piece; drop it. Rows whose value is NA
  # are dropped here as well and are restored by the right_join() below.
  filter(Cardiac.Comorbidity.Types != "") %>%
  # Spread comorbidities into columns; cells hold the name or NA.
  pivot_wider(
    id_cols = "ID",
    names_from = Cardiac.Comorbidity.Types,
    values_from = Cardiac.Comorbidity.Types
  ) %>%
  # Bring back every original row together with the original string column.
  right_join(data, by = "ID") %>%
  # Turn present/NA into 1/0 for every indicator column.
  mutate(
    across(-c(ID, Cardiac.Comorbidity.Types), ~ as.integer(!is.na(.x)))
  ) %>%
  select(-ID)
# A tibble: 20 x 8
# MI CAD `Previous CABG or PTCA` Pacemaker Arrhythmia CHF PVD Cardiac.Comorbidity.Types
# <int> <int> <int> <int> <int> <int> <int> <fct>
# 1 1 0 0 0 0 0 0 MI,
# 2 0 0 0 0 0 0 0 NA
# 3 1 1 1 1 0 0 0 CAD, Previous CABG or PTCA, MI, Pacemaker,
# 4 0 0 0 0 1 0 0 Arrhythmia,
# 5 1 1 1 0 1 0 0 CAD, Previous CABG or PTCA, MI, Arrhythmia,
# ...
Данные
# Sample input: 20 comma-separated comorbidity strings (3 of them missing),
# stored in a single-column data frame named `data`.
data <- data.frame(
  Cardiac.Comorbidity.Types = c(
    "MI,",
    NA,
    "CAD, Previous CABG or PTCA, MI, Pacemaker,",
    "Arrhythmia,",
    "CAD, Previous CABG or PTCA, MI, Arrhythmia,",
    NA,
    "CAD, Previous CABG or PTCA, MI,",
    "CAD, Previous CABG or PTCA, CHF, Pacemaker,",
    "CAD, Previous CABG or PTCA,",
    "CAD, Previous CABG or PTCA, Arrhythmia,",
    "CAD, Previous CABG or PTCA,",
    "CAD, Previous CABG or PTCA, MI,",
    "CAD, Previous CABG or PTCA, CHF, Arrhythmia,",
    "CAD, Previous CABG or PTCA, Pacemaker,",
    "CAD, Previous CABG or PTCA, MI, CHF,",
    "CAD, Previous CABG or PTCA, MI, CHF,",
    NA,
    "CAD, Previous CABG or PTCA, PVD, Pacemaker,",
    "PVD,",
    "CAD, Previous CABG or PTCA,"
  )
)