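Вот один вариант: сначала разбиваем элементы по `;` на отдельные строки с помощью `separate_rows`,
затем с помощью `separate` делим каждую пару «ключ: значение»
на два столбца и преобразуем данные из длинного формата в широкий с помощью `pivot_wider`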
library(dplyr)
library(tidyr)
library(stringr)
library(tibble)
# Parse the "key: value; key: value" strings in df1$text into one column per
# key. Cells for keys a row does not have are filled with "(empty)"; present
# cells are rebuilt as "key:value". Column names get a ".text" suffix.
df1 %>%
  # Row id, so pivot_wider() can gather each record's pairs back onto one row.
  rownames_to_column("rn") %>%
  # One row per "key: value" piece.
  separate_rows(text, sep = ";\\s*") %>%
  # Split each piece at the colon (plus any following whitespace).
  separate(text, into = c("key", "val"), sep = ":\\s*") %>%
  pivot_wider(
    names_from = key,
    values_from = val,
    values_fill = list(val = "(empty)")
  ) %>%
  select(-rn) %>%
  # imap_dfr() is a purrr function and purrr is not attached by the library()
  # calls in this file, so it is called through its namespace. `.x` is the
  # column's values and `.y` is the column's name.
  purrr::imap_dfr(
    ~ case_when(
      .x != "(empty)" ~ str_c(.y, .x, sep = ":"),
      TRUE ~ .x
    )
  ) %>%
  rename_all(~ str_c(., ".text"))
# A tibble: 4 x 4
# name.text surname.text age.text gender.text
# <chr> <chr> <chr> <chr>
#1 name:john surname:smith age:35 gender:male
#2 name:mark (empty) age:50 (empty)
#3 name:jack surname:brown (empty) (empty)
#4 name:tom surname:travis (empty) gender:male
Или используя base R
# Base R variant: for every row of df1$text, pick the "key: value" pieces in
# the order given by nm1 and put "(empty)" where a key is missing. The pieces
# themselves are kept exactly as they appear in the input.
nm1 <- c("name", "surname", "age", "gender")
lst1 <- lapply(strsplit(df1$text, ";\\s*"), function(pieces) {
  # The key is whatever precedes the first colon of each piece.
  keys <- sub(":.*", "", pieces)
  # Reorder the pieces to follow nm1; keys absent from this row come back NA.
  ordered <- pieces[match(nm1, keys)]
  ordered[is.na(ordered)] <- "(empty)"
  ordered
})
# Stack the per-row vectors into a data frame and label the columns.
out <- setNames(do.call(rbind.data.frame, lst1), paste0(nm1, ".text"))
out
# name.text surname.text age.text gender.text
#1 name: john surname: smith age: 35 gender:male
#2 name: mark (empty) age:50 (empty)
#3 name: jack surname: brown (empty) (empty)
#4 name: tom surname: travis (empty) gender: male
Данные
# Example input: one character column `text`, each value a ";"-separated
# list of "key: value" pairs (spacing after the colon varies on purpose).
df1 <- data.frame(
  text = c(
    "name: john; surname: smith; age: 35; gender:male",
    "name: mark; age:50",
    "name: jack; surname: brown",
    "name: tom; surname: travis; gender: male"
  ),
  stringsAsFactors = FALSE
)