Мы можем сделать это с помощью комбинации `dplyr`, `tidyr` и `purrr`:
library(dplyr) # 1.0.0
library(tidyr) # 1.1.0
library(purrr) # 0.3.4
# Reshape `widedata` (one row per pizza, with Topping_i / Category_i column
# pairs) into long format: one row per PizzaNumber / Topping / Category.
widedata %>%
  group_by(PizzaNumber) %>%
  nest() %>%
  mutate(
    Topping_Category = map(data, function(data) {
      # The nested `data` holds the non-grouping columns, so positions
      # 1, 3, 5 are the toppings and 2, 4, 6 the matching categories
      # (assumes the Topping_1, Category_1, Topping_2, ... column order and a
      # single row per PizzaNumber -- confirm against the real input).
      # Each pair is glued into one "topping_category" string; paste() renders
      # a missing value as the literal text "NA".
      as.list(paste(data[, c(1, 3, 5)], data[, c(2, 4, 6)], sep = "_"))
    })
  ) %>%
  select(-data) %>%
  # One output row per topping/category pair.
  unnest_longer(Topping_Category, indices_include = FALSE) %>%
  ungroup() %>%
  separate(Topping_Category, c("Topping", "Category"), sep = "_") %>%
  # Turn the "NA" strings produced by paste() back into real missing values.
  mutate(Topping = na_if(Topping, "NA"), Category = na_if(Category, "NA")) %>%
  # Drop the unused topping slots. Test missingness directly: comparing the
  # column against is.na()'s logical result only worked through implicit
  # coercion and would also discard a topping literally named "FALSE".
  filter(!is.na(Topping))
# A tibble: 11 x 3
PizzaNumber Topping Category
<dbl> <chr> <chr>
1 1 cheese vegetarian
2 1 ham carnivorous
3 1 tomato vegetarian
4 2 spinach vegetarian
5 2 tomato vegetarian
6 3 pineapple vegetarian
7 3 cheese vegetarian
8 3 ham carnivorous
9 4 cheese vegetarian
10 4 tomato vegetarian
11 5 beef carnivorous
Кстати, данные, которые вы предоставили с помощью `dput(widedata)`, на самом деле уже находятся в желаемом длинном формате.
Вот как на самом деле выглядят исходные данные `widedata`:
# dput() representation of the wide-format input: a 5-row tibble with
# PizzaNumber (stored as character) followed by three Topping_i / Category_i
# column pairs; NA marks a topping slot the pizza does not use.
structure(list(PizzaNumber = c("1", "2", "3", "4", "5"), Topping_1 = c("cheese",
"spinach", "pineapple", "cheese", "beef"), Category_1 = c("vegetarian",
"vegetarian", "vegetarian", "vegetarian", "carnivorous"), Topping_2 = c("ham",
"tomato", "cheese", "tomato", NA), Category_2 = c("carnivorous",
"vegetarian", "vegetarian", "vegetarian", NA), Topping_3 = c("tomato",
NA, "ham", NA, NA), Category_3 = c("vegetarian", NA, "carnivorous",
NA, NA)), row.names = c(NA, -5L), class = c("tbl_df", "tbl",
"data.frame"))