Объединение (join) можно выполнить после создания набора данных
вида «ключ — значение»:
library(tidyverse)
# Turn the named list of per-continent country vectors (defined outside this
# snippet) into a two-column lookup via stack(): `values` holds the list
# elements and `ind` the list names. A single join then attaches the continent
# to every row of the chocolate data.
list(
  Africa = africa,
  Asia = asia,
  Europe = europe,
  `South America` = south_america,
  `North America` = north_america,
  Oceania = oceania
) %>%
  stack() %>%
  right_join(
    chocolate_data_common_beans2,
    by = c(values = "company_location")
  ) %>%
  rename(continent = ind)
# values continent cocoa_percent rating
#1 France Europe 63 3.75
#2 Fiji Oceania 72 3.50
#3 Ecuador South America 55 2.75
#4 U.S.A. North America 75 2.75
#5 U.S.A. North America 70 2.75
#6 U.S.A. North America 55 2.75
#7 Canada North America 72 3.75
#8 U.S.A. North America 85 3.50
#9 Australia Oceania 78 3.75
#10 Austria Europe 70 3.75
Либо используйте `enframe` вместо `stack`:
# Same lookup built with enframe(): the list names go to `continent` and the
# list elements to the list-column `company_location`, which unnest() then
# expands.
# `cols` and `by` are spelled out so that tidyr does not warn about a missing
# `cols` argument and dplyr does not have to guess the join key.
list(
  Africa = africa,
  Asia = asia,
  Europe = europe,
  `South America` = south_america,
  `North America` = north_america,
  Oceania = oceania
) %>%
  enframe(name = "continent", value = "company_location") %>%
  unnest(cols = company_location) %>%
  right_join(chocolate_data_common_beans2, by = "company_location")
ПРИМЕЧАНИЕ. Преимущество этого метода в том, что для присвоения значения не нужны несколько вложенных условий: достаточно одного соединения (join).
Бенчмарки
На наборе данных немного большего размера:
# Benchmark input: every row of the sample data repeated 1e5 times in a row
# (`each =`), keeping the original row order. local() keeps the helper
# variables out of the global environment.
dfN <- local({
  n_copies <- 1e5
  row_idx <- rep(seq_len(nrow(chocolate_data_common_beans2)), each = n_copies)
  chocolate_data_common_beans2[row_idx, ]
})
library(microbenchmark)
# Benchmark candidate 1: build a continent/country lookup table from the named
# list of country vectors and attach it to `dfN` with a single join.
#
# Takes no arguments; reads `dfN` and the per-continent vectors from the
# enclosing environment. Returns the joined table with `continent` and
# `company_location` followed by the remaining columns of `dfN`.
akrun <- function() {
  list(
    Africa = africa,
    Asia = asia,
    Europe = europe,
    `South America` = south_america,
    `North America` = north_america,
    Oceania = oceania
  ) %>%
    enframe(name = "continent", value = "company_location") %>%
    # Explicit `cols` avoids tidyr's "cols is now required" warning.
    unnest(cols = company_location) %>%
    # Explicit key avoids dplyr's "Joining, by = ..." message on every run.
    right_join(dfN, by = "company_location")
}
# Benchmark candidate 2: label every row of `dfN` with a chain of `%in%` tests
# inside case_when(), one test per continent vector; the first matching
# condition supplies the label.
#
# Takes no arguments; reads `dfN` and the per-continent vectors from the
# enclosing environment. Returns `dfN` with an added `continent` column.
iod <- function() {
  mutate(
    dfN,
    continent = case_when(
      company_location %in% africa ~ "Africa",
      company_location %in% asia ~ "Asia",
      company_location %in% europe ~ "Europe",
      company_location %in% south_america ~ "South America",
      company_location %in% north_america ~ "North America",
      company_location %in% oceania ~ "Oceania"
    )
  )
}
# Run each approach 10 times and report the timings as relative values.
microbenchmark(
  akrun(),
  iod(),
  times = 10L,
  unit = "relative"
)
# expr min lq mean median uq max neval cld
# akrun() 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 10 a
# iod() 6.332611 6.201221 5.953432 6.125145 5.567748 5.751538 10 b
Данные
# Sample data used throughout: one row per chocolate bar with the maker's
# country, the cocoa percentage (integer) and the rating (double).
# Row names are kept as the character strings "1".."10", and strings stay
# character (not factor) regardless of the R version.
chocolate_data_common_beans2 <- data.frame(
  company_location = c(
    "France", "Fiji", "Ecuador", "U.S.A.", "U.S.A.",
    "U.S.A.", "Canada", "U.S.A.", "Australia", "Austria"
  ),
  cocoa_percent = c(63L, 72L, 55L, 75L, 70L, 55L, 72L, 85L, 78L, 70L),
  rating = c(3.75, 3.5, 2.75, 2.75, 2.75, 2.75, 3.75, 3.5, 3.75, 3.75),
  row.names = as.character(1:10),
  stringsAsFactors = FALSE
)