Я отвечаю на ваш первый вопрос, и тогда я подумаю о вашем редактировании. Я сделал бы это tidyverse способом, используя purrr и tidyr
df <- data.frame(Gender=c("Male", "Female", "Other"),
AgeCat=c("young", "adult", "old"),
stringsAsFactors = F)
master <- data.frame(Country=c("US", "US", "ENG", "SPN", "MEX", "ARG"),
Gender=c("Male", "Male", "Male", "Female", "Female", "Female"),
AgeCat=c("young", "young", "old", "adult", "adult", "adult"),
Height=c("134", "125", "169", "189", "176", "191"),
stringsAsFactors = F)
library(tidyverse)
pre <- master %>%
as_tibble(.) %>%
bind_rows(., df) %>%
complete(AgeCat, nesting(Country, Gender), fill = list(Height = NA)) %>%
group_by(Gender, AgeCat) %>%
nest(.)
NEST <- set_names(x = pre$data, nm = sprintf("%s_%s", pre$Gender, pre$AgeCat))
map(names(NEST), function(x){
z <- NEST[[x]]
z %>%
mutate(Gender = str_split(x, "_", simplify = T)[1],
AgeCat = str_split(x, "_", simplify = T)[2]) %>%
filter(!is.na(Country)) %>%
select(Country, Gender, AgeCat, Height)
}) %>%
set_names(., nm = sprintf("%s_%s", pre$Gender, pre$AgeCat))
#> $Female_adult
#> # A tibble: 3 x 4
#> Country Gender AgeCat Height
#> <chr> <chr> <chr> <chr>
#> 1 ARG Female adult 191
#> 2 MEX Female adult 176
#> 3 SPN Female adult 189
#>
#> $Male_adult
#> # A tibble: 2 x 4
#> Country Gender AgeCat Height
#> <chr> <chr> <chr> <chr>
#> 1 ENG Male adult <NA>
#> 2 US Male adult <NA>
#>
#> $Other_adult
#> # A tibble: 0 x 4
#> # … with 4 variables: Country <chr>, Gender <chr>, AgeCat <chr>, Height <chr>
#>
#> $Female_old
#> # A tibble: 3 x 4
#> Country Gender AgeCat Height
#> <chr> <chr> <chr> <chr>
#> 1 ARG Female old <NA>
#> 2 MEX Female old <NA>
#> 3 SPN Female old <NA>
#>
#> $Male_old
#> # A tibble: 2 x 4
#> Country Gender AgeCat Height
#> <chr> <chr> <chr> <chr>
#> 1 ENG Male old 169
#> 2 US Male old <NA>
#>
#> $Other_old
#> # A tibble: 0 x 4
#> # … with 4 variables: Country <chr>, Gender <chr>, AgeCat <chr>, Height <chr>
#>
#> $Female_young
#> # A tibble: 3 x 4
#> Country Gender AgeCat Height
#> <chr> <chr> <chr> <chr>
#> 1 ARG Female young <NA>
#> 2 MEX Female young <NA>
#> 3 SPN Female young <NA>
#>
#> $Male_young
#> # A tibble: 3 x 4
#> Country Gender AgeCat Height
#> <chr> <chr> <chr> <chr>
#> 1 ENG Male young <NA>
#> 2 US Male young 134
#> 3 US Male young 125
#>
#> $Other_young
#> # A tibble: 0 x 4
#> # … with 4 variables: Country <chr>, Gender <chr>, AgeCat <chr>, Height <chr>
Создано в 2020-02-20 представьте пакет (v0.3.0)
РЕДАКТИРОВАТЬ: новый ответ, содержащий все вопросы
pre <- master %>%
as_tibble(.) %>%
mutate(Height = as.numeric(Height)) %>%
bind_rows(., df) %>%
complete(AgeCat, nesting(Country, Gender), fill = list(Height = NA)) %>%
group_by(Gender, AgeCat) %>%
nest(.)
NEST <- set_names(x = pre$data, nm = sprintf("%s_%s", pre$Gender, pre$AgeCat))
DF <- map(names(NEST), function(x){
z <- NEST[[x]]
z %>%
mutate(Gender = str_split(x, "_", simplify = T)[1],
AgeCat = str_split(x, "_", simplify = T)[2]) %>%
filter(!is.na(Country)) %>%
select(Country, Gender, AgeCat, Height)
}) %>%
set_names(., nm = sprintf("%s_%s", pre$Gender, pre$AgeCat)) %>%
bind_rows(., .id = "Conditions")
Count <- DF %>%
count(Conditions, name = "Individuals")
DF %>%
group_by(Conditions) %>%
filter(!is.na(Height)) %>%
summarise(Mean_Height = mean(Height, na.rm = T)) %>%
left_join(., Count) %>%
select(Conditions, Individuals, Mean_Height)
#> Joining, by = "Conditions"
#> # A tibble: 3 x 3
#> Conditions Individuals Mean_Height
#> <chr> <int> <dbl>
#> 1 Female_adult 3 185.
#> 2 Male_old 2 169
#> 3 Male_young 3 130.
Создано в 2020-02-20 пакетом Представления (v0.3.0)
РЕДАКТИРОВАТЬ2: я сделал небольшие изменения в результирующем df, чтобы показать число отдельных лиц в этом случае. Так как среднее значение NA не существует, оно возвращает NaN
pre <- master %>%
as_tibble(.) %>%
mutate(Height = as.numeric(Height)) %>%
bind_rows(., df) %>%
complete(AgeCat, nesting(Country, Gender), fill = list(Height = NA)) %>%
group_by(Gender, AgeCat) %>%
nest(.)
NEST <- set_names(x = pre$data, nm = sprintf("%s_%s", pre$Gender, pre$AgeCat))
DF <- map(names(NEST), function(x){
z <- NEST[[x]]
z %>%
mutate(Gender = str_split(x, "_", simplify = T)[1],
AgeCat = str_split(x, "_", simplify = T)[2]) %>%
filter(!is.na(Country)) %>%
select(Country, Gender, AgeCat, Height)
}) %>%
set_names(., nm = sprintf("%s_%s", pre$Gender, pre$AgeCat)) %>%
bind_rows(., .id = "Conditions")
DF %>%
group_by(Conditions) %>%
mutate(N = case_when(is.na(Height) ~ 0,
TRUE ~ 1)) %>%
summarise(Individuals = sum(N), Mean_Height = mean(Height, na.rm = T))
#> # A tibble: 6 x 3
#> Conditions Individuals Mean_Height
#> <chr> <dbl> <dbl>
#> 1 Female_adult 3 185.
#> 2 Female_old 0 NaN
#> 3 Female_young 0 NaN
#> 4 Male_adult 0 NaN
#> 5 Male_old 1 169
#> 6 Male_young 2 130.
Создано в 2020-02-20 пакетом prex (v0.3.0)