Этого можно добиться, используя комбинацию group_by() и ungroup() из пакета dplyr.
library(dplyr)
# Sample data for the example: ten rows, each carrying a launch year and a
# state name. V1 is a running row id (1..10); the meaning of V4 is not
# evident from this snippet.
df <- data.frame(
  V1 = as.numeric(1:10),
  # One entry per row; years repeat according to `times`.
  launch_year = rep(
    c(1965, 1966, 1967, 1968, 1969, 1970),
    times = c(1, 2, 3, 1, 2, 1)
  ),
  # Laid out one line per launch year for readability.
  state_name = c(
    "France",
    "France", "Japan",
    "France", "Italy", "Japan",
    "I-ELDO",
    "I-ELDO", "Japan",
    "China"
  ),
  V4 = c(1, 1, 2, 2, rep(1, 6)),
  # Keep state_name as character (matters on R < 4.0, where the default
  # would convert strings to factors).
  stringsAsFactors = FALSE
)
# Tally rows per (launch_year, state_name) pair, attach to every row the
# total of those tallies for its year and for its state, and express the
# yearly total as a percentage of the state total. The result is printed.
# NOTE(review): prop is year-total / state-total, so it can exceed 100
# (e.g. 300 for Italy in the output below) -- confirm this is the ratio
# that was intended.
df %>%
  count(launch_year, state_name) %>%
  # Sum of n over all states that share the row's launch year.
  group_by(launch_year) %>%
  mutate(launches_that_year = sum(n)) %>%
  # group_by() replaces the previous grouping, so regrouping by state
  # directly yields the sum of n over all years for the row's state.
  group_by(state_name) %>%
  mutate(launches_by_state_name = sum(n)) %>%
  # Drop the grouping so the final column is computed row by row.
  ungroup() %>%
  mutate(prop = launches_that_year / launches_by_state_name * 100)
#> # A tibble: 10 x 6
#> launch_year state_name n launches_that_ye~ launches_by_state_~ prop
#> <dbl> <chr> <int> <int> <int> <dbl>
#> 1 1965 France 1 1 3 33.3
#> 2 1966 France 1 2 3 66.7
#> 3 1966 Japan 1 2 3 66.7
#> 4 1967 France 1 3 3 100
#> 5 1967 Italy 1 3 1 300
#> 6 1967 Japan 1 3 3 100
#> 7 1968 I-ELDO 1 1 2 50
#> 8 1969 I-ELDO 1 2 2 100
#> 9 1969 Japan 1 2 3 66.7
#> 10 1970 China 1 1 1 100
Создано 2019-02-10 с помощью пакета reprex (v0.2.0).