Графически:
library(babynames)
library(dplyr)
library(ggplot2)
# Line chart of the yearly number of babies named "Alex" from 1920 onward,
# drawn as one coloured line per sex.
babynames %>%
  filter(year >= 1920, name == "Alex") %>%
  ggplot(mapping = aes(x = year, y = n, colour = sex)) +
  geom_line()
Численно:
library(tidyr)
# For every year since 1920, compute which share of the babies named "Alex"
# belongs to each sex, then rank the years by how evenly the name was split
# (smallest absolute difference between the two shares first).
babynames %>%
  filter(name == "Alex", year >= 1920) %>%
  group_by(year) %>%
  # Share of this sex among all "Alex" records of the same year.
  mutate(pct = n / sum(n, na.rm = TRUE)) %>%
  ungroup() %>%
  # Keep only the identifying columns plus `pct`, so that widening yields a
  # single row per year with one column per sex.
  select(year, name, pct, sex) %>%
  pivot_wider(names_from = sex, values_from = pct) %>%
  # `.data$` forces a column lookup. A bare `F` would silently fall back to
  # base R's `F` (i.e. FALSE) if the widened table had no `F` column.
  mutate(diff = abs(.data$F - .data$M)) %>%
  # Years where only one sex is recorded have an NA `diff` and sort last.
  arrange(diff)
То же самое для всех имён:
# Same analysis for every name: per (name, year), compute the total number
# of babies and the share belonging to each sex, then rank the rows by how
# evenly the name was split between the sexes.
babynames %>%
  filter(year >= 1920) %>%
  group_by(name, year) %>%
  mutate(
    # Computed once and reused, so `total` and `pct` always agree on how
    # missing counts are treated.
    total = sum(n, na.rm = TRUE),
    pct = n / total
  ) %>%
  ungroup() %>%
  # `total` is identical for both sexes of a (name, year) pair, so it can
  # stay as an identifying column when the table is widened.
  select(year, name, total, pct, sex) %>%
  pivot_wider(names_from = sex, values_from = pct) %>%
  # `.data$` forces a column lookup. A bare `F` would silently fall back to
  # base R's `F` (i.e. FALSE) if the widened table had no `F` column.
  mutate(diff = abs(.data$F - .data$M)) %>%
  # Name/year pairs recorded for one sex only have an NA `diff` and sort last.
  arrange(diff)
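Не уверен, насколько можно доверять этому набору данных ;) Например, в 1920 году имя Othello записано одинаковое число раз для обоих полов: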
# Look up the 1920 records for the name "Othello".
babynames %>%
  filter(year == 1920 & name == "Othello")
year sex name n prop
<dbl> <chr> <chr> <int> <dbl>
1 1920 F Othello 8 0.00000643
2 1920 M Othello 8 0.00000727