Я взял за основу график @andriy и сделал из него упрощённую пользовательскую функцию:
Данные должны выглядеть так, причём ageGroup — упорядоченный фактор (ordered factor).
# Preview the first six rows of the prepared long-format data.
head(population, n = 6L)
#   ageGroup  sex   number
# 1      0-4 male 1.896459
# 2      5-9 male 1.914255
# 3    10-14 male 1.832594
# 4    15-19 male 1.849453
# 5    20-24 male 1.658733
# 6    25-29 male 1.918060
Затем вы передаёте данные и точки разбиения оси (breaks):
# Draw the pyramid with axis ticks at 0, 0.5, 1, 1.5 and 2.
pyramid(data = population, .breaks = c(0, 0.5, 1, 1.5, 2))
При необходимости возрастные группы можно создать с помощью функции age_cat, которую я взял из этого блога. Смотрите код ниже. Я слегка изменил исходное имя функции и параметры по умолчанию.
Например:
# Example: bin 10,000 random ages into 10-year groups. `by = 10` is passed
# explicitly because age_cat() defaults to 5-year groups, which would not
# produce the 10-year table shown below.
age_column <- sample(0:110, size = 10000, replace = TRUE)
table(age_cat(age_column, by = 10))
#   0-9 10-19 20-29 30-39 40-49 50-59 60-69 70-79 80-89 90-99  100+
#   885   836   885   927   942   953   886   882   935   872   997
функции
#' Draw a population pyramid from long-format age/sex data
#'
#' Bars for rows with `sex == "male"` extend to the left and bars for rows
#' with `sex == "female"` extend to the right of a central gap that holds the
#' age-group labels. The gap half-width is one seventh of the last element of
#' `.breaks`.
#'
#' @param data Data frame with the columns `ageGroup`, `sex` and `number`.
#' @param .breaks Non-empty numeric vector of tick values; the same values
#'   are used as tick labels on both sides of the pyramid.
#' @return A ggplot object.
pyramid <- function(data, .breaks) {
  stopifnot(is.numeric(.breaks), length(.breaks) > 0)

  # Half-width of the central gap reserved for the age-group labels.
  .gap <- tail(.breaks, 1) / 7

  # `%in%` never returns NA, so rows with a missing `sex` are dropped here
  # instead of being turned into all-NA rows as `==` subsetting would do.
  males <- data[data$sex %in% "male", ]
  females <- data[data$sex %in% "female", ]

  # NOTE(review): `size` on line geoms / element_line() is kept for
  # compatibility with older ggplot2; newer releases prefer `linewidth`.
  ggplot(data, aes(x = ageGroup, color = sex)) +
    geom_linerange(
      data = males,
      aes(ymin = -.gap, ymax = -.gap - number),
      size = 3.5, alpha = 0.8
    ) +
    geom_linerange(
      data = females,
      aes(ymin = .gap, ymax = .gap + number),
      size = 3.5, alpha = 0.8
    ) +
    # Age-group labels sit at y = 0, i.e. in the middle of the central gap.
    geom_label(
      aes(x = ageGroup, y = 0, label = ageGroup),
      inherit.aes = FALSE,
      size = 3.5, label.padding = unit(0.0, "lines"), label.size = NA,
      label.r = unit(0.0, "lines"), fill = "white", alpha = 0.9,
      color = "#5D646F"
    ) +
    # Ticks are shifted outwards by the gap but labelled with the raw values.
    scale_y_continuous(
      breaks = c(-rev(.breaks) - .gap, .breaks + .gap),
      labels = c(rev(.breaks), .breaks)
    ) +
    coord_flip() +
    scale_color_manual(
      name = "",
      values = c(male = "#3E606F", female = "#8C3F4D")
    ) +
    theme_minimal() +
    theme(
      text = element_text(color = "#3A3F4A"),
      panel.grid.major.y = element_blank(),
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_line(
        linetype = "dotted", size = 0.3, color = "#3A3F4A"
      ),
      axis.title = element_blank(),
      axis.text.x = element_text(size = 12, color = "#5D646F"),
      axis.text.y = element_blank(),
      strip.text = element_text(
        color = "#5D646F", size = 18, face = "bold", hjust = 0.030
      ),
      legend.position = "none"
    )
}
#' Bin ages into labelled, left-closed age groups
#'
#' Ages are floored and cut into intervals `[start, next start)` of width
#' `by`, beginning at `lower`, plus one open-ended group for everything at or
#' above `upper`. `upper` is always used as a break, so the open-ended group
#' starts exactly at `upper` even when `upper - lower` is not a multiple of
#' `by` (the last closed group is then simply narrower).
#'
#' @param x Numeric vector of ages.
#' @param lower Lower bound of the first group; smaller values become `NA`.
#' @param upper Start of the open-ended top group.
#' @param by Width of each closed group; must be positive.
#' @param sep Separator between the first and last age in a group label.
#' @param above.char Suffix appended to `upper` for the open-ended label.
#' @return A factor the same length as `x` whose levels are the group labels
#'   in increasing order.
age_cat <- function(x, lower = 0, upper = 100, by = 5,
                    sep = "-", above.char = "+") {
  stopifnot(by > 0, upper >= lower)

  # Finite break points; force `upper` in when the sequence stops short of it
  # so that the labels below always describe the real intervals.
  edges <- seq(lower, upper, by = by)
  if (!isTRUE(all.equal(edges[length(edges)], upper))) {
    edges <- c(edges, upper)
  }

  # Each closed group is labelled "<first age><sep><last age>".
  starts <- edges[-length(edges)]
  ends <- edges[-1] - 1
  labs <- c(paste(starts, ends, sep = sep), paste0(upper, above.char))

  cut(floor(x), breaks = c(edges, Inf), right = FALSE, labels = labs)
}
данные
library(dplyr)
library(ggplot2)
# Build the long-format `population` table used for plotting: download the
# age/sex distribution, reshape the non-key columns into `sex` / `number`,
# keep the year 1990 only, and express counts in millions.
population <-
  read.csv("https://raw.githubusercontent.com/andriy-gazin/datasets/master/ageSexDistribution.csv") %>%
  tidyr::gather(sex, number, -year, -ageGroup) %>%
  dplyr::filter(year == 1990) %>%
  dplyr::mutate(
    ageGroup = factor(
      ageGroup,
      levels = c(
        "0-4", "5-9", "10-14", "15-19", "20-24",
        "25-29", "30-34", "35-39", "40-44",
        "45-49", "50-54", "55-59", "60-64",
        "65-69", "70-74", "75-79", "80-84",
        "85-89", "90-94", "95-99", "100+"
      ),
      ordered = TRUE
    ),
    # Values absent from the level list become NA in factor(); they are
    # relabelled "100+". Presumably the CSV spells the open-ended group
    # differently -- verify against the data.
    ageGroup = replace(ageGroup, is.na(ageGroup), "100+"),
    number = number / 1e6
  ) %>%
  dplyr::select(-year)