Упорядочивание столбчатой диаграммы с накоплением по количеству и по фактору заливки - PullRequest
0 голосов
/ 03 октября 2019

У меня есть следующий data.frame под названием mydf

structure(list(id = structure(c(1L, 1L, 11L, 11L, 11L, 11L, 11L, 
11L, 13L, 13L, 15L, 15L, 15L, 15L, 15L, 15L, 19L, 27L, 30L, 30L, 
31L, 31L, 31L, 31L, 31L, 31L, 31L, 37L, 38L, 38L, 40L, 42L, 43L, 
43L, 46L, 46L, 10L, 10L, 16L, 24L, 36L, 5L, 12L, 12L, 12L, 12L, 
16L, 17L, 18L, 18L, 20L, 25L, 25L, 27L, 28L, 28L, 32L, 35L, 41L, 
41L, 41L, 41L, 41L, 41L, 44L, 45L, 45L, 2L, 2L, 3L, 3L, 4L, 4L, 
4L, 4L, 6L, 6L, 7L, 8L, 9L, 10L, 12L, 17L, 17L, 17L, 18L, 18L, 
20L, 22L, 22L, 23L, 23L, 24L, 26L, 26L, 26L, 29L, 29L, 29L, 33L, 
34L, 34L, 35L, 36L, 41L, 42L, 42L, 44L, 44L, 45L, 5L, 7L, 7L, 
9L, 9L, 9L, 9L, 9L, 14L, 14L, 19L, 21L, 21L, 25L, 25L, 25L, 32L, 
32L, 33L, 33L, 33L, 39L, 39L, 40L, 41L, 42L, 42L, 46L, 46L, 47L, 
47L), .Label = c("S003", "S004", "S009", "S016", "S025", "S027", 
"S035", "S036", "S042", "S043", "S045", "S047", "S048", "S049", 
"S050", "S058", "S059", "S061", "S063", "S065", "S067", "S069", 
"S070", "S073", "S076", "S077", "S079", "S083", "S087", "S088", 
"S089", "S093", "S095", "S103", "S109", "S112", "S114", "S119", 
"S121", "S123", "S141", "S142", "S224", "S467", "S486", "S503", 
"S532"), class = "factor"), predclass = c("G1", "G1", "G1", "G1", 
"G1", "G1", "G1", "G1", "G1", "G1", "G1", "G1", "G1", "G1", "G1", 
"G1", "G1", "G1", "G1", "G1", "G1", "G1", "G1", "G1", "G1", "G1", 
"G1", "G1", "G1", "G1", "G1", "G1", "G1", "G1", "G1", "G1", "G2", 
"G2", "G2", "G2", "G2", "G3", "G3", "G3", "G3", "G3", "G3", "G3", 
"G3", "G3", "G3", "G3", "G3", "G3", "G3", "G3", "G3", "G3", "G3", 
"G3", "G3", "G3", "G3", "G3", "G3", "G3", "G3", "G4", "G4", "G4", 
"G4", "G4", "G4", "G4", "G4", "G4", "G4", "G4", "G4", "G4", "G4", 
"G4", "G4", "G4", "G4", "G4", "G4", "G4", "G4", "G4", "G4", "G4", 
"G4", "G4", "G4", "G4", "G4", "G4", "G4", "G4", "G4", "G4", "G4", 
"G4", "G4", "G4", "G4", "G4", "G4", "G4", "G5", "G5", "G5", "G5", 
"G5", "G5", "G5", "G5", "G5", "G5", "G5", "G5", "G5", "G5", "G5", 
"G5", "G5", "G5", "G5", "G5", "G5", "G5", "G5", "G5", "G5", "G5", 
"G5", "G5", "G5", "G5", "G5"), status = c("stable", "stable", 
"stable", "stable", "stable", "stable", "stable", "stable", "stable", 
"stable", "stable", "stable", "stable", "stable", "stable", "stable", 
"unstable", "unstable", "stable", "stable", "stable", "stable", 
"stable", "stable", "stable", "stable", "stable", "stable", "stable", 
"stable", "unstable", "unstable", "stable", "stable", "unstable", 
"unstable", "unstable", "unstable", "unstable", "unstable", "unstable", 
"unstable", "unstable", "unstable", "unstable", "unstable", "unstable", 
"unstable", "unstable", "unstable", "unstable", "unstable", "unstable", 
"unstable", "stable", "stable", "unstable", "unstable", "unstable", 
"unstable", "unstable", "unstable", "unstable", "unstable", "unstable", 
"unstable", "unstable", "stable", "stable", "stable", "stable", 
"stable", "stable", "stable", "stable", "stable", "stable", "unstable", 
"stable", "unstable", "unstable", "unstable", "unstable", "unstable", 
"unstable", "unstable", "unstable", "unstable", "stable", "stable", 
"stable", "stable", "unstable", "stable", "stable", "stable", 
"stable", "stable", "stable", "unstable", "stable", "stable", 
"unstable", "unstable", "unstable", "unstable", "unstable", "unstable", 
"unstable", "unstable", "unstable", "unstable", "unstable", "unstable", 
"unstable", "unstable", "unstable", "unstable", "stable", "stable", 
"unstable", "stable", "stable", "unstable", "unstable", "unstable", 
"unstable", "unstable", "unstable", "unstable", "unstable", "stable", 
"stable", "unstable", "unstable", "unstable", "unstable", "unstable", 
"unstable", "stable", "stable")), row.names = c(1L, 2L, 28L, 
29L, 30L, 31L, 32L, 33L, 39L, 40L, 43L, 44L, 45L, 46L, 47L, 48L, 
59L, 80L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 108L, 
109L, 110L, 113L, 124L, 128L, 129L, 136L, 139L, 25L, 27L, 50L, 
69L, 106L, 12L, 34L, 36L, 37L, 38L, 49L, 54L, 56L, 58L, 62L, 
72L, 73L, 79L, 81L, 82L, 97L, 104L, 115L, 116L, 117L, 120L, 121L, 
122L, 131L, 133L, 135L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 13L, 
14L, 16L, 18L, 19L, 26L, 35L, 51L, 52L, 53L, 55L, 57L, 61L, 65L, 
66L, 67L, 68L, 70L, 76L, 77L, 78L, 83L, 84L, 85L, 99L, 102L, 
103L, 105L, 107L, 119L, 123L, 125L, 130L, 132L, 134L, 11L, 15L, 
17L, 20L, 21L, 22L, 23L, 24L, 41L, 42L, 60L, 63L, 64L, 71L, 74L, 
75L, 95L, 96L, 98L, 100L, 101L, 111L, 112L, 114L, 118L, 126L, 
127L, 137L, 138L, 140L, 141L), class = "data.frame")

Я пытаюсь построить по нему столбчатую диаграмму с накоплением (stacked bar chart), разбитую на панели (facet) по status. Я хотел бы упорядочить столбцы по predclass так, чтобы G1 был наверху, затем G2 и так далее, а внутри каждой группы столбцы также были упорядочены по их количеству.

Искал много, но не смог найти хорошего решения. У кого-нибудь есть идея? Это мой код:

# Asker's attempt: horizontal stacked bars of row counts per id, with ids
# ordered by overall frequency (fct_infreq), filled by predclass and split
# into one panel per status.
ggplot(mydf, aes(x = forcats::fct_infreq(id), fill = predclass)) +
  # geom_bar() counts rows by default, so no explicit stat is needed.
  geom_bar(width = 0.85) +
  facet_wrap(
    vars(status),
    ncol = 2,
    strip.position = "left",
    scales = "free"
  ) +
  coord_flip() +
  theme_classic()

1 Ответ

1 голос
/ 04 октября 2019

Я не до конца понял, каким должен быть порядок справа, но думаю, что следующий код даёт то, что вам нужно. Хитрость в том, чтобы сначала привести уровни фактора id в правильный порядок.

library(tidyverse)

# Build `lev`: the id labels in the order the factor levels should take.
#
# Each id is summarised by its lowest predclass (as.ordered() sorts the labels
# alphabetically, so "G1" < "G2" < ...) and by its total number of rows in
# mydf (counted across all status values). Sorting by descending class and
# then ascending count puts the G1 ids with the most rows last, i.e. at the
# top of the axis once it is flipped with coord_flip().
lev <- mydf %>% 
  as_tibble() %>% 
  mutate(predclass = as.ordered(predclass)) %>% 
  group_by(id) %>% 
  # One row per id; summarising a single grouping variable returns an
  # ungrouped tibble, so no extra regrouping step (or message) is involved.
  summarize(order_id = min(predclass), n = n()) %>% 
  arrange(desc(order_id), n) %>% 
  pull(id) %>% 
  as.character()

# Plot with ids in the order given by `lev` and the predclass levels
# reversed: horizontal stacked count bars, one free-scaled panel per status.
mydf %>%
  mutate(
    id = fct_relevel(id, lev),
    predclass = fct_rev(as.ordered(predclass))
  ) %>%
  ggplot(aes(x = id, fill = predclass)) +
  # geom_bar() counts rows by default, so no explicit stat is needed.
  geom_bar(width = 0.85) +
  facet_wrap(vars(status), scales = "free") +
  coord_flip() +
  theme_classic()
...