Я пытаюсь составить сюжет, где для каждой главы в книге показаны наиболее распространенные слова для этой главы. У меня проблема в том, что я использую функцию top_n
со значением 10
, но я не получаю ровно 10 в каждом facet
. Также я хотел бы знать, в чем здесь разница между count
и add_count
. Вот сюжет:
И код:
library(tidytext)
library(tidyverse)
notw_processed %>%
filter(chapter < 13) %>%
count(chapter, word) %>%
group_by(chapter) %>%
top_n(10, n) %>%
ungroup() %>%
mutate(word = as_factor(word)) %>%
mutate(word = reorder_within(word, n, chapter)) %>%
ggplot(aes(x = word, y = n)) + geom_col() + coord_flip() +
facet_wrap(~chapter, scale = "free_y") + scale_x_reordered()
И образец из данных:
dput(notw_processed[sample(1:50000, size = 200, replace = FALSE),])
structure(list(linenumber = c(1884L, 3131L, 41L, 2756L, 1011L,
538L, 3312L, 1856L, 2764L, 2691L, 3702L, 505L, 2090L, 2796L,
1811L, 270L, 228L, 3088L, 3262L, 778L, 1446L, 1696L, 1839L, 1413L,
3961L, 1375L, 306L, 895L, 1647L, 2037L, 822L, 2412L, 3266L, 1287L,
3919L, 3900L, 141L, 1628L, 1459L, 465L, 3309L, 193L, 60L, 4040L,
3276L, 3522L, 682L, 1338L, 394L, 2023L, 2929L, 3239L, 808L, 160L,
206L, 2173L, 3818L, 203L, 383L, 1443L, 1693L, 645L, 1535L, 1974L,
1557L, 3931L, 1877L, 1683L, 1154L, 1601L, 3548L, 1959L, 1625L,
777L, 704L, 3054L, 2152L, 3624L, 2968L, 2035L, 1621L, 2275L,
3625L, 805L, 2731L, 1334L, 2460L, 2294L, 684L, 896L, 371L, 1837L,
2009L, 903L, 1020L, 3300L, 1504L, 1495L, 611L, 2208L, 2277L,
2025L, 1991L, 584L, 1590L, 1468L, 610L, 2683L, 1697L, 156L, 2640L,
3507L, 1975L, 163L, 2807L, 2285L, 1687L, 219L, 4069L, 3983L,
1365L, 176L, 653L, 2226L, 4020L, 3841L, 1915L, 1455L, 486L, 3881L,
2596L, 2252L, 1248L, 3879L, 364L, 2176L, 2304L, 2900L, 75L, 2488L,
1852L, 3504L, 1547L, 2713L, 1574L, 3275L, 3061L, 3368L, 3628L,
3883L, 1701L, 3637L, 3781L, 3042L, 836L, 354L, 2934L, 1781L,
1964L, 113L, 1707L, 2609L, 2066L, 1882L, 3841L, 2362L, 3894L,
466L, 2296L, 1230L, 2250L, 1816L, 3947L, 1668L, 139L, 1872L,
3296L, 2878L, 206L, 2336L, 3852L, 730L, 3956L, 2311L, 373L, 17L,
83L, 626L, 936L, 2165L, 2686L, 4030L, 1582L, 1120L, 1761L, 1002L,
40L, 734L, 3733L, 3933L), chapter = c(23L, 41L, 1L, 37L, 12L,
6L, 43L, 23L, 37L, 37L, 49L, 6L, 27L, 38L, 23L, 3L, 2L, 40L,
43L, 9L, 17L, 22L, 23L, 16L, 52L, 16L, 3L, 11L, 21L, 26L, 10L,
33L, 43L, 15L, 52L, 52L, 1L, 20L, 18L, 5L, 43L, 2L, 1L, 53L,
43L, 46L, 8L, 16L, 4L, 26L, 39L, 43L, 9L, 1L, 2L, 29L, 50L, 2L,
4L, 17L, 22L, 8L, 20L, 26L, 20L, 52L, 23L, 22L, 14L, 20L, 46L,
26L, 20L, 9L, 8L, 40L, 28L, 46L, 40L, 26L, 20L, 31L, 46L, 9L,
37L, 16L, 35L, 31L, 8L, 11L, 3L, 23L, 26L, 11L, 12L, 43L, 19L,
19L, 7L, 30L, 31L, 26L, 26L, 7L, 20L, 18L, 7L, 37L, 22L, 1L,
36L, 45L, 26L, 1L, 38L, 31L, 22L, 2L, 53L, 52L, 16L, 1L, 8L,
31L, 53L, 51L, 24L, 18L, 6L, 52L, 36L, 31L, 14L, 52L, 3L, 29L,
32L, 39L, 1L, 35L, 23L, 45L, 20L, 37L, 20L, 43L, 40L, 43L, 46L,
52L, 22L, 46L, 50L, 40L, 10L, 3L, 39L, 23L, 26L, 1L, 22L, 36L,
26L, 23L, 51L, 32L, 52L, 5L, 31L, 14L, 31L, 23L, 52L, 21L, 1L,
23L, 43L, 38L, 2L, 32L, 51L, 8L, 52L, 32L, 3L, 1L, 1L, 7L, 12L,
29L, 37L, 53L, 20L, 13L, 22L, 12L, 1L, 8L, 50L, 52L), word = c("choose",
"remember", "demon", "manet", "question", "remembering", "finally",
"times", "marks", "false", "approach", "plum", "unable", "head",
"treated", "kote", "chronicler", "method", "locate", "thousand",
"blinding", "hat", "world", "cinder’s", "rallying", "crack",
"building", "expecting", "wrong", "sow", "god", "husband", "fela",
"counter", "wil", "lump", "stew", "ate", "deep", "forehead",
"untarnished", "horse", "west", "series", "archives", "thumb",
"folk", "slight", "don’t", "leaden", "candle’s", "books", "powerful",
"banished", "dried", "spoken", "you’re", "shape", "limping",
"earlier", "customers", "eager", "wagon", "looked", "strangely",
"yesterday", "finally", "frightening", "indignantly", "bit",
"front", "pints", "squash", "taborlin", "trouble", "whipped",
"skarpi", "command", "smile", "considered", "lay", "purse", "eyes",
"symptoms", "tin", "troupers", "luggage", "penny", "bright",
"bricks", "nodded", "mother", "dead", "imply", "should’ve", "front",
"broke", "play", "story", "pulled", "found", "lay", "skarpi",
"knowing", "smelled", "knots", "chronicler", "worth", "shouted",
"stew", "pennies", "university", "pennies", "fine", "boy", "smells",
"sound", "chronicler", "crescent", "stay", "proper", "soldiers",
"tables", "shirt", "hoping", "riot", "boy", "time", "scribe",
"prove", "sync", "haven’t", "talking", "tired", "smith’s", "half",
"half", "plainly", "it’s", "called", "knees", "beck", "wouldn’t",
"tray", "worth", "physically", "moment", "simmon", "simply",
"meat", "forward", "impressive", "scarred", "ayes", "don’t",
"street", "friends", "tanee", "friends", "eyes", "looked", "namer",
"story", "eyes", "mains", "expressions", "shop", "listening",
"lucky", "words", "half", "wicked", "candle", "fever", "fidget",
"shook", "mind", "law", "incredibly", "favor", "grate", "read",
"fierce", "urchin", "they’re", "broke", "chair", "call", "transferred",
"remembered", "tarbean’s", "heard", "hot", "chronicler", "size",
"silly", "wary", "mended", "thin", "dal")), row.names = c(NA,
-200L), class = c("tbl_df", "tbl", "data.frame"))