Ggplot: geom_bar производит неправильный масштаб - PullRequest
0 голосов
/ 22 января 2019

Я пытаюсь построить столбчатую диаграмму, на которой вопросы опроса (из моего фрейма данных "final") отложены по оси x, а количество каждого из трёх вариантов ответа показано внутри соответствующего столбца.

вот код:

# Keep the columns at positions 4:6, 14 and 15 of `final` and drop every row
# that has a missing value in any of them.
data_items <- final %>%
  select(4:6, 14, 15) %>%
  na.omit()

# Reshape to long format: one row per (item, answer) pair, both as factors.
data2 <- data_items %>%
  gather(key = items, value = answer) %>%
  mutate(
    answer = factor(answer),
    items = factor(items)
  )

# Tally every answer per item and derive the label positions.
# NOTE: the output of count() is not grouped by `items`, so cumsum(n) keeps
# accumulating across all items. y_pos therefore rises above 1 for every item
# after the first, while position = "fill" draws the bars on a 0-1 scale.
data3 <- data2 %>%
  dplyr::count(items, answer) %>%
  mutate(
    y_pos = cumsum(n) / nrow(data_items) - (0.5 * n / nrow(data_items)),
    y_cumsum = cumsum(n),
    items_num = items
  )

# Bars are normalised to proportions by position = "fill"; the text layer is
# placed at 1 - y_pos and labelled with the raw count.
p3 <- ggplot(data3, aes(x = items, y = n)) +
  geom_bar(aes(fill = answer), stat = "identity", position = "fill") +
  geom_text(aes(y = 1 - y_pos, label = n), size = 3)
p3

Однако, когда я печатаю p3, шкала неверна и выглядит так:

enter image description here

Может кто-нибудь помочь мне выяснить, почему?

вот набор данных, с которым я работаю:

    dput(head(final,20))
    structure(list(idpers = c(5101, 5104, 13102, 27101, 27102, 28101, 
    34101, 34102, 39102, 42101, 42102, 45101, 45102, 61101, 61102, 
73102, 74102, 74103, 74104, 85101), year = c(2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011), canton = c("GE  Geneva", 
"GE  Geneva", "VD  Vaud", "VD  Vaud", "VD  Vaud", "VD  Vaud", 
"VD  Vaud", "VD  Vaud", "GE  Geneva", "VS  Valais", "VS  Valais", 
"VS  Valais", "VS  Valais", "VD  Vaud", "VD  Vaud", "VD  Vaud", 
"GE  Geneva", "GE  Geneva", "GE  Geneva", "GE  Geneva"), unemployment_benefits = c(2, 
1, 2, 3, 2, 1, 2, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, NA, 2), social_aid = c(1, 
1, 2, 3, 3, 1, 2, 1, 2, 1, 2, NA, 3, 3, 2, 3, 1, 2, NA, 3), social_expenses = c(2, 
NA, 2, 3, 3, 1, 2, 2, 3, 1, 2, 3, 1, 3, 2, 3, 1, 2, NA, 3), income = c(78600, 
67800, 39000, 9600, 84000, 105500, 87100, 81700, NA, 90400, NA, 
20800, 54400, 115000, 26000, NA, NA, 15600, 3700, NA), education = c(15, 
6, 6, 2, 6, 13, 7, 11, 16, 11, 6, 7, 6, 11, 7, 6, 2, 6, 2, 16
), skill_level = c(1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 
0, 0, 0, 0, 0, 1), political_position = c(6, 10, 5, NA, NA, 9, 
2, 5, 7, 6, 5, 4, 7, 5, 5, 5, NA, 5, NA, NA), sex = c(1, 0, 0, 
0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1), age = c(50L, 
50L, 38L, 43L, 43L, 40L, 46L, 49L, 43L, 38L, 36L, 45L, 44L, 53L, 
52L, 41L, 45L, 25L, 18L, 47L), working_status = c(NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_), chances_for_foreigners = c(1, 
3, 1, 3, 1, 1, 3, 3, 1, 1, 1, 2, 3, 3, 2, 1, 3, 1, 1, 1), taxes_on_high_income = c(3, 
2, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 1, 1, NA, 3), risk_of_unemployment = c(0, 
0, 4, 5, 0, 2, 2, 7, 5, 1, NA, 3, 0, 0, 0, 0, 0, 0, 0, NA), job_security = c(3, 
3, 3, 3, 4, 4, 3, 3, 3, 3, NA, 3, 4, 4, 4, 4, 3, 3, NA, NA), 
   financial_situation = c(8, 8, 9, 4, 5, 8, 8, 9, 4, 8, 9, 
    7, 6, 8, 9, 7, 6, 6, 6, 0), trust_in_people = c(3, 8, 7, 
    9, 5, 8, 6, 8, 5, 4, 7, 6, 7, 8, 7, 0, 6, 3, 7, 8), trust_in_government = c(7, 
    7, 5, 6, 5, 2, 6, 4, 4, 4, 7, 5, 9, 5, 8, 5, 8, 7, 10, 2), 
    pay_from_unemployment = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0), pay_from_welfare = c(0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), tot_pop_canton = c(351947, 
    351947, 585680, 585680, 585680, 585680, 585680, 585680, 351947, 
    263442, 263442, 263442, 263442, 585680, 585680, 585680, 351947, 
    351947, 351947, 351947), abs_lowskill_immigrants_canton = c(68468, 
    68468, 108001, 108001, 108001, 108001, 108001, 108001, 68468, 
    35987, 35987, 35987, 35987, 108001, 108001, 108001, 68468, 
    68468, 68468, 68468), per_cent_lowskill_immigrants_canton = c(19.4540655269117, 
    19.4540655269117, 18.4402745526567, 18.4402745526567, 18.4402745526567, 
    18.4402745526567, 18.4402745526567, 18.4402745526567, 19.4540655269117, 
    13.660312326812, 13.660312326812, 13.660312326812, 13.660312326812, 
    18.4402745526567, 18.4402745526567, 18.4402745526567, 19.4540655269117, 
    19.4540655269117, 19.4540655269117, 19.4540655269117), abs_highskill_immigrants_canton = c(41235, 
    41235, 54447, 54447, 54447, 54447, 54447, 54447, 41235, 8901, 
    8901, 8901, 8901, 54447, 54447, 54447, 41235, 41235, 41235, 
    41235), per_cent_highskill_immigrants_canton = c(11.7162527312351, 
    11.7162527312351, 9.29637344625051, 9.29637344625051, 9.29637344625051, 
    9.29637344625051, 9.29637344625051, 9.29637344625051, 11.7162527312351, 
    3.37873232058669, 3.37873232058669, 3.37873232058669, 3.37873232058669, 
    9.29637344625051, 9.29637344625051, 9.29637344625051, 11.7162527312351, 
    11.7162527312351, 11.7162527312351, 11.7162527312351), net_social_exp_mio_canton = c(684.5, 
    684.5, 697, 697, 697, 697, 697, 697, 684.5, 130.7, 130.7, 
    130.7, 130.7, 697, 697, 697, 684.5, 684.5, 684.5, 684.5), 
    GDP_mio_canton = c(47085, 47085, 48736, 48736, 48736, 48736, 
    48736, 48736, 47085, 17072, 17072, 17072, 17072, 48736, 48736, 
    48736, 47085, 47085, 47085, 47085), social_exp_as_per_cent_GDP_canton = c(1.45375384942126, 
    1.45375384942126, 1.43015430072226, 1.43015430072226, 1.43015430072226, 
    1.43015430072226, 1.43015430072226, 1.43015430072226, 1.45375384942126, 
    0.76558106841612, 0.76558106841612, 0.76558106841612, 0.76558106841612, 
    1.43015430072226, 1.43015430072226, 1.43015430072226, 1.45375384942126, 
    1.45375384942126, 1.45375384942126, 1.45375384942126), unemployment_rate_canton = c(6, 
    6, 5, 5, 5, 5, 5, 5, 6, 3.8, 3.8, 3.8, 3.8, 5, 5, 5, 6, 6, 
    6, 6), low_educ_canton = c(26.5, 26.5, 25.9, 25.9, 25.9, 
    25.9, 25.9, 25.9, 26.5, 31.3, 31.3, 31.3, 31.3, 25.9, 25.9, 
    25.9, 26.5, 26.5, 26.5, 26.5), sec_educ_canton = c(32.1, 
    32.1, 39.1, 39.1, 39.1, 39.1, 39.1, 39.1, 32.1, 45.3, 45.3, 
    45.3, 45.3, 39.1, 39.1, 39.1, 32.1, 32.1, 32.1, 32.1), tert_educ_canton = c(38.7, 
    38.7, 33, 33, 33, 33, 33, 33, 38.7, 21.6, 21.6, 21.6, 21.6, 
    33, 33, 33, 38.7, 38.7, 38.7, 38.7)), row.names = c(NA, -20L
), class = c("tbl_df", "tbl", "data.frame"))

1 Ответ

0 голосов
/ 22 января 2019

Попробуйте так: я немного изменил код после создания data2.

# Keep the columns at positions 4:6, 14 and 15 of `final` and drop every row
# that has a missing value in any of them.
data_items <- final %>%
  select(4:6, 14, 15) %>%
  na.omit()

# Reshape to long format: one row per (item, answer) pair, both as factors.
data2 <- data_items %>%
  gather(key = items, value = answer) %>%
  mutate(
    answer = factor(answer),
    items = factor(items)
  )

# Count each answer per item, then convert the counts into within-item shares
# so that the segments of every bar sum to 1. ungroup() afterwards so that
# data3 does not carry the grouping into later operations.
data3 <- data2 %>%
  dplyr::count(items, answer) %>%
  group_by(items) %>%
  mutate(share = n / sum(n)) %>%
  ungroup()

# Stacked bars of shares. The text layer inherits y = share and fill = answer,
# so position_stack() stacks the labels the same way as the bar segments;
# vjust = 0.45 puts each count slightly below the middle of its segment.
ggplot(data3, aes(x = items, y = share, fill = answer)) +
  geom_col() +
  geom_text(
    aes(label = n),
    size = 3,
    position = position_stack(vjust = 0.45)
  )

enter image description here

...