Как построить столбчатую диаграмму средней плотности с планками стандартной ошибки (SE) - PullRequest
1 голос
/ 16 апреля 2020

Я пытаюсь построить график средней плотности для каждой зоны (B и D) с планками стандартной ошибки (SE) и добавить буквы над столбцами, чтобы обозначить статистически значимые различия между зонами.

Примерно так ...

enter image description here

У меня есть следующий фрейм данных. Я пытаюсь вычислить среднее значение столбца «value» для зоны B и зоны D и отобразить его на графике с планками погрешностей. Я не уверен, в формате ли данных проблема. Я думал, что это будет простая задача, однако совершенно не понимаю, как заставить это работать. Заранее спасибо за помощь!

structure(list(date = structure(c(14000, 14000, 14031, 14031, 
14061, 14061, 14092, 14092, 14123, 14123, 14184, 14184, 14214, 
14214, 14365, 14365, 14396, 14396, 14426, 14426, 14457, 14457, 
14488, 14488, 14549, 14549, 14579, 14579, 14730, 14730, 14761, 
14761, 14791, 14791, 14822, 14822, 14853, 14853, 14914, 14914, 
14944, 14944, 15095, 15095, 15126, 15126, 15156, 15156, 15187, 
15187, 15218, 15218, 15279, 15279, 15309, 15309, 15461, 15461, 
15492, 15492, 15522, 15522, 15553, 15553, 15584, 15584, 15645, 
15645, 15675, 15675, 15826, 15826, 15857, 15857, 15887, 15887, 
15918, 15918, 15949, 15949, 16010, 16010, 16040, 16040, 16191, 
16191, 16222, 16222, 16252, 16252, 16283, 16283, 16314, 16314, 
16375, 16375, 16405, 16405, 16556, 16556, 16587, 16587, 16617, 
16617, 16648, 16648, 16679, 16679, 16740, 16740, 16770, 16770, 
16953, 16953, 16983, 16983, 17014, 17014, 17045, 17045, 17106, 
17106, 17136, 17136, 17318, 17318, 17348, 17348, 17379, 17379, 
17410, 17410, 17471, 17471, 17501, 17501, 17683, 17683, 17713, 
17713, 17744, 17744, 17775, 17775, 17836, 17836, 17866, 17866
), class = "Date"), year = c(2008L, 2008L, 2008L, 2008L, 2008L, 
2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 
2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 
2009L, 2009L, 2009L, 2009L, 2009L, 2010L, 2010L, 2010L, 2010L, 
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 
2010L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 
2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2012L, 2012L, 2012L, 
2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 
2012L, 2012L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2014L, 2014L, 
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
2014L, 2014L, 2014L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 
2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2016L, 
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
2016L, 2016L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
2017L, 2017L, 2017L, 2017L, 2017L, 2018L, 2018L, 2018L, 2018L, 
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L), month = structure(c(5L, 
5L, 4L, 4L, 3L, 3L, 1L, 1L, 7L, 7L, 6L, 6L, 2L, 2L, 5L, 5L, 4L, 
4L, 3L, 3L, 1L, 1L, 7L, 7L, 6L, 6L, 2L, 2L, 5L, 5L, 4L, 4L, 3L, 
3L, 1L, 1L, 7L, 7L, 6L, 6L, 2L, 2L, 5L, 5L, 4L, 4L, 3L, 3L, 1L, 
1L, 7L, 7L, 6L, 6L, 2L, 2L, 5L, 5L, 4L, 4L, 3L, 3L, 1L, 1L, 7L, 
7L, 6L, 6L, 2L, 2L, 5L, 5L, 4L, 4L, 3L, 3L, 1L, 1L, 7L, 7L, 6L, 
6L, 2L, 2L, 5L, 5L, 4L, 4L, 3L, 3L, 1L, 1L, 7L, 7L, 6L, 6L, 2L, 
2L, 5L, 5L, 4L, 4L, 3L, 3L, 1L, 1L, 7L, 7L, 6L, 6L, 2L, 2L, 4L, 
4L, 3L, 3L, 1L, 1L, 7L, 7L, 6L, 6L, 2L, 2L, 4L, 4L, 3L, 3L, 1L, 
1L, 7L, 7L, 6L, 6L, 2L, 2L, 4L, 4L, 3L, 3L, 1L, 1L, 7L, 7L, 6L, 
6L, 2L, 2L), .Label = c("Aug", "Dec", "July", "June", "May", 
"Nov", "Sept"), class = "factor"), Zone = c("Zone.B", "Zone.D", 
"Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", 
"Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", 
"Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", 
"Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", 
"Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", 
"Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", 
"Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", 
"Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", 
"Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", 
"Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", 
"Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", 
"Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", 
"Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", 
"Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", 
"Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", 
"Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", 
"Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", 
"Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", 
"Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", 
"Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D", 
"Zone.B", "Zone.D", "Zone.B", "Zone.D", "Zone.B", "Zone.D"), 
    value = c(1.5469086, 0.9628121, 0.5436808, 1.3583104, 0.5343952, 
    1.001405, 0.8457998, 1.9633247, 1.0564309, 1.0598237, 0.8400382, 
    0.622455, 0.5000899, 0.462802, 1.685773, 0.4686881, 2.1144817, 
    1.2159128, 1.3032429, 0.9161256, 1.7283975, 1.5130496, 1.1234053, 
    1.53157, 1.1072778, 1.3294973, 1.4293872, 1.2318001, 1.2573056, 
    2.9030824, 0.8183244, 1.9133592, 1.1637721, 1.0880351, 1.2357399, 
    1.447688, 0.8154475, 1.9440145, 0.8625087, 1.7255681, 0.7454908, 
    1.8538506, 1.0643353, 1.9391681, 1.6620765, 2.2622461, 0.8392645, 
    1.4462998, 1.0730535, 2.282335, 1.1551744, 1.1851883, 0.6946148, 
    1.1089916, 1.1289277, 0.9832297, 0.7801685, 1.3918411, 0.702675, 
    1.321903, 1.3585219, 1.671637, 1.282663, 1.6898635, 1.8615806, 
    1.2897994, 1.7114777, 1.0998009, 0.7149941, 0.3424369, 1.3469518, 
    5.3418421, 3.2474936, 3.6502369, 1.2859735, 0.9634012, 2.2181734, 
    2.5195328, 2.2866214, 1.1138549, 0.630082, 0.8241262, 0.8444934, 
    0.5658561, 0.5130557, 0.7943081, 0.2296881, 1.7998841, 0.742587, 
    1.1508025, 0.6200843, 1.2819195, 0.3960585, 1.161959, 0.3980511, 
    0.7375606, 0.3009843, 0.6061867, 1.2674316, 5.4225521, 1.018414, 
    3.5031324, 1.2698522, 1.2475438, 1.0985706, 1.3307636, 1.1795278, 
    1.1892627, 0.6699403, 1.4401562, 0.91998, 1.7972394, 0.5443802, 
    2.3273397, 0.6212349, 0.9363671, 1.2685108, 1.7920707, 1.6758284, 
    1.3687859, 0.8589559, 1.1374661, 0.829899, 0.8522818, 1.2096848, 
    1.8674565, 1.7883816, 1.548762, 2.273268, 1.5071044, 2.3455175, 
    1.8381368, 1.7463599, 1.9304698, 0.8478681, 0.9660615, 4.2659266, 
    1.2897004, 1.8813193, 1.695709, 3.6125893, 1.6265312, 2.5180816, 
    0.9977127, 1.3147816, 1.3784422, 1.4117959, 1.6234253)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -148L))

Ответы [ 2 ]

2 голоса
/ 16 апреля 2020

Вот вариант с ggpubr — это хороший пакет для добавления статистики на графики.

library (ggpubr)

# Bar plot of the mean of `value` per Zone with an upper standard-error bar,
# annotated with a label derived from the p-value computed by
# stat_compare_means().
# NOTE(review): `df` is assumed to hold the data frame from the question's
# dput() output; it is not assigned anywhere in the visible snippet.
ggbarplot(
  df,
  x = "Zone",
  y = "value",
  fill = "Zone",
  error.plot = "upper_errorbar", # adds upper bar for se
  add = "mean_se" # adds summary mean and se
) +
  stat_compare_means(
    size = 8, # adjust label size
    label.y.npc = 0.3, # adjust y pos of label
    aes(
      # after_stat(p) replaces the `..p..` notation, which is deprecated
      # since ggplot2 3.4.0 and emits a warning there.
      label = cut(
        after_stat(p),
        breaks = c(-Inf, 0.0001, 0.001, 0.01, 0.05, Inf),
        labels = c("aaaa", "aaa", "aa", "a", "ns")
      )
    )
  ) # adds label for different cut off values of p.


1 голос
/ 16 апреля 2020

Это отвечает на ваш вопрос? Как вы предложили, я вычислил среднее значение по Зоне, а также стандартное отклонение:

library(ggplot2)
library(dplyr)

# Summarise `value` per Zone (mean and standard error of the mean), then draw
# the means as bars with error bars of +/- 1 SE and a text label above each
# bar.
# NOTE(review): `df` is assumed to hold the data frame from the question's
# dput() output; it is not assigned anywhere in the visible snippet.
df %>%
  group_by(Zone) %>%
  summarise(
    Mean = mean(value),
    # Standard error of the mean is sd / sqrt(n). Plain sd(value) is the
    # standard deviation, which is not what an "SE" bar should show.
    SE = sd(value) / sqrt(n())
  ) %>%
  ggplot(aes(Zone, Mean)) +
  geom_col() +
  geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE), width = .1) +
  geom_text(
    aes(
      # fixed = TRUE matches the literal prefix "Zone." instead of treating
      # "." as a regex wildcard.
      label = gsub("Zone.", "", Zone, fixed = TRUE),
      y = Mean + SE + .1 # place the label just above the upper error bar
    )
  )

enter image description here

...