Вот функция для генерации графиков торнадо с использованием ggplot2 :: geom_col () вместе с некоторыми примерами использования. Надеюсь, это поможет ....
# Tornado Plot using ggplot2(), 2019/05/19.
# See Wikipedia: ["Tornado diagram"](https://en.wikipedia.org/wiki/Tornado_diagram).
library( magrittr )
library( tidyverse )
# Function tornado_plot() produces a "tornado plot" given the sensitivity
# analysis results in data_frame df. It plots green bars indicating the levels
# of the response variable when each **x input variable** is moved to its maximum
# level while holding all other variables constant. Similarly, the red bars are
# the outputs when each **x input variable** is moved to its minimum value while
# holding all other variables constant. The input variable to which the output
# is most sensitive is shown at the top of the plot. And the bars are stacked
# from most sensitive to least sensitive, fancifully yielding the shape of a
# tornado.
tornado_plot <-
function(
df,
var_names_col,
min_level_col,
min_output_col,
max_level_col,
max_output_col,
base_level_col,
baseline_output,
title_str = "Tornado Plot",
subtitle_str = "",
caption_str = "",
ylab_str = "output",
baseline_label = "",
scale_breaks = NULL,
limits = NULL
) {
# + The argument df must be a tidyverse::tibble with columns referred to by all of the
# other arguments having "col" in their names.
# + The var_names_col argument must be an unquoted column name that contains characters
# naming the variables that were varied in the sensitivity analysis.
# + The level column arguments -- min_level_col, max_level_col and
# base_level_col -- must be unquoted column names that contain characters to be
# used in forming labels for each variable bar of the plot.
# + The output column arguments -- min_output_col and max_output_col -- must
# be unquoted column names that contain numerical values to be plotted as the
# extents of the bars in the plot.
# + The baseline_output argument is the numeric value of the output (response) variable
# produced by setting all of the variables to their base levels.
var_names_col <- enquo( var_names_col )
min_level_col <- enquo( min_level_col )
max_level_col <- enquo( max_level_col )
base_level_col <- enquo( base_level_col )
min_output_col <- enquo( min_output_col )
max_output_col <- enquo( max_output_col )
have_custom_y_breaks <- !any( is.null(scale_breaks) )
# Create a generic tibble as the data source for plotting.
# Sorts variables from the one to which the output was least sensitive
# to the one to which the output was most sensitive.
# Then creates labels for each variable capturing the min, base, and max
# levels of that variable.
# Finally, it centers all outputs around the baseline output so thta the
# ggplot2::geom_col() function can still work with zero-based bars.
plt_df <- df %>%
mutate(del = abs(!!max_output_col - !!min_output_col) ) %>%
arrange(del) %>%
mutate(
names = sprintf(
"%s\n(min=%s; base=%s; max=%s)",
!!var_names_col,
!!min_level_col,
!!base_level_col,
!!max_level_col
),
names = factor(names,names),
min = !!min_output_col,
max = !!max_output_col
) %>%
dplyr::select(names,min,max) %>%
gather( key = Level, value = output, -names) %>%
mutate( output = output - baseline_output, Level = factor(Level,c("min","max")) ) #%T>% print()
# Generate the tornado plot.
plt <- plt_df %>%
{
ggplot(., aes( fill = Level, x = names, y = output )) +
geom_hline(yintercept = 0, linetype = 1, size = 2, color = "darkgray") +
geom_col( alpha = 0.4, width = 0.98) +
coord_flip() + #*** NOTE THE COORDINATE FLIP ***
geom_text(aes(y = 0, label = names), size = 4, fontface = "bold" ) +
scale_x_discrete( expand = expand_scale(add = 1 ) ) +
scale_fill_manual(values = c(min = "red", max = "green") ) +
ylab( ylab_str ) +
theme( # **Hmmm, references the ACTUAL plotted (post-flipped) x-y axes. **
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.title.y = element_blank(),
panel.grid.major.y = element_blank(), # Remove horizontal grid lines
panel.grid.minor.y = element_blank(),
axis.text.x = element_text( size = 14 ),
axis.title.x = element_text( size = 16 ),
title = element_text( size = 18 ),
legend.position = "bottom"
) +
labs( title = title_str, subtitle = subtitle_str, caption = caption_str )
}
# Set the pre-flipped y-axis (which gets flipped to be the x-axis in the final plot).
if( !is.null(limits) ){
y_limits = limits
} else {
y_limits = c(-max(abs(plt_df$output)),max(abs(plt_df$output)))
}
if( have_custom_y_breaks ){
plt <- plt + scale_y_continuous(
limits = y_limits,
breaks = scale_breaks,
labels = names(scale_breaks)
)
} else {
plt <- plt + scale_y_continuous(
limits = y_limits,
labels = function(x) baseline_output + x
)
}
# Add the baseline output label, if any
if(baseline_label != ""){
return(
plt +
geom_label(
data = tibble( x = 0.25, y = 0, label = baseline_label),
mapping = aes( x = x, y = y, label = label),
fontface = "bold",
show.legend = FALSE,
inherit.aes = FALSE
)
)
} else {
return( plt )
}
}
#--------------------------------------------------------------------------------
# USAGE EXAMPLE:
# Hypothetical Investment Strategy Analysis:
# These are data from a sensitivity analysis on an investment strategy that invests in an
# an S&P 500 index fund and a "safety" value-store (a 0%-real-return investment);
# protecting winnings from market with transfer to safety when strategy criteria are met.
# Disregards taxes and fees. Real values (i.e., inflation-adjusted).
sensitivity_df <- tribble(
~variable, ~min, ~base, ~max, ~Total_at_min, ~Total_base, ~Total_at_max, ~Time_period,
"Start Value", 0, 2000, 100000, 239600, 245900, 554800, "start: 1980, end: 2005",
"Monthly Investment", 0, 500, 1000, 6300, 245900, 485600, "start: 1980, end: 2005",
"Allocation to Safety", 0, 0.3, 0.5, 277800, 245900, 224700, "start: 1980, end: 2005",
"Annual Increase in Mo. Investment", 0, 0.01, 0.03, 222700, 245900, 303800, "start: 1980, end: 2005",
"Protection Rate", 0, 0.0025, 0.03, 310300, 245900, 199500, "start: 1980, end: 2005",
"Start Value", 0, 2000, 100000, 174300, 175900, 253300, "start: 1910, end: 1935",
"Monthly Investment", 0, 500, 1000, 1600, 175900, 350100, "start: 1910, end: 1935",
"Allocation to Safety", 0, 0.3, 0.5, 177700, 175900, 174600, "start: 1910, end: 1935",
"Annual Increase in Mo. Investment", 0, 0.01, 0.03, 155600, 175900, 227100, "start: 1910, end: 1935",
"Protection Rate", 0, 0.0025, 0.03, 171800, 175900, 176000, "start: 1910, end: 1935"
) %>% # Add x-input level labels (overwriting reals min, base, max with character values through mutate_at()).
mutate_at(vars(contains("Total")), ~{100*round(./100)}) %>%
mutate_at(
vars( min, base, max),
~ {
ifelse(
abs(.) >= 1000,
paste0("$",formatC(.,big.mark = ",",format = "f",digits = 0)),
sprintf(
c( "$%.0f", "$%.0f", "%.0f%%", "%.1f%%", "%.2f%%" ),
. * c(1,1,100,100,100)
)
)
}
)
# Generate the tornado plot with generic labeling and axis.
sensitivity_df %>%
filter( grepl("1980.+2005", Time_period ) ) %>%
tornado_plot(
var_names_col = variable,
min_level_col = min,
min_output_col = Total_at_min,
max_level_col = max,
max_output_col = Total_at_max,
base_level_col = base,
baseline_output = .$Total_base[[1]]
) %>% print()
# Generate the tornado plot with customized labeling and axis.
scl_limits = c(0, 6.0e5 )
sensitivity_df %>%
filter( grepl("1980.+2005", Time_period ) ) %>%
tornado_plot(
var_names_col = variable,
min_level_col = min,
min_output_col = Total_at_min,
max_level_col = max,
max_output_col = Total_at_max,
base_level_col = base,
baseline_output = .$Total_base[[1]],
title_str = "Sensitivity of Total Value to Strategy Variables",
subtitle_str = sprintf( "Time period %s", .$Time_period[[1]] ),
caption_str = "Assuming S&P 500 index & 0%-real-return 'safe harbor'",
ylab_str = "Total Value",
baseline_label = paste0("Base Case:\n$",format(100*round(.$Total_base[[1]]/100,0),big.mark = ",")),
scale_breaks = setNames(
seq(min(scl_limits), max(scl_limits), 1e5) - .$Total_base[[1]],
paste0("$",formatC(
seq(min(scl_limits), max(scl_limits), 1e5),big.mark = ",",format = "f",digits = 0)
)
),
limits = scl_limits - .$Total_base[[1]]
) %>% print()
# Generate the tornado plot for another time period, with scaling
# to be comparable with the first time period.
sensitivity_df %>%
filter( grepl("1910.+1935", Time_period ) ) %>%
tornado_plot(
var_names_col = variable,
min_level_col = min,
min_output_col = Total_at_min,
max_level_col = max,
max_output_col = Total_at_max,
base_level_col = base,
baseline_output = .$Total_base[[1]],
title_str = "Sensitivity of Total Value to Strategy Variables",
subtitle_str = sprintf( "Time period %s", .$Time_period[[1]] ),
caption_str = "Assuming S&P 500 index & 0%-real-return 'safe harbor'",
ylab_str = "Total Value",
baseline_label = paste0("Base Case:\n$",format(100*round(.$Total_base[[1]]/100,0),big.mark = ",")),
scale_breaks = setNames(
seq(min(scl_limits), max(scl_limits), 1e5) - .$Total_base[[1]],
paste0("$",formatC(
seq(min(scl_limits), max(scl_limits), 1e5),big.mark = ",",format = "f",digits = 0)
)
),
limits = scl_limits - .$Total_base[[1]]
) %>% print()
Общий сюжет торнадо
Участок торнадо с пользовательскими метками, ось
Сюжет торнадо с пользовательскими метками и в том же масштабе, что и предыдущий