Я пытаюсь написать универсальную функцию для одномерного анализа на R для категориальных переменных. Я могу передать переменные в dplyr, но это не работает для кода ggplot.
Вот мой код -
univariate_catogrical <- function(dataset,variable){
variable <- enquo(variable)
percentage <- dataset %>%
select(!!variable) %>%
group_by(!!variable) %>%
summarise(n = n()) %>%
mutate(percantage = (n / sum(n)) * 100)
print(percentage)
dataset %>%
count(!!variable) %>%
ggplot(mapping = aes_(x = rlang::quo_expr(!!variable),
y = n, fill = rlang::quo_expr(!!variable))) +
geom_bar(stat = 'identity',
colour = 'white') +
labs(x = "Reason.for.absence" , y = "count") +
ggtitle(" Count of Reason for absence") +
theme(legend.position = "bottom") -> p
plot(p)
}
Когда я выполняю вышеуказанную функцию, я получаю
> univariate_catogrical(employee_data_Imputed,Reason.for.absence)
# A tibble: 28 x 3
Reason.for.absence n percantage
<fct> <int> <dbl>
1 1 16 2.23
2 2 1 0.139
3 3 1 0.139
4 4 2 0.279
5 5 3 0.418
6 6 7 0.975
7 7 15 2.09
8 8 6 0.836
9 9 4 0.557
10 10 23 3.20
# ... with 18 more rows
Hide Traceback
Rerun with Debug
Error in grouped_df_impl(data, unname(vars), drop) :
Column `variable` is unknown
Может кто-нибудь подсказать, пожалуйста, как это исправить. Я использую функцию ase_ для передачи аргументов.
Пожалуйста, найдите воспроизводимый пример.
dput(head(employee_data_Imputed,8))
structure(list(ID = structure(c(11L, 36L, 3L, 7L, 11L, 10L, 20L,
14L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9",
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
"21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31",
"32", "33", "34", "35", "36"), class = "factor"), Reason.for.absence = structure(c(26L,
20L, 23L, 7L, 23L, 22L, 23L, 19L), .Label = c("1", "2", "3",
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26",
"27", "28"), class = "factor"), Month.of.absence = structure(c(7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("1", "2", "3", "4", "5",
"6", "7", "8", "9", "10", "11", "12"), class = "factor"), Day.of.the.week = structure(c(2L,
2L, 3L, 4L, 4L, 5L, 5L, 1L), .Label = c("2", "3", "4", "5", "6"
), class = "factor"), Seasons = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("1", "2", "3", "4"), class = "factor"),
Transportation.expense = c(289, 118, 179, 279, 289, 361,
260, 155), Distance.from.Residence.to.Work = c(36, 13, 51,
5, 36, 52, 50, 12), Service.time = c(13, 18, 18, 14, 13,
3, 11, 14), Age = c(33, 50, 38, 39, 33, 28, 36, 34), Work.load.Average.day = c(239554,
239554, 239554, 239554, 239554, 239554, 239554, 239554),
Hit.target = c(97, 97, 97, 97, 97, 97, 97, 97), Disciplinary.failure = structure(c(1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"),
Education = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4"), class = "factor"), Son = c(2, 1, 0, 2, 2,
1, 4, 2), Social.drinker = structure(c(2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("0", "1"), class = "factor"), Social.smoker = structure(c(1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"),
Pet = c(1, 0, 0, 0, 1, 4, 0, 0), Weight = c(90, 98, 89, 68,
90, 80, 65, 95), Height = c(172, 178, 170, 168, 172, 172,
168, 196), Body.mass.index = c(30, 31, 31, 24, 30, 27, 23,
25), Absenteeism.time.in.hours = c(4, 0, 2, 4, 2, 8, 4, 40
)), .Names = c("ID", "Reason.for.absence", "Month.of.absence",
"Day.of.the.week", "Seasons", "Transportation.expense", "Distance.from.Residence.to.Work",
"Service.time", "Age", "Work.load.Average.day", "Hit.target",
"Disciplinary.failure", "Education", "Son", "Social.drinker",
"Social.smoker", "Pet", "Weight", "Height", "Body.mass.index",
"Absenteeism.time.in.hours"), row.names = c(NA, 8L), class = "data.frame")