У меня есть набор данных (первые 100 строк):
# Sample data in dput()-style form: a data.frame with 100 rows and four columns:
#   department - factor with 27 levels (department / team names)
#   type       - character, either "Completed" or "Missed"
#   date       - Date, stored as day numbers (17888 to 17893 in this sample)
#   count      - integer
# NOTE(review): this literal is not assigned to any name here, while the
# snippets further down refer to the data as `dept` - presumably
# `dept <- structure(...)` is intended; confirm before running.
structure(list(department = structure(c(21L, 14L, 4L, 11L, 21L,
12L, 15L, 11L, 3L, 18L, 4L, 20L, 25L, 3L, 3L, 13L, 19L, 22L,
18L, 16L, 16L, 16L, 16L, 4L, 20L, 12L, 4L, 27L, 1L, 6L, 16L,
1L, 13L, 13L, 25L, 18L, 8L, 23L, 10L, 16L, 4L, 21L, 2L, 5L, 18L,
10L, 23L, 4L, 7L, 5L, 14L, 15L, 19L, 23L, 11L, 4L, 15L, 6L, 12L,
11L, 23L, 14L, 15L, 11L, 18L, 24L, 27L, 27L, 20L, 5L, 1L, 19L,
4L, 10L, 4L, 26L, 3L, 14L, 15L, 12L, 22L, 14L, 20L, 25L, 2L,
23L, 15L, 13L, 4L, 18L, 26L, 13L, 5L, 10L, 1L, 6L, 10L, 22L,
5L, 14L), .Label = c("Beauty", "Boutique advisor", "Boutique advisors",
"Boutique Stylist", "Clean Beauty Expert", "Conseiller en boutique",
"Design Consultant", "Designer Trade Specialist", "Food", "Furniture",
"In-store Design Expert", "In-store experts", "In-Store Sales Professional",
"In-Store Style Experts", "John Hardy", "Jos. A. Bank LIVE!",
"Levi's Stylists", "Lighting & Home Accessories", "Men's Wearhouse LIVE!",
"Menswear", "Personal advisors", "Styliste en boutique", "Vendeurs",
"Wine", "Women's Accessories", "Women's shoes", "Womenswear"), class = "factor"),
type = c("Completed", "Missed", "Missed", "Missed", "Missed",
"Missed", "Missed", "Completed", "Completed", "Missed", "Missed",
"Completed", "Completed", "Completed", "Completed", "Completed",
"Completed", "Completed", "Completed", "Missed", "Completed",
"Missed", "Completed", "Missed", "Missed", "Completed", "Missed",
"Missed", "Missed", "Completed", "Missed", "Completed", "Missed",
"Completed", "Missed", "Missed", "Completed", "Missed", "Missed",
"Completed", "Completed", "Missed", "Completed", "Missed",
"Completed", "Missed", "Missed", "Completed", "Missed", "Completed",
"Completed", "Missed", "Completed", "Missed", "Completed",
"Completed", "Missed", "Missed", "Missed", "Missed", "Completed",
"Missed", "Completed", "Completed", "Completed", "Missed",
"Missed", "Completed", "Missed", "Completed", "Completed",
"Missed", "Completed", "Completed", "Missed", "Missed", "Completed",
"Completed", "Completed", "Completed", "Missed", "Completed",
"Completed", "Completed", "Completed", "Completed", "Completed",
"Completed", "Completed", "Completed", "Completed", "Missed",
"Missed", "Completed", "Completed", "Completed", "Missed",
"Completed", "Missed", "Completed"), date = structure(c(17889,
17890, 17893, 17893, 17892, 17892, 17893, 17893, 17892, 17888,
17892, 17889, 17888, 17893, 17888, 17889, 17891, 17892, 17893,
17891, 17889, 17888, 17892, 17889, 17889, 17892, 17888, 17889,
17893, 17892, 17893, 17892, 17891, 17893, 17888, 17891, 17892,
17891, 17892, 17888, 17891, 17893, 17893, 17892, 17890, 17888,
17888, 17889, 17891, 17893, 17893, 17890, 17890, 17892, 17889,
17892, 17889, 17889, 17888, 17888, 17893, 17893, 17893, 17891,
17888, 17892, 17892, 17893, 17891, 17888, 17889, 17891, 17889,
17890, 17891, 17888, 17889, 17888, 17890, 17893, 17889, 17889,
17893, 17889, 17892, 17891, 17889, 17892, 17888, 17891, 17893,
17890, 17890, 17889, 17893, 17889, 17889, 17888, 17889, 17892
), class = "Date"), count = c(7L, 9L, 8L, 3L, 5L, 4L, 5L,
10L, 1L, 3L, 5L, 18L, 3L, 7L, 1L, 17L, 277L, 10L, 14L, 50L,
520L, 92L, 791L, 6L, 7L, 4L, 2L, 1L, 3L, 3L, 145L, 17L, 10L,
42L, 1L, 1L, 1L, 2L, 7L, 627L, 3L, 6L, 4L, 3L, 3L, 2L, 1L,
2L, 1L, 20L, 41L, 4L, 283L, 1L, 14L, 5L, 2L, 1L, 3L, 3L,
7L, 12L, 36L, 9L, 14L, 1L, 6L, 13L, 1L, 14L, 12L, 16L, 3L,
2L, 6L, 7L, 4L, 21L, 3L, 5L, 5L, 22L, 12L, 5L, 1L, 5L, 23L,
36L, 13L, 12L, 12L, 9L, 4L, 6L, 6L, 4L, 1L, 4L, 1L, 32L)), row.names = c(NA,
100L), class = "data.frame")
Мне нужно, чтобы результат выглядел так: строки — отделы, столбцы — количество каждого типа за каждый день:
![enter image description here](https://i.stack.imgur.com/66aKg.png)
Сейчас у меня есть два подхода, но ни один из них не дает желаемого результата. Подозреваю, что я близок к решению, потому что оно, похоже, лежит где-то между ними.
Первый подход:
# dplyr provides group_by(), summarise() and the %>% pipe used below.
library(dplyr)

# One summary row per department:
#   missed     - number of rows whose type is "Missed"
#   completed  - number of rows whose type is "Completed"
#   missed_pct - missed as a fraction of missed + completed
# NOTE(review): these are row tallies by `type`; the `count` column of the
# data is not used in this summary.
group_by(dept, department) %>%
  summarise(
    missed = sum(type == "Missed"),
    completed = sum(type == "Completed"),
    missed_pct = missed / (missed + completed)
  )
Что дает мне это:
# A tibble: 7 x 4
department missed completed missed_pct
<fct> <int> <int> <dbl>
1 Beauty 2 5 0.286
2 Food 0 1 0
3 Menswear 4 6 0.4
4 Wine 1 1 0.5
5 Women's Accessories 2 5 0.286
6 Women's shoes 3 5 0.375
7 Womenswear 4 5 0.444
Второй подход:
# dplyr provides group_by(), summarise() and the %>% pipe used below.
library(dplyr)

# One summary row per (department, date) pair:
#   missed     - number of rows whose type is "Missed"
#   completed  - number of rows whose type is "Completed"
#   missed_pct - missed as a fraction of missed + completed
# The result is long (one row per day), not one column per day.
# NOTE(review): these are row tallies by `type`; the `count` column of the
# data is not used in this summary.
group_by(dept, department, date) %>%
  summarise(
    missed = sum(type == "Missed"),
    completed = sum(type == "Completed"),
    missed_pct = missed / (missed + completed)
  )
Что дает мне это:
# A tibble: 28 x 5
# Groups: department [?]
department date missed completed missed_pct
<fct> <date> <int> <int> <dbl>
1 Beauty 2018-12-23 0 1 0
2 Beauty 2018-12-24 0 1 0
3 Beauty 2018-12-26 0 1 0
4 Beauty 2018-12-27 1 1 0.5
5 Beauty 2018-12-28 1 1 0.5
6 Food 2018-12-27 0 1 0
7 Menswear 2018-12-23 1 1 0.5
8 Menswear 2018-12-24 1 1 0.5
9 Menswear 2018-12-25 0 1 0
10 Menswear 2018-12-26 1 1 0.5
Как я могу это сделать?