Хотя top_n может хорошо подойти для выбора верхних значений, получить «остальные» зоны с его помощью бывает сложно, особенно если есть совпадающие значения (ties). В этом случае я создал группирующую переменную (grp), равную 1, если значение SUM_activity входит в пятёрку наибольших, и 0 — в противном случае. Затем я объединил её с признаком строк, у которых ZoneID == "Outside Zones", чтобы получить 3 группы.
library(dplyr)
library(tidyr)
# Sum SUM_activity per District/Period into three buckets:
# "Top 5" zones, "Rest of Zones", and "Outside Zones".
data %>%
  # Zone flags the "Outside Zones" rows (1) versus regular zones (0).
  mutate(Zone = ifelse(ZoneID == "Outside Zones", 1, 0)) %>%
  # Rank within District/Period separately for outside and regular rows, so
  # outside rows never compete with regular zones for a top-5 slot.
  group_by(District, Period, Zone) %>%
  mutate(
    grp = factor(
      # Numeric code: 2 = outside, 1 = top 5 by SUM_activity, 0 = the rest.
      # Outside rows get 2 unconditionally; deriving it as
      # (rank <= 5) + Zone would code a 6th outside row as 1 ("Top 5").
      # min_rank() keeps ties, so "Top 5" may hold more than five zones.
      ifelse(Zone == 1, 2, +(min_rank(desc(SUM_activity)) <= 5)),
      levels = c(1, 0, 2),
      labels = c("Top 5", "Rest of Zones", "Outside Zones")
    )
  ) %>%
  group_by(District, Period, grp) %>%
  summarise(n = sum(SUM_activity)) %>%
  # One column per bucket; buckets absent for a District/Period become 0.
  pivot_wider(names_from = grp, values_from = n, values_fill = list(n = 0))
# A tibble: 3 x 5
# Groups: District, Period [3]
District Period `Top 5` `Rest of Zones` `Outside Zones`
<chr> <chr> <int> <int> <int>
1 Northwestern 2019-02-06 - 2019-03-06 686 302 1501
2 Southern 2019-02-06 - 2019-03-06 0 0 2062
3 Southwestern 2019-02-06 - 2019-03-06 0 0 1351
Данные
# Example input: 19 observations for a single reporting period.
# Rows 1-16 are named zones of the Northwestern district; rows 17-19 are the
# "Outside Zones" rows of Northwestern, Southern and Southwestern.
data <- data.frame(
  obs = 1:19,
  District = c(rep("Northwestern", 17L), "Southern", "Southwestern"),
  ZoneID = c(
    "Northern: 53A", "Northern: 53B",
    "Northwestern: 61A", "Northwestern: 61B", "Northwestern: 61D",
    "Northwestern: 62A", "Northwestern: 62B", "Northwestern: 62C",
    "Northwestern: 62D",
    "Northwestern: 63A", "Northwestern: 63B", "Northwestern: 63C",
    "Northwestern: 63D",
    "Northwestern: DATA", "Northwestern: DATB", "Northwestern: DATC",
    "Outside Zones", "Outside Zones", "Outside Zones"
  ),
  Period = rep("2019-02-06 - 2019-03-06", 19L),
  SUM_activity = c(
    4L, 0L, 88L, 44L, 212L, 38L, 18L, 65L, 4L, 107L,
    19L, 56L, 165L, 28L, 26L, 114L, 1501L, 2062L, 1351L
  ),
  stringsAsFactors = FALSE
)