Несколько способов атаковать это.
Во-первых, нам нужно определить 3-дневный период для каждого. Для этого я создам datevec
, который представляет собой последовательность дней.
dat$RealDate <- as.Date(dat$FROM_DATE, format = "%m/%d/%Y")
datevec <- seq(min(dat$RealDate), max(dat$RealDate) + 3, by = 3)
dat$Period1 <- cut(dat$RealDate, datevec,
labels = paste(datevec[-length(datevec)], datevec[-1], sep = " - "))
dat
# OBJECTID District Zone year_value FROM_DATE SUM_Crime RealDate Period1
# 1 1 Northwestern Not in Zone 2019 2/6/2019 4 2019-02-06 2019-02-06 - 2019-02-09
# 2 2 Northwestern Zone 30 2019 2/7/2019 6 2019-02-07 2019-02-06 - 2019-02-09
# 3 3 Northwestern Zone 40 2019 2/8/2019 5 2019-02-08 2019-02-06 - 2019-02-09
# 4 4 Northwestern Zone 30 2019 2/9/2019 2 2019-02-09 2019-02-09 - 2019-02-12
# 5 5 Northwestern Not in Zone 2019 2/10/2019 4 2019-02-10 2019-02-09 - 2019-02-12
# 6 6 Northwestern Zone 40 2019 2/11/2019 4 2019-02-11 2019-02-09 - 2019-02-12
# 7 7 Northwestern Zone 30 2019 2/12/2019 0 2019-02-12 2019-02-12 - 2019-02-15
Если вы хотите сохранить даты в том формате, в котором они у вас есть (которые не являются настоящими Date
в R), тогда мы можем добавить второй столбец для этого формата:
datevec2 <- format(datevec, format = "%m/%d/%Y")
dat$Period2 <- cut(dat$RealDate, datevec,
labels = paste(datevec2[-length(datevec2)], datevec2[-1], sep = " - "))
dat
# OBJECTID District Zone year_value FROM_DATE SUM_Crime RealDate Period1 Period2
# 1 1 Northwestern Not in Zone 2019 2/6/2019 4 2019-02-06 2019-02-06 - 2019-02-09 02/06/2019 - 02/09/2019
# 2 2 Northwestern Zone 30 2019 2/7/2019 6 2019-02-07 2019-02-06 - 2019-02-09 02/06/2019 - 02/09/2019
# 3 3 Northwestern Zone 40 2019 2/8/2019 5 2019-02-08 2019-02-06 - 2019-02-09 02/06/2019 - 02/09/2019
# 4 4 Northwestern Zone 30 2019 2/9/2019 2 2019-02-09 2019-02-09 - 2019-02-12 02/09/2019 - 02/12/2019
# 5 5 Northwestern Not in Zone 2019 2/10/2019 4 2019-02-10 2019-02-09 - 2019-02-12 02/09/2019 - 02/12/2019
# 6 6 Northwestern Zone 40 2019 2/11/2019 4 2019-02-11 2019-02-09 - 2019-02-12 02/09/2019 - 02/12/2019
# 7 7 Northwestern Zone 30 2019 2/12/2019 0 2019-02-12 2019-02-12 - 2019-02-15 02/12/2019 - 02/15/2019
Отсюда, агрегация базового R:
ag <- aggregate(SUM_Crime ~ Zone + Period2, data = dat, FUN = sum)
ag
# Zone Period2 SUM_Crime
# 1 Not in Zone 02/06/2019 - 02/09/2019 4
# 2 Zone 30 02/06/2019 - 02/09/2019 6
# 3 Zone 40 02/06/2019 - 02/09/2019 5
# 4 Not in Zone 02/09/2019 - 02/12/2019 4
# 5 Zone 30 02/09/2019 - 02/12/2019 2
# 6 Zone 40 02/09/2019 - 02/12/2019 4
# 7 Zone 30 02/12/2019 - 02/15/2019 0
xtabs( SUM_Crime ~ Zone + Period2, data = ag)
# Period2
# Zone 02/06/2019 - 02/09/2019 02/09/2019 - 02/12/2019 02/12/2019 - 02/15/2019
# Not in Zone 4 4 0
# Zone 30 6 2 0
# Zone 40 5 4 0
Вы также можете поместить это в dplyr
трубу если вы хотите:
library(dplyr)
library(tidyr) # pivot_wider
dat2 <- dat %>%
mutate(RealDate = as.Date(FROM_DATE, format = "%m/%d/%Y"))
datevec <- seq(min(dat2$RealDate), max(dat2$RealDate) + 3, by = 3)
dat2 %>%
mutate(
Period1 = cut(RealDate, datevec,
labels = paste(datevec[-length(datevec)], datevec[-1], sep = " - "))
) %>%
group_by(Zone, Period1) %>%
summarize(SUM_Crime = sum(SUM_Crime)) %>%
ungroup() %>%
filter(SUM_Crime > 0) %>%
pivot_wider(., "Zone", names_from = "Period1", values_from = "SUM_Crime")
# # A tibble: 3 x 3
# Zone `2019-02-06 - 2019-02-09` `2019-02-09 - 2019-02-12`
# <chr> <int> <int>
# 1 Not in Zone 4 4
# 2 Zone 30 6 2
# 3 Zone 40 5 4
Данные
dat <- read.csv(header = TRUE, stringsAsFactors = FALSE, text = "
OBJECTID,District,Zone,year_value,FROM_DATE,SUM_Crime
1,Northwestern,Not in Zone,2019,2/6/2019,4
2,Northwestern,Zone 30,2019,2/7/2019,6
3,Northwestern,Zone 40,2019,2/8/2019,5
4,Northwestern,Zone 30,2019,2/9/2019,2
5,Northwestern,Not in Zone,2019,2/10/2019,4
6,Northwestern,Zone 40,2019,2/11/2019,4
7,Northwestern,Zone 30,2019,2/12/2019,0")