Идея, лежащая в основе моего предложения, состоит в том, чтобы проверить для каждого события, сколько полных часовых отметок было пройдено, и вставить дополнительную строку для каждого часа и соответственно изменить время ...
Загрузить пример данных:
df <- read.table(text='Event DT.event off.bout.ID on.bout.ID off.time.diff on.time.diff
off 4/27/12-17:25:13 1 0 NA NA
on 4/27/12-17:25:39 1 1 26 NA
off 4/27/12-18:03:29 2 1 NA 2270
on 4/27/12-18:03:57 2 2 28 NA
off 4/27/12-19:41:16 3 2 NA 5839
on 4/27/12-19:43:50 3 3 154 NA
off 4/28/12-6:23:57 4 3 NA 38407
on 4/28/12-6:32:13 4 4 496 NA
off 4/28/12-6:40:20 5 4 NA 487
on 4/28/12-6:40:48 5 5 28 NA
off 4/28/12-8:16:07 6 5 NA 5719', header=T, stringsAsFactors=F)
Установить переменную даты и времени. При необходимости измените аргумент tz
:
df$DT.event <- as.POSIXct(df$DT.event, format = "%m/%d/%y-%H:%M:%S")
library(dplyr)
library(tidyr)
# reshape data
#
df2 <- df %>%
select(Event, DT.event, on.bout.ID) %>%
pivot_wider(names_from = Event,
values_from = DT.event) %>%
select(on.bout.ID, on, off)
df2
- это часть информации df
в более широкой форме:
on.bout.ID on off
<int> <dttm> <dttm>
1 0 NA 2012-04-27 17:25:13
2 1 2012-04-27 17:25:39 2012-04-27 18:03:29
3 2 2012-04-27 18:03:57 2012-04-27 19:41:16
4 3 2012-04-27 19:43:50 2012-04-28 06:23:57
5 4 2012-04-28 06:32:13 2012-04-28 06:40:20
6 5 2012-04-28 06:40:48 2012-04-28 08:16:07
# Make a copy so we don't mutate the object we are using to iterate
#
df3 <- df2
for (i in seq_along(df2$on.bout.ID)) {
# extract current iterations start and end time
#
id <- df2$on.bout.ID[i]
from <- df2$on[i]
to <- df2$off[i]
# calculate number of rows to insert
#
hoursDiff <- as.numeric(format(to, "%H")) - as.numeric(format(from , "%H"))
# compensate for crossing of midnight (00:00AM)
# by adding 24
#
hoursDiff <- as.integer(difftime(as.Date(to), as.Date(from), unit="days")) * 24 + hoursDiff
# if there is at least on pass of the full hour, insert a copy of the
# current row but adapt on and off times
#
if (!is.na(hoursDiff) & hoursDiff > 0) {
for (hour in 1:hoursDiff) {
# startime of this additional row
#
fromTime <- as.POSIXct(paste0(format(from + 3600 * hour, "%m/%d/%y-%H"), ":00:00"), format="%m/%d/%y-%H:%M:%S")
# Maximal endtime of this additional row
#
toTime <- fromTime + 3599
# copy current line
#
insert <- df2[i, ]
# set start time for this new row to full hour
#
insert$on <- fromTime
# if this is the last row to insert do NOT adapt off time
#
if (!(toTime > to)) {
insert$off <- toTime
}
# add additional row
#
df3 <- rbind(df3, insert)
}
# set off-time for the current line to end of first hour
#
df3[df3$on.bout.ID == id & df3$on == from & df3$off == to,]$off <- as.POSIXct(paste0(format(from, "%m/%d/%y-%H"), ":59:59"), format="%m/%d/%y-%H:%M:%S")
}
}
# Use `dplyr` to sort result
#
library(dplyr)
df3 %>% arrange(on.bout.ID, on)
# A tibble: 21 x 3
on.bout.ID on off
<int> <dttm> <dttm>
1 0 NA 2012-04-27 17:25:13
2 1 2012-04-27 17:25:39 2012-04-27 17:59:59
3 1 2012-04-27 18:00:00 2012-04-27 18:03:29
4 2 2012-04-27 18:03:57 2012-04-27 18:59:59
5 2 2012-04-27 19:00:00 2012-04-27 19:41:16
6 3 2012-04-27 19:43:50 2012-04-27 19:59:59
7 3 2012-04-27 20:00:00 2012-04-27 20:59:59
8 3 2012-04-27 21:00:00 2012-04-27 21:59:59
9 3 2012-04-27 22:00:00 2012-04-27 22:59:59
10 3 2012-04-27 23:00:00 2012-04-27 23:59:59
# … with 11 more rows
Это красиво? Нет! Это работает? Я так думаю
Редактировать:
добавлено
hoursDiff <- as.integer(difftime(as.Date(to), as.Date(from), unit="days")) * 24 + hoursDiff
Расширить функционал для пересечения полуночи (ей)