Если ваши данные хранятся в объекте data.table в R,
задачу можно решить следующим образом.
Пример данных:
library(data.table)
# Parse a five-row command log (Timestamp, Command) from inline text.
dt <- fread("
Timestamp Command
13 ON
27 OFF
36 OFF
43 ON
53 OFF
")
# Stack the log on top of itself so the example covers two machines.
dt <- rbindlist(list(dt, dt))
# Tag the first five rows as machine 1 and the last five as machine 2;
# `:=` adds the column by reference.
dt[, Machine := rep(c(1L, 2L), each = 5L)]
# Empty brackets force the table to print after the by-reference update.
dt[]
# Timestamp Command Machine
# 1: 13 ON 1
# 2: 27 OFF 1
# 3: 36 OFF 1
# 4: 43 ON 1
# 5: 53 OFF 1
# 6: 13 ON 2
# 7: 27 OFF 2
# 8: 36 OFF 2
# 9: 43 ON 2
# 10: 53 OFF 2
Результат:
library(magrittr)
# Turn each machine's command log into contiguous state intervals.
#   Step 1: every command opens an interval at its Timestamp that ends one
#           tick before the same machine's next command (NA for the last one).
#   Step 2: consecutive rows carrying the same Command are merged into a
#           single interval.
#   Step 3: if a machine's first command arrives after tick 1, an interval
#           from 1 up to that command is prepended in the opposite state.
dt[, .(From = Timestamp,
       To = shift(Timestamp, 1, type = "lead") - 1,
       Command),
   by = Machine] %>%
  .[, .(From = first(From),
        To = last(To),
        State = first(Command)),
    by = .(Machine, rleid(Command))] %>%
  .[, {
    # Prepend the leading interval only when there is a gap before the first
    # command; when the first command is already at tick 1 the extra row
    # would be the invalid interval From = 1, To = 0.
    has_gap <- isTRUE(From[1] > 1)
    # `if (FALSE) x` yields NULL, which c() drops, so nothing is prepended
    # for machines without a gap. The explicit as.numeric()/as.character()
    # keep column types identical across groups, which data.table requires.
    .(From = c(if (has_gap) 1, as.numeric(From)),
      To = c(if (has_gap) From[1] - 1, as.numeric(To)),
      # Scalar condition, so plain if/else instead of vectorised ifelse().
      State = c(if (has_gap) {
                  if (State[1] == "ON") "OFF" else "ON"
                },
                as.character(State)))
  }, by = Machine]
# Machine From To State
# 1: 1 1 12 OFF
# 2: 1 13 26 ON
# 3: 1 27 42 OFF
# 4: 1 43 52 ON
# 5: 1 53 NA OFF
# 6: 2 1 12 OFF
# 7: 2 13 26 ON
# 8: 2 27 42 OFF
# 9: 2 43 52 ON
# 10: 2 53 NA OFF
Бенчмарк показывает, что на 1 миллионе строк этот подход выполняется примерно за 200–300 миллисекунд (замер сделан на ноутбуке HP).
# Benchmark set-up: a ten-row log (two machines, five commands each) is
# replicated 1e5 times to obtain one million rows, then useDT() is timed.
# NOTE(review): Command is numeric 0/1 here, whereas the worked example uses
# "ON"/"OFF" strings, and useDT() is not defined in this section - confirm
# that useDT() handles this encoding.
n <- 2
# Not referenced in this section; presumably read by useDT() - TODO confirm.
duration <- 66
dt <- data.table(
  Machine = rep(seq_len(n), each = 5),
  Timestamp = c(c(13, 27, 36, 43, 53), c(1, 13, 27, 36, 66)),
  Command = c(c(1, 0, 1, 1, 0), c(0, 0, 1, 1, 1))
)
# simplify = FALSE keeps replicate() returning a list of tables for rbindlist().
dt <- rbindlist(replicate(1e5, dt, simplify = FALSE))
nrow(dt) == 1e6
# TRUE
# Namespace-qualified because no library(microbenchmark) call is visible here.
microbenchmark::microbenchmark(useDT(dt))
# Unit: milliseconds
# expr min lq mean median uq max neval
# useDT(dt) 159.6124 171.5623 265.3555 186.8661 232.0942 1157.086 100