Вот еще один вариант (который должен быть быстрее):
# Keep only the rows of D that belong to a run of two or more equal
# consecutive values of b; rows that form a run of length one are dropped.
D[D[, {
  # TRUE on the first row of every run of identical consecutive b values.
  x <- rowid(rleid(b)) < 2L
  # A row is a one-element run when it starts a run and the next row starts
  # one as well; fill = TRUE makes the end of the table count as a run start.
  # The rows to keep are selected directly instead of negating the rows to
  # drop: a negated empty index (-integer(0)) would select zero rows whenever
  # there is nothing to drop.
  .I[!(x & shift(x, -1L, fill = TRUE))]
}]]
Код для замера времени:
library(data.table)
# Benchmark input: fix the RNG seed so the sampled column is reproducible.
set.seed(0L)
nr <- 1e7  # number of rows
nb <- 1e4  # values are sampled from 1..nb
DT <- data.table(b = sample(nb, size = nr, replace = TRUE))
# Small hand-made alternative input, handy for checking results by eye:
#DT <- data.table(b=c("A", "A", "B", "C", "C", "A", "B", "B", "B", "C", "A", "C", "D", "E", "E"))
# Separate deep copy of the input; it is the table handed to mtd2() below.
DT2 <- copy(DT)
# Drop every row whose `b` value forms a run of length one, using base rle().
#
# df: a table with a column named `b` (a data.table in the benchmark).
# Returns the rows of `df` that lie in runs of two or more equal consecutive
# values of `b`, in their original order.
mtd1 <- function(df) {
  # Encode the runs once instead of calling rle() twice.
  run_len <- rle(df[["b"]])$lengths
  # Expand the per-run flag "longer than one" back to one flag per row.
  # A logical mask stays correct when there is nothing to drop, whereas a
  # negative index built from an empty vector (df[-integer(0), ]) selects
  # no rows at all.
  keep <- rep(run_len > 1L, times = run_len)
  df[keep, ]
}
# Keep the rows that lie in runs of two or more equal consecutive values of
# `b`, by counting the rows of each run.
#
# D: a data.table with a column named `b`.
# Returns a data.table holding only column `b` for the kept rows.
mtd2 <- function(D) {
  # Work on a freshly built table: `:=` updates by reference, so applying it
  # to D itself would leave a stray column N in the caller's table.
  runs <- D[, .(b, run = rleid(b))]
  # N is the length of the run each row belongs to.
  runs[, N := .N, by = run][N > 1L, .(b)]
}
# Keep the rows that lie in runs of two or more equal consecutive values of
# `b`, detecting one-element runs from the run-start flags alone.
#
# D: a data.table with a column named `b`.
# Returns the kept rows of D (all columns), in their original order.
mtd3 <- function(D) {
  D[D[, {
    # TRUE on the first row of every run of identical consecutive b values.
    x <- rowid(rleid(b)) < 2L
    # A row is a one-element run when it starts a run and the next row starts
    # one as well; fill = TRUE makes the end of the table count as a run
    # start. The rows to keep are selected directly: negating the rows to
    # drop breaks when that set is empty, because -integer(0) selects zero
    # rows instead of all of them.
    .I[!(x & shift(x, -1L, fill = TRUE))]
  }]]
}
# Time the three approaches. check = FALSE turns off bench::mark()'s
# comparison of the results returned by the expressions.
bench::mark(
  mtd1(DT),
  mtd2(DT2),
  mtd3(DT),
  check = FALSE
)
Результаты замера времени:
# A tibble: 3 x 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result memory time gc
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list> <list> <list> <list>
1 mtd1(DT) 1.1s 1.1s 0.908 1.98GB 10.9 1 12 1.1s <df[,1] [2,014 x ~ <df[,3] [59 x ~ <bch:t~ <tibble [1 x ~
2 mtd2(DT2) 2.88s 2.88s 0.348 267.12MB 0 1 0 2.88s <df[,1] [2,014 x ~ <df[,3] [23 x ~ <bch:t~ <tibble [1 x ~
3 mtd3(DT) 639.91ms 639.91ms 1.56 505.48MB 4.69 1 3 639.91ms <df[,1] [2,014 x ~ <df[,3] [24 x ~ <bch:t~ <tibble [1 x ~