Слишком долго в комментарии, следовательно, комм вики. Подобно Sotos и tmfmnk, если значение не будет найдено в id
, тогда вы можете избежать rowSums
:
DT[, idx := {
x <- max.col(.SD==0, "first")
replace(x, x==1L, NA_integer_)
}]
временного кода:
library(data.table)
set.seed(0L)
nr <- 1e7
DT <- data.table(id=1:nr, a=sample(0:5, nr, TRUE), b=sample(0:5, nr, TRUE), c=sample(0:5, nr, TRUE))
DT0 <- copy(DT)
DT1 <- copy(DT)
DT2 <- copy(DT)
DT3 <- copy(DT)
DT4 <- copy(DT)
mtd0 <- function() {
replace(max.col(-DT0[, -1] == 0, ties.method = 'first') + 1, rowSums(DT == 0) == 0, NA)
#or break it into two lines If you want,
i1 <- max.col(-DT0[,-1] == 0, ties.method = 'first') + 1
replace(i1, rowSums(DT0 == 0) == 0, NA)
}
mtd1 <- function() {
DT1[, idx := apply(.SD, 1, function(x) first(which(x == 0 )))]
}
mtd2 <- function() {
DT2[, idx := which(.SD == 0)[1], by = id]
}
mtd3 <- function() {
DT2[, idx := fifelse(rowSums(.SD == 0) == 0,
NA_integer_,
max.col(-.SD, ties.method = "first"))]
}
mtd4 <- function() {
DT4[, idx := {
x <- max.col(.SD==0, "first")
replace(x, x==1L, NA_integer_)
}]
}
bench::mark(mtd0(), mtd1(), mtd2(),
mtd3(), mtd4(), check=FALSE)
тайминги:
# A tibble: 5 x 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result memory time gc
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list> <list> <list> <list>
1 mtd0() 136.98ms 145.67ms 5.41 347.8MB 1.80 3 1 554.1ms <dbl [1,~ <df[,3] ~ <bch:~ <tibb~
2 mtd1() 8.66s 8.66s 0.115 76.3MB 0.346 1 3 8.66s <df[,5] ~ <df[,3] ~ <bch:~ <tibb~
3 mtd2() 57.3s 57.3s 0.0175 22.6MB 0.436 1 25 57.3s <df[,5] ~ <df[,3] ~ <bch:~ <tibb~
4 mtd3() 86.6ms 90.69ms 10.3 171.7MB 1.72 6 1 582.21ms <df[,5] ~ <df[,3] ~ <bch:~ <tibb~
5 mtd4() 48.9ms 50.12ms 18.4 97.6MB 1.84 10 1 544.43ms <df[,5] ~ <df[,3] ~ <bch:~ <tibb~