У меня есть логический вектор, как
as.logical(c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1))
но намного дольше. Как я могу преобразовать его в:
c(0,0,1,2,3,0,1,2,0,0,0,1,2,3,4)
подсчитав длину единиц?
немного отличается от Вена, я придумал:
library(data.table) ave(v,rleid(v),FUN=function(x) x *seq_along(x)) # [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
с C ++ до Rcpp
Rcpp
library(Rcpp) cppFunction('NumericVector seqOfLogical(LogicalVector lv) { size_t n = lv.size(); NumericVector res(n); int foundCounter = 0; for (size_t i = 0; i < n; i++) { if (lv[i] == 1) { foundCounter++; } else { foundCounter = 0; } res[i] = foundCounter; } return res; }') seqOfLogical(x) # [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
library(microbenchmark) set.seed(1) x <- sample(c(T,F), size = 1e6, replace = T) microbenchmark( symbolix = { symbolix(x) }, thelatemail1 = { thelatemail1(x) }, thelatemail2 = { thelatemail2(x) }, wen = { wen(x) }, maurits = { maurits(x) }, #mhammer = { mhammer(x) }, ## this errors times = 5 ) # Unit: milliseconds # expr min lq mean median uq max neval # symbolix 2.760152 4.579596 34.60909 4.833333 22.31126 138.5611 5 # thelatemail1 154.050925 189.784368 235.16431 235.982093 262.33704 333.6671 5 # thelatemail2 138.876834 146.197278 158.66718 148.547708 179.80223 179.9119 5 # wen 780.432786 898.505231 1091.39099 1093.702177 1279.33318 1404.9816 5 # maurits 1002.267323 1043.590621 1136.35624 1086.967756 1271.38803 1277.5675 5
symbolix <- function(x) { seqOfLogical(x) } thelatemail1 <- function(x) { r <- rle(x) x[x] <- sequence(r$l[r$v]) return(x) } thelatemail2 <- function(x) { x[x] <- sequence(with(rle(x), lengths[values])) return(x) } maurits <- function(x) { unlist(Map(function(l, v) if (!isTRUE(v)) rep(0, l) else 1:l, rle(x)$lengths, rle(x)$values)) } wen <- function(A) { B=data.table::rleid(A) B=ave(B,B,FUN = seq_along) B[!A]=0 B } mhammer <- function(x) { x_counts <- x for(i in seq_along(x)) { if(x[i] == 1) { x_counts[i] <- x_counts[i] + x_counts[i-1] } } return(x_counts) }
Другой вариант rle:
rle
r <- rle(x) x[x] <- sequence(r$l[r$v]) #[1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
или без сохранения r:
r
x[x] <- sequence(with(rle(x), lengths[values]))
Вот решение с использованием базы R rle с Map
Map
x <- as.logical(c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1)) unlist(Map(function(l, v) if (!isTRUE(v)) rep(0, l) else 1:l, rle(x)$lengths, rle(x)$values)) # [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
или используя purrr::pmap
purrr::pmap
library(purrr); unlist(pmap(unclass(rle(x)), function(lengths, values) if (!isTRUE(values)) rep(0, lengths) else 1:lengths)) #[1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
x <- c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1) x_counts <- x for(i in seq_along(x)) { if(x[i] == 1) { x_counts[i] <- x_counts[i] + x_counts[i-1] } } x_counts
Вы можете использовать rleid в data.table
rleid
data.table
A=as.logical(c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1)) B=data.table::rleid(A) B=ave(B,B,FUN = seq_along) B[!A]=0 B [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4