С vapply
Вот вариант с vapply
I <- which(vec == 0)
vapply(1:(length(I)+1),
function(k) sum(vec[max(I[k-1],1):min(I[k], length(vec), na.rm = TRUE)]),
numeric(1))
# [1] 322 168 105
С Уменьшением
Вот решение с использованием Reduce
ans <- numeric(0)
s <- n <- 0
Reduce(f = function (y,x) {
if(x == 0) {
ans <<- c(ans,s)
s <<- 0
}
n <<- n+1
s <<- x+s
if(n == length(vec))
ans <<- c(ans,s)
s
}, vec, init = 0, accumulate = TRUE)
ans
# [1] 322 168 105
с циклом
Или, возможно, старомодный цикл
I <- which(vec == 0)
n <- length(vec)
N <- length(I) + 1
res <- numeric(N)
for(k in seq_along(res)) {
if (k == 1) {
res[k] <- sum(vec[1:I[1]])
next
}
if (k == N) {
res[k] <- sum(vec[I[N-1]:n])
next
}
res[k] <- sum(vec[I[k-1]:I[k]])
}
res
# [1] 322 168 105
Сравнительный анализ
Данные
Вот данные, используемые для бенчмаркинга
# c.f. @MauritsEvers
# Create sample vector with N entries
set.seed(2018)
N <- 10000
vec <- sample(100, N, replace = T)
vec[sample(length(vec), 100)] <- 0
Функции
Вот функции для вторых показателей бенчмаркинга:
reduce <- function(vec) {
ans <- numeric(0)
s <- n <- 0
Reduce(f = function (y,x) {
if(x == 0) {
ans <<- c(ans,s)
s <<- 0
}
n <<- n+1
s <<- x+s
if(n == length(vec))
ans <<- c(ans,s)
s
}, vec, init = 0, accumulate = TRUE)
ans
}
Vapply <- function (vec) {
I <- which(vec == 0)
vapply(1:(length(I)+1),
function(k) sum(vec[max(I[k-1],1):min(I[k], length(vec), na.rm = TRUE)]),
numeric(1))
}
By <- function (vec) as.numeric(by(vec, cumsum(vec == 0), sum))
Split <- function (vec) sapply(split(vec, cumsum(vec==0)),sum)
Aggregate <- function (vec) aggregate(vec, by = list(cumsum(vec == 0)), FUN = sum)[[2]]
for_loop <- function(vec) {
I <- which(vec == 0)
n <- length(vec)
N <- length(I)+1
res <- numeric(N)
for(k in seq_along(res)) {
if (k == 1) {
res[k] <- sum(vec[1:I[1]])
next
}
if (k == N) {
res[k] <- sum(vec[I[N-1]:n])
next
}
res[k] <- sum(vec[I[k-1]:I[k]])
}
res
}
Rowsum <- function (vec) rowsum(vec, cumsum(vec == 0))
Бенчмаркинг
Вот два комбинированных процесса сравнения:
# c.f. @MauritsEvers
resBoth <- microbenchmark::microbenchmark(
Vapply = {
I <- which(vec == 0)
vapply(1:(length(I)+1),
function(k) sum(vec[max(I[k-1],1):min(I[k], length(vec), na.rm = TRUE)]),
numeric(1))
},
Vapply(vec),
By = {
as.numeric(by(vec, cumsum(vec == 0), sum))
},
By(vec),
Aggregate = {
aggregate(vec, by = list(cumsum(vec == 0)), FUN = sum)[[2]]
},
Aggregate(vec),
Split = {
sapply(split(vec, cumsum(vec == 0)), sum)
},
Split(vec),
reduce = {
ans <- numeric(0)
s <- n <- 0
Reduce(f = function (y,x) {
if(x == 0) {
ans <<- c(ans,s)
s <<- 0
}
n <<- n+1
s <<- x+s
if (n == length(vec))
ans <<- c(ans,s)
s
}, vec, init = 0, accumulate = TRUE)
ans
},
reduce(vec),
for_loop = {
I <- which(vec == 0)
n <- length(vec)
N <- length(I) + 1
res <- numeric(N)
for(k in seq_along(res)) {
if (k == 1) {
res[k] <- sum(vec[1:I[1]])
next
}
if (k == N) {
res[k] <- sum(vec[I[N-1]:n])
next
}
res[k] <- sum(vec[I[k-1]:I[k]])
}
res
},
for_loop(vec),
Rowsum = {rowsum(vec, cumsum(vec == 0))},
Rowsum(vec),
times = 10^3
)
Результаты
Вотрезультаты бенчмаркинга
resBoth
# Unit: microseconds
# expr min lq mean median uq max neval cld
# Vapply 234.121 281.5280 358.0708 311.7955 343.5215 4775.018 1000 ab
# Vapply(vec) 234.850 278.6100 376.3956 306.3260 334.4050 14564.278 1000 ab
# By 1866.029 2108.7175 2468.1208 2209.0025 2370.5520 23316.045 1000 c
# By(vec) 1870.769 2120.5695 2473.1643 2217.3900 2390.6090 21039.762 1000 c
# Aggregate 2738.324 3015.6570 3298.0863 3117.9480 3313.2295 13328.404 1000 d
# Aggregate(vec) 2733.583 2998.1530 3295.6874 3109.1955 3349.1500 8277.694 1000 d
# Split 359.202 412.0800 478.0553 444.1710 492.3080 4622.220 1000 b
# Split(vec) 366.131 410.4395 475.2633 444.1715 490.3025 4601.799 1000 b
# reduce 10862.491 13062.3755 15353.2826 14465.0870 16559.3990 76305.463 1000 g
# reduce(vec) 10403.004 12448.9965 14658.4035 13825.9995 15893.3255 67337.080 1000 f
# for_loop 6687.724 7429.4670 8518.0470 7818.0250 9023.9955 27541.136 1000 e
# for_loop(vec) 123.624 145.8690 187.2201 157.5390 177.4140 9928.200 1000 a
# Rowsum 235.579 264.3880 305.7516 282.2570 322.7360 792.068 1000 ab
# Rowsum(vec) 239.590 264.9350 307.2508 284.8100 322.0060 1778.143 1000 ab