Вы можете использовать `data.table` и группировать строки по результату целочисленного деления `pos - 1` на 5 (или на любое другое `n`).
library(data.table)
#' Median of `val` over consecutive blocks of `pos` values
#'
#' Rows are grouped by `(pos - 1) %/% n`, so with `pos` running 1, 2, 3, ...
#' each group covers `n` consecutive positions.
#'
#' @param df A data.frame (or data.table) with columns `pos` and `val`.
#' @param n Block width used as the divisor of the integer division.
#' @return A data.table with one row per group and columns `start` (first
#'   `pos` in the group), `end` (last `pos` in the group) and `median`
#'   (median of `val` in the group).
fn_median <- function(df, n) {
  # as.data.table() leaves the caller's object untouched; setDT() would turn
  # the caller's data.frame into a data.table by reference.
  dt <- as.data.table(df)
  grouped <- dt[
    ,
    .(start = pos[1], end = last(pos), median = median(val)),
    by = .(drop = (pos - 1) %/% n)
  ]
  # The grouping key is only a helper; remove it from the result.
  grouped[, -"drop"]
}
# Example call with a block width of 5. `dfr` is not defined above this line;
# presumably it is the data frame from the original question (columns `pos`
# and `val`) — confirm before running. The commented lines show the printed
# result for that data.
fn_median(dfr, 5)
# start end median
# 1: 1 5 105
# 2: 6 10 95
# 3: 11 15 115
# 4: 16 20 102
Правка: тесты производительности
library(microbenchmark)
# Benchmark on 1e4 rows: `pos` is 1..1e4 and `val` is a random permutation of
# 1..1e4 (no seed is set, so the data differ between runs).
# NOTE(review): judging by the timings below, `fn_median` here is the
# while-loop definition listed at the end of this file and `fn_median2` is the
# data.table one; both must be defined before this line is run.
dfr <- data.frame(pos = seq_len(1e4), val = sample(1e4))
microbenchmark(fn_median(dfr, 5), fn_median2(dfr, 5), times = 10)
# Unit: milliseconds
# expr min lq mean median uq max neval
# fn_median(dfr, 5) 113.324354 131.217695 147.213517 139.283545 167.387556 188.76767 10
# fn_median2(dfr, 5) 2.896002 3.026053 4.554341 3.448822 3.687797 15.40021 10
# Same benchmark on 1e6 rows, with 5 repetitions per expression instead of 10.
# `val` is again an unseeded random permutation, so the data differ between
# runs.
dfr <- data.frame(pos = seq_len(1e6), val = sample(1e6))
microbenchmark(fn_median(dfr, 5), fn_median2(dfr, 5), times = 5)
# Unit: milliseconds
# expr min lq mean median uq max neval
# fn_median(dfr, 5) 13295.8565 13710.4458 13729.029 13734.9328 13876.7450 14027.1664 5
# fn_median2(dfr, 5) 97.7186 103.9742 120.471 119.3268 121.1799 160.1556 5
Используемые функции:
library(data.table)
#' Block-wise median of `val`, grouped with data.table
#'
#' Groups rows by the integer division `(pos - 1) %/% n` and summarises each
#' group in a single grouped data.table call.
#'
#' @param df A data.frame (or data.table) with columns `pos` and `val`.
#' @param n Divisor for the integer division, i.e. the block width in `pos`
#'   units.
#' @return A data.table with columns `start` (first `pos` of the group), `end`
#'   (last `pos` of the group) and `median` (median of the group's `val`).
fn_median2 <- function(df, n) {
  # Work on a data.table built from the input rather than calling setDT(),
  # which would change the class of the caller's object by reference.
  dt <- as.data.table(df)
  per_block <- dt[
    ,
    .(start = pos[1], end = last(pos), median = median(val)),
    by = .(drop = (pos - 1) %/% n)
  ]
  # `drop` is just the temporary grouping key.
  per_block[, -"drop"]
}
#' Median of `val` over consecutive windows of rows (loop implementation)
#'
#' Walks through the rows of `dfr` in steps of `win` and summarises each
#' window. Windows are formed by row position, not by the value of `pos`.
#' If the number of rows is not a multiple of `win`, the last window is
#' shorter and is summarised over the rows that remain.
#'
#' @param dfr A data.frame with columns `pos` and `val`.
#' @param win Window size in rows; a single number >= 1. Defaults to 5.
#' @return A data.frame with one row per window and columns `start` (`pos` of
#'   the first row in the window), `end` (`pos` of the last row in the window)
#'   and `median` (median of `val` in the window).
fn_median <- function(dfr, win = 5) {
  stopifnot(is.numeric(win), length(win) == 1L, !is.na(win), win >= 1)

  n <- nrow(dfr)
  # ceiling(), not floor(): a trailing partial window also needs a slot.
  n_win <- ceiling(n / win)
  vec_start <- numeric(n_win)
  vec_end <- numeric(n_win)
  vec_median <- numeric(n_win)

  k <- 1
  i <- 1
  while (i <= n) {
    # Clamp the window end to the last row so a short final window does not
    # index past the data (which would yield NA for `end` and `median`).
    j <- min(i + (win - 1), n)
    vec_start[k] <- dfr$pos[i]
    vec_end[k] <- dfr$pos[j]
    vec_median[k] <- median(dfr$val[i:j])
    k <- k + 1
    i <- i + win
  }

  data.frame(start = vec_start, end = vec_end, median = vec_median)
}