Если у вас достаточно оперативной памяти, вы можете попытать счастья с:
library(data.table)
ans <- setDT(data)[,
.SD[.SD, on=.(Y<Y), .(Y=x.Y, Y1=i.Y), nomatch=0L, allow.cartesian=TRUE],
by=.(X, Year)]
setcolorder(ans, c("X", "Y", "Y1", "Year"))
ans
сравнение сроков с подходом Руи:
library(data.table)
x <- 1e5
set.seed(1)
data <- data.frame(
X = sample(seq(from = 20, to = 50, by = 5), size = x, replace = TRUE),
Y = sample(100:50000,size = x, replace = TRUE),
Year = sample(1990:2018,size = x, replace = TRUE)
)
DF <- data
mtd1 <- function() {
ans <- setDT(data)[, .SD[.SD, on=.(Y<Y), .(Y=x.Y, Y1=i.Y), nomatch=0L, allow.cartesian=TRUE], by=.(X, Year)]
setcolorder(ans, c("X", "Y", "Y1", "Year"))
ans
}
bench::mark(mtd1(), funRui(DF), check=FALSE)
тайминги:
# A tibble: 2 x 14
expression min mean median max `itr/sec` mem_alloc n_gc n_itr total_time result memory time gc
<chr> <bch:tm> <bch:tm> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <bch:tm> <list> <list> <list> <list>
1 mtd1() 2.17s 2.17s 2.17s 2.17s 0.460 2.3GB 4 1 2.17s <data.table [24,626,111 x ~ <Rprofmem [7,258 x ~ <bch:t~ <tibble [1 x ~
2 funRui(DF) 7.48s 7.48s 7.48s 7.48s 0.134 4.09GB 5 1 7.48s <data.frame [24,626,111 x ~ <Rprofmem [14,618 x~ <bch:t~ <tibble [1 x ~