Вы можете сэкономить значительное время, векторизовав свой внутренний цикл (я тогда использую apply()
для внешнего цикла):
# We'll need both DescTools and microbenchmark
library(DescTools)
library(microbenchmark)
# Make example data
set.seed(123) # setting seed for reproducibility
n <- 10
x <- sample(seq(as.Date("2008/10/20"), as.Date("2018/10/20"), "day"), n)
y <- sample(seq(as.Date("2008/10/20"), as.Date("2018/10/20"), "day"), n)
(mat <- cbind(x, y))
#> x y
#> [1,] 15222 17667
#> [2,] 17050 15827
#> [3,] 15665 16645
#> [4,] 17395 16262
#> [5,] 17603 14547
#> [6,] 14338 17454
#> [7,] 16098 15069
#> [8,] 17425 14325
#> [9,] 16181 15367
#> [10,] 15835 17650
# First get the answer using nested loops
a <- z <- 1:n
for (i in 1:n) {
for (j in 1:n) {
a[j] <- Overlap(mat[i, ],mat[j, ])
}
# Noticed I've moved this sum to the bottom,
# so that our first element isn't just a sum from one to n
z[i] <- sum(a, na.rm = T)
}
z
#> [1] 16102 9561 7860 7969 18169 18140 6690 18037 6017 12374
apply(mat, 1, function(r) sum(Overlap(r, mat)))
#> [1] 16102 9561 7860 7969 18169 18140 6690 18037 6017 12374
microbenchmark(apply = apply(mat, 1, function(r) sum(Overlap(r, mat))),
loop = for (i in 1:n) {
for (j in 1:n) {
a[j] <- Overlap(mat[i, ],mat[j, ])
}
# Noticed I've moved this sum to the bottom,
# so that our first element isn't just a sum from one to n
z[i] <- sum(a, na.rm = T)
})
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> apply 7.538967 7.688929 7.894379 7.767989 7.891177 13.57523 100
#> loop 76.051011 77.203810 80.045325 78.158369 79.206538 114.68139 100
#> cld
#> a
#> b
Создано в 2018-10-20 Представьте пакет (v0.2.1)
Теперь давайте попробуем понять, как он масштабируется с (немного) большими примерами данных (если данные становятся слишком большими, тесты выполняются вечно):
#
n <- 100
x <- sample(seq(as.Date("2008/10/20"), as.Date("2018/10/20"), "day"), n, r = T)
y <- sample(seq(as.Date("2008/10/20"), as.Date("2018/10/20"), "day"), n, r = T)
mat <- cbind(x, y)
a <- z <- 1:n
for (i in 1:n) {
for (j in 1:n) {
a[j] <- Overlap(mat[i, ],mat[j, ])
}
z[i] <- sum(a, na.rm = T)
}
# In case you're concerned it still works:
all.equal(z, apply(mat, 1, function(r) sum(Overlap(r, mat))))
#> [1] TRUE
microbenchmark(apply = apply(mat, 1, function(r) sum(Overlap(r, mat))),
loop = for (i in 1:n) {
for (j in 1:n) {
a[j] <- Overlap(mat[i, ],mat[j, ])
}
# Noticed I've moved this sum to the bottom,
# so that our first element isn't just a sum from one to n
z[i] <- sum(a, na.rm = T)
})
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> apply 258.1151 262.8007 269.8172 265.9643 276.8799 296.2167 100
#> loop 5806.9834 5841.3362 5890.4988 5863.7317 5884.2308 6222.1670 100
#> cld
#> a
#> b
Создано в 2018-10-20 пакетом представительство (v0.2.1)