Кроме того, есть ли еще какая-то особенность, которую я могу использовать, чтобы ускорить ее?
Помимо ответа PoGibas , вы можете избежать вызова ifelse
и получить более быструю функцию следующим образом
contRun <- function(reps = 1e4, n = 1e4, d = 5e-3, l = 10, s = 0.1) {
# Replace rep with numeric
r <- numeric(reps)
theta <- numeric(n)
# Define before loop
ln <- l * n
# Don't use t as it's a function in base R
for (i in 1:reps) {
epsilon <- rnorm(1, 0, d)
# Sum two TRUE vectors
r[i] <- (sum(epsilon > theta) - sum(epsilon < -theta)) / ln
# Define before ifelse
absr <- abs(r[i])
theta <- ifelse(runif(n) < s, absr, theta)
}
mean(r)
}
contRun2 <- function(reps = 1e4, n = 1e4, d = 5e-3, l = 10, s = 0.1) {
r <- numeric(reps)
theta <- numeric(n)
ln <- l * n
for (i in 1:reps) {
epsilon <- rnorm(1, 0, d)
r[i] <- (sum(epsilon > theta) - sum(epsilon < -theta)) / ln
absr <- abs(r[i])
# avoid ifelse
theta[runif(n) < s] <- absr
}
mean(r)
}
contRun3 <- function(reps = 1e4, n = 1e4, d = 5e-3, l = 10, s = 0.1) {
r <- numeric(reps)
theta <- numeric(n)
ln <- l * n
for (i in 1:reps) {
epsilon <- rnorm(1, 0, d)
r[i] <- (sum(epsilon > theta) - sum(epsilon < -theta)) / ln
absr <- abs(r[i])
# replace runif
theta[sample(c(T, F), prob = c(s, 1 - s), size = n, replace = TRUE)] <- absr
}
mean(r)
}
# gives the same
set.seed(1)
o1 <- contRun()
set.seed(1)
o2 <- contRun2()
set.seed(1)
o3 <- contRun3()
all.equal(o1, o2)
#R [1] TRUE
all.equal(o1, o3) # likely will not match
#R [1] [1] "Mean relative difference: 0.1508537"
# but distribution is the same
set.seed(1)
c1 <- replicate(10000, contRun2(reps = 100, n = 100))
c2 <- replicate(10000, contRun3(reps = 100, n = 100))
par(mfcol = c(1, 2), mar = c(5, 4, 2, .5))
hist(c1, breaks = seq(-.015, .015, length.out = 26))
hist(c2, breaks = seq(-.015, .015, length.out = 26))
![enter image description here](https://i.stack.imgur.com/zbzsz.png)
# the latter is faster
microbenchmark::microbenchmark(
contRun = {set.seed(1); contRun ()},
contRun2 = {set.seed(1); contRun2()},
contRun3 = {set.seed(1); contRun3()},
times = 5)
#R Unit: seconds
#R expr min lq mean median uq max neval
#R contRun 7.121264 7.371242 7.388159 7.384997 7.443940 7.619352 5
#R contRun2 3.811267 3.887971 3.892523 3.892158 3.921148 3.950070 5
#R contRun3 1.920594 1.920754 1.998829 1.999755 2.009035 2.144005 5
Теперь единственным узким местом является runif
в contRun2
. Замена на sample
дает значительное улучшение.