Я переписал ваш цикл for
, используя .subset2
здесь и там для эффективности:
repeat{
for (i in 1:nrow(tgt)) {
age.i <- .subset2(tgt,2L)[i]
if(age.i < 4) {
ID <- .subset2(tgt,1L)
id.i <- ID[i]
index.i <- .subset2(pool, 2L) == age.i
factor.i <- .subset2(pool, 4L)[index.i][sample(sum(index.i), 1)]
tgt[ID == id.i,] <- transform(tgt, Age = Age + 1, Amt = Amt * factor.i)[ID == id.i,]
next
}
}
if(min(tgt$Age) == 4) break
}
tgt
# ID Age Amt
# 9 9 4 352.000
# 8 8 4 2101.784
В некоторых больших фреймах данных (pool <-> 100 rows
& tgt <-> 75 rows
) я получаюпримерно на 60% быстрее петли.Вот контрольные цифры:
Результаты контрольных показателей
# 100 times
# Unit: milliseconds
# expr min lq mean median uq max neval cld
# old_loop 89.40558 93.69668 101.68928 96.73567 102.45847 166.89514 100 b
# new_loop 30.32833 32.99900 34.37742 33.96648 35.39198 56.01109 100 a
# 1000 times
# Unit: milliseconds
# expr min lq mean median uq max neval cld
# old_loop 88.21493 96.23644 106.43853 100.00970 110.21998 228.6108 1000 b
# new_loop 29.79882 33.39595 36.97823 35.36317 37.98608 104.7572 1000 a
Код контрольных показателей
n <- 100L
m <- 75L
microbenchmark::microbenchmark(
'old_loop' = {
repeat{
for (i in 1:nrow(tgt)) {
age.i <- tgt[i, 'Age']
if(age.i < 4) {
pool.i <- subset(pool, Age == age.i)
factor.i <- pool.i[sample(nrow(pool.i), 1), 'Factor']
tgt <- tgt %>%
mutate(Age = ifelse(ID == tgt[i, 'ID'], Age + 1, Age),
Amt = ifelse(ID == tgt[i, 'ID'], Amt * factor.i, Amt))
}
}
if(min(tgt$Age) == 4) {
break
}
}
},
'new_loop' = {
repeat{
for (i in 1:nrow(tgt)) {
age.i <- .subset2(tgt,2L)[i]
if(age.i < 4) {
ID <- .subset2(tgt,1L)
id.i <- ID[i]
index.i <- .subset2(pool, 2L) == age.i
factor.i <- .subset2(pool, 4L)[index.i][sample(sum(index.i), 1)]
tgt[ID == id.i,] <- transform(tgt, Age = Age + 1, Amt = Amt * factor.i)[ID == id.i,]
next
}
}
if(min(tgt$Age) == 4) break
}
},
setup = {
set.seed(777)
pool <- data.frame(ID = 1:n,
Age = sample(1:4, n, replace = TRUE),
Amt = round(runif(n, 0, 10)*100,0),
Factor = round(runif(n, 0.5, 2), 2))
tgt <- pool[sample(nrow(pool), m, TRUE), 1:3]
}, times = 10^2)