В настоящее время похоже, что использование
idx <- which(x < y)
x[idx] <- y[idx]
rm(idx)
может быть рекомендовано для подстановки двух векторов с одинаковым индексным вектором.
В случае, если вы используете с одной стороны позицию, а с другойсбоку значение, как в обновлении соединения ,
idx <- match(x, y)
idxn <- which(!is.na(idx))
x[idxn] <- -y[idx[idxn]]
rm(idx, idxn)
может быть рекомендовано.
Подробности:
library(microbenchmark)
fun <- alist(m1 = x[x < y] <- y[x < y]
, m1w = x[which(x < y)] <- y[which(x < y)]
, m2 = {idx <- x < y; x[idx] <- y[idx]; rm(idx)}
, m2w = {idx <- which(x < y); x[idx] <- y[idx]; rm(idx)}
, m2wl = {local({idx <- which(x < y); x[idx] <<- y[idx]})}
, m3 = {for(i in seq_along(x)) {if(x[i] < y[i]) x[i] <- y[i]}}
, m3w = {for(idx in which(x < y)) {x[idx] <- y[idx]}}
, m3wl = {local(for(idx in which(x < y)) {x[idx] <<- y[idx]})}
, m4 = {(function() {delayedAssign("idx", x < y); x[idx] <<- y[idx]})()}
, m4w = {(function() {delayedAssign("idx", which(x < y)); x[idx]<<- y[idx]})()}
, m5 = {evalq({delayedAssign("idx", x < y); x[idx] <<- y[idx]}, envir = new.env(), enclos = parent.frame())}
, m5w = {evalq({delayedAssign("idx", which(x < y)); x[idx] <<- y[idx]}, envir = new.env(), enclos = parent.frame())}
, m6 = x <- ifelse(x < y, y, x)
, m7 = x <- sapply(seq_along(x), function(i) {if(x[i] < y[i]) y[i] else x[i]})
, m8 = with(data.frame(idx = x < y), x[idx] <<- y[idx])
, m8w = with(data.frame(idx = which(x < y)), x[idx] <<- y[idx])
, m9 = x <- pmax(x, y)
)
memUse <- function(list, setup = "", gctort = FALSE) {
as.data.frame(lapply(list, function(z) {
eval(setup)
tt <- sum(.Internal(gc(FALSE, TRUE, TRUE))[13:14])
gctorture(on = gctort)
eval(z)
gctorture(on = FALSE)
sum(.Internal(gc(FALSE, FALSE, TRUE))[13:14]) - tt
}))
}
n <- 1e6L #On my pc m7 takes very long with longer vectors
#50% repacement
xC <- rep_len(0:9, n)
x <- xC
y <- rep_len(9:0, n)
microbenchmark(list = fun, setup = x <- xC, times = 20)
#Unit: milliseconds
# expr min lq mean median uq max neval
# m1 9.362981 9.941294 30.185996 12.315266 23.531700 141.28603 20
# m1w 10.029621 10.971671 17.328038 12.256211 17.934435 81.65052 20
# m2 7.177270 7.368008 11.875662 9.524422 12.633254 37.91534 20
# m2w 7.083238 7.844280 11.196914 8.550595 14.602815 22.86438 20
# m2wl 7.050052 7.258657 10.318978 7.923862 9.745804 25.74206 20
# m3 109.944807 110.882601 114.673555 111.885780 117.075936 129.19246 20
# m3w 35.628572 36.300217 46.996217 37.574246 40.024844 116.80551 20
# m3wl 363.532255 373.069300 431.522826 395.528996 456.413863 597.30239 20
# m4 7.116324 8.001434 11.823148 9.022419 13.660199 27.59086 20
# m4w 6.963074 7.048622 13.242077 8.084297 11.645519 78.60026 20
# m5 7.067611 7.327816 16.069991 9.284865 15.416912 115.43513 20
# m5w 7.034584 8.382096 16.781882 9.726437 16.823329 108.70100 20
# m6 17.651948 18.796132 26.044715 19.585389 27.150359 95.99133 20
# m7 933.336461 1028.588463 1082.014782 1077.447541 1142.779436 1203.75604 20
# m8 7.304279 7.613715 12.683940 9.259327 16.367387 46.73193 20
# m8w 7.306591 7.446765 13.722586 8.317214 13.281609 83.86340 20
# m9 3.404972 3.541755 3.796737 3.685642 4.012589 4.54105 20
memUse(list=fun, setup = quote(x <- xC), gctort = FALSE)
# m1 m1w m2 m2w m2wl m3 m3w m3wl m4 m4w m5 m5w m6 m7 m8 m8w m9
#1 24.8 24.8 21 15.2 15.2 59.2 68 68 21 15.2 21 15.2 34.3 106.1 21.1 15.3 3.8
#100% repacement
xC <- rep_len(0, n)
x <- xC
y <- rep_len(1, n)
microbenchmark(list = fun, setup = x <- xC, times = 20)
#Unit: milliseconds
# expr min lq mean median uq max neval
# m1 13.648232 14.82093 28.621411 15.936936 26.199469 99.553175 20
# m1w 15.920691 16.54614 23.413209 17.117911 19.579158 113.066464 20
# m2 11.551213 11.89491 20.940656 13.153880 22.355784 94.305937 20
# m2w 12.313997 12.47287 19.400817 13.687655 20.209957 89.129126 20
# m2wl 12.276096 12.42314 14.322066 13.107012 14.680684 21.261685 20
# m3 137.168517 139.28248 143.018082 140.939792 144.204528 165.616204 20
# m3w 66.463903 66.66168 69.924619 67.901559 71.515786 79.278128 20
# m3wl 690.311636 703.53159 748.621164 718.447933 748.375469 970.204829 20
# m4 11.441186 11.74227 17.736055 12.714788 16.740723 80.710164 20
# m4w 12.286395 12.40667 14.305248 12.501939 13.737790 31.441008 20
# m5 11.473874 11.53943 17.929906 11.960568 13.638548 83.077290 20
# m5w 12.278804 12.38792 19.689197 13.446629 18.157106 100.649735 20
# m6 18.118748 19.03396 29.584077 19.753099 26.499255 99.333902 20
# m7 847.875768 1032.68963 1071.521727 1072.907583 1151.036749 1249.079078 20
# m8 12.020599 12.77211 14.821545 13.188046 14.576952 24.397931 20
# m8w 12.551370 12.63402 19.263936 12.859222 17.794158 86.014839 20
# m9 3.393315 3.61704 4.039384 3.711276 4.456398 5.489078 20
memUse(list=fun, setup = quote(x <- xC), gctort = FALSE)
# m1 m1w m2 m2w m2wl m3 m3w m3wl m4 m4w m5 m5w m6 m7 m8 m8w m9
#1 38.2 38.2 34.3 26.7 26.7 61.6 72.6 72.6 34.3 26.7 34.3 26.7 42 108.2 34.5 26.8 7.6
#0% repacement
xC <- rep_len(1, n)
x <- xC
y <- rep_len(0, n)
microbenchmark(list = fun, setup = x <- xC, times = 20)
#Unit: milliseconds
# expr min lq mean median uq max neval
# m1 6.589096 6.664210 9.885122 6.684262 13.775726 21.878444 20
# m1w 6.842137 6.904734 14.664578 6.947293 13.320292 109.027443 20
# m2 3.401598 3.413235 4.953130 3.438164 3.963643 14.268839 20
# m2w 3.005197 3.017770 3.094672 3.042470 3.074060 4.049775 20
# m2wl 3.981523 4.005924 9.785378 4.021979 4.156620 92.515420 20
# m3 80.912504 81.154859 81.901925 81.631556 82.670752 84.190109 20
# m3w 5.377001 5.416671 7.138954 5.483028 6.511429 16.243317 20
# m3wl 2.919146 2.947492 4.005262 2.964488 2.981436 12.981625 20
# m4 4.453434 4.485750 5.874224 4.536671 5.562121 13.933019 20
# m4w 3.991118 4.008097 6.597674 4.028864 4.635915 22.966413 20
# m5 4.461602 4.494989 7.666011 4.511920 9.758226 24.383190 20
# m5w 3.959898 4.002523 7.709664 4.050941 4.173436 53.543674 20
# m6 17.661630 17.791470 23.846625 19.567245 28.225758 38.622220 20
# m7 933.661883 1033.959171 1077.598475 1096.121598 1136.886472 1175.809700 20
# m8 4.719020 4.745232 9.919018 4.781921 6.507246 75.527422 20
# m8w 4.239784 4.271047 9.102203 4.299095 7.165185 57.379780 20
# m9 3.366839 3.452929 4.988191 3.481065 3.564584 19.815845 20
memUse(list=fun, setup = quote(x <- xC), gctort = FALSE)
# m1 m1w m2 m2w m2wl m3 m3w m3wl m4 m4w m5 m5w m6 m7 m8 m8w m9
#1 22.9 22.9 11.5 7.6 15.3 44.9 7.6 7.6 19.1 15.3 19.1 15.3 42 128.4 19.2 15.4 7.6
#With a more complex condition than x < y
fun <- alist(m1 = x[tan(x) < tan(y)] <- y[tan(x) < tan(y)]
, m1w = x[which(tan(x) < tan(y))] <- y[which(tan(x) < tan(y))]
, m2 = {idx <- tan(x) < tan(y); x[idx] <- y[idx]; rm(idx)}
, m2w = {idx <- which(tan(x) < tan(y)); x[idx] <- y[idx]; rm(idx)}
, m2wl = {local({idx <- which(tan(x) < tan(y)); x[idx] <<- y[idx]})}
, m3 = {for(i in seq_along(x)) {if(tan(x[i]) < tan(y[i])) x[i] <- y[i]}}
, m3w = {for(idx in which(tan(x) < tan(y))) {x[idx] <- y[idx]}}
, m3wl = {local(for(idx in which(tan(x) < tan(y))) {x[idx] <<- y[idx]})}
, m4 = {(function() {delayedAssign("idx", tan(x) < tan(y)); x[idx] <<- y[idx]})()}
, m4w = {(function() {delayedAssign("idx", which(tan(x) < tan(y))); x[idx] <<- y[idx]})()}
, m5 = {evalq({delayedAssign("idx", tan(x) < tan(y)); x[idx] <<- y[idx]}, envir = new.env(), enclos = parent.frame())}
, m5w = {evalq({delayedAssign("idx", which(tan(x) < tan(y))); x[idx] <<- y[idx]}, envir = new.env(), enclos = parent.frame())}
, m6 = x <- ifelse(tan(x) < tan(y), y, x)
, m7 = x <- sapply(seq_along(x), function(i) {if(tan(x[i]) < tan(y[i])) y[i] else x[i]})
, m8 = with(data.frame(idx = tan(x) < tan(y)), x[idx] <<- y[idx])
, m8w = with(data.frame(idx = which(tan(x) < tan(y))), x[idx] <<- y[idx])
)
#50% repacement
xC <- rep_len(0:9, n)
x <- xC
y <- rep_len(9:0, n)
microbenchmark(list = fun, setup = x <- xC, times = 20)
#Unit: milliseconds
# expr min lq mean median uq max neval
# m1 150.53809 151.25090 164.24548 152.94124 170.73427 223.97944 20
# m1w 150.56266 151.84806 161.74585 153.18321 160.00592 223.86287 20
# m2 76.88262 78.13652 85.72190 82.40141 87.19680 150.77076 20
# m2w 76.53983 77.40472 87.74890 77.94618 82.32798 156.58522 20
# m2wl 76.93366 77.58086 82.68244 78.15460 82.18667 107.52469 20
# m3 232.22004 232.99509 237.67218 234.10627 235.14893 304.04201 20
# m3w 105.59221 105.86459 110.04552 106.89158 111.67208 133.75261 20
# m3wl 435.25394 446.68204 487.41579 453.01685 521.65402 705.62717 20
# m4 77.32980 77.89973 84.78399 79.00021 79.94984 160.29572 20
# m4w 76.99555 77.94441 81.50439 78.60553 85.55179 96.93830 20
# m5 76.66992 77.33471 89.60525 79.20687 86.08836 172.16646 20
# m5w 76.37683 77.05510 82.85917 77.53271 82.16778 144.24078 20
# m6 87.37572 88.45537 98.99907 89.02368 96.81267 165.25716 20
# m7 1027.99068 1150.86229 1199.66760 1201.45385 1239.21803 1315.94626 20
# m8 76.86455 77.93917 79.76095 78.96807 79.60475 89.65809 20
# m8w 78.69934 79.13237 83.26944 80.81390 87.51346 101.49319 20
memUse(list=fun, setup = quote(x <- xC), gctort = FALSE)
# m1 m1w m2 m2w m2wl m3 m3w m3wl m4 m4w m5 m5w m6 m7 m8 m8w
#1 55.3 55.3 36.2 30.5 30.5 55.9 79.5 79.5 36.2 30.5 36.2 30.5 49.6 133.9 36.3 30.6
#100% repacement
xC <- rep_len(0, n)
x <- xC
y <- rep_len(1, n)
microbenchmark(list = fun, setup = x <- xC, times = 20)
#Unit: milliseconds
# expr min lq mean median uq max neval
# m1 72.50779 73.85809 81.62600 75.17883 83.09049 143.10837 20
# m1w 74.33753 75.62555 81.39149 77.06395 85.34476 105.24806 20
# m2 40.73727 41.48906 52.22463 42.93811 50.68852 119.13366 20
# m2w 41.43466 41.72756 50.53189 44.94449 52.37125 121.17726 20
# m2wl 41.60436 42.25114 49.84177 43.86556 50.83141 127.52145 20
# m3 214.33331 215.56117 223.15401 216.54753 221.73404 304.96538 20
# m3w 96.23799 96.71452 98.80659 97.73016 99.23308 106.56888 20
# m3wl 716.11847 736.26573 839.67672 775.01016 932.43366 1054.35836 20
# m4 40.75406 41.78469 45.13181 42.54718 49.61718 56.45373 20
# m4w 41.51614 41.79747 50.39626 43.04746 53.30470 112.03821 20
# m5 40.69414 41.41407 46.72030 42.44355 50.06923 72.67644 20
# m5w 41.53582 41.62817 48.23472 42.68742 50.58984 110.01377 20
# m6 47.13042 48.38094 58.69262 53.26532 59.43038 143.99039 20
# m7 992.82221 1102.50031 1139.33831 1137.81362 1161.28651 1277.99506 20
# m8 40.92674 42.25611 47.05092 46.78702 50.60716 58.58826 20
# m8w 42.59176 43.02294 53.50535 47.65410 53.70309 120.47093 20
memUse(list=fun, setup = quote(x <- xC), gctort = FALSE)
# m1 m1w m2 m2w m2wl m3 m3w m3wl m4 m4w m5 m5w m6 m7 m8 m8w
#1 53.4 53.4 49.6 42 42 64.6 90.9 90.9 49.6 42 49.6 42 49.6 111.8 49.7 42.1
#0% repacement
xC <- rep_len(1, n)
x <- xC
y <- rep_len(0, n)
microbenchmark(list = fun, setup = x <- xC, times = 20)
#Unit: milliseconds
# expr min lq mean median uq max neval
# m1 64.13278 65.17428 76.80236 72.93639 74.11106 139.21633 20
# m1w 64.20926 64.36072 68.26227 65.65780 73.27032 81.91496 20
# m2 32.55641 32.73825 40.57034 33.73059 42.21539 106.96577 20
# m2w 32.08899 32.17692 37.02755 32.42990 33.41896 104.51287 20
# m2wl 33.21636 33.30782 40.70415 34.41295 42.56047 90.67323 20
# m3 143.45435 144.07389 145.11642 144.96065 145.85825 147.79313 20
# m3w 35.12864 35.40720 37.85706 35.58504 40.38525 44.57139 20
# m3wl 32.11977 32.22333 37.60191 32.42593 37.28842 97.82210 20
# m4 33.59010 33.79237 42.43258 34.75375 42.61118 140.07139 20
# m4w 33.18575 33.38007 40.68604 33.50068 42.04799 111.38275 20
# m5 33.69306 34.00844 36.43207 34.82090 36.07427 46.25004 20
# m5w 33.18643 33.28609 39.02025 34.24648 42.53507 71.25813 20
# m6 46.95918 47.91826 57.68163 55.85067 56.73664 139.46774 20
# m7 1012.54640 1101.79435 1136.96233 1143.16804 1169.20692 1315.41621 20
# m8 33.92067 34.15995 39.46636 35.18619 43.15650 57.13766 20
# m8w 34.33384 34.49190 38.55053 35.13948 43.33685 57.35593 20
memUse(list=fun, setup = quote(x <- xC), gctort = FALSE)
# m1 m1w m2 m2w m2wl m3 m3w m3wl m4 m4w m5 m5w m6 m7 m8 m8w
#1 45.8 45.8 26.7 22.9 30.5 56.2 22.9 22.9 34.3 30.5 34.3 30.5 49.6 110.3 34.5 30.6
#update join
n <- 1e5L
fun <- alist(m1 = x[!is.na(match(x, y))] <- -y[match(x, y)[!is.na(match(x, y))]]
, m1b = x[!is.na(match(x, y))] <- -y[na.omit(match(x, y))]
, m2 = {idx <- match(x, y); x[!is.na(idx)] <- -y[idx[!is.na(idx)]]; rm(idx)}
, m2b = {idx <- match(x, y); x[!is.na(idx)] <- -y[na.omit(idx)]; rm(idx)}
, m2c = {idx <- match(x, y); idxn <- !is.na(idx); x[idxn] <- -y[idx[idxn]]; rm(idx, idxn)}
, m2d = {idx <- match(x, y); idxn <- which(!is.na(idx)); x[idxn] <- -y[idx[idxn]]; rm(idx, idxn)}
, m3 = {for(i in seq_along(x)) {idx <- match(x[i], y); if(!is.na(idx)) {x[i] <- -y[idx]}}}
)
#50% repacement
xC <- rep_len(0:9, n)
x <- xC
y <- rep_len(4:0, n)
microbenchmark(list = fun, setup = x <- xC, times = 20)
#Unit: milliseconds
# expr min lq mean median uq max neval
# m1 5.099619 5.171017 5.312834 5.216994 5.283100 6.283539 20
# m1b 4.385434 4.445713 4.605092 4.483453 4.551095 5.875877 20
# m2 2.417861 2.452343 2.492067 2.480543 2.530377 2.660484 20
# m2b 2.926915 2.961653 3.084997 3.021751 3.077324 4.220686 20
# m2c 2.123245 2.195435 2.533441 2.278206 2.856874 3.800974 20
# m2d 2.103713 2.238050 2.674693 2.355113 3.362142 3.637493 20
# m3 9286.882351 9307.611081 9544.051457 9437.133414 9814.396820 9982.323049 20
memUse(list=fun, setup = quote(x <- xC), gctort = FALSE)
# m1 m1b m2 m2b m2c m2d m3
#1 9.9 8.9 5.6 6.7 4.8 4.3 57.5
#100% repacement
xC <- rep_len(0:9, n)
x <- xC
y <- rep_len(9:0, n)
microbenchmark(list = fun, setup = x <- xC, times = 20)
#Unit: milliseconds
# expr min lq mean median uq max neval
# m1 5.883064 5.915986 6.128063 5.995118 6.092747 7.247831 20
# m1b 4.194141 4.218877 4.386549 4.266843 4.332457 5.502114 20
# m2c 2.600136 2.645534 2.758381 2.662184 2.773307 3.999435 20
# m2d 2.692732 2.733519 2.875407 2.753938 2.768791 3.969404 20
# m2 2.904083 2.926344 3.008820 2.954213 3.036742 3.571559 20
# m2b 2.568076 2.587834 2.636594 2.627963 2.678503 2.758791 20
# m3 6618.263068 6635.364325 6907.177674 6779.890271 6923.410849 8135.443601 20
memUse(list=fun, setup = quote(x <- xC), gctort = FALSE)
# m1 m1b m2 m2b m2c m2d m3
#1 10.7 7.4 6.4 5.2 5.6 4.8 57.6
#0% repacement
xC <- rep_len(5:9, n)
x <- xC
y <- rep_len(4:0, n)
microbenchmark(list = fun, setup = x <- xC, times = 20)
#Unit: milliseconds
# expr min lq mean median uq max neval
# m1 4.175989 4.213025 4.319972 4.241362 4.379613 4.853646 20
# m1b 4.068414 4.111130 4.184327 4.162872 4.214517 4.393815 20
# m2 1.736049 1.775315 1.873898 1.849657 1.894493 2.286939 20
# m2b 2.766647 2.807789 2.871928 2.824930 2.902665 3.312260 20
# m2c 1.579129 1.637973 1.724936 1.704182 1.780902 2.028083 20
# m2d 1.507742 1.619949 1.683850 1.681409 1.747972 1.852088 20
# m3 10724.757111 12385.576878 12515.708573 12446.907482 12835.884597 13177.535579 20
memUse(list=fun, setup = quote(x <- xC), gctort = FALSE)
# m1 m1b m2 m2b m2c m2d m3
#1 8.7 8.5 4.5 6.4 3.7 3.3 57.4
version
#platform x86_64-pc-linux-gnu
#arch x86_64
#os linux-gnu
#system x86_64, linux-gnu
#status
#major 3
#minor 6.0
#year 2019
#month 04
#day 26
#svn rev 76424
#language R
#version.string R version 3.6.0 (2019-04-26)
#nickname Planting of a Tree