Общее ускорение: 2,3x
Заменив if_else() на ifelse(), я ускорил код примерно в 1,6 раза.
library(microbenchmark)
library(dplyr)
# Benchmark: zero out selected columns with base ifelse(), applying one
# condition on col_1 at a time. Several columns are therefore rewritten more
# than once inside the same mutate() call.
microbenchmark(
# Runs (untimed) before each timed evaluation so every run starts from fresh
# data; make_d() is not defined in this snippet.
setup = { d <- make_d() },
dplyr_mutate_ifelse =
{
d <- d %>%
mutate(
# Rows where col_1 == 0: zero col_5 through col_9.
col_5 = ifelse(col_1 == 0, 0L, col_5),
col_6 = ifelse(col_1 == 0, 0L, col_6),
col_7 = ifelse(col_1 == 0, 0L, col_7),
col_8 = ifelse(col_1 == 0, 0L, col_8),
col_9 = ifelse(col_1 == 0, 0L, col_9),
# Rows where col_1 == 3: zero col_2 through col_9. The col_5..col_9
# expressions here see the values already updated above.
col_2 = ifelse(col_1 == 3, 0L, col_2),
col_3 = ifelse(col_1 == 3, 0L, col_3),
col_4 = ifelse(col_1 == 3, 0L, col_4),
col_5 = ifelse(col_1 == 3, 0L, col_5),
col_6 = ifelse(col_1 == 3, 0L, col_6),
col_7 = ifelse(col_1 == 3, 0L, col_7),
col_8 = ifelse(col_1 == 3, 0L, col_8),
col_9 = ifelse(col_1 == 3, 0L, col_9),
# Rows where col_1 == 2: zero col_7 and col_9.
col_7 = ifelse(col_1 == 2, 0L, col_7),
col_9 = ifelse(col_1 == 2, 0L, col_9)
)
},
# Number of timed evaluations of the expression.
times = 10
)
## Unit: milliseconds
## expr min lq mean median uq max neval
## dplyr_mutate 370.8031 375.8326 496.1825 481.8754 555.9229 762.9057 10
## dplyr_mutate_ifelse 226.3609 294.5468 317.6726 331.6935 356.0460 364.1252 10
Изменение каждого столбца только один раз дало ускорение ещё примерно в 1,3 раза.
library(microbenchmark)
library(dplyr)
# Benchmark: same zeroing rules as the step-by-step `dplyr_mutate_ifelse`
# version, but every column is rewritten exactly once by OR-ing together all
# col_1 values that zero it.
#
# Rules (taken from the step-by-step version):
#   col_2, col_3, col_4        -> 0 when col_1 == 3
#   col_5, col_6, col_8        -> 0 when col_1 is 0 or 3
#   col_7, col_9               -> 0 when col_1 is 0, 2 or 3
#
# NOTE(review): col_8 previously ignored col_1 == 0, so this variant was not
# equivalent to the step-by-step one. Timings recorded before this fix should
# be re-measured.
microbenchmark(
  # Untimed; rebuilds the data before each run because the timed expression
  # overwrites `d`. make_d() is not defined in this snippet.
  setup = {
    d <- make_d()
  },
  dplyr_mutate_ifelse2 = {
    d <- d %>%
      mutate(
        col_2 = ifelse(col_1 == 3, 0L, col_2),
        col_3 = ifelse(col_1 == 3, 0L, col_3),
        col_4 = ifelse(col_1 == 3, 0L, col_4),
        col_5 = ifelse(col_1 == 3 | col_1 == 0, 0L, col_5),
        col_6 = ifelse(col_1 == 3 | col_1 == 0, 0L, col_6),
        col_7 = ifelse(col_1 == 3 | col_1 == 0 | col_1 == 2, 0L, col_7),
        # Fixed: col_8 is also zeroed for col_1 == 0, as in the reference.
        col_8 = ifelse(col_1 == 3 | col_1 == 0, 0L, col_8),
        col_9 = ifelse(col_1 == 3 | col_1 == 0 | col_1 == 2, 0L, col_9)
      )
  },
  # Number of timed evaluations of the expression.
  times = 10
)
## Unit: milliseconds
## expr min lq mean median uq max neval
## dplyr_mutate 343.0100 420.2813 466.6023 470.1078 541.2145 549.5641 10
## dplyr_mutate_ifelse 216.8928 240.0308 350.4044 338.7416 480.7032 494.0995 10
## dplyr_mutate_ifelse2 156.2432 159.2615 238.6914 265.6903 300.9932 312.6007 10
Моя последняя идея состояла в том, чтобы вычислять каждый логический вектор только один раз; это дало ускорение ещё примерно в 1,4 раза.
library(microbenchmark)
library(dplyr)
# Benchmark: same zeroing rules as the step-by-step `dplyr_mutate_ifelse`
# version, but each logical mask over col_1 is computed once and reused for
# every column that shares it.
#
# NOTE(review): col_8 previously used iffer_1 (col_1 == 3 only), although the
# step-by-step version also zeroes it for col_1 == 0. Timings recorded before
# this fix should be re-measured.
microbenchmark(
  # Untimed; rebuilds the data before each run because the timed expression
  # overwrites `d`. make_d() is not defined in this snippet.
  setup = {
    d <- make_d()
  },
  dplyr_mutate_ifelse3 = {
    # Masks are built inside the timed expression, so their cost is measured.
    # Each later mask extends the previous one.
    iffer_1 <- d$col_1 == 3            # col_1 is 3
    iffer_2 <- iffer_1 | d$col_1 == 0  # col_1 is 0 or 3
    iffer_3 <- iffer_2 | d$col_1 == 2  # col_1 is 0, 2 or 3
    d <- d %>%
      mutate(
        col_2 = ifelse(iffer_1, 0L, col_2),
        col_3 = ifelse(iffer_1, 0L, col_3),
        col_4 = ifelse(iffer_1, 0L, col_4),
        col_5 = ifelse(iffer_2, 0L, col_5),
        col_6 = ifelse(iffer_2, 0L, col_6),
        col_7 = ifelse(iffer_3, 0L, col_7),
        # Fixed: col_8 is zeroed for col_1 in {0, 3}, as in the reference.
        col_8 = ifelse(iffer_2, 0L, col_8),
        col_9 = ifelse(iffer_3, 0L, col_9)
      )
  },
  # Number of timed evaluations of the expression.
  times = 10
)
## Unit: milliseconds
## expr min lq mean median uq max neval
## dplyr_mutate 393.9980 415.1171 489.2011 439.3474 538.9772 754.3425 10
## dplyr_mutate_ifelse 245.5530 341.7405 372.2182 360.2816 374.5953 505.7168 10
## dplyr_mutate_ifelse2 154.9945 168.6646 235.9066 271.3282 290.0135 299.2681 10
## dplyr_mutate_ifelse3 120.1260 122.4131 221.2445 188.9764 252.7045 590.2163 10