В настоящее время метод @GKi является самым быстрым и использует минимальную память .
Данные:
dat <- data.frame(species = c("Spruce", "Spruce", "Oak", "Oak", "Fir")
, diameter = c(4, 30, 4, 30, 30)
, height = c(30, 100, 30, 100, 100))
set.seed(0)
size <- 1e5
dat2 <- data.frame(
species = sample(c("Spruce", "Oak", "Fir"), size=size, replace = TRUE)
, diameter = runif(size, 1, 50)
, height = runif(size, 1, 100))
Методы:
funRobinson <- function(species, diameter, height) {
bf_params <- data.frame(species = c("Spruce", "Oak"),
b0_small = c(26.729, 29.790),
b1_small = c( 0.01189, 0.00997),
b0_large = c(32.516, 85.150),
b1_large = c( 0.01181, 0.00841))
dimensions <- data.frame(diameter = diameter,
height = height,
species = as.character(species),
this_order = 1:length(species))
dimensions <- merge(y=dimensions, x=bf_params, all.y=TRUE, all.x=FALSE)
dimensions <- dimensions[order(dimensions$this_order, decreasing=FALSE),]
b0 <- with(dimensions, ifelse(diameter <= 20.5, b0_small, b0_large))
b1 <- with(dimensions, ifelse(diameter <= 20.5, b1_small, b1_large))
b0 + b1 * dimensions$diameter^2 * dimensions$height
}
with(dat, funRobinson(species, diameter, height))
funStupidWolf <- function(species, diameter, height) {
species_avail=c("Spruce", "Oak")
params_b0 = cbind(b0_small = c(26.729, 29.790),
b0_large = c(32.516, 85.150))
rownames(params_b0) = species_avail
params_b1 = cbind(b1_small = c( 0.01189, 0.00997),
b1_large = c( 0.01181, 0.00841))
rownames(params_b1) = species_avail
ROWS = match(species,species_avail)
COLS = +(diameter > 20.5) + 1
idx = cbind(ROWS,COLS)
b0 <- params_b0[idx]
b1 <- params_b1[idx]
b0 + b1 * diameter^2 * height
}
with(dat, funStupidWolf(species, diameter, height))
funGKiCl <- function(params, speciesLevels) {
force(params)
force(speciesLevels)
nSpecies <- length(speciesLevels)
i <- match(speciesLevels, params$species)
params_b0 <- c(params$b0_small[i], params$b0_large[i])
params_b1 <- c(params$b1_small[i], params$b1_large[i])
rm(i, params, speciesLevels)
function(species, diameter, height) {
i <- unclass(species) + (diameter > 20.5) * nSpecies
params_b0[i] + params_b1[i] * diameter * diameter * height
}
}
params <- read.table(header = TRUE, text = "
species b0_small b1_small b0_large b1_large
Spruce 26.729 0.01189 32.516 0.01181
Oak 29.790 0.00997 85.150 0.00841")
funGKi <- compiler::cmpfun(funGKiCl(params, levels(dat$species)))
with(dat, funGKi(species, diameter, height))
rm(funGKiCl, params)
fun <- alist(Robinson = funRobinson(species, diameter, height)
, StupidWolf = funStupidWolf(species, diameter, height)
, GKi = funGKi(species, diameter, height))
Время:
library(microbenchmark)
attach(dat)
microbenchmark(list = fun, check = "equal")
#Unit: microseconds
# expr min lq mean median uq max neval
# Robinson 1876.491 1911.583 1997.00924 1934.8835 1962.3145 3131.453 100
# StupidWolf 15.618 17.371 22.30764 18.9995 26.5125 33.239 100
# GKi 2.270 2.965 4.04041 3.6825 5.0415 7.434 100
microbenchmark(list = fun, check = "equal", control=list(order="block"))
#Unit: microseconds
# expr min lq mean median uq max neval
# Robinson 1887.906 1918.0475 2000.55586 1938.847 1962.9540 3131.112 100
# StupidWolf 15.184 16.2775 16.97111 16.668 17.2230 34.646 100
# GKi 2.063 2.1560 2.37552 2.255 2.4015 5.616 100
attach(dat2)
microbenchmark(list = fun, setup = gc(), check = "equal")
#Unit: milliseconds
# expr min lq mean median uq max neval
# Robinson 189.342408 193.222831 193.682868 193.573419 194.181910 198.231698 100
# StupidWolf 6.755601 6.786439 6.836253 6.804451 6.832409 7.370937 100
# GKi 1.756241 1.767335 1.794328 1.782949 1.806370 1.964409 100
library(bench)
attach(dat)
mark(exprs = fun, iterations = 100)
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time
# <bch:expr> <bch:t> <bch:t> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm>
#1 Robinson 1.87ms 1.9ms 521. 0B 10.6 98 2 188.1ms
#2 StupidWolf 16.48µs 17.46µs 55666. 0B 0 100 0 1.8ms
#3 GKi 2.67µs 2.86µs 337265. 0B 0 100 0 296.5µs
attach(dat2)
mark(exprs = fun, iterations = 100)
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc
# <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl>
#1 Robinson 188.96ms 216.15ms 4.44 67.71MB 11.4 100 257
#2 StupidWolf 6.79ms 6.85ms 131. 8.77MB 30.0 100 23
#3 GKi 1.7ms 1.72ms 552. 2.29MB 22.1 100 4
#Some expressions had a GC in every iteration; so filtering is disabled.
Память:
memUse <- function(list, setup = "", gctort = FALSE) {
as.data.frame(lapply(list, function(z) {
eval(setup)
tt <- sum(.Internal(gc(FALSE, TRUE, TRUE))[13:14])
gctorture(on = gctort)
eval(z)
gctorture(on = FALSE)
sum(.Internal(gc(FALSE, FALSE, TRUE))[13:14]) - tt
}))
}
attach(dat)
memUse(list=fun, gctort = FALSE)
# Robinson StupidWolf GKi
#1 0.9 0 0
memUse(list=fun, gctort = TRUE)
# Robinson StupidWolf GKi
#1 0 0 0
attach(dat2)
memUse(list=fun, gctort = FALSE)
# Robinson StupidWolf GKi
#1 71.9 8.8 2.3
memUse(list=fun, gctort = TRUE)
# Robinson StupidWolf GKi
#1 29.7 6.5 2.3
object.size(funRobinson)
#109784 bytes
object.size(funStupidWolf)
#68240 bytes
object.size(funGKi)
#21976 bytes