Другое базовое решение, которое должно быть довольно производительным:
#define columns to help automate
cols <- grep('^T', names(df))
Ts <- as.integer(substring(names(df)[cols], 2))
new_cols <- paste0('WTC', Ts[-1] - Ts[1])
# do the column difference calculation
df[, new_cols] <- abs(df[, cols[-1]] - df[, cols[-length(cols)]])
df[['MWTCC']] <- do.call(pmax, df[, new_cols])
df
data.table несколько сложнее с этим подходом из-за способа обработки нестандартной оценки. Вот эквивалент:
library(data.table)
dt <- as.data.table(df)
dt[, (new_cols) := abs(dt[, .SD, .SDcols = cols[-1]] - dt[, .SD, .SDcols = cols[-length(cols)]])]
dt[, MWTCC := do.call(pmax, .SD)]
# or perhaps this - I assume tidyverse would use purrr::map() in a similar fashion
dt[, (new_cols) := lapply(seq_len(length(cols) - 1),
function(i) {
abs(dt[[cols[i+1]]] - dt[[cols[i]]])
}
)]
dt[, MWTCC := do.call(pmax, .SD)]
edit: добавлены некоторые тайминги для справки:
set.seed(0L)
nr <- 1e6
df <- data.frame(T5=rnorm(nr), T15=rnorm(nr), T25=rnorm(nr), T35=rnorm(nr))
cols <- c("T5", "T15", "T25", "T35")
cols <- grep('^T', names(df))
Ts <- as.integer(substring(names(df)[cols], 2))
new_cols <- paste0('WTC', Ts[-1] - Ts[1])
library(data.table)
dt <- as.data.table(df)
mtd0 <- function() {
df[c("MWTCC", new_cols)] <- t(apply(df[cols], 1, function(x) {
vals <- abs(diff(x))
c(max(vals), vals)
}))
df
}
mtd2 <- function() {
# do the column difference calculation
df[, new_cols] <- abs(df[, cols[-1]] - df[, cols[-length(cols)]])
df[['MWTCC']] <- do.call(pmax, df[, new_cols])
df[, c(names(df)[cols], 'MWTCC', new_cols)]
}
mtd_DT <- function() {
dt[, (new_cols) := abs(dt[, .SD, .SDcols = cols[-1]] - dt[, .SD, .SDcols = cols[-length(cols)]])]
dt[, MWTCC := do.call(pmax, .SD)]
# or perhaps this - I assume tidyverse would use purrr::map() in a similar fashion
dt[, (new_cols) := lapply(seq_len(length(cols) - 1),
function(i) {
abs(dt[[cols[i+1]]] - dt[[cols[i]]])
}
)]
dt[, MWTCC := do.call(pmax, .SD)]
}
bench::mark(mtd0(), mtd2(), mtd_DT(), check=FALSE)
тайминги:
# A tibble: 3 x 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result memory time gc
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list> <list> <list> <list>
1 mtd0() 14.1s 14.1s 0.0707 405.8MB 4.39 1 62 14.1s <df[,8] [1,000,000 x 8]> <df[,3] [46,873 x 3]> <bch:tm> <tibble [1 x 3]>
2 mtd2() 30.7ms 42.9ms 16.6 57.2MB 3.69 9 2 542.5ms <df[,8] [1,000,000 x 8]> <df[,3] [8 x 3]> <bch:tm> <tibble [9 x 3]>
3 mtd_DT() 170.4ms 172.6ms 4.85 215.7MB 6.47 3 4 618ms <df[,8] [1,000,000 x 8]> <df[,3] [588 x 3]> <bch:tm> <tibble [3 x 3]>