Создайте data.frame, как показано в исходном вопросе:
# Example data from the original question: six rows of integer-coded fields.
# Several columns contain the values 99 and 999 that the snippets in this
# file filter out.
df <- data.frame(
  dage    = c(31L, 38L, 32L, 28L, 35L, 33L),
  ded     = c(5L, 5L, 1L, 4L, 4L, 4L),
  dht     = c(65L, 70L, 99L, 99L, 99L, 98L),
  dwt     = c(110L, 148L, 999L, 999L, 999L, 998L),
  marital = c(1L, 1L, 1L, 1L, 1L, 1L),
  inc     = c(1L, 4L, 2L, 98L, 7L, 99L),
  smoke   = c(0L, 0L, 1L, 3L, 0L, 0L),
  time    = c(0L, 0L, 1L, 4L, 0L, 0L),
  number  = c(0L, 0L, 1L, 2L, 0L, 0L)
)
Решение с data.table:
library(data.table)
dt <- as.data.table(df)
# Keep only the rows in which no column equals 99 or 999.
# The mask is computed from `dt` itself (not from the separate `df` object),
# so the filter stays correct even if `dt` is later modified independently.
# `dt == 99 | dt == 999` is a logical matrix; rowSums() counts hits per row.
dt[rowSums(dt == 99 | dt == 999) == 0]
Решение на базовом R:
# Keep only the rows in which no column contains 99 or 999.
# The membership test runs column by column instead of apply(df, 1, ...):
# apply() first coerces the data.frame with as.matrix(), and if any column is
# non-numeric the numbers are formatted as padded strings (e.g. " 99"), which
# makes `%in% c(99, 999)` silently miss matches. `%in%` never returns NA, so
# rows with missing values are kept unless they also contain 99 or 999.
is_sentinel <- lapply(df, function(col) col %in% c(99, 999))
# The FALSE seed makes Reduce() well-defined for a zero-column data.frame.
df[!Reduce(`|`, is_sentinel, FALSE), ]
Решение с dplyr:
# library() fails loudly when dplyr is not installed; require() would only
# return FALSE and let the next line fail with a less helpful error.
library(dplyr)
# Keep only the rows in which every column differs from both 99 and 999.
# filter(if_all(...)) replaces the superseded filter_all()/all_vars() pair
# (needs dplyr >= 1.0.4). The predicate is unchanged, so rows where a
# comparison yields NA are still dropped by filter().
filter(df, if_all(everything(), function(x) x != 99 & x != 999))
Сравнение производительности (microbenchmark):
# Benchmark the three approaches on the example data; each expression is
# evaluated 10000 times (times = 10000). Note that the `dt` expression
# computes its row mask from `df`, not from `dt`.
microbenchmark::microbenchmark(dt = dt[rowSums(df == 99)==0 & rowSums(df == 999)==0],
base = df[!apply(df, 1, function(x) any(x %in% c(99,999))),],
dplyr = filter_all(df, all_vars(.!=99 & .!=999)), times = 10000)
# Recorded output of one run (lower is better). In this run base R has the
# lowest median, data.table takes roughly twice as long, and dplyr is the
# slowest by about an order of magnitude.
# Unit: microseconds
#  expr      min       lq      mean    median       uq        max neval
#  dt    588.000  645.801  701.4309  675.6005  723.2515   5203.801 10000
#  base  264.601  296.901  324.2588  314.4005  335.7020   3435.600 10000
#  dplyr 3671.400 3854.301 4036.3976 3915.3010 3983.0010 139226.802 10000