Используя только base R, можно перебрать строки в цикле, извлечь из каждой элементы, равные 2 или 7, и вычислить mean логического вектора ('i1') и mean отобранных значений:
# Row-wise summary over every column except the first two.
# For each row only the entries equal to 2 or 7 are kept (NA never matches
# `%in%`, so missing values are dropped as well). The two results are:
#   probcount_2 - share of the kept entries that equal 2
#   average_27  - mean of the kept entries
# A row without any 2 or 7 gets the fallback pair 0.5 / 4.5.
df[c("probcount_2", "average_27")] <- t(apply(
  df[-(1:2)],
  MARGIN = 1,
  FUN = function(row) {
    kept <- row[row %in% c(2, 7)]
    if (length(kept) == 0) {
      # Nothing to average: use the fallback values.
      return(c(0.5, 4.5))
    }
    c(mean(kept == 2), mean(kept))
  }
))
df
# id t value_1 value_2 value_3 value_4 value_5 value_6 value_7 value_8 value_9 probcount_2 average_27
#1 1 1 NA NA NA NA NA NA NA NA NA 0.5000000 4.500000
#2 1 2 2 NA NA NA NA NA NA NA NA 1.0000000 2.000000
#3 1 2 2 2 NA NA NA NA NA NA NA 1.0000000 2.000000
#4 1 3 2 2 5 NA NA NA NA NA NA 1.0000000 2.000000
#5 1 4 2 2 5 2 NA NA NA NA NA 1.0000000 2.000000
#6 1 5 2 2 5 2 7 NA NA NA NA 0.7500000 3.250000
#7 1 6 2 2 5 2 7 7 NA NA NA 0.6000000 4.000000
#8 1 7 2 2 5 2 7 7 2 NA NA 0.6666667 3.666667
#9 1 8 2 2 5 2 7 7 2 2 NA 0.7142857 3.428571
#10 1 9 2 2 5 2 7 7 2 2 2 0.7500000 3.250000
#11 2 0 NA NA NA NA NA NA NA NA NA 0.5000000 4.500000
#12 2 1 5 NA NA NA NA NA NA NA NA 0.5000000 4.500000
#13 2 2 5 2 NA NA NA NA NA NA NA 1.0000000 2.000000
#14 2 3 5 2 2 NA NA NA NA NA NA 1.0000000 2.000000
#15 2 4 5 2 2 2 NA NA NA NA NA 1.0000000 2.000000
#16 2 5 5 2 2 2 7 NA NA NA NA 0.7500000 3.250000
#17 2 6 5 2 2 2 7 7 NA NA NA 0.6000000 4.000000
#18 2 7 5 2 2 2 7 7 2 NA NA 0.6666667 3.666667
#19 2 8 5 2 2 2 7 7 2 2 NA 0.7142857 3.428571
#20 2 9 5 2 2 2 7 7 2 2 2 0.7500000 3.250000
Либо можно векторизовать вычисление с помощью rowMeans:
# Vectorised variant: compute both summaries with rowMeans() instead of
# looping over rows.
#
# m1 holds every column except the first two as a matrix, with each entry
# that is not 2 or 7 (including NA) blanked out to NA.
m1 <- as.matrix(df[-(1:2)])
m1[!m1 %in% c(2, 7)] <- NA

# Share of 2s among the kept entries, and the mean of the kept entries.
# Rows whose entries are all NA yield NaN here and are patched below.
df$probcount_2 <- rowMeans(m1 == 2, na.rm = TRUE)
df$average_27 <- rowMeans(m1, na.rm = TRUE)

# Rows that receive the fallback pair 0.5 / 4.5:
#   * rows with t <= 1 (the original rule), and
#   * rows containing no 2 or 7 at all, which would otherwise stay NaN.
# In the sample data both conditions select the same rows; testing for the
# empty rows explicitly keeps the result NaN-free on other inputs.
i1 <- df$t <= 1 | rowSums(!is.na(m1)) == 0
df[i1, c("probcount_2", "average_27")] <- list(0.5, 4.5)
Данные:
# Example data: 20 rows, two ids with 10 rows each.
# `id` and `t` are integer; value_1 .. value_8 are integer and value_9 is
# double. Within each id, column value_k is NA for the first k rows and
# filled afterwards.
df <- data.frame(
  id = rep(1:2, each = 10L),
  t = c(1L, 2L, 2:9, 0:9),
  value_1 = c(NA, rep(2L, 9L), NA, rep(5L, 9L)),
  value_2 = rep(c(NA, NA, rep(2L, 8L)), times = 2L),
  value_3 = c(rep(NA, 3L), rep(5L, 7L), rep(NA, 3L), rep(2L, 7L)),
  value_4 = rep(c(rep(NA, 4L), rep(2L, 6L)), times = 2L),
  value_5 = rep(c(rep(NA, 5L), rep(7L, 5L)), times = 2L),
  value_6 = rep(c(rep(NA, 6L), rep(7L, 4L)), times = 2L),
  value_7 = rep(c(rep(NA, 7L), rep(2L, 3L)), times = 2L),
  value_8 = rep(c(rep(NA, 8L), rep(2L, 2L)), times = 2L),
  # Plain `2` (not `2L`) keeps this column double, as in the original data.
  value_9 = rep(c(rep(NA, 9L), 2), times = 2L)
)