Вот один способ с использованием gather: мы создаём новый столбец из 'Y',
чтобы затем с помощью summarise получить результаты от 'x3' до 'x6'.
library(tidyverse)
# Per-column missing-value summary: reshape to long form (one row per cell),
# carrying a copy of Y along as `flag`, then aggregate by original column name.
#   x1 / x2 : proportion / count of NA over all rows
#   x3 / x4 : proportion / count of NA among rows where flag == 1
#   x5 / x6 : proportion / count of NA among rows where flag == 0
df %>%
  mutate(flag = Y) %>%
  # Everything except `flag` is stacked, so Y itself is also summarised.
  gather(key = Variable, value = cell, -flag) %>%
  group_by(Variable) %>%
  summarise(
    x1 = mean(is.na(cell)),
    x2 = sum(is.na(cell)),
    x3 = mean(is.na(cell[flag == 1])),
    x4 = sum(is.na(cell[flag == 1])),
    x5 = mean(is.na(cell[flag == 0])),
    x6 = sum(is.na(cell[flag == 0]))
  )
# A tibble: 5 x 7
# Variable x1 x2 x3 x4 x5 x6
# <chr> <dbl> <int> <dbl> <int> <dbl> <int>
#1 a 0.6 3 0.667 2 0.5 1
#2 b 0.6 3 0.667 2 0.5 1
#3 c 0 0 0 0 0 0
#4 d 0.4 2 0.333 1 0.5 1
#5 Y 0 0 0 0 0 0
Или используем data.table с melt и dcast
library(data.table)
# Per-column missing-value summary with data.table.
#
# Work on a separate table rather than `setDT(df)`: `setDT(df)[, new := Y]`
# converts `df` in place and permanently adds a `new` column to it.
# (For a plain data.frame `df`, as.data.table() builds a new object.)
dM <- melt(as.data.table(df)[, new := Y], id.vars = "new")
# Flag each melted cell as missing / not missing.
dM[, value1 := is.na(value)]

# x1 = proportion of NA, x2 = count of NA per variable (same labelling as the
# tidyverse summary), joined to the mean/sum of the NA flag per level of `new`
# produced by dcast.
dM[, .(x1 = mean(value1), x2 = sum(value1)), by = variable][
  dcast(dM, variable ~ new, value.var = "value1",
        fun.aggregate = list(mean, sum)),
  on = .(variable)
]
данные
# Example data: four integer columns (a, b, c, d) containing some NA values,
# plus a 0/1 integer column Y. Row names are the strings "0" to "4".
df <- data.frame(
  a = c(NA, NA, 1L, NA, 6L),
  b = c(NA, 2L, 0L, NA, NA),
  c = c(8L, 5L, 7L, 7L, 2L),
  d = c(3L, 0L, NA, 1L, NA),
  Y = c(1L, 1L, 0L, 0L, 1L),
  row.names = c("0", "1", "2", "3", "4")
)