У меня есть такие данные, как это
data=data.table("School"=c(1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0,1,0),
"Grade"=c(0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,0,0,1,0),
"CAT"=c(1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1),
"FOX"=c(1,1,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0),
"DOG"=c(0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,1))
и wi sh, чтобы создать новую таблицу данных, например:
dataWANT=data.frame("VARIABLE"=c('CAT', 'CAT', 'CAT', 'FOX', 'FOX', 'FOX', 'DOG', 'DOG', 'DOG'),
"SCHOOL"=c(1, 1, 0, 1, 1, 0, 1, 1, 0),
"GRADE"=c(0, 1, 1, 0, 1, 1, 0, 1, 1),
"MEAN"=c(NA))
dataWANT принимает среднее значение для CAT и FOX и DOG от SCHOOL, GRADE и SCHOOL X GRADE, когда они равны 1.
Я знаю, как делать это по одному, но это не подходит для работы с большими данными.
data[, CAT1:=mean(CAT), by=list(SCHOOL)]
data[, FOX1:=mean(FOX), by=list(GRADE)]
data[, DOG1:=mean(DOG), by=list(SCHOOL, GRADE)]
data$CAT2 = unique(data[SCHOOL==1, CAT1])
data$FOX2 = unique(data[GRADE==1, FOX1])
data$DOG2 = unique(data[SCHOOL==1 & GRADE==1, DOG1])
Пожалуйста, используйте только это:
data=data.table("SCHOOL"=c(1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0,1,0),
"GRADE"=c(0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,0,0,0,1,0),
"CAT"=c(1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1),
"FOX"=c(1,0,0,1,1,1,1,1,0,0,0,1,1,1,0,0,1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,1,0,0,1,0),
"DOG"=c(0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,1,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,1))
data[, CAT1:=mean(CAT), by=list(SCHOOL)]
data[, CAT2:=mean(CAT), by=list(GRADE)]
data[, CAT3:=mean(CAT), by=list(SCHOOL, GRADE)]
data[, FOX1:=mean(FOX), by=list(SCHOOL)]
data[, FOX2:=mean(FOX), by=list(GRADE)]
data[, FOX3:=mean(FOX), by=list(SCHOOL, GRADE)]
data[, DOG1:=mean(DOG), by=list(SCHOOL)]
data[, DOG2:=mean(DOG), by=list(GRADE)]
data[, DOG3:=mean(DOG), by=list(SCHOOL, GRADE)]
dataWANT=data.frame("VARIABLE"=c('CAT','CAT','CAT','FOX','FOX','FOX','DOG','DOG','DOG'),
"TYPE"=c(1,2,3,1,2,3,1,2,3),
"MEAN"=c(0.48,0.44,0.428,0.6,0.611,0.6428,0.52,0.61,0.6428))
где ТИП равен 1, когда СРЕДНИЙ по оценке ШКОЛЫ,
ТИП равен 2, когда СРЕДНИЙ оценивается по GRADE,
TYPE равно 3, когда MEAN оценивается SCHOOL и GRADE