Я хотел бы смоделировать мощность для многоуровневого мета-анализа со случайными эффектами с помощью теста на модерацию. Метаанализ будет охватывать 16 величин эффекта от 2 исследований (каждое исследование имеет 8 величин эффекта) и будет проверять, модерируются ли результаты 8 предикторами. Размеры эффекта являются логарифмическими коэффициентами, поскольку результат является двоичным.
Я пытался сделать это, имитируя данные для 2 исследований (на основе ожидаемых величин эффекта), запуская многовариантные регрессионные модели c для каждого исследования, а затем объединяя их в метаанализе и тестировании на модерацию. , Мой код для этого ниже. Тем не менее, я хотел бы указать большую дисперсию между исследованиями, чем оценивается моими текущими симуляциями.
Как бы я go объяснил гетерогенность между исследованиями (а также гетерогенность, обусловленную предиктором) для моделировать мощность для многоуровневого мета-анализа со случайными эффектами с помощью теста на модерацию?
library(MASS) #for mvrnorm function to make multivariate normal distributed vars
library(stats) #for binom function to make binary variables
library(metafor) #for meta-analysis
#-------------------------------------------------------------------------
# Specify parameters
#-------------------------------------------------------------------------
set.seed(666)
mynSims <- 30 # Set number of simulated datasets (here 30 as a quick example)
ptable=matrix(rep(NA,(mynSims*5)),nrow=mynSims) #initialising a results matrix that will hold p values in each run
colnames(ptable)<- c("run", "mod_p", "mod_sig", "total_het", "betweenStudy_het") # Label column names
head(ptable)
nPredVar = 8 #number of simulated predictor variables
PredM <- 0 # Mean score for all predictor variables in the sample
PredVar <- 1 #Variance of all predictor variables in the sample
PredCorr <- 0.05 #correlation between predictor variables
PredCov<-matrix(rep(PredCorr,nPredVar*nPredVar),nrow=nPredVar) #Covariance matrix
diag(PredCov)<-rep(1,nPredVar) #Fix matrix so that the diagonal values correspond to variance of each variable
PredCov
#-------------------------------------------------------------------------
# Run loop to simulate data and run regression model for each simulated dataset
#-------------------------------------------------------------------------
## Start loop (for each simulation)
for (i in 1:mynSims) {
#### Study 1 (n=11000)
## Simulate 8 continuous predictors (z-score) that are correlated r=0.05
data_test = as.data.frame(mvrnorm(11000, rep(PredM,nPredVar), PredCov))
## Simulate binary outcome associated with continuous predictors
z = 0.1 + 0.025*data_test$V1 + 0.03*data_test$V2 + 0.06*data_test$V3 + 0.06*data_test$V4 + 0.00*data_test$V5 + 0.15*data_test$V6 + 0.18*data_test$V7 + 0.227*data_test$V8 # linear combination with a bias
pr = 1/(1+exp(-z)) # pass through an inv-logit function
y = rbinom(n=11000, size=1, prob=pr) # bernoulli response variable (size= number of trials)
## Combine continuous predictors and binary outcome variable into one dataframe
study_1 <- data.frame(cbind(data_test, y))
head(study_1)
## Run multivariate logistic regression
study1_multi = glm(y ~ V1 + V2 + V3 + V4 + V5 + V6 + V7 + V8, data=study_1, family="binomial"(link="logit"))
mysummary = summary(study1_multi)
# Extract results (coefficient and SE for each predictor in multivariate logistic regression model)
study1_V1est <- mysummary$coefficients[2,1]
study1_V1se <- mysummary$coefficients[2,2]
study1_V2est <- mysummary$coefficients[3,1]
study1_V2se <- mysummary$coefficients[3,2]
study1_V3est <- mysummary$coefficients[4,1]
study1_V3se <- mysummary$coefficients[4,2]
study1_V4est <- mysummary$coefficients[5,1]
study1_V4se <- mysummary$coefficients[5,2]
study1_V5est <- mysummary$coefficients[6,1]
study1_V5se <- mysummary$coefficients[6,2]
study1_V6est <- mysummary$coefficients[7,1]
study1_V6se <- mysummary$coefficients[7,2]
study1_V7est <- mysummary$coefficients[8,1]
study1_V7se <- mysummary$coefficients[8,2]
study1_V8est <- mysummary$coefficients[9,1]
study1_V8se <- mysummary$coefficients[9,2]
#### Study 2 (n=7000)
## Simulate 8 continuous predictors (z-score) that are correlated r=0.05
data_test = as.data.frame(mvrnorm(7000, rep(PredM,nPredVar), PredCov))
## Simulate binary outcome associated with continuous predictors (probability of 20%)
z = 0.2 + 0.035*data_test$V1 + 0.015*data_test$V2 + 0.20*data_test$V3 + 0.05*data_test$V4 + 0.15*data_test$V5 + 0.17*data_test$V6 + 0.02*data_test$V7 + 0.45*data_test$V8 # linear combination with a bias
pr = 1/(1+exp(-z)) # pass through an inv-logit function
y = rbinom(n=7000, size=1, prob=pr) # bernoulli response variable (size= number of trials)
## Combine continuous predictors and binary outcome variable into one dataframe
study_2 <- data.frame(cbind(data_test, y))
head(study_2)
## Run multivariate logistic regression
study2_multi = glm(y ~ V1 + V2 + V3 + V4 + V5 + V6 + V7 + V8, data=study_2, family="binomial"(link="logit"))
mysummary = summary(study2_multi)
# Extract results
study2_V1est <- mysummary$coefficients[2,1]
study2_V1se <- mysummary$coefficients[2,2]
study2_V2est <- mysummary$coefficients[3,1]
study2_V2se <- mysummary$coefficients[3,2]
study2_V3est <- mysummary$coefficients[4,1]
study2_V3se <- mysummary$coefficients[4,2]
study2_V4est <- mysummary$coefficients[5,1]
study2_V4se <- mysummary$coefficients[5,2]
study2_V5est <- mysummary$coefficients[6,1]
study2_V5se <- mysummary$coefficients[6,2]
study2_V6est <- mysummary$coefficients[7,1]
study2_V6se <- mysummary$coefficients[7,2]
study2_V7est <- mysummary$coefficients[8,1]
study2_V7se <- mysummary$coefficients[8,2]
study2_V8est <- mysummary$coefficients[9,1]
study2_V8se <- mysummary$coefficients[9,2]
#-------------------------------------------------------------------------
# Within loop, combine results from both studies into a meta-analysis and extract results
#-------------------------------------------------------------------------
studyid <- c(rep("study1", 8), rep("study2", 8))
predictor <- c("P1", "P2", "P3", "P4", "P5", "P6", "P7", "P8","P1", "P2", "P3", "P4", "P5", "P6", "P7", "P8")
est <- c(study2_V1est, study2_V2est, study2_V3est, study2_V4est, study2_V5est, study2_V6est, study2_V7est, study2_V8est,
study2_V1est, study2_V2est, study2_V3est, study2_V4est, study2_V5est, study2_V6est, study2_V7est, study2_V8est)
se <- c(study2_V1se, study2_V2se, study2_V3se, study2_V4se, study2_V5se, study2_V6se, study2_V7se, study2_V8se,
study2_V1se, study2_V2se, study2_V3se, study2_V4se, study2_V5se, study2_V6se, study2_V7se, study2_V8se)
es_id <- c(1:16)
df <- data.frame(studyid, predictor, est, se, es_id)
df$variance <- df$se^2
###### Random-effects multi-level meta-analysis of both studies
meta_analysis_normal <- rma.mv(est, variance, random = list(~ 1 | predictor, ~ 1 | studyid), tdist=TRUE, data=df)
# Calculate total heterogeneity
W <- diag(1/df$variance)
X <- model.matrix(meta_analysis_normal)
P <- W - W %*% X %*% solve(t(X) %*% W %*% X) %*% t(X) %*% W
total_het <- 100 * sum(meta_analysis_normal$sigma2) / (sum(meta_analysis_normal$sigma2) + (meta_analysis_normal$k-meta_analysis_normal$p)/sum(diag(P)))
# Break down variance into that due to differences between cohorts and that due to differences in predictors
prop_het <- 100 * meta_analysis_normal$sigma2 / (sum(meta_analysis_normal$sigma2) + (meta_analysis_normal$k-meta_analysis_normal$p)/sum(diag(P)))
btwnStudy_het <- prop_het[2] # Proportion of variance due to between-study variance
###### Random-effects multi-level meta-analysis with a test for moderation
meta_analysis_mod <- rma.mv(est, variance, random = list(~ 1 | predictor, ~ 1 | studyid), tdist=TRUE, data=df, mods=~factor(predictor))
mysummary <- summary(meta_analysis_mod)
###### Extract results
ptable[i,1]<-1 # stores the 'run' (i.e., the simulated dataset)
ptable[i,2] <-mysummary$QMp # store the p-value of the moderation test
if(ptable[i,2]>0.05) {ptable[i,3]<-0} else {ptable[i,3]<-1} # code to 1 if Moderation test is significant
ptable[i,4] <- total_het # store the total heterogeneity estimate (I2)
ptable[i,5] <- btwnStudy_het # store the proportion of heterogeneity due to between-study variance
}
head(ptable,20)
percent05 <- 100*(sum(ptable[,3])/mynSims)
percent05 # assess power to detect significant moderation effect
ptable=as.data.frame(ptable)
mean(ptable$total_het) # assess total heterogeneity
mean(ptable$betweenStudy_het) # assess proportion of heterogeneity due to between-study variance