Я пытаюсь создать модель glm
с использованием caret
в r
с использованием медицинских данных из CDC. Однако всякий раз, когда я пытаюсь обучить модель с помощью команды train()
в caret
, я получаю следующую ошибку:
Error in `[.default`(y, , "time") : incorrect number of dimensions
Ниже мой код:
#download data
download.file(url = "ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/dataset_documentation/nhamcs/stata/ed2014-stata.zip",destfile = "ed2014-stata.zip")
unzip("ed2014-stata.zip")
library(haven)
nhamcs2014 <- read_dta("ed2014-stata.dta")
dim(nhamcs2014)
#isolate variables of interest
keep2014<- c("SEX","IMMEDR","SEEN72","CANCER","ETOHAB","ALZHD","ASTHMA","CEBVD","CKD","COPD","CHF","CAD","DEPRN",
"DIABTYP1","DIABTYP2","DIABTYP0","ESRD","HPE","EDHIV","HYPLIPID","HTN","OBESITY","OSA","OSTPRSIS",
"SUBSTAB")
new.nhamcs2014 <- nhamcs2014[keep2014]
#remove missing data
e=new.nhamcs2014$IMMEDR==-9
e.clean.nhamcs2014<- new.nhamcs2014[!e,]
f=e.clean.nhamcs2014$IMMEDR==-8
f.clean.nhamcs2014<- e.clean.nhamcs2014[!f,]
g=f.clean.nhamcs2014$SEEN72==-9
g.clean.nhamcs2014 <- f.clean.nhamcs2014[!g,]
h=g.clean.nhamcs2014$SEEN72==-8
h.clean.nhamcs2014 <- g.clean.nhamcs2014[!h,]
i <- h.clean.nhamcs2014$IMMEDR==7
i.clean.nhamcs2014 <- h.clean.nhamcs2014[!i,]
#Convert response variable (IMMEDR) to binomial variable
i.clean.nhamcs2014$IMMEDR[i.clean.nhamcs2014$IMMEDR==3] <- 0
i.clean.nhamcs2014$IMMEDR[i.clean.nhamcs2014$IMMEDR==2] <- 0
i.clean.nhamcs2014$IMMEDR[i.clean.nhamcs2014$IMMEDR==1] <- 0
i.clean.nhamcs2014$IMMEDR[i.clean.nhamcs2014$IMMEDR==5] <- 1
i.clean.nhamcs2014$IMMEDR[i.clean.nhamcs2014$IMMEDR==4] <- 1
#clean data
i.clean.nhamcs2014$SEX[i.clean.nhamcs2014$SEX==1] <- 0
i.clean.nhamcs2014$SEX[i.clean.nhamcs2014$SEX==2] <- 1
i.clean.nhamcs2014$SEEN72[i.clean.nhamcs2014$SEEN72==1] <- 0
i.clean.nhamcs2014$SEEN72[i.clean.nhamcs2014$SEEN72==2] <- 1
View(i.clean.nhamcs2014)
sum(is.na(i.clean.nhamcs2014))
#create glm model using caret
library(caret)
set.seed(1)
inTrain<-createDataPartition(i.clean.nhamcs2014$IMMEDR, p=.75, list = FALSE)
train.nhamcs2014 <- i.clean.nhamcs2014[inTrain,]
test.nhamcs2014 <- i.clean.nhamcs2014[-inTrain,]
control <- trainControl(method = "cv", number = 5, summaryFunction = twoClassSummary,
classProbs = TRUE, verboseIter = TRUE, returnResamp = "final")
model.glm <- train(IMMEDR~.,method = "glm", family = binomial(), metric = "ROC",
maximize = TRUE, data = train.nhamcs2014, trControl = control)
Error in `[.default`(y, , "time") : incorrect number of dimensions
Любой вклад будет принята с благодарностью!