Импутация пропущенных значений с помощью MICE в mlr - PullRequest
0 голосов
/ 16 июня 2020
• 1000 1001 *
Error in `[.data.frame`(data, ind) : undefined columns selected

Я не знаю, почему и откуда это. Это код, который я написал:

library(survival)
#> Warning: package 'survival' was built under R version 3.6.3
library(mlr)
#> Warning: package 'mlr' was built under R version 3.6.3
#> Loading required package: ParamHelpers
#> Warning: package 'ParamHelpers' was built under R version 3.6.3
#> 'mlr' is in maintenance mode since July 2019. Future development
#> efforts will go into its successor 'mlr3' (<https://mlr3.mlr-org.com>).
library(lattice)
#> Warning: package 'lattice' was built under R version 3.6.3
library(mice)
#> Warning: package 'mice' was built under R version 3.6.3
#> 
#> Attaching package: 'mice'
#> The following objects are masked from 'package:base':
#> 
#>     cbind, rbind

# Load the pbc data set and give the task an identifier.
data(pbc)
task_id <- "PBC"

# Recode every status value of 2 to 1.
pbc[pbc$status == 2, "status"] <- 1

# Survival task with `time` and `status` as the target columns.
pbc.task <- makeSurvTask(
  id = task_id,
  data = pbc,
  target = c("time", "status")
)

# Resampling description: stratified cross-validation with 2 iterations.
outer <- makeResampleDesc("CV", iters = 2, stratify = TRUE)

# Construct an mlr imputation method that delegates to mice.
#
# learn:  does no fitting; it hands the data it receives back unchanged
#         under the name `values`.
# impute: runs mice on `data`, prints a progress message and returns the
#         whole completed data frame.
#
# NOTE(review): impute returns every column rather than only `col`. Check
# the makeImputeMethod documentation for the expected return value; this is
# presumably what leads to the "undefined columns selected" error.
imputeMice <- function() {
  passthrough_learn <- function(data, target, col) {
    list(values = data)
  }

  mice_impute <- function(data, target, col, values) {
    frame <- as.data.frame(data)
    # Factor columns are excluded when the predictor matrix is built.
    factor_cols <- names(frame)[sapply(frame, is.factor)]
    predictors <- mice::quickpred(
      frame,
      minpuc = 0,
      mincor = 0,
      exclude = factor_cols
    )
    fitted <- mice::mice(
      frame,
      predictorMatrix = predictors,
      seed = 23109,
      printFlag = FALSE
    )
    completed <- mice::complete(fitted)
    print("Imputation completed")
    completed
  }

  makeImputeMethod(learn = passthrough_learn, impute = mice_impute)
}

# Learner "surv.coxph" wrapped in a feature filter
# (method "univariate.model.score", fw.perc = 0.1, caching enabled).
lrn <- makeFilterWrapper(
  makeLearner(
    cl = "surv.coxph",
    id = "cox.filt",
    predict.type = "response"
  ),
  fw.method = "univariate.model.score",
  fw.perc = 0.1,
  cache = TRUE
)

# Add the mice-based imputation for numeric, integer and factor columns.
lrn <- makeImputeWrapper(
  lrn,
  classes = list(
    numeric = imputeMice(),
    integer = imputeMice(),
    factor = imputeMice()
  )
)

# Resample the wrapped learner on the task, scoring with cindex, keeping
# the fitted models and extracting the filtered features.
res <- resample(
  learner = lrn,
  task = pbc.task,
  resampling = outer,
  models = TRUE,
  measures = list(cindex),
  show.info = TRUE,
  extract = getFilteredFeatures
)
#> Resampling: cross-validation
#> Measures:             cindex
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> Error in `[.data.frame`(data, ind): undefined columns selected

Создано 16.06.2020 с помощью пакета reprex (v0.3.0)

Очевидно, что функция imputeMice() вызывается для каждого столбца data.frame pbc. Но при использовании mice нам нужно вызвать эту функцию только один раз, и она выполнит импутацию сразу во всех столбцах. Возможно ли такое в mlr?

1 Ответ

0 голосов
/ 18 июня 2020

Ошибка была моя — я должен был вызывать mice в функции learn, а не в функции impute. Я считаю, что названия этих функций сбивают с толку. Мой новый код ниже, и он работает. Но он вызывает mice для каждого столбца. Мне действительно нужно вызвать его только один раз. Возможно ли это?

library(survival)
#> Warning: package 'survival' was built under R version 3.6.3
library(mlr)
#> Warning: package 'mlr' was built under R version 3.6.3
#> Loading required package: ParamHelpers
#> Warning: package 'ParamHelpers' was built under R version 3.6.3
#> 'mlr' is in maintenance mode since July 2019. Future development
#> efforts will go into its successor 'mlr3' (<https://mlr3.mlr-org.com>).
library(lattice)
#> Warning: package 'lattice' was built under R version 3.6.3
library(mice)
#> Warning: package 'mice' was built under R version 3.6.3
#> 
#> Attaching package: 'mice'
#> The following objects are masked from 'package:base':
#> 
#>     cbind, rbind

# Load the pbc data set and give the task an identifier.
data(pbc)
task_id <- "PBC"

# Recode every status value of 2 to 1.
pbc[pbc$status == 2, "status"] <- 1

# Survival task with `time` and `status` as the target columns.
pbc.task <- makeSurvTask(
  id = task_id,
  data = pbc,
  target = c("time", "status")
)

# Resampling description: stratified cross-validation with 2 iterations.
outer <- makeResampleDesc("CV", iters = 2, stratify = TRUE)

# Build an mlr imputation method whose fill-in values come from mice.
#
# learn(data, target, col):
#   Runs mice on the data it is given and returns the completed version of
#   column `col` as list(values = ...). Factor columns are excluded when
#   the predictor matrix is built.
# impute(data, target, col, values):
#   Returns column `col` of `data` with its missing entries replaced by the
#   entries of `values` at the same positions; observed entries are kept.
#   Stops with an explicit message if `values` and the column differ in
#   length.
#
# NOTE(review): `values` is aligned row-by-row with the data seen by learn.
# If impute is later applied to different data (e.g. at prediction time),
# the positional match between rows is arbitrary. TODO: confirm how the
# wrapper calls impute and consider re-running mice on the new data.
imputeMice <- function() {
  makeImputeMethod(
    learn = function(data, target, col) {
      data <- as.data.frame(data)
      # vapply guarantees a logical vector regardless of the input shape.
      is_factor <- vapply(data, is.factor, logical(1))
      pred_matrix <- mice::quickpred(
        data,
        minpuc = 0,
        mincor = 0,
        exclude = names(data)[is_factor]
      )
      imputed <- mice::mice(
        data,
        predictorMatrix = pred_matrix,
        seed = 23109,
        printFlag = FALSE
      )
      list(values = mice::complete(imputed)[[col]])
    },
    impute = function(data, target, col, values) {
      column <- data[[col]]
      if (length(values) != length(column)) {
        stop(
          "imputeMice: learned ", length(values), " values for column '", col,
          "' but the data to impute has ", length(column), " rows",
          call. = FALSE
        )
      }
      # Overwrite only the gaps so observed values in `data` are never lost.
      gaps <- is.na(column)
      column[gaps] <- values[gaps]
      column
    }
  )
}

# Learner "surv.coxph" wrapped in a feature filter
# (method "univariate.model.score", fw.perc = 0.1, caching enabled).
lrn <- makeFilterWrapper(
  makeLearner(
    cl = "surv.coxph",
    id = "cox.filt",
    predict.type = "response"
  ),
  fw.method = "univariate.model.score",
  fw.perc = 0.1,
  cache = TRUE
)

# Add the mice-based imputation for numeric, integer and factor columns.
lrn <- makeImputeWrapper(
  lrn,
  classes = list(
    numeric = imputeMice(),
    integer = imputeMice(),
    factor = imputeMice()
  )
)

# Resample the wrapped learner on the task, scoring with cindex, keeping
# the fitted models and extracting the filtered features.
res <- resample(
  learner = lrn,
  task = pbc.task,
  resampling = outer,
  models = TRUE,
  measures = list(cindex),
  show.info = TRUE,
  extract = getFilteredFeatures
)
#> Resampling: cross-validation
#> Measures:             cindex
#> [Resample] iter 1:    0.7069869
#> [Resample] iter 2:    0.7138798
#> 
#> Aggregated Result: cindex.test.mean=0.7104333
#> 

Создано 2020-06-19 с помощью пакета reprex (v0.3.0)

...