R doParallel: не удалось найти функцию - PullRequest
0 голосов
/ 14 февраля 2019

Я настроил следующую функцию:

# Tune a random forest by repeated 10-fold cross-validation for every
# combination of data set in `dat`, "vari" group ("a"/"b") and target.
#
# dat:             list of data frames, each with a "vari" column
# targets:         character vector of response column names
# predictors_name: character vector of predictor column names
#
# The fitted model is overwritten on every iteration; nothing is returned.
cv_model <- function(dat, targets, predictors_name){

  library(randomForest)
  library(caret)
  library(MLmetrics)
  library(Metrics)

  # set up error measures
  # NOTE: the unqualified MAPE() call below is the one reported as
  # 'couldn't find function "MAPE"' once a doParallel backend is registered;
  # it is intentionally left unqualified so this listing still reproduces
  # the problem being asked about.
  sumfct <- function(data, lev = NULL, model = NULL){
    mape <- MAPE(y_pred = data$pred, y_true = data$obs)
    # `na.rm` is the argument mean() understands; `na.omit = TRUE` was
    # silently swallowed by `...` and never removed missing values.
    RMSE <- sqrt(mean((data$pred - data$obs)^2, na.rm = TRUE))
    MAE <- mean(abs(data$obs - data$pred))
    BIAS <- mean(data$obs - data$pred)
    Rsquared <- R2(pred = data$pred, obs = data$obs, formula = "corr", na.rm = FALSE)
    c(MAPE = mape, RMSE = RMSE, MAE = MAE, BIAS = BIAS, Rsquared = Rsquared)
  }

  # specify trainControl and the tuning grid once: neither depends on the
  # data set, group or target, so there is no need to rebuild them per fit
  control <- trainControl(method = "repeatedcv", number = 10, repeats = 10,
                          search = "grid", savePredictions = TRUE,
                          summaryFunction = sumfct)
  tunegrid <- expand.grid(mtry = seq_along(predictors_name))

  for (k in seq_along(dat)) {

    # Split the k-th data set by group and drop the grouping column.
    # A logical column mask is used because `-which(...)` would drop every
    # column if "vari" were missing.
    keep <- names(dat[[k]]) != "vari"
    a <- dat[[k]][dat[[k]]$vari %in% "a", keep, drop = FALSE]
    b <- dat[[k]][dat[[k]]$vari %in% "b", keep, drop = FALSE]
    ab <- list(a, b)

    for (i in seq_along(targets)) {
      for (j in seq_along(ab)) {

        # same seed before every fit so the resampling folds are reproducible
        set.seed(42)
        model <- train(formula(paste0(targets[i],
                                      " ~ ",
                                      paste(predictors_name, collapse = " + "))),
                       data = ab[[j]],
                       method = "rf",
                       ntree = 25,
                       metric = "RMSE",
                       tuneGrid = tunegrid,
                       trControl = control)

      }
    }
  }

}

В соответствии с этим руководством (https://topepo.github.io/caret/parallel-processing.html) я могу распараллелить свой код, просто вызвав library(doParallel); cl <- makePSOCKcluster(2); registerDoParallel(cl). Когда я затем использую функцию с doParallel:

predictors_name <- c("Time", "Chick")
targets <- "weight"

# Label rows 1-10 and 320-350 of ChickWeight as group "a", all others as "b".
dat <- as.data.frame(ChickWeight)
dat$vari <- "b"
dat$vari[c(1:10, 320:350)] <- "a"

d <- list(dat[1:300, ], dat[301:500, ])

## use 2 of the cores
library(doParallel)
cl <- makePSOCKcluster(2)
registerDoParallel(cl)

# Run inside tryCatch() so the worker processes are shut down even when
# cv_model() fails; the error itself is still propagated.
tryCatch(
  cv_model(dat = d, targets = targets, predictors_name = predictors_name),
  finally = {
    # end parallel computing
    stopCluster(cl)
    # Re-register the sequential backend; otherwise foreach keeps pointing
    # at the stopped cluster and later %dopar% calls fail.
    registerDoSEQ()
  }
)

появляется сообщение об ошибке couldn't find function "MAPE".

Как это исправить, не используя синтаксис foreach?

1 Ответ

0 голосов
/ 15 февраля 2019

Если при вызове функции явно указать пакет, например package::function, вызов будет работать. Может быть, есть более элегантное решение, но я сделал так, чтобы код работал без ошибок:

# Tune a random forest by repeated 10-fold cross-validation for every
# combination of data set in `dat`, "vari" group ("a"/"b") and target.
#
# dat:             list of data frames, each with a "vari" column holding
#                  "a"/"b" plus the target and predictor columns
# targets:         character vector of response column names
# predictors_name: character vector of predictor column names
#
# Returns (invisibly) a named list of fitted caret `train` objects, one per
# combination, named "<data set index>.<group>.<target>".
cv_model <- function(dat, targets, predictors_name){

  library(randomForest)
  library(caret)
  library(MLmetrics)
  library(Metrics)

  # Summary function handed to caret for each resample. Package functions
  # are namespace-qualified because, with a parallel backend registered,
  # this closure is evaluated on the worker processes, where a package
  # attached in the calling session may not be on the search path.
  sumfct <- function(data, lev = NULL, model = NULL){
    err <- data$obs - data$pred
    c(MAPE = MLmetrics::MAPE(y_pred = data$pred, y_true = data$obs),
      # `na.rm` is the argument mean() understands; `na.omit = TRUE` was
      # silently swallowed by `...` and never removed missing values.
      RMSE = sqrt(mean(err^2, na.rm = TRUE)),
      MAE = mean(abs(err)),
      BIAS = mean(err),
      Rsquared = caret::R2(pred = data$pred, obs = data$obs,
                           formula = "corr", na.rm = FALSE))
  }

  # Resampling setup and tuning grid do not depend on the data set, group
  # or target, so they are built once instead of once per fit.
  control <- caret::trainControl(method = "repeatedcv", number = 10,
                                 repeats = 10, search = "grid",
                                 savePredictions = TRUE,
                                 summaryFunction = sumfct)
  tunegrid <- expand.grid(mtry = seq_along(predictors_name))
  rhs <- paste(predictors_name, collapse = " + ")

  models <- list()

  for (k in seq_along(dat)) {

    # Split the k-th data set by group and drop the grouping column.
    # A logical column mask is used because `-which(...)` would drop every
    # column if "vari" were missing.
    current <- dat[[k]]
    keep <- names(current) != "vari"
    groups <- list(
      a = current[current$vari %in% "a", keep, drop = FALSE],
      b = current[current$vari %in% "b", keep, drop = FALSE]
    )

    for (target in targets) {
      for (grp in names(groups)) {

        # same seed before every fit so the resampling folds are reproducible
        set.seed(42)
        model <- caret::train(stats::as.formula(paste0(target, " ~ ", rhs)),
                              data = groups[[grp]],
                              method = "rf",
                              ntree = 25,
                              metric = "RMSE",
                              tuneGrid = tunegrid,
                              trControl = control)

        # keep every fit instead of overwriting the previous one
        models[[paste(k, grp, target, sep = ".")]] <- model

      }
    }
  }

  invisible(models)
}

predictors_name <- c("Time", "Chick", "Diet")
targets <- "weight"

# Label rows 1-10 and 320-350 of ChickWeight as group "a", all others as "b".
dat <- as.data.frame(ChickWeight)
dat$vari <- "b"
dat$vari[c(1:10, 320:350)] <- "a"

d <- list(dat[1:300, ], dat[301:578, ])

## use 2 of the cores
library(doParallel)
cl <- makePSOCKcluster(2)
registerDoParallel(cl)

# Run inside tryCatch() so the worker processes are shut down even when
# cv_model() fails; any error is still propagated.
tryCatch(
  cv_model(dat = d, targets = targets, predictors_name = predictors_name),
  finally = {
    # end parallel computing
    stopCluster(cl)
    # Re-register the sequential backend; otherwise foreach keeps pointing
    # at the stopped cluster and later %dopar% calls fail.
    registerDoSEQ()
  }
)
...