Ошибка при анализе внешней функции при использовании распараллеливания - PullRequest
0 голосов
/ 28 сентября 2019

Я определил пользовательскую меру, которая позволяет преобразовать prediction$data с помощью внешней функции перед оценкой стандартных мер, таких как rmse. Если я пытаюсь настроить параметры без распараллеливания, всё идёт гладко, но если я запускаю распараллеленную сессию, кажется, что внешняя функция больше не обнаруживается, хотя она объявлена в глобальной среде.

library(compiler)
library(mlr)
library(parallelMap)
library(parallel)

# define function
# Inverse of the target transformation: squares its argument element-wise.
# `^` is already vectorised, so no Vectorize() wrapper is needed; the wrapper
# also returned list() instead of numeric(0) for zero-length input.
inverse_fun = function(x) {
  x^2
}
# Byte-compile the function; TRUE is spelled out because T can be reassigned.
inverse_fun = cmpfun(inverse_fun, options = list(suppressUndefined = TRUE))
# Bind the function in the global environment explicitly, so it is found there
# even when this script is evaluated in a local environment.
assign('inverse_fun', inverse_fun, envir = .GlobalEnv)

# Name of the measure object used as the tuning criterion
tuning_criterion = 'rmse'

# define a new measure that applies inverse_fun to prediction and evaluates rmse
# Look the measure object up by name. get() resolves the same binding as
# evaluating the parsed name, without executing arbitrary text as code.
original_measure = get(tuning_criterion)
# Fail fast if the name resolved to something that is not an mlr measure
# (for example a function of the same name from another package).
stopifnot(inherits(original_measure, "Measure"))
# Scoring function for the custom measure.
# Applies inverse_fun to the truth and response columns of pred$data, then
# delegates to the scoring function of original_measure with the same
# arguments. Returns whatever original_measure$fun returns.
transf_measure_fun = function(task, model, pred, feats, extra.args) {
  # transform back to original value, one column at a time
  for (column in c("truth", "response")) {
    pred$data[[column]] = inverse_fun(pred$data[[column]])
  }
  original_measure$fun(task, model, pred, feats, extra.args)
}
# Wrap transf_measure_fun in a measure whose metadata (properties, direction,
# best and worst values) is copied from original_measure.
transf_measure = makeMeasure(
  id = "ii",
  name = "ccc",
  minimize = original_measure$minimize,
  properties = original_measure$properties,
  best = original_measure$best,
  worst = original_measure$worst,
  fun = transf_measure_fun
)

# Reuse the aggregation of the original measure.
transf_measure = setAggregation(transf_measure, original_measure$aggr)

# The measure itself, followed by copies aggregated with test.sd, train.mean
# and train.sd (in that order).
aggregated_measure = c(
  list(transf_measure),
  lapply(
    list(test.sd, train.mean, train.sd),
    function(aggr) setAggregation(transf_measure, aggr)
  )
)

# train and predict on the same task
lrn.lm = makeLearner("regr.ksvm")
mod.lm = train(lrn.lm, bh.task)
task.pred.lm = predict(mod.lm, task = bh.task)

# inverse function on prediction: transform a copy of the prediction by hand
inv_pred = task.pred.lm
inv_pred$data[["truth"]] = inverse_fun(inv_pred$data[["truth"]])
inv_pred$data[["response"]] = inverse_fun(inv_pred$data[["response"]])

# check for performance match: the custom measure on the raw prediction is
# printed next to plain rmse on the hand-transformed prediction
performance(pred = task.pred.lm, measures = transf_measure)
performance(pred = inv_pred, measures = rmse)

# tuning: 4 x 4 discrete grid over C and sigma, grid search, 3-fold CV
discrete_ps = makeParamSet(
  makeDiscreteParam(id = "C", values = c(0.5, 1.0, 1.5, 2.0)),
  makeDiscreteParam(id = "sigma", values = c(0.5, 1.0, 1.5, 2.0))
)
ctrl = makeTuneControlGrid()
rdesc = makeResampleDesc(method = "CV", iters = 3L)

# this works (sequential run, no parallelization started yet)
res = tuneParams(
  learner = lrn.lm,
  task = bh.task,
  resampling = rdesc,
  measures = transf_measure,
  par.set = discrete_ps,
  control = ctrl
)

# try with parallelization - doesn't work
# NOTE(review): the reported error says the workers cannot see inverse_fun.
# Presumably socket workers are separate R sessions that do not share this
# session's global environment, so the function would have to be exported to
# them (parallelMap offers parallelExport() for this) - confirm against the
# parallelMap documentation. The reproduction below is left unchanged.
current_os = Sys.info()[["sysname"]]  # detect OS
if (current_os == "Windows") {
  # Windows branch: socket mode, with an RNG stream set on the cluster
  set.seed(1, "L'Ecuyer-CMRG")
  parallelStart(mode = "socket", cpus = detectCores(), show.info = FALSE)
  parallel::clusterSetRNGStream(iseed = 1)
} else if (current_os == "Linux") {
  # Linux branch: multicore mode
  set.seed(1, "L'Ecuyer-CMRG")
  parallelStart(mode = "multicore", cpus = detectCores(), show.info = FALSE)
} else {
  # Any other OS: no parallel backend is started, only a notice is printed
  cat('\n\n#### OS not recognized, check parallelization init\n\n')
}
# Same tuning call as the sequential run above
res = tuneParams(lrn.lm, task = bh.task, resampling = rdesc,
                 par.set = discrete_ps, control = ctrl, measures = transf_measure)
parallelStop()

При запуске с распараллеливанием я получаю следующую ошибку:

Error in stopWithJobErrorMessages(inds, vcapply(result.list[inds], as.character)) : 
  Errors occurred in 16 slave jobs, displaying at most 10 of them:

00001: Error in inverse_fun(pred$data$truth) : 
  cannot find "inverse_fun"

Я пытался передать функцию через extra.args, но получаю ошибку:

# Second attempt: supply the inverse transform to the measure through
# extra.args instead of reading inverse_fun from the enclosing environment.
original_measure = eval(parse(text=tuning_criterion))
# Scoring function: applies extra.args$inv_fun to the truth and response
# columns of pred$data, then delegates to original_measure$fun.
transf_measure_fun = function(task, model, pred, feats, extra.args){
  # transform back to original value
  pred$data$truth = extra.args$inv_fun(pred$data$truth)
  pred$data$response = extra.args$inv_fun(pred$data$response)
  return(original_measure$fun(task, model, pred, feats, extra.args))
}
transf_measure = makeMeasure(
  id = 'ii', name = 'ccc',
  properties = original_measure$properties,
  minimize = original_measure$minimize, best = original_measure$best, worst = original_measure$worst,
  # NOTE(review): the line below CALLS transf_measure_fun immediately, with
  # only extra.args supplied, instead of passing the function itself; `pred`
  # is therefore missing when the body runs. Presumably the intent was
  # `fun = transf_measure_fun, extra.args = list(inv_fun = inverse_fun)` -
  # confirm against the makeMeasure documentation.
  fun = transf_measure_fun(extra.args = list(inv_fun = inverse_fun))
)

В результате я получаю ошибку: Error in FUN(X[[i]], ...) : argument "pred" is missing, with no default

Заранее спасибо

...