Я пробовал запустить этот рабочий процесс tidymodels, чтобы посмотреть, смогу ли я объединить две модели с разными функциями и отсутствующими значениями. Возможность комбинировать разные источники данных для моделирования одних и тех же результатов была бы очень удобна для реальных данных, но пока не уверен, возможно ли это со стеками tidymodels. Что-то явно не так с этим рабочим процессом для сбоев стека?
library(tidyverse)
library(tidymodels)
library(workflows)
library(probably)
library(tune)
library(stacks)
set.seed(1234)
mtcars_tb <- mtcars %>%
as_tibble() %>%
mutate(vs = c(sample(vs, 0), rep(NA_integer_, 10), sample(vs, 22)),
disp = c(sample(disp, 22), rep(NA_integer_, 10)))
train_test_split <- initial_split(mtcars_tb)
train <- training(train_test_split)
test <- testing(train_test_split)
cv_fold_mtc <- vfold_cv(mtcars_tb)
recipe_naomit <- recipe(mpg ~ disp + vs, data = train) %>%
step_naomit(all_predictors(), skip = TRUE) %>%
step_normalize(all_numeric())
ctrl_grid <- control_stack_grid()
ctrl_res <- control_stack_resamples()
wflow <- workflow() %>%
add_recipe(recipe_naomit)
lasso_mod <-
linear_reg() %>%
set_engine("glmnet") %>%
set_args(penalty = tune(),
mixture = 1)
wflow <-
wflow %>%
add_model(lasso_mod)
lasso_tune <-
tune_grid(
object = wflow,
resamples = cv_fold_mtc,
grid = 10,
control = ctrl_grid
)
recipe_rm_vs <- recipe(mpg ~ disp, data = train) %>%
step_naomit(all_predictors(), skip = TRUE) %>%
step_normalize(all_numeric())
wflow <- wflow %>%
update_recipe(recipe_rm_vs)
linear_mod <-
linear_reg() %>%
set_engine("lm")
wflow <-
wflow %>%
update_model(linear_mod)
linear_tune_disp <- fit_resamples(
wflow,
resamples = cv_fold_mtc,
control = ctrl_res
)
model_st <- stacks() %>%
stack_add(lasso_tune) %>%
stack_add(linear_tune_disp) %>%
stack_blend() %>%
stack_fit()
#> Warning: Values are not uniquely identified; output will contain list-cols.
#> * Use `values_fn = list` to suppress this warning.
#> * Use `values_fn = length` to identify where the duplicates arise
#> * Use `values_fn = {summary_fun}` to summarise duplicates
#> x Fold01: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold02: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold03: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold04: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold05: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold06: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold07: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold08: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold09: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold10: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> Warning: All models failed in tune_grid(). See the `.notes` column.
#> Error: All of the models failed. See the .notes column.
model_st
#> Error in eval(expr, envir, enclos): object 'model_st' not found