Something is wrong; all the RMSE metric values are missing:
In addition: There were 26 warnings (use warnings() to see them)
Я просмотрел предупреждения: 25 из них сообщают, что подгонка модели не удалась для Resample_x. Кроме того, в предупреждении 26 говорится:
26: In nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, ... :
There were missing values in resampled performance measures.
Вот код, с помощью которого я пытаюсь построить дерево регрессии:
# Fit a regression tree (rpart) for `asegurados` with caret, tuning the
# complexity parameter over 10 candidate values using 10-fold cross-validation.
library("caret")
library("rpart")

# NOTE: `read_dta()` is not provided by caret or rpart (it is exported by the
# haven package); it and the `Data` directory variable must already be
# available in the session before this script runs.
Dataset <- read_dta(paste0(Data, "/Master_asegurados.dta"))
Dataset <- as.data.frame(Dataset)
Dataset <- Dataset[, -(1:3)]  # drop the first three columns
Dataset <- na.omit(Dataset)   # keep only rows without missing values

# Split into training (80%) and test (20%) sets.
# Called directly instead of through `%>%`, so no pipe-providing package
# needs to be attached.
set.seed(2019)
training.samples <- createDataPartition(
  Dataset$asegurados,
  p = 0.8,
  list = FALSE
)
train.data <- Dataset[training.samples, ]
test.data <- Dataset[-training.samples, ]

# Fit on training set.
# The resampling argument must be spelled `trControl` (capital C). R argument
# names are case-sensitive, so a misspelled `trcontrol` is not matched by
# train(): the requested cross-validation is ignored (train() falls back to
# its default of 25 bootstrap resamples) and the stray argument is forwarded
# through `...` to the underlying model fit, which fails on every resample.
# That is what produces "model fit failed for ResampleXX" and
# "all the RMSE metric values are missing".
set.seed(2019)
model <- train(
  asegurados ~ .,
  data = train.data,
  method = "rpart",
  trControl = trainControl(method = "cv", number = 10),
  tuneLength = 10
)
Вот несколько строк из набора данных:
structure(list(asegurados = c(18699916, 18853971, 18994318, 19021083,
19047825), conf_consum = c(28.7000007629395, 31.5, 33.7999992370605,
34.9000015258789, 35.9000015258789), conf_emp = c(43.9299697875977,
43.5325355529785, 46.3612785339355, 47.320686340332, 48.1750297546387
), igae_indice = c(110.734420776367, 110.078079223633, 110.579711914062,
110.91381072998, 110.551902770996), igae_varmen = c(0.153188705444336,
-0.592718362808228, 0.45570570230484, 0.302132695913315, -0.32629668712616
), igae_varan = c(3.31220531463623, 1.91346073150635, 2.86124491691589,
3.17998313903809, 2.55803227424622), inflacion_interan = c(4.71999979019165,
4.8600001335144, 5.34999990463257, 5.82000017166138, 6.15999984741211
), pero_total = c(4060513, 4104588, 4148455, 4146732, 4168990
), pero_rem = c(3762625, 3805940, 3845282, 3842497, 3864826),
rem_per = c(99, 97.5999984741211, 104.699996948242, 100.099998474121,
105.199996948242), rem_hra = c(99.4000015258789, 103.800003051758,
100.300003051758, 104.900001525879, 104), pea = c(58.9458541870117,
59.3624877929688, 59.3493804931641, 58.927360534668, 59.424259185791
), po_sprim = c(11.8459482192993, 11.5064420700073, 11.764817237854,
11.5762567520142, 11.3297424316406), po_prim = c(18.6390037536621,
18.0968399047852, 18.3025207519531, 18.1113109588623, 18.3464126586914
), po_sec = c(34.5362091064453, 34.183536529541, 34.1087074279785,
34.6075973510742, 33.5546226501465), po_mes = c(34.9188003540039,
36.1404914855957, 35.74755859375, 35.6310005187988, 36.6706390380859
), po_otro = c(0.0600357055664062, 0.0726876258850098, 0.0763970836997032,
0.0738347694277763, 0.0985818132758141), tsoc_indice = c(7.34650325775146,
7.43973398208618, 6.92216300964355, 7.1840124130249, 6.77571821212769
), tsoc_varmen = c(0.772356331348419, 0.0932305976748466,
-0.517570674419403, 0.261849343776703, -0.408294349908829
), tsoc_varan = c(-0.987880647182465, -0.606864094734192,
-0.864436089992523, 0.143844053149223, -1.516885638237)), na.action = structure(c(`1` = 1L,
`2` = 2L, `3` = 3L, `4` = 4L, `5` = 5L, `6` = 6L, `7` = 7L, `8` = 8L,
`9` = 9L, `10` = 10L, `11` = 11L, `12` = 12L, `13` = 13L, `14` = 14L,
`15` = 15L, `16` = 16L, `17` = 17L, `18` = 18L, `19` = 19L, `20` = 20L,
`21` = 21L, `22` = 22L, `23` = 23L, `24` = 24L, `57` = 57L), class = "omit"), row.names = 25:29, class = "data.frame")
Может ли кто-нибудь подсказать, в чём причина проблемы? Спасибо!