library(caret)
library(rpart.plot)
# Train a decision-tree classifier (rpart via caret) that predicts `Result`
# from the remaining columns of TrainingDataSet.csv.

# Load the data and inspect its structure.
car_df <- read.csv("TrainingDataSet.csv", sep = ",", header = TRUE)
str(car_df)

# Make sure the outcome is a factor so that train() fits a classification
# tree; read.csv() returns character columns by default since R 4.0.
# as.factor() leaves an existing factor untouched.
car_df$Result <- as.factor(car_df$Result)

# Stratified 70/30 split on the outcome; the seed makes it reproducible.
set.seed(3033)
intrain <- createDataPartition(y = car_df$Result, p = 0.7, list = FALSE)
training <- car_df[intrain, ]
testing <- car_df[-intrain, ]
dim(training)
dim(testing)

# Report whether the data contains any missing values.
anyNA(car_df)

# 10-fold cross-validation, repeated 3 times.
trctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# NOTE(review): `Result ~ .` uses every other column as a predictor. If the
# data contains a per-row identifier column (e.g. a movie name), it will be
# used as a predictor too -- confirm that this is intended.
set.seed(3333)
dtree_fit <- train(
  Result ~ .,
  data = training,
  method = "rpart",
  # Fixed typo: the rpart splitting criterion is "information", not
  # "infromation" (the only accepted values are "gini" and "information").
  parms = list(split = "information"),
  trControl = trctrl,
  tuneLength = 10
)
Я получаю это предупреждение:
Предупреждающее сообщение: в nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, : в показателях качества, рассчитанных по повторным выборкам, были пропущенные значения.
Я пытаюсь классифицировать, стал ли фильм хитом или провалился, используя количество положительных и отрицательных оценок тональности. Вот мои данные:
# Print an R expression that reproduces car_df (for sharing the data).
dput(car_df)
# Output of the dput() call: a data.frame with 40 rows and four columns --
# MovieName (factor), PositivePercent (numeric), NegativePercent (numeric)
# and Result (factor with levels "Flop" and "Hit").
structure(list(MovieName = structure(c(20L, 5L, 31L, 26L, 27L,
12L, 36L, 29L, 38L, 4L, 6L, 8L, 10L, 15L, 18L, 21L, 24L, 34L,
35L, 7L, 37L, 25L, 23L, 2L, 11L, 40L, 33L, 28L, 14L, 3L, 17L,
16L, 32L, 22L, 30L, 1L, 19L, 39L, 9L, 13L), .Label = c("#96Movie",
"#alphamovie", "#APrivateWar", "#AStarIsBorn", "#BlackPanther",
"#BohemianRhapsody", "#CCV", "#Creed2", "#CrimesOfGrindelwald",
"#Deadpool2", "#firstman", "#GameNight", "#GreenBookMovie", "#grinchmovie",
"#Incredibles2", "#indivisiblemovie", "#InstantFamily", "#JurassicWorld",
"#KolamaavuKokila", "#Oceans8", "#Overlord", "#PariyerumPerumal",
"#RalphBreaksTheInternet", "#Rampage", "#Ratchasan", "#ReadyPlayerOne",
"#RedSparrow", "#RobinHoodMovie", "#Sarkar", "#Seemaraja", "#Skyscraper",
"#Suspiria", "#TheLastKey", "#TheNun", "#ThugsOfHindostan", "#TombRaider",
"#VadaChennai", "#Venom", "#Vishwaroopam2", "#WidowsMovie"), class = "factor"),
PositivePercent = c(40.10554, 67.65609, 80.46796, 71.34831,
45.36082, 68.82591, 46.78068, 63.85787, 47.20497, 32.11753,
63.7, 39.2, 82.76553, 88.78613, 72.18274, 72.43187, 31.0089,
38.50932, 38.9, 19.9, 84.26854, 29.4382, 58.13953, 86.9281,
64.54965, 56, 0, 56.61914, 58.82353, 54.98891, 78.21682,
90, 64.3002, 85.8, 51.625, 67.71894, 92.21557, 53.84615,
40.12158, 68.08081), NegativePercent = c(11.34565, 21.28966,
6.408952, 13.10861, 26.80412, 17.10526, 18.61167, 10.55838,
46.48033, 56.231, 9.9, 12.1, 9.018036, 6.473988, 13.90863,
16.77149, 63.20475, 42.54658, 40.9, 5.4, 3.907816, 2.022472,
10.51567, 3.267974, 15.12702, 15.3, 100, 18.12627, 11.76471,
13.41463, 5.775076, 10, 20.08114, 2.1, 5.5, 7.739308, 0,
34.61538, 12.86727, 10.70707), Result = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Flop", "Hit"
), class = "factor")), class = "data.frame", row.names = c(NA,
-40L))