Мне нужна помощь в создании моделей SVM и кривой RO C в R. Я получил несколько ошибок: Ошибка в уровнях (data [, "pred"]): аргумент "data" отсутствует, по умолчанию нет. Ошибка: в каждой строке было найдено хотя бы одно пропущенное значение. Как мне это исправить? Заранее спасибо!! Вот ссылка на диск Google на набор данных: data
Вот код, который я пробовал:
library(caret)
library(pROC)
fitControl <- trainControl(method = "repeatedcv",
number = 10,
repeats= 10,
classProbs = TRUE,
summaryFunction = twoClassSummary())
data<-read.csv("full_train_binary.csv")
data_training<-subset(data[which(data$X==0),])
data_testing<-subset(data[which(data$X==1),])
training<-data_training
testing<-data_testing
cols_remove <- c("patient_sk","New_admitted_dt_tm", "New_discharge_dt_tm")
training<-training[,!(colnames(training)%in%cols_remove)]
testing<-testing[,!(colnames(testing)%in%cols_remove)]
set.seed(825)
start.time <- Sys.time()
svm_one <- train(death~., data = training,
method = 'svmRadial',
trControl = fitControl,
verbose = FALSE,
tunelength=5,
metric="ROC")
svm_one
end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken
svm_one_pred <- predict(svm_one, newdata=testing,type = 'prob')
roc_svm_one <- roc(testing$death, as.vector(svm_one_pred[,1]))
pROC::auc(roc_svm_one)
Результат:
> library(caret)
> library(pROC)
> fitControl <- trainControl(method = "repeatedcv",
+ number = 10,
+ repeats= 10,
+ classProbs = TRUE,
+ summaryFunction = twoClassSummary())
Error in levels(data[, "pred"]) :
argument "data" is missing, with no default
>
> data<-read.csv("full_train_binary.csv")
>
> data_training<-subset(data[which(data$X==0),])
> data_testing<-subset(data[which(data$X==1),])
>
> training<-data_training
> testing<-data_testing
>
> cols_remove <- c("patient_sk","New_admitted_dt_tm", "New_discharge_dt_tm")
>
> training<-training[,!(colnames(training)%in%cols_remove)]
> testing<-testing[,!(colnames(testing)%in%cols_remove)]
>
> set.seed(825)
>
> start.time <- Sys.time()
>
> svm_one <- train(death~., data = training,
+ method = 'svmRadial',
+ trControl = fitControl,
+ verbose = FALSE,
+ tunelength=5,
+ metric="ROC")
Error: Every row has at least one missing value were found
> svm_one
Support Vector Machines with Radial Basis Function Kernel
4911 samples
1954 predictors
2 classes: 'False', 'True'
No pre-processing
Resampling: Cross-Validated (5 fold)
Summary of sample sizes: 3928, 3928, 3929, 3930, 3929
Resampling results across tuning parameters:
sigma C Accuracy Kappa
1.976927e-05 192.56972 0.7448586 -0.0004065338
2.778991e-05 242.26352 0.7446545 0.0007460142
3.273858e-05 14.39494 0.7450623 0.0000000000
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were sigma = 3.273858e-05 and C
= 14.39494.
>
> end.time <- Sys.time()
> time.taken <- end.time - start.time
> time.taken
Time difference of 0.395869 secs
>
>
> svm_one_pred <- predict(svm_one, newdata=testing,type = 'prob')
Error in eval(predvars, data, env) : object 'patient_sk' not found
> roc_svm_one <- roc(testing$death, as.vector(svm_one_pred[,1]))
Error in as.vector(svm_one_pred[, 1]) : object 'svm_one_pred' not found
> pROC::auc(roc_svm_one)
Error in pROC::auc(roc_svm_one) : object 'roc_svm_one' not found