Я пытаюсь создать блестящее приложение для создания модели с использованием xgbtree. Я хочу, чтобы пользователь мог выбрать эти параметры: nfolds, nrepeat, препроцесс и tuneLength
Затем я хочу построить наиболее важные переменные, итоги прогнозирования и т. Д., Но я не не как продолжить? не могли бы вы помочь мне
## load packages
library(shiny)
library(DBI)
library(dplyr)
library(fastDummies)
library(caret)
library(scales)
library(tables)
library(doParallel)
library(PerformanceAnalytics)
library(ggplot2)
library(ggthemes)
library(corrplot)
library(car)
library(psych)
library(caretEnsemble)
# Define UI for application that draws a histogram
ui <- fluidPage(
# Application title
titlePanel("Models For TA-FR"),
#
sliderInput("nfolds", "Number of folds:",
min = 0, max = 20, value = 2
),
sliderInput("nrepeats", "Number of repeats:",
min = 0, max = 20, value = 2
),
checkboxGroupInput("preProcess", "Preprocess to be selected:",
c("corr","nzv","zv","center","scale","range","YeoJohnson")),
sliderInput("tuneLength", "Number of tuneLength:",
min = 0, max = 20, value = 2
),
mainPanel(
plotOutput('plot')
)
)
# Define server
server <- function(input, output) {
rm(list = ls(all.names = TRUE)) #will clear all objects includes hidden objects.
gc() #free up memrory and report the memory usage.
###### import dataset from SQL server #####
## set up connection to the SQL Server ##
con <- dbConnect(odbc::odbc(), "SQLServer")
## load all data ##
sql_query <- "select * from DW_Temp.dbo.TA_CRM_MODEL_TRAINING where Team like 'TA-FR' and
(CallDate between '2018-01-01' and '2018-06-30' or CallDate between '2019-01-01' and '2019-06-30') "
data <- dbGetQuery(con, sql_query)
data1=na.omit(data)
## remove unnecessary columns ##
data1$Custno <- NULL
data1$CallDate <- NULL
## convert all character variables into factors ##
data1 <- data1 %>% mutate_if(is.character,as.factor)
rownames(data1) <- data1$callid
data1$callid <- NULL
## create binary flag. Equals 1 if Contribution > 0 , 0 otherwise ##
data1$target <- as.factor(if_else(data1$ContributionUSD > 0, "1","0"))
data1$target <- factor(data1$target,levels=c("1", "0"),ordered=TRUE)
with(data1, table(Team, target))
## test model for TA-FR Team ##
data_country <- data1
## create train and test datasets ##
set.seed(1234)
inTrain <- createDataPartition(y = data_country$target, # the outcome variable
p = 0.8, # the percentage of data in the training sample
list = FALSE) # should the results be in a list?
train <- data_country[inTrain,]
test <- data_country[-inTrain,]
remove(inTrain)
print(summary(train$target)) ##
print(summary(test$target)) ##
## create a list of indexes to be used during training ##
index_ones <- which(train$target == "1")
index_zeros <- which(train$target == "0")
train_indexes <- reactive({vector(mode = "list", length = input$nfolds*input$nrepeats)})
reactive( {for (i in 1:(input$nfolds*input$nrepeats))
{
train_indexes[[i]] <- c(index_ones, sample(index_zeros, size = length(index_ones), replace = FALSE))
}
} )
## remove outliers in train
pr <- 0.99
for (i in 1:ncol(train))
{
if (is.factor(train[,i]) == FALSE)
{
q <- quantile(train[,i], c(1-pr,pr), na.rm = TRUE)
train[,i] <- squish(train[,i], q)
}
}
summary(train)
str(train)
## remove ContributionUSD ##
train$ContributionUSD <- NULL
## remove Team column ##
train$Team <- NULL
## define the resampling parameters for validating the model ##
trCtrl <- reactive({trainControl(method="repeatedcv"
, number = input$nfolds
, repeats = input$nrepeats
, index = train_indexes
, search = "random"
, allowParallel = TRUE)})
## run the model
cl <- makePSOCKcluster(15)
registerDoParallel(cl)
colnames(train) <- make.names(colnames(train))
set.seed(1234)
###### Binary Models #####
#--------Model 1 ---------#
### model 1 - training
start_time = Sys.time()
modelB <- reactive({train(x = subset(train, select = -c(target))
, y = train$target
, method="xgbTree"
, preProcess = input$preprocess
, tuneLength = input$tunelength
, trControl = trCtrl)})
end_time = Sys.time()
end_time - start_time
stopCluster(cl)
registerDoSEQ()
#####model1 - Prediction
plot(varImp(modelB, scale = FALSE))
colnames(test) <- make.names(colnames(test))
pred_TAB <- predict(modelB, newdata = test, type = "prob")
pred_TAB <- as.data.frame(pred_TAB)
colnames(pred_TAB)[1] = "P_Contribution"
summary(pred_TAB1$P_Contribution)
## store deciles value in prob_value_contribution ##
TA_DecilesB <- quantile(pred_TAB$P_Contribution, probs=seq(0.1,0.9, by=0.1), na.rm=TRUE, include.lowest=TRUE, type=3)
## create 10 equal groups ##
pred_TAB$Scores <- with(pred_TAB,
cut(P_Contribution,
breaks=c(0,TA_DecilesB,1),
labels = c("10","9","8","7","6","5","4","3","2","1"),
include_lowest = TRUE,
right = FALSE,
ordered_result = TRUE))
## reorder the factor levels
pred_TAB$Scores <- factor(pred_TAB$Scores,levels=c("1","2","3","4","5","6","7","8","9","10"), ordered=TRUE)
##summary(pred_TA$Scores)
test$Scores <- pred_TAB$Scores
test$P_Contribution <- pred_TAB$P_Contribution
plot(test$ContributionUSD, test$P_Contribution)
with(test, table(Scores, target))
}
# Run the application
shinyApp(ui = ui, server = server)```