Блестящее приложение, использующее xgbtree из карета - PullRequest
0 голосов
/ 17 февраля 2020

Я пытаюсь создать блестящее приложение для создания модели с использованием xgbtree. Я хочу, чтобы пользователь мог выбрать эти параметры: nfolds, nrepeat, препроцесс и tuneLength

Затем я хочу построить наиболее важные переменные, итоги прогнозирования и т. Д., Но я не не как продолжить? не могли бы вы помочь мне

## load packages
library(shiny)
library(DBI)
library(dplyr)
library(fastDummies)
library(caret)
library(scales)
library(tables)
library(doParallel)
library(PerformanceAnalytics)
library(ggplot2)
library(ggthemes)
library(corrplot)
library(car)
library(psych)
library(caretEnsemble)


# Define UI for application that draws a histogram
ui <- fluidPage(

  # Application title
  titlePanel("Models For TA-FR"),


  #

  sliderInput("nfolds", "Number of folds:",
              min = 0, max = 20, value = 2
  ),
  sliderInput("nrepeats", "Number of repeats:",
              min = 0, max = 20, value = 2
  ),
  checkboxGroupInput("preProcess", "Preprocess to be selected:",
                     c("corr","nzv","zv","center","scale","range","YeoJohnson")),

  sliderInput("tuneLength", "Number of tuneLength:",
              min = 0, max = 20, value = 2
  ),

  mainPanel(
    plotOutput('plot')



  )
)

# Define server 
server <- function(input, output) {
  rm(list = ls(all.names = TRUE)) #will clear all objects includes hidden objects.
  gc() #free up memrory and report the memory usage.

  ###### import dataset from SQL server #####

  ## set up connection to the SQL Server ##
  con <- dbConnect(odbc::odbc(), "SQLServer")

  ## load all data ##
  sql_query <- "select * from DW_Temp.dbo.TA_CRM_MODEL_TRAINING where Team like 'TA-FR' and
(CallDate between '2018-01-01' and '2018-06-30' or CallDate between '2019-01-01' and '2019-06-30') "
  data <- dbGetQuery(con, sql_query)
  data1=na.omit(data)

  ## remove unnecessary columns ## 

  data1$Custno <- NULL
  data1$CallDate <- NULL

  ## convert all character variables into factors ##
  data1 <- data1 %>% mutate_if(is.character,as.factor)

  rownames(data1) <- data1$callid
  data1$callid <- NULL

  ## create binary flag. Equals 1 if Contribution > 0 , 0 otherwise ##

  data1$target <- as.factor(if_else(data1$ContributionUSD > 0, "1","0"))
  data1$target <- factor(data1$target,levels=c("1", "0"),ordered=TRUE)

  with(data1, table(Team, target))

  ## test model for TA-FR Team ##
  data_country <- data1

  ## create train and test datasets ##

  set.seed(1234)
  inTrain <- createDataPartition(y = data_country$target, # the outcome variable
                                 p = 0.8, # the percentage of data in the training sample
                                 list = FALSE) # should the results be in a list?
  train <- data_country[inTrain,]
  test <- data_country[-inTrain,]
  remove(inTrain)

  print(summary(train$target)) ## 
  print(summary(test$target)) ## 

  ## create a list of indexes to be used during training ##

  index_ones <- which(train$target == "1")
  index_zeros <- which(train$target == "0")
  train_indexes <- reactive({vector(mode = "list", length = input$nfolds*input$nrepeats)})

 reactive( {for (i in 1:(input$nfolds*input$nrepeats))
  {
    train_indexes[[i]] <- c(index_ones, sample(index_zeros, size = length(index_ones), replace = FALSE))
  }
} )
  ## remove outliers in train

  pr <- 0.99
  for (i in 1:ncol(train))
  {
    if (is.factor(train[,i]) == FALSE)
    {
      q <- quantile(train[,i], c(1-pr,pr), na.rm = TRUE)
      train[,i] <- squish(train[,i], q)
    }
  }

  summary(train)
  str(train)

  ## remove ContributionUSD ##
  train$ContributionUSD <- NULL
  ## remove Team column ##
  train$Team <- NULL

  ## define the resampling parameters for validating the model ##
  trCtrl <- reactive({trainControl(method="repeatedcv"
                         , number = input$nfolds
                         , repeats = input$nrepeats
                         , index = train_indexes
                         , search = "random"
                         , allowParallel = TRUE)})

  ## run the model

  cl <- makePSOCKcluster(15)
  registerDoParallel(cl)

  colnames(train) <- make.names(colnames(train))



  set.seed(1234)

  ###### Binary Models #####

  #--------Model 1 ---------#

  ### model 1 - training 
  start_time = Sys.time()
  modelB <- reactive({train(x = subset(train, select = -c(target))
                  , y = train$target
                  , method="xgbTree"
                  , preProcess = input$preprocess
                  , tuneLength = input$tunelength
                  , trControl = trCtrl)})

  end_time = Sys.time()
  end_time - start_time

  stopCluster(cl)
  registerDoSEQ()

  #####model1 - Prediction


  plot(varImp(modelB, scale = FALSE))

  colnames(test) <- make.names(colnames(test))

  pred_TAB <- predict(modelB, newdata = test, type = "prob")
  pred_TAB <- as.data.frame(pred_TAB)
  colnames(pred_TAB)[1] = "P_Contribution"
  summary(pred_TAB1$P_Contribution)

  ## store deciles value in prob_value_contribution ##

  TA_DecilesB <- quantile(pred_TAB$P_Contribution, probs=seq(0.1,0.9, by=0.1), na.rm=TRUE, include.lowest=TRUE, type=3)

  ## create 10 equal groups ##
  pred_TAB$Scores <- with(pred_TAB, 
                          cut(P_Contribution, 
                              breaks=c(0,TA_DecilesB,1), 
                              labels = c("10","9","8","7","6","5","4","3","2","1"),
                              include_lowest = TRUE, 
                              right = FALSE, 
                              ordered_result = TRUE))

  ## reorder the factor levels


  pred_TAB$Scores <- factor(pred_TAB$Scores,levels=c("1","2","3","4","5","6","7","8","9","10"), ordered=TRUE)

  ##summary(pred_TA$Scores)

  test$Scores <- pred_TAB$Scores
  test$P_Contribution <- pred_TAB$P_Contribution


  plot(test$ContributionUSD, test$P_Contribution)

  with(test, table(Scores, target))

}


# Run the application 
shinyApp(ui = ui, server = server)```
...