При выполнении следующего кода получается дерево решений нулевой глубины (только корень). Чем это вызвано и как это можно исправить?
# Install C50 only when it is missing, so that re-running the script does not
# download and reinstall the package every time. The package is fetched over
# HTTPS rather than plain HTTP.
if (!requireNamespace("C50", quietly = TRUE)) {
  install.packages(
    "C50",
    dependencies = TRUE,
    repos = "https://cran.rstudio.com/"
  )
}
library(C50) # Provides the C5.0 decision tree algorithm
####### Step 2: Exploring and Preparing the Data ####
# Read text columns as factors. Since R 4.0.0, read.csv() keeps strings as
# character vectors by default, while C5.0 is documented to take categorical
# predictors as factors. Character predictors are a likely cause of a
# root-only tree (NOTE(review): confirm against the installed C50 version).
credit <- read.csv("german.csv", stringsAsFactors = TRUE)
# Preview the first rows instead of printing every record to the console.
head(credit)
# Column types: categorical columns should now be reported as factors.
str(credit)
# Frequency tables of two categorical predictors.
table(credit$account_check_status)
table(credit$savings)
# Five-number summaries of two numeric predictors.
summary(credit$duration_in_month)
summary(credit$credit_amount)
# Goal: a model that identifies applicants who are at high risk of default,
# allowing the bank to refuse the credit request before the money is given.
# Class balance of the outcome column.
table(credit$default)
# Data preparation: create RANDOM training and test datasets.
# Use 90% of the rows for training and the remaining 10% for testing.
# Rows are drawn at random rather than taken from the top of the file: per
# the original note, the bank sorted the data by loan amount (largest at the
# end), so a non-random split would train only on the smallest loans.
set.seed(123) # Fixed seed so the split is reproducible
# Derive the sizes from the data instead of hard-coding 1000 and 900, so the
# split stays correct if the file has a different number of rows.
n_rows <- nrow(credit)
train_sample <- sample(n_rows, round(0.9 * n_rows))
# Show the randomly selected row indices.
str(train_sample)
# The sampled indices select the training rows.
credit_train <- credit[train_sample, ]
# The rows that were NOT sampled become the test set.
credit_test <- credit[-train_sample, ]
# Check the randomization: both sets should show a similar share of defaults
# (about 30 percent, per the original note).
prop.table(table(credit_train$default))
prop.table(table(credit_test$default))
##### Step 3: Training a model on the Data ######
# C5.0 needs the outcome as a factor, so convert the default column.
credit_train$default <- factor(credit_train$default)
# Drop the outcome column by name rather than by position, so the predictors
# never include the label regardless of the column order in the file.
predictors <- credit_train[names(credit_train) != "default"]
credit_model2 <- C5.0(predictors, credit_train$default)
# Print the model that was just fitted. The script previously printed
# `credit_model`, a different name that is never assigned here, so it showed
# either an error or a stale object from an earlier session.
credit_model2
Вот данные:
Пожалуйста, нажмите здесь для получения данных