размер скрытых слоев в сети (сетка в H2O) - PullRequest
0 голосов
/ 31 августа 2018

Я запустил h2o.grid с набором разных параметров. Ниже вы можете увидеть код, связанный с ним:


    #creditcard <- read.csv("creditcard.csv") #https://www.kaggle.com/mlg-ulb/creditcardfraud
        # Convert a data.frame into an H2OFrame.
        #
        # Character columns are turned into factors before the upload; all other
        # columns are passed through unchanged.
        #
        # df: a data.frame.
        # Returns the value of as.h2o(df).
        as_h2o <- function(df) {
          for (colname in colnames(df)) {
            # is.character() stays a scalar test even when a column carries more
            # than one class, unlike `class(x) == "character"`.
            if (is.character(df[[colname]])) {
              df[[colname]] <- as.factor(df[[colname]])
            }
          }
          as.h2o(df)
        }


        # Hold out 30% of the rows as the test set. Column 1 is dropped from every
        # subset (presumably the `Time` column of the Kaggle file; verify).
        index <- createDataPartition(creditcard$Class, p = 0.3, list = FALSE)
        test.set <- creditcard[index, -1]
        train.full <- creditcard[-index, -1]

        # Take the validation rows from the remaining 70% only. Drawing this
        # second partition from the full data set (and rebuilding train.set from
        # it) would put test rows into both the training and validation sets.
        index.valid <- createDataPartition(train.full$Class, p = 0.2, list = FALSE)
        valid.set <- train.full[index.valid, ]
        train.set <- train.full[-index.valid, ]

        # Response and predictors. Selecting predictors by name avoids relying on
        # the response sitting at column position 30.
        Y <- "Class"
        X <- setdiff(colnames(train.set), Y)

     # Hyper-parameter space sampled by the random grid search.
     # NOTE(review): per the H2O Deep Learning documentation, `rho` and
     # `epsilon` are used only with adaptive_rate = TRUE, while `rate`,
     # `rate_annealing` and the momentum_* settings are used only with
     # adaptive_rate = FALSE. Confirm that searching both groups is intended.
     hyper_params <- list(
       activation = c(
         "Rectifier", "Maxout", "Tanh",
         "RectifierWithDropout", "MaxoutWithDropout", "TanhWithDropout"
       ),
       # Only these six three-layer topologies should appear in the grid.
       hidden = list(
         c(17, 16, 15), c(19, 15, 11), c(16, 14, 12),
         c(20, 15, 10), c(25, 17, 10), c(15, 10, 5)
       ),
       epochs = c(50, 100, 200),
       l1 = c(0, 0.001, 0.00001, 0.0001),
       l2 = c(0, 0.001, 0.00001, 0.0001),
       rate = c(0, 0.1, 0.005, 0.001),
       rate_annealing = c(1e-8, 1e-7, 1e-6),
       rho = c(0.9, 0.95, 0.99, 0.999),
       epsilon = c(1e-10, 1e-8, 1e-6, 1e-4),
       momentum_start = c(0, 0.5),
       momentum_stable = c(0.99, 0.5, 0),
       input_dropout_ratio = c(0, 0.1, 0.2),
       max_w2 = c(10, 100, 1000, 3.4028235e+38)
     )

     # Random search budget (at most 100 models or 900 seconds) plus the
     # early-stopping settings passed to the grid search.
     search_criteria <- list(
       strategy = "RandomDiscrete",
       max_models = 100,
       max_runtime_secs = 900,
       stopping_tolerance = 0.001,
       stopping_rounds = 15
     )

     # Run the random-discrete deep-learning grid search with 25-fold stratified
     # cross-validation on the training frame.
     # NOTE(review): if a grid called "dl_grid" already exists on the H2O
     # cluster (e.g. from an earlier run with other `hidden` values), H2O
     # appears to add the new models to that grid rather than replace it, which
     # would explain unexpected layer sizes in the summary. Confirm against the
     # h2o.grid documentation, and use a fresh grid_id or remove the old grid.
     dl_grid <- h2o.grid(
       algorithm = "deeplearning",
       x = X,
       y = Y,
       grid_id = "dl_grid",
       training_frame = as_h2o(train.set),
       validation_frame = as_h2o(valid.set),
       nfolds = 25,
       fold_assignment = "Stratified",
       hyper_params = hyper_params,
       search_criteria = search_criteria
     )

но в результате я получил неожиданные размеры нейронных сетей, которые не были упомянуты в hyper_params, например: [10, 10, 10, 10], [50, 50, 50].

Весь результат:

> dl_grid
H2O Grid Details

Grid ID: dl_grid 
Used hyper parameters: 
  -  activation 
  -  epochs 
  -  epsilon 
  -  hidden 
  -  input_dropout_ratio 
  -  l1 
  -  l2 
  -  max_w2 
  -  momentum_stable 
  -  momentum_start 
  -  rate 
  -  rate_annealing 
  -  rho 
Number of models: 13 
Number of failed models: 1 

Hyper-Parameter Search Summary: ordered by increasing logloss
             activation             epochs epsilon           hidden
1             Rectifier 24.666234282086002  1.0E-6     [19, 15, 11]
2             Rectifier  27.58637697029444  1.0E-6 [10, 10, 10, 10]
3             Rectifier  20.26209344328687  1.0E-6     [15, 16, 17]
4             Rectifier  18.57634281485049  1.0E-6     [17, 16, 15]
5             Rectifier 50.032621172309156  1.0E-6     [17, 16, 15]
6             Rectifier 50.032621172309156  1.0E-6     [17, 16, 15]
7                Maxout   8.38177768101728  1.0E-4     [20, 15, 10]
8     MaxoutWithDropout 1.6076279182111595  1.0E-8     [17, 16, 15]
9  RectifierWithDropout 0.5012088413637236 1.0E-10     [15, 15, 15]
10 RectifierWithDropout 0.5012088413637236 1.0E-10     [15, 15, 15]
11    MaxoutWithDropout 28.578195951798776  1.0E-4     [12, 13, 12]
12    MaxoutWithDropout 10.073383841883308  1.0E-4     [15, 16, 17]
13 RectifierWithDropout 0.5012088413637236 1.0E-10     [50, 50, 50]

Кто-нибудь может объяснить, почему это произошло?

Ответы [ 2 ]

0 голосов
/ 01 сентября 2018

При поиске по сетке в H2O вы должны видеть только модели с теми размерами скрытых слоёв, которые вы указали в hyper_params. Вот пример кода, с которым вы можете поэкспериментировать, чтобы посмотреть, удастся ли воспроизвести вашу проблему.


# Import the MNIST training and test frames.
# NOTE(review): the test-set URL assumes test.csv.gz sits next to train.csv.gz
# in the H2O public test-data bucket, as in the H2O examples. Verify.
train <- h2o.importFile("https://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/mnist/train.csv.gz")
test <- h2o.importFile("https://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/mnist/test.csv.gz")

# Specify the response and predictor columns
y <- "C785"
x <- setdiff(names(train), y)

# Encode the response column as categorical for multinomial classification
train[, y] <- as.factor(train[, y])
test[, y] <- as.factor(test[, y])

# Hyper-parameter space for a random grid search: three hidden-layer
# topologies crossed with two L1 penalties (six combinations in total).
hidden_opt <- list(c(32, 32), c(32, 16, 8), c(100, 100))
l1_opt <- c(1e-4, 1e-3)
hyper_params <- list(hidden = hidden_opt, l1 = l1_opt)

# Sample combinations at random and stop after at most 10 models.
search_criteria <- list(
  strategy = "RandomDiscrete",
  max_models = 10
)

# Train the deep-learning grid: 3-fold cross-validation, with per-model early
# stopping on the misclassification metric.
model_grid <- h2o.grid(
  algorithm = "deeplearning",
  grid_id = "mygrid_1",
  x = x,
  y = y,
  training_frame = train,
  # Search space and search strategy
  hyper_params = hyper_params,
  search_criteria = search_criteria,
  # Model settings shared by every grid member
  distribution = "multinomial",
  nfolds = 3,
  score_interval = 2,
  stopping_rounds = 3,
  stopping_tolerance = 0.05,
  stopping_metric = "misclassification"
)

# Output
# H2O Grid Details
# ================
#   Grid ID: mygrid_1 
# Used hyper parameters: 
#   -  hidden 
# -  l1 
# Number of models: 6 
# Number of failed models: 0 
# Hyper-Parameter Search Summary: ordered by increasing logloss
# hidden     l1        model_ids             logloss
# 1  [100, 100] 1.0E-4 mygrid_1_model_0 0.11350390885225858
# 2  [100, 100]  0.001 mygrid_1_model_4 0.13184550642109982
# 3    [32, 32]  0.001 mygrid_1_model_3 0.13869444872607956
# 4 [32, 16, 8]  0.001 mygrid_1_model_5 0.16575514373784073
# 5    [32, 32] 1.0E-4 mygrid_1_model_2 0.17190959951587054
# 6 [32, 16, 8] 1.0E-4 mygrid_1_model_1 0.20832913000853842

Обратите внимание, что в выходных данных вы увидите только модели со скрытыми слоями, равными исходно заданным значениям: c(32,32), c(32,16,8), c(100,100).

0 голосов
/ 31 августа 2018

Лорен, спасибо тебе за твой пост. Вот пример моделирования обнаружения мошенничества с кредитными картами

 # Hyper-parameter space sampled by the random grid search.
 # NOTE(review): per the H2O Deep Learning documentation, `rho` and `epsilon`
 # are used only with adaptive_rate = TRUE, while `rate`, `rate_annealing` and
 # the momentum_* settings are used only with adaptive_rate = FALSE. Confirm
 # that searching both groups is intended.
 hyper_params <- list(
   activation = c(
     "Rectifier", "Maxout", "Tanh",
     "RectifierWithDropout", "MaxoutWithDropout", "TanhWithDropout"
   ),
   # Only these six three-layer topologies should appear in the grid.
   hidden = list(
     c(17, 16, 15), c(19, 15, 11), c(16, 14, 12),
     c(20, 15, 10), c(25, 17, 10), c(15, 10, 5)
   ),
   epochs = c(50, 100, 200),
   l1 = c(0, 0.001, 0.00001, 0.0001),
   l2 = c(0, 0.001, 0.00001, 0.0001),
   rate = c(0, 0.1, 0.005, 0.001),
   rate_annealing = c(1e-8, 1e-7, 1e-6),
   rho = c(0.9, 0.95, 0.99, 0.999),
   epsilon = c(1e-10, 1e-8, 1e-6, 1e-4),
   momentum_start = c(0, 0.5),
   momentum_stable = c(0.99, 0.5, 0),
   input_dropout_ratio = c(0, 0.1, 0.2),
   max_w2 = c(10, 100, 1000, 3.4028235e+38)
 )

 # Random search budget (at most 100 models or 900 seconds) plus the
 # early-stopping settings passed to the grid search.
 search_criteria <- list(
   strategy = "RandomDiscrete",
   max_models = 100,
   max_runtime_secs = 900,
   stopping_tolerance = 0.001,
   stopping_rounds = 15
 )

 # Run the random-discrete deep-learning grid search with 25-fold stratified
 # cross-validation on the training frame.
 # NOTE(review): if a grid called "dl_grid" already exists on the H2O cluster
 # (e.g. from an earlier run with other `hidden` values), H2O appears to add
 # the new models to that grid rather than replace it, which would explain
 # unexpected layer sizes in the summary. Confirm against the h2o.grid
 # documentation, and use a fresh grid_id or remove the old grid.
 dl_grid <- h2o.grid(
   algorithm = "deeplearning",
   x = X,
   y = Y,
   grid_id = "dl_grid",
   training_frame = as_h2o(train.set),
   validation_frame = as_h2o(valid.set),
   nfolds = 25,
   fold_assignment = "Stratified",
   hyper_params = hyper_params,
   search_criteria = search_criteria
 )