Neural network with backpropagation does not fit small data
0 votes
/ 24 April 2018

I am trying to implement a neural network with the backpropagation algorithm in Racket. To test the implementation, I decided to train it on a very small data set for a large number of iterations and check whether it fits the data it was trained on. However, it does not: with the sigmoid activation function it produces extremely small values (on the order of 10^-20), although the relative ordering is correct (that is, the input vector with the largest target value also produces the largest output of the trained network). With the relu function, the outputs are closer in magnitude to the desired ones, but wrong relative to each other. I would appreciate any insight into why this happens.

#lang racket

; activation function: fn - the function itself, dfn - its derivative
(define-struct activation (fn dfn))

;activation using sigmoid
(define sigmoid-a (let ([sigmoid (lambda (x)
                                   (/ 1 (+ 1 (exp (- x)))))])
                    (activation sigmoid
                                (lambda (x)
                                  (* (sigmoid x) (- 1 (sigmoid x)))))))

; activation using leaky relu (slope 0.2 for negative inputs)
(define relu-a (activation (lambda (x) (if (< x 0)
                                           (* 0.2 x)
                                           x))
                           (lambda (x) (if (< x 0)
                                           0.2
                                           1))))
; neuron. The bias is the implicit first weight, applied to a constant input of -1
(define-struct neuron (weights) #:transparent )

; neuron output before applying activation function
(define (neuron-out-unactivated neuron inputs)
  (foldl (lambda(w in result)
           (+ result (* w in)))
         0
         (neuron-weights neuron)
         (cons -1 inputs)))

; neuron output with activation function applied
(define (neuron-out neuron inputs activation-fn)
  (activation-fn (neuron-out-unactivated neuron inputs)))

; neuron layer
(define-struct layer (neurons) #:transparent )

; list of layer's neurons' output, before activation function
(define (layer-out-unactivated layer inputs)
  (map (lambda(neuron)
         (neuron-out-unactivated neuron inputs))
       (layer-neurons layer)))

; list of layer's neurons' output with activation function applied
(define (layer-out layer inputs activation-fn)
  (map (lambda(neuron)
         (neuron-out neuron inputs activation-fn))
       (layer-neurons layer)))

; neural network
(define-struct neural-network (layers activation) #:transparent)

; neural network output 
(define (neural-network-out nn inputs)
  (let pass ([layers (neural-network-layers nn)]
             [inputs inputs])
    (if (empty? layers) inputs
        (pass (rest layers)
              (layer-out (first layers)
                         inputs
                         (activation-fn (neural-network-activation nn)))))))

; calculates the derivative (delta) for a neuron in the last (output) layer
; out-unactivated -- neuron's output before applying the activation function
; target -- teaching data / desired result
; activation -- activation fn and its derivative
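; note: this is delta = (a(z) - target) * a'(z), the gradient of a squared-error
; loss 1/2 * (a(z) - target)^2 with respect to the pre-activation z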
(define (d-lastlayer out-unactivated target activation)
  (* (- ((activation-fn activation) out-unactivated) target)
     ((activation-dfn activation) out-unactivated)))

; calculates the derivative (delta) for a neuron in an inner (hidden) layer
; neuron-index -- position of the neuron in its layer. Needed because the weights are stored in the next layer's neurons.
; out-unactivated -- neuron's output before applying the activation function
; d-nextlayer -- derivatives of the next layer
; activation -- activation fn and its derivative
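; note: this computes delta_i = a'(z_i) * sum over j of (w_ji * delta_j), where w_ji
; is the weight from this neuron into neuron j of the next layer (add1 skips the bias weight)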
(define (d-innerlayer neuron-index out-unactivated d-nextlayer nextlayer activation)
  (define mp (map (lambda (neur d)
                    (* d (list-ref (neuron-weights neur)
                                   (add1 neuron-index))))
                  (layer-neurons nextlayer)
                  d-nextlayer))
  (* (foldl + 0 mp)
     ((activation-dfn activation) out-unactivated)))

; maps the list of layers into a list of layer derivatives, where each layer derivative is a list of its neurons' derivatives
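; note: the recursion bottoms out at the output layer and builds the list of
; derivatives back to front, each hidden layer using the deltas of the layer after it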
(define (backpropagation layers inputs targets activation)
  (let ([output (layer-out-unactivated (first layers) inputs)])
    (if (empty? (rest layers)) 
        (list (map (lambda (out target) (d-lastlayer out target activation)) output targets))
        (let ([next-layer-d (backpropagation (rest layers) output targets activation)])
          (cons (map (lambda(index out)
                       (d-innerlayer index 
                                     out 
                                     (first next-layer-d)
                                     (first (rest layers))
                                     activation)) 
                     (range (length output)) 
                     output) 
                next-layer-d)))))

; calculates new weights for the layer.
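; note: each weight is updated as w <- w - train-speed * input * delta
; (the bias weight sees the constant input -1)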
(define (transform-layer _layer input derirative train-speed)
  (layer (map (lambda(n d)
                (neuron (map (lambda(w i)
                               (+ w (* (- train-speed) i d))) 
                             (neuron-weights n) 
                             (cons -1 input))))
              (layer-neurons _layer)
              derirative)))

; calculates new weights for all layers
(define (update-layers layers inputs deriratives train-speed activation-fn)
  (if (empty? layers) '()
      (cons (transform-layer (first layers)
                             inputs
                             (first deriratives)
                             train-speed) 
            (update-layers (rest layers)
                           (layer-out (first layers)
                                      inputs
                                      activation-fn)
                           (rest deriratives)
                           train-speed
                           activation-fn))))

; performs network update for single input vector
(define (train-neural-network-iteration network inputs target train-speed)
  (let* ([layers (neural-network-layers network)]
         [activation (neural-network-activation network)]
         [deriratives (backpropagation layers inputs target activation)]
         [new-layers (update-layers layers inputs deriratives train-speed (activation-fn activation))])
    (neural-network new-layers (neural-network-activation network))))

; performs network update for each input in teaching-data
(define (train-neural-network-epoch network teaching-data train-speed)
  (let train ([network network]
              [data teaching-data])
    (if (empty? data) network
        (train (train-neural-network-iteration network
                                               (car (first data))
                                               (cdr (first data))
                                               train-speed)
               (rest data)))))

; trains the network for `iterations` epochs
(define (train-neural-network network data iterations train-speed)
  (let it ([i 0] [network network])
    (if (> i iterations) network
        (it (add1 i) (train-neural-network-epoch network data train-speed)))))

; creates a network. neuron-count-list -- a list of integers, one per layer, giving that layer's neuron count
(define (create-neural-network inputs-length neuron-count-list activation)
  (let _create ([inputs-l inputs-length] [n-count neuron-count-list] [layers '()])
    (if (empty? n-count) (neural-network (reverse layers) activation)
        (_create (first n-count)
                 (rest n-count)
                 (cons (layer (build-list (first n-count)
                                          (lambda (n)
                                            (neuron (build-list (add1 inputs-l)
                                                                (lambda (n2)
                                                                  (/ (+ (random 50) 14) 64)))))))
                       layers)))))

;test
(define (test-case act)
  (define nn (create-neural-network 1 (list 3 1) act))
  (define data (list (cons (list 0) (list 0))
                     (cons (list 1) (list 1))
                     (cons (list 2) (list 0))))
  (define trained-nn (train-neural-network nn data 1000000 0.001))
  (println (~a (neural-network-out trained-nn (list 0))))
  (println (~a (neural-network-out trained-nn (list 1))))
  (println (~a (neural-network-out trained-nn (list 2))))
  (println (~a trained-nn)))

(test-case sigmoid-a)
;outputs
;0->2 * 10^(-29)
;1->5 * 10^(-21)
;2->2 * 10^(-31)

(test-case relu-a)
;outputs
;0 -> ~164
;1 -> ~164
;2 -> ~0

(provide (all-defined-out))

1 Answer

0 votes
/ 07 May 2018

The problem was in the recursive call of the backpropagation function:

(let ([next-layer-d (backpropagation (rest layers) output targets activation)])

The output here is the current layer's output before the activation function; however, it should have been the output after applying it.
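For concreteness, here is a minimal sketch of the corrected recursion, assuming the definitions from the question (the local name activated is the only new binding):

; corrected: feed the *activated* outputs forward to the next layer,
; while still using the unactivated outputs for the derivative terms
(define (backpropagation layers inputs targets activation)
  (let* ([output (layer-out-unactivated (first layers) inputs)]
         [activated (map (activation-fn activation) output)])
    (if (empty? (rest layers))
        (list (map (lambda (out target) (d-lastlayer out target activation))
                   output
                   targets))
        (let ([next-layer-d (backpropagation (rest layers) activated targets activation)])
          (cons (map (lambda (index out)
                       (d-innerlayer index
                                     out
                                     (first next-layer-d)
                                     (first (rest layers))
                                     activation))
                     (range (length output))
                     output)
                next-layer-d)))))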

...