Ошибка ValueError при добавлении слоёв BatchNormalization и Activation в пользовательский слой в Keras
/ 04 апреля 2020

Я создал в Keras пользовательский слой, прямое распространение в котором реализовано вызовом функции conv_forward():

from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D
from tensorflow.keras.models import Model
from tensorflow.keras import metrics

import tensorflow as tf

tf.keras.backend.clear_session()  # For easy reset of notebook state.


def conv_forward(A_prev, W, b, parameters):  # forward prop having a for loop
    """Gradually update ``A_prev`` one channel group at a time.

    The last axis of ``A_prev`` is treated as ``channels // expand``
    consecutive groups of ``expand`` channels. For every group the *current*
    tensor is convolved with ``W`` (stride 1, "SAME" padding), ``b`` is added,
    and the result replaces that group. Later groups are therefore computed
    from the already-updated earlier groups.

    Arguments:
    A_prev -- activations of the previous layer, a 4-D tensor with channels
              last, i.e. (m, n_H_prev, n_W_prev, n_C_prev)
    W -- convolution filter passed to ``tf.nn.conv2d``. Its output replaces a
         slice of ``expand`` channels, so it presumably has shape
         (f, f, n_C_prev, expand) -- confirm against the caller
    b -- bias added to the convolution output, e.g. shape (1, 1, 1, expand)
    parameters -- python dictionary containing "expand" (channels per group)
                  and "channels" (total number of channels)

    Returns:
    A_prev -- the updated tensor; same shape as the input as long as ``W``
              yields ``expand`` output channels
    """
    # The original body read an undefined name ``hparameters``; the values
    # live in the ``parameters`` argument.
    expand = parameters["expand"]
    channels = parameters["channels"]
    depth_batch = channels // expand

    # Conv2D for 1 step of gradual update
    for i in range(depth_batch):
        start = i * expand
        stop = start + expand
        # If b is not added (or a registered parameter is otherwise unused),
        # TensorFlow reports:
        # "Gradients do not exist for variables ['layer/Variable:0'] when
        # minimizing the loss"
        Z = tf.nn.conv2d(A_prev, W, [1, 1, 1, 1], "SAME") + b
        # Splice the freshly computed group back in place of the old one.
        A_prev = tf.concat(
            [A_prev[:, :, :, :start], Z, A_prev[:, :, :, stop:]], 3
        )

    return A_prev

class Gunn2D(layers.Layer):  # custom layer definition
    """Custom Keras layer that applies ``conv_forward`` with trainable weights.

    Arguments:
    input_channels -- number of channels of the incoming tensor; used as the
                      input-channel dimension of the filter
    expansion_rate -- number of channels updated per step; used as the
                      output-channel dimension of the filter
    **kwargs -- standard ``Layer`` keyword arguments (``name``, ``dtype``,
                ``trainable``, ...), forwarded to the base class
    """

    def __init__(self, input_channels, expansion_rate=32, **kwargs):
        # Forward base-layer kwargs so the layer can be named and can be
        # re-created from its config.
        super(Gunn2D, self).__init__(**kwargs)
        self.input_channels = input_channels
        self.expansion_rate = expansion_rate
        self.hparameters = {"expand": self.expansion_rate, "channels": self.input_channels}

    def build(self, input_shape):
        """Create the 3x3 filter and the bias once, on first use of the layer."""
        self.w = self.add_weight(
            shape=(3, 3, self.input_channels, self.expansion_rate),
            initializer='random_normal',
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(1, 1, 1, self.expansion_rate),
            initializer='random_normal',
            trainable=True,
        )

    def call(self, inputs):
        """Run the gradual convolution update on ``inputs``."""
        return conv_forward(inputs, self.w, self.b, self.hparameters)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super(Gunn2D, self).get_config()
        config.update({
            "input_channels": self.input_channels,
            "expansion_rate": self.expansion_rate,
        })
        return config

def GunnModel(input_shape):
    """Build the demo model: Input -> Gunn2D -> Flatten -> Dense(3, softmax).

    Arguments:
    input_shape -- shape of the images of the dataset (without the batch
                   axis), channels last

    Returns:
    model -- a Model() instance in Keras
    """
    X_input = Input(input_shape)

    # The layer's filter must match the channel count of the input, so derive
    # it from input_shape instead of hard-coding 6 (the value for the demo data).
    n_channels = int(input_shape[-1])
    # At instantiation, we don't know on what inputs this is going to get called
    Gunn2D_layer = Gunn2D(n_channels, 2)
    X = Gunn2D_layer(X_input)  # using the Custom Keras layer
    print('After gunnlayer : {}'.format(X.get_shape()))

    X = Flatten()(X)
    X = Dense(3, activation='softmax', name='fc1')(X)

    model = Model(inputs=X_input, outputs=X, name='GunnModel')

    return model

Затем я создаю модель и обучаю её:

# Dummy all-ones data: 50 training and 20 test samples of shape (5, 5, 6),
# each with a 3-element target vector.
X_train, X_test = tf.ones((50, 5, 5, 6)), tf.ones((20, 5, 5, 6))
Y_train, Y_test = tf.ones((50, 3)), tf.ones((20, 3))

# Build the model from the per-sample shape (batch axis dropped).
gunnModel = GunnModel(X_train.shape[1:])
gunnModel.compile(
    optimizer="adam",
    loss='categorical_crossentropy',
    metrics=[metrics.categorical_accuracy],
)

# Five epochs, with the number of steps per epoch derived from the sample count.
gunnModel.fit(
    x=X_train,
    y=Y_train,
    epochs=5,
    steps_per_epoch=X_train.shape[0] // 10,
)

# evaluate() returns the loss followed by the compiled metric.
preds = gunnModel.evaluate(x=X_test, y=Y_test)
for label, value in zip(("Loss = ", "Test Accuracy = "), preds[:2]):
    print(label + str(value))

Обучение проходит успешно и выдаёт следующий результат:

Epoch 1/5
5/5 [==============================] - 0s 3ms/step - loss: 3.3864 - categorical_accuracy: 1.0000
Epoch 2/5
5/5 [==============================] - 0s 2ms/step - loss: 3.3766 - categorical_accuracy: 0.0000e+00
Epoch 3/5
5/5 [==============================] - 0s 3ms/step - loss: 3.3967 - categorical_accuracy: 0.0000e+00
Epoch 4/5
5/5 [==============================] - 0s 2ms/step - loss: 3.4462 - categorical_accuracy: 0.8000
Epoch 5/5
5/5 [==============================] - 0s 2ms/step - loss: 3.5673 - categorical_accuracy: 1.0000
1/1 [==============================] - 0s 2ms/step - loss: 3.6945 - categorical_accuracy: 1.0000

Loss = 3.69450306892395
Test Accuracy = 1.0

Это тестовая программа, нужная лишь для проверки работоспособности слоя, поэтому на значения точности не обращайте внимания.

Выведем сводку модели:



Model: "GunnModel"
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 5, 5, 6)]         0         
gunn2d (Gunn2D)              (None, 5, 5, 6)           110       
flatten (Flatten)            (None, 150)               0         
fc1 (Dense)                  (None, 3)                 453       
Total params: 563
Trainable params: 563
Non-trainable params: 0

Однако когда я добавляю в пользовательский слой (а именно в функцию conv_forward()) дополнительные операции, например пакетную нормализацию (BatchNormalization) и активацию, возникает ошибка.

Новое определение conv_forward ():

def _gradual_update(A, W, b, group_size, num_groups, padding):
    """Replace channel groups of ``A`` one at a time with ``conv2d(A, W) + b``."""
    for i in range(num_groups):
        start = i * group_size
        # Note: if b is not added (or a registered parameter is otherwise
        # unused), TensorFlow reports "Gradients do not exist for variables
        # ['layer/Variable:0'] when minimizing the loss".
        Z = tf.nn.conv2d(A, W, [1, 1, 1, 1], padding) + b
        A = tf.concat([A[:, :, :, :start], Z, A[:, :, :, start + group_size:]], 3)
    return A


def conv_forward(A_shortcut, W1, b1, W2, b2, W3, b3, hparameters,
                 bn_layers=None, training=None):
    """Residual block made of three gradual convolution stages.

    Stage 1 and stage 2 update the channels in ``channels // expand`` groups
    of ``expand`` channels (filters ``W1`` with "VALID" and ``W2`` with "SAME"
    padding), each followed by batch normalization and ReLU. Stage 3 updates
    one channel at a time with ``W3`` ("VALID" padding) followed by batch
    normalization. The block input is finally added back (identity shortcut).

    Arguments:
    A_shortcut -- output activations of the previous layer, 4-D tensor with
                  channels last, i.e. (m, n_H_prev, n_W_prev, n_C_prev)
    W1, W2, W3 -- convolution filters for the three stages. With "VALID"
                  padding the spatial size is kept only for 1x1 kernels, so
                  W1 and W3 are presumably 1x1 -- confirm against the caller
    b1, b2, b3 -- biases added to the respective convolution outputs
    hparameters -- python dictionary containing "expand" and "channels"
    bn_layers -- optional sequence of three ``BatchNormalization`` layers
                 that were created ONCE by the caller (e.g. in the custom
                 layer's ``__init__``). Supplying them is required when this
                 function runs inside ``Layer.call`` during ``fit``: a layer
                 constructed here creates new variables on every trace, and
                 ``tf.function`` raises "tried to create variables on
                 non-first call".
    training -- optional training flag forwarded to the batch-norm layers

    Returns:
    A -- block output, same shape as ``A_shortcut``

    Raises:
    ValueError -- if ``bn_layers`` is given but does not hold three layers
    """
    expand = hparameters["expand"]
    channels = hparameters["channels"]
    depth_batch = channels // expand

    if bn_layers is None:
        # Legacy fallback: fresh layers on every call. Fine for a single
        # eager/functional call, but fails inside tf.function (see docstring).
        bn_layers = [
            BatchNormalization(axis=3, name='Gunn_BN_{}'.format(k))
            for k in (1, 2, 3)
        ]
    elif len(bn_layers) != 3:
        raise ValueError(
            "bn_layers must contain exactly 3 layers, got {}".format(len(bn_layers))
        )
    bn_1, bn_2, bn_3 = bn_layers

    A = tf.identity(A_shortcut)

    # Stage 1: group-wise update, then BN + ReLU.
    A = _gradual_update(A, W1, b1, expand, depth_batch, "VALID")
    A = bn_1(A, training=training)
    A = tf.nn.relu(A)  # stateless op; no Activation layer object needed

    # Stage 2: group-wise update, then BN + ReLU.
    A = _gradual_update(A, W2, b2, expand, depth_batch, "SAME")
    A = bn_2(A, training=training)
    A = tf.nn.relu(A)

    # Stage 3: channel-by-channel update (group size 1), then BN.
    A = _gradual_update(A, W3, b3, 1, channels, "VALID")
    A = bn_3(A, training=training)

    # Add shortcut value to main path. This implements the identity block in
    # Residual Network. A plain tensor add avoids the un-imported Add layer.
    A = tf.add(A, A_shortcut)
    print('Resnet : {}'.format(A.shape))

    return A


Resnet : (None, 5, 5, 6)
After gunnlayer : (None, 5, 5, 6)
Epoch 1/5
Resnet : (10, 5, 5, 6)
ValueError                                Traceback (most recent call last)
<ipython-input-28-d6d9fedc335a> in <module>()
      6 gunnModel = GunnModel(X_train.shape[1:])
      7 gunnModel.compile(optimizer = "adam", loss='categorical_crossentropy', metrics=[metrics.categorical_accuracy])
----> 8 gunnModel.fit(x = X_train , y = Y_train, epochs = 5, steps_per_epoch = (X_train.shape[0]//10))
      9 preds = gunnModel.evaluate(x=X_test, y=Y_test)
     10 print()

9 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
    966           except Exception as e:  # pylint:disable=broad-except
    967             if hasattr(e, "ag_error_metadata"):
--> 968               raise e.ag_error_metadata.to_exception(e)
    969             else:
    970               raise

ValueError: in user code:

    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:505 train_function  *
        outputs = self.distribute_strategy.run(
    <ipython-input-1-3e0d6e941353>:75 call  *
        output = conv_forward(inputs, self.w1, self.b1, self.w2, self.b2, self.w3, self.b3, self.hparameters)
    <ipython-input-27-56c3c46e1785>:37 conv_forward  *
        A = BatchNormalization(axis = 3 , name = 'Gunn_BN_1')(A)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:897 __call__  **
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:2416 _maybe_build
        self.build(input_shapes)  # pylint:disable=not-callable
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/normalization.py:400 build
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:577 add_weight
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/tracking/base.py:743 _add_variable_with_custom_getter
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer_utils.py:141 make_variable
        shape=variable_shape if variable_shape else None)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:259 __call__
        return cls._variable_v1_call(*args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:220 _variable_v1_call
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:66 getter
        return captured_getter(captured_previous, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2562 creator
        return next_creator(**kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:66 getter
        return captured_getter(captured_previous, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2562 creator
        return next_creator(**kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:66 getter
        return captured_getter(captured_previous, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2562 creator
        return next_creator(**kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:66 getter
        return captured_getter(captured_previous, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py:511 invalid_creator_scope
        "tf.function-decorated function tried to create "

    ValueError: tf.function-decorated function tried to create variables on non-first call.

Я даже пробовал применить @tf.function к циклам for, но проблема осталась. Я подозреваю, что ошибка появилась после добавления пакетной нормализации (BatchNormalization). При этом во время обучения сообщение 'Resnet' печатается, а следующее сразу за ним сообщение 'After gunnlayer' — нет, хотя при создании модели печатаются оба.
