Ошибка ValueError при добавлении слоя BatchNormalization и Activation в пользовательский слой в Keras - PullRequest
0 голосов
/ 04 апреля 2020

Я создал в Keras пользовательский слой, в котором прямое распространение реализовано вызовом функции conv_forward():

from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D
from tensorflow.keras.models import Model
from tensorflow.keras import metrics


import tensorflow as tf

tf.keras.backend.clear_session()  # For easy reset of notebook state.

# Show which TensorFlow version is running.
print(tf.__version__)


def conv_forward(A_prev, W, b, parameters): # forward prop having a for loop
    """
    Implements the forward propagation for a gradually-updating convolution.

    The channel axis is processed in groups of "expand" channels: at every step the
    whole current tensor is convolved and the result overwrites one group, so later
    steps see the channels already updated by earlier ones.

    Arguments:
    A_prev -- output activations of the previous layer; indexed as a 4-D tensor with
              channels on the last axis (expected shape (m, n_H_prev, n_W_prev, n_C_prev))
    W -- filter passed to tf.nn.conv2d (expected shape (f, f, n_C_prev, expand))
    b -- bias added to the convolution output (expected shape (1, 1, 1, expand))
    parameters -- python dictionary containing "expand" (channels replaced per step)
                  and "channels" (total number of channels)

    Returns:
    The activations after channels // expand update steps; A_prev itself is returned
    unchanged when channels < expand (zero steps).
    """
    # The original body read an undefined name `hparameters`; the argument is `parameters`.
    expand = parameters["expand"]
    channels = parameters["channels"]
    depth_batch = channels // expand

    # Conv2D for 1 step of gradual update
    for i in range(depth_batch):
        start = i * expand
        # if you dont add b or not use a registered parameter, tensorflow will give error as follows:
        # Gradients do not exist for variables ['layer/Variable:0'] when minimizing the loss
        Z = tf.nn.conv2d(A_prev, W, [1, 1, 1, 1], "SAME") + b
        # Splice Z in place of channels [start, start + expand).
        A_prev = tf.concat([A_prev[:, :, :, :start], Z, A_prev[:, :, :, start + expand:]], 3)

    return A_prev


class Gunn2D(layers.Layer): # custom layer definition
    """
    Keras layer that applies conv_forward() with its own trainable filter and bias.

    Arguments:
    input_channels -- number of channels of the incoming tensor; also used as the
                      input-channel dimension of the filter
    expansion_rate -- number of channels produced (and replaced) per update step
    **kwargs -- standard keras Layer arguments (name, dtype, trainable, ...)
    """

    def __init__(self, input_channels, expansion_rate=32, **kwargs):
        # Forward the standard Layer keyword arguments so the layer can be named
        # and re-created from its config.
        super(Gunn2D, self).__init__(**kwargs)
        self.input_channels = input_channels
        self.expansion_rate = expansion_rate
        self.hparameters = {"expand": self.expansion_rate, "channels": self.input_channels}

    def build(self, input_shape):
        # 3x3 filter mapping all input channels to one group of `expansion_rate` channels.
        self.w = self.add_weight(shape=(3, 3, self.input_channels, self.expansion_rate), initializer='random_normal', trainable=True)
        self.b = self.add_weight(shape=(1, 1, 1, self.expansion_rate), initializer='random_normal', trainable=True)

    def call(self, inputs):
        return conv_forward(inputs, self.w, self.b, self.hparameters)

    def get_config(self):
        # Layers with constructor arguments must report them, otherwise the
        # enclosing model cannot be serialized or cloned.
        config = super(Gunn2D, self).get_config()
        config.update({
            "input_channels": self.input_channels,
            "expansion_rate": self.expansion_rate,
        })
        return config


def GunnModel(input_shape):
    """
    Build the demo classifier: Input -> Gunn2D -> Flatten -> 3-way softmax Dense.

    Arguments:
    input_shape -- shape of the images of the dataset

    Returns:
    model -- a Model() instance in Keras
    """
    inputs = Input(input_shape)

    # The custom layer is created first and only then applied to the symbolic input.
    gunn = Gunn2D(6, 2)
    features = gunn(inputs)
    print('After gunnlayer : {}'.format(features.get_shape()))

    flat = Flatten()(features)
    probabilities = Dense(3, activation='softmax', name='fc1')(flat)

    return Model(inputs=inputs, outputs=probabilities, name='GunnModel')


Затем я создаю модель и обучаю её:

# Dummy all-ones data: 50 training and 20 test samples, each paired with a 3-value target.
X_train, Y_train = tf.ones((50, 5, 5, 6)), tf.ones((50, 3))
X_test, Y_test = tf.ones((20, 5, 5, 6)), tf.ones((20, 3))

# The model takes the per-sample shape, i.e. everything after the batch axis.
gunnModel = GunnModel(X_train.shape[1:])
gunnModel.compile(
    optimizer="adam",
    loss='categorical_crossentropy',
    metrics=[metrics.categorical_accuracy],
)
gunnModel.fit(x=X_train, y=Y_train, epochs=5, steps_per_epoch=X_train.shape[0] // 10)

# evaluate() returns [loss, categorical_accuracy] for the compiled metrics.
preds = gunnModel.evaluate(x=X_test, y=Y_test)
print()
print("Loss = {}".format(preds[0]))
print("Test Accuracy = {}".format(preds[1]))

Обучение проходит успешно и выдаёт следующий результат:

Epoch 1/5
5/5 [==============================] - 0s 3ms/step - loss: 3.3864 - categorical_accuracy: 1.0000
Epoch 2/5
5/5 [==============================] - 0s 2ms/step - loss: 3.3766 - categorical_accuracy: 0.0000e+00
Epoch 3/5
5/5 [==============================] - 0s 3ms/step - loss: 3.3967 - categorical_accuracy: 0.0000e+00
Epoch 4/5
5/5 [==============================] - 0s 2ms/step - loss: 3.4462 - categorical_accuracy: 0.8000
Epoch 5/5
5/5 [==============================] - 0s 2ms/step - loss: 3.5673 - categorical_accuracy: 1.0000
1/1 [==============================] - 0s 2ms/step - loss: 3.6945 - categorical_accuracy: 1.0000

Loss = 3.69450306892395
Test Accuracy = 1.0

Это тестовая программа, нужная только для проверки работоспособности слоя, поэтому на точность не обращайте внимания.

Выведем сводку модели:

# Print the layer-by-layer table of output shapes and parameter counts.
gunnModel.summary()

Вывод:

Model: "GunnModel"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 5, 5, 6)]         0         
_________________________________________________________________
gunn2d (Gunn2D)              (None, 5, 5, 6)           110       
_________________________________________________________________
flatten (Flatten)            (None, 150)               0         
_________________________________________________________________
fc1 (Dense)                  (None, 3)                 453       
=================================================================
Total params: 563
Trainable params: 563
Non-trainable params: 0
_________________________________________________________________

Теперь, когда я добавляю в пользовательский слой (а именно в функцию conv_forward()) дополнительные операции, например пакетную нормализацию (BatchNormalization) и активацию, возникает ошибка.

Новое определение conv_forward ():

def _gradual_update(A, W, b, steps, width, padding):
    """Overwrite `width` channels of A per step with conv2d(A, W) + b, for `steps` consecutive steps."""
    for i in range(steps):
        start = i * width
        # Note: if you dont add b or not use a registered parameter, tensorflow will give error as follows:
        # Gradients do not exist for variables ['layer/Variable:0'] when minimizing the loss
        Z = tf.nn.conv2d(A, W, [1, 1, 1, 1], padding) + b
        # Splice Z in place of channels [start, start + width); later steps see the update.
        A = tf.concat([A[:, :, :, :start], Z, A[:, :, :, start + width:]], 3)
    return A


def conv_forward(A_shortcut, W1, b1, W2, b2, W3, b3, hparameters, bn_layers=None):
    """
    Implements the forward propagation of a three-stage gradually-updated residual block.

    Every stage repeatedly convolves the current activations and writes the result
    back into a slice of the channel axis. Stages 1 and 2 are followed by batch
    normalization and ReLU, stage 3 by batch normalization only; finally the input
    is added back as a shortcut.

    Arguments:
    A_shortcut -- output activations of the previous layer; indexed as a 4-D tensor
                  with channels on the last axis (expected shape (m, n_H_prev, n_W_prev, n_C_prev))
    W1, b1 -- filter and bias of stage 1, applied with "VALID" padding
    W2, b2 -- filter and bias of stage 2, applied with "SAME" padding
    W3, b3 -- filter and bias of stage 3, applied with "VALID" padding
              (NOTE(review): the concat only works if every Z keeps the height/width of A,
              so the "VALID" stages presumably use 1x1 filters -- confirm against the caller.
              Stages 1-2 must emit hparameters["expand"] channels, stage 3 a single channel.)
    hparameters -- python dictionary containing "expand" (channels replaced per step in
                   stages 1-2) and "channels" (total number of channels)
    bn_layers -- optional sequence of three BatchNormalization layers, used after stages
                 1, 2 and 3 respectively. Create them once (for example in the owning
                 layer's __init__) and pass them on every call. When omitted, fresh
                 layers are created on each call as before, which fails inside a
                 tf.function with "tried to create variables on non-first call".

    Returns:
    A -- block output, same shape as A_shortcut
    """
    expand = hparameters["expand"]
    channels = hparameters["channels"]
    depth_batch = channels // expand

    if bn_layers is None:
        # Legacy behaviour: new layers (and therefore new variables) on every call.
        bn_layers = [
            BatchNormalization(axis=3, name='Gunn_BN_1'),
            BatchNormalization(axis=3, name='Gunn_BN_2'),
            BatchNormalization(axis=3, name='Gunn_BN_3'),
        ]
    bn1, bn2, bn3 = bn_layers

    # Conv2D for 1 step of gradual update
    A = tf.identity(A_shortcut)
    A = _gradual_update(A, W1, b1, depth_batch, expand, "VALID")
    A = bn1(A)
    A = Activation('relu')(A)

    A = _gradual_update(A, W2, b2, depth_batch, expand, "SAME")
    A = bn2(A)
    A = Activation('relu')(A)

    # The last stage updates one channel at a time.
    A = _gradual_update(A, W3, b3, channels, 1, "VALID")
    A = bn3(A)

    # Add shortcut value to main path. This implements the identity block in Residual Network.
    # `layers.Add` is used because a bare `Add` is not imported by this file.
    A = layers.Add()([A, A_shortcut])
    print('Resnet : {}'.format(A.shape))

    return A

Ошибка:

Resnet : (None, 5, 5, 6)
After gunnlayer : (None, 5, 5, 6)
Epoch 1/5
Resnet : (10, 5, 5, 6)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-28-d6d9fedc335a> in <module>()
      6 gunnModel = GunnModel(X_train.shape[1:])
      7 gunnModel.compile(optimizer = "adam", loss='categorical_crossentropy', metrics=[metrics.categorical_accuracy])
----> 8 gunnModel.fit(x = X_train , y = Y_train, epochs = 5, steps_per_epoch = (X_train.shape[0]//10))
      9 preds = gunnModel.evaluate(x=X_test, y=Y_test)
     10 print()

9 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
    966           except Exception as e:  # pylint:disable=broad-except
    967             if hasattr(e, "ag_error_metadata"):
--> 968               raise e.ag_error_metadata.to_exception(e)
    969             else:
    970               raise

ValueError: in user code:

    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:505 train_function  *
        outputs = self.distribute_strategy.run(
    <ipython-input-1-3e0d6e941353>:75 call  *
        output = conv_forward(inputs, self.w1, self.b1, self.w2, self.b2, self.w3, self.b3, self.hparameters)
    <ipython-input-27-56c3c46e1785>:37 conv_forward  *
        A = BatchNormalization(axis = 3 , name = 'Gunn_BN_1')(A)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:897 __call__  **
        self._maybe_build(inputs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:2416 _maybe_build
        self.build(input_shapes)  # pylint:disable=not-callable
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/normalization.py:400 build
        experimental_autocast=False)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:577 add_weight
        caching_device=caching_device)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/tracking/base.py:743 _add_variable_with_custom_getter
        **kwargs_for_getter)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer_utils.py:141 make_variable
        shape=variable_shape if variable_shape else None)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:259 __call__
        return cls._variable_v1_call(*args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:220 _variable_v1_call
        shape=shape)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:66 getter
        return captured_getter(captured_previous, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2562 creator
        return next_creator(**kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:66 getter
        return captured_getter(captured_previous, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2562 creator
        return next_creator(**kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:66 getter
        return captured_getter(captured_previous, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2562 creator
        return next_creator(**kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:66 getter
        return captured_getter(captured_previous, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py:511 invalid_creator_scope
        "tf.function-decorated function tried to create "

    ValueError: tf.function-decorated function tried to create variables on non-first call.

Я даже пытался применить @tf.function к циклам for, но проблема осталась. Я подозреваю, что ошибка появилась после добавления пакетной нормализации: во время обучения сообщение 'Resnet' печатается, а идущее сразу за ним 'After gunnlayer' — нет. При создании модели печатаются оба сообщения.

...