GradientTape returns None after several training iterations
0 votes / February 27, 2020

I am trying to implement a meta-learning algorithm (MAML) to be used on the Omniglot dataset. Computing gradients with GradientTape stops working after the first few iterations and returns a list containing None. I even added a watch on the variables inside the tape, and there are no modifications outside the tape before the gradient is computed, but to no avail. Here is the code:

# Imports assumed by the code below (not shown in the original post);
# get_siamese_model, get_batch, model and X are defined elsewhere in the notebook.
import time
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

def contrastive_loss(y_pred, y_true):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
    square_pred = K.square(y_pred)
    margin_square = K.square(K.maximum(margin - y_pred, 0))
    return K.mean(y_true * square_pred + (1 - y_true) * margin_square)

def copy_model(model, imgs1, imgs2):
    '''Copy model weights to a new model.

    Args:
        model: model to be copied.
        imgs1: First set of images used in training. This is used to run
            a forward pass in order to add the weights of the graph
            as variables.
        imgs2: The second set of images used in training.
    Returns:
        A copy of the model.
    '''
    copied_model = get_siamese_model((105, 105, 1))

    # If we don't run this step the weights are not "initialized"
    # and the gradients will not be computed.
    copied_model.call(inputs = [imgs1,imgs2])

    copied_model.set_weights(model.get_weights())
    return copied_model

def train_maml(model, epochs, dataset, lr_inner=0.01, batch_size=1, log_steps=1000):
    '''Train using the MAML setup.

    The comments in this function that start with:

        Step X:

    refer to the corresponding step in Algorithm 1 of the paper.

    Args:
        model: A model.
        epochs: Number of epochs used for training.
        dataset: A dataset used for training.
        lr_inner: Inner learning rate (alpha in Algorithm 1). Default value is 0.01.
        batch_size: Batch size. Default value is 1. The paper does not specify
            which value they use.
        log_steps: At every `log_steps` a log message is printed.

    Returns:
        A strong, fully-developed and trained MAML.
    '''
    optimizer = Adam()
    total_loss = 0
    losses = []
    start = time.time()

    # Step 2: instead of checking for convergence, we train for a number
    # of epochs
    for i in range(epochs):
        # Step 3 and 4
        x, y = get_batch(batch_size)
        x1 = tf.cast(tf.convert_to_tensor(x[0]), dtype=tf.float32)
        x2 = tf.cast(tf.convert_to_tensor(x[1]), dtype=tf.float32)
        y1 = tf.cast(tf.convert_to_tensor(y), dtype=tf.float32)

        with tf.GradientTape() as test_tape:
            #test_tape.watch(model.trainable_weights)
            # Step 5
            with tf.GradientTape() as train_tape:
                train_tape.watch(model.trainable_weights)
                y_out = model.call(inputs=[x1, x2])  # forward pass; also builds/initializes the weights
                train_loss = contrastive_loss(y_out, y1)

            # Step 6
            gradients = train_tape.gradient(train_loss, model.trainable_weights)

            model_copy = copy_model(model, x1, x2)

            model_copy.set_weights([w - lr_inner * g for w, g in zip(model.trainable_weights, gradients)])

            # Step 8
            y_out = model_copy.call(inputs = [x1,x2])
            test_loss = contrastive_loss(y_out, y1)

        # Step 8
        gradients = test_tape.gradient(test_loss, model_copy.trainable_weights)

        optimizer.apply_gradients(zip(gradients, model.trainable_weights))

        # Logs
        total_loss += test_loss
        loss = total_loss / (i+1.0)
        losses.append(loss)

        if i % log_steps == 0 and i > 0:
            print('Step {}: loss = {}, Time to run {} steps = {}'.format(i, loss, log_steps, time.time() - start))
            start = time.time()
    plt.plot(losses)
    plt.show()
train_maml(model, 200, X, batch_size=32, log_steps=10)

Here is the output:

Step 10: loss = 0.26724734902381897, Time to run 10 steps = 216.29537153244019
Step 20: loss = 0.26651105284690857, Time to run 10 steps = 193.76108241081238
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-15-5bec10c2de66> in <module>
----> 1 train_maml(model, 200, X, batch_size=32, log_steps=10)

<ipython-input-14-85acebbaae75> in train_maml(model, epochs, dataset, lr_inner, batch_size, log_steps)
     77             model_copy = copy_model(model, x1, x2)
     78 
---> 79             model_copy.set_weights([w - lr_inner * g for w, g in zip(model.trainable_weights, gradients)])
     80 
     81             # Step 8

<ipython-input-14-85acebbaae75> in <listcomp>(.0)
     77             model_copy = copy_model(model, x1, x2)
     78 
---> 79             model_copy.set_weights([w - lr_inner * g for w, g in zip(model.trainable_weights, gradients)])
     80 
     81             # Step 8

TypeError: unsupported operand type(s) for *: 'float' and 'NoneType'
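
The TypeError comes from multiplying lr_inner by a None entry in gradients, i.e. at least some of the weights stop receiving a gradient. For reference, a small check along these lines (not part of the code above; it just reuses model and gradients from the inner loop) would show which weights are affected:

for w, g in zip(model.trainable_weights, gradients):
    if g is None:
        # no path on the tape connects train_loss to this weight
        print('None gradient for', w.name)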

Is there another way to achieve the same thing?
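
For comparison, here is a minimal self-contained sketch (a hypothetical one-variable linear model, not the Siamese network above) of the alternative where the inner update is built as a tensor instead of going through set_weights, so the outer tape stays connected to the original weights:

import tensorflow as tf

w = tf.Variable(tf.random.normal([3, 1]))   # toy "model": a single weight matrix
x = tf.random.normal([5, 3])
y = tf.random.normal([5, 1])
lr_inner = 0.01

with tf.GradientTape() as test_tape:
    with tf.GradientTape() as train_tape:
        train_loss = tf.reduce_mean(tf.square(x @ w - y))
    grad = train_tape.gradient(train_loss, w)

    # The "fast weight" is a tensor computed from w (not a new Variable or
    # a set_weights call), so test_tape can trace test_loss back to w.
    w_fast = w - lr_inner * grad

    test_loss = tf.reduce_mean(tf.square(x @ w_fast - y))

meta_grad = test_tape.gradient(test_loss, w)
print(meta_grad)  # a tensor, not None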

...