Tensorflow TPU Обучение.InvalidArgumentError: необъявленный вывод вычисления TPU - PullRequest
0 голосов
/ 22 сентября 2019

Я создал ResNet со слоем визуального внимания, пытаясь обучить его на устройстве TPU. Я успешно компилирую его, но когда я вызываю model.fit, возвращает

Traceback (most recent call last):

  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1370, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Undeclared output of TPU computation. A common cause of this error is variable initializers that depend on the TPU computation. Edge: node global_max_pooling2d_12/Max (defined at attention_resnet.py:268) :0 -> node tf_op_layer_Reshape_1/Reshape_1 (defined at attention_resnet.py:268) :0

Код модели следующий безпромежуточные ResBlocks


def conv(Input, kernel, filt, stride, dilation, pad='same'):
    x = layers.Conv2D(filters=filt, kernel_size=kernel, strides=stride, dilation_rate=dilation, padding=pad)(Input)
    return x

def conv_group(Input, kernel, filt, stride, dilation, activation=True, pad='same'):
    x = conv(Input, kernel, filt, stride, dilation, pad=pad)
    x = layers.BatchNormalization(axis=-1, fused=True)(x)
    if activation:
        x = layers.LeakyReLU(alpha=0.1)(x)
    return x

def conv_down(Input, filters):
    x = conv(Input, 3, filters, 2 ,1, pad='valid')
    x = layers.BatchNormalization(axis=-1, fused=True)(x)
    x = layers.LeakyReLU(alpha=0.1)(x) 
    return x

def attention(Input):
    x = channel_att(Input)
    x = spatial_att(x)
    return x

def channel_att(Input, ratio=8):
    channel = Input.get_shape()[-1]
    mlp_0 = layers.Dense(units=channel//ratio, activation=layers.ReLU())
    mlp_1 = layers.Dense(units=channel, activation=layers.ReLU())
    ####
    avg_pool = tf.keras.layers.GlobalAveragePooling2D()(Input)
    avg_pool = tf.reshape(avg_pool, ([-1,1,1,channel]))
    avg_ = mlp_1(mlp_0(avg_pool))
    #####
    max_pool = tf.keras.layers.GlobalMaxPooling2D()(Input)
    max_pool = tf.reshape(max_pool, ([-1,1,1,channel]))
    max_ = mlp_1(mlp_0(max_pool))
    scale = keras.activations.sigmoid(avg_+max_)
    return Input*scale


def spatial_att(Input, kernel=7):
    avg_pool = tf.math.reduce_mean(Input, axis=[3], keepdims=True)
    max_pool = tf.math.reduce_max(Input, axis=[3], keepdims=True)
    concat = tf.concat([avg_pool, max_pool], axis=3)
    concat = layers.Conv2D(filters=1, kernel_size=kernel, padding='same',use_bias=False)(concat)
    concat = keras.activations.sigmoid(concat)
    return Input*concat 

def resblock(Input, filters, activ=True):
    y = conv_group(Input, 3, filters, 1, 2)
    x = attention(y)
    x = layers.Add()([x, y])
    x = conv_group(x, 3, filters, 1, 2, pad='valid', activation=activ)
    return x


def create_model():
    Input = layers.Input(shape=(540, 540, 3))
    x = resblock(Input, 32)
    ####################
    x = conv_down(x, 128)
    ####################
    x = resblock(x, 128)
    x = resblock(x, 128)
    x = conv_down(x, 256)
    x = resblock(x, 256)
    x = resblock(x, 128)
    ####################
    x = conv_down(x, 64)
    ####################
    x = resblock(x, 45, activ=False)
    model = tf.keras.Model(inputs=Input, outputs=x)
    model.compile(optimizer=keras.optimizers.SGD(), loss=custom_loss)
    return model

with tpu_strategy.scope():
    model=create_model()

model.fit(get_training_dataset(), validation_data=get_validation_dataset(),  initial_epoch=0, steps_per_epoch=steps_per_epoch ,validation_steps=val_steps, epochs=EPOCHS, verbose=1, callbacks=clbk)

Я использую Tensorflow 1.14, но та же ошибка возвращается и с TF-nightly.Я могу успешно тренироваться с графическими процессорами.

...