I am trying to define a tf.keras model. I get

ValueError: Output tensors to a Model must be the output of a TensorFlow Layer (thus holding past layer metadata). Found: Tensor(decoder_activation_softmax/truediv:0, shape=(?, ?, 144), dtype=float32)

when calling self.model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=[decoder_outputs]).

I have seen several threads about the same problem, where the cause usually turns out to be using raw TensorFlow functions instead of tf.keras layers such as Lambda(), Add(), and so on. I have tried to add the appropriate layers where applicable, but I still cannot get it to work.
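To make it concrete, this is what I understand by replacing raw ops with layer calls (a small toy example of my own, not part of my model):

from tensorflow.python.keras import backend as K
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Input, Add, Lambda

x = Input(shape=(4,))
y = Input(shape=(4,))
# summed = x + y                                        # plain tensor, rejected by Model()
summed = Add()([x, y])                                  # same op wrapped in a layer
expanded = Lambda(lambda t: K.expand_dims(t, axis=1))(summed)
m = Model(inputs=[x, y], outputs=[expanded])            # builds without the ValueError

So I assume my problem is somewhere in how the attention class produces its outputs.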
The following is my modified model:
import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Multiply, Add, Dense, LSTM, GRU, CuDNNLSTM, Input, Embedding, TimeDistributed, Flatten, Dropout, Lambda, Concatenate
import numpy as np
LATENT_DIM = 256
DROPOUT_RATE = 0.2
class BahdanauAttention(Model):
    def __init__(self, units, name=None):
        super(BahdanauAttention, self).__init__(name=name)
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def __call__(self, query, values):
        # hidden shape == (batch_size, hidden size)
        # hidden_with_time_axis shape == (batch_size, 1, hidden size)
        # we are doing this to perform addition to calculate the score
        ones_tensor = Lambda(lambda x: K.ones_like(x))(query)
        ones_tensor = ones_tensor[:, 0]
        hidden_with_time_axis = Lambda(lambda x: K.expand_dims(x, axis=1))(ones_tensor)

        # score shape == (batch_size, max_length, hidden_size)
        score = self.V(Dense(1, activation='tanh')(Add()([self.W1(values), self.W2(hidden_with_time_axis)])))

        # attention_weights shape == (batch_size, max_length, 1)
        # we get 1 at the last axis because we are applying score to self.V
        attention_weights = Dense(units=1, activation='softmax')(score)

        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = Multiply()([attention_weights, values])
        context_vector = Lambda(lambda x: K.sum(x, axis=1))(context_vector)

        return context_vector, attention_weights
class Chatbot():
    def __init__(self):
        ''' Configure the chatbot. '''
        self.num_encoder_tokens = 500
        self.num_decoder_tokens = 500
        self.__build_model()

    def __build_model(self):
        ''' Construct the model used to train the chatbot. '''
        encoder_inputs = Input(shape=(None, self.num_encoder_tokens), name='encoder_input')
        encoder_dropout = (TimeDistributed(Dropout(rate=DROPOUT_RATE, name='encoder_dropout')))(encoder_inputs)
        encoder = GRU(LATENT_DIM, return_sequences=True, return_state=True, name='encoder_gru')
        encoder_outputs, encoder_state = encoder(encoder_dropout)

        # Attention mechanism
        attention_layer = BahdanauAttention(LATENT_DIM, name='attention_layer')
        attention_result, attention_weights = attention_layer(encoder_state, encoder_outputs)

        decoder_inputs = Input(shape=(None, self.num_decoder_tokens), name='decoder_input')
        decoder_dropout = (TimeDistributed(Dropout(rate=DROPOUT_RATE, name='decoder_dropout')))(decoder_inputs)
        decoder_gru = GRU(LATENT_DIM, return_sequences=True, return_state=True, name='decoder_gru')
        decoder_outputs, _ = decoder_gru(decoder_dropout, initial_state=encoder_state)
        decoder_outputs = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_weights])

        decoder_dense = Dense(self.num_decoder_tokens, activation='softmax', name='decoder_activation_softmax')
        dense_time = TimeDistributed(decoder_dense, name='time_distributed_layer')
        decoder_outputs = dense_time(decoder_outputs)

        self.model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=[decoder_outputs])
        self.model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
The error comes from the BahdanauAttention class's __call__() function. It does not return layer outputs, which causes the ValueError to be raised when the model is defined in the Chatbot class. I traced the error back to the point where I added the line decoder_outputs = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_weights]). Clearly, the attention_weights coming out of the attention class is a plain tensor rather than the output of a layer.
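As far as I understand, Keras decides this by checking for layer metadata (_keras_history) on each output tensor. This little standalone snippet (my own check, not taken from the model above) shows the difference:

import tensorflow as tf
from tensorflow.python.keras.layers import Input, Dense

x = Input(shape=(3,))
layer_out = Dense(2)(x)              # produced by a Keras layer
raw_out = tf.nn.softmax(layer_out)   # produced by a raw TF op

print(hasattr(layer_out, '_keras_history'))   # True  -> accepted by Model()
print(hasattr(raw_out, '_keras_history'))     # False -> gives the ValueError above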
As I mentioned, I have tried to rewrite the attention class to use layers instead of TensorFlow functions. This is the original class:
class BahdanauAttention(tf.keras.Model):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        # hidden shape == (batch_size, hidden size)
        # hidden_with_time_axis shape == (batch_size, 1, hidden size)
        # we are doing this to perform addition to calculate the score
        hidden_with_time_axis = tf.expand_dims(query, 1)

        # score shape == (batch_size, max_length, hidden_size)
        score = self.V(tf.nn.tanh(
            self.W1(values) + self.W2(hidden_with_time_axis)))

        # attention_weights shape == (batch_size, max_length, 1)
        # we get 1 at the last axis because we are applying score to self.V
        attention_weights = tf.nn.softmax(score, axis=1)

        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights
The class can also be found here: https://www.tensorflow.org/alpha/tutorials/text/nmt_with_attention
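My current guess is that the whole attention block needs to be a single tf.keras.layers.Layer subclass rather than a Model, so that everything it computes comes out of one layer call and keeps the metadata. This is only a rough sketch of what I have in mind (the class name and the list-style inputs are my own choice):

from tensorflow.python.keras import backend as K
from tensorflow.python.keras.layers import Dense, Layer

class BahdanauAttentionLayer(Layer):
    ''' Attention rewritten as a single custom layer (sketch). '''
    def __init__(self, units, **kwargs):
        super(BahdanauAttentionLayer, self).__init__(**kwargs)
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def call(self, inputs):
        # inputs == [query (batch, hidden), values (batch, max_len, hidden)]
        query, values = inputs
        # backend ops should be fine inside call(), since the outer layer call
        # attaches the layer metadata to whatever it returns
        hidden_with_time_axis = K.expand_dims(query, axis=1)
        score = self.V(K.tanh(self.W1(values) + self.W2(hidden_with_time_axis)))
        attention_weights = K.softmax(score, axis=1)
        context_vector = K.sum(attention_weights * values, axis=1)
        return [context_vector, attention_weights]

    def compute_output_shape(self, input_shape):
        query_shape, values_shape = input_shape
        return [(values_shape[0], values_shape[2]),
                (values_shape[0], values_shape[1], 1)]

which I would then call as attention_result, attention_weights = BahdanauAttentionLayer(LATENT_DIM, name='attention_layer')([encoder_state, encoder_outputs]). Is that the right direction, or am I missing something?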
I hope this question is not too broad. Thank you.