Я пытаюсь задействовать механизм внимания при возврате тензора. Я получаю следующую ошибку
ValueError: Shape mismatch: The shape of labels (received (64, 53)) should equal the shape of logits except for the last dimension (received (64, 1, 500)).
Please find the below code
Вот код для внимания, пожалуйста, поправьте меня, если он неправильный
class Attention(tf.keras.layers.Layer):
def __init__(self):
super().__init__()
def call(self,enc_op,hidden_state):
# print(enc_op.shape,hidden_state.shape)
query_with_time_axis = tf.expand_dims(hidden_state, 1)
context_vector = tf.matmul(enc_op,tf.transpose(query_with_time_axis,perm=[0,2,1]))
context_vector = tf.nn.softmax(context_vector,axis=1)
context_vector = context_vector * enc_op
context_vector = tf.reduce_sum(context_vector, axis=1)
return context_vector
Вот часть декодера, на которую я обращаю внимание класс Decoder (tf.keras.layers.Layer): def init (self, vocab_size, embedding_dim, input_length, dec_units): super (). init () self.vocab_size = vocab_size self.embedding_dim = embedding_dim self.dec_units = dec_units self.input_length = input_length self.attention = Attention ()
def build(self,input_shape):
self.embedding = Embedding(input_dim=self.vocab_size,output_dim = self.embedding_dim,input_shape = input_shape,
mask_zero = True, name = "embedding_layer_decoder")
self.lstm = LSTM(self.dec_units,return_sequences=True,return_state=True,name = "Decoder_LSTM")
def call(self,target_sentances,enc_op,hidden_state,cell_state):
target_embed = self.embedding(target_sentances)
for i in range(target_embed.shape[1]):
context_vector = self.attention(enc_op,hidden_state)
y = tf.concat([context_vector, target_embed[:,i,:]], axis=-1)
y = tf.expand_dims(y, 1)
lstm_output,hidden_state,_ = self.lstm(y,initial_state = [hidden_state,cell_state])
return lstm_output
class Mymodel(Model):
def __init__(self,encoder_inputs_length,decoder_inputs_length,output_vocab_size):
super().__init__()
self.encoder = Encoder(vocab_size = 500, embedding_dim = 50, input_length = encoder_inputs_length, enc_units=64)
self.decoder = Decoder(vocab_size = 500, embedding_dim = 50, input_length = decoder_inputs_length, dec_units=64)
self.dense = Dense(output_vocab_size,activation = "softmax")
def call(self,data):
input,output = data[0],data[1]
print(input.shape,output.shape)
encoder_output,encoder_h,encoder_c = self.encoder(input)
print("="*20, "ENCODER", "="*20)
print("-"*35)
print(encoder_output)
print("ENCODER ==> OUTPUT SHAPE",encoder_output.shape)
print("ENCODER ==> HIDDEN STATE SHAPE",encoder_h.shape)
print("ENCODER ==> CELL STATE SHAPE", encoder_c.shape)
print("="*20,"Decoder","="*20)
decoder_output = self.decoder(output,encoder_output,encoder_h,encoder_c)
output1 = self.dense(decoder_output)
print("-"*35)
print("Final output shape",output.shape)
print("="*50)
return output1
model = Mymodel(encoder_inputs_length=30,decoder_inputs_length=20,output_vocab_size=500)
ENCODER_SEQ_LEN = 30
DECODER_SEQ_LEN = 20
optimizer = tf.keras.optimizers.Adam()
model.compile(optimizer=optimizer,loss=tf.keras.losses.SparseCategoricalCrossentropy())
for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
model.fit([inp, targ], targ, steps_per_epoch=1)
Форма моего ввода и цель - (64, 55) (64, 53) 64 - размер пакета