Getting a shape mismatch error between the shape of labels and logits?
1 vote
June 20, 2020

I am trying to incorporate an attention mechanism into an encoder-decoder model, and I am getting the following error:

ValueError: Shape mismatch: The shape of labels (received (64, 53)) should equal the shape of logits except for the last dimension (received (64, 1, 500)).
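
For reference, SparseCategoricalCrossentropy expects the labels to match the logits in every dimension except the last one (the class dimension). A standalone check with the shapes from this model:

import tensorflow as tf

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
y_true = tf.zeros((64, 53), dtype=tf.int32)   # labels: (batch, target_seq_len)
y_pred = tf.zeros((64, 53, 500))              # logits: (batch, target_seq_len, vocab)
print(loss_fn(y_true, y_pred))                # OK; logits of shape (64, 1, 500) raise the ValueError above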

Please find the code below.

Here is the code for the attention layer; please correct me if it is wrong:

import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Model

class Attention(tf.keras.layers.Layer):
  def __init__(self):
    super().__init__()

  def call(self, enc_op, hidden_state):
    # enc_op: (batch, enc_seq_len, enc_units); hidden_state: (batch, dec_units)
    query_with_time_axis = tf.expand_dims(hidden_state, 1)
    # Dot-product score of the decoder state against every encoder timestep
    context_vector = tf.matmul(enc_op, tf.transpose(query_with_time_axis, perm=[0, 2, 1]))
    context_vector = tf.nn.softmax(context_vector, axis=1)   # attention weights over enc_seq_len
    context_vector = context_vector * enc_op                 # weight the encoder outputs
    context_vector = tf.reduce_sum(context_vector, axis=1)   # (batch, enc_units)
    return context_vector
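
This is standard dot-product (Luong-style) attention, and it requires enc_units == dec_units for the matmul to line up (both are 64 in the model below). A quick shape check with dummy tensors:

attn = Attention()
enc_op = tf.random.normal((64, 30, 64))     # (batch, enc_seq_len, enc_units)
hidden_state = tf.random.normal((64, 64))   # (batch, dec_units)
print(attn(enc_op, hidden_state).shape)     # (64, 64): one context vector per example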

Here is the decoder part where I call the attention layer:

class Decoder(tf.keras.layers.Layer):
  def __init__(self, vocab_size, embedding_dim, input_length, dec_units):
    super().__init__()
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.dec_units = dec_units
    self.input_length = input_length
    self.attention = Attention()

  def build(self, input_shape):
    self.embedding = Embedding(input_dim=self.vocab_size, output_dim=self.embedding_dim,
                               input_shape=input_shape,
                               mask_zero=True, name="embedding_layer_decoder")
    self.lstm = LSTM(self.dec_units, return_sequences=True, return_state=True, name="Decoder_LSTM")

  def call(self, target_sentances, enc_op, hidden_state, cell_state):
    target_embed = self.embedding(target_sentances)
    for i in range(target_embed.shape[1]):
      context_vector = self.attention(enc_op, hidden_state)
      y = tf.concat([context_vector, target_embed[:, i, :]], axis=-1)
      y = tf.expand_dims(y, 1)
      lstm_output, hidden_state, _ = self.lstm(y, initial_state=[hidden_state, cell_state])
      return lstm_output   # note: this return executes on the first loop iteration
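
The return statement above sits inside the for loop, so call exits after the very first timestep and the decoder emits shape (batch, 1, dec_units), which after the final Dense layer becomes the (64, 1, 500) logits in the error, while the labels are (64, 53). A minimal sketch of a call that decodes every timestep and concatenates the per-step outputs (same class as above; threading cell_state through the loop is my addition):

  def call(self, target_sentances, enc_op, hidden_state, cell_state):
    target_embed = self.embedding(target_sentances)
    outputs = []
    for i in range(target_embed.shape[1]):
      context_vector = self.attention(enc_op, hidden_state)
      y = tf.concat([context_vector, target_embed[:, i, :]], axis=-1)
      y = tf.expand_dims(y, 1)
      lstm_output, hidden_state, cell_state = self.lstm(y, initial_state=[hidden_state, cell_state])
      outputs.append(lstm_output)        # each step: (batch, 1, dec_units)
    return tf.concat(outputs, axis=1)    # (batch, target_seq_len, dec_units)

With this, the logits become (64, 53, 500) and line up with the (64, 53) labels.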

class Mymodel(Model):
  def __init__(self, encoder_inputs_length, decoder_inputs_length, output_vocab_size):
    super().__init__()
    self.encoder = Encoder(vocab_size=500, embedding_dim=50, input_length=encoder_inputs_length, enc_units=64)
    self.decoder = Decoder(vocab_size=500, embedding_dim=50, input_length=decoder_inputs_length, dec_units=64)
    self.dense = Dense(output_vocab_size, activation="softmax")

  def call(self, data):
    input, output = data[0], data[1]
    print(input.shape, output.shape)
    encoder_output, encoder_h, encoder_c = self.encoder(input)
    print("="*20, "ENCODER", "="*20)
    print("-"*35)
    print(encoder_output)
    print("ENCODER ==> OUTPUT SHAPE", encoder_output.shape)
    print("ENCODER ==> HIDDEN STATE SHAPE", encoder_h.shape)
    print("ENCODER ==> CELL STATE SHAPE", encoder_c.shape)
    print("="*20, "Decoder", "="*20)
    decoder_output = self.decoder(output, encoder_output, encoder_h, encoder_c)
    output1 = self.dense(decoder_output)
    print("-"*35)
    print("Final output shape", output1.shape)
    print("="*50)
    return output1
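
The Encoder class is not shown in the question; a hypothetical reconstruction, consistent only with the three values unpacked from it above (output, hidden state, cell state), could look like this:

# Hypothetical Encoder -- not part of the question, only consistent with how it is used above.
class Encoder(tf.keras.layers.Layer):
  def __init__(self, vocab_size, embedding_dim, input_length, enc_units):
    super().__init__()
    self.embedding = Embedding(input_dim=vocab_size, output_dim=embedding_dim, mask_zero=True)
    self.lstm = LSTM(enc_units, return_sequences=True, return_state=True)

  def call(self, input_sequence):
    x = self.embedding(input_sequence)
    # returns (batch, enc_seq_len, enc_units), (batch, enc_units), (batch, enc_units)
    return self.lstm(x)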

model = Mymodel(encoder_inputs_length=30, decoder_inputs_length=20, output_vocab_size=500)

ENCODER_SEQ_LEN = 30
DECODER_SEQ_LEN = 20

optimizer = tf.keras.optimizers.Adam()
model.compile(optimizer=optimizer, loss=tf.keras.losses.SparseCategoricalCrossentropy())
for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
  model.fit([inp, targ], targ, steps_per_epoch=1)
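
The loop around model.fit refits the model one batch at a time; once the decoder returns the full sequence, it is simpler to let fit iterate over the dataset itself. A sketch, assuming dataset yields (inp, targ) pairs:

train_ds = dataset.map(lambda inp, targ: ((inp, targ), targ))
model.fit(train_ds, epochs=1, steps_per_epoch=steps_per_epoch)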

The shapes of my input and target are (64, 55) and (64, 53), where 64 is the batch size.

...