Trying to implement a CNN-LSTM seq2seq model in TensorFlow

I have been trying to implement a CNN-LSTM for text summarization, but training gets stuck at a loss of about 30 and never decreases.

I use a word embedding layer and then feed the CNN output into the LSTM in the encoder layer.

After more than 60k training steps the loss has not improved below 30, and I am wondering what the problem is.

import tensorflow as tf


def weights_init(shape):
   # Weights initialized from a truncated normal distribution
   return tf.Variable(tf.truncated_normal(shape=shape, stddev=0.05))

def bias_init(shape):
   return tf.Variable(tf.zeros(shape=shape))

def embeding_layer(word_embedings, inputs, forward_only, glove):

  # Look up the embedding vectors for the input token ids
  embed = tf.nn.embedding_lookup(word_embedings, inputs)
  embed_expended = tf.expand_dims(embed, -1)  # expand dims to 4-D for the conv layer
  return embed_expended

def text_conv(input, filter_size, number_of_channels, number_of_filters, strides=(1, 1), activation=tf.nn.relu, max_pool=True):

  # Convolve over the (seq_len x embed_size) "image" built from the embeddings
  weights = weights_init([filter_size, filter_size, number_of_channels, number_of_filters])
  bias = bias_init([number_of_filters])

  layer = tf.nn.conv2d(input, filter=weights, strides=[1, strides[0], strides[1], 1], padding='SAME') + bias

  if activation is not None:
    layer = activation(layer)

  if max_pool:
    # 2x2 max pooling halves both the time and the feature dimensions
    layer = tf.nn.max_pool(layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

  return layer


def flatten(layer, batch_size, seq_len):

     dims = layer.get_shape()
     number_of_elements = dims[2:].num_elements()  # features per (pooled) time step

     # The 2x2 max pool halved the time axis, hence seq_len/2 here
     reshaped_layer = tf.reshape(layer, [batch_size, int(seq_len/2), number_of_elements])
     return reshaped_layer, number_of_elements
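
For reference, a quick shape trace of these helpers on dummy data (a standalone sketch; the batch size, vocabulary size and embedding size are made-up values, not the model's real hyperparameters):

batch_size, seq_len, embed_size = 8, 100, 128

dummy_ids = tf.zeros([batch_size, seq_len], dtype=tf.int32)
dummy_embeddings = tf.zeros([10000, embed_size])

embed = embeding_layer(dummy_embeddings, dummy_ids, forward_only=False, glove=False)
conv = text_conv(embed, filter_size=5, number_of_channels=1, number_of_filters=32)
flat, num_elements = flatten(conv, batch_size, seq_len)

print(embed.shape)   # (8, 100, 128, 1)  - 4-D input for conv2d
print(conv.shape)    # (8, 50, 64, 32)   - the 2x2 max pool halves both spatial dims
print(flat.shape)    # (8, 50, 2048)     - [batch, seq_len/2, 64*32]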





with tf.name_scope("encoder"):
        self.embeddings = tf.get_variable("embeddings", initializer=init_embeddings)

        fw_cells = [self.cell(self.num_hidden) for _ in range(self.num_layers)]
        bw_cells = [self.cell(self.num_hidden) for _ in range(self.num_layers)]
        fw_cells = [rnn.DropoutWrapper(cell) for cell in fw_cells]
        bw_cells = [rnn.DropoutWrapper(cell) for cell in bw_cells]

        #convolutional part
        embed = embeding_layer(self.embeddings, self.X , forward_only , args.glove)
        self.conv_filter_size=5
        self.conv_filters=32
        convolutional_part = text_conv(embed, self.conv_filter_size, 1, self.conv_filters)
        conv_flatten, num_elements = flatten(convolutional_part, self.batch_size, article_max_len)

        #encoder_outputs, encoder_state_fw, encoder_state_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
        #    fw_cells, bw_cells, self.encoder_emb_inp,
        #    sequence_length=self.X_len, time_major=True, dtype=tf.float32)
        # Make the conv output time-major: [seq_len/2, batch, features]
        self.encoder_emb_inp = tf.transpose(conv_flatten, perm=[1, 0, 2])
        encoder_outputs, encoder_state_fw, encoder_state_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
            fw_cells, bw_cells, self.encoder_emb_inp,
            sequence_length=self.X_len, time_major=True, dtype=tf.float32)
...
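
And a minimal, self-contained sketch of how the bidirectional stack consumes that time-major tensor (TF 1.x); the cell type, hidden size, layer count and the pooled lengths below are assumptions for illustration, not the model's real settings:

import tensorflow as tf
from tensorflow.contrib import rnn

batch_size, pooled_len, features = 8, 50, 2048   # article_max_len/2 time steps after pooling
num_hidden, num_layers = 64, 2

encoder_emb_inp = tf.zeros([pooled_len, batch_size, features])   # [time, batch, features]
X_len = tf.fill([batch_size], pooled_len)   # lengths sized to the pooled time axis (an assumption here)

fw_cells = [rnn.DropoutWrapper(rnn.BasicLSTMCell(num_hidden)) for _ in range(num_layers)]
bw_cells = [rnn.DropoutWrapper(rnn.BasicLSTMCell(num_hidden)) for _ in range(num_layers)]

encoder_outputs, state_fw, state_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
    fw_cells, bw_cells, encoder_emb_inp,
    sequence_length=X_len, time_major=True, dtype=tf.float32)

print(encoder_outputs.shape)   # (50, 8, 128) - last dim is 2 * num_hidden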