I tried to implement a CNN-LSTM model for text summarization, but training gets stuck at a loss of about 30 and never goes down.
I use a word embedding layer and then feed the CNN output into the LSTM in the encoder.
After more than 60k training steps the loss has not improved below ~30, and I am wondering what the problem could be.
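For context, this is roughly how I expect the tensor shapes to flow from the embedding through the CNN into the LSTM encoder. The concrete sizes below are just placeholder values (not my actual config); the helper functions themselves are shown further down.

# Expected tensor shapes through the encoder input pipeline
# (example sizes only; my real batch_size / article_max_len / embedding_size differ)
batch_size, article_max_len, embedding_size, conv_filters = 16, 50, 300, 32

embed_shape   = (batch_size, article_max_len, embedding_size, 1)                        # after embedding lookup + expand_dims
conv_shape    = (batch_size, article_max_len // 2, embedding_size // 2, conv_filters)   # after SAME conv + 2x2 max pooling
flat_shape    = (batch_size, article_max_len // 2, (embedding_size // 2) * conv_filters)  # after flatten
encoder_shape = (flat_shape[1], flat_shape[0], flat_shape[2])                           # time-major input to the bi-LSTM

print(embed_shape, conv_shape, flat_shape, encoder_shape)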
import tensorflow as tf
from tensorflow.contrib import rnn


def weights_init(shape):
    # truncated-normal weight initialization
    return tf.Variable(tf.truncated_normal(shape=shape, stddev=0.05))


def bias_init(shape):
    # zero bias initialization
    return tf.Variable(tf.zeros(shape=shape))


def embeding_layer(word_embedings, inputs, forward_only, glove):
    # look up word vectors and add a channel dimension so the result is 4-D for conv2d
    embed = tf.nn.embedding_lookup(word_embedings, inputs)
    embed_expended = tf.expand_dims(embed, -1)
    return embed_expended


def text_conv(input, filter_size, number_of_channels, number_of_filters,
              strides=(1, 1), activation=tf.nn.relu, max_pool=True):
    # single conv layer over the embedded sequence; 2x2 max pooling halves
    # both the time and the embedding dimensions
    weights = weights_init([filter_size, filter_size, number_of_channels, number_of_filters])
    bias = bias_init([number_of_filters])
    layer = tf.nn.conv2d(input, filter=weights, strides=[1, strides[0], strides[1], 1], padding='SAME')
    layer = tf.nn.bias_add(layer, bias)
    if activation is not None:
        layer = activation(layer)
    if max_pool:
        layer = tf.nn.max_pool(layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    return layer


def flatten(layer, batch_size, seq_len):
    # collapse the (pooled embedding, filters) dimensions into one feature
    # vector per time step; the time dimension was halved by max pooling
    dims = layer.get_shape()
    number_of_elements = dims[2:].num_elements()
    reshaped_layer = tf.reshape(layer, [batch_size, int(seq_len / 2), number_of_elements])
    return reshaped_layer, number_of_elements
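As a sanity check, running the helpers above on a dummy input (made-up sizes, not my real data) gives shapes that look consistent to me:

import numpy as np

# dummy 4-D input: (batch, seq_len, embedding, channels)
dummy = tf.constant(np.random.randn(2, 10, 64, 1).astype(np.float32))
conv_out = text_conv(dummy, filter_size=5, number_of_channels=1, number_of_filters=32)
flat_out, n_feat = flatten(conv_out, batch_size=2, seq_len=10)
print(conv_out.shape, flat_out.shape, n_feat)  # (2, 5, 32, 32), (2, 5, 1024), 1024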
with tf.name_scope("encoder"):
    self.embeddings = tf.get_variable("embeddings", initializer=init_embeddings)
    fw_cells = [self.cell(self.num_hidden) for _ in range(self.num_layers)]
    bw_cells = [self.cell(self.num_hidden) for _ in range(self.num_layers)]
    fw_cells = [rnn.DropoutWrapper(cell) for cell in fw_cells]
    bw_cells = [rnn.DropoutWrapper(cell) for cell in bw_cells]

    # convolutional part: embed the inputs, run them through the conv layer,
    # then flatten back to (batch, time, features)
    embed = embeding_layer(self.embeddings, self.X, forward_only, args.glove)
    self.conv_filter_size = 5
    self.conv_filters = 32
    convolutional_part = text_conv(embed, self.conv_filter_size, 1, self.conv_filters)
    conv_flatten, num_elements = flatten(convolutional_part, self.batch_size, article_max_len)

    # encoder_outputs, encoder_state_fw, encoder_state_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
    #     fw_cells, bw_cells, self.encoder_emb_inp,
    #     sequence_length=self.X_len, time_major=True, dtype=tf.float32)

    # transpose to time-major and feed the CNN features into the bi-LSTM encoder
    self.encoder_emb_inp = tf.transpose(conv_flatten, perm=[1, 0, 2])
    encoder_outputs, encoder_state_fw, encoder_state_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
        fw_cells, bw_cells, self.encoder_emb_inp,
        sequence_length=self.X_len, time_major=True, dtype=tf.float32)