Why is the result always the same after training my RNN on a classification task?
0 votes / 11 July 2020

The neural network consists of one LSTM layer, three fully connected layers with relu as the activation function, and an output layer with a sigmoid activation. The shape of my input data is (batch_size, time_step, hidden_units). The inputs fed to the network vary, yet the output is almost always the same (with only tiny differences), and I cannot figure out what the problem is. The LSTM network looks like this:

import tensorflow as tf

class RNN_eval(object):
    def __init__(
            self,
            cname,
            n_steps,
            input_size,
            learning_rate,
            full1_neurons,
            full2_neurons,
            full3_neurons,
            output_size
    ):

        self.name = cname

        self.n_steps = n_steps
        self.input_size = input_size
        self.lr = learning_rate
        self.full1_neurons = full1_neurons
        self.full2_neurons = full2_neurons
        self.full3_neurons = full3_neurons
        self.output_size = output_size

        self.w_initializer = tf.random_normal_initializer(0., 0.3)
        self.b_initializer = tf.constant_initializer(0.1)

        with tf.name_scope('eval_inputs'):
            self.s = tf.placeholder(tf.float32, [None, self.n_steps, self.input_size], name='input')
            self.q_target = tf.placeholder(tf.float32, [None, output_size], name='q_target')
            self.batch_size = tf.placeholder(tf.int32, [], name='batch_size')
        with tf.name_scope('eval_LSTM_cell'):
            self.add_cell()
        with tf.name_scope('eval_hidden_layers'):
            self.add_fullconnect_layer1()
            self.add_fullconnect_layer2()
            self.add_fullconnect_layer3()
            self.add_output_layer()

        with tf.variable_scope('loss'):
            self.loss = tf.reduce_mean(tf.squared_difference(self.q_target, self.q_eval))  # build the loss from the estimated Q and the target Q
        with tf.variable_scope('train'):
            self._train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss)  # run the training step

    def add_cell(self):

        with tf.name_scope('eval_lstm'):
            eval_lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=self.input_size, state_is_tuple=True, name='eval_lstm')
        with tf.name_scope('eval_initial_state'):
            self.cell_init_state = eval_lstm_cell.zero_state(self.batch_size, dtype=tf.float32)
        self.cell_outputs, self.cell_final_state = tf.nn.dynamic_rnn(
            eval_lstm_cell, self.s, dtype=tf.float32, time_major=False
        )

    def add_fullconnect_layer1(self):

        # take only the LSTM output of the last time step as input to the fully connected layers
        l_out_x = tf.unstack(tf.transpose(self.cell_outputs, [1, 0, 2]))
        w1 = tf.get_variable('w1', [self.input_size, self.full1_neurons], initializer=self.w_initializer, collections=self.name)
        b1 = tf.get_variable('b1', [self.full1_neurons, ], initializer=self.b_initializer, collections=self.name)
        with tf.name_scope('eval_full_connected1'):
            self.full_out1 = tf.nn.relu(tf.matmul(l_out_x[-1], w1) + b1)

    def add_fullconnect_layer2(self):

        w2 = tf.get_variable('w2', [self.full1_neurons, self.full2_neurons], initializer=self.w_initializer, collections=self.name)
        b2 = tf.get_variable('b2', [self.full2_neurons, ], initializer=self.b_initializer, collections=self.name)
        with tf.name_scope('eval_full_connected2'):
            self.full_out2 = tf.nn.relu(tf.matmul(self.full_out1, w2) + b2)

    def add_fullconnect_layer3(self):

        w3 = tf.get_variable('w3', [self.full2_neurons, self.full3_neurons], initializer=self.w_initializer, collections=self.name)
        b3 = tf.get_variable('b3', [self.full3_neurons, ], initializer=self.b_initializer, collections=self.name)
        with tf.name_scope('eval_full_connected3'):
            self.full_out3 = tf.nn.relu(tf.matmul(self.full_out2, w3) + b3)

    def add_output_layer(self):
        w_out = tf.get_variable('w_out', [self.full3_neurons, self.output_size], initializer=self.w_initializer, collections=self.name)
        b_out = tf.get_variable('b_out', [self.output_size, ], initializer=self.b_initializer, collections=self.name)
        with tf.name_scope('eval_output'):
            self.q_eval = tf.nn.sigmoid(tf.matmul(self.full_out3, w_out) + b_out, name="eval_op")
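
For reference, a minimal usage sketch (not from the original post; TensorFlow 1.x assumed): it builds the graph with arbitrary hyperparameters and runs a single forward pass on random input, which is a quick way to check whether q_eval actually varies across samples. All names and numbers below are illustrative, and cname is assumed here to be a list of variable collection names.

import numpy as np
import tensorflow as tf

tf.reset_default_graph()
net = RNN_eval(
    cname=[tf.GraphKeys.GLOBAL_VARIABLES],  # hypothetical collections list for tf.get_variable
    n_steps=10,
    input_size=8,
    learning_rate=0.001,
    full1_neurons=64,
    full2_neurons=32,
    full3_neurons=16,
    output_size=2,
)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    dummy = np.random.randn(4, 10, 8).astype(np.float32)  # (batch_size, time_step, input_size)
    # for varied inputs the printed rows of q_eval should differ from each other
    print(sess.run(net.q_eval, feed_dict={net.s: dummy}))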

The training part looks like this (q_target can be regarded as the label of the input data; q_eval is the result of forward propagation):

if self.learn_step_counter == 0:
    # first learning step: let the LSTM start from its zero state
    feed_dic = {
        self.eval_net.s: batch_memory[:, :, :self.n_features],
        self.eval_net.q_target: q_target,
        self.eval_net.batch_size: 32,
        self.eval_net.q_eval: q_eval
    }
else:
    # later steps: also feed the LSTM state kept from the previous run
    feed_dic = {
        self.eval_net.s: batch_memory[:, :, :self.n_features],
        self.eval_net.q_target: q_target,
        self.eval_net.batch_size: 32,
        self.eval_net.q_eval: q_eval,
        self.eval_net.cell_init_state: self.last_state
    }

_, self.cost, self.last_state = self.sess.run(
    [self.eval_net._train_op, self.eval_net.loss, self.eval_net.cell_final_state],
    feed_dict=feed_dic)
self.cost_his.append(self.cost)  # record the loss of this backprop step
...
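
As a side note on the API: when a recurrent state is meant to be carried between session runs, tf.nn.dynamic_rnn is usually given the cell's zero state through its initial_state argument, and the final state returned by one run is fed back in on the next. Below is a minimal, self-contained sketch of that pattern (TensorFlow 1.x assumed; shapes and names are illustrative only, not the author's code):

import numpy as np
import tensorflow as tf

tf.reset_default_graph()
n_steps, input_size, num_units = 10, 8, 8
s = tf.placeholder(tf.float32, [None, n_steps, input_size], name='input')
batch_size = tf.placeholder(tf.int32, [], name='batch_size')

cell = tf.contrib.rnn.BasicLSTMCell(num_units=num_units, state_is_tuple=True)
init_state = cell.zero_state(batch_size, dtype=tf.float32)  # zero state for the very first run
outputs, final_state = tf.nn.dynamic_rnn(cell, s, initial_state=init_state, time_major=False)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = np.random.randn(32, n_steps, input_size).astype(np.float32)
    # first run: start from the zero state
    _, last_state = sess.run([outputs, final_state], {s: batch, batch_size: 32})
    # later runs: feed the previous final state back into init_state
    _, last_state = sess.run([outputs, final_state],
                             {s: batch, batch_size: 32, init_state: last_state})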