Нейронная сеть состоит из одного слоя LSTM, 3 полностью связанных слоев, где relu является функцией активации, и выходного слоя с функцией активации сигмоида. Форма моих входных данных (batch_size, time_step, hidden_units), входные данные для сети разнообразны, а выходные данные почти всегда одинаковы (с небольшими различиями). Не знаю, в чем проблема. Сеть LSTM выглядит следующим образом:
class RNN_eval(object):
def __init__(
self,
cname,
n_steps,
input_size,
learning_rate,
full1_neurons,
full2_neurons,
full3_neurons,
output_size
):
self.name = cname
self.n_steps = n_steps
self.input_size = input_size
self.lr = learning_rate
self.full1_neurons = full1_neurons
self.full2_neurons = full2_neurons
self.full3_neurons = full3_neurons
self.output_size = output_size
self.w_initializer = tf.random_normal_initializer(0., 0.3)
self.b_initializer = tf.constant_initializer(0.1)
with tf.name_scope('eval_inputs'):
self.s = tf.placeholder(tf.float32, [None, self.n_steps, self.input_size], name='input')
self.q_target = tf.placeholder(tf.float32, [None, output_size], name='q_target')
self.batch_size = tf.placeholder(tf.int32, [], name='batch_size')
with tf.name_scope('eval_LSTM_cell'):
self.add_cell()
with tf.name_scope('eval_hidden_layers'):
self.add_fullconnect_layer1()
self.add_fullconnect_layer2()
self.add_fullconnect_layer3()
self.add_output_layer()
with tf.variable_scope('loss'):
self.loss = tf.reduce_mean(tf.squared_difference(self.q_target, self.q_eval)) # 基于Q估计与Q现实,构造loss-function
with tf.variable_scope('train'):
self._train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss) # 进行训练
def add_cell(self):
with tf.name_scope('eval_lstm'):
eval_lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=self.input_size, state_is_tuple=True, name='eval_lstm')
with tf.name_scope('eval_initial_state'):
self.cell_init_state = eval_lstm_cell.zero_state(self.batch_size, dtype=tf.float32)
self.cell_outputs, self.cell_final_state = tf.nn.dynamic_rnn(
eval_lstm_cell, self.s, dtype=tf.float32, time_major=False
)
def add_fullconnect_layer1(self):
l_out_x = tf.unstack(tf.transpose(self.cell_outputs, [1, 0, 2]))
w1 = tf.get_variable('w1', [self.input_size, self.full1_neurons], initializer=self.w_initializer, collections=self.name)
b1 = tf.get_variable('b1', [self.full1_neurons, ], initializer=self.b_initializer, collections=self.name)
with tf.name_scope('eval_full_connected1'):
self.full_out1 = tf.nn.relu(tf.matmul(l_out_x[-1], w1) + b1)
def add_fullconnect_layer2(self):
w2 = tf.get_variable('w2', [self.full1_neurons, self.full2_neurons], initializer=self.w_initializer,collections=self.name)
b2 = tf.get_variable('b2', [self.full2_neurons, ], initializer=self.b_initializer, collections=self.name)
with tf.name_scope('eval_full_connected2'):
self.full_out2 = tf.nn.relu(tf.matmul(self.full_out1, w2) + b2)
def add_fullconnect_layer3(self):
w3 = tf.get_variable('w3', [self.full2_neurons, self.full3_neurons], initializer=self.w_initializer, collections=self.name)
b3 = tf.get_variable('b3', [self.full3_neurons, ], initializer=self.b_initializer, collections=self.name)
with tf.name_scope('eval_full_connected3'):
self.full_out3 = tf.nn.relu(tf.matmul(self.full_out2, w3) + b3)
def add_output_layer(self):
w_out = tf.get_variable('w_out', [self.full3_neurons, self.output_size], initializer=self.w_initializer, collections=self.name)
b_out = tf.get_variable('b_out', [self.output_size, ], initializer=self.b_initializer, collections=self.name)
with tf.name_scope('eval_output'):
self.q_eval = tf.nn.sigmoid(tf.matmul(self.full_out3, w_out) + b_out, name="eval_op")
, а обучающая часть выглядит следующим образом: (q_target можно рассматривать как метку входных данных q_eval - результат прямого распространения)
if self.learn_step_counter == 0:
feed_dic = {
self.eval_net.s: batch_memory[:, :, :self.n_features],
self.eval_net.q_target: q_target,
self.eval_net.batch_size: 32,
self.eval_net.q_eval: q_eval
}
else:
feed_dic = {
self.eval_net.s: batch_memory[:, :, :self.n_features],
self.eval_net.q_target: q_target,
self.eval_net.batch_size: 32,
self.eval_net.q_eval: q_eval,
self.eval_net.cell_init_state: self.last_state
}
_, self.cost, self.last_state = self.sess.run([self.eval_net._train_op, self.eval_net.loss, self.eval_net.cell_final_state],
feed_dict=feed_dic)
self.cost_his.append(self.cost) # 反向训练