I'm writing code for a deep learning model; it contains two classes, a DQN network and an agent. I want to visualize the weights and biases in each layer and see whether they change as the network trains. There are two networks: one that predicts the Q-values of the current state (the q_eval network) and one that predicts the target Q-values (the q_next network). I want to visualize the weights of each of these networks. There may also be an error in the network architecture; if you could help me with that as well, that would be great.
I tried tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES), but it does not work as expected; maybe I'm using it incorrectly.
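For reference, this is roughly what I expected to be able to do for each of the two networks. It is only a minimal, self-contained sketch, not my actual model: the scope name 'q_eval' and the single toy dense layer are placeholders.

import tensorflow as tf

# Build a tiny graph under a named scope, then fetch its trainable variables
# and attach a histogram summary to each one.
with tf.variable_scope('q_eval'):
    x = tf.placeholder(tf.float32, shape=[None, 4], name='inputs')
    out = tf.layers.dense(x, units=2, name='dense')  # toy layer standing in for the real network

params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='q_eval')
histograms = [tf.summary.histogram(v.name[:-2], v) for v in params]
merged = tf.summary.merge(histograms)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter('tmp/log_dir', sess.graph)
    # Histograms depend only on the variables, so no feed_dict is needed here.
    writer.add_summary(sess.run(merged), 0)
    writer.flush()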
import os
import time

import numpy as np
import tensorflow as tf

class DeepQNetwork(object):
    def __init__(self, lr, n_actions, name, fc1_dims=1024,
                 #input_dims=(210, 160, 4),
                 input_dims=(3, 4), chkpt_dir="tmp/dqn"):
        self.lr = lr
        self.name = name
        self.n_actions = n_actions
        self.fc1_dims = fc1_dims
        self.chkpt_dir = chkpt_dir
        self.input_dims = input_dims
        self.sess = tf.Session()
        self.build_network()
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()
        self.checkpoint_file = os.path.join(chkpt_dir, "deepqnet.ckpt")
        self.params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                        scope=self.name)
        # tf.get_collection returns the list of values in the collection with
        # the given name, or an empty list if nothing has been added to it.
        # Trainable variables are those whose values are updated during optimisation.
        self.write_op = tf.summary.merge([self.accuracy_sum, self.loss_sum, self.summ])
        self.writer = tf.summary.FileWriter("tmp/log_dir")
        self.writer.add_graph(self.sess.graph)
    def build_network(self):
        with tf.variable_scope(self.name):
            self.input = tf.placeholder(tf.float32, shape=[None, *self.input_dims],
                                        name='inputs')
            # The * unpacks input_dims, so the placeholder shape becomes
            # [None, dim_1, dim_2, ...].
            self.actions = tf.placeholder(tf.float32, shape=[None, self.n_actions],
                                          name='action_taken')
            self.q_target = tf.placeholder(tf.float32, shape=[None, self.n_actions],
                                           name='q_target')
            # The 1st dimension of the shape is None because we want to pass
            # batches of stacked frames into the network.
            conv1 = tf.layers.conv2d(inputs=self.input, filters=32,
                                     kernel_size=(8, 8), strides=4, name='conv1',
                                     kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2))
            conv1_activated = tf.nn.relu(conv1)
            conv2 = tf.layers.conv2d(inputs=conv1_activated, filters=64,
                                     kernel_size=(4, 4), strides=2, name='conv2',
                                     kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2))
            conv2_activated = tf.nn.relu(conv2)
            conv3 = tf.layers.conv2d(inputs=conv2_activated, filters=128,
                                     kernel_size=(3, 3), strides=1, name='conv3',
                                     kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2))
            conv3_activated = tf.nn.relu(conv3)
            flat = tf.contrib.layers.flatten(conv3_activated)
            dense1 = tf.layers.dense(flat, units=self.fc1_dims, activation=tf.nn.relu,
                                     kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2))
            self.Q_values = tf.layers.dense(dense1, units=self.n_actions,
                                            kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2))
            self.q = tf.reduce_sum(tf.multiply(self.Q_values, self.actions))
            self.accuracy_sum = tf.summary.scalar('Accuracy', self.q)
            self.loss = tf.reduce_mean(tf.square(self.q - self.q_target))
            self.loss_sum = tf.summary.scalar("Loss", self.loss)
            self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss)
            for var in tf.trainable_variables():
                print(var.name[:-2])
                self.summ = tf.summary.histogram(var.name[:-2], var)
class Agent(object):
    def __init__(self, alpha, gamma, mem_size, n_actions, epsilon, batch_size,
                 replace_target=10000, input_dims=(210, 160, 4),
                 q_next_dir="tmp/q_next", q_eval_dir="tmp/q_eval"):
        self.n_actions = n_actions
        self.action_space = [i for i in range(self.n_actions)]
        # For n_actions = 3, action_space is the list [0, 1, 2].
        self.gamma = gamma
        self.mem_size = mem_size
        self.mem_cntr = 0
        self.epsilon = epsilon
        self.batch_size = batch_size
        self.replace_target = replace_target
        self.q_next = DeepQNetwork(alpha, n_actions, input_dims=input_dims,
                                   name='q_next', chkpt_dir=q_next_dir)
        self.q_eval = DeepQNetwork(alpha, n_actions, input_dims=input_dims,
                                   name='q_eval', chkpt_dir=q_eval_dir)
    def learn(self):
        if self.mem_cntr % self.replace_target == 0:
            self.update_graph()
            # We update the target network every replace_target steps so that
            # q_target does not fluctuate.
        max_mem = self.mem_cntr if self.mem_cntr < self.mem_size else self.mem_size
        batch = np.random.choice(max_mem, self.batch_size)
        # batch is an array of batch_size indices sampled from np.arange(max_mem).
        state_batch = self.state_memory[batch]
        # Shape of the state batch is (batch_size, *input_dims),
        # e.g. (32, 210, 160, 4).
        action_batch = self.action_memory[batch]
        action_values = np.array([0, 1, 2], dtype=np.int8)
        action_indices = np.dot(action_batch, action_values)
        # action_memory stores one-hot action vectors; the dot product with
        # [0, 1, 2] recovers the integer index of the action taken.
        reward_batch = self.reward_memory[batch]
        new_state_batch = self.new_state_memory[batch]
        terminal_batch = self.terminal_memory[batch]
        q_eval = self.q_eval.sess.run(self.q_eval.Q_values,
                                      feed_dict={self.q_eval.input: state_batch})
        # Shape (batch_size, n_actions): Q-values of the current states,
        # predicted by the q_eval network (3 actions in this case).
        q_next = self.q_next.sess.run(self.q_next.Q_values,
                                      feed_dict={self.q_next.input: new_state_batch})
        # Q-values of the next states, predicted by the q_next network.
        q_target = q_eval.copy()
        idx = np.arange(self.batch_size)
        q_target[idx, action_indices] = reward_batch + \
            self.gamma*np.max(q_next, axis=1)*terminal_batch
        # axis=1 takes the maximum over the actions for each sample in the batch.
        #q_target = np.zeros(self.batch_size)
        #q_target = reward_batch + self.gamma*np.max(q_next, axis=1)*terminal_batch
        _ = self.q_eval.sess.run(self.q_eval.train_op,
                                 feed_dict={self.q_eval.input: state_batch,
                                            self.q_eval.actions: action_batch,
                                            self.q_eval.q_target: q_target})
        loss = self.q_eval.sess.run(self.q_eval.loss,
                                    feed_dict={self.q_eval.input: state_batch,
                                               self.q_eval.actions: action_batch,
                                               self.q_eval.q_target: q_target})
        summary = self.q_eval.sess.run(self.q_eval.write_op,
                                       feed_dict={self.q_eval.input: state_batch,
                                                  self.q_eval.actions: action_batch,
                                                  self.q_eval.q_target: q_target,
                                                  self.q_next.input: new_state_batch})
        self.q_eval.writer.add_summary(summary, time.time())
        self.q_eval.writer.flush()
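One method that is referenced above but not shown is update_graph(). Roughly, it just copies the q_eval parameters into q_next to refresh the target network; take the following as a sketch of that idea rather than my exact code:

    def update_graph(self):
        # Copy every trainable variable of q_eval into the corresponding
        # variable of q_next (target network sync).
        t_params = self.q_next.params
        e_params = self.q_eval.params
        for t, e in zip(t_params, e_params):
            self.q_eval.sess.run(tf.assign(t, e))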
When I run this code, I can only visualize the biases of a single layer, namely the dense-layer biases of the q_eval network.
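To sanity-check the indexing used for q_target in learn(), here is a tiny numpy-only example with made-up numbers (batch of 2, 3 actions):

import numpy as np

q_eval = np.array([[1.0, 2.0, 3.0],    # Q(s, a) for sample 0
                   [0.5, 0.1, 0.2]])   # Q(s, a) for sample 1
q_next = np.array([[0.0, 4.0, 1.0],
                   [2.0, 0.0, 0.0]])
action_indices = np.array([2, 0])      # actions actually taken
reward_batch = np.array([1.0, -1.0])
terminal_batch = np.array([1.0, 0.0])  # 0 marks a terminal next state
gamma = 0.99

q_target = q_eval.copy()
idx = np.arange(2)
q_target[idx, action_indices] = reward_batch + gamma * np.max(q_next, axis=1) * terminal_batch
print(q_target)
# [[ 1.    2.    4.96]
#  [-1.    0.1   0.2 ]]

Only the entry of the taken action is overwritten with the Bellman target; the other entries keep the values predicted by q_eval.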