How can I visualize the weights of each layer in TensorBoard?

I am writing code for a deep learning model; it contains two classes, a DQN network and an agent. I want to visualize the weights and biases of each layer and see whether they change as the network trains. There are two networks: one that predicts the Q-values of the current state (the q_eval network) and one that predicts the target Q-values (the q_next network). I want to visualize the weights of each of these networks. There may also be a mistake in the network architecture; if you could help me with that too, that would be great.

I tried tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES()), but it does not work as expected; maybe I am doing it wrong.
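What I had in mind is roughly this sketch (TF 1.x; note that the collection key is a constant and is not called, and the scope name 'q_eval' is just the one I use below):

    import tensorflow as tf

    # trainable variables of one network, filtered by its variable scope
    params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='q_eval')
    for var in params:
        print(var.name)  # e.g. q_eval/conv1/kernel:0, q_eval/conv1/bias:0, ...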

class DeepQNetwork(object):

def __init__(self, lr, n_actions, name, fc1_dims=1024,
             #input_dims=(210, 160, 4),
             input_dims=(3, 4), chkpt_dir="tmp/dqn"):
    self.lr = lr
    self.name = name
    self.n_actions = n_actions
    self.fc1_dims = fc1_dims
    self.chkpt_dir = chkpt_dir
    self.input_dims = input_dims
    self.sess = tf.Session()
    self.build_network()
    self.sess.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver()
    self.checkpoint_file = os.path.join(chkpt_dir, "deepqnet.ckpt")
    self.params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                    scope=self.name)
    self.write_op = tf.summary.merge([self.accuracy_sum, self.loss_sum, self.summ])
    self.writer = tf.summary.FileWriter("tmp/log_dir")
    self.writer.add_graph(self.sess.graph)


    # The list of values in the collection with the given name
    # or an empty list if no value has been added to that collection.
    # Trainable variables are the ones whose values are updated during optimisation.

def build_network(self):
    with tf.variable_scope(self.name):
        self.input = tf.placeholder(tf.float32, shape=[None, *self.input_dims],
                                    name='inputs')
        # * unpacks input_dims, so the placeholder shape becomes [None, dim1, dim2, ...].
        self.actions = tf.placeholder(tf.float32, shape=[None, self.n_actions],
                                     name='action_taken')
        self.q_target = tf.placeholder(tf.float32, shape=[None, self.n_actions],
                                       name='q_target')

        # The 1st dimension of the shape is set to None because we want to pass
        # batches of stacked frames into the neural network.

        conv1 = tf.layers.conv2d(inputs=self.input, filters=32,
                                 kernel_size=(8, 8), strides=4, name='conv1',
                                 kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2))
        conv1_activated = tf.nn.relu(conv1)


        conv2 = tf.layers.conv2d(inputs=conv1_activated, filters=64,
                                 kernel_size=(4, 4), strides=2, name='conv2',
                                 kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2))
        conv2_activated = tf.nn.relu(conv2)


        conv3 = tf.layers.conv2d(inputs=conv2_activated, filters=128,
                                 kernel_size=(3, 3), strides=1, name='conv3',
                                 kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2))
        conv3_activated = tf.nn.relu(conv3)

        flat = tf.contrib.layers.flatten(conv3_activated)

        dense1 = tf.layers.dense(flat, units=self.fc1_dims, activation=tf.nn.relu,
                                 kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2))

        self.Q_values = tf.layers.dense(dense1, units=self.n_actions,
                                        kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2))


        self.q = tf.reduce_sum(tf.multiply(self.Q_values, self.actions))
        self.accuracy_sum = tf.summary.scalar('Accuracy', self.q)

        self.loss = tf.reduce_mean(tf.square(self.q - self.q_target))
        self.loss_sum = tf.summary.scalar("Loss", self.loss)

        self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss)

        for var in tf.trainable_variables():
            print(var.name[:-2])
            self.summ = tf.summary.histogram(var.name[:-2], var)

class Agent(object):

def __init__(self, alpha, gamma, mem_size, n_actions, epsilon, batch_size,
             replace_target=10000, input_dims=(210, 160, 4),
             q_next_dir="tmp/q_next", q_eval_dir="tmp/q_eval"):
    self.n_actions = n_actions
    self.action_space = [i for i in range(self.n_actions)]
    # for n_actions = 3, action_space is the list [0, 1, 2]
    self.gamma = gamma
    self.mem_size = mem_size
    self.mem_cntr = 0
    self.epsilon = epsilon
    self.batch_size = batch_size
    self.replace_target = replace_target

    self.q_next = DeepQNetwork(alpha, n_actions, input_dims=input_dims,
                               name='q_next', chkpt_dir=q_next_dir)
    self.q_eval = DeepQNetwork(alpha, n_actions, input_dims=input_dims,
                               name='q_eval', chkpt_dir=q_eval_dir)

def learn(self):
    if self.mem_cntr % self.replace_target == 0:
        self.update_graph()
    # we update the graph after every K steps, so that the q_target is not fluctuating.

    max_mem = self.mem_cntr if self.mem_cntr < self.mem_size else self.mem_size

    batch = np.random.choice(max_mem, self.batch_size)
    # batch is an array of batch_size indices sampled from np.arange(max_mem).

    state_batch = self.state_memory[batch]
    # Shape of the state batch is (batch_size, *input_dims),
    # e.g. (32, 210, 160, 4).

    action_batch = self.action_memory[batch]
    action_values = np.array([0, 1, 2], dtype=np.int8)
    action_indices = np.dot(action_batch, action_values)
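    # (assumption: action_memory stores one-hot action vectors, so the dot
    # product above recovers the integer index of the chosen action per sample)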
    reward_batch = self.reward_memory[batch]
    new_state_batch = self.new_state_memory[batch]
    terminal_batch = self.terminal_memory[batch]

    q_eval = self.q_eval.sess.run(self.q_eval.Q_values,
                                  feed_dict={self.q_eval.input: state_batch})
    # It has shape (batch_size, n_actions).
    # This gives Q values for each action, in this case 3 actions, using q_eval network for current state batch.

    q_next = self.q_next.sess.run(self.q_next.Q_values,
                                  feed_dict={self.q_next.input: new_state_batch})
    # This gives Q values for the next state using the q_next network.

    q_target = q_eval.copy()
    idx = np.arange(self.batch_size)
    q_target[idx, action_indices] = reward_batch + \
        self.gamma*np.max(q_next, axis=1)*terminal_batch
    # axis=1 takes the maximum over the actions (columns) for each sample (row) in the batch.
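    # (assumption: terminal_memory stores 0 for terminal transitions, so the
    # bootstrapped term gamma*max(q_next) is dropped at episode ends)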

    #q_target = np.zeros(self.batch_size)
    #q_target = reward_batch + self.gamma*np.max(q_next, axis =1)*terminal_batch

    _ = self.q_eval.sess.run(self.q_eval.train_op,
                             feed_dict={self.q_eval.input: state_batch,
                                        self.q_eval.actions: action_batch,
                                        self.q_eval.q_target: q_target})

    loss = self.q_eval.sess.run(self.q_eval.loss,
                                feed_dict={self.q_eval.input: state_batch,
                                           self.q_eval.actions: action_batch,
                                           self.q_eval.q_target: q_target})



    summary = self.q_eval.sess.run(self.q_eval.write_op,
                                   feed_dict={self.q_eval.input: state_batch,
                                              self.q_eval.actions: action_batch,
                                              self.q_eval.q_target: q_target,
                                              self.q_next.input: new_state_batch})

    self.q_eval.writer.add_summary(summary, time.time())
    self.q_eval.writer.flush()

When I run this code, I can only visualize the bias of a single layer, namely the bias of the dense layer of the q_eval network.
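For reference, this is the general pattern I expected to end up with, shown as a minimal standalone sketch (TF 1.x; the scope, layer sizes and log directory are only examples):

    import tensorflow as tf

    # a tiny stand-in for one of the networks
    with tf.variable_scope('q_eval'):
        x = tf.placeholder(tf.float32, shape=[None, 4], name='inputs')
        dense1 = tf.layers.dense(x, units=16, activation=tf.nn.relu, name='dense1')
        q_values = tf.layers.dense(dense1, units=3, name='q_values')

    # one histogram summary per kernel/bias in the scope, merged into one op
    summaries = [tf.summary.histogram(v.name[:-2], v)
                 for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                            scope='q_eval')]
    write_op = tf.summary.merge(summaries)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter("tmp/log_dir", sess.graph)
        # variable histograms depend only on the variables, so no feed_dict is needed
        writer.add_summary(sess.run(write_op), 0)
        writer.flush()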

...