Невозможно сохранить контрольную точку модели после загрузки - PullRequest
0 голосов
/ 31 марта 2019

Я новичок в tenorflow. Я пытаюсь сохранить модель после каждой эпохи (контрольной точки), но я сталкиваюсь с ошибкой, после загрузки модели из предыдущей контрольной точки (она загружается успешно) и пытаюсь восстановить еев другом каталоге (при сохранении возникает ошибка).Вот ошибка

FailedPreconditionError: Ошибка при чтении переменной ресурса forw._lstm_2 / rnn / lstm_cell / kernel_1 / Adadelta_1 из контейнера: localhost.Это может означать, что переменная была неинициализирована.Не найдено: Ресурс localhost / forw._lstm_2 / rnn / lstm_cell / kernel_1 / Adadelta_1 / N10tensorflow3VarE не существует.

[[{{{узел forw._lstm_2 / rnn / lstm_cell / kernel_1 / Adadelta_ar_ Readable_1 Readable Read_ar)}]]

[[{{node forw._lstm_2 / rnn / lstm_cell / kernel_1 / Adadelta / Read / ReadVariableOp}}]]

Вот код

epoch_step = 0
    global_step = 0
    epoch_loss_sum = 0.0
    global_start = time.time()
    saver = tf.train.import_meta_graph('./drive/My Drive/DLSRL/Model/03-27-09-15_epoch_39.ckpt.meta')
    g = tf.get_default_graph()
    with g.as_default():


      model = Model(config, embeddings, label_dict.size(), g)
      sess = tf.Session(graph=g, config=tf.ConfigProto(allow_soft_placement=True,
                                                       log_device_placement=False))
      saver.restore(sess, tf.train.latest_checkpoint('./drive/My Drive/DLSRL/Model/'))
      #sess.run(tf.global_variables_initializer())
      ckpt_saver = tf.train.Saver(max_to_keep=config.max_epochs)
      for epoch in range(39,config.max_epochs):
          # save chckpoint from which to load model
          path = runs_dir / "{}_epoch_{}.ckpt".format(time_of_init, epoch)
          ckpt_saver.save(sess, str(path))
          print('Saved checkpoint.')
          evaluate(dev_data, model, sess, epoch, global_step)
          x1, x2, y = shuffle_stack_pad(train_data, config.train_batch_size)
          epoch_start = time.time()
          for x1_b, x2_b, y_b in get_batches(x1, x2, y, config.train_batch_size):
              feed_dict = make_feed_dict(x1_b, x2_b, y_b, model, config.keep_prob)
              if epoch_step % LOSS_INTERVAL == 0:
                  # tensorboard
                  run_options = tf.RunOptions(trace_level=tf.RunOptions.NO_TRACE)
                  scalar_summaries = sess.run(model.scalar_summaries,
                                     feed_dict=feed_dict,
                                     options=run_options)
                  model.train_writer.add_summary(scalar_summaries, global_step)
                  # print info
                  print("step {:>6} epoch {:>3}: loss={:1.3f}, epoch sec={:3.0f}, total hrs={:.1f}".format(
                      epoch_step,
                      epoch,
                      epoch_loss_sum / max(epoch_step, 1),
                      (time.time() - epoch_start),
                      (time.time() - global_start) / 3600))
              loss, _ = sess.run([model.nonzero_mean_loss, model.update], feed_dict=feed_dict)

              epoch_loss_sum+= loss
              epoch_step += 1
              global_step += 1
          epoch_step = 0
          epoch_loss_sum = 0.0

Код модели

class Model():
    def __init__(self, config, embeddings, num_labels, g):

        # embedding
        #with tf.device('/cpu:0'):


        # stacked bilstm
        with tf.device('/gpu:0'):
            self.word_ids = tf.placeholder(tf.int32, [None, None], name='word_ids')
            embedded = tf.nn.embedding_lookup(embeddings, self.word_ids, name='embedded')

            self.predicate_ids = tf.placeholder(tf.float32, [None, None], name='predicate_ids')
            self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
            self.lengths = tf.placeholder(tf.int32, [None], name='lengths')
            inputs = tf.concat([embedded, tf.expand_dims(self.predicate_ids, -1)], axis=2, name='lstm_inputs')          

            final_outputs = bilstms_interleaved(inputs,
                                                config.num_layers,
                                                config.cell_size,
                                                self.keep_prob,
                                                    self.lengths)


            # projection
            shape0 = tf.shape(final_outputs)[0] * tf.shape(final_outputs)[1]  # both batch_size and seq_len are dynamic
            final_outputs_2d = tf.reshape(final_outputs, [shape0, config.cell_size], name='final_outputs_2d')
            wy = tf.get_variable('Wy', [config.cell_size, num_labels])
            by = tf.get_variable('by', [num_labels])
            logits = tf.nn.xw_plus_b(final_outputs_2d, wy, by, name='logits')  # need [shape0, num_labels]

            # loss
            self.label_ids = tf.placeholder(tf.int32, [None, None], name='label_ids')  # [batch_size, max_seq_len]
            label_ids_flat = tf.reshape(self.label_ids, [-1])  # need [shape0]
            mask = tf.greater(label_ids_flat, 0, 'mask')
            self.nonzero_label_ids_flat = tf.boolean_mask(label_ids_flat, mask,
                                                     name='nonzero_label_ids_flat')  # removes elements
            nonzero_logits = tf.boolean_mask(logits, mask, name='nonzero_logits')
            nonzero_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=nonzero_logits,
                                                                            labels=self.nonzero_label_ids_flat,
                                                                            name='nonzero_losses')
            self.nonzero_mean_loss = tf.reduce_mean(nonzero_losses, name='nonzero_mean_loss')

            # update
            optimizer = tf.train.AdadeltaOptimizer(learning_rate=config.learning_rate, rho=0.95,epsilon=config.epsilon)
            gradients, variables = zip(*optimizer.compute_gradients(self.nonzero_mean_loss))
            gradients, _ = tf.clip_by_global_norm(gradients, config.max_grad_norm)
            self.update = optimizer.apply_gradients(zip(gradients, variables), name='update')

            # predictions
            self.nonzero_predicted_label_ids = tf.cast(tf.argmax(tf.nn.softmax(nonzero_logits), axis=1), tf.int32,
                                                  name='nonzero_predicted_label_ids')

            # tensorboard
            tf.summary.scalar('nonzero_accuracy', tf.reduce_mean(tf.cast(tf.equal(self.nonzero_predicted_label_ids,
                                                                                  self.nonzero_label_ids_flat),
                                                                         tf.float32)))
            tf.summary.scalar('nonzero_mean_xe', self.nonzero_mean_loss)
            self.scalar_summaries = tf.summary.merge_all()
            p = Path("/content/drive/My Drive/DLSRL/Tensorboard log") 
            self.train_writer = tf.summary.FileWriter(str(p), g)

            # confusion matrix
            nonzero_cm = tf.confusion_matrix(self.nonzero_label_ids_flat, self.nonzero_predicted_label_ids)
            size = tf.shape(nonzero_cm)[0]
            self.cm_summary = tf.summary.image('nonzero_cm', tf.reshape(tf.cast(nonzero_cm, tf.float32),
                                                                              [1, size, size, 1]))  # needs 4d


Можете ли вы предложить исправления?

...