How do I debug "invalid gradient at index 0"? - PyTorch - PullRequest
0 votes
/ 31 January 2020

I'm trying to train an actor-critic model, but when I get to the backprop for the critic I hit this error: RuntimeError: invalid gradient at index 0 - expected type torch.cuda.FloatTensor but got torch.FloatTensor. I can't figure out which gradient the error is referring to. Can anyone help?

Here is the stack trace:

Traceback (most recent call last):
  File "train.py", line 338, in <module>
    main()
  File "train.py", line 327, in main
    reinforce_trainer.train(opt.start_reinforce, opt.start_reinforce + opt.critic_pretrain_epochs - 1, True, start_time)
  File "/home/fbommfim/init-tests/treeLSTM/lib/train/reinforce_trainer.py", line 56, in train
    train_reward, critic_loss = self.train_epoch(epoch, pretrain_critic, no_update)
  File "/home/fbommfim/init-tests/treeLSTM/lib/train/reinforce_trainer.py", line 153, in train_epoch
    critic_loss = self.critic.backward(baselines.cuda(), rewards, critic_weights.cuda(), num_words, self.critic_loss_func, regression=True)
  File "/home/fbommfim/init-tests/treeLSTM/lib/model/encoder_decoder/hybrid2seq_model.py", line 67, in backward
    outputs.backward(grad_output)
  File "/home/linuxbrew/.linuxbrew/Cellar/python/3.7.6_1/lib/python3.7/site-packages/torch/tensor.py", line 195, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/home/linuxbrew/.linuxbrew/Cellar/python/3.7.6_1/lib/python3.7/site-packages/torch/autograd/__init__.py", line 99, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: invalid gradient at index 0 - expected type torch.cuda.FloatTensor but got torch.FloatTensor

and the relevant code, train_epoch from reinforce_trainer:

def train_epoch(self, epoch, pretrain_critic, no_update):
        self.actor.train() # may also have self.critic.train() ?
        total_reward, report_reward = 0, 0
        total_critic_loss, report_critic_loss = 0, 0
        total_sents, report_sents = 0, 0
        total_words, report_words = 0, 0
        last_time = time.time()
        batch_count = len(self.train_data)
        batch_order = torch.randperm(batch_count)
        with tqdm(total = (batch_count)) as prog:
            for i in range(batch_count):
                batch = self.train_data[i] # batch_order[i]
                if self.opt.data_type == 'code':
                    targets = batch[2]
                    attention_mask = batch[1][2][0].data.eq(lib.Constants.PAD).t()
                elif self.opt.data_type == 'text':
                    targets = batch[2]
                    attention_mask = batch[0][0].data.eq(lib.Constants.PAD).t()
                elif self.opt.data_type == 'hybrid':
                    targets = batch[2]
                    attention_mask_code = batch[1][2][0].data.eq(lib.Constants.PAD).t()
                    attention_mask_txt = batch[0][0].data.eq(lib.Constants.PAD).t()

                batch_size = targets.size(1)

                self.actor.zero_grad()
                self.critic.zero_grad()

                # Sample translations
                if self.opt.has_attn:
                    if self.opt.data_type == 'code' or self.opt.data_type == 'text':
                        self.actor.decoder.attn.applyMask(attention_mask)
                    elif self.opt.data_type == 'hybrid':
                        self.actor.decoder.attn.applyMask(attention_mask_code, attention_mask_txt)
                samples, outputs = self.actor.sample(batch, self.max_length)

                # Calculate rewards
                rewards, samples = self.sent_reward_func(samples.t().tolist(), targets.data.t().tolist())
                reward = sum(rewards)

                # Perturb rewards (if specified).
                if self.pert_func is not None:
                    rewards = self.pert_func(rewards)

                samples = torch.LongTensor(samples).t().contiguous()
                rewards = torch.FloatTensor([rewards] * samples.size(0)).contiguous()
                if self.opt.cuda:
                    samples = samples.cuda()
                    rewards = rewards.cuda()

                # Update critic.
                critic_weights = samples.ne(lib.Constants.PAD).float()
                num_words = critic_weights.data.sum()
                if not no_update:
                    if self.opt.data_type == 'code':
                        baselines = self.critic((batch[0], batch[1], samples, batch[3]), eval=False, regression=True)
                    elif self.opt.data_type == 'text':
                        baselines = self.critic((batch[0], batch[1], samples, batch[3]), eval=False, regression=True)
                    elif self.opt.data_type == 'hybrid':
                        baselines = self.critic((batch[0], batch[1], samples, batch[3]), eval=False, regression=True)

                    critic_loss = self.critic.backward(baselines, rewards, critic_weights, num_words, self.critic_loss_func, regression=True)
                    self.critic_optim.step()
                else:
                    critic_loss = 0

                # Update actor
                if not pretrain_critic and not no_update:
                    # Subtract baseline from reward
                    norm_rewards = (rewards - baselines).data
                    actor_weights = norm_rewards * critic_weights
                    # TODO: can use PyTorch reinforce() here but that function is a black box.
                    # This is an alternative way where you specify an objective that gives the same gradient
                    # as the policy gradient's objective, which looks much like weighted log-likelihood.
                    actor_loss = self.actor.backward(outputs, samples, actor_weights, 1, self.actor_loss_func)
                    self.optim.step()
                else:
                    actor_loss = 0

                # Gather stats
                total_reward += reward
                report_reward += reward
                total_sents += batch_size
                report_sents += batch_size
                total_critic_loss += critic_loss
                report_critic_loss += critic_loss
                total_words += num_words
                report_words += num_words
                self.opt.iteration += 1
                print ("iteration: %s, loss: %s " % (self.opt.iteration, actor_loss))
                print ("iteration: %s, reward: %s " % (self.opt.iteration, (report_reward / report_sents) * 100))

                if i % self.opt.log_interval == 0 and i > 0:
                    print("""Epoch %3d, %6d/%d batches; actor reward: %.4f; critic loss: %f; %5.0f tokens/s; %s elapsed""" %
                          (epoch, i, batch_count, (report_reward / report_sents) * 100,
                          report_critic_loss / report_words,
                          report_words / (time.time() - last_time),
                          str(datetime.timedelta(seconds=int(time.time() - self.start_time)))))

                    report_reward = report_sents = report_critic_loss = report_words = 0
                    last_time = time.time()
                prog.update(1)

        return total_reward / total_sents, total_critic_loss / total_words

and backward from hybrid2seq_model.py:

def backward(self, outputs, targets, weights, normalizer, criterion, regression=False):
        grad_output, loss = self.generator.backward(outputs, targets, weights, normalizer, criterion, regression)
        outputs.cuda()
        grad_output.cuda()
        outputs.backward(grad_output)
        return loss

1 Answer

1 vote
/ 31 January 2020

The error says it expected a CUDA tensor and got a non-CUDA tensor, so that is what I looked for.

Calls like grad_output.cuda() return a CUDA tensor; they are not in-place operations. You probably meant grad_output = grad_output.cuda(), so I would start by fixing calls like that.
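
For example, backward in hybrid2seq_model.py could be rewritten along these lines. This is only a sketch: it assumes grad_output comes back from self.generator.backward exactly as in your code, and it moves that tensor onto whatever device outputs already lives on instead of relying on .cuda() calls whose results are discarded.

def backward(self, outputs, targets, weights, normalizer, criterion, regression=False):
    grad_output, loss = self.generator.backward(outputs, targets, weights, normalizer, criterion, regression)
    # .cuda() / .to() are not in-place: keep the tensor they return.
    # Autograd requires grad_output to be on the same device as outputs.
    grad_output = grad_output.to(outputs.device)
    outputs.backward(grad_output)
    return loss

The same check applies to any other place where you call .cuda() without using the returned tensor.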
