pytorch, using nn.DataParallel with an LSTM - PullRequest
0 votes
/ March 15, 2020
/pytorch/aten/src/ATen/native/cudnn/RNN.cpp:1266: UserWarning: RNN module weights are not part of single contiguous chunk of memory. 
This means they need to be compacted at every call, possibly greatly increasing memory usage. To compact weights again call flatten_parameters().

Hi. I am using PyTorch and trying to use nn.DataParallel, but my model contains an LSTM. I get the warning above telling me to flatten the model again, but I don't know when and where to flatten it. Can you let me know?

This is my model:

import torch.nn as nn
from torchvision import models

class ConvLstm(nn.Module):
    def __init__(self, latent_dim, model, hidden_size, lstm_layers, bidirectional, n_class):
        super(ConvLstm, self).__init__()
        self.conv_model = Pretrained_conv(latent_dim, model)
        self.Lstm = Lstm(latent_dim, hidden_size, lstm_layers, bidirectional)
        self.output_layer = nn.Sequential(
            nn.Linear(2 * hidden_size if bidirectional else hidden_size, n_class),
            nn.Softmax(dim=-1)
        )

    def forward(self, x):
        batch_size, timesteps, channel_x, h_x, w_x = x.shape
        conv_input = x.view(batch_size * timesteps, channel_x, h_x, w_x)
        conv_output = self.conv_model(conv_input)
        lstm_input = conv_output.view(batch_size, timesteps, -1)
        lstm_output = self.Lstm(lstm_input)
        lstm_output = lstm_output[:, -1, :]
        output = self.output_layer(lstm_output)
        return output

class Pretrained_conv(nn.Module):
    def __init__(self, latent_dim, model):
        # call the parent constructor unconditionally, otherwise assigning
        # submodules below fails for any model other than 'resnet152'
        super(Pretrained_conv, self).__init__()
        if model == 'resnet152':
            self.conv_model = models.resnet152(pretrained=True)
            # ====== freezing all of the layers ======
            for param in self.conv_model.parameters():
                param.requires_grad = False
            # ====== changing the last FC layer to an output of the size we need; this layer stays unfrozen ======
            self.conv_model.fc = nn.Linear(
                self.conv_model.fc.in_features, latent_dim)

    def forward(self, x):
        return self.conv_model(x)


class Lstm(nn.Module):
    def __init__(self, latent_dim, hidden_size, lstm_layers, bidirectional):
        super(Lstm, self).__init__()
        self.Lstm = nn.LSTM(latent_dim, hidden_size=hidden_size,
                            num_layers=lstm_layers, batch_first=True, bidirectional=bidirectional)
        self.hidden_state = None

    def reset_hidden_state(self):
        self.hidden_state = None

    def forward(self, x):
        output, self.hidden_state = self.Lstm(x, self.hidden_state)
        return output
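
For reference, one commonly suggested placement (an assumption here, not something the original post confirms) is to call flatten_parameters() at the top of the Lstm wrapper's forward, so the weights are re-compacted inside every DataParallel replica on each call. A minimal sketch:

import torch.nn as nn

class Lstm(nn.Module):
    def __init__(self, latent_dim, hidden_size, lstm_layers, bidirectional):
        super(Lstm, self).__init__()
        self.Lstm = nn.LSTM(latent_dim, hidden_size=hidden_size,
                            num_layers=lstm_layers, batch_first=True,
                            bidirectional=bidirectional)
        self.hidden_state = None

    def reset_hidden_state(self):
        self.hidden_state = None

    def forward(self, x):
        # Re-compact the LSTM weights into a single contiguous chunk of memory
        # before the call; DataParallel replicas otherwise hold non-contiguous
        # copies, which is what triggers the cuDNN warning above.
        self.Lstm.flatten_parameters()
        output, self.hidden_state = self.Lstm(x, self.hidden_state)
        return output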

I feed the input into the LSTM and run the following code:

def forward_step(model, images, labels, criterion, mode=''):
    # reset the LSTM hidden state before each batch
    model.module.Lstm.reset_hidden_state()

    if mode == 'test':
        with torch.no_grad():
            output = model(images)
    else:
        output = model(images)

    loss = criterion(output, labels)

    # Accuracy calculation
    predicted_labels = output.detach().argmax(dim=1)
    acc = (predicted_labels == labels).cpu().numpy().sum()

    return loss, acc, predicted_labels.cpu()

This is the main code:

model = nn.DataParallel(model, device_ids=[0, 1, 2, 3]).cuda()
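
For context, a minimal sketch of how these pieces could be wired together; the constructor arguments, criterion, optimizer, and train_loader below are placeholders for illustration, not values from the original post:

import torch
import torch.nn as nn

# hypothetical sizes; the real values are not shown in the post
model = ConvLstm(latent_dim=512, model='resnet152', hidden_size=256,
                 lstm_layers=2, bidirectional=True, n_class=10)
model = nn.DataParallel(model, device_ids=[0, 1, 2, 3]).cuda()

criterion = nn.CrossEntropyLoss()  # placeholder; the actual criterion is not shown
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

for images, labels in train_loader:  # train_loader: your DataLoader of clips and labels
    images, labels = images.cuda(), labels.cuda()
    loss, acc, _ = forward_step(model, images, labels, criterion)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()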
