Question

У меня есть простой класс Hook, который довольно хорошо работает с прямым проходом, но испытывает трудности при обратном проходе.

Перехватчики успешно зарегистрированы на каждом слое моей глубинной модели, но не записывают градиенты, хотя вперед работает нормально.

Модель была запущена на 5 итераций, и проблема с обратным проходом видна ниже. Обратные крючки для слоя 0,1,2 пусты, чего не должно было быть. Для каждой итерации мой код возвращает среднее значение дисперсии, а не полный тензор.

Forward Hook of layer 0
Mean: [-1.511833667755127, -1.6455317735671997, -2.1622400283813477, -2.1323182582855225, -1.8739168643951416], Std: [2.0011091232299805, 1.855371117591858, 2.315523624420166, 1.3520175218582153, 2.242307424545288]

 Forward Hook of layer 1
Mean: [0.11147132515907288, -0.08157677203416824, -0.010751131922006607, -0.0921892449259758, -0.04136423021554947], Std: [0.20785003900527954, 0.24797917902469635, 0.08388562500476837, 0.0, 0.17534032464027405]

 Forward Hook of layer 2
Mean: [-0.17542554438114166, 0.06612542271614075, -0.0477512925863266, 0.03804510459303856, 0.0076016997918486595], Std: [0.13048705458641052, 0.09858430176973343, 0.0582481250166893, 0.02330612950026989, 0.07083183526992798]

 Forward Hook of layer 3
Mean: [0.41979557275772095, -0.5407723188400269, -3.3170995712280273, -9.282533645629883, -19.208200454711914], Std: [0.6125074028968811, 0.375322163105011, 1.5524002313613892, 3.9379470348358154, 4.83458948135376]

 Backward Hook of layer 0
Mean: [], Std: [] # Why is this empty??

 Backward Hook of layer 1
Mean: [], Std: [] # Why is this empty??

 Backward Hook of layer 2
Mean: [], Std: [] # Why is this empty??

 Backward Hook of layer 3
Mean: [5.980815887451172, 5.21722936630249, 6.572475910186768, 5.469051837921143, 4.7423529624938965], Std: [3.251680850982666, 1.677825927734375, 2.518986225128174, 2.810399055480957, 2.7709243297576904]

Кто-нибудь может сообщить мне, почему градиенты пусты для слоев 0,1 и 2?

----------- Мой полный код ниже

import torch
import torch.nn as nn

# creat a dummy deep net
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()

        self.conv1 = nn.Conv2d(1,1, kernel_size=3, stride=1, padding=1, bias=True)
        self.conv2 = nn.Conv2d(1,1, kernel_size=3, stride=1, padding=1, bias=True)
        self.conv3 = nn.Conv2d(1,1, kernel_size=3, stride=1, padding=1, bias=True)
        self.seq = nn.Sequential(
                    nn.Conv2d(1,2, kernel_size=3, stride=1, padding=1, bias=True),
                    nn.LeakyReLU(negative_slope=0, inplace=True),
                    nn.Conv2d(2,1, kernel_size=3, stride=1, padding=1, bias=True),
                    )
        self.relu = nn.LeakyReLU(negative_slope=0, inplace=True)

    def forward(self, x):
        print('conv1')
        out = self.conv1(x)
        print('relu')
        out = self.relu(out)
        print('conv2')
        out =self.conv2(out)
        print('conv3')
        out = self.conv3(out)
        print('ResBlock Identity addition')
        out = out + x
        print('sequential')
        out = self.seq(x)

        return out

net = Net()
print(net)
criterion = nn.MSELoss()


# ------------------The Hook class begins to calculate each layer stats
class Hook():
    def __init__(self, module, backward=False):
        if backward==False:
            self.hook = module.register_forward_hook(self.hook_fn)
        else:
            self.hook = module.register_backward_hook(self.hook_fn)

        self.Mean = []
        self.Std = []
        self.backward = backward

    def hook_fn(self, module, input, output):
        #self.inputMean.append(input[0][0,...].mean().item())
        print('Hook is called on {}'.format(module))
        #calculate only for 1st image in the batch        
        if self.backward:
            self.Mean.append(output[0][0,...].mean().item())
            self.Std.append(output[0][0,...].std().item())

        else:
            self.Mean.append(output[0,...].mean().item())
            self.Std.append(output[0,...].std().item())


# create hooks on each layer
hookF = []
hookB = []
for i,layer in enumerate(list(net.children())):
    if not isinstance(layer,nn.ReLU) and not isinstance(layer,nn.LeakyReLU):
        print('Hooked to {}'.format(layer))
        hookF.append(Hook(layer))
        hookB.append(Hook(layer,backward=True))

optimizer = torch.optim.Adam(net.parameters(),lr=0.1)

for _ in range(5):
    print('Iteration --------')
    data = torch.rand(2,1,3,3)*10
    print('Input mean is {}'.format(data[0,...].mean()))
    target = data.clone()
    print('Forward xxxxxx')
    out = net(data)
    print('backward xxxxxx')
    out.backward(target,retain_graph=True)

    #loss = criterion(out, target)
    #loss.backward()

    optimizer.step()
    optimizer.zero_grad()

for i,h in enumerate(hookF):

    print('\n Forward Hook of layer {}'.format(i))
    print('Mean: {}, Std: {}'.format(h.Mean, h.Std))
    h.hook.remove()

for i,h in enumerate(hookB):
    print('\n Backward Hook of layer {}'.format(i))
    print('Mean: {}, Std: {}'.format(h.Mean, h.Std))
    h.hook.remove()

Спасибо

Сложность обучения с обратным вызовом register_backward_hook в PyTorch

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 0 ]

Сложность обучения с обратным вызовом register_backward_hook в PyTorch

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 0 ]

Похожие темы