Модель классификации изображений с переносом обучения (transfer learning) в PyTorch не сходится - PullRequest
1 голос
/ 04 февраля 2020

Я не могу понять, правильно ли я вычисляю функцию потерь, но мой training_loss не сходится. Я использую набор данных с муравьями и пчёлами из руководства на сайте PyTorch:

https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html (просто прокрутите немного вниз)

# Dataset root; the 'train' and 'val' sub-directories below are resolved against it.
root_dir = "main_dir"

# Per-channel statistics handed to Normalize (one value per RGB channel).
# NOTE(review): these look like the usual ImageNet statistics -- confirm they
# match the pretrained weights in use.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# One preprocessing pipeline shared by both splits:
# resize to 224x224 -> convert to tensor -> normalise each channel.
image_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

# Training and validation datasets, read from <root_dir>/train and <root_dir>/val.
train = ImageFolder(
    root=os.path.join(root_dir, 'train'),
    transform=image_transforms,
)
valid = ImageFolder(
    root=os.path.join(root_dir, 'val'),
    transform=image_transforms,
)

# Both loaders use the same (small) batch size and reshuffle on every epoch.
_BATCH_SIZE = 4
train_dataloader = DataLoader(train, batch_size=_BATCH_SIZE, shuffle=True)
valid_dataloader = DataLoader(valid, batch_size=_BATCH_SIZE, shuffle=True)

# SGD with momentum over every parameter of `model` (defined elsewhere in the file).
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
# Loss function used for both training and validation.
criterion = nn.CrossEntropyLoss()


def _mean_or_nan(total, count):
    """Return ``total / count``, or NaN when ``count`` is zero."""
    return total / count if count else float("nan")


def _move(tensor, device):
    """Return ``tensor`` on ``device``; leave it untouched when ``device`` is None."""
    return tensor if device is None else tensor.to(device)


def _train_one_epoch(optimizer, criterion, model, loader, device):
    """Run one optimisation pass over ``loader``.

    Returns:
        ``(loss_sum, seen)``: the batch losses weighted by batch size and
        summed, and the number of samples actually iterated.
    """
    model.train()
    loss_sum, seen = 0.0, 0
    for inputs, labels in loader:
        inputs = _move(inputs, device)
        labels = _move(labels, device)
        batch_size = inputs.size(0)

        # Force autograd on even if the caller is inside a no-grad context.
        with torch.set_grad_enabled(True):
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()   # compute gradients
            optimizer.step()  # apply gradients to the parameters

        # Weight by batch size so the running value is a total, not a mean of
        # means (assumes the criterion averages over the batch -- TODO confirm
        # if a non-default ``reduction`` is ever used).
        loss_sum += loss.item() * batch_size
        seen += batch_size
    return loss_sum, seen


def _evaluate(criterion, model, loader, device):
    """Score ``model`` on ``loader`` without tracking gradients.

    Returns:
        ``(loss_sum, correct, seen)``: summed batch-size-weighted loss, number
        of correct top-1 predictions, and number of samples iterated.
    """
    model.eval()
    loss_sum, correct, seen = 0.0, 0, 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs = _move(inputs, device)
            labels = _move(labels, device)
            batch_size = inputs.size(0)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # Softmax is monotonic, so the arg-max of the raw outputs is
            # already the predicted class; no softmax (or its `dim`) needed.
            preds = outputs.argmax(dim=1)

            loss_sum += loss.item() * batch_size
            correct += int((preds == labels).sum().item())
            seen += batch_size
    return loss_sum, correct, seen


def train_model(optimizer, criterion, model, epochs, train_loader, valid_loader,
                device=None):
    """Train ``model`` for ``epochs`` epochs, printing metrics after each one.

    Every epoch runs a full training pass followed by a validation pass and
    prints the total and per-sample loss for both, plus validation accuracy.
    Per-sample values are averaged over the samples actually yielded by the
    loader, so they stay correct with ``drop_last`` or subset samplers; when
    a loader yields nothing they are reported as ``nan`` instead of raising.

    Args:
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        model: The ``torch.nn.Module`` to train.
        epochs: Number of epochs to run.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        valid_loader: Iterable of ``(inputs, labels)`` validation batches.
        device: Device the batches are moved to. When ``None`` (default) the
            device of the model's first parameter is used; a model without
            parameters leaves the batches where they are.

    Returns:
        None. Results are reported on stdout only.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = None if first_param is None else first_param.device

    for epoch in range(epochs):
        print('Epoch {}/{}'.format(epoch, epochs - 1))
        print('-' * 10)

        train_loss, train_seen = _train_one_epoch(
            optimizer, criterion, model, train_loader, device)
        print('total training loss {:.3f}'.format(train_loss))
        print('training_loss per sample {:.5f}'.format(
            _mean_or_nan(train_loss, train_seen)))

        total_valid_loss, valid_corrects, valid_seen = _evaluate(
            criterion, model, valid_loader, device)
        valid_loss_per_sample = _mean_or_nan(total_valid_loss, valid_seen)
        accuracy = _mean_or_nan(valid_corrects, valid_seen)

        print('total valid loss {:.3f}'.format(total_valid_loss))
        print('valid_loss per sample {:.3f}'.format(valid_loss_per_sample))
        print('total accuracy {:.3f}'.format(accuracy))


if __name__ == "__main__":
    # Script entry point: run the training/validation loop for a fixed number
    # of epochs using the module-level optimizer, loss, model and data loaders.
    epochs = 25
    train_model(
        optimizer=optimizer,
        criterion=criterion,
        model=model,
        epochs=epochs,
        train_loader=train_dataloader,
        valid_loader=valid_dataloader,
    )

Вот как ведут себя потери на протяжении 25 эпох (эпохи 0–24). Как видно, обучение не сходится; я пробовал менять скорость обучения, но это почти не помогает.

Epoch 5/24
----------
total training loss 35.477
training_loss per sample 0.14540
total valid loss 46.130
valid_loss per sample 0.302
total accuracy 0.850


Epoch 6/24
----------
total training loss 57.476
training_loss per sample 0.23556
total valid loss 55.434
valid_loss per sample 0.362
total accuracy 0.837


Epoch 7/24
----------
total training loss 66.397
training_loss per sample 0.27212
total valid loss 41.781
valid_loss per sample 0.273
total accuracy 0.869
Epoch 8/24
----------
total training loss 61.362
training_loss per sample 0.25148
total valid loss 45.502


Epoch 21/24
----------
total training loss 47.409
training_loss per sample 0.19430
total valid loss 50.288
valid_loss per sample 0.329
total accuracy 0.837
Epoch 22/24
----------
total training loss 35.873
training_loss per sample 0.14702
total valid loss 49.597
valid_loss per sample 0.324
total accuracy 0.863
Epoch 23/24
----------
total training loss 30.655
training_loss per sample 0.12563
total valid loss 44.393
valid_loss per sample 0.290
total accuracy 0.869
Epoch 24/24
----------
total training loss 54.373
training_loss per sample 0.22284
total valid loss 60.207
valid_loss per sample 0.394
total accuracy 0.850
...