Цель 40 выходит за границы для nn.CrossEntropyLoss () - PullRequest
0 голосов
/ 02 мая 2020

Я создаю пользовательский набор данных изображения, например:

from torch.utils.data.dataset import Dataset
from PIL import Image
import torchvision
from torchvision import datasets, models, transforms
import numpy as np

class MyCustomDataset(Dataset):
    def __init__(self, df, transforms=None):


        """
        Args:
            df (pandas.DataFrame): path to images and in dataframe
            transforms: pytorch transforms for transforms and tensor conversion
        """
        # Transforms
        self.transforms = transforms
        # Read the dataframe
        self.data_info = df              
        # First column contains the image paths
        self.image_arr = np.asarray(self.data_info.iloc[:, 0])
        # Second column is the labels
        self.label_arr = np.asarray(self.data_info.iloc[:, 1])
        # Calculate len
        self.data_len = len(self.data_info.index)

    def __getitem__(self, index):
        # Get image name from the pandas df
        single_image_name = self.image_arr[index]
        # Open image
        img_as_img = Image.open(single_image_name)

        img_as_tensor= self.transforms(img_as_img)
        # Get label(class) of the image based on the cropped pandas column
        single_image_label = self.label_arr[index]

        return (img_as_tensor, single_image_label)

    def __len__(self):
        return self.data_len

Ввод df для MyCustomDataset(df, transforms) - pandas фрейм данных, в котором хранятся пути и метки изображений, как показано ниже:

    file_name                          label
0   M:\RealModels\images\001\001001.png 0
1   M:\RealModels\images\001\002001.png 0
2   M:\RealModels\images\001\003001.png 0
3   M:\RealModels\images\001\004001.png 0
4   M:\RealModels\images\001\006001.png 0
... ... ...
3197    M:\RenderedModels\images_rgb\450\116450.png 45
3198    M:\RenderedModels\images_rgb\450\117450.png 45
3199    M:\RenderedModels\images_rgb\450\118450.png 45
3200    M:\RenderedModels\images_rgb\450\119450.png 45
3201    M:\RenderedModels\images_rgb\450\120450.png 45
3202 rows × 2 columns

В моем наборе данных 16 классов. Таблицы классов выглядят так: ['00', '01', '12', '34','35'...,'45']

Вся моя программа:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import time
import os
import copy
import scipy
from torch.utils.tensorboard import SummaryWriter

from torch.utils.data.dataset import Dataset
import torchvision
from torchvision import datasets, models, transforms


# In[2]:


import sys
sys.path.append(r"M:\program\pytorch\Scripts")
import custom_fun
from custom_fun import custom_dataset
import create_folder
from create_folder import create_tb_folder
import dataset_from_image
from dataset_from_image import MyCustomDataset


# In[3]:


tb_dir = r'../'
path = create_tb_folder(tb_dir)


# In[4]:


mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])


# In[5]:


data_transforms = {
    'train': transforms.Compose([
        transforms.RandomRotation(degrees=15),
        transforms.Resize((224,224)), # 299 for Inception v3
        transforms.ColorJitter(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
    'val': transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
}


# In[6]:


data_dir = r'M:\dataset\first_att'


# In[7]:


batch_size = 4
lr = 0.003


# In[8]:


data = custom_dataset(data_dir,0.8)


# In[37]:


df = data['train']
df.head()


# In[9]:


image_datasets = {x: MyCustomDataset(data[x], data_transforms[x]) for x in ['train', 'val']}


# In[24]:


dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                             shuffle=True, num_workers=0)
              for x in ['train', 'val']}


# In[25]:


dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
device = torch.device("cpu")


# In[26]:


# Get a batch of training data
inputs, classes = next(iter(dataloaders['train']))
print(inputs.shape,classes.shape)
print(classes)

# Make a grid from batch
out = torchvision.utils.make_grid(inputs)


# In[27]:


tb1 = SummaryWriter(path[1])
tb2 = SummaryWriter(path[2])


# In[28]:


def train_model(model, criterion, optimizer, scheduler, num_epochs):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase] # images of training data
            epoch_acc = running_corrects.double() / dataset_sizes[phase] # images of val data

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))


            # Tensorboard works here
            if phase == 'train':
                tb1.add_scalar('Loss', epoch_loss, epoch)
                tb1.add_scalar('Accuracy', epoch_acc, epoch)
                for name, weight in model.named_parameters():
                    tb.add_histogram(name, weight, epoch)
                    tb.add_histogram(f'{name}.grad', weight.grad, epoch)

            else:
                tb2.add_scalar('Loss', epoch_loss, epoch)
                tb2.add_scalar('Accuracy', epoch_acc, epoch)




            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model


# In[29]:


model = models.resnet18(pretrained=True) 

# The way below is feature extraction. 
#for param in model.parameters():
    #param.requires_grad = False


# In[30]:


num_ftrs = model.fc.in_features


# In[31]:


model.fc = nn.Linear(num_ftrs, 16)


# In[32]:


model = model.to(device)
criterion = nn.CrossEntropyLoss()


# In[33]:


# Observe that all parameters are being optimized
optimizer = optim.Adam(model.parameters(), lr=lr)


# In[34]:


step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)


# In[35]:


tb = SummaryWriter(path[0])
grid = torchvision.utils.make_grid(inputs)
tb.add_image('images', grid)
tb.add_graph(model, inputs)
tb.close()


# In[36]:


model = train_model(model, criterion, optimizer, step_lr_scheduler, num_epochs=20)


# In[ ]:


torch.save(model.state_dict(), 'first_att_02.pth')


# In[ ]:

Но я получил ошибку:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-36-edba91d1cb93> in <module>
----> 1 model = train_model(model, criterion, optimizer, step_lr_scheduler, num_epochs=20)

<ipython-input-28-662f652902cf> in train_model(model, criterion, optimizer, scheduler, num_epochs)
     29                     outputs = model(inputs)
     30                     _, preds = torch.max(outputs, 1)
---> 31                     loss = criterion(outputs, labels)
     32 
     33                     # backward + optimize only if in training phase

M:\program\pytorch\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

M:\program\pytorch\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
    914     def forward(self, input, target):
    915         return F.cross_entropy(input, target, weight=self.weight,
--> 916                                ignore_index=self.ignore_index, reduction=self.reduction)
    917 
    918 

M:\program\pytorch\lib\site-packages\torch\nn\functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
   2019     if size_average is not None or reduce is not None:
   2020         reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2021     return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
   2022 
   2023 

M:\program\pytorch\lib\site-packages\torch\nn\functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   1836                          .format(input.size(0), target.size(0)))
   1837     if dim == 2:
-> 1838         ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
   1839     elif dim == 4:
   1840         ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)

IndexError: Target 40 is out of bounds.

Не могли бы вы сообщить мне, где Я не прав? Большое спасибо.

...