Question

Я запускаю следующий код на GCP. У меня есть 8 CPU и 1 Tesla K80 GPU. Иногда, когда я запускаю его, на первой итерации я получаю значение NAN. Иногда я запускаю его, первая эпоха идет действительно хорошо, кажется, что она учится, потери уменьшаются, точность составляет 0,9 и выше, а затем вторая эпоха, потеря будет go прямой без нан. Любые причины почему?

#!/usr/bin/env python
# coding: utf-8

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
import keras
import cv2
from keras.utils import Sequence


# In[3]:


from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.losses import binary_crossentropy
from keras.callbacks import EarlyStopping




import numpy as np 
import os
import skimage.io as io
import skimage.transform as trans
import numpy as np
from keras.models import *
from keras.layers import *
from keras.optimizers import *


path = '/home/umfarooq0/RooftopSolar/'

long_lat_file = 'polygonVertices_LatitudeLongitude.csv'

pol_long_lat = pd.read_csv(path + long_lat_file)

pol_long_lat.head()

pixel_coord_file = 'polygonVertices_PixelCoordinates.csv'

pol_coord = pd.read_csv(path + pixel_coord_file)

pol_coord.shape

pol_coord.iloc[0,:]

pol_coord['join'] = pol_coord[pol_coord.columns[2:]].apply(
    lambda x: ','.join(x.dropna().astype(str)),
    axis=1)

except_vert_file = 'polygonDataExceptVertices.csv'

except_vert = pd.read_csv(path + except_vert_file)

# we need to create a variable which has image name and
# we assign a class to each image name, based on how many polygons they have
def create_class(x):
    if x['polygon_id'] < 20:
        return int(0)
    elif x['polygon_id'] == 20:
        return int(1)
    elif 20 < x['polygon_id'] < 50:
        return int(2)
    elif x['polygon_id'] > 50:
        return int(3)

df2_vals = except_vert.groupby(['image_name']).count()['polygon_id']
df2_vals = pd.DataFrame(df2_vals)
df2_vals['class'] = df2_vals.apply(create_class,axis = 1)

df_coord = pd.merge(except_vert,pol_coord,on = 'polygon_id')

def rle_to_mask(rle_string,height,width):
    '''
    convert RLE(run length encoding) string to numpy array

    Parameters: 
    rleString (str): Description of arg1 
    height (int): height of the mask
    width (int): width of the mask 

    Returns: 
    numpy.array: numpy array of the mask
    '''
    rows, cols = height, width
    if rle_string == -1:
        return np.zeros((height, width))
    else:
        rleNumbers = [int(float(numstring)) for numstring in rle_string.split(' ')]
        #rleNumbers = rle_string
        rlePairs = np.array(rleNumbers).reshape(-1,2)

        img = np.zeros(rows*cols,dtype=np.uint8)
        for index,length in rlePairs:
            index -= 1
            img[index:index+length] = 255
        img = img.reshape(cols,rows)
        img = img.T
        return img

def mask_to_rle(mask):
    '''
    Convert a mask into RLE

    Parameters: 
    mask (numpy.array): binary mask of numpy array where 1 - mask, 0 - background

    Returns: 
    sring: run length encoding 
    '''
    pixels= mask.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)


# In[13]:


class DataGenerator(Sequence):
    def __init__(self, list_ids, labels, image_dir, batch_size=2,
                 img_h=512, img_w=512, shuffle= False):
        #self.steps_per_epoch = steps_per_epoch
        self.list_ids = list_ids
        self.labels = labels
        self.image_dir = image_dir
        self.batch_size = batch_size
        self.img_h = img_h
        self.img_w = img_w
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'denotes the number of batches per epoch'
        return int(np.floor(len(self.list_ids)) / self.batch_size)

    def __getitem__(self, index):
        'generate one batch of data'
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        # get list of IDs
        list_ids_temp = [self.list_ids[k] for k in indexes]
        # generate data
        X, y = self.__data_generation(list_ids_temp)
        # return data 
        return X, y

    def on_epoch_end(self):
        'update ended after each epoch'
        self.indexes = np.arange(len(self.list_ids))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_ids_temp):
        'generate data containing batch_size samples'
        X = np.empty((self.batch_size, self.img_h, self.img_w, 1))
        y = np.empty((self.batch_size, self.img_h, self.img_w, 4)) #  this was originally 4, but changed to 1

        cls_ = []
        imn = []

        for idx, id in enumerate(list_ids_temp):
            file_path =  os.path.join(self.image_dir, id+'.tif')
            lc = os.path.exists(file_path)
            if lc is True:

                image = cv2.imread(file_path, 0)

               #print(id + '__load_image')


                im_sz = image.size

                if im_sz > 0:
                    #print('check_size')

                    image_resized = cv2.resize(image, (self.img_w, self.img_h))

                    image_resized = np.array(image_resized, dtype=np.float64)
                    # standardization of the image
                    image_resized -= image_resized.mean()
                    image_resized /= image_resized.std()

                    mask = np.empty((img_h, img_w, 4))


                    rle = self.labels.get(id)
                    total_classes = [0,1,2,3]

                    # we need to get what class each id is
                    class_ =int(df2_vals[df2_vals.index == id ]['class'][0])

                   # cls_.append(class_)
                   # imn.append(id)

                    if rle is None:
                        class_mask = np.zeros((5000, 5000))
                    else:
                        class_mask = rle_to_mask(rle, width=5000, height=5000)

                    class_mask_resized = cv2.resize(class_mask, (self.img_w, self.img_h))
                    mask[...,class_] = class_mask_resized
                    total_classes.remove(class_)
                    for ix in total_classes:
                        class_mask = np.zeros((5000, 5000))
                        class_mask_resized = cv2.resize(class_mask, (self.img_w, self.img_h))
                        mask[...,ix] = class_mask_resized

                    # if there is no mask create empty mask



                    X[idx,] = np.expand_dims(image_resized, axis=2)
                    y[idx,] = mask


        # normalize Y
        #y = (y > 0).astype(int)

        return X,y

in_un = except_vert.image_name.unique()

len(except_vert.image_name.unique())


in_un = pd.DataFrame(in_un,columns = ['image_name'])

sample_data = in_un.merge(df_coord,how = 'inner', on='image_name')

sample_data['join'] = sample_data['join'].apply(lambda x: x.replace(","," "))

train_image_ids = in_un
val_size = 20
train_image_ids = train_image_ids[train_image_ids.image_name != '11ska505815']
train_image_ids = train_image_ids[train_image_ids.image_name != '10sfh465105']



X_train, X_val = train_test_split(train_image_ids, test_size=val_size, random_state=42)


# In[199]:


masks = {}
for index, row in  sample_data[ sample_data['join']!=-1].iterrows():
    masks[row['image_name']] = row['join']

img_h = 512
img_w = 512
train_image_dir = path + 'train_data'
batch_size = 4

params = {'img_h': img_h,
          'img_w': img_w,
          'image_dir': train_image_dir,
          'batch_size': batch_size,

          'shuffle': True}

X_train = np.array(X_train)
X_train = X_train.reshape(X_train.shape[0])

X_val = np.array(X_val)
X_val = X_val.reshape(X_val.shape[0])

training_generator = DataGenerator(X_train, masks, **params)
validation_generator = DataGenerator(X_val, masks, **params)



def unet(pretrained_weights = None,input_size = (512,512,1)):
    inputs = Input(input_size)
    conv1 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(inputs)
    conv1 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool1)
    conv2 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool2)
    conv3 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool3)
    conv4 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv4)
    drop4 = Dropout(0.5)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

    conv5 = Conv2D(1024, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool4)
    conv5 = Conv2D(1024, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv5)
    drop5 = Dropout(0.5)(conv5)

    up6 = Conv2D(512, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(drop5))
    merge6 = concatenate([drop4,up6], axis = 3)
    conv6 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge6)
    conv6 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv6)

    up7 = Conv2D(256, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv6))
    merge7 = concatenate([conv3,up7], axis = 3)
    conv7 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge7)
    conv7 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv7)

    up8 = Conv2D(128, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv7))
    merge8 = concatenate([conv2,up8], axis = 3)
    conv8 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge8)
    conv8 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv8)

    up9 = Conv2D(64, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv8))
    merge9 = concatenate([conv1,up9], axis = 3)
    conv9 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge9)
    conv9 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv9)
    conv9 = Conv2D(2, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv9)
    conv10 = Conv2D(4, 1, activation = 'sigmoid')(conv9)

    model = Model(input = inputs, output = conv10)

    model.compile(optimizer = Adam(lr = 1e-6), loss = 'binary_crossentropy', metrics = ['accuracy'])


    #model.summary()

    if(pretrained_weights):
        model.load_weights(pretrained_weights)

    return model


model = unet()
epochs = 10


history = model.fit_generator(generator=training_generator, validation_data=validation_generator, epochs=epochs, verbose=1)

model.save('RooftopSolar_1.h5')

twcmchang · Answer 1 · 25 марта 2020

Большинство проблем с NAN возникают из-за шага предварительной обработки данных или данных.

Я полагаю, что проблема, вероятно, является результатом шага деления стандартного ввода при нормализации изображений. Так как стандартное значение равно нулю (как все изображение белого или черного цвета), каждое значение пикселя будет разделено на 0, что приведет к NAN в Python.

Кроме того, я бы предложил использовать batchsize = 1 и цикл по всем данным (вместо генератора данных), чтобы выяснить, какое изображение приводит к проблеме.

получение NAN на первой итерации при запуске сегментации изображения

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

1 Ответ

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

получение NAN на первой итерации при запуске сегментации изображения

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

1 Ответ

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Нет похожих вопросов