I am running the following code on GCP. I have 8 CPUs and 1 Tesla K80 GPU. Sometimes when I run it, I get a NaN value on the very first iteration. Other times the first epoch goes really well: it seems to be learning, the loss decreases, the accuracy is 0.9 and above, and then in the second epoch the loss goes straight to NaN. Any reasons why?
#!/usr/bin/env python
# coding: utf-8
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
import keras
import cv2
from keras.utils import Sequence
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.losses import binary_crossentropy
from keras.callbacks import EarlyStopping
import os
import skimage.io as io
import skimage.transform as trans
from keras.models import *
from keras.layers import *
from keras.optimizers import *
path = '/home/umfarooq0/RooftopSolar/'
long_lat_file = 'polygonVertices_LatitudeLongitude.csv'
pol_long_lat = pd.read_csv(path + long_lat_file)
pol_long_lat.head()
pixel_coord_file = 'polygonVertices_PixelCoordinates.csv'
pol_coord = pd.read_csv(path + pixel_coord_file)
pol_coord.shape
pol_coord.iloc[0,:]
pol_coord['join'] = pol_coord[pol_coord.columns[2:]].apply(
lambda x: ','.join(x.dropna().astype(str)),
axis=1)
except_vert_file = 'polygonDataExceptVertices.csv'
except_vert = pd.read_csv(path + except_vert_file)
# assign a class to each image name, based on how many polygons it has
def create_class(x):
    if x['polygon_id'] < 20:
        return 0
    elif x['polygon_id'] == 20:
        return 1
    elif 20 < x['polygon_id'] < 50:
        return 2
    else:  # 50 or more polygons
        return 3
df2_vals = except_vert.groupby(['image_name']).count()['polygon_id']
df2_vals = pd.DataFrame(df2_vals)
df2_vals['class'] = df2_vals.apply(create_class,axis = 1)
df_coord = pd.merge(except_vert,pol_coord,on = 'polygon_id')
def rle_to_mask(rle_string, height, width):
    '''
    Convert an RLE (run length encoding) string to a numpy array.
    Parameters:
        rle_string (str): run length encoding of the mask
        height (int): height of the mask
        width (int): width of the mask
    Returns:
        numpy.array: numpy array of the mask
    '''
    rows, cols = height, width
    if rle_string == -1:
        return np.zeros((height, width))
    else:
        rleNumbers = [int(float(numstring)) for numstring in rle_string.split(' ')]
        rlePairs = np.array(rleNumbers).reshape(-1, 2)
        img = np.zeros(rows * cols, dtype=np.uint8)
        for index, length in rlePairs:
            index -= 1
            img[index:index + length] = 255
        img = img.reshape(cols, rows)
        img = img.T
        return img
def mask_to_rle(mask):
    '''
    Convert a mask into RLE.
    Parameters:
        mask (numpy.array): binary mask as a numpy array, 1 - mask, 0 - background
    Returns:
        string: run length encoding
    '''
    pixels = mask.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)
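# Quick round-trip sanity check for the two RLE helpers above (a minimal sketch,
# not part of the original pipeline): encoding a toy mask and decoding it back
# should reproduce the same foreground pixels.
_toy_mask = np.zeros((4, 4), dtype=np.uint8)
_toy_mask[1:3, 1:3] = 1
_decoded = rle_to_mask(mask_to_rle(_toy_mask), height=4, width=4)
assert ((_decoded > 0).astype(np.uint8) == _toy_mask).all()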
class DataGenerator(Sequence):
    def __init__(self, list_ids, labels, image_dir, batch_size=2,
                 img_h=512, img_w=512, shuffle=False):
        self.list_ids = list_ids
        self.labels = labels
        self.image_dir = image_dir
        self.batch_size = batch_size
        self.img_h = img_h
        self.img_w = img_w
        self.shuffle = shuffle
        self.on_epoch_end()
    def __len__(self):
        'denotes the number of batches per epoch'
        return int(np.floor(len(self.list_ids) / self.batch_size))
    def __getitem__(self, index):
        'generate one batch of data'
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        # get the list of IDs for this batch
        list_ids_temp = [self.list_ids[k] for k in indexes]
        # generate data
        X, y = self.__data_generation(list_ids_temp)
        return X, y
    def on_epoch_end(self):
        'updates the index order after each epoch'
        self.indexes = np.arange(len(self.list_ids))
        if self.shuffle:
            np.random.shuffle(self.indexes)
    def __data_generation(self, list_ids_temp):
        'generate data containing batch_size samples'
        # np.empty does not zero-initialize: if a file below is missing or unreadable,
        # its slot in X and y keeps arbitrary values
        X = np.empty((self.batch_size, self.img_h, self.img_w, 1))
        y = np.empty((self.batch_size, self.img_h, self.img_w, 4))  # one mask channel per class
        for idx, id in enumerate(list_ids_temp):
            file_path = os.path.join(self.image_dir, id + '.tif')
            if os.path.exists(file_path):
                image = cv2.imread(file_path, 0)
                if image.size > 0:
                    image_resized = cv2.resize(image, (self.img_w, self.img_h))
                    image_resized = np.array(image_resized, dtype=np.float64)
                    # standardization of the image; if the tile is uniform,
                    # image_resized.std() is 0 and this division puts NaN into X
                    image_resized -= image_resized.mean()
                    image_resized /= image_resized.std()
                    mask = np.empty((self.img_h, self.img_w, 4))
                    rle = self.labels.get(id)
                    total_classes = [0, 1, 2, 3]
                    # we need to get what class each id is
                    class_ = int(df2_vals[df2_vals.index == id]['class'].iloc[0])
                    # if there is no mask, create an empty mask
                    if rle is None:
                        class_mask = np.zeros((5000, 5000))
                    else:
                        class_mask = rle_to_mask(rle, width=5000, height=5000)
                    class_mask_resized = cv2.resize(class_mask, (self.img_w, self.img_h))
                    mask[..., class_] = class_mask_resized
                    total_classes.remove(class_)
                    for ix in total_classes:
                        class_mask = np.zeros((5000, 5000))
                        class_mask_resized = cv2.resize(class_mask, (self.img_w, self.img_h))
                        mask[..., ix] = class_mask_resized
                    X[idx,] = np.expand_dims(image_resized, axis=2)
                    y[idx,] = mask
        # normalize y; without this, y holds values up to 255 rather than 0/1,
        # which binary_crossentropy does not expect
        #y = (y > 0).astype(int)
        return X, y
in_un = except_vert.image_name.unique()
len(except_vert.image_name.unique())
in_un = pd.DataFrame(in_un,columns = ['image_name'])
sample_data = in_un.merge(df_coord,how = 'inner', on='image_name')
sample_data['join'] = sample_data['join'].apply(lambda x: x.replace(","," "))
train_image_ids = in_un
val_size = 20
train_image_ids = train_image_ids[train_image_ids.image_name != '11ska505815']
train_image_ids = train_image_ids[train_image_ids.image_name != '10sfh465105']
X_train, X_val = train_test_split(train_image_ids, test_size=val_size, random_state=42)
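# note: test_size is an int here, so train_test_split holds out exactly 20 image
# names for validation rather than 20 percent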
masks = {}
for index, row in sample_data[sample_data['join'] != -1].iterrows():
    masks[row['image_name']] = row['join']
img_h = 512
img_w = 512
train_image_dir = path + 'train_data'
batch_size = 4
params = {'img_h': img_h,
'img_w': img_w,
'image_dir': train_image_dir,
'batch_size': batch_size,
'shuffle': True}
X_train = np.array(X_train)
X_train = X_train.reshape(X_train.shape[0])
X_val = np.array(X_val)
X_val = X_val.reshape(X_val.shape[0])
training_generator = DataGenerator(X_train, masks, **params)
validation_generator = DataGenerator(X_val, masks, **params)
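# A minimal sanity check (sketch, not part of the original run): scan a few batches
# from the generator for NaN / inf values, since non-finite inputs or targets are
# one way the loss can become NaN.
for _bi in range(min(5, len(training_generator))):
    _Xb, _yb = training_generator[_bi]
    if not (np.isfinite(_Xb).all() and np.isfinite(_yb).all()):
        print('non-finite values in batch', _bi)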
def unet(pretrained_weights = None, input_size = (512,512,1)):
    inputs = Input(input_size)
    conv1 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(inputs)
    conv1 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool1)
    conv2 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool2)
    conv3 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool3)
    conv4 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv4)
    drop4 = Dropout(0.5)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)
    conv5 = Conv2D(1024, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool4)
    conv5 = Conv2D(1024, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv5)
    drop5 = Dropout(0.5)(conv5)
    up6 = Conv2D(512, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(drop5))
    merge6 = concatenate([drop4,up6], axis = 3)
    conv6 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge6)
    conv6 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv6)
    up7 = Conv2D(256, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv6))
    merge7 = concatenate([conv3,up7], axis = 3)
    conv7 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge7)
    conv7 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv7)
    up8 = Conv2D(128, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv7))
    merge8 = concatenate([conv2,up8], axis = 3)
    conv8 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge8)
    conv8 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv8)
    up9 = Conv2D(64, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv8))
    merge9 = concatenate([conv1,up9], axis = 3)
    conv9 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge9)
    conv9 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv9)
    conv9 = Conv2D(2, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv9)
    conv10 = Conv2D(4, 1, activation = 'sigmoid')(conv9)
    model = Model(inputs = inputs, outputs = conv10)
    model.compile(optimizer = Adam(lr = 1e-6), loss = 'binary_crossentropy', metrics = ['accuracy'])
    #model.summary()
    if (pretrained_weights):
        model.load_weights(pretrained_weights)
    return model
model = unet()
epochs = 10
history = model.fit_generator(generator=training_generator, validation_data=validation_generator, epochs=epochs, verbose=1)
model.save('RooftopSolar_1.h5')
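A minimal sketch of how training could be stopped as soon as the loss becomes NaN, using the standard keras.callbacks.TerminateOnNaN callback (this is not part of the script above, just the same fit_generator call with a callback added so the offending epoch/batch is easier to pin down):

from keras.callbacks import TerminateOnNaN

# stop training the moment a NaN loss is reported
history = model.fit_generator(generator=training_generator,
                              validation_data=validation_generator,
                              epochs=epochs, verbose=1,
                              callbacks=[TerminateOnNaN()])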