My current approach is as follows: first I load the images into memory like this:
import gc
import os

import numpy as np
from skimage.io import imread
from skimage.transform import resize

def load_data_from_directory(root_dir, image_height, image_format='jpg', mask_format='png'):
    """
    Loads train images and corresponding masks, resized to the specified size.
    Masks must have the same file name as their image.
    Pixel values are divided by 255 to lie between 0 and 1.
    Folder locations:
        > images (jpg format)
        > segmentation (png format)
    Example of usage:
        from common_blocks.data_loaders import load_data_from_directory
        data_dir = './data_objects'
        x_train, y_train = load_data_from_directory(data_dir, image_height=256)
    """
    data = []
    for stage in ['train']:  # a 'test' stage can be added here
        directory = os.path.join(root_dir, 'images')
        file_names = [filename.replace(image_format, mask_format)
                      for filename in os.listdir(directory)]
        fps = [os.path.join(directory, filename) for filename in os.listdir(directory)]
        for content in ['images', 'segmentation']:
            # construct the path to each file; masks live in 'segmentation'
            directory = os.path.join(root_dir, content)
            if content != 'images':
                fps = [os.path.join(directory, filename) for filename in file_names]
            # read images and normalize to [0, 1]
            images = [imread(filepath) / 255 for filepath in fps]
            # if images have different sizes, resize them to a common shape
            resized = [resize(image, (image_height, image_height)) for image in images]
            # stack into one np.array of shape (n, H, W, C)
            np_images = np.stack(resized, axis=0)
            data.append(np_images)
            del images, resized
            gc.collect()
    return data
x_train, y_train = load_data_from_directory('./train', image_height, 'jpg', 'png')
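Note that imread(filepath) / 255 promotes every image to float64, so the in-memory footprint grows quickly. A rough back-of-the-envelope check (the 10,000-image count here is only an illustrative assumption):

n_images = 10_000                      # assumed dataset size, for illustration only
bytes_needed = n_images * 256 * 256 * 3 * 8   # float64 = 8 bytes per value
print(f"{bytes_needed / 1e9:.1f} GB")  # ~15.7 GB for the images alone; masks come on top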
Then I pass these arrays to a DataGenerator:
import numpy as np
from keras.utils import Sequence

class DataGenerator(Sequence):
    '''
    Sample usage:
        test_generator = DataGenerator(x_train, y_train, 1,
                                       image_sizes, image_sizes, 1, True)
        Xtest, ytest = test_generator.__getitem__(1)
        plt.imshow(Xtest[0])
        plt.show()
        plt.imshow(ytest[0, :, :, 0])
        plt.show()
    '''
    def __init__(self, X, y, batch_size, height, width, nb_y_features, augmentation=True):
        'Initialization'
        self.batch_size = batch_size
        self.X = X
        self.y = y
        self.indexes = None
        self.augmentation = augmentation
        self.height = height
        self.width = width
        self.nb_y_features = nb_y_features
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.ceil(len(self.X) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        data_index_min = int(index * self.batch_size)
        data_index_max = int(min((index + 1) * self.batch_size, len(self.indexes)))
        indexes = self.indexes[data_index_min:data_index_max]
        this_batch_size = len(indexes)  # the last batch can be smaller than the others

        X = np.empty((this_batch_size, self.width, self.height, 3))
        y = np.empty((this_batch_size, self.width, self.height, self.nb_y_features), dtype=int)

        for i, sample_index in enumerate(indexes):
            X_sample, y_sample = self.X[sample_index].copy(), self.y[sample_index].copy()
            if self.augmentation:
                # aug() is defined elsewhere and returns an albumentations pipeline
                augmented = aug()(image=X_sample, mask=y_sample)
                X[i, ...] = augmented['image']
                y[i, ...] = augmented['mask']
            else:
                X[i, ...] = X_sample
                y[i, ...] = y_sample
        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = list(range(len(self.X)))
        np.random.shuffle(self.indexes)
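The aug() helper is not shown above; it presumably returns an albumentations pipeline. A minimal sketch of what it might look like (the specific transforms here are assumptions, not the original setup):

import albumentations as A

def aug():
    # hypothetical augmentation pipeline; swap in whichever transforms you actually use
    return A.Compose([
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.3),
    ])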
Then this generator is used to train the model:
from keras.optimizers import Adam
from segmentation_models import Unet
from segmentation_models.losses import bce_jaccard_loss
from segmentation_models.metrics import iou_score

training_generator = DataGenerator(x_train, y_train, batch_size,
                                   height=image_height, width=image_width,
                                   nb_y_features=1, augmentation=True)
# BACKBONE is defined elsewhere, e.g. 'resnet34'
model = Unet(BACKBONE, encoder_weights='imagenet', encoder_freeze=False)
model.compile(optimizer=Adam(), loss=bce_jaccard_loss, metrics=[iou_score])
history = model.fit_generator(training_generator, shuffle=True, epochs=10)
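As a side note, if you are on a recent TensorFlow/Keras version, fit_generator is deprecated and model.fit accepts a Sequence directly:

history = model.fit(training_generator, epochs=10)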
The problem is the size of the data. If it is small enough to fit in memory, everything works; as soon as it grows beyond that, training crashes with an out-of-memory error. How can I read files from the folder directly, in random order, instead of loading everything up front?
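A minimal sketch of the idea, assuming the same images/ and segmentation/ layout as above (the class name FolderDataGenerator and all parameter names are mine, not an established API): keep only the file paths in memory, shuffle them at the end of each epoch, and read and resize only one batch from disk in __getitem__.

import os

import numpy as np
from keras.utils import Sequence
from skimage.io import imread
from skimage.transform import resize

class FolderDataGenerator(Sequence):
    """Reads image/mask batches straight from disk instead of holding them in RAM."""

    def __init__(self, root_dir, batch_size, image_height,
                 image_format='jpg', mask_format='png'):
        image_dir = os.path.join(root_dir, 'images')
        mask_dir = os.path.join(root_dir, 'segmentation')
        file_names = sorted(os.listdir(image_dir))
        self.image_paths = [os.path.join(image_dir, f) for f in file_names]
        self.mask_paths = [os.path.join(mask_dir, f.replace(image_format, mask_format))
                           for f in file_names]
        self.batch_size = batch_size
        self.image_height = image_height
        self.indexes = np.arange(len(self.image_paths))
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, index):
        idxs = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        # only this batch is ever in memory at once
        images = [resize(imread(self.image_paths[i]) / 255,
                         (self.image_height, self.image_height)) for i in idxs]
        masks = [resize(imread(self.mask_paths[i]) / 255,
                        (self.image_height, self.image_height)) for i in idxs]
        X = np.stack(images, axis=0)
        y = np.stack(masks, axis=0)
        if y.ndim == 3:  # add a channel axis for single-channel masks
            y = y[..., np.newaxis]
        return X, y

    def on_epoch_end(self):
        # reshuffle file order so batches differ between epochs
        np.random.shuffle(self.indexes)

Usage would then mirror the in-memory version: training_generator = FolderDataGenerator('./train', batch_size, image_height), passed to model.fit_generator as before. Augmentation can be applied inside __getitem__ exactly as in the original DataGenerator.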