Вы можете сделать это, используя tf.data.Dataset.from_generator():
def map_func(img_name, cap_train, target):
    """Load the .npy feature file named by *img_name* (ASCII bytes) as float32.

    The captions and targets pass through unchanged; only the image name is
    swapped for the loaded array.
    """
    path = img_name.decode('ascii') + '.npy'
    features = np.load(path).astype(np.float32)
    return features, cap_train, target
def gen():
    """Yield (image name, captions, targets) triples for each training sample."""
    samples = [
        ('image_1.jpg',
         [[0, 1, 2], [1, 2, 3], [2, 3, 4]],
         [[3], [4], [5]]),
        ('image_2.jpg',
         [[5, 6, 7], [6, 7, 8], [7, 8, 9]],
         [[8], [9], [10]]),
    ]
    for name, captions, targets in samples:
        yield name, captions, targets
# Build the dataset: the generator emits (name, captions, targets) with these
# element dtypes; explicit output shapes are optional here.
output_types = (tf.string, tf.int32, tf.int32)
dataset = tf.data.Dataset.from_generator(gen, output_types)

# Use map to load the numpy files in parallel: numpy_function runs map_func
# eagerly, replacing the file name tensor with the loaded float32 features.
def _load(img_name, cap_train, target):
    return tf.numpy_function(map_func, [img_name, cap_train, target],
                             [tf.float32, tf.int32, tf.int32])

dataset = dataset.map(_load).batch(1)

for item in dataset:
    print(item)
Редактировать: создание пакетов данных.
def map_func(img_name, cap_train, target):
    """Load a batch of .npy feature files and stack them into one float32 array.

    *img_name* is a sequence of ASCII byte strings (one per sample); captions
    and targets pass through unchanged.
    """
    arrays = []
    for name in img_name:
        arrays.append(np.load(name.decode('ascii') + '.npy').astype(np.float32))
    img_tensor = np.stack(arrays)
    return img_tensor, cap_train, target
def gen(batch_size):
    """Yield batches of (names, captions, targets), each of up to batch_size samples.

    Args:
        batch_size: number of training samples per yielded batch; the final
            batch may be smaller when the sample count is not a multiple of it.

    Yields:
        Three parallel tuples: image names, caption lists, target lists.
    """
    train = [
        [
            'image_1.jpg',
            [[0, 1, 2], [1, 2, 3], [2, 3, 4]],
            [[3], [4], [5]]
        ],
        [
            'image_2.jpg',
            [[5, 6, 7], [6, 7, 8], [7, 8, 9]],
            [[8], [9], [10]]
        ],
    ]
    # Transpose per-sample records into per-field tuples.
    grp1, grp2, grp3 = zip(*train)
    # BUG FIX: `i` already advances in steps of batch_size, so the original
    # slices [i*batch_size:(i+1)*batch_size] skipped samples and yielded empty
    # batches whenever batch_size > 1 with more than one batch. Slice with
    # [i:i+batch_size] instead.
    for i in range(0, len(train), batch_size):
        yield grp1[i:i + batch_size], grp2[i:i + batch_size], grp3[i:i + batch_size]
# Build the batched dataset: args=[2] is forwarded to gen() as the batch size,
# and the leading None in each shape is the (variable) batch dimension.
element_types = (tf.string, tf.int32, tf.int32)
element_shapes = (tf.TensorShape([None]),
                  tf.TensorShape([None, 3, 3]),
                  tf.TensorShape([None, 3, 1]))
dataset = tf.data.Dataset.from_generator(gen, element_types, element_shapes,
                                         args=[2])

# Use map to load the numpy files in parallel
dataset = dataset.map(
    lambda img_name, cap_train, target: tf.numpy_function(
        map_func, [img_name, cap_train, target],
        [tf.float32, tf.int32, tf.int32]))