Поскольку прямого ответа я так и не получил, предполагаю, что в TF 2.3 это сделать невозможно.
Поэтому я обратился к обсуждению, на которое сослался AerysS, в частности к ответу пользователя timehaven, и использовал его код для генерации батчей из датафрейма pandas с помощью функций Keras load_img и img_to_array.
Код был написан для Python 2.7, поэтому при переносе я внёс в него несколько изменений, и у меня он работает с Python 3.6.8.
data_generator.py
from __future__ import print_function
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
import numpy as np
import pandas as pd
import bcolz
import threading
import os
import sys
import glob
import shutil
# Module-wide lock; safe_bcolz_open() holds it around every bcolz.open() call
# so that bcolz reads from different threads never overlap.
bcolz_lock = threading.Lock()
# Disabled alternative kept for reference: presumably limits blosc to a single
# thread instead of (or in addition to) locking -- TODO confirm before enabling.
# old_blosc_nthreads = bcolz.blosc_set_nthreads(1)
# assert bcolz.blosc_set_nthreads(1) == 1
def safe_bcolz_open(fname, idx=None, debug=False):
    """Read a bcolz array from disk while holding the module-wide lock.

    Args:
        fname: Location handed straight to ``bcolz.open``.
        idx: Optional row selector. When given, ``bcolz.open(fname)[idx]``
            is returned; otherwise the opened bcolz object itself.
        debug: When true, run sanity assertions on the result. They call
            ``len(idx)``, so ``debug=True`` requires a non-``None`` ``idx``.

    Returns:
        The opened bcolz object, or the rows selected by ``idx``.

    Raises:
        AssertionError: In debug mode, if the number of rows differs from
            ``len(idx)`` or if any value in a row (cast to int) differs
            from that row's index label.
    """
    with bcolz_lock:
        opened = bcolz.open(fname)
        data = opened if idx is None else opened[idx]

        if not debug:
            return data

        frame = pd.DataFrame(data, index=idx)
        assert data.shape[0] == len(idx)
        assert data.shape == frame.shape

        # Subtract each row's index label from every value in the row; the
        # check passes only when all differences are zero.
        # NOTE(review): presumably the debug data stores the row number in
        # every column -- confirm against how the features file is built.
        frame = frame.astype(int)
        row_matches = (frame.subtract(frame.index.values, axis=0) == 0).all(axis=1)
        assert row_matches.all(), frame[~row_matches]
        return data
class threadsafe_iter:
    """Wrap an iterator so that only one thread at a time can advance it.

    Every call that fetches the next item takes a per-instance lock before
    delegating to the wrapped iterator, so concurrent consumers cannot be
    inside the underlying iterator/generator simultaneously.

    Args:
        it: The iterator (or generator object) to serialize access to.
    """

    def __init__(self, it):
        self.it = it
        # Each wrapper owns a freshly created lock, so it is necessarily
        # distinct from every other lock object in the program.
        self.lock = threading.Lock()

    def __iter__(self):
        """Return ``self``; the wrapper is its own iterator."""
        return self

    def __next__(self):
        """Return the next item of the wrapped iterator under the lock.

        Raises:
            StopIteration: Propagated from the wrapped iterator.
        """
        with self.lock:
            # The built-in next() works for both Python 2 and Python 3
            # iterators, unlike calling ``self.it.next()`` directly.
            return next(self.it)

    # Python 2 spelling of the iterator protocol; shares the implementation
    # above so it also works when the wrapped object is a Python 3 iterator.
    next = __next__
def threadsafe_generator(f):
    """Decorator that makes a generator function return a locked iterator.

    Args:
        f: A callable (typically a generator function) whose return value
            is an iterator.

    Returns:
        A wrapper with the same call signature as ``f`` that passes its
        arguments through and wraps the result in ``threadsafe_iter``. The
        wrapper keeps the name and docstring of ``f``.
    """
    # Imported locally so the block does not depend on a module-level import.
    from functools import wraps

    @wraps(f)
    def g(*a, **kw):
        return threadsafe_iter(f(*a, **kw))

    return g
@threadsafe_generator
def generator_from_df(df, batch_size, target_size, features=None,
                      debug_merged=False):
    """Yield shuffled mini-batches of images and targets from ``df`` forever.

    Each epoch reshuffles ``df`` and walks it in consecutive slices of
    ``batch_size`` rows. Rows left over after the last complete slice are
    not used in that epoch. A mini-batch whose loading raises ``IOError``
    is logged and skipped; the generator then moves on to the next slice.

    Args:
        df: pandas DataFrame with an ``imgpath`` column (image file paths)
            and a ``target`` column.
        batch_size: Number of rows per mini-batch.
        target_size: Passed to ``load_img`` as ``target_size``.
        features: Optional path of a bcolz array read through
            ``safe_bcolz_open`` using the batch's index values; it must
            have as many rows as ``df``.
        debug_merged: Forwarded as ``debug`` to ``safe_bcolz_open``.

    Yields:
        ``(X, Y)`` when ``features`` is ``None``, otherwise ``([X, X2], Y)``,
        where ``X`` is the array of rescaled images, ``X2`` the matching
        feature rows and ``Y`` is ``sub.target.values`` for the batch.

    Raises:
        AssertionError: If ``features`` does not exist or its row count
            differs from ``df``.
        ValueError: If no complete mini-batch fits into ``df``; without this
            check the generator would loop forever without yielding.
    """
    # Imported locally so the block does not depend on a module-level import.
    import logging

    if features is not None:
        assert os.path.exists(features)
        assert safe_bcolz_open(features).shape[0] == df.shape[0], "Features rows must match df!"

    nbatches = df.shape[0] // batch_size
    if nbatches < 1:
        raise ValueError(
            "batch_size=%r yields no complete mini-batch from %d rows"
            % (batch_size, df.shape[0])
        )

    log = logging.getLogger(__name__)

    # One pass of the outer loop is one epoch.
    while True:
        df = df.sample(frac=1)  # frac=1 is the same as shuffling df.

        for start in range(0, nbatches * batch_size, batch_size):
            sub = df.iloc[start:start + batch_size]
            try:
                # Rescale pixels: v -> 2 * (v / 255 - 0.5), i.e. values in
                # [0, 255] end up in [-1, 1].
                X = np.array([
                    2 * (img_to_array(load_img(f, target_size=target_size)) / 255.0 - 0.5)
                    for f in sub.imgpath
                ])
                Y = sub.target.values
                if features is None:
                    inputs = X
                else:
                    X2 = safe_bcolz_open(features, sub.index.values, debug=debug_merged)
                    inputs = [X, X2]
            except IOError as err:
                # Deliberate best-effort: an unreadable file drops only this
                # mini-batch. Log it so that a dataset where every batch
                # fails does not stall silently.
                log.warning("Skipping mini-batch starting at row %d: %s", start, err)
                continue

            yield inputs, Y
train.py
from data_generator import generator_from_df
def construct_dataframe(img_path, labels_path):
    """Build a DataFrame pairing the PNG files in ``img_path`` with labels.

    Args:
        img_path: Directory searched (non-recursively) for ``*.png`` files.
        labels_path: Passed to ``load_labels``.

    Returns:
        A pandas DataFrame with columns ``imgpath`` (the matched file paths)
        and ``target`` (whatever ``load_labels`` returns).
    """
    # ``glob`` is called directly, so it must be the function here
    # (presumably ``from glob import glob`` -- verify the file's imports).
    # NOTE(review): rows are paired by position and glob() does not promise
    # a sorted order -- confirm that load_labels() returns a matching order.
    png_pattern = os.path.join(img_path, '*.png')
    return pd.DataFrame({
        'imgpath': glob(png_pattern),
        'target': load_labels(labels_path),
    })
# Build the training table; train_x_dir and train_y_dir are not defined in
# this excerpt and must be set beforehand.
train_df = construct_dataframe(train_x_dir, train_y_dir)
# Batch generator over train_df; images are loaded with a square target size
# of (img_size, img_size). batch_size and img_size are defined elsewhere.
train_generator = generator_from_df(train_df, batch_size, (img_size, img_size))
# load and compile model
# ...
# The generator is passed to fit() as the training data; the remaining
# arguments are elided in this excerpt.
# NOTE(review): the generator never terminates, so fit() presumably needs
# steps_per_epoch to know where an epoch ends -- confirm.
model.fit(train_generator, ...)