I am following this link to implement a cDCGAN on my own dataset. My dataset contains almost 391510 images. The images in my dataset are 64x64, whereas the MNIST images used in that link are 28x28. My dataset has 2350 labels, while the MNIST dataset has 10.
My dataset is in .tfrecords format, so I use the `get_image()` function shown below to retrieve a batch of images and labels from it. When I run my code, I get the following error:
```
tensorflow.python.framework.errors_impl.InternalError: Dst tensor is not initialized.
	 [[Node: _arg_Placeholder_3_0_3/_43 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_2488__arg_Placeholder_3_0_3", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
	 [[Node: discriminator_1/batch_normalization/AssignMovingAvg_1/_86 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_2364_discriminator_1/batch_normalization/AssignMovingAvg_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
```
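From what I found, "Dst tensor is not initialized" usually means the GPU ran out of memory while a tensor was being copied to it. For reference, a TF 1.x session can be told not to grab all GPU memory up front (this is the standard config option, not something from the linked code):

```python
# Standard TF 1.x option (not from the linked code): let the GPU allocator
# grow on demand instead of reserving all GPU memory at startup.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
```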
When I searched for this error, I found that it occurs when the batch size is too large, so I changed the batch size to 32 and then got this new error:
```
tensorflow.python.framework.errors_impl.ResourceExhaustedError: OOM when allocating tensor with shape[32,64,64,2351] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: discriminator/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](_arg_Placeholder_0_0/_41, _arg_Placeholder_3_0_3/_43, discriminator/concat/axis)]]

Caused by op 'discriminator/concat', defined at:
  File "cdcgan.py", line 221, in <module>
    D_real, D_real_logits = discriminator(x, y_fill, isTrain)
  File "cdcgan.py", line 48, in discriminator
    cat1 = tf.concat([x, y_fill], 3)
```
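That shape makes the problem concrete: the discriminator concatenates the 1-channel image with the 2350-channel `y_fill`, so a single activation tensor is already over a gigabyte (my arithmetic below, before any conv activations or gradients):

```python
# Size of just the discriminator/concat output at batch size 32:
batch, h, w, c = 32, 64, 64, 2351  # 1 image channel + 2350 label channels
bytes_needed = batch * h * w * c * 4  # float32
print(bytes_needed / 2**30)  # ~1.15 GiB for this one tensor
```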
The section of my code where I modified the default code is shown below.
```python
import io
import os

import numpy as np
import tensorflow as tf

IMAGE_WIDTH = 64
IMAGE_HEIGHT = 64

# G(z)
def generator(x, y_label, isTrain=True, reuse=False):
    # lrelu is the leaky-ReLU helper defined in the base code from the link.
    with tf.variable_scope('generator', reuse=reuse):
        # initializer
        w_init = tf.truncated_normal_initializer(mean=0.0, stddev=0.02)
        b_init = tf.constant_initializer(0.0)

        # concat layer: (batch, 1, 1, 100) noise + (batch, 1, 1, 2350) label
        cat1 = tf.concat([x, y_label], 3)

        # 1st hidden layer: 1x1 -> 16x16
        deconv1 = tf.layers.conv2d_transpose(cat1, 256, [16, 16], strides=(1, 1), padding='valid', kernel_initializer=w_init, bias_initializer=b_init)
        lrelu1 = lrelu(tf.layers.batch_normalization(deconv1, training=isTrain), 0.2)

        # 2nd hidden layer: 16x16 -> 32x32
        deconv2 = tf.layers.conv2d_transpose(lrelu1, 128, [5, 5], strides=(2, 2), padding='same', kernel_initializer=w_init, bias_initializer=b_init)
        lrelu2 = lrelu(tf.layers.batch_normalization(deconv2, training=isTrain), 0.2)

        # output layer: 32x32 -> 64x64, single grayscale channel
        deconv3 = tf.layers.conv2d_transpose(lrelu2, 1, [5, 5], strides=(2, 2), padding='same', kernel_initializer=w_init, bias_initializer=b_init)
        o = tf.nn.tanh(deconv3)

        return o
```
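For reference, the deconvolution stack above should grow a 1x1 input to the 64x64 output size I need (a quick check of the expected sizes; the arithmetic below is mine, not from the linked code):

```python
# Expected generator feature-map sizes (my own arithmetic):
# 'valid' transpose conv: out = (in - 1) * stride + kernel
# 'same'  transpose conv: out = in * stride
in_size = 1
after_deconv1 = (in_size - 1) * 1 + 16  # 16
after_deconv2 = after_deconv1 * 2       # 32
after_deconv3 = after_deconv2 * 2       # 64, matches IMAGE_WIDTH
print(after_deconv1, after_deconv2, after_deconv3)  # 16 32 64
```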
```python
# D(x)
def discriminator(x, y_fill, isTrain=True, reuse=False):
    with tf.variable_scope('discriminator', reuse=reuse):
        # initializer
        w_init = tf.truncated_normal_initializer(mean=0.0, stddev=0.02)
        b_init = tf.constant_initializer(0.0)

        # concat layer: this is the op that produces the (batch, 64, 64, 2351)
        # tensor named in the OOM message above.
        cat1 = tf.concat([x, y_fill], 3)

        # 1st hidden layer: 64x64 -> 32x32
        conv1 = tf.layers.conv2d(cat1, 128, [5, 5], strides=(2, 2), padding='same', kernel_initializer=w_init, bias_initializer=b_init)
        lrelu1 = lrelu(conv1, 0.2)

        # 2nd hidden layer: 32x32 -> 16x16
        conv2 = tf.layers.conv2d(lrelu1, 256, [5, 5], strides=(2, 2), padding='same', kernel_initializer=w_init, bias_initializer=b_init)
        lrelu2 = lrelu(tf.layers.batch_normalization(conv2, training=isTrain), 0.2)

        # output layer: 16x16 -> 1x1
        conv3 = tf.layers.conv2d(lrelu2, 1, [16, 16], strides=(1, 1), padding='valid', kernel_initializer=w_init)
        o = tf.nn.sigmoid(conv3)

        return o, conv3
```
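The 2351-channel concat above is what the OOM message points at. One variant I have seen in other conditional-GAN code is to project the one-hot label down to a few channels before tiling it over the image, so the discriminator input stays small. This is a hypothetical sketch; `embed_dim` and the dense projection are my own assumptions, not part of the linked code:

```python
# Hypothetical alternative (my assumption, not from the linked code):
# project the 2350-dim one-hot label to a small embedding before tiling,
# so the discriminator concatenates 64x64x(1+embed_dim), not 64x64x2351.
embed_dim = 16  # hypothetical embedding size

def embed_label(y_label, embed_dim, reuse=False):
    with tf.variable_scope('label_embedding', reuse=reuse):
        # y_label: (batch, 1, 1, 2350) -> (batch, 1, 1, embed_dim)
        y_flat = tf.reshape(y_label, [-1, 2350])
        y_emb = tf.layers.dense(y_flat, embed_dim, name='project')
        return tf.reshape(y_emb, [-1, 1, 1, embed_dim])

# Inside the discriminator, instead of concatenating y_fill directly:
# y_emb  = embed_label(y_label, embed_dim)
# y_tile = tf.tile(y_emb, [1, IMAGE_WIDTH, IMAGE_HEIGHT, 1])
# cat1   = tf.concat([x, y_tile], 3)
```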
```python
def get_image(files, num_classes):
    """This method defines the retrieval of image examples from TFRecords files.
    Here we will define how the images will be represented (grayscale,
    flattened, floating point arrays) and how labels will be represented
    (one-hot vectors).
    """
    # Convert filenames to a queue for an input pipeline.
    file_queue = tf.train.string_input_producer(files)

    # Create object to read TFRecords.
    reader = tf.TFRecordReader()

    # Read the full set of features for a single example.
    key, example = reader.read(file_queue)

    # Parse the example to get a dict mapping feature keys to tensors.
    # image/class/label: integer denoting the index in a classification layer.
    # image/encoded: string containing a JPEG encoded image.
    features = tf.parse_single_example(
        example,
        features={
            'image/class/label': tf.FixedLenFeature([], tf.int64),
            'image/encoded': tf.FixedLenFeature([], dtype=tf.string,
                                                default_value='')
        })
    label = features['image/class/label']
    image_encoded = features['image/encoded']

    # Decode the JPEG and flatten it to IMAGE_WIDTH * IMAGE_HEIGHT floats.
    image = tf.image.decode_jpeg(image_encoded, channels=1)
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    image = tf.reshape(image, [IMAGE_WIDTH * IMAGE_HEIGHT])

    # Represent the label as a one-hot vector.
    label = tf.stack(tf.one_hot(label, num_classes))
    return label, image
```
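For reference, the static shapes of a single parsed example can be checked like this (a minimal sketch; it assumes `train_data_files` and `num_classes` as defined further below):

```python
# Hypothetical sanity check (the surrounding names are defined below):
label, image = get_image(train_data_files, num_classes)
print(image.shape)  # (4096,)  == 64 * 64, flattened grayscale image
print(label.shape)  # (2350,)  one-hot label
```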
```python
# training parameters
batch_size = 32
# lr = 0.0002
train_epoch = 30
global_step = tf.Variable(0, trainable=False)
lr = tf.train.exponential_decay(0.0002, global_step, 500, 0.95, staircase=True)

# load MNIST
# mnist = input_data.read_data_sets("MNIST_data/", one_hot=True, reshape=[])

SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))

# Default paths.
DEFAULT_LABEL_FILE = os.path.join(SCRIPT_PATH, './labels.txt')
DEFAULT_TFRECORDS_DIR = os.path.join(SCRIPT_PATH, 'tfrecords-output')
MODEL_NAME = 'hangul_tensorflow'
img_size = IMAGE_WIDTH  # 64
DEFAULT_NUM_TRAIN_STEPS = 117453  # (for 30 epochs, as my training set is 391510 images)

"""Perform graph definition and model training.
Here we will first create our input pipeline for reading in TFRecords
files and producing random batches of images and labels.
"""
labels = io.open(DEFAULT_LABEL_FILE, 'r', encoding='utf-8').read().splitlines()
num_classes = len(labels)  # 2350

print('Processing data...')
tf_record_pattern = os.path.join(DEFAULT_TFRECORDS_DIR, '%s-*' % 'train')
train_data_files = tf.gfile.Glob(tf_record_pattern)
label, image = get_image(train_data_files, num_classes)

# Associate objects with a randomly selected batch of labels and images.
image_batch, label_batch = tf.train.shuffle_batch(
    [image, label], batch_size=batch_size,
    capacity=2000,
    min_after_dequeue=1000)

# variables : input
x = tf.placeholder(tf.float32, shape=(None, img_size, img_size, 1))
z = tf.placeholder(tf.float32, shape=(None, 1, 1, 100))
y_label = tf.placeholder(tf.float32, shape=(None, 1, 1, 2350))
y_fill = tf.placeholder(tf.float32, shape=(None, img_size, img_size, 2350))
isTrain = tf.placeholder(dtype=tf.bool)

# (D_loss, G_loss, the optimizers, and sess come from the unmodified
# parts of the base code in the link)

# Initialize the queue threads.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
```
```python
# training loop
print('training start!')
for epoch in range(train_epoch):
    G_losses = []
    D_losses = []
    for iter in range(391510 // batch_size):  # steps for 1 epoch
        # update discriminator
        train_images, train_labels = sess.run([image_batch, label_batch])
        x_ = train_images.reshape(-1, img_size, img_size, 1)
        y_label_ = train_labels.reshape([batch_size, 1, 1, 2350])
        y_fill_ = y_label_ * np.ones([batch_size, img_size, img_size, 2350])
        z_ = np.random.normal(0, 1, (batch_size, 1, 1, 100))
        loss_d_, _ = sess.run([D_loss, D_optim], {x: x_, z: z_, y_fill: y_fill_, y_label: y_label_, isTrain: True})

        # update generator
        z_ = np.random.normal(0, 1, (batch_size, 1, 1, 100))
        y_ = np.random.randint(0, 2350, (batch_size, 1))  # sample from all 2350 classes (randint's upper bound is exclusive)
        # onehot comes from the base code (an identity lookup table, np.eye(num_classes))
        y_label_ = onehot[y_.astype(np.int32)].reshape([batch_size, 1, 1, 2350])
        y_fill_ = y_label_ * np.ones([batch_size, img_size, img_size, 2350])
        loss_g_, _ = sess.run([G_loss, G_optim], {z: z_, x: x_, y_fill: y_fill_, y_label: y_label_, isTrain: True})
```
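Feeding `y_fill_` from NumPy also means shipping a 32x64x64x2350 float array (over a gigabyte) from host to GPU on every step, which is probably what the `_Recv` node in the first error was doing. A sketch of building `y_fill` inside the graph from `y_label` instead, so only the small label tensor is ever fed (my own variation, not from the linked code):

```python
# Sketch (my assumption, not from the linked code): derive y_fill in-graph
# with tf.tile so only the (batch, 1, 1, 2350) label crosses the CPU-GPU boundary.
y_label = tf.placeholder(tf.float32, shape=(None, 1, 1, 2350))
y_fill = tf.tile(y_label, [1, img_size, img_size, 1])  # (batch, 64, 64, 2350)

# The feed dict then shrinks to just the label:
# loss_d_, _ = sess.run([D_loss, D_optim],
#                       {x: x_, z: z_, y_label: y_label_, isTrain: True})
```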
These are my system specs:

```
name: GeForce GTX 1070 major: 6 minor: 1 memoryClockRate(GHz): 1.645
pciBusID: 0000:01:00.0
totalMemory: 8.00GiB freeMemory: 6.62GiB
```

How do I solve my problem?