Я пытаюсь создать свою собственную запись tfrecord для классификации изображений.
Я создал скрипт для создания записи tfrecord на основе имени папки.
import os
import tensorflow as tf
import numpy as np
import csv
import glob
from PIL import Image
flags = tf.app.flags
flags.DEFINE_string('data_dir', '', 'Root directory to raw PASCAL VOC dataset.')
flags.DEFINE_string('output_dir', '', 'Path to output TFRecord')
flags.DEFINE_integer('validation_test_size', 10,' In percent' )
FLAGS = flags.FLAGS
desired_size = (160,160)
def _label_to_int(labels):
categories = ['Empty', 'Occupied']
new_labels = []
for label in labels:
new_labels.append(categories.index(label))
return new_labels
def _int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
# images and labels array as input
def convert_to(images, labels, name):
num_examples = labels.shape[0]
if images.shape[0] != num_examples:
raise ValueError("Images size %d does not match label size %d." %
(images.shape[0], num_examples))
rows = images[0].shape[0]
cols = images[0].shape[1]
depth = images[0].shape[2]
filename = os.path.join(FLAGS.output_dir, name + '.tfrecords')
print('Writing', filename)
writer = tf.python_io.TFRecordWriter(filename)
for index in range(num_examples):
image_raw = images[index].tostring()
example = tf.train.Example(features=tf.train.Features(feature={
'filename': tf.train.Feature(bytes_list=tf.train.BytesList(value=['None'.encode('utf-8')])),
'rows': tf.train.Feature(int64_list=tf.train.Int64List(value=[rows])),
'cols': tf.train.Feature(int64_list=tf.train.Int64List(value=[cols])),
'channels': tf.train.Feature(int64_list=tf.train.Int64List(value=[depth])),
'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_raw])),
'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[labels[index]]))}))
writer.write(example.SerializeToString())
writer.close()
def read_images_from(path):
images = []
index = 0
use_data_index = 0
train_labels = []
for r, d, f in os.walk(path):
for file in f:
use_data_index+=1
# if use_data_index != 100:
# continue
if "160x160" in file:
continue
label = r.split('/')[-1]
train_labels.append(label)
filename = os.path.join(r,file)
im = Image.open(filename) # .convert("L") # Convert to greyscale
im = im.resize(desired_size)
im = im.convert('RGB')
im = np.asarray(im, np.uint8)
image_name = filename.split('/')[-1].split('.')[0]
images.append([index, im])
index +=1
use_data_index = 0
images = sorted(images, key=lambda image: image[0])
images_only = [np.asarray(image[1], np.uint8) for image in images] # Use unint8 or you will be !!!
images_only = np.array(images_only)
train_labels = _label_to_int(train_labels)
train_labels = np.array(train_labels, dtype=np.uint32)
return images_only, train_labels
def main(_):
# Extract it into numpy arrays.
dataset_images,dataset_labels = read_images_from(FLAGS.data_dir)
dataset_size = dataset_images.shape[0]
shard = int(dataset_size * FLAGS.validation_test_size / 100);
# Generate a validation set.
validation_images = dataset_images[:shard,:,:,:]
validation_labels = dataset_labels[:shard]
train_images = dataset_images[shard:dataset_size-shard,:,:,:]
train_labels = dataset_labels[shard:dataset_size-shard]
test_images = dataset_images[dataset_size-shard:,:,:,:]
test_labels = dataset_labels[dataset_size-shard:]
# TODO: create test.tfrecords to run tests after training
# Convert to Examples and write the result to TFRecords.
convert_to(train_images, train_labels, 'train')
convert_to(validation_images, validation_labels, 'validation')
convert_to(test_images, test_labels, 'test')
if __name__ == "__main__":
tf.app.run()
Я пытаюсь использовать сгенерированные записи tfrecords при передаче алгоритм обучения:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
# import tensorflow_datasets as tfds
flags = tf.compat.v1.app.flags
flags.DEFINE_string('tfrecords', '', 'Dir where train/validation/test dataset are stored')
FLAGS = flags.FLAGS
IMG_SIZE = 160 # All images will be resized to 160x160
def format_example(image, label):
image = tf.cast(image, tf.float32)
image = (image / 127.5) - 1
image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
return image, label
def main(_):
print(FLAGS.tfrecords)
train = tf.data.TFRecordDataset(os.path.join(FLAGS.tfrecords, "train.tfrecords"))
validation = tf.data.TFRecordDataset(os.path.join(FLAGS.tfrecords, "validation.tfrecords"))
test = tf.data.TFRecordDataset(os.path.join(FLAGS.tfrecords, "test.tfrecords"))
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 1000
train_batches = train.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
validation_batches = validation.batch(BATCH_SIZE)
test_batches = test.batch(BATCH_SIZE)
for image_batch, label in train_batches.take(1): ### XXX ERROR!!!
pass
print(image_batch.shape)
# Create the base model from the pre-trained model MobileNet V2
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=False,
weights='imagenet')
feature_batch = base_model(image_batch)
print(feature_batch.shape)
base_model.trainable = False
# Let's take a look at the base model architecture
print(base_model.summary())
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape)
prediction_layer = tf.keras.layers.Dense(1)
prediction_batch = prediction_layer(feature_batch_average)
print(prediction_batch.shape)
model = tf.keras.Sequential([
base_model,
global_average_layer,
prediction_layer
])
base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=base_learning_rate),
loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
metrics=['accuracy'])
print("len(model.trainable_variables) {0} ".format(len(model.trainable_variables)))
initial_epochs = 10
validation_steps = 20
loss0, accuracy0 = model.evaluate(validation_batches, steps=validation_steps)
print("initial loss: {:.2f}".format(loss0))
print("initial accuracy: {:.2f}".format(accuracy0))
history = model.fit(train_batches,
epochs=initial_epochs,
validation_data=validation_batches)
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.ylim([min(plt.ylim()), 1])
plt.title('Training and Validation Accuracy')
plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.ylim([0, 1.0])
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()
if __name__ == "__main__":
tf.compat.v1.app.run(
)
Когда я пытаюсь распечатать пакет данных, я получаю это:
(30,)
И когда предыдущий код генерирует эту ошибку:
ValueError: too many values to unpack (expected 2)
Это происходит, когда
Кажется, он не может прочитать мою запись.