Как создать классификатор tfrecord в tenorflow 2.0 - PullRequest
0 голосов
/ 20 апреля 2020

Я пытаюсь создать свою собственную запись tfrecord для классификации изображений.

Я создал скрипт для создания записи tfrecord на основе имени папки.

import os
import tensorflow as tf
import numpy as np
import csv
import glob

from PIL import Image

flags = tf.app.flags
flags.DEFINE_string('data_dir', '', 'Root directory to raw PASCAL VOC dataset.')
flags.DEFINE_string('output_dir', '', 'Path to output TFRecord')
flags.DEFINE_integer('validation_test_size', 10,' In percent' )

FLAGS = flags.FLAGS
desired_size = (160,160)

def _label_to_int(labels):
  categories = ['Empty', 'Occupied']
  new_labels = []

  for label in labels:
    new_labels.append(categories.index(label))
  return new_labels


def _int64_feature(value):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def _bytes_feature(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

# images and labels array as input
def convert_to(images, labels, name):
  num_examples = labels.shape[0]
  if images.shape[0] != num_examples:
    raise ValueError("Images size %d does not match label size %d." %
                     (images.shape[0], num_examples))
  rows = images[0].shape[0]
  cols = images[0].shape[1]
  depth = images[0].shape[2]

  filename = os.path.join(FLAGS.output_dir, name + '.tfrecords')
  print('Writing', filename)
  writer = tf.python_io.TFRecordWriter(filename)
  for index in range(num_examples):
    image_raw = images[index].tostring()
    example = tf.train.Example(features=tf.train.Features(feature={
        'filename': tf.train.Feature(bytes_list=tf.train.BytesList(value=['None'.encode('utf-8')])),
        'rows': tf.train.Feature(int64_list=tf.train.Int64List(value=[rows])),
        'cols': tf.train.Feature(int64_list=tf.train.Int64List(value=[cols])),
        'channels': tf.train.Feature(int64_list=tf.train.Int64List(value=[depth])),
        'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_raw])),
        'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[labels[index]]))}))
    writer.write(example.SerializeToString())

  writer.close()


def read_images_from(path):
  images = []
  index = 0
  use_data_index = 0
  train_labels = []

  for r, d, f in os.walk(path):
      for file in f:
        use_data_index+=1
        # if use_data_index != 100:
        #     continue
        if "160x160" in file:
            continue

        label = r.split('/')[-1]
        train_labels.append(label)

        filename = os.path.join(r,file)
        im = Image.open(filename)  # .convert("L")  # Convert to greyscale
        im = im.resize(desired_size)
        im = im.convert('RGB')

        im = np.asarray(im, np.uint8)
        image_name = filename.split('/')[-1].split('.')[0]
        images.append([index, im])
        index +=1



        use_data_index = 0

  images = sorted(images, key=lambda image: image[0])
  images_only = [np.asarray(image[1], np.uint8) for image in images]  # Use unint8 or you will be !!!
  images_only = np.array(images_only)

  train_labels = _label_to_int(train_labels)
  train_labels = np.array(train_labels, dtype=np.uint32)

  return images_only, train_labels


def main(_):
    # Extract it into numpy arrays.
    dataset_images,dataset_labels = read_images_from(FLAGS.data_dir)


    dataset_size = dataset_images.shape[0]

    shard = int(dataset_size * FLAGS.validation_test_size / 100);
    # Generate a validation set.
    validation_images = dataset_images[:shard,:,:,:]
    validation_labels = dataset_labels[:shard]
    train_images = dataset_images[shard:dataset_size-shard,:,:,:]
    train_labels = dataset_labels[shard:dataset_size-shard]
    test_images = dataset_images[dataset_size-shard:,:,:,:]
    test_labels = dataset_labels[dataset_size-shard:]
    # TODO: create test.tfrecords to run tests after training

    # Convert to Examples and write the result to TFRecords.
    convert_to(train_images, train_labels, 'train')
    convert_to(validation_images, validation_labels, 'validation')
    convert_to(test_images, test_labels, 'test')


if __name__ == "__main__":
    tf.app.run()

Я пытаюсь использовать сгенерированные записи tfrecords при передаче алгоритм обучения:

import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# import tensorflow_datasets as tfds


flags = tf.compat.v1.app.flags
flags.DEFINE_string('tfrecords', '', 'Dir where train/validation/test dataset are stored')

FLAGS = flags.FLAGS

IMG_SIZE = 160  # All images will be resized to 160x160


def format_example(image, label):
    image = tf.cast(image, tf.float32)
    image = (image / 127.5) - 1
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    return image, label


def main(_):
    print(FLAGS.tfrecords)
    train = tf.data.TFRecordDataset(os.path.join(FLAGS.tfrecords, "train.tfrecords"))
    validation = tf.data.TFRecordDataset(os.path.join(FLAGS.tfrecords, "validation.tfrecords"))
    test = tf.data.TFRecordDataset(os.path.join(FLAGS.tfrecords, "test.tfrecords"))



    BATCH_SIZE = 32
    SHUFFLE_BUFFER_SIZE = 1000

    train_batches = train.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
    validation_batches = validation.batch(BATCH_SIZE)
    test_batches = test.batch(BATCH_SIZE)

    for image_batch, label in train_batches.take(1):  ### XXX ERROR!!!
        pass

    print(image_batch.shape)

    # Create the base model from the pre-trained model MobileNet V2
    IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)
    base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                                   include_top=False,
                                                   weights='imagenet')

    feature_batch = base_model(image_batch)
    print(feature_batch.shape)

    base_model.trainable = False

    # Let's take a look at the base model architecture
    print(base_model.summary())

    global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
    feature_batch_average = global_average_layer(feature_batch)
    print(feature_batch_average.shape)

    prediction_layer = tf.keras.layers.Dense(1)
    prediction_batch = prediction_layer(feature_batch_average)
    print(prediction_batch.shape)

    model = tf.keras.Sequential([
        base_model,
        global_average_layer,
        prediction_layer
    ])

    base_learning_rate = 0.0001
    model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=base_learning_rate),
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  metrics=['accuracy'])

    print("len(model.trainable_variables) {0} ".format(len(model.trainable_variables)))

    initial_epochs = 10
    validation_steps = 20
    loss0, accuracy0 = model.evaluate(validation_batches, steps=validation_steps)
    print("initial loss: {:.2f}".format(loss0))
    print("initial accuracy: {:.2f}".format(accuracy0))

    history = model.fit(train_batches,
                        epochs=initial_epochs,
                        validation_data=validation_batches)



    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']

    loss = history.history['loss']
    val_loss = history.history['val_loss']

    plt.figure(figsize=(8, 8))
    plt.subplot(2, 1, 1)
    plt.plot(acc, label='Training Accuracy')
    plt.plot(val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.ylabel('Accuracy')
    plt.ylim([min(plt.ylim()), 1])
    plt.title('Training and Validation Accuracy')

    plt.subplot(2, 1, 2)
    plt.plot(loss, label='Training Loss')
    plt.plot(val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.ylabel('Cross Entropy')
    plt.ylim([0, 1.0])
    plt.title('Training and Validation Loss')
    plt.xlabel('epoch')
    plt.show()




    if __name__ == "__main__":
        tf.compat.v1.app.run(

)

Когда я пытаюсь распечатать пакет данных, я получаю это:

(30,)

И когда предыдущий код генерирует эту ошибку:

   ValueError: too many values to unpack (expected 2)

Это происходит, когда

Кажется, он не может прочитать мою запись.

...