Оценщик Tensorflow Keras не получает данные из input_fn? - PullRequest
0 голосов
/ 05 июня 2019

У меня проблема с получением tf.estimator для работы с моим tf.dataset из списка файлов tfrecords, хотя я могу заставить его работать с фиктивным набором данных.

Я могу прочитать и проанализировать данные и подтвердить, что они имеют правильные размеры и значения. Оценщик работает с фиктивным набором данных, который я создаю с теми же свойствами.

Использование тензор потока 1.13.1 Я использую eagermode, поэтому никакие явные сеансы не запускаются. Я думаю, что Оценщику не нужен явный итератор (как в dataset.make_one_shot_iterator ()) для работы, поэтому input_fn передает сам набор данных.

Модель keras работает с Inputlayer формы = (31, 2323)

Вот код, который не работает:

import tensorflow as tf
import tensorflow.contrib.eager as tfe
tfe.enable_eager_execution(device_policy=tfe.DEVICE_PLACEMENT_SILENT)
import numpy as np
from tensorflow.contrib.data.python.ops import sliding

BATCH_SIZE = 5
EPOCHS = 10

path = 'dummy_data/*.tfrecords'
## checking if dataset is getting parsed and same as generated dummy data:
dataset = dataset_input_fn(path,EPOCHS,BATCH_SIZE)
iterator = dataset.make_one_shot_iterator()
feat, label = iterator.get_next()
print(feat.shape, label.shape)

dataset_dummy = dummy_dataset_fn(EPOCHS,BATCH_SIZE)
iterator = dataset.make_one_shot_iterator()
feat, label = iterator.get_next()
print(feat.shape, label.shape)

model = Keras_model()
estimator = tf.keras.estimator.model_to_estimator(keras_model=model)

# estimator works with dummy data:
estimator.train(input_fn=lambda:dummy_dataset_fn(
                        epochs=EPOCHS,
                        batch_size=BATCH_SIZE
                        ))

print('succesfull run for estimator')
estimator.train(input_fn=lambda:dataset_input_fn(
                        path,
                        epochs=EPOCHS,
                        batch_size=BATCH_SIZE
                        ))

Оба итератора выводят наборы данных (BATCH_SIZE, 31, 2323), как и ожидалось. Используя итератор, я подтвердил, что значения feat и метки анализируются правильно. (при необходимости можете опубликовать пример данных)

Рабочий dummy_dataset_fn:

def dummy_dataset_fn(epochs, batch_size):

    # creates the (?, 31, 2323) (?,1) data

    n_test_samples = 31*100

    feature1 = np.random.rand(n_test_samples, 2323)

    test_dataset=tf.data.Dataset.from_tensor_slices(feature1)

    test_labels = np.random.randint(2,size=100)
    test_labels = np.repeat(test_labels, 31)
    test_labels = tf.data.Dataset.from_tensor_slices(test_labels)

    window = 31
    stride = 31
    test_dataset = test_dataset.apply(sliding.sliding_window_batch(window, stride))
    test_labels = test_labels.apply(sliding.sliding_window_batch(window, stride))
    test_labels = test_labels.map(return_single_val)

    dataset = tf.data.Dataset.zip((test_dataset, test_labels))
    dataset = dataset.batch(batch_size).repeat(epochs)

    return dataset

и «правильный» набор данных (хотя tfrecords все еще являются фиктивными данными)

def return_single_val(line_batch):
    label, _ = tf.unique(line_batch)
    return label

def _parse_function_features(example_proto):
    feature_description = {
        'X': tf.FixedLenSequenceFeature([], tf.float32, default_value=0.0, allow_missing=True),
        'Y': tf.FixedLenFeature([], tf.int64, default_value=0),
    }
    # Parse the input tf.Example proto using the dictionary above.
    parsed_features = tf.parse_single_example(example_proto, feature_description)
    features = parsed_features["X"]
    return features

def _parse_function_labels(example_proto):
    feature_description = {
        'X': tf.FixedLenSequenceFeature([], tf.float32, default_value=0.0, allow_missing=True),
        'Y': tf.FixedLenFeature([], tf.int64, default_value=0),
    }
    # Parse the input tf.Example proto using the dictionary above.
    parsed_features = tf.parse_single_example(example_proto, feature_description)
    labels = parsed_features["Y"]
    return labels

def dataset_input_fn(wildcard, epochs, batch_size):

    filelist = tf.data.Dataset.list_files(wildcard)
    raw_dataset = tf.data.TFRecordDataset(filelist)

    parsed_dataset_features = raw_dataset.map(_parse_function_features)
    parsed_dataset_labels = raw_dataset.map(_parse_function_labels)

    window = 31
    stride = 31
    data_features = parsed_dataset_features.apply(sliding.sliding_window_batch(window, stride))
    data_labels = parsed_dataset_labels.apply(sliding.sliding_window_batch(window, stride))

    data_labels = data_labels.map(return_single_val)

    dataset = tf.data.Dataset.zip((data_features, data_labels))
    dataset = dataset.batch(batch_size).repeat(epochs)

    return dataset

Код не работает на

estimator.train(input_fn=lambda:dataset_input_fn(...
)

и кажется, что входные данные имеют значение 'None':

  File "trainer/reproducible_tensorflow_dummydata.py", line 122, in main
    estimator.train(input_fn=lambda:dataset_input_fn(
  File "{user}/lib/python2.7/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 358, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "{user}lib/python2.7/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1124, in _train_model
    return self._train_model_default(input_fn, hooks, saving_listeners)
  File "{user}/lib/python2.7/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1154, in _train_model_default
    features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
  File "{user}/lib/python2.7/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1112, in _call_model_fn
    model_fn_results = self._model_fn(features=features, **kwargs)
  File "{user}/lib/python2.7/site-packages/tensorflow_estimator/python/estimator/keras.py", line 278, in model_fn
    labels)
  File "{user}/lib/python2.7/site-packages/tensorflow_estimator/python/estimator/keras.py", line 201, in _clone_and_build_model
    optimizer_iterations=global_step)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/models.py", line 466, in clone_and_build_model
    clone = clone_model(model, input_tensors=input_tensors)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/models.py", line 271, in clone_model
    return _clone_functional_model(model, input_tensors=input_tensors)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/models.py", line 161, in _clone_functional_model
    **kwargs))
  File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/layers/recurrent.py", line 701, in __call__
    return super(RNN, self).__call__(inputs, **kwargs)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 538, in __call__
    self._maybe_build(inputs)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 1603, in _maybe_build
    self.build(input_shapes)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/layers/recurrent.py", line 619, in build
    self.cell.build(step_input_shape)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/utils/tf_utils.py", line 151, in wrapper
    output_shape = fn(instance, input_shape)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/layers/recurrent.py", line 2022, in build
    constraint=self.kernel_constraint)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 349, in add_weight
    aggregation=aggregation)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/training/checkpointable/base.py", line 607, in _add_variable_with_custom_getter
    **kwargs_for_getter)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/engine/base_layer_utils.py", line 145, in make_variable
    aggregation=aggregation)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 213, in __call__
    return cls._variable_v1_call(*args, **kwargs)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 176, in _variable_v1_call
    aggregation=aggregation)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 155, in <lambda>
    previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 2488, in default_variable_creator
    import_scope=import_scope)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 217, in __call__
    return super(VariableMetaclass, cls).__call__(*args, **kwargs)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 294, in __init__
    constraint=constraint)
  File "{user}/lib/python2.7/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 406, in _init_from_args
    initial_value() if init_from_fn else initial_value,
  File "{user}/lib/python2.7/site-packages/tensorflow/python/keras/engine/base_layer_utils.py", line 127, in <lambda>
    shape, dtype=dtype, partition_info=partition_info)
  File "{user}/env/lib/python2.7/site-packages/tensorflow/python/ops/init_ops.py", line 499, in __call__
    scale /= max(1., (fan_in + fan_out) / 2.)

TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'

Так что здесь может пойти не так?

...