Ошибка загрузки файлов .npz в наборе данных tenorflow - PullRequest
0 голосов
/ 18 февраля 2020

Я пытаюсь создать конвейер данных в тензорном потоке, но мои данные находятся в файлах .npz. Следуя документации по https://www.tensorflow.org/guide/data#consuming_sets_of_files для использования наборов файлов в сочетании с использованием tf.py_function () для использования numpy ops, я написал код, подраздел которого:

def load_data(filename):

    from preprocess import normalize

    # sess = tf.compat.v1.Session()
    # fln = sess.run(filename)
    print ('I AM TRYING TO LOAD : ', filename)
    mels = np.load(filename)['arr_0']
    mels = normalize(mels)

    return mels

Я получаю сообщение об ошибке в np.load (имя файла), которое выглядит так:

I AM TRYING TO LOAD :  tf.Tensor(b'/media/prabhatk/Datasets/DCASE/features/augmentations=None features=mono fmax=22050 fmin=0 hop_length=1024 mel_htk=True n_fft=2048 n_mels=60 samplerate=44100/airport-lisbon-1000-40000-a.npz', shape=(), dtype=string)
2020-02-18 19:31:31.048435: W tensorflow/core/framework/op_kernel.cc:1610] Invalid argument: TypeError: expected str, bytes or os.PathLike object, not tensorflow.python.framework.ops.EagerTensor

Исправлено с помощью np.load (имя файла. numpy ()), как заявлено @jdehesa. Теперь я столкнулся с некоторыми проблемами формы, хотя я уже изменил ввод в функции load_data_wrapper ().

Может кто-нибудь помочь мне с этим?

Я прилагаю весь код и весь сообщение об ошибке ниже.

КОД:

import os
from os import path
import sys
import argparse

sys.path.insert(1,'../')
from helpers import locations
from helpers import metadata
from helpers import read_settings as Settings
from models import sbcnn

import numpy as np
import pandas as pd
import librosa as lb

from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
# from tf.data.experimental import AUTOTUNE


def load_data(filename):

    from preprocess import normalize

    # sess = tf.compat.v1.Session()
    # fln = sess.run(filename)
    print ('I AM TRYING TO LOAD : ', filename)
    mels = np.load(filename)['arr_0']
    # mels = tf.io.read_file(filename)['arr_0']
    mels = normalize(mels)

    return mels


def load_data_wrapper(filename):

    # print ('INPUT TO LOAD DATA IS : ', filename)
    [mels,] = tf.py_function(
        load_data, [filename], [tf.float32]
        )
    mels.set_shape((1,60,431,1))

    # return mels
    return mels


def get_input_directory(input_dir, feature_settings):

    base_dir = input_dir if (input_dir != '') else locations.FEATURE_DIR_BASE
    assert path.exists(base_dir), 'Feature directory not found!!'

    settings = Settings.load_settings(feature_settings)
    feature_dir = Settings.settings_to_path(settings)
    feature_dir = path.join(base_dir,feature_dir)
    assert path.exists(feature_dir), 'Data not preprocessed according to given settings!!'

    return feature_dir


def construct_dataset(X, y):

    filepaths = tf.data.Dataset.from_tensor_slices(X)
    with tf.device('/device:CPU:*'):
        files = filepaths.map(load_data_wrapper)

    filelabels = tf.data.Dataset.from_tensor_slices(y)

    return tf.data.Dataset.zip((files, filelabels))


def parseArguments():
    parser = argparse.ArgumentParser()

    parser.add_argument('--training_metadata', type=str, default='', help='CSV file containing training file names and lables')
    parser.add_argument('--validation_metadata', type=str, default='', help='CSV file containing validation file names and lables')
    parser.add_argument('--input_dir', type=str, default='', help='Processed features directory')
    parser.add_argument('--feature_settings', type=str, default='', help='Load data with the given settings')

    args = parser.parse_args()

    return args


def main():
    arguments = parseArguments()

    input_dir = get_input_directory(arguments.input_dir, arguments.feature_settings)
    X_tr, y_tr = metadata.train(arguments.training_metadata, input_dir)
    X_val, y_val = metadata.validation(arguments.validation_metadata, input_dir)

    enc = LabelEncoder()
    enc.fit(y_tr)
    y_tr = enc.transform(y_tr)
    y_val = enc.transform(y_val)

    data_tr = construct_dataset(X_tr, y_tr)
    data_val = construct_dataset(X_val, y_val)

    model = sbcnn.build_model()
    model.compile(loss='categorical_crossentropy', 
        optimizer=tf.keras.optimizers.SGD(),
        metrics=['accuracy'])

    model.summary()

    model.fit(data_tr, epochs=10)


if __name__ == '__main__':
    main()

ОШИБКА:

2020-02-19 10:37:06.375134: W tensorflow/core/framework/op_kernel.cc:1622] OP_REQUIRES failed at conv_ops_fused_impl.h:693 : Invalid argument: input must be 4-dimensional[60,431]
2020-02-19 10:37:06.375184: W tensorflow/core/common_runtime/base_collective_executor.cc:216] BaseCollectiveExecutor::StartAbort Invalid argument: input must be 4-dimensional[60,431]
     [[{{node sequential/conv2d/BiasAdd}}]]
      1/Unknown - 0s 389ms/stepTraceback (most recent call last):
  File "pipeline.py", line 112, in <module>
    main()
  File "pipeline.py", line 108, in main
    model.fit(data_tr, epochs=10)
  File "/home/prabhatk/miniconda3/envs/DL/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 728, in fit
    use_multiprocessing=use_multiprocessing)
  File "/home/prabhatk/miniconda3/envs/DL/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 324, in fit
    total_epochs=epochs)
  File "/home/prabhatk/miniconda3/envs/DL/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 123, in run_one_epoch
    batch_outs = execution_function(iterator)
  File "/home/prabhatk/miniconda3/envs/DL/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py", line 86, in execution_function
    distributed_function(input_fn))
  File "/home/prabhatk/miniconda3/envs/DL/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py", line 457, in __call__
    result = self._call(*args, **kwds)
  File "/home/prabhatk/miniconda3/envs/DL/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py", line 520, in _call
    return self._stateless_fn(*args, **kwds)
  File "/home/prabhatk/miniconda3/envs/DL/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 1823, in __call__
    return graph_function._filtered_call(args, kwargs)  # pylint: disable=protected-access
  File "/home/prabhatk/miniconda3/envs/DL/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 1141, in _filtered_call
    self.captured_inputs)
  File "/home/prabhatk/miniconda3/envs/DL/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 1224, in _call_flat
    ctx, args, cancellation_manager=cancellation_manager)
  File "/home/prabhatk/miniconda3/envs/DL/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 511, in call
    ctx=ctx)
  File "/home/prabhatk/miniconda3/envs/DL/lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py", line 67, in quick_execute
    six.raise_from(core._status_to_exception(e.code, message), None)
  File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.InvalidArgumentError:  input must be 4-dimensional[60,431]
     [[node sequential/conv2d/BiasAdd (defined at /home/prabhatk/miniconda3/envs/DL/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]] [Op:__inference_distributed_function_801]

Function call stack:
distributed_function

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...