Ошибка нового конвертера MLIR. Не удалось конвертировать модель с батчнормализацией - PullRequest
0 голосов
/ 07 февраля 2020

У меня есть модель TDNN для TensorFlow, сохранённая как Checkpoint в TensorFlow 1.15, и я хочу преобразовать её в файл TensorFlow Lite для тестирования на устройстве Android.

По этой ссылке я делюсь моделью CheckPoint.

Чтобы получить модель TFLite, первым шагом является преобразование в SavedModel (для конвертации необходимо использовать TensorFlow 2.x, а для нового конвертера MLIR, поддерживающего функциональные операции, — TensorFlow 2.1 или выше). Я делаю это в TensorFlow 1.15 с помощью следующего кода:

import os
import tensorflow as tf

# Checkpoint location: "<dir><prefix>.meta" holds the graph definition and
# "<dir><prefix>" is the prefix used to restore the variable values.
trained_checkPoint_prefix="model-32000"
trained_checkPoint_dir="nnet/"
checkpoint_path = os.path.join(trained_checkPoint_dir, trained_checkPoint_prefix)

loaded_graph = tf.Graph()
tf.compat.v1.enable_resource_variables()
with tf.Session(graph=loaded_graph) as sess:
    # Rebuild the graph from the .meta file, then restore the variables into it.
    loader = tf.train.import_meta_graph(checkpoint_path + '.meta')
    loader.restore(sess, checkpoint_path)

    # Export Checkpoint to SavedModel
    # os.path.join avoids the doubled separator ("nnet//v01") that plain
    # concatenation with '/v01' would produce.
    builder = tf.saved_model.builder.SavedModelBuilder(
        os.path.join(trained_checkPoint_dir, 'v01'))

    # Signature inputs, looked up by tensor name in the imported graph.
    features = loaded_graph.get_tensor_by_name("features:0")
    is_training = loaded_graph.get_tensor_by_name("is_training:0")

    # Signature output. Tensor names must not contain whitespace.
    # NOTE(review): verify this name against the graph's operations if the
    # lookup fails.
    output = loaded_graph.get_tensor_by_name(
        "tower_0/tdnn/batch_normalization_5/batchnorm/add_1:0")

    # Register one predict signature under the key "model"; the graph and its
    # variables are tagged "tag".
    builder.add_meta_graph_and_variables(sess, ["tag"], signature_def_map= {
        "model": tf.saved_model.signature_def_utils.predict_signature_def(
            inputs= {"features": features, "is_training":is_training},
            outputs= {"finalnode": output})
        })

    builder.save()

И все нормально.

Затем я собираюсь преобразовать SavedModel в файл tflite. Сейчас я использую TensorFlow 2.1 или 2.2 (tf-nightly). Код, который я использую:

import os
import tensorflow as tf

# Path of the SavedModel directory to convert ("nnet/v01").
trained_checkPoint_dir="nnet/"
freezeModel = trained_checkPoint_dir + "v01"

# Load the SavedModel.
saved_model_obj = tf.saved_model.load(export_dir=freezeModel,tags='tag')

# Load the specific concrete function from the SavedModel.
# 'model' is the signature key looked up among the loaded signatures.
concrete_func = saved_model_obj.signatures['model']
print(concrete_func.inputs[0])

# Set the shape of the input in the concrete function.
# The 'features' input is located by tensor name: if it is not the first
# input, the second input is used instead.
# NOTE(review): [1,1000,23] is presumably (batch, frames, feature dim) -
# confirm against the model.
if concrete_func.inputs[0].name=='features:0':
    concrete_func.inputs[0].set_shape([1,1000,23])
else:
    concrete_func.inputs[1].set_shape([1,1000,23])

# Convert the model to a TFLite model.
converter =  tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
# Opt in to the new (MLIR-based) converter.
converter.experimental_new_converter = True
# Allow both TFLite builtin ops and selected TensorFlow ops in the result.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS,
                                       tf.lite.OpsSet.SELECT_TF_OPS]
tflite_model = converter.convert()

И результаты:

---------------------------------------------------------------------------
ConverterError                            Traceback (most recent call last)
<ipython-input-14-0f34bb0a99ac> in <module>
      4 converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS,
      5                                        tf.lite.OpsSet.SELECT_TF_OPS]
----> 6 tflite_model = converter.convert()

~/anaconda3/envs/py36tf2.1/lib/python3.6/site-packages/tensorflow_core/lite/python/lite.py in convert(self)
    462         input_tensors=input_tensors,
    463         output_tensors=output_tensors,
--> 464         **converter_kwargs)
    465 
    466     if self._is_calibration_quantize():

~/anaconda3/envs/py36tf2.1/lib/python3.6/site-packages/tensorflow_core/lite/python/convert.py in toco_convert_impl(input_data, input_tensors, output_tensors, enable_mlir_converter, *args, **kwargs)
    455       input_data.SerializeToString(),
    456       debug_info_str=debug_info_str,
--> 457       enable_mlir_converter=enable_mlir_converter)
    458   return data
    459 

~/anaconda3/envs/py36tf2.1/lib/python3.6/site-packages/tensorflow_core/lite/python/convert.py in toco_convert_protos(model_flags_str, toco_flags_str, input_data_str, debug_info_str, enable_mlir_converter)
    201       stdout = _try_convert_to_unicode(stdout)
    202       stderr = _try_convert_to_unicode(stderr)
--> 203       raise ConverterError("See console for info.\n%s\n%s\n" % (stdout, stderr))
    204   finally:
    205     # Must manually cleanup files.

ConverterError: See console for info.
2020-02-05 13:22:17.349230: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libnvinfer.so.6'; dlerror: libnvinfer.so.6: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-10.0/lib64
2020-02-05 13:22:17.349295: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libnvinfer_plugin.so.6'; dlerror: libnvinfer_plugin.so.6: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-10.0/lib64
2020-02-05 13:22:17.349304: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:30] Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.
2020-02-05 13:22:17.812215: W tensorflow/compiler/mlir/lite/python/graphdef_to_tfl_flatbuffer.cc:89] Ignored output_format.
2020-02-05 13:22:17.812244: W tensorflow/compiler/mlir/lite/python/graphdef_to_tfl_flatbuffer.cc:95] Ignored drop_control_dependency.
2020-02-05 13:22:17.870309: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2020-02-05 13:22:17.896653: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3300080000 Hz
2020-02-05 13:22:17.897563: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7efcc41ffdc0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-02-05 13:22:17.897600: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2020-02-05 13:22:17.902733: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-02-05 13:22:17.922362: E tensorflow/stream_executor/cuda/cuda_driver.cc:351] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2020-02-05 13:22:17.922431: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (invest-rtx2080): /proc/driver/nvidia/version does not exist
error: type of return operand 0 ('tensor<*xf32>') doesn't match function result type ('tensor<?x?x?x?xf32>')
error: type of return operand 0 ('tensor<*xf32>') doesn't match function result type ('tensor<?x?x?x?xf32>')
error: type of return operand 0 ('tensor<*xf32>') doesn't match function result type ('tensor<?x?x?x?xf32>')
Traceback (most recent call last):
  File "/home/investigacion/anaconda3/envs/py36tf2.1/bin/toco_from_protos", line 8, in <module>
    sys.exit(main())
  File "/home/investigacion/anaconda3/envs/py36tf2.1/lib/python3.6/site-packages/tensorflow_core/lite/toco/python/toco_from_protos.py", line 93, in main
    app.run(main=execute, argv=[sys.argv[0]] + unparsed)
  File "/home/investigacion/anaconda3/envs/py36tf2.1/lib/python3.6/site-packages/tensorflow_core/python/platform/app.py", line 40, in run
    _run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
  File "/home/investigacion/anaconda3/envs/py36tf2.1/lib/python3.6/site-packages/absl/app.py", line 299, in run
    _run_main(main, args)
  File "/home/investigacion/anaconda3/envs/py36tf2.1/lib/python3.6/site-packages/absl/app.py", line 250, in _run_main
    sys.exit(main(argv))
  File "/home/investigacion/anaconda3/envs/py36tf2.1/lib/python3.6/site-packages/tensorflow_core/lite/toco/python/toco_from_protos.py", line 56, in execute
    enable_mlir_converter)
Exception: <unknown>:0: error: type of return operand 0 ('tensor<*xf32>') doesn't match function result type ('tensor<?x?x?x?xf32>')
<unknown>:0: error: type of return operand 0 ('tensor<*xf32>') doesn't match function result type ('tensor<?x?x?x?xf32>')
<unknown>:0: error: type of return operand 0 ('tensor<*xf32>') doesn't match function result type ('tensor<?x?x?x?xf32>')

Я пробовал версии TensorFlow как с поддержкой графического процессора, так и без неё.

...