Question

ENV:

Ubuntu 18.04

Python: 2.7.15rc1

GPU 0: GeForce RTX 2080 Ti

GPU 1: P1000 Quadro

CUDA: 9.1.85

TensorFlow: 1.12.0

pip install magenta-gpu

Если я выполняю pip install magenta, а затем запускаю melody_rnn_train, обучение успешно проходит на моём пользовательском файле sequence_example training_melodies.tfrecord.

Но когда после pip uninstall magenta я выполняю pip install magenta-gpu и запускаю melody_rnn_train на том же наборе данных, происходит ошибка сегментации (Segmentation fault). При этом видно, что программа пытается использовать GPU 0: NVIDIA GeForce.

Команда, которую я запускаю:

./.local/bin/melody_rnn_train --config=attention_rnn --run_dir=~/music/run1 --sequence_example_file=~/music/my_midi_sequence_examples/training_melodies.tfrecord --hparams="batch_size=1,rnn_layer_sizes=[64,64]" --num_training_steps=20000

Трассировка стека (backtrace) при ошибке сегментации, полученная с помощью gdb python, выглядит следующим образом:

    (gdb) bt
    0  0x00007fff4631ec08 in ?? ()     from /usr/lib/x86_64-linux-gnu/libcudnn.so.7
    1  0x00007fff4631f114 in ?? ()     from /usr/lib/x86_64-linux-gnu/libcudnn.so.7
    2  0x00007fff45e08850 in ?? ()     from /usr/lib/x86_64-linux-gnu/libcudnn.so.7
    3  0x00007fff45e2b452 in ?? ()     from /usr/lib/x86_64-linux-gnu/libcudnn.so.7
    4  0x00007fff45e2c1de in ?? ()     from /usr/lib/x86_64-linux-gnu/libcudnn.so.7
    5  0x00007fff453d2416 in ?? ()     from /usr/lib/x86_64-linux-gnu/libcudnn.so.7
    6  0x00007fff453d317b in cudnnGetConvolutionBackwardFilterWorkspaceSize ()     from /usr/lib/x86_64-linux-gnu/libcudnn.so.7
    7  0x00007fff6184f184 in stream_executor::cuda::(anonymous namespace)::AllocateCudnnConvolutionBackwardFilterWorkspace(stream_executor::Stream*, stream_executor::cuda::(anonymous namespace)::CudnnHandle const&, stream_executor::cuda::(anonymous namespace)::CudnnTensorDescriptor const&, stream_executor::cuda::(anonymous namespace)::CudnnFilterDescriptor const&, stream_executor::cuda::(anonymous namespace)::CudnnConvolutionDescriptor const&, stream_executor::cuda::(anonymous namespace)::CudnnTensorDescriptor const&, stream_executor::dnn::AlgorithmDesc*, stream_executor::ScratchAllocator*) ()
   from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
    8  0x00007fff6184f597 in stream_executor::cuda::(anonymous namespace)::GetCudnnConvolutionBackwardFilterAlgorithm(stream_executor::Stream*, stream_executor::cuda::(anonymous namespace)::CudnnHandle const&, stream_executor::dnn::AlgorithmConfig const&, stream_executor::cuda::(anonymous namespace)::CudnnTensorDescriptor const&, stream_executor::cuda::(anonymous namespace)::CudnnFilterDescriptor const&, stream_executor::cuda::(anonymous namespace)::CudnnConvolutionDescriptor const&, stream_executor::cuda::(anonymous namespace)::CudnnTensorDescriptor const&, stream_executor::ScratchAllocator*, stream_executor::DeviceMemory<unsigned char>*) [clone .constprop.315] ()     from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
    9  0x00007fff6185c7c3 in tensorflow::Status stream_executor::cuda::CudnnSupport::DoConvolveBackwardFilterImpl<float>(stream_executor::Stream*, stream_executor::dnn::BatchDescriptor const&, stream_executor::DeviceMemory<float> const&, stream_executor::dnn::BatchDescriptor const&, stream_executor::DeviceMemory<float>, stream_executor::dnn::ConvolutionDescriptor const&, stream_executor::dnn::FilterDescriptor const&, stream_executor::DeviceMemory<float>*, stream_executor::ScratchAllocator*, stream_executor::dnn::AlgorithmConfig const&, stream_executor::dnn::ProfileResult*) ()
   from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
    10 0x00007fff6185d212 in stream_executor::cuda::CudnnSupport::DoConvolveBackwardFilter(stream_executor::Stream*, stream_executor::dnn::BatchDescriptor const&, stream_executor::DeviceMemory<float> const&, stream_executor::dnn::BatchDescriptor const&, stream_executor::DeviceMemory<float>, stream_executor::dnn::ConvolutionDescriptor const&, stream_executor::dnn::FilterDescriptor const&, stream_executor::DeviceMemory<float>*, stream_executor::ScratchAllocator*, stream_executor::dnn::AlgorithmConfig const&, stream_executor::dnn::ProfileResult*) ()
   from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
    11 0x00007fff617efb2c in stream_executor::Stream::ThenConvolveBackwardFilterWithAlgorithm(stream_executor::dnn::BatchDescriptor const&, stream_executor::DeviceMemory<float> const&, stream_executor::dnn::BatchDescriptor const&, stream_executor::DeviceMemory<float>, stream_executor::dnn::ConvolutionDescriptor const&, stream_executor::dnn::FilterDescriptor const&, stream_executor::DeviceMemory<float>*, stream_executor::ScratchAllocator*, stream_executor::dnn::AlgorithmConfig const&, stream_executor::dnn::ProfileResult*) ()
   from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
    12 0x00007fff679e088a in tensorflow::LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, float>::operator()(tensorflow::OpKernelContext*, bool, bool, tensorflow::Tensor const&, tensorflow::Tensor const&, int, int, int, int, tensorflow::Padding const&, tensorflow::Tensor*, tensorflow::TensorFormat) ()     from .local/lib/python2.7/site-packages/tensorflow/python/_pywrap_tensorflow_internal.so
    13 0x00007fff679e12d0 in tensorflow::Conv2DSlowBackpropFilterOp<Eigen::GpuDevice, float>::Compute(tensorflow::OpKernelContext*) ()
   from .local/lib/python2.7/site-packages/tensorflow/python/_pywrap_tensorflow_internal.so
    14 0x00007fff613ee911 in tensorflow::BaseGPUDevice::ComputeHelper(tensorflow::OpKernel*, tensorflow::OpKernelContext*) ()
   from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
    15 0x00007fff613eee32 in tensorflow::BaseGPUDevice::Compute(tensorflow::OpKernel*, tensorflow::OpKernelContext*) ()
   from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
    16 0x00007fff61438a56 in tensorflow::(anonymous namespace)::ExecutorState::Process(tensorflow::(anonymous namespace)::ExecutorState::TaggedNode, long long) ()
   from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
    17 0x00007fff61438eea in std::_Function_handler<void (), tensorflow::(anonymous namespace)::ExecutorState::ScheduleReady(absl::InlinedVector<tensorflow::(anonymous namespace)::ExecutorState::TaggedNode, 8ul, std::allocator<tensorflow::(anonymous namespace)::ExecutorState::TaggedNode> > const&, tensorflow::(anonymous namespace)::ExecutorState::TaggedNodeReadyQueue*)::{lambda()#1}>::_M_invoke(std::_Any_data const&) ()     from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
    18 0x00007fff614a81ea in Eigen::NonBlockingThreadPoolTempl<tensorflow::thread::EigenEnvironment>::WorkerLoop(int) ()
   from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
    19 0x00007fff614a7242 in std::_Function_handler<void (), tensorflow::thread::EigenEnvironment::CreateThread(std::function<void ()>)::{lambda()#1}>::_M_invoke(std::_Any_data const&) ()
   from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
    20 0x00007fff57c128f0 in ?? ()     from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
    21 0x00007ffff77cc6db in start_thread (arg=0x7ffde27fc700) at pthread_create.c:463
    23 0x00007ffff7b0588f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

Версия для CPU у меня работала нормально, но версию для GPU запустить не удаётся из-за ошибки сегментации.

Может кто-нибудь подсказать, не пропустил ли я что-то при установке?

Ошибка сегментации при запуске melody_rnn_train с использованием magenta-gpu

Пожалуйста, войдите или зарегистрируйтесь, чтобы ответить на этот вопрос.

Ответы [ 0 ]

Ошибка сегментации при запуске melody_rnn_train с использованием magenta-gpu

Пожалуйста, войдите или зарегистрируйтесь, чтобы ответить на этот вопрос.

Ответы [ 0 ]

Похожие темы