ENV:
Ubuntu 18.04
Python: 2.7.15rc1
GPU 0: GeForce RTX 2080 Ti
GPU 1: Quadro P1000
CUDA: 9.1.85
TensorFlow: 1.12.0
pip install magenta-gpu
Если я выполняю pip install magenta, а затем запускаю melody_rnn_train, этап обучения успешно проходит с моим пользовательским файлом sequence_example training_melodies.tfrecord.
Но когда я выполняю pip install magenta-gpu после pip uninstall magenta и запускаю melody_rnn_train на том же наборе данных, я получаю ошибку сегментации (Segmentation fault). Я вижу, что программа пытается использовать GPU 0: NVIDIA GeForce.
Команда, которую я запускаю:
./.local/bin/melody_rnn_train --config=attention_rnn --run_dir=~/music/run1 --sequence_example_file=~/music/my_midi_sequence_examples/training_melodies.tfrecord --hparams="batch_size=1,rnn_layer_sizes=[64,64]" --num_training_steps=20000
Обратная трассировка (backtrace) ошибки сегментации, полученная с помощью gdb python, выглядит следующим образом:
(gdb) bt
0 0x00007fff4631ec08 in ?? () from /usr/lib/x86_64-linux-gnu/libcudnn.so.7
1 0x00007fff4631f114 in ?? () from /usr/lib/x86_64-linux-gnu/libcudnn.so.7
2 0x00007fff45e08850 in ?? () from /usr/lib/x86_64-linux-gnu/libcudnn.so.7
3 0x00007fff45e2b452 in ?? () from /usr/lib/x86_64-linux-gnu/libcudnn.so.7
4 0x00007fff45e2c1de in ?? () from /usr/lib/x86_64-linux-gnu/libcudnn.so.7
5 0x00007fff453d2416 in ?? () from /usr/lib/x86_64-linux-gnu/libcudnn.so.7
6 0x00007fff453d317b in cudnnGetConvolutionBackwardFilterWorkspaceSize () from /usr/lib/x86_64-linux-gnu/libcudnn.so.7
7 0x00007fff6184f184 in stream_executor::cuda::(anonymous namespace)::AllocateCudnnConvolutionBackwardFilterWorkspace(stream_executor::Stream*, stream_executor::cuda::(anonymous namespace)::CudnnHandle const&, stream_executor::cuda::(anonymous namespace)::CudnnTensorDescriptor const&, stream_executor::cuda::(anonymous namespace)::CudnnFilterDescriptor const&, stream_executor::cuda::(anonymous namespace)::CudnnConvolutionDescriptor const&, stream_executor::cuda::(anonymous namespace)::CudnnTensorDescriptor const&, stream_executor::dnn::AlgorithmDesc*, stream_executor::ScratchAllocator*) ()
from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
8 0x00007fff6184f597 in stream_executor::cuda::(anonymous namespace)::GetCudnnConvolutionBackwardFilterAlgorithm(stream_executor::Stream*, stream_executor::cuda::(anonymous namespace)::CudnnHandle const&, stream_executor::dnn::AlgorithmConfig const&, stream_executor::cuda::(anonymous namespace)::CudnnTensorDescriptor const&, stream_executor::cuda::(anonymous namespace)::CudnnFilterDescriptor const&, stream_executor::cuda::(anonymous namespace)::CudnnConvolutionDescriptor const&, stream_executor::cuda::(anonymous namespace)::CudnnTensorDescriptor const&, stream_executor::ScratchAllocator*, stream_executor::DeviceMemory<unsigned char>*) [clone .constprop.315] () from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
9 0x00007fff6185c7c3 in tensorflow::Status stream_executor::cuda::CudnnSupport::DoConvolveBackwardFilterImpl<float>(stream_executor::Stream*, stream_executor::dnn::BatchDescriptor const&, stream_executor::DeviceMemory<float> const&, stream_executor::dnn::BatchDescriptor const&, stream_executor::DeviceMemory<float>, stream_executor::dnn::ConvolutionDescriptor const&, stream_executor::dnn::FilterDescriptor const&, stream_executor::DeviceMemory<float>*, stream_executor::ScratchAllocator*, stream_executor::dnn::AlgorithmConfig const&, stream_executor::dnn::ProfileResult*) ()
from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
10 0x00007fff6185d212 in stream_executor::cuda::CudnnSupport::DoConvolveBackwardFilter(stream_executor::Stream*, stream_executor::dnn::BatchDescriptor const&, stream_executor::DeviceMemory<float> const&, stream_executor::dnn::BatchDescriptor const&, stream_executor::DeviceMemory<float>, stream_executor::dnn::ConvolutionDescriptor const&, stream_executor::dnn::FilterDescriptor const&, stream_executor::DeviceMemory<float>*, stream_executor::ScratchAllocator*, stream_executor::dnn::AlgorithmConfig const&, stream_executor::dnn::ProfileResult*) ()
from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
11 0x00007fff617efb2c in stream_executor::Stream::ThenConvolveBackwardFilterWithAlgorithm(stream_executor::dnn::BatchDescriptor const&, stream_executor::DeviceMemory<float> const&, stream_executor::dnn::BatchDescriptor const&, stream_executor::DeviceMemory<float>, stream_executor::dnn::ConvolutionDescriptor const&, stream_executor::dnn::FilterDescriptor const&, stream_executor::DeviceMemory<float>*, stream_executor::ScratchAllocator*, stream_executor::dnn::AlgorithmConfig const&, stream_executor::dnn::ProfileResult*) ()
from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
12 0x00007fff679e088a in tensorflow::LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, float>::operator()(tensorflow::OpKernelContext*, bool, bool, tensorflow::Tensor const&, tensorflow::Tensor const&, int, int, int, int, tensorflow::Padding const&, tensorflow::Tensor*, tensorflow::TensorFormat) () from .local/lib/python2.7/site-packages/tensorflow/python/_pywrap_tensorflow_internal.so
13 0x00007fff679e12d0 in tensorflow::Conv2DSlowBackpropFilterOp<Eigen::GpuDevice, float>::Compute(tensorflow::OpKernelContext*) ()
from .local/lib/python2.7/site-packages/tensorflow/python/_pywrap_tensorflow_internal.so
14 0x00007fff613ee911 in tensorflow::BaseGPUDevice::ComputeHelper(tensorflow::OpKernel*, tensorflow::OpKernelContext*) ()
from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
15 0x00007fff613eee32 in tensorflow::BaseGPUDevice::Compute(tensorflow::OpKernel*, tensorflow::OpKernelContext*) ()
from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
16 0x00007fff61438a56 in tensorflow::(anonymous namespace)::ExecutorState::Process(tensorflow::(anonymous namespace)::ExecutorState::TaggedNode, long long) ()
from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
17 0x00007fff61438eea in std::_Function_handler<void (), tensorflow::(anonymous namespace)::ExecutorState::ScheduleReady(absl::InlinedVector<tensorflow::(anonymous namespace)::ExecutorState::TaggedNode, 8ul, std::allocator<tensorflow::(anonymous namespace)::ExecutorState::TaggedNode> > const&, tensorflow::(anonymous namespace)::ExecutorState::TaggedNodeReadyQueue*)::{lambda() 1}>::_M_invoke(std::_Any_data const&) () from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
18 0x00007fff614a81ea in Eigen::NonBlockingThreadPoolTempl<tensorflow::thread::EigenEnvironment>::WorkerLoop(int) ()
from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
19 0x00007fff614a7242 in std::_Function_handler<void (), tensorflow::thread::EigenEnvironment::CreateThread(std::function<void ()>)::{lambda() 1}>::_M_invoke(std::_Any_data const&) ()
from .local/lib/python2.7/site-packages/tensorflow/python/../libtensorflow_framework.so
20 0x00007fff57c128f0 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
21 0x00007ffff77cc6db in start_thread (arg=0x7ffde27fc700) at pthread_create.c:463
23 0x00007ffff7b0588f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Вариант для CPU у меня работал нормально, но вариант для GPU я запустить не могу из-за ошибки сегментации.
Может ли кто-нибудь подсказать, не пропустил ли я что-то при установке?