Неизвестная ошибка с нехваткой памяти GPU - PullRequest
0 голосов
/ 15 апреля 2020

При первом запуске обучения не хватило памяти GPU. Чтобы решить эту проблему, я попытался уменьшить размер изображений. Однако после этого при обучении, помимо самой ошибки нехватки видеопамяти, выводится большое количество дополнительной информации об ошибке. Я хотел бы знать, чем эта ошибка отличается от обычной ошибки нехватки видеопамяти.

    Traceback (most recent call last):
  File "main.py", line 67, in <module>
    loss_dict = model(data)
  File "/root/anaconda3/envs/python367/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/detectron2/detectron2/modeling/meta_arch/rcnn.py", line 124, in forward
    proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)
  File "/root/anaconda3/envs/python367/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/detectron2/detectron2/modeling/proposal_generator/rrpn.py", line 53, in forward
    losses = outputs.losses()
  File "/root/detectron2/detectron2/modeling/proposal_generator/rpn_outputs.py", line 333, in losses
    gt_objectness_logits, gt_anchor_deltas = self._get_ground_truth()
  File "/root/detectron2/detectron2/modeling/proposal_generator/rrpn_outputs.py", line 222, in _get_ground_truth
    match_quality_matrix = pairwise_iou_rotated(gt_boxes_i, anchors_i)
  File "/root/detectron2/detectron2/structures/rotated_boxes.py", line 498, in pairwise_iou
    return pairwise_iou_rotated(boxes1.tensor, boxes2.tensor)
  File "/root/detectron2/detectron2/layers/rotated_boxes.py", line 23, in pairwise_iou_rotated
    return _C.box_iou_rotated(boxes1, boxes2)
RuntimeError: CUDA out of memory. Tried to allocate 6.52 GiB (GPU 0; 10.76 GiB total capacity; 2.40 GiB already allocated; 5.25 GiB free; 4.68 GiB reserved in total by PyTorch) (malloc at /pytorch/c10/cuda/CUDACachingAllocator.cpp:289)
frame #0: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x33 (0x7fcd6baf4193 in /root/anaconda3/envs/python367/lib/python3.6/site-packages/torch/lib/libc10.so)
frame #1: <unknown function> + 0x1bccc (0x7fcd6bd35ccc in /root/anaconda3/envs/python367/lib/python3.6/site-packages/torch/lib/libc10_cuda.so)
frame #2: <unknown function> + 0x1cd5e (0x7fcd6bd36d5e in /root/anaconda3/envs/python367/lib/python3.6/site-packages/torch/lib/libc10_cuda.so)
frame #3: at::native::empty_cuda(c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) + 0x284 (0x7fcc8b303094 in /root/anaconda3/envs/python367/lib/python3.6/site-packages/torch/lib/libtorch.so)
frame #4: <unknown function> + 0x455b8d8 (0x7fcc89bd38d8 in /root/anaconda3/envs/python367/lib/python3.6/site-packages/torch/lib/libtorch.so)
frame #5: <unknown function> + 0x1eedc47 (0x7fcc87565c47 in /root/anaconda3/envs/python367/lib/python3.6/site-packages/torch/lib/libtorch.so)
frame #6: <unknown function> + 0x3ead8a5 (0x7fcc895258a5 in /root/anaconda3/envs/python367/lib/python3.6/site-packages/torch/lib/libtorch.so)
frame #7: <unknown function> + 0x1eedc47 (0x7fcc87565c47 in /root/anaconda3/envs/python367/lib/python3.6/site-packages/torch/lib/libtorch.so)
frame #8: at::Tensor c10::KernelFunction::callUnboxedOnly<at::Tensor, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat> >(c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) const + 0xd1 (0x7fcd4b2389ed in /root/detectron2/detectron2/_C.cpython-36m-x86_64-linux-gnu.so)
frame #9: at::Tensor c10::Dispatcher::doCallUnboxedOnly<at::Tensor, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat> >(c10::DispatchTable const&, c10::LeftRight<ska::flat_hash_map<c10::TensorTypeId, c10::KernelFunction, std::hash<c10::TensorTypeId>, std::equal_to<c10::TensorTypeId>, std::allocator<std::pair<c10::TensorTypeId, c10::KernelFunction> > > > const&, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) const::{lambda(ska::flat_hash_map<c10::TensorTypeId, c10::KernelFunction, std::hash<c10::TensorTypeId>, std::equal_to<c10::TensorTypeId>, std::allocator<std::pair<c10::TensorTypeId, c10::KernelFunction> > > const&)#1}::operator()(ska::flat_hash_map<c10::TensorTypeId, c10::KernelFunction, std::hash<c10::TensorTypeId>, std::equal_to<c10::TensorTypeId>, std::allocator<std::pair<c10::TensorTypeId, c10::KernelFunction> > > const&) const + 0x101 (0x7fcd4b2368f1 in /root/detectron2/detectron2/_C.cpython-36m-x86_64-linux-gnu.so)
frame #10: std::result_of<at::Tensor c10::Dispatcher::doCallUnboxedOnly<at::Tensor, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat> >(c10::DispatchTable const&, c10::LeftRight<ska::flat_hash_map<c10::TensorTypeId, c10::KernelFunction, std::hash<c10::TensorTypeId>, std::equal_to<c10::TensorTypeId>, std::allocator<std::pair<c10::TensorTypeId, c10::KernelFunction> > > > const&, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) const::{lambda(ska::flat_hash_map<c10::TensorTypeId, c10::KernelFunction, std::hash<c10::TensorTypeId>, std::equal_to<c10::TensorTypeId>, std::allocator<std::pair<c10::TensorTypeId, c10::KernelFunction> > > const&)#1} (ska::flat_hash_map<c10::TensorTypeId, c10::KernelFunction, std::hash<c10::TensorTypeId>, std::equal_to<c10::TensorTypeId>, std::allocator<std::pair<c10::TensorTypeId, c10::KernelFunction> > > const&)>::type c10::LeftRight<ska::flat_hash_map<c10::TensorTypeId, c10::KernelFunction, std::hash<c10::TensorTypeId>, std::equal_to<c10::TensorTypeId>, std::allocator<std::pair<c10::TensorTypeId, c10::KernelFunction> > > >::read<at::Tensor c10::Dispatcher::doCallUnboxedOnly<at::Tensor, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat> >(c10::DispatchTable const&, c10::LeftRight<ska::flat_hash_map<c10::TensorTypeId, c10::KernelFunction, std::hash<c10::TensorTypeId>, std::equal_to<c10::TensorTypeId>, std::allocator<std::pair<c10::TensorTypeId, c10::KernelFunction> > > > const&, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) const::{lambda(ska::flat_hash_map<c10::TensorTypeId, c10::KernelFunction, std::hash<c10::TensorTypeId>, std::equal_to<c10::TensorTypeId>, std::allocator<std::pair<c10::TensorTypeId, c10::KernelFunction> > > const&)#1}>(at::Tensor c10::Dispatcher::doCallUnboxedOnly<at::Tensor, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat> >(c10::DispatchTable const&, 
c10::LeftRight<ska::flat_hash_map<c10::TensorTypeId, c10::KernelFunction, std::hash<c10::TensorTypeId>, std::equal_to<c10::TensorTypeId>, std::allocator<std::pair<c10::TensorTypeId, c10::KernelFunction> > > > const&, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) const::{lambda(ska::flat_hash_map<c10::TensorTypeId, c10::KernelFunction, std::hash<c10::TensorTypeId>, std::equal_to<c10::TensorTypeId>, std::allocator<std::pair<c10::TensorTypeId, c10::KernelFunction> > > const&)#1}&&) const + 0x128 (0x7fcd4b238c4c in /root/detectron2/detectron2/_C.cpython-36m-x86_64-linux-gnu.so)
frame #11: at::Tensor c10::Dispatcher::doCallUnboxedOnly<at::Tensor, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat> >(c10::DispatchTable const&, c10::LeftRight<ska::flat_hash_map<c10::TensorTypeId, c10::KernelFunction, std::hash<c10::TensorTypeId>, std::equal_to<c10::TensorTypeId>, std::allocator<std::pair<c10::TensorTypeId, c10::KernelFunction> > > > const&, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) const + 0x8d (0x7fcd4b2369a1 in /root/detectron2/detectron2/_C.cpython-36m-x86_64-linux-gnu.so)
frame #12: at::Tensor c10::Dispatcher::callUnboxedOnly<at::Tensor, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat> >(c10::OperatorHandle const&, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) const::{lambda(c10::DispatchTable const&)#1}::operator()(c10::DispatchTable const&) const + 0xa7 (0x7fcd4b2344a9 in /root/detectron2/detectron2/_C.cpython-36m-x86_64-linux-gnu.so)
frame #13: std::result_of<at::Tensor c10::Dispatcher::callUnboxedOnly<at::Tensor, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat> >(c10::OperatorHandle const&, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) const::{lambda(c10::DispatchTable const&)#1} (c10::DispatchTable const&)>::type c10::LeftRight<c10::DispatchTable>::read<at::Tensor c10::Dispatcher::callUnboxedOnly<at::Tensor, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat> >(c10::OperatorHandle const&, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) const::{lambda(c10::DispatchTable const&)#1}>(at::Tensor c10::Dispatcher::callUnboxedOnly<at::Tensor, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat> >(c10::OperatorHandle const&, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) const::{lambda(c10::DispatchTable const&)#1}&&) const + 0x128 (0x7fcd4b238dd0 in /root/detectron2/detectron2/_C.cpython-36m-x86_64-linux-gnu.so)
frame #14: c10::guts::infer_function_traits<at::Tensor c10::Dispatcher::callUnboxedOnly<at::Tensor, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat> >(c10::OperatorHandle const&, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) const::{lambda(c10::DispatchTable const&)#1}>::type::return_type c10::impl::OperatorEntry::readDispatchTable<at::Tensor c10::Dispatcher::callUnboxedOnly<at::Tensor, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat> >(c10::OperatorHandle const&, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) const::{lambda(c10::DispatchTable const&)#1}>(at::Tensor c10::Dispatcher::callUnboxedOnly<at::Tensor, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat> >(c10::OperatorHandle const&, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) const::{lambda(c10::DispatchTable const&)#1}&&) const + 0x49 (0x7fcd4b236a13 in /root/detectron2/detectron2/_C.cpython-36m-x86_64-linux-gnu.so)
frame #15: at::Tensor c10::Dispatcher::callUnboxedOnly<at::Tensor, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat> >(c10::OperatorHandle const&, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) const + 0x99 (0x7fcd4b234565 in /root/detectron2/detectron2/_C.cpython-36m-x86_64-linux-gnu.so)
frame #16: <unknown function> + 0xb2dd9 (0x7fcd4b241dd9 in /root/detectron2/detectron2/_C.cpython-36m-x86_64-linux-gnu.so)
frame #17: detectron2::box_iou_rotated_cuda(at::Tensor const&, at::Tensor const&) + 0x370 (0x7fcd4b2421da in /root/detectron2/detectron2/_C.cpython-36m-x86_64-linux-gnu.so)
frame #18: detectron2::box_iou_rotated(at::Tensor const&, at::Tensor const&) + 0x65 (0x7fcd4b1e8cf5 in /root/detectron2/detectron2/_C.cpython-36m-x86_64-linux-gnu.so)
frame #19: <unknown function> + 0x5df31 (0x7fcd4b1ecf31 in /root/detectron2/detectron2/_C.cpython-36m-x86_64-linux-gnu.so)
frame #20: <unknown function> + 0x66530 (0x7fcd4b1f5530 in /root/detectron2/detectron2/_C.cpython-36m-x86_64-linux-gnu.so)
<omitting python frames>

Вызвано ли это дополнительное сообщение об ошибке проблемой с окружением или с фреймворком Detectron2?

...