Связывание с openCV портит многопоточность? - PullRequest
1 голос
/ 24 октября 2019

Я заметил, что в моей программе линковка с opencv нарушила параллельное выполнение потоков. При отладке я обнаружил, что та же проблема проявляется даже на стандартном примере работы с потоками C++, который вообще не использует opencv. В приведённом ниже примере кода запускаются три потока.

Возможно, кто-то может объяснить нижеприведенное поведение и предложить, что делать, чтобы потоки выполнялись одновременно в моем приложении, поддерживающем openCV.

Я компилирую и линкую приведённый ниже пример кода двумя способами: один раз без библиотеки opencv ('mttok'), а другой раз — под именем 'mttno', с подключённой opencv (хотя она здесь и не нужна!).

g++ -o mttok -O3 main.cpp -lpthread 
c++ -o mttno -O3 main.cpp -lpthread -L/usr/local/opencv/lib64 -lopencv_core 

Когда я запускаю 'mttok', всё работает как положено. Три потока выполняются одновременно (это видно в мониторе загрузки процессора, но также следует из вывода команды time в Linux):

==> time mttok
All threads are running...
result1: 5e+19
result2: 5e+19
result3: 5e+19
27.072u 0.000s 0:09.04 299.4%   0+0k 0+0io 0pf+0w

Однако, когда я запускаю исполняемый файл, слинкованный с OpenCV, реальное (wall-clock) время выполнения примерно в три раза больше. Монитор процессора показывает, что работает только одно ядро.

==> time mttno
All threads are running...
result1: 5e+19
result2: 5e+19
result3: 5e+19
26.829u 0.163s 0:26.53 101.6%   0+0k 0+0io 0pf+0w

Я не понимаю этого поведения. Что можно сделать? Заранее спасибо. Bertwim

Пример кода приведен ниже.

#include <functional>
#include <iostream>
#include <thread>

/// Number of loop iterations each worker performs (10^10).
/// The value needs 34 bits, so it is declared `unsigned long long`, which is
/// at least 64 bits wide on every conforming implementation. Plain
/// `unsigned long` is only guaranteed 32 bits (e.g. 64-bit Windows, 32-bit
/// Linux), where the literal would be silently truncated.
constexpr unsigned long long NMAX = 10000000000ULL;

/// Purely CPU-bound workload with no shared state; each thread in the demo
/// runs foo() on its own instance.
class MTTest
{
public:
   /// Adds up every integer in [0, NMAX) in double precision.
   /// @param r  Output: receives the accumulated sum; it is written exactly
   ///           once, after the loop has finished.
   void foo( double& r )
   {
      double s = 0;
      // The counter has the same type as NMAX so it can reach the bound
      // without wrapping around on platforms with a 32-bit `unsigned long`.
      for (unsigned long long u = 0; u < NMAX; ++u)
      {
         s += u;
      }
      r = s;
   }
};

int main()
{
   double s1, s2, s3;

   std::unique_ptr<MTTest> ptr1( new MTTest );
   std::unique_ptr<MTTest> ptr2( new MTTest );
   std::unique_ptr<MTTest> ptr3( new MTTest );

   std::thread t1( &MTTest::foo, ptr1.get(), std::ref(s1) );
   std::thread t2( &MTTest::foo, ptr2.get(), std::ref(s2) );
   std::thread t3( &MTTest::foo, ptr3.get(), std::ref(s3) );

   std::cout << "All threads are running..." << std::endl;

   // synchronize threads:
   t1.join();
   t2.join();
   t3.join();

   std::cout << "result1: " << s1 << std::endl;
   std::cout << "result2: " << s2 << std::endl;
   std::cout << "result3: " << s3 << std::endl;

   return 0;
}
// ---- eof ---

Новая информация: вывод strace

Как и было предложено, я сравнил вывод команды strace для обеих версий. Это может дать зацепку, поскольку есть некоторые заметные различия. Большинство различий — это просто адреса, которые я проигнорировал. Но выделяются два фрагмента (DIFF), которые я извлёк из вывода. Ниже сначала приведена часть вывода «нерабочей» («not-ok») версии, а за ней — соответствующий вывод «рабочей» («ok») версии. Фрагменты DIFF помечены. У «нерабочей» версии больше вызовов futex, частично с другими результатами, и в её выводе есть обращения, связанные с процессорами. В выводе «рабочей» версии такой информации нет. К сожалению, я не могу это интерпретировать.

brk(NULL)                               = 0x1edc000
brk(0x1efd000)                          = 0x1efd000
++++ BEGIN DIFF 1 +++
fstat(0, {st_mode=S_IFCHR|0600, st_rdev=makedev(136, 4), ...}) = 0
fstat(1, {st_mode=S_IFREG|0664, st_size=23324, ...}) = 0
fstat(2, {st_mode=S_IFREG|0664, st_size=23381, ...}) = 0
sched_getaffinity(0, 512, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) = 64
shmget(IPC_PRIVATE, 4096, IPC_CREAT|0666) = 3932170
shmat(3932170, NULL, 0)                 = 0x7f3fbb62b000
shmctl(3932170, IPC_RMID, NULL)         = 0
shmget(0x510510, 4096, 0666)            = -1 ENOENT (No such file or directory)
shmget(0x510510, 4096, IPC_CREAT|0666)  = 3964939
shmat(3964939, NULL, 0)                 = 0x7f3fbb62a000
open("/sys/devices/system/cpu", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
getdents(3, /* 30 entries */, 32768)    = 864
getdents(3, /* 0 entries */, 32768)     = 0
close(3)                                = 0
sched_getaffinity(0, 8, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) = 8
open("/sys/devices/system/node", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
getdents(3, /* 10 entries */, 32768)    = 312
openat(AT_FDCWD, "/sys/devices/system/node/node0/cpumap", O_RDONLY) = 4
read(4, "fff\n", 160)                   = 4
close(4)                                = 0
getdents(3, /* 0 entries */, 32768)     = 0
close(3)                                = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu0/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "041\n", 160)                   = 4
close(3)                                = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu1/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "082\n", 160)                   = 4
close(3)                                = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu2/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "104\n", 160)                   = 4
close(3)                                = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu3/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "208\n", 160)                   = 4
close(3)                                = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu4/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "410\n", 160)                   = 4
close(3)                                = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu5/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "820\n", 160)                   = 4
close(3)                                = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu6/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "041\n", 160)                   = 4
close(3)                                = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu7/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "082\n", 160)                   = 4
close(3)                                = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "104\n", 160)                   = 4
close(3)                                = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu9/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "208\n", 160)                   = 4
close(3)                                = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu10/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "410\n", 160)                   = 4
close(3)                                = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu11/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "820\n", 160)                   = 4
close(3)                                = 0
sched_setaffinity(0, 128, [0])          = 0
set_mempolicy(MPOL_INTERLEAVE, [0x0000000000000001], 64) = 0
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f3fb5899000
mprotect(0x7f3fb589a000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7f3fb6098fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f3fb60999d0, tls=0x7f3fb6099700, child_tidptr=0x7f3fb60999d0) = 19550
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f3fb3098000
mprotect(0x7f3fb3099000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7f3fb3897fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f3fb38989d0, tls=0x7f3fb3898700, child_tidptr=0x7f3fb38989d0) = 19551
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f3fb0897000
mprotect(0x7f3fb0898000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7f3fb1096fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f3fb10979d0, tls=0x7f3fb1097700, child_tidptr=0x7f3fb10979d0) = 19552
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f3fae096000
mprotect(0x7f3fae097000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7f3fae895fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f3fae8969d0, tls=0x7f3fae896700, child_tidptr=0x7f3fae8969d0) = 19553
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f3fab895000
mprotect(0x7f3fab896000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7f3fac094fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f3fac0959d0, tls=0x7f3fac095700, child_tidptr=0x7f3fac0959d0) = 19554
==== END DIFF 1 ===
futex(0x7f3fb9f0305c, FUTEX_WAKE_PRIVATE, 2147483647) = 0
futex(0x7f3fb9f03068, FUTEX_WAKE_PRIVATE, 2147483647) = 0
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f3fa9094000
mprotect(0x7f3fa9095000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7f3fa9893fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f3fa98949d0, tls=0x7f3fa9894700, child_tidptr=0x7f3fa98949d0) = 19555
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f3fa8893000
mprotect(0x7f3fa8894000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7f3fa9092fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f3fa90939d0, tls=0x7f3fa9093700, child_tidptr=0x7f3fa90939d0) = 19556
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f3fa8092000
mprotect(0x7f3fa8093000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7f3fa8891fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f3fa88929d0, tls=0x7f3fa8892700, child_tidptr=0x7f3fa88929d0) = 19557
fstat(1, {st_mode=S_IFREG|0664, st_size=30221, ...}) = 0
write(1, "All threads are running...\n", 27All threads are running...
) = 27
++++ BEGIN DIFF 2 +++
futex(0x7f3fa98949d0, FUTEX_WAIT, 19555, NULL) = 0
futex(0x7f3fa90939d0, FUTEX_WAIT, 19556, NULL) = 0
futex(0x7f3fa88929d0, FUTEX_WAIT, 19557, NULL) = 0
write(1, "result1: 5e+19\n", 15result1: 5e+19
)        = 15
write(1, "result2: 5e+19\n", 15result2: 5e+19
)        = 15
write(1, "result3: 5e+19\n", 15result3: 5e+19
)        = 15
futex(0x7f3fb8bfad68, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f3fb8bfade8, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f3fb8bfae68, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f3fb8bfae18, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f3fb8bfaee8, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f3fb8bfaf68, FUTEX_WAKE_PRIVATE, 1) = 1
munmap(0x7f3fa9094000, 8392704)         = 0
munmap(0x7f3fa8893000, 8392704)         = 0
munmap(0x7f3fa8092000, 8392704)         = 0
munmap(0x7f3fb5899000, 8392704)         = 0
munmap(0x7f3fb3899000, 33554432)        = 0
munmap(0x7f3fb1098000, 33554432)        = 0
munmap(0x7f3fae897000, 33554432)        = 0
munmap(0x7f3fac096000, 33554432)        = 0
munmap(0x7f3fa9895000, 33554432)        = 0
shmctl(3964939, IPC_STAT, {shm_perm={uid=1000, gid=100, mode=0666, key=5309712, cuid=1000, cgid=100}, shm_segsz=4096, shm_cpid=19549, shm_lpid=19549, shm_nattch=1, shm_atime=1572184421, shm_dtime=0, shm_ctime=1572184421}) = 0
shmctl(3964939, IPC_RMID, NULL)         = 0
shmdt(0x7f3fbb62a000)                   = 0
shmdt(0x7f3fbb62b000)                   = 0
==== END DIFF 2 ===
exit_group(0)                           = ?
+++ exited with 0 +++

Для рабочей версии команда 'strace ./mttok' даёт:

==> strace ./mttok
brk(NULL)                               = 0x1737000
brk(0x1758000)                          = 0x1758000
++++ DIFF 1 +++
==== DIFF 1 ===
futex(0x7ff40b14f05c, FUTEX_WAKE_PRIVATE, 2147483647) = 0
futex(0x7ff40b14f068, FUTEX_WAKE_PRIVATE, 2147483647) = 0
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7ff409cbc000
mprotect(0x7ff409cbd000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7ff40a4bbfb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7ff40a4bc9d0, tls=0x7ff40a4bc700, child_tidptr=0x7ff40a4bc9d0) = 19541
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7ff4094bb000
mprotect(0x7ff4094bc000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7ff409cbafb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7ff409cbb9d0, tls=0x7ff409cbb700, child_tidptr=0x7ff409cbb9d0) = 19542
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7ff408cba000
mprotect(0x7ff408cbb000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7ff4094b9fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7ff4094ba9d0, tls=0x7ff4094ba700, child_tidptr=0x7ff4094ba9d0) = 19543
fstat(1, {st_mode=S_IFREG|0664, st_size=14307, ...}) = 0
write(1, "All threads are running...\n", 27All threads are running...
) = 27
++++ DIFF 2 +++
futex(0x7ff40a4bc9d0, FUTEX_WAIT, 19541, NULL) = 0
write(1, "result1: 5e+19\n", 15result1: 5e+19
)        = 15
write(1, "result2: 5e+19\n", 15result2: 5e+19
)        = 15
write(1, "result3: 5e+19\n", 15result3: 5e+19
)        = 15
==== DIFF 2 ===
exit_group(0)                           = ?
+++ exited with 0 +++

Новая информация2: strace -f

Когда к команде strace добавлен флаг -f, дополнительного вывода становится чрезвычайно много. В частности, для «нерабочего» случая появляются многие тысячи строк, выглядящих так:

[pid  3836] mprotect(0x7f2b77446000, 8388608, PROT_READ|PROT_WRITE <unfinished ...>
[pid  3841] sched_yield( <unfinished ...>
[pid  3836] <... mprotect resumed> )    = 0
[pid  3841] <... sched_yield resumed> ) = 0
[pid  3836] clone( <unfinished ...>
[pid  3841] sched_yield( <unfinished ...>
[pid  3840] <... sched_yield resumed> ) = 0
[pid  3841] <... sched_yield resumed> ) = 0
[pid  3836] <... clone resumed> child_stack=0x7f2b77c44fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f2b77c459d0, tls=0x7f2b77c45700, child_tidptr=0x7f2b77c459d0) = 3844
[pid  3841] sched_yield( <unfinished ...>
[pid  3840] sched_yield( <unfinished ...>
[pid  3836] fstat(1,  <unfinished ...>
[pid  3841] <... sched_yield resumed> ) = 0
[pid  3836] <... fstat resumed> {st_mode=S_IFREG|0664, st_size=37237, ...}) = 0
[pid  3841] sched_yield( <unfinished ...>
[pid  3840] <... sched_yield resumed> ) = 0
[pid  3836] write(1, "All threads are running...\n", 27All threads are running...
 <unfinished ...>
[pid  3841] <... sched_yield resumed> ) = 0
[pid  3836] <... write resumed> )       = 27
[pid  3841] sched_yield( <unfinished ...>
[pid  3836] futex(0x7f2b78c479d0, FUTEX_WAIT, 3842, NULL <unfinished ...>
[pid  3841] <... sched_yield resumed> ) = 0
[pid  3840] sched_yield( <unfinished ...>
[pid  3841] sched_yield( <unfinished ...>
[pid  3840] <... sched_yield resumed> ) = 0
[pid  3841] <... sched_yield resumed> ) = 0
[pid  3840] sched_yield( <unfinished ...>
[pid  3840] <... sched_yield resumed> ) = 0
[pid  3841] <... sched_yield resumed> ) = 0
[pid  3840] sched_yield( <unfinished ...>
--- 
and many thousands lines like these to follow...
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...