Я заметил, что в моей программе компоновка с opencv нарушила параллельное выполнение потоков. При отладке я обнаружил, что та же проблема проявляется даже в стандартном примере с потоками C++, который вообще не использует opencv. В приведенном ниже примере кода запускаются три потока.
Возможно, кто-то может объяснить нижеприведенное поведение и предложить, что делать, чтобы потоки выполнялись одновременно в моем приложении, поддерживающем openCV.
Я компилирую и компоную приведенный ниже пример кода двумя способами: один раз без библиотеки opencv ('mttok') и один раз под именем 'mttno', с подключенной opencv (хотя она здесь и не нужна!):
g++ -o mttok -O3 main.cpp -lpthread
c++ -o mttno -O3 main.cpp -lpthread -L/usr/local/opencv/lib64 -lopencv_core
Когда я запускаю 'mttok', все работает как положено. Три потока выполняются одновременно (это можно проверить по монитору загрузки процессора, но это видно и из вывода команды linux time):
==> time mttok
All threads are running...
result1: 5e+19
result2: 5e+19
result3: 5e+19
27.072u 0.000s 0:09.04 299.4% 0+0k 0+0io 0pf+0w
Однако когда я запускаю исполняемый файл, скомпонованный с OpenCV, реальное (wall-clock) время выполнения оказывается примерно в три раза больше. Монитор процессора показывает, что загружен только один процессор.
==> time mttno
All threads are running...
result1: 5e+19
result2: 5e+19
result3: 5e+19
26.829u 0.163s 0:26.53 101.6% 0+0k 0+0io 0pf+0w
Я не понимаю этого поведения. Что можно сделать? Заранее спасибо. Bertwim
Пример кода приведен ниже.
#include <iostream>
#include <thread>
// Number of loop iterations performed by each MTTest::foo call (10^10).
// NOTE(review): this literal does not fit in a 32-bit unsigned long; the code
// appears to assume a platform where unsigned long is 64 bits wide — confirm.
const unsigned long NMAX=10000000000;
// Pure-arithmetic workload; main() runs one instance per thread.
class MTTest
{
public:
    // Adds every integer in [0, NMAX) into a double accumulator and stores
    // the total in `r` only after the loop has finished.
    void foo( double& r )
    {
        double total = 0;
        unsigned long i = 0;
        while (i < NMAX)
        {
            total += i;
            ++i;
        }
        r = total;
    }
};
int main()
{
double s1, s2, s3;
std::unique_ptr<MTTest> ptr1( new MTTest );
std::unique_ptr<MTTest> ptr2( new MTTest );
std::unique_ptr<MTTest> ptr3( new MTTest );
std::thread t1( &MTTest::foo, ptr1.get(), std::ref(s1) );
std::thread t2( &MTTest::foo, ptr2.get(), std::ref(s2) );
std::thread t3( &MTTest::foo, ptr3.get(), std::ref(s3) );
std::cout << "All threads are running..." << std::endl;
// synchronize threads:
t1.join();
t2.join();
t3.join();
std::cout << "result1: " << s1 << std::endl;
std::cout << "result2: " << s2 << std::endl;
std::cout << "result3: " << s3 << std::endl;
return 0;
}
// ---- eof ---
Новая информация: вывод strace
Как и было предложено, я сравнил вывод команды strace для обоих вариантов. Это может дать зацепку, поскольку есть некоторые заметные различия. Большинство различий — это просто адреса, и их я проигнорировал. Но выделяются два отличия (DIFF), которые я извлек из вывода. Ниже сначала приведена часть вывода «не-ок»-версии, а за ней — соответствующий вывод «ок»-версии. Отличия помечены как DIFF. У «не-ок»-версии больше вызовов futex, частично с другими результатами, и, кроме того, в ее выводе есть обращения, связанные с информацией о процессорах. В выводе «ок»-версии такой информации нет. К сожалению, я не могу это интерпретировать.
brk(NULL) = 0x1edc000
brk(0x1efd000) = 0x1efd000
++++ BEGIN DIFF 1 +++
fstat(0, {st_mode=S_IFCHR|0600, st_rdev=makedev(136, 4), ...}) = 0
fstat(1, {st_mode=S_IFREG|0664, st_size=23324, ...}) = 0
fstat(2, {st_mode=S_IFREG|0664, st_size=23381, ...}) = 0
sched_getaffinity(0, 512, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) = 64
shmget(IPC_PRIVATE, 4096, IPC_CREAT|0666) = 3932170
shmat(3932170, NULL, 0) = 0x7f3fbb62b000
shmctl(3932170, IPC_RMID, NULL) = 0
shmget(0x510510, 4096, 0666) = -1 ENOENT (No such file or directory)
shmget(0x510510, 4096, IPC_CREAT|0666) = 3964939
shmat(3964939, NULL, 0) = 0x7f3fbb62a000
open("/sys/devices/system/cpu", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
getdents(3, /* 30 entries */, 32768) = 864
getdents(3, /* 0 entries */, 32768) = 0
close(3) = 0
sched_getaffinity(0, 8, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) = 8
open("/sys/devices/system/node", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
getdents(3, /* 10 entries */, 32768) = 312
openat(AT_FDCWD, "/sys/devices/system/node/node0/cpumap", O_RDONLY) = 4
read(4, "fff\n", 160) = 4
close(4) = 0
getdents(3, /* 0 entries */, 32768) = 0
close(3) = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu0/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "041\n", 160) = 4
close(3) = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu1/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "082\n", 160) = 4
close(3) = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu2/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "104\n", 160) = 4
close(3) = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu3/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "208\n", 160) = 4
close(3) = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu4/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "410\n", 160) = 4
close(3) = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu5/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "820\n", 160) = 4
close(3) = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu6/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "041\n", 160) = 4
close(3) = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu7/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "082\n", 160) = 4
close(3) = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "104\n", 160) = 4
close(3) = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu9/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "208\n", 160) = 4
close(3) = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu10/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "410\n", 160) = 4
close(3) = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/cpu11/cache/index1/shared_cpu_map", O_RDONLY) = 3
read(3, "820\n", 160) = 4
close(3) = 0
sched_setaffinity(0, 128, [0]) = 0
set_mempolicy(MPOL_INTERLEAVE, [0x0000000000000001], 64) = 0
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f3fb5899000
mprotect(0x7f3fb589a000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7f3fb6098fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f3fb60999d0, tls=0x7f3fb6099700, child_tidptr=0x7f3fb60999d0) = 19550
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f3fb3098000
mprotect(0x7f3fb3099000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7f3fb3897fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f3fb38989d0, tls=0x7f3fb3898700, child_tidptr=0x7f3fb38989d0) = 19551
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f3fb0897000
mprotect(0x7f3fb0898000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7f3fb1096fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f3fb10979d0, tls=0x7f3fb1097700, child_tidptr=0x7f3fb10979d0) = 19552
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f3fae096000
mprotect(0x7f3fae097000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7f3fae895fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f3fae8969d0, tls=0x7f3fae896700, child_tidptr=0x7f3fae8969d0) = 19553
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f3fab895000
mprotect(0x7f3fab896000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7f3fac094fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f3fac0959d0, tls=0x7f3fac095700, child_tidptr=0x7f3fac0959d0) = 19554
==== END DIFF 1 ===
futex(0x7f3fb9f0305c, FUTEX_WAKE_PRIVATE, 2147483647) = 0
futex(0x7f3fb9f03068, FUTEX_WAKE_PRIVATE, 2147483647) = 0
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f3fa9094000
mprotect(0x7f3fa9095000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7f3fa9893fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f3fa98949d0, tls=0x7f3fa9894700, child_tidptr=0x7f3fa98949d0) = 19555
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f3fa8893000
mprotect(0x7f3fa8894000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7f3fa9092fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f3fa90939d0, tls=0x7f3fa9093700, child_tidptr=0x7f3fa90939d0) = 19556
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f3fa8092000
mprotect(0x7f3fa8093000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7f3fa8891fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f3fa88929d0, tls=0x7f3fa8892700, child_tidptr=0x7f3fa88929d0) = 19557
fstat(1, {st_mode=S_IFREG|0664, st_size=30221, ...}) = 0
write(1, "All threads are running...\n", 27All threads are running...
) = 27
++++ BEGIN DIFF 2 +++
futex(0x7f3fa98949d0, FUTEX_WAIT, 19555, NULL) = 0
futex(0x7f3fa90939d0, FUTEX_WAIT, 19556, NULL) = 0
futex(0x7f3fa88929d0, FUTEX_WAIT, 19557, NULL) = 0
write(1, "result1: 5e+19\n", 15result1: 5e+19
) = 15
write(1, "result2: 5e+19\n", 15result2: 5e+19
) = 15
write(1, "result3: 5e+19\n", 15result3: 5e+19
) = 15
futex(0x7f3fb8bfad68, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f3fb8bfade8, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f3fb8bfae68, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f3fb8bfae18, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f3fb8bfaee8, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f3fb8bfaf68, FUTEX_WAKE_PRIVATE, 1) = 1
munmap(0x7f3fa9094000, 8392704) = 0
munmap(0x7f3fa8893000, 8392704) = 0
munmap(0x7f3fa8092000, 8392704) = 0
munmap(0x7f3fb5899000, 8392704) = 0
munmap(0x7f3fb3899000, 33554432) = 0
munmap(0x7f3fb1098000, 33554432) = 0
munmap(0x7f3fae897000, 33554432) = 0
munmap(0x7f3fac096000, 33554432) = 0
munmap(0x7f3fa9895000, 33554432) = 0
shmctl(3964939, IPC_STAT, {shm_perm={uid=1000, gid=100, mode=0666, key=5309712, cuid=1000, cgid=100}, shm_segsz=4096, shm_cpid=19549, shm_lpid=19549, shm_nattch=1, shm_atime=1572184421, shm_dtime=0, shm_ctime=1572184421}) = 0
shmctl(3964939, IPC_RMID, NULL) = 0
shmdt(0x7f3fbb62a000) = 0
shmdt(0x7f3fbb62b000) = 0
==== END DIFF 2 ===
exit_group(0) = ?
+++ exited with 0 +++
Для правильно работающего варианта 'strace ./mttok' дает:
==> strace ./mttok
brk(NULL) = 0x1737000
brk(0x1758000) = 0x1758000
++++ DIFF 1 +++
==== DIFF 1 ===
futex(0x7ff40b14f05c, FUTEX_WAKE_PRIVATE, 2147483647) = 0
futex(0x7ff40b14f068, FUTEX_WAKE_PRIVATE, 2147483647) = 0
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7ff409cbc000
mprotect(0x7ff409cbd000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7ff40a4bbfb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7ff40a4bc9d0, tls=0x7ff40a4bc700, child_tidptr=0x7ff40a4bc9d0) = 19541
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7ff4094bb000
mprotect(0x7ff4094bc000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7ff409cbafb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7ff409cbb9d0, tls=0x7ff409cbb700, child_tidptr=0x7ff409cbb9d0) = 19542
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7ff408cba000
mprotect(0x7ff408cbb000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7ff4094b9fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7ff4094ba9d0, tls=0x7ff4094ba700, child_tidptr=0x7ff4094ba9d0) = 19543
fstat(1, {st_mode=S_IFREG|0664, st_size=14307, ...}) = 0
write(1, "All threads are running...\n", 27All threads are running...
) = 27
++++ DIFF 2 +++
futex(0x7ff40a4bc9d0, FUTEX_WAIT, 19541, NULL) = 0
write(1, "result1: 5e+19\n", 15result1: 5e+19
) = 15
write(1, "result2: 5e+19\n", 15result2: 5e+19
) = 15
write(1, "result3: 5e+19\n", 15result3: 5e+19
) = 15
==== DIFF 2 ===
exit_group(0) = ?
+++ exited with 0 +++
Новая информация2: strace -f
Когда к команде strace добавлен флаг -f, объем дополнительного вывода огромен. В частности, для «не-ок»-варианта появляется много тысяч строк, выглядящих так:
[pid 3836] mprotect(0x7f2b77446000, 8388608, PROT_READ|PROT_WRITE <unfinished ...>
[pid 3841] sched_yield( <unfinished ...>
[pid 3836] <... mprotect resumed> ) = 0
[pid 3841] <... sched_yield resumed> ) = 0
[pid 3836] clone( <unfinished ...>
[pid 3841] sched_yield( <unfinished ...>
[pid 3840] <... sched_yield resumed> ) = 0
[pid 3841] <... sched_yield resumed> ) = 0
[pid 3836] <... clone resumed> child_stack=0x7f2b77c44fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f2b77c459d0, tls=0x7f2b77c45700, child_tidptr=0x7f2b77c459d0) = 3844
[pid 3841] sched_yield( <unfinished ...>
[pid 3840] sched_yield( <unfinished ...>
[pid 3836] fstat(1, <unfinished ...>
[pid 3841] <... sched_yield resumed> ) = 0
[pid 3836] <... fstat resumed> {st_mode=S_IFREG|0664, st_size=37237, ...}) = 0
[pid 3841] sched_yield( <unfinished ...>
[pid 3840] <... sched_yield resumed> ) = 0
[pid 3836] write(1, "All threads are running...\n", 27All threads are running...
<unfinished ...>
[pid 3841] <... sched_yield resumed> ) = 0
[pid 3836] <... write resumed> ) = 27
[pid 3841] sched_yield( <unfinished ...>
[pid 3836] futex(0x7f2b78c479d0, FUTEX_WAIT, 3842, NULL <unfinished ...>
[pid 3841] <... sched_yield resumed> ) = 0
[pid 3840] sched_yield( <unfinished ...>
[pid 3841] sched_yield( <unfinished ...>
[pid 3840] <... sched_yield resumed> ) = 0
[pid 3841] <... sched_yield resumed> ) = 0
[pid 3840] sched_yield( <unfinished ...>
[pid 3840] <... sched_yield resumed> ) = 0
[pid 3841] <... sched_yield resumed> ) = 0
[pid 3840] sched_yield( <unfinished ...>
---
and many thousands of lines like these follow...