MPI Allreduce — «Address is 0 bytes after a block of size 216» (адрес находится сразу за концом блока размером 216 байт) - PullRequest
1 голос
/ 05 мая 2019

Я работаю над задачей с использованием MPI и хочу применить MPI_Allreduce. Однако этот вызов приводит к ошибке сегментации.

При запуске программы под valgrind я получаю следующий вывод:

==1081== Memcheck, a memory error detector
==1081== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==1081== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==1081== Command: ./test
==1081== 
==1081== Conditional jump or move depends on uninitialised value(s)
==1081==    at 0x57D4375: opal_value_unload (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libopen-pal.so.20.10.1)
==1081==    by 0x4E7F97A: ompi_proc_complete_init (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.10.1)
==1081==    by 0x4E838A4: ompi_mpi_init (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.10.1)
==1081==    by 0x4EA42AA: PMPI_Init (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.10.1)
==1081==    by 0x108CC2: main (test.c:53)
==1081==  Uninitialised value was created by a stack allocation
==1081==    at 0x4E7F90D: ompi_proc_complete_init (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.10.1)
==1081== 
==1081== Invalid read of size 8
==1081==    at 0x4C367EE: memmove (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==1081==    by 0x57E163F: ??? (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libopen-pal.so.20.10.1)
==1081==    by 0x1170CEEC: mca_coll_self_allreduce_intra (in /usr/lib/x86_64-linux-gnu/openmpi/lib/openmpi/mca_coll_self.so)
==1081==    by 0x4E92479: PMPI_Allreduce (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.10.1)
==1081==    by 0x108EA3: main (test.c:79)
==1081==  Address 0xfaeaee8 is 0 bytes after a block of size 216 alloc'd
==1081==    at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==1081==    by 0x108B4B: initCells (test.c:34)
==1081==    by 0x108E4B: main (test.c:76)
==1081== 
==1081== Invalid write of size 8
==1081==    at 0x4C367E3: memmove (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==1081==    by 0x57E163F: ??? (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libopen-pal.so.20.10.1)
==1081==    by 0x1170CEEC: mca_coll_self_allreduce_intra (in /usr/lib/x86_64-linux-gnu/openmpi/lib/openmpi/mca_coll_self.so)
==1081==    by 0x4E92479: PMPI_Allreduce (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.10.1)
==1081==    by 0x108EA3: main (test.c:79)
==1081==  Address 0xfaeadc8 is 0 bytes after a block of size 216 alloc'd
==1081==    at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==1081==    by 0x108B4B: initCells (test.c:34)
==1081==    by 0x108E3A: main (test.c:75)
==1081== 
==1081== Invalid read of size 8
==1081==    at 0x4C367E0: memmove (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==1081==    by 0x57E163F: ??? (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libopen-pal.so.20.10.1)
==1081==    by 0x1170CEEC: mca_coll_self_allreduce_intra (in /usr/lib/x86_64-linux-gnu/openmpi/lib/openmpi/mca_coll_self.so)
==1081==    by 0x4E92479: PMPI_Allreduce (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.10.1)
==1081==    by 0x108EA3: main (test.c:79)
==1081==  Address 0xfaeaef8 is 16 bytes after a block of size 216 alloc'd
==1081==    at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==1081==    by 0x108B4B: initCells (test.c:34)
==1081==    by 0x108E4B: main (test.c:76)
==1081== 
==1081== 
==1081== HEAP SUMMARY:
==1081==     in use at exit: 187,536 bytes in 698 blocks
==1081==   total heap usage: 21,481 allocs, 20,783 frees, 4,286,585 bytes allocated
==1081== 
==1081== LEAK SUMMARY:
==1081==    definitely lost: 31,023 bytes in 40 blocks
==1081==    indirectly lost: 2,121 bytes in 60 blocks
==1081==      possibly lost: 0 bytes in 0 blocks
==1081==    still reachable: 154,392 bytes in 598 blocks
==1081==         suppressed: 0 bytes in 0 blocks
==1081== Rerun with --leak-check=full to see details of leaked memory
==1081== 
==1081== For counts of detected and suppressed errors, rerun with: -v
==1081== ERROR SUMMARY: 19 errors from 4 contexts (suppressed: 0 from 0)

Код приведён ниже:

File: test.c
01: #include <stdlib.h>
02: #include <stdio.h>
03: #include <time.h>
04: #include <memory.h>
05: #include <mpi.h>
06: #include <stddef.h>
07: #include <math.h>
08: #include <unistd.h>
09: 
/*
 * Per-particle record made of seven consecutive double fields.
 *
 * The field meanings are not stated anywhere in this file; presumably m is
 * the mass, (x, y) the position, (vx, vy) the velocity and (fx, fy) the
 * force -- TODO confirm with the author.
 *
 * The field order matters: main() describes this layout to MPI field by
 * field using offsetof().
 */
typedef struct particle_t
{
    double m;
    double x, y;
    double vx, vy;
    double fx, fy;
} particle_t;
21: 
/*
 * Accumulator for one grid cell: three consecutive double fields.
 *
 * reduceMasses() accumulates these fields across elements of two arrays.
 * NOTE(review): whether x / y hold a mass-weighted sum or an already
 * normalised centre of mass is not visible in this file -- confirm.
 */
typedef struct centerMass
{
    double x, y;
    double totalmass;
} centerMass;
28: 
29: int numprocs, my_rank;
30: MPI_Datatype particle_type, old_type;
31: 
32: centerMass *initCells()
33: {
34:     centerMass *gridMasses = (centerMass *)malloc(3 * 3 * sizeof(centerMass));
35:     return gridMasses;
36: }
37: 
38: void reduceMasses(centerMass *in, centerMass *inout, int *len, MPI_Datatype *dtpr)
39: {
40:     for (int i = 0; i < *len; i++)
41:     {
42:         inout[i].totalmass += in[i].totalmass;
43:         inout[i].x += in[i].x;
44:         inout[i].y + in[i].y;
45:     }
46: }
47: 
48: int main(int argc, char *argv[])
49: {
50:     // MPI initializations
51:     MPI_Status status;
52:     MPI_Init(&argc, &argv);
53:     MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
54:     MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
55: 
56:     int count = 7;
57:     MPI_Datatype types[] = {MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE};
58:     int blocklengths[] = {1, 1, 1, 1, 1, 1, 1};
59:     MPI_Aint displacements[] = {
60:         offsetof(particle_t, m),
61:         offsetof(particle_t, x),
62:         offsetof(particle_t, y),
63:         offsetof(particle_t, vx),
64:         offsetof(particle_t, vy),
65:         offsetof(particle_t, fx),
66:         offsetof(particle_t, fy),
67:     };
68:     MPI_Aint lb, extent;
69:     MPI_Type_create_struct(count, blocklengths, displacements, types, &old_type);
70:     MPI_Type_get_extent(old_type, &lb, &extent);
71:     MPI_Type_create_resized(old_type, lb, extent, &particle_type);
72:     MPI_Type_commit(&particle_type);    
73: 
74:     centerMass *newMasses = initCells();
75:     centerMass *oldMasses = initCells();
76:     MPI_Op rM;
77:     MPI_Op_create(reduceMasses, 1, &rM);
78:     MPI_Allreduce(oldMasses, newMasses, 3 * 3, particle_type, rM, MPI_COMM_WORLD);
79: 
80:     MPI_Finalize();
81: }
82: 
83: 
84: 

Я не могу понять, что делаю неправильно: если закомментировать вызов Allreduce, программа работает. Я поискал в интернете — такая ошибка обычно связана со строками с завершающим нулём (null-terminated strings), которые я вообще не использую.

Спасибо

...