Сбор данных через MPI - PullRequest
       39

Сбор данных через MPI

0 голосов
/ 23 апреля 2020

Я использую команду MPI_Gather для сбора данных с каждого процессора, но получаю следующую информацию об ошибке (523-я строка в MAINp.f90 содержит ошибку).

forrtl: severe (174): SIGSEGV, segmentation fault occurred
Image              PC                Routine            Line        Source             
sot                0000000000427FD3  Unknown               Unknown  Unknown
libpthread-2.26.s  00002AAAB0D1C2F0  Unknown               Unknown  Unknown
sot                000000000041D2AE  MAIN__                    523  MAINp.f90
sot                0000000000409B92  Unknown               Unknown  Unknown
libc-2.26.so       00002AAAB115034A  __libc_start_main     Unknown  Unknown
sot                0000000000409AAA  Unknown               Unknown  Unknown
srun: error: nid01236: task 19: Exited with exit code 174
srun: Terminating job step 14213926.0
slurmstepd: error: *** STEP 14213926.0 ON nid01236 CANCELLED AT 2020-04-23T06:53:35 ***

Я не знаю, почему это неправильно. Я просто хочу собрать данные с каждого процессора. Я помещаю только часть моего MAINp.F90 ниже, а строка ошибки следует за меткой (! ЭТО ОШИБКА). Кто-нибудь, пожалуйста, дайте мне несколько советов? Спасибо.

PROGRAM MAIN
USE MPI
USE CAL
IMPLICIT NONE
!Variables for setting up the parameters in INPUT.dat file
CHARACTER (LEN=50)            :: na(6)                !Array to store the names of Hamiltonian files from wannier90
DOUBLE PRECISION              :: an                   !Angel interval
INTEGER                       :: km(2)                !k point mesh
INTEGER                       :: vd                   !Velocity direction of the Hamiltonian matrix
DOUBLE PRECISION              :: fermi                !Fermi energy value
DOUBLE PRECISION              :: wf                   !Energy window
DOUBLE PRECISION              :: bv                   !Broadening value
DOUBLE PRECISION              :: pi                   !pi
DOUBLE PRECISION              :: hb                   !h_bar
DOUBLE PRECISION              :: es                   !Electron volt
!
!Variables for parameters in '.wout' file
INTEGER                       :: sta                  !Status of files
DOUBLE PRECISION              :: rea_c(3,3)           !Lattice constant of unit cell in real space
DOUBLE PRECISION              :: rec_c(3,3)           !Vectors of unit cell in the reciprocal space
!
!Variables for parameters in Hamiltonian ('_hr.dat') file from wannier90
INTEGER                       :: nu_wa                !Number of wannier function
INTEGER                       :: nu_nr(5)             !Number of Wigner-Seitz grid point
INTEGER, ALLOCATABLE          :: nd1(:)               !Degeneracy of each Wigner-Seitz grid point with magnetizaiton along z axis
INTEGER, ALLOCATABLE          :: nd2(:)               !Degeneracy of each Wigner-Seitz grid point with magnetizaiton along different axes
INTEGER, ALLOCATABLE          :: nd3(:)               !Degeneracy of each Wigner-Seitz grid point with magnetizaiton along different axes
INTEGER, ALLOCATABLE          :: nd4(:)               !Degeneracy of each Wigner-Seitz grid point with magnetizaiton along different axes
INTEGER, ALLOCATABLE          :: nd5(:)               !Degeneracy of each Wigner-Seitz grid point with magnetizaiton along different axes
DOUBLE PRECISION, ALLOCATABLE :: hr1(:,:)             !Array to store the Hamitlonian matrix information in '_hr.dat' file, magnetization along z axis
DOUBLE PRECISION, ALLOCATABLE :: hr2(:,:)             !Array to store the Hamitlonian matrix information in '_hr.dat' file, magnetization along other axes
DOUBLE PRECISION, ALLOCATABLE :: hr3(:,:)             !Array to store the Hamitlonian matrix information in '_hr.dat' file, magnetization along other axes
DOUBLE PRECISION, ALLOCATABLE :: hr4(:,:)             !Array to store the Hamitlonian matrix information in '_hr.dat' file, magnetization along other axes
DOUBLE PRECISION, ALLOCATABLE :: hr5(:,:)             !Array to store the Hamitlonian matrix information in '_hr.dat' file, magnetization along other axes
!
!Internal variables
INTEGER                       :: i, j, k, l, n        !Integer for loop
CHARACTER (LEN=100)           :: str                  !String for transitting data
DOUBLE PRECISION              :: tr(3)                !Array for transitting data
DOUBLE PRECISION, ALLOCATABLE :: kp(:,:)              !Array to store the Cartesian coordinate of k-point mesh
DOUBLE PRECISION, ALLOCATABLE :: ka(:,:,:)            !Array to store the Cartesian coordiantes of all k points
DOUBLE COMPLEX, ALLOCATABLE   :: tb(:,:)              !Array to store the extracted tight binding Hamiltonian matrix
DOUBLE COMPLEX, ALLOCATABLE   :: ec(:,:)              !Array to store the Eigen vector matrix
DOUBLE PRECISION, ALLOCATABLE :: ev(:,:)              !Array to store the Eigen value on single k point
DOUBLE PRECISION              :: dk(2)                !Array to store the Delta kx and ky
INTEGER                       :: nb                   !Number of valence band
DOUBLE PRECISION              :: me                   !Minimum eigen value
DOUBLE COMPLEX, ALLOCATABLE   :: u_s1(:,:)            !Array to store the contribution of each eigen state to the total spin orbit torque
DOUBLE COMPLEX, ALLOCATABLE   :: u_s2(:,:)            !Array to store the contribution of each eigen state to the total spin orbit torque
DOUBLE COMPLEX, ALLOCATABLE   :: u_t1(:,:)            !Array to collect the contribution of each eigen state to the total spin orbit torque from all processors
DOUBLE COMPLEX, ALLOCATABLE   :: u_t2(:,:)            !Array to collect the contribution of each eigen state to the total spin orbit torque from all processors
DOUBLE COMPLEX                :: sr1                  !Sum of Femri surface part for spin orbit torque on all km(1) k points
DOUBLE COMPLEX                :: sr2                  !Sum of Femri surface part for spin orbit torque on all km(1) k points
DOUBLE COMPLEX, ALLOCATABLE   :: crr1_all(:)          !Array of ct
DOUBLE COMPLEX, ALLOCATABLE   :: crr2_all(:)          !Array of ct
DOUBLE COMPLEX                :: crr1                 !Sum of conductivity on all k points
DOUBLE COMPLEX                :: crr2                 !Sum of conductivity on all k points
DOUBLE COMPLEX                :: crr1_total           !Sum of conductivity
DOUBLE COMPLEX                :: crr2_total           !Sum of conductivity
DOUBLE PRECISION, ALLOCATABLE, TARGET :: nme(:)       !Array to store the minimum eigen value
INTEGER, ALLOCATABLE, TARGET          :: nnb(:)       !Array to store the number of valence band
DOUBLE PRECISION, POINTER     :: p1                   !Pointer used to find the minimum eigen value
INTEGER, POINTER              :: p2                   !Pointer used to find the number of valence band
!
!Parameters for timer
INTEGER                       :: cr, t00, t0, t      !Timer variables
DOUBLE PRECISION              :: ra                   !Timer rate
!Parameters for MPI
INTEGER                       :: world_size           !MPI
INTEGER                       :: world_rank, ierr     !MPI
INTEGER                       :: irank, j0            !MPI
!
!Initializing MPI
CALL MPI_Init(ierr)
CALL MPI_Comm_size(MPI_COMM_WORLD, world_size, ierr)
CALL MPI_Comm_rank(MPI_COMM_WORLD, world_rank, ierr)
!
!Allocating the array used to store the contribution of each eigen state to the total spin orbit torque
ALLOCATE (u_s1(2,nu_wa*km(1)))
ALLOCATE (u_s2(2,nu_wa*km(1)))
!
!Initialising array used to store the total conductivity
cr = CMPLX(0.0d0, 0.0d0)
!
!Allocating array to collect the contribution of each eigen state to the total spin orbit torque from all processors
IF (world_rank .EQ. 0) THEN
    ALLOCATE (u_t1(2,nu_wa*km(1)*km(2)))
    ALLOCATE (u_t2(2,nu_wa*km(1)*km(2)))
END IF
u_t1 = CMPLX(0.0d0, 0.0d0)
u_t2 = CMPLX(0.0d0, 0.0d0)
!
!Allocating array to collect the number of valence band and the minimum eigen value
IF (world_rank .EQ. 0) THEN
    ALLOCATE (nme(km(2)))
    ALLOCATE (nnb(km(2)))
END IF
nme = 0.0d0
nnb = 0
!
!Reading the Cartesian coordinates of k-point mesh
DO j = 1, km(2), 1
   IF (mod(j-1, world_size) .NE. world_rank) CYCLE
   DO k = 1, km(1), 1
      kp(k,:) = ka(j,k,:)
   END DO
   !Building up Hamiltonian matrix on k points and diagonalising the matrix to obtain Eigen vectors and values
   CALL HAMSUR(vd,kp,nu_wa,nu_nr,km(1),nd1,nd2,nd3,nd4,nd5,hr1,hr2,hr3,hr4,hr5,tb,ec,ev,fermi,an,wf,bv,dk,u_s1,u_s2,sr1,sr2,nb,me)
   !
   !THIS IS THE ERROR LINE
   CALL MPI_Gather(u_s1, 2*nu_wa*km(1), MPI_DOUBLE_COMPLEX, u_t1(1:2,1+nu_wa*km(1)*(j-1):nu_wa*km(1)*j),&
                   2*nu_wa*km(1), MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr)
   CALL MPI_Gat**her(u_s2, 2*nu_wa*km(1), MPI_DOUBLE_COMPLEX, u_t2(1:2,1+nu_wa*km(1)*(j-1):nu_wa*km(1)*j),&
                   2*nu_wa*km(1), MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr)
   crr1 = crr1 + sr1
   crr2 = crr2 + sr2
   CALL MPI_Gather(me, 1, MPI_DOUBLE, nme(j), 1, MPI_INT, 0, MPI_COMM_WORLD, ierr)
   CALL MPI_Gather(nb, 1, MPI_INT, nnb(j), 1, MPI_INT, 0, MPI_COMM_WORLD, ierr)
END DO
!
CALL MPI_Barrier(MPI_COMM_WORLD, ierr)
IF (world_rank .EQ. 0) THEN
    ALLOCATE (crr1_all(world_size))
    ALLOCATE (crr2_all(world_size))
END IF
crr1_all = CMPLX(0.0d0, 0.0d0)
crr2_all = CMPLX(0.0d0, 0.0d0)
CALL MPI_Gather(crr1, 1, MPI_double_complex, crr1_all, 1, MPI_double_complex, 0, MPI_COMM_WORLD, ierr)
CALL MPI_Gather(crr2, 1, MPI_double_complex, crr2_all, 1, MPI_double_complex, 0, MPI_COMM_WORLD, ierr)
!Writing total conductivity value into the file
IF (world_rank .EQ. 0) THEN
    crr1_total = CMPLX(0.0d0, 0.0d0)
    crr2_total = CMPLX(0.0d0, 0.0d0)
    DO i = 1, world_size, 1
       crr1_total = crr1_total + crr1_all(i)
       crr2_total = crr2_total + crr2_all(i)
    END DO
    !Finding the minimum eigen value
    NULLIFY (p1, p2)
    p1 => nme(1)
    p2 => nnb(1)
    DO i = 2, km(2), 1
       IF (p1 .GE. nme(i)) THEN
          p1 => nme(i)
       END IF
       IF (p2 .LE. nnb(i)) THEN
          p2 => nnb(i)
       END IF
    END DO
    WRITE (UNIT=14, FMT='(A27,$)') 'The minimum eigen value is:'
    WRITE (UNIT=14, FMT=*) p1
    WRITE (UNIT=14, FMT='(A30,$)') 'The number of valence band is:'
    WRITE (UNIT=14, FMT=*) p2
    !
    !Constant for the coefficient
    pi = DACOS(-1.0d0)
    hb = 1.054571817d-34 !(unit - J)
    es = 1.602176634d-19 !(unit - J*s)
    !
END IF
!
IF (world_rank .EQ. 0) THEN
   DEALLOCATE (crr1_all)
   DEALLOCATE (crr2_all)
END IF
!Finalising MPI
CALL MPI_Finalize(ierr)
!
!Deallocating array that sotres and collect the fermi-surface-part contribution of each eigen state to the total spin orbit torque
DEALLOCATE (u_s1)
DEALLOCATE (u_s2)
DEALLOCATE (u_t1)
DEALLOCATE (u_t2)
!
STOP
END PROGRAM MAIN
...