Problem with MPI communication
When executing the version with parallel communications and more than one OMP thread, I get the following errors in different communications (the src values may vary from one execution to another):
Fatal error in MPI_Recv: Message truncated, error stack:
MPI_Recv(224).......................: MPI_Recv(buf=0x7fa4544e6440, count=4050, MPI_DOUBLE_PRECISION, src=255, tag=12, comm=0x84000006, status=0x1) failed
MPIDI_CH3U_Request_unpack_uebuf(618): Message truncated; 64800 bytes received but buffer size is 32400
Fatal error in MPI_Recv: Message truncated, error stack:
MPI_Recv(224).......................: MPI_Recv(buf=0x7f9cfb077440, count=4050, MPI_DOUBLE_PRECISION, src=237, tag=12, comm=0x84000006, status=0x1) failed
MPIDI_CH3U_Request_unpack_uebuf(618): Message truncated; 64800 bytes received but buffer size is 32400
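For scale, the receive posts count=4050 MPI_DOUBLE_PRECISION elements, i.e. 4050 × 8 = 32400 bytes, while the matched message carries 64800 bytes = 8100 elements: exactly twice the posted buffer. Note that MPI matches an incoming message to a receive using only (source, tag, communicator), never the buffer size, so this error can also appear when two in-flight messages share those three values and a receive matches the "wrong" one. Below is a minimal standalone sketch (not NEMO code; the sizes and tag are chosen only to mirror the log above) that reproduces the same failure on two ranks:

PROGRAM truncation_demo
   USE mpi
   IMPLICIT NONE
   INTEGER :: ierr, irank
   DOUBLE PRECISION :: zsmall(4050), zbig(8100)
   CALL MPI_Init(ierr)
   CALL MPI_Comm_rank(MPI_COMM_WORLD, irank, ierr)
   IF( irank == 0 ) THEN
      zsmall = 0.d0 ; zbig = 0.d0
      ! two sends with the SAME (dest, tag, comm) but different sizes
      CALL MPI_Send(zbig,   8100, MPI_DOUBLE_PRECISION, 1, 12, MPI_COMM_WORLD, ierr)
      CALL MPI_Send(zsmall, 4050, MPI_DOUBLE_PRECISION, 1, 12, MPI_COMM_WORLD, ierr)
   ELSE IF( irank == 1 ) THEN
      ! matching ignores the size: the first receive matches the FIRST send,
      ! whose 64800-byte payload does not fit into the 32400-byte buffer
      CALL MPI_Recv(zsmall, 4050, MPI_DOUBLE_PRECISION, 0, 12, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
      CALL MPI_Recv(zbig,   8100, MPI_DOUBLE_PRECISION, 0, 12, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
   ENDIF
   CALL MPI_Finalize(ierr)
END PROGRAM truncation_demo

Run with mpirun -np 2: on an MPICH-based library, rank 1 aborts in the first MPI_Recv with a "Message truncated" error stack like the one above.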
I introduced the following change: an additional parameter for the lbc_lnk calls, so that the communication routine knows which iteration of the DO loop each call corresponds to:
CALL lbc_lnk( 'traadv_fct', zwi, 'T', 1.0_wp, pTag=jn)
The subroutine lbc_lnk_pt2pt_dp is the one containing the MPI_Isend and MPI_Recv calls that fail.
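For reference, the corresponding dummy argument has to be an optional integer (it is tested with PRESENT and MOD below), declared in the same style as the khls argument shown further down:

INTEGER, OPTIONAL, INTENT(in   ) ::   pTag   ! iteration of the calling DO loop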
It now contains new allocatable arrays that act as a second set of buffers, so that there are enough buffers for the different parallel communications, together with IF/ELSE blocks that pick the corresponding buffer wherever one is used:
REAL(dp), DIMENSION(:), ALLOCATABLE :: buffsnd_dp, buffrcv_dp, buffsnd_dpp, buffrcv_dpp ! MPI send/recv buffers
REAL(sp), DIMENSION(:), ALLOCATABLE :: buffsnd_sp, buffrcv_sp, buffsnd_spp, buffrcv_spp ! sp and dp are the original buffers, spp and dpp are the new ones I declared
INTEGER, DIMENSION(8) :: nreq_p2p, nreq_p2pp ! request id for MPI_Isend in point-2-point communication
iszS = SUM(iszall, mask = llsend) ! send buffer size
iszR = SUM(iszall, mask = llrecv) ! recv buffer size
IF( PRESENT(pTag) .AND. MOD(pTag,2) == 0 ) THEN
   IF( ALLOCATED(buffsnd_dpp) ) THEN
      CALL mpi_waitall(8, nreq_p2pp, MPI_STATUSES_IGNORE, ierr)   ! wait for Isend from the PREVIOUS call
      IF( SIZE(buffsnd_dpp) < iszS ) DEALLOCATE(buffsnd_dpp)      ! send buffer is too small
   ENDIF
   IF( .NOT. ALLOCATED(buffsnd_dpp) ) ALLOCATE( buffsnd_dpp(iszS) )
   IF( ALLOCATED(buffrcv_dpp) ) THEN
      IF( SIZE(buffrcv_dpp) < iszR ) DEALLOCATE(buffrcv_dpp)      ! recv buffer is too small
   ENDIF
   IF( .NOT. ALLOCATED(buffrcv_dpp) ) ALLOCATE( buffrcv_dpp(iszR) )
   ! default definition when no communication is done. understood by mpi_waitall
   nreq_p2pp(:) = MPI_REQUEST_NULL   ! WARNING: Must be done after the call to mpi_waitall just above
   WRITE(*,*) 'Spp= ', SHAPE(buffsnd_dpp), ' Rpp= ', SHAPE(buffrcv_dpp), ' rank ', mpprank
ELSE
   IF( ALLOCATED(buffsnd_dp) ) THEN
      CALL mpi_waitall(8, nreq_p2p, MPI_STATUSES_IGNORE, ierr)    ! wait for Isend from the PREVIOUS call
      IF( SIZE(buffsnd_dp) < iszS ) DEALLOCATE(buffsnd_dp)        ! send buffer is too small
   ENDIF
   IF( .NOT. ALLOCATED(buffsnd_dp) ) ALLOCATE( buffsnd_dp(iszS) )
   IF( ALLOCATED(buffrcv_dp) ) THEN
      IF( SIZE(buffrcv_dp) < iszR ) DEALLOCATE(buffrcv_dp)        ! recv buffer is too small
   ENDIF
   IF( .NOT. ALLOCATED(buffrcv_dp) ) ALLOCATE( buffrcv_dp(iszR) )
   ! default definition when no communication is done. understood by mpi_waitall
   nreq_p2p(:) = MPI_REQUEST_NULL    ! WARNING: Must be done after the call to mpi_waitall just above
   IF( PRESENT(pTag) ) THEN
      WRITE(*,*) 'Sp= ', SHAPE(buffsnd_dp), ' Rp= ', SHAPE(buffrcv_dp), ' rank ', mpprank
   ENDIF
ENDIF
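As an aside, the allocate-or-grow logic is duplicated verbatim for the two buffer sets; it could be factored into a small module procedure along these lines (a sketch only: the helper name ensure_p2p_buffers is hypothetical, and dp and the MPI definitions come from the enclosing module, as in the declarations above; it must be a module procedure because allocatable dummies need an explicit interface):

SUBROUTINE ensure_p2p_buffers( pbufS, pbufR, kszS, kszR, kreq )
   REAL(dp), ALLOCATABLE, INTENT(inout) ::   pbufS(:), pbufR(:)   ! send/recv buffers of one set
   INTEGER ,              INTENT(in   ) ::   kszS, kszR           ! required sizes
   INTEGER ,              INTENT(inout) ::   kreq(8)              ! Isend requests of that set
   INTEGER :: ierr
   IF( ALLOCATED(pbufS) ) THEN
      CALL mpi_waitall(8, kreq, MPI_STATUSES_IGNORE, ierr)   ! complete the Isends of the PREVIOUS call
      IF( SIZE(pbufS) < kszS ) DEALLOCATE(pbufS)             ! send buffer is too small
   ENDIF
   IF( .NOT. ALLOCATED(pbufS) ) ALLOCATE( pbufS(kszS) )
   IF( ALLOCATED(pbufR) ) THEN
      IF( SIZE(pbufR) < kszR ) DEALLOCATE(pbufR)             ! recv buffer is too small
   ENDIF
   IF( .NOT. ALLOCATED(pbufR) ) ALLOCATE( pbufR(kszR) )
   kreq(:) = MPI_REQUEST_NULL   ! must come AFTER the waitall above
END SUBROUTINE ensure_p2p_buffers

Each branch would then reduce to CALL ensure_p2p_buffers( buffsnd_dpp, buffrcv_dpp, iszS, iszR, nreq_p2pp ), and the equivalent call for the _dp set. Later in the routine, the sends and receives select the buffer set with the same parity test: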
IF( PRESENT(pTag) .AND. MOD(pTag,2) == 0 ) THEN
   CALL MPI_ISEND( buffsnd_dpp(ishtS(jn)+1), iszall(jn), MPI_DOUBLE_PRECISION, mpiSnei(ihls,jn), iStag(jn), icomm, nreq_p2pp(jn), ierr )
ELSE
   CALL MPI_ISEND( buffsnd_dp(ishtS(jn)+1), iszall(jn), MPI_DOUBLE_PRECISION, mpiSnei(ihls,jn), iStag(jn), icomm, nreq_p2p(jn), ierr )
ENDIF
!.....there is a bit more code here in between.......
IF( PRESENT(pTag) .AND. MOD(pTag,2) == 0 ) THEN
   CALL MPI_RECV( buffrcv_dpp(ishtR(jn)+1), iszall(jn), MPI_DOUBLE_PRECISION, mpiRnei(ihls,jn), iRtag(jn), icomm, MPI_STATUS_IGNORE, ierr )
ELSE
   CALL MPI_RECV( buffrcv_dp(ishtR(jn)+1), iszall(jn), MPI_DOUBLE_PRECISION, mpiRnei(ihls,jn), iRtag(jn), icomm, MPI_STATUS_IGNORE, ierr )
ENDIF
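Since the same parity test guards every ISEND/RECV pair, another option is to select the buffer set once with local pointers (again only a sketch; it requires giving the module buffers and request arrays the TARGET attribute, which an allocatable needs before a pointer may be associated with it):

REAL(dp), POINTER :: psnd(:), prcv(:)
INTEGER , POINTER :: preq(:)
IF( PRESENT(pTag) .AND. MOD(pTag,2) == 0 ) THEN
   psnd => buffsnd_dpp ; prcv => buffrcv_dpp ; preq => nreq_p2pp
ELSE
   psnd => buffsnd_dp  ; prcv => buffrcv_dp  ; preq => nreq_p2p
ENDIF
CALL MPI_ISEND( psnd(ishtS(jn)+1), iszall(jn), MPI_DOUBLE_PRECISION, mpiSnei(ihls,jn), iStag(jn), icomm, preq(jn), ierr )
! ...
CALL MPI_RECV ( prcv(ishtR(jn)+1), iszall(jn), MPI_DOUBLE_PRECISION, mpiRnei(ihls,jn), iRtag(jn), icomm, MPI_STATUS_IGNORE, ierr )

This keeps a single code path, so the parity logic cannot drift between the send and receive sides.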
More details
The fact is that the computed value of iszall(jn) sometimes does not match between the sending and receiving MPI ranks (in all the errors, the buffer size on the sending rank is double the size on the receiving rank). I say sometimes because I have logs showing that some communications within tra_adv_fct complete correctly.
These are the parts of the code that compute the value of iszall. I haven't modified them from the original NEMO code, so they should not contain any errors:
INTEGER , OPTIONAL, INTENT(in ) :: khls ! halo size, default = nn_hls = 1
INTEGER , INTENT(in ) :: kfld ! number of pt3d arrays
ipi = SIZE(ptab(1)%pt4d,1)
ipj = SIZE(ptab(1)%pt4d,2)
ipk = SIZE(ptab(1)%pt4d,3)
ipl = SIZE(ptab(1)%pt4d,4)
ipf = kfld
!
ihls = nn_hls ! default definition =1
IF( PRESENT( khls ) ) ihls = khls
! ! ________________________
ip0i = 0 ! im0j = inner |__|__|__________|__|__|
ip1i = ihls ! im1j = inner - halo |__|__|__________|__|__|
im1i = ipi-2*ihls ! | | | | | |
im0i = ipi - ihls ! | | | | | |
ip0j = 0 ! | | | | | |
ip1j = ihls ! |__|__|__________|__|__|
im1j = ipj-2*ihls ! ip1j = halo |__|__|__________|__|__|
im0j = ipj - ihls ! ip0j = 0 |__|__|__________|__|__|
! sides: west east south north ; corners: so-we, so-ea, no-we, no-ea
isizei(1:4) = (/ ihls, ihls, ipi, ipi /) ; isizei(5:8) = ihls ! i- count
isizej(1:4) = (/ Nj_0, Nj_0, ihls, ihls /) ; isizej(5:8) = ihls ! j- count
iszall(:) = isizei(:) * isizej(:) * ipk * ipl * ipf
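To see how a factor of exactly 2 can arise here (numbers purely illustrative, chosen to match count=4050 from the log): for the west side (jn = 1) the formula gives iszall(1) = isizei(1) * isizej(1) * ipk * ipl * ipf = ihls * Nj_0 * ipk * ipl * ipf; with ihls = 1, Nj_0 = 450, ipk = 9, ipl = 1, ipf = 1 that is 4050 elements, i.e. the 32400-byte receive buffer from the error stack. Since every factor enters linearly, the sender's message is exactly twice the receiver's buffer as soon as the two sides disagree on any single factor by two, e.g. kfld = 2 vs. 1 or khls = 2 vs. 1, or, per the matching rule noted above, if the receive matches a same-tag message from a different lbc_lnk call that carries twice as many fields.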