! $Id$
!
!  This module takes care of MPI communication.
!
!  Data layout for each processor (`-' marks ghost points, `+' real
!  points of the processor shown)
!
!         n = mz        - - - - - - - - - . - - - - - - - - -
!             .         - - - - - - - - - . - - - - - - - - -
!             .         - - - - - - - - - . - - - - - - - - -
!             .  n2     - - - + + + + + + . + + + + + + - - -
!             .  .      - - - + + + + + + . + + + + + + - - -
!             .  .  n2i - - - + + + + + + . + + + + + + - - -
!             .  .      - - - + + + + + + . + + + + + + - - -
!             .  .      - - - + + + + + + . + + + + + + - - -
!             .  .      - - - + + + + + + . + + + + + + - - -
!             .  .      . . . . . . . . . . . . . . . . . . .
!             .  .      - - - + + + + + + . + + + + + + - - -
!             .  .      - - - + + + + + + . + + + + + + - - -
!             .  .      - - - + + + + + + . + + + + + + - - -
!             .  .  n1i - - - + + + + + + . + + + + + + - - -
!             .  .      - - - + + + + + + . + + + + + + - - -
!             .  n1     - - - + + + + + + . + + + + + + - - -
!             3         - - - - - - - - - . - - - - - - - - -
!             2         - - - - - - - - - . - - - - - - - - -
!         n = 1         - - - - - - - - - . - - - - - - - - -
!
!                              m1i             m2i
!                         m1 . . . . . . . . . . m2
!           m = 1  2  3 . . . . . . . . . . . . . . my
!
!  Thus, the indices for some important regions are
!  ghost zones:
!    1:nghost (1:m1-1)  and  my-nghost+1:my (m2+1:my)
!  real points:
!    m1:m2
!  boundary points (which become ghost points for adjacent processors):
!    m1:m1i  and  m2i:m2
!  inner points for periodic bc (i.e. points where 7-point derivatives are
!  unaffected by ghost information):
!    m1i+1:m2i-1
!  inner points for general bc (i.e. points where 7-point derivatives are
!  unaffected by ghost information plus boundcond for m1,m2):
!    m1i+2:m2i-2
!
! CPARAM logical, parameter :: lmpicomm = .true.
!
!***************************************************************
!
module Mpicomm
!
  use Cdata
  use General, only: find_proc, ioptest, loptest
  use Yinyang
!
  implicit none
!
  include 'mpif.h'
  include 'mpicomm.h'
!
  interface mpisend_cmplx
    module procedure mpisend_cmplx_arr3
  endinterface
!
  interface mpirecv_cmplx
    module procedure mpirecv_cmplx_arr3
  endinterface
!
!  interface mpigather_and_out
!    module procedure mpigather_and_out_real
!    module procedure mpigather_and_out_cmplx
!  endinterface
!
  integer(kind=MPI_OFFSET_KIND) :: size_of_int = 0, size_of_real = 0, size_of_double = 0
  logical :: lcommunicate_y=.false.
!
  character(LEN=MPI_MAX_PROCESSOR_NAME), dimension(ncpus) :: nodenames
!
!  For f-array processor boundaries
!
  real, dimension (:,:,:,:), allocatable :: lbufxi,ubufxi,lbufxo,ubufxo
  real, dimension (:,:,:,:), allocatable :: lbufyi,ubufyi,lbufyo,ubufyo
  real, dimension (:,:,:,:), allocatable :: lbufzi,ubufzi,lbufzo,ubufzo
  real, dimension (:,:,:,:), allocatable :: llbufi,lubufi,uubufi,ulbufi
  real, dimension (:,:,:,:), allocatable :: llbufo,lubufo,uubufo,ulbufo
  integer, dimension(4,2) :: bufsizes_yz
  integer, dimension(2,4,2) :: bufsizes_yz_corn
  integer, parameter :: INYU=1, INZU=2, INYL=3, INZL=4
  integer, parameter :: INUU=1, INLU=2, INLL=3, INUL=4
  integer, parameter :: IRCV=1, ISND=2
!
  real, dimension (nghost,my-2*nghost,mz,mcom) :: fahi,falo,fbhi,fblo          ! For shear
  real, dimension (nghost,my-2*nghost,mz,mcom) :: fahihi,falolo,fbhihi,fblolo  ! For shear
  real, dimension (nghost,my-2*nghost,mz,mcom) :: fao,fbo                      ! For shear
  integer :: ipx_partner, displs                                               ! For shear
  integer :: nextnextya, nextya, lastya, lastlastya  ! For shear
  integer :: nextnextyb, nextyb, lastyb, lastlastyb  ! For shear
  integer :: llcorn,lucorn,uucorn,ulcorn             ! (the 4 corners in yz-plane)
  integer :: psfcrn,psbcrn,pnfcrn,pnbcrn             ! (the 4 'pole' corners)
  integer :: llcornr, lucornr, ulcornr, uucornr      ! Receiving corner ids
  integer :: llcorns, lucorns, ulcorns, uucorns      ! Sending corner ids
  integer :: nprocs, mpierr
  integer :: serial_level = 0
!
!  mpi tags
!
  integer, parameter :: tolowx=13,touppx=14,tolowy=3,touppy=4,tolowz=5,touppz=6  ! msg. tags
  integer, parameter :: TOll=7,TOul=8,TOuu=9,TOlu=10                             ! msg. tags for corners
  !integer :: tolowx=13,touppx=14,tolowy=MPI_ANY_TAG,touppy=MPI_ANY_TAG,tolowz=MPI_ANY_TAG,touppz=MPI_ANY_TAG ! msg. tags
  !integer :: TOll=MPI_ANY_TAG,TOul=MPI_ANY_TAG,TOuu=MPI_ANY_TAG,TOlu=MPI_ANY_TAG ! msg. tags for corners
  integer, parameter :: io_perm=20,io_succ=21
  integer, parameter :: shiftn=22,shifts=23
  integer, parameter :: TOnf=24,TOnb=25,TOsf=26,TOsb=27  ! msg. tags for pole corners
!
!  mpi tags for radiation
!  the values for those have to differ by a number greater than maxdir=190
!  in order to have unique tags for each boundary and each direction
!
  integer, parameter :: Qtag_yz=250, Qtag_zx=300, Qtag_xy=350
!
  integer :: isend_rq_tolowx,isend_rq_touppx,irecv_rq_fromlowx,irecv_rq_fromuppx
  integer :: isend_rq_tolowy,isend_rq_touppy,irecv_rq_fromlowy,irecv_rq_fromuppy
  integer :: isend_rq_tolowz,isend_rq_touppz,irecv_rq_fromlowz,irecv_rq_fromuppz
  integer :: isend_rq_TOll,isend_rq_TOul,isend_rq_TOuu,isend_rq_TOlu  !(corners)
  integer :: irecv_rq_FRuu,irecv_rq_FRlu,irecv_rq_FRll,irecv_rq_FRul  !(corners)
  integer :: isend_rq_tolastya,isend_rq_tonextya, &
             irecv_rq_fromlastya,irecv_rq_fromnextya  ! For shear
  integer :: isend_rq_tolastyb,isend_rq_tonextyb, &
             irecv_rq_fromlastyb,irecv_rq_fromnextyb  ! For shear
  integer :: isend_rq_tolastlastya,isend_rq_tonextnextya, &
             irecv_rq_fromlastlastya,irecv_rq_fromnextnextya  ! For shear
  integer :: isend_rq_tolastlastyb,isend_rq_tonextnextyb, &
             irecv_rq_fromlastlastyb,irecv_rq_fromnextnextyb  ! For shear
!
  integer, dimension (MPI_STATUS_SIZE) :: isend_stat_tl,isend_stat_tu
  integer, dimension (MPI_STATUS_SIZE) :: irecv_stat_fl,irecv_stat_fu
  integer, dimension (MPI_STATUS_SIZE) :: isend_stat_Tll,isend_stat_Tul, &
                                          isend_stat_Tuu,isend_stat_Tlu
  integer, dimension (MPI_STATUS_SIZE) :: irecv_stat_Fuu,irecv_stat_Flu, &
                                          irecv_stat_Fll,irecv_stat_Ful
  integer :: REAL_ARR_MAXSIZE
!
!  Data for Yin-Yang communication.
!
  logical :: lcorner_yz=.false.
  type (ind_coeffs), dimension(:), allocatable :: intcoeffs
  integer, parameter :: LEFT=1, MID=2, RIGHT=3, MIDY=MID, NIL=0
  integer :: MIDZ=MID  !, MIDC=MID
  integer :: len_cornbuf, len_cornstrip_y, len_cornstrip_z, ycornstart, zcornstart
  integer, dimension(3) :: yy_buflens
!
!  Data for communication with foreign code.
!
  type foreign_setup
    character(LEN=labellen) :: name
    integer :: ncpus,tag,root
    integer, dimension(:), allocatable :: recv_req
    real :: dt_out, t_last_recvd
    integer, dimension(2) :: xpeer_rng=-1
    integer, dimension(2) :: ypeer_rng=-1
    integer, dimension(3) :: procnums
    integer, dimension(3) :: dims
    real, dimension(2,3) :: extents
    integer, dimension(3) :: proc_multis
    real, dimension(:), allocatable :: xgrid,ygrid
    integer, dimension(:,:), allocatable :: xind_rng,yind_rng
    character(LEN=5) :: unit_system
    real :: unit_length, unit_time, unit_BB, unit_T, renorm_UU, renorm_t, renorm_L
    real, dimension(mx,2) :: xweights
    real, dimension(my,2) :: yweights
    integer, dimension(mx) :: xinds
    integer, dimension(my) :: yinds
    logical, dimension(3) :: linterpol
  endtype foreign_setup
!
  type(foreign_setup) :: frgn_setup
!
  contains
!
!***********************************************************************
    subroutine mpicomm_init_min
!
!  Minimal initialization: size and rank w.r.t. MPI_COMM_WORLD, and derived communicators.
!
!  22-Sep-25/MR: coded
!
      call MPI_INIT(mpierr)
      call MPI_COMM_SIZE(MPI_COMM_WORLD, nprocs, mpierr)
      call MPI_COMM_RANK(MPI_COMM_WORLD, iproc, mpierr)
      call MPI_COMM_DUP(MPI_COMM_WORLD,MPI_COMM_PENCIL,mpierr)
      call MPI_COMM_DUP(MPI_COMM_PENCIL,MPI_COMM_GRID,mpierr)
      lroot = (iproc==root)

    endsubroutine mpicomm_init_min
!***********************************************************************
    subroutine mpicomm_init
!
!  Part of the initialization which can be done before input parameters are known.
!  Get processor number, number of procs, and whether we are root.
!
!  20-aug-01/wolf: coded
!  29-jul-2010/anders: separate subroutine
!
      use Syscalls, only: sizeof_real
!
!$    integer :: thread_support
      integer :: iapp=0    ! (KIND=ikind8) ?
      integer :: flag, ndnmlen
      integer :: nprocs_penc
      character(LEN=MPI_MAX_PROCESSOR_NAME) :: ndname
!
!$    call MPI_INIT_THREAD(MPI_THREAD_MULTIPLE,thread_support,mpierr)
!$    if (thread_support < MPI_THREAD_MULTIPLE) then
!$      print*, "Asked for multiple thread support=",MPI_THREAD_MULTIPLE," but got: ",thread_support
!$      call die_gracefully
!$    endif
!$    if (.false.) &
      call MPI_INIT(mpierr)
!
!  Size and rank w.r.t. MPI_COMM_WORLD.
!
      call MPI_COMM_SIZE(MPI_COMM_WORLD, nprocs, mpierr)
      call MPI_COMM_RANK(MPI_COMM_WORLD, iproc, mpierr)
!
!  Let each process know on which node each process runs in array nodenames (can be exploited for signalling).
!
      call MPI_GET_PROCESSOR_NAME(ndname, ndnmlen, mpierr)
      ndname = ndname(1:ndnmlen)
      call MPI_ALLGATHER(ndname,MPI_MAX_PROCESSOR_NAME,MPI_CHARACTER,nodenames,MPI_MAX_PROCESSOR_NAME, &
                         MPI_CHARACTER,MPI_COMM_WORLD,mpierr)
!
      !if (lroot) print*, 'Pencil0: nprocs,MPI_COMM_WORLD', nprocs, MPI_COMM_WORLD
!
!  If mpirun/mpiexec calls also other applications than Pencil:
!  Get rank within the set of applications, iapp.
!  iapp=0 if there is only one application or Pencil is the first one.
!
      call MPI_COMM_GET_ATTR(MPI_COMM_WORLD, MPI_APPNUM, iapp, flag, mpierr)
      iapp=0   !!!
!
!  New comm MPI_COMM_PENCIL which comprises only the procs of the Pencil
!  application. iproc becomes rank in MPI_COMM_PENCIL.
!  Attention: If there is more than one application envisaged, Pencil needs to be compiled
!  with FPPFLAGS=-fpp -DMPI_COMM_WORLD=MPI_COMM_PENCIL.
!  If there is only one application, iproc is unchanged and MPI_COMM_PENCIL=MPI_COMM_WORLD.
!
      call MPI_COMM_SPLIT(MPI_COMM_WORLD, iapp, iproc, MPI_COMM_PENCIL, mpierr)
      call MPI_COMM_SIZE(MPI_COMM_PENCIL, nprocs_penc, mpierr)
      call MPI_COMM_RANK(MPI_COMM_PENCIL, iproc, mpierr)
!
      lroot = (iproc==root)    ! refers to root of MPI_COMM_PENCIL!
!
      if (iapp>0) call stop_it('in MPMD mode, Pencil must be the first application')
      if (sizeof_real() < 8) then
        mpi_precision = MPI_REAL
        MPI_CMPLX = MPI_COMPLEX
        if (lroot) then
          write (*,*) ""
          write (*,*) "====================================================================="
          write (*,*) "WARNING: using SINGLE PRECISION, you'd better know what you're doing!"
          write (*,*) "====================================================================="
          write (*,*) ""
        endif
        if (linterstellar) call stop_it('using single precision: stop and recompile')
      else
        mpi_precision = MPI_DOUBLE_PRECISION
        MPI_CMPLX = MPI_DOUBLE_COMPLEX
      endif
      call MPI_COMM_DUP(MPI_COMM_PENCIL,MPI_COMM_GRID,mpierr)
      iproc_world=iproc
!
!  Remember the sizes of some MPI elementary types.
!
      call MPI_TYPE_SIZE(MPI_INTEGER, size_of_int, mpierr)
      call stop_it_if_any(mpierr /= MPI_SUCCESS, "unable to find MPI_INTEGER size")
!
      call MPI_TYPE_SIZE(mpi_precision, size_of_real, mpierr)
      call stop_it_if_any(mpierr /= MPI_SUCCESS, "unable to find MPI real size")
!
call MPI_TYPE_SIZE(MPI_DOUBLE_PRECISION, size_of_double, mpierr) call stop_it_if_any(mpierr /= MPI_SUCCESS, "unable to find MPI_DOUBLE_PRECISION size") ! ! Check consistency in processor layout. ! if (ncpus/=nprocx*nprocy*nprocz) then if (lroot) then print*, 'Laid out for ncpus (per grid) = ', ncpus, & ', but nprocx*nprocy*nprocz=', nprocx*nprocy*nprocz endif call stop_it('mpicomm_init') endif if (ncpus/=nprocs_penc) then if (lroot) then print*, 'Laid out for ncpus (per grid) = ', ncpus, & ', but called with nprocs_penc=', nprocs_penc endif call stop_it('mpicomm_init') endif ! ! Check if parallelization and chosen grid numbers make sense. ! if ((nprocx>1.and.nxgrid==1).or. & (nprocy>1.and.nygrid==1).or. & (nprocz>1.and.nzgrid==1)) then if (lroot) & print*, 'Parallelization in a dimension with ngrid==1 does not work.' call stop_it('mpicomm_init') endif if (mod(nxgrid,nprocx)/=0.or. & mod(nygrid,nprocy)/=0.or. & mod(nzgrid,nprocz)/=0) then if (lroot) then print*,'nxgrid/nprocx=',real(nxgrid)/real(nprocx) print*,'nygrid/nprocy=',real(nygrid)/real(nprocy) print*,'nzgrid/nprocz=',real(nzgrid)/real(nprocz) print*, 'In each dimension the number of grid points has to be '// & 'divisible by the number of processors.' call stop_it('mpicomm_init') endif endif ! ! Avoid overlapping ghost zones. ! if ((nx0) lforeign=.true. ! ! Check total number of processors assigned to Pencil. ! call MPI_COMM_SIZE(MPI_COMM_PENCIL, nprocs_penc, mpierr) nprocs_foreign=nprocs-nprocs_penc if (.not.( lyinyang.and.nprocs_penc==2*ncpus .or. lcubed_sphere.and.nprocs_penc==6*ncpus .or. & (lforeign.and.(nprocs_foreign>0.or.lstart) .or. .not.lforeign).and.nprocs_penc==ncpus )) then if (lroot) then if (lyinyang) then print*, 'Compiled with 2*ncpus = ', 2*ncpus, & ', but running on ', nprocs_penc, ' processors' elseif (lcubed_sphere) then print*, 'Compiled with 6*ncpus = ', 6*ncpus, & ', but running on ', nprocs_penc, ' processors' elseif (nprocs_penc/=ncpus) then print*, 'Compiled with ncpus = ', ncpus, & ', but running on ', nprocs_penc, ' processors' elseif (lforeign.and.nprocs_foreign==0) then print*, 'number of processors = '//trim(itoa(nprocs))// & ' = number of own processors -> no procs for foreign code left' endif call stop_it('initialize_mpicomm') endif endif ! if (lyinyang) then if (lforeign) then print*, 'Yin-Yang grid and foreign companion mutually exclusive!' call stop_it('initialize_mpicomm') endif if (nprocy>5.and..not.lcutoff_corners) then if (lroot) print*, 'Processor layout with nprocz>15, nprocy>5 not implemented for Yin-Yang grid.' call stop_it('initialize_mpicomm') endif if (nprocz/=3*nprocy) then if (lroot) print*, 'Processor layout with nprocz/=3*nprocy not implemented for Yin-Yang grid.' call stop_it('initialize_mpicomm') endif lyang=iproc>=ncpus ! if this proc is in first half of all it is in YIN grid otherwise in YANG grid endif if (lyinyang .or. lcubed_sphere) & call MPI_COMM_SPLIT(MPI_COMM_PENCIL, int(iproc/ncpus), mod(iproc,ncpus), MPI_COMM_GRID, mpierr) ! MPI_COMM_GRID refers to Yin or Yang grid or to PencilCode, when launched as first code (mandatory). ! if (lyinyang) then if (lyang) then iproc=iproc-ncpus cyinyang='Yang' else cyinyang='Yin' endif endif ! MPI_COMM_GRID refers to one of the 6 patches of the cubed sphere grid or to PencilCode, when launched as first code (mandatory). ! if (lcubed_sphere) then iproc=modulo(iproc,ncpus) ipatch=int(iproc/ncpus) endif ! ! Position on the processor grid (WITHIN Yin or Yang grid!). ! x is fastest direction, z slowest (this is the default) ! 
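!  Illustrative example (hypothetical numbers, using the default x-fastest ordering
!  shown in the commented block below): with nprocx=2, nprocy=2, nprocz=2 (so
!  nprocxy=4), rank iproc=5 decomposes as ipx=modulo(5,2)=1, ipy=modulo(5/2,2)=0,
!  ipz=5/4=1, and conversely find_proc(1,0,1) = 1 + 0*nprocx + 1*nprocxy = 5.
!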
call find_proc_coords(iproc,ipx,ipy,ipz) !if (lprocz_slowest) then ! ipx = modulo(iproc, nprocx) ! ipy = modulo(iproc/nprocx, nprocy) ! ipz = iproc/nprocxy !else ! ipx = modulo(iproc, nprocx) ! ipy = iproc/nprocxz ! ipz = modulo(iproc/nprocx, nprocz) !endif ! ! Set up flags for leading processors in each possible direction and plane ! lfirst_proc_x = (ipx == 0) lfirst_proc_y = (ipy == 0) lfirst_proc_z = (ipz == 0) lfirst_proc_xy = lfirst_proc_x .and. lfirst_proc_y lfirst_proc_yz = lfirst_proc_y .and. lfirst_proc_z lfirst_proc_xz = lfirst_proc_x .and. lfirst_proc_z lfirst_proc_xyz = lfirst_proc_x .and. lfirst_proc_y .and. lfirst_proc_z ! ! Set up flags for trailing processors in each possible direction and plane ! llast_proc_x = (ipx == nprocx-1) llast_proc_y = (ipy == nprocy-1) llast_proc_z = (ipz == nprocz-1) llast_proc_xy = llast_proc_x .and. llast_proc_y llast_proc_yz = llast_proc_y .and. llast_proc_z llast_proc_xz = llast_proc_x .and. llast_proc_z llast_proc_xyz = llast_proc_x .and. llast_proc_y .and. llast_proc_z ! ! Am I a yz corner? ! lcorner_yz=(lfirst_proc_y.or.llast_proc_y).and.(lfirst_proc_z.or.llast_proc_z) ! ! Set up `lower' and `upper' neighbours, refer to MPI_COMM_GRID. ! xlneigh = find_proc(ipx-1,ipy,ipz) !modulo(ipx-1,nprocx) + ipy*nprocx + ipz*nprocxy xuneigh = find_proc(ipx+1,ipy,ipz) !modulo(ipx+1,nprocx) + ipy*nprocx + ipz*nprocxy ylneigh = find_proc(ipx,ipy-1,ipz) !ipx + modulo(ipy-1,nprocy)*nprocx + ipz*nprocxy yuneigh = find_proc(ipx,ipy+1,ipz) !ipx + modulo(ipy+1,nprocy)*nprocx + ipz*nprocxy zlneigh = find_proc(ipx,ipy,ipz-1) !ipx + ipy*nprocx + modulo(ipz-1,nprocz)*nprocxy zuneigh = find_proc(ipx,ipy,ipz+1) !ipx + ipy*nprocx + modulo(ipz+1,nprocz)*nprocxy ! ! Set the four corners in the yz-plane (in cyclic order). ! llcorn = find_proc(ipx,ipy-1,ipz-1) !ipx + modulo(ipy-1,nprocy)*nprocx + modulo(ipz-1,nprocz)*nprocxy ulcorn = find_proc(ipx,ipy+1,ipz-1) !ipx + modulo(ipy+1,nprocy)*nprocx + modulo(ipz-1,nprocz)*nprocxy uucorn = find_proc(ipx,ipy+1,ipz+1) !ipx + modulo(ipy+1,nprocy)*nprocx + modulo(ipz+1,nprocz)*nprocxy lucorn = find_proc(ipx,ipy-1,ipz+1) !ipx + modulo(ipy-1,nprocy)*nprocx + modulo(ipz+1,nprocz)*nprocxy ! ! Overwrite with settings for boundary processors in Yin-Yang grid. ! if (lyinyang) call set_yy_neighbors ! ! Settings for Yin-Yang grid are done later in yyinit. ! bufsizes_yz(:,IRCV)=(/nz,ny,nz,ny/) bufsizes_yz(:,ISND)=(/nz,ny,nz,ny/) bufsizes_yz_corn=nghost lcommunicate_y = (nprocz>1 .and. lpole(2)) if (.not.lyinyang) then ! ! Allocations for Yin-Yang grid are done later in yyinit. ! if (nprocx>1) & allocate( lbufxi(nghost,ny,nz,mcom),ubufxi(nghost,ny,nz,mcom), & lbufxo(nghost,ny,nz,mcom),ubufxo(nghost,ny,nz,mcom)) if (nprocy>1 .or. lcommunicate_y .or. lyinyang) & allocate( lbufyi(mx,nghost,bufsizes_yz(INYL,IRCV),mcom),ubufyi(mx,nghost,bufsizes_yz(INYU,IRCV),mcom), & lbufyo(mx,nghost,bufsizes_yz(INYL,ISND),mcom),ubufyo(mx,nghost,bufsizes_yz(INYU,ISND),mcom)) if (nprocz>1) & allocate( lbufzi(mx,bufsizes_yz(INZL,IRCV),nghost,mcom),ubufzi(mx,bufsizes_yz(INZU,IRCV),nghost,mcom), & lbufzo(mx,bufsizes_yz(INZL,ISND),nghost,mcom),ubufzo(mx,bufsizes_yz(INZU,ISND),nghost,mcom)) if ((nprocy>1 .or. lcommunicate_y .or. 
lyinyang).and.nprocz>1) & allocate( llbufi(mx,bufsizes_yz_corn(1,INLL,IRCV),bufsizes_yz_corn(2,INLL,IRCV),mcom), & llbufo(mx,bufsizes_yz_corn(1,INLL,ISND),bufsizes_yz_corn(2,INLL,ISND),mcom), & lubufi(mx,bufsizes_yz_corn(1,INLU,IRCV),bufsizes_yz_corn(2,INLU,IRCV),mcom), & lubufo(mx,bufsizes_yz_corn(1,INLU,ISND),bufsizes_yz_corn(2,INLU,ISND),mcom), & ulbufi(mx,bufsizes_yz_corn(1,INUL,IRCV),bufsizes_yz_corn(2,INUL,IRCV),mcom), & ulbufo(mx,bufsizes_yz_corn(1,INUL,ISND),bufsizes_yz_corn(2,INUL,ISND),mcom), & uubufi(mx,bufsizes_yz_corn(1,INUU,IRCV),bufsizes_yz_corn(2,INUU,IRCV),mcom), & uubufo(mx,bufsizes_yz_corn(1,INUU,ISND),bufsizes_yz_corn(2,INUU,ISND),mcom) ) endif ! ! Set standard processor ids ! llcornr = llcorn; llcorns = llcorn ulcornr = ulcorn; ulcorns = ulcorn uucornr = uucorn; uucorns = uucorn lucornr = lucorn; lucorns = lucorn ! ! Print neighbors in counterclockwise order (including the corners), ! starting with left neighbor. ! ! Example with 4x4 processors ! 3 | 0 1 2 3 | 0 ! ---+----------------+--- ! 15 | 12 13 14 15 | 12 ! 11 | 8 9 10 11 | 8 ! 7 | 4 5 6 7 | 4 ! 3 | 0 1 2 3 | 0 ! ---+----------------+--- ! 15 | 12 13 14 15 | 12 ! should print (3,15,12,13,1,5,4,7) for iproc=0 ! ! Print processor numbers and those of their neighbors. ! if (ip<=7) write(6,'(A,I4,"(",3I4,"): ",8I4)') & 'initialize_mpicomm: MPICOMM neighbors ', & iproc_world,ipx,ipy,ipz, & ylneigh,llcorn,zlneigh,ulcorn,yuneigh,uucorn,zuneigh,lucorn ! ! Define MPI communicators that include all processes sharing the same value ! of ipx, ipy, or ipz. Refer to MPI_COMM_GRID! ! The rank within MPI_COMM_GRID is given by a ! combination of the two other directional processor indices. ! call MPI_COMM_SPLIT(MPI_COMM_GRID, ipy+nprocy*ipz, ipx, & MPI_COMM_XBEAM, mpierr) call MPI_COMM_SPLIT(MPI_COMM_GRID, ipx+nprocx*ipz, ipy, & MPI_COMM_YBEAM, mpierr) call MPI_COMM_SPLIT(MPI_COMM_GRID, ipx+nprocx*ipy, ipz, & MPI_COMM_ZBEAM, mpierr) call MPI_COMM_SPLIT(MPI_COMM_GRID, ipz, ipx+nprocx*ipy, & MPI_COMM_XYPLANE, mpierr) call MPI_COMM_SPLIT(MPI_COMM_GRID, ipy, ipx+nprocx*ipz, & MPI_COMM_XZPLANE, mpierr) call MPI_COMM_SPLIT(MPI_COMM_GRID, ipx, ipy+nprocy*ipz, & MPI_COMM_YZPLANE, mpierr) ! endsubroutine initialize_mpicomm !*********************************************************************** subroutine create_communicators() !used to copy communicators for helper threads !this makes MPI communication thread-safe, since all threads have their own communicator !For code coupling with f.e. EULAG you have to split MPI_COMM_PENCIL !Not done now simply because its simpler this way call MPI_COMM_DUP(MPI_COMM_WORLD, MPI_COMM_PENCIL, mpierr) call MPI_COMM_DUP(MPI_COMM_PENCIL, MPI_COMM_GRID, mpierr) call MPI_COMM_SPLIT(MPI_COMM_GRID, ipy+nprocy*ipz, ipx, & MPI_COMM_XBEAM, mpierr) call MPI_COMM_SPLIT(MPI_COMM_GRID, ipx+nprocx*ipz, ipy, & MPI_COMM_YBEAM, mpierr) call MPI_COMM_SPLIT(MPI_COMM_GRID, ipx+nprocx*ipy, ipz, & MPI_COMM_ZBEAM, mpierr) call MPI_COMM_SPLIT(MPI_COMM_GRID, ipz, ipx+nprocx*ipy, & MPI_COMM_XYPLANE, mpierr) call MPI_COMM_SPLIT(MPI_COMM_GRID, ipy, ipx+nprocx*ipz, & MPI_COMM_XZPLANE, mpierr) call MPI_COMM_SPLIT(MPI_COMM_GRID, ipx, ipy+nprocy*ipz, & MPI_COMM_YZPLANE, mpierr) endsubroutine !*********************************************************************** subroutine update_neighbors ! ! Update neighbor processes for communication. ! ! 27-feb-16/ccyang: adapted from particles_mpicomm and ! particles_mpicomm_blocks ! 3-mar-16/MR: simplified sorting code ! use General, only: find_proc, quick_sort ! 
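!  Fills neighbors(-1:1,-1:1,-1:1) with the ranks of the up to 26 surrounding
!  processes (plus self), applying the shear offset in ipy when crossing an x
!  boundary, and collects the distinct non-local ranks into the sorted list
!  iproc_comm(1:nproc_comm).
!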
integer, dimension(:), allocatable :: order logical :: left, right integer :: dipx, dipy, dipz integer :: ipx_rec, ipy_rec, ipz_rec integer :: iproc_rec ! ! Collect the neighbors. ! iproc_comm = -1 nproc_comm = 0 ! xscan: do dipx = -1, 1 ipx_rec = ipx + dipx left = ipx_rec < 0 right = ipx_rec > nprocx - 1 ipx_rec = modulo(ipx_rec, nprocx) ! assuming periodic boundary conditions ! yscan: do dipy = -1, 1 ipy_rec = ipy + dipy shear: if (lshear) then if (left) ipy_rec = ipy_rec - ceiling(deltay / Lxyz_loc(2) - 0.5) if (right) ipy_rec = ipy_rec + ceiling(deltay / Lxyz_loc(2) - 0.5) endif shear ipy_rec = modulo(ipy_rec, nprocy) ! assuming periodic boundary conditions ! zscan: do dipz = -1, 1 ipz_rec = modulo(ipz + dipz, nprocz) ! assuming periodic boundary conditions ! iproc_rec = find_proc(ipx_rec, ipy_rec, ipz_rec) neighbors(dipx,dipy,dipz) = iproc_rec add: if (iproc_rec /= iproc .and. .not. any(iproc_comm(1:nproc_comm) == iproc_rec)) then nproc_comm = nproc_comm + 1 iproc_comm(nproc_comm) = iproc_rec endif add ! enddo zscan ! enddo yscan ! enddo xscan ! ! Sort the process list. ! neighb: if (nproc_comm > 0) then allocate (order(nproc_comm)) call quick_sort(iproc_comm(:nproc_comm), order) deallocate (order) endif neighb ! endsubroutine update_neighbors !*********************************************************************** elemental integer function index_to_iproc_comm(iproc_in, mask) ! ! Converts iproc_in to the index to iproc_comm, returns 0 if iproc_in ! is iproc itself, and -1 if none of the elements in iproc_comm matches ! iproc_in. ! ! iproc_in itself is returned if mask = .false.. ! ! 29-feb-16/ccyang: coded. ! 3-mar-16/MR: use loptest ! use General, only: binary_search, loptest ! integer, intent(in) :: iproc_in logical, intent(in), optional :: mask ! active: if (loptest(mask,.true.)) then nonlocal: if (iproc_in /= iproc) then ! ! Binary search iproc_comm. ! search: if (any(iproc_comm(1:nproc_comm) == iproc_in)) then index_to_iproc_comm = binary_search(iproc_in, iproc_comm(1:nproc_comm)) else index_to_iproc_comm = -1 endif search else nonlocal ! ! The case of iproc_in == iproc. ! index_to_iproc_comm = 0 endif nonlocal else active ! ! Do nothing if mask = .false. ! index_to_iproc_comm = iproc_in endif active ! endfunction index_to_iproc_comm !*********************************************************************** subroutine set_cubed_sphere_neighbors ! ! Cubed mesh ! ! 20-dec-15/MR: coded ! integer :: patch_neigh_left, patch_neigh_right, patch_neigh_top, patch_neigh_bot integer :: ipatch logical, save :: lcalled=.false. if (lcalled) then return else lcalled=.true. endif ! if (ipatch<5) then patch_neigh_right=modulo(ipatch ,4)+1 patch_neigh_left =modulo(ipatch+2,4)+1 patch_neigh_top =ZPLUS patch_neigh_bot =ZMINUS if (llast_proc_y) yuneigh=find_proc(ipx, 0,ipz)+(patch_neigh_right-1)*ncpus if (lfirst_proc_y) ylneigh=find_proc(ipx,nprocy-1,ipz)+(patch_neigh_left -1)*ncpus if (ipatch==1 .or. ipatch==3) then if (llast_proc_z) zuneigh=find_proc(ipx,ipy, 0)+(patch_neigh_top -1)*ncpus if (lfirst_proc_z) zlneigh=find_proc(ipx,ipy,nprocz-1)+(patch_neigh_bot -1)*ncpus else if (llast_proc_z) zuneigh=find_proc(ipx, 0,ipy)+(patch_neigh_top -1)*ncpus if (lfirst_proc_z) zlneigh=find_proc(ipx,nprocy-1,ipy)+(patch_neigh_bot -1)*ncpus endif print*,'AXEL: patch_neigh_left, patch_neigh_right, patch_neigh_top, patch_neigh_bot=', & patch_neigh_left, patch_neigh_right, patch_neigh_top, patch_neigh_bot elseif (ipatch==5) then elseif (ipatch==6) then endif ! 
endsubroutine set_cubed_sphere_neighbors !*********************************************************************** subroutine scatter_snapshot(a,f,indvar1,indvar2) ! ! Scatters a full snapshot (array a) residing in root (w/o ghost zones) to the f-arrays of all ranks ! (not filling ghost zones there). It's assumed that a contains indvar2-indvar1+1 variables to be stored ! in f(:,:,:,indvar1:indvar2). ! ! 23-oct-23/MR: coded ! real, dimension(:,:,:,:), intent(in):: a real, dimension(mx,my,mz,mfarray), intent(out):: f integer, intent(in) :: indvar1,indvar2 integer, dimension(4) :: start_get, start_store integer :: type_get, type_store, win INTEGER(KIND=MPI_ADDRESS_KIND) :: size nvar=indvar2-indvar1+1 start_get=(/ipx*nx,ipy*ny,ipz*nz,0/)+1 start_store=(/ipx*nx+1+nghost,ipy*ny+1+nghost,ipz*nz+1+nghost,indvar1/) call MPI_Type_create_subarray(4, (/nxgrid,nygrid,nzgrid,nvar/), (/nx,ny,nz,nvar/), & start_get, MPI_ORDER_FORTRAN, MPI_REAL,type_get,mpierr) call MPI_Type_create_subarray(4, (/mx,my,mz,mfarray/), (/nx,ny,nz,nvar/), & start_store, MPI_ORDER_FORTRAN, MPI_REAL,type_store,mpierr) call MPI_Type_commit(type_get,mpierr) call MPI_Type_commit(type_store,mpierr) size=nwgrid*size_of_real if (lroot) then ! data tb scattered is on root call MPI_Win_create(a, size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, win, mpierr) else ! hence other ranks do not declare a window call MPI_Win_create(a, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, win, mpierr) endif call MPI_WIN_FENCE(0, win, mpierr) call MPI_Get(f, 1, type_store, root, 0, 1, type_get, win, mpierr) call MPI_WIN_FENCE(0, win, mpierr) call MPI_TYPE_FREE(type_get,mpierr) call MPI_TYPE_FREE(type_store,mpierr) call MPI_WIN_FREE(win,mpierr) endsubroutine scatter_snapshot !*********************************************************************** subroutine yyinit ! ! For Yin-Yang grid runs: allocate all communication buffers both for ! initialization of interpolation (gridbuf_*) and real data exchange. ! Requires set_yy_neighbors to be run first. ! ! 20-dec-15/MR: coded ! real, dimension(:,:,:), allocatable :: gridbuf_midy, gridbuf_midz, &! contains grid request of direct neighbour(s) gridbuf_left, & ! ~ of left corner neighbour gridbuf_right ! ~ of right corner neighbour logical, save :: lcalled=.false. if (lcalled) then return else lcalled=.true. endif ! if (lcorner_yz) then ! ! Procs at yz corners have four outer neighbors: two direct ones for both corner edges ! and two corner neighbors in the directions away from the yz corner. ! (A fifth neighbor at the yz corner could be needed, too, for ncpus>75.) ! if (nprocy>=6) then allocate(intcoeffs(5)) else allocate(intcoeffs(4)) endif MIDZ=4 else ! ! Otherwise only three neighbors. ! allocate(intcoeffs(3)) MIDZ=2 endif ! ! Set the grid transmission buffer sizes and allocate buffers. ! 
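!  Reminder: bufsizes_yz(INYL/INYU/INZL/INZU, IRCV/ISND) holds the strip length of
!  the corresponding boundary buffer; initialize_mpicomm presets these to ny/nz,
!  and below the Yin-Yang strip lengths from yy_buflens overwrite the send sizes
!  at the grid boundaries.
!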
if (lfirst_proc_z.or.llast_proc_z) then if (lfirst_proc_z) then bufsizes_yz(INZL,ISND)=yy_buflens(MID) bufsizes_yz_corn(:,INLL,ISND)=(/yy_buflens(RIGHT),nghost/) bufsizes_yz_corn(:,INUL,ISND)=(/yy_buflens(LEFT),nghost/) endif if (llast_proc_z ) then bufsizes_yz(INZU,ISND)=yy_buflens(MID) bufsizes_yz_corn(:,INLU,ISND)=(/yy_buflens(LEFT),nghost/) bufsizes_yz_corn(:,INUU,ISND)=(/yy_buflens(RIGHT),nghost/) endif if (.not.((llast_proc_y.and.lfirst_proc_z).or.(llast_proc_z.and.lfirst_proc_y))) & allocate(gridbuf_left(2,yy_buflens(LEFT),nghost)) if (.not.((lfirst_proc_y.and.lfirst_proc_z).or.(llast_proc_z.and.llast_proc_y))) & allocate(gridbuf_right(2,yy_buflens(RIGHT),nghost)) allocate(gridbuf_midy(2,yy_buflens(MID),nghost)) endif if (lfirst_proc_y.or.llast_proc_y) then if (lfirst_proc_y) then bufsizes_yz(INYL,ISND)=yy_buflens(MID) bufsizes_yz_corn(:,INLL,ISND)=(/nghost,yy_buflens(LEFT)/) bufsizes_yz_corn(:,INLU,ISND)=(/nghost,yy_buflens(RIGHT)/) endif if (llast_proc_y ) then bufsizes_yz(INYU,ISND)=yy_buflens(MID) bufsizes_yz_corn(:,INUU,ISND)=(/nghost,yy_buflens(LEFT)/) bufsizes_yz_corn(:,INUL,ISND)=(/nghost,yy_buflens(RIGHT)/) endif if (.not.((lfirst_proc_y.and.lfirst_proc_z).or.(llast_proc_z.and.llast_proc_y))) & allocate(gridbuf_left(2,nghost,yy_buflens(LEFT))) if (.not.((llast_proc_y.and.lfirst_proc_z).or.(llast_proc_z.and.lfirst_proc_y))) & allocate(gridbuf_right(2,nghost,yy_buflens(RIGHT))) allocate(gridbuf_midz(2,nghost,yy_buflens(MID))) endif ! ! Create interpolation data. ! call setup_interp_yy(gridbuf_left, gridbuf_midy, gridbuf_midz, gridbuf_right) ! ! Allocate data communication buffers. ! allocate( lbufyi(mx,nghost,bufsizes_yz(INYL,IRCV),mcom),ubufyi(mx,nghost,bufsizes_yz(INYU,IRCV),mcom), & lbufzi(mx,bufsizes_yz(INZL,IRCV),nghost,mcom),ubufzi(mx,bufsizes_yz(INZU,IRCV),nghost,mcom)) if (lfirst_proc_z) then allocate(lbufzo(mx,nghost,bufsizes_yz(INZL,ISND),mcom)) else allocate(lbufzo(mx,bufsizes_yz(INZL,ISND),nghost,mcom)) endif if (llast_proc_z) then allocate(ubufzo(mx,nghost,bufsizes_yz(INZU,ISND),mcom)) else allocate(ubufzo(mx,bufsizes_yz(INZU,ISND),nghost,mcom)) endif if (lfirst_proc_y.and.ipz>=nprocz/3.and.ipz<2*nprocz/3.) then allocate(lbufyo(mx,bufsizes_yz(INYL,ISND),nghost,mcom)) else allocate(lbufyo(mx,nghost,bufsizes_yz(INYL,ISND),mcom)) endif if (llast_proc_y.and.ipz>=nprocz/3.and.ipz<2*nprocz/3.) then allocate(ubufyo(mx,bufsizes_yz(INYU,ISND),nghost,mcom)) else allocate(ubufyo(mx,nghost,bufsizes_yz(INYU,ISND),mcom)) endif allocate( llbufi(mx,bufsizes_yz_corn(1,INLL,IRCV),bufsizes_yz_corn(2,INLL,IRCV),mcom), & lubufi(mx,bufsizes_yz_corn(1,INLU,IRCV),bufsizes_yz_corn(2,INLU,IRCV),mcom), & ulbufi(mx,bufsizes_yz_corn(1,INUL,IRCV),bufsizes_yz_corn(2,INUL,IRCV),mcom), & uubufi(mx,bufsizes_yz_corn(1,INUU,IRCV),bufsizes_yz_corn(2,INUU,IRCV),mcom)) if (lfirst_proc_z.or.lfirst_proc_y.and.ipz>=nprocz/3.and.ipz<2*nprocz/3.) then allocate(llbufo(mx,bufsizes_yz_corn(2,INLL,ISND),bufsizes_yz_corn(1,INLL,ISND),mcom)) else allocate(llbufo(mx,bufsizes_yz_corn(1,INLL,ISND),bufsizes_yz_corn(2,INLL,ISND),mcom)) endif if (lfirst_proc_z.or.llast_proc_y.and.ipz>=nprocz/3.and.ipz<2*nprocz/3.) then allocate(ulbufo(mx,bufsizes_yz_corn(2,INUL,ISND),bufsizes_yz_corn(1,INUL,ISND),mcom) ) else allocate(ulbufo(mx,bufsizes_yz_corn(1,INUL,ISND),bufsizes_yz_corn(2,INUL,ISND),mcom) ) endif if (llast_proc_z.or.llast_proc_y.and.ipz>=nprocz/3.and.ipz<2*nprocz/3.) 
then allocate(uubufo(mx,bufsizes_yz_corn(2,INUU,ISND),bufsizes_yz_corn(1,INUU,ISND),mcom)) else allocate(uubufo(mx,bufsizes_yz_corn(1,INUU,ISND),bufsizes_yz_corn(2,INUU,ISND),mcom)) endif if (llast_proc_z.or.lfirst_proc_y.and.ipz>=nprocz/3.and.ipz<2*nprocz/3.) then allocate(lubufo(mx,bufsizes_yz_corn(2,INLU,ISND),bufsizes_yz_corn(1,INLU,ISND),mcom) ) else allocate(lubufo(mx,bufsizes_yz_corn(1,INLU,ISND),bufsizes_yz_corn(2,INLU,ISND),mcom) ) endif call mpibarrier endsubroutine yyinit !*********************************************************************** subroutine setup_interp_yy(gridbuf_left, gridbuf_midy, gridbuf_midz, gridbuf_right) ! ! Initializes interpolation of data to be communicated between the grids: ! Each proc at a grid boundary tells its neighbors at which points it needs ! data from them. The neighbors determine the corresponding interpolation data ! and store them in intcoeffs. ! ! 20-dec-15/MR: coded ! 12-dec-17/MR: adaptations for fixing the gap problem: ngap counts the points ! which lie either in a y or a z gap. ! use General, only: yy_transform_strip, transpose_mn, itoa, reset_triangle, notanumber real, dimension(:,:,:) :: gridbuf_midy, gridbuf_midz, gridbuf_left, gridbuf_right real, dimension(:,:,:), allocatable :: thphprime_strip_y, thphprime_strip_z, tmp integer :: size_corn, size_neigh, lenbuf, lenred integer :: nok, noks, noks_all, ngap, ngap_all, nstrip_total logical :: stop_flag character(LEN=linelen) :: msg ! if (cyinyang_intpol_type=='bilinear') then iyinyang_intpol_type=BILIN elseif (cyinyang_intpol_type=='biquadratic') then iyinyang_intpol_type=BIQUAD elseif (cyinyang_intpol_type=='bicubic') then iyinyang_intpol_type=BICUB elseif (cyinyang_intpol_type=='biquintic') then iyinyang_intpol_type=BIQUIN elseif (cyinyang_intpol_type=='quadspline') then iyinyang_intpol_type=QUADSPLINE else call stop_it('setup_interp_yy: Unknown Yin-Yang interpolation type '//trim(cyinyang_intpol_type)) endif ! noks=0; ngap=0 ! if (lfirst_proc_z.or.llast_proc_z) then ! executing proc is at z boundary ! ! z-boundary procs: communicate only to y-boundary ones -> z strip (nghost x ...) expected. ! if (lcorner_yz) then lenbuf=len_cornbuf lenred=lenbuf-len_cornstrip_z else lenbuf=my lenred=my endif allocate(thphprime_strip_z(2,nghost,lenbuf)) ! buffer for full y or corner strip size_neigh=2*nghost*lenbuf ! buffer length for direct neighbors size_corn=2*nghost*lenred ! buffer length for corner neighbors if (lfirst_proc_z) then ! lower z boundary bufsizes_yz(INZL,IRCV) = lenbuf bufsizes_yz_corn(:,INLL,IRCV) = (/lenred,nghost/) !(/my,nghost/) bufsizes_yz_corn(:,INUL,IRCV) = (/lenred,nghost/) !(/my,nghost/) ! ! transformed coordinates into y strip ! if (lcutoff_corners.and.lcorner_yz) then if (llast_proc_y) then call yy_transform_strip(1,nycut,1,nghost,thphprime_strip_z(:,:,:nycut)) call yy_transform_strip(nycut+1,my,1,nghost,thphprime_strip_z(:,:,nycut+1:my),iph_shift_=1) call yy_transform_strip(nycut+1,my,-nghost+1,0,thphprime_strip_z(:,:,my+1:ycornstart-1),iph_shift_=1) call reset_triangle(nghost,2,my,my-(nghost-2),thphprime_strip_z) ! cutoff overlap tip elseif (lfirst_proc_y) then call yy_transform_strip(1,my-nycut,mz-len_cornstrip_z+1,mz-len_cornstrip_z+nghost, & thphprime_strip_z(:,:,:my-nycut),iph_shift_=-1) call yy_transform_strip(1,my-nycut,mz-len_cornstrip_z+1-nghost,mz-len_cornstrip_z, & thphprime_strip_z(:,:,my-nycut+1:2*(my-nycut)),iph_shift_=-1) call reset_triangle(nghost,2,1,nghost-1,thphprime_strip_z) ! 
cutoff overlap tip call yy_transform_strip(my-nycut+1,my,1,nghost,thphprime_strip_z(:,:,2*(my-nycut)+1:ycornstart-1)) endif else call yy_transform_strip(1,my,1,nghost,thphprime_strip_z(:,:,:my)) endif ! ! At yz corner: create a "cornerstrip" by adding truncated z strip ! if (lfirst_proc_y) then call yy_transform_strip(1,nghost,mz-len_cornstrip_z+1,mz,thphprime_strip_z(:,:,ycornstart:)) ! ll corner elseif (llast_proc_y) then call yy_transform_strip(m2+1,my,mz-len_cornstrip_z+1,mz,thphprime_strip_z(:,:,ycornstart:)) ! ul corner endif if (.false.) then !if (.not.lyang.and.(lfirst_proc_y.or.llast_proc_y)) then !if (lfirst_proc_y) print*, 'lower left' !if (llast_proc_y) print*, 'lower right' !print*, 'iproc, zlneigh, llcorn, ulcorn=', iproc, zlneigh-27, llcorn-27, ulcorn-27 !print*, 'thphprime_strip_z' !if (lfirst_proc_y) print'(2(f10.4,1x))', thphprime_strip_z if (lfirst_proc_y) write(100,'(2(f10.4,1x))') thphprime_strip_z if (llast_proc_y) write(102,'(2(f10.4,1x))') thphprime_strip_z print*, '-----------------' endif ! size(thphprime_strip_z,2), size(thphprime_strip_z,3), sum(thphprime_strip_z(:,:,:57)) call MPI_IRECV(gridbuf_midy,2*nghost*yy_buflens(MID),mpi_precision, & ! receive strip from ~ zlneigh,zlneigh,MPI_COMM_PENCIL,irecv_rq_fromlowz,mpierr) !MPI_ANY_TAG call MPI_ISEND(thphprime_strip_z,size_neigh,mpi_precision, & ! send strip to direct neighbor zlneigh,iproc_world,MPI_COMM_PENCIL,isend_rq_tolowz,mpierr) !tolowz if (llcorn>=0) then ! call MPI_IRECV(gridbuf_right,2*nghost*yy_buflens(RIGHT),mpi_precision, & ! receive strip from ~ llcorn,llcorn,MPI_COMM_PENCIL,irecv_rq_FRll,mpierr) !MPI_ANY_TAG !print*, 'proc ', iproc_world, 'receives ', nghost*yy_buflens(RIGHT), 'from ', llcorn !if (.not.lyang) write(iproc+200,*) iproc, 'sends', size_corn/(2*nghost), 'to ', llcorn call MPI_ISEND(thphprime_strip_z(:,:,:lenred),size_corn,mpi_precision, & ! send strip (without corner part) to right corner neighbor llcorn,iproc_world,MPI_COMM_PENCIL,isend_rq_TOll,mpierr) !TOll endif if (ulcorn>=0) then ! call MPI_IRECV(gridbuf_left,2*nghost*yy_buflens(LEFT),mpi_precision, & ! receive strip from ~ ulcorn,ulcorn,MPI_COMM_PENCIL,irecv_rq_FRul,mpierr) !MPI_ANY_TAG !print*, 'proc ', iproc_world, 'receives ', nghost*yy_buflens(LEFT), 'from ', ulcorn call MPI_ISEND(thphprime_strip_z(:,:,:lenred),size_corn,mpi_precision, & ! send strip (without corner part) to left corner neighbor ulcorn,iproc_world,MPI_COMM_PENCIL,isend_rq_TOul,mpierr) !TOul endif endif if (llast_proc_z) then ! upper z boundary bufsizes_yz(INZU,IRCV) = lenbuf bufsizes_yz_corn(:,INLU,IRCV) = (/lenred,nghost/) !(/my,nghost/) bufsizes_yz_corn(:,INUU,IRCV) = (/lenred,nghost/) !(/my,nghost/) ! ! transformed coordinates into y strip ! if (lcutoff_corners.and.lcorner_yz) then if (llast_proc_y) then call yy_transform_strip(1,nycut,n2+1,mz,thphprime_strip_z(:,:,:nycut)) call yy_transform_strip(nycut+1,my,n2+1,mz,thphprime_strip_z(:,:,nycut+1:my),iph_shift_=-1) call yy_transform_strip(nycut+1,my,mz+1,mz+nghost,thphprime_strip_z(:,:,my+1:ycornstart-1),iph_shift_=-1) call reset_triangle(1,nghost-1,my,my-(nghost-2),thphprime_strip_z) ! 
reset overlap tip elseif (lfirst_proc_y) then call yy_transform_strip(1,my-nycut,len_cornstrip_z-(nghost-1),len_cornstrip_z, & thphprime_strip_z(:,:,:my-nycut),iph_shift_=1) call yy_transform_strip(1,my-nycut,len_cornstrip_z+1,len_cornstrip_z+nghost, & thphprime_strip_z(:,:,my-nycut+1:2*(my-nycut)),iph_shift_=1) call yy_transform_strip(my-nycut+1,my,n2+1,mz,thphprime_strip_z(:,:,2*(my-nycut)+1:ycornstart-1)) call reset_triangle(1,nghost-1,1,nghost-1,thphprime_strip_z) ! reset overlap tip endif else call yy_transform_strip(1,my,n2+1,mz,thphprime_strip_z(:,:,:my)) endif ! ! At yz corner: create a "cornerstrip" by adding truncated z strip ! if (lfirst_proc_y) then call yy_transform_strip(1,nghost,1,len_cornstrip_z,thphprime_strip_z(:,:,ycornstart:)) ! lu corner elseif (llast_proc_y) then call yy_transform_strip(m2+1,my,1,len_cornstrip_z,thphprime_strip_z(:,:,ycornstart:)) ! uu corner endif if (.false.) then !if (.not.lyang.and.(llast_proc_y.or.lfirst_proc_y)) then !if (lfirst_proc_y) print*, 'upper left' !if (llast_proc_y) print*, 'upper right' !print*, 'thphprime_strip_z' !print'(2(f10.4,1x))', thphprime_strip_z if (lfirst_proc_y) write(24,'(2(f10.4,1x))') thphprime_strip_z if (llast_proc_y) write(26,'(2(f10.4,1x))') thphprime_strip_z !print*, '-----------------' endif !if (lcorner_yz) print*, 'proc ',iproc_world,', sends thphprime_strip to ', zuneigh, & !size(thphprime_strip_z,2), size(thphprime_strip_z,3), sum(thphprime_strip_z(:,:,:57)) call MPI_IRECV(gridbuf_midy,2*nghost*yy_buflens(MID),mpi_precision, & ! receive strip from ~ zuneigh,zuneigh,MPI_COMM_PENCIL,irecv_rq_fromuppz,mpierr) !MPI_ANY_TAG call MPI_ISEND(thphprime_strip_z,size_neigh,mpi_precision, & ! send strip to direct neighbor zuneigh,iproc_world,MPI_COMM_PENCIL,isend_rq_touppz,mpierr) !touppz if (lucorn>=0) then call MPI_IRECV(gridbuf_left,2*nghost*yy_buflens(LEFT),mpi_precision, & ! receive strip from ~ lucorn,lucorn,MPI_COMM_PENCIL,irecv_rq_FRlu,mpierr) !MPI_ANY_TAG !print*, 'proc ', iproc_world, 'receives ', nghost*yy_buflens(LEFT), 'from ', lucorn call MPI_ISEND(thphprime_strip_z(:,:,:lenred),size_corn,mpi_precision, & ! send strip (without corner part) to left corner neighbor lucorn,iproc_world,MPI_COMM_PENCIL,isend_rq_TOlu,mpierr) !TOlu endif if (uucorn>=0) then ! call MPI_IRECV(gridbuf_right,2*nghost*yy_buflens(RIGHT),mpi_precision, & ! receive strip from ~ uucorn,uucorn,MPI_COMM_PENCIL,irecv_rq_FRuu,mpierr) !MPI_ANY_TAG !print*, 'proc ', iproc_world, 'receives ', nghost*yy_buflens(RIGHT), 'from ', uucorn call MPI_ISEND(thphprime_strip_z(:,:,:lenred),size_corn,mpi_precision, & ! send strip to right corner neighbor uucorn,iproc_world,MPI_COMM_PENCIL,isend_rq_TOuu,mpierr) !TOuu endif endif endif if (lfirst_proc_y.or.llast_proc_y) then ! ! y-boundary processors: communicate to y- or z-boundary ones -> z strip (nghost x ...) or y strip (... x nghost) expected. ! if (lcorner_yz) then lenbuf=len_cornbuf lenred=lenbuf-len_cornstrip_y else lenbuf=mz lenred=mz endif allocate(thphprime_strip_y(2,nghost,lenbuf)) ! for communication to y boundary size_neigh=2*nghost*lenbuf size_corn=2*nghost*lenred if (lfirst_proc_y) then ! 
lower y boundary bufsizes_yz(INYL,IRCV) = lenbuf bufsizes_yz_corn(:,INLL,IRCV) = (/nghost,lenred/) !(/nghost,mz/) bufsizes_yz_corn(:,INLU,IRCV) = (/nghost,lenred/) !(/nghost,mz/) if (lcutoff_corners.and.lcorner_yz) then if (llast_proc_z) then call yy_transform_strip(1,nghost,1,nzcut,thphprime_strip_y(:,:,:nzcut)) call yy_transform_strip(1,nghost,nzcut+1,mz, & thphprime_strip_y(:,:,nzcut+1:mz),ith_shift_=1) call yy_transform_strip(-nghost+1,0,nzcut+1,mz, & thphprime_strip_y(:,:,mz+1:zcornstart-1),ith_shift_=1) call reset_triangle(nghost,2,mz,mz-(nghost-2),thphprime_strip_y) ! reset overlap tip elseif (lfirst_proc_z) then call yy_transform_strip(my-len_cornstrip_y+1,my-len_cornstrip_y+nghost,1,mz-nzcut, & thphprime_strip_y(:,:,:mz-nzcut),ith_shift_=-1) call yy_transform_strip(my-len_cornstrip_y+1-nghost,my-len_cornstrip_y,1,mz-nzcut, & thphprime_strip_y(:,:,mz-nzcut+1:2*(mz-nzcut)),ith_shift_=-1) call yy_transform_strip(1,nghost,mz-nzcut+1,mz,thphprime_strip_y(:,:,2*(mz-nzcut)+1:zcornstart-1)) call reset_triangle(nghost,2,1,nghost-1,thphprime_strip_y) ! reset overlap tip endif else call yy_transform_strip(1,nghost,1,mz,thphprime_strip_y(:,:,:mz)) ! full z strip endif !if (lyang.and.lcorner_yz) print*, 'y-ll/lu:', sum(thphprime_strip_y(:,:,:mz)) ! ! At yz corner: create a "cornerstrip" ! if (lfirst_proc_z) then !print*, 'left lower completed: len_cornstrip_y=', len_cornstrip_y call yy_transform_strip(my-len_cornstrip_y+1,my,1,nghost,thphprime_strip_y(:,:,zcornstart:)) ! left lower corner completed elseif (llast_proc_z) then !print*, 'left upper completed: len_cornstrip_y=', len_cornstrip_y call yy_transform_strip(my-len_cornstrip_y+1,my,n2+1,mz,thphprime_strip_y(:,:,zcornstart:)) ! left upper corner completed endif if (.false.) then !if (.not.lyang.and.(lfirst_proc_z.or.llast_proc_z)) then !print*, 'iproc, ylneigh, llcorn, lucorn=', iproc, ylneigh-27, llcorn-27, lucorn-27 !if (lfirst_proc_z) print*, 'left lower' !if (llast_proc_z) print*, 'left upper' !: ylneigh, llcorn, lucorn=', ylneigh, llcorn, lucorn !print*, 'thphprime_strip_y' if (lfirst_proc_z) write(100,'(2(f10.4,1x))') thphprime_strip_y if (llast_proc_z) write(24,'(2(f10.4,1x))') thphprime_strip_y !print*, '-----------------' endif if (ipz>=nprocz/3.and.ipz<2*nprocz/3) then ! ! Transposition of thphprime_strip_y, can perhaps be avoided. ! allocate(tmp(2,nghost,lenbuf)) tmp=thphprime_strip_y deallocate(thphprime_strip_y) allocate(thphprime_strip_y(2,lenbuf,nghost)) call transpose_mn(tmp,thphprime_strip_y) deallocate(tmp) endif !if (lcorner_yz) print*, 'proc ',iproc_world,', sends thphprime_strip to ', yuneigh, & ! size(thphprime_strip_y,2), size(thphprime_strip_y,3), sum(thphprime_strip_z(:,:,:57)) !print*, 'proc ', iproc_world, 'receives ', nghost*yy_buflens(MID), 'from ', yuneigh !print*, 'proc ', iproc_world, 'receives ', nghost*yy_buflens(MID), 'from ', ylneigh call MPI_IRECV(gridbuf_midz,2*nghost*yy_buflens(MID),mpi_precision, & ! receive strip from ~ ylneigh,ylneigh,MPI_COMM_PENCIL,irecv_rq_fromlowy,mpierr) !MPI_ANY_TAG call MPI_ISEND(thphprime_strip_y,size_neigh,mpi_precision, & ! send strip to direct neighbor ylneigh,iproc_world,MPI_COMM_PENCIL,isend_rq_tolowy,mpierr) !tolowy if (llcorn>=0) then ! call MPI_IRECV(gridbuf_left,2*nghost*yy_buflens(LEFT),mpi_precision, & ! receive strip from ~ llcorn,llcorn,MPI_COMM_PENCIL,irecv_rq_FRll,mpierr) !MPI_ANY_TAG !print*, 'proc ', iproc_world, 'receives ', nghost*yy_buflens(LEFT), 'from ', llcorn call MPI_ISEND(thphprime_strip_y,size_corn,mpi_precision, & ! 
send strip to left corner neighbor llcorn,iproc_world,MPI_COMM_PENCIL,isend_rq_TOll,mpierr) !TOll endif if (lucorn>=0) then ! call MPI_IRECV(gridbuf_right,2*nghost*yy_buflens(RIGHT),mpi_precision, & ! receive strip from ~ lucorn,lucorn,MPI_COMM_PENCIL,irecv_rq_FRlu,mpierr) !print*, 'proc ', iproc_world, 'receives ', nghost*yy_buflens(RIGHT), 'from ', lucorn call MPI_ISEND(thphprime_strip_y,size_corn,mpi_precision, & ! send strip to right corner neighbor lucorn,iproc_world,MPI_COMM_PENCIL,isend_rq_TOlu,mpierr) !TOlu !MPI_ANY_TAG endif endif if (llast_proc_y) then bufsizes_yz(INYU,IRCV) = lenbuf bufsizes_yz_corn(:,INUL,IRCV) = (/nghost,lenred/) !(/nghost,mz/) bufsizes_yz_corn(:,INUU,IRCV) = (/nghost,lenred/) !(/nghost,mz/) if (lcutoff_corners.and.lcorner_yz) then if (llast_proc_z) then call yy_transform_strip(m2+1,my,1,nzcut,thphprime_strip_y(:,:,:nzcut)) call yy_transform_strip(m2+1,my,nzcut+1,mz,thphprime_strip_y(:,:,nzcut+1:mz), & ith_shift_=-1) call yy_transform_strip(my+1,my+nghost,nzcut+1,mz,thphprime_strip_y(:,:,mz+1:zcornstart-1), & ith_shift_=-1) call reset_triangle(1,nghost-1,mz,mz-(nghost-2),thphprime_strip_y) ! reset overlap tip elseif (lfirst_proc_z) then call yy_transform_strip(len_cornstrip_y-(nghost-1),len_cornstrip_y,1, & mz-nzcut,thphprime_strip_y(:,:,:mz-nzcut),ith_shift_=1) call yy_transform_strip(len_cornstrip_y+1,len_cornstrip_y+nghost,1, & mz-nzcut,thphprime_strip_y(:,:,mz-nzcut+1:2*(mz-nzcut)),ith_shift_=1) call yy_transform_strip(m2+1,my,mz-nzcut+1,mz,thphprime_strip_y(:,:,2*(mz-nzcut)+1:zcornstart-1)) call reset_triangle(1,nghost-1,1,nghost-1,thphprime_strip_y) ! reset overlap tip endif else call yy_transform_strip(m2+1,my,1,mz,thphprime_strip_y(:,:,:mz)) ! full z ghost-strip endif ! ! Create a corner strip. ! if (lfirst_proc_z) then call yy_transform_strip(1,len_cornstrip_y,1,nghost,thphprime_strip_y(:,:,zcornstart:)) ! right lower corner completed elseif (llast_proc_z) then call yy_transform_strip(1,len_cornstrip_y,n2+1,mz,thphprime_strip_y(:,:,zcornstart:)) ! right upper corner completed endif if (.false.) then !if (.not.lyang.and.(lfirst_proc_z.or.llast_proc_z)) then !if (lfirst_proc_z) print*, 'right lower' !if (llast_proc_z) print*, 'right upper' !: yuneigh, ulcorn, uucorn=', yuneigh, ulcorn, uucorn !print*, 'thphprime_strip_y' if (lfirst_proc_z) write(102,'(2(f10.4,1x))') thphprime_strip_y if (llast_proc_z) write(26,'(2(f10.4,1x))') thphprime_strip_y !print*, '-----------------' endif if (ipz>=nprocz/3.and.ipz<2*nprocz/3) then ! ! Transposition of thphprime_strip_y, can perhaps be avoided. ! allocate(tmp(2,nghost,lenbuf)) tmp=thphprime_strip_y deallocate(thphprime_strip_y) allocate(thphprime_strip_y(2,lenbuf,nghost)) call transpose_mn(tmp,thphprime_strip_y) deallocate(tmp) endif !print*, 'at IRECV: proc ,',iproc_world,', from ', yuneigh, size(gridbuf_midz,1), size(gridbuf_midz,2), size(gridbuf_midz,3) call MPI_IRECV(gridbuf_midz,2*nghost*yy_buflens(MID),mpi_precision, & ! receive strip from ~ yuneigh,yuneigh,MPI_COMM_PENCIL,irecv_rq_fromuppy,mpierr) !MPI_ANY_TAG !print*, 'proc ',iproc_world,', sends thphprime_strip to ', yuneigh, & !size(thphprime_strip_y,1), size(thphprime_strip_y,2), size(thphprime_strip_y,3) call MPI_ISEND(thphprime_strip_y,size_neigh,mpi_precision, & ! send strip to direct neighbor yuneigh,iproc_world,MPI_COMM_PENCIL,isend_rq_touppy,mpierr) !touppy if (ulcorn>=0) then ! send strip to right corner neighbor call MPI_IRECV(gridbuf_right,2*nghost*yy_buflens(RIGHT),mpi_precision, & ! 
receive strip from ~ ulcorn,ulcorn,MPI_COMM_PENCIL,irecv_rq_FRul,mpierr) !MPI_ANY_TAG !print*, 'proc ', iproc_world, 'receives ', nghost*yy_buflens(RIGHT), 'from ', ulcorn call MPI_ISEND(thphprime_strip_y,size_corn,mpi_precision, & ulcorn,iproc_world,MPI_COMM_PENCIL,isend_rq_TOul,mpierr) !TOul endif if (uucorn>=0) then call MPI_IRECV(gridbuf_left,2*nghost*yy_buflens(LEFT),mpi_precision, & ! receive strip from ~ uucorn,uucorn,MPI_COMM_PENCIL,irecv_rq_FRuu,mpierr) !MPI_ANY_TAG !print*, 'proc ', iproc_world, 'receives ', nghost*yy_buflens(LEFT), 'from ', uucorn call MPI_ISEND(thphprime_strip_y,size_corn,mpi_precision, & ! send strip to left corner neighbor uucorn,iproc_world,MPI_COMM_PENCIL,isend_rq_TOuu,mpierr) !TOuu endif endif endif ! ! Now finalize all communications and determine the interpolation elements for the transmitted coordinates. ! if (lfirst_proc_y) then call MPI_WAIT(irecv_rq_fromlowy,irecv_stat_fl,mpierr) nok=prep_interp(gridbuf_midz,intcoeffs(MIDZ),iyinyang_intpol_type,ngap); noks=noks+nok call MPI_WAIT(isend_rq_tolowy,isend_stat_tl,mpierr) !if (lyang.and.iproc==15) print*, 'fromlowy: iproc, nok=', iproc, nok, ngap !if (lyang.and.iproc==18) print*, 'fromlowy: iproc, nok=', iproc, nok, ngap !if (lyang.and.iproc==6) print*, 'fromlowy: iproc, nok=', iproc, nok, ngap !if (lyang.and.iproc==9) print*, 'fromlowy: iproc, nok=', iproc, nok, ngap if (llcorn>=0) then call MPI_WAIT(irecv_rq_FRll,irecv_stat_Fll,mpierr) nok=prep_interp(gridbuf_left,intcoeffs(LEFT),iyinyang_intpol_type,ngap); noks=noks+nok !if (lyang.and.iproc==12) print*, 'fromuppleft: iproc, nok=', iproc, nok, ngap !if (lyang.and.iproc==3) print*, 'fromuppleft: iproc, nok=', iproc, nok, ngap !if (iproc_world==6) print*,'ll:', iproc_world,nok !,maxval(abs(intcoeffs(LEFT)%coeffs)),maxval(intcoeffs(LEFT)%inds),minval(intcoeffs(LEFT)%inds) call MPI_WAIT(isend_rq_TOll,isend_stat_Tll,mpierr) endif if (lucorn>=0) then call MPI_WAIT(irecv_rq_FRlu,irecv_stat_Flu,mpierr) nok=prep_interp(gridbuf_right,intcoeffs(RIGHT),iyinyang_intpol_type,ngap); noks=noks+nok !if (lyang.and.iproc==21) print*, 'fromlowleft: iproc, nok=', iproc, nok, ngap !if (lyang.and.iproc==12) print*, 'fromlowleft: iproc, nok=', iproc, nok, ngap !maxval(abs(intcoeffs(RIGHT)%coeffs)),maxval(intcoeffs(RIGHT)%inds),minval(intcoeffs(RIGHT)%inds) call MPI_WAIT(isend_rq_TOlu,isend_stat_Tlu,mpierr) endif endif if (llast_proc_y) then ! call MPI_WAIT(irecv_rq_fromuppy,irecv_stat_fu,mpierr) nok=prep_interp(gridbuf_midz,intcoeffs(MIDZ),iyinyang_intpol_type,ngap); noks=noks+nok ! print*,'uppy:',iproc,iproc_world,nok,maxval(abs(intcoeffs(MIDZ)%coeffs)),maxval(intcoeffs(MIDZ)%inds), & ! minval(intcoeffs(MIDZ)%inds) call MPI_WAIT(isend_rq_touppy,isend_stat_tu,mpierr) !if (lyang.and.iproc==17) print*, 'fromuppy: iproc, nok=', iproc, nok, ngap !if (lyang.and.iproc==20) print*, 'fromuppy: iproc, nok=', iproc, nok, ngap !if (lyang.and.iproc==11) print*, 'fromuppy: iproc, nok=', iproc, nok, ngap !if (lyang.and.iproc==8) print*, 'fromuppy: iproc, nok=', iproc, nok, ngap if (ulcorn>=0) then call MPI_WAIT(irecv_rq_FRul,irecv_stat_Ful,mpierr) nok=prep_interp(gridbuf_right,intcoeffs(RIGHT),iyinyang_intpol_type,ngap); noks=noks+nok !if (lyang.and.iproc==23) print*, 'fromlowright: iproc, nok=', iproc, nok, ngap !if (lyang.and.iproc==14) print*, 'fromlowright: iproc, nok=', iproc, nok, ngap ! print*,'ul:',iproc,iproc_world,nok,maxval(abs(intcoeffs(RIGHT)%coeffs)),maxval(intcoeffs(RIGHT)%inds), & ! 
minval(intcoeffs(RIGHT)%inds) call MPI_WAIT(isend_rq_TOul,isend_stat_Tul,mpierr) endif if (uucorn>=0) then call MPI_WAIT(irecv_rq_FRuu,irecv_stat_Fuu,mpierr) nok=prep_interp(gridbuf_left,intcoeffs(LEFT),iyinyang_intpol_type,ngap); noks=noks+nok !if (lyang.and.iproc==14) print*, 'fromuppright: iproc, nok=', iproc, nok, ngap !if (lyang.and.iproc==5) print*, 'fromuppright: iproc, nok=', iproc, nok, ngap ! print*,'uu:',iproc,iproc_world,nok,maxval(abs(intcoeffs(LEFT)%coeffs)),maxval(intcoeffs(LEFT)%inds), & ! minval(intcoeffs(LEFT)%inds) call MPI_WAIT(isend_rq_TOuu,isend_stat_Tuu,mpierr) endif endif if (lfirst_proc_z) then ! call MPI_WAIT(irecv_rq_fromlowz,irecv_stat_fl,mpierr) nok=prep_interp(gridbuf_midy,intcoeffs(MIDY),iyinyang_intpol_type,ngap); noks=noks+nok ! print*,'lowz:',iproc,iproc_world,nok,maxval(abs(intcoeffs(MIDY)%coeffs)),maxval(intcoeffs(MIDY)%inds),& ! minval(intcoeffs(MIDY)%inds) if (.not.lyang.and.notanumber(intcoeffs(MIDY)%coeffs)) print*, 'iproc=', iproc call MPI_WAIT(isend_rq_tolowz,isend_stat_tl,mpierr) ! if (llcorn>=0) then call MPI_WAIT(irecv_rq_FRll,irecv_stat_Fll,mpierr) nok=prep_interp(gridbuf_right,intcoeffs(RIGHT),iyinyang_intpol_type,ngap); noks=noks+nok ! print*,'ll:',iproc,iproc_world,nok,maxval(abs(intcoeffs(RIGHT)%coeffs)),maxval(intcoeffs(RIGHT)%inds), & ! minval(intcoeffs(RIGHT)%inds) if (.not.lyang.and.notanumber(intcoeffs(RIGHT)%coeffs)) print*, 'iproc=', iproc call MPI_WAIT(isend_rq_TOll,isend_stat_Tll,mpierr) endif if (ulcorn>=0) then call MPI_WAIT(irecv_rq_FRul,irecv_stat_Ful,mpierr) nok=prep_interp(gridbuf_left,intcoeffs(LEFT),iyinyang_intpol_type,ngap); noks=noks+nok ! print*,'ul:',iproc,iproc_world,nok,maxval(abs(intcoeffs(LEFT)%coeffs)),maxval(intcoeffs(LEFT)%inds), & ! minval(intcoeffs(LEFT)%inds) if (.not.lyang.and.notanumber(intcoeffs(LEFT)%coeffs)) print*, 'iproc=', iproc call MPI_WAIT(isend_rq_TOul,isend_stat_Tul,mpierr) endif endif if (llast_proc_z) then ! call MPI_WAIT(irecv_rq_fromuppz,irecv_stat_fu,mpierr) nok=prep_interp(gridbuf_midy,intcoeffs(MIDY),iyinyang_intpol_type,ngap); noks=noks+nok ! print*, 'proc ,',iproc_world,', from ', zuneigh, size(gridbuf_midy,1), & ! size(gridbuf_midy,2), size(gridbuf_midy,3), size(intcoeffs(MIDY)%inds,1), size(intcoeffs(MIDY)%inds,2) ! print'(22(f8.5,1x))', gridbuf_midy(1,:,:) ! print'(22(i3,1x))', intcoeffs(MIDY)%inds(:,:,1) ! print*,'upz:',iproc,iproc_world,nok,maxval(abs(intcoeffs(MIDY)%coeffs)),maxval(intcoeffs(MIDY)%inds), & ! minval(intcoeffs(MIDY)%inds) if (.not.lyang.and.notanumber(intcoeffs(MIDY)%coeffs)) print*, 'iproc=', iproc call MPI_WAIT(isend_rq_touppz,isend_stat_tu,mpierr) ! if (lucorn>=0) then call MPI_WAIT(irecv_rq_FRlu,irecv_stat_Flu,mpierr) nok=prep_interp(gridbuf_left,intcoeffs(LEFT),iyinyang_intpol_type,ngap); noks=noks+nok ! print*,'lu:',iproc,iproc_world,nok,maxval(abs(intcoeffs(LEFT)%coeffs)),maxval(intcoeffs(LEFT)%inds), & ! minval(intcoeffs(LEFT)%inds) if (.not.lyang.and.notanumber(intcoeffs(LEFT)%coeffs)) print*, 'iproc=', iproc call MPI_WAIT(isend_rq_TOlu,isend_stat_Tlu,mpierr) endif if (uucorn>=0) then call MPI_WAIT(irecv_rq_FRuu,irecv_stat_Fuu,mpierr) nok=prep_interp(gridbuf_right,intcoeffs(RIGHT),iyinyang_intpol_type,ngap); noks=noks+nok ! print*,'uu:',iproc,iproc_world,nok,maxval(abs(intcoeffs(RIGHT)%coeffs)),maxval(intcoeffs(RIGHT)%inds), & ! minval(intcoeffs(RIGHT)%inds) if (.not.lyang.and.notanumber(intcoeffs(RIGHT)%coeffs)) print*, 'iproc=', iproc call MPI_WAIT(isend_rq_TOuu,isend_stat_Tuu,mpierr) endif endif ! ! 
Over each of the two grids, sum up numbers of points (nok) for which interpolation data ! could be generated. Sum (noks_all) must agree with total number of ghost zone points ! along boundary of each grid (nstrip_total). ! call mpireduce_sum_int(noks, noks_all,comm=MPI_COMM_YZPLANE) call mpireduce_sum_int(ngap, ngap_all,comm=MPI_COMM_YZPLANE) stop_flag=.false.; msg='' if (iproc==root) then if (lcutoff_corners) then nstrip_total= 2*nghost*((nprocz-1)*mz + (nprocy-1)*my + len_cornstrip_z + len_cornstrip_y) & -4*(nghost-1)*nghost/2 & !?? +4*nghost*(my-nycut)-4*(nghost-1)*nghost/2 ! 12 spec for nghost=3!!! else nstrip_total=2*nghost*(nprocz*mz + nprocy*my - 2*nghost) endif noks_all=noks_all - ngap_all/4 print*, 'noks_all,ngap_all,nstrip_total=', noks_all,ngap_all,nstrip_total if (noks_all/=nstrip_total) then msg='setup_interp_yy: '//trim(cyinyang)//' grid: number of caught points '// & trim(itoa(noks_all))//' unequal goal '// trim(itoa(nstrip_total)) & //'. Reduce dang in start.f90.' if (lcutoff_corners) msg=trim(msg)//' Or increase nycut and nzcut.' stop_flag=.true. endif endif call stop_it_if_any(stop_flag,msg) !call mpibarrier !call mpifinalize !stop endsubroutine setup_interp_yy !*********************************************************************** subroutine initiate_isendrcv_bdry(f,ivar1_opt,ivar2_opt) ! ! Isend and Irecv boundary values. Called in the beginning of pde. ! Does not wait for the receives to finish (done in finalize_isendrcv_bdry) ! leftneigh and rightneigh are initialized by mpicomm_init. ! ! 21-may-02/axel: communication of corners added ! 11-aug-07/axel: communication in the x-direction added ! 22-oct-15/fred: communication for spherical polar coords across poles ! added yz corner communication ! 20-dec-15/MR: modified for Yin-Yang grid ! use General, only: notanumber real, dimension(:,:,:,:), intent(inout):: f integer, optional, intent(in) :: ivar1_opt, ivar2_opt ! integer :: tolowyr,touppyr,tolowys,touppys,tolowzr,touppzr,tolowzs,touppzs ! msg. tags placeholders integer :: TOllr,TOulr,TOuur,TOlur,TOlls,TOuls,TOuus,TOlus ! placeholder tags integer :: ivar1, ivar2, nbufy, nbufz, nbufyz, mxl, comm, bufact, dir,mm ! ivar1=1; ivar2=min(mcom,size(f,4)) if (present(ivar1_opt)) ivar1=ivar1_opt if (present(ivar2_opt)) ivar2=ivar2_opt if (ivar2==0) return ! mxl=size(f,1) ! ! Set communication across x-planes. ! if (nprocx>1) call isendrcv_bdry_x(f,ivar1_opt,ivar2_opt) ! ! Set communication across y-planes. ! Standard message tags from defaults for surfaces and yz-corners. ! tolowyr=tolowy; tolowzr=tolowz; TOlls=TOll; TOllr=TOll tolowys=tolowy; tolowzs=tolowz; TOlus=TOlu; TOlur=TOlu touppyr=touppy; touppzr=touppz; TOuus=TOuu; TOuur=TOuu touppys=touppy; touppzs=touppz; TOuls=TOul; TOulr=TOul ! ! For spherical polar boundary condition across the pole set up edge and corner ! neighbours. Assumes the domain is binary communication between z processors. ! NB nprocz=2*n, n>=1, comms across y-plane parallel in z! ! 
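!  Example: with nprocz=4, a pole-adjacent processor at ipz=0 swaps its pole
!  buffers with the pi-shifted processor at ipz=0+nprocz/2=2 of the same ipx,ipy,
!  as set just below via poleneigh = find_proc(ipx,ipy,ipz+nprocz/2).
!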
if (lcommunicate_y) then poleneigh = find_proc(ipx, ipy,ipz +nprocz/2) pnbcrn = find_proc(ipx, 0,ipz-1+nprocz/2) pnfcrn = find_proc(ipx, 0,ipz+1+nprocz/2) psfcrn = find_proc(ipx,nprocy-1,ipz+1+nprocz/2) psbcrn = find_proc(ipx,nprocy-1,ipz-1+nprocz/2) !poleneigh = modulo(ipz +nprocz/2,nprocz)*nprocxy+ ipy*nprocx+ipx !pnbcrn = modulo(ipz-1+nprocz/2,nprocz)*nprocxy+ 0*nprocx+ipx !N rev !pnfcrn = modulo(ipz+1+nprocz/2,nprocz)*nprocxy+ 0*nprocx+ipx !N fwd !psfcrn = modulo(ipz+1+nprocz/2,nprocz)*nprocxy+(nprocy-1)*nprocx+ipx !psbcrn = modulo(ipz-1+nprocz/2,nprocz)*nprocxy+(nprocy-1)*nprocx+ipx endif ! ! Allocate and send/receive buffers across y-planes ! bufact=mxl*nghost*(ivar2-ivar1+1) if (nprocy>1 .or. lcommunicate_y .or. lyinyang) then ! ! Internal, periodic and Yin-Yang-exchange y-plane buffers. ! if (lyinyang.and.lfirst_proc_y) then ! ! Interpolate variables ivar1 ... ivar2 for lower y neighbor in other grid. ! Result in lbufyo. ! call interpolate_yy(f,ivar1,ivar2,lbufyo,MIDZ,iyinyang_intpol_type) ! if (notanumber(lbufyo)) print*, 'lbufyo: iproc=', iproc, iproc_world comm=MPI_COMM_PENCIL; touppyr=ylneigh; tolowys=iproc_world !touppyr=MPI_ANY_TAG else if (lcommunicate_y.and.lfirst_proc_y) then !N-pole lbufyo(:,:,:,ivar1:ivar2)=f(:,m1i:m1:-1,n1:n2,ivar1:ivar2) tolowys=shiftn; touppyr=shiftn; ylneigh=poleneigh else lbufyo(:,:,:,ivar1:ivar2)=f(:,m1:m1i,n1:n2,ivar1:ivar2) !(lower y-zone) endif comm=MPI_COMM_GRID endif nbufy=bufact*bufsizes_yz(INYL,IRCV) !if (ldiagnos.and.lfirst.and.iproc_world==0) print*, iproc_world, ' receives', bufsizes_yz(INYL,IRCV), ' from', ylneigh call MPI_IRECV(lbufyi(:,:,:,ivar1:ivar2),nbufy,mpi_precision, & ylneigh,touppyr,comm,irecv_rq_fromlowy,mpierr) nbufy=bufact*bufsizes_yz(INYL,ISND) call MPI_ISEND(lbufyo(:,:,:,ivar1:ivar2),nbufy,mpi_precision, & ylneigh,tolowys,comm,isend_rq_tolowy,mpierr) if (lyinyang.and.llast_proc_y) then ! ! Interpolate variables ivar1 ... ivar2 for upper y neighbor in other grid. ! Result in ubufyo. ! call interpolate_yy(f,ivar1,ivar2,ubufyo,MIDZ,iyinyang_intpol_type) if (notanumber(ubufyo)) print*, 'ubufyo: iproc=', iproc, iproc_world comm=MPI_COMM_PENCIL; tolowyr=yuneigh; touppys=iproc_world !tolowyr=MPI_ANY_TAG else if (lcommunicate_y.and.llast_proc_y) then !S-pole ! ! North/south-pole y-plane buffers and message tags. NB N:0/S:pi radians. ! Swap N(S) lower(upper) buffers between pi-shifted iprocz same y-plane. ! ubufyo(:,:,:,ivar1:ivar2)=f(:,m2:m2i:-1,n1:n2,ivar1:ivar2) tolowyr=shifts; touppys=shifts; yuneigh=poleneigh else ubufyo(:,:,:,ivar1:ivar2)=f(:,m2i:m2,n1:n2,ivar1:ivar2) !(upper y-zone) endif comm=MPI_COMM_GRID endif ! nbufy=bufact*bufsizes_yz(INYU,IRCV) call MPI_IRECV(ubufyi(:,:,:,ivar1:ivar2),nbufy,mpi_precision, & yuneigh,tolowyr,comm,irecv_rq_fromuppy,mpierr) nbufy=bufact*bufsizes_yz(INYU,ISND) call MPI_ISEND(ubufyo(:,:,:,ivar1:ivar2),nbufy,mpi_precision, & yuneigh,touppys,comm,isend_rq_touppy,mpierr) endif ! ! Set communication across z-planes. ! if (nprocz>1) then ! if (lyinyang.and.lfirst_proc_z) then ! ! Interpolate variables ivar1 ... ivar2 for lower z neighbor in other grid. ! Result in lbufzo. ! 
call interpolate_yy(f,ivar1,ivar2,lbufzo,MIDY,iyinyang_intpol_type) comm=MPI_COMM_PENCIL; touppzr=zlneigh; tolowzs=iproc_world !touppzr=MPI_ANY_TAG elseif (lcoarse) then do mm=mexts(1),mexts(2) lbufzo(:,mm-nghost,:,ivar1:ivar2)=f(:,mm,nexts(mm,1):nexts(mm,1)+(nghost-1)*nphis(mm):nphis(mm),ivar1:ivar2) enddo if (lfirst_proc_y.and.mexts(2)m1) then lbufzo(:,:mexts(1)-nghost-1,:,ivar1:ivar2)=f(:,m1:mexts(1)-1,n1:n1i,ivar1:ivar2) endif comm=MPI_COMM_PENCIL else lbufzo(:,:,:,ivar1:ivar2)=f(:,m1:m2,n1:n1i,ivar1:ivar2) !lower z-planes comm=MPI_COMM_GRID endif nbufz=bufact*bufsizes_yz(INZL,IRCV) !if (ldiagnos.and.lfirst.and.iproc_world==0) print*, iproc_world, ' receives', bufsizes_yz(INZL,IRCV), ' from', zlneigh call MPI_IRECV(lbufzi(:,:,:,ivar1:ivar2),nbufz,mpi_precision, & zlneigh,touppzr,comm,irecv_rq_fromlowz,mpierr) nbufz=bufact*bufsizes_yz(INZL,ISND) call MPI_ISEND(lbufzo(:,:,:,ivar1:ivar2),nbufz,mpi_precision, & zlneigh,tolowzs,comm,isend_rq_tolowz,mpierr) ! if (lyinyang.and.llast_proc_z) then ! ! Interpolate variables ivar1 ... ivar2 for upper z neighbor in other grid. ! Result in ubufyo. ! call interpolate_yy(f,ivar1,ivar2,ubufzo,MIDY,iyinyang_intpol_type) comm=MPI_COMM_PENCIL; tolowzr=zuneigh; touppzs=iproc_world !tolowzr=MPI_ANY_TAG elseif (lcoarse) then do mm=mexts(1),mexts(2) ubufzo(:,mm-nghost,:,ivar1:ivar2)=f(:,mm,nexts(mm,2)-(nghost-1)*nphis(mm):nexts(mm,2):nphis(mm),ivar1:ivar2) enddo if (lfirst_proc_y.and.mexts(2)m1) then ubufzo(:,:mexts(1)-nghost-1,:,ivar1:ivar2)=f(:,m1:mexts(1)-1,n2i:n2,ivar1:ivar2) endif comm=MPI_COMM_PENCIL else ubufzo(:,:,:,ivar1:ivar2)=f(:,m1:m2,n2i:n2,ivar1:ivar2) !upper z-planes comm=MPI_COMM_GRID endif nbufz=bufact*bufsizes_yz(INZU,IRCV) call MPI_IRECV(ubufzi(:,:,:,ivar1:ivar2),nbufz,mpi_precision, & zuneigh,tolowzr,comm,irecv_rq_fromuppz,mpierr) nbufz=bufact*bufsizes_yz(INZU,ISND) call MPI_ISEND(ubufzo(:,:,:,ivar1:ivar2),nbufz,mpi_precision, & zuneigh,touppzs,comm,isend_rq_touppz,mpierr) !do j=ivar1,ivar2 !if (notanumber(ubufzo(:,:,:,j))) print*, 'ubufzo: iproc,j=', iproc,j !if (notanumber(lbufzo(:,:,:,:))) print*, 'lbufzo: iproc,j=', iproc !if (notanumber(ubufyo(:,:,:,j))) print*, 'ubufyo: iproc,j=', iproc,j !if (notanumber(lbufyo(:,:,:,:))) print*, 'lbufyo: iproc,j=', iproc !enddo endif ! ! The four corners (in counter-clockwise order). ! (NOTE: this should work even for nprocx>1) ! if (nprocz>1.and.(nprocy>1.or.lyinyang)) then ! ! Internal and periodic yz-corner buffers. ! bufact=mxl*(ivar2-ivar1+1) ! North/south-pole yz-corner buffers and message tags. NB N:0/S:pi radians. ! Send N(S) lower(upper) buffers to pi-shifted iprocz +/-1 and receive ! pi-shifted iprocz -/+1 on the same y-plane. ! 
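!
!  Summary of the pairing set up below (no additional logic): at the N-pole the
!  ll and lu corner strips are exchanged with the pi-shifted z neighbours
!  pnbcrn and pnfcrn, e.g.
!
!    llcorns=pnbcrn; TOlls=TOnb      ! send side on this processor
!    llcornr=pnfcrn; TOuur=TOnb      ! receive side, as posted by the partner
!
!  The ll strip sent to pnbcrn with tag TOnb is received there as llbufi,
!  because that processor's forward pole corner (its pnfcrn) is this processor,
!  so sends and receives pair up without global synchronisation. The S-pole
!  corners are treated analogously via psbcrn/psfcrn.
!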
if (lcommunicate_y) then if (lfirst_proc_y) then !N-pole if (lcoarse) then do mm=min(mexts(2),m1i),m1,-1 lubufo(:,2*nghost-mm+1,:,ivar1:ivar2)=f(:,mm,nexts(mm,2)-(nghost-1)*nphis(mm):nexts(mm,2):nphis(mm),ivar1:ivar2) llbufo(:,2*nghost-mm+1,:,ivar1:ivar2)=f(:,mm,nexts(mm,1):nexts(mm,1)+(nghost-1)*nphis(mm):nphis(mm),ivar1:ivar2) enddo if (m1i>mexts(2)) then lubufo(:,:m1i-mexts(2),:,ivar1:ivar2)= f(:,m1i:mexts(2)+1:-1,n2i:n2,ivar1:ivar2) llbufo(:,:m1i-mexts(2),:,ivar1:ivar2)= f(:,m1i:mexts(2)+1:-1,n1:n1i,ivar1:ivar2) endif else lubufo(:,:,:,ivar1:ivar2)=f(:,m1i:m1:-1,n2i:n2,ivar1:ivar2) llbufo(:,:,:,ivar1:ivar2)=f(:,m1i:m1:-1,n1:n1i,ivar1:ivar2) endif lucorns=pnfcrn;lucornr=pnbcrn;llcornr=pnfcrn;llcorns=pnbcrn TOlus =TOnf; TOulr =TOnf; TOuur =TOnb; TOlls =TOnb endif if (llast_proc_y) then !S-pole if (lcoarse) then do mm=m2,max(mexts(1),m2i),-1 ulbufo(:,m2-mm+1,:,ivar1:ivar2)=f(:,mm,nexts(mm,1):nexts(mm,1)+(nghost-1)*nphis(mm):nphis(mm),ivar1:ivar2) uubufo(:,m2-mm+1,:,ivar1:ivar2)=f(:,mm,nexts(mm,2)-(nghost-1)*nphis(mm):nexts(mm,2):nphis(mm),ivar1:ivar2) enddo if (m2i=0) call interpolate_yy(f,ivar1,ivar2,llbufo,dir,iyinyang_intpol_type) comm=MPI_COMM_PENCIL; TOuur=llcornr; TOlls=iproc_world !TOuur=MPI_ANY_TAG elseif (.not.(lcommunicate_y.and.(lfirst_proc_y.or.llast_proc_y))) then llbufo(:,:,:,ivar1:ivar2)=f(:,m1:m1i,n1:n1i,ivar1:ivar2) comm=MPI_COMM_GRID endif nbufyz=bufact*product(bufsizes_yz_corn(:,INLL,IRCV)) if (llcornr>=0) call MPI_IRECV(llbufi(:,:,:,ivar1:ivar2),nbufyz,mpi_precision, & llcornr,TOuur,comm,irecv_rq_FRll,mpierr) nbufyz=bufact*product(bufsizes_yz_corn(:,INLL,ISND)) if (llcorns>=0) call MPI_ISEND(llbufo(:,:,:,ivar1:ivar2),nbufyz,mpi_precision, & llcorns,TOlls,comm,isend_rq_TOll,mpierr) ! ! Upper y, lower z. ! if (lyinyang.and.(lfirst_proc_z.or.llast_proc_y)) then if (lfirst_proc_z.and.llast_proc_y) then dir=NIL elseif (lfirst_proc_z) then dir=LEFT else dir=RIGHT endif ! if (iproc_world==44) print*, 'size(intcoeffs)=', size(intcoeffs(RIGHT)%inds,1), & ! size(intcoeffs(RIGHT)%inds,2), size(intcoeffs(RIGHT)%inds,3) ! ! Interpolate variables ivar1 ... ivar2 for upper-lower corner neighbor in other grid. ! Result in ulbufo. ! if (ulcorns>=0) call interpolate_yy(f,ivar1,ivar2,ulbufo,dir,iyinyang_intpol_type) comm=MPI_COMM_PENCIL; TOlur=ulcornr; TOuls=iproc_world !TOlur=MPI_ANY_TAG elseif (.not.(lcommunicate_y.and.(lfirst_proc_y.or.llast_proc_y))) then ulbufo(:,:,:,ivar1:ivar2)=f(:,m2i:m2,n1:n1i,ivar1:ivar2) comm=MPI_COMM_GRID endif nbufyz=bufact*product(bufsizes_yz_corn(:,INUL,IRCV)) ! if (ldiagnos.and.lfirst.and.iproc_world==0) print*, iproc_world, ' receives', bufsizes_yz_corn(:,INUL,IRCV), & !' from', ulcornr if (ulcornr>=0) call MPI_IRECV(ulbufi(:,:,:,ivar1:ivar2),nbufyz,mpi_precision, & ulcornr,TOlur,comm,irecv_rq_FRul,mpierr) nbufyz=bufact*product(bufsizes_yz_corn(:,INUL,ISND)) if (ulcorns>=0) call MPI_ISEND(ulbufo(:,:,:,ivar1:ivar2),nbufyz,mpi_precision, & ulcorns,TOuls,comm,isend_rq_TOul,mpierr) ! ! Upper y, upper z. ! if (lyinyang.and.(llast_proc_z.or.llast_proc_y)) then if (llast_proc_y.and.llast_proc_z) then dir=NIL elseif (llast_proc_z) then dir=RIGHT else dir=LEFT endif ! ! Interpolate variables ivar1 ... ivar2 for upper_upper corner neighbor in other grid. ! Result in uubufo. ! 
if (uucorns>=0) call interpolate_yy(f,ivar1,ivar2,uubufo,dir,iyinyang_intpol_type) comm=MPI_COMM_PENCIL; TOllr=uucornr; TOuus=iproc_world !TOllr=MPI_ANY_TAG elseif (.not.(lcommunicate_y.and.(lfirst_proc_y.or.llast_proc_y))) then uubufo(:,:,:,ivar1:ivar2)=f(:,m2i:m2,n2i:n2,ivar1:ivar2) comm=MPI_COMM_GRID endif nbufyz=bufact*product(bufsizes_yz_corn(:,INUU,IRCV)) if (uucornr>=0) call MPI_IRECV(uubufi(:,:,:,ivar1:ivar2),nbufyz,mpi_precision, & uucornr,TOllr,comm,irecv_rq_FRuu,mpierr) nbufyz=bufact*product(bufsizes_yz_corn(:,INUU,ISND)) if (uucorns>=0) call MPI_ISEND(uubufo(:,:,:,ivar1:ivar2),nbufyz,mpi_precision, & uucorns,TOuus,comm,isend_rq_TOuu,mpierr) ! ! Lower y, upper z. ! if (lyinyang.and.(llast_proc_z.or.lfirst_proc_y)) then if (lfirst_proc_y.and.llast_proc_z) then dir=NIL elseif (llast_proc_z) then dir=LEFT else dir=RIGHT endif ! ! Interpolate variables ivar1 ... ivar2 for lower_upper corner neighbor in other grid. ! Result in lubufo. ! if (lucorns>=0) call interpolate_yy(f,ivar1,ivar2,lubufo,dir,iyinyang_intpol_type) comm=MPI_COMM_PENCIL; TOulr=lucornr; TOlus=iproc_world !TOulr=MPI_ANY_TAG elseif (.not.(lcommunicate_y.and.(lfirst_proc_y.or.llast_proc_y))) then lubufo(:,:,:,ivar1:ivar2)=f(:,m1:m1i,n2i:n2,ivar1:ivar2) comm=MPI_COMM_GRID endif nbufyz=bufact*product(bufsizes_yz_corn(:,INLU,IRCV)) !if (ldiagnos.and.lfirst.and.iproc_world==0) print*, iproc_world, ' receives', bufsizes_yz_corn(:,INLU,IRCV), & !' from', lucornr if (lucornr>=0) call MPI_IRECV(lubufi(:,:,:,ivar1:ivar2),nbufyz,mpi_precision, & lucornr,TOulr,comm,irecv_rq_FRlu,mpierr) nbufyz=bufact*product(bufsizes_yz_corn(:,INLU,ISND)) if (lucorns>=0) call MPI_ISEND(lubufo(:,:,:,ivar1:ivar2),nbufyz,mpi_precision, & lucorns,TOlus,comm,isend_rq_TOlu,mpierr) ! endif !if (itsub>=3.and.it>116) write(78,*) 'after corner comm it,itsub,iproc=', & !it,itsub,iproc ! ! communication sample ! (commented out, because compiler does like this for 0-D runs) ! ! if (ip<7.and.lfirst_proc_y.and.ipz==3) & ! print*,'initiate_isendrcv_bdry: MPICOMM send lu: ',iproc,lubufo(nx/2+4,:,1,2),' to ',lucorn ! endsubroutine initiate_isendrcv_bdry !*********************************************************************** subroutine finalize_isendrcv_bdry(f,ivar1_opt,ivar2_opt) ! ! Make sure the communications initiated with initiate_isendrcv_bdry are ! finished and insert the just received boundary values. ! Receive requests do not need to (and on OSF1 cannot) be explicitly ! freed, since MPI_Wait takes care of this. ! ! 21-may-02/axel: communication of corners added ! 22-oct-15/fred: communication for spherical polar coords across poles ! added yz corner communication ! 20-dec-15/MR: modified for Yin-Yang grid ! use General, only: transpose_mn, notanumber, copy_kinked_strip_z, copy_kinked_strip_y, reset_triangle, notanumber real, dimension(mx,my,mz,mfarray), intent(inout):: f integer, optional, intent(in) :: ivar1_opt, ivar2_opt ! integer :: ivar1, ivar2, j ! ivar1=1; ivar2=min(mcom,size(f,4)) if (present(ivar1_opt)) ivar1=ivar1_opt if (present(ivar2_opt)) ivar2=ivar2_opt if (ivar2==0) return ! ! 1. wait until data received ! 2. set ghost zones ! 3. wait until send completed, will be overwritten in next time step ! ! Communication across y-planes (includes periodic bc) ! if (nprocy>1.or.lyinyang) then call MPI_WAIT(irecv_rq_fromuppy,irecv_stat_fu,mpierr) call MPI_WAIT(irecv_rq_fromlowy,irecv_stat_fl,mpierr) do j=ivar1,ivar2 if (.not. lfirst_proc_y .or. bcy12(j,1)=='p' .or. lyinyang) then ! 
communication should happen only under these conditions if (lfirst_proc_y.and.lyinyang) then if (lcorner_yz) then if (.not.lcutoff_corners) f(:,:m1-1,:,j)=lbufyi(:,:,:mz,j) ! fill left vertical ghost strip if (notanumber(lbufyi(:,:,:mz,j))) print*, 'lbufyi(1:mz): iproc,j=', iproc, iproc_world, j if (notanumber(lbufyi(:,:,mz+1:,j))) print*, 'lbufyi(mz+1:): iproc,j=', iproc, iproc_world, j if (lfirst_proc_z) then if (lcutoff_corners) then call copy_kinked_strip_y(nzcut,my-nycut+2,lbufyi,f,j,-1,.true.) call reset_triangle(1,my-nycut-nghost,1,mz-nzcut-nghost,f(:,:,:,j)) endif call transpose_mn( lbufyi(:,:,zcornstart:,j), f(:,my-len_cornstrip_y+1:,:nghost,j)) ! fill lower horizontal ghost strip elseif (llast_proc_z) then if (lcutoff_corners) then call copy_kinked_strip_y(nzcut,1,lbufyi,f,j,1,.false.) call reset_triangle(1,my-nycut-nghost,mz,nzcut+1+nghost,f(:,:,:,j)) endif if (notanumber(f(:,:,:,j))) print*, 'f nach lbufyi: iproc,j=', iproc, iproc_world, j call transpose_mn( lbufyi(:,:,zcornstart:,j), f(:,my-len_cornstrip_y+1:,n2+1:,j)) ! fill upper horizontal ghost strip endif else f(:,:m1-1,:,j)=lbufyi(:,:,:mz,j) endif else f(:,1:m1-1,n1:n2,j)=lbufyi(:,:,:,j) ! set lower y-ghostzone endif endif if (.not. llast_proc_y .or. bcy12(j,2)=='p' .or.lyinyang) then if (llast_proc_y.and.lyinyang) then if (lcorner_yz) then if (.not.lcutoff_corners) f(:,m2+1:,:,j)=ubufyi(:,:,:mz,j) ! fill right vertical ghost strip if (notanumber(ubufyi(:,:,:mz,j))) print*, 'ubufyi(1:mz): iproc,j=', iproc, iproc_world, j if (notanumber(ubufyi(:,:,mz+1:,j))) print*, 'ubufyi(mz+1:): iproc,j=', iproc, iproc_world, j if (lfirst_proc_z) then if (lcutoff_corners) then call copy_kinked_strip_y(nzcut,nycut-nghost,ubufyi,f,j,1,.true.) call reset_triangle(my,nycut+1+nghost,1,mz-nzcut-nghost,f(:,:,:,j)) endif call transpose_mn( ubufyi(:,:,zcornstart:,j), f(:,1:len_cornstrip_y,:nghost,j)) ! fill lower horizontal ghost strip elseif (llast_proc_z) then if (lcutoff_corners) then call copy_kinked_strip_y(nzcut,m2+1,ubufyi,f,j,-1,.false.) call reset_triangle(my,nycut+1+nghost,mz,nzcut+1+nghost,f(:,:,:,j)) endif call transpose_mn( ubufyi(:,:,zcornstart:,j), f(:,1:len_cornstrip_y,n2+1:,j)) ! fill upper horizontal ghost strip endif else f(:,m2+1:,:,j)=ubufyi(:,:,:mz,j) endif !if (notanumber(f(:,:,:,j))) print*, 'ubufyi: iproc,j=', iproc, iproc_world, j else f(:,m2+1:,n1:n2,j)=ubufyi(:,:,:,j) ! set upper y-ghostzone endif endif ! ! Spherical y-planes across the poles ! if (lcommunicate_y) then if (lfirst_proc_y) then if (bcy12(j,1)=='pp') f(:, 1:m1-1,n1:n2,j)= lbufyi(:,:,:,j) !N-pole if (bcy12(j,1)=='ap') f(:, 1:m1-1,n1:n2,j)=-lbufyi(:,:,:,j) !N-pole endif if (llast_proc_y) then if (bcy12(j,2)=='pp') f(:,m2+1: ,n1:n2,j)= ubufyi(:,:,:,j) !S-pole if (bcy12(j,2)=='ap') f(:,m2+1: ,n1:n2,j)=-ubufyi(:,:,:,j) !S-pole endif endif enddo call MPI_WAIT(isend_rq_tolowy,isend_stat_tl,mpierr) call MPI_WAIT(isend_rq_touppy,isend_stat_tu,mpierr) endif ! ! Communication in z (includes periodic bc) ! if (nprocz>1) then call MPI_WAIT(irecv_rq_fromuppz,irecv_stat_fu,mpierr) call MPI_WAIT(irecv_rq_fromlowz,irecv_stat_fl,mpierr) do j=ivar1,ivar2 if (.not. lfirst_proc_z .or. bcz12(j,1)=='p'.or.lyinyang) then if (lfirst_proc_z.and.lyinyang) then if (lcorner_yz) then ! fill lower horizontal ghost strip if (.not.lcutoff_corners) f(:,:,:n1-1,j)=f(:,:,:n1-1,j)+lbufzi(:,:my,:,j) if (lfirst_proc_y) then if (lcutoff_corners) call copy_kinked_strip_z(nycut,mz-nzcut+2,lbufzi,f,j,-1,.true.,ladd_=.true.) ! 
fill left vertical ghost strip call transpose_mn( lbufzi(:,ycornstart:,:,j), f(:,:nghost,mz-len_cornstrip_z+1:,j),ladd=.true.) elseif (llast_proc_y) then if (lcutoff_corners) call copy_kinked_strip_z(nycut,1,lbufzi,f,j,1,.false.,ladd_=.true.) ! fill right vertical ghost strip call transpose_mn( lbufzi(:,ycornstart:,:,j), f(:,m2+1:,mz-len_cornstrip_z+1:,j),ladd=.true.) endif !if (notanumber(f(:,:,:,j))) print*, 'lbufzi: iproc,j=', iproc, iproc_world, j else f(:,:,:n1-1,j)=lbufzi(:,:my,:,j) endif else f(:,m1:m2,1:n1-1,j)=lbufzi(:,:,:,j) ! set lower z-ghostzone endif endif if (.not. llast_proc_z .or. bcz12(j,2)=='p'.or.lyinyang) then if (llast_proc_z.and.lyinyang) then if (lcorner_yz) then ! fill upper horizontal ghost strip if (.not.lcutoff_corners) f(:,:,n2+1:,j)=f(:,:,n2+1:,j)+ubufzi(:,:my,:,j) if (lfirst_proc_y) then if (lcutoff_corners) call copy_kinked_strip_z(nycut,nzcut-nghost,ubufzi,f,j,1,.true.,ladd_=.true.) ! fill left vertical ghost strip call transpose_mn( ubufzi(:,ycornstart:,:,j), f(:,:nghost,:len_cornstrip_z,j),ladd=.true.) elseif (llast_proc_y) then if (lcutoff_corners) call copy_kinked_strip_z(nycut,n2+1,ubufzi,f,j,-1,.false.,ladd_=.true.) ! fill right vertical ghost strip call transpose_mn( ubufzi(:,ycornstart:,:,j), f(:,m2+1:,:len_cornstrip_z,j),ladd=.true.) endif else f(:,:,n2+1:,j)=ubufzi(:,:my,:,j) ! fill upper horizontal ghost strip endif else f(:,m1:m2,n2+1:,j)=ubufzi(:,:,:,j) ! set upper z-ghostzone endif endif !if (notanumber(ubufzi(:,:,:,j))) print*, 'ubufzi: iproc,j=', iproc, j !if (notanumber(lbufzi(:,:,:,j))) print*, 'lbufzi: iproc,j=', iproc, j !if (notanumber(ubufyi(:,:,:,j))) print*, 'ubufyi: iproc,j=', iproc, j !if (notanumber(lbufyi(:,:,:,j))) print*, 'lbufyi: iproc,j=', iproc, j enddo call MPI_WAIT(isend_rq_tolowz,isend_stat_tl,mpierr) call MPI_WAIT(isend_rq_touppz,isend_stat_tu,mpierr) endif ! ! The four yz-corners (in counter-clockwise order) ! if (nprocz>1.and.(nprocy>1.or.lyinyang)) then if (uucornr>=0) call MPI_WAIT(irecv_rq_FRuu,irecv_stat_Fuu,mpierr) if (lucornr>=0) call MPI_WAIT(irecv_rq_FRlu,irecv_stat_Flu,mpierr) if (llcornr>=0) call MPI_WAIT(irecv_rq_FRll,irecv_stat_Fll,mpierr) if (ulcornr>=0) call MPI_WAIT(irecv_rq_FRul,irecv_stat_Ful,mpierr) do j=ivar1,ivar2 ! ! Set ll corner ! if (.not. lfirst_proc_z .or. bcz12(j,1)=='p' .or. lyinyang) then if (.not. lfirst_proc_y .or. bcy12(j,1)=='p' .or. lyinyang) then if ((.not.lfirst_proc_z.or.bcz12(j,1)=='p').and. & (.not.lfirst_proc_y.or.bcy12(j,1)=='p')) then ! inner or periodic proc boundaries f(:,1:m1-1,1:n1-1,j)=llbufi(:,:,:,j) ! fill lower left corner !if (notanumber(llbufi(:,:,:,j))) print*, 'lbufyi: iproc,j=', iproc, j elseif (llcornr>=0.and.lyinyang) then if (lfirst_proc_z) then if (llast_proc_y.and.lcutoff_corners) then call copy_kinked_strip_z(nycut,1,llbufi,f,j,1,.false.,ladd_=.true.) else f(:,:,1:n1-1,j)=f(:,:,1:n1-1,j)+llbufi(:,:,:,j) ! complete lower horizontal strip endif elseif (lfirst_proc_y) then if (llast_proc_z.and.lcutoff_corners) then call copy_kinked_strip_y(nzcut,1,llbufi,f,j,1,.false.,ladd_=.true.) else f(:,1:m1-1,:,j)=f(:,1:m1-1,:,j)+llbufi(:,:,:,j) ! complete left vertical strip endif endif !if (notanumber(f(:,:,:,j))) print*, 'llcorn: iproc,j=', iproc, iproc_world, j endif endif ! ! Set ul corner ! if (.not. llast_proc_y .or. bcy12(j,2)=='p' .or. lyinyang) then if ((.not.lfirst_proc_z.or.bcz12(j,1)=='p').and. & (.not.llast_proc_y .or.bcy12(j,2)=='p')) then ! inner or periodic proc boundaries f(:,m2+1:,1:n1-1,j)=ulbufi(:,:,:,j) ! 
fill lower right corner elseif (ulcornr>=0.and.lyinyang) then if (lfirst_proc_z) then if (lfirst_proc_y.and.lcutoff_corners) then call copy_kinked_strip_z(nycut,mz-nzcut+2,ulbufi,f,j,-1,.true.,ladd_=.true.) else f(:,:,1:n1-1,j)=f(:,:,1:n1-1,j)+ulbufi(:,:,:,j) ! complete lower horizontal strip endif elseif (llast_proc_y) then if (llast_proc_z.and.lcutoff_corners) then call copy_kinked_strip_y(nzcut,m2+1,ulbufi,f,j,-1,.false.,ladd_=.true.) else f(:,m2+1:,:,j)=f(:,m2+1:,:,j)+ulbufi(:,:,:,j) ! complete right vertical strip endif endif !if (notanumber(f(:,:,:,j))) print*, 'ulcorn: iproc,j=', iproc, iproc_world, j endif endif endif ! ! Set uu corner ! if (.not. llast_proc_z .or. bcz12(j,2)=='p' .or. lyinyang) then if (.not. llast_proc_y .or. bcy12(j,2)=='p' .or. lyinyang) then if ((.not.llast_proc_z.or.bcz12(j,2)=='p').and. & ! inner or periodic proc boundaries (.not.llast_proc_y.or.bcy12(j,2)=='p')) then f(:,m2+1:,n2+1:,j)=uubufi(:,:,:,j) ! fill upper right corner elseif (uucornr>=0.and.lyinyang) then if (llast_proc_z) then if (lfirst_proc_y.and.lcutoff_corners) then call copy_kinked_strip_z(nycut,nzcut-nghost,uubufi,f,j,1,.true.,ladd_=.true.) else f(:,:,n2+1:,j)=f(:,:,n2+1:,j)+uubufi(:,:,:,j) ! complete upper horizontal strip endif elseif (llast_proc_y) then if (lfirst_proc_z.and.lcutoff_corners) then call copy_kinked_strip_y(nzcut,nycut-nghost,uubufi,f,j,1,.true.,ladd_=.true.) else f(:,m2+1:,:,j)=f(:,m2+1:,:,j)+uubufi(:,:,:,j) ! complete right vertical strip endif endif !if (notanumber(f(:,:,:,j))) print*, 'uucorn: iproc,j=', iproc, iproc_world, j endif endif ! ! Set lu corner ! if (.not. lfirst_proc_y .or. bcy12(j,1)=='p' .or. lyinyang) then if ((.not.llast_proc_z .or.bcz12(j,2)=='p').and. & ! inner or periodic proc boundaries (.not.lfirst_proc_y.or.bcy12(j,1)=='p')) then f(:,1:m1-1,n2+1:,j)=lubufi(:,:,:,j) ! fill upper left corner elseif (lucornr>=0.and.lyinyang) then if (llast_proc_z) then if (llast_proc_y.and.lcutoff_corners) then call copy_kinked_strip_z(nycut,n2+1,lubufi,f,j,-1,.false.,ladd_=.true.) else f(:,:,n2+1:,j)=f(:,:,n2+1:,j)+lubufi(:,:,:,j) ! complete upper horizontal strip endif elseif (lfirst_proc_y) then if (lfirst_proc_z.and.lcutoff_corners) then call copy_kinked_strip_y(nzcut,my-nycut+2,lubufi,f,j,-1,.true.,ladd_=.true.) else f(:,1:m1-1,:,j)=f(:,1:m1-1,:,j)+lubufi(:,:,:,j) ! complete left vertical strip endif endif !if (notanumber(f(:,:,:,j))) print*, 'lucorn: iproc,j=', iproc, iproc_world, j endif endif endif ! ! Spherical yz-corners across the poles. If lcommunicate_y then periodic in z ! 
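!
!  The pole-corner fill that follows applies, per variable j, either a plain
!  copy ('pp', periodic across the pole) or a sign-flipped copy ('ap',
!  antiperiodic, e.g. for components that change sign across the pole).
!  Minimal sketch of the pattern (comment only; buf and ghost are illustrative
!  names, not module variables):
!
!    select case (bcy12(j,1))
!    case ('pp'); ghost =  buf
!    case ('ap'); ghost = -buf
!    endselect
!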
if (lcommunicate_y) then if (lfirst_proc_y) then if (bcy12(j,1)=='pp') then !N-pole periodic f(:,:m1-1,:n1-1,j)= lubufi(:,:,:,j) !(set ll corner) f(:,:m1-1,n2+1:,j)= llbufi(:,:,:,j) !(set lu corner) endif if (bcy12(j,1)=='ap') then !N-pole antiperiodic f(:,:m1-1,:n1-1,j)=-lubufi(:,:,:,j) !(set ll corner) f(:,:m1-1,n2+1:,j)=-llbufi(:,:,:,j) !(set lu corner) endif endif if (llast_proc_y) then if (bcy12(j,2)=='pp') then !S-pole periodic f(:,m2+1:,:n1-1,j)= uubufi(:,:,:,j) !(set ul corner) f(:,m2+1:,n2+1:,j)= ulbufi(:,:,:,j) !(set uu corner) endif if (bcy12(j,2)=='ap') then !S-pole antiperiodic f(:,m2+1:,:n1-1,j)=-uubufi(:,:,:,j) !(set ul corner) f(:,m2+1:,n2+1:,j)=-ulbufi(:,:,:,j) !(set uu corner) endif endif endif enddo if (llcorns>=0) call MPI_WAIT(isend_rq_TOll,isend_stat_Tll,mpierr) if (ulcorns>=0) call MPI_WAIT(isend_rq_TOul,isend_stat_Tul,mpierr) if (uucorns>=0) call MPI_WAIT(isend_rq_TOuu,isend_stat_Tuu,mpierr) if (lucorns>=0) call MPI_WAIT(isend_rq_TOlu,isend_stat_Tlu,mpierr) endif ! ! communication sample ! (commented out, because compiler does like this for 0-D runs) ! ! if (ip<7.and.ipy==3.and.lfirst_proc_z) & ! print*,'finalize_isendrcv_bdry: MPICOMM recv ul: ', & ! iproc,ulbufi(nx/2+4,:,1,2),' from ',ulcorn ! ! make sure the other processors don't carry on sending new data ! which could be mistaken for an earlier time ! call mpibarrier ! endsubroutine finalize_isendrcv_bdry !*********************************************************************** subroutine isendrcv_bdry_x(f,ivar1_opt,ivar2_opt) ! ! Isend and Irecv boundary values for x-direction. Sends and receives ! before continuing to y and z boundaries, as this allows the edges ! of the grid to be set properly. ! ! 2-may-09/anders: coded ! real, dimension(:,:,:,:), intent(inout) :: f integer, intent(in), optional :: ivar1_opt, ivar2_opt ! integer :: ivar1, ivar2, nbufx, j ! ivar1=1; ivar2=min(mcom,size(f,4)) if (present(ivar1_opt)) ivar1=ivar1_opt if (present(ivar2_opt)) ivar2=ivar2_opt ! if (nprocx>1) then lbufxo(:,:,:,ivar1:ivar2)=f(l1:l1i,m1:m2,n1:n2,ivar1:ivar2) !!(lower x-zone) ubufxo(:,:,:,ivar1:ivar2)=f(l2i:l2,m1:m2,n1:n2,ivar1:ivar2) !!(upper x-zone) nbufx=ny*nz*nghost*(ivar2-ivar1+1) call MPI_IRECV(ubufxi(:,:,:,ivar1:ivar2),nbufx,mpi_precision, & xuneigh,tolowx,MPI_COMM_GRID,irecv_rq_fromuppx,mpierr) call MPI_IRECV(lbufxi(:,:,:,ivar1:ivar2),nbufx,mpi_precision, & xlneigh,touppx,MPI_COMM_GRID,irecv_rq_fromlowx,mpierr) call MPI_ISEND(lbufxo(:,:,:,ivar1:ivar2),nbufx,mpi_precision, & xlneigh,tolowx,MPI_COMM_GRID,isend_rq_tolowx,mpierr) call MPI_ISEND(ubufxo(:,:,:,ivar1:ivar2),nbufx,mpi_precision, & xuneigh,touppx,MPI_COMM_GRID,isend_rq_touppx,mpierr) call MPI_WAIT(irecv_rq_fromuppx,irecv_stat_fu,mpierr) call MPI_WAIT(irecv_rq_fromlowx,irecv_stat_fl,mpierr) ! ! Inner communication or (shear-)periodic boundary conditions in x ! MR: Communication should only happen under these conditions. ! do j=ivar1,ivar2 if (.not. lfirst_proc_x .or. bcx12(j,1)=='p' .or. & (bcx12(j,1)=='she'.and.nygrid==1)) then f( 1:l1-1,m1:m2,n1:n2,j)=lbufxi(:,:,:,j) !!(set lower buffer) endif if (.not. llast_proc_x .or. bcx12(j,2)=='p' .or. & (bcx12(j,2)=='she'.and.nygrid==1)) then f(l2+1:,m1:m2,n1:n2,j)=ubufxi(:,:,:,j) !!(set upper buffer) endif enddo call MPI_WAIT(isend_rq_tolowx,isend_stat_tl,mpierr) call MPI_WAIT(isend_rq_touppx,isend_stat_tu,mpierr) endif ! endsubroutine isendrcv_bdry_x !*********************************************************************** subroutine periodic_bdry_x(buffer,ivar1_opt,ivar2_opt) ! ! 
Isend and Irecv boundary values for x-direction. Sends and receives ! before continuing to y and z boundaries, as this allows the edges ! of the grid to be set properly. ! ! 19-jan-23/fred: coded ! real, dimension(:,:,:,:), intent(inout) :: buffer integer, intent(in), optional :: ivar1_opt, ivar2_opt ! integer :: ivar1, ivar2, nbuf, j, ny_loc, nz_loc, nvar_loc integer :: isend_rq, irecv_rq integer, dimension (MPI_STATUS_SIZE) :: irecv_stat, isend_stat real, dimension(:,:,:,:), allocatable :: bufo,bufi integer :: comm comm = MPI_COMM_GRID ! if (.not.(lfirst_proc_x.or.llast_proc_x)) return ! if (nprocx>1) then ! ny_loc=size(buffer,2)-2*nghost nz_loc=size(buffer,3)-2*nghost ivar1=1; ivar2=min(mcom,size(buffer,4)) if (present(ivar1_opt)) ivar1=ivar1_opt if (present(ivar2_opt)) ivar2=ivar2_opt nvar_loc=ivar2-ivar1+1 allocate(bufo(nghost,ny_loc,nz_loc,nvar_loc),& bufi(nghost,ny_loc,nz_loc,nvar_loc)) ! nbuf=ny_loc*nz_loc*nghost*nvar_loc if (lfirst_proc_x) then bufo=buffer(l1:l1i,m1:m1+ny_loc-1,n1:n1+nz_loc-1,ivar1:ivar2) !!(lower x-zone) call MPI_ISEND(bufo,nbuf,mpi_precision, & xlneigh,tolowx,comm,isend_rq,mpierr) call MPI_IRECV(bufi,nbuf,mpi_precision, & xlneigh,touppx,comm,irecv_rq,mpierr) call MPI_WAIT(irecv_rq,irecv_stat,mpierr) else bufo=buffer(l2i:l2,m1:m1+ny_loc-1,n1:n1+nz_loc-1,ivar1:ivar2) !!(upper x-zone) call MPI_IRECV(bufi,nbuf,mpi_precision, & xuneigh,tolowx,comm,irecv_rq,mpierr) call MPI_ISEND(bufo,nbuf,mpi_precision, & xuneigh,touppx,comm,isend_rq,mpierr) call MPI_WAIT(irecv_rq,irecv_stat,mpierr) endif ! ! Inner communication or (shear-)periodic boundary conditions in x ! MR: Communication should only happen under these conditions. ! do j=ivar1,ivar2 if (.not. lfirst_proc_x .or. bcx12(j,1)=='p' .or. & (bcx12(j,1)=='she'.and.nygrid==1)) then buffer(1:l1-1,m1:m1+ny_loc-1,n1:n1+nz_loc-1,j)=bufi(:,:,:,j) !!(set lower buffer) endif if (.not. llast_proc_x .or. bcx12(j,2)=='p' .or. & (bcx12(j,2)=='she'.and.nygrid==1)) then buffer(l2+1: ,m1:m1+ny_loc-1,n1:n1+nz_loc-1,j)=bufi(:,:,:,j) !!(set upper buffer) endif enddo call MPI_WAIT(isend_rq,isend_stat,mpierr) endif ! endsubroutine periodic_bdry_x !*********************************************************************** subroutine periodic_bdry_y(buffer,ivar1_opt,ivar2_opt) ! ! Isend and Irecv boundary values for y-direction. ! ! 19-jan-23/fred: coded ! real, dimension(:,:,:,:), intent(inout) :: buffer integer, intent(in), optional :: ivar1_opt, ivar2_opt ! integer :: ivar1, ivar2, nbuf, nx_loc, nz_loc, nvar_loc integer :: isend_rq, irecv_rq integer, dimension (MPI_STATUS_SIZE) :: irecv_stat, isend_stat real, dimension(:,:,:,:), allocatable :: bufo,bufi integer :: comm ! if (.not.(lfirst_proc_y.or.llast_proc_y)) return comm = MPI_COMM_GRID ! if (nprocy>1) then ! nx_loc=size(buffer,1)-2*nghost nz_loc=size(buffer,3)-2*nghost ivar1=1; ivar2=min(mcom,size(buffer,4)) if (present(ivar1_opt)) ivar1=ivar1_opt if (present(ivar2_opt)) ivar2=ivar2_opt nvar_loc=ivar2-ivar1+1 allocate(bufo(nx_loc,nghost,nz_loc,nvar_loc),& bufi(nx_loc,nghost,nz_loc,nvar_loc)) ! 
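!
!  The exchange below (as in periodic_bdry_x above) involves only the first and
!  last processor of the row: with periodic wrapping, ylneigh of the first
!  y processor is the last y processor and vice versa, so the posted calls pair
!  up as (descriptive note only; it assumes the usual periodic neighbour
!  assignment made at initialization):
!
!    first:  MPI_ISEND(tag=tolowy)  ----->  last:  MPI_IRECV(tag=tolowy)
!    first:  MPI_IRECV(tag=touppy)  <-----  last:  MPI_ISEND(tag=touppy)
!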
nbuf=nx_loc*nz_loc*nghost*nvar_loc if (lfirst_proc_y) then bufo=buffer(l1:l1+nx_loc-1,m1:m1i,n1:n1+nz_loc-1,ivar1:ivar2) !!(lower y-zone) call MPI_ISEND(bufo,nbuf,mpi_precision, & ylneigh,tolowy,comm,isend_rq,mpierr) call MPI_IRECV(bufi,nbuf,mpi_precision, & ylneigh,touppy,comm,irecv_rq,mpierr) call MPI_WAIT(irecv_rq,irecv_stat,mpierr) buffer(l1:l1+nx_loc-1,1:m1-1,n1:n1+nz_loc-1,:)=bufi !!(set lower buffer) else bufo=buffer(l1:l1+nx_loc-1,m2i:m2,n1:n1+nz_loc-1,ivar1:ivar2) !!(upper y-zone) call MPI_IRECV(bufi,nbuf,mpi_precision, & yuneigh,tolowy,comm,irecv_rq,mpierr) call MPI_ISEND(bufo,nbuf,mpi_precision, & yuneigh,touppy,comm,isend_rq,mpierr) call MPI_WAIT(irecv_rq,irecv_stat,mpierr) buffer(l1:l1+nx_loc-1,m2+1: ,n1:n1+nz_loc-1,:)=bufi !!(set upper buffer) endif ! call MPI_WAIT(isend_rq,isend_stat,mpierr) ! endif ! endsubroutine periodic_bdry_y !*********************************************************************** subroutine periodic_bdry_z(buffer,ivar1_opt,ivar2_opt) ! ! Isend and Irecv boundary values for z-direction. ! ! 19-jan-23/fred: coded ! real, dimension(:,:,:,:), intent(inout) :: buffer integer, intent(in), optional :: ivar1_opt, ivar2_opt ! integer :: ivar1, ivar2, nbuf, ny_loc, nx_loc, nvar_loc integer :: isend_rq, irecv_rq integer, dimension (MPI_STATUS_SIZE) :: irecv_stat, isend_stat real, dimension(:,:,:,:), allocatable :: bufo,bufi integer :: comm ! if (.not.(lfirst_proc_z.or.llast_proc_z)) return comm = MPI_COMM_GRID ! if (nprocz>1) then ! nx_loc=size(buffer,1)-2*nghost ny_loc=size(buffer,2)-2*nghost ivar1=1; ivar2=min(mcom,size(buffer,4)) if (present(ivar1_opt)) ivar1=ivar1_opt if (present(ivar2_opt)) ivar2=ivar2_opt nvar_loc=ivar2-ivar1+1 allocate(bufo(nx_loc,ny_loc,nghost,nvar_loc),& bufi(nx_loc,ny_loc,nghost,nvar_loc)) ! nbuf=nx_loc*ny_loc*nghost*nvar_loc if (lfirst_proc_z) then bufo=buffer(l1:l1+nx_loc-1,m1:m1+ny_loc-1,n1:n1i,ivar1:ivar2) !!(lower z-zone) call MPI_ISEND(bufo,nbuf,mpi_precision, & zlneigh,tolowz,comm,isend_rq,mpierr) call MPI_IRECV(bufi,nbuf,mpi_precision, & zlneigh,touppz,comm,irecv_rq,mpierr) call MPI_WAIT(irecv_rq,irecv_stat,mpierr) buffer(l1:l1+nx_loc-1,m1:m1+ny_loc-1,1:n1-1,:)=bufi !!(set lower buffer) else bufo=buffer(l1:l1+nx_loc-1,m1:m1+ny_loc-1,n2i:n2,ivar1:ivar2) !!(upper z-zone) call MPI_IRECV(bufi,nbuf,mpi_precision, & zuneigh,tolowz,comm,irecv_rq,mpierr) call MPI_ISEND(bufo,nbuf,mpi_precision, & zuneigh,touppz,comm,isend_rq,mpierr) call MPI_WAIT(irecv_rq,irecv_stat,mpierr) buffer(l1:l1+nx_loc-1,m1:m1+ny_loc-1,n2+1:,:)=bufi !!(set upper buffer) endif ! call MPI_WAIT(isend_rq,isend_stat,mpierr) ! endif ! endsubroutine periodic_bdry_z !*********************************************************************** subroutine initiate_shearing(f,ivar1_opt,ivar2_opt) ! ! Subroutine for shearing sheet boundary conditions ! ! 27-nov-14/mcnallcp: Now uses 4 shearing neighbours so ! y-ghosts are not needed ! 20-june-02/nils: adapted from pencil_mpi ! real, dimension (mx,my,mz,mfarray) :: f integer, optional :: ivar1_opt, ivar2_opt ! real(KIND=rkind8) :: deltay_dy, frac, c1, c2, c3, c4, c5, c6 integer :: ivar1, ivar2, ystep, nbufx_gh integer :: tolastya=11, tolastyb=12, tonextya=13, tonextyb=14 integer :: tolastlastya=15, tolastlastyb=16, tonextnextya=17, tonextnextyb=18 ! ivar1=1; ivar2=mcom if (present(ivar1_opt)) ivar1=ivar1_opt if (present(ivar2_opt)) ivar2=ivar2_opt ! ! Sixth order interpolation along the y-direction ! deltay_dy=deltay/dy displs=int(deltay_dy) if (nprocx==1 .and. nprocy==1) then if (nygrid==1) then ! Periodic boundary conditions. 
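!
!  The nygrid==1 branch below is a plain periodic copy of the x ghost zones.
!  Otherwise the ghost zones are shifted by deltay, split into an integer cell
!  shift displs and a fractional part frac, and interpolated in y with the
!  sixth-order (degree-5) Lagrange weights c1..c6 computed below, which belong
!  to the six stencil offsets -2..+3 around the shifted point. They satisfy
!  c1+c2+c3+c4+c5+c6 = 1 for any frac, and for frac=0 they reduce to c3=1 with
!  all others zero, i.e. an integer-cell shift is reproduced exactly.
!  (Explanatory note; the identities follow from the Lagrange form of the
!  weights.)
!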
f( 1:l1-1,m1:m2,:,ivar1:ivar2) = f(l2i:l2,m1:m2,:,ivar1:ivar2) f(l2+1:mx ,m1:m2,:,ivar1:ivar2) = f(l1:l1i,m1:m2,:,ivar1:ivar2) else frac=deltay_dy-displs c1 = - (frac+1.)*frac*(frac-1.)*(frac-2.)*(frac-3.)/120. c2 = +(frac+2.) *frac*(frac-1.)*(frac-2.)*(frac-3.)/24. c3 = -(frac+2.)*(frac+1.) *(frac-1.)*(frac-2.)*(frac-3.)/12. c4 = +(frac+2.)*(frac+1.)*frac *(frac-2.)*(frac-3.)/12. c5 = -(frac+2.)*(frac+1.)*frac*(frac-1.) *(frac-3.)/24. c6 = +(frac+2.)*(frac+1.)*frac*(frac-1.)*(frac-2.) /120. f(1:l1-1,m1:m2,:,ivar1:ivar2) = & c1*cshift(f(l2i:l2,m1:m2,:,ivar1:ivar2),-displs+2,2) & +c2*cshift(f(l2i:l2,m1:m2,:,ivar1:ivar2),-displs+1,2) & +c3*cshift(f(l2i:l2,m1:m2,:,ivar1:ivar2),-displs ,2) & +c4*cshift(f(l2i:l2,m1:m2,:,ivar1:ivar2),-displs-1,2) & +c5*cshift(f(l2i:l2,m1:m2,:,ivar1:ivar2),-displs-2,2) & +c6*cshift(f(l2i:l2,m1:m2,:,ivar1:ivar2),-displs-3,2) f(l2+1:mx,m1:m2,:,ivar1:ivar2) = & c1*cshift(f(l1:l1i,m1:m2,:,ivar1:ivar2), displs-2,2) & +c2*cshift(f(l1:l1i,m1:m2,:,ivar1:ivar2), displs-1,2) & +c3*cshift(f(l1:l1i,m1:m2,:,ivar1:ivar2), displs ,2) & +c4*cshift(f(l1:l1i,m1:m2,:,ivar1:ivar2), displs+1,2) & +c5*cshift(f(l1:l1i,m1:m2,:,ivar1:ivar2), displs+2,2) & +c6*cshift(f(l1:l1i,m1:m2,:,ivar1:ivar2), displs+3,2) endif else if (nygrid==1) return ! Periodic boundary conditions already set. ! ! With more than one CPU in the y-direction it will become necessary to ! interpolate over data from two different CPUs. Likewise two different ! CPUs will require data from this CPU. ! if (lfirst_proc_x .or. llast_proc_x) then ipx_partner=(nprocx-ipx-1) ystep = displs/ny ! Terms in these expressions ! ipz*nprocy*nprocx = the offset to this x-y plane of processors ! modulo(ipy-ystep,nprocy)*nprocx = the offset to the x-row of the shearing neighbor ! ipx_partner = the offset to the shearing neighbour within the x-row, ! either the low-x end or high-x end of the grid. ! Note iproc, ipx, etc are zero-based indexing. nextnextya = find_proc(ipx_partner,ipy-ystep+1,ipz) nextya = find_proc(ipx_partner,ipy-ystep ,ipz) lastya = find_proc(ipx_partner,ipy-ystep-1,ipz) lastlastya = find_proc(ipx_partner,ipy-ystep-2,ipz) ! lastlastyb = find_proc(ipx_partner,ipy+ystep-1,ipz) lastyb = find_proc(ipx_partner,ipy+ystep ,ipz) nextyb = find_proc(ipx_partner,ipy+ystep+1,ipz) nextnextyb = find_proc(ipx_partner,ipy+ystep+2,ipz) ! !!nextnextya = ipz*nprocy*nprocx +modulo(ipy-ystep+1,nprocy)*nprocx + ipx_partner !!nextya = ipz*nprocy*nprocx +modulo(ipy-ystep ,nprocy)*nprocx + ipx_partner !!lastya = ipz*nprocy*nprocx +modulo(ipy-ystep-1,nprocy)*nprocx + ipx_partner !!lastlastya = ipz*nprocy*nprocx +modulo(ipy-ystep-2,nprocy)*nprocx + ipx_partner ! !!lastlastyb = ipz*nprocy*nprocx +modulo(ipy+ystep-1,nprocy)*nprocx + ipx_partner !!lastyb = ipz*nprocy*nprocx +modulo(ipy+ystep ,nprocy)*nprocx + ipx_partner !!nextyb = ipz*nprocy*nprocx +modulo(ipy+ystep+1,nprocy)*nprocx + ipx_partner !!nextnextyb = ipz*nprocy*nprocx +modulo(ipy+ystep+2,nprocy)*nprocx + ipx_partner ! ! The data that gets passed, each set of values goes to 4 places. ! Only pass active grid points in y, the guard cells are not assumed to be filled yet fao(:,:,:,ivar1:ivar2) = f(l1:l1i,m1:m2,:,ivar1:ivar2) fbo(:,:,:,ivar1:ivar2) = f(l2i:l2,m1:m2,:,ivar1:ivar2) ! ! Buffer length for MPI calls, this is the size of the used section of fao and fbo nbufx_gh=(my-2*nghost)*mz*nghost*(ivar2-ivar1+1) ! Here we exchange the fao and fbo data across the shearing boundary. ! These if statements determinie if we need to copy, or post a MPI send/recieve. ! 
Direct copying is done when we discover we are the reciever. ! the route from send to recieve buffer names is based on values of iproc: ! nextnextyb -> fao => fahihi ! nextyb -> fao => fahi ! lastyb -> fao => falo ! lastlastyb -> fao => falolo ! nextnextya -> fbo = fahihi ! nextya -> fbo = fahi ! lastya -> fbo = falo ! lastlastya -> fbo = falolo ! ! map of sends ! lastlastya -> nextnextyb ! lastya -> nextyb ! nextya -> lastyb ! nextnextya -> lastlastyb ! ! lastlastyb -> nextnextya ! lastyb -> nextya ! nextyb -> lastya ! nextnextyb -> lastlastya ! ! Calls to fill the b-side recieve buffers if (lastlastya/=iproc) then call MPI_ISEND(fao(:,:,:,ivar1:ivar2),nbufx_gh,mpi_precision,lastlastya, & tonextnextyb,MPI_COMM_GRID,isend_rq_tolastlastya,mpierr) endif if (nextnextyb==iproc) then fbhihi(:,:,:,ivar1:ivar2)=fao(:,:,:,ivar1:ivar2) else call MPI_IRECV(fbhihi(:,:,:,ivar1:ivar2),nbufx_gh,mpi_precision,nextnextyb, & tonextnextyb,MPI_COMM_GRID,irecv_rq_fromnextnextyb,mpierr) endif ! if (lastya/=iproc) then call MPI_ISEND(fao(:,:,:,ivar1:ivar2),nbufx_gh,mpi_precision,lastya, & tonextyb,MPI_COMM_GRID,isend_rq_tolastya,mpierr) endif if (nextyb==iproc) then fbhi(:,:,:,ivar1:ivar2)=fao(:,:,:,ivar1:ivar2) else call MPI_IRECV(fbhi(:,:,:,ivar1:ivar2),nbufx_gh,mpi_precision,nextyb, & tonextyb,MPI_COMM_GRID,irecv_rq_fromnextyb,mpierr) endif ! if (nextya/=iproc) then call MPI_ISEND(fao(:,:,:,ivar1:ivar2),nbufx_gh,mpi_precision,nextya, & tolastyb,MPI_COMM_GRID,isend_rq_tonextya,mpierr) endif if (lastyb==iproc) then fblo(:,:,:,ivar1:ivar2)=fao(:,:,:,ivar1:ivar2) else call MPI_IRECV(fblo(:,:,:,ivar1:ivar2),nbufx_gh,mpi_precision,lastyb, & tolastyb,MPI_COMM_GRID,irecv_rq_fromlastyb,mpierr) endif ! if (nextnextya/=iproc) then call MPI_ISEND(fao(:,:,:,ivar1:ivar2),nbufx_gh,mpi_precision,nextnextya, & tolastlastyb,MPI_COMM_GRID,isend_rq_tonextnextya,mpierr) endif if (lastlastyb==iproc) then fblolo(:,:,:,ivar1:ivar2)=fao(:,:,:,ivar1:ivar2) else call MPI_IRECV(fblolo(:,:,:,ivar1:ivar2),nbufx_gh,mpi_precision,lastlastyb, & tolastlastyb,MPI_COMM_GRID,irecv_rq_fromlastlastyb,mpierr) endif ! Now fill a-side recieve buffers if (lastlastyb/=iproc) then call MPI_ISEND(fbo(:,:,:,ivar1:ivar2),nbufx_gh,mpi_precision,lastlastyb, & tonextnextya,MPI_COMM_GRID,isend_rq_tolastlastyb,mpierr) endif if (nextnextya==iproc) then fahihi(:,:,:,ivar1:ivar2)=fbo(:,:,:,ivar1:ivar2) else call MPI_IRECV(fahihi(:,:,:,ivar1:ivar2),nbufx_gh,mpi_precision,nextnextya, & tonextnextya,MPI_COMM_GRID,irecv_rq_fromnextnextya,mpierr) endif ! if (lastyb/=iproc) then call MPI_ISEND(fbo(:,:,:,ivar1:ivar2),nbufx_gh,mpi_precision,lastyb, & tonextya,MPI_COMM_GRID,isend_rq_tolastyb,mpierr) endif if (nextya==iproc) then fahi(:,:,:,ivar1:ivar2)=fbo(:,:,:,ivar1:ivar2) else call MPI_IRECV(fahi(:,:,:,ivar1:ivar2),nbufx_gh,mpi_precision,nextya, & tonextya,MPI_COMM_GRID,irecv_rq_fromnextya,mpierr) endif ! if (nextyb/=iproc) then call MPI_ISEND(fbo(:,:,:,ivar1:ivar2),nbufx_gh,mpi_precision,nextyb, & tolastya,MPI_COMM_GRID,isend_rq_tonextyb,mpierr) endif if (lastya==iproc) then falo(:,:,:,ivar1:ivar2)=fbo(:,:,:,ivar1:ivar2) else call MPI_IRECV(falo(:,:,:,ivar1:ivar2),nbufx_gh,mpi_precision,lastya, & tolastya,MPI_COMM_GRID,irecv_rq_fromlastya,mpierr) endif ! 
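!
!  The eight fao/fbo exchanges here all follow the same pattern: the receive
!  buffer is filled either by a direct copy, when the partner rank equals
!  iproc, or by an MPI_IRECV; the matching MPI_ISEND is only posted when the
!  destination is a different rank. Sketch of one exchange (an echo of the
!  surrounding code, kept as a comment):
!
!    if (lastya/=iproc) call MPI_ISEND(fao, nbufx_gh, mpi_precision, lastya, &
!                                      tonextyb, MPI_COMM_GRID, isend_rq_tolastya, mpierr)
!    if (nextyb==iproc) then
!      fbhi=fao                        ! partner is this rank: plain copy
!    else
!      call MPI_IRECV(fbhi, nbufx_gh, mpi_precision, nextyb, &
!                     tonextyb, MPI_COMM_GRID, irecv_rq_fromnextyb, mpierr)
!    endif
!
!  The tag (here tonextyb) names the receive buffer; the send goes to lastya
!  because, from that rank's point of view, its nextyb is this rank.
!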
if (nextnextyb/=iproc) then call MPI_ISEND(fbo(:,:,:,ivar1:ivar2),nbufx_gh,mpi_precision,nextnextyb, & tolastlastya,MPI_COMM_GRID,isend_rq_tonextnextyb,mpierr) endif if (lastlastya==iproc) then falolo(:,:,:,ivar1:ivar2)=fbo(:,:,:,ivar1:ivar2) else call MPI_IRECV(falolo(:,:,:,ivar1:ivar2),nbufx_gh,mpi_precision,lastlastya, & tolastlastya,MPI_COMM_GRID,irecv_rq_fromlastlastya,mpierr) endif ! endif endif ! endsubroutine initiate_shearing !*********************************************************************** subroutine finalize_shearing(f,ivar1_opt,ivar2_opt) ! ! Subroutine for shearing sheet boundary conditions ! ! 27-nov-14/mcnallcp: Now uses 4 shearing neighbours so ! y-ghosts are not needed ! 20-june-02/nils: adapted from pencil_mpi ! 02-mar-02/ulf: Sliding periodic boundary conditions in x ! real, dimension (mx,my,mz,mfarray) :: f integer, optional :: ivar1_opt, ivar2_opt ! ! fa, fb are the buffers we collect recieved data into real, dimension (nghost,4*ny,mz,mcom) :: fa, fb integer, dimension (MPI_STATUS_SIZE) :: irecv_stat_fall, irecv_stat_fann integer, dimension (MPI_STATUS_SIZE) :: irecv_stat_fal, irecv_stat_fan integer, dimension (MPI_STATUS_SIZE) :: irecv_stat_fbll, irecv_stat_fbnn integer, dimension (MPI_STATUS_SIZE) :: irecv_stat_fbl, irecv_stat_fbn integer, dimension (MPI_STATUS_SIZE) :: isend_stat_tnna, isend_stat_tlla integer, dimension (MPI_STATUS_SIZE) :: isend_stat_tna, isend_stat_tla integer, dimension (MPI_STATUS_SIZE) :: isend_stat_tnnb, isend_stat_tllb integer, dimension (MPI_STATUS_SIZE) :: isend_stat_tnb, isend_stat_tlb integer :: ivar1, ivar2, m2long real(KIND=rkind8) :: deltay_dy, frac, c1, c2, c3, c4, c5, c6 ! ivar1=1; ivar2=mcom if (present(ivar1_opt)) ivar1=ivar1_opt if (present(ivar2_opt)) ivar2=ivar2_opt ! ! Some special cases have already finished in initiate_shearing. ! if (nygrid/=1 .and. (nprocx>1 .or. nprocy>1) .and. (lfirst_proc_x .or. llast_proc_x)) then ! ! Need to wait till all communication has been recived. ! if (lastlastyb/=iproc) call MPI_WAIT(irecv_rq_fromlastlastyb,irecv_stat_fbll,mpierr) if (lastyb/=iproc) call MPI_WAIT(irecv_rq_fromlastyb,irecv_stat_fbl,mpierr) if (nextyb/=iproc) call MPI_WAIT(irecv_rq_fromnextyb,irecv_stat_fbn,mpierr) if (nextnextyb/=iproc) call MPI_WAIT(irecv_rq_fromnextnextyb,irecv_stat_fbnn,mpierr) ! if (lastlastya/=iproc) call MPI_WAIT(irecv_rq_fromlastlastya,irecv_stat_fall,mpierr) if (lastya/=iproc) call MPI_WAIT(irecv_rq_fromlastya,irecv_stat_fal,mpierr) if (nextya/=iproc) call MPI_WAIT(irecv_rq_fromnextya,irecv_stat_fan,mpierr) if (nextnextya/=iproc) call MPI_WAIT(irecv_rq_fromnextnextya,irecv_stat_fann,mpierr) ! ! Reading communicated information into f. ! deltay_dy=deltay/dy ! fa(:,1 : ny,:,ivar1:ivar2) = falolo(:,:,:,ivar1:ivar2) fa(:,ny+1 :2*ny,:,ivar1:ivar2) = falo(:,:,:,ivar1:ivar2) fa(:,2*ny+1:3*ny,:,ivar1:ivar2) = fahi(:,:,:,ivar1:ivar2) fa(:,3*ny+1:4*ny,:,ivar1:ivar2) = fahihi(:,:,:,ivar1:ivar2) ! fb(:,1 : ny,:,ivar1:ivar2) = fblolo(:,:,:,ivar1:ivar2) fb(:,ny+1 :2*ny,:,ivar1:ivar2) = fblo(:,:,:,ivar1:ivar2) fb(:,2*ny+1:3*ny,:,ivar1:ivar2) = fbhi(:,:,:,ivar1:ivar2) fb(:,3*ny+1:4*ny,:,ivar1:ivar2) = fbhihi(:,:,:,ivar1:ivar2) ! ! displs is the displacement in cells displs = modulo(int(deltay_dy),ny) frac = deltay_dy - int(deltay_dy) c1 = - (frac+1.)*frac*(frac-1.)*(frac-2.)*(frac-3.)/120. c2 = +(frac+2.) *frac*(frac-1.)*(frac-2.)*(frac-3.)/24. c3 = -(frac+2.)*(frac+1.) *(frac-1.)*(frac-2.)*(frac-3.)/12. c4 = +(frac+2.)*(frac+1.)*frac *(frac-2.)*(frac-3.)/12. c5 = -(frac+2.)*(frac+1.)*frac*(frac-1.) 
*(frac-3.)/24. c6 = +(frac+2.)*(frac+1.)*frac*(frac-1.)*(frac-2.) /120. ! ! These are handy for debugging, and reduce the interpolation order ! c1=0; c2=0; c5=0; c6=0; ! c3 = -(frac-1.) ! c4 = frac ! ! m2 long is the position of the last value from fahi in fa ! m2long = 3*ny f(1:l1-1,m1:m2,:,ivar1:ivar2) = & c1*fa(:,m2long-ny-displs+3:m2long-displs+2,:,ivar1:ivar2) & +c2*fa(:,m2long-ny-displs+2:m2long-displs+1,:,ivar1:ivar2) & +c3*fa(:,m2long-ny-displs+1:m2long-displs-0,:,ivar1:ivar2) & +c4*fa(:,m2long-ny-displs-0:m2long-displs-1,:,ivar1:ivar2) & +c5*fa(:,m2long-ny-displs-1:m2long-displs-2,:,ivar1:ivar2) & +c6*fa(:,m2long-ny-displs-2:m2long-displs-3,:,ivar1:ivar2) ! ! ny+1 is the beginning of the block of interior cell from fblo in fb f(l2+1:mx,m1:m2,:,ivar1:ivar2)= & c1*fb(:,ny+1+displs-2:2*ny+displs-2,:,ivar1:ivar2) & +c2*fb(:,ny+1+displs-1:2*ny+displs-1,:,ivar1:ivar2) & +c3*fb(:,ny+1+displs :2*ny+displs ,:,ivar1:ivar2) & +c4*fb(:,ny+1+displs+1:2*ny+displs+1,:,ivar1:ivar2) & +c5*fb(:,ny+1+displs+2:2*ny+displs+2,:,ivar1:ivar2) & +c6*fb(:,ny+1+displs+3:2*ny+displs+3,:,ivar1:ivar2) ! ! Need to wait till buffer is empty before re-using it again. ! if (nextnextyb/=iproc) call MPI_WAIT(isend_rq_tonextnextyb,isend_stat_tnnb,mpierr) if (nextyb/=iproc) call MPI_WAIT(isend_rq_tonextyb,isend_stat_tnb,mpierr) if (lastyb/=iproc) call MPI_WAIT(isend_rq_tolastyb,isend_stat_tlb,mpierr) if (lastlastyb/=iproc) call MPI_WAIT(isend_rq_tolastlastyb,isend_stat_tllb,mpierr) ! if (nextnextya/=iproc) call MPI_WAIT(isend_rq_tonextnextya,isend_stat_tnna,mpierr) if (nextya/=iproc) call MPI_WAIT(isend_rq_tonextya,isend_stat_tna,mpierr) if (lastya/=iproc) call MPI_WAIT(isend_rq_tolastya,isend_stat_tla,mpierr) if (lastlastya/=iproc) call MPI_WAIT(isend_rq_tolastlastya,isend_stat_tlla,mpierr) ! endif endsubroutine finalize_shearing !*********************************************************************** subroutine radboundary_zx_recv(mrad,idir,Qrecv_zx) ! ! receive intensities from neighboring processor in y ! ! 11-jul-03/tobi: coded ! 20-jul-05/tobi: use non-blocking MPI calls ! integer :: mrad,idir real, dimension(mx,mz) :: Qrecv_zx integer :: isource integer, dimension(MPI_STATUS_SIZE) :: irecv_zx ! ! Identifier ! if (lroot.and.ip<5) print*,'radboundary_zx_recv: ENTER' ! ! source ! if (mrad>0) isource=ylneigh if (mrad<0) isource=yuneigh ! ! actual MPI call ! call MPI_RECV(Qrecv_zx,mx*mz,mpi_precision,isource,Qtag_zx+idir, & MPI_COMM_GRID,irecv_zx,mpierr) ! endsubroutine radboundary_zx_recv !*********************************************************************** subroutine radboundary_xy_recv(nrad,idir,Qrecv_xy) ! ! receive intensities from neighboring processor in z ! ! 11-jul-03/tobi: coded ! 20-jul-05/tobi: use non-blocking MPI calls ! integer :: nrad,idir real, dimension(mx,my) :: Qrecv_xy integer :: isource integer, dimension(MPI_STATUS_SIZE) :: irecv_xy ! ! Identifier ! if (lroot.and.ip<5) print*,'radboundary_xy_recv: ENTER' ! ! source ! if (nrad>0) isource=zlneigh if (nrad<0) isource=zuneigh ! ! actual MPI call ! call MPI_RECV(Qrecv_xy,mx*my,mpi_precision,isource,Qtag_xy+idir, & MPI_COMM_GRID,irecv_xy,mpierr) ! endsubroutine radboundary_xy_recv ! Felipe: Added below !*********************************************************************** subroutine radboundary_yz_recv(lrad,idir,Qrecv_yz) ! ! receive intensities from neighboring processor in z ! ! 11-jul-03/tobi: coded ! 20-jul-05/tobi: use non-blocking MPI calls ! 14-apr-22/felipe: adapted from radboundary_xy_recv ! 
integer :: lrad,idir real, dimension(my,mz) :: Qrecv_yz integer :: isource integer, dimension(MPI_STATUS_SIZE) :: irecv_yz ! ! Identifier ! if (lroot.and.ip<5) print*,'radboundary_yz_recv: ENTER' ! ! source ! if (lrad>0) isource=xlneigh if (lrad<0) isource=xuneigh ! ! actual MPI call ! call MPI_RECV(Qrecv_yz,my*mz,mpi_precision,isource,Qtag_yz+idir, & MPI_COMM_GRID,irecv_yz,mpierr) ! endsubroutine radboundary_yz_recv ! Felipe: Added above !*********************************************************************** subroutine radboundary_zx_send(mrad,idir,Qsend_zx) ! ! send intensities to neighboring processor in y ! ! 11-jul-03/tobi: coded ! 20-jul-05/tobi: use non-blocking MPI calls ! 2-jun-15/MR: corrected parameters of MPI_SEND (blocking!) ! integer :: mrad,idir real, dimension(mx,mz) :: Qsend_zx integer :: idest ! ! Identifier ! if (lroot.and.ip<5) print*,'radboundary_zx_send: ENTER' ! ! destination ! if (mrad>0) idest=yuneigh if (mrad<0) idest=ylneigh ! ! actual MPI call ! call MPI_SEND(Qsend_zx,mx*mz,mpi_precision,idest,Qtag_zx+idir, & MPI_COMM_GRID,mpierr) ! endsubroutine radboundary_zx_send !*********************************************************************** subroutine radboundary_xy_send(nrad,idir,Qsend_xy) ! ! send intensities to neighboring processor in z ! ! 11-jul-03/tobi: coded ! 20-jul-05/tobi: use non-blocking MPI calls ! 2-jun-15/MR: corrected parameters of MPI_SEND (blocking!) ! integer, intent(in) :: nrad,idir real, dimension(mx,my) :: Qsend_xy integer :: idest ! ! Identifier ! if (lroot.and.ip<5) print*,'radboundary_xy_send: ENTER' ! ! destination ! if (nrad>0) idest=zuneigh if (nrad<0) idest=zlneigh ! ! actual MPI call ! call MPI_SEND(Qsend_xy,mx*my,mpi_precision,idest,Qtag_xy+idir, & MPI_COMM_GRID,mpierr) ! endsubroutine radboundary_xy_send !*********************************************************************** ! Felipe: added below !*********************************************************************** subroutine radboundary_yz_send(lrad,idir,Qsend_yz) ! ! send intensities to neighboring processor in x ! ! 11-jul-03/tobi: coded ! 20-jul-05/tobi: use non-blocking MPI calls ! 2-jun-15/MR: corrected parameters of MPI_SEND (blocking!) ! 14-apr-22/felipe: adapted from radboundary_xy_send ! integer, intent(in) :: lrad,idir real, dimension(my,mz) :: Qsend_yz integer :: idest ! ! Identifier ! if (lroot.and.ip<5) print*,'radboundary_yz_send: ENTER' ! ! destination ! if (lrad>0) idest=xuneigh if (lrad<0) idest=xlneigh ! ! actual MPI call ! call MPI_SEND(Qsend_yz,my*mz,mpi_precision,idest,Qtag_yz+idir, & MPI_COMM_GRID,mpierr) ! endsubroutine radboundary_yz_send !*********************************************************************** ! Felipe: added above subroutine radboundary_yz_sendrecv(lrad,idir,Qsend_yz,Qrecv_yz) ! ! receive intensities from isource and send intensities to idest ! ! 17-nov-14/axel: adapted from radboundary_zx_sendrecv ! integer, intent(in) :: lrad,idir real, dimension(my,mz) :: Qsend_yz,Qrecv_yz integer :: idest,isource integer, dimension(MPI_STATUS_SIZE) :: isendrecv_yz ! ! Identifier ! if (lroot.and.ip<5) print*,'radboundary_yz_sendrecv: ENTER' ! ! destination and source id ! if (lrad>0) then; idest=xuneigh; isource=xlneigh; endif if (lrad<0) then; idest=xlneigh; isource=xuneigh; endif ! ! actual MPI call ! call MPI_SENDRECV(Qsend_yz,my*mz,mpi_precision,idest,Qtag_yz+idir, & Qrecv_yz,my*mz,mpi_precision,isource,Qtag_yz+idir, & MPI_COMM_GRID,isendrecv_yz,mpierr) ! 
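!
!  Note: MPI_SENDRECV combines the send and the receive in one call, so the
!  chain of processors along the ray cannot deadlock the way two separate
!  blocking MPI_SEND/MPI_RECV calls could if every rank tried to send first.
!  An equivalent non-blocking formulation would be (sketch only, not used
!  here; ireq and istat are illustrative local names):
!
!    call MPI_IRECV(Qrecv_yz, my*mz, mpi_precision, isource, Qtag_yz+idir, &
!                   MPI_COMM_GRID, ireq, mpierr)
!    call MPI_SEND (Qsend_yz, my*mz, mpi_precision, idest,   Qtag_yz+idir, &
!                   MPI_COMM_GRID, mpierr)
!    call MPI_WAIT (ireq, istat, mpierr)
!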
endsubroutine radboundary_yz_sendrecv !*********************************************************************** subroutine radboundary_zx_sendrecv(mrad,idir,Qsend_zx,Qrecv_zx) ! ! receive intensities from isource and send intensities to idest ! ! 04-aug-03/tobi: coded ! integer, intent(in) :: mrad,idir real, dimension(mx,mz) :: Qsend_zx,Qrecv_zx integer :: idest,isource integer, dimension(MPI_STATUS_SIZE) :: isendrecv_zx ! ! Identifier ! if (lroot.and.ip<5) print*,'radboundary_zx_sendrecv: ENTER' ! ! destination and source id ! if (mrad>0) then; idest=yuneigh; isource=ylneigh; endif if (mrad<0) then; idest=ylneigh; isource=yuneigh; endif ! ! actual MPI call ! call MPI_SENDRECV(Qsend_zx,mx*mz,mpi_precision,idest,Qtag_zx+idir, & Qrecv_zx,mx*mz,mpi_precision,isource,Qtag_zx+idir, & MPI_COMM_GRID,isendrecv_zx,mpierr) ! endsubroutine radboundary_zx_sendrecv !*********************************************************************** subroutine radboundary_yz_periodic_ray(Qrad_yz,tau_yz, & Qrad_yz_all,tau_yz_all) ! ! Gather all intrinsic optical depths and heating rates into one rank-3 array ! that is available on each processor. ! ! 17-nov-14/axel: adapted from radboundary_zx_periodic_ray ! real, dimension(ny,nz), intent(in) :: Qrad_yz,tau_yz real, dimension(ny,nz,0:nprocx-1), intent(out) :: Qrad_yz_all,tau_yz_all ! ! Identifier ! if (lroot.and.ip<5) print*,'radboundary_yz_periodic_ray: ENTER' ! ! actual MPI calls ! call MPI_ALLGATHER(tau_yz,ny*nz,mpi_precision,tau_yz_all,ny*nz,mpi_precision, & MPI_COMM_XBEAM,mpierr) ! call MPI_ALLGATHER(Qrad_yz,ny*nz,mpi_precision,Qrad_yz_all,ny*nz,mpi_precision, & MPI_COMM_XBEAM,mpierr) ! endsubroutine radboundary_yz_periodic_ray !*********************************************************************** subroutine radboundary_zx_periodic_ray(Qrad_zx,tau_zx, & Qrad_zx_all,tau_zx_all) ! ! Gather all intrinsic optical depths and heating rates into one rank-3 array ! that is available on each processor. ! ! 19-jul-05/tobi: rewritten ! real, dimension(nx,nz), intent(in) :: Qrad_zx,tau_zx real, dimension(nx,nz,0:nprocy-1), intent(out) :: Qrad_zx_all,tau_zx_all ! ! Identifier ! if (lroot.and.ip<5) print*,'radboundary_zx_periodic_ray: ENTER' ! ! actual MPI calls ! call MPI_ALLGATHER(tau_zx,nx*nz,mpi_precision,tau_zx_all,nx*nz,mpi_precision, & MPI_COMM_YBEAM,mpierr) ! call MPI_ALLGATHER(Qrad_zx,nx*nz,mpi_precision,Qrad_zx_all,nx*nz,mpi_precision, & MPI_COMM_YBEAM,mpierr) ! endsubroutine radboundary_zx_periodic_ray !*********************************************************************** subroutine mpirecv_char_scl(str,proc_src,tag_id,comm) ! ! Receive character scalar from other processor. ! ! 04-sep-06/wlad: coded ! character(LEN=*), intent(OUT) :: str integer, intent(IN ) :: proc_src, tag_id integer, optional,intent(IN) :: comm integer, dimension(MPI_STATUS_SIZE) :: stat ! call MPI_RECV(str, len(str), MPI_CHARACTER, proc_src, & tag_id, ioptest(comm,MPI_COMM_GRID), stat, mpierr) ! endsubroutine mpirecv_char_scl !*********************************************************************** subroutine mpisend_char_scl(str,proc_src,tag_id,comm) ! ! Send character scalar to other processor. ! ! 04-sep-06/wlad: coded ! character(LEN=*) :: str integer :: proc_src, tag_id integer, optional :: comm ! call MPI_SEND(str, len(str), MPI_CHARACTER, proc_src, & tag_id, ioptest(comm,MPI_COMM_GRID), mpierr) ! endsubroutine mpisend_char_scl !*********************************************************************** subroutine mpirecv_logical_scl(bcast_array,proc_src,tag_id,comm) ! ! 
!  Receive logical scalar from other processor.
!
!  04-sep-06/wlad: coded
!
      logical :: bcast_array
      integer :: proc_src, tag_id
      integer, dimension(MPI_STATUS_SIZE) :: stat
      integer, optional :: comm
!
      call MPI_RECV(bcast_array, 1, MPI_LOGICAL, proc_src, &
                    tag_id, ioptest(comm,MPI_COMM_GRID), stat, mpierr)
!
    endsubroutine mpirecv_logical_scl
!***********************************************************************
    subroutine mpirecv_logical_arr(bcast_array,nbcast_array,proc_src,tag_id)
!
!  Receive logical array from other processor.
!
!  04-sep-06/anders: coded
!
      integer :: nbcast_array
      logical, dimension(nbcast_array) :: bcast_array
      integer :: proc_src, tag_id
      integer, dimension(MPI_STATUS_SIZE) :: stat
!
      if (nbcast_array == 0) return
!
      call MPI_RECV(bcast_array, nbcast_array, MPI_LOGICAL, proc_src, &
                    tag_id, MPI_COMM_GRID, stat, mpierr)
!
    endsubroutine mpirecv_logical_arr
!***********************************************************************
    subroutine mpirecv_real_scl(bcast_array,proc_src,tag_id,comm,nonblock)
!
!  Receive real scalar from other processor.
!  If present, nonblock is the request id for non-blocking receive.
!
!  02-jul-05/anders: coded
!
      real :: bcast_array
      integer :: proc_src, tag_id
      integer, dimension(MPI_STATUS_SIZE) :: stat
!
      intent(out) :: bcast_array
!
      integer, optional :: comm, nonblock
      if (present(nonblock)) then
        call MPI_IRECV(bcast_array, 1, mpi_precision, proc_src, &
                       tag_id, ioptest(comm,MPI_COMM_GRID), nonblock, mpierr)
      else
        call MPI_RECV(bcast_array, 1, mpi_precision, proc_src, &
                      tag_id, ioptest(comm,MPI_COMM_GRID), stat, mpierr)
      endif
!
    endsubroutine mpirecv_real_scl
!***********************************************************************
    subroutine mpirecv_real_arr(bcast_array,nbcast_array,proc_src,tag_id,comm,nonblock)
!
!  Receive real array from other processor.
!  If present, nonblock is the request id for non-blocking receive.
!
!  02-jul-05/anders: coded
!
      integer :: nbcast_array
      real, dimension(nbcast_array) :: bcast_array
      integer :: proc_src, tag_id
      integer, dimension(MPI_STATUS_SIZE) :: stat
      integer, optional :: comm, nonblock
!
      intent(out) :: bcast_array
!
      if (nbcast_array == 0) return
!
      if (present(nonblock)) then
        call MPI_IRECV(bcast_array, nbcast_array, mpi_precision, proc_src, &
                       tag_id, ioptest(comm,MPI_COMM_GRID), nonblock, mpierr)
      else
        call MPI_RECV(bcast_array, nbcast_array, mpi_precision, proc_src, &
                      tag_id, ioptest(comm,MPI_COMM_GRID), stat, mpierr)
      endif
!
    endsubroutine mpirecv_real_arr
!***********************************************************************
    subroutine mpirecv_real_arr2(bcast_array,nbcast_array,proc_src,tag_id,comm)
!
!  Receive real array(:,:) from other processor.
!
!  02-jul-05/anders: coded
!
      integer, dimension(2) :: nbcast_array
      real, dimension(nbcast_array(1),nbcast_array(2)) :: bcast_array
      integer :: proc_src, tag_id, num_elements
      integer, optional :: comm
      integer, dimension(MPI_STATUS_SIZE) :: stat
!
      intent(out) :: bcast_array
!
      if (any(nbcast_array == 0)) return
!
      num_elements = product(nbcast_array)
      call MPI_RECV(bcast_array, num_elements, mpi_precision, proc_src, &
                    tag_id, ioptest(comm,MPI_COMM_GRID), stat, mpierr)
!
    endsubroutine mpirecv_real_arr2
!***********************************************************************
    subroutine mpirecv_real_arr3(bcast_array,nbcast_array,proc_src,tag_id,comm,nonblock)
!
!  Receive real array(:,:,:) from other processor.
!  If present, nonblock is the request id for non-blocking receive.
!
!  20-may-06/anders: adapted
!
integer, dimension(3) :: nbcast_array real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3)) :: bcast_array integer :: proc_src, tag_id, num_elements integer, optional :: comm,nonblock integer, dimension(MPI_STATUS_SIZE) :: stat ! intent(out) :: bcast_array ! if (any(nbcast_array == 0)) return ! num_elements = product(nbcast_array) if (present(nonblock)) then call MPI_IRECV(bcast_array, num_elements, mpi_precision, proc_src, & tag_id, ioptest(comm,MPI_COMM_GRID), nonblock, mpierr) else call MPI_RECV(bcast_array, num_elements, mpi_precision, proc_src, & tag_id, ioptest(comm,MPI_COMM_GRID), stat, mpierr) endif ! endsubroutine mpirecv_real_arr3 !*********************************************************************** subroutine mpirecv_cmplx_arr3(bcast_array,nbcast_array,proc_src,tag_id,comm,nonblock) ! ! Receive complex array(:,:,:) from other processor. ! If present, nonblock is the request id for non-blockin receive. ! ! 20-may-06/anders: adapted ! integer, dimension(3) :: nbcast_array complex, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3)) :: bcast_array integer :: proc_src, tag_id, num_elements integer, optional :: comm,nonblock integer, dimension(MPI_STATUS_SIZE) :: stat ! intent(out) :: bcast_array ! if (any(nbcast_array == 0)) return ! num_elements = product(nbcast_array) if (present(nonblock)) then call MPI_IRECV(bcast_array, num_elements, MPI_CMPLX, proc_src, & tag_id, ioptest(comm,MPI_COMM_GRID), nonblock, mpierr) else call MPI_RECV(bcast_array, num_elements, MPI_CMPLX, proc_src, & tag_id, ioptest(comm,MPI_COMM_GRID), stat, mpierr) endif ! endsubroutine mpirecv_cmplx_arr3 !*********************************************************************** subroutine mpirecv_real_arr4(bcast_array,nbcast_array,proc_src,tag_id,comm,nonblock) ! ! Receive real array(:,:,:,:) from other processor. ! If present, nonblock is the request id for non-blockin receive. ! ! 20-may-06/anders: adapted ! integer, dimension(4) :: nbcast_array real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3),nbcast_array(4)) :: bcast_array integer :: proc_src, tag_id, num_elements integer, dimension(MPI_STATUS_SIZE) :: stat integer, optional :: comm,nonblock ! intent(out) :: bcast_array ! if (any(nbcast_array == 0)) return ! num_elements = product(nbcast_array) if (present(nonblock)) then call MPI_IRECV(bcast_array, num_elements, mpi_precision, proc_src, & tag_id, ioptest(comm,MPI_COMM_GRID), nonblock, mpierr) else call MPI_RECV(bcast_array, num_elements, mpi_precision, proc_src, & tag_id, ioptest(comm,MPI_COMM_GRID), stat, mpierr) endif ! endsubroutine mpirecv_real_arr4 !*********************************************************************** subroutine mpirecv_real_arr5(bcast_array,nbcast_array,proc_src,tag_id) ! ! Receive real array(:,:,:,:) from other processor. ! ! 24-apr-17/Jorgen: adapted ! integer, dimension(5) :: nbcast_array real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3),nbcast_array(4),nbcast_array(5)) :: bcast_array integer :: proc_src, tag_id, num_elements integer, dimension(MPI_STATUS_SIZE) :: stat ! intent(out) :: bcast_array ! if (any(nbcast_array == 0)) return ! num_elements = product(nbcast_array) call MPI_RECV(bcast_array, num_elements, mpi_precision, proc_src, & tag_id, MPI_COMM_GRID, stat, mpierr) ! endsubroutine mpirecv_real_arr5 !*********************************************************************** subroutine mpirecv_int_scl(bcast_array,proc_src,tag_id,comm,nonblock) ! ! Receive integer scalar from other processor. ! 
If present, nonblock is the request id for non-blockin receive. ! ! 02-jul-05/anders: coded ! integer :: bcast_array integer :: proc_src, tag_id integer, dimension(MPI_STATUS_SIZE) :: stat integer, optional :: comm, nonblock ! if (present(nonblock)) then call MPI_IRECV(bcast_array, 1, MPI_INTEGER, proc_src, & tag_id, ioptest(comm,MPI_COMM_GRID), nonblock, mpierr) else call MPI_RECV(bcast_array, 1, MPI_INTEGER, proc_src, & tag_id, ioptest(comm,MPI_COMM_GRID), stat, mpierr) endif ! endsubroutine mpirecv_int_scl !*********************************************************************** subroutine mpirecv_int_arr(bcast_array,nbcast_array,proc_src,tag_id,comm,nonblock) ! ! Receive integer array from other processor. ! If present, nonblock is the request id for non-blockin receive. ! ! 02-jul-05/anders: coded ! integer :: nbcast_array integer, dimension(nbcast_array) :: bcast_array integer :: proc_src, tag_id integer, dimension(MPI_STATUS_SIZE) :: stat integer, optional :: comm, nonblock ! if (nbcast_array == 0) return ! if (present(nonblock)) then call MPI_IRECV(bcast_array, nbcast_array, MPI_INTEGER, proc_src, & tag_id, ioptest(comm,MPI_COMM_GRID), nonblock, mpierr) else call MPI_RECV(bcast_array, nbcast_array, MPI_INTEGER, proc_src, & tag_id, ioptest(comm,MPI_COMM_GRID), stat, mpierr) endif ! endsubroutine mpirecv_int_arr !*********************************************************************** subroutine mpirecv_int_arr2(bcast_array,nbcast_array,proc_src,tag_id) ! ! Receive 2D integer array from other processor. ! ! 20-fev-08/wlad: adpated from mpirecv_real_arr2 ! integer, dimension(2) :: nbcast_array integer, dimension(nbcast_array(1),nbcast_array(2)) :: bcast_array integer :: proc_src, tag_id, num_elements integer, dimension(MPI_STATUS_SIZE) :: stat ! intent(out) :: bcast_array ! if (any(nbcast_array == 0)) return ! num_elements = product(nbcast_array) call MPI_RECV(bcast_array, num_elements, MPI_INTEGER, proc_src, & tag_id, MPI_COMM_GRID, stat, mpierr) ! endsubroutine mpirecv_int_arr2 !*********************************************************************** subroutine mpisend_logical_scl(bcast_array,proc_rec,tag_id,comm) ! ! Send logical scalar to other processor. ! ! 04-sep-06/wlad: coded ! logical :: bcast_array integer :: proc_rec, tag_id integer, optional :: comm ! call MPI_SEND(bcast_array, 1, MPI_LOGICAL, proc_rec, & tag_id, ioptest(comm,MPI_COMM_GRID), mpierr) ! endsubroutine mpisend_logical_scl !*********************************************************************** subroutine mpisend_logical_arr(bcast_array,nbcast_array,proc_rec,tag_id) ! ! Send logical array to other processor. ! ! 04-sep-06/wlad: coded ! integer :: nbcast_array logical, dimension(nbcast_array) :: bcast_array integer :: proc_rec, tag_id ! if (nbcast_array == 0) return ! call MPI_SEND(bcast_array, nbcast_array, MPI_LOGICAL, proc_rec, & tag_id, MPI_COMM_GRID, mpierr) ! endsubroutine mpisend_logical_arr !*********************************************************************** subroutine mpisend_real_scl(bcast_array,proc_rec,tag_id,comm) ! ! Send real scalar to other processor. ! ! 02-jul-05/anders: coded ! real :: bcast_array integer :: proc_rec, tag_id integer, optional :: comm ! call MPI_SEND(bcast_array, 1, mpi_precision, proc_rec, & tag_id, ioptest(comm,MPI_COMM_GRID), mpierr) ! endsubroutine mpisend_real_scl !*********************************************************************** subroutine mpisend_real_arr(bcast_array,nbcast_array,proc_rec,tag_id,comm) ! ! Send real array to other processor. ! ! 
02-jul-05/anders: coded
!
      integer :: nbcast_array
      real, dimension(nbcast_array) :: bcast_array
      integer :: proc_rec, tag_id
      integer, optional :: comm
!
      if (nbcast_array == 0) return
!
      call MPI_SEND(bcast_array, nbcast_array, mpi_precision, proc_rec, &
                    tag_id, ioptest(comm,MPI_COMM_GRID), mpierr)
!
    endsubroutine mpisend_real_arr
!***********************************************************************
    subroutine mpisend_real_arr_assumed(bcast_array,nbcast_array,offset,proc_rec,tag_id,comm)
!
!  Sends nbcast_array elements of real array bcast_array from position offset to other processor.
!  Avoids a compilation error when the shapes of bcast_array and the actual argument do not agree.
!
!  06-oct-22/MR: coded
!
      real, dimension(*) :: bcast_array
      integer(KIND=ikind8) :: offset
      integer :: nbcast_array,proc_rec,tag_id
      integer, optional :: comm
!
      if (nbcast_array == 0) return
!
      call MPI_SEND(bcast_array(offset), nbcast_array, mpi_precision, proc_rec, &
                    tag_id, ioptest(comm,MPI_COMM_GRID), mpierr)
!
    endsubroutine mpisend_real_arr_assumed
!***********************************************************************
    subroutine mpisend_real_arr_huge(array,len_array,partner,tag,comm)
!
!  Allows communication of arrays with length > max_int: the bulk is sent in
!  chunks of max_int elements (datatype REAL_ARR_MAXSIZE), the remainder with tag+1.
!
!  06-oct-22/MR: coded
!
      real, dimension(*) :: array
      integer(KIND=ikind8) :: len_array
      integer :: partner,tag
      integer, optional :: comm
!
      integer :: mult,res
!
      if (len_array == 0) return
!
      mult=len_array/max_int
      res =len_array-mult*max_int
      if (mult>0) &
        call MPI_SEND(array, mult, REAL_ARR_MAXSIZE, partner, tag, &
                      ioptest(comm,MPI_COMM_GRID), mpierr)
      if (res>0) &
        call MPI_SEND(array(len_array-res+1), res, mpi_precision, partner, tag+1, &
                      ioptest(comm,MPI_COMM_GRID), mpierr)
!
    endsubroutine mpisend_real_arr_huge
!***********************************************************************
    subroutine mpirecv_real_arr_huge(array,len_array,partner,tag,comm)
!
!  Allows communication of arrays with length > max_int; counterpart of
!  mpisend_real_arr_huge.
!
!  06-oct-22/MR: coded
!
      real, dimension(*) :: array
      integer(KIND=ikind8) :: len_array
      integer :: partner,tag
      integer, optional :: comm
!
      integer :: mult,res
      integer, dimension(MPI_STATUS_SIZE) :: stat
!
      if (len_array == 0) return
!
      mult=len_array/max_int
      res =len_array-mult*max_int
      if (mult>0) &
        call MPI_RECV(array, mult, REAL_ARR_MAXSIZE, partner, tag, &
                      ioptest(comm,MPI_COMM_GRID), stat, mpierr)
      if (res>0) &
        call MPI_RECV(array(len_array-res+1), res, mpi_precision, partner, tag+1, &
                      ioptest(comm,MPI_COMM_GRID), stat, mpierr)
!
    endsubroutine mpirecv_real_arr_huge
!***********************************************************************
    subroutine mpirecv_real_arr_assumed(bcast_array,nbcast_array,offset,proc_rec,tag_id,comm)
!
!  Receives nbcast_array elements of real array bcast_array at position offset from other processor.
!  Avoids a compilation error when the shapes of bcast_array and the actual argument do not agree.
!
!  06-oct-22/MR: coded
!
      real, dimension(*) :: bcast_array
      integer(KIND=ikind8) :: offset
      integer :: nbcast_array,proc_rec,tag_id
      integer, optional :: comm
!
      integer, dimension(MPI_STATUS_SIZE) :: stat
!
      if (nbcast_array == 0) return
!
      call MPI_RECV(bcast_array(offset), nbcast_array, mpi_precision, proc_rec, &
                    tag_id, ioptest(comm,MPI_COMM_GRID), stat, mpierr)
!
    endsubroutine mpirecv_real_arr_assumed
!***********************************************************************
    subroutine mpisend_real_arr2(bcast_array,nbcast_array,proc_rec,tag_id,comm)
!
!  Send real array(:,:) to other processor.
!
!  02-jul-05/anders: coded
!
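!  Usage sketch (illustrative; buf2, partner and tag are hypothetical
!  names): sender and receiver pass the same shape vector:
!
!    real, dimension(nx,ny) :: buf2
!    call mpisend_real_arr2(buf2,(/nx,ny/),partner,tag)   ! on the sender
!    call mpirecv_real_arr2(buf2,(/nx,ny/),partner,tag)   ! on the receiver
!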
integer, dimension(2) :: nbcast_array
      real, dimension(nbcast_array(1),nbcast_array(2)) :: bcast_array
      integer :: proc_rec, tag_id, num_elements
      integer, optional :: comm
!
      if (any(nbcast_array == 0)) return
!
      num_elements = product(nbcast_array)
      call MPI_SEND(bcast_array, num_elements, mpi_precision, proc_rec, &
                    tag_id, ioptest(comm,MPI_COMM_GRID), mpierr)
!
    endsubroutine mpisend_real_arr2
!***********************************************************************
    subroutine mpisend_real_arr3(bcast_array,nbcast_array,proc_rec,tag_id,comm,nonblock)
!
!  Send real array(:,:,:) to other processor.
!  If present, nonblock is the request id for non-blocking send.
!
!  20-may-06/anders: adapted
!
      integer, dimension(3) :: nbcast_array
      real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3)) :: bcast_array
      integer :: proc_rec, tag_id, num_elements
      integer, optional :: comm,nonblock
!
      if (any(nbcast_array == 0)) return
!
      num_elements = product(nbcast_array)
      if (present(nonblock)) then
        call MPI_ISEND(bcast_array, num_elements, mpi_precision, proc_rec, &
                       tag_id, ioptest(comm,MPI_COMM_GRID), nonblock, mpierr)
      else
        call MPI_SEND(bcast_array, num_elements, mpi_precision, proc_rec, &
                      tag_id, ioptest(comm,MPI_COMM_GRID), mpierr)
      endif
!
    endsubroutine mpisend_real_arr3
!***********************************************************************
    subroutine mpisend_cmplx_arr3(bcast_array,nbcast_array,proc_rec,tag_id,comm,nonblock)
!
!  Send complex array(:,:,:) to other processor.
!  If present, nonblock is the request id for non-blocking send.
!
!  20-may-06/anders: adapted
!
      integer, dimension(3) :: nbcast_array
      complex, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3)) :: bcast_array
      integer :: proc_rec, tag_id, num_elements
      integer, optional :: comm,nonblock
!
      if (any(nbcast_array == 0)) return
!
      num_elements = product(nbcast_array)
      if (present(nonblock)) then
        call MPI_ISEND(bcast_array, num_elements, MPI_CMPLX, proc_rec, &
                       tag_id, ioptest(comm,MPI_COMM_GRID), nonblock, mpierr)
      else
        call MPI_SEND(bcast_array, num_elements, MPI_CMPLX, proc_rec, &
                      tag_id, ioptest(comm,MPI_COMM_GRID), mpierr)
      endif
!
    endsubroutine mpisend_cmplx_arr3
!***********************************************************************
    subroutine mpisend_real_arr4(bcast_array,nbcast_array,proc_rec,tag_id,comm)
!
!  Send real array(:,:,:,:) to other processor.
!
!  20-may-06/anders: adapted
!
      integer, dimension(4) :: nbcast_array
      real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3),nbcast_array(4)) :: bcast_array
      integer :: proc_rec, tag_id, num_elements
      integer, optional :: comm
!
      if (any(nbcast_array == 0)) return
!
      num_elements = product(nbcast_array)
      call MPI_SEND(bcast_array, num_elements, mpi_precision, proc_rec, &
                    tag_id, ioptest(comm,MPI_COMM_GRID), mpierr)
!
    endsubroutine mpisend_real_arr4
!***********************************************************************
    subroutine mpisend_real_arr5(bcast_array,nbcast_array,proc_rec,tag_id)
!
!  Send real array(:,:,:,:,:) to other processor.
!
!  20-may-06/anders: adapted
!
      integer, dimension(5) :: nbcast_array
      real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3),nbcast_array(4),nbcast_array(5)) :: bcast_array
      integer :: proc_rec, tag_id, num_elements
!
      if (any(nbcast_array == 0)) return
!
      num_elements = product(nbcast_array)
      call MPI_SEND(bcast_array, num_elements, mpi_precision, proc_rec, &
                    tag_id, MPI_COMM_GRID, mpierr)
!
endsubroutine mpisend_real_arr5 !*********************************************************************** subroutine mpisendrecv_int_arr(send_array,sendcnt,proc_dest,sendtag, & recv_array,proc_src,recvtag,comm) integer :: sendcnt integer, dimension(sendcnt) :: send_array, recv_array integer :: proc_src, proc_dest, sendtag, recvtag integer, optional :: comm integer, dimension(MPI_STATUS_SIZE) :: stat intent(out) :: recv_array ! if (sendcnt==0) return call MPI_SENDRECV(send_array,sendcnt,MPI_INTEGER,proc_dest,sendtag, & recv_array,sendcnt,MPI_INTEGER,proc_src,recvtag, & ioptest(comm,MPI_COMM_GRID),stat,mpierr) ! endsubroutine mpisendrecv_int_arr !*********************************************************************** subroutine mpisendrecv_real_scl(send_array,proc_dest,sendtag, & recv_array,proc_src,recvtag) real :: send_array, recv_array integer :: proc_src, proc_dest, sendtag, recvtag integer, dimension(MPI_STATUS_SIZE) :: stat intent(out) :: recv_array call MPI_SENDRECV(send_array,1,mpi_precision,proc_dest,sendtag, & recv_array,1,mpi_precision,proc_src,recvtag, & MPI_COMM_GRID,stat,mpierr) endsubroutine mpisendrecv_real_scl !*********************************************************************** subroutine mpisendrecv_real_arr(send_array,sendcnt,proc_dest,sendtag, & recv_array,proc_src,recvtag,idir) integer :: sendcnt real, dimension(sendcnt) :: send_array real, dimension(sendcnt) :: recv_array integer :: proc_src, proc_dest, sendtag, recvtag integer, optional :: idir integer, dimension(MPI_STATUS_SIZE) :: stat intent(out) :: recv_array ! if (sendcnt==0) return call MPI_SENDRECV(send_array,sendcnt,mpi_precision,proc_dest,sendtag, & recv_array,sendcnt,mpi_precision,proc_src,recvtag, & mpigetcomm(ioptest(idir)),stat,mpierr) ! endsubroutine mpisendrecv_real_arr !*********************************************************************** subroutine mpisendrecv_real_arr2(send_array,nbcast_array,proc_dest,sendtag, & recv_array,proc_src,recvtag,idir) integer, dimension(2) :: nbcast_array real, dimension(nbcast_array(1),nbcast_array(2)) :: send_array real, dimension(nbcast_array(1),nbcast_array(2)) :: recv_array integer :: proc_src, proc_dest, sendtag, recvtag, num_elements integer, dimension(MPI_STATUS_SIZE) :: stat intent(out) :: recv_array integer, optional :: idir if (any(nbcast_array == 0)) return num_elements = product(nbcast_array) call MPI_SENDRECV(send_array,num_elements,mpi_precision,proc_dest,sendtag, & recv_array,num_elements,mpi_precision,proc_src,recvtag, & mpigetcomm(ioptest(idir)),stat,mpierr) endsubroutine mpisendrecv_real_arr2 !*********************************************************************** subroutine mpisendrecv_real_arr3(send_array,nbcast_array,proc_dest,sendtag, & recv_array,proc_src,recvtag) integer, dimension(3) :: nbcast_array real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3)) :: send_array real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3)) :: recv_array integer :: proc_src, proc_dest, sendtag, recvtag, num_elements integer, dimension(MPI_STATUS_SIZE) :: stat intent(out) :: recv_array if (any(nbcast_array == 0)) return num_elements = product(nbcast_array) call MPI_SENDRECV(send_array,num_elements,mpi_precision,proc_dest,sendtag, & recv_array,num_elements,mpi_precision,proc_src,recvtag, & MPI_COMM_GRID,stat,mpierr) endsubroutine mpisendrecv_real_arr3 !*********************************************************************** subroutine mpisendrecv_real_arr4(send_array,nbcast_array,proc_dest,sendtag, & recv_array,proc_src,recvtag) integer, 
dimension(4) :: nbcast_array
      real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3),nbcast_array(4)) :: send_array
      real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3),nbcast_array(4)) :: recv_array
      integer :: proc_src, proc_dest, sendtag, recvtag, num_elements
      integer, dimension(MPI_STATUS_SIZE) :: stat
      intent(out) :: recv_array
!
      if (any(nbcast_array == 0)) return
!
      num_elements = product(nbcast_array)
      call MPI_SENDRECV(send_array,num_elements,mpi_precision,proc_dest,sendtag, &
                        recv_array,num_elements,mpi_precision,proc_src,recvtag, &
                        MPI_COMM_GRID,stat,mpierr)
!
    endsubroutine mpisendrecv_real_arr4
!***********************************************************************
    subroutine mpisend_int_scl(bcast_array,proc_rec,tag_id,comm)
!
!  Send integer scalar to other processor.
!
!  02-jul-05/anders: coded
!
      integer :: bcast_array
      integer :: proc_rec, tag_id
      integer, optional :: comm
!
      call MPI_SEND(bcast_array, 1, MPI_INTEGER, proc_rec, &
                    tag_id, ioptest(comm,MPI_COMM_GRID), mpierr)
!
    endsubroutine mpisend_int_scl
!***********************************************************************
    subroutine mpirecv_nonblock_int_scl(bcast_array,proc_src,tag_id,ireq,comm)
!
!  Receive integer scalar from other processor, with non-blocking communication.
!
!  12-dec-14/wlad: adapted
!
      integer :: bcast_array
      integer :: proc_src, tag_id, ireq
      integer, optional :: comm
!
      call MPI_IRECV(bcast_array, 1, MPI_INTEGER, proc_src, &
                     tag_id, ioptest(comm,MPI_COMM_GRID), ireq, mpierr)
!
    endsubroutine mpirecv_nonblock_int_scl
!***********************************************************************
    subroutine mpirecv_nonblock_int_arr(bcast_array,nbcast_array,proc_src,tag_id,ireq)
!
!  Receive integer array from other processor, with non-blocking communication.
!
!  12-dec-14/wlad: adapted
!
      integer :: nbcast_array
      integer, dimension(nbcast_array) :: bcast_array
      integer :: proc_src, tag_id, ireq
!
      if (nbcast_array == 0) return
!
      call MPI_IRECV(bcast_array, nbcast_array, MPI_INTEGER, proc_src, &
                     tag_id, MPI_COMM_GRID, ireq, mpierr)
!
    endsubroutine mpirecv_nonblock_int_arr
!***********************************************************************
    subroutine mpirecv_nonblock_int_arr2(bcast_array,nbcast_array,proc_src,tag_id,ireq)
!
!  Receive integer array(:,:) from other processor, with non-blocking communication.
!
!  30-apr-17/Jorgen: adapted
!
      integer, dimension(2) :: nbcast_array
      integer, dimension(nbcast_array(1),nbcast_array(2)) :: bcast_array
      integer :: proc_src, tag_id, ireq, num_elements
!
      if (any(nbcast_array == 0)) return
!
      num_elements = product(nbcast_array)
      call MPI_IRECV(bcast_array, num_elements, MPI_INTEGER, proc_src, &
                     tag_id, MPI_COMM_GRID, ireq, mpierr)
!
    endsubroutine mpirecv_nonblock_int_arr2
!***********************************************************************
    subroutine mpirecv_nonblock_real_arr(bcast_array,nbcast_array,proc_src,tag_id,ireq)
!
!  Receive real array from other processor, with non-blocking communication.
!
!  12-dec-14/wlad: adapted
!
      integer :: nbcast_array
      real, dimension(nbcast_array) :: bcast_array
      integer :: proc_src, tag_id, ireq
!
      intent(out) :: bcast_array
!
      if (nbcast_array == 0) return
!
      call MPI_IRECV(bcast_array, nbcast_array, mpi_precision, proc_src, &
                     tag_id, MPI_COMM_GRID, ireq, mpierr)
!
    endsubroutine mpirecv_nonblock_real_arr
!***********************************************************************
    subroutine mpirecv_nonblock_real_arr2(bcast_array,nbcast_array,proc_src,tag_id,ireq)
!
!  Receive real array(:,:) from other processor, with non-blocking communication.
!
! 12-dec-14/wlad: adapted ! integer, dimension(2) :: nbcast_array real, dimension(nbcast_array(1),nbcast_array(2)) :: bcast_array integer :: proc_src, tag_id, ireq, num_elements ! intent(out) :: bcast_array if (any(nbcast_array == 0)) return ! num_elements = product(nbcast_array) call MPI_IRECV(bcast_array, num_elements, mpi_precision, proc_src, & tag_id, MPI_COMM_GRID, ireq, mpierr) ! endsubroutine mpirecv_nonblock_real_arr2 !*********************************************************************** subroutine mpirecv_nonblock_real_arr3(bcast_array,nbcast_array,proc_src,tag_id,ireq,comm) ! ! Receive real array(:,:) from other processor, with non-blocking communication. ! ! 07-jul-17/Jorgen: adapted ! integer, dimension(3) :: nbcast_array real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3)) :: bcast_array integer :: proc_src, tag_id, ireq, num_elements integer, optional :: comm ! intent(out) :: bcast_array if (any(nbcast_array == 0)) return ! num_elements = product(nbcast_array) call MPI_IRECV(bcast_array, num_elements, mpi_precision, proc_src, & tag_id, ioptest(comm,MPI_COMM_GRID), ireq, mpierr) ! endsubroutine mpirecv_nonblock_real_arr3 !*********************************************************************** subroutine mpirecv_nonblock_real_arr4(bcast_array,nbcast_array,proc_src,tag_id,ireq,comm) ! ! Receive real array(:,:,:,:) from other processor, with non-blocking communication. ! ! 12-dec-14/wlad: adapted ! integer, dimension(4) :: nbcast_array real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3),nbcast_array(4)) :: bcast_array integer :: proc_src, tag_id, ireq, num_elements integer, optional :: comm ! intent(out) :: bcast_array if (any(nbcast_array == 0)) return ! num_elements = product(nbcast_array) call MPI_IRECV(bcast_array, num_elements, mpi_precision, proc_src, & tag_id, ioptest(comm,MPI_COMM_GRID), ireq, mpierr) ! endsubroutine mpirecv_nonblock_real_arr4 !*********************************************************************** subroutine mpirecv_nonblock_real_arr5(bcast_array,nbcast_array,proc_src,tag_id,ireq) ! ! Receive real array(:,:,:,:) from other processor, with non-blocking communication. ! ! 24-apr-17/Jorgen: adapted ! integer, dimension(5) :: nbcast_array real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3),nbcast_array(4),nbcast_array(5)) :: bcast_array integer :: proc_src, tag_id, ireq, num_elements ! intent(out) :: bcast_array if (any(nbcast_array == 0)) return ! num_elements = product(nbcast_array) call MPI_IRECV(bcast_array, num_elements, mpi_precision, proc_src, & tag_id, MPI_COMM_GRID, ireq, mpierr) ! endsubroutine mpirecv_nonblock_real_arr5 !*********************************************************************** subroutine mpisend_nonblock_real_arr(bcast_array,nbcast_array,proc_rec,tag_id,ireq) ! ! Send real array to other processor, with non-blocking communication. ! ! 12-dec-14/wlad: adapted ! integer :: nbcast_array real, dimension(nbcast_array) :: bcast_array integer :: proc_rec, tag_id, ireq ! if (nbcast_array == 0) return ! call MPI_ISEND(bcast_array, nbcast_array, mpi_precision, proc_rec, & tag_id, MPI_COMM_GRID, ireq, mpierr) ! endsubroutine mpisend_nonblock_real_arr !*********************************************************************** subroutine mpisend_nonblock_real_arr2(bcast_array,nbcast_array,proc_rec,tag_id,ireq) ! ! Send real array(:,:,:) to other processor, with non-blocking communication. ! ! 03-okt-17/Jorgen: adapted ! 
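!  Usage sketch for the non-blocking variants (illustrative; buf2, partner,
!  tag and ireq are hypothetical names): the returned request must be
!  completed, e.g. with MPI_WAIT, before the buffer is reused:
!
!    integer :: ireq
!    integer, dimension(MPI_STATUS_SIZE) :: stat
!    call mpisend_nonblock_real_arr2(buf2,(/nx,ny/),partner,tag,ireq)
!    ...                      ! overlap work that does not touch buf2
!    call MPI_WAIT(ireq,stat,mpierr)
!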
integer, dimension(2) :: nbcast_array real, dimension(nbcast_array(1),nbcast_array(2)) :: bcast_array integer :: proc_rec, tag_id, ireq, num_elements ! if (any(nbcast_array == 0)) return ! num_elements = product(nbcast_array) call MPI_ISEND(bcast_array, num_elements, mpi_precision, proc_rec, & tag_id, MPI_COMM_GRID,ireq,mpierr) ! endsubroutine mpisend_nonblock_real_arr2 !*********************************************************************** subroutine mpisend_nonblock_real_arr3(bcast_array,nbcast_array,proc_rec,tag_id,ireq) ! ! Send real array(:,:,:) to other processor, with non-blocking communication. ! ! 03-aug-17/Jorgen: adapted ! integer, dimension(3) :: nbcast_array real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3)) :: bcast_array integer :: proc_rec, tag_id, ireq, num_elements ! if (any(nbcast_array == 0)) return ! num_elements = product(nbcast_array) call MPI_ISEND(bcast_array, num_elements, mpi_precision, proc_rec, & tag_id, MPI_COMM_GRID,ireq,mpierr) ! endsubroutine mpisend_nonblock_real_arr3 !*********************************************************************** subroutine mpisend_nonblock_real_arr4(bcast_array,nbcast_array,proc_rec,tag_id,ireq) ! ! Send real array(:,:,:,:) to other processor, with non-blocking communication. ! ! 12-dec-14/wlad: adapted ! integer, dimension(4) :: nbcast_array real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3),nbcast_array(4)) :: bcast_array integer :: proc_rec, tag_id, ireq, num_elements ! if (any(nbcast_array == 0)) return ! num_elements = product(nbcast_array) call MPI_ISEND(bcast_array, num_elements, mpi_precision, proc_rec, & tag_id, MPI_COMM_GRID,ireq,mpierr) ! endsubroutine mpisend_nonblock_real_arr4 !*********************************************************************** subroutine mpisend_nonblock_real_arr5(bcast_array,nbcast_array,proc_rec,tag_id,ireq) ! ! Send real array(:,:,:,:) to other processor, with non-blocking communication. ! ! 24-apr-17/Jorgen: adapted ! integer, dimension(5) :: nbcast_array real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3),nbcast_array(4),nbcast_array(5)) :: bcast_array integer :: proc_rec, tag_id, ireq, num_elements ! if (any(nbcast_array == 0)) return ! num_elements = product(nbcast_array) call MPI_ISEND(bcast_array, num_elements, mpi_precision, proc_rec, & tag_id, MPI_COMM_GRID,ireq,mpierr) ! endsubroutine mpisend_nonblock_real_arr5 !*********************************************************************** subroutine mpisend_nonblock_int_scl(bcast_array,proc_rec,tag_id,ireq) ! ! Send integer scalar to other processor, with non-blocking communication. ! ! 12-dec-14/wlad: adapted ! integer :: bcast_array integer :: proc_rec, tag_id, ireq ! call MPI_ISEND(bcast_array, 1, MPI_INTEGER, proc_rec, & tag_id, MPI_COMM_GRID, ireq, mpierr) ! endsubroutine mpisend_nonblock_int_scl !*********************************************************************** subroutine mpisend_nonblock_int_arr(bcast_array,nbcast_array,proc_rec,tag_id,ireq) ! ! Send integer array to other processor, with non-blocking communication. ! ! 12-dec-14/wlad: adapted ! integer :: nbcast_array integer, dimension(nbcast_array) :: bcast_array integer :: proc_rec, tag_id, ireq ! if (nbcast_array == 0) return ! call MPI_ISEND(bcast_array, nbcast_array, MPI_INTEGER, proc_rec, & tag_id, MPI_COMM_GRID, ireq, mpierr) ! endsubroutine mpisend_nonblock_int_arr !*********************************************************************** subroutine mpisend_nonblock_int_arr2(bcast_array,nbcast_array,proc_rec,tag_id,ireq) ! ! 
Send 2d integer array to other processor, with non-blocking communication. ! ! 18-apr-17/Jorgen: adapted ! integer, dimension(2) :: nbcast_array integer, dimension(nbcast_array(1),nbcast_array(2)) :: bcast_array integer :: proc_rec, tag_id, ireq, num_elements ! if (any(nbcast_array == 0)) return ! num_elements = product(nbcast_array) call MPI_ISEND(bcast_array, num_elements, MPI_INTEGER, proc_rec, & tag_id, MPI_COMM_GRID, ireq, mpierr) ! endsubroutine mpisend_nonblock_int_arr2 !*********************************************************************** subroutine mpisend_int_arr(bcast_array,nbcast_array,proc_rec,tag_id,comm,nonblock) ! ! Send integer array to other processor. ! If present, nonblock is the request id for non-blockin send. ! ! 02-jul-05/anders: coded ! integer :: nbcast_array integer, dimension(nbcast_array) :: bcast_array integer :: proc_rec, tag_id integer, optional :: comm,nonblock ! if (nbcast_array == 0) return ! if (present(nonblock)) then call MPI_ISEND(bcast_array, nbcast_array, MPI_INTEGER, proc_rec, & tag_id, ioptest(comm,MPI_COMM_GRID), nonblock, mpierr) else call MPI_SEND(bcast_array, nbcast_array, MPI_INTEGER, proc_rec, & tag_id, ioptest(comm,MPI_COMM_GRID), mpierr) endif ! endsubroutine mpisend_int_arr !*********************************************************************** subroutine mpisend_int_arr2(bcast_array,nbcast_array,proc_rec,tag_id) ! ! Send 2d integer array to other processor. ! ! 20-fev-08/wlad: adapted from mpisend_real_arr2 ! integer, dimension(2) :: nbcast_array integer, dimension(nbcast_array(1),nbcast_array(2)) :: bcast_array integer :: proc_rec, tag_id, num_elements ! if (any(nbcast_array == 0)) return ! num_elements = product(nbcast_array) call MPI_SEND(bcast_array, num_elements, MPI_INTEGER, proc_rec, & tag_id, MPI_COMM_GRID,mpierr) ! endsubroutine mpisend_int_arr2 !*********************************************************************** subroutine mpibcast_logical_scl(lbcast_array,proc,comm) ! ! Communicate logical scalar between processors. ! logical :: lbcast_array integer, optional :: proc,comm ! call MPI_BCAST(lbcast_array,1,MPI_LOGICAL,ioptest(proc,root), & ioptest(comm,MPI_COMM_GRID),mpierr) ! endsubroutine mpibcast_logical_scl !*********************************************************************** subroutine mpibcast_logical_arr(lbcast_array,nbcast_array,proc,comm) ! ! Communicate logical array between processors. ! integer :: nbcast_array logical, dimension (nbcast_array) :: lbcast_array integer, optional :: proc,comm ! if (nbcast_array == 0) return ! call MPI_BCAST(lbcast_array,nbcast_array,MPI_LOGICAL,ioptest(proc,root), & ioptest(comm,MPI_COMM_GRID),mpierr) ! endsubroutine mpibcast_logical_arr !*********************************************************************** subroutine mpibcast_logical_arr2(lbcast_array,nbcast_array,proc,comm) ! ! Communicate logical array(:,:) to other processor. ! ! 25-may-08/wlad: adapted ! integer, dimension(2) :: nbcast_array logical, dimension(nbcast_array(1),nbcast_array(2)) :: lbcast_array integer, optional :: proc,comm ! if (any(nbcast_array == 0)) return ! call MPI_BCAST(lbcast_array, product(nbcast_array), MPI_LOGICAL, ioptest(proc,root), & ioptest(comm,MPI_COMM_GRID),mpierr) ! endsubroutine mpibcast_logical_arr2 !*********************************************************************** subroutine mpibcast_int_scl(ibcast_array,proc,comm) ! ! Communicate integer scalar between processors. ! 
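!  Usage sketch (illustrative; nt is a hypothetical variable): all ranks
!  of the communicator call the broadcast collectively; afterwards every
!  rank holds the root's value:
!
!    if (lroot) nt=1000
!    call mpibcast_int_scl(nt)
!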
integer :: ibcast_array integer, optional :: proc, comm call MPI_BCAST(ibcast_array,1,MPI_INTEGER,ioptest(proc,root), & ioptest(comm,MPI_COMM_GRID),mpierr) ! endsubroutine mpibcast_int_scl !*********************************************************************** subroutine mpibcast_int_arr(ibcast_array,nbcast_array,proc,comm) ! ! Communicate integer array between processors. ! integer :: nbcast_array integer, dimension(nbcast_array) :: ibcast_array integer, optional :: proc,comm ! if (nbcast_array == 0) return ! call MPI_BCAST(ibcast_array,nbcast_array,MPI_INTEGER,ioptest(proc,root), & ioptest(comm,MPI_COMM_GRID),mpierr) ! endsubroutine mpibcast_int_arr !*********************************************************************** subroutine mpibcast_int_arr2(ibcast_array,nbcast_array,proc,comm) ! ! Communicate integer array(:,:) to other processors. ! ! 30-apr-17/Jorgen: adapted ! integer, dimension(2) :: nbcast_array integer, dimension(nbcast_array(1),nbcast_array(2)) :: ibcast_array integer, optional :: proc,comm integer :: num_elements ! if (any(nbcast_array == 0)) return ! num_elements = product(nbcast_array) ! call MPI_BCAST(ibcast_array,num_elements,MPI_INTEGER,ioptest(proc,root), & ioptest(comm,MPI_COMM_GRID),mpierr) ! endsubroutine mpibcast_int_arr2 !*********************************************************************** subroutine mpibcast_real_scl(bcast_array,proc,comm) ! ! Communicate real scalar between processors. ! real :: bcast_array integer, optional :: proc, comm call MPI_BCAST(bcast_array,1,mpi_precision,ioptest(proc,root), & ioptest(comm,MPI_COMM_GRID),mpierr) ! endsubroutine mpibcast_real_scl !*********************************************************************** subroutine mpibcast_real_arr(bcast_array,nbcast_array,proc,comm) ! ! Communicate real array between processors. ! integer :: nbcast_array real, dimension(nbcast_array) :: bcast_array integer, optional :: proc,comm ! if (nbcast_array == 0) return ! call MPI_BCAST(bcast_array,nbcast_array,mpi_precision,ioptest(proc,root), & ioptest(comm,MPI_COMM_GRID),mpierr) ! endsubroutine mpibcast_real_arr !*********************************************************************** subroutine mpibcast_real_arr2(bcast_array,nbcast_array,proc,comm) ! ! Communicate real array(:,:) to other processor. ! ! 25-feb-08/wlad: adapted ! integer, dimension(2) :: nbcast_array real, dimension(nbcast_array(1),nbcast_array(2)) :: bcast_array integer, optional :: proc,comm integer :: ibcast_proc ! integer :: num_elements ! if (any(nbcast_array == 0)) return ! if (present(proc)) then ibcast_proc=proc else ibcast_proc=root endif ! num_elements = product(nbcast_array) call MPI_BCAST(bcast_array, num_elements, mpi_precision, ibcast_proc, & ioptest(comm,MPI_COMM_GRID),mpierr) ! endsubroutine mpibcast_real_arr2 !*********************************************************************** subroutine mpibcast_real_arr3(bcast_array,nbcast_array,proc) ! ! Communicate real array(:,:,:) to other processor. ! ! 25-fev-08/wlad: adapted ! integer, dimension(3) :: nbcast_array real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3)) :: bcast_array integer, optional :: proc integer :: ibcast_proc ! integer :: num_elements ! if (any(nbcast_array == 0)) return ! if (present(proc)) then ibcast_proc=proc else ibcast_proc=root endif ! num_elements = product(nbcast_array) call MPI_BCAST(bcast_array, num_elements, mpi_precision, ibcast_proc, & MPI_COMM_GRID,mpierr) ! 
endsubroutine mpibcast_real_arr3 !*********************************************************************** subroutine mpibcast_real_arr4(bcast_array,nbcast_array,proc) ! ! Communicate real array(:,:,:,:) to other processor. ! ! 21-dec-10/ccyang: adapted ! integer, dimension(4) :: nbcast_array real, dimension(nbcast_array(1),nbcast_array(2),nbcast_array(3),nbcast_array(4)) :: bcast_array integer, optional :: proc integer :: ibcast_proc, num_elements ! if (any(nbcast_array == 0)) return ! if (present(proc)) then ibcast_proc=proc else ibcast_proc=root endif ! num_elements = product(nbcast_array) call MPI_BCAST(bcast_array, num_elements, mpi_precision, ibcast_proc, & MPI_COMM_GRID,mpierr) ! endsubroutine mpibcast_real_arr4 !*********************************************************************** subroutine mpibcast_double_scl(bcast_array,proc,comm) ! ! Communicate real scalar between processors. ! real(KIND=rkind8) :: bcast_array integer, optional :: proc,comm ! call MPI_BCAST(bcast_array,1,MPI_DOUBLE_PRECISION,ioptest(proc,root), & ioptest(comm,MPI_COMM_GRID),mpierr) ! endsubroutine mpibcast_double_scl !*********************************************************************** subroutine mpibcast_double_arr(bcast_array,nbcast_array,proc) ! ! Communicate real array between processors. ! integer :: nbcast_array real(KIND=rkind8), dimension(nbcast_array) :: bcast_array integer, optional :: proc integer :: ibcast_proc ! if (nbcast_array == 0) return ! if (present(proc)) then ibcast_proc=proc else ibcast_proc=root endif ! call MPI_BCAST(bcast_array,nbcast_array,MPI_DOUBLE_PRECISION,ibcast_proc, & MPI_COMM_GRID,mpierr) ! endsubroutine mpibcast_double_arr !*********************************************************************** subroutine mpibcast_char_scl(cbcast_array,proc,comm) ! ! Communicate character scalar between processors. ! character(LEN=*) :: cbcast_array integer, optional :: proc,comm ! call MPI_BCAST(cbcast_array,len(cbcast_array),MPI_CHARACTER,ioptest(proc,root), & ioptest(comm,MPI_COMM_GRID),mpierr) ! endsubroutine mpibcast_char_scl !*********************************************************************** subroutine mpibcast_char_arr(cbcast_array,nbcast_array,proc,comm) ! ! Communicate character array between processors. ! integer :: nbcast_array ! character(LEN=*), dimension(nbcast_array) :: cbcast_array integer, optional :: proc,comm ! if (nbcast_array == 0) return ! call MPI_BCAST(cbcast_array,len(cbcast_array(1))*nbcast_array,MPI_CHARACTER, & ioptest(proc,root),ioptest(comm,MPI_COMM_GRID),mpierr) ! endsubroutine mpibcast_char_arr !*********************************************************************** subroutine mpibcast_cmplx_arr(bcast_array,nbcast_array,proc) ! ! Communicate real array between processors. ! integer :: nbcast_array complex, dimension(nbcast_array) :: bcast_array integer, optional :: proc integer :: ibcast_proc ! if (nbcast_array == 0) return ! if (present(proc)) then ibcast_proc=proc else ibcast_proc=root endif ! call MPI_BCAST(bcast_array,nbcast_array,MPI_CMPLX,ibcast_proc, & MPI_COMM_GRID,mpierr) ! endsubroutine mpibcast_cmplx_arr !*********************************************************************** subroutine mpibcast_cmplx_arr_dbl(bcast_array,nbcast_array,proc) ! ! Communicate real array between processors. ! integer :: nbcast_array complex(KIND=rkind8), dimension(nbcast_array) :: bcast_array integer, optional :: proc integer :: ibcast_proc ! if (nbcast_array == 0) return ! if (present(proc)) then ibcast_proc=proc else ibcast_proc=root endif ! 
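!  Broadcast as double-precision complex from rank ibcast_proc.
!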
call MPI_BCAST(bcast_array,nbcast_array,MPI_DOUBLE_COMPLEX,ibcast_proc, &
                     MPI_COMM_GRID,mpierr)
!
    endsubroutine mpibcast_cmplx_arr_dbl
!***********************************************************************
    subroutine mpibcast_cmplx_arr_sgl(bcast_array,nbcast_array,proc)
!
!  Communicate single-precision complex array between processors.
!
      integer :: nbcast_array
      complex(KIND=rkind4), dimension(nbcast_array) :: bcast_array
      integer, optional :: proc
      integer :: ibcast_proc
!
      if (nbcast_array == 0) return
!
      if (present(proc)) then
        ibcast_proc=proc
      else
        ibcast_proc=root
      endif
!
      call MPI_BCAST(bcast_array,nbcast_array,MPI_COMPLEX,ibcast_proc, &
                     MPI_COMM_GRID,mpierr)
!
    endsubroutine mpibcast_cmplx_arr_sgl
!***********************************************************************
    subroutine mpiscatter_real_arr(src_array,dest_array,proc,comm)
!
!  Scatter real 1D-array between processors of communicator comm from rank proc.
!
      real, dimension(:) :: src_array, dest_array
      integer, optional :: proc,comm
!
      integer :: count,comm_,np
!
      comm_=ioptest(comm,MPI_COMM_PENCIL)
      call MPI_COMM_SIZE(comm_,np,mpierr)
      count=size(src_array)/np
      if (size(dest_array)<count) &
        call stop_it('mpiscatter_real_arr: dest_array too small')
      call MPI_SCATTER(src_array,count,mpi_precision,dest_array,count,mpi_precision, &
                       ioptest(proc,root),comm_,mpierr)
!
    endsubroutine mpiscatter_real_arr
!***********************************************************************
    subroutine start_serialize
!
!  Do block between start_serialize and end_serialize serially in iproc order.
!
!  19-nov-02/wolf: coded
!
      integer :: buf
      integer, dimension(MPI_STATUS_SIZE) :: status
!
      serial_level=serial_level+1
      if (serial_level>1) return
!
      buf = 0
      if (.not. lroot) then     ! root starts, others wait for permission
        call MPI_RECV(buf,1,MPI_INTEGER,root,io_perm,MPI_COMM_GRID,status,mpierr)
      endif
!
    endsubroutine start_serialize
!***********************************************************************
    subroutine end_serialize
!
!  Do block between start_serialize and end_serialize serially in iproc order.
!
!  19-nov-02/wolf: coded
!
      integer :: i,buf
      integer, dimension(MPI_STATUS_SIZE) :: status
!
      serial_level=serial_level-1
      if (serial_level>=1) return
      if (serial_level<0) &
        call stop_it('end_serialize: too many end_serialize calls')
!
      buf = 0
      if (lroot) then
        do i=1,ncpus-1            ! send permission, wait for success message
          call MPI_SEND(buf,1,MPI_INTEGER,i,io_perm,MPI_COMM_GRID,mpierr)
          call MPI_RECV(buf,1,MPI_INTEGER,i,io_succ,MPI_COMM_GRID,status,mpierr)
        enddo
      else                        ! tell root we're done
        call MPI_SEND(buf,1,MPI_INTEGER,root,io_succ,MPI_COMM_GRID,mpierr)
      endif
!
    endsubroutine end_serialize
!***********************************************************************
    subroutine mpibarrier(comm)
!
!  Synchronize nodes.
!
!  23-jul-2002/wolf: coded
!
      integer, optional, intent(IN) :: comm
!
      call MPI_BARRIER(ioptest(comm,MPI_COMM_PENCIL), mpierr)
!
    endsubroutine mpibarrier
!***********************************************************************
    subroutine mpifinalize_min
!
!  Minimal finalizing.
!
!  22-Sep-25/MR: coded
!
      call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
      call MPI_FINALIZE(mpierr)
!
    endsubroutine mpifinalize_min
!***********************************************************************
    subroutine mpifinalize
!
!  Send stop signal to foreign code, then shut down MPI.
!
      if (lforeign.and.lroot) &
        call MPI_SEND(.true.,1,MPI_LOGICAL,frgn_setup%root,tag_foreign,MPI_COMM_WORLD,mpierr)
!
      call MPI_TYPE_FREE(REAL_ARR_MAXSIZE, mpierr)
      call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
      call MPI_FINALIZE(mpierr)
!
      if (lforeign) then
        if (allocated(frgn_setup%recv_req)) deallocate(frgn_setup%recv_req)
        if (allocated(frgn_setup%xgrid)) deallocate(frgn_setup%xgrid)
        if (allocated(frgn_setup%ygrid)) deallocate(frgn_setup%ygrid)
        if (allocated(frgn_setup%yind_rng)) deallocate(frgn_setup%yind_rng)
      endif
!
    endsubroutine mpifinalize
!***********************************************************************
    subroutine mpiabort
!
!  Aborts the processes in PC (it is documented that in most systems this aborts all MPI processes).
!
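!  Note (descriptive): the second argument of MPI_ABORT is the error code
!  returned to the invoking environment.
!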
      call MPI_ABORT(MPI_COMM_PENCIL,-1,mpierr)
!
    endsubroutine mpiabort
!***********************************************************************
    function mpiwtime()
!
      real(KIND=rkind8) :: mpiwtime
      real(KIND=rkind8) :: MPI_WTIME   ! definition needed for mpicomm_ to work
!
      mpiwtime = MPI_WTIME()
!
    endfunction mpiwtime
!***********************************************************************
    function mpiwtick()
!
      real(KIND=rkind8) :: mpiwtick
      real(KIND=rkind8) :: MPI_WTICK   ! definition needed for mpicomm_ to work
!
      mpiwtick = MPI_WTICK()
!
    endfunction mpiwtick
!***********************************************************************
    subroutine die_gracefully
!
!  Stop after having shut down MPI neatly.
!  With at least some MPI implementations, this only stops if all
!  processors agree to call die_gracefully.
!
!  29-jun-05/tony: coded
!
!  Tell the world something went wrong -- mpirun may not propagate
!  an error status.
!
      use General, only: touch_file
!
      call touch_file('ERROR')
!
!  mpifinalize may fail when called inside a parallel section; this needs
!  to be looked at further.
!
      call mpifinalize
      if (lroot) then
        STOP 1                    ! Return nonzero exit status
      else
        STOP
      endif
!
    endsubroutine die_gracefully
!***********************************************************************
    subroutine die_immediately
!
!  Stop without shutting down MPI.
!  For MPI implementations which only finalize when all processors agree
!  to finalize.
!
!  29-jun-05/tony: coded
!
!  Tell the world something went wrong -- mpirun may not propagate
!  an error status.
!
      use General, only: touch_file
!
      call touch_file('ERROR')
!
      if (lroot) then
        STOP 2                    ! Return nonzero exit status
      else
        STOP
      endif
!
    endsubroutine die_immediately
!***********************************************************************
    subroutine stop_fatal(msg,force)
!
!  Print message and stop. If force, stop without shutting down MPI.
!
!  13-dez-10/Bourdin.KIS: coded
!
      character (len=*) :: msg
      logical, optional :: force
!
      logical :: immediately
!
      immediately = .false.
      if (present (force)) immediately = force
!
      if (lroot .or. immediately) write(*,'(A,A)') 'STOPPED FATAL: ', msg
!
      if (immediately) then
        call die_immediately
      else
        call die_gracefully
      endif
!
    endsubroutine stop_fatal
!***********************************************************************
    subroutine stop_it(msg,code)
!
!  Print message and stop.
!  With at least some MPI implementations, this only stops if all
!  processors agree to call stop_it. To stop (collectively) if only one
!  or a few processors find some condition, use stop_it_if_any.
!
!  6-nov-01/wolf: coded
!  4-nov-11/MR: optional parameter code added
!
      use General, only: itoa
!
      character (len=*) :: msg
      integer, optional :: code
!
      if (lroot) then
        if (present(code)) then
          write(*,'(A,A,A)') 'STOPPED: ', msg, '. CODE: '//trim(itoa(code))
        elseif (msg /= '') then
          write(*,'(A,A)') 'STOPPED: ', msg
        else
          write(*,'(A)') 'STOPPED'
        endif
      endif
!
      call die_gracefully
!
    endsubroutine stop_it
!***********************************************************************
    subroutine stop_it_if_any(stop_flag,msg,code)
!
!  Conditionally print message and stop.
!  This works unilaterally, i.e. if STOP_FLAG is true on _any_ processor,
!  we will all stop. The error message will be printed together with
!  the MPI rank number, if the message is not empty.
!
!  22-nov-04/wolf: coded
!  15-Feb-2012/Bourdin.KIS: optional parameter code added
!
      logical :: stop_flag
      character (len=*) :: msg
      integer, optional :: code
!
      logical :: global_stop_flag, identical_stop_flag
!
!
Get global OR of stop_flag and distribute it, so all processors agree ! on whether to call stop_it: ! call MPI_ALLREDUCE(stop_flag,global_stop_flag,1,MPI_LOGICAL, & MPI_LOR,MPI_COMM_PENCIL,mpierr) call MPI_ALLREDUCE(stop_flag,identical_stop_flag,1,MPI_LOGICAL, & MPI_LAND,MPI_COMM_PENCIL,mpierr) ! if (global_stop_flag) then if ((.not. lroot) .and. (.not. identical_stop_flag) .and. (msg/='')) & write(*,'(A,I8,A,A)') 'RANK ', iproc_world, ' STOPPED: ', msg call stop_it(msg, code) endif ! endsubroutine stop_it_if_any !*********************************************************************** subroutine check_emergency_brake ! ! Check the lemergency_brake flag and stop with any provided ! message if it is set. ! ! 29-jul-06/tony: coded ! logical :: global_stop_flag ! ! Get global OR of lemergency_brake and distribute it, so all ! processors agree on whether to call stop_it: ! call MPI_ALLREDUCE(lemergency_brake,global_stop_flag,1,MPI_LOGICAL, & MPI_LOR,MPI_COMM_PENCIL,mpierr) ! if (global_stop_flag) call stop_it( & "Emergency brake activated. Check for error messages above.") ! endsubroutine check_emergency_brake !*********************************************************************** subroutine transp_single_proc(a,var) ! ! Doing a transpose (dummy version for single processor). ! ! 5-sep-02/axel: adapted from version in mpicomm.f90 ! 5-sep-25/TP: copied from nompicomm.f90 here to enable the more general transposing ! for a single proc (don't have to have nxgrid == nzgrid) needed for 1d-tests/jeans-x ! real, dimension(nx,ny,nz) :: a real, dimension(:,:), allocatable :: tmp character :: var ! integer :: m, n, iy, ibox ! if (ip<10) print*, 'transp for single processor' ! ! Doing x-y transpose if var='y' ! if (var=='y') then if (nygrid/=1) then ! if (mod(nx,ny)/=0) then if (lroot) print*, 'transp: works only if nx is an integer '//& 'multiple of ny!' call stop_it('transp') endif ! allocate (tmp(nx,ny)) do n=1,nz do ibox=0,nx/nygrid-1 iy=ibox*ny tmp=transpose(a(iy+1:iy+ny,:,n)) a(iy+1:iy+ny,:,n)=tmp enddo enddo deallocate (tmp) ! endif ! ! Doing x-z transpose if var='z' ! elseif (var=='z') then if (nzgrid/=1) then ! if (nx/=nz) then if (lroot) print*, 'transp: works only for nx=nz!' call stop_it('transp') endif ! allocate (tmp(nx,nz)) do m=1,ny tmp=transpose(a(:,m,:)) a(:,m,:)=tmp enddo deallocate (tmp) ! endif ! endif ! endsubroutine transp_single_proc !*********************************************************************** subroutine transp(a,var, comm, lsync) ! ! Doing the transpose of information distributed on several processors ! Used for doing FFTs in the y and z directions. ! This routine is presently restricted to the case nxgrid=nygrid (if var=y) ! and nygrid=nzgrid (if var=z) ! ! 03-sep-02/nils: coded ! 26-oct-02/axel: comments added ! 6-jun-03/axel: works now also in 2-D (need only nxgrid=nygrid) ! 5-oct-06/tobi: generalized to nxgrid = n*nygrid ! 25-sep-25/TP: added the easy case where we have one proc ! ! TODO: Implement nxgrid = n*nzgrid ! real, dimension(nx,ny,nz) :: a character :: var ! real, dimension(ny,ny,nz) :: send_buf_y, recv_buf_y real, dimension(nz,ny,nz) :: send_buf_z, recv_buf_z real, dimension(:,:), allocatable :: tmp integer, dimension(MPI_STATUS_SIZE) :: stat integer :: sendc_y,recvc_y,sendc_z,recvc_z,px integer :: ystag=111,yrtag=112,zstag=113,zrtag=114,partner integer :: m,n,ibox,ix integer, optional :: comm logical, optional :: lsync ! ! Doing x-y transpose if var='y' ! 
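!  Usage sketch (illustrative): an in-place y-transpose, e.g. before an
!  FFT along y; a second call with the same argument undoes it:
!
!    call transp(a,'y')
!    ... ! operate on the y-transposed data
!    call transp(a,'y')
!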
if (nprocx*nprocy*nprocz == 1) then
        call transp_single_proc(a,var)
        return
      endif
!
      if (loptest(lsync,.true.)) then
!$omp barrier
      endif
!$omp single
      if (var=='y') then
!
        if (mod(nxgrid,nygrid)/=0) then
          print*,'transp: nxgrid needs to be an integer multiple of '//&
                 'nygrid for var==y'
          call stop_it_if_any(.true.,'Inconsistency: mod(nxgrid,nygrid)/=0')
        endif
!
!  Allocate temporary scratch array
!
        allocate (tmp(ny,ny))
!
!  Calculate the size of buffers.
!  Buffers used for the y-transpose have the same size in y and z.
!
        sendc_y=ny*ny*nz; recvc_y=sendc_y
!
!  Send information to different processors (x-y transpose)
!  Divide x-range in as many intervals as we have processors in y.
!  The index px counts through all of them; partner is the index of the
!  processor we need to communicate with. Thus, px is the ipy of partner,
!  but at the same time the x index of the given block.
!
!  Example: ipy=1, ipz=0, then partner=0,2,3, ..., nprocy-1.
!
!        ...  |
!        3    |  D  E  F  /
!        2    |  B  C  /  F'
!   ipy= 1    |  A  /  C' E'
!        0    |  /  A' B' D'
!             +--------------
!        px=     0  1  2  3 ..
!
!       ipy
!        ^
!   C D  |
!   A B  |  --> px
!
!  if ipy=1,px=0, then exchange block A with A' on partner=0
!  if ipy=1,px=2, then exchange block C' with C on partner=2
!
!  if nxgrid is an integer multiple of nygrid, we divide the whole domain
!  into nxgrid/nygrid boxes (indexed by ibox below) of unit aspect ratio
!  (grid point wise) and only transpose within those boxes.
!
!  The following communication pattern is self-regulating. It
!  avoids deadlock, because there will always be at least one matching
!  pair of processors; the others will have to wait until their partner
!  posts the corresponding request.
!  Doing send and recv together for a given pair of processors
!  (although in an order that avoids deadlocking) allows us to get away
!  with only send_buf and recv_buf as buffers.
!
        do px=0,nprocy-1
          do ibox=0,nxgrid/nygrid-1
            if (px/=ipy) then
              partner=px+ipz*nprocy   ! = iproc + (px-ipy)
              if (ip<=6) print*,'transp: MPICOMM: ipy,ipz,px,partner=',ipy,ipz,px,partner
              ix=ibox*nprocy*ny+px*ny
              send_buf_y=a(ix+1:ix+ny,:,:)
              if (px<ipy) then        ! above diagonal: send first, receive then
                call MPI_SEND(send_buf_y,sendc_y,mpi_precision,partner,ystag,MPI_COMM_GRID,mpierr)
                call MPI_RECV(recv_buf_y,recvc_y,mpi_precision,partner,yrtag,MPI_COMM_GRID,stat,mpierr)
              elseif (px>ipy) then    ! below diagonal: receive first, send then
                call MPI_RECV(recv_buf_y,recvc_y,mpi_precision,partner,ystag,MPI_COMM_GRID,stat,mpierr)
                call MPI_SEND(send_buf_y,sendc_y,mpi_precision,partner,yrtag,MPI_COMM_GRID,mpierr)
              endif
              a(ix+1:ix+ny,:,:)=recv_buf_y
            endif
          enddo
        enddo
!
!  Transposing the received data (x-y transpose)
!  Example:
!
!  |12 13 | 14 15|      | 6  7 | 14 15|      | 3  7 | 11 15|
!  | 8  9 | 10 11|      | 2  3 | 10 11|      | 2  6 | 10 14|
!  |------+------|  ->  |------+------|  ->  |------+------|
!  | 4  5 |  6  7|      | 4  5 | 12 13|      | 1  5 |  9 13|
!  | 0  1 |  2  3|      | 0  1 |  8  9|      | 0  4 |  8 12|
!
!     original          2x2 blocks          each block
!                       transposed          transposed
!
        do px=0,nprocy-1
          do ibox=0,nxgrid/nygrid-1
            ix=ibox*nprocy*ny+px*ny
            do n=1,nz
              tmp=transpose(a(ix+1:ix+ny,:,n)); a(ix+1:ix+ny,:,n)=tmp
            enddo
          enddo
        enddo
!
!  Deallocate temporary scratch array
!
        deallocate (tmp)
!
!  Doing x-z transpose if var='z'
!
      elseif (var=='z') then
!
        if (nzgrid/=nxgrid) then
          if (lroot) print*, 'transp: need to have nzgrid=nxgrid for var==z'
          call stop_it_if_any(.true.,'transp: inconsistency - nzgrid/=nxgrid')
        endif
!
!  Calculate the size of buffers.
!  Buffers used for the z-transpose have the same size in z and x.
!
        sendc_z=nz*ny*nz; recvc_z=sendc_z
!
!  Allocate temporary scratch array
!
        allocate (tmp(nz,nz))
!
!  Send information to different processors (x-z transpose)
!  See the discussion above for why we use this communication pattern.
        do px=0,nprocz-1
          if (px/=ipz) then
            partner=ipy+px*nprocy   ! = iproc + (px-ipz)*nprocy
            send_buf_z=a(px*nz+1:(px+1)*nz,:,:)
            if (px<ipz) then        ! above diagonal: send first, receive then
              call MPI_SEND(send_buf_z,sendc_z,mpi_precision,partner,zstag,MPI_COMM_GRID,mpierr)
              call MPI_RECV(recv_buf_z,recvc_z,mpi_precision,partner,zrtag,MPI_COMM_GRID,stat,mpierr)
            elseif (px>ipz) then    ! below diagonal: receive first, send then
              call MPI_RECV(recv_buf_z,recvc_z,mpi_precision,partner,zstag,MPI_COMM_GRID,stat,mpierr)
              call MPI_SEND(send_buf_z,sendc_z,mpi_precision,partner,zrtag,MPI_COMM_GRID,mpierr)
            endif
            a(px*nz+1:(px+1)*nz,:,:)=recv_buf_z
          endif
        enddo
!
!  Transposing the received data (x-z transpose)
!
        do px=0,nprocz-1
          do m=1,ny
            tmp=transpose(a(px*nz+1:(px+1)*nz,m,:))
            a(px*nz+1:(px+1)*nz,m,:)=tmp
          enddo
        enddo
!
      else
        if (lroot) print*,'transp: No clue what var=', var, 'is supposed to mean'
      endif
!$omp end single
!
      if (loptest(lsync,.true.)) then
!$omp barrier
      endif
!
    endsubroutine transp
!***********************************************************************
    subroutine transp_xy(a,comm,lsync)
!
!  Doing the transpose of information distributed on several processors.
!  This routine transposes 2D arrays in x and y only.
!
!  6-oct-06/tobi: Adapted from transp
!
!  TODO: Implement nygrid = n*nxgrid
!
      real, dimension(nx,ny), intent(inout) :: a
!
      real, dimension(ny,ny) :: send_buf_y, recv_buf_y, tmp
      integer, dimension(MPI_STATUS_SIZE) :: stat
      integer :: sendc_y,recvc_y,px
      integer :: ytag=101,partner
      integer :: ibox,iy
      integer, optional :: comm
      logical, optional :: lsync
!
      if (loptest(lsync,.true.)) then
!$omp barrier
      endif
!$omp single
      if (nprocx>1) then
        print*,'transp_xy: nprocx must be equal to 1'
        call stop_it_if_any(.true.,'Inconsistency: nprocx>1')
      endif
!
      if (mod(nxgrid,nygrid)/=0) then
        print*,'transp_xy: nxgrid needs to be an integer multiple of nygrid'
        call stop_it_if_any(.true.,'Inconsistency: mod(nxgrid,nygrid)/=0')
      endif
!
!  Calculate the size of buffers.
!  Buffers used for the y-transpose have the same size in y and z.
!
      sendc_y=ny**2
      recvc_y=ny**2
!
!  Send information to different processors (x-y transpose)
!  Divide x-range in as many intervals as we have processors in y.
!  The index px counts through all of them; partner is the index of the
!  processor we need to communicate with. Thus, px is the ipy of partner,
!  but at the same time the x index of the given block.
!
!  Example: ipy=1, ipz=0, then partner=0,2,3, ..., nprocy-1.
!
!        ...  |
!        3    |  D  E  F  /
!        2    |  B  C  /  F'
!   ipy= 1    |  A  /  C' E'
!        0    |  /  A' B' D'
!             +--------------
!        px=     0  1  2  3 ..
!
!       ipy
!        ^
!   C D  |
!   A B  |  --> px
!
!  if ipy=1,px=0, then exchange block A with A' on partner=0
!  if ipy=1,px=2, then exchange block C' with C on partner=2
!
!  if nxgrid is an integer multiple of nygrid, we divide the whole domain
!  into nxgrid/nygrid boxes (indexed by ibox below) of unit aspect ratio
!  (grid point wise) and only transpose within those boxes.
!
!  The following communication pattern is self-regulating. It
!  avoids deadlock, because there will always be at least one matching
!  pair of processors; the others will have to wait until their partner
!  posts the corresponding request.
!  Doing send and recv together for a given pair of processors
!  (although in an order that avoids deadlocking) allows us to get away
!  with only send_buf and recv_buf as buffers.
!
      do px=0,nprocy-1
        do ibox=0,nxgrid/nygrid-1
          if (px/=ipy) then
            partner=px+ipz*nprocy   ! = iproc + (px-ipy)
            iy=(ibox*nprocy+px)*ny
            send_buf_y=a(iy+1:iy+ny,:)
            if (px<ipy) then        ! above diagonal: send first, receive then
              call MPI_SEND(send_buf_y,sendc_y,mpi_precision,partner,ytag,ioptest(comm,MPI_COMM_GRID),mpierr)
              call MPI_RECV(recv_buf_y,recvc_y,mpi_precision,partner,ytag,ioptest(comm,MPI_COMM_GRID),stat,mpierr)
            elseif (px>ipy) then    ! below diagonal: receive first, send then
              call MPI_RECV(recv_buf_y,recvc_y,mpi_precision,partner,ytag,ioptest(comm,MPI_COMM_GRID),stat,mpierr)
              call MPI_SEND(send_buf_y,sendc_y,mpi_precision,partner,ytag,ioptest(comm,MPI_COMM_GRID),mpierr)
            endif
            a(iy+1:iy+ny,:)=recv_buf_y
          endif
        enddo
      enddo
!
!  Transposing the received data (x-y transpose)
!  Example:
!
!  |12 13 | 14 15|      | 6  7 | 14 15|      | 3  7 | 11 15|
!  | 8  9 | 10 11|      | 2  3 | 10 11|      | 2  6 | 10 14|
!  |------+------|  ->  |------+------|  ->  |------+------|
!  | 4  5 |  6  7|      | 4  5 | 12 13|      | 1  5 |  9 13|
!  | 0  1 |  2  3|      | 0  1 |  8  9|      | 0  4 |  8 12|
!
!     original          2x2 blocks          each block
!                       transposed          transposed
!
      do px=0,nprocy-1
        do ibox=0,nxgrid/nygrid-1
          iy=(ibox*nprocy+px)*ny
          tmp=transpose(a(iy+1:iy+ny,:)); a(iy+1:iy+ny,:)=tmp
        enddo
      enddo
!$omp end single
!
      if (loptest(lsync,.true.)) then
!$omp barrier
      endif
!
    endsubroutine transp_xy
!***********************************************************************
    subroutine transp_xy_other(a,comm,lsync)
!
!  Doing the transpose of information distributed on several processors.
!  This routine transposes 2D arrays of arbitrary size in x and y only.
!
!  6-oct-06/tobi: Adapted from transp
!
!  TODO: Implement nygrid = n*nxgrid
!
      real, dimension(:,:), intent(inout) :: a
!
      real, dimension(size(a,2),size(a,2)) :: send_buf_y, recv_buf_y, tmp
      integer, dimension(MPI_STATUS_SIZE) :: stat
      integer :: sendc_y,recvc_y,px
      integer :: ytag=101,partner
      integer :: ibox,iy,nx_other,ny_other
      integer :: nxgrid_other,nygrid_other
      integer, optional :: comm
      logical, optional :: lsync
!
      if (loptest(lsync,.true.)) then
!$omp barrier
      endif
!$omp single
      nx_other=size(a,1); ny_other=size(a,2)
      nxgrid_other=nx_other
      nygrid_other=ny_other*nprocy
!
      if (mod(nxgrid_other,nygrid_other)/=0) then
        print*,'transp: nxgrid_other needs to be an integer multiple of '//&
               'nygrid_other for var==y'
        call stop_it_if_any(.true.,'Inconsistency: mod(nxgrid_other,nygrid_other)/=0')
      endif
!
!  Calculate the size of buffers.
!  Buffers used for the y-transpose have the same size in y and z.
!
      sendc_y=ny_other**2
      recvc_y=ny_other**2
!
!  Send information to different processors (x-y transpose)
!  Divide x-range in as many intervals as we have processors in y.
!  The index px counts through all of them; partner is the index of the
!  processor we need to communicate with. Thus, px is the ipy of partner,
!  but at the same time the x index of the given block.
!
!  Example: ipy=1, ipz=0, then partner=0,2,3, ..., nprocy-1.
!
!        ...  |
!        3    |  D  E  F  /
!        2    |  B  C  /  F'
!   ipy= 1    |  A  /  C' E'
!        0    |  /  A' B' D'
!             +--------------
!        px=     0  1  2  3 ..
!
!       ipy
!        ^
!   C D  |
!   A B  |  --> px
!
!  if ipy=1,px=0, then exchange block A with A' on partner=0
!  if ipy=1,px=2, then exchange block C' with C on partner=2
!
!  if nxgrid is an integer multiple of nygrid, we divide the whole domain
!  into nxgrid/nygrid boxes (indexed by ibox below) of unit aspect ratio
!  (grid point wise) and only transpose within those boxes.
!
!  The following communication pattern is self-regulating. It
!  avoids deadlock, because there will always be at least one matching
!  pair of processors; the others will have to wait until their partner
!  posts the corresponding request.
!  Doing send and recv together for a given pair of processors
!  (although in an order that avoids deadlocking) allows us to get away
!  with only send_buf and recv_buf as buffers.
!
      do px=0,nprocy-1
        do ibox=0,nxgrid_other/nygrid_other-1
          if (px/=ipy) then
            partner=px+ipz*nprocy   ! = iproc + (px-ipy)
            iy=(ibox*nprocy+px)*ny_other
            send_buf_y=a(iy+1:iy+ny_other,:)
            if (px<ipy) then        ! above diagonal: send first, receive then
              call MPI_SEND(send_buf_y,sendc_y,mpi_precision,partner,ytag,ioptest(comm,MPI_COMM_GRID),mpierr)
              call MPI_RECV(recv_buf_y,recvc_y,mpi_precision,partner,ytag,ioptest(comm,MPI_COMM_GRID),stat,mpierr)
            elseif (px>ipy) then    !
below diagonal: receive first, send then call MPI_RECV(recv_buf_y,recvc_y,mpi_precision,partner,ytag,ioptest(comm,MPI_COMM_GRID),stat,mpierr) call MPI_SEND(send_buf_y,sendc_y,mpi_precision,partner,ytag,ioptest(comm,MPI_COMM_GRID),mpierr) endif a(iy+1:iy+ny_other,:)=recv_buf_y endif enddo enddo ! ! Transposing the received data (x-y transpose) ! Example: ! ! |12 13 | 14 15| | 6 7 | 14 15| | 3 7 | 11 15| ! | 8 9 | 10 11| | 2 3 | 10 11| | 2 6 | 10 14| ! |------+------| -> |------+------| -> |------+------| ! | 4 5 | 6 7| | 4 5 | 12 13| | 1 5 | 9 13| ! | 0 1 | 2 3| | 0 1 | 8 9| | 0 4 | 8 12| ! original 2x2 blocks each block ! transposed transposed ! do px=0,nprocy-1 do ibox=0,nxgrid_other/nygrid_other-1 iy=(ibox*nprocy+px)*ny_other tmp=transpose(a(iy+1:iy+ny_other,:)); a(iy+1:iy+ny_other,:)=tmp enddo enddo !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine transp_xy_other !*********************************************************************** subroutine transp_other(a,var,comm,lsync) ! ! Doing the transpose of information distributed on several processors. ! This routine transposes 3D arrays but is presently restricted to the ! case nxgrid=nygrid (if var=y) and nygrid=nzgrid (if var=z) ! ! 08-may-08/wlad: Adapted from transp ! ! TODO: Implement nxgrid = n*nzgrid ! real, dimension(:,:,:), intent(inout) :: a character :: var ! real, dimension(size(a,2),size(a,2),size(a,3)) :: send_buf_y, recv_buf_y real, dimension(size(a,3),size(a,2),size(a,3)) :: send_buf_z, recv_buf_z real, dimension(:,:), allocatable :: tmp integer, dimension(MPI_STATUS_SIZE) :: stat integer :: sendc_y,recvc_y,sendc_z,recvc_z,px integer :: ytag=101,ztag=202,partner integer :: m,n,ibox,ix,nx_other,ny_other,nz_other integer :: nxgrid_other,nygrid_other,nzgrid_other integer, optional :: comm logical, optional :: lsync ! if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single nx_other=size(a,1); ny_other=size(a,2) ; nz_other=size(a,3) nxgrid_other=nx_other nygrid_other=ny_other*nprocy nzgrid_other=nz_other*nprocz ! if (var=='y') then ! if (mod(nxgrid_other,nygrid_other)/=0) then print*,'transp: nxgrid_other needs to be an integer multiple of '//& 'nygrid_other for var==y' call stop_it_if_any(.true.,'Inconsistency: mod(nxgrid_other,nygrid_other)/=0') endif ! ! Allocate temporary scratch array ! allocate (tmp(ny_other,ny_other)) ! ! Calculate the size of buffers. ! Buffers used for the y-transpose have the same size in y and z. ! sendc_y=ny_other**2*nz_other ; recvc_y=sendc_y ! ! Send information to different processors (x-y transpose) ! Divide x-range in as many intervals as we have processors in y. ! The index px counts through all of them; partner is the index of the ! processor we need to communicate with. Thus, px is the ipy of partner, ! but at the same time the x index of the given block. ! ! Example: ipy=1, ipz=0, then partner=0,2,3, ..., nprocy-1. ! ! ! ... | ! 3 | D E F / ! 2 | B C / F' ! ipy= 1 | A / C' E' ! 0 | / A' B' D' ! +-------------- ! px= 0 1 2 3 .. ! ! ! ipy ! ^ ! C D | ! A B | --> px ! ! if ipy=1,px=0, then exchange block A with A' on partner=0 ! if ipy=1,px=2, then exchange block C' with C on partner=2 ! ! if nxgrid is an integer multiple of nygrid, we divide the whole domain ! into nxgrid/nygrid boxes (indexed by ibox below) of unit aspect ratio ! (grid point wise) and only transpose within those boxes. ! ! The following communication patterns is kind of self-regulated. It ! avoids deadlock, because there will always be at least one matching ! 
pair of processors; the others will have to wait until their partner ! posts the corresponding request. ! Doing send and recv together for a given pair of processors ! (although in an order that avoids deadlocking) allows us to get away ! with only send_buf and recv_buf as buffers. ! do px=0,nprocy-1 do ibox=0,nxgrid_other/nygrid_other-1 if (px/=ipy) then partner=px+ipz*nprocy ! = iproc + (px-ipy) ix=(ibox*nprocy+px)*ny_other send_buf_y=a(ix+1:ix+ny_other,:,:) if (px<ipy) then ! above diagonal: send first, receive then call MPI_SEND(send_buf_y,sendc_y,mpi_precision,partner,ytag,ioptest(comm,MPI_COMM_GRID),mpierr) call MPI_RECV(recv_buf_y,recvc_y,mpi_precision,partner,ytag,ioptest(comm,MPI_COMM_GRID),stat,mpierr) elseif (px>ipy) then ! below diagonal: receive first, send then call MPI_RECV(recv_buf_y,recvc_y,mpi_precision,partner,ytag,ioptest(comm,MPI_COMM_GRID),stat,mpierr) call MPI_SEND(send_buf_y,sendc_y,mpi_precision,partner,ytag,ioptest(comm,MPI_COMM_GRID),mpierr) endif a(ix+1:ix+ny_other,:,:)=recv_buf_y endif enddo enddo ! ! Transposing the received data (x-y transpose) ! Example: ! ! |12 13 | 14 15| | 6 7 | 14 15| | 3 7 | 11 15| ! | 8 9 | 10 11| | 2 3 | 10 11| | 2 6 | 10 14| ! |------+------| -> |------+------| -> |------+------| ! | 4 5 | 6 7| | 4 5 | 12 13| | 1 5 | 9 13| ! | 0 1 | 2 3| | 0 1 | 8 9| | 0 4 | 8 12| ! original 2x2 blocks each block ! transposed transposed ! do px=0,nprocy-1 do ibox=0,nxgrid_other/nygrid_other-1 ix=(ibox*nprocy+px)*ny_other do n=1,nz_other tmp=transpose(a(ix+1:ix+ny_other,:,n)); a(ix+1:ix+ny_other,:,n)=tmp enddo enddo enddo ! ! Deallocate temporary scratch array ! deallocate (tmp) ! ! Doing x-z transpose if var='z' ! elseif (var=='z') then ! if (nzgrid_other/=nxgrid_other) then if (lroot) print*, 'transp_other: need to have '//& 'nzgrid_other=nxgrid_other for var==z' call stop_it_if_any(.true.,'transp_other: inconsistency - nzgrid/=nxgrid') endif ! ! Calculate the size of buffers. ! Buffers used for the z-transpose have the same size in z and x. ! sendc_z=nz_other**2*ny_other; recvc_z=sendc_z ! ! Allocate temporary scratch array ! allocate (tmp(nz_other,nz_other)) ! ! Send information to different processors (x-z transpose). ! See the discussion above for why we use this communication pattern. do px=0,nprocz-1 if (px/=ipz) then partner=ipy+px*nprocy ! = iproc + (px-ipz)*nprocy send_buf_z=a(px*nz_other+1:(px+1)*nz_other,:,:) if (px<ipz) then ! above diagonal: send first, receive then call MPI_SEND(send_buf_z,sendc_z,mpi_precision,partner,ztag,ioptest(comm,MPI_COMM_GRID),mpierr) call MPI_RECV(recv_buf_z,recvc_z,mpi_precision,partner,ztag,ioptest(comm,MPI_COMM_GRID),stat,mpierr) elseif (px>ipz) then ! below diagonal: receive first, send then call MPI_RECV (recv_buf_z,recvc_z,mpi_precision,partner,ztag,ioptest(comm,MPI_COMM_GRID),stat,mpierr) call MPI_SEND(send_buf_z,sendc_z,mpi_precision,partner,ztag,ioptest(comm,MPI_COMM_GRID),mpierr) endif a(px*nz_other+1:(px+1)*nz_other,:,:)=recv_buf_z endif enddo ! ! Transposing the received data (x-z transpose) ! do px=0,nprocz-1 do m=1,ny_other tmp=transpose(a(px*nz_other+1:(px+1)*nz_other,m,:)) a(px*nz_other+1:(px+1)*nz_other,m,:)=tmp enddo enddo ! else if (lroot) print*,'transp_other: No clue what var=', var, & 'is supposed to mean' endif !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine transp_other !*********************************************************************** subroutine transp_xz(a,b,comm,lsync) ! ! Doing the transpose of information distributed on several processors. ! This routine transposes 2D arrays in x and z only. ! ! 19-dec-06/anders: Adapted from transp ! integer, parameter :: nxt=nx/nprocz real, dimension(nx,nz), intent(in) :: a real, dimension(nzgrid,nxt), intent (out) :: b ! real, dimension(nxt,nz) :: buf integer, dimension(MPI_STATUS_SIZE) :: stat integer :: sendc,px integer :: ztag=101,partner integer, optional :: comm logical, optional :: lsync !
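!
!  Illustrative round-trip sketch (an assumption for documentation, not part
!  of the original code; requires mod(nxgrid,nprocz)==0 and participation of
!  all processors sharing the same ipy):
!
!    real, dimension(nx,nz) :: slice
!    real, dimension(nzgrid,nx/nprocz) :: pencil
!    call transp_xz(slice, pencil)   ! scatter x, gather the full z-extent
!    call transp_zx(pencil, slice)   ! inverse operation
!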
if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single if (mod(nxgrid,nprocz)/=0) then print*,'transp_xz: nxgrid needs to be an integer multiple of nprocz' call stop_it_if_any(.true.,'Inconsistency: mod(nxgrid,nprocz)/=0') endif ! ! Calculate the size of buffers. ! Each transfer moves one nxt*nz block. ! sendc=nxt*nz ! ! Send information to different processors (x-z transpose) ! b(ipz*nz+1:(ipz+1)*nz,:)=transpose(a(ipz*nxt+1:(ipz+1)*nxt,:)) do px=0,nprocz-1 if (px/=ipz) then partner=ipy+px*nprocy ! = iproc + (px-ipz)*nprocy buf=a(px*nxt+1:(px+1)*nxt,:) call MPI_SENDRECV_REPLACE(buf,sendc,mpi_precision,partner,ztag,partner,ztag,ioptest(comm,MPI_COMM_GRID),stat,mpierr) b(px*nz+1:(px+1)*nz,:)=transpose(buf) endif enddo !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine transp_xz !*********************************************************************** subroutine transp_zx(a,b,comm,lsync) ! ! Doing the transpose of information distributed on several processors. ! This routine transposes 2D arrays in x and z only. ! ! 19-dec-06/anders: Adapted from transp ! integer, parameter :: nxt=nx/nprocz real, dimension(nzgrid,nxt), intent (in) :: a real, dimension(nx,nz), intent(out) :: b ! real, dimension(nz,nxt) :: buf integer, dimension(MPI_STATUS_SIZE) :: stat integer :: sendc,px integer :: ztag=101,partner integer, optional :: comm logical, optional :: lsync ! if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single if (mod(nxgrid,nprocz)/=0) then print*,'transp_zx: nxgrid needs to be an integer multiple of nprocz' call stop_it_if_any(.true.,'Inconsistency: mod(nxgrid,nprocz)/=0') endif ! ! Calculate the size of buffers. ! Each transfer moves one nz*nxt block. ! sendc=nz*nxt ! ! Send information to different processors (x-z transpose) ! b(ipz*nxt+1:(ipz+1)*nxt,:)=transpose(a(ipz*nz+1:(ipz+1)*nz,:)) do px=0,nprocz-1 if (px/=ipz) then partner=ipy+px*nprocy ! = iproc + (px-ipz)*nprocy buf=a(px*nz+1:(px+1)*nz,:) call MPI_SENDRECV_REPLACE(buf,sendc,mpi_precision,partner,ztag,partner,ztag,ioptest(comm,MPI_COMM_GRID),stat,mpierr) b(px*nxt+1:(px+1)*nxt,:)=transpose(buf) endif enddo !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine transp_zx !*********************************************************************** subroutine communicate_vect_field_ghosts(f,topbot,start_index,comm) ! ! Helper routine for communication of ghost cell values of a vector field. ! Needed by potential field extrapolations, which only compute nx*ny arrays. ! Can also be used for synchronization of changed uu values with ghost cells, ! if the start_index parameter is set to iux (default is iax). ! ! 8-oct-2006/tobi: Coded ! 28-dec-2010/Bourdin.KIS: extended to work for any 3D vector field data. ! real, dimension (:,:,:,:), intent (inout) :: f integer, intent (in) :: topbot integer, intent(in), optional :: start_index ! real, dimension (nx,nghost,nghost+1,3) :: lbufyo,ubufyo,lbufyi,ubufyi real, dimension (nghost,size(f,2),nghost+1,3) :: lbufxo,ubufxo,lbufxi,ubufxi integer :: nbufx,nbufy,nn1,nn2,is,ie integer, optional :: comm ! is = iax if (present (start_index)) is = start_index ie = is + 2 ! nn1=-1; nn2=-1 select case (topbot) case (BOT); nn1=1; nn2=n1 case (TOP); nn1=n2; nn2=size(f,3) case default; call stop_it_if_any(.true.,"communicate_vect_field_ghosts: topbot should be BOT or TOP") end select ! ! Periodic boundaries in y -- communicate along y if necessary ! if (nprocy>1) then !
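!
!  (Note on the exchange below: both MPI_IRECVs are posted before the
!  MPI_ISENDs, the receives are completed before the ghost layers are
!  copied out of the buffers, and the sends are completed before the
!  send buffers go out of scope.)
!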
lbufyo = f(l1:l2, m1:m1i,nn1:nn2,is:ie) ubufyo = f(l1:l2,m2i:m2 ,nn1:nn2,is:ie) ! nbufy=nx*nghost*(nghost+1)*3 ! call MPI_IRECV(ubufyi,nbufy,mpi_precision,yuneigh,tolowy, & ioptest(comm,MPI_COMM_GRID),irecv_rq_fromuppy,mpierr) call MPI_IRECV(lbufyi,nbufy,mpi_precision,ylneigh,touppy, & ioptest(comm,MPI_COMM_GRID),irecv_rq_fromlowy,mpierr) call MPI_ISEND(lbufyo,nbufy,mpi_precision,ylneigh,tolowy, & ioptest(comm,MPI_COMM_GRID),isend_rq_tolowy,mpierr) call MPI_ISEND(ubufyo,nbufy,mpi_precision,yuneigh,touppy, & ioptest(comm,MPI_COMM_GRID),isend_rq_touppy,mpierr) ! call MPI_WAIT(irecv_rq_fromuppy,irecv_stat_fu,mpierr) call MPI_WAIT(irecv_rq_fromlowy,irecv_stat_fl,mpierr) ! f(l1:l2, 1:m1-1,nn1:nn2,is:ie) = lbufyi f(l1:l2,m2+1: ,nn1:nn2,is:ie) = ubufyi ! call MPI_WAIT(isend_rq_tolowy,isend_stat_tl,mpierr) call MPI_WAIT(isend_rq_touppy,isend_stat_tu,mpierr) ! else ! f(l1:l2, 1:m1-1,nn1:nn2,is:ie) = f(l1:l2,m2i:m2 ,nn1:nn2,is:ie) f(l1:l2,m2+1: ,nn1:nn2,is:ie) = f(l1:l2, m1:m1i,nn1:nn2,is:ie) ! endif ! ! Periodic boundaries in x ! if (nprocx>1) then ! lbufxo = f( l1:l1i,:,nn1:nn2,is:ie) ubufxo = f(l2i:l2 ,:,nn1:nn2,is:ie) ! nbufx=nghost*size(f,2)*(nghost+1)*3 ! call MPI_IRECV(ubufxi,nbufx,mpi_precision,xuneigh,tolowx, & ioptest(comm,MPI_COMM_GRID),irecv_rq_fromuppx,mpierr) call MPI_IRECV(lbufxi,nbufx,mpi_precision,xlneigh,touppx, & ioptest(comm,MPI_COMM_GRID),irecv_rq_fromlowx,mpierr) call MPI_ISEND(lbufxo,nbufx,mpi_precision,xlneigh,tolowx, & ioptest(comm,MPI_COMM_GRID),isend_rq_tolowx,mpierr) call MPI_ISEND(ubufxo,nbufx,mpi_precision,xuneigh,touppx, & ioptest(comm,MPI_COMM_GRID),isend_rq_touppx,mpierr) ! call MPI_WAIT(irecv_rq_fromuppx,irecv_stat_fu,mpierr) call MPI_WAIT(irecv_rq_fromlowx,irecv_stat_fl,mpierr) ! f( 1:l1-1,:,nn1:nn2,is:ie) = lbufxi f(l2+1: ,:,nn1:nn2,is:ie) = ubufxi ! call MPI_WAIT(isend_rq_tolowx,isend_stat_tl,mpierr) call MPI_WAIT(isend_rq_touppx,isend_stat_tu,mpierr) ! else ! f( 1:l1-1,:,nn1:nn2,is:ie) = f(l2i:l2 ,:,nn1:nn2,is:ie) f(l2+1: ,:,nn1:nn2,is:ie) = f( l1:l1i,:,nn1:nn2,is:ie) ! endif ! endsubroutine communicate_vect_field_ghosts !*********************************************************************** subroutine communicate_xy_ghosts(data,comm) ! ! Helper routine for communication of ghost cells in horizontal direction. ! ! 11-apr-2011/Bourdin.KIS: adapted from communicate_vect_field_ghosts. ! real, dimension (:,:), intent (inout) :: data ! real, dimension (nx,nghost) :: lbufyo,ubufyo,lbufyi,ubufyi real, dimension (nghost,size(data,2)) :: lbufxo,ubufxo,lbufxi,ubufxi integer, optional :: comm integer :: nbufx,nbufy ! ! Periodic boundaries in y -- communicate along y if necessary ! if (nprocy > 1) then ! lbufyo = data(l1:l2, m1:m1i) ubufyo = data(l1:l2,m2i:m2 ) ! nbufy = nx * nghost ! call MPI_IRECV (ubufyi, nbufy, mpi_precision, yuneigh, tolowy, & ioptest(comm,MPI_COMM_GRID), irecv_rq_fromuppy, mpierr) call MPI_IRECV (lbufyi, nbufy, mpi_precision, ylneigh, touppy, & ioptest(comm,MPI_COMM_GRID), irecv_rq_fromlowy, mpierr) call MPI_ISEND (lbufyo, nbufy, mpi_precision, ylneigh, tolowy, & ioptest(comm,MPI_COMM_GRID), isend_rq_tolowy, mpierr) call MPI_ISEND (ubufyo, nbufy, mpi_precision, yuneigh, touppy, & ioptest(comm,MPI_COMM_GRID), isend_rq_touppy, mpierr) ! call MPI_WAIT (irecv_rq_fromuppy, irecv_stat_fu, mpierr) call MPI_WAIT (irecv_rq_fromlowy, irecv_stat_fl, mpierr) ! data(l1:l2, 1:m1-1) = lbufyi data(l1:l2,m2+1: ) = ubufyi ! call MPI_WAIT (isend_rq_tolowy, isend_stat_tl, mpierr) call MPI_WAIT (isend_rq_touppy, isend_stat_tu, mpierr) ! else ! 
data(l1:l2, 1:m1-1) = data(l1:l2,m2i:m2 ) data(l1:l2,m2+1: ) = data(l1:l2, m1:m1i) ! endif ! ! Periodic boundaries in x ! if (nprocx > 1) then ! lbufxo = data( l1:l1i,:) ubufxo = data(l2i:l2 ,:) ! nbufx = nghost * size(data,2) ! call MPI_IRECV (ubufxi, nbufx, mpi_precision, xuneigh, tolowx, & ioptest(comm,MPI_COMM_GRID), irecv_rq_fromuppx, mpierr) call MPI_IRECV (lbufxi, nbufx, mpi_precision, xlneigh, touppx, & ioptest(comm,MPI_COMM_GRID), irecv_rq_fromlowx, mpierr) call MPI_ISEND (lbufxo, nbufx, mpi_precision, xlneigh, tolowx, & ioptest(comm,MPI_COMM_GRID), isend_rq_tolowx, mpierr) call MPI_ISEND (ubufxo, nbufx, mpi_precision, xuneigh, touppx, & ioptest(comm,MPI_COMM_GRID), isend_rq_touppx, mpierr) ! call MPI_WAIT (irecv_rq_fromuppx, irecv_stat_fu, mpierr) call MPI_WAIT (irecv_rq_fromlowx, irecv_stat_fl, mpierr) ! data( 1:l1-1,:) = lbufxi data(l2+1: ,:) = ubufxi ! call MPI_WAIT (isend_rq_tolowx, isend_stat_tl, mpierr) call MPI_WAIT (isend_rq_touppx, isend_stat_tu, mpierr) ! else ! data( 1:l1-1,:) = data(l2i:l2 ,:) data(l2+1: ,:) = data( l1:l1i,:) ! endif ! endsubroutine communicate_xy_ghosts !*********************************************************************** subroutine fill_zghostzones_3vec(vec,ivar,comm) ! ! Fills z-direction ghostzones of (mz,3)-array vec depending on the number of ! processors in z-direction. ! ! The three components of vec are supposed to be subject to the same ! z-boundary conditions as the variables ! ivar, ivar+1, ivar+2 ! ! 18-oct-2009/MR: Coded ! real, dimension(mz,3), intent(inout) :: vec integer, intent(in) :: ivar ! integer :: nbuf, j real, dimension (nghost,3) :: lbufi,ubufi,lbufo,ubufo integer, optional :: comm ! if (nprocz>1) then ! lbufo = vec(n1:n1i,:) !!(lower z-zone) ubufo = vec(n2i:n2,:) !!(upper z-zone) ! nbuf=nghost*3 ! call MPI_IRECV(ubufi,nbuf,mpi_precision, & zuneigh,tolowz,ioptest(comm,MPI_COMM_GRID),irecv_rq_fromuppz,mpierr) call MPI_IRECV(lbufi,nbuf,mpi_precision, & zlneigh,touppz,ioptest(comm,MPI_COMM_GRID),irecv_rq_fromlowz,mpierr) ! call MPI_ISEND(lbufo,nbuf,mpi_precision, & zlneigh,tolowz,ioptest(comm,MPI_COMM_GRID),isend_rq_tolowz,mpierr) call MPI_ISEND(ubufo,nbuf,mpi_precision, & zuneigh,touppz,ioptest(comm,MPI_COMM_GRID),isend_rq_touppz,mpierr) ! call MPI_WAIT(irecv_rq_fromuppz,irecv_stat_fu,mpierr) call MPI_WAIT(irecv_rq_fromlowz,irecv_stat_fl,mpierr) ! do j=1,3 ! if (.not. lfirst_proc_z .or. bcz12(j-1+ivar,1)=='p') & vec(1:n1-1,j)=lbufi(:,j) ! ! Read from buffer in lower ghostzones. ! if (.not. llast_proc_z .or. bcz12(j-1+ivar,2)=='p') & vec(n2+1:mz,j)=ubufi(:,j) ! ! Read from buffer in upper ghostzones. ! enddo ! call MPI_WAIT(isend_rq_tolowz,isend_stat_tl,mpierr) call MPI_WAIT(isend_rq_touppz,isend_stat_tu,mpierr) ! else ! do j=1,3 if ( bcz12(ivar+j-1,1)=='p' ) then vec(1 :n1-1 ,j) = vec(n2i:n2 ,j) vec(n2+1:n2+nghost,j) = vec(n1 :n1i,j) endif enddo ! endif ! endsubroutine fill_zghostzones_3vec !*********************************************************************** subroutine sum_xy(in, out) ! ! Sum up 0D data in the xy-plane and distribute back the sum. ! This routine only needs to be called by the processors of the ! corresponding xy-plane. ! Several xy-planes can call this routine at once. ! ! 19-jan-2011/Bourdin.KIS: coded ! real, intent(in) :: in real, intent(out) :: out ! real :: buffer, sum integer :: px, py, partner integer, parameter :: tag=114 ! if (lfirst_proc_xy) then ! initialize sum with the local data sum = in !
collect and sum up the remote data do px = 0, nprocx-1 do py = 0, nprocy-1 partner = find_proc(px,py,ipz) if (iproc == partner) cycle call mpirecv_real (buffer, partner, tag) sum = sum + buffer enddo enddo else ! send data to collector call mpisend_real (in, find_proc(0,0,ipz), tag) sum = 0.0 endif ! ! distribute back the sum call distribute_xy (out, sum) ! endsubroutine sum_xy !*********************************************************************** subroutine distribute_xy_0D(out, in, source_proc,comm) ! ! This routine distributes a scalar on the source processor ! to all processors in the xy-plane. ! 'source_proc' is the iproc number relative to the first processor ! in the corresponding xy-plane (Default: 0, equals lfirst_proc_xy). ! ! 25-jan-2012/Bourdin.KIS: coded ! real, intent(out) :: out real, intent(in), optional :: in integer, intent(in), optional :: source_proc ! integer :: px, py, broadcaster, partner integer, parameter :: ytag=115 integer, dimension(MPI_STATUS_SIZE) :: stat integer, optional :: comm ! broadcaster = find_proc(ioptest(source_proc,0),0,ipz) ! if (iproc == broadcaster) then ! distribute the data do px = 0, nprocx-1 do py = 0, nprocy-1 partner = find_proc(px,py,ipz) if (iproc == partner) then ! data is local out = in else ! send to partner call MPI_SEND (in, 1, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif enddo enddo else ! receive from broadcaster call MPI_RECV (out, 1, mpi_precision, broadcaster, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) endif ! endsubroutine distribute_xy_0D !*********************************************************************** subroutine distribute_xy_2D(out, in, source_proc, comm) ! ! This routine divides a large array of 2D data on the source processor ! and distributes it to all processors in the xy-plane. ! 'source_proc' is the iproc number relative to the first processor ! in the corresponding xy-plane (Default: 0, equals lfirst_proc_xy). ! ! 08-jan-2011/Bourdin.KIS: coded ! real, dimension(:,:), intent(out) :: out real, dimension(:,:), intent(in), optional :: in integer, intent(in), optional :: source_proc integer, optional :: comm ! integer :: bnx, bny ! transfer box sizes integer :: px, py, broadcaster, partner, nbox integer, parameter :: ytag=115 integer, dimension(MPI_STATUS_SIZE) :: stat ! bnx = size (out, 1) bny = size (out, 2) nbox = bnx*bny ! broadcaster = find_proc(ioptest(source_proc,0),0,ipz) ! if (iproc == broadcaster) then ! distribute the data if (bnx * nprocx /= size (in, 1)) & call stop_fatal ('distribute_xy_2D: input x dim must be nprocx*output', .true.) if (bny * nprocy /= size (in, 2)) & call stop_fatal ('distribute_xy_2D: input y dim must be nprocy*output', .true.) ! do px = 0, nprocx-1 do py = 0, nprocy-1 partner = find_proc(px,py,ipz) if (iproc /= partner) then ! send to partner out = in(px*bnx+1:(px+1)*bnx,py*bny+1:(py+1)*bny) call MPI_SEND (out, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif enddo enddo ! copy local data out = in(ipx*bnx+1:(ipx+1)*bnx,ipy*bny+1:(ipy+1)*bny) else ! receive from broadcaster call MPI_RECV (out, nbox, mpi_precision, broadcaster, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) endif ! endsubroutine distribute_xy_2D !*********************************************************************** subroutine distribute_yz_3D(out, in, comm_) ! ! This routine divides a large array of 3D data on the source processor ! and distributes it to all processors in the yz-plane. ! ! 07-oct-2021/MR: coded ! 
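!  A minimal calling sketch (illustrative, not from the original source):
!  every processor of the yz-plane receives its (mx,ny,nz) block in 'out',
!  while only the data passed by the plane root as 'in' is actually used:
!
!    real, dimension(mx,nygrid,nzgrid) :: whole
!    real, dimension(mx,ny,nz) :: block
!    call distribute_yz_3D(block, whole)
!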
real, dimension(:,:,:), intent(out) :: out real, dimension(:,:,:), intent(in) :: in integer, optional :: comm_ integer :: comm ! integer :: bnx, bny, bnz ! transfer box sizes integer :: pz, py, partner, nbox integer, parameter :: ytag=115 integer, dimension(MPI_STATUS_SIZE) :: stat ! bnx = size (out, 1) bny = size (out, 2) bnz = size (out, 3) comm = ioptest(comm_,MPI_COMM_YZPLANE) ! if (lfirst_proc_yz) then if (bnx /= size (in, 1)) & call stop_fatal ('distribute_yz_3D: x dim must equal between in and out', .true.) if (bny * nprocy /= size (in, 2)) & call stop_fatal ('distribute_yz_3D: input y dim must be nprocy*output', .true.) if (bnz * nprocz /= size (in, 3)) & call stop_fatal ('distribute_yz_3D: input z dim must be nprocz*output', .true.) ! ! Distribute the data. ! nbox = bnx*bny*bnz do pz = 0, nprocz-1 do py = 0, nprocy-1 if (lprocz_slowest) then partner = py + nprocy*pz else partner = pz + nprocz*py endif if (partner/=0) then ! send to partner; out serves as send buffer out = in(:,py*bny+1:(py+1)*bny,pz*bnz+1:(pz+1)*bnz) call MPI_SEND (out, nbox, mpi_precision, partner, ytag, comm, mpierr) endif enddo enddo ! copy local data on broadcaster after the loop, so it is not overwritten out = in(:,1:bny,1:bnz) else ! receive from broadcaster call MPI_RECV (out, nbox, mpi_precision, 0, ytag, comm, stat, mpierr) endif ! endsubroutine distribute_yz_3D !*********************************************************************** subroutine distribute_yz_4D(out, in) ! ! This routine divides a large array of 4D data on the source processor ! and distributes it to all processors in the yz-plane. ! ! 07-oct-2021/MR: coded ! real, dimension(:,:,:,:), intent(out) :: out real, dimension(:,:,:,:), intent(in) :: in ! integer :: bnx, bny, bnz, bnv ! transfer box sizes integer :: pz, py, partner, nbox integer, parameter :: ytag=115 integer, dimension(MPI_STATUS_SIZE) :: stat ! bnx = size (out, 1) bny = size (out, 2) bnz = size (out, 3) bnv = size (out, 4) ! if (lfirst_proc_yz) then if (bnx /= size (in, 1)) & call stop_fatal ('distribute_yz_4D: x dim must equal between in and out', .true.) if (bny * nprocy /= size (in, 2)) & call stop_fatal ('distribute_yz_4D: input y dim must be nprocy*output', .true.) if (bnz * nprocz /= size (in, 3)) & call stop_fatal ('distribute_yz_4D: input z dim must be nprocz*output', .true.) if (bnv /= size (in, 4)) & call stop_fatal ('distribute_yz_4D: var dim must equal between in and out', .true.) ! ! Distribute the data. ! nbox = bnx*bny*bnz*bnv do pz = 0, nprocz-1 do py = 0, nprocy-1 if (lprocz_slowest) then partner = py + nprocy*pz else partner = pz + nprocz*py endif if (partner/=0) then ! send to partner; out serves as send buffer out = in(:,py*bny+1:(py+1)*bny,pz*bnz+1:(pz+1)*bnz,:) call MPI_SEND (out, nbox, mpi_precision, partner, ytag, MPI_COMM_YZPLANE, mpierr) endif enddo enddo ! copy local data on broadcaster after the loop, so it is not overwritten out = in(:,1:bny,1:bnz,:) else ! receive from broadcaster call MPI_RECV (out, nbox, mpi_precision, 0, ytag, MPI_COMM_YZPLANE, stat, mpierr) endif ! endsubroutine distribute_yz_4D !*********************************************************************** subroutine distribute_xy_3D(out, in, source_proc, comm) ! ! This routine divides a large array of 3D data on the source processor ! and distributes it to all processors in the xy-plane. ! 'source_proc' is the iproc number relative to the first processor ! in the corresponding xy-plane (Default: 0, equals lfirst_proc_xy). ! ! 08-jan-2011/Bourdin.KIS: coded ! real, dimension(:,:,:), intent(out) :: out real, dimension(:,:,:), intent(in), optional :: in integer, intent(in), optional :: source_proc !
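!
!  Hedged usage sketch (illustrative, not from the original source): the
!  plane root (source_proc=0, i.e. lfirst_proc_xy) passes the global array,
!  while all other ranks of the xy-plane may omit 'in':
!
!    real, dimension(nx*nprocx,ny*nprocy,nz) :: glob
!    real, dimension(nx,ny,nz) :: loc
!    if (lfirst_proc_xy) then
!      call distribute_xy_3D(loc, glob)
!    else
!      call distribute_xy_3D(loc)
!    endif
!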
integer :: bnx, bny, bnz ! transfer box sizes integer :: px, py, broadcaster, partner, nbox integer, parameter :: ytag=115 integer, optional :: comm integer, dimension(MPI_STATUS_SIZE) :: stat ! bnx = size (out, 1) bny = size (out, 2) bnz = size (out, 3) nbox = bnx*bny*bnz ! broadcaster = find_proc(ioptest(source_proc,0),0,ipz) ! if (iproc == broadcaster) then ! distribute the data if (bnx * nprocx /= size (in, 1)) & call stop_fatal ('distribute_xy_3D: input x dim must be nprocx*output', .true.) if (bny * nprocy /= size (in, 2)) & call stop_fatal ('distribute_xy_3D: input y dim must be nprocy*output', .true.) if (bnz /= size (in, 3)) & call stop_fatal ('distribute_xy_3D: z dim must equal between in and out', .true.) ! do px = 0, nprocx-1 do py = 0, nprocy-1 partner = find_proc(px,py,ipz) if (iproc == partner) then ! data is local out = in(px*bnx+1:(px+1)*bnx,py*bny+1:(py+1)*bny,:) else ! send to partner call MPI_SEND (in(px*bnx+1:(px+1)*bnx,py*bny+1:(py+1)*bny,:), & nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif enddo enddo else ! receive from broadcaster call MPI_RECV (out, nbox, mpi_precision, broadcaster, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) endif ! endsubroutine distribute_xy_3D !*********************************************************************** subroutine distribute_xy_4D(out, in, source_proc, comm) ! ! This routine divides a large array of 4D data on the source processor ! and distributes it to all processors in the xy-plane. ! 'source_proc' is the iproc number relative to the first processor ! in the corresponding xy-plane (Default: 0, equals lfirst_proc_xy). ! ! 08-jan-2011/Bourdin.KIS: coded ! real, dimension(:,:,:,:), intent(out) :: out real, dimension(:,:,:,:), intent(in), optional :: in integer, intent(in), optional :: source_proc integer, optional :: comm ! integer :: bnx, bny, bnz, bna ! transfer box sizes integer :: px, py, broadcaster, partner, nbox integer, parameter :: ytag=115 integer, dimension(MPI_STATUS_SIZE) :: stat ! bnx = size (out, 1) bny = size (out, 2) bnz = size (out, 3) bna = size (out, 4) nbox = bnx*bny*bnz*bna ! broadcaster = find_proc(ioptest(source_proc,0),0,ipz) ! if (iproc == broadcaster) then ! distribute the data if (bnx * nprocx /= size (in, 1)) & call stop_fatal ('distribute_xy_4D: input x dim must be nprocx*output', .true.) if (bny * nprocy /= size (in, 2)) & call stop_fatal ('distribute_xy_4D: input y dim must be nprocy*output', .true.) if (bnz /= size (in, 3)) & call stop_fatal ('distribute_xy_4D: z dim must equal between in and out', .true.) if (bna /= size (in, 4)) & call stop_fatal ('distribute_xy_4D: 4th dim must equal between in and out', .true.) ! do px = 0, nprocx-1 do py = 0, nprocy-1 partner = find_proc(px,py,ipz) if (iproc == partner) then ! data is local out = in(px*bnx+1:(px+1)*bnx,py*bny+1:(py+1)*bny,:,:) else ! send to partner call MPI_SEND (in(px*bnx+1:(px+1)*bnx,py*bny+1:(py+1)*bny,:,:), & nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif enddo enddo else ! receive from broadcaster call MPI_RECV (out, nbox, mpi_precision, broadcaster, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) endif ! endsubroutine distribute_xy_4D !*********************************************************************** subroutine collect_xy_0D(in, out, dest_proc, comm) ! ! Collect 0D data from all processors in the xy-plane ! and combine it into one large array on one destination processor. ! 'dest_proc' is the iproc number relative to the first processor ! 
in the corresponding xy-plane (Default: 0, equals lfirst_proc_xy). ! ! 08-jan-2011/Bourdin.KIS: coded ! real, intent(in) :: in real, dimension(:,:), intent(out), optional :: out integer, intent(in), optional :: dest_proc integer, optional :: comm ! integer :: px, py, collector, partner integer, parameter :: ytag=116 integer, dimension(MPI_STATUS_SIZE) :: stat real :: buffer ! collector = ipz * nprocxy if (present (dest_proc)) collector = collector + dest_proc ! if (iproc == collector) then ! collect the data if (nprocx /= size (out, 1)) & call stop_fatal ('collect_xy_0D: output x dim must be nprocx', .true.) if (nprocy /= size (out, 2)) & call stop_fatal ('collect_xy_0D: output y dim must be nprocy', .true.) ! do px = 0, nprocx-1 do py = 0, nprocy-1 partner = find_proc(px,py,ipz) if (iproc == partner) then ! data is local out(px+1,py+1) = in else ! receive from partner call MPI_RECV (buffer, 1, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) out(px+1,py+1) = buffer endif enddo enddo else ! send to collector call MPI_SEND (in, 1, mpi_precision, collector, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif ! endsubroutine collect_xy_0D !*********************************************************************** subroutine collect_xy_2D(in, out, dest_proc, comm) ! ! Collect 2D data from all processors in the xy-plane ! and combine it into one large array on one destination processor. ! 'dest_proc' is the iproc number relative to the first processor ! in the corresponding xy-plane (Default: 0, equals lfirst_proc_xy). ! ! 08-jan-2011/Bourdin.KIS: coded ! real, dimension(:,:), intent(in) :: in real, dimension(:,:), intent(out), optional :: out integer, intent(in), optional :: dest_proc integer, optional :: comm ! integer :: bnx, bny ! transfer box sizes integer :: px, py, collector, partner, nbox, alloc_err integer, parameter :: ytag=116 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:), allocatable :: buffer ! bnx = size (in, 1) bny = size (in, 2) nbox = bnx*bny ! collector = ipz * nprocxy if (present (dest_proc)) collector = collector + dest_proc ! if (iproc == collector) then ! collect the data if (bnx * nprocx /= size (out, 1)) & call stop_fatal ('collect_xy_2D: output x dim must be nprocx*input', .true.) if (bny * nprocy /= size (out, 2)) & call stop_fatal ('collect_xy_2D: output y dim must be nprocy*input', .true.) ! allocate (buffer(bnx,bny), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('collect_xy_2D: not enough memory for buffer!', .true.) ! do px = 0, nprocx-1 do py = 0, nprocy-1 partner = find_proc(px,py,ipz) if (iproc == partner) then ! data is local out(px*bnx+1:(px+1)*bnx,py*bny+1:(py+1)*bny) = in else ! receive from partner call MPI_RECV (buffer, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) out(px*bnx+1:(px+1)*bnx,py*bny+1:(py+1)*bny) = buffer endif enddo enddo ! deallocate (buffer) else ! send to collector call MPI_SEND (in, nbox, mpi_precision, collector, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif ! endsubroutine collect_xy_2D !*********************************************************************** subroutine collect_xy_3D(in, out, dest_proc, comm) ! ! Collect 3D data from all processors in the xy-plane ! and combine it into one large array on one destination processor. ! 'dest_proc' is the iproc number relative to the first processor ! in the corresponding xy-plane (Default: 0, equals lfirst_proc_xy). ! ! 08-jan-2011/Bourdin.KIS: coded ! 
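!  Usage sketch (illustrative, mirror image of distribute_xy_3D): 'out' is
!  only referenced on the collecting processor (dest_proc=0 by default):
!
!    real, dimension(nx,ny,nz) :: loc
!    real, dimension(nx*nprocx,ny*nprocy,nz) :: glob
!    if (lfirst_proc_xy) then
!      call collect_xy_3D(loc, glob)
!    else
!      call collect_xy_3D(loc)
!    endif
!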
real, dimension(:,:,:), intent(in) :: in real, dimension(:,:,:), intent(out), optional :: out integer, intent(in), optional :: dest_proc integer, optional :: comm ! integer :: bnx, bny, bnz ! transfer box sizes integer :: px, py, collector, partner, nbox, alloc_err integer, parameter :: ytag=116 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:,:), allocatable :: buffer ! bnx = size (in, 1) bny = size (in, 2) bnz = size (in, 3) nbox = bnx*bny*bnz ! collector = ipz * nprocxy if (present (dest_proc)) collector = collector + dest_proc ! if (iproc == collector) then ! collect the data if (bnx * nprocx /= size (out, 1)) & call stop_fatal ('collect_xy_3D: output x dim must be nprocx*input', .true.) if (bny * nprocy /= size (out, 2)) & call stop_fatal ('collect_xy_3D: output y dim must be nprocy*input', .true.) ! allocate (buffer(bnx,bny,bnz), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('collect_xy_3D: not enough memory for buffer!', .true.) ! do px = 0, nprocx-1 do py = 0, nprocy-1 partner = find_proc(px,py,ipz) if (iproc == partner) then ! data is local out(px*bnx+1:(px+1)*bnx,py*bny+1:(py+1)*bny,:) = in else ! receive from partner call MPI_RECV (buffer, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) out(px*bnx+1:(px+1)*bnx,py*bny+1:(py+1)*bny,:) = buffer endif enddo enddo ! deallocate (buffer) else ! send to collector call MPI_SEND (in, nbox, mpi_precision, collector, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif ! endsubroutine collect_xy_3D !*********************************************************************** subroutine collect_xy_4D(in, out, dest_proc, comm) ! ! Collect 4D data from all processors in the xy-plane ! and combine it into one large array on one destination processor. ! 'dest_proc' is the iproc number relative to the first processor ! in the corresponding xy-plane (Default: 0, equals lfirst_proc_xy). ! ! 08-jan-2011/Bourdin.KIS: coded ! real, dimension(:,:,:,:), intent(in) :: in real, dimension(:,:,:,:), intent(out), optional :: out integer, intent(in), optional :: dest_proc integer, optional :: comm ! integer :: bnx, bny, bnz, bna ! transfer box sizes integer :: px, py, collector, partner, nbox, alloc_err integer, parameter :: ytag=116 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:,:,:), allocatable :: buffer ! bnx = size (in, 1) bny = size (in, 2) bnz = size (in, 3) bna = size (in, 4) nbox = bnx*bny*bnz*bna ! collector = ipz * nprocxy if (present (dest_proc)) collector = collector + dest_proc ! if (iproc == collector) then ! collect the data if (bnx * nprocx /= size (out, 1)) & call stop_fatal ('collect_xy_4D: output x dim must be nprocx*input', .true.) if (bny * nprocy /= size (out, 2)) & call stop_fatal ('collect_xy_4D: output y dim must be nprocy*input', .true.) if (bnz /= size (out, 3)) & call stop_fatal ('collect_xy_4D: z dim must equal between in and out', .true.) if (bna /= size (out, 4)) & call stop_fatal ('collect_xy_4D: 4th dim must equal between in and out', .true.) ! allocate (buffer(bnx,bny,bnz,bna), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('collect_xy_4D: not enough memory for buffer!', .true.) ! do px = 0, nprocx-1 do py = 0, nprocy-1 partner = find_proc(px,py,ipz) if (iproc == partner) then ! data is local out(px*bnx+1:(px+1)*bnx,py*bny+1:(py+1)*bny,:,:) = in else ! receive from partner call MPI_RECV (buffer, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) out(px*bnx+1:(px+1)*bnx,py*bny+1:(py+1)*bny,:,:) = buffer endif enddo enddo ! 
deallocate (buffer) else ! send to collector call MPI_SEND (in, nbox, mpi_precision, collector, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif ! endsubroutine collect_xy_4D !*********************************************************************** subroutine distribute_z_3D(out, in, source_proc, comm) ! ! This routine divides a large array of 3D data on the source processor ! and distributes it to all processors in the z-direction. ! 'source_proc' is the iproc number relative to the first processor ! in the corresponding z-direction (Default: 0, equals lfirst_proc_z). ! ! 08-mar-2011/Bourdin.KIS: coded ! real, dimension(:,:,:), intent(out) :: out real, dimension(:,:,:), intent(in), optional :: in integer, intent(in), optional :: source_proc integer, optional :: comm ! integer :: bnx, bny, bnz ! transfer box sizes integer :: pz, broadcaster, partner, nbox integer, parameter :: ytag=117 integer, dimension(MPI_STATUS_SIZE) :: stat ! bnx = size (out, 1) bny = size (out, 2) bnz = size (out, 3) nbox = bnx*bny*bnz ! broadcaster = find_proc(ipx,ipy,ioptest(source_proc,0)) ! if (iproc == broadcaster) then ! distribute the data if (bnx /= size (in, 1)) & call stop_fatal ('distribute_z_3D: x dim must be equal between in and out', .true.) if (bny /= size (in, 2)) & call stop_fatal ('distribute_z_3D: y dim must be equal between in and out', .true.) if (bnz * nprocz /= size (in, 3)) & call stop_fatal ('distribute_z_3D: input z dim must be nprocz*output', .true.) ! do pz = 0, nprocz-1 partner = find_proc(ipx,ipy,pz) if (iproc == partner) then ! data is local out = in(:,:,pz*bnz+1:(pz+1)*bnz) else ! send to partner call MPI_SEND (in(:,:,pz*bnz+1:(pz+1)*bnz), nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif enddo else ! receive from broadcaster call MPI_RECV (out, nbox, mpi_precision, broadcaster, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) endif ! endsubroutine distribute_z_3D !*********************************************************************** subroutine distribute_z_4D(out, in, source_proc, comm) ! ! This routine divides a large array of 4D data on the source processor ! and distributes it to all processors in the z-direction. ! 'source_proc' is the iproc number relative to the first processor ! in the corresponding z-direction (Default: 0, equals lfirst_proc_z). ! ! 08-mar-2011/Bourdin.KIS: coded ! real, dimension(:,:,:,:), intent(out) :: out real, dimension(:,:,:,:), intent(in), optional :: in integer, intent(in), optional :: source_proc integer, optional :: comm ! integer :: bnx, bny, bnz, bna ! transfer box sizes integer :: pz, broadcaster, partner, nbox integer, parameter :: ytag=117 integer, dimension(MPI_STATUS_SIZE) :: stat ! ! bnx = size (out, 1) bny = size (out, 2) bnz = size (out, 3) bna = size (out, 4) nbox = bnx*bny*bnz*bna ! broadcaster = find_proc(ipx,ipy,ioptest(source_proc,0)) ! if (iproc == broadcaster) then ! distribute the data if (bnx /= size (in, 1)) & call stop_fatal ('distribute_z_4D: x dim must be equal between in and out', .true.) if (bny /= size (in, 2)) & call stop_fatal ('distribute_z_4D: y dim must be equal between in and out', .true.) if (bnz * nprocz /= size (in, 3)) & call stop_fatal ('distribute_z_4D: input z dim must be nprocz*output', .true.) if (bna /= size (in, 4)) & call stop_fatal ('distribute_z_4D: 4th dim must equal between in and out', .true.) ! do pz = 0, nprocz-1 partner = find_proc(ipx,ipy,pz) if (iproc == partner) then ! data is local out = in(:,:,pz*bnz+1:(pz+1)*bnz,:) else !
send to partner call MPI_SEND (in(:,:,pz*bnz+1:(pz+1)*bnz,:), nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif enddo else ! receive from broadcaster call MPI_RECV (out, nbox, mpi_precision, broadcaster, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) endif ! endsubroutine distribute_z_4D !*********************************************************************** subroutine collect_z_3D(in, out, dest_proc, comm) ! ! Collect 3D data from all processors in the z-direction ! and combine it into one large array on one destination processor. ! 'dest_proc' is the iproc number relative to the first processor ! in the corresponding z-direction (Default: 0, equals lfirst_proc_z). ! ! 08-mar-2011/Bourdin.KIS: coded ! real, dimension(:,:,:), intent(in) :: in real, dimension(:,:,:), intent(out), optional :: out integer, intent(in), optional :: dest_proc integer, optional :: comm ! integer :: bnx, bny, bnz ! transfer box sizes integer :: pz, collector, partner, nbox, alloc_err integer, parameter :: ytag=118 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:,:), allocatable :: buffer ! bnx = size (in, 1) bny = size (in, 2) bnz = size (in, 3) nbox = bnx*bny*bnz ! collector = ipx + ipy*nprocx if (present (dest_proc)) collector = collector + dest_proc*nprocxy ! if (iproc == collector) then ! collect the data if (bnx /= size (out, 1)) & call stop_fatal ('collect_z_3D: x dim must equal between in and out', .true.) if (bny /= size (out, 2)) & call stop_fatal ('collect_z_3D: y dim must equal between in and out', .true.) if (bnz * nprocz /= size (out, 3)) & call stop_fatal ('collect_z_3D: output z dim must be nprocz*input', .true.) ! allocate (buffer(bnx,bny,bnz), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('collect_z_3D: not enough memory for buffer!', .true.) ! do pz = 0, nprocz-1 partner = find_proc(ipx,ipy,pz) if (iproc == partner) then ! data is local out(:,:,pz*bnz+1:(pz+1)*bnz) = in else ! receive from partner call MPI_RECV (buffer, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) out(:,:,pz*bnz+1:(pz+1)*bnz) = buffer endif enddo ! deallocate (buffer) else ! send to collector call MPI_SEND (in, nbox, mpi_precision, collector, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif ! endsubroutine collect_z_3D !*********************************************************************** subroutine collect_z_4D(in, out, dest_proc, comm) ! ! Collect 4D data from all processors in the z-direction ! and combine it into one large array on one destination processor. ! 'dest_proc' is the iproc number relative to the first processor ! in the corresponding z-direction (Default: 0, equals lfirst_proc_z). ! ! 08-mar-2011/Bourdin.KIS: coded ! real, dimension(:,:,:,:), intent(in) :: in real, dimension(:,:,:,:), intent(out), optional :: out integer, intent(in), optional :: dest_proc integer, optional :: comm ! integer :: bnx, bny, bnz, bna ! transfer box sizes integer :: pz, collector, partner, nbox, alloc_err integer, parameter :: ytag=118 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:,:,:), allocatable :: buffer ! bnx = size (in, 1) bny = size (in, 2) bnz = size (in, 3) bna = size (in, 4) nbox = bnx*bny*bnz*bna ! collector = ipx + ipy*nprocx if (present (dest_proc)) collector = collector + dest_proc*nprocxy ! if (iproc == collector) then ! collect the data if (bnx /= size (out, 1)) & call stop_fatal ('collect_z_4D: x dim must equal between in and out', .true.) 
if (bny /= size (out, 2)) & call stop_fatal ('collect_z_4D: y dim must equal between in and out', .true.) if (bnz * nprocz /= size (out, 3)) & call stop_fatal ('collect_z_4D: output z dim must be nprocz*input', .true.) if (bna /= size (out, 4)) & call stop_fatal ('collect_z_4D: 4th dim must equal between in and out', .true.) ! allocate (buffer(bnx,bny,bnz,bna), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('collect_z_4D: not enough memory for buffer!', .true.) ! do pz = 0, nprocz-1 partner = find_proc(ipx,ipy,pz) if (iproc == partner) then ! data is local out(:,:,pz*bnz+1:(pz+1)*bnz,:) = in else ! receive from partner call MPI_RECV (buffer, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) out(:,:,pz*bnz+1:(pz+1)*bnz,:) = buffer endif enddo ! deallocate (buffer) else ! send to collector call MPI_SEND (in, nbox, mpi_precision, collector, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif ! endsubroutine collect_z_4D !*********************************************************************** subroutine globalize_xy(in, out, dest_proc, source_pz) ! ! Globalizes local 4D data first along the x, then along the y-direction to ! the destination processor. The local data is supposed to include the ghost ! cells. Inner ghost layers are cut away during the combination of the data. ! 'dest_proc' is the destination iproc number relative to the first processor ! in the corresponding xy-plane (Default: 0, equals lfirst_proc_xy). ! 'source_pz' specifies the source pz-layer (Default: ipz). ! ! 23-Apr-2012/Bourdin.KIS: adapted from non-torus-type globalize_xy ! real, dimension(:,:,:,:), intent(in) :: in real, dimension(:,:,:,:), intent(out), optional :: out integer, intent(in), optional :: dest_proc, source_pz ! integer :: bnx, bny, bnz, bna ! transfer box sizes integer :: cnx, cny ! transfer box sizes minus ghosts integer :: rny ! y-row box size integer :: px, py, pz, collector, partner, alloc_err integer(KIND=ikind8) :: nbox, nrow integer :: x_add, x_sub, y_add, y_sub integer, parameter :: xtag=123, ytag=124 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:,:,:), allocatable :: buffer, y_row ! bnx = size (in, 1) bny = size (in, 2) bnz = size (in, 3) bna = size (in, 4) nbox = bnx*bny*bnz*bna cnx = bnx - 2*nghost cny = bny - 2*nghost rny = cny*nprocy + 2*nghost nrow = bnx*rny*bnz*bna ! collector = ipz * nprocxy if (present (dest_proc)) collector = collector + dest_proc pz = ioptest(source_pz,ipz) ! if (iproc == collector) then if (cnx * nprocx + 2*nghost /= size (out, 1)) & call stop_fatal ('globalize_xy: output x dim must be nprocx*input minus inner ghosts', .true.) if (cny * nprocy + 2*nghost /= size (out, 2)) & call stop_fatal ('globalize_xy: output y dim must be nprocy*input minus inner ghosts', .true.) if (bnz /= size (out, 3)) & call stop_fatal ('globalize_xy: z dim must equal between in and out', .true.) if (bna /= size (out, 4)) & call stop_fatal ('globalize_xy: 4th dim must equal between in and out', .true.) endif ! if (lfirst_proc_y .and. (ipz == pz)) then ! collect the data of each y-row allocate (buffer(bnx,bny,bnz,bna), y_row(bnx,rny,bnz,bna), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('globalize_xy: not enough memory for buffer and y_row!', .true.) ! do py = 0, nprocy-1 partner = find_proc(ipx,py,pz) y_add = nghost y_sub = nghost if (py == 0) y_add = 0 if (py == nprocy-1) y_sub = 0 if (iproc == partner) then ! data is local y_row(:,py*cny+1+y_add:py*cny+my,:,:) = in(:,1+y_add:my,:,:) else ! 
receive from y-row partner call mpirecv_real_arr_huge(buffer,nbox,partner,ytag) ! old version: call MPI_RECV (buffer, nbox, mpi_precision, partner, ytag, MPI_COMM_GRID, stat, mpierr) y_row(:,py*cny+1+y_add:py*cny+my-y_sub,:,:) = buffer(:,1+y_add:my-y_sub,:,:) endif enddo ! deallocate (buffer) ! if (iproc /= collector) then ! send to collector call mpisend_real_arr_huge(y_row,nrow,collector,xtag) ! old version: call MPI_SEND (y_row, nrow, mpi_precision, collector, xtag, MPI_COMM_GRID, mpierr) deallocate (y_row) endif ! elseif (ipz == pz) then ! send to collector of the y-row (lfirst_proc_y) partner = find_proc(ipx,0,ipz) call mpisend_real_arr_huge(in,nbox,partner,ytag) ! old version: call MPI_SEND (in, nbox, mpi_precision, partner, ytag, MPI_COMM_GRID, mpierr) endif ! if (iproc == collector) then ! collect the y-rows into global data allocate (buffer(bnx,rny,bnz,bna), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('globalize_xy: not enough memory for buffer!', .true.) ! do px = 0, nprocx-1 partner = find_proc(px,ipy,pz) x_add = nghost x_sub = nghost if (px == 0) x_add = 0 if (px == nprocx-1) x_sub = 0 if (iproc == partner) then ! y-row is local out(px*cnx+1+x_add:px*cnx+mx,:,:,:) = y_row(1+x_add:mx,:,:,:) deallocate (y_row) else ! receive from partner call mpirecv_real_arr_huge(buffer,nrow,partner,xtag) ! old version: call MPI_RECV (buffer, nrow, mpi_precision, partner, xtag, MPI_COMM_GRID, stat, mpierr) out(px*cnx+1+x_add:px*cnx+mx-x_sub,:,:,:) = buffer(1+x_add:mx-x_sub,:,:,:) endif enddo ! deallocate (buffer) endif ! endsubroutine globalize_xy !*********************************************************************** recursive subroutine localize_xy(out, in, source_proc, dest_pz) ! ! Localizes global 4D data first along the y, then along the x-direction to ! the destination processor. The global data is supposed to include the outer ! ghost layers. The returned data will include inner ghost layers. ! 'source_proc' is the source iproc number relative to the first processor ! in the corresponding xy-plane (Default: 0, equals lfirst_proc_xy). ! 'dest_pz' specifies the destination pz-layer (Default: ipz). ! ! 23-Apr-2012/Bourdin.KIS: adapted from non-torus-type localize_xy ! real, dimension(:,:,:,:), intent(out) :: out real, dimension(:,:,:,:), intent(in), optional :: in integer, intent(in), optional :: source_proc, dest_pz ! integer :: bnx, bny, bnz, bna ! transfer box sizes integer :: cnx, cny ! transfer box sizes minus ghosts integer :: gnx, gny ! x- and y global data array sizes integer :: rnx, rny ! x- and y-row box sizes integer :: px, py, pz, broadcaster, partner, alloc_err integer(KIND=ikind8) :: nbox,nrow integer, parameter :: xtag=125, ytag=126 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:,:,:), allocatable :: y_row, buffer, extended ! bnx = size (out, 1) bny = size (out, 2) bnz = size (out, 3) bna = size (out, 4) nbox = bnx*bny*bnz*bna cnx = bnx - 2*nghost cny = bny - 2*nghost gnx = cnx*nprocx gny = cny*nprocy rnx = gnx + 2*nghost rny = gny + 2*nghost nrow = bnx*rny*bnz*bna ! broadcaster = ipz * nprocxy if (present (source_proc)) broadcaster = broadcaster + source_proc pz = ipz if (present (dest_pz)) pz = dest_pz ! if (iproc == broadcaster) then if ((gnx == size (in, 1)) .and. (gny == size (in, 2))) then ! add outer ghost layers allocate (extended(rnx,rny,bnz,bna), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('localize_xy: not enough memory for outer ghost cells extension!', .true.) 
extended(nghost+1:nghost+gnx,nghost+1:nghost+gny,:,:) = in if (lperi(1)) then ! wrap x periodically; restrict to the interior y-range so shapes conform extended(1:nghost,nghost+1:nghost+gny,:,:) = in(gnx-nghost+1:gnx,:,:,:) extended(rnx-nghost+1:rnx,nghost+1:nghost+gny,:,:) = in(1:nghost,:,:,:) else write (*,*) "WARNING: localize_xy: extending non-periodic outer ghost cells with zeros!" extended(1:nghost,:,:,:) = 0.0 extended(rnx-nghost+1:rnx,:,:,:) = 0.0 endif if (lperi(2)) then ! wrap y periodically; copy from the already extended array so the corners are filled consistently extended(:,1:nghost,:,:) = extended(:,gny+1:gny+nghost,:,:) extended(:,rny-nghost+1:rny,:,:) = extended(:,nghost+1:2*nghost,:,:) else write (*,*) "WARNING: localize_xy: extending non-periodic outer ghost cells with zeros!" extended(:,1:nghost,:,:) = 0.0 extended(:,rny-nghost+1:rny,:,:) = 0.0 endif call localize_xy(out, extended, broadcaster - ipz * nprocxy, pz) return endif if (cnx * nprocx + 2*nghost /= size (in, 1)) & call stop_fatal ('localize_xy: input x dim must be nprocx*output minus inner ghosts', .true.) if (cny * nprocy + 2*nghost /= size (in, 2)) & call stop_fatal ('localize_xy: input y dim must be nprocy*output minus inner ghosts', .true.) if (bnz /= size (in, 3)) & call stop_fatal ('localize_xy: z dim must equal between in and out', .true.) if (bna /= size (in, 4)) & call stop_fatal ('localize_xy: 4th dim must equal between in and out', .true.) endif ! if (ipz == pz) then allocate (y_row(bnx,rny,bnz,bna), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('localize_xy: not enough memory for y_row!', .true.) endif ! if (iproc == broadcaster) then allocate (buffer(bnx,rny,bnz,bna), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('localize_xy: not enough memory for buffer!', .true.) ! distribute the y-rows do px = 0, nprocx-1 partner = find_proc(px,0,pz) if (iproc == partner) then ! data is local y_row = in(px*cnx+1:px*cnx+mx,:,:,:) else ! send to partner buffer = in(px*cnx+1:px*cnx+mx,:,:,:) call mpisend_real_arr_huge(buffer,nrow,partner,xtag) !old version: call MPI_SEND (buffer, int(nrow), mpi_precision, partner, xtag, MPI_COMM_GRID, mpierr) endif enddo deallocate (buffer) endif ! if (lfirst_proc_y .and. (ipz == pz)) then if (iproc /= broadcaster) then ! receive y-row from broadcaster call mpirecv_real_arr_huge(y_row,nrow,broadcaster,xtag) !old version: call MPI_RECV (y_row, int(nrow), mpi_precision, broadcaster, xtag, MPI_COMM_GRID, stat, mpierr) endif ! allocate (buffer(bnx,bny,bnz,bna), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('localize_xy: not enough memory for buffer!', .true.) ! distribute the data along the y-direction do py = 0, nprocy-1 partner = find_proc(ipx,py,pz) if (iproc == partner) then ! data is local out = y_row(:,py*cny+1:py*cny+my,:,:) else ! send to partner buffer = y_row(:,py*cny+1:py*cny+my,:,:) call mpisend_real_arr_huge(buffer,nbox,partner,ytag) !old version: call MPI_SEND (buffer, int(nbox), mpi_precision, partner, ytag, MPI_COMM_GRID, mpierr) endif enddo deallocate (buffer) elseif (ipz == pz) then ! receive local data from y-row partner (lfirst_proc_y) partner = find_proc(ipx,0,ipz) call mpirecv_real_arr_huge(out,nbox,partner,ytag) !old version: call MPI_RECV (out, int(nbox), mpi_precision, partner, ytag, MPI_COMM_GRID, stat, mpierr) endif ! if (ipz == pz) deallocate (y_row) ! endsubroutine localize_xy !*********************************************************************** subroutine globalize_z(in, out, dest_proc, comm) ! ! Globalizes local 1D data in the z-direction to the destination processor. ! The local data is supposed to include the ghost cells. ! Inner ghost layers are cut away during the combination of the data. !
'dest_proc' is the destination ipz-layer number relative to the first ! processor in the z-direction (Default: 0, equals lfirst_proc_z). ! ! 13-aug-2011/Bourdin.KIS: coded ! real, dimension(mz), intent(in) :: in real, dimension(mzgrid), intent(out), optional :: out integer, intent(in), optional :: dest_proc integer, optional :: comm ! integer :: pz, z_add, collector, partner, alloc_err integer, parameter :: ytag=119 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:), allocatable :: buffer ! collector = ipx + ipy * nprocx if (present (dest_proc)) collector = collector + dest_proc * nprocxy ! if (iproc == collector) then ! collect the data allocate (buffer(mz), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('globalize_z: not enough memory for buffer!', .true.) ! do pz = 0, nprocz-1 partner = find_proc(ipx,ipy,pz) z_add = nghost if (pz == 0) z_add = 0 if (iproc == partner) then ! data is local out(pz*nz+1+z_add:pz*nz+mz) = in(1+z_add:mz) else ! receive from partner call MPI_RECV (buffer, mz, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) out(pz*nz+1+z_add:pz*nz+mz) = buffer(1+z_add:mz) endif enddo ! deallocate (buffer) else ! send to collector call MPI_SEND (in, mz, mpi_precision, collector, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif ! endsubroutine globalize_z !*********************************************************************** subroutine localize_z(out, in, source_proc, comm) ! ! Localizes global 1D data to all processors along the z-direction. ! The global data is supposed to include the outer ghost layers. ! The returned data will include inner ghost layers. ! 'source_proc' is the source ipz-layer number relative to the first ! processor in the z-direction (Default: 0, equals lfirst_proc_z). ! ! 11-Feb-2012/Bourdin.KIS: coded ! real, dimension(mz), intent(out) :: out real, dimension(mzgrid), intent(in) :: in integer, intent(in), optional :: source_proc integer, optional :: comm ! integer :: pz, broadcaster, partner integer, parameter :: ytag=120 integer, dimension(MPI_STATUS_SIZE) :: stat ! broadcaster = find_proc(ipx,ipy,ioptest(source_proc,0)) ! if (iproc == broadcaster) then ! distribute the data do pz = 0, nprocz-1 partner = find_proc(ipx,ipy,pz) if (iproc == partner) then ! data is local out = in(pz*nz+1:pz*nz+mz) else ! send to partner call MPI_SEND (in(pz*nz+1:pz*nz+mz), mz, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif enddo else ! receive from broadcaster call MPI_RECV (out, mz, mpi_precision, broadcaster, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) endif ! endsubroutine localize_z !*********************************************************************** subroutine distribute_to_pencil_xy_2D(in, out, broadcaster, comm) ! ! Distribute data to several processors and reform into pencil shape. ! This routine divides global data and distributes it in the xy-plane. ! ! 22-jul-2010/Bourdin.KIS: coded ! real, dimension(:,:), intent(in) :: in real, dimension(:,:), intent(out) :: out integer, intent(in) :: broadcaster integer, optional :: comm ! integer :: bnx, bny ! transfer box sizes integer :: ibox, partner, nbox, alloc_err integer, parameter :: ytag=113 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:), allocatable :: buffer ! if ((nprocx == 1) .and. (nprocy == 1)) then out = in return endif ! bnx = size (in, 1) bny = size (in, 2) / nprocxy nbox = bnx*bny !
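!
!  (Mapping used below: pencil box ibox lives on processor
!  find_proc(modulo(ibox,nprocx), ibox/nprocx, ipz), i.e. the global
!  y-extent is cut into nprocxy stripes of width bny.)
!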
if (mod (size (in, 2), nprocxy) /= 0) & call stop_fatal ('distribute_to_pencil_xy_2D: input y dim must be an integer multiple of nprocx*nprocy', lfirst_proc_xy) ! if ((size (out, 1) /= bnx) .or. ((size (out, 2) /= bny))) & call stop_fatal ('distribute_to_pencil_xy_2D: output array size mismatch /= bnx,bny', lfirst_proc_xy) ! allocate (buffer(bnx,bny), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('distribute_to_pencil_xy_2D: not enough memory for buffer!', .true.) ! if (iproc == broadcaster) then do ibox = 0, nprocxy-1 partner = find_proc(modulo(ibox,nprocx),ibox/nprocx,ipz) if (iproc == partner) then ! data is local out = in(:,bny*ibox+1:bny*(ibox+1)) else ! send to partner buffer = in(:,bny*ibox+1:bny*(ibox+1)) call MPI_SEND (buffer, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif enddo else ! receive from broadcaster call MPI_RECV (buffer, nbox, mpi_precision, broadcaster, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) out = buffer endif ! deallocate (buffer) ! endsubroutine distribute_to_pencil_xy_2D !*********************************************************************** subroutine collect_from_pencil_xy_2D(in, out, collector, comm) ! ! Collect 2D data from several processors and combine into global shape. ! This routine collects 2D pencil shaped data distributed in the xy-plane. ! ! 22-jul-2010/Bourdin.KIS: coded ! real, dimension(:,:), intent(in) :: in real, dimension(:,:), intent(out) :: out integer, intent(in) :: collector integer, optional :: comm ! integer :: bnx, bny ! transfer box sizes integer :: ibox, partner, nbox, alloc_err integer, parameter :: ytag=114 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:), allocatable :: buffer ! if ((nprocx == 1) .and. (nprocy == 1)) then out = in return endif ! bnx = size (out, 1) bny = size (out, 2) / nprocxy nbox = bnx*bny ! if (mod (size (out, 2), nprocxy) /= 0) & call stop_fatal ('collect_from_pencil_xy_2D: output y dim must be an integer multiple of nprocx*nprocy', lfirst_proc_xy) ! if ((size (in, 1) /= bnx) .or. ((size (in, 2) /= bny))) & call stop_fatal ('collect_from_pencil_xy_2D: input array size mismatch /= bnx,bny', lfirst_proc_xy) ! allocate (buffer(bnx,bny), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('collect_from_pencil_xy_2D: not enough memory for buffer!', .true.) ! if (iproc == collector) then do ibox = 0, nprocxy-1 partner = find_proc(modulo(ibox,nprocx),ibox/nprocx,ipz) if (iproc == partner) then ! data is local out(:,bny*ibox+1:bny*(ibox+1)) = in else ! receive from partner call MPI_RECV (buffer, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) out(:,bny*ibox+1:bny*(ibox+1)) = buffer endif enddo else ! send to collector buffer = in call MPI_SEND (buffer, nbox, mpi_precision, collector, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif ! deallocate (buffer) ! endsubroutine collect_from_pencil_xy_2D !*********************************************************************** subroutine remap_to_pencil_x(in, out,comm,lsync) ! ! Remaps data distributed on several processors into pencil shape. ! This routine remaps 1D arrays in x only for nprocx>1. ! ! 08-dec-2010/Bourdin.KIS: coded ! real, dimension(nx), intent(in) :: in real, dimension(nxgrid), intent(out) :: out ! integer :: ibox, partner integer, parameter :: ltag=102, utag=103 integer, dimension(MPI_STATUS_SIZE) :: stat real, dimension(nx) :: recv_buf integer, optional :: comm logical, optional :: lsync ! 
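!
!  Round-trip sketch (illustrative, not from the original source): all
!  nprocx processors of an x-row exchange their local segments so that
!  each ends up with the full x-extent; unmap_from_pencil_x then extracts
!  the local part again:
!
!    real, dimension(nx) :: loc
!    real, dimension(nxgrid) :: pencil
!    call remap_to_pencil_x(loc, pencil)
!    call unmap_from_pencil_x(pencil, loc)
!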
if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single do ibox = 0, nprocx-1 partner = find_proc(ibox,ipy,ipz) if (iproc == partner) then ! data is local out(nx*ibox+1:nx*(ibox+1)) = in else ! communicate with partner if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND (in, nx, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV (recv_buf, nx, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else ! below diagonal: receive first, send then call MPI_RECV (recv_buf, nx, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND (in, nx, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif out(nx*ibox+1:nx*(ibox+1)) = recv_buf endif enddo !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine remap_to_pencil_x !*********************************************************************** subroutine unmap_from_pencil_x(in, out) ! ! Unmaps pencil shaped 1D data distributed on several processors back to normal shape. ! This routine is the inverse of the remap function for nprocx>1. ! ! 08-dec-2010/Bourdin.KIS: coded ! real, dimension(nxgrid), intent(in) :: in real, dimension(nx), intent(out) :: out ! !$omp workshare out = in(nx*ipx+1:nx*(ipx+1)) !$omp end workshare ! endsubroutine unmap_from_pencil_x !*********************************************************************** subroutine remap_to_pencil_y_1D(in, out,comm,lsync) ! ! Remaps data distributed on several processors into pencil shape. ! This routine remaps 1D arrays in y only for nprocy>1. ! ! 13-dec-2010/Bourdin.KIS: coded ! real, dimension(ny), intent(in) :: in real, dimension(nygrid), intent(out) :: out ! integer :: ibox, partner integer, parameter :: ltag=102, utag=103 integer, dimension(MPI_STATUS_SIZE) :: stat real, dimension(ny) :: recv_buf integer, optional :: comm logical, optional :: lsync ! if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single do ibox = 0, nprocy-1 partner = find_proc(ipx,ibox,ipz) if (iproc == partner) then ! data is local out(ny*ibox+1:ny*(ibox+1)) = in else ! communicate with partner if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND (in, ny, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV (recv_buf, ny, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else ! below diagonal: receive first, send then call MPI_RECV (recv_buf, ny, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND (in, ny, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif out(ny*ibox+1:ny*(ibox+1)) = recv_buf endif enddo !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine remap_to_pencil_y_1D !*********************************************************************** subroutine remap_to_pencil_y_2D(in, out,comm,lsync) ! ! Remaps data distributed on several processors into pencil shape. ! This routine remaps 2D arrays in y only for nprocy>1. ! ! 13-dec-2010/Bourdin.KIS: coded ! real, dimension(nx,ny), intent(in) :: in real, dimension(nx,nygrid), intent(out) :: out ! integer :: ibox, partner, nbox integer, parameter :: ltag=102, utag=103 integer, dimension(MPI_STATUS_SIZE) :: stat real, dimension(nx,ny) :: recv_buf integer, optional :: comm logical, optional :: lsync ! if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single nbox = nx*ny ! 
do ibox = 0, nprocy-1 partner = find_proc(ipx,ibox,ipz) if (iproc == partner) then ! data is local out(:,ny*ibox+1:ny*(ibox+1)) = in else ! communicate with partner if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND (in, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else ! below diagonal: receive first, send then call MPI_RECV (recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND (in, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif out(:,ny*ibox+1:ny*(ibox+1)) = recv_buf endif enddo !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine remap_to_pencil_y_2D !*********************************************************************** subroutine remap_to_pencil_y_3D(in, out,comm,lsync) ! ! Remaps data distributed on several processors into pencil shape. ! This routine remaps 3D arrays in y only for nprocy>1. ! ! 13-dec-2010/Bourdin.KIS: coded ! real, dimension(:,:,:), intent(in) :: in real, dimension(:,:,:), intent(out) :: out ! integer :: ibox, partner, nbox, alloc_err integer, parameter :: ltag=102, utag=103 integer :: inx, inz ! size of the first and third dimension integer, dimension(MPI_STATUS_SIZE) :: stat real, dimension(:,:,:), allocatable :: recv_buf integer, optional :: comm logical, optional :: lsync ! if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single inx = size (in, 1) inz = size (in, 3) nbox = inx*ny*inz ! if (inx /= size (out, 1)) & call stop_fatal ('remap_to_pencil_y_3D: first dimension differs for input and output', lfirst_proc_y) if (inz /= size (out, 3)) & call stop_fatal ('remap_to_pencil_y_3D: third dimension differs for input and output', lfirst_proc_y) ! if (size (in, 2) /= ny) & call stop_fatal ('remap_to_pencil_y_3D: second dimension of input must be ny', lfirst_proc_y) if (size (out, 2) /= nygrid) & call stop_fatal ('remap_to_pencil_y_3D: second dimension of output must be nygrid', lfirst_proc_y) ! allocate (recv_buf(inx,ny,inz), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('remap_to_pencil_y_3D: Could not allocate memory for recv_buf', .true.) ! do ibox = 0, nprocy-1 partner = find_proc(ipx,ibox,ipz) if (iproc == partner) then ! data is local out(:,ny*ibox+1:ny*(ibox+1),:) = in else ! communicate with partner if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND (in, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else ! below diagonal: receive first, send then call MPI_RECV (recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND (in, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif out(:,ny*ibox+1:ny*(ibox+1),:) = recv_buf endif enddo ! if (allocated (recv_buf)) deallocate (recv_buf) !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine remap_to_pencil_y_3D !*********************************************************************** subroutine remap_to_pencil_y_4D(in, out,comm,lsync) ! ! Remaps data distributed on several processors into pencil shape. ! This routine remaps 4D arrays in y only for nprocy>1. ! ! 13-dec-2010/Bourdin.KIS: coded ! 
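!  Typical round trip (illustrative sketch only; 'a' and 'a_pencil' are
!  placeholder arrays, not part of this module):
!
!    real, dimension(nx,ny,nz,3)     :: a
!    real, dimension(nx,nygrid,nz,3) :: a_pencil
!
!    call remap_to_pencil_y_4D (a, a_pencil)    ! gather the full y-extent
!    ! ... operate on complete y-lines ...
!    call unmap_from_pencil_y_4D (a_pencil, a)  ! scatter back
!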
real, dimension(:,:,:,:), intent(in) :: in real, dimension(:,:,:,:), intent(out) :: out ! integer :: ibox, partner, nbox, alloc_err integer, parameter :: ltag=102, utag=103 integer :: inx, inz, ina ! size of the first, third, and fourth dimension integer, dimension(MPI_STATUS_SIZE) :: stat real, dimension(:,:,:,:), allocatable :: recv_buf integer, optional :: comm logical, optional :: lsync ! if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single inx = size (in, 1) inz = size (in, 3) ina = size (in, 4) nbox = inx*ny*inz*ina ! if (inx /= size (out, 1)) & call stop_fatal ('remap_to_pencil_y_4D: first dimension differs for input and output', lfirst_proc_y) if (inz /= size (out, 3)) & call stop_fatal ('remap_to_pencil_y_4D: third dimension differs for input and output', lfirst_proc_y) if (ina /= size (out, 4)) & call stop_fatal ('remap_to_pencil_y_4D: fourth dimension differs for input and output', lfirst_proc_y) ! if (size (in, 2) /= ny) & call stop_fatal ('remap_to_pencil_y_4D: second dimension of input must be ny', lfirst_proc_y) if (size (out, 2) /= nygrid) & call stop_fatal ('remap_to_pencil_y_4D: second dimension of output must be nygrid', lfirst_proc_y) ! allocate (recv_buf(inx,ny,inz,ina), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('remap_to_pencil_y_4D: Could not allocate memory for recv_buf', .true.) ! do ibox = 0, nprocy-1 partner = find_proc(ipx,ibox,ipz) if (iproc == partner) then ! data is local out(:,ny*ibox+1:ny*(ibox+1),:,:) = in else ! communicate with partner if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND (in, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else ! below diagonal: receive first, send then call MPI_RECV (recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND (in, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif out(:,ny*ibox+1:ny*(ibox+1),:,:) = recv_buf endif enddo ! deallocate (recv_buf) !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine remap_to_pencil_y_4D !*********************************************************************** subroutine unmap_from_pencil_y_1D(in, out) ! ! Unmaps pencil shaped 1D data distributed on several processors back to normal shape. ! This routine is the inverse of the remap function for nprocy>1. ! ! 13-dec-2010/Bourdin.KIS: coded ! real, dimension(nygrid), intent(in) :: in real, dimension(ny), intent(out) :: out ! !$omp workshare out = in(ny*ipy+1:ny*(ipy+1)) !$omp end workshare ! endsubroutine unmap_from_pencil_y_1D !*********************************************************************** subroutine unmap_from_pencil_y_2D(in, out) ! ! Unmaps pencil shaped 2D data distributed on several processors back to normal shape. ! This routine is the inverse of the remap function for nprocy>1. ! ! 13-dec-2010/Bourdin.KIS: coded ! real, dimension(nx,nygrid), intent(in) :: in real, dimension(nx,ny), intent(out) :: out ! !$omp workshare out = in(:,ny*ipy+1:ny*(ipy+1)) !$omp end workshare ! endsubroutine unmap_from_pencil_y_2D !*********************************************************************** subroutine unmap_from_pencil_y_3D(in, out) ! ! Unmaps pencil shaped 3D data distributed on several processors back to normal shape. ! This routine is the inverse of the remap function for nprocy>1. ! ! 13-dec-2010/Bourdin.KIS: coded ! 
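!  Note: in contrast to the remap routines, this unmap needs no
!  communication - after a remap every rank holds the full nygrid extent
!  and simply keeps its own slab ny*ipy+1:ny*(ipy+1).  Hence the sketch
!  (illustrative only; 'a' and 'p' are placeholder arrays):
!
!    call remap_to_pencil_y_3D (a, p)
!    call unmap_from_pencil_y_3D (p, a)   ! restores the original a
!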
      real, dimension(:,:,:), intent(in) :: in
      real, dimension(:,:,:), intent(out) :: out
!
      !$omp workshare
      out = in(:,ny*ipy+1:ny*(ipy+1),:)
      !$omp end workshare
!
    endsubroutine unmap_from_pencil_y_3D
!***********************************************************************
    subroutine unmap_from_pencil_y_4D(in, out)
!
!  Unmaps pencil shaped 4D data distributed on several processors back to normal shape.
!  This routine is the inverse of the remap function for nprocy>1.
!
!  13-dec-2010/Bourdin.KIS: coded
!
      real, dimension(:,:,:,:), intent(in) :: in
      real, dimension(:,:,:,:), intent(out) :: out
!
      !$omp workshare
      out = in(:,ny*ipy+1:ny*(ipy+1),:,:)
      !$omp end workshare
!
    endsubroutine unmap_from_pencil_y_4D
!***********************************************************************
    subroutine remap_to_pencil_z_1D(in, out,comm,lsync)
!
!  Remaps data distributed on several processors into pencil shape.
!  This routine remaps 1D arrays in z only for nprocz>1.
!
!  13-dec-2010/Bourdin.KIS: coded
!
      real, dimension(nz), intent(in) :: in
      real, dimension(nzgrid), intent(out) :: out
!
      integer :: ibox, partner
      integer, parameter :: ltag=102, utag=103
      integer, dimension(MPI_STATUS_SIZE) :: stat
      real, dimension(nz) :: recv_buf
      integer, optional :: comm
      logical, optional :: lsync
!
      if (loptest(lsync,.true.)) then
        !$omp barrier
      endif
      !$omp single
      do ibox = 0, nprocz-1
        partner = find_proc(ipx,ipy,ibox)
        if (iproc == partner) then ! data is local
          out(nz*ibox+1:nz*(ibox+1)) = in
        else ! communicate with partner
          if (iproc > partner) then ! above diagonal: send first, receive then
            call MPI_SEND (in, nz, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr)
            call MPI_RECV (recv_buf, nz, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr)
          else ! below diagonal: receive first, send then
            call MPI_RECV (recv_buf, nz, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr)
            call MPI_SEND (in, nz, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr)
          endif
          out(nz*ibox+1:nz*(ibox+1)) = recv_buf
        endif
      enddo
      !$omp end single
      if (loptest(lsync,.true.)) then
        !$omp barrier
      endif
!
    endsubroutine remap_to_pencil_z_1D
!***********************************************************************
    subroutine remap_to_pencil_z_2D(in, out,comm,lsync)
!
!  Remaps data distributed on several processors into pencil shape.
!  This routine remaps 2D arrays in z only for nprocz>1.
!
!  13-dec-2010/Bourdin.KIS: coded
!
      real, dimension(:,:), intent(in) :: in
      real, dimension(:,:), intent(out) :: out
!
      integer :: ibox, partner, nbox, alloc_err
      integer, parameter :: ltag=102, utag=103
      integer :: ina ! size of the second dimension
      integer, dimension(MPI_STATUS_SIZE) :: stat
      real, dimension(:,:), allocatable :: recv_buf
      integer, optional :: comm
      logical, optional :: lsync
!
      if (loptest(lsync,.true.)) then
        !$omp barrier
      endif
      !$omp single
      ina = size (in, 2)
      nbox = nz*ina
!
      if (size (in, 1) /= nz) &
          call stop_fatal ('remap_to_pencil_z_2D: first dimension of input must be nz', lfirst_proc_z)
      if (size (out, 1) /= nzgrid) &
          call stop_fatal ('remap_to_pencil_z_2D: first dimension of output must be nzgrid', lfirst_proc_z)
      if (ina /= size (out, 2)) &
          call stop_fatal ('remap_to_pencil_z_2D: second dimension differs for input and output', lfirst_proc_z)
!
!  Allocate memory for large arrays.
      allocate (recv_buf(nz,ina), stat=alloc_err)
      if (alloc_err > 0) call stop_fatal ('remap_to_pencil_z_2D: Could not allocate memory for recv_buf', .true.)
!
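!
!  The loop below assembles the nzgrid-long z-pencil: partner ibox
!  (0..nprocz-1) supplies the rows out(nz*ibox+1:nz*(ibox+1),:).  Worked
!  example (assumed numbers, for illustration only): with nprocz=8 and
!  nz=16, the partner with ibox=3 fills out(49:64,:).
!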
      do ibox = 0, nprocz-1
        partner = find_proc(ipx,ipy,ibox)
        if (iproc == partner) then ! data is local
          out(nz*ibox+1:nz*(ibox+1),:) = in
        else ! communicate with partner
          if (iproc > partner) then ! above diagonal: send first, receive then
            call MPI_SEND (in, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr)
            call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr)
          else ! below diagonal: receive first, send then
            call MPI_RECV (recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr)
            call MPI_SEND (in, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr)
          endif
          out(nz*ibox+1:nz*(ibox+1),:) = recv_buf
        endif
      enddo
!
      deallocate (recv_buf)
      !$omp end single
      if (loptest(lsync,.true.)) then
        !$omp barrier
      endif
!
    endsubroutine remap_to_pencil_z_2D
!***********************************************************************
    subroutine remap_to_pencil_z_3D(in, out,comm,lsync)
!
!  Remaps data distributed on several processors into pencil shape.
!  This routine remaps 3D arrays in z only for nprocz>1.
!
!  13-dec-2010/Bourdin.KIS: coded
!
      real, dimension(:,:,:), intent(in) :: in
      real, dimension(:,:,:), intent(out) :: out
!
      integer :: ibox, partner, nbox, alloc_err
      integer, parameter :: ltag=102, utag=103
      integer :: inx, iny ! size of the first and second dimension
      integer, dimension(MPI_STATUS_SIZE) :: stat
      real, dimension(:,:,:), allocatable :: recv_buf
      integer, optional :: comm
      logical, optional :: lsync
!
      if (loptest(lsync,.true.)) then
        !$omp barrier
      endif
      !$omp single
      inx = size (in, 1)
      iny = size (in, 2)
      nbox = inx*iny*nz
!
      if (inx /= size (out, 1)) &
          call stop_fatal ('remap_to_pencil_z_3D: first dimension differs for input and output', lfirst_proc_y)
      if (iny /= size (out, 2)) &
          call stop_fatal ('remap_to_pencil_z_3D: second dimension differs for input and output', lfirst_proc_y)
!
      if (size (in, 3) /= nz) &
          call stop_fatal ('remap_to_pencil_z_3D: third dimension of input must be nz', lfirst_proc_y)
      if (size (out, 3) /= nzgrid) &
          call stop_fatal ('remap_to_pencil_z_3D: third dimension of output must be nzgrid', lfirst_proc_y)
!
      allocate (recv_buf(inx,iny,nz), stat=alloc_err)
      if (alloc_err > 0) call stop_fatal ('remap_to_pencil_z_3D: Could not allocate memory for recv_buf', .true.)
!
      do ibox = 0, nprocz-1
        partner = find_proc(ipx,ipy,ibox)
        if (iproc == partner) then ! data is local
          out(:,:,nz*ibox+1:nz*(ibox+1)) = in
        else ! communicate with partner
          if (iproc > partner) then ! above diagonal: send first, receive then
            call MPI_SEND (in, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr)
            call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr)
          else ! below diagonal: receive first, send then
            call MPI_RECV (recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr)
            call MPI_SEND (in, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr)
          endif
          out(:,:,nz*ibox+1:nz*(ibox+1)) = recv_buf
        endif
      enddo
!
      deallocate (recv_buf)
      !$omp end single
      if (loptest(lsync,.true.)) then
        !$omp barrier
      endif
!
    endsubroutine remap_to_pencil_z_3D
!***********************************************************************
    subroutine remap_to_pencil_z_4D(in, out,comm,lsync)
!
!  Remaps data distributed on several processors into pencil shape.
!  This routine remaps 4D arrays in z only for nprocz>1.
!
!  13-dec-2010/Bourdin.KIS: coded
!
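!  Typical round trip (illustrative sketch only; 'b' and 'b_pencil' are
!  placeholder arrays, not part of this module):
!
!    real, dimension(nx,ny,nz,3)     :: b
!    real, dimension(nx,ny,nzgrid,3) :: b_pencil
!
!    call remap_to_pencil_z_4D (b, b_pencil)    ! gather the full z-extent
!    ! ... operate along complete z-columns ...
!    call unmap_from_pencil_z_4D (b_pencil, b)  ! distribute back
!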
real, dimension(:,:,:,:), intent(in) :: in real, dimension(:,:,:,:), intent(out) :: out ! integer :: ibox, partner, nbox, alloc_err integer, parameter :: ltag=102, utag=103 integer :: inx, iny, ina ! size of the first, second, and fourth dimension integer, dimension(MPI_STATUS_SIZE) :: stat real, dimension(:,:,:,:), allocatable :: recv_buf integer, optional :: comm logical, optional :: lsync ! if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single inx = size (in, 1) iny = size (in, 2) ina = size (in, 4) nbox = inx*iny*nz*ina ! if (inx /= size (out, 1)) & call stop_fatal ('remap_to_pencil_z_4D: first dimension differs for input and output', lfirst_proc_y) if (iny /= size (out, 2)) & call stop_fatal ('remap_to_pencil_z_4D: second dimension differs for input and output', lfirst_proc_y) if (ina /= size (out, 4)) & call stop_fatal ('remap_to_pencil_z_4D: fourth dimension differs for input and output', lfirst_proc_y) ! if (size (in, 3) /= nz) & call stop_fatal ('remap_to_pencil_z_4D: third dimension of input must be nz', lfirst_proc_y) if (size (out, 3) /= nzgrid) & call stop_fatal ('remap_to_pencil_z_4D: third dimension of output must be nzgrid', lfirst_proc_y) ! allocate (recv_buf(inx,iny,nz,ina), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('remap_to_pencil_z_4D: Could not allocate memory for recv_buf', .true.) ! do ibox = 0, nprocz-1 partner = find_proc(ipx,ipy,ibox) if (iproc == partner) then ! data is local out(:,:,nz*ibox+1:nz*(ibox+1),:) = in else ! communicate with partner if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND (in, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else ! below diagonal: receive first, send then call MPI_RECV (recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND (in, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif out(:,:,nz*ibox+1:nz*(ibox+1),:) = recv_buf endif enddo ! deallocate (recv_buf) !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine remap_to_pencil_z_4D !*********************************************************************** subroutine unmap_from_pencil_z_1D(in, out) ! ! Unmaps pencil shaped 1D data distributed on several processors back to normal shape. ! This routine is the inverse of the remap function for nprocz>1. ! ! 13-dec-2010/Bourdin.KIS: coded ! real, dimension(nzgrid), intent(in) :: in real, dimension(nz), intent(out) :: out ! !$omp workshare out = in(nz*ipz+1:nz*(ipz+1)) !$omp end workshare ! endsubroutine unmap_from_pencil_z_1D !*********************************************************************** subroutine unmap_from_pencil_z_2D(in, out) ! ! Unmaps pencil shaped 2D data distributed on several processors back to normal shape. ! This routine is the inverse of the remap function for nprocz>1. ! ! 13-dec-2010/Bourdin.KIS: coded ! real, dimension(:,:), intent(in) :: in real, dimension(:,:), intent(out) :: out ! !$omp workshare out = in(nz*ipz+1:nz*(ipz+1),:) !$omp end workshare ! endsubroutine unmap_from_pencil_z_2D !*********************************************************************** subroutine unmap_from_pencil_z_3D(in, out) ! ! Unmaps pencil shaped 3D data distributed on several processors back to normal shape. ! This routine is the inverse of the remap function for nprocz>1. ! ! 13-dec-2010/Bourdin.KIS: coded ! 
real, dimension(:,:,:), intent(in) :: in real, dimension(:,:,:), intent(out) :: out ! !$omp workshare out = in(:,:,nz*ipz+1:nz*(ipz+1)) !$omp end workshare ! endsubroutine unmap_from_pencil_z_3D !*********************************************************************** subroutine unmap_from_pencil_z_4D(in, out) ! ! Unmaps pencil shaped 4D data distributed on several processors back to normal shape. ! This routine is the inverse of the remap function for nprocz>1. ! ! 13-dec-2010/Bourdin.KIS: coded ! real, dimension(:,:,:,:), intent(in) :: in real, dimension(:,:,:,:), intent(out) :: out ! !$omp workshare out = in(:,:,nz*ipz+1:nz*(ipz+1),:) !$omp end workshare ! endsubroutine unmap_from_pencil_z_4D !*********************************************************************** subroutine remap_to_pencil_xy_2D(in, out,comm,lsync) ! ! Remaps data distributed on several processors into pencil shape. ! This routine remaps 2D arrays in x and y only for nprocx>1. ! ! 04-jul-2010/Bourdin.KIS: coded ! real, dimension(:,:), intent(in) :: in real, dimension(:,:), intent(out) :: out ! integer, parameter :: inx=nx, iny=ny integer, parameter :: onx=nxgrid, ony=ny/nprocx integer, parameter :: bnx=nx, bny=ny/nprocx ! transfer box sizes integer :: ibox, partner, nbox, alloc_err integer, parameter :: ltag=104, utag=105 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:), allocatable :: send_buf, recv_buf integer, optional :: comm logical, optional :: lsync ! if (nprocx == 1) then !$omp workshare out = in !$omp end workshare return endif if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single ! nbox = bnx*bny ! if (mod (ny, nprocx) /= 0) & call stop_fatal ('remap_to_pencil_xy_2D: ny needs to be an integer multiple of nprocx', lfirst_proc_xy) ! if ((size (in, 1) /= inx) .or. ((size (in, 2) /= iny))) & call stop_fatal ('remap_to_pencil_xy_2D: input array size mismatch /= nx,ny', lfirst_proc_xy) if ((size (out, 1) /= onx) .or. ((size (out, 2) /= ony))) & call stop_fatal ('remap_to_pencil_xy_2D: output array size mismatch /= nxgrid,ny/nprocx', lfirst_proc_xy) ! allocate (send_buf(bnx,bny), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('remap_to_pencil_xy_2D: not enough memory for send_buf!', .true.) allocate (recv_buf(bnx,bny), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('remap_to_pencil_xy_2D: not enough memory for recv_buf!', .true.) ! do ibox = 0, nprocx-1 partner = find_proc(ibox,ipy,ipz) if (iproc == partner) then ! data is local out(bnx*ibox+1:bnx*(ibox+1),:) = in(:,bny*ibox+1:bny*(ibox+1)) else ! communicate with partner send_buf = in(:,bny*ibox+1:bny*(ibox+1)) if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND (send_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else ! below diagonal: receive first, send then call MPI_RECV (recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND (send_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif out(bnx*ibox+1:bnx*(ibox+1),:) = recv_buf endif enddo ! deallocate (send_buf, recv_buf) !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine remap_to_pencil_xy_2D !*********************************************************************** subroutine remap_to_pencil_xy_2D_other(in, out,comm,lsync) ! ! Remaps data distributed on several processors into pencil shape. ! 
This routine remaps 2D arrays in x and y only for nprocx>1. ! ! 04-jul-2010/Bourdin.KIS: coded ! real, dimension(:,:), intent(in) :: in real, dimension(:,:), intent(out) :: out ! integer :: nnx,nny,inx, iny, onx, ony, bnx, bny integer :: ibox, partner, nbox, alloc_err integer, parameter :: ltag=104, utag=105 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:), allocatable :: send_buf, recv_buf integer, optional :: comm logical, optional :: lsync ! if (nprocx == 1) then !$omp workshare out = in !$omp end workshare return endif if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single nnx=size(in,1) ; nny=size(in,2) inx=nnx ; iny=nny onx=nprocx*nnx ; ony=nny/nprocx bnx=nnx ; bny=nny/nprocx ! transfer box sizes ! ! nbox = bnx*bny ! if (mod (nny, nprocx) /= 0) & call stop_fatal ('remap_to_pencil_xy_2D_other: nny needs to be an integer multiple of nprocx', lfirst_proc_xy) ! if ((size (in, 1) /= inx) .or. ((size (in, 2) /= iny))) & call stop_fatal ('remap_to_pencil_xy_2D_other: input array size mismatch /= nx,ny', lfirst_proc_xy) if ((size (out, 1) /= onx) .or. ((size (out, 2) /= ony))) & call stop_fatal ('remap_to_pencil_xy_2D_other: output array size mismatch /= nxgrid,ny/nprocx', lfirst_proc_xy) ! allocate (send_buf(bnx,bny), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('remap_to_pencil_xy_2D_other: not enough memory for send_buf!', .true.) allocate (recv_buf(bnx,bny), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('remap_to_pencil_xy_2D_other: not enough memory for recv_buf!', .true.) ! do ibox = 0, nprocx-1 partner = find_proc(ibox,ipy,ipz) if (iproc == partner) then ! data is local out(bnx*ibox+1:bnx*(ibox+1),:) = in(:,bny*ibox+1:bny*(ibox+1)) else ! communicate with partner send_buf = in(:,bny*ibox+1:bny*(ibox+1)) if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND (send_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else ! below diagonal: receive first, send then call MPI_RECV (recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND (send_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif out(bnx*ibox+1:bnx*(ibox+1),:) = recv_buf endif enddo ! deallocate (send_buf, recv_buf) !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine remap_to_pencil_xy_2D_other !*********************************************************************** subroutine remap_to_pencil_xy_3D(in, out,comm,lsync) ! ! Remaps data distributed on several processors into pencil shape. ! This routine remaps 3D arrays in x and y only for nprocx>1. ! ! 14-jul-2010/Bourdin.KIS: coded ! 13-sep-2014/ccyang: revamped to accommodate either with or without ghost cells. ! real, dimension(:,:,:), intent(in) :: in real, dimension(:,:,:), intent(out) :: out ! integer, parameter :: bnx = nx, bny = ny / nprocx integer, parameter :: ltag = 104, utag = 105 real, dimension(:,:,:), allocatable :: send_buf, recv_buf integer, dimension(MPI_STATUS_SIZE) :: stat integer :: inx, iny, inz integer :: onx, ony, onz integer :: ibox, partner, nbox, alloc_err integer :: ngc integer, optional :: comm logical, optional :: lsync ! ! No need to remap if nprocx = 1. ! if (nprocx == 1) then !$omp workshare out = in !$omp end workshare return endif if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single ! ! Check the dimensions. ! 
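!  Two input shapes are accepted: (nx,ny,:) without ghost zones, remapped
!  to (nxgrid,ny/nprocx,:), or (mx,my,:) including ghost zones, remapped to
!  (mxgrid,ny/nprocx+2*nghost,:).  Sketch of the ghost-cell case
!  (illustrative only; 'f_slab' and 'f_pencil' are placeholder arrays):
!
!    real, dimension(mx,my,nz)                     :: f_slab
!    real, dimension(mxgrid,ny/nprocx+2*nghost,nz) :: f_pencil
!
!    call remap_to_pencil_xy_3D (f_slab, f_pencil)
!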
if (mod(ny, nprocx) /= 0) & call stop_fatal('remap_to_pencil_xy_3D: ny needs to be an integer multiple of nprocx', lfirst_proc_xy) ! inx = size(in, 1) iny = size(in, 2) dim: if (inx == nx .and. iny == ny) then onx = nxgrid ony = ny / nprocx ngc = 0 elseif (inx == mx .and. iny == my) then dim onx = mxgrid ony = ny / nprocx + 2 * nghost ngc = nghost else dim call stop_fatal('remap_to_pencil_xy_3D: input array size mismatch', lfirst_proc_xy) endif dim ! inz = size(in, 3) onz = size(out, 3) nbox = (bnx + 2 * ngc) * (bny + 2 * ngc) * onz ! if (size(out,1) /= onx .or. size(out,2) /= ony) & call stop_fatal('remap_to_pencil_xy_3D: output array size mismatch', lfirst_proc_xy) ! if (inz /= onz) call stop_fatal('remap_to_pencil_xy_3D: sizes differ in the z direction', lfirst_proc_xy) ! ! Allocate working arrays. ! allocate(send_buf(bnx+2*ngc,bny+2*ngc,inz), recv_buf(bnx+2*ngc,bny+2*ngc,inz), stat=alloc_err) if (alloc_err > 0) call stop_fatal('remap_to_pencil_xy_3D: allocation failed. ', .true.) ! ! Communicate. ! box: do ibox = 0, nprocx - 1 partner = find_proc(ibox,ipy,ipz) local: if (iproc == partner) then ! data is local recv_buf = in(:,bny*ibox+1:bny*(ibox+1)+2*ngc,:) else local ! communicate with partner send_buf = in(:,bny*ibox+1:bny*(ibox+1)+2*ngc,:) commun: if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND(send_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV(recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else commun ! below diagonal: receive first, send then call MPI_RECV(recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND(send_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif commun endif local out(ngc+bnx*ibox+1:ngc+bnx*(ibox+1),:,:) = recv_buf(ngc+1:ngc+bnx,:,:) ghost: if (ngc > 0) then if (ibox == 0) out(1:ngc,:,:) = recv_buf(1:ngc,:,:) if (ibox == nprocx - 1) out(onx-ngc+1:onx,:,:) = recv_buf(ngc+bnx+1:bnx+2*ngc,:,:) endif ghost enddo box ! ! Deallocate working arrays. ! deallocate (send_buf, recv_buf) !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine remap_to_pencil_xy_3D !*********************************************************************** subroutine remap_to_pencil_xy_4D(in, out,comm,lsync) ! ! Remaps data distributed on several processors into pencil shape. ! This routine remaps 4D arrays in x and y only for nprocx>1. ! ! 14-jul-2010/Bourdin.KIS: coded ! real, dimension(:,:,:,:), intent(in) :: in real, dimension(:,:,:,:), intent(out) :: out ! integer, parameter :: inx=nx, iny=ny integer, parameter :: onx=nxgrid, ony=ny/nprocx integer :: inz, ina, onz, ona ! sizes of in and out arrays integer, parameter :: bnx=nx, bny=ny/nprocx ! transfer box sizes integer :: ibox, partner, nbox, alloc_err integer, parameter :: ltag=104, utag=105 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:,:,:), allocatable :: send_buf, recv_buf integer, optional :: comm logical, optional :: lsync ! if (nprocx == 1) then !$omp workshare out = in !$omp end workshare return endif if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single ! inz = size (in, 3) ina = size (in, 4) onz = size (out, 3) ona = size (out, 4) nbox = bnx*bny*onz*ona ! if (mod (ny, nprocx) /= 0) & call stop_fatal ('remap_to_pencil_xy_4D: ny needs to be an integer multiple of nprocx', lfirst_proc_xy) ! if ((size (in, 1) /= inx) .or. 
((size (in, 2) /= iny))) & call stop_fatal ('remap_to_pencil_xy_4D: input array size mismatch /= nx,ny', lfirst_proc_xy) if ((size (out, 1) /= onx) .or. ((size (out, 2) /= ony))) & call stop_fatal ('remap_to_pencil_xy_4D: output array size mismatch /= nxgrid,ny/nprocx', lfirst_proc_xy) if (inz /= onz) & call stop_fatal ('remap_to_pencil_xy_4D: inz/=onz - sizes differ in the z direction', lfirst_proc_xy) if (ina /= ona) & call stop_fatal ('remap_to_pencil_xy_4D: ina/=ona - sizes differ in the 4th dimension', lfirst_proc_xy) ! allocate (send_buf(bnx,bny,onz,ona), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('remap_to_pencil_xy_4D: not enough memory for send_buf!', .true.) allocate (recv_buf(bnx,bny,onz,ona), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('remap_to_pencil_xy_4D: not enough memory for recv_buf!', .true.) ! do ibox = 0, nprocx-1 partner = find_proc(ibox,ipy,ipz) if (iproc == partner) then ! data is local out(bnx*ibox+1:bnx*(ibox+1),:,:,:) = in(:,bny*ibox+1:bny*(ibox+1),:,:) else ! communicate with partner send_buf = in(:,bny*ibox+1:bny*(ibox+1),:,:) if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND (send_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else ! below diagonal: receive first, send then call MPI_RECV (recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND (send_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif out(bnx*ibox+1:bnx*(ibox+1),:,:,:) = recv_buf endif enddo ! deallocate (send_buf, recv_buf) !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine remap_to_pencil_xy_4D !*********************************************************************** subroutine unmap_from_pencil_xy_2D(in, out,comm,lsync) ! ! Unmaps pencil shaped 2D data distributed on several processors back to normal shape. ! This routine is the inverse of the remap function for nprocx>1. ! ! 4-jul-2010/Bourdin.KIS: coded ! real, dimension(:,:), intent(in) :: in real, dimension(:,:), intent(out) :: out ! integer, parameter :: inx=nxgrid, iny=ny/nprocx integer, parameter :: onx=nx, ony=ny integer, parameter :: bnx=nx, bny=ny/nprocx ! transfer box sizes integer :: ibox, partner, nbox, alloc_err integer, parameter :: ltag=106, utag=107 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:), allocatable :: send_buf, recv_buf integer, optional :: comm logical, optional :: lsync ! if (nprocx == 1) then !$omp workshare out = in !$omp end workshare return endif if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single ! nbox = bnx*bny ! if (mod (ny, nprocx) /= 0) & call stop_fatal ('unmap_from_pencil_xy_2D: ny needs to be an integer multiple of nprocx', lfirst_proc_xy) ! if ((size (in, 1) /= inx) .or. ((size (in, 2) /= iny))) & call stop_fatal ('unmap_from_pencil_xy_2D: input array size mismatch /= nxgrid,ny/nprocx', lfirst_proc_xy) if ((size (out, 1) /= onx) .or. ((size (out, 2) /= ony))) & call stop_fatal ('unmap_from_pencil_xy_2D: output array size mismatch /= nx,ny', lfirst_proc_xy) ! allocate (send_buf(bnx,bny), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('unmap_from_pencil_xy_2D: not enough memory for send_buf!', .true.) allocate (recv_buf(bnx,bny), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('unmap_from_pencil_xy_2D: not enough memory for recv_buf!', .true.) ! 
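!
!  In the inverse direction below, iteration ibox sends the local x-block
!  in(bnx*ibox+1:bnx*(ibox+1),:) to processor find_proc(ibox,ipy,ipz) and
!  stores the block received from that processor in the local y-slab
!  out(:,bny*ibox+1:bny*(ibox+1)).  Worked example (assumed numbers, for
!  illustration only): nprocx=2, nx=32, ny=16 give bnx=32, bny=8, so
!  iteration ibox=1 exchanges in(33:64,:) and fills out(:,9:16).
!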
do ibox = 0, nprocx-1 partner = find_proc(ibox,ipy,ipz) if (iproc == partner) then ! data is local out(:,bny*ibox+1:bny*(ibox+1)) = in(bnx*ibox+1:bnx*(ibox+1),:) else ! communicate with partner send_buf = in(bnx*ibox+1:bnx*(ibox+1),:) if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND (send_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else ! below diagonal: receive first, send then call MPI_RECV (recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND (send_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif out(:,bny*ibox+1:bny*(ibox+1)) = recv_buf endif enddo ! deallocate (send_buf, recv_buf) !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine unmap_from_pencil_xy_2D !*********************************************************************** subroutine unmap_from_pencil_xy_2D_other(in, out,comm,lsync) ! ! Unmaps pencil shaped 2D data distributed on several processors back to normal shape. ! This routine is the inverse of the remap function for nprocx>1. ! ! 4-jul-2010/Bourdin.KIS: coded ! real, dimension(:,:), intent(in) :: in real, dimension(:,:), intent(out) :: out ! integer :: nnx,nny,inx, iny, onx, ony, bnx, bny, nxgrid_other, nygrid_other integer :: ibox, partner, nbox, alloc_err integer, parameter :: ltag=106, utag=107 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:), allocatable :: send_buf, recv_buf integer, optional :: comm logical, optional :: lsync if (nprocx == 1) then !$omp workshare out = in !$omp end workshare return endif ! if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single if (nxgrid/=nygrid) & call stop_fatal("unmap_from_pencil_xy_2D_other: this subroutine works only for nxgrid==nygrid",lfirst_proc_xy) ! nxgrid_other=size(in,1) ; nygrid_other=nxgrid_other nnx=nxgrid_other/nprocx ; nny=nygrid_other/nprocy inx=nxgrid_other ; iny=nny/nprocx onx=nnx ; ony=nny bnx=nnx ; bny=nny/nprocx ! transfer box sizes ! nbox = bnx*bny ! if (mod (nny, nprocx) /= 0) & call stop_fatal ('unmap_from_pencil_xy_2D_other: nny needs to be an integer multiple of nprocx', lfirst_proc_xy) ! if ((size (in, 1) /= inx) .or. ((size (in, 2) /= iny))) & call stop_fatal ('unmap_from_pencil_xy_2D_other: input array size mismatch /= nxgrid_other,nny/nprocx', lfirst_proc_xy) if ((size (out, 1) /= onx) .or. ((size (out, 2) /= ony))) & call stop_fatal ('unmap_from_pencil_xy_2D_other: output array size mismatch /= nnx,nny', lfirst_proc_xy) ! allocate (send_buf(bnx,bny), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('unmap_from_pencil_xy_2D_other: not enough memory for send_buf!', .true.) allocate (recv_buf(bnx,bny), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('unmap_from_pencil_xy_2D_other: not enough memory for recv_buf!', .true.) ! do ibox = 0, nprocx-1 partner = find_proc(ibox,ipy,ipz) if (iproc == partner) then ! data is local out(:,bny*ibox+1:bny*(ibox+1)) = in(bnx*ibox+1:bnx*(ibox+1),:) else ! communicate with partner send_buf = in(bnx*ibox+1:bnx*(ibox+1),:) if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND (send_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else ! 
below diagonal: receive first, send then call MPI_RECV (recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND (send_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif out(:,bny*ibox+1:bny*(ibox+1)) = recv_buf endif enddo ! deallocate (send_buf, recv_buf) !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine unmap_from_pencil_xy_2D_other !*********************************************************************** subroutine unmap_from_pencil_xy_3D(in, out,comm,lsync) ! ! Unmaps pencil shaped 3D data distributed on several processors back to normal shape. ! This routine is the inverse of the remap function for nprocx>1. ! ! 14-jul-2010/Bourdin.KIS: coded ! 14-sep-2014/ccyang: revamped to accommodate either with or without ghost cells. ! real, dimension(:,:,:), intent(in) :: in real, dimension(:,:,:), intent(out) :: out ! integer, parameter :: nypx = ny / nprocx integer, parameter :: bnx = nx, bny = nypx integer, parameter :: ltag = 106, utag = 107 real, dimension(:,:,:), allocatable :: send_buf, recv_buf integer, dimension(MPI_STATUS_SIZE) :: stat integer :: inx, iny, inz integer :: onx, ony, onz integer :: ibox, partner, nbox, alloc_err integer :: ngc integer, optional :: comm logical, optional :: lsync ! ! No need to unmap if nprocx = 1. ! if (nprocx == 1) then !$omp workshare out = in !$omp end workshare return endif if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single ! ! Check the dimensions. ! if (mod(ny, nprocx) /= 0) & call stop_fatal('unmap_from_pencil_xy_3D: ny needs to be an integer multiple of nprocx', lfirst_proc_xy) ! inx = size(in, 1) iny = size(in, 2) dim: if (inx == nxgrid .and. iny == nypx) then onx = nx ony = ny ngc = 0 elseif (inx == mxgrid .and. iny == nypx + 2 * nghost) then dim onx = mx ony = my ngc = nghost else dim call stop_fatal('unmap_from_pencil_xy_3D: input array size mismatch', lfirst_proc_xy) endif dim ! if (size(out, 1) /= onx .or. size(out, 2) /= ony) & call stop_fatal('unmap_from_pencil_xy_3D: output array size mismatch', lfirst_proc_xy) ! inz = size(in, 3) onz = size(out, 3) if (inz /= onz) call stop_fatal('unmap_from_pencil_xy_3D: sizes differ in the z direction', lfirst_proc_xy) ! nbox = (bnx + 2 * ngc) * (bny + 2 * ngc) * onz ! ! Allocate working arrays. ! allocate(send_buf(bnx+2*ngc,bny+2*ngc,inz), recv_buf(bnx+2*ngc,bny+2*ngc,inz), stat=alloc_err) if (alloc_err > 0) call stop_fatal('unmap_from_pencil_xy_3D: allocation failed. ', .true.) ! ! Communicate. ! box: do ibox = 0, nprocx - 1 partner = find_proc(ibox,ipy,ipz) local: if (iproc == partner) then ! data is local recv_buf = in(bnx*ibox+1:bnx*(ibox+1)+2*ngc,:,:) else local ! communicate with partner send_buf = in(bnx*ibox+1:bnx*(ibox+1)+2*ngc,:,:) commun: if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND(send_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV(recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else commun ! 
below diagonal: receive first, send then call MPI_RECV(recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND(send_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif commun endif local out(:,ngc+bny*ibox+1:ngc+bny*(ibox+1),:) = recv_buf(:,ngc+1:ngc+bny,:) ghost: if (ngc > 0) then if (ibox == 0) out(:,1:ngc,:) = recv_buf(:,1:ngc,:) if (ibox == nprocx - 1) out(:,ony-ngc+1:ony,:) = recv_buf(:,ngc+bny+1:bny+2*ngc,:) endif ghost enddo box ! ! Deallocate working arrays. ! deallocate(send_buf, recv_buf) !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine unmap_from_pencil_xy_3D !*********************************************************************** subroutine unmap_from_pencil_xy_4D(in, out,comm,lsync) ! ! Unmaps pencil shaped 4D data distributed on several processors back to normal shape. ! This routine is the inverse of the remap function for nprocx>1. ! ! 14-jul-2010/Bourdin.KIS: coded ! real, dimension(:,:,:,:), intent(in) :: in real, dimension(:,:,:,:), intent(out) :: out ! integer, parameter :: inx=nxgrid, iny=ny/nprocx integer, parameter :: onx=nx, ony=ny integer :: inz, ina, onz, ona ! sizes of in and out arrays integer, parameter :: bnx=nx, bny=ny/nprocx ! transfer box sizes integer :: ibox, partner, nbox, alloc_err integer, parameter :: ltag=106, utag=107 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:,:,:), allocatable :: send_buf, recv_buf integer, optional :: comm logical, optional :: lsync ! if (nprocx == 1) then !$omp workshare out = in !$omp end workshare return endif if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single ! inz = size (in, 3) ina = size (in, 4) onz = size (out, 3) ona = size (out, 4) nbox = bnx*bny*onz*ona ! if (mod (ny, nprocx) /= 0) & call stop_fatal ('unmap_from_pencil_xy_4D: ny needs to be an integer multiple of nprocx', lfirst_proc_xy) ! if ((size (in, 1) /= inx) .or. ((size (in, 2) /= iny))) & call stop_fatal ('unmap_from_pencil_xy_4D: input array size mismatch /= nxgrid,ny/nprocx', lfirst_proc_xy) if ((size (out, 1) /= onx) .or. ((size (out, 2) /= ony))) & call stop_fatal ('unmap_from_pencil_xy_4D: output array size mismatch /= nx,ny', lfirst_proc_xy) if (inz /= onz) & call stop_fatal ('unmap_from_pencil_xy_4D: inz/=onz - sizes differ in the z direction', lfirst_proc_xy) if (ina /= ona) & call stop_fatal ('unmap_from_pencil_xy_4D: ina/=ona - sizes differ in the 4th dimension', lfirst_proc_xy) ! allocate (send_buf(bnx,bny,onz,ona), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('unmap_from_pencil_xy_4D: not enough memory for send_buf!', .true.) allocate (recv_buf(bnx,bny,onz,ona), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('unmap_from_pencil_xy_4D: not enough memory for recv_buf!', .true.) ! do ibox = 0, nprocx-1 partner = find_proc(ibox,ipy,ipz) if (iproc == partner) then ! data is local out(:,bny*ibox+1:bny*(ibox+1),:,:) = in(bnx*ibox+1:bnx*(ibox+1),:,:,:) else ! communicate with partner send_buf = in(bnx*ibox+1:bnx*(ibox+1),:,:,:) if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND (send_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else ! 
below diagonal: receive first, send then call MPI_RECV (recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND (send_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif out(:,bny*ibox+1:bny*(ibox+1),:,:) = recv_buf endif enddo ! deallocate (send_buf, recv_buf) !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine unmap_from_pencil_xy_4D !*********************************************************************** subroutine transp_pencil_xy_2D(in, out,comm,lsync) ! ! Transpose 2D data distributed on several processors. ! This routine transposes arrays in x and y only. ! The data must be mapped in pencil shape, especially for nprocx>1. ! ! 4-jul-2010/Bourdin.KIS: coded, adapted parts of transp_xy ! 21-jun-2013/Bourdin.KIS: reworked, parellized MPI communication ! use General, only: count_bits ! real, dimension(:,:), intent(in) :: in real, dimension(:,:), intent(out) :: out ! integer :: inx, iny, onx, ony ! sizes of in and out arrays integer :: bnx, bny, nbox ! destination box sizes and number of elements integer :: num_it ! number of iterations for box-data transfer integer :: it, ibox, partner, alloc_err integer, parameter :: ltag=108, utag=109 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:), allocatable :: send_buf, recv_buf integer, optional :: comm logical, optional :: lsync ! if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single inx = size (in, 1) iny = size (in, 2) onx = size (out, 1) ony = size (out, 2) bnx = onx/nprocxy bny = ony nbox = bnx*bny ! if (mod (onx, nprocxy) /= 0) & call stop_fatal ('transp_pencil_xy_2D: onx needs to be an integer multiple of nprocxy', lfirst_proc_xy) ! if ((inx /= bny*nprocxy) .or. (iny /= bnx)) & call stop_fatal ('transp_pencil_xy_2D: input array has unmatching size', lfirst_proc_xy) if ((onx /= bnx*nprocxy) .or. (ony /= bny)) & call stop_fatal ('transp_pencil_xy_2D: output array has unmatching size', lfirst_proc_xy) ! allocate (send_buf(bnx,bny), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('transp_pencil_xy_2D: not enough memory for send_buf!', .true.) allocate (recv_buf(bnx,bny), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('transp_pencil_xy_2D: not enough memory for recv_buf!', .true.) ! num_it = 2**count_bits (nprocxy-1) do it = 0, num_it-1 ibox = IEOR ((iproc - ipz*nprocxy), it) partner = ipz*nprocxy + ibox if (ibox >= nprocxy) cycle if (iproc == partner) then ! data is local out(bnx*ibox+1:bnx*(ibox+1),:) = transpose (in(bny*ibox+1:bny*(ibox+1),:)) else ! communicate with partner send_buf = transpose (in(bny*ibox+1:bny*(ibox+1),:)) if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND (send_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else ! below diagonal: receive first, send then call MPI_RECV (recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND (send_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif out(bnx*ibox+1:bnx*(ibox+1),:) = recv_buf endif enddo ! deallocate (send_buf, recv_buf) !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine transp_pencil_xy_2D !*********************************************************************** subroutine transp_pencil_xy_3D(in, out, lghost,comm,lsync) ! ! 
Transpose 3D data distributed on several processors. ! This routine transposes arrays in x and y only. ! The data must be mapped in pencil shape, especially for nprocx>1. ! ! 14-jul-2010/Bourdin.KIS: coded, adapted parts of transp_xy ! 21-jun-2013/Bourdin.KIS: reworked, parellized MPI communication ! 15-sep-2014/ccyang: revamped to accommodate arrays either with or without ghost cells ! use General, only: count_bits ! real, dimension(:,:,:), intent(in) :: in real, dimension(:,:,:), intent(out) :: out logical, intent(in), optional :: lghost ! integer, parameter :: ltag = 108, utag = 109 real, dimension(:,:,:), allocatable :: send_buf, recv_buf integer, dimension(MPI_STATUS_SIZE) :: stat integer :: inx, iny, inz, onx, ony, onz ! sizes of in and out arrays integer :: bnx, bny, nbox ! destination box sizes and number of elements integer :: ibox, partner, alloc_err, iz, ngc integer, optional :: comm logical, optional :: lsync ! ! Check if ghost cells are included. ! if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single ngc = 0 if (present(lghost)) then if (lghost) ngc = nghost endif ! ! Check the dimensions. ! inx = size(in, 1) iny = size(in, 2) inz = size(in, 3) onx = size(out, 1) ony = size(out, 2) onz = size(out, 3) ! bnx = (inx - 2 * ngc) / nprocxy bny = iny - 2 * ngc ! if ((inx /= bnx * nprocxy + 2 * ngc) .or. (iny /= bny + 2 * ngc)) & call stop_fatal('transp_pencil_xy_3D: input array has unmatching shape', lfirst_proc_xy) if ((onx /= bny * nprocxy + 2 * ngc) .or. (ony /= bnx + 2 * ngc)) & call stop_fatal('transp_pencil_xy_3D: output array has unmatching shape', lfirst_proc_xy) if (inz /= onz) call stop_fatal('transp_pencil_xy_3D: sizes differ in the z direction', lfirst_proc_xy) ! nbox = (bnx + 2 * ngc) * (bny + 2 * ngc) * onz ! ! Allocate working arrays. ! allocate (send_buf(bnx+2*ngc,bny+2*ngc,onz), recv_buf(bnx+2*ngc,bny+2*ngc,onz), stat=alloc_err) if (alloc_err > 0) call stop_fatal('transp_pencil_xy_3D: allocation failed. ', .true.) ! ! Communicate. ! box: do ibox = 0, nprocxy - 1 partner = find_proc(modulo(ibox,nprocx),ibox/nprocx,ipz) local: if (iproc == partner) then ! data is local recv_buf = in(bnx*ibox+1:bnx*(ibox+1)+2*ngc,:,:) else local ! communicate with partner send_buf = in(bnx*ibox+1:bnx*(ibox+1)+2*ngc,:,:) if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND(send_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV(recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else call MPI_RECV(recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND(send_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif endif local do iz = 1, inz out(ngc+bny*ibox+1:ngc+bny*(ibox+1),:,iz) = transpose(recv_buf(:,ngc+1:ngc+bny,iz)) if (ngc > 0) then if (ibox == 0) out(1:ngc,:,iz) = transpose(recv_buf(:,1:ngc,iz)) if (ibox == nprocxy - 1) out(onx-ngc+1:onx,:,iz) = transpose(recv_buf(:,ngc+bny+1:bny+2*ngc,iz)) endif enddo enddo box ! ! Deallocate working arrays. ! deallocate (send_buf, recv_buf) !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine transp_pencil_xy_3D !*********************************************************************** subroutine transp_pencil_xy_4D(in, out,comm,lsync) ! ! Transpose 4D data distributed on several processors. ! This routine transposes arrays in x and y only. ! The data must be mapped in pencil shape, especially for nprocx>1. ! ! 
14-jul-2010/Bourdin.KIS: coded, adapted parts of transp_xy ! 21-jun-2013/Bourdin.KIS: reworked, parellized MPI communication ! use General, only: count_bits ! real, dimension(:,:,:,:), intent(in) :: in real, dimension(:,:,:,:), intent(out) :: out ! integer :: inx, iny, inz, ina, onx, ony, onz, ona ! sizes of in and out arrays integer :: bnx, bny, nbox ! destination box sizes and number of elements integer :: num_it ! number of iterations for box-data transfer integer :: it, ibox, partner, alloc_err, pos_z, pos_a integer, parameter :: ltag=108, utag=109 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:,:,:), allocatable :: send_buf, recv_buf integer, optional :: comm logical, optional :: lsync ! if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single inx = size (in, 1) iny = size (in, 2) inz = size (in, 3) ina = size (in, 4) onx = size (out, 1) ony = size (out, 2) onz = size (out, 3) ona = size (out, 4) bnx = onx/nprocxy bny = ony nbox = bnx*bny*onz*ona ! if (mod (onx, nprocxy) /= 0) & call stop_fatal ('transp_pencil_xy_4D: onx needs to be an integer multiple of nprocxy', lfirst_proc_xy) ! if ((inx /= bny*nprocxy) .or. (iny /= bnx)) & call stop_fatal ('transp_pencil_xy_4D: input array has unmatching size', lfirst_proc_xy) if ((onx /= bnx*nprocxy) .or. (ony /= bny)) & call stop_fatal ('transp_pencil_xy_4D: output array has unmatching size', lfirst_proc_xy) if (inz /= onz) & call stop_fatal ('transp_pencil_xy_4D: inz/=onz - sizes differ in the z direction', lfirst_proc_xy) if (ina /= ona) & call stop_fatal ('transp_pencil_xy_4D: ina/=ona - sizes differ in the 4th dimension', lfirst_proc_xy) ! allocate (send_buf(bnx,bny,onz,ona), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('transp_pencil_xy_4D: not enough memory for send_buf!', .true.) allocate (recv_buf(bnx,bny,onz,ona), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('transp_pencil_xy_4D: not enough memory for recv_buf!', .true.) ! num_it = 2**count_bits (nprocxy-1) do it = 0, num_it-1 ibox = IEOR ((iproc - ipz*nprocxy), it) partner = ipz*nprocxy + ibox if (ibox >= nprocxy) cycle if (iproc == partner) then ! data is local do pos_z = 1, onz do pos_a = 1, ona out(bnx*ibox+1:bnx*(ibox+1),:,pos_z,pos_a) = transpose(in(bny*ibox+1:bny*(ibox+1),:,pos_z,pos_a)) enddo enddo else ! communicate with partner do pos_z = 1, onz do pos_a = 1, ona send_buf(:,:,pos_z,pos_a) = transpose(in(bny*ibox+1:bny*(ibox+1),:,pos_z,pos_a)) enddo enddo if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND (send_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else ! below diagonal: receive first, send then call MPI_RECV (recv_buf, nbox, mpi_precision, partner, utag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND (send_buf, nbox, mpi_precision, partner, ltag, ioptest(comm,MPI_COMM_GRID), mpierr) endif out(bnx*ibox+1:bnx*(ibox+1),:,:,:) = recv_buf endif enddo ! deallocate (send_buf, recv_buf) !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine transp_pencil_xy_4D !*********************************************************************** subroutine remap_to_pencil_yz_3D(in, out,comm,lsync) ! ! Remaps data distributed on several processors into z-pencil shape. ! This routine remaps 3D arrays in y and z only for nprocz>1. ! ! 27-oct-2010/Bourdin.KIS: coded ! 
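!  Typical round trip (illustrative sketch only; 'u' and 'u_zpencil' are
!  placeholder arrays, not part of this module):
!
!    real, dimension(nx,ny,nz)            :: u
!    real, dimension(nx,ny/nprocz,nzgrid) :: u_zpencil
!
!    call remap_to_pencil_yz_3D (u, u_zpencil)    ! full z-extent, reduced y
!    ! ... operate along complete z-columns ...
!    call unmap_from_pencil_yz_3D (u_zpencil, u)  ! back to the usual layout
!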
real, dimension(:,:,:), intent(in) :: in real, dimension(:,:,:), intent(out) :: out ! integer, parameter :: iny=ny, inz=nz integer, parameter :: ony=ny/nprocz, onz=nzgrid integer, parameter :: bny=ny/nprocz, bnz=nz ! transfer box sizes integer :: inx, onx ! sizes of in and out arrays integer :: ibox, partner, nbox, alloc_err integer, parameter :: ytag=110 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:,:), allocatable :: send_buf, recv_buf integer, optional :: comm logical, optional :: lsync ! if (nprocz == 1) then !$omp workshare out = in !$omp end workshare return endif if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single ! inx = size (in, 1) onx = size (out, 1) nbox = onx*bny*bnz ! if (mod (ny, nprocz) /= 0) & call stop_fatal ('remap_to_pencil_yz_3D: ny needs to be an integer multiple of nprocz', lfirst_proc_yz) ! if ((size (in, 2) /= iny) .or. ((size (in, 3) /= inz))) & call stop_fatal ('remap_to_pencil_yz_3D: input array size mismatch /= ny,nz', lfirst_proc_yz) if ((size (out, 2) /= ony) .or. ((size (out, 3) /= onz))) & call stop_fatal ('remap_to_pencil_yz_3D: output array size mismatch /= ny/nprocz,nzgrid', lfirst_proc_yz) if (inx /= onx) & call stop_fatal ('remap_to_pencil_yz_3D: inx/=onx - sizes differ in the x direction', lfirst_proc_yz) ! allocate (send_buf(onx,bny,bnz), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('remap_to_pencil_yz_3D: not enough memory for send_buf!', .true.) allocate (recv_buf(onx,bny,bnz), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('remap_to_pencil_yz_3D: not enough memory for recv_buf!', .true.) ! do ibox = 0, nprocz-1 partner = find_proc(ipx,ipy,ibox) if (iproc == partner) then ! data is local out(:,:,bnz*ibox+1:bnz*(ibox+1)) = in(:,bny*ibox+1:bny*(ibox+1),:) else ! communicate with partner send_buf = in(:,bny*ibox+1:bny*(ibox+1),:) if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND (send_buf, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else ! below diagonal: receive first, send then call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND (send_buf, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif ! out(:,:,bnz*ibox+1:bnz*(ibox+1)) = recv_buf endif enddo ! deallocate (send_buf, recv_buf) !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine remap_to_pencil_yz_3D !*********************************************************************** subroutine remap_to_pencil_yz_4D(in, out,comm,lsync) ! ! Remaps data distributed on several processors into z-pencil shape. ! This routine remaps 4D arrays in y and z only for nprocz>1. ! ! 27-oct-2010/Bourdin.KIS: coded ! real, dimension(:,:,:,:), intent(in) :: in real, dimension(:,:,:,:), intent(out) :: out ! integer, parameter :: iny=ny, inz=nz integer, parameter :: ony=ny/nprocz, onz=nzgrid integer, parameter :: bny=ny/nprocz, bnz=nz ! transfer box sizes integer :: inx, ina, onx, ona ! sizes of in and out arrays integer :: ibox, partner, nbox, alloc_err integer, parameter :: ytag=110 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:,:,:), allocatable :: send_buf, recv_buf integer, optional :: comm logical, optional :: lsync ! if (nprocz == 1) then !$omp workshare out = in !$omp end workshare return endif if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single ! 
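!  As in the 3D version above, the exchange below trades y-extent for
!  z-extent: iteration ibox sends the local y-slab
!  in(:,bny*ibox+1:bny*(ibox+1),:,:) to processor find_proc(ipx,ipy,ibox)
!  and stores the block received from it in out(:,:,bnz*ibox+1:bnz*(ibox+1),:).
!  Worked example (assumed numbers, for illustration only): nprocz=4,
!  ny=16, nz=8 give bny=4, bnz=8, so iteration ibox=2 sends in(:,9:12,:,:)
!  and fills out(:,:,17:24,:).
!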
inx = size (in, 1) ina = size (in, 4) onx = size (out, 1) ona = size (out, 4) nbox = onx*bny*bnz*ona ! if (mod (ny, nprocz) /= 0) & call stop_fatal ('remap_to_pencil_yz_4D: ny needs to be an integer multiple of nprocz', lfirst_proc_yz) ! if ((size (in, 2) /= iny) .or. ((size (in, 3) /= inz))) & call stop_fatal ('remap_to_pencil_yz_4D: input array size mismatch /= ny,nz', lfirst_proc_yz) if ((size (out, 2) /= ony) .or. ((size (out, 3) /= onz))) & call stop_fatal ('remap_to_pencil_yz_4D: output array size mismatch /= ny/nprocz,nzgrid', lfirst_proc_yz) if (inx /= onx) & call stop_fatal ('remap_to_pencil_yz_4D: inx/=onx - sizes differ in the x direction', lfirst_proc_yz) if (ina /= ona) & call stop_fatal ('remap_to_pencil_yz_4D: ina/=ona - sizes differ in the 4th dimension', lfirst_proc_yz) ! allocate (send_buf(onx,bny,bnz,ona), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('remap_to_pencil_yz_4D: not enough memory for send_buf!', .true.) allocate (recv_buf(onx,bny,bnz,ona), stat=alloc_err) if (alloc_err > 0) call stop_fatal ('remap_to_pencil_yz_4D: not enough memory for recv_buf!', .true.) ! do ibox = 0, nprocz-1 partner = find_proc(ipx,ipy,ibox) if (iproc == partner) then ! data is local out(:,:,bnz*ibox+1:bnz*(ibox+1),:) = in(:,bny*ibox+1:bny*(ibox+1),:,:) else ! communicate with partner send_buf = in(:,bny*ibox+1:bny*(ibox+1),:,:) if (iproc > partner) then ! above diagonal: send first, receive then call MPI_SEND (send_buf, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) else ! below diagonal: receive first, send then call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr) call MPI_SEND (send_buf, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), mpierr) endif out(:,:,bnz*ibox+1:bnz*(ibox+1),:) = recv_buf endif enddo ! deallocate (send_buf, recv_buf) !$omp end single if (loptest(lsync,.true.)) then !$omp barrier endif ! endsubroutine remap_to_pencil_yz_4D !*********************************************************************** subroutine unmap_from_pencil_yz_3D(in, out,comm,lsync) ! ! Unmaps z-pencil shaped 3D data distributed on several processors back to normal shape. ! This routine is the inverse of the remap function for nprocz>1. ! ! 27-oct-2010/Bourdin.KIS: coded ! real, dimension(:,:,:), intent(in) :: in real, dimension(:,:,:), intent(out) :: out ! integer, parameter :: iny=ny/nprocz, inz=nzgrid integer, parameter :: ony=ny, onz=nz integer :: inx, onx ! sizes of in and out arrays integer, parameter :: bny=ny/nprocz, bnz=nz ! transfer box sizes integer :: ibox, partner, nbox, alloc_err integer, parameter :: ytag=111 integer, dimension(MPI_STATUS_SIZE) :: stat ! real, dimension(:,:,:), allocatable :: send_buf, recv_buf integer, optional :: comm logical, optional :: lsync ! if (nprocz == 1) then !$omp workshare out = in !$omp end workshare return endif if (loptest(lsync,.true.)) then !$omp barrier endif !$omp single ! inx = size (in, 1) onx = size (out, 1) nbox = onx*bny*bnz ! if (mod (ny, nprocz) /= 0) & call stop_fatal ('unmap_from_pencil_yz_3D: ny needs to be an integer multiple of nprocz', lfirst_proc_yz) ! if ((size (in, 2) /= iny) .or. ((size (in, 3) /= inz))) & call stop_fatal ('unmap_from_pencil_yz_3D: input array size mismatch /= ny/nprocz,nygrid', lfirst_proc_yz) if ((size (out, 2) /= ony) .or. 
          call stop_fatal ('unmap_from_pencil_yz_3D: output array size mismatch /= ny,nz', lfirst_proc_yz)
      if (inx /= onx) &
          call stop_fatal ('unmap_from_pencil_yz_3D: inx/=onx - sizes differ in the x direction', lfirst_proc_yz)
!
      allocate (send_buf(onx,bny,bnz), stat=alloc_err)
      if (alloc_err > 0) call stop_fatal ('unmap_from_pencil_yz_3D: not enough memory for send_buf!', .true.)
      allocate (recv_buf(onx,bny,bnz), stat=alloc_err)
      if (alloc_err > 0) call stop_fatal ('unmap_from_pencil_yz_3D: not enough memory for recv_buf!', .true.)
!
      do ibox = 0, nprocz-1
        partner = find_proc(ipx,ipy,ibox)
        if (iproc == partner) then
          ! data is local
          out(:,bny*ibox+1:bny*(ibox+1),:) = in(:,:,bnz*ibox+1:bnz*(ibox+1))
        else
          ! communicate with partner
          send_buf = in(:,:,bnz*ibox+1:bnz*(ibox+1))
          if (iproc > partner) then
            ! above diagonal: send first, then receive
            call MPI_SEND (send_buf, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), mpierr)
            call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr)
          else
            ! below diagonal: receive first, then send
            call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr)
            call MPI_SEND (send_buf, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), mpierr)
          endif
          out(:,bny*ibox+1:bny*(ibox+1),:) = recv_buf
        endif
      enddo
!
      deallocate (send_buf, recv_buf)
      !$omp end single
      if (loptest(lsync,.true.)) then
        !$omp barrier
      endif
!
    endsubroutine unmap_from_pencil_yz_3D
!***********************************************************************
    subroutine unmap_from_pencil_yz_4D(in, out, comm, lsync)
!
!  Unmaps z-pencil shaped 4D data distributed on several processors back to normal shape.
!  This routine is the inverse of the remap function for nprocz>1.
!
!  27-oct-2010/Bourdin.KIS: coded
!
      real, dimension(:,:,:,:), intent(in) :: in
      real, dimension(:,:,:,:), intent(out) :: out
!
      integer, parameter :: iny=ny/nprocz, inz=nzgrid
      integer, parameter :: ony=ny, onz=nz
      integer :: inx, ina, onx, ona                 ! sizes of in and out arrays
      integer, parameter :: bny=ny/nprocz, bnz=nz   ! transfer box sizes
      integer :: ibox, partner, nbox, alloc_err
      integer, parameter :: ytag=111
      integer, dimension(MPI_STATUS_SIZE) :: stat
!
      real, dimension(:,:,:,:), allocatable :: send_buf, recv_buf
      integer, optional :: comm
      logical, optional :: lsync
!
      if (nprocz == 1) then
        !$omp workshare
        out = in
        !$omp end workshare
        return
      endif
      if (loptest(lsync,.true.)) then
        !$omp barrier
      endif
      !$omp single
!
      inx = size (in, 1)
      ina = size (in, 4)
      onx = size (out, 1)
      ona = size (out, 4)
      nbox = onx*bny*bnz*ona
!
      if (mod (ny, nprocz) /= 0) &
          call stop_fatal ('unmap_from_pencil_yz_4D: ny needs to be an integer multiple of nprocz', lfirst_proc_yz)
!
      if ((size (in, 2) /= iny) .or. (size (in, 3) /= inz)) &
          call stop_fatal ('unmap_from_pencil_yz_4D: input array size mismatch /= ny/nprocz,nzgrid', lfirst_proc_yz)
      if ((size (out, 2) /= ony) .or. (size (out, 3) /= onz)) &
          call stop_fatal ('unmap_from_pencil_yz_4D: output array size mismatch /= ny,nz', lfirst_proc_yz)
      if (inx /= onx) &
          call stop_fatal ('unmap_from_pencil_yz_4D: inx/=onx - sizes differ in the x direction', lfirst_proc_yz)
      if (ina /= ona) &
          call stop_fatal ('unmap_from_pencil_yz_4D: ina/=ona - sizes differ in the 4th dimension', lfirst_proc_yz)
!
      allocate (send_buf(onx,bny,bnz,ona), stat=alloc_err)
      if (alloc_err > 0) call stop_fatal ('unmap_from_pencil_yz_4D: not enough memory for send_buf!', .true.)
      allocate (recv_buf(onx,bny,bnz,ona), stat=alloc_err)
      if (alloc_err > 0) call stop_fatal ('unmap_from_pencil_yz_4D: not enough memory for recv_buf!', .true.)
!
      do ibox = 0, nprocz-1
        partner = find_proc(ipx,ipy,ibox)
        if (iproc == partner) then
          ! data is local
          out(:,bny*ibox+1:bny*(ibox+1),:,:) = in(:,:,bnz*ibox+1:bnz*(ibox+1),:)
        else
          ! communicate with partner
          send_buf = in(:,:,bnz*ibox+1:bnz*(ibox+1),:)
          if (iproc > partner) then
            ! above diagonal: send first, then receive
            call MPI_SEND (send_buf, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), mpierr)
            call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr)
          else
            ! below diagonal: receive first, then send
            call MPI_RECV (recv_buf, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), stat, mpierr)
            call MPI_SEND (send_buf, nbox, mpi_precision, partner, ytag, ioptest(comm,MPI_COMM_GRID), mpierr)
          endif
          out(:,bny*ibox+1:bny*(ibox+1),:,:) = recv_buf
        endif
      enddo
!
      deallocate (send_buf, recv_buf)
      !$omp end single
      if (loptest(lsync,.true.)) then
        !$omp barrier
      endif
!
    endsubroutine unmap_from_pencil_yz_4D
!***********************************************************************
    subroutine collect_grid(x, y, z, gx, gy, gz)
!
!  This routine collects the global grid on the root processor.
!
!  05-Jul-2011/PABourdin: coded
!   6-oct-2016/MR: modifications for collection of downsampled grid:
!                  contributions from different processors may have different lengths
!
      use Cparam, only: l1, m1, n1, nghost
!
      real, dimension(:), intent(in) :: x,y,z
      real, dimension(:), intent(out) :: gx,gy,gz
!
      integer :: px, py, pz, mx, my, mz, l2, m2, n2, ie, rank
      integer, parameter :: tag_gx=677, tag_gy=678, tag_gz=679
!
      mx=size(x); my=size(y); mz=size(z)
      l2=mx-nghost; m2=my-nghost; n2=mz-nghost
!
      if (lroot) then
        ! collect the global x-data from all leading processors in the yz-plane
        gx(1:mx) = x; ie=l2
        if (nprocx > 1) then
          do px = 1, nprocx-1
            rank=find_proc(px,0,0)
            if (ldownsampling) call mpirecv_int(l2,rank,tag_gx)
            call mpirecv_real (gx(ie+1:ie+l2), l2, rank, tag_gx)
            ie=ie+l2-nghost
          enddo
        endif
        ! collect the global y-data from all leading processors in the xz-plane
        gy(1:my) = y; ie=m2
        if (nprocy > 1) then
          do py = 1, nprocy-1
            rank=find_proc(0,py,0)
            if (ldownsampling) call mpirecv_int(m2,rank,tag_gy)
            call mpirecv_real (gy(ie+1:ie+m2), m2, rank, tag_gy)
            ie=ie+m2-nghost
          enddo
        endif
        ! collect the global z-data from all leading processors in the xy-plane
        gz(1:mz) = z; ie=n2
        if (nprocz > 1) then
          do pz = 1, nprocz-1
            rank=find_proc(0,0,pz)
            if (ldownsampling) call mpirecv_int(n2,rank,tag_gz)
            call mpirecv_real (gz(ie+1:ie+n2), n2, rank, tag_gz)
            ie=ie+n2-nghost
          enddo
        endif
      else
        ! leading processors send their local coordinates
        if (lfirst_proc_yz) then
          if (ldownsampling) call mpisend_int(l2,root,tag_gx)
          call mpisend_real (x(l1:), l2, root, tag_gx)
        endif
        if (lfirst_proc_xz) then
          if (ldownsampling) call mpisend_int(m2,root,tag_gy)
          call mpisend_real (y(m1:), m2, root, tag_gy)
        endif
        if (lfirst_proc_xy) then
          if (ldownsampling) call mpisend_int(n2,root,tag_gz)
          call mpisend_real (z(n1:), n2, root, tag_gz)
        endif
      endif
!
    endsubroutine collect_grid
!***********************************************************************
!   NOT USED AT PRESENT, HENCE COMMENTED OUT:
!
!    subroutine distribute_grid(x, y, z, gx, gy, gz)
!!
!!  This routine distributes the global grid to all processors
!!  (concurrent communications).
!!
!!  16-jan-2023/MR: coded
!!
!      use General, only: indgen
!!
!      real, dimension(mx), intent(out) :: x
!      real, dimension(my), intent(out) :: y
!      real, dimension(mz), intent(out) :: z
!      real, dimension(mxgrid), intent(inout) :: gx
!      real, dimension(mygrid), intent(inout) :: gy
!      real, dimension(mzgrid), intent(inout) :: gz
!!
!      call mpibcast_real (gx, mxgrid, comm=MPI_COMM_WORLD)
!      x = gx(1+ipx*nx:nx+2*nghost+ipx*nx)
!!
!      call mpibcast_real (gy, mygrid, comm=MPI_COMM_WORLD)
!      y = gy(1+ipy*ny:ny+2*nghost+ipy*ny)
!!
!      call mpibcast_real (gz, mzgrid, comm=MPI_COMM_WORLD)
!      z = gz(1+ipz*nz:nz+2*nghost+ipz*nz)
!!
!    endsubroutine distribute_grid
!!***********************************************************************
    subroutine y2x(a,xi,zj,zproc_no,ay)
!
!  Loads the y dimension of an array into a 1-d array.
!
      real, dimension(nx,ny,nz), intent(in) :: a
      real, dimension(nygrid), intent(out) :: ay
      real, dimension(nygrid) :: ay_local
      integer, intent(in) :: xi,zj,zproc_no
      integer :: my_iniy,my_finy
!
      if (ipz==(zproc_no-1)) then
        my_iniy=ipy*ny+1
        my_finy=(ipy+1)*ny
        ay_local(my_iniy:my_finy)=a(xi,:,zj)
      else
        ay_local=0.
      endif
      call mpireduce_sum(ay_local,ay,nygrid)
!
    endsubroutine y2x
!***********************************************************************
    subroutine z2x(a,xi,yj,yproc_no,az)
!
!  Loads the z dimension of an array into a 1-d array.
!
      real, dimension(nx,ny,nz), intent(in) :: a
      real, dimension(nzgrid), intent(out) :: az
      real, dimension(nzgrid) :: az_local
      integer, intent(in) :: xi,yj,yproc_no
      integer :: my_iniz,my_finz
!
      if (ipy==(yproc_no-1)) then
        my_iniz=ipz*nz+1
        my_finz=(ipz+1)*nz
        az_local(my_iniz:my_finz)=a(xi,yj,:)
      else
        az_local=0.
      endif
      call mpireduce_sum(az_local,az,nzgrid)
!
    endsubroutine z2x
!***********************************************************************
    subroutine mpigather_xy( sendbuf, recvbuf, lpz )
!
!  Gathers the chunks of a 2D array from each processor of the z-layer lpz in
!  a big array at the root of the layer. If lpz is not present, this is done for
!  all layers (not checked).
!
!  Here no parallelization in x is allowed.
!
!  18-nov-10/MR: coded
!
      real, dimension(nxgrid,ny) :: sendbuf   ! nx=nxgrid !
      real, dimension(nxgrid,nygrid) :: recvbuf
      integer, optional, intent(in) :: lpz
!
      integer :: ncnt
      logical :: cond
      integer :: comm
!
      if (present(lpz)) then
        cond = ipz==lpz
      else
        cond = .true.
      endif
      comm = MPI_COMM_XYPLANE
!
      ncnt = nxgrid*ny
!
      if (cond) &
        call MPI_GATHER(sendbuf, ncnt, mpi_precision, recvbuf, ncnt, mpi_precision, root, comm, mpierr)
!
    endsubroutine mpigather_xy
!***********************************************************************
    subroutine mpigather_z_1D(sendbuf,recvbuf)
!
!  Gathers the chunks of a 1D array from each processor along a z-beam
!  at the root of the beam.
!
!  25-nov-10/MR: coded
!
      real, dimension(:), intent(in) :: sendbuf
      real, dimension(:), intent(out) :: recvbuf
!
      call MPI_GATHER(sendbuf, size(sendbuf), mpi_precision, recvbuf, size(sendbuf), mpi_precision, 0, &
                      MPI_COMM_ZBEAM, mpierr)
!
    endsubroutine mpigather_z_1D
!***********************************************************************
    subroutine mpigather_z(sendbuf,recvbuf,n1,lproc)
!
!  Gathers the chunks of a 2D array from each processor along the z-beam at the
!  position defined by lproc, at the root of the beam.
!
!  25-nov-10/MR: coded
!
      integer, intent(in) :: n1
      real, dimension(n1,nz), intent(in) :: sendbuf
      real, dimension(n1,nzgrid), intent(out) :: recvbuf
      integer, optional, intent(in) :: lproc
!
      integer :: lpx, lpy
!
      if (present(lproc)) then
        lpy = lproc/nprocx
        lpx = mod(lproc,nprocx)
      else
        lpy=0; lpx=0
      endif
!
      if ( ipx==lpx .and. ipy==lpy ) &
        call MPI_GATHER(sendbuf, n1*nz, mpi_precision, recvbuf, n1*nz, mpi_precision, root, MPI_COMM_ZBEAM, mpierr)
!
    endsubroutine mpigather_z
!***********************************************************************
    subroutine mpigather(sendbuf, recvbuf, comm)
!
!  Gathers the chunks of a 3D array from each processor in a big array at root.
!
!  19-nov-10/MR: coded
!  22-Oct-2025/Kishore: support parallelization in x direction.
!
      use General, only: find_proc_coords
!
      real, dimension(nx,ny,nz), intent(in) :: sendbuf
      real, dimension(:,:,:), intent(out) :: recvbuf
      integer, optional, intent(in) :: comm
!
      integer :: iy, iz, i, ipx, ipy, ipz
!
!  MR: These long integer variables would be necessary for big nxgrid*nygrid,
!      but there is no MPI_GATHERV which would accept a long int shifts argument.
!
      integer(KIND=ikind8) :: nlayer, nlayer_y
      integer(KIND=ikind8), dimension(ncpus) :: shifts
      integer, dimension(ncpus) :: counts
!
      if (lroot) then
!
        nlayer = nz*int8(nxgrid)*int8(nygrid)
        if (nlayer>max_int) &
          call stop_it("mpigather: integer overflow in shifts")
!
        nlayer_y = int8(nxgrid)*ny
        counts = nx
!
        do i=1,ncpus
          call find_proc_coords(i-1,ipx,ipy,ipz)
          shifts(i) = ipz*nlayer + ipy*nlayer_y + ipx*nx
        enddo
!
      endif
!
      do iz=1,nz
        do iy=1,ny
!  TODO: on SNG, segfault below (invalid memory access). The suspected cause is
!  an integer overflow in shifts (which would explain why it does not show up
!  in smaller runs), but no workaround is known yet.
          call MPI_GATHERV(sendbuf(1,iy,iz), nx, mpi_precision, &
                           recvbuf(1,iy,iz), counts, int(shifts), mpi_precision, &
                           root, ioptest(comm,MPI_COMM_GRID), mpierr)
        enddo
      enddo
!
    endsubroutine mpigather
!***********************************************************************
    subroutine mpigather_scl_str(string,string_arr, comm)
!
      character(LEN=*) :: string
      character(LEN=*), dimension(:) :: string_arr
      integer, optional :: comm
!
      call MPI_GATHER(string, len(string), MPI_CHARACTER, string_arr, len(string), &
                      MPI_CHARACTER, root, ioptest(comm,MPI_COMM_GRID), mpierr)
!
    endsubroutine mpigather_scl_str
!***********************************************************************
    logical function get_limits(range, k1g, k2g, ia, ie, is )
!
      integer, dimension(3) :: range
      integer :: k1g, k2g
      integer :: ia, ie, is
!
      get_limits=.true.
      if ( range(1) == 0 ) return
!
      get_limits=.false.
      is=range(3)
      if (range(1)>=k1g) then
        ia = range(1)-k1g+1
      else
        ia = mod(is-mod(k1g-range(1),is),is)+1
      endif
      ie = min(k2g,range(2))-k1g+1
!
    endfunction get_limits
!***********************************************************************
    subroutine mpigather_and_out_real( sendbuf, unit, ltransp, kxrange, kyrange, zrange, comm )
!
!  Transfers the chunks of a 3D array from each processor to root
!  and writes them out in the right order.
!
!  Here no parallelization in x is allowed.
!
!  22-nov-10/MR: coded
!  06-apr-11/MR: optional parameters kxrange, kyrange, zrange for selective output added
!  03-feb-14/MR: rewritten
!  10-apr-15/MR: corrected for nx/=ny
!  22-apr-15/MR: another correction: order of loops over processors and index ranges exchanged
!
      use General, only: write_full_columns, get_range_no
!
      integer, intent(in ) :: unit
      real, dimension(:,:,:), intent(inout) :: sendbuf
      complex, dimension(:,:,:,:), intent(inout) :: sendbuf_cmplx
      logical, optional, intent(in ) :: ltransp   ! if true, transposition x <-> y
      integer, dimension(3,*), optional, intent(in ) :: kxrange, kyrange, zrange
!
      integer, dimension(3,10) :: kxrangel,kyrangel,zrangel
      integer, optional :: comm
!
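!  Note (editorial): this routine has a second entry point,
!  mpigather_and_out_cmplx (legacy ENTRY statement below), so the real and
!  complex variants share one body; the goto around the entry point selects
!  which of the two buffers, sendbuf or sendbuf_cmplx, is actually used.
!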
      integer, dimension(MPI_STATUS_SIZE) :: status
      integer :: ipz, ipy, ipx, ic, ncomp, n1g, n2g, m1g, m2g, l1g, l2g, ig, iy, iz, &
                 irz, iry, irx, iza, ize, izs, iya, iye, iys, ixa, ixe, ixs, nsend, tag, unfilled
      logical :: ltrans, lcomplex
      real, allocatable :: rowbuf(:)
      complex, allocatable :: rowbuf_cmplx(:)
      integer, dimension(2) :: nprxy,nxy
!
      lcomplex = .false.
      ncomp = 1
      goto 1
!
    entry mpigather_and_out_cmplx( sendbuf_cmplx, unit, ltransp, kxrange, kyrange, zrange, comm )
!
      lcomplex = .true.
      ncomp = size(sendbuf_cmplx,4)
!
 1    if (ALWAYS_FALSE) print*,unit
!
      if ( .not.present(ltransp) ) then
        ltrans=.false.
      else
        ltrans=ltransp
      endif
!
      if ( .not.present(kxrange) ) then
        kxrangel = 0
        kxrangel(:,1) = (/1,nxgrid,1/)
      else
        kxrangel = kxrange(:,1:nk_max)
      endif
!
      if ( .not.present(kyrange) ) then
        kyrangel = 0
        kyrangel(:,1)=(/1,nygrid,1/)
      else
        kyrangel = kyrange(:,1:nk_max)
      endif
!
      if ( .not.present(zrange) ) then
        zrangel = 0
        zrangel(:,1) = (/1,nzgrid,1/)
      else
        zrangel = zrange(:,1:nz_max)
      endif
!
      if (lcomplex) then
        allocate( rowbuf_cmplx(nx) )
      else
        allocate( rowbuf(nx) )
      endif
!
      if (ltrans) then
        nxy(1)=ny; nxy(2)=nx
        nprxy(1)=nprocy; nprxy(2)=nprocx
      else
        nxy(1)=nx; nxy(2)=ny
        nprxy(1)=nprocx; nprxy(2)=nprocy
      endif
!
      unfilled=0
!
!  Loop over all variables for which the spectrum has been calculated.
!
      do ic=1,ncomp
!
!  Loop over all ranges of z indices in zrangel.
!
        do irz=1,nz_max
!
          n2g=0
!
!  Loop over all processor array layers of z direction.
!
          do ipz=0,nprocz-1
!
!  Global lower and upper z index bounds for z layer ipz.
!
            n1g = n2g+1; n2g = n2g+nz
            if (get_limits( zrangel(:,irz), n1g, n2g, iza, ize, izs )) exit
!
!  Loop over all z indices in range irz.
!
            do iz = iza, ize, izs
!
!  Loop over all ranges of ky indices in kyrangel.
!
              do iry=1,nk_max
!
!  Loop over all processor array beams in x direction in layer ipz.
!
                m2g=0
                do ipy=0,nprxy(2)-1
!
!  Global lower and upper y index bounds for beam ipy.
!
                  m1g=m2g+1; m2g=m2g+nxy(2)
                  if (get_limits( kyrangel(:,iry), m1g, m2g, iya, iye, iys )) exit
                  !if (lroot) print*, 'ipy,ipz,iry,iy*=', ipy,ipz, iry, iya, iye, iys
!
!  Loop over all ky indices in range iry.
!
                  do iy = iya, iye, iys
!
!  Loop over all ranges of kx indices in kxrangel.
!
                    do irx=1,nk_max
!
!  Loop over all processors in beam.
!
                      l2g=0
                      do ipx=0,nprxy(1)-1
!
!  Global processor number.
!
                        if (ltrans) then
                          ig = ipz*nprocxy + ipx*nprocx + ipy
                        else
                          ig = ipz*nprocxy + ipy*nprocx + ipx
                        endif
!
!  Global lower and upper x index bounds for processor ipx.
!
                        l1g=l2g+1; l2g=l2g+nxy(1)
                        if (get_limits( kxrangel(:,irx), l1g, l2g, ixa, ixe, ixs )) exit
                        !if (lroot) print*, 'ipx,ipy,ix*=', ipx,ipy,ixa, ixe, ixs
!
                        if (ixe>=ixa) then
!
                          nsend = get_range_no((/ixa,ixe,ixs/),1)
                          tag = ixa
!
                          if (lroot) then
                            if (ig==0) then
                              if (lcomplex) then
                                if (ltrans) then
                                  rowbuf_cmplx=sendbuf_cmplx(iy,ixa:ixe:ixs,iz,ic)
                                else
                                  rowbuf_cmplx=sendbuf_cmplx(ixa:ixe:ixs,iy,iz,ic)
                                endif
                              else
                                if (ltrans) then
                                  rowbuf=sendbuf(iy,ixa:ixe:ixs,iz)
                                else
                                  rowbuf=sendbuf(ixa:ixe:ixs,iy,iz)
                                endif
                              endif
                            else
                              if (lcomplex) then
                                call MPI_RECV(rowbuf_cmplx, nsend, MPI_CMPLX, ig, tag, ioptest(comm,MPI_COMM_GRID), status, mpierr)
                                !print*, 'iy,irx,ixa:ixe:ixs,kxrangel(:,irx)=', iy,irx,ixa,ixe,ixs, kxrangel(:,irx)
                              else
                                call MPI_RECV(rowbuf, nsend, mpi_precision, ig, tag, ioptest(comm,MPI_COMM_GRID), status, mpierr)
                              endif
                            endif
                            if (lcomplex) then
                              call write_full_columns( 1, rowbuf_cmplx, (/1,nsend,1/), unfilled )
                            else
                              call write_full_columns( 1, rowbuf, (/1,nsend,1/), unfilled )
                            endif
                          elseif ( iproc==ig ) then
!
!  If the executing processor is hit by index ig: send to root.
!
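!  (The tag equals ixa on both sides: root and sender derive it independently
!  from the same global range arithmetic, which pairs each send below with the
!  matching receive above.)
!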
                            if (lcomplex) then
                              if (ltrans) then
                                call MPI_SEND(sendbuf_cmplx(iy,ixa:ixe:ixs,iz,ic), &
                                              nsend, MPI_CMPLX, root, tag, ioptest(comm,MPI_COMM_GRID), mpierr)
                              else
                                call MPI_SEND(sendbuf_cmplx(ixa:ixe:ixs,iy,iz,ic), &
                                              nsend, MPI_CMPLX, root, tag, ioptest(comm,MPI_COMM_GRID), mpierr)
                              endif
                            else
                              if (ltrans) then
                                call MPI_SEND(sendbuf(iy,ixa:ixe:ixs,iz), &
                                              nsend, mpi_precision, root, tag, ioptest(comm,MPI_COMM_GRID), mpierr)
                              else
                                call MPI_SEND(sendbuf(ixa:ixe:ixs,iy,iz), &
                                              nsend, mpi_precision, root, tag, ioptest(comm,MPI_COMM_GRID), mpierr)
                              endif
                            endif
!
                          endif
                        endif
                      enddo
                    enddo
                    call MPI_BARRIER(ioptest(comm,MPI_COMM_GRID), mpierr)
                  enddo
                enddo
              enddo
            enddo
          enddo
        enddo
!
        if (lroot.and.unfilled>0) then
          write(1,'(a)')
          unfilled=0
        endif
!
      enddo
!
    endsubroutine mpigather_and_out_real
!***********************************************************************
    subroutine mpiwait(bwait)
!
!  Wrapper to go with non-blocking communication functions.
!
!  12-dec-14/wlad: coded
!
      integer, dimension (MPI_STATUS_SIZE) :: stat
      integer :: bwait,ierr
!
      call MPI_WAIT(bwait,stat,ierr)
!
    endsubroutine mpiwait
!***********************************************************************
    subroutine merge_1d( vec1, vec2, n, type )
!
!  Helper routine for mpimerge_1d (used as a user-defined MPI reduction operation).
!
!  22-nov-10/MR: coded
!
      integer, intent(in) :: n, type
      real, dimension(n), intent(inout) :: vec2
      real, dimension(n), intent(in) :: vec1
!
!  Merging: an invalid (negative) entry of vec2 is replaced by the
!  corresponding entry of vec1 if that one is valid.
!
      where ((vec2 < 0.) .and. (vec1 >= 0.)) vec2=vec1
!
      if (ALWAYS_FALSE) print *,type
!
    endsubroutine merge_1d
!***********************************************************************
    subroutine mpimerge_1d(vector,nk,idir,comm)
!
!  Merges the vectors of the processors along idir by filling invalid
!  (negative) values.
!
!  22-nov-10/MR: coded
!
      integer, intent(in) :: nk
      real, dimension(nk), intent(inout) :: vector
      integer, optional, intent(in) :: idir,comm
!
      integer :: mpiprocs,merge
      real, dimension(nk) :: recvbuf
!
      if (nk==0) return
!
      if (present(idir)) then
        mpiprocs=mpigetcomm(idir)
      else
        mpiprocs=ioptest(comm,MPI_COMM_GRID)
      endif
!
      call MPI_OP_CREATE( merge_1d, .false., merge, mpierr )
      call MPI_REDUCE(vector, recvbuf, nk, mpi_precision, merge, root, mpiprocs, mpierr)
      vector = recvbuf
!
    endsubroutine mpimerge_1d
!***********************************************************************
    integer function mpigetcomm(idir)
!
!  Derives the communicator from index idir.
!
!  23-nov-10/MR: coded
!
      integer, intent(in) :: idir
!
      select case(idir)
        case(IXBEAM)
          mpigetcomm=MPI_COMM_XBEAM
        case(IYBEAM)
          mpigetcomm=MPI_COMM_YBEAM
        case(IZBEAM)
          mpigetcomm=MPI_COMM_ZBEAM
        case(IXYPLANE)
          mpigetcomm=MPI_COMM_XYPLANE
        case(IXZPLANE)
          mpigetcomm=MPI_COMM_XZPLANE
        case(IYZPLANE)
          mpigetcomm=MPI_COMM_YZPLANE
        case default
          mpigetcomm=MPI_COMM_GRID
      endselect
!
    endfunction mpigetcomm
!************************************************************************
    logical function report_clean_output( flag, message )
!
!  Generates an error message for files (like snapshots) which are written in a
!  distributed way. The message contains the list of processors at which the
!  operation failed.
!
!  flag   (IN) : indicates failure of the I/O operation at the local processor
!  message(OUT): message fragment containing the list of processors where the
!                operation failed (only relevant for root)
!  return value: flag for 'synchronize!', identical on all processors
!
!  14-nov-11/MR: coded
!
      use General, only: itoa,safe_character_append,safe_character_prepend
!
      logical, intent(IN) :: flag
      character (LEN=fnlen), intent(OUT):: message
!
      integer :: mpierr, i, ia, ie, count
      logical, dimension(:), allocatable:: flags
!
      character (LEN=intlen) :: str
!
      if (lroot) allocate(flags(ncpus))
!
      call MPI_GATHER(flag, 1, MPI_LOGICAL, flags, 1, MPI_LOGICAL, root, MPI_COMM_PENCIL, mpierr)
!
      report_clean_output = .false.
      if (lroot) then
!
        count = 0
        ia = -1; ie = 0
        str = ''; message = ''
!
        if (lroot) then
          do i=1,ncpus
            if ( flags(i) ) then
              if ( i==ia+1 ) then
                ie = i
                count = count+1
              else
                ia = i
                if ( ie>0 ) then
                  str = '-'//trim(itoa(ie))//','
                  ie = 0
                endif
                call safe_character_append(message,trim(itoa(ia))//str)
                str = ','
              endif
            endif
          enddo
        endif
!
        if (count>0) then
          call safe_character_prepend(message,'"at '//trim(itoa(count))//' node(s): ')
          report_clean_output = .true.
        endif
!
      endif
!
!  Broadcast the flag for 'synchronization necessary'.
!
      call MPI_BCAST(report_clean_output,1,MPI_LOGICAL,root,MPI_COMM_PENCIL,mpierr)
!
    endfunction report_clean_output
!**************************************************************************
    subroutine set_yy_neighbors
!
!  Determines, for the boundary processors of one (Yin or Yang) grid, the neighbors
!  (direct or corner) in the other. Further determines the lengths of the buffers
!  for data exchange between the neighbors and stores them in yy_buflens.
!  Implemented for nprocz/nprocy=3 only!
!
!  At return, neighbor ranks refer to MPI_COMM_PENCIL.
!
!  20-dec-15/MR: coded
!
      integer, parameter :: nprocz_rd=nprocz/3
      integer :: lenred
!
      if (lcutoff_corners) then
        len_cornstrip_y=nycut-1
        len_cornstrip_z=nzcut-1
!
!  Length of a "cornerstrip" if the corner is cut off. Its inclined part has width 2*nghost!
!
        len_cornbuf=max(2*my-nycut+len_cornstrip_z,2*mz-nzcut+len_cornstrip_y)
        ycornstart=2*my-nycut+1; zcornstart=2*mz-nzcut+1
        lenred=max(2*my-nycut,2*mz-nzcut)
      else
        len_cornstrip_y=m2; len_cornstrip_z=n2
        len_cornbuf=my+nz+nghost   ! length of a "cornerstrip"
        ycornstart=my+1; zcornstart=mz+1
        lenred=max(my,mz)
      endif
!
!  At lower theta boundary (northern hemisphere).
!
      if (lfirst_proc_y) then
        if (ipz>=2*nprocz_rd) then   ! for last third in phi direction
          yy_buflens=mz
!
!  Direct neighbor.
!
          ylneigh=find_proc(ipx,0,5*nprocz_rd-ipz-1)
!
!  Lower left corner neighbor.
!
          if (ipz==2*nprocz_rd) then   ! first proc in last third = corner opposite
            llcorn=-1
            yy_buflens=(/0,len_cornbuf,mz/)
          else
            llcorn=find_proc(ipx,0,5*nprocz_rd-ipz)
            if (ipz==2*nprocz_rd+1) yy_buflens(1)=lenred   ! 2nd proc in last third = cross corner opposite
          endif
!
!  Upper left corner neighbor.
!
          if (llast_proc_z) then   ! not used at yz-corner
            lucorn=-1
          else
            lucorn=find_proc(ipx,0,5*nprocz_rd-ipz-2)
          endif
        else   ! for middle third in phi direction
          yy_buflens=my
!
!  Direct neighbor.
!
          ylneigh=find_proc(ipx,nprocy-1-(ipz-nprocz_rd),nprocz-1)
!
!  Lower left corner neighbor.
!
          if (ipz==nprocz_rd) then   ! first proc in middle third = corner opposite
            llcorn=-1
            yy_buflens=(/0,len_cornbuf,my/)
          else
            llcorn=find_proc(ipx,nprocy-(ipz-nprocz_rd),nprocz-1)
            if (ipz==nprocz_rd+1) yy_buflens(1)=lenred   ! 2nd proc in middle third = cross corner opposite
          endif
!
!  Upper left corner neighbor.
!
          if (ipz==2*nprocz_rd-1) then   ! last proc in middle third = corner opposite
            lucorn=-1
            yy_buflens=(/my,len_cornbuf,0/)
          else
            lucorn=find_proc(ipx,nprocy-2-(ipz-nprocz_rd),nprocz-1)
            if (ipz==2*nprocz_rd-2) yy_buflens(3)=lenred   ! penultimate proc in middle third = cross corner opposite
          endif
        endif
        if (.not.lyang) then
!
!  For Yin grid: neighbors are in the upper half of the overall proc number range.
!
          ylneigh=ylneigh+ncpus
          if (llcorn/=-1) llcorn=llcorn+ncpus
          if (lucorn/=-1) lucorn=lucorn+ncpus
        endif
      endif
!
!  At upper theta boundary (southern hemisphere).
!
      if (llast_proc_y) then
        if (ipz>=2*nprocz_rd) then
          ! for last third in phi direction
          yy_buflens=mz
!
!  Direct neighbor.
!
          yuneigh=find_proc(ipx,0,ipz-2*nprocz_rd)
!
!  Upper right neighbor.
!
          if (ipz==2*nprocz_rd) then   ! first proc in last third = corner opposite
            ulcorn=-1
            yy_buflens=(/mz,len_cornbuf,0/)
          else
            ulcorn=find_proc(ipx,0,ipz-2*nprocz_rd-1)
            if (ipz==2*nprocz_rd+1) yy_buflens(3)=lenred   ! 2nd proc in last third = cross corner opposite
          endif
!
!  Upper left neighbor.
!
          if (llast_proc_z) then
            uucorn=-1   ! not used at yz-corner
          else
            uucorn=find_proc(ipx,0,ipz-2*nprocz_rd+1)
          endif
        else   ! middle third in phi direction
!
!  Direct neighbor.
!
          yy_buflens=my
          yuneigh=find_proc(ipx,nprocy-1-(ipz-nprocz_rd),0)
!
!  Upper right neighbor.
!
          if (ipz==nprocz_rd) then   ! first proc in middle third = corner opposite
            ulcorn=-1
            yy_buflens=(/my,len_cornbuf,0/)
          else
            ulcorn=find_proc(ipx,nprocy-(ipz-nprocz_rd),0)
            if (ipz==nprocz_rd+1) yy_buflens(3)=lenred   ! 2nd proc in middle third = cross corner opposite
          endif
!
!  Upper left neighbor.
!
          if (ipz==2*nprocz_rd-1) then   ! last proc in middle third = corner opposite
            uucorn=-1
            yy_buflens=(/0,len_cornbuf,my/)
          else
            uucorn=find_proc(ipx,nprocy-2-(ipz-nprocz_rd),0)
            if (ipz==2*nprocz_rd-2) yy_buflens(1)=lenred   ! penultimate proc in middle third = cross corner opposite
          endif
        endif
        if (.not.lyang) then
!
!  For Yin grid: neighbors are in the upper half of the overall proc number range.
!
          yuneigh=yuneigh+ncpus
          if (ulcorn/=-1) ulcorn=ulcorn+ncpus
          if (uucorn/=-1) uucorn=uucorn+ncpus
        endif
      endif
!
!  At lower phi boundary.
!
      if (lfirst_proc_z) then
!
!  Direct neighbor.
!
        zlneigh=find_proc(ipx,nprocy-1,2*nprocz_rd-1-ipy)
!
!  Lower right neighbor.
!
        if (lfirst_proc_y) then
          llcorn=-1   ! not used at yz-corner
        else
          llcorn=find_proc(ipx,nprocy-1,2*nprocz_rd-ipy)
        endif
!
!  Lower left neighbor.
!
        if (llast_proc_y) then
          ulcorn=-1   ! not used at yz-corner
        else
          ulcorn=find_proc(ipx,nprocy-1,2*nprocz_rd-2-ipy)
        endif
        yy_buflens=mz
        if (.not.lyang) then
!
!  For Yin grid: neighbors are in the upper half of the overall proc number range.
!
          zlneigh=zlneigh+ncpus
          if (llcorn/=-1) llcorn=llcorn+ncpus
          if (ulcorn/=-1) ulcorn=ulcorn+ncpus
        endif
      endif
!
!  At upper phi boundary.
!
      if (llast_proc_z) then
!
!  Direct neighbor.
!
        zuneigh=find_proc(ipx,0,2*nprocz_rd-1-ipy)
!
!  Upper left neighbor.
!
        if (lfirst_proc_y) then
          lucorn=-1   ! not used at yz-corner
        else
          lucorn=find_proc(ipx,0,2*nprocz_rd-ipy)
        endif
!
!  Upper right neighbor.
!
        if (llast_proc_y) then
          uucorn=-1   ! not used at yz-corner
        else
          uucorn=find_proc(ipx,0,2*nprocz_rd-2-ipy)
        endif
        yy_buflens=mz
        if (.not.lyang) then
!
!  For Yin grid: neighbors are in the upper half of the overall proc number range.
!
          zuneigh=zuneigh+ncpus
          if (lucorn/=-1) lucorn=lucorn+ncpus
          if (uucorn/=-1) uucorn=uucorn+ncpus
        endif
      endif
!
      !if (.not.lyang) then
      if (.false.) then
        write(iproc_world+10,*) 'ylneigh,yuneigh=',ylneigh,yuneigh
        write(iproc_world+10,*) 'zlneigh,zuneigh=',zlneigh,zuneigh
        write(iproc_world+10,*) 'llcorn,lucorn=',llcorn,lucorn
        write(iproc_world+10,*) 'ulcorn,uucorn=',ulcorn,uucorn
      endif
!
    endsubroutine set_yy_neighbors
!***********************************************************************
    subroutine interpolate_yy(f,ivar1,ivar2,buffer,pos,type)
!
!  Performs bilinear interpolation for a whole ghost zone strip of the
!  complementary (Yin or Yang) grid and all variables ivar1:ivar2. Selection
!  of the strip (from the direct, left or right corner neighbor) by pos.
!  Parameter type is for later use with other interpolation schemes.
!  The result is returned in buffer.
!
!  20-dec-15/MR: coded
!
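!  Illustrative call (the variable range iux:iuz and the buffer gbuf are
!  hypothetical; pos and the buffer shape must match what set_yy_neighbors
!  established for this processor):
!
!    call interpolate_yy(f, iux, iuz, gbuf, LEFT, BILIN)
!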
      use General, only: var_is_vec
      use Yinyang, only: bilin_interp, biquad_interp
!
      real, dimension(:,:,:,:), intent(IN) :: f
      integer, intent(IN) :: ivar1, ivar2, pos, type
      real, dimension(:,:,:,:), intent(OUT):: buffer
!
      integer :: nth, nph, i, j, iv, ive, ibuf, jbuf
      logical :: ltransp
!
      if (pos==NIL) return
!
      nth=size(intcoeffs(pos)%inds,1); nph=size(intcoeffs(pos)%inds,2)
      ltransp = size(buffer,3)==nth .and. size(buffer,2)==nph
!
      buffer=0.
!
!  Loop over all variables.
!
      iv=ivar1
      do while (iv<=ivar2)
!
        if (var_is_vec(iv)) then
!
!  All components of vectorial variables are interpolated at the same time,
!  as a transformation is needed.
!
          ive=iv+2
        else
          ive=iv
        endif
!
        !if (.false..and.(iproc_world==52.or.iproc_world==53).and.ive==-1) then
        !  if (intcoeffs(pos)%pcoors(15,1,1)/=0.) then
        !    print*, intcoeffs(pos)%pcoors(15,1,:)
        !    print*, intcoeffs(pos)%coeffs2(15,1,:,:)
        !    print*,'----'
        !  endif
        !endif
!
        do i=1,nth; do j=1,nph
!
          if (ltransp) then
            ibuf=j; jbuf=i
          else
            ibuf=i; jbuf=j
          endif
!
          if (type==BILIN) then
            call bilin_interp(intcoeffs(pos),i,j,f(:,:,:,iv:ive), buffer(:,:,:,iv:ive),ibuf,jbuf)
          elseif (type==BIQUAD .or. type==BICUB .or. type==BIQUIN .or. type==QUADSPLINE) then
            call biquad_interp(intcoeffs(pos),i,j,f(:,:,:,iv:ive), buffer(:,:,:,iv:ive),ibuf,jbuf)
          else
            call stop_it('interpolate_yy: only bilinear, biquadratic, bicubic, biquintic '// &
                         'and quadratic-spline interpolations implemented')
          endif
!
        enddo; enddo
!
        iv=ive+1
      enddo
!
    endsubroutine interpolate_yy
!***********************************************************************
    subroutine mpiscatterv_real_plain(src,counts,dspls,dest,nlocal,comm)
!
      real, dimension(:) :: src, dest
      integer, dimension(:) :: counts,dspls
      integer :: nlocal
      integer, optional :: comm
!
      call MPI_SCATTERV(src,counts,dspls,mpi_precision,dest,nlocal,mpi_precision, &
                        root,ioptest(comm,MPI_COMM_PENCIL),mpierr)
!
    endsubroutine mpiscatterv_real_plain
!***********************************************************************
    function mpiscatterv_real(nlocal,src,dest) result (lerr)
!
      integer :: nlocal
      real, dimension(:) :: src, dest
      logical :: lerr
!
      integer, dimension(ncpus) :: counts,dspls
      integer :: i
!
      call MPI_GATHER(nlocal,1,MPI_INTEGER,counts,1,MPI_INTEGER,root,MPI_COMM_PENCIL,mpierr)
!
      if (lroot) then
        lerr=(sum(counts)>size(src))
        if (.not.lerr) then
          dspls(1)=0
          do i=2,ncpus
            dspls(i)=dspls(i-1)+counts(i-1)
          enddo
        endif
      endif
!
      call mpibcast(lerr)
      if (lerr) then
        return
      else
        call MPI_SCATTERV(src,counts,dspls,mpi_precision,dest,nlocal,mpi_precision,root,MPI_COMM_PENCIL,mpierr)
      endif
!
    endfunction mpiscatterv_real
!***********************************************************************
    function mpiscatterv_int(nlocal,src,dest) result (lerr)
!
      integer :: nlocal
      integer, dimension(:) :: src, dest
      logical :: lerr
!
      integer, dimension(ncpus) :: counts,dspls
      integer :: i
!
      call MPI_GATHER(nlocal,1,MPI_INTEGER,counts,1,MPI_INTEGER,root,MPI_COMM_PENCIL,mpierr)
!
      if (lroot) then
        lerr=(sum(counts)>size(src))
        if (.not.lerr) then
          dspls(1)=0
          do i=2,ncpus
            dspls(i)=dspls(i-1)+counts(i-1)
          enddo
        endif
      endif
!
      call mpibcast(lerr)
      if (lerr) then
        return
      else
        call MPI_SCATTERV(src,counts,dspls,MPI_INTEGER,dest,nlocal,MPI_INTEGER,root,MPI_COMM_PENCIL,mpierr)
      endif
!
    endfunction mpiscatterv_int
!***********************************************************************
    subroutine initialize_foreign_comm(frgn_buffer)
!
!  Initializes communication with a foreign code by fetching the foreign setup
!  and establishing the relationships between the processor grids.
!  Partly specific to the coupling with MagIC.
!
!  20-oct-21/MR: coded
!
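!  The handshake with the foreign root proceeds, in this order: send the
!  communication tag; receive the name length and name, the processor numbers,
!  the gridpoint numbers, the unit system and units, the domain extents and
!  the output cadence dt_out; finally send back an acceptance flag. All of
!  these transfers go through MPI_COMM_WORLD with tag tag_foreign.
!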
      use General, only: itoa, find_index_range, find_proc_general, get_linterp_weights_1D
!
      real, dimension(:,:,:,:), allocatable :: frgn_buffer
!
!  Variables for data transfer from the foreign code.
!
      integer, dimension(3) :: intbuf
      real, dimension(6) :: floatbuf
      logical :: lok
      character(LEN=128) :: messg
      integer :: ind,j,ll,name_len,nxgrid_foreign,nygrid_foreign,il1,il2,im1,im2,lenx,leny,px,py,tag,peer
!
      if (lforeign) then
!
        frgn_setup%root=ncpus
        frgn_setup%tag=tag_foreign
        frgn_setup%t_last_recvd=t
        !print*, 'iproc, iproc_world, MPI_COMM_UNIVERSE, MPI_COMM_WORLD=', iproc, &
        !        iproc_world, MPI_COMM_UNIVERSE, MPI_COMM_WORLD
!
        if (lroot) then
!
          lok=.true.; messg=''
!
!  Send the communication tag to the foreign code.
!
          call mpisend_int(tag_foreign,frgn_setup%root,0,MPI_COMM_WORLD)
!
!  Receive the length of the name of the foreign code.
!
          call mpirecv_int(name_len,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
          if (name_len<=0) then
            call stop_it('initialize_foreign_comm: length of foreign name <=0')
          elseif (name_len>labellen) then
            call stop_it('initialize_foreign_comm: length of foreign name > labellen ='// &
                         trim(itoa(labellen)))
          endif
!
!  Receive the name of the foreign code.
!
          call mpirecv_char(frgn_setup%name(1:name_len),frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
          frgn_setup%name=frgn_setup%name(1:name_len)
          if (.not.(trim(frgn_setup%name)=='MagIC'.or.trim(frgn_setup%name)=='EULAG')) &
            call stop_it('initialize_foreign_comm: communication with foreign code "'// &
                         trim(frgn_setup%name)//'" not supported')
!
!  Receive the processor numbers of the foreign code.
!
          call mpirecv_int(intbuf,3,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
          frgn_setup%procnums=intbuf
          frgn_setup%ncpus=product(intbuf)
          !if (ncpus+frgn_setup%ncpus/=nprocs) &
          !  call fatal_error('initialize_hydro','no of processors '//trim(itoa(nprocs))// &
          !                   ' /= no of own + no of foreign processors '//trim(itoa(ncpus+frgn_setup%ncpus)))
!
!  Calculate the multiplicity of the processors.
!
          frgn_setup%proc_multis = (/nprocx,nprocy,nprocz/)/intbuf
          if ( any(mod((/nprocx,nprocy,nprocz/),intbuf)/=0 ) ) then
            messg="foreign proc numbers don't match;"
            lok=.false.
          endif
!
!  Receive the gridpoint numbers of the foreign code.
!
          call mpirecv_int(frgn_setup%dims,3,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
          !print*, 'Pencil: received frgn_setup%dims=', frgn_setup%dims
          frgn_setup%linterpol = (frgn_setup%dims/=(/nxgrid,nygrid,nzgrid/))
!
!  Receive the units of the foreign code.
!
          call mpirecv_char(frgn_setup%unit_system,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
          if (trim(frgn_setup%unit_system)/='SI'.and.trim(frgn_setup%unit_system)/='CGS') then
            messg=trim(messg)//' foreign unit system unknown;'
            lok=.false.
          endif
          call mpirecv_real(frgn_setup%unit_length,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
          if (frgn_setup%unit_length<=0.) then
            messg=trim(messg)//' foreign length unit <=0;'
            lok=.false.
          endif
          call mpirecv_real(frgn_setup%unit_time,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
          if (frgn_setup%unit_time<=0.) then
            messg=trim(messg)//' foreign time unit <=0;'
            lok=.false.
          endif
          call mpirecv_real(frgn_setup%unit_BB,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
          if (frgn_setup%unit_BB<=0.) then
            messg=trim(messg)//' foreign B unit <=0;'
            lok=.false.
          endif
          call mpirecv_real(frgn_setup%unit_T,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
          if (frgn_setup%unit_T<=0.) then
            messg=trim(messg)//' foreign temperature unit <=0;'
            lok=.false.
          endif
          if (lok) frgn_setup%renorm_UU=frgn_setup%unit_time/frgn_setup%unit_length   ! not the full truth yet
!
!  Receive the domain extents of the foreign code. j loops over r, theta, phi.
!
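!  floatbuf holds the three (min,max) pairs in the order r, theta, phi. The
!  r-range is renormalized to the local radial extent before comparison; a
!  mismatch in r or theta merely switches on interpolation, while a phi
!  mismatch is fatal.
!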
          call mpirecv_real(floatbuf,6,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
          ind=1
          do j=1,3
            frgn_setup%extents(:,j)=floatbuf(ind:ind+1)
            if (j==1) then
              floatbuf(1)=floatbuf(1)/floatbuf(2)*xyz1(1)   ! renormalize r-range for spherical coordinates
              floatbuf(2)=xyz1(1)
            endif
            !print*, 'floatbuf=', floatbuf(ind:ind+1), xyz0(j),xyz1(j)
            if (any(abs(floatbuf(ind:ind+1)-(/xyz0(j),xyz1(j)/))>1.e-6)) then
              if (j==3) then
                messg=trim(messg)//" foreign "//trim(coornames(j))//" domain extent doesn't match;"
                lok=.false.
              else
                frgn_setup%linterpol(j)=.true.
                print*, "initialize_foreign_comm: Warning -- foreign "//trim(coornames(j))// &
                        " domain extent doesn't match"
              endif
            endif
            ind=ind+2
          enddo
!
!  Receive the output timestep of the foreign code.
!
          call mpirecv_real(frgn_setup%dt_out,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
          if (frgn_setup%dt_out<=0.) then
            messg=trim(messg)//' foreign output step <=0;'
            lok=.false.
          endif
!
!  Send the confirmation flag that the setup is acceptable.
!
          call mpisend_logical(lok,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
        endif
!
        call mpibcast_logical(lok,comm=MPI_COMM_PENCIL)
        if (.not.lok) call stop_it('initialize_foreign_comm: '//trim(messg))
        call mpibcast_char(frgn_setup%name,comm=MPI_COMM_PENCIL)
!
!  Broadcast foreign processor numbers and grid size.
!
        call mpibcast_int(frgn_setup%dims,3,comm=MPI_COMM_PENCIL)
        if (lfirst_proc_yz) then
          nxgrid_foreign=frgn_setup%dims(1)
          allocate(frgn_setup%xgrid(nxgrid_foreign))
        endif
        nygrid_foreign=frgn_setup%dims(2)
        allocate(frgn_setup%ygrid(nygrid_foreign))
!
        if (lroot) then
!
!  Receive the vector of foreign global x(r)-grid points.
!
          call mpirecv_real(frgn_setup%xgrid,nxgrid_foreign,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
!
!  Renormalize the x coordinate to the Pencil domain.
!
          frgn_setup%xgrid = frgn_setup%xgrid/frgn_setup%xgrid(nxgrid_foreign)*xyz1(1)
          !print*, 'frgn_setup%xgrid=', frgn_setup%xgrid
!
!  Receive the vector of foreign global y(theta)-grid points.
!
          call mpirecv_real(frgn_setup%ygrid,nygrid_foreign,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
!
!  Receive the normalized time from the foreign side.
!
          call mpirecv_real(frgn_setup%renorm_t,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
          !print*,'PENCIL - dt, dt/tnorm',frgn_setup%dt_out ,frgn_setup%dt_out/frgn_setup%renorm_t
          frgn_setup%renorm_L = frgn_setup%extents(2,1)
          frgn_setup%renorm_UU=frgn_setup%renorm_L/frgn_setup%renorm_t
          frgn_setup%dt_out = frgn_setup%dt_out/frgn_setup%renorm_t
          !print*, 'PENCIL - renormUU, t_last_rcv, dt_out', frgn_setup%renorm_UU, &
          !        frgn_setup%t_last_recvd,frgn_setup%dt_out, t-frgn_setup%t_last_recvd
!
!  Send the number of xyz-procs to the foreign side.
!
          !call mpisend_int((/nprocx,nprocy,nprocz/),frgn_setup%root,tag_foreign,MPI_COMM_WORLD) !!!
          call mpisend_int(nprocx,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
          call mpisend_int(nprocy,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
          call mpisend_int(nprocz,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
!
          !print*, 'frgn_setup%linterpol=', frgn_setup%linterpol
        endif
!
        call mpibcast_real(frgn_setup%renorm_L,comm=MPI_COMM_PENCIL)
        call mpibcast_real(frgn_setup%renorm_t,comm=MPI_COMM_PENCIL)
        call mpibcast_real(frgn_setup%renorm_UU,comm=MPI_COMM_PENCIL)
        call mpibcast_real(frgn_setup%dt_out,comm=MPI_COMM_PENCIL)
!
        call mpibcast_int(frgn_setup%procnums,3,comm=MPI_COMM_PENCIL)
        call mpibcast_int(frgn_setup%proc_multis,3,comm=MPI_COMM_PENCIL)
        call mpibcast(frgn_setup%linterpol,3,comm=MPI_COMM_PENCIL)
!
!       if (frgn_setup%procnums(1)==1) then   ! EULAG case
          !print*, 'PENCIL: frgn_setup%proc_multis=', frgn_setup%proc_multis
          !frgn_setup%peer_rng = find_proc_general(ipz/frgn_setup%proc_multis(3), &
          !                                        ipy/frgn_setup%proc_multis(2), &
          !                                        ipx/frgn_setup%proc_multis(1), &
          !                                        frgn_setup%procnums(3), frgn_setup%procnums(2), &
          !                                        frgn_setup%procnums(1),.true.)
          !peer=frgn_setup%peer_rng(1)
          !print*,'PENCIL - peer=', iproc, frgn_setup%peer_rng(1), &
          !       ipz/frgn_setup%proc_multis(3), ipy/frgn_setup%proc_multis(2), ipx/frgn_setup%proc_multis(1)
!       endif
!
!  Allocate arrays for the index ranges w.r.t. frgn_setup%[xy]grid for each foreign
!  proc. frgn_setup%[xy]ind_rng(-1,:) is the overall index range.
!
        allocate(frgn_setup%xind_rng(-1:frgn_setup%procnums(1)-1,2)); frgn_setup%xind_rng=0
        allocate(frgn_setup%yind_rng(-1:frgn_setup%procnums(2)-1,2)); frgn_setup%yind_rng=0
!
        if (lfirst_proc_xz) then   ! on processors of first YBEAM
!
!  Broadcast the foreign ygrid to all procs with ipx=ipz=0.
!
          call mpibcast_real(frgn_setup%ygrid,nygrid_foreign,comm=MPI_COMM_YBEAM)
!
!  Determine the index range frgn_setup%yind_rng in the foreign ygrid which is
!  needed by the individual processors in y direction.
!
          call find_index_range(frgn_setup%ygrid,nygrid_foreign,y(1),y(my),im1,im2,lextend=.true.)
          frgn_setup%yind_rng(-1,:)=(/im1,im2/)   ! global y index range of rank iproc
!
!  Ask all foreign processors in the first y beam
!  about their share in frgn_setup%yind_rng. No share: receive [0,0].
!
          do py=0,frgn_setup%procnums(2)-1
!
!  Determine the peer using PENCIL (not EULAG) proc grid conventions.
!
            peer = find_proc_general(0,py,0,frgn_setup%procnums(3),frgn_setup%procnums(2), &
                                     frgn_setup%procnums(1),.true.)
            call mpisendrecv_int(frgn_setup%yind_rng(-1,:),2,ncpus+peer,tag_foreign+iproc, &
                                 intbuf,ncpus+peer,tag_foreign+peer,MPI_COMM_WORLD)
            frgn_setup%yind_rng(py,:)=intbuf(1:2)
            !print*,'PENCIL yind1', iproc, peer,ipy,frgn_setup%yind_rng(py,1),frgn_setup%yind_rng(py,2)
            !print*,'PENCIL yind2',iproc,frgn_setup%ypeer_rng(1),frgn_setup%ypeer_rng(2)
            if (frgn_setup%ypeer_rng(1)>=0) then   ! if start of peer range has already been detected
              if (frgn_setup%yind_rng(py,1)>=frgn_setup%yind_rng(py,2).and.frgn_setup%ypeer_rng(2)<0) then
                frgn_setup%ypeer_rng(2)=py-1
              endif
            else                                   ! if start of peer range has not yet been detected
              if (frgn_setup%yind_rng(py,1)<=frgn_setup%yind_rng(py,2)) frgn_setup%ypeer_rng(1)=py
            endif
          enddo
          if (frgn_setup%ypeer_rng(2)<0) frgn_setup%ypeer_rng(2)=py-1
          if (frgn_setup%linterpol(2)) &
            call get_linterp_weights_1D(frgn_setup%ygrid,frgn_setup%yind_rng(-1,:),y,frgn_setup%yinds,frgn_setup%yweights)
        endif   ! lfirst_proc_xz
!
        if (lfirst_proc_yz) then   ! on processors of first XBEAM
!
!  Broadcast the foreign xgrid to all procs with ipy=ipz=0; determine,
!  analogously to the y direction, the index range in the foreign xgrid needed
!  by the individual processors in x direction and the shares of the foreign procs.
!
          call mpibcast_real(frgn_setup%xgrid,nxgrid_foreign,comm=MPI_COMM_XBEAM)
          call find_index_range(frgn_setup%xgrid,nxgrid_foreign,x(1),x(mx),il1,il2,lextend=.true.)
          frgn_setup%xind_rng(-1,:)=(/il1,il2/)
!
          do px=0,frgn_setup%procnums(1)-1
            peer = find_proc_general(0,0,px,frgn_setup%procnums(3),frgn_setup%procnums(2), &
                                     frgn_setup%procnums(1),.true.)
            call mpisendrecv_int(frgn_setup%xind_rng(-1,:),2,ncpus+peer,tag_foreign+iproc, &
                                 intbuf,ncpus+peer,tag_foreign+peer,MPI_COMM_WORLD)
            frgn_setup%xind_rng(px,:)=intbuf(1:2)
            if (frgn_setup%xpeer_rng(1)>=0) then   ! if start of peer range has already been detected
!
!  If px has no share, px-1 is the last of the peer range.
!
              if (frgn_setup%xind_rng(px,1)>frgn_setup%xind_rng(px,2).and.frgn_setup%xpeer_rng(2)<0) then
                frgn_setup%xpeer_rng(2)=px-1
              endif
            else                                   ! if start of peer range has not yet been detected
!
!  If px has a share, it is the first of the peer range.
!
              if (frgn_setup%xind_rng(px,1)<=frgn_setup%xind_rng(px,2)) frgn_setup%xpeer_rng(1)=px
            endif
          enddo
          if (frgn_setup%xpeer_rng(2)<0) frgn_setup%xpeer_rng(2)=px-1
          if (frgn_setup%linterpol(1)) &
            call get_linterp_weights_1D(frgn_setup%xgrid,frgn_setup%xind_rng(-1,:),x,frgn_setup%xinds,frgn_setup%xweights)
          !print*, 'iproc, frgn_setup%xinds=', iproc, frgn_setup%xinds !, sum(frgn_setup%xweights,2)
        endif   ! lfirst_proc_yz
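!
!  The index ranges, the peer ranges and (where interpolation is needed) the
!  interpolation stencils determined on the first beams are now broadcast to
!  the remaining processors of the respective planes.
!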
        call mpibcast_int_arr2(frgn_setup%xind_rng,(/frgn_setup%procnums(1)+1,2/),comm=MPI_COMM_YZPLANE)
        call mpibcast_int_arr(frgn_setup%xpeer_rng,2,comm=MPI_COMM_YZPLANE)
        lenx=frgn_setup%xind_rng(-1,2)-frgn_setup%xind_rng(-1,1)+1
        if (frgn_setup%linterpol(1)) then
          call mpibcast_int(frgn_setup%xinds,mx,comm=MPI_COMM_YZPLANE)
          call mpibcast_real(frgn_setup%xweights,(/mx,2/),comm=MPI_COMM_YZPLANE)
        endif
!
        call mpibcast_int_arr2(frgn_setup%yind_rng,(/frgn_setup%procnums(2)+1,2/),comm=MPI_COMM_XZPLANE)
        call mpibcast_int_arr(frgn_setup%ypeer_rng,2,comm=MPI_COMM_XZPLANE)
        leny=frgn_setup%yind_rng(-1,2)-frgn_setup%yind_rng(-1,1)+1
        if (frgn_setup%linterpol(2)) then
          call mpibcast_int(frgn_setup%yinds,my,comm=MPI_COMM_XZPLANE)
          call mpibcast_real(frgn_setup%yweights,(/my,2/),comm=MPI_COMM_XZPLANE)
        endif
!
        if (allocated(frgn_buffer)) deallocate(frgn_buffer)
        allocate(frgn_buffer(lenx,leny,mz,3))
        !print*, 'PENCIL0B',iproc,peer, lbound(frgn_buffer),ubound(frgn_buffer)
        !print*, 'PENCIL global len*',iproc,peer, lenx, leny, mz
        if (allocated(frgn_setup%recv_req)) deallocate(frgn_setup%recv_req)
        allocate(frgn_setup%recv_req(0:frgn_setup%procnums(1)-1))
!
      endif
!
      if (lforeign) call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
      print*, 'Pencil: after barrier'
      !call MPI_FINALIZE(mpierr)
      !stop
!
    endsubroutine initialize_foreign_comm
!***********************************************************************
    subroutine get_foreign_snap_initiate(nvars,frgn_buffer,lnonblock)
!
!  Initiates the fetching of a data snapshot from the foreign code.
!  Non-blocking mode not yet operational.
!
!  20-oct-21/MR: coded
!
      use General, only: loptest, find_proc_general
!
      real, dimension(:,:,:,:) :: frgn_buffer
      integer :: nvars
      logical, optional :: lnonblock
!
      integer :: istart,ixstart,iystart,lenx_loc,leny_loc,px,py,iv,peer
      logical :: lesav   ! not needed in a normal run, as saving would then happen
                         ! in the regular way; retained in case it is needed again
!
!GM: assume that initially nothing is to be saved
      lesav = .false.
!
      do px=frgn_setup%xpeer_rng(1),frgn_setup%xpeer_rng(2)
        ixstart=frgn_setup%xind_rng(px,1)-frgn_setup%xind_rng(-1,1)+1     !-nghost
        lenx_loc=frgn_setup%xind_rng(px,2)-frgn_setup%xind_rng(px,1)+1    !+2*nghost
        do py=frgn_setup%ypeer_rng(1),frgn_setup%ypeer_rng(2)
          peer = find_proc_general(ipz/frgn_setup%proc_multis(3),py,px,frgn_setup%procnums(3), &
                                   frgn_setup%procnums(2),frgn_setup%procnums(1),.true.)
          iystart=frgn_setup%yind_rng(py,1)-frgn_setup%yind_rng(-1,1)+1   !-nghost
          leny_loc=frgn_setup%yind_rng(py,2)-frgn_setup%yind_rng(py,1)+1  !+2*nghost
          do iv=1,nvars
            if (loptest(lnonblock)) then   ! non-blocking
              call mpirecv_real(frgn_buffer(ixstart:ixstart+lenx_loc-1,iystart:iystart+leny_loc-1,:,iv), &
                                (/lenx_loc,leny_loc,mz/),peer+ncpus,iproc+tag_foreign,MPI_COMM_WORLD, &
                                frgn_setup%recv_req(px))
            else                           ! blocking
              !print*, 'PENCIL1X', iproc,peer,px,ixstart, ixstart+lenx_loc-1
              !print*, 'PENCIL1Y', iproc,peer,py,iystart, iystart+leny_loc-1
              !print*, 'PENCIL3', iproc,peer,lenx_loc,leny_loc,mz
              !print*, 'PENCIL4', iproc, peer,l1, l2,m1,m2
              !print*, 'PENCIL1B', iproc, &
              !        lbound(frgn_buffer(ixstart:ixstart+lenx_loc-1,iystart:iystart+leny_loc-1,:,iv)), &
              !        ubound(frgn_buffer(ixstart:ixstart+lenx_loc-1,iystart:iystart+leny_loc-1,:,iv))
              !print*, 'PENCIL2B', iproc, size(x),size(y),size(z)
              call mpirecv_real(frgn_buffer(ixstart:ixstart+lenx_loc-1,iystart:iystart+leny_loc-1,:,iv), &
                                (/lenx_loc,leny_loc,mz/),peer+ncpus,iproc+tag_foreign,MPI_COMM_WORLD)
            endif
!
          enddo
        enddo
      enddo
!
!GM: wait for the saving command
      if (iproc==0) then
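!
!  Root receives the save flag from the foreign root; it is broadcast to all
!  Pencil processors below.
!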
        print*, 'PENCIL Before RECEIVED LOGICAL SAVE', lesav
        call mpirecv_logical(lesav,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
        !if (lesav) then
        !  call mpirecv_real(time)
        !  print*, 'Pencil Time', t, 'Eulag time', time_e
        !endif
      endif
!
!GM: broadcast the saving logical
      call mpibcast_logical(lesav,comm=MPI_COMM_PENCIL)
      !if (lesav) call write_2daverages()   ! not needed for now
      !if (lesav) call output_snap(,nv1=nv1_capitalvar,nv2=m snap,file=file)
      !print *, 'PENCIL - MINMAX W',iproc, minval(frgn_buffer(:,:,:,1)), maxval(frgn_buffer(:,:,:,1))
      !print *, 'PENCIL - MINMAX V',iproc, minval(frgn_buffer(:,:,:,2)), maxval(frgn_buffer(:,:,:,2))
      !print *, 'PENCIL - MINMAX U',iproc, minval(frgn_buffer(:,:,:,3)), maxval(frgn_buffer(:,:,:,3))
!
      return   ! end of new version
!
!  Below: old version (unreachable).
!
      print*, 'PENCIL-BARRIER', iproc
      call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
      call MPI_FINALIZE(mpierr)
      stop
!
      do px=0,frgn_setup%procnums(1)-1
        if (frgn_setup%xind_rng(px,1)>0) then
          istart=frgn_setup%xind_rng(px,1)-frgn_setup%xind_rng(-1,1)+1-nghost
          lenx_loc=frgn_setup%xind_rng(px,2)-frgn_setup%xind_rng(px,1)+1+2*nghost
          if (loptest(lnonblock)) then
            do iv=1,nvars
              call mpirecv_real(frgn_buffer(istart:istart+lenx_loc-1,:,:,iv), &
                                (/lenx_loc,my,mz/),peer,peer-ncpus,MPI_COMM_WORLD,frgn_setup%recv_req(px))
            enddo
          else   ! blocking case
            !peer=frgn_setup%peer_rng(1)
            !print*, 'PENCIL recv: iproc,peer,tag=', iproc,peer+ncpus,peer
            do iv=1,nvars
              call mpirecv_real(frgn_buffer(istart:istart+lenx_loc-1,:,:,iv), &
                                (/lenx_loc,my,mz/),peer+ncpus,peer,MPI_COMM_WORLD)
              !if (iv.eq.1) write(100+iproc) frgn_buffer(:,:,:,iv)
              !if (iv.eq.2) write(400+iproc) frgn_buffer(:,:,:,iv)
              !if (iv.eq.3) write(700+iproc) frgn_buffer(:,:,:,iv)
              !print *,'PENCIL MINMAX FRGN',iproc, minval(frgn_buffer), maxval(frgn_buffer)
            enddo
!
!  If in the time loop, receive the signal from the foreign code to save a snapshot
!  for synchronous restart capability.
!
            if (it>0) call mpibcast_logical(lsave,frgn_setup%root,MPI_COMM_WORLD)
          endif
        endif
      enddo
      !write(200+iproc) frgn_buffer
      !if (.not.loptest(lnonblock))then
      !else
      !  print*, 'PENCIL INIT NON-BLOCK'
      !endif
      !print*, 'PENCIL get_foreign_snap_initiate: successful', iproc
      !print *, 'PENCIL - MIN MAX W',iproc, minval(frgn_buffer(:,:,:,1)), maxval(frgn_buffer(:,:,:,1))
      !print *, 'PENCIL - MIN MAX V',iproc, minval(frgn_buffer(:,:,:,2)), maxval(frgn_buffer(:,:,:,2))
      !print *, 'PENCIL - MIN MAX U',iproc, minval(frgn_buffer(:,:,:,3)), maxval(frgn_buffer(:,:,:,3))
      !call mpibarrier(MPI_COMM_WORLD)
      !call MPI_FINALIZE(mpierr)
      !stop
!
    endsubroutine get_foreign_snap_initiate
!***********************************************************************
    subroutine get_foreign_snap_finalize(f,ivar1,ivar2,frgn_buffer,interp_buffer,lnonblock)
!
!  Finalizes the fetching of a data snapshot from the foreign code: when
!  non-blocking, calls MPI_WAIT; interpolates the data and scatters them
!  across the processors.
!  Non-blocking mode not yet operational.
!
!  20-oct-21/MR: coded
!
      use General, only: loptest,linear_interpolate_1d, itoa
      ! use IO, only: output_snap, output_snap_finalize
!
      real, dimension(:,:,:,:) :: f,frgn_buffer,interp_buffer
      integer :: ivar1, ivar2, lf1
      logical, optional :: lnonblock
!
      integer, parameter :: ytag=115
      integer :: istart_y,istart_z,istop_y,istop_z,px,py,pz,partner,ll,nvars
      ! logical :: lesav
      ! integer, save :: sav_counter = 0
      ! character(*), parameter :: filepath = "data/allprocs/"
!
      !if (mod(frgn_setup%dims(2),nygrid)==0 .and. mod(frgn_setup%dims(3),nzgrid)==0) then
      lf1 = 1
!
!  Interpolate/scatter data to array f.
!
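!  In the EULAG branch below, the filled index ranges of f depend on the
!  processor's position in the x and y beams: interior processors overwrite
!  their ghost zones from the received buffer, while processors at the domain
!  boundary leave their outer ghost zones untouched.
!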
      if (trim(frgn_setup%name)=='EULAG') then
!
        if (loptest(lnonblock)) then
!
!  Wait for all pending receives (one request per foreign x-processor).
!
          do px=frgn_setup%xpeer_rng(1),frgn_setup%xpeer_rng(2)
            call mpiwait(frgn_setup%recv_req(px))
          enddo
        endif
!
        if (size(frgn_buffer,1) > size(f,1)) lf1 = 2   !!!
        !f(:,:,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU   ! ORIGINAL SETUP
!
        if (lfirst_proc_y .and. nprocy==1) then
          if (lfirst_proc_x .and. nprocx==1) then   ! if only one proc in X
            f(l1:l2,m1:m2,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU
!
          else if (.not.(lfirst_proc_x .or. llast_proc_x)) then   ! MID of XBEAM
            f(:,m1:m2,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU
!
          else if (lfirst_proc_x) then   ! on processors of first XBEAM
            !f(l1:,m1:m2,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU
          else if (llast_proc_x) then    ! on processors of last XBEAM
            f(:l2,m1:m2,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU
          endif
!
        else if (.not.(lfirst_proc_y .or. llast_proc_y)) then   ! MID of YBEAM
!
          if (lfirst_proc_x .and. nprocx==1) then   ! if only one proc in X
            f(l1:l2,:,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU
!
          else if (.not.(lfirst_proc_x .or. llast_proc_x)) then   ! MID of XBEAM
            f(:,:,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU
!
          else if (lfirst_proc_x) then   ! on processors of first XBEAM
            !f(l1:,:,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU
          else if (llast_proc_x) then    ! on processors of last XBEAM
            f(:l2,:,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU
          endif
!
        else if (lfirst_proc_y) then   ! on processors of first YBEAM
          if (lfirst_proc_x .and. nprocx==1) then   ! if only one proc in X
            f(l1:l2,m1:,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU
!
          else if (.not.(lfirst_proc_x .or. llast_proc_x)) then   ! MID of XBEAM
            f(:,m1:,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU
!
          else if (lfirst_proc_x) then   ! on processors of first XBEAM
            !f(l1:,m1:,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU
          else if (llast_proc_x) then    ! on processors of last XBEAM
            f(:l2,m1:,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU
          endif
!
        else if (llast_proc_y) then   ! on processors of last YBEAM
          if (lfirst_proc_x .and. nprocx==1) then   ! if only one proc in X
            f(l1:l2,:m2,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU
!
          else if (.not.(lfirst_proc_x .or. llast_proc_x)) then   ! MID of XBEAM
            f(:,:m2,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU
          else if (lfirst_proc_x) then   ! on processors of first XBEAM
            !f(l1:,:m2,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU
          else if (llast_proc_x) then    ! on processors of last XBEAM
            f(:l2,:m2,:,ivar1:ivar2)=frgn_buffer(lf1:,:,:,:)/frgn_setup%renorm_UU
          endif
        endif
!
        !print*,'PENCILFin0', iproc, m1,m2
        !print*,'PENCILFin1', iproc, lbound(f),ubound(f)
        !print*,'PENCILFin2', iproc, lbound(frgn_buffer),ubound(frgn_buffer)
        !print*,'PENCILFin3', iproc, ncpus, nghost+1, mx-nghost, my-nghost, mz-nghost
        !print*,'PENCILFin4', iproc, size(frgn_buffer,1) , size(frgn_buffer,2)
!
!GM: wait for the saving command, IF SAVING HERE IS NEEDED
        !if (iproc==0) then
        !  print*, 'PENCIL Before RECEIVED LOGICAL SAVE', lesav
        !  call mpirecv_logical(lesav,frgn_setup%root,tag_foreign,MPI_COMM_WORLD)
        !  print*, 'PENCIL RECEIVED LOGICAL SAVE', lesav
        !  print*, 'PENCIL TEST savefile, itoa', 'data'//itoa(sav_counter)
        !  sav_counter = sav_counter + 1
        !  if (lesav) then
        !    call mpirecv_real(time)
        !    print*, 'Pencil Time', t, 'Eulag time', time_e
        !  endif
        !endif
!GM: broadcast the saving logical
        !call mpibcast_logical(lesav,comm=MPI_COMM_PENCIL)
        !if (lesav) then   ! SAVES FULL DATA
        !  call output_snap(f,nv1=1,nv2=mfarray,file='data'//itoa(sav_counter)//'.dat',mode=1)
        !  call output_snap_finalize
        !endif
!
        goto 125   !!!
        !!!f(:,:,:,ivar1:ivar2)=frgn_buffer(lf1:,my::-1,:,:)/frgn_setup%renorm_UU   ! for inversion of theta
!
        !print*, 'iproc, weights=', iproc, maxval(frgn_setup%xweights), maxval(frgn_setup%yweights)
        if (all(frgn_setup%linterpol)) then
          ! XYZ-interpolation
        elseif (all(frgn_setup%linterpol(1:2))) then
!
!  XY-interpolation.
!
          call interpolate_2d(frgn_buffer(lf1:,:,:,:),f(:,:,:,ivar1:ivar2),frgn_setup%xinds,frgn_setup%yinds, &
                              frgn_setup%xweights,frgn_setup%yweights,1./frgn_setup%renorm_UU)
        elseif (all(frgn_setup%linterpol(2:3))) then
          ! YZ-interpolation
        elseif (all(frgn_setup%linterpol((/1,3/)))) then
          ! XZ-interpolation
        elseif (frgn_setup%linterpol(1)) then
!
!  X-interpolation.
!
          call interpolate_1d(frgn_buffer(lf1:,:,:,:),f(:,:,:,ivar1:ivar2),1,frgn_setup%xinds, &
                              frgn_setup%xweights,1./frgn_setup%renorm_UU)
        elseif (frgn_setup%linterpol(2)) then
!
!  Y-interpolation.
!
          call interpolate_1d(frgn_buffer(lf1:,:,:,:),f(:,:,:,ivar1:ivar2),2,frgn_setup%yinds, &
                              frgn_setup%yweights,1./frgn_setup%renorm_UU)
        elseif (frgn_setup%linterpol(3)) then
          ! Z-interpolation
        endif
!
 125    continue   !!!
!
      elseif (lfirst_proc_yz) then
!
        if (frgn_setup%proc_multis(1)>1) then
          nvars=ivar2-ivar1+1
          do pz=0,nprocz-1
            istart_z=pz*nz+1; istop_z=istart_z+nz-1
            do py=0,nprocy-1
              if (lprocz_slowest) then
                partner = py + nprocy*pz
              else
                partner = pz + nprocz*py
              endif
              istart_y=py*ny+1; istop_y=istart_y+ny-1
              do ll=l1,l2
                call linear_interpolate_1d(frgn_buffer(:,istart_y:istop_y,istart_z:istop_z,:), &
                                           frgn_setup%xgrid(frgn_setup%xind_rng(-1,1):frgn_setup%xind_rng(-1,2)), &
                                           x(ll),interp_buffer(ll,:,:,:),.true.)
              enddo
              interp_buffer=interp_buffer/frgn_setup%renorm_UU
              call mpisend_real(interp_buffer,(/nx,ny,nz,nvars/),partner,ytag,MPI_COMM_YZPLANE)
              if (size(f,1)==mx) then   ! f has ghosts
                f(l1:l2,m1:m2,n1:n2,ivar1:ivar2)=interp_buffer
              else
                f(:,:,:,ivar1:ivar2)=interp_buffer
              endif
            enddo
          enddo
        endif
        !call mpirecv_real(f(l1:l2,m1:m2,n1:n2,ivar1:ivar2),(/nx,ny,nz,nvars/),0,ytag,MPI_COMM_YZPLANE)
!
      endif   ! if (name=='EULAG')
!
    endsubroutine get_foreign_snap_finalize
!***********************************************************************
    subroutine interpolate_2d(inbuffer,outbuffer,xinds,yinds,xweights,yweights,scal_)
!
!  Bilinear interpolation; at the moment only for the first two dimensions.
!
      use General, only: roptest
!
      real, dimension(:,:,:,:) :: inbuffer,outbuffer
      !real, dimension(mx,my,mz,mvar) :: outbuffer
      integer, dimension(:) :: xinds,yinds
      real, dimension(:,:) :: xweights,yweights
      real, optional :: scal_
!
      real :: yw1,yw2,scal
      integer :: ll,mm,xind1,xind2,yind1,yind2
!
      scal=roptest(scal_,1.)
!
      do mm=1,size(outbuffer,2)
        yw1=yweights(mm,1)*scal; yw2=yweights(mm,2)*scal
        yind1=yinds(mm); yind2=yind1+1
        do ll=1,size(outbuffer,1)
          xind1=xinds(ll); xind2=xind1+1
          outbuffer(ll,mm,:,:) =   (xweights(ll,1)*yw1)*inbuffer(xind1,yind1,:,:) &
                                 + (xweights(ll,2)*yw1)*inbuffer(xind2,yind1,:,:) &
                                 + (xweights(ll,1)*yw2)*inbuffer(xind1,yind2,:,:) &
                                 + (xweights(ll,2)*yw2)*inbuffer(xind2,yind2,:,:)
        enddo
      enddo
!
    endsubroutine interpolate_2d
!***********************************************************************
    subroutine interpolate_1d(inbuffer,outbuffer,dim,inds,weights,scal_)
!
!  Linear interpolation along dimension dim (1, 2, or 3).
!
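!  For each target index ii, with ind1 = inds(ii) and ind2 = ind1+1,
!    out(ii) = scal*( weights(ii,1)*in(ind1) + weights(ii,2)*in(ind2) ),
!  applied along the selected dimension.
!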
      use General, only: roptest
!
      real, dimension(:,:,:,:) :: inbuffer,outbuffer
      !real, dimension(mx,my,mz,mvar) :: outbuffer
      integer :: dim
      integer, dimension(:) :: inds
      real, dimension(:,:) :: weights
      real, optional :: scal_
!
      real :: w1,w2,scal
      integer :: ii,ind1,ind2
!
      scal=roptest(scal_,1.)
!
      do ii=1,size(outbuffer,dim)
        w1=weights(ii,1)*scal; w2=weights(ii,2)*scal
        ind1=inds(ii); ind2=ind1+1
        select case (dim)
          case (1); outbuffer(ii,:,:,:) = w1*inbuffer(ind1,:,:,:) + w2*inbuffer(ind2,:,:,:)
          case (2); outbuffer(:,ii,:,:) = w1*inbuffer(:,ind1,:,:) + w2*inbuffer(:,ind2,:,:)
          case (3); outbuffer(:,:,ii,:) = w1*inbuffer(:,:,ind1,:) + w2*inbuffer(:,:,ind2,:)
        endselect
      enddo
!
    endsubroutine interpolate_1d
!***********************************************************************
    logical function update_foreign_data(t,t_foreign)
!
!  Determines whether it is time to fetch new data from the foreign code.
!  If so, returns the time of the next expected delivery in t_foreign.
!
!  20-oct-21/MR: coded
!
      real(KIND=rkind8), intent(IN) :: t
      real, intent(OUT):: t_foreign
!
      if (t-frgn_setup%t_last_recvd>0.) then
        frgn_setup%t_last_recvd=frgn_setup%t_last_recvd+frgn_setup%dt_out
        update_foreign_data=.true.
        t_foreign = frgn_setup%t_last_recvd
      else
        update_foreign_data=.false.
      endif
!
    endfunction update_foreign_data
!***********************************************************************
    subroutine set_rslice_communicator
!
!  Splits MPI_COMM_PENCIL according to participation in r-slice output and
!  determines the root of the resulting r-slice communicator.
!
      integer :: rank
!
      call MPI_COMM_SPLIT(MPI_COMM_PENCIL, lwrite_slice_r, iproc, MPI_COMM_RSLICE, mpierr)
      call MPI_COMM_RANK(MPI_COMM_RSLICE,rank,mpierr)
      if (rank==0) root_rslice=iproc
!
    endsubroutine set_rslice_communicator
!***********************************************************************
    subroutine mpiallreduce_merge(list,len,comm)
!
!  Merges an integer list across all processors.
!  Returns the list of unique elements.
!
!  19-oct-22/MR: coded
!
      use General, only: merge_lists
!
      integer, dimension(*) :: list
      integer :: len
      integer, optional, intent(in) :: comm
!
      integer :: maxlen,ip,len_proc
      integer, dimension(:), allocatable :: buffer
!
      call mpireduce_max(len,maxlen,comm)
!
      if (lroot) then
        allocate(buffer(maxlen))
        do ip=1,ncpus-1
          call mpirecv_int(len_proc,ip,ip,comm)
          call mpirecv_int(buffer,len_proc,ip,ip,comm)
          call merge_lists(list,len,buffer(1:len_proc))
        enddo
      else
        call mpisend_int(len,root,iproc,comm)
        call mpisend_int(list,len,root,iproc,comm)
      endif
!
      call mpibcast_int(len,root,comm)
      call mpibcast_int(list,len,root,comm)
!
    endsubroutine mpiallreduce_merge
!***********************************************************************
endmodule Mpicomm