/* Compile-time switch. It is tested by the "#ifdef AL_GATHER_INDEXES" section
 * further down, where s->begs is allocated and the beg/end index arrays of all
 * ranks are collected with MPI_Allgather. */
22 #define AL_GATHER_INDEXES
/*
 * Forward declaration of the one-dimensional decomposition helper.
 *
 * Parameter roles, as far as the visible body fragment shows them:
 *   gdim   extent being split (the body takes gdim % lproc and clamps
 *          the end index to gdim-1)
 *   lproc  number of parts (the part with lloc == lproc-1 is treated as last)
 *   lloc   index of the part being computed
 *   start  out: first index assigned to part lloc
 *   end    out: last index assigned to part lloc (inclusive)
 *
 * NOTE(review): the meaning of the int return value is not visible in this
 * excerpt -- confirm against the full definition.
 */
36 int AL_Decomp1d_(
int gdim,
int lproc,
int lloc,
int *start,
int *end);
/* Fragment of a function body -- presumably AL_Decompose, judging by its log
 * messages. The listing is incomplete: the signature, braces and many
 * statements are missing, so the comments below describe only what is shown. */
53 register int i, nd, nb;
58 int reord, nleft, nright;
59 int stagpt, type_size;
61 MPI_Datatype itype,ivector;
62 MPI_Comm comm, cart_comm;
/* Diagnostic for an invalid SZ pointer (the guarding test is not shown). */
70 printf(
"AL_Decompose: wrong SZ pointer\n");
/* Diagnostic for an unrecognised mode; per the message the default
 * AL_MPI_DECOMP is used instead (the fallback assignment is not shown). */
85 printf(
"AL_Decompose: wrong mode. Using default AL_MPI_DECOMP\n");
/* Copy the caller-supplied process counts procs[] into s->lsize[]. */
98 for(nd=0;nd<ndim;nd++){
99 s->
lsize[nd] = procs[nd];
/* Load ldims[] from s->lsize[]; it is passed to MPI_Cart_create below. */
114 for(nd=0;nd<ndim;nd++){
115 ldims[nd] = s->
lsize[nd];
/* NOTE(review): this print always reads ldims[0..2], whatever ndim is --
 * confirm those entries are initialised when ndim < 3. */
121 if(myrank==0) printf(
"MPI_Cart_create: %d x %d x %d, ndim %d, nproc %d\n",ldims[0],ldims[1],ldims[2],ndim,nproc);
/* Create the Cartesian communicator cart_comm on top of comm. */
125 MPI_Cart_create(comm, ndim, ldims, periods, reord, &cart_comm);
/* ldims[] is reused as the output buffer: after this call it holds the
 * Cartesian coordinates of myrank, no longer the process counts. */
133 MPI_Cart_coords(cart_comm, myrank, ndim, ldims);
/* Store the coordinates in s->lrank[] and query the neighbours one step away
 * along each dimension (nleft = source rank, nright = destination rank for a
 * displacement of +1). Only the store to s->right[] is visible here. */
135 for(nd=0;nd<ndim;nd++){
136 s->
lrank[nd] = ldims[nd];
137 MPI_Cart_shift(cart_comm, nd, 1, &nleft, &nright);
139 s->
right[nd] = nright;
/* One sub-communicator per dimension, stored in s->oned_comm[nd];
 * rem_dims is filled on lines that are not shown. */
150 for(nd=0;nd<ndim;nd++){
158 MPI_Cart_sub(cart_comm, rem_dims, &(s->
oned_comm[nd]));
/* Per-dimension extent: (end - beg + 1) plus the bg and eg widths.
 * The left-hand side of this assignment is on a line that is not shown. */
176 for(nd=0;nd<ndim;nd++){
178 ( (s->
end[nd]) - (s->
beg[nd]) + (s->
bg[nd]) + (s->
eg[nd]) + 1 );
/* Optional section: collect the beg/end indices of every rank. */
181 #ifdef AL_GATHER_INDEXES
/* The buffer holds 2 x nproc x ndim ints.
 * NOTE(review): s->ends is never assigned in the visible lines; the doubled
 * size suggests it points at the second half of this buffer -- confirm. */
192 if( !(s->
begs = (
int *)malloc(
sizeof(
int)*nproc*ndim*2))) {
193 printf(
"AL_Decompose: could not allocate s->begs\n");
/* Every rank contributes ndim ints; results are ordered by rank in comm. */
198 MPI_Allgather(s->
beg, ndim, MPI_INT, s->
begs, ndim, MPI_INT, comm);
201 MPI_Allgather(s->
end, ndim, MPI_INT, s->
ends, ndim, MPI_INT, comm);
/* First datatype pass: the block length along nd is s->bg[nd]. The two nb
 * loops visit the dimensions below and above nd (their bodies are not shown;
 * presumably they fill count[] and stride[]). */
218 for(nd=0;nd<ndim;nd++){
220 blocklen[nd] = s->
bg[nd];
222 for(nb=0;nb<nd;nb++){
226 for(nb=nd+1;nb<ndim;nb++){
/* Build and commit one derived datatype per dimension, either strided
 * (MPI_Type_vector) or contiguous; the test choosing between the two calls
 * and the place where ivector is stored are not shown. */
232 for(nd=0;nd<ndim;nd++){
234 MPI_Type_vector( count[nd], blocklen[nd], stride[nd], itype, &ivector);
236 MPI_Type_contiguous( blocklen[nd], itype, &ivector);
238 MPI_Type_commit(&ivector);
/* Second datatype pass: same structure, but the block length is either
 * s->bg[nd]+1 or s->bg[nd]. The selecting condition is not shown
 * (presumably the staggered direction -- confirm). */
244 for(nd=0;nd<ndim;nd++){
250 blocklen[nd] = s->
bg[nd]+1;
252 blocklen[nd] = s->
bg[nd];
256 for(nb=0;nb<nd;nb++){
260 for(nb=nd+1;nb<ndim;nb++){
266 for(nd=0;nd<ndim;nd++){
268 MPI_Type_vector( count[nd], blocklen[nd], stride[nd], itype, &ivector);
270 MPI_Type_contiguous( blocklen[nd], itype, &ivector);
272 MPI_Type_commit(&ivector);
/* Size in bytes of one element of s->type; used to scale the offsets below. */
282 MPI_Type_size( s->
type, &type_size);
/* Two message tags per dimension: nd*100 and nd*100+1. */
284 for(nd=0;nd<ndim;nd++){
285 s->
tag1[nd] = nd*100;
286 s->
tag2[nd] = nd*100+1;
299 for(nb=0;nb<nd;nb++){
/* Multiply the four send/receive offsets by the element size. */
310 s->
sendb1[nd] *= type_size;
311 s->
recvb1[nd] *= type_size;
312 s->
sendb2[nd] *= type_size;
313 s->
recvb2[nd] *= type_size;
/* Handles for the two subarray datatypes committed below. */
323 MPI_Datatype igsubarr, ilsubarr;
/* "gsubarr": full sizes come from s->arrdim[], start offsets are beg - bg.
 * The call that creates igsubarr is not shown. */
332 for(nd=0;nd<ndim;nd++){
333 gdims[nd] = s->
arrdim[nd];
335 starts[nd] = s->
beg[nd] - s->
bg[nd];
/* Debug print. NOTE(review): always indexes [0..2], independent of ndim. */
339 printf(
"%d, gsubarr: gdims[0:2] %d,%d,%d, ldims[0:2] %d,%d,%d, starts[0:2] %d,%d,%d\n", s->
rank, gdims[0], gdims[1], gdims[2], ldims[0], ldims[1], ldims[2], starts[0], starts[1], starts[2]);
345 MPI_Type_commit(&igsubarr);
/* Same construction repeated for each direction istag, committed into
 * igsubarr_stag[istag]. */
353 for (istag = 0; istag < ndim; istag++){
354 for(nd = 0; nd < ndim; nd++){
355 gdims[nd] = s->
arrdim[nd];
357 starts[nd] = s->
beg[nd] - s->
bg[nd];
/* Special case when beg equals bg along istag (the body is not shown). */
398 if (s->
beg[istag] == s->
bg[istag]) {
406 printf(
"%d, gsubarr_stag[%d]: gdims[0:2] %d,%d,%d, ldims[0:2] %d,%d,%d, starts[0:2] %d,%d,%d \n", s->
rank, istag, gdims[0], gdims[1], gdims[2], ldims[0], ldims[1], ldims[2], starts[0], starts[1], starts[2]);
412 MPI_Type_commit(igsubarr_stag + istag);
/* "lsubarr": start offsets are taken from s->lbeg[]; the remaining
 * arguments are set on lines that are not shown. */
420 for(nd=0;nd<ndim;nd++){
423 starts[nd] = s->
lbeg[nd];
427 printf(
"%d, lsubarr: gdims[0:2] %d,%d,%d, ldims[0:2] %d,%d,%d, starts[0:2] %d,%d,%d \n", s->
rank, gdims[0], gdims[1], gdims[2], ldims[0], ldims[1], ldims[2], starts[0], starts[1], starts[2]);
432 MPI_Type_commit(&ilsubarr);
/* Per-direction variant of the local subarray, committed into
 * ilsubarr_stag[istag]. */
441 for (istag = 0; istag < ndim; istag++){
442 for(nd = 0; nd < ndim; nd++){
445 starts[nd] = s->
lbeg[nd];
/* Same beg == bg special case as in the gsubarr_stag loop (body not shown). */
478 if (s->
beg[istag] == s->
bg[istag]) {
487 printf(
"%d, lsubarr_stag[%d]: gdims[0:2] %d,%d,%d, ldims[0:2] %d,%d,%d, starts[0:2] %d,%d,%d \n", s->
rank, istag, gdims[0], gdims[1], gdims[2], ldims[0], ldims[1], ldims[2], starts[0], starts[1], starts[2]);
492 MPI_Type_commit(ilsubarr_stag + istag);
/* Final report: print the beg/end pair of every dimension. */
506 printf(
"AL_Decompose: Decomposition successful\n");
507 for(nd=0;nd<ndim;nd++){
508 printf(
"AL_Decompose: %d %d\n", s->
beg[nd], s->
end[nd]);
/* Fragment of a function body -- presumably AL_Find_decomp_, judging by its
 * log messages. Signature, braces and many statements are not shown. */
533 int myid, nproc, ntdim, npdim;
/* Look up the descriptor selected by sz_ptr in the global sz_stack table. */
538 s = sz_stack[sz_ptr];
/* Rank 0 logs the requested mode. */
547 if(myid==0) printf(
"AL_Find_decomp_: Using Mode - %d\n",mode);
/* User-decomposition branch (the enclosing test is not shown). */
558 if(myid==0) printf(
"AL_Find_decomp_: Using User Decomp Mode - %d\n",ntdim);
/* Copy only the non-zero entries of ldims[] into s->lsize[]. */
561 for(nd=0;nd<ntdim;nd++){
562 if( ldims[nd] != 0 ){
563 s->
lsize[nd] = ldims[nd];
/* Collect s->arrdim[nd] into gdims[], indexed by npdim; the condition that
 * selects dimensions and the update of npdim are not shown. */
577 for(nd=0;nd<ntdim;nd++){
585 gdims[npdim] = s->
arrdim[nd];
/* Loop over all s->ndim dimensions (body not shown). */
607 for(nd=0;nd<s->
ndim;nd++){
614 if(myid==0) printf(
"Using mode AL_AUTO_DECOMP\n");
/* Logged when the automatic decomposition fails; per the message the MPI
 * decomposition mode is used instead. */
629 if( myid == 0 ) printf(
"Auto Decomp failed: using MPI Decomp Mode\n");
638 if(myid==0) printf(
"Auto Decomp: Mode is: %d %d %d %d\n",mode,
AL_MPI_DECOMP, nproc, npdim);
/* Zero every entry first: MPI_Dims_create only chooses values for entries
 * that are 0 on input. */
642 for(i=0;i<npdim;i++){ ldims[
i]=0 ;}
643 MPI_Dims_create(nproc, npdim, ldims);
/* Gather ldims[] through the index table s_inds[], then scatter it back.
 * NOTE(review): as shown this round trip is an identity; s_inds is presumably
 * changed on the line between these two loops (not shown) -- confirm. */
651 for(i=0;i<npdim;i++) t_ldims[i] = ldims[s_inds[i]];
653 for(i=0;i<npdim;i++) ldims[s_inds[i]] = t_ldims[i];
/* NOTE(review): always prints ldims[0..2], independent of npdim. */
656 if(myid==0) printf(
"MPI Decomp: %d %d %d %d %d\n", ldims[0],ldims[1],ldims[2],nproc,npdim);
/* NOTE(review): "AL_Fing_decomp_" in this message looks like a typo for
 * "AL_Find_decomp_"; it is a runtime string, so it is left unchanged here. */
665 if(myid==0) printf(
"End of AL_Fing_decomp_: ");
/* Write the result back to the caller: entry i of ldims[] goes to position
 * ipdims[i] of procs[]. Rank 0 prints the matching s->lsize[] entries. */
667 for(i=0;i<npdim;i++){
669 procs[ipdims[
i]] = ldims[
i];
671 if(myid==0) printf(
"%d ",s->
lsize[ipdims[i]]);
675 if(myid==0) printf(
"\n");
/* Fragment of a function body -- presumably AL_Global_to_local_, judging by
 * the debug message below. Only the locals, the descriptor lookup and two
 * debug prints are shown. */
693 int myrank, nproc, ndim;
694 int gdim, lproc, lloc;
/* Look up the descriptor selected by sz_ptr in the global sz_stack table. */
697 s = sz_stack[sz_ptr];
/* Debug print for dimension i: global beg/end, local lbeg/lend, bg and
 * larrdim_gp, prefixed with this process's s->rank. */
736 printf(
"%d AL_Global_to_local, dim %d : s->beg %d, s->end %d, lbeg %d, lend %d, bg %d, larrdim_gp %d\n",s->
rank,i, s->
beg[i],s->
end[i], s->
lbeg[i], s->
lend[i], s->
bg[i], s->
larrdim_gp[i]);
/* Debug print of s->stride[i]; the two values after the rank are the
 * dimension index i and its stride. */
760 printf(
"%d Strides: %d %d\n",s->
rank,i,s->
stride[i]);
/* Body fragment of the 1-D decomposition helper (gdim, lproc, lloc, start and
 * end are its parameters). NOTE(review): the initial assignment to nlocal is
 * on a line that is not shown -- presumably gdim / lproc; confirm. */
784 int nlocal, deficit, itmp;
/* Base offset, as if every part held exactly nlocal entries. */
787 *start = lloc * nlocal;
/* Remainder of the division: the first `deficit` parts get one extra entry. */
788 deficit = gdim % lproc;
/* Shift the start by the number of earlier parts that received an extra
 * entry, i.e. min(lloc, deficit). */
789 itmp = lloc < deficit ? lloc : deficit;
790 *start = *start + itmp;
/* This part itself is one of the enlarged ones. */
791 if( lloc < deficit ){ nlocal = nlocal+1; }
/* The end index is inclusive. */
792 *end = *start + nlocal -1;
795 printf(
"AL_Decomp1_: %d %d %d %d - %d %d \n", lloc, deficit, *start, *end, nlocal, lproc);
/* Clamp: the last part, or any part that would run past the extent, ends at
 * the final valid index gdim-1. */
798 if( *end >= gdim || lloc == lproc-1 ) {*end = gdim-1;}
int isperiodic[AL_MAX_DIM]
int AL_Decomp1d_(int gdim, int lproc, int lloc, int *start, int *end)
int isparallel[AL_MAX_DIM]
int AL_Auto_Decomp_(int nproc, int npdim, int *ldims, int *gdims)
int AL_Find_decomp_(int sz_ptr, int mode, int *procs)
MPI_Comm oned_comm[AL_MAX_DIM]
SZ * sz_stack[AL_MAX_ARRAYS]
int AL_Decompose(int sz_ptr, int *procs, int mode)
int larrdim_gp[AL_MAX_DIM]
int AL_Type_create_subarray(int, int *, int *, int *, int, MPI_Datatype, MPI_Datatype *)
int AL_Sort_(int, int *, int *)
MPI_Datatype type_rl[AL_MAX_DIM]
MPI_Datatype lsubarr_stag[AL_MAX_DIM]
int isstaggered[AL_MAX_DIM]
MPI_Datatype strided[AL_MAX_DIM]
#define AL_STAGGERED_OVERLAP
int stack_ptr[AL_MAX_ARRAYS]
int AL_Global_to_local_(int sz_ptr)
MPI_Datatype gsubarr_stag[AL_MAX_DIM]
Internal include file for the ArrayLib.
MPI_Datatype type_lr[AL_MAX_DIM]