PLUTO
al_decompose.c
Go to the documentation of this file.
1 /* ///////////////////////////////////////////////////////////////////// */
2 /*!
3  \file
4  \brief ArrayLib functions to decompose the domain among the MPI processes.
5 
6  ArrayLib functions to decompose the domain among the MPI processes.
7 
8  \authors A. Malagoli (University of Chicago)
9  \authors G. Muscianisi (g.muscianisi@cineca.it)
10  \authors A. Mignone (mignone@ph.unito.it)
11 
12  \date Aug 24, 2012
13 */
14 /* ///////////////////////////////////////////////////////////////////// */
15 #include "al_hidden.h" /*I "al_hidden.h" I*/
16 
17 /*
18  If this is set, then each processor
19  gathers the entire index set for a
20  distributed array from all the group
21 */
22 #define AL_GATHER_INDEXES
23 
24 /*
25  The SZ structure stack is defined and maintained
26  in al_szptr_.c
27  Here we include an external reference to it in
28  order to be able to make internal references to it.
29 */
30 extern SZ *sz_stack[AL_MAX_ARRAYS];
31 extern int stack_ptr[AL_MAX_ARRAYS];
32 
33 /* PROTOTYPES */
34 int AL_Find_decomp_(int sz_ptr, int mode, int *procs);
35 int AL_Global_to_local_(int sz_ptr);
36 int AL_Decomp1d_(int gdim, int lproc, int lloc, int *start, int *end);
37 
38 /* ********************************************************************** */
39 int AL_Decompose(int sz_ptr, int *procs, int mode)
40 /*! Create a distributed array descriptor and compile it
41  *
42  * \param [in] sz_ptr integer pointer to the distributed array descriptor
43  * \param [in,out] procs array with the processor decomposition
44  * \param [in] mode available value:\n
45  AL_AUTO_DECOMP (internal decomposition) [Only for powers of two dimensions];\n
46  AL_MPI_DECOMP (MPI decomposition); \n
47  AL_USER_DECOMP (user defined)
48  ************************************************************************ */
49 {
50  int myrank, nproc;
51  SZ *s;
52 
53  register int i, nd, nb;
54 
55  int ndim;
56  int gdims[AL_MAX_DIM], ldims[AL_MAX_DIM], starts[AL_MAX_DIM];
57  int count[AL_MAX_DIM], blocklen[AL_MAX_DIM], periods[AL_MAX_DIM];
58  int reord, nleft, nright;
59  int stagpt, type_size;
60  int stride[AL_MAX_DIM];
61  MPI_Datatype itype,ivector;
62  MPI_Comm comm, cart_comm;
63 
64  int Find_decomp = AL_TRUE; /* By default, we use the internal decomposition */
65 
66  /* DIAGNOSTICS
67  Check that sz_ptr points to an allocated SZ
68  */
69  if( stack_ptr[sz_ptr] == AL_STACK_FREE){
70  printf("AL_Decompose: wrong SZ pointer\n");
71  }
72 
73  s = sz_stack[sz_ptr];
74 
75  myrank = s->rank;
76  nproc = s->size;
77 
78  ndim = s->ndim;
79 
80  /*
81  Find if we need to find our own decomposition
82  */
83 
84  if( mode != AL_USER_DECOMP && mode != AL_AUTO_DECOMP && mode != AL_MPI_DECOMP ) {
85  printf("AL_Decompose: wrong mode. Using default AL_MPI_DECOMP\n");
86  mode = AL_MPI_DECOMP;
87  }
88 
89  if( mode == AL_USER_DECOMP ){ Find_decomp = AL_FALSE; }
90 
91  /*
92  If Find_decomp is TRUE, we proceed to find the
93  decomposition
94  */
95  if( Find_decomp == AL_TRUE ){
96  AL_Find_decomp_(sz_ptr, mode, procs);
97  } else {
98  for(nd=0;nd<ndim;nd++){
99  s->lsize[nd] = procs[nd];
100  }
101  }
102 
103  /*
104  Now we set the MPI datatypes for the ghost points
105  exchange
106  */
107  comm = s->comm;
108 
109  /*
110  First, we create the cartesian communicator
111  */
112  reord = 0;
113 
114  for(nd=0;nd<ndim;nd++){
115  ldims[nd] = s->lsize[nd];
116  periods[nd] = s->isperiodic[nd];
117  }
118 
119 #ifdef DEBUG
120  if(sz_ptr==4){ /* -- print SZ_stagx -- */
121  if(myrank==0) printf("MPI_Cart_create: %d x %d x %d, ndim %d, nproc %d\n",ldims[0],ldims[1],ldims[2],ndim,nproc);
122  }
123 #endif
124 
125  MPI_Cart_create(comm, ndim, ldims, periods, reord, &cart_comm);
126 
127  s->cart_comm = cart_comm;
128 
129  /*
130  Then we generate the cartesian topology
131  of the processors
132  */
133  MPI_Cart_coords(cart_comm, myrank, ndim, ldims);
134 
135  for(nd=0;nd<ndim;nd++){
136  s->lrank[nd] = ldims[nd];
137  MPI_Cart_shift(cart_comm, nd, 1, &nleft, &nright);
138  s->left[nd] = nleft;
139  s->right[nd] = nright;
140  }
141 
142  /*
143  We also generate a set of 1D communicators that
144  are useful when doing global operations along
145  single directions.
146  */
147  {
148  int rem_dims[AL_MAX_DIM];
149 
150  for(nd=0;nd<ndim;nd++){
151  for(i=0;i<ndim;i++){
152  if( i==nd ) {
153  rem_dims[i] = AL_TRUE;
154  } else {
155  rem_dims[i] = AL_FALSE;
156  }
157  }
158  MPI_Cart_sub(cart_comm, rem_dims, &(s->oned_comm[nd]));
159  }
160  }
161 
162  /*
163  Now we create the strided data types for the
164  exchange of the ghost points
165  */
166 
167  /*
168  Get the local array indexes (in global addressing mode)
169  */
170  AL_Global_to_local_(sz_ptr);
171 
172  /*
173  Set the size of the local buffer for this array
174  */
175  s->buffsize = 1;
176  for(nd=0;nd<ndim;nd++){
177  s->buffsize *=
178  ( (s->end[nd]) - (s->beg[nd]) + (s->bg[nd]) + (s->eg[nd]) + 1 );
179  }
180 
181 #ifdef AL_GATHER_INDEXES
182 
183  /*
184  If it has not yet been done, allocate the array of begs
185  and ends, then gather the indexing information from all
186  processors. We check that the array of 'begs' was not
187  previously allocated, in order to avoid problems with
188  duplicated descriptors.
189  */
190 /* if( (s->begs) ) free(s->begs); */
191 
192  if( !(s->begs = (int *)malloc(sizeof(int)*nproc*ndim*2))) {
193  printf("AL_Decompose: could not allocate s->begs\n");
194  }
195  s->ends = s->begs + nproc*ndim;
196 
197  /* Gather the 'beg' index information from all nodes */
198  MPI_Allgather(s->beg, ndim, MPI_INT, s->begs, ndim, MPI_INT, comm);
199 
200  /* Gather the 'end' index information from all nodes */
201  MPI_Allgather(s->end, ndim, MPI_INT, s->ends, ndim, MPI_INT, comm);
202 
203 #endif /* End #ifdef AL_GATHER_INDEXES */
204 
205  /* -- Set the type size of the array -- */
206 
207  MPI_Type_size(s->type, &(s->type_size));
208 
209  /*--------------------- Begin creation of strided data types -------*/
210 
211  /*
212  Set the count, blocklen, and stride elements for
213  the MPI_Vector calls.
214  This is VERY, VERY cryptic, but it seems to work ..
215  */
216 
217  /* Right->Left exchange */
218  for(nd=0;nd<ndim;nd++){
219  count[nd] = 1;
220  blocklen[nd] = s->bg[nd];
221  stride[nd] = s->larrdim_gp[nd];
222  for(nb=0;nb<nd;nb++){
223  blocklen[nd] *= s->larrdim_gp[nb];
224  stride[nd] *= s->larrdim_gp[nb];
225  }
226  for(nb=nd+1;nb<ndim;nb++){
227  count[nd] *= s->larrdim_gp[nb];
228  }
229  }
230 
231  itype = s->type;
232  for(nd=0;nd<ndim;nd++){
233  if( count[nd] != 1){
234  MPI_Type_vector( count[nd], blocklen[nd], stride[nd], itype, &ivector);
235  } else {
236  MPI_Type_contiguous( blocklen[nd], itype, &ivector);
237  }
238  MPI_Type_commit(&ivector);
239  s->strided[nd] = ivector;
240  s->type_rl[nd] = ivector;
241  }
242 
243  /* Left->Right exchange */
244  for(nd=0;nd<ndim;nd++){
245  count[nd] = 1;
246  /* If the array is staggered, we give
247  unique ownership of the overlapping
248  point to the LEFT processor */
249  if( s->isstaggered[nd] == AL_TRUE ){
250  blocklen[nd] = s->bg[nd]+1;
251  } else {
252  blocklen[nd] = s->bg[nd];
253  }
254 
255  stride[nd] = s->larrdim_gp[nd];
256  for(nb=0;nb<nd;nb++){
257  blocklen[nd] *= s->larrdim_gp[nb];
258  stride[nd] *= s->larrdim_gp[nb];
259  }
260  for(nb=nd+1;nb<ndim;nb++){
261  count[nd] *= s->larrdim_gp[nb];
262  }
263  }
264 
265  itype = s->type;
266  for(nd=0;nd<ndim;nd++){
267  if( count[nd] != 1){
268  MPI_Type_vector( count[nd], blocklen[nd], stride[nd], itype, &ivector);
269  } else {
270  MPI_Type_contiguous( blocklen[nd], itype, &ivector);
271  }
272  MPI_Type_commit(&ivector);
273  s->type_lr[nd] = ivector;
274  }
275 
276  /*--------------------- End creation of strided data types -------*/
277 
278  /*
279  Create the buffer pointers to the array
280  for the SendRecv operations
281  */
282  MPI_Type_size( s->type, &type_size);
283 
284  for(nd=0;nd<ndim;nd++){
285  s->tag1[nd] = nd*100;
286  s->tag2[nd] = nd*100+1;
287  /* This is the correction for a staggered mesh */
288  if( s->isstaggered[nd] == AL_TRUE ){
289  stagpt = 1;
290  } else {
291  stagpt = 0;
292  }
293 
294  s->sendb1[nd] = s->lbeg[nd] + stagpt;
295  s->recvb1[nd] = s->lend[nd]+1;
296  s->sendb2[nd] = s->lend[nd]-s->bg[nd]+1 - stagpt;
297  s->recvb2[nd] = 0;
298 
299  for(nb=0;nb<nd;nb++){
300  s->sendb1[nd] *= s->larrdim_gp[nb];
301  s->recvb1[nd] *= s->larrdim_gp[nb];
302  s->sendb2[nd] *= s->larrdim_gp[nb];
303  s->recvb2[nd] *= s->larrdim_gp[nb];
304  }
305 
306  /*
307  The buffer will be considered as char, so we
308  have to correct for the size of the data type
309  */
310  s->sendb1[nd] *= type_size;
311  s->recvb1[nd] *= type_size;
312  s->sendb2[nd] *= type_size;
313  s->recvb2[nd] *= type_size;
314 
315  }
316 
317  /*
318  Create the subarrays for the MPI-IO operations
319  */
320  {
321  int istag;
322 
323  MPI_Datatype igsubarr, ilsubarr;
324 
325  MPI_Datatype igsubarr_stag[AL_MAX_DIM];
326  MPI_Datatype ilsubarr_stag[AL_MAX_DIM];
327 
328  /* -----------------------------------------------------
329  Create the global subarray for MPI_Set_file_view
330  ----------------------------------------------------- */
331 
332  for(nd=0;nd<ndim;nd++){
333  gdims[nd] = s->arrdim[nd];
334  ldims[nd] = s->larrdim[nd];
335  starts[nd] = s->beg[nd] - s->bg[nd];
336  }
337 #ifdef DEBUG
338  if(sz_ptr==4){ /* print SZ_stagx */
339 printf("%d, gsubarr: gdims[0:2] %d,%d,%d, ldims[0:2] %d,%d,%d, starts[0:2] %d,%d,%d\n", s->rank, gdims[0], gdims[1], gdims[2], ldims[0], ldims[1], ldims[2], starts[0], starts[1], starts[2]);
340 }
341 #endif
342 
343  AL_Type_create_subarray(ndim, gdims, ldims, starts,
344  AL_ORDER_FORTRAN, s->type, &igsubarr);
345  MPI_Type_commit(&igsubarr);
346  s->gsubarr = igsubarr;
347 
348  /* -----------------------------------------------------
349  Create the global staggered subarrays
350  for MPI_Set_file_view
351  ----------------------------------------------------- */
352 
353  for (istag = 0; istag < ndim; istag++){
354  for(nd = 0; nd < ndim; nd++){
355  gdims[nd] = s->arrdim[nd];
356  ldims[nd] = s->larrdim[nd];
357  starts[nd] = s->beg[nd] - s->bg[nd];
358  }
359  /* --------------------------------------------------------------- */
360  /*! <b> Bug fixed on Aug 26, 2012:</b>
361  When the global view of the file for a staggered
362  variable is computed, the gdims[nd] has to be computed
363  and then passed to the function AL_Type_create_subarray.
364 
365  Since gdims[nd]=s->arrdim[nd], and in s->arrdim[nd] is
366  taking into account that the variable is staggered, we comment
367  the line "gdims[istag]++;" because it is no more needed. */
368  /* --------------------------------------------------------------- */
369  /* gdims[istag]++; */
370 
371 
372 
373  /* --------------------------------------------------------------- */
374  /*! <b> Bug fixed on Aug 26, 2012: </b>
375  The following "if" has been modified:
376 
377  OLD version:
378  if (s->beg[istag] == s->bg[istag]) {
379  ldims[istag]++;
380  }else{
381  starts[istag]++;
382  }
383 
384  NEW version:
385  if (s->beg[istag] == s->bg[istag]) {
386  }else{
387  starts[istag]++;
388  ldims[istag]--;
389  }
390 
391  \li "ldims[istag]++;" has been cancelled from the firt part
392  of the if for the same motivation explaned before;
393  \li "ldims[istag]--;" has been added in the second part of the
394  if because in PLUTO the index of the staggered variables
395  start from -1, while in the ArrayLib they start from 0. */
396  /* --------------------------------------------------------------- */
397 
398  if (s->beg[istag] == s->bg[istag]) {
399  }else{
400  starts[istag]++;
401  ldims[istag]--;
402  }
403 
404 #ifdef DEBUG
405  if(sz_ptr==4){ /* print SZ_stagx */
406  printf("%d, gsubarr_stag[%d]: gdims[0:2] %d,%d,%d, ldims[0:2] %d,%d,%d, starts[0:2] %d,%d,%d \n", s->rank, istag, gdims[0], gdims[1], gdims[2], ldims[0], ldims[1], ldims[2], starts[0], starts[1], starts[2]);
407 }
408 #endif
409 
410  AL_Type_create_subarray(ndim, gdims, ldims, starts,
411  AL_ORDER_FORTRAN, s->type, igsubarr_stag + istag);
412  MPI_Type_commit(igsubarr_stag + istag);
413  s->gsubarr_stag[istag] = igsubarr_stag[istag];
414  }
415 
416  /* -----------------------------------------------------
417  Create the local subarray for the MPI_Set_write_all
418  ----------------------------------------------------- */
419 
420  for(nd=0;nd<ndim;nd++){
421  gdims[nd] = s->larrdim_gp[nd];
422  ldims[nd] = s->larrdim[nd];
423  starts[nd] = s->lbeg[nd];
424  }
425 #ifdef DEBUG
426  if(sz_ptr==4){ /* print SZ_stagx */
427 printf("%d, lsubarr: gdims[0:2] %d,%d,%d, ldims[0:2] %d,%d,%d, starts[0:2] %d,%d,%d \n", s->rank, gdims[0], gdims[1], gdims[2], ldims[0], ldims[1], ldims[2], starts[0], starts[1], starts[2]);
428 }
429 #endif
430  AL_Type_create_subarray(ndim, gdims, ldims, starts,
431  AL_ORDER_FORTRAN, s->type, &ilsubarr);
432  MPI_Type_commit(&ilsubarr);
433  s->lsubarr = ilsubarr;
434 
435 
436  /* -----------------------------------------------------
437  Create the local staggered subarrays
438  for MPI_Set_write_all
439  ----------------------------------------------------- */
440 
441  for (istag = 0; istag < ndim; istag++){
442  for(nd = 0; nd < ndim; nd++){
443  gdims[nd] = s->larrdim_gp[nd];
444  ldims[nd] = s->larrdim[nd];
445  starts[nd] = s->lbeg[nd];
446  }
447  /* --------------------------------------------------------------- */
448  /*! <b> Bugs fixed on Aug 26, 2012: </b>
449  The following if has been modified:
450 
451  OLD version:
452  if (s->beg[istag] == s->bg[istag]) {
453  ldims[istag]++;
454  starts[istag]--;
455  }
456 
457  NEW version:
458  if (s->beg[istag] == s->bg[istag]) {
459  }else{
460  ldims[istag]--;
461  starts[istag]++;
462  }
463 
464  \li "ldims[istag]++;" has been cancelled from the firt part of
465  the if because when the local subarray for the MPI_Set_write_all
466  for a staggered variable is computed, the ldims[nd] has to be
467  computed and then passed to the function AL_Type_create_subarray.
468  Since ldims[nd]=s->larrdim[nd], and in s->larrdim[nd] is
469  taking into account that the variable is staggered, we remouved
470  the line "ldims[istag]++;" because it is no more needed;
471  \li "starts[istag]--;" has been cancelled from the first part of
472  the if because in PLUTO the indexes for the staggered variables
473  start locally from -1, while in ArrayLib they start from 0;
474  \li the "else" has been added to take into account the motivation
475  explaned in the point before. */
476  /* --------------------------------------------------------------- */
477 
478  if (s->beg[istag] == s->bg[istag]) {
479  }else{
480  ldims[istag]--;
481  starts[istag]++;
482  }
483 
484 
485 #ifdef DEBUG
486  if(sz_ptr==4){ /* print SZ_stagx */
487 printf("%d, lsubarr_stag[%d]: gdims[0:2] %d,%d,%d, ldims[0:2] %d,%d,%d, starts[0:2] %d,%d,%d \n", s->rank, istag, gdims[0], gdims[1], gdims[2], ldims[0], ldims[1], ldims[2], starts[0], starts[1], starts[2]);
488 }
489 #endif
490  AL_Type_create_subarray(ndim, gdims, ldims, starts,
491  AL_ORDER_FORTRAN, s->type, ilsubarr_stag + istag);
492  MPI_Type_commit(ilsubarr_stag + istag);
493  s->lsubarr_stag[istag] = ilsubarr_stag[istag];
494  }
495 
496  }
497 
498  /*
499  Now array is officially compiled
500  */
501  s->compiled = AL_TRUE;
502 
503  /* DIAGNOSTICS */
504 #ifdef DEBUG
505  if(sz_ptr==4){ /* print SZ_stagx */
506  printf("AL_Decompose: Decomposition successful\n");
507  for(nd=0;nd<ndim;nd++){
508  printf("AL_Decompose: %d %d\n", s->beg[nd], s->end[nd]);
509  }
510 }
511 #endif
512 
513  return (int) AL_SUCCESS;
514 }
515 
516 /* -------------- INTERNAL ROUTINES ------------------*/
517 
518 /* ********************************************************************* */
519 int AL_Find_decomp_(int sz_ptr, int mode, int *procs)
520 /*!
521  Find the decomposition for a distributed array given its
522  global size and the number of nodes
523  *
524  * \param [in] sz_ptr integer pointer to the SZ structure
525  * \param [in] mode mode parameter: AL_AUTO_DECOMP, AL_MPI_DECOMP, AL_USER_DECOMP
526  * \param [in,out] procs array of processor decomposition
527  *********************************************************************** */
528 {
529  register int i, nd;
530 
531  SZ *s;
532 
533  int myid, nproc, ntdim, npdim;
534  int ldims[AL_MAX_DIM], ipdims[AL_MAX_DIM];
535  int s_inds[AL_MAX_DIM], t_ldims[AL_MAX_DIM];
536  int gdims[AL_MAX_DIM];
537 
538  s = sz_stack[sz_ptr];
539 
540  myid = s->rank;
541  nproc = s->size;
542 
543  ntdim = s->ndim;
544 
545 #ifdef DEBUG
546  if(sz_ptr==4){ /* print SZ_stagx */
547  if(myid==0) printf("AL_Find_decomp_: Using Mode - %d\n",mode);
548 }
549 #endif
550 
551  /*
552  If mode is AL_USER_DECOMP, we get the user defined decomposition
553 
554  */
555  if( mode == AL_USER_DECOMP ){
556 #ifdef DEBUG
557  if(sz_ptr==4){ /* print SZ_stagx */
558  if(myid==0) printf("AL_Find_decomp_: Using User Decomp Mode - %d\n",ntdim);
559 }
560 #endif
561  for(nd=0;nd<ntdim;nd++){
562  if( ldims[nd] != 0 ){
563  s->lsize[nd] = ldims[nd];
564  } else {
565  s->lsize[nd] = 1; /* This is a safety patch, a more
566  sophisticated tratement will be needed */
567  }
568  }
569  return 0;
570  }
571 
572  /*
573  Determine the parallel directions and
574  save them in ipdims[]
575  */
576  npdim = 0;
577  for(nd=0;nd<ntdim;nd++){
578  s->lsize[nd] = 1;
579  if( s->isparallel[nd] == AL_TRUE ){
580  ipdims[npdim] = nd;
581  /* Correct for the overlapping points in a staggered mesh */
582  if( s->isstaggered[nd] == AL_TRUE ) {
583  gdims[npdim] = s->arrdim[nd]-AL_STAGGERED_OVERLAP;
584  } else {
585  gdims[npdim] = s->arrdim[nd];
586  }
587  npdim = npdim+1;
588  } else {
589  /* The dimension is NOT parallel */
590  procs[nd] = 1;
591  }
592  }
593 
594  /*
595  Introduce special test for power of two
596  dimensions and number of processors.
597 
598  We check whether both the size of the communicator
599  AND each of the dimensions are powers of two.
600  If this is the case, then the AL_AUTO_DECOMP is used.
601  */
602  {
603  int isp;
604  isp = 0;
605  if(s->ndim <4){
606  if( AL_POWEROF2(s->size) ){
607  for(nd=0;nd<s->ndim;nd++){
608  if(AL_POWEROF2(s->arrdim[nd])) isp++;
609  }
610  }
611  if(isp==s->ndim) {
612  mode = AL_AUTO_DECOMP;
613 #ifdef DEBUG
614  if(myid==0) printf("Using mode AL_AUTO_DECOMP\n");
615 #endif
616  }
617  }
618  }
619 
620  /*
621  If mode is AL_AUTO_DECOMP, AL finds the decomposition
622  We try to find a "maximally cubic" decomposition, which
623  is useful for the application of the multigrid algorithm.
624  */
625  if( mode == AL_AUTO_DECOMP ){
626  if( AL_Auto_Decomp_(nproc,npdim,ldims,gdims) == AL_FAILURE) {
627  mode = AL_MPI_DECOMP;
628 #ifdef DEBUG
629  if( myid == 0 ) printf("Auto Decomp failed: using MPI Decomp Mode\n");
630 #endif
631  }
632  }
633 
634  /*
635  If mode is AL_MPI_DECOMP, MPI finds the decomposition
636  */
637 #ifdef DEBUG
638  if(myid==0) printf("Auto Decomp: Mode is: %d %d %d %d\n",mode, AL_MPI_DECOMP, nproc, npdim);
639 #endif
640 
641  if( mode == AL_MPI_DECOMP ){
642  for(i=0;i<npdim;i++){ ldims[i]=0 ;}
643  MPI_Dims_create(nproc, npdim, ldims);
644  /* This may not get released */
645  /* AL_Dims_create(nproc, npdim, ldims); */
646  {
647  /* Make an attempt at aligning the decomposition with the
648  array dimensions. We try to match the longest dimensions
649  of the decomposition with those of the array */
650  AL_Sort_(npdim,ldims,s_inds);
651  for(i=0;i<npdim;i++) t_ldims[i] = ldims[s_inds[i]];
652  AL_Sort_(npdim,gdims,s_inds);
653  for(i=0;i<npdim;i++) ldims[s_inds[i]] = t_ldims[i];
654  }
655 #ifdef DEBUG
656  if(myid==0) printf("MPI Decomp: %d %d %d %d %d\n", ldims[0],ldims[1],ldims[2],nproc,npdim);
657 #endif
658  }
659 
660  /*
661  Fill the decomposition information back
662  int the SZ structure
663  */
664 #ifdef DEBUG
665  if(myid==0) printf("End of AL_Fing_decomp_: ");
666 #endif
667  for(i=0;i<npdim;i++){
668  s->lsize[ipdims[i]]=ldims[i];
669  procs[ipdims[i]] = ldims[i];
670 #ifdef DEBUG
671  if(myid==0) printf("%d ",s->lsize[ipdims[i]]);
672 #endif
673  }
674 #ifdef DEBUG
675  if(myid==0) printf("\n");
676 #endif
677  return 0;
678 }
679 
680 
681 /* ********************************************************************* */
682 int AL_Global_to_local_(int sz_ptr)
683 /*!
684  * Compute addresses of local portions of array from
685  * global dimensions and processor decomposition
686  *
687  * \param [in] sz_ptr integer pointer to SZ structure
688  *********************************************************************** */
689 {
690  register int i, j;
691  SZ *s;
692 
693  int myrank, nproc, ndim;
694  int gdim, lproc, lloc;
695  int start, end;
696 
697  s = sz_stack[sz_ptr];
698 
699  myrank = s->rank;
700  nproc = s->size;
701  ndim = s->ndim;
702 
703  for(i=0;i<ndim;i++){
704  gdim = s->arrdim[i];
705  lproc = s->lsize[i];
706  lloc = s->lrank[i];
707 
708  /* We apply the following trick if the array is staggered */
709  if( s->isstaggered[i] == AL_TRUE ){ gdim = gdim-1;}
710 
711  AL_Decomp1d_(gdim, lproc, lloc, &start, &end);
712 
713  s->beg[i] = start+s->bg[i];
714  s->end[i] = end+s->bg[i];
715  if( s->isstaggered[i] == AL_TRUE ){ s->end[i] = end+1+s->bg[i];}
716  s->lbeg[i] = s->bg[i];
717  /* -------------------------------------------------------------------- */
718  /*! <b> Bug fixed on Dec 7, 2011: </b>
719  The lines:
720 
721  if( s->isstaggered[i] == AL_TRUE ){ s->lend[i] = (end-start+1+s->lbeg[i]);}
722 
723  if( s->isstaggered[i] == AL_TRUE ){ s->larrdim_gp[i] = (end-start+1+s->bg[i]+s->eg[i]+1);}
724 
725  if( s->isstaggered[i] == AL_TRUE ){ s->larrdim[i] = (end-start+1+1);}
726 
727  have been added to manage in the right way the correspondence among
728  global and local indeces for staggered variables. */
729  /* -------------------------------------------------------------------- */
730  s->lend[i] = (end-start+s->lbeg[i]);
731  if( s->isstaggered[i] == AL_TRUE ){ s->lend[i] = (end-start+1+s->lbeg[i]);}
732  s->larrdim_gp[i] = (end-start+s->bg[i]+s->eg[i]+1);
733  if( s->isstaggered[i] == AL_TRUE ){ s->larrdim_gp[i] = (end-start+1+s->bg[i]+s->eg[i]+1);}
734 #ifdef DEBUG
735  if(sz_ptr==4){ /* print SZ_stagx */
736  printf("%d AL_Global_to_local, dim %d : s->beg %d, s->end %d, lbeg %d, lend %d, bg %d, larrdim_gp %d\n",s->rank,i, s->beg[i],s->end[i], s->lbeg[i], s->lend[i], s->bg[i], s->larrdim_gp[i]);
737 }
738 #endif
739  s->larrdim[i] = end-start+1;
740  if( s->isstaggered[i] == AL_TRUE ){ s->larrdim[i] = (end-start+1+1);}
741 
742  }
743 
744  /*
745  These are the offsets for the DO loop index computations.
746 
747  We will gradually replace "offset" with "stride", so that
748  the definition is more in line with the MPI strided data
749  types definitions.
750  */
751  for(i=0;i<ndim;i++){
752  s->offset[i] = 1;
753  s->stride[i] = 1;
754  for(j=0;j<=i-1;j++){
755  s->offset[i] *= s->larrdim_gp[j];
756  s->stride[i] *= s->larrdim_gp[j];
757  }
758 #ifdef DEBUG
759  if(sz_ptr==4){ /* print solo SZ_stagx */
760  printf("%d Strides: %d %d\n",s->rank,i,s->stride[i]);
761 }
762 #endif
763  }
764 
765  return (int) AL_SUCCESS;
766 }
767 
768 /* ********************************************************************** */
769 /* -- NOTE: the following routine, AL_Decomp1d_, has been
770  modified from the MPE_Decomp1d_ routine, which
771  is part of the MPICH distribution. -- */
772 
773 int AL_Decomp1d_(int gdim, int lproc, int lloc, int *start, int *end)
774 /*!
775  * Decompose a 1D array, given the number of processors along the direction
776  *
777  * \param [in] gdim integer size of the global dimension
778  * \param [in] lproc integer size of the number of processors along the dimension
779  * \param [in] lloc integer location of this node along the dimension
780  * \param [out] start integer pointer to start address for the array (C-convention)
781  * \param [out] end integer pointer to end address for the array (C-convention)
782  *********************************************************************** */
783 {
784  int nlocal, deficit, itmp;
785 
786  nlocal = gdim/lproc;
787  *start = lloc * nlocal;
788  deficit = gdim % lproc;
789  itmp = lloc < deficit ? lloc : deficit;
790  *start = *start + itmp;
791  if( lloc < deficit ){ nlocal = nlocal+1; }
792  *end = *start + nlocal -1;
793 
794 #ifdef DEBUG
795  printf("AL_Decomp1_: %d %d %d %d - %d %d \n", lloc, deficit, *start, *end, nlocal, lproc);
796 #endif
797 
798  if( *end >= gdim || lloc == lproc-1 ) {*end = gdim-1;}
799 
800  return (int) AL_SUCCESS;
801 }
802 
int recvb2[AL_MAX_DIM]
Definition: al_hidden.h:95
int isperiodic[AL_MAX_DIM]
Definition: al_hidden.h:80
#define AL_MAX_DIM
Definition: al_codes.h:18
#define AL_MPI_DECOMP
Definition: al_codes.h:41
int AL_Decomp1d_(int gdim, int lproc, int lloc, int *start, int *end)
Definition: al_decompose.c:773
int end[AL_MAX_DIM]
Definition: al_hidden.h:60
int lrank[AL_MAX_DIM]
Definition: al_hidden.h:88
int isparallel[AL_MAX_DIM]
Definition: al_hidden.h:78
int offset[AL_MAX_DIM]
Definition: al_hidden.h:76
int AL_Auto_Decomp_(int nproc, int npdim, int *ldims, int *gdims)
Definition: al_decomp_.c:19
int tag1[AL_MAX_DIM]
Definition: al_hidden.h:96
#define AL_SUCCESS
Definition: al_codes.h:32
int right[AL_MAX_DIM]
Definition: al_hidden.h:86
#define AL_TRUE
Definition: al_codes.h:28
#define AL_STACK_FREE
Definition: al_codes.h:24
int AL_Find_decomp_(int sz_ptr, int mode, int *procs)
Definition: al_decompose.c:519
int type_size
Definition: al_hidden.h:42
int * begs
Definition: al_hidden.h:63
MPI_Datatype gsubarr
Definition: al_hidden.h:110
MPI_Comm oned_comm[AL_MAX_DIM]
Definition: al_hidden.h:50
int recvb1[AL_MAX_DIM]
Definition: al_hidden.h:94
SZ * sz_stack[AL_MAX_ARRAYS]
Definition: al_szptr_.c:18
int AL_Decompose(int sz_ptr, int *procs, int mode)
Definition: al_decompose.c:39
#define AL_USER_DECOMP
Definition: al_codes.h:42
MPI_Comm cart_comm
Definition: al_hidden.h:49
int arrdim[AL_MAX_DIM]
Definition: al_hidden.h:53
int larrdim_gp[AL_MAX_DIM]
Definition: al_hidden.h:56
int sendb1[AL_MAX_DIM]
Definition: al_hidden.h:92
int larrdim[AL_MAX_DIM]
Definition: al_hidden.h:54
int AL_Type_create_subarray(int, int *, int *, int *, int, MPI_Datatype, MPI_Datatype *)
Definition: al_subarray_.c:18
int rank
Definition: al_hidden.h:44
#define AL_AUTO_DECOMP
Definition: al_codes.h:40
int * ends
Definition: al_hidden.h:63
int AL_Sort_(int, int *, int *)
Definition: al_sort_.c:15
MPI_Datatype type_rl[AL_MAX_DIM]
Definition: al_hidden.h:106
int stride[AL_MAX_DIM]
Definition: al_hidden.h:77
int bg[AL_MAX_DIM]
Definition: al_hidden.h:72
MPI_Datatype lsubarr
Definition: al_hidden.h:111
int left[AL_MAX_DIM]
Definition: al_hidden.h:84
int beg[AL_MAX_DIM]
Definition: al_hidden.h:58
MPI_Datatype lsubarr_stag[AL_MAX_DIM]
Definition: al_hidden.h:114
#define AL_ORDER_FORTRAN
Definition: al_codes.h:46
int j
Definition: analysis.c:2
int sendb2[AL_MAX_DIM]
Definition: al_hidden.h:93
int isstaggered[AL_MAX_DIM]
Definition: al_hidden.h:82
int eg[AL_MAX_DIM]
Definition: al_hidden.h:74
int lsize[AL_MAX_DIM]
Definition: al_hidden.h:90
#define AL_FAILURE
Definition: al_codes.h:33
MPI_Datatype strided[AL_MAX_DIM]
Definition: al_hidden.h:97
#define s
int size
Definition: al_hidden.h:45
int tag2[AL_MAX_DIM]
Definition: al_hidden.h:96
#define AL_STAGGERED_OVERLAP
Definition: al_defs.h:25
int i
Definition: analysis.c:2
MPI_Comm comm
Definition: al_hidden.h:47
long int buffsize
Definition: al_hidden.h:52
MPI_Datatype type
Definition: al_hidden.h:40
int stack_ptr[AL_MAX_ARRAYS]
Definition: al_szptr_.c:26
#define AL_FALSE
Definition: al_codes.h:29
int AL_Global_to_local_(int sz_ptr)
Definition: al_decompose.c:682
Definition: al_hidden.h:38
MPI_Datatype gsubarr_stag[AL_MAX_DIM]
Definition: al_hidden.h:113
Internal include file for the ArrayLib.
int compiled
Definition: al_hidden.h:39
int lbeg[AL_MAX_DIM]
Definition: al_hidden.h:68
MPI_Datatype type_lr[AL_MAX_DIM]
Definition: al_hidden.h:104
#define AL_MAX_ARRAYS
Definition: al_codes.h:21
#define AL_POWEROF2(x)
Definition: al_defs.h:48
int lend[AL_MAX_DIM]
Definition: al_hidden.h:70
int ndim
Definition: al_hidden.h:43