PLUTO
al_decomp_.c
Go to the documentation of this file.
1 /* ///////////////////////////////////////////////////////////////////// */
2 /*!
3  \file
4  \brief Miscellaneous functions used to find the processor
5  distribution
6 
7  Find a "maximally cubic" processors distribution in 1D, 2D and 3D.
8 
9  \author A. Malagoli (University of Chicago)
10  \date Jul 17, 1999
11 */
12 /* ///////////////////////////////////////////////////////////////////// */
13 #include "al_hidden.h" /*I "al_hidden.h" I*/
14 
15 int AL_Decomp_2d_(int nproc,int npdim,int *ldims, int *gdims);
16 int AL_Decomp_3d_(int nproc,int npdim,int *ldims, int *gdims);
17 
18 /* ********************************************************************* */
19 int AL_Auto_Decomp_(int nproc,int npdim,int *ldims, int *gdims)
20 /*!
21  * Find a "maximally cubic" processors distribution.
22  *
23  * \param [in] nproc number of processors
24  * \param [in] npdim number of parallel dimensions
25  * \param [out] ldims processor decomposition along directions
26  * \param [in] gdims global array sizes
27  ********************************************************************* */
28 {
29  /*
30  1D case. We do not need to do anything, really
31  */
32  if( npdim == 1 ) {
33  ldims[0] = nproc;
34  return (int) AL_SUCCESS;
35  }
36 
37  /*
38  2D case. Slightly more complicated
39  */
40  if( npdim == 2 ) {
41  if( AL_Decomp_2d_(nproc, npdim, ldims, gdims) == AL_FAILURE) {
42  return (int) AL_FAILURE;
43  } else {
44  return (int) AL_SUCCESS;
45  }
46  }
47 
48  /*
49  3D case. Even more complicated
50  */
51  if( npdim == 3 ) {
52  if( AL_Decomp_3d_(nproc, npdim, ldims, gdims) == AL_FAILURE) {
53  return (int) AL_FAILURE;
54  } else {
55  return (int) AL_SUCCESS;
56  }
57  }
58 
59 
60 return 0;
61 }
62 
63 /* ********************************************************************* */
64 int AL_Decomp_3d_(int nproc,int npdim,int *ldims, int *gdims)
65 /*!
66  * Find a "maximally cubic" processors distribution in 3D.
67  *
68  * \param [in] nproc number of processors
69  * \param [in] npdim number of parallel dimensions
70  * \param [out] ldims processor decomposition along directions
71  * \param [in] gdims global array sizes
72  ********************************************************************* */
73 {
74  int nx, ny, nz, nnz;
75  int pow3, powz, myrank;
76  int ndim, minxy, maxxy, nproc2, minp2, maxp2, nprocz;
77  int nprocx_old, nprocy_old, nprocz_old;
78  int nprocx, nprocy;
79  register int ipz;
80  int l2dims[AL_MAX_DIM], g2dims[AL_MAX_DIM];
81 
82  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
83 
84  ndim = npdim;
85 
86  nx = gdims[0];
87  ny = gdims[1];
88  nz = gdims[2];
89 
90  pow3 = floor( log((double) nproc)/log(2.0)+0.5 );
91 
92 #ifdef DEBUG
93  printf("AL_Decomp_3d_: pow3 = %d\n",pow3);
94 #endif
95 
96 /* if( pow3 != 0 ) { */
97 /* npw3 = pow(2,pow3); */
98 /* } else { */
99 /* npw3 = 1; */
100 /* } */
101 
102 /* if( npw3 != nproc ){ */
103  if( !AL_POWEROF2(nproc) ) {
104 #ifdef DEBUG
105  if( myrank == 0 ) printf("AL_Decompose3d: nproc is not a power of two\n");
106 #endif
107 
108 /* MPI_Abort(MPI_COMM_WORLD, -14); */
109  return (int) AL_FAILURE;
110  }
111 
112  powz = rint( log((double) nz)/log(2.0) );
113  powz = floor( log((double) nz)/log(2.0) +0.5);
114 
115  if( powz != 0 ) {
116  nnz = pow(2,powz);
117  } else {
118  nnz = 1;
119  }
120 
121  maxxy = nx > ny ? nx : ny;
122  minxy = nx < ny ? nx : ny;
123 
124  for( ipz=0; ipz<=pow3; ipz++){
125 
126 #ifdef DEBUG
127  printf("AL_Decomp_3d_: In Loop ipz = %d\n",ipz);
128 #endif
129 
130  if( ipz != 0 ) {
131  nprocz = pow(2,ipz);
132  } else {
133  nprocz = 1;
134  }
135 
136  if( ipz != pow3 ) {
137  nproc2 = pow(2,(pow3-ipz));
138  } else {
139  nproc2 = 1;
140  }
141 
142  g2dims[0] = nx;
143  g2dims[1] = ny;
144 
145  if( AL_Decomp_2d_(nproc2, 2, l2dims, g2dims) == AL_FAILURE )
146  return (int) AL_FAILURE;
147 
148  nprocx = l2dims[0];
149  nprocy = l2dims[1];
150 
151  maxp2 = nprocx > nprocy ? nprocx : nprocy;
152  minp2 = nprocx < nprocy ? nprocx : nprocy;
153 
154  if( nz >= maxxy ) {
155  if( nprocz >= maxp2) break;
156  }
157 
158  if( nz < maxxy && nz > minxy ) {
159  if( nprocz < maxp2 && nprocx > minp2) break;
160  }
161 
162  if( nz <= minxy ) {
163  if( nprocz <= minp2 ){
164  nprocx_old = nprocx;
165  nprocy_old = nprocy;
166  nprocz_old = nprocz;
167  } else {
168  nprocx = nprocx_old;
169  nprocy = nprocy_old;
170  nprocz = nprocz_old;
171  }
172  }
173 
174  }
175 
176 #ifdef DEBUG
177  printf("AL_decompose3d_: %d %d %d\n", nprocx, nprocy, nprocz);
178 #endif
179  ldims[0] = nprocx;
180  ldims[1] = nprocy;
181  ldims[2] = nprocz;
182 
183  return (int) AL_SUCCESS;
184 }
185 
186 /* ********************************************************************* */
187 int AL_Decomp_2d_(int nproc,int npdim,int *ldims, int *gdims)
188 /*!
189  * Find a "maximally cubic" processors distribution in 2D.
190  *
191  * \param [in] nproc number of processors
192  * \param [in] npdim number of parallel dimensions
193  * \param [out] ldims processor decomposition along directions
194  * \param [in] gdims global array sizes
195  ********************************************************************* */
196 {
197  int nx, ny;
198  int np1, np2, nproc1, nproc2, nproc1_old, nproc2_old;
199  int np1_old, np2_old, n1, n2;
200  int pow2;
201  int nprocx, nprocy;
202  int myrank;
203  register int ip;
204 
205  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
206 
207  if( nproc == 1 ){
208  ldims[0] = 1;
209  ldims[1] = 1;
210  return (int) AL_SUCCESS;
211  }
212 
213  /*
214  We assume the we have a power of 2 number of grid points
215  */
216  nx = gdims[0];
217  ny = gdims[1];
218 
219  if( nx < ny ){
220  n1 = nx;
221  n2 = ny;
222  } else {
223  n1 = ny;
224  n2 = nx;
225  }
226  pow2 = floor( log((double) nproc)/log(2.0)+0.5 );
227 
228 #ifdef DEBUG
229  printf("AL_Decomp2d_ [0]: %d %d\n",pow2, nproc);
230 #endif
231 
232 /* if( pow2 != 0 ) { */
233 /* npw2 = pow(2,pow2); */
234 /* } else { */
235 /* npw2 = 1; */
236 /* } */
237 
238 /* if( npw2 != nproc ){ */
239  if( ! AL_POWEROF2(nproc) ) {
240 #ifdef DEBUG
241  if( myrank == 0 ) printf("AL_Decompose2d: nproc is not a power of two\n");
242 #endif
243 /* MPI_Abort(MPI_COMM_WORLD, -14); */
244  return (int) AL_FAILURE;
245  }
246 
247 /* powx = floor( log((double) nx)/log(2.0) +0.5); */
248 
249 /* if( powx != 0 ) { */
250 /* nnx = pow(2,powx); */
251 /* } else { */
252 /* nnx = 1; */
253 /* } */
254 
255 /* if( nnx != nx ){ */
256  if( ! AL_POWEROF2(nx) ) {
257 #ifdef DEBUG
258  if( myrank == 0 ) printf("AL_Decompose2d: nx is not a power of two: %d\n",nx);
259 #endif
260 /* MPI_Abort(MPI_COMM_WORLD, -14); */
261  return (int) AL_FAILURE;
262  }
263 
264 /* powy = rint ( log((double) ny)/log(2.0) ); */
265 /* powy = floor( log((double) ny)/log(2.0) +0.5); */
266 
267 /* if( powy != 0 ) { */
268 /* nny = pow(2,powy); */
269 /* } else { */
270 /* nny = 1; */
271 /* } */
272 
273 /* if( nny != ny ){ */
274  if( ! AL_POWEROF2(ny) ) {
275 #ifdef DEBUG
276  if( myrank == 0 ) printf("AL_Decompose2d: ny is not a power of two: %d\n",ny);
277 #endif
278 
279 /* MPI_Abort(MPI_COMM_WORLD, -14); */
280  return (int) AL_FAILURE;
281  }
282 
283 
284  for( ip=0; ip<=pow2; ip++){
285  if( ip != 0 ) {
286  nproc1 = pow(2,ip);
287  } else {
288  nproc1 = 1;
289  }
290 
291  if( ip != pow2 ) {
292  nproc2 = pow(2,(pow2-ip));
293  } else {
294  nproc2 = 1;
295  }
296 
297 
298  np1 = n1/nproc1;
299  np2 = n2/nproc2;
300 
301 #ifdef DEBUG
302  printf("AL_decomp2d_ np1, np2: %d %d | %d %d\n",n1,n2,nproc1,nproc2);
303 #endif
304 
305 
306  nproc1_old = nproc1;
307  nproc2_old = nproc2;
308 
309  np1_old = np1;
310  np2_old = np2;
311 
312  if( np1 < np2 ) break;
313 
314  }
315 
316  if( nx < ny ){
317  nprocx = nproc1_old;
318  nprocy = nproc2_old;
319  np1 = np1_old;
320  np2 = np2_old;
321  } else {
322  nprocx = nproc2_old;
323  nprocy = nproc1_old;
324  np1 = np2_old;
325  np2 = np1_old;
326  }
327 
328 #ifdef DEBUG
329  printf("AL_Decomp2d_: %d %d | %d %d\n",nprocx, nprocy, gdims[0], gdims[1]);
330 #endif
331 
332  ldims[0] = nprocx;
333  ldims[1] = nprocy;
334 
335  return (int) AL_SUCCESS;
336 }
#define AL_MAX_DIM
Definition: al_codes.h:18
int AL_Auto_Decomp_(int nproc, int npdim, int *ldims, int *gdims)
Definition: al_decomp_.c:19
#define AL_SUCCESS
Definition: al_codes.h:32
int AL_Decomp_2d_(int nproc, int npdim, int *ldims, int *gdims)
Definition: al_decomp_.c:187
#define AL_FAILURE
Definition: al_codes.h:33
int AL_Decomp_3d_(int nproc, int npdim, int *ldims, int *gdims)
Definition: al_decomp_.c:64
Internal include file for the ArrayLib.
#define AL_POWEROF2(x)
Definition: al_defs.h:48