parmetis_partitioner.C
Go to the documentation of this file.
1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2014 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 
18 
19 
20 // C++ Includes -----------------------------------
21 
22 // Local Includes -----------------------------------
23 #include "libmesh/libmesh_config.h"
24 #include "libmesh/mesh_base.h"
25 #include "libmesh/parallel.h" // also includes mpi.h
27 #include "libmesh/mesh_tools.h"
32 #include "libmesh/elem.h"
33 
34 #ifdef LIBMESH_HAVE_PARMETIS
35 
36 // Include the ParMETIS header files
37 namespace Parmetis {
38  extern "C" {
39 # include "libmesh/ignore_warnings.h"
40 # include "parmetis.h"
42  }
43 }
44 
45 #endif // #ifdef LIBMESH_HAVE_PARMETIS ... else ...
46 
47 namespace libMesh
48 {
49 
// NOTE(review): presumably consulted by the per-processor element-count
// check in _do_repartition() (original line 114, which is missing from
// this extract); when any processor falls below this threshold the code
// falls back to a serial METIS partition -- confirm against upstream.
50 // Minimum elements on each processor required for us to choose
51 // Parmetis over Metis.
52 const unsigned int MIN_ELEM_PER_PROC = 4;
53 
54 
55 // ------------------------------------------------------------
56 // ParmetisPartitioner implementation
// Entry point for static partitioning.
// NOTE(review): the opening signature line (original line 57) is missing
// from this extract -- presumably
// "void ParmetisPartitioner::_do_partition (MeshBase &mesh," as in the
// upstream libMesh source; confirm before relying on this.
58  const unsigned int n_sbdmns)
59 {
 // Partitioning and repartitioning share one implementation: the adaptive
 // repartition path simply starts from whatever distribution exists.
60  this->_do_repartition (mesh, n_sbdmns);
61 }
62 
63 
64 
// Repartition the mesh into n_sbdmns subdomains, preferring the ParMETIS
// adaptive repartitioner (which minimizes data movement relative to the
// current partitioning) and falling back to METIS when ParMETIS is not
// built in, when running on one processor, or when some processor owns
// too few active elements for ParMETIS to cope with.
// NOTE(review): the signature line (original 65) is missing from this
// extract -- presumably
// "void ParmetisPartitioner::_do_repartition (MeshBase &mesh," -- as are
// several interior lines flagged below; confirm against upstream.
66  const unsigned int n_sbdmns)
67 {
68  libmesh_assert_greater (n_sbdmns, 0);
69 
70  // Check for an easy return
71  if (n_sbdmns == 1)
72  {
 // One subdomain: assign everything to processor 0, no graph needed.
73  this->single_partition(mesh);
74  return;
75  }
76 
77  // This function must be run on all processors at once
// NOTE(review): original line 78 is missing here -- presumably
// "libmesh_parallel_only(mesh.comm());" as in assign_partitioning() below.
79 
80 // What to do if the Parmetis library IS NOT present
81 #ifndef LIBMESH_HAVE_PARMETIS
82 
83  libmesh_here();
84  libMesh::err << "ERROR: The library has been built without" << std::endl
85  << "Parmetis support. Using a Metis" << std::endl
86  << "partitioner instead!" << std::endl;
87 
// NOTE(review): original line 88 is missing -- presumably
// "MetisPartitioner mp;", since "mp" is used on the next line.
89 
90  mp.partition (mesh, n_sbdmns);
91 
92 // What to do if the Parmetis library IS present
93 #else
94 
95  // Revert to METIS on one processor.
96  if (mesh.n_processors() == 1)
97  {
// NOTE(review): original line 98 is missing -- presumably
// "MetisPartitioner mp;" here as well.
99  mp.partition (mesh, n_sbdmns);
100  return;
101  }
102 
103  START_LOG("repartition()", "ParmetisPartitioner");
104 
105  // Initialize the data structures required by ParMETIS
106  this->initialize (mesh, n_sbdmns);
107 
108  // Make sure all processors have enough active local elements.
109  // Parmetis tends to crash when it's given only a couple elements
110  // per partition.
111  {
112  bool all_have_enough_elements = true;
113  for (processor_id_type pid=0; pid<_n_active_elem_on_proc.size(); pid++)
// NOTE(review): original line 114 (the loop's condition) is missing --
// presumably "if (_n_active_elem_on_proc[pid] < MIN_ELEM_PER_PROC)",
// which would make the flag-clearing below conditional; confirm.
115  all_have_enough_elements = false;
116 
117  // Parmetis will not work unless each processor has some
118  // elements. Specifically, it will abort when passed a NULL
119  // partition array on *any* of the processors.
120  if (!all_have_enough_elements)
121  {
122  // FIXME: revert to METIS, although this requires a serial mesh
123  MeshSerializer serialize(mesh);
124 
125  STOP_LOG ("repartition()", "ParmetisPartitioner");
126 
127  MetisPartitioner mp;
128  mp.partition (mesh, n_sbdmns);
129 
130  return;
131  }
132  }
133 
134  // build the graph corresponding to the mesh
135  this->build_graph (mesh);
136 
137 
138  // Partition the graph
 // vsize: per-vertex redistribution cost; uniform (1) here.
139  std::vector<int> vsize(_vwgt.size(), 1);
 // itr: ratio of inter-processor communication time to redistribution
 // time; a large value tells ParMETIS to strongly favor low edge-cut
 // over limiting data movement.
140  float itr = 1000000.0;
141  MPI_Comm mpi_comm = mesh.comm().get();
142 
143  // Call the ParMETIS adaptive repartitioning method. This respects the
144  // original partitioning when computing the new partitioning so as to
145  // minimize the required data redistribution.
 // Empty arrays are passed as NULL because &v[0] on an empty vector is
 // undefined behavior.
146  Parmetis::ParMETIS_V3_AdaptiveRepart(_vtxdist.empty() ? NULL : &_vtxdist[0],
147  _xadj.empty() ? NULL : &_xadj[0],
148  _adjncy.empty() ? NULL : &_adjncy[0],
149  _vwgt.empty() ? NULL : &_vwgt[0],
150  vsize.empty() ? NULL : &vsize[0],
151  NULL,
152  &_wgtflag,
153  &_numflag,
154  &_ncon,
155  &_nparts,
156  _tpwgts.empty() ? NULL : &_tpwgts[0],
157  _ubvec.empty() ? NULL : &_ubvec[0],
158  &itr,
159  &_options[0],
160  &_edgecut,
161  _part.empty() ? NULL : &_part[0],
162  &mpi_comm);
163 
164  // Assign the returned processor ids
165  this->assign_partitioning (mesh);
166 
167 
168  STOP_LOG ("repartition()", "ParmetisPartitioner");
169 
170 #endif // #ifndef LIBMESH_HAVE_PARMETIS ... else ...
171 
172 }
173 
174 
175 
176 // Only need to compile these methods if ParMETIS is present
177 #ifdef LIBMESH_HAVE_PARMETIS
178 
// Set up all of the CSR-graph bookkeeping ParMETIS needs: the parameter
// scalars (_wgtflag, _ncon, _numflag, _nparts), the vertex-distribution
// array _vtxdist, target partition weights _tpwgts, load-imbalance
// tolerances _ubvec, the initial partition array _part, vertex weights
// _vwgt, and two global index maps that renumber the active elements
// contiguously by processor (independent of the current partitioning).
// NOTE(review): the signature line (original 179) is missing from this
// extract -- presumably
// "void ParmetisPartitioner::initialize (const MeshBase &mesh," --
// together with several interior lines flagged below.
180  const unsigned int n_sbdmns)
181 {
182  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
183 
184  // Set parameters.
185  _wgtflag = 2; // weights on vertices only
186  _ncon = 1; // one weight per vertex
187  _numflag = 0; // C-style 0-based numbering
188  _nparts = static_cast<int>(n_sbdmns); // number of subdomains to create
189  _edgecut = 0; // the numbers of edges cut by the
190  // partition
191 
192  // Initialize data structures for ParMETIS
193  _vtxdist.resize (mesh.n_processors()+1); std::fill (_vtxdist.begin(), _vtxdist.end(), 0);
194  _tpwgts.resize (_nparts); std::fill (_tpwgts.begin(), _tpwgts.end(), 1./_nparts);
195  _ubvec.resize (_ncon); std::fill (_ubvec.begin(), _ubvec.end(), 1.05);
196  _part.resize (n_active_local_elem); std::fill (_part.begin(), _part.end(), 0);
197  _options.resize (5);
198  _vwgt.resize (n_active_local_elem);
199 
200  // Set the options
201  _options[0] = 1; // don't use default options
202  _options[1] = 0; // default (level of timing)
203  _options[2] = 15; // random seed (default)
204  _options[3] = 2; // processor distribution and subdomain distribution are decoupled
205 
206  // Find the number of active elements on each processor. We cannot use
207  // mesh.n_active_elem_on_proc(pid) since that only returns the number of
208  // elements assigned to pid which are currently stored on the calling
209  // processor. This will not in general be correct for parallel meshes
210  // when (pid!=mesh.processor_id()).
211  _n_active_elem_on_proc.resize(mesh.n_processors());
212  mesh.comm().allgather(n_active_local_elem, _n_active_elem_on_proc);
213 
214  // count the total number of active elements in the mesh. Note we cannot
215  // use mesh.n_active_elem() in general since this only returns the number
216  // of active elements which are stored on the calling processor.
217  // We should not use n_active_elem for any allocation because that will
218  // be inheritly unscalable, but it can be useful for libmesh_assertions.
219  dof_id_type n_active_elem=0;
220 
221  // Set up the vtxdist array. This will be the same on each processor.
222  // ***** Consult the Parmetis documentation. *****
223  libmesh_assert_equal_to (_vtxdist.size(),
224  libmesh_cast_int<std::size_t>(mesh.n_processors()+1));
225  libmesh_assert_equal_to (_vtxdist[0], 0);
226 
 // _vtxdist[p] is the global index of the first vertex (element) owned by
 // processor p; it is a prefix sum of the per-processor element counts.
227  for (processor_id_type pid=0; pid<mesh.n_processors(); pid++)
228  {
229  _vtxdist[pid+1] = _vtxdist[pid] + _n_active_elem_on_proc[pid];
230  n_active_elem += _n_active_elem_on_proc[pid];
231  }
232  libmesh_assert_equal_to (_vtxdist.back(), static_cast<int>(n_active_elem));
233 
234  // ParMetis expects the elements to be numbered in contiguous blocks
235  // by processor, i.e. [0, ne0), [ne0, ne0+ne1), ...
236  // Since we only partition active elements we should have no expectation
237  // that we currently have such a distribution. So we need to create it.
238  // Also, at the same time we are going to map all the active elements into a globally
239  // unique range [0,n_active_elem) which is *independent* of the current partitioning.
240  // This can be fed to ParMetis as the initial partitioning of the subdomains (decoupled
241  // from the partitioning of the objects themselves). This allows us to get the same
242  // resultant partitioning independed of the input partitioning.
// NOTE(review): original lines 243-244 are missing from this extract --
// presumably the computation of the "bbox" used by the
// find_global_indices calls below (e.g. via MeshTools); confirm.
245 
246  _global_index_by_pid_map.clear();
247 
248  // Maps active element ids into a contiguous range independent of partitioning.
249  // (only needs local scope)
250  vectormap<dof_id_type, dof_id_type> global_index_map;
251 
252  {
253  std::vector<dof_id_type> global_index;
254 
255  // create the mapping which is contiguous by processor
256  dof_id_type pid_offset=0;
257  for (processor_id_type pid=0; pid<mesh.n_processors(); pid++)
258  {
// NOTE(review): original lines 259-260 are missing -- presumably the
// declarations of the iterators "it"/"end" over pid's active elements.
261 
262  // note that we may not have all (or any!) the active elements which belong on this processor,
263  // but by calling this on all processors a unique range in [0,_n_active_elem_on_proc[pid])
264  // is constructed. Only the indices for the elements we pass in are returned in the array.
// NOTE(review): original line 265 (the opening of the call whose argument
// list continues below -- presumably
// "MeshCommunication().find_global_indices (mesh.comm(),") is missing.
266  bbox, it, end,
267  global_index);
268 
269  for (dof_id_type cnt=0; it != end; ++it)
270  {
271  const Elem *elem = *it;
// NOTE(review): original line 272 is missing -- presumably an assertion
// that elem->id() is not already in _global_index_by_pid_map.
273  libmesh_assert_less (cnt, global_index.size());
274  libmesh_assert_less (global_index[cnt], _n_active_elem_on_proc[pid]);
275 
 // Shift pid-local indices by pid_offset to get the contiguous-by-
 // processor global numbering.
276  _global_index_by_pid_map.insert(std::make_pair(elem->id(), global_index[cnt++] + pid_offset));
277  }
278 
279  pid_offset += _n_active_elem_on_proc[pid];
280  }
281 
282  // create the unique mapping for all active elements independent of partitioning
283  {
// NOTE(review): original lines 284-285 are missing -- presumably the
// declarations of "it"/"end" over *all* active elements this time.
286 
287  // Calling this on all processors a unique range in [0,n_active_elem) is constructed.
288  // Only the indices for the elements we pass in are returned in the array.
// NOTE(review): original line 289 (the opening of the find_global_indices
// call, as above) is missing.
290  bbox, it, end,
291  global_index);
292 
293  for (dof_id_type cnt=0; it != end; ++it)
294  {
295  const Elem *elem = *it;
296  libmesh_assert (!global_index_map.count(elem->id()));
297  libmesh_assert_less (cnt, global_index.size());
298  libmesh_assert_less (global_index[cnt], n_active_elem);
299 
300  global_index_map.insert(std::make_pair(elem->id(), global_index[cnt++]));
301  }
302  }
303  // really, shouldn't be close!
304  libmesh_assert_less_equal (global_index_map.size(), n_active_elem);
305  libmesh_assert_less_equal (_global_index_by_pid_map.size(), n_active_elem);
306 
307  // At this point the two maps should be the same size. If they are not
308  // then the number of active elements is not the same as the sum over all
309  // processors of the number of active elements per processor, which means
310  // there must be some unpartitioned objects out there.
311  if (global_index_map.size() != _global_index_by_pid_map.size())
312  {
313  libMesh::err << "ERROR: ParmetisPartitioner cannot handle unpartitioned objects!"
314  << std::endl;
315  libmesh_error();
316  }
317  }
318 
319  // Finally, we need to initialize the vertex (partition) weights and the initial subdomain
320  // mapping. The subdomain mapping will be independent of the processor mapping, and is
321  // defined by a simple mapping of the global indices we just found.
322  {
323  std::vector<dof_id_type> subdomain_bounds(mesh.n_processors());
324 
325  const dof_id_type first_local_elem = _vtxdist[mesh.processor_id()];
326 
 // Compute cumulative subdomain boundaries: subdomain_bounds[p] is the
 // one-past-the-end global index for subdomain p, distributing
 // n_active_elem as evenly as possible (remainder spread over the first
 // n_active_elem % _nparts subdomains).
327  for (processor_id_type pid=0; pid<mesh.n_processors(); pid++)
328  {
329  dof_id_type tgt_subdomain_size = 0;
330 
331  // watch out for the case that n_subdomains < n_processors
332  if (pid < static_cast<unsigned int>(_nparts))
333  {
334  tgt_subdomain_size = n_active_elem/std::min
335  (libmesh_cast_int<int>(mesh.n_processors()),
336  _nparts);
337 
338  if (pid < n_active_elem%_nparts)
339  tgt_subdomain_size++;
340  }
341  if (pid == 0)
342  subdomain_bounds[0] = tgt_subdomain_size;
343  else
344  subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size;
345  }
346 
347  libmesh_assert_equal_to (subdomain_bounds.back(), n_active_elem);
348 
// NOTE(review): original lines 349-350 are missing -- presumably the
// declarations of "elem_it"/"elem_end" over the active local elements.
351 
352  for (; elem_it != elem_end; ++elem_it)
353  {
354  const Elem *elem = *elem_it;
355 
// NOTE(review): original line 356 is missing -- presumably an assertion
// that elem->id() is present in _global_index_by_pid_map.
357  const dof_id_type global_index_by_pid =
358  _global_index_by_pid_map[elem->id()];
359  libmesh_assert_less (global_index_by_pid, n_active_elem);
360 
361  const dof_id_type local_index =
362  global_index_by_pid - first_local_elem;
363 
364  libmesh_assert_less (local_index, n_active_local_elem);
365  libmesh_assert_less (local_index, _vwgt.size());
366 
367  // TODO:[BSK] maybe there is a better weight?
368  _vwgt[local_index] = elem->n_nodes();
369 
370  // find the subdomain this element belongs in
371  libmesh_assert (global_index_map.count(elem->id()));
372  const dof_id_type global_index =
373  global_index_map[elem->id()];
374 
375  libmesh_assert_less (global_index, subdomain_bounds.back());
376 
 // lower_bound on the cumulative bounds gives the first subdomain whose
 // upper bound covers global_index.
377  const unsigned int subdomain_id =
378  std::distance(subdomain_bounds.begin(),
379  std::lower_bound(subdomain_bounds.begin(),
380  subdomain_bounds.end(),
381  global_index));
382  libmesh_assert_less (subdomain_id, static_cast<unsigned int>(_nparts));
383  libmesh_assert_less (local_index, _part.size());
384 
385  _part[local_index] = subdomain_id;
386  }
387  }
388 }
389 
390 
391 
// Build the dual graph of the active mesh in distributed CSR form
// (_xadj / _adjncy): one vertex per active local element, one edge per
// face-neighbor pair. With AMR enabled, an inactive coarse neighbor is
// replaced by those of its active descendants that touch us.
// NOTE(review): the signature line (original 392) is missing from this
// extract -- presumably
// "void ParmetisPartitioner::build_graph (const MeshBase &mesh)".
393 {
394  // build the graph in distributed CSR format. Note that
395  // the edges in the graph will correspond to
396  // face neighbors
397  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
398 
 // Scratch buffer reused for each coarse neighbor's active descendants.
399  std::vector<const Elem*> neighbors_offspring;
400 
 // graph[local_index] holds the adjacency list for that local element;
 // graph_size counts total edges so we can reserve _adjncy exactly.
401  std::vector<std::vector<dof_id_type> > graph(n_active_local_elem);
402  dof_id_type graph_size=0;
403 
404  const dof_id_type first_local_elem = _vtxdist[mesh.processor_id()];
405 
// NOTE(review): original lines 406-407 are missing -- presumably the
// declarations of "elem_it"/"elem_end" over the active local elements.
408 
409  for (; elem_it != elem_end; ++elem_it)
410  {
411  const Elem* elem = *elem_it;
412 
// NOTE(review): original line 413 is missing -- presumably an assertion
// that elem->id() is present in _global_index_by_pid_map.
414  const dof_id_type global_index_by_pid =
415  _global_index_by_pid_map[elem->id()];
416 
417  const dof_id_type local_index =
418  global_index_by_pid - first_local_elem;
419  libmesh_assert_less (local_index, n_active_local_elem);
420 
421  std::vector<dof_id_type> &graph_row = graph[local_index];
422 
423  // Loop over the element's neighbors. An element
424  // adjacency corresponds to a face neighbor
425  for (unsigned int ms=0; ms<elem->n_neighbors(); ms++)
426  {
427  const Elem* neighbor = elem->neighbor(ms);
428 
429  if (neighbor != NULL)
430  {
431  // If the neighbor is active treat it
432  // as a connection
433  if (neighbor->active())
434  {
// NOTE(review): original line 435 is missing -- presumably an assertion
// that neighbor->id() is present in _global_index_by_pid_map.
436  const dof_id_type neighbor_global_index_by_pid =
437  _global_index_by_pid_map[neighbor->id()];
438 
439  graph_row.push_back(neighbor_global_index_by_pid);
440  graph_size++;
441  }
442 
443 #ifdef LIBMESH_ENABLE_AMR
444 
445  // Otherwise we need to find all of the
446  // neighbor's children that are connected to
447  // us and add them
448  else
449  {
450  // The side of the neighbor to which
451  // we are connected
452  const unsigned int ns =
453  neighbor->which_neighbor_am_i (elem);
454  libmesh_assert_less (ns, neighbor->n_neighbors());
455 
456  // Get all the active children (& grandchildren, etc...)
457  // of the neighbor.
458  neighbor->active_family_tree (neighbors_offspring);
459 
460  // Get all the neighbor's children that
461  // live on that side and are thus connected
462  // to us
463  for (unsigned int nc=0; nc<neighbors_offspring.size(); nc++)
464  {
465  const Elem* child =
466  neighbors_offspring[nc];
467 
468  // This does not assume a level-1 mesh.
469  // Note that since children have sides numbered
470  // coincident with the parent then this is a sufficient test.
471  if (child->neighbor(ns) == elem)
472  {
473  libmesh_assert (child->active());
// NOTE(review): original line 474 is missing -- presumably an assertion
// that child->id() is present in _global_index_by_pid_map.
475  const dof_id_type child_global_index_by_pid =
476  _global_index_by_pid_map[child->id()];
477 
478  graph_row.push_back(child_global_index_by_pid);
479  graph_size++;
480  }
481  }
482  }
483 
484 #endif /* ifdef LIBMESH_ENABLE_AMR */
485 
486 
487  }
488  }
489  }
490 
491  // Reserve space in the adjacency array
492  _xadj.clear();
493  _xadj.reserve (n_active_local_elem + 1);
494  _adjncy.clear();
495  _adjncy.reserve (graph_size);
496 
 // Flatten the per-element rows into CSR: _xadj[r] is the offset of row
 // r's first entry in _adjncy.
497  for (std::size_t r=0; r<graph.size(); r++)
498  {
499  _xadj.push_back(_adjncy.size());
500  std::vector<dof_id_type> graph_row; // build this empty
501  graph_row.swap(graph[r]); // this will deallocate at the end of scope
502  _adjncy.insert(_adjncy.end(),
503  graph_row.begin(),
504  graph_row.end());
505  }
506 
507  // The end of the adjacency array for the last elem
508  _xadj.push_back(_adjncy.size());
509 
510  libmesh_assert_equal_to (_xadj.size(), n_active_local_elem+1);
511  libmesh_assert_equal_to (_adjncy.size(), graph_size);
512 }
513 
514 
515 
// Push the processor ids computed by ParMETIS (stored locally in _part)
// out to the elements. Each processor asks the current owner of every
// active element it can see for that element's new processor id, using a
// ring exchange: on round "pid" we send our requests to procup and
// service the requests arriving from procdown, then trade the answers
// back. Finally the answers are consumed in the same iteration order the
// requests were generated in.
// NOTE(review): the signature line (original 516) is missing from this
// extract -- presumably
// "void ParmetisPartitioner::assign_partitioning (MeshBase &mesh)".
517 {
518  // This function must be run on all processors at once
519  libmesh_parallel_only(mesh.comm());
520 
521  const dof_id_type
522  first_local_elem = _vtxdist[mesh.processor_id()];
523 
 // requested_ids[p]: element ids whose new owner we must ask processor p
 // for; requests_to_fill[p]: ids processor p asked us about (overwritten
 // in place with the answers below).
524  std::vector<std::vector<dof_id_type> >
525  requested_ids(mesh.n_processors()),
526  requests_to_fill(mesh.n_processors());
527 
// NOTE(review): original lines 528-529 are missing -- presumably the
// declarations of "elem_it"/"elem_end"; they are re-assigned from
// mesh.active_elements_begin()/end() further down, so they appear to be
// iterators over the active elements.
530 
531  for (; elem_it != elem_end; ++elem_it)
532  {
533  Elem *elem = *elem_it;
534 
535  // we need to get the index from the owning processor
536  // (note we cannot assign it now -- we are iterating
537  // over elements again and this will be bad!)
538  libmesh_assert_less (elem->processor_id(), requested_ids.size());
539  requested_ids[elem->processor_id()].push_back(elem->id());
540  }
541 
542  // Trade with all processors (including self) to get their indices
543  for (processor_id_type pid=0; pid<mesh.n_processors(); pid++)
544  {
545  // Trade my requests with processor procup and procdown
546  const processor_id_type procup = (mesh.processor_id() + pid) %
547  mesh.n_processors();
548  const processor_id_type procdown = (mesh.n_processors() +
549  mesh.processor_id() - pid) %
550  mesh.n_processors();
551 
552  mesh.comm().send_receive (procup, requested_ids[procup],
553  procdown, requests_to_fill[procdown]);
554 
555  // we can overwrite these requested ids in-place.
556  for (std::size_t i=0; i<requests_to_fill[procdown].size(); i++)
557  {
558  const dof_id_type requested_elem_index =
559  requests_to_fill[procdown][i];
560 
561  libmesh_assert(_global_index_by_pid_map.count(requested_elem_index));
562 
563  const dof_id_type global_index_by_pid =
564  _global_index_by_pid_map[requested_elem_index];
565 
566  const dof_id_type local_index =
567  global_index_by_pid - first_local_elem;
568 
569  libmesh_assert_less (local_index, _part.size());
570  libmesh_assert_less (local_index, mesh.n_active_local_elem());
571 
572  const unsigned int elem_procid =
573  static_cast<unsigned int>(_part[local_index]);
574 
575  libmesh_assert_less (elem_procid, static_cast<unsigned int>(_nparts));
576 
577  requests_to_fill[procdown][i] = elem_procid;
578  }
579 
580  // Trade back
 // After this, requested_ids[procup] holds the answers (new processor
 // ids) in the same order the requests were queued.
581  mesh.comm().send_receive (procdown, requests_to_fill[procdown],
582  procup, requested_ids[procup]);
583  }
584 
585  // and finally assign the partitioning.
586  // note we are iterating in exactly the same order
587  // used to build up the request, so we can expect the
588  // required entries to be in the proper sequence.
589  elem_it = mesh.active_elements_begin();
590  elem_end = mesh.active_elements_end();
591 
 // counters[p] walks through the answers received from processor p.
592  for (std::vector<unsigned int> counters(mesh.n_processors(), 0);
593  elem_it != elem_end; ++elem_it)
594  {
595  Elem *elem = *elem_it;
596 
597  const processor_id_type current_pid = elem->processor_id();
598 
599  libmesh_assert_less (counters[current_pid], requested_ids[current_pid].size());
600 
601  const processor_id_type elem_procid =
602  requested_ids[current_pid][counters[current_pid]++];
603 
604  libmesh_assert_less (elem_procid, static_cast<unsigned int>(_nparts));
605  elem->processor_id() = elem_procid;
606  }
607 }
608 
609 #endif // #ifdef LIBMESH_HAVE_PARMETIS
610 
611 } // namespace libMesh

Site Created By: libMesh Developers
Last modified: February 07 2014 16:57:06 UTC

Hosted By:
SourceForge.net Logo