parallel_sort.C
// The libMesh Finite Element Library.
// Copyright (C) 2002-2014 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner

// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.

// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

// System Includes
#include <algorithm>
#include <iostream>

// Local Includes
#include "libmesh/libmesh_common.h"
#include "libmesh/parallel.h"
#include "libmesh/parallel_bin_sorter.h"
#include "libmesh/parallel_hilbert.h"
#include "libmesh/parallel_sort.h"

#ifdef LIBMESH_HAVE_LIBHILBERT
#  include "hilbert.h"
#endif

namespace libMesh
{

namespace Parallel {

// The Constructor sorts the local data using
// std::sort().  Therefore, the construction of
// a Parallel::Sort object takes O(nlogn) time,
// where n is the length of _data.
template <typename KeyType, typename IdxType>
Sort<KeyType,IdxType>::Sort (const Parallel::Communicator &comm_in,
                             std::vector<KeyType>& d) :
  ParallelObject(comm_in),
  _n_procs(comm_in.size()),
  _proc_id(comm_in.rank()),
  _bin_is_sorted(false),
  _data(d)
{
  std::sort(_data.begin(), _data.end());

  // Allocate storage
  _local_bin_sizes.resize(_n_procs);
}
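// A minimal usage sketch (illustration only -- "comm" and "keys" are
// hypothetical names, not part of this file):
//
//   std::vector<int> keys;                          // local, unsorted keys
//   Parallel::Sort<int, unsigned int> sorter (comm, keys);
//   sorter.sort();                                  // global parallel sort
//   const std::vector<int> & my_bin = sorter.bin();
//
// After sort(), each processor holds one contiguous, globally ordered
// "bin" of the combined key range.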


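// sort() drives the complete parallel sort:
//   1. binsort()          -- assign each local key to one of _n_procs bins
//   2. communicate_bins() -- gather the contents of bin i onto processor i
//   3. sort_local_bin()   -- sort the received bin locally
// Degenerate cases (an empty or single-element global range, or a single
// processor) skip the communication steps.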
template <typename KeyType, typename IdxType>
void Sort<KeyType,IdxType>::sort()
{
  // Find the global data size.  The sorting
  // algorithms assume they have a range to
  // work with, so catch the degenerate cases here
  IdxType global_data_size = libmesh_cast_int<IdxType>(_data.size());

  this->comm().sum (global_data_size);

  if (global_data_size < 2)
    {
      // the entire global range is either empty
      // or contains only one element
      _my_bin = _data;

      this->comm().allgather (static_cast<IdxType>(_my_bin.size()),
                              _local_bin_sizes);
    }
  else
    {
      if (this->n_processors() > 1)
        {
          this->binsort();
          this->communicate_bins();
        }
      else
        _my_bin = _data;

      this->sort_local_bin();
    }

  // Set sorted flag to true
  _bin_is_sorted = true;
}


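// binsort() computes the global key range and uses a Parallel::BinSorter
// to split that range into _n_procs bins, recording how many local keys
// fall into each bin in _local_bin_sizes.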
template <typename KeyType, typename IdxType>
void Sort<KeyType,IdxType>::binsort()
{
  // Find the global min and max from all the
  // processors.
  std::vector<KeyType> global_min_max(2);

  // Insert the local min and max for this processor.
  // The min is stored negated so that a single max()
  // reduction below recovers both extrema at once.
  global_min_max[0] = -_data.front();
  global_min_max[1] =  _data.back();

  // Communicate to determine the global
  // min and max for all processors.
  this->comm().max(global_min_max);

  // Multiply the min by -1 to obtain the true min
  global_min_max[0] *= -1;

  // Bin-Sort based on the global min and max
  Parallel::BinSorter<KeyType> bs(this->comm(), _data);
  bs.binsort(_n_procs, global_min_max[1], global_min_max[0]);

  // Now save the local bin sizes in a vector so
  // we don't have to keep around the BinSorter.
  for (processor_id_type i=0; i<_n_procs; ++i)
    _local_bin_sizes[i] = bs.sizeof_bin(i);
}


#if defined(LIBMESH_HAVE_LIBHILBERT) && defined(LIBMESH_HAVE_MPI)
// Full specialization for HilbertIndices.  There is a fair amount of
// code duplication here that could potentially be consolidated with the
// above method.
template <>
void Sort<Hilbert::HilbertIndices,unsigned int>::binsort()
{
  // Find the global min and max from all the
  // processors.  Do this using MPI_Allreduce.
  Hilbert::HilbertIndices
    local_min,  local_max,
    global_min, global_max;

  if (_data.empty())
    {
      local_min.rack0 = local_min.rack1 = local_min.rack2 = static_cast<Hilbert::inttype>(-1);
      local_max.rack0 = local_max.rack1 = local_max.rack2 = 0;
    }
  else
    {
      local_min = _data.front();
      local_max = _data.back();
    }

  MPI_Op hilbert_max, hilbert_min;

  MPI_Op_create ((MPI_User_function*)__hilbert_max_op, true, &hilbert_max);
  MPI_Op_create ((MPI_User_function*)__hilbert_min_op, true, &hilbert_min);

  // Communicate to determine the global
  // min and max for all processors.
  MPI_Allreduce(&local_min,
                &global_min,
                1,
                Parallel::StandardType<Hilbert::HilbertIndices>(),
                hilbert_min,
                this->comm().get());

  MPI_Allreduce(&local_max,
                &global_max,
                1,
                Parallel::StandardType<Hilbert::HilbertIndices>(),
                hilbert_max,
                this->comm().get());

  MPI_Op_free (&hilbert_max);
  MPI_Op_free (&hilbert_min);

  // Bin-Sort based on the global min and max
  Parallel::BinSorter<Hilbert::HilbertIndices> bs(this->comm(), _data);
  bs.binsort(_n_procs, global_max, global_min);

  // Now save the local bin sizes in a vector so
  // we don't have to keep around the BinSorter.
  for (processor_id_type i=0; i<_n_procs; ++i)
    _local_bin_sizes[i] = bs.sizeof_bin(i);
}

#endif // LIBMESH_HAVE_LIBHILBERT && LIBMESH_HAVE_MPI


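// communicate_bins() exchanges the binned data: for each bin i, every
// processor sends its local contribution to processor i via MPI_Gatherv,
// so that processor i ends up holding the entire i-th bin in _my_bin.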
template <typename KeyType, typename IdxType>
void Sort<KeyType,IdxType>::communicate_bins()
{
#ifdef LIBMESH_HAVE_MPI
  // Create storage for the global bin sizes.  This
  // is the number of keys which will be held in
  // each bin over all processors.
  std::vector<IdxType> global_bin_sizes = _local_bin_sizes;

  // Sum to find the total number of entries in each bin.
  this->comm().sum(global_bin_sizes);

  // Create a vector to temporarily hold the results of MPI_Gatherv
  // calls.  The vector dest may be saved away to _my_bin depending on which
  // processor is being MPI_Gatherv'd.
  std::vector<KeyType> dest;

  IdxType local_offset = 0;

  for (processor_id_type i=0; i<_n_procs; ++i)
    {
      // Vector to receive the total bin size for each
      // processor.  Processor i's bin size will be
      // held in proc_bin_size[i]
      std::vector<IdxType> proc_bin_size;

      // Find the number of contributions coming from each
      // processor for this bin.  Note: allgather combines
      // the MPI_Gather and MPI_Bcast operations into one.
      this->comm().allgather(_local_bin_sizes[i], proc_bin_size);

      // Compute the offsets into my_bin for each processor's
      // portion of the bin.  These are basically partial sums
      // of the proc_bin_size vector.
      std::vector<IdxType> displacements(_n_procs);
      for (processor_id_type j=1; j<_n_procs; ++j)
        displacements[j] = proc_bin_size[j-1] + displacements[j-1];

      // Resize the destination buffer
      dest.resize (global_bin_sizes[i]);

      MPI_Gatherv((_data.size() > local_offset) ?
                    &_data[local_offset] :
                    NULL,                              // Beginning of the bin to be sent
                  _local_bin_sizes[i],                 // How much data is in the bin being sent
                  Parallel::StandardType<KeyType>(),   // The data type we are sorting
                  (dest.empty()) ?
                    NULL :
                    &dest[0],                          // Enough storage to hold all bin contributions
                  (int*) &proc_bin_size[0],            // How much is to be received from each processor
                  (int*) &displacements[0],            // Offsets into the receive buffer
                  Parallel::StandardType<KeyType>(),   // The data type we are sorting
                  i,                                   // The root process (we do this once for each proc)
                  this->comm().get());

      // Copy the destination buffer if it
      // corresponds to the bin for this processor
      if (i == _proc_id)
        _my_bin = dest;

      // Increment the local offset counter
      local_offset += _local_bin_sizes[i];
    }
#endif // LIBMESH_HAVE_MPI
}


#if defined(LIBMESH_HAVE_LIBHILBERT) && defined(LIBMESH_HAVE_MPI)
// Full specialization for HilbertIndices.  There is a fair amount of
// code duplication here that could potentially be consolidated with the
// above method.
template <>
void Sort<Hilbert::HilbertIndices,unsigned int>::communicate_bins()
{
  // Create storage for the global bin sizes.  This
  // is the number of keys which will be held in
  // each bin over all processors.
  std::vector<unsigned int> global_bin_sizes(_n_procs);

  libmesh_assert_equal_to (_local_bin_sizes.size(), global_bin_sizes.size());

  // Sum to find the total number of entries in each bin.
  // This is stored in global_bin_sizes.  Note, we
  // explicitly know that we are communicating MPI_UNSIGNED's here.
  MPI_Allreduce(&_local_bin_sizes[0],
                &global_bin_sizes[0],
                _n_procs,
                MPI_UNSIGNED,
                MPI_SUM,
                this->comm().get());

  // Create a vector to temporarily hold the results of MPI_Gatherv
  // calls.  The vector dest may be saved away to _my_bin depending on which
  // processor is being MPI_Gatherv'd.
  std::vector<Hilbert::HilbertIndices> sendbuf, dest;

  unsigned int local_offset = 0;

  for (unsigned int i=0; i<_n_procs; ++i)
    {
      // Vector to receive the total bin size for each
      // processor.  Processor i's bin size will be
      // held in proc_bin_size[i]
      std::vector<unsigned int> proc_bin_size(_n_procs);

      // Find the number of contributions coming from each
      // processor for this bin.  Note: Allgather combines
      // the MPI_Gather and MPI_Bcast operations into one.
      // Note: Here again we know that we are communicating
      // MPI_UNSIGNED's so there is no need to check the MPI_traits.
      MPI_Allgather(&_local_bin_sizes[i], // Source: # of entries on this proc in bin i
                    1,                    // Number of items to gather
                    MPI_UNSIGNED,
                    &proc_bin_size[0],    // Destination: # of entries in bin i from each proc
                    1,
                    MPI_UNSIGNED,
                    this->comm().get());

      // Compute the offsets into my_bin for each processor's
      // portion of the bin.  These are basically partial sums
      // of the proc_bin_size vector.
      std::vector<unsigned int> displacements(_n_procs);
      for (unsigned int j=1; j<_n_procs; ++j)
        displacements[j] = proc_bin_size[j-1] + displacements[j-1];

      // Resize the destination buffer
      dest.resize (global_bin_sizes[i]);

      MPI_Gatherv((_data.size() > local_offset) ?
                    &_data[local_offset] :
                    NULL,                 // Beginning of the bin to be sent
                  _local_bin_sizes[i],    // How much data is in the bin being sent
                  Parallel::StandardType<Hilbert::HilbertIndices>(), // The data type we are sorting
                  (dest.empty()) ?
                    NULL :
                    &dest[0],             // Enough storage to hold all bin contributions
                  (int*) &proc_bin_size[0],   // How much is to be received from each processor
                  (int*) &displacements[0],   // Offsets into the receive buffer
                  Parallel::StandardType<Hilbert::HilbertIndices>(), // The data type we are sorting
                  i,                      // The root process (we do this once for each proc)
                  this->comm().get());

      // Copy the destination buffer if it
      // corresponds to the bin for this processor
      if (i == _proc_id)
        _my_bin = dest;

      // Increment the local offset counter
      local_offset += _local_bin_sizes[i];
    }
}

#endif // LIBMESH_HAVE_LIBHILBERT && LIBMESH_HAVE_MPI


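// sort_local_bin() sorts the keys gathered into _my_bin.  Since the bins
// partition the global key range by processor, the concatenation of all
// processors' bins is then globally sorted.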
template <typename KeyType, typename IdxType>
void Sort<KeyType,IdxType>::sort_local_bin()
{
  std::sort(_my_bin.begin(), _my_bin.end());
}


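// bin() gives read-only access to this processor's portion of the sorted
// data.  It warns (but still returns _my_bin) if sort() has not yet been
// called.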
template <typename KeyType, typename IdxType>
const std::vector<KeyType>& Sort<KeyType,IdxType>::bin()
{
  if (!_bin_is_sorted)
    {
      libMesh::out << "Warning! Bin is not yet sorted!" << std::endl;
    }

  return _my_bin;
}

}  // namespace Parallel



// Explicitly instantiate for int, double
template class Parallel::Sort<int, unsigned int>;
template class Parallel::Sort<double, unsigned int>;

#if defined(LIBMESH_HAVE_LIBHILBERT) && defined(LIBMESH_HAVE_MPI)
template class Parallel::Sort<Hilbert::HilbertIndices, unsigned int>;
#endif

} // namespace libMesh
