Note: this is documentation for an old release. View the latest documentation at docs.fenicsproject.org/v0.3.0/v0.9.0/cpp
DOLFINx  0.3.0
DOLFINx C++ interface
sort.h
1 // Copyright (C) 2021 Igor Baratta
2 //
3 // This file is part of DOLFINx (https://www.fenicsproject.org)
4 //
5 // SPDX-License-Identifier: LGPL-3.0-or-later
6 
7 #pragma once
8 
9 #include <algorithm>
10 #include <bitset>
11 #include <cstdint>
12 #include <dolfinx/common/Timer.h>
13 #include <numeric>
14 #include <vector>
15 #include <xtensor/xtensor.hpp>
16 #include <xtensor/xview.hpp>
17 #include <xtl/xspan.hpp>
18 
19 namespace dolfinx
20 {
21 
27 template <typename T, int BITS = 8>
28 void radix_sort(xtl::span<T> array)
29 {
30  static_assert(std::is_integral<T>(), "This function only sorts integers.");
31 
32  if (array.size() <= 1)
33  return;
34 
35  T max_value = *std::max_element(array.begin(), array.end());
36 
37  // Sort N bits at a time
38  constexpr int bucket_size = 1 << BITS;
39  T mask = (T(1) << BITS) - 1;
40 
41  // Compute number of iterations, most significant digit (N bits) of
42  // maxvalue
43  int its = 0;
44  while (max_value)
45  {
46  max_value >>= BITS;
47  its++;
48  }
49 
50  // Adjacency list arrays for computing insertion position
51  std::array<std::int32_t, bucket_size> counter;
52  std::array<std::int32_t, bucket_size + 1> offset;
53 
54  std::int32_t mask_offset = 0;
55  std::vector<T> buffer(array.size());
56  xtl::span<T> current_perm = array;
57  xtl::span<T> next_perm = buffer;
58  for (int i = 0; i < its; i++)
59  {
60  // Zero counter array
61  std::fill(counter.begin(), counter.end(), 0);
62 
63  // Count number of elements per bucket
64  for (T c : current_perm)
65  counter[(c & mask) >> mask_offset]++;
66 
67  // Prefix sum to get the inserting position
68  offset[0] = 0;
69  std::partial_sum(counter.begin(), counter.end(), std::next(offset.begin()));
70  for (T c : current_perm)
71  {
72  std::int32_t bucket = (c & mask) >> mask_offset;
73  std::int32_t new_pos = offset[bucket + 1] - counter[bucket];
74  next_perm[new_pos] = c;
75  counter[bucket]--;
76  }
77 
78  mask = mask << BITS;
79  mask_offset += BITS;
80 
81  std::swap(current_perm, next_perm);
82  }
83 
84  // Copy data back to array
85  if (its % 2 != 0)
86  std::copy(buffer.begin(), buffer.end(), array.begin());
87 }
88 
98 template <typename T, int BITS = 16>
99 void argsort_radix(const xtl::span<const T>& array,
100  xtl::span<std::int32_t> perm)
101 {
102  if (array.size() <= 1)
103  return;
104 
105  const auto [min, max] = std::minmax_element(array.begin(), array.end());
106  T range = *max - *min + 1;
107 
108  // Sort N bits at a time
109  constexpr int bucket_size = 1 << BITS;
110  T mask = (T(1) << BITS) - 1;
111  std::int32_t mask_offset = 0;
112 
113  // Compute number of iterations, most significant digit (N bits) of
114  // maxvalue
115  int its = 0;
116  while (range)
117  {
118  range >>= BITS;
119  its++;
120  }
121 
122  // Adjacency list arrays for computing insertion position
123  std::array<std::int32_t, bucket_size> counter;
124  std::array<std::int32_t, bucket_size + 1> offset;
125 
126  std::vector<std::int32_t> perm2(perm.size());
127  xtl::span<std::int32_t> current_perm = perm;
128  xtl::span<std::int32_t> next_perm = perm2;
129  for (int i = 0; i < its; i++)
130  {
131  // Zero counter
132  std::fill(counter.begin(), counter.end(), 0);
133 
134  // Count number of elements per bucket
135  for (auto cp : current_perm)
136  {
137  T value = array[cp] - *min;
138  std::int32_t bucket = (value & mask) >> mask_offset;
139  counter[bucket]++;
140  }
141 
142  // Prefix sum to get the inserting position
143  offset[0] = 0;
144  std::partial_sum(counter.begin(), counter.end(), std::next(offset.begin()));
145 
146  // Sort py permutation
147  for (auto cp : current_perm)
148  {
149  T value = array[cp] - *min;
150  std::int32_t bucket = (value & mask) >> mask_offset;
151  std::int32_t pos = offset[bucket + 1] - counter[bucket];
152  next_perm[pos] = cp;
153  counter[bucket]--;
154  }
155 
156  std::swap(current_perm, next_perm);
157 
158  mask = mask << BITS;
159  mask_offset += BITS;
160  }
161 
162  if (its % 2 == 1)
163  std::copy(perm2.begin(), perm2.end(), perm.begin());
164 }
165 
166 template <typename T, int BITS = 16>
167 std::vector<std::int32_t> sort_by_perm(const xt::xtensor<T, 2>& array)
168 {
169  // Sort the list and label uniquely
170  const int cols = array.shape(1);
171  const int size = array.shape(0);
172  std::vector<std::int32_t> perm(size);
173  std::iota(perm.begin(), perm.end(), 0);
174 
175  // Sort each column at a time from right to left.
176  // Col 0 has the most signficant "digit".
177  for (int i = 0; i < cols; i++)
178  {
179  int col = cols - 1 - i;
180  xt::xtensor<std::int32_t, 1> column = xt::view(array, xt::all(), col);
181  argsort_radix<std::int32_t, BITS>(xtl::span<const std::int32_t>(column),
182  perm);
183  }
184 
185  return perm;
186 }
187 
188 } // namespace dolfinx
int size(MPI_Comm comm)
Return size of the group (number of processes) associated with the communicator.
Definition: MPI.cpp:82