12 #include <dolfinx/common/Timer.h>
15 #include <type_traits>
26 template <
typename T,
int BITS = 8>
27 void radix_sort(
const std::span<T>& array)
29 static_assert(std::is_integral<T>(),
"This function only sorts integers.");
31 if (array.size() <= 1)
34 T max_value = *std::max_element(array.begin(), array.end());
37 constexpr
int bucket_size = 1 << BITS;
38 T mask = (T(1) << BITS) - 1;
50 std::array<std::int32_t, bucket_size> counter;
51 std::array<std::int32_t, bucket_size + 1> offset;
53 std::int32_t mask_offset = 0;
54 std::vector<T> buffer(array.size());
55 std::span<T> current_perm = array;
56 std::span<T> next_perm = buffer;
57 for (
int i = 0; i < its; i++)
60 std::fill(counter.begin(), counter.end(), 0);
63 for (T c : current_perm)
64 counter[(c & mask) >> mask_offset]++;
68 std::partial_sum(counter.begin(), counter.end(), std::next(offset.begin()));
69 for (T c : current_perm)
71 std::int32_t bucket = (c & mask) >> mask_offset;
72 std::int32_t new_pos = offset[bucket + 1] - counter[bucket];
73 next_perm[new_pos] = c;
80 std::swap(current_perm, next_perm);
85 std::copy(buffer.begin(), buffer.end(), array.begin());
96 template <
typename T,
int BITS = 16>
97 void argsort_radix(
const std::span<const T>& array,
98 std::span<std::int32_t> perm)
100 static_assert(std::is_integral_v<T>,
"Integral required.");
102 if (array.size() <= 1)
105 const auto [min, max] = std::minmax_element(array.begin(), array.end());
106 T range = *max - *min + 1;
109 constexpr
int bucket_size = 1 << BITS;
110 T mask = (T(1) << BITS) - 1;
111 std::int32_t mask_offset = 0;
123 std::array<std::int32_t, bucket_size> counter;
124 std::array<std::int32_t, bucket_size + 1> offset;
126 std::vector<std::int32_t> perm2(perm.size());
127 std::span<std::int32_t> current_perm = perm;
128 std::span<std::int32_t> next_perm = perm2;
129 for (
int i = 0; i < its; i++)
132 std::fill(counter.begin(), counter.end(), 0);
135 for (
auto cp : current_perm)
137 T value = array[cp] - *min;
138 std::int32_t bucket = (value & mask) >> mask_offset;
144 std::partial_sum(counter.begin(), counter.end(), std::next(offset.begin()));
147 for (
auto cp : current_perm)
149 T value = array[cp] - *min;
150 std::int32_t bucket = (value & mask) >> mask_offset;
151 std::int32_t pos = offset[bucket + 1] - counter[bucket];
156 std::swap(current_perm, next_perm);
163 std::copy(perm2.begin(), perm2.end(), perm.begin());
175 template <
typename T,
int BITS = 16>
176 std::vector<std::int32_t> sort_by_perm(
const std::span<const T>& x,
179 static_assert(std::is_integral_v<T>,
"Integral required.");
181 assert(x.size() % shape1 == 0);
182 const std::size_t shape0 = x.size() / shape1;
183 std::vector<std::int32_t> perm(shape0);
184 std::iota(perm.begin(), perm.end(), 0);
188 std::vector<T> column(shape0);
189 for (std::size_t i = 0; i < shape1; ++i)
191 int col = shape1 - 1 - i;
192 for (std::size_t j = 0; j < shape0; ++j)
193 column[j] = x[j * shape1 + col];
194 argsort_radix<T, BITS>(column, perm);