// Constructor fragment: member-initializer list followed by the body that
// builds two neighbourhood communicators and the size/displacement/index
// arrays used for scattering.
// NOTE(review): the constructor signature is not visible here and the embedded
// line numbers skip (e.g. 54 -> 60, 108 -> 119), so braces and some statements
// between the visible lines are not shown in this listing.
52 : _bs(
bs), _remote_inds(0, alloc), _local_inds(0, alloc),
53 _src(map.src().begin(), map.src().end()),
54 _dest(map.dest().begin(), map.dest().end())
// The code below binary-searches with _src[i]; both rank lists are required
// to be sorted (checked in debug builds only).
60 assert(std::is_sorted(_src.begin(), _src.end()));
61 assert(std::is_sorted(_dest.begin(), _dest.end()));
// First graph communicator: _src is passed as the "sources" list and _dest as
// the "destinations" list, unweighted, with reorder = false.
67 MPI_Dist_graph_create_adjacent(
68 map.
comm(), _src.size(), _src.data(), MPI_UNWEIGHTED, _dest.size(),
69 _dest.data(), MPI_UNWEIGHTED, MPI_INFO_NULL,
false, &comm0);
// Second graph communicator: same call with the roles of _src and _dest
// swapped (edges reversed).
// NOTE(review): how comm0/comm1 become the members _comm0/_comm1 used later is
// not visible in this listing.
73 MPI_Dist_graph_create_adjacent(
74 map.
comm(), _dest.size(), _dest.data(), MPI_UNWEIGHTED, _src.size(),
75 _src.data(), MPI_UNWEIGHTED, MPI_INFO_NULL,
false, &comm1);
// perm starts as 0..n-1 and is handed to argsort_radix together with owners.
// Presumably perm becomes the permutation that orders owners ascending -
// confirm against dolfinx::argsort_radix.
79 std::span owners = map.
owners();
80 std::vector<std::int32_t> perm(owners.size());
81 std::iota(perm.begin(), perm.end(), 0);
82 dolfinx::argsort_radix<std::int32_t>(owners, perm);
// Gather owners and ghosts in perm order, so that entry k of both arrays
// refers to ghost perm[k].
86 std::span ghosts = map.
ghosts();
87 std::vector<int> owners_sorted(owners.size());
88 std::vector<std::int64_t> ghosts_sorted(owners.size());
89 std::transform(perm.begin(), perm.end(), owners_sorted.begin(),
90 [&owners](
auto idx) { return owners[idx]; });
91 std::transform(perm.begin(), perm.end(), ghosts_sorted.begin(),
92 [&ghosts](
auto idx) { return ghosts[idx]; });
// For each source rank _src[i], count entries of owners_sorted in
// [begin, upper_bound(_src[i])) and accumulate the counts into displacements.
// NOTE(review): no statement advancing `begin` is visible in this listing;
// confirm the loop sets begin = upper each iteration, otherwise counts would
// be cumulative.
100 _sizes_remote.resize(_src.size(), 0);
101 _displs_remote.resize(_src.size() + 1, 0);
102 std::vector<std::int32_t>::iterator begin = owners_sorted.begin();
103 for (std::size_t i = 0; i < _src.size(); i++)
105 auto upper = std::upper_bound(begin, owners_sorted.end(), _src[i]);
106 int num_ind = std::distance(begin, upper);
107 _displs_remote[i + 1] = _displs_remote[i] + num_ind;
108 _sizes_remote[i] = num_ind;
// Exchange one int32 count per neighbour over _comm1: the per-source counts
// are sent and the per-destination counts are received into _sizes_local,
// then prefix-summed into _displs_local (written from its second entry on).
// NOTE(review): reserve(1) presumably guarantees a non-null data() pointer for
// MPI when a rank has no neighbours - confirm.
119 _sizes_local.resize(_dest.size());
120 _displs_local.resize(_sizes_local.size() + 1);
121 _sizes_remote.reserve(1);
122 _sizes_local.reserve(1);
123 MPI_Neighbor_alltoall(_sizes_remote.data(), 1, MPI_INT32_T,
124 _sizes_local.data(), 1, MPI_INT32_T, _comm1.comm());
125 std::partial_sum(_sizes_local.begin(), _sizes_local.end(),
126 std::next(_displs_local.begin()));
// NOTE(review): the two asserts below are identical; the second may have been
// intended to check a different quantity - confirm.
128 assert((std::int32_t)ghosts_sorted.size() == _displs_remote.back());
129 assert((std::int32_t)ghosts_sorted.size() == _displs_remote.back());
// Send the ghost indices (int64, grouped by owner) over _comm1 and receive,
// into recv_buffer, the indices sent by each neighbour in _dest.
133 std::vector<std::int64_t> recv_buffer(_displs_local.back(), 0);
134 MPI_Neighbor_alltoallv(ghosts_sorted.data(), _sizes_remote.data(),
135 _displs_remote.data(), MPI_INT64_T,
136 recv_buffer.data(), _sizes_local.data(),
137 _displs_local.data(), MPI_INT64_T, _comm1.comm());
// Debug check: every received index lies in the half-open interval
// [range[0], range[1]) returned by map.local_range().
139 const std::array<std::int64_t, 2> range = map.
local_range();
142 std::for_each(recv_buffer.begin(), recv_buffer.end(), [range](
auto idx)
143 { assert(idx >= range[0] and idx < range[1]); });
// Multiply every size and displacement by the block size bs, in place.
148 auto rescale = [](
auto& x,
int bs)
150 std::transform(x.begin(), x.end(), x.begin(),
151 [
bs](
auto e) { return e *= bs; });
153 rescale(_sizes_local,
bs);
154 rescale(_displs_local,
bs);
155 rescale(_sizes_remote,
bs);
156 rescale(_displs_remote,
bs);
// Expand each received index into _bs consecutive entries and subtract
// range[0] * _bs, i.e. _local_inds[i * _bs + j] = (recv_buffer[i] - range[0]) * _bs + j.
161 _local_inds = std::vector<std::int32_t, allocator_type>(
162 recv_buffer.size() * _bs, alloc);
163 std::int64_t offset = range[0] * _bs;
164 for (std::size_t i = 0; i < recv_buffer.size(); i++)
165 for (
int j = 0; j < _bs; j++)
166 _local_inds[i * _bs + j] = (recv_buffer[i] * _bs + j) - offset;
// Same blocked expansion applied to perm, written into _remote_inds.
// NOTE(review): the left-hand side of the assignment below is not visible in
// this listing; presumably it is _remote_inds - confirm.
170 = std::vector<std::int32_t, allocator_type>(perm.size() * _bs, alloc);
171 for (std::size_t i = 0; i < perm.size(); i++)
172 for (
int j = 0; j < _bs; j++)
173 _remote_inds[i * _bs + j] = perm[i] * _bs + j;
// Fragment of a function that starts a non-blocking exchange: data is sent
// using the "local" sizes/displacements and received using the "remote" ones.
// NOTE(review): the function name, its remaining parameters (including
// send_buffer and whatever selects the branch) and the branch/brace lines are
// not visible in this listing; presumably this is the forward-scatter "begin"
// routine - confirm.
198 std::span<T> recv_buffer,
199 std::span<MPI_Request> requests,
// Case with no local and no remote sizes; the action taken is not visible here.
203 if (_sizes_local.empty() and _sizes_remote.empty())
// Neighbourhood-collective branch: exactly one request is expected, and a
// single MPI_Ineighbor_alltoallv is issued on _comm0.
// NOTE(review): the trailing request argument of this call is not visible.
210 assert(requests.size() == std::size_t(1));
211 MPI_Ineighbor_alltoallv(
212 send_buffer.data(), _sizes_local.data(), _displs_local.data(),
213 dolfinx::MPI::mpi_type<T>(), recv_buffer.data(), _sizes_remote.data(),
214 _displs_remote.data(), dolfinx::MPI::mpi_type<T>(), _comm0.comm(),
// Point-to-point branch: one request per rank in _src plus one per rank in
// _dest. Receives occupy requests[0, _src.size()); sends follow them.
220 assert(requests.size() == _dest.size() + _src.size());
// Post a receive from each rank _src[i] (any tag) into recv_buffer at offset
// _displs_remote[i].
221 for (std::size_t i = 0; i < _src.size(); i++)
223 MPI_Irecv(recv_buffer.data() + _displs_remote[i], _sizes_remote[i],
224 dolfinx::MPI::mpi_type<T>(), _src[i], MPI_ANY_TAG,
225 _comm0.comm(), &requests[i]);
// Post a send (tag 0) to each rank _dest[i] from send_buffer at offset
// _displs_local[i].
228 for (std::size_t i = 0; i < _dest.size(); i++)
230 MPI_Isend(send_buffer.data() + _displs_local[i], _sizes_local[i],
231 dolfinx::MPI::mpi_type<T>(), _dest[i], 0, _comm0.comm(),
232 &requests[i + _src.size()]);
// Reached when the selector matches none of the handled branches (the
// condition itself is not visible in this listing).
237 throw std::runtime_error(
"Scatter::type not recognized");
// Fragment of a const member function that runs a complete scatter into
// remote_data using a single MPI request and local pack/unpack lambdas.
// NOTE(review): the function name, its first parameter(s), the declaration of
// remote_buffer, the body of pack_fn and the name of the "begin" call are not
// visible in this listing; the final call to scatter_fwd_end suggests this is
// the forward-scatter convenience wrapper - confirm.
341 std::span<T> remote_data)
const
// One request, initialised to MPI_REQUEST_NULL.
343 std::vector<MPI_Request> requests(1, MPI_REQUEST_NULL);
// Packing callback taking (in, idx, out); it loops over idx, but the loop body
// is not visible here.
346 auto pack_fn = [](
auto&& in,
auto&& idx,
auto&& out)
348 for (std::size_t i = 0; i < idx.size(); ++i)
// Trailing arguments of the call that starts the communication.
352 std::span<T>(remote_buffer), pack_fn,
353 std::span<MPI_Request>(requests));
// Unpacking callback: combines each received value in[i] with the current
// value at out[idx[i]] through op and stores the result back at out[idx[i]].
355 auto unpack_fn = [](
auto&& in,
auto&& idx,
auto&& out,
auto op)
357 for (std::size_t i = 0; i < idx.size(); ++i)
358 out[idx[i]] = op(out[idx[i]], in[i]);
// Complete the communication, passing remote_buffer, remote_data, unpack_fn
// and the request to scatter_fwd_end.
361 scatter_fwd_end(std::span<const T>(remote_buffer), remote_data, unpack_fn,
362 std::span<MPI_Request>(requests));
// Fragment of a function that starts a non-blocking exchange in the opposite
// direction to the one using _sizes_local for sending: data is sent using the
// "remote" sizes/displacements and received using the "local" ones.
// NOTE(review): the function name, its remaining parameters (including
// send_buffer and whatever selects the branch) and the branch/brace lines are
// not visible in this listing; presumably this is the reverse-scatter "begin"
// routine - confirm.
393 std::span<T> recv_buffer,
394 std::span<MPI_Request> requests,
// Case with no local and no remote sizes; the action taken is not visible here.
398 if (_sizes_local.empty() and _sizes_remote.empty())
// Neighbourhood-collective branch: exactly one request is expected, and a
// single MPI_Ineighbor_alltoallv is issued on _comm1 with requests[0].
// NOTE(review): this call spells the type helper as MPI::mpi_type<T>() while
// the point-to-point calls below use dolfinx::MPI::mpi_type<T>(); presumably
// the same function reached through the enclosing namespace - confirm.
407 assert(requests.size() == 1);
408 MPI_Ineighbor_alltoallv(send_buffer.data(), _sizes_remote.data(),
409 _displs_remote.data(), MPI::mpi_type<T>(),
410 recv_buffer.data(), _sizes_local.data(),
411 _displs_local.data(), MPI::mpi_type<T>(),
412 _comm1.comm(), &requests[0]);
// Point-to-point branch: one request per rank in _dest plus one per rank in
// _src. Receives occupy requests[0, _dest.size()); sends follow them.
417 assert(requests.size() == _dest.size() + _src.size());
// Post a receive from each rank _dest[i] (any tag) into recv_buffer at offset
// _displs_local[i]. Note that the point-to-point calls use _comm0.
419 for (std::size_t i = 0; i < _dest.size(); i++)
421 MPI_Irecv(recv_buffer.data() + _displs_local[i], _sizes_local[i],
422 dolfinx::MPI::mpi_type<T>(), _dest[i], MPI_ANY_TAG,
423 _comm0.comm(), &requests[i]);
// Post a send (tag 0) to each rank _src[i] from send_buffer at offset
// _displs_remote[i].
428 for (std::size_t i = 0; i < _src.size(); i++)
430 MPI_Isend(send_buffer.data() + _displs_remote[i], _sizes_remote[i],
431 dolfinx::MPI::mpi_type<T>(), _src[i], 0, _comm0.comm(),
432 &requests[i + _dest.size()]);
// Reached when the selector matches none of the handled branches (the
// condition itself is not visible in this listing).
437 throw std::runtime_error(
"Scatter::type not recognized");
// scatter_rev: runs a complete reverse scatter from remote_data into
// local_data using a single MPI request and local pack/unpack lambdas.
// NOTE(review): the rest of the parameter list (the `op` passed to
// scatter_rev_end below is presumably a parameter), the declaration of
// local_buffer, the body of pack_fn and the name of the "begin" call are not
// visible in this listing.
538 void scatter_rev(std::span<T> local_data, std::span<const T> remote_data,
// Packing callback taking (in, idx, out); it loops over idx, but the loop body
// is not visible here.
543 auto pack_fn = [](
auto&& in,
auto&& idx,
auto&& out)
545 for (std::size_t i = 0; i < idx.size(); ++i)
// Unpacking callback: combines each received value in[i] with the current
// value at out[idx[i]] through op and stores the result back at out[idx[i]].
548 auto unpack_fn = [](
auto&& in,
auto&& idx,
auto&& out,
auto op)
550 for (std::size_t i = 0; i < idx.size(); ++i)
551 out[idx[i]] = op(out[idx[i]], in[i]);
// One request, initialised to MPI_REQUEST_NULL.
553 std::vector<MPI_Request> request(1, MPI_REQUEST_NULL);
// Trailing arguments of the call that starts the communication.
555 std::span<T>(local_buffer), pack_fn,
556 std::span<MPI_Request>(request));
// Complete the communication, passing local_buffer, local_data, unpack_fn, op
// and the request to scatter_rev_end.
557 scatter_rev_end(std::span<const T>(local_buffer), local_data, unpack_fn, op,
558 std::span<MPI_Request>(request));