52      : _bs(
bs), _remote_inds(0, alloc), _local_inds(0, alloc),
 
   53        _src(map.src().begin(), map.src().end()),
 
   54        _dest(map.dest().begin(), map.dest().end())
 
   60    assert(std::is_sorted(_src.begin(), _src.end()));
 
   61    assert(std::is_sorted(_dest.begin(), _dest.end()));
 
   67    MPI_Dist_graph_create_adjacent(
 
   68        map.
comm(), _src.size(), _src.data(), MPI_UNWEIGHTED, _dest.size(),
 
   69        _dest.data(), MPI_UNWEIGHTED, MPI_INFO_NULL, 
false, &comm0);
 
   73    MPI_Dist_graph_create_adjacent(
 
   74        map.
comm(), _dest.size(), _dest.data(), MPI_UNWEIGHTED, _src.size(),
 
   75        _src.data(), MPI_UNWEIGHTED, MPI_INFO_NULL, 
false, &comm1);
 
   79    std::span owners = map.
owners();
 
   80    std::vector<std::int32_t> perm(owners.size());
 
   81    std::iota(perm.begin(), perm.end(), 0);
 
   82    dolfinx::argsort_radix<std::int32_t>(owners, perm);
 
   86    std::span ghosts = map.
ghosts();
 
   87    std::vector<int> owners_sorted(owners.size());
 
   88    std::vector<std::int64_t> ghosts_sorted(owners.size());
 
   89    std::transform(perm.begin(), perm.end(), owners_sorted.begin(),
 
   90                   [&owners](
auto idx) { return owners[idx]; });
 
   91    std::transform(perm.begin(), perm.end(), ghosts_sorted.begin(),
 
   92                   [&ghosts](
auto idx) { return ghosts[idx]; });
 
  100    _sizes_remote.resize(_src.size(), 0);
 
  101    _displs_remote.resize(_src.size() + 1, 0);
 
  102    std::vector<std::int32_t>::iterator begin = owners_sorted.begin();
 
  103    for (std::size_t i = 0; i < _src.size(); i++)
 
  105      auto upper = std::upper_bound(begin, owners_sorted.end(), _src[i]);
 
  106      int num_ind = std::distance(begin, upper);
 
  107      _displs_remote[i + 1] = _displs_remote[i] + num_ind;
 
  108      _sizes_remote[i] = num_ind;
 
  119    _sizes_local.resize(_dest.size());
 
  120    _displs_local.resize(_sizes_local.size() + 1);
 
  121    _sizes_remote.reserve(1);
 
  122    _sizes_local.reserve(1);
 
  123    MPI_Neighbor_alltoall(_sizes_remote.data(), 1, MPI_INT32_T,
 
  124                          _sizes_local.data(), 1, MPI_INT32_T, _comm1.comm());
 
  125    std::partial_sum(_sizes_local.begin(), _sizes_local.end(),
 
  126                     std::next(_displs_local.begin()));
 
  128    assert((std::int32_t)ghosts_sorted.size() == _displs_remote.back());
 
  129    assert((std::int32_t)ghosts_sorted.size() == _displs_remote.back());
 
  133    std::vector<std::int64_t> recv_buffer(_displs_local.back(), 0);
 
  134    MPI_Neighbor_alltoallv(ghosts_sorted.data(), _sizes_remote.data(),
 
  135                           _displs_remote.data(), MPI_INT64_T,
 
  136                           recv_buffer.data(), _sizes_local.data(),
 
  137                           _displs_local.data(), MPI_INT64_T, _comm1.comm());
 
  139    const std::array<std::int64_t, 2> range = map.
local_range();
 
  142    std::for_each(recv_buffer.begin(), recv_buffer.end(), [range](
auto idx)
 
  143                  { assert(idx >= range[0] and idx < range[1]); });
 
  148      auto rescale = [](
auto& x, 
int bs)
 
  150        std::transform(x.begin(), x.end(), x.begin(),
 
  151                       [
bs](
auto e) { return e *= bs; });
 
  153      rescale(_sizes_local, 
bs);
 
  154      rescale(_displs_local, 
bs);
 
  155      rescale(_sizes_remote, 
bs);
 
  156      rescale(_displs_remote, 
bs);
 
  161    _local_inds = std::vector<std::int32_t, allocator_type>(
 
  162        recv_buffer.size() * _bs, alloc);
 
  163    std::int64_t offset = range[0] * _bs;
 
  164    for (std::size_t i = 0; i < recv_buffer.size(); i++)
 
  165      for (
int j = 0; j < _bs; j++)
 
  166        _local_inds[i * _bs + j] = (recv_buffer[i] * _bs + j) - offset;
 
  170        = std::vector<std::int32_t, allocator_type>(perm.size() * _bs, alloc);
 
  171    for (std::size_t i = 0; i < perm.size(); i++)
 
  172      for (
int j = 0; j < _bs; j++)
 
  173        _remote_inds[i * _bs + j] = perm[i] * _bs + j;
 
 
  198                         std::span<T> recv_buffer,
 
  199                         std::span<MPI_Request> requests,
 
  203    if (_sizes_local.empty() and _sizes_remote.empty())
 
  210      assert(requests.size() == std::size_t(1));
 
  211      MPI_Ineighbor_alltoallv(
 
  212          send_buffer.data(), _sizes_local.data(), _displs_local.data(),
 
  213          dolfinx::MPI::mpi_type<T>(), recv_buffer.data(), _sizes_remote.data(),
 
  214          _displs_remote.data(), dolfinx::MPI::mpi_type<T>(), _comm0.comm(),
 
  220      assert(requests.size() == _dest.size() + _src.size());
 
  221      for (std::size_t i = 0; i < _src.size(); i++)
 
  223        MPI_Irecv(recv_buffer.data() + _displs_remote[i], _sizes_remote[i],
 
  224                  dolfinx::MPI::mpi_type<T>(), _src[i], MPI_ANY_TAG,
 
  225                  _comm0.comm(), &requests[i]);
 
  228      for (std::size_t i = 0; i < _dest.size(); i++)
 
  230        MPI_Isend(send_buffer.data() + _displs_local[i], _sizes_local[i],
 
  231                  dolfinx::MPI::mpi_type<T>(), _dest[i], 0, _comm0.comm(),
 
  232                  &requests[i + _src.size()]);
 
  237      throw std::runtime_error(
"Scatter::type not recognized");
 
 
  341                   std::span<T> remote_data)
 const 
  343    std::vector<MPI_Request> requests(1, MPI_REQUEST_NULL);
 
  346    auto pack_fn = [](
auto&& in, 
auto&& idx, 
auto&& out)
 
  348      for (std::size_t i = 0; i < idx.size(); ++i)
 
  352                      std::span<T>(remote_buffer), pack_fn,
 
  353                      std::span<MPI_Request>(requests));
 
  355    auto unpack_fn = [](
auto&& in, 
auto&& idx, 
auto&& out, 
auto op)
 
  357      for (std::size_t i = 0; i < idx.size(); ++i)
 
  358        out[idx[i]] = op(out[idx[i]], in[i]);
 
  361    scatter_fwd_end(std::span<const T>(remote_buffer), remote_data, unpack_fn,
 
  362                    std::span<MPI_Request>(requests));
 
 
  393                         std::span<T> recv_buffer,
 
  394                         std::span<MPI_Request> requests,
 
  398    if (_sizes_local.empty() and _sizes_remote.empty())
 
  407      assert(requests.size() == 1);
 
  408      MPI_Ineighbor_alltoallv(send_buffer.data(), _sizes_remote.data(),
 
  409                              _displs_remote.data(), MPI::mpi_type<T>(),
 
  410                              recv_buffer.data(), _sizes_local.data(),
 
  411                              _displs_local.data(), MPI::mpi_type<T>(),
 
  412                              _comm1.comm(), &requests[0]);
 
  417      assert(requests.size() == _dest.size() + _src.size());
 
  419      for (std::size_t i = 0; i < _dest.size(); i++)
 
  421        MPI_Irecv(recv_buffer.data() + _displs_local[i], _sizes_local[i],
 
  422                  dolfinx::MPI::mpi_type<T>(), _dest[i], MPI_ANY_TAG,
 
  423                  _comm0.comm(), &requests[i]);
 
  428      for (std::size_t i = 0; i < _src.size(); i++)
 
  430        MPI_Isend(send_buffer.data() + _displs_remote[i], _sizes_remote[i],
 
  431                  dolfinx::MPI::mpi_type<T>(), _src[i], 0, _comm0.comm(),
 
  432                  &requests[i + _dest.size()]);
 
  437      throw std::runtime_error(
"Scatter::type not recognized");
 
 
  538  void scatter_rev(std::span<T> local_data, std::span<const T> remote_data,
 
  543    auto pack_fn = [](
auto&& in, 
auto&& idx, 
auto&& out)
 
  545      for (std::size_t i = 0; i < idx.size(); ++i)
 
  548    auto unpack_fn = [](
auto&& in, 
auto&& idx, 
auto&& out, 
auto op)
 
  550      for (std::size_t i = 0; i < idx.size(); ++i)
 
  551        out[idx[i]] = op(out[idx[i]], in[i]);
 
  553    std::vector<MPI_Request> request(1, MPI_REQUEST_NULL);
 
  555                      std::span<T>(local_buffer), pack_fn,
 
  556                      std::span<MPI_Request>(request));
 
  557    scatter_rev_end(std::span<const T>(local_buffer), local_data, unpack_fn, op,
 
  558                    std::span<MPI_Request>(request));