13 #include <dolfinx/common/Timer.h>
14 #include <dolfinx/common/log.h>
15 #include <dolfinx/graph/AdjacencyList.h>
18 #include <type_traits>
21 #include <xtl/xspan.hpp>
23 #define MPICH_IGNORE_CXX_SEEK 1
43 explicit Comm(MPI_Comm comm, bool duplicate = true);
61 MPI_Comm comm() const noexcept;
69 int rank(MPI_Comm comm);
73 int size(MPI_Comm comm);
90 const std::int64_t n = N / size;
91 const std::int64_t r = N % size;
95 return {rank * (n + 1), rank * (n + 1) + n + 1};
97 return {rank * n + r, rank * n + r + n};
111 const std::size_t n = N / size;
112 const std::size_t r = N % size;
114 if (index < r * (n + 1))
117 return index / (n + 1);
122 return r + (index - r * (n + 1)) / n;
130 std::array<std::vector<int>, 2> neighbors(MPI_Comm comm);
156 const xtl::span<const int>& edges);
183 const xtl::span<const int>& edges);
204 template <typename T>
205 std::pair<std::vector<std::int32_t>, std::vector<T>>
207 std::array<std::int64_t, 2> shape,
208 std::int64_t rank_offset);
231 template <typename T>
233 MPI_Comm comm, const xtl::span<const std::int64_t>& indices,
234 const xtl::span<const T>& x, std::array<std::int64_t, 2> shape,
235 std::int64_t rank_offset);
260 template <typename T>
262 const xtl::span<const std::int64_t>& indices,
263 const xtl::span<const T>& x, int shape1);
273 template <typename T>
277 template <typename T>
283 template <typename T>
286 if constexpr (std::is_same<T, float>::value)
287 return MPI_FLOAT;
288 else if constexpr (std::is_same<T, double>::value)
289 return MPI_DOUBLE;
290 else if constexpr (std::is_same<T, std::complex<double>>::value)
291 return MPI_C_DOUBLE_COMPLEX;
292 else if constexpr (std::is_same<T, std::complex<float>>::value)
293 return MPI_C_FLOAT_COMPLEX;
294 else if constexpr (std::is_same<T, short int>::value)
295 return MPI_SHORT;
296 else if constexpr (std::is_same<T, int>::value)
297 return MPI_INT;
298 else if constexpr (std::is_same<T, unsigned int>::value)
299 return MPI_UNSIGNED;
300 else if constexpr (std::is_same<T, long int>::value)
301 return MPI_LONG;
302 else if constexpr (std::is_same<T, unsigned long>::value)
303 return MPI_UNSIGNED_LONG;
304 else if constexpr (std::is_same<T, long long>::value)
305 return MPI_LONG_LONG;
306 else if constexpr (std::is_same<T, unsigned long long>::value)
307 return MPI_UNSIGNED_LONG_LONG;
308 else if constexpr (std::is_same<T, bool>::value)
310 else if constexpr (std::is_same<T, std::int8_t>::value)
314 static_assert(!std::is_same<T, T>::value);
318 template <typename T>
319 std::pair<std::vector<std::int32_t>, std::vector<T>>
321 std::array<std::int64_t, 2> shape,
322 std::int64_t rank_offset)
326 assert(x.size() % shape[1] == 0);
327 const std::int32_t shape0_local = x.size() / shape[1];
329 LOG(2) << "Sending data to post offices (distribute_to_postoffice)";
332 std::vector<int> row_to_dest(shape0_local);
333 for (std::int32_t i = 0; i < shape0_local; ++i)
336 row_to_dest[i] = dest;
341 std::vector<std::array<std::int32_t, 2>> dest_to_index;
342 dest_to_index.reserve(shape0_local);
343 for (std::int32_t i = 0; i < shape0_local; ++i)
345 std::size_t idx = i + rank_offset;
347 dest_to_index.push_back({dest, i});
349 std::sort(dest_to_index.begin(), dest_to_index.end());
353 std::vector<int> dest;
354 std::vector<std::int32_t> num_items_per_dest,
355 pos_to_neigh_rank(shape0_local, -1);
357 auto it = dest_to_index.begin();
358 while (it != dest_to_index.end())
360 const int neigh_rank = dest.size();
363 dest.push_back((*it)[0]);
367 = std::find_if(it, dest_to_index.end(),
368 [r = dest.back()](auto& idx) { return idx[0] != r; });
371 num_items_per_dest.push_back(std::distance(it, it1));
374 for (auto e = it; e != it1; ++e)
375 pos_to_neigh_rank[(*e)[1]] = neigh_rank;
385 << "Number of neighbourhood source ranks in distribute_to_postoffice: "
390 MPI_Dist_graph_create_adjacent(comm, src.size(), src.data(), MPI_UNWEIGHTED,
391 dest.size(), dest.data(), MPI_UNWEIGHTED,
392 MPI_INFO_NULL, false, &neigh_comm);
395 std::vector<std::int32_t> send_disp = {0};
396 std::partial_sum(num_items_per_dest.begin(), num_items_per_dest.end(),
397 std::back_insert_iterator(send_disp));
400 std::vector<T> send_buffer_data(shape[1] * send_disp.back());
401 std::vector<std::int64_t> send_buffer_index(send_disp.back());
403 std::vector<std::int32_t> send_offsets = send_disp;
404 for (std::int32_t i = 0; i < shape0_local; ++i)
406 if (int neigh_dest = pos_to_neigh_rank[i]; neigh_dest != -1)
408 std::size_t pos = send_offsets[neigh_dest];
409 send_buffer_index[pos] = i + rank_offset;
410 std::copy_n(std::next(x.begin(), i * shape[1]), shape[1],
411 std::next(send_buffer_data.begin(), shape[1] * pos));
412 ++send_offsets[neigh_dest];
419 std::vector<int> num_items_recv(src.size());
420 num_items_per_dest.reserve(1);
421 num_items_recv.reserve(1);
422 MPI_Neighbor_alltoall(num_items_per_dest.data(), 1, MPI_INT,
423 num_items_recv.data(), 1, MPI_INT, neigh_comm);
426 std::vector<std::int32_t> recv_disp(num_items_recv.size() + 1, 0);
427 std::partial_sum(num_items_recv.begin(), num_items_recv.end(),
428 std::next(recv_disp.begin()));
431 std::vector<std::int64_t> recv_buffer_index(recv_disp.back());
432 MPI_Neighbor_alltoallv(send_buffer_index.data(), num_items_per_dest.data(),
433 send_disp.data(), MPI_INT64_T,
434 recv_buffer_index.data(), num_items_recv.data(),
435 recv_disp.data(), MPI_INT64_T, neigh_comm);
438 MPI_Datatype compound_type;
439 MPI_Type_contiguous(shape[1], dolfinx::MPI::mpi_type<T>(), &compound_type);
440 MPI_Type_commit(&compound_type);
441 std::vector<T> recv_buffer_data(shape[1] * recv_disp.back());
442 MPI_Neighbor_alltoallv(send_buffer_data.data(), num_items_per_dest.data(),
443 send_disp.data(), compound_type,
444 recv_buffer_data.data(), num_items_recv.data(),
445 recv_disp.data(), compound_type, neigh_comm);
446 MPI_Type_free(&compound_type);
447 MPI_Comm_free(&neigh_comm);
449 LOG(2) << "Completed send data to post offices.";
453 std::vector<std::int32_t> index_local(recv_buffer_index.size());
454 std::transform(recv_buffer_index.cbegin(), recv_buffer_index.cend(),
455 index_local.begin(), [r0](auto idx) { return idx - r0; });
457 return {index_local, recv_buffer_data};
460 template <typename T>
462 MPI_Comm comm, const xtl::span<const std::int64_t>& indices,
463 const xtl::span<const T>& x, std::array<std::int64_t, 2> shape,
464 std::int64_t rank_offset)
467 assert(shape[1] > 0);
471 assert(x.size() % shape[1] == 0);
472 const std::int64_t shape0_local = x.size() / shape[1];
479 comm, x, {shape[0], shape[1]}, rank_offset);
480 assert(post_indices.size() == post_x.size() / shape[1]);
486 std::vector<std::tuple<int, std::int64_t, std::int32_t>> src_to_index;
487 for (std::size_t i = 0; i < indices.size(); ++i)
489 std::size_t idx = indices[i];
491 src_to_index.push_back({src, idx, i});
493 std::sort(src_to_index.begin(), src_to_index.end());
497 std::vector<std::int32_t> num_items_per_src;
498 std::vector<int> src;
500 auto it = src_to_index.begin();
501 while (it != src_to_index.end())
503 src.push_back(std::get<0>(*it));
504 auto it1 = std::find_if(it, src_to_index.end(),
505 [r = src.back()](auto& idx)
506 { return std::get<0>(idx) != r; });
507 num_items_per_src.push_back(std::distance(it, it1));
514 const std::vector<int> dest
516 LOG(INFO) << "Neighbourhood destination ranks from post office in "
517 "distribute_data (rank, num dests, num dests/mpi_size): "
518 << rank << ", " << dest.size() << ", "
519 << static_cast<double>(dest.size()) / size;
523 MPI_Comm neigh_comm0;
524 MPI_Dist_graph_create_adjacent(comm, dest.size(), dest.data(), MPI_UNWEIGHTED,
525 src.size(), src.data(), MPI_UNWEIGHTED,
526 MPI_INFO_NULL, false, &neigh_comm0);
529 std::vector<int> num_items_recv(dest.size());
530 num_items_per_src.reserve(1);
531 num_items_recv.reserve(1);
532 MPI_Neighbor_alltoall(num_items_per_src.data(), 1, MPI_INT,
533 num_items_recv.data(), 1, MPI_INT, neigh_comm0);
536 std::vector<std::int32_t> send_disp = {0};
537 std::partial_sum(num_items_per_src.begin(), num_items_per_src.end(),
538 std::back_insert_iterator(send_disp));
539 std::vector<std::int32_t> recv_disp = {0};
540 std::partial_sum(num_items_recv.begin(), num_items_recv.end(),
541 std::back_insert_iterator(recv_disp));
545 assert(send_disp.back() == (int)src_to_index.size());
546 std::vector<std::int64_t> send_buffer_index(src_to_index.size());
547 std::transform(src_to_index.cbegin(), src_to_index.cend(),
548 send_buffer_index.begin(),
549 [](auto& x) { return std::get<1>(x); });
552 std::vector<std::int64_t> recv_buffer_index(recv_disp.back());
553 MPI_Neighbor_alltoallv(send_buffer_index.data(), num_items_per_src.data(),
554 send_disp.data(), MPI_INT64_T,
555 recv_buffer_index.data(), num_items_recv.data(),
556 recv_disp.data(), MPI_INT64_T, neigh_comm0);
558 MPI_Comm_free(&neigh_comm0);
566 const std::array<std::int64_t, 2> postoffice_range
568 std::vector<std::int32_t> post_indices_map(
569 postoffice_range[1] - postoffice_range[0], -1);
570 for (std::size_t i = 0; i < post_indices.size(); ++i)
572 assert(post_indices[i] < (int)post_indices_map.size());
573 post_indices_map[post_indices[i]] = i;
577 std::vector<T> send_buffer_data(shape[1] * recv_disp.back());
578 for (std::size_t p = 0; p < recv_disp.size() - 1; ++p)
580 int offset = recv_disp[p];
581 for (std::int32_t i = recv_disp[p]; i < recv_disp[p + 1]; ++i)
583 std::int64_t index = recv_buffer_index[i];
584 if (index >= rank_offset and index < (rank_offset + shape0_local))
587 std::int32_t local_index = index - rank_offset;
588 std::copy_n(std::next(x.begin(), shape[1] * local_index), shape[1],
589 std::next(send_buffer_data.begin(), shape[1] * offset));
594 auto local_index = index - postoffice_range[0];
595 std::int32_t pos = post_indices_map[local_index];
597 std::copy_n(std::next(post_x.begin(), shape[1] * pos), shape[1],
598 std::next(send_buffer_data.begin(), shape[1] * offset));
605 MPI_Dist_graph_create_adjacent(comm, src.size(), src.data(), MPI_UNWEIGHTED,
606 dest.size(), dest.data(), MPI_UNWEIGHTED,
607 MPI_INFO_NULL, false, &neigh_comm0);
609 MPI_Datatype compound_type0;
610 MPI_Type_contiguous(shape[1], dolfinx::MPI::mpi_type<T>(), &compound_type0);
611 MPI_Type_commit(&compound_type0);
613 std::vector<T> recv_buffer_data(shape[1] * send_disp.back());
614 MPI_Neighbor_alltoallv(send_buffer_data.data(), num_items_recv.data(),
615 recv_disp.data(), compound_type0,
616 recv_buffer_data.data(), num_items_per_src.data(),
617 send_disp.data(), compound_type0, neigh_comm0);
619 MPI_Type_free(&compound_type0);
620 MPI_Comm_free(&neigh_comm0);
622 std::vector<std::int32_t> index_pos_to_buffer(indices.size(), -1);
623 for (std::size_t i = 0; i < src_to_index.size(); ++i)
624 index_pos_to_buffer[std::get<2>(src_to_index[i])] = i;
627 std::vector<T> x_new(shape[1] * indices.size());
628 for (std::size_t i = 0; i < indices.size(); ++i)
630 const std::int64_t index = indices[i];
631 if (index >= rank_offset and index < (rank_offset + shape0_local))
634 auto local_index = index - rank_offset;
635 std::copy_n(std::next(x.begin(), shape[1] * local_index), shape[1],
636 std::next(x_new.begin(), shape[1] * i));
643 auto local_index = index - postoffice_range[0];
644 std::int32_t pos = post_indices_map[local_index];
646 std::copy_n(std::next(post_x.begin(), shape[1] * pos), shape[1],
647 std::next(x_new.begin(), shape[1] * i));
652 std::int32_t pos = index_pos_to_buffer[i];
654 std::copy_n(std::next(recv_buffer_data.begin(), shape[1] * pos),
655 shape[1], std::next(x_new.begin(), shape[1] * i));
663 template <typename T>
665 const xtl::span<const std::int64_t>& indices,
666 const xtl::span<const T>& x, int shape1)
669 assert(x.size() % shape1 == 0);
670 const std::int64_t shape0_local = x.size() / shape1;
672 std::int64_t shape0(0), rank_offset(0);
673 MPI_Allreduce(&shape0_local, &shape0, 1, MPI_INT64_T, MPI_SUM, comm);
674 MPI_Exscan(&shape0_local, &rank_offset, 1, MPI_INT64_T, MPI_SUM, comm);
680 template <typename T>
685 int indegree(-1), outdegree(-2), weighted(-1);
686 MPI_Dist_graph_neighbors_count(comm, &indegree, &outdegree, &weighted);
690 std::vector<int> send_sizes(outdegree, 0);
691 std::vector<int> recv_sizes(indegree);
692 std::adjacent_difference(std::next(send_data.offsets().begin()),
693 send_data.offsets().end(), send_sizes.begin());
695 send_sizes.reserve(1);
696 recv_sizes.reserve(1);
697 MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, recv_sizes.data(), 1,
701 std::vector<int> recv_offsets(indegree + 1);
703 std::partial_sum(recv_sizes.begin(), recv_sizes.end(),
704 std::next(recv_offsets.begin(), 1));
706 std::vector<T> recv_data(recv_offsets[recv_offsets.size() - 1]);
707 MPI_Neighbor_alltoallv(
708 send_data.array().data(), send_sizes.data(), send_data.offsets().data(),
709 dolfinx::MPI::mpi_type<T>(), recv_data.data(), recv_sizes.data(),
710 recv_offsets.data(), dolfinx::MPI::mpi_type<T>(), comm);
A duplicated MPI communicator whose lifetime is managed by this class.
Definition: MPI.h:40
Comm(MPI_Comm comm, bool duplicate=true)
Duplicate communicator and wrap duplicate.
Definition: MPI.cpp:12
~Comm()
Destructor (frees wrapped communicator)
Definition: MPI.cpp:39
MPI_Comm comm() const noexcept
Return the underlying MPI_Comm object.
Definition: MPI.cpp:73
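A minimal usage sketch (assuming MPI_Init has been called and dolfinx/common/MPI.h is included):
// Wrap MPI_COMM_WORLD in a managed duplicate; the duplicated communicator is
// freed automatically when 'comm' goes out of scope.
dolfinx::MPI::Comm comm(MPI_COMM_WORLD);
const int r = dolfinx::MPI::rank(comm.comm());
const int s = dolfinx::MPI::size(comm.comm());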
A timer can be used for timing tasks. The basic usage is.
Definition: Timer.h:31
This class provides a static adjacency list data structure. It is commonly used to store directed graphs.
Definition: AdjacencyList.h:46
const std::vector< T > & array() const
Return contiguous array of links for all nodes (const version)
Definition: AdjacencyList.h:148
const std::vector< std::int32_t > & offsets() const
Offset for each node in array() (const version)
Definition: AdjacencyList.h:154
MPI support functionality.
Definition: MPI.h:28
std::vector< T > distribute_data(MPI_Comm comm, const xtl::span< const std::int64_t > &indices, const xtl::span< const T > &x, int shape1)
Distribute rows of a rectangular data array to ranks where they are required (scalable version).
Definition: MPI.h:664
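A hedged usage sketch for distribute_data; the local row count, values and requested global rows below are purely illustrative:
// Each rank owns a contiguous block of rows (shape1 columns, row-major) of a
// global array and requests the global rows listed in 'indices'.
const int shape1 = 3;
std::vector<double> x_local(4 * shape1, 0.0);  // this rank's 4 local rows
std::vector<std::int64_t> indices = {0, 5};    // global rows needed on this rank
std::vector<double> x_req = dolfinx::MPI::distribute_data<double>(
    MPI_COMM_WORLD, xtl::span<const std::int64_t>(indices),
    xtl::span<const double>(x_local), shape1);
// x_req holds indices.size() rows of shape1 values, ordered as in 'indices'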
std::vector< int > compute_graph_edges_pcx(MPI_Comm comm, const xtl::span< const int > &edges)
Determine incoming graph edges using the PCX consensus algorithm.
Definition: MPI.cpp:91
std::vector< T > distribute_from_postoffice(MPI_Comm comm, const xtl::span< const std::int64_t > &indices, const xtl::span< const T > &x, std::array< std::int64_t, 2 > shape, std::int64_t rank_offset)
Distribute rows of a rectangular data array from post office ranks to ranks where they are required.
Definition: MPI.h:461
std::array< std::vector< int >, 2 > neighbors(MPI_Comm comm)
Return list of neighbor ranks (sources and destinations) for a neighborhood communicator.
Definition: MPI.cpp:224
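A brief sketch, assuming 'neigh_comm' is an existing distributed-graph communicator and that the returned array is ordered as the description suggests (sources first, then destinations):
auto [sources, destinations] = dolfinx::MPI::neighbors(neigh_comm);
// sources: ranks that send to this rank; destinations: ranks this rank sends to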
constexpr int index_owner(int size, std::size_t index, std::size_t N)
Return which rank owns index in global range [0, N - 1] (inverse of MPI::local_range).
Definition: MPI.h:106
int size(MPI_Comm comm)
Return size of the group (number of processes) associated with the communicator.
Definition: MPI.cpp:83
int rank(MPI_Comm comm)
Return process rank for the communicator.
Definition: MPI.cpp:75
std::vector< int > compute_graph_edges_nbx(MPI_Comm comm, const xtl::span< const int > &edges)
Determine incoming graph edges using the NBX consensus algorithm.
Definition: MPI.cpp:151
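A sketch of discovering incoming edges with the NBX algorithm (compute_graph_edges_pcx takes the same arguments); the destination list is illustrative:
const int rank = dolfinx::MPI::rank(MPI_COMM_WORLD);
const int size = dolfinx::MPI::size(MPI_COMM_WORLD);
std::vector<int> dest = {(rank + 1) % size};  // ranks this rank will send to
std::vector<int> src = dolfinx::MPI::compute_graph_edges_nbx(
    MPI_COMM_WORLD, xtl::span<const int>(dest));
// 'src' now holds the ranks that listed this rank among their destinations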
std::pair< std::vector< std::int32_t >, std::vector< T > > distribute_to_postoffice(MPI_Comm comm, const xtl::span< const T > &x, std::array< std::int64_t, 2 > shape, std::int64_t rank_offset)
Distribute row data to 'post office' ranks.
Definition: MPI.h:320
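A hedged sketch of sending local rows to their 'post office' ranks; the global shape and the assumption of 25 rows per rank (4 ranks) are illustrative, with rank_offset computed by MPI_Exscan as in the header:
const std::array<std::int64_t, 2> shape = {100, 3};  // global rows x columns
std::vector<double> x_local(25 * shape[1], 1.0);     // this rank's 25 rows
std::int64_t shape0_local = x_local.size() / shape[1];
std::int64_t rank_offset = 0;
MPI_Exscan(&shape0_local, &rank_offset, 1, MPI_INT64_T, MPI_SUM, MPI_COMM_WORLD);
auto [post_rows, post_x] = dolfinx::MPI::distribute_to_postoffice<double>(
    MPI_COMM_WORLD, xtl::span<const double>(x_local), shape, rank_offset);
// post_rows[i] is the row index, relative to this rank's post-office range,
// of row i in post_x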
graph::AdjacencyList< T > neighbor_all_to_all(MPI_Comm comm, const graph::AdjacencyList< T > &send_data)
Send in_values[n0] to neighbor process n0 and receive values from neighbor process n1 in out_values[n1].
Definition: MPI.h:682
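A sketch, assuming 'neigh_comm' was created with MPI_Dist_graph_create_adjacent and this rank has two destination neighbours; the payload values are illustrative:
std::vector<std::int64_t> values = {1, 2, 3, 7, 8};
std::vector<std::int32_t> offsets = {0, 3, 5};  // 3 values to neighbour 0, 2 to neighbour 1
dolfinx::graph::AdjacencyList<std::int64_t> send_data(values, offsets);
auto recv_data = dolfinx::MPI::neighbor_all_to_all(neigh_comm, send_data);
// recv_data.links(n1) holds the values received from incoming neighbour n1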
constexpr MPI_Datatype mpi_type()
MPI Type.
Definition: MPI.h:284
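A brief sketch using mpi_type to select the MPI datatype matching a C++ type:
std::vector<double> local(10, 1.0), global(10);
MPI_Allreduce(local.data(), global.data(), 10,
              dolfinx::MPI::mpi_type<double>(), MPI_SUM, MPI_COMM_WORLD);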
constexpr std::array< std::int64_t, 2 > local_range(int rank, std::int64_t N, int size)
Return local range for the calling process, partitioning the global [0, N - 1] range across all ranks.
Definition: MPI.h:82
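A sketch combining local_range and index_owner for a global range of 100 indices:
const int rank = dolfinx::MPI::rank(MPI_COMM_WORLD);
const int size = dolfinx::MPI::size(MPI_COMM_WORLD);
std::array<std::int64_t, 2> range = dolfinx::MPI::local_range(rank, 100, size);
// this rank owns the half-open range of global indices [range[0], range[1])
int owner = dolfinx::MPI::index_owner(size, 42, 100);  // rank that owns global index 42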
tag
MPI communication tags.
Definition: MPI.h:32