316 std::array<std::int64_t, 2> shape,
317 std::int64_t rank_offset)
319 assert(rank_offset >= 0 or x.empty());
320 using T =
typename std::remove_reference_t<typename U::value_type>;
324 assert(x.size() % shape[1] == 0);
325 const std::int32_t shape0_local = x.size() / shape[1];
327 spdlog::debug(
"Sending data to post offices (distribute_to_postoffice)");
330 std::vector<int> row_to_dest(shape0_local);
331 for (std::int32_t i = 0; i < shape0_local; ++i)
334 row_to_dest[i] = dest;
339 std::vector<std::array<std::int32_t, 2>> dest_to_index;
340 dest_to_index.reserve(shape0_local);
341 for (std::int32_t i = 0; i < shape0_local; ++i)
343 std::size_t idx = i + rank_offset;
345 dest_to_index.push_back({dest, i});
347 std::ranges::sort(dest_to_index);
351 std::vector<int> dest;
352 std::vector<std::int32_t> num_items_per_dest,
353 pos_to_neigh_rank(shape0_local, -1);
355 auto it = dest_to_index.begin();
356 while (it != dest_to_index.end())
358 const int neigh_rank = dest.size();
361 dest.push_back((*it)[0]);
365 = std::find_if(it, dest_to_index.end(),
366 [r = dest.back()](
auto& idx) { return idx[0] != r; });
369 num_items_per_dest.push_back(std::distance(it, it1));
372 for (
auto e = it; e != it1; ++e)
373 pos_to_neigh_rank[(*e)[1]] = neigh_rank;
383 "Number of neighbourhood source ranks in distribute_to_postoffice: {}",
384 static_cast<int>(src.size()));
388 int err = MPI_Dist_graph_create_adjacent(
389 comm, src.size(), src.data(), MPI_UNWEIGHTED, dest.size(), dest.data(),
390 MPI_UNWEIGHTED, MPI_INFO_NULL,
false, &neigh_comm);
394 std::vector<std::int32_t> send_disp = {0};
395 std::partial_sum(num_items_per_dest.begin(), num_items_per_dest.end(),
396 std::back_inserter(send_disp));
399 std::vector<T> send_buffer_data(shape[1] * send_disp.back());
400 std::vector<std::int64_t> send_buffer_index(send_disp.back());
402 std::vector<std::int32_t> send_offsets = send_disp;
403 for (std::int32_t i = 0; i < shape0_local; ++i)
405 if (
int neigh_dest = pos_to_neigh_rank[i]; neigh_dest != -1)
407 std::size_t pos = send_offsets[neigh_dest];
408 send_buffer_index[pos] = i + rank_offset;
409 std::copy_n(std::next(x.begin(), i * shape[1]), shape[1],
410 std::next(send_buffer_data.begin(), shape[1] * pos));
411 ++send_offsets[neigh_dest];
418 std::vector<int> num_items_recv(src.size());
419 num_items_per_dest.reserve(1);
420 num_items_recv.reserve(1);
421 err = MPI_Neighbor_alltoall(num_items_per_dest.data(), 1, MPI_INT,
422 num_items_recv.data(), 1, MPI_INT, neigh_comm);
426 std::vector<std::int32_t> recv_disp(num_items_recv.size() + 1, 0);
427 std::partial_sum(num_items_recv.begin(), num_items_recv.end(),
428 std::next(recv_disp.begin()));
431 std::vector<std::int64_t> recv_buffer_index(recv_disp.back());
432 err = MPI_Neighbor_alltoallv(
433 send_buffer_index.data(), num_items_per_dest.data(), send_disp.data(),
434 MPI_INT64_T, recv_buffer_index.data(), num_items_recv.data(),
435 recv_disp.data(), MPI_INT64_T, neigh_comm);
439 MPI_Datatype compound_type;
441 MPI_Type_commit(&compound_type);
442 std::vector<T> recv_buffer_data(shape[1] * recv_disp.back());
443 err = MPI_Neighbor_alltoallv(
444 send_buffer_data.data(), num_items_per_dest.data(), send_disp.data(),
445 compound_type, recv_buffer_data.data(), num_items_recv.data(),
446 recv_disp.data(), compound_type, neigh_comm);
448 err = MPI_Type_free(&compound_type);
450 err = MPI_Comm_free(&neigh_comm);
453 spdlog::debug(
"Completed send data to post offices.");
457 std::vector<std::int32_t> index_local(recv_buffer_index.size());
458 std::ranges::transform(recv_buffer_index, index_local.begin(),
459 [r0](
auto idx) { return idx - r0; });
461 return {index_local, recv_buffer_data};
467 const U& x, std::array<std::int64_t, 2> shape,
468 std::int64_t rank_offset)
470 assert(rank_offset >= 0 or x.empty());
471 using T =
typename std::remove_reference_t<typename U::value_type>;
474 assert(shape[1] > 0);
478 assert(x.size() % shape[1] == 0);
479 const std::int64_t shape0_local = x.size() / shape[1];
486 comm, x, {shape[0], shape[1]}, rank_offset);
487 assert(post_indices.size() == post_x.size() / shape[1]);
493 std::vector<std::tuple<int, std::int64_t, std::int32_t>> src_to_index;
494 for (std::size_t i = 0; i < indices.size(); ++i)
496 std::size_t idx = indices[i];
498 src_to_index.push_back({src, idx, i});
500 std::ranges::sort(src_to_index);
504 std::vector<std::int32_t> num_items_per_src;
505 std::vector<int> src;
507 auto it = src_to_index.begin();
508 while (it != src_to_index.end())
510 src.push_back(std::get<0>(*it));
512 = std::find_if(it, src_to_index.end(), [r = src.back()](
auto& idx)
513 { return std::get<0>(idx) != r; });
514 num_items_per_src.push_back(std::distance(it, it1));
521 const std::vector<int> dest
524 "Neighbourhood destination ranks from post office in "
525 "distribute_data (rank, num dests, num dests/mpi_size): {}, {}, {}",
526 rank,
static_cast<int>(dest.size()),
527 static_cast<double>(dest.size()) /
size);
531 MPI_Comm neigh_comm0;
532 int err = MPI_Dist_graph_create_adjacent(
533 comm, dest.size(), dest.data(), MPI_UNWEIGHTED, src.size(), src.data(),
534 MPI_UNWEIGHTED, MPI_INFO_NULL,
false, &neigh_comm0);
538 std::vector<int> num_items_recv(dest.size());
539 num_items_per_src.reserve(1);
540 num_items_recv.reserve(1);
541 err = MPI_Neighbor_alltoall(num_items_per_src.data(), 1, MPI_INT,
542 num_items_recv.data(), 1, MPI_INT, neigh_comm0);
546 std::vector<std::int32_t> send_disp = {0};
547 std::partial_sum(num_items_per_src.begin(), num_items_per_src.end(),
548 std::back_inserter(send_disp));
549 std::vector<std::int32_t> recv_disp = {0};
550 std::partial_sum(num_items_recv.begin(), num_items_recv.end(),
551 std::back_inserter(recv_disp));
555 assert(send_disp.back() == (
int)src_to_index.size());
556 std::vector<std::int64_t> send_buffer_index(src_to_index.size());
557 std::ranges::transform(src_to_index, send_buffer_index.begin(),
558 [](
auto x) { return std::get<1>(x); });
561 std::vector<std::int64_t> recv_buffer_index(recv_disp.back());
562 err = MPI_Neighbor_alltoallv(
563 send_buffer_index.data(), num_items_per_src.data(), send_disp.data(),
564 MPI_INT64_T, recv_buffer_index.data(), num_items_recv.data(),
565 recv_disp.data(), MPI_INT64_T, neigh_comm0);
568 err = MPI_Comm_free(&neigh_comm0);
577 const std::array<std::int64_t, 2> postoffice_range
579 std::vector<std::int32_t> post_indices_map(
580 postoffice_range[1] - postoffice_range[0], -1);
581 for (std::size_t i = 0; i < post_indices.size(); ++i)
583 assert(post_indices[i] < (
int)post_indices_map.size());
584 post_indices_map[post_indices[i]] = i;
588 std::vector<T> send_buffer_data(shape[1] * recv_disp.back());
589 for (std::size_t p = 0; p < recv_disp.size() - 1; ++p)
591 int offset = recv_disp[p];
592 for (std::int32_t i = recv_disp[p]; i < recv_disp[p + 1]; ++i)
594 std::int64_t index = recv_buffer_index[i];
595 if (index >= rank_offset and index < (rank_offset + shape0_local))
598 std::int32_t local_index = index - rank_offset;
599 std::copy_n(std::next(x.begin(), shape[1] * local_index), shape[1],
600 std::next(send_buffer_data.begin(), shape[1] * offset));
605 auto local_index = index - postoffice_range[0];
606 std::int32_t pos = post_indices_map[local_index];
608 std::copy_n(std::next(post_x.begin(), shape[1] * pos), shape[1],
609 std::next(send_buffer_data.begin(), shape[1] * offset));
616 err = MPI_Dist_graph_create_adjacent(
617 comm, src.size(), src.data(), MPI_UNWEIGHTED, dest.size(), dest.data(),
618 MPI_UNWEIGHTED, MPI_INFO_NULL,
false, &neigh_comm0);
621 MPI_Datatype compound_type0;
623 MPI_Type_commit(&compound_type0);
625 std::vector<T> recv_buffer_data(shape[1] * send_disp.back());
626 err = MPI_Neighbor_alltoallv(
627 send_buffer_data.data(), num_items_recv.data(), recv_disp.data(),
628 compound_type0, recv_buffer_data.data(), num_items_per_src.data(),
629 send_disp.data(), compound_type0, neigh_comm0);
632 err = MPI_Type_free(&compound_type0);
634 err = MPI_Comm_free(&neigh_comm0);
637 std::vector<std::int32_t> index_pos_to_buffer(indices.size(), -1);
638 for (std::size_t i = 0; i < src_to_index.size(); ++i)
639 index_pos_to_buffer[std::get<2>(src_to_index[i])] = i;
642 std::vector<T> x_new(shape[1] * indices.size());
643 for (std::size_t i = 0; i < indices.size(); ++i)
645 const std::int64_t index = indices[i];
646 if (index >= rank_offset and index < (rank_offset + shape0_local))
649 auto local_index = index - rank_offset;
650 std::copy_n(std::next(x.begin(), shape[1] * local_index), shape[1],
651 std::next(x_new.begin(), shape[1] * i));
659 auto local_index = index - postoffice_range[0];
660 std::int32_t pos = post_indices_map[local_index];
662 std::copy_n(std::next(post_x.begin(), shape[1] * pos), shape[1],
663 std::next(x_new.begin(), shape[1] * i));
668 std::int32_t pos = index_pos_to_buffer[i];
670 std::copy_n(std::next(recv_buffer_data.begin(), shape[1] * pos),
671 shape[1], std::next(x_new.begin(), shape[1] * i));