314 std::array<std::int64_t, 2> shape,
315 std::int64_t rank_offset)
317 using T =
typename std::remove_reference_t<typename U::value_type>;
321 assert(x.size() % shape[1] == 0);
322 const std::int32_t shape0_local = x.size() / shape[1];
324 LOG(2) <<
"Sending data to post offices (distribute_to_postoffice)";
327 std::vector<int> row_to_dest(shape0_local);
328 for (std::int32_t i = 0; i < shape0_local; ++i)
331 row_to_dest[i] = dest;
336 std::vector<std::array<std::int32_t, 2>> dest_to_index;
337 dest_to_index.reserve(shape0_local);
338 for (std::int32_t i = 0; i < shape0_local; ++i)
340 std::size_t idx = i + rank_offset;
342 dest_to_index.push_back({dest, i});
344 std::sort(dest_to_index.begin(), dest_to_index.end());
348 std::vector<int> dest;
349 std::vector<std::int32_t> num_items_per_dest,
350 pos_to_neigh_rank(shape0_local, -1);
352 auto it = dest_to_index.begin();
353 while (it != dest_to_index.end())
355 const int neigh_rank = dest.size();
358 dest.push_back((*it)[0]);
362 = std::find_if(it, dest_to_index.end(),
363 [r = dest.back()](
auto& idx) { return idx[0] != r; });
366 num_items_per_dest.push_back(std::distance(it, it1));
369 for (
auto e = it; e != it1; ++e)
370 pos_to_neigh_rank[(*e)[1]] = neigh_rank;
380 <<
"Number of neighbourhood source ranks in distribute_to_postoffice: "
385 int err = MPI_Dist_graph_create_adjacent(
386 comm, src.size(), src.data(), MPI_UNWEIGHTED, dest.size(), dest.data(),
387 MPI_UNWEIGHTED, MPI_INFO_NULL,
false, &neigh_comm);
391 std::vector<std::int32_t> send_disp = {0};
392 std::partial_sum(num_items_per_dest.begin(), num_items_per_dest.end(),
393 std::back_inserter(send_disp));
396 std::vector<T> send_buffer_data(shape[1] * send_disp.back());
397 std::vector<std::int64_t> send_buffer_index(send_disp.back());
399 std::vector<std::int32_t> send_offsets = send_disp;
400 for (std::int32_t i = 0; i < shape0_local; ++i)
402 if (
int neigh_dest = pos_to_neigh_rank[i]; neigh_dest != -1)
404 std::size_t pos = send_offsets[neigh_dest];
405 send_buffer_index[pos] = i + rank_offset;
406 std::copy_n(std::next(x.begin(), i * shape[1]), shape[1],
407 std::next(send_buffer_data.begin(), shape[1] * pos));
408 ++send_offsets[neigh_dest];
415 std::vector<int> num_items_recv(src.size());
416 num_items_per_dest.reserve(1);
417 num_items_recv.reserve(1);
418 err = MPI_Neighbor_alltoall(num_items_per_dest.data(), 1, MPI_INT,
419 num_items_recv.data(), 1, MPI_INT, neigh_comm);
423 std::vector<std::int32_t> recv_disp(num_items_recv.size() + 1, 0);
424 std::partial_sum(num_items_recv.begin(), num_items_recv.end(),
425 std::next(recv_disp.begin()));
428 std::vector<std::int64_t> recv_buffer_index(recv_disp.back());
429 err = MPI_Neighbor_alltoallv(
430 send_buffer_index.data(), num_items_per_dest.data(), send_disp.data(),
431 MPI_INT64_T, recv_buffer_index.data(), num_items_recv.data(),
432 recv_disp.data(), MPI_INT64_T, neigh_comm);
436 MPI_Datatype compound_type;
437 MPI_Type_contiguous(shape[1], dolfinx::MPI::mpi_type<T>(), &compound_type);
438 MPI_Type_commit(&compound_type);
439 std::vector<T> recv_buffer_data(shape[1] * recv_disp.back());
440 err = MPI_Neighbor_alltoallv(
441 send_buffer_data.data(), num_items_per_dest.data(), send_disp.data(),
442 compound_type, recv_buffer_data.data(), num_items_recv.data(),
443 recv_disp.data(), compound_type, neigh_comm);
445 err = MPI_Type_free(&compound_type);
447 err = MPI_Comm_free(&neigh_comm);
450 LOG(2) <<
"Completed send data to post offices.";
454 std::vector<std::int32_t> index_local(recv_buffer_index.size());
455 std::transform(recv_buffer_index.cbegin(), recv_buffer_index.cend(),
456 index_local.begin(), [r0](
auto idx) { return idx - r0; });
458 return {index_local, recv_buffer_data};
464 const U& x, std::array<std::int64_t, 2> shape,
465 std::int64_t rank_offset)
467 using T =
typename std::remove_reference_t<typename U::value_type>;
470 assert(shape[1] > 0);
474 assert(x.size() % shape[1] == 0);
475 const std::int64_t shape0_local = x.size() / shape[1];
482 comm, x, {shape[0], shape[1]}, rank_offset);
483 assert(post_indices.size() == post_x.size() / shape[1]);
489 std::vector<std::tuple<int, std::int64_t, std::int32_t>> src_to_index;
490 for (std::size_t i = 0; i < indices.size(); ++i)
492 std::size_t idx = indices[i];
494 src_to_index.push_back({src, idx, i});
496 std::sort(src_to_index.begin(), src_to_index.end());
500 std::vector<std::int32_t> num_items_per_src;
501 std::vector<int> src;
503 auto it = src_to_index.begin();
504 while (it != src_to_index.end())
506 src.push_back(std::get<0>(*it));
507 auto it1 = std::find_if(it, src_to_index.end(),
508 [r = src.back()](
auto& idx)
509 { return std::get<0>(idx) != r; });
510 num_items_per_src.push_back(std::distance(it, it1));
517 const std::vector<int> dest
519 LOG(INFO) <<
"Neighbourhood destination ranks from post office in "
520 "distribute_data (rank, num dests, num dests/mpi_size): "
521 <<
rank <<
", " << dest.size() <<
", "
522 <<
static_cast<double>(dest.size()) /
size;
526 MPI_Comm neigh_comm0;
527 int err = MPI_Dist_graph_create_adjacent(
528 comm, dest.size(), dest.data(), MPI_UNWEIGHTED, src.size(), src.data(),
529 MPI_UNWEIGHTED, MPI_INFO_NULL,
false, &neigh_comm0);
533 std::vector<int> num_items_recv(dest.size());
534 num_items_per_src.reserve(1);
535 num_items_recv.reserve(1);
536 err = MPI_Neighbor_alltoall(num_items_per_src.data(), 1, MPI_INT,
537 num_items_recv.data(), 1, MPI_INT, neigh_comm0);
541 std::vector<std::int32_t> send_disp = {0};
542 std::partial_sum(num_items_per_src.begin(), num_items_per_src.end(),
543 std::back_inserter(send_disp));
544 std::vector<std::int32_t> recv_disp = {0};
545 std::partial_sum(num_items_recv.begin(), num_items_recv.end(),
546 std::back_inserter(recv_disp));
550 assert(send_disp.back() == (
int)src_to_index.size());
551 std::vector<std::int64_t> send_buffer_index(src_to_index.size());
552 std::transform(src_to_index.cbegin(), src_to_index.cend(),
553 send_buffer_index.begin(),
554 [](
auto& x) { return std::get<1>(x); });
557 std::vector<std::int64_t> recv_buffer_index(recv_disp.back());
558 err = MPI_Neighbor_alltoallv(
559 send_buffer_index.data(), num_items_per_src.data(), send_disp.data(),
560 MPI_INT64_T, recv_buffer_index.data(), num_items_recv.data(),
561 recv_disp.data(), MPI_INT64_T, neigh_comm0);
564 err = MPI_Comm_free(&neigh_comm0);
573 const std::array<std::int64_t, 2> postoffice_range
575 std::vector<std::int32_t> post_indices_map(
576 postoffice_range[1] - postoffice_range[0], -1);
577 for (std::size_t i = 0; i < post_indices.size(); ++i)
579 assert(post_indices[i] < (
int)post_indices_map.size());
580 post_indices_map[post_indices[i]] = i;
584 std::vector<T> send_buffer_data(shape[1] * recv_disp.back());
585 for (std::size_t p = 0; p < recv_disp.size() - 1; ++p)
587 int offset = recv_disp[p];
588 for (std::int32_t i = recv_disp[p]; i < recv_disp[p + 1]; ++i)
590 std::int64_t index = recv_buffer_index[i];
591 if (index >= rank_offset and index < (rank_offset + shape0_local))
594 std::int32_t local_index = index - rank_offset;
595 std::copy_n(std::next(x.begin(), shape[1] * local_index), shape[1],
596 std::next(send_buffer_data.begin(), shape[1] * offset));
601 auto local_index = index - postoffice_range[0];
602 std::int32_t pos = post_indices_map[local_index];
604 std::copy_n(std::next(post_x.begin(), shape[1] * pos), shape[1],
605 std::next(send_buffer_data.begin(), shape[1] * offset));
612 err = MPI_Dist_graph_create_adjacent(
613 comm, src.size(), src.data(), MPI_UNWEIGHTED, dest.size(), dest.data(),
614 MPI_UNWEIGHTED, MPI_INFO_NULL,
false, &neigh_comm0);
617 MPI_Datatype compound_type0;
618 MPI_Type_contiguous(shape[1], dolfinx::MPI::mpi_type<T>(), &compound_type0);
619 MPI_Type_commit(&compound_type0);
621 std::vector<T> recv_buffer_data(shape[1] * send_disp.back());
622 err = MPI_Neighbor_alltoallv(
623 send_buffer_data.data(), num_items_recv.data(), recv_disp.data(),
624 compound_type0, recv_buffer_data.data(), num_items_per_src.data(),
625 send_disp.data(), compound_type0, neigh_comm0);
628 err = MPI_Type_free(&compound_type0);
630 err = MPI_Comm_free(&neigh_comm0);
633 std::vector<std::int32_t> index_pos_to_buffer(indices.size(), -1);
634 for (std::size_t i = 0; i < src_to_index.size(); ++i)
635 index_pos_to_buffer[std::get<2>(src_to_index[i])] = i;
638 std::vector<T> x_new(shape[1] * indices.size());
639 for (std::size_t i = 0; i < indices.size(); ++i)
641 const std::int64_t index = indices[i];
642 if (index >= rank_offset and index < (rank_offset + shape0_local))
645 auto local_index = index - rank_offset;
646 std::copy_n(std::next(x.begin(), shape[1] * local_index), shape[1],
647 std::next(x_new.begin(), shape[1] * i));
654 auto local_index = index - postoffice_range[0];
655 std::int32_t pos = post_indices_map[local_index];
657 std::copy_n(std::next(post_x.begin(), shape[1] * pos), shape[1],
658 std::next(x_new.begin(), shape[1] * i));
663 std::int32_t pos = index_pos_to_buffer[i];
665 std::copy_n(std::next(recv_buffer_data.begin(), shape[1] * pos),
666 shape[1], std::next(x_new.begin(), shape[1] * i));