409 _block_mode(mode), _bs({p.block_size(0), p.block_size(1)}),
410 _data(p.num_nonzeros() * _bs[0] * _bs[1], 0),
411 _cols(p.graph().first.begin(), p.graph().first.end()),
412 _row_ptr(p.graph().second.begin(), p.graph().second.end()),
418 for (
int i = 0; i < 2; ++i)
420 const auto im = _index_maps[i];
421 const int size_local = im->size_local() * _bs[i];
422 const std::vector<std::int64_t>& ghost_i = im->ghosts();
423 std::vector<std::int64_t> ghosts;
424 const std::vector<int> ghost_owner_i = im->owners();
425 std::vector<int> src_rank;
426 for (std::size_t j = 0; j < ghost_i.size(); ++j)
428 for (
int k = 0; k < _bs[i]; ++k)
430 ghosts.push_back(ghost_i[j] * _bs[i] + k);
431 src_rank.push_back(ghost_owner_i[j]);
434 const std::array<std::vector<int>, 2> src_dest0
435 = {_index_maps[i]->src(), _index_maps[i]->dest()};
436 _index_maps[i] = std::make_shared<common::IndexMap>(
437 _index_maps[i]->comm(), size_local, src_dest0, ghosts, src_rank);
442 column_container_type new_cols;
443 new_cols.reserve(_data.size());
444 rowptr_container_type new_row_ptr = {0};
445 new_row_ptr.reserve(_row_ptr.size() * _bs[0]);
446 std::span<const std::int32_t> num_diag_nnz = p.off_diagonal_offsets();
448 for (std::size_t i = 0; i < _row_ptr.size() - 1; ++i)
451 for (
int q0 = 0; q0 < _bs[0]; ++q0)
453 _off_diagonal_offset.push_back(new_row_ptr.back()
454 + num_diag_nnz[i] * _bs[1]);
455 for (
auto j = _row_ptr[i]; j < _row_ptr[i + 1]; ++j)
457 for (
int q1 = 0; q1 < _bs[1]; ++q1)
458 new_cols.push_back(_cols[j] * _bs[1] + q1);
460 new_row_ptr.push_back(new_cols.size());
464 _row_ptr = new_row_ptr;
471 std::span<const std::int32_t> num_diag_nnz = p.off_diagonal_offsets();
472 _off_diagonal_offset.reserve(num_diag_nnz.size());
473 std::transform(num_diag_nnz.begin(), num_diag_nnz.end(), _row_ptr.begin(),
474 std::back_inserter(_off_diagonal_offset), std::plus{});
478 const std::array local_size
479 = {_index_maps[0]->size_local(), _index_maps[1]->size_local()};
481 = {_index_maps[0]->local_range(), _index_maps[1]->local_range()};
482 const std::vector<std::int64_t>& ghosts1 = _index_maps[1]->ghosts();
484 const std::vector<std::int64_t>& ghosts0 = _index_maps[0]->ghosts();
485 const std::vector<int>& src_ranks = _index_maps[0]->src();
486 const std::vector<int>& dest_ranks = _index_maps[0]->dest();
490 MPI_Dist_graph_create_adjacent(_index_maps[0]->comm(), dest_ranks.size(),
491 dest_ranks.data(), MPI_UNWEIGHTED,
492 src_ranks.size(), src_ranks.data(),
493 MPI_UNWEIGHTED, MPI_INFO_NULL,
false, &comm);
498 _ghost_row_to_rank.reserve(_index_maps[0]->owners().
size());
499 for (
int r : _index_maps[0]->owners())
501 auto it = std::lower_bound(src_ranks.begin(), src_ranks.end(), r);
502 assert(it != src_ranks.end() and *it == r);
503 int pos = std::distance(src_ranks.begin(), it);
504 _ghost_row_to_rank.push_back(pos);
508 std::vector<std::int32_t> data_per_proc(src_ranks.size(), 0);
509 for (std::size_t i = 0; i < _ghost_row_to_rank.size(); ++i)
511 assert(_ghost_row_to_rank[i] < data_per_proc.size());
512 std::size_t pos = local_size[0] + i;
513 data_per_proc[_ghost_row_to_rank[i]] += _row_ptr[pos + 1] - _row_ptr[pos];
517 _val_send_disp.resize(src_ranks.size() + 1, 0);
518 std::partial_sum(data_per_proc.begin(), data_per_proc.end(),
519 std::next(_val_send_disp.begin()));
522 std::vector<std::int64_t> ghost_index_data(2 * _val_send_disp.back());
524 std::vector<int> insert_pos = _val_send_disp;
525 for (std::size_t i = 0; i < _ghost_row_to_rank.size(); ++i)
527 const int rank = _ghost_row_to_rank[i];
528 int row_id = local_size[0] + i;
529 for (
int j = _row_ptr[row_id]; j < _row_ptr[row_id + 1]; ++j)
532 const std::int32_t idx_pos = 2 * insert_pos[
rank];
535 ghost_index_data[idx_pos] = ghosts0[i];
536 if (std::int32_t col_local = _cols[j]; col_local < local_size[1])
537 ghost_index_data[idx_pos + 1] = col_local + local_range[1][0];
539 ghost_index_data[idx_pos + 1] = ghosts1[col_local - local_size[1]];
541 insert_pos[
rank] += 1;
547 std::vector<std::int64_t> ghost_index_array;
548 std::vector<int> recv_disp;
550 std::vector<int> send_sizes;
551 std::transform(data_per_proc.begin(), data_per_proc.end(),
552 std::back_inserter(send_sizes),
553 [](
auto x) { return 2 * x; });
555 std::vector<int> recv_sizes(dest_ranks.size());
556 send_sizes.reserve(1);
557 recv_sizes.reserve(1);
558 MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, recv_sizes.data(), 1,
559 MPI_INT, _comm.comm());
562 std::vector<int> send_disp = {0};
563 std::partial_sum(send_sizes.begin(), send_sizes.end(),
564 std::back_inserter(send_disp));
566 std::partial_sum(recv_sizes.begin(), recv_sizes.end(),
567 std::back_inserter(recv_disp));
569 ghost_index_array.resize(recv_disp.back());
570 MPI_Neighbor_alltoallv(ghost_index_data.data(), send_sizes.data(),
571 send_disp.data(), MPI_INT64_T,
572 ghost_index_array.data(), recv_sizes.data(),
573 recv_disp.data(), MPI_INT64_T, _comm.comm());
578 _val_recv_disp.resize(recv_disp.size());
579 const int bs2 = _bs[0] * _bs[1];
580 std::transform(recv_disp.begin(), recv_disp.end(), _val_recv_disp.begin(),
581 [&bs2](
auto d) { return bs2 * d / 2; });
582 std::transform(_val_send_disp.begin(), _val_send_disp.end(),
583 _val_send_disp.begin(), [&bs2](
auto d) { return d * bs2; });
586 std::vector<std::pair<std::int64_t, std::int32_t>> global_to_local;
587 global_to_local.reserve(ghosts1.size());
588 for (std::int64_t idx : ghosts1)
589 global_to_local.push_back({idx, global_to_local.size() + local_size[1]});
590 std::sort(global_to_local.begin(), global_to_local.end());
594 for (std::size_t i = 0; i < ghost_index_array.size(); i += 2)
597 const std::int32_t local_row = ghost_index_array[i] -
local_range[0][0];
598 assert(local_row >= 0 and local_row < local_size[0]);
601 std::int32_t local_col = ghost_index_array[i + 1] -
local_range[1][0];
602 if (local_col < 0 or local_col >= local_size[1])
604 auto it = std::lower_bound(global_to_local.begin(), global_to_local.end(),
605 std::pair(ghost_index_array[i + 1], -1),
607 { return a.first < b.first; });
608 assert(it != global_to_local.end()
609 and it->first == ghost_index_array[i + 1]);
610 local_col = it->second;
612 auto cit0 = std::next(_cols.begin(), _row_ptr[local_row]);
613 auto cit1 = std::next(_cols.begin(), _row_ptr[local_row + 1]);
616 auto cit = std::lower_bound(cit0, cit1, local_col);
618 assert(*cit == local_col);
619 std::size_t d = std::distance(_cols.begin(), cit);
620 _unpack_pos.push_back(d);
647 const std::int32_t local_size0 = _index_maps[0]->size_local();
648 const std::int32_t num_ghosts0 = _index_maps[0]->num_ghosts();
649 const int bs2 = _bs[0] * _bs[1];
652 std::vector<int> insert_pos = _val_send_disp;
653 _ghost_value_data.resize(_val_send_disp.back());
654 for (
int i = 0; i < num_ghosts0; ++i)
656 const int rank = _ghost_row_to_rank[i];
660 const std::int32_t val_pos = insert_pos[rank];
661 std::copy(std::next(_data.data(), _row_ptr[local_size0 + i] * bs2),
662 std::next(_data.data(), _row_ptr[local_size0 + i + 1] * bs2),
663 std::next(_ghost_value_data.begin(), val_pos));
665 += bs2 * (_row_ptr[local_size0 + i + 1] - _row_ptr[local_size0 + i]);
668 _ghost_value_data_in.resize(_val_recv_disp.back());
671 std::vector<int> val_send_count(_val_send_disp.size() - 1);
672 std::adjacent_difference(std::next(_val_send_disp.begin()),
673 _val_send_disp.end(), val_send_count.begin());
675 std::vector<int> val_recv_count(_val_recv_disp.size() - 1);
676 std::adjacent_difference(std::next(_val_recv_disp.begin()),
677 _val_recv_disp.end(), val_recv_count.begin());
679 int status = MPI_Ineighbor_alltoallv(
680 _ghost_value_data.data(), val_send_count.data(), _val_send_disp.data(),
681 dolfinx::MPI::mpi_type<value_type>(), _ghost_value_data_in.data(),
682 val_recv_count.data(), _val_recv_disp.data(),
683 dolfinx::MPI::mpi_type<value_type>(), _comm.comm(), &_request);
684 assert(status == MPI_SUCCESS);