624MatrixCSR<U, V, W, X>::MatrixCSR(
const SparsityType& p,
BlockMode mode)
625 : _index_maps({p.index_map(0), p.index_map(1)}), _block_mode(mode),
626 _bs({p.block_size(0), p.block_size(1)}),
627 _data(p.graph().first.size() * _bs[0] * _bs[1], 0),
628 _cols(p.graph().first.begin(), p.graph().first.end()),
629 _row_ptr(p.graph().second.begin(), p.graph().second.end()),
635 for (
int i = 0; i < 2; ++i)
637 auto im = _index_maps[i];
638 std::int32_t size_local = im->size_local() * _bs[i];
639 std::span ghost_i = im->ghosts();
640 std::vector<std::int64_t> ghosts;
641 const std::vector<int> ghost_owner_i(im->owners().begin(),
643 std::vector<int> src_rank;
644 for (std::size_t j = 0; j < ghost_i.size(); ++j)
646 for (
int k = 0; k < _bs[i]; ++k)
648 ghosts.push_back(ghost_i[j] * _bs[i] + k);
649 src_rank.push_back(ghost_owner_i[j]);
653 std::array<std::vector<int>, 2> src_dest0
654 = {std::vector(_index_maps[i]->src().begin(),
655 _index_maps[i]->src().end()),
656 std::vector(_index_maps[i]->dest().begin(),
657 _index_maps[i]->dest().end())};
658 _index_maps[i] = std::make_shared<common::IndexMap>(
659 _index_maps[i]->comm(), size_local, src_dest0, ghosts, src_rank);
664 column_container_type new_cols;
665 new_cols.reserve(_data.size());
666 rowptr_container_type new_row_ptr{0};
667 new_row_ptr.reserve(_row_ptr.size() * _bs[0]);
668 std::span<const std::int32_t> num_diag_nnz = p.off_diagonal_offsets();
669 for (std::size_t i = 0; i < _row_ptr.size() - 1; ++i)
672 for (
int q0 = 0; q0 < _bs[0]; ++q0)
674 _off_diagonal_offset.push_back(new_row_ptr.back()
675 + num_diag_nnz[i] * _bs[1]);
676 for (
auto j = _row_ptr[i]; j < _row_ptr[i + 1]; ++j)
678 for (
int q1 = 0; q1 < _bs[1]; ++q1)
679 new_cols.push_back(_cols[j] * _bs[1] + q1);
681 new_row_ptr.push_back(new_cols.size());
685 _row_ptr = new_row_ptr;
692 std::span<const std::int32_t> num_diag_nnz = p.off_diagonal_offsets();
693 _off_diagonal_offset.reserve(num_diag_nnz.size());
694 std::ranges::transform(num_diag_nnz, _row_ptr,
695 std::back_inserter(_off_diagonal_offset),
700 std::array local_size
701 = {_index_maps[0]->size_local(), _index_maps[1]->size_local()};
703 = {_index_maps[0]->local_range(), _index_maps[1]->local_range()};
704 std::span ghosts1 = _index_maps[1]->ghosts();
706 std::span ghosts0 = _index_maps[0]->ghosts();
707 std::span src_ranks = _index_maps[0]->src();
708 std::span dest_ranks = _index_maps[0]->dest();
712 MPI_Dist_graph_create_adjacent(_index_maps[0]->comm(), dest_ranks.size(),
713 dest_ranks.data(), MPI_UNWEIGHTED,
714 src_ranks.size(), src_ranks.data(),
715 MPI_UNWEIGHTED, MPI_INFO_NULL,
false, &comm);
716 _comm = dolfinx::MPI::Comm(comm,
false);
720 _ghost_row_to_rank.reserve(_index_maps[0]->owners().
size());
721 for (
int r : _index_maps[0]->owners())
723 auto it = std::ranges::lower_bound(src_ranks, r);
724 assert(it != src_ranks.end() and *it == r);
725 std::size_t pos = std::distance(src_ranks.begin(), it);
726 _ghost_row_to_rank.push_back(pos);
730 std::vector<std::int32_t> data_per_proc(src_ranks.size(), 0);
731 for (std::size_t i = 0; i < _ghost_row_to_rank.size(); ++i)
733 assert(_ghost_row_to_rank[i] < (
int)data_per_proc.size());
734 std::size_t pos = local_size[0] + i;
735 data_per_proc[_ghost_row_to_rank[i]] += _row_ptr[pos + 1] - _row_ptr[pos];
739 _val_send_disp.resize(src_ranks.size() + 1, 0);
740 std::partial_sum(data_per_proc.begin(), data_per_proc.end(),
741 std::next(_val_send_disp.begin()));
744 std::vector<std::int64_t> ghost_index_data(2 * _val_send_disp.back());
746 std::vector<int> insert_pos = _val_send_disp;
747 for (std::size_t i = 0; i < _ghost_row_to_rank.size(); ++i)
749 int rank = _ghost_row_to_rank[i];
750 std::int32_t row_id = local_size[0] + i;
751 for (
int j = _row_ptr[row_id]; j < _row_ptr[row_id + 1]; ++j)
754 std::int32_t idx_pos = 2 * insert_pos[
rank];
757 ghost_index_data[idx_pos] = ghosts0[i];
758 if (std::int32_t col_local = _cols[j]; col_local < local_size[1])
759 ghost_index_data[idx_pos + 1] = col_local + local_range[1][0];
761 ghost_index_data[idx_pos + 1] = ghosts1[col_local - local_size[1]];
763 insert_pos[
rank] += 1;
769 std::vector<std::int64_t> ghost_index_array;
770 std::vector<int> recv_disp;
772 std::vector<int> send_sizes;
773 std::ranges::transform(data_per_proc, std::back_inserter(send_sizes),
774 [](
auto x) {
return 2 * x; });
776 std::vector<int> recv_sizes(dest_ranks.size());
777 send_sizes.reserve(1);
778 recv_sizes.reserve(1);
779 MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, recv_sizes.data(), 1,
780 MPI_INT, _comm.comm());
783 std::vector<int> send_disp{0};
784 std::partial_sum(send_sizes.begin(), send_sizes.end(),
785 std::back_inserter(send_disp));
787 std::partial_sum(recv_sizes.begin(), recv_sizes.end(),
788 std::back_inserter(recv_disp));
790 ghost_index_array.resize(recv_disp.back());
791 MPI_Neighbor_alltoallv(ghost_index_data.data(), send_sizes.data(),
792 send_disp.data(), MPI_INT64_T,
793 ghost_index_array.data(), recv_sizes.data(),
794 recv_disp.data(), MPI_INT64_T, _comm.comm());
799 _val_recv_disp.resize(recv_disp.size());
800 int bs2 = _bs[0] * _bs[1];
801 std::ranges::transform(recv_disp, _val_recv_disp.begin(),
802 [&bs2](
auto d) { return bs2 * d / 2; });
803 std::ranges::transform(_val_send_disp, _val_send_disp.begin(),
804 [&bs2](
auto d) { return d * bs2; });
807 std::vector<std::pair<std::int64_t, std::int32_t>> global_to_local;
808 global_to_local.reserve(ghosts1.size());
809 for (std::int64_t idx : ghosts1)
810 global_to_local.push_back({idx, global_to_local.size() + local_size[1]});
811 std::ranges::sort(global_to_local);
815 for (std::size_t i = 0; i < ghost_index_array.size(); i += 2)
818 std::int32_t local_row = ghost_index_array[i] -
local_range[0][0];
819 assert(local_row >= 0 and local_row < local_size[0]);
822 std::int32_t local_col = ghost_index_array[i + 1] -
local_range[1][0];
823 if (local_col < 0 or local_col >= local_size[1])
825 auto it = std::ranges::lower_bound(
826 global_to_local, std::pair(ghost_index_array[i + 1], -1),
827 [](
auto a,
auto b) {
return a.first < b.first; });
828 assert(it != global_to_local.end()
829 and it->first == ghost_index_array[i + 1]);
830 local_col = it->second;
832 auto cit0 = std::next(_cols.begin(), _row_ptr[local_row]);
833 auto cit1 = std::next(_cols.begin(), _row_ptr[local_row + 1]);
836 auto cit = std::lower_bound(cit0, cit1, local_col);
838 assert(*cit == local_col);
839 std::size_t d = std::distance(_cols.begin(), cit);
840 _unpack_pos.push_back(d);
843 _unpack_pos.shrink_to_fit();
929 std::span<const std::int64_t> Arow_ptr(
row_ptr().data(), nrowslocal + 1);
930 std::span<const std::int32_t> Acols(
cols().data(), Arow_ptr[nrowslocal]);
931 std::span<const std::int64_t> Aoff_diag_offset(
off_diag_offset().data(),
933 std::span<const Scalar> Avalues(
values().data(),
934 Arow_ptr[nrowslocal] * _bs[0] * _bs[1]);
936 std::span<const Scalar> _x = x.
array();
937 std::span<Scalar> _y = y.
array();
939 std::span<const std::int64_t> Arow_begin(Arow_ptr.data(), nrowslocal);
940 std::span<const std::int64_t> Arow_end(Arow_ptr.data() + 1, nrowslocal);
945 int ncolslocal =
index_map(1)->size_local();
946 std::fill(std::next(_y.begin(), ncolslocal * _bs[1]), _y.end(), Scalar(0));
948 impl::spmvT<Scalar, 1>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y,
951 impl::spmvT<Scalar, -1>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y,
957 impl::spmvT<Scalar, 1>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, _y,
960 impl::spmvT<Scalar, -1>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x,