9 #include "SparsityPattern.h" 
   10 #include <dolfinx/common/IndexMap.h> 
   11 #include <dolfinx/common/MPI.h> 
   12 #include <dolfinx/graph/AdjacencyList.h> 
   36 template <
typename U, 
typename V, 
typename W, 
typename X>
 
   37 void set_csr(U&& data, 
const V& cols, 
const V& row_ptr, 
const W& x,
 
   38              const X& xrows, 
const X& xcols,
 
   39              [[maybe_unused]] 
typename X::value_type local_size)
 
   41   assert(x.size() == xrows.size() * xcols.size());
 
   42   for (std::size_t r = 0; r < xrows.size(); ++r)
 
   46     using T = 
typename W::value_type;
 
   47     const T* xr = x.data() + r * xcols.size();
 
   50     if (row >= local_size)
 
   51       throw std::runtime_error(
"Local row out of range");
 
   55     auto cit0 = std::next(cols.begin(), row_ptr[row]);
 
   56     auto cit1 = std::next(cols.begin(), row_ptr[row + 1]);
 
   57     for (std::size_t c = 0; c < xcols.size(); ++c)
 
   60       auto it = std::lower_bound(cit0, cit1, xcols[c]);
 
   62       std::size_t d = std::distance(cols.begin(), it);
 
   63       assert(d < data.size());
 
   78 template <
typename U, 
typename V, 
typename W, 
typename X>
 
   79 void add_csr(U&& data, 
const V& cols, 
const V& row_ptr, 
const W& x,
 
   80              const X& xrows, 
const X& xcols)
 
   82   assert(x.size() == xrows.size() * xcols.size());
 
   83   for (std::size_t r = 0; r < xrows.size(); ++r)
 
   87     using T = 
typename W::value_type;
 
   88     const T* xr = x.data() + r * xcols.size();
 
   91     if (row >= (
int)row_ptr.size())
 
   92       throw std::runtime_error(
"Local row out of range");
 
   96     auto cit0 = std::next(cols.begin(), row_ptr[row]);
 
   97     auto cit1 = std::next(cols.begin(), row_ptr[row + 1]);
 
   98     for (std::size_t c = 0; c < xcols.size(); ++c)
 
  101       auto it = std::lower_bound(cit0, cit1, xcols[c]);
 
  103       std::size_t d = std::distance(cols.begin(), it);
 
  104       assert(d < data.size());
 
  125 template <
typename T, 
class Allocator = std::allocator<T>>
 
  142     return [&](
const std::span<const std::int32_t>& rows,
 
  143                const std::span<const std::int32_t>& 
cols,
 
  144                const std::span<const T>& data) -> 
int 
  157     return [&](
const std::span<const std::int32_t>& rows,
 
  158                const std::span<const std::int32_t>& 
cols,
 
  159                const std::span<const T>& data) -> 
int 
  175         _cols(p.
graph().array().begin(), p.
graph().array().end()),
 
  176         _row_ptr(p.
graph().offsets().begin(), p.
graph().offsets().end()),
 
  180     if (_bs[0] > 1 or _bs[1] > 1)
 
  181       throw std::runtime_error(
"Block size not yet supported");
 
  185     _off_diagonal_offset.reserve(num_diag_nnz.size());
 
  186     std::transform(num_diag_nnz.begin(), num_diag_nnz.end(), _row_ptr.begin(),
 
  187                    std::back_inserter(_off_diagonal_offset), std::plus{});
 
  190     const std::array local_size
 
  191         = {_index_maps[0]->size_local(), _index_maps[1]->size_local()};
 
  192     const std::array local_range
 
  193         = {_index_maps[0]->local_range(), _index_maps[1]->local_range()};
 
  194     const std::vector<std::int64_t>& ghosts1 = _index_maps[1]->ghosts();
 
  196     const std::vector<std::int64_t>& ghosts0 = _index_maps[0]->ghosts();
 
  197     const std::vector<int>& src_ranks = _index_maps[0]->src();
 
  198     const std::vector<int>& dest_ranks = _index_maps[0]->dest();
 
  202     MPI_Dist_graph_create_adjacent(_index_maps[0]->comm(), dest_ranks.size(),
 
  203                                    dest_ranks.data(), MPI_UNWEIGHTED,
 
  204                                    src_ranks.size(), src_ranks.data(),
 
  205                                    MPI_UNWEIGHTED, MPI_INFO_NULL, 
false, &comm);
 
  210     _ghost_row_to_rank.reserve(_index_maps[0]->owners().
size());
 
  211     for (
int r : _index_maps[0]->owners())
 
  213       auto it = std::lower_bound(src_ranks.begin(), src_ranks.end(), r);
 
  214       assert(it != src_ranks.end() and *it == r);
 
  215       int pos = std::distance(src_ranks.begin(), it);
 
  216       _ghost_row_to_rank.push_back(pos);
 
  220     std::vector<std::int32_t> data_per_proc(src_ranks.size(), 0);
 
  221     for (std::size_t i = 0; i < _ghost_row_to_rank.size(); ++i)
 
  223       assert(_ghost_row_to_rank[i] < data_per_proc.size());
 
  224       std::size_t pos = local_size[0] + i;
 
  225       data_per_proc[_ghost_row_to_rank[i]] += _row_ptr[pos + 1] - _row_ptr[pos];
 
  229     _val_send_disp.resize(src_ranks.size() + 1, 0);
 
  230     std::partial_sum(data_per_proc.begin(), data_per_proc.end(),
 
  231                      std::next(_val_send_disp.begin()));
 
  234     std::vector<std::int64_t> ghost_index_data(2 * _val_send_disp.back());
 
  236       std::vector<int> insert_pos = _val_send_disp;
 
  237       for (std::size_t i = 0; i < _ghost_row_to_rank.size(); ++i)
 
  239         const int rank = _ghost_row_to_rank[i];
 
  240         int row_id = local_size[0] + i;
 
  241         for (
int j = _row_ptr[row_id]; j < _row_ptr[row_id + 1]; ++j)
 
  244           const std::int32_t idx_pos = 2 * insert_pos[
rank];
 
  247           ghost_index_data[idx_pos] = ghosts0[i];
 
  248           if (std::int32_t col_local = _cols[j]; col_local < local_size[1])
 
  249             ghost_index_data[idx_pos + 1] = col_local + local_range[1][0];
 
  251             ghost_index_data[idx_pos + 1] = ghosts1[col_local - local_size[1]];
 
  253           insert_pos[
rank] += 1;
 
  259     std::vector<std::int64_t> ghost_index_array;
 
  260     std::vector<int> recv_disp;
 
  262       std::vector<int> send_sizes;
 
  263       std::transform(data_per_proc.begin(), data_per_proc.end(),
 
  264                      std::back_inserter(send_sizes),
 
  265                      [](
auto x) { return 2 * x; });
 
  267       std::vector<int> recv_sizes(dest_ranks.size());
 
  268       send_sizes.reserve(1);
 
  269       recv_sizes.reserve(1);
 
  270       MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, recv_sizes.data(), 1,
 
  271                             MPI_INT, _comm.comm());
 
  274       std::vector<int> send_disp = {0};
 
  275       std::partial_sum(send_sizes.begin(), send_sizes.end(),
 
  276                        std::back_inserter(send_disp));
 
  278       std::partial_sum(recv_sizes.begin(), recv_sizes.end(),
 
  279                        std::back_inserter(recv_disp));
 
  281       ghost_index_array.resize(recv_disp.back());
 
  282       MPI_Neighbor_alltoallv(ghost_index_data.data(), send_sizes.data(),
 
  283                              send_disp.data(), MPI_INT64_T,
 
  284                              ghost_index_array.data(), recv_sizes.data(),
 
  285                              recv_disp.data(), MPI_INT64_T, _comm.comm());
 
  290     _val_recv_disp.resize(recv_disp.size());
 
  291     std::transform(recv_disp.begin(), recv_disp.end(), _val_recv_disp.begin(),
 
  292                    [](
int d) { return d / 2; });
 
  295     std::vector<std::pair<std::int64_t, std::int32_t>> global_to_local;
 
  296     global_to_local.reserve(ghosts1.size());
 
  297     std::int32_t local_i = local_size[1];
 
  298     for (std::int64_t idx : ghosts1)
 
  299       global_to_local.push_back({idx, global_to_local.size() + local_size[1]});
 
  300     std::sort(global_to_local.begin(), global_to_local.end());
 
  304     for (std::size_t i = 0; i < ghost_index_array.size(); i += 2)
 
  307       const std::int32_t local_row = ghost_index_array[i] - 
local_range[0][0];
 
  308       assert(local_row >= 0 and local_row < local_size[0]);
 
  311       std::int32_t local_col = ghost_index_array[i + 1] - 
local_range[1][0];
 
  312       if (local_col < 0 or local_col >= local_size[1])
 
  314         const auto it = std::lower_bound(
 
  315             global_to_local.begin(), global_to_local.end(),
 
  316             std::pair(ghost_index_array[i + 1], -1),
 
  317             [](
auto& a, 
auto& b) { return a.first < b.first; });
 
  318         assert(it != global_to_local.end()
 
  319                and it->first == ghost_index_array[i + 1]);
 
  320         local_col = it->second;
 
  322       auto cit0 = std::next(_cols.begin(), _row_ptr[local_row]);
 
  323       auto cit1 = std::next(_cols.begin(), _row_ptr[local_row + 1]);
 
  326       auto cit = std::lower_bound(cit0, cit1, local_col);
 
  328       assert(*cit == local_col);
 
  329       std::size_t d = std::distance(_cols.begin(), cit);
 
  330       _unpack_pos.push_back(d);
 
  341   void set(T x) { std::fill(_data.begin(), _data.end(), x); }
 
  355   void set(
const std::span<const T>& x,
 
  356            const std::span<const std::int32_t>& rows,
 
  357            const std::span<const std::int32_t>& 
cols)
 
  359     impl::set_csr(_data, _cols, _row_ptr, x, rows, 
cols,
 
  360                   _index_maps[0]->size_local());
 
  375   void add(
const std::span<const T>& x,
 
  376            const std::span<const std::int32_t>& rows,
 
  377            const std::span<const std::int32_t>& 
cols)
 
  379     impl::add_csr(_data, _cols, _row_ptr, x, rows, 
cols);
 
  398     const std::size_t ncols
 
  399         = _index_maps[1]->size_local() + _index_maps[1]->num_ghosts();
 
  400     std::vector<T> A(nrows * ncols);
 
  401     for (std::size_t r = 0; r < nrows; ++r)
 
  402       for (std::int32_t j = _row_ptr[r]; j < _row_ptr[r + 1]; ++j)
 
  403         A[r * ncols + _cols[j]] = _data[j];
 
  426     const std::int32_t local_size0 = _index_maps[0]->size_local();
 
  427     const std::int32_t num_ghosts0 = _index_maps[0]->num_ghosts();
 
  430     std::vector<int> insert_pos = _val_send_disp;
 
  431     std::vector<T> ghost_value_data(_val_send_disp.back());
 
  432     for (
int i = 0; i < num_ghosts0; ++i)
 
  434       const int rank = _ghost_row_to_rank[i];
 
  438       const std::int32_t val_pos = insert_pos[rank];
 
  439       std::copy(std::next(_data.data(), _row_ptr[local_size0 + i]),
 
  440                 std::next(_data.data(), _row_ptr[local_size0 + i + 1]),
 
  441                 std::next(ghost_value_data.begin(), val_pos));
 
  443           += _row_ptr[local_size0 + i + 1] - _row_ptr[local_size0 + i];
 
  446     _ghost_value_data_in.resize(_val_recv_disp.back());
 
  449     std::vector<int> val_send_count(_val_send_disp.size() - 1);
 
  450     std::adjacent_difference(std::next(_val_send_disp.begin()),
 
  451                              _val_send_disp.end(), val_send_count.begin());
 
  453     std::vector<int> val_recv_count(_val_recv_disp.size() - 1);
 
  454     std::adjacent_difference(std::next(_val_recv_disp.begin()),
 
  455                              _val_recv_disp.end(), val_recv_count.begin());
 
  457     int status = MPI_Ineighbor_alltoallv(
 
  458         ghost_value_data.data(), val_send_count.data(), _val_send_disp.data(),
 
  459         dolfinx::MPI::mpi_type<T>(), _ghost_value_data_in.data(),
 
  460         val_recv_count.data(), _val_recv_disp.data(),
 
  461         dolfinx::MPI::mpi_type<T>(), _comm.comm(), &_request);
 
  462     assert(status == MPI_SUCCESS);
 
  472     int status = MPI_Wait(&_request, MPI_STATUS_IGNORE);
 
  473     assert(status == MPI_SUCCESS);
 
  476     assert(_ghost_value_data_in.size() == _unpack_pos.size());
 
  477     for (std::size_t i = 0; i < _ghost_value_data_in.size(); ++i)
 
  478       _data[_unpack_pos[i]] += _ghost_value_data_in[i];
 
  481     const std::int32_t local_size0 = _index_maps[0]->size_local();
 
  482     std::fill(std::next(_data.begin(), _row_ptr[local_size0]), _data.end(), 0);
 
  491     const double norm_sq_local = std::accumulate(
 
  492         _data.cbegin(), std::next(_data.cbegin(), _row_ptr[
num_owned_rows]),
 
  493         double(0), [](
double norm, T y) { return norm + std::norm(y); });
 
  495     MPI_Allreduce(&norm_sq_local, &norm_sq, 1, MPI_DOUBLE, MPI_SUM,
 
  506   const std::array<std::shared_ptr<const common::IndexMap>, 2>&
 
  514   std::vector<T>& 
values() { 
return _data; }
 
  518   const std::vector<T>& 
values()
 const { 
return _data; }
 
  522   const std::vector<std::int32_t>& 
row_ptr()
 const { 
return _row_ptr; }
 
  526   const std::vector<std::int32_t>& 
cols()
 const { 
return _cols; }
 
  537     return _off_diagonal_offset;
 
  542   std::array<std::shared_ptr<const common::IndexMap>, 2> _index_maps;
 
  545   std::array<int, 2> _bs;
 
  548   std::vector<T, Allocator> _data;
 
  549   std::vector<std::int32_t> _cols, _row_ptr;
 
  552   std::vector<std::int32_t> _off_diagonal_offset;
 
  560   MPI_Request _request;
 
  563   std::vector<int> _unpack_pos;
 
  567   std::vector<int> _val_send_disp, _val_recv_disp;
 
  571   std::vector<int> _ghost_row_to_rank;
 
  574   std::vector<T> _ghost_value_data_in;
 
A duplicate MPI communicator and manage lifetime of the communicator.
Definition: MPI.h:41
Distributed sparse matrix.
Definition: MatrixCSR.h:127
auto mat_set_values()
Insertion functor for setting values in matrix. It is typically used in finite element assembly funct...
Definition: MatrixCSR.h:140
void set(const std::span< const T > &x, const std::span< const std::int32_t > &rows, const std::span< const std::int32_t > &cols)
Set values in the matrix.
Definition: MatrixCSR.h:355
std::vector< T > to_dense() const
Copy to a dense matrix.
Definition: MatrixCSR.h:395
Allocator allocator_type
The allocator type.
Definition: MatrixCSR.h:133
void finalize()
Transfer ghost row data to the owning ranks accumulating received values on the owned rows,...
Definition: MatrixCSR.h:411
std::int32_t num_owned_rows() const
Number of local rows excluding ghost rows.
Definition: MatrixCSR.h:383
MatrixCSR(const SparsityPattern &p, const Allocator &alloc=Allocator())
Create a distributed matrix.
Definition: MatrixCSR.h:170
const std::vector< std::int32_t > & cols() const
Get local column indices.
Definition: MatrixCSR.h:526
auto mat_add_values()
Insertion functor for accumulating values in matrix. It is typically used in finite element assembly ...
Definition: MatrixCSR.h:155
void set(T x)
Set all non-zero local entries to a value including entries in ghost rows.
Definition: MatrixCSR.h:341
MatrixCSR(MatrixCSR &&A)=default
Move constructor.
void finalize_end()
End transfer of ghost row data to owning ranks.
Definition: MatrixCSR.h:470
const std::array< std::shared_ptr< const common::IndexMap >, 2 > & index_maps() const
Index maps for the row and column space. The row IndexMap contains ghost entries for rows which may b...
Definition: MatrixCSR.h:507
const std::vector< std::int32_t > & row_ptr() const
Get local row pointers.
Definition: MatrixCSR.h:522
const std::vector< T > & values() const
Get local values (const version)
Definition: MatrixCSR.h:518
void add(const std::span< const T > &x, const std::span< const std::int32_t > &rows, const std::span< const std::int32_t > &cols)
Accumulate values in the matrix.
Definition: MatrixCSR.h:375
std::vector< T > & values()
Get local data values.
Definition: MatrixCSR.h:514
std::int32_t num_all_rows() const
Number of local rows including ghost rows.
Definition: MatrixCSR.h:386
double norm_squared() const
Compute the Frobenius norm squared.
Definition: MatrixCSR.h:486
T value_type
The value type.
Definition: MatrixCSR.h:130
void finalize_begin()
Begin transfer of ghost row data to owning ranks, where it will be accumulated into existing owned ro...
Definition: MatrixCSR.h:424
const std::vector< std::int32_t > & off_diag_offset() const
Get the start of off-diagonal (unowned columns) on each row, allowing the matrix to be split (virtual...
Definition: MatrixCSR.h:535
This class provides a sparsity pattern data structure that can be used to initialize sparse matrices....
Definition: SparsityPattern.h:34
std::shared_ptr< const common::IndexMap > index_map(int dim) const
Index map for given dimension dimension. Returns the index map for rows and columns that will be set ...
Definition: SparsityPattern.cpp:194
int block_size(int dim) const
Return index map block size for dimension dim.
Definition: SparsityPattern.cpp:225
const graph::AdjacencyList< std::int32_t > & graph() const
Sparsity pattern graph after assembly. Uses local indices for the columns.
Definition: SparsityPattern.cpp:415
std::int64_t num_nonzeros() const
Number of nonzeros on this rank after assembly, including ghost rows.
Definition: SparsityPattern.cpp:394
common::IndexMap column_index_map() const
Builds the index map for columns after assembly of the sparsity pattern.
Definition: SparsityPattern.cpp:214
std::span< const int > off_diagonal_offset() const
Row-wise start of off-diagonal (unowned columns) on each row.
Definition: SparsityPattern.cpp:422
int size(MPI_Comm comm)
Return size of the group (number of processes) associated with the communicator.
Definition: MPI.cpp:84
int rank(MPI_Comm comm)
Return process rank for the communicator.
Definition: MPI.cpp:76
constexpr std::array< std::int64_t, 2 > local_range(int rank, std::int64_t N, int size)
Return local range for the calling process, partitioning the global [0, N - 1] range across all ranks...
Definition: MPI.h:83
Linear algebra interface.
Definition: sparsitybuild.h:15
auto norm(const Vector< T, Allocator > &a, Norm type=Norm::l2)
Compute the norm of the vector.
Definition: Vector.h:252