DOLFINx 0.4.1
DOLFINx C++ interface
MatrixCSR.h
// Copyright (C) 2021-2022 Garth N. Wells and Chris N. Richardson
//
// This file is part of DOLFINx (https://www.fenicsproject.org)
//
// SPDX-License-Identifier: LGPL-3.0-or-later

#pragma once

#include "SparsityPattern.h"
#include <algorithm>
#include <array>
#include <complex>
#include <dolfinx/common/MPI.h>
#include <dolfinx/graph/AdjacencyList.h>
#include <map>
#include <memory>
#include <mpi.h>
#include <numeric>
#include <utility>
#include <vector>
#include <xtensor/xtensor.hpp>
#include <xtl/xspan.hpp>

namespace dolfinx::la
{

namespace impl
{
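/// Set data in a CSR matrix
/// @param[out] data The CSR matrix data
/// @param[in] cols The CSR column indices
/// @param[in] row_ptr The pointer to the start of each row in `data`
/// @param[in] x The `m` by `n` dense block of values (row-major) to set
/// in the matrix
/// @param[in] xrows The (local) row indices for `x`
/// @param[in] xcols The (local) column indices for `x`
/// @param[in] local_size The number of owned rows; used in debug builds
/// to check that ghost rows are not written to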
template <typename U, typename V, typename W, typename X>
void set_csr(U&& data, const V& cols, const V& row_ptr, const W& x,
             const X& xrows, const X& xcols,
             [[maybe_unused]] typename X::value_type local_size)
{
  assert(x.size() == xrows.size() * xcols.size());
  for (std::size_t r = 0; r < xrows.size(); ++r)
  {
    // Row index and current data row
    auto row = xrows[r];
    using T = typename W::value_type;
    const T* xr = x.data() + r * xcols.size();

#ifndef NDEBUG
    if (row >= local_size)
      throw std::runtime_error("Local row out of range");
#endif

    // Column indices for row
    auto cit0 = std::next(cols.begin(), row_ptr[row]);
    auto cit1 = std::next(cols.begin(), row_ptr[row + 1]);
    for (std::size_t c = 0; c < xcols.size(); ++c)
    {
      // Find position of column index
      auto it = std::lower_bound(cit0, cit1, xcols[c]);
      assert(it != cit1);
      std::size_t d = std::distance(cols.begin(), it);
      assert(d < data.size());
      data[d] = xr[c];
    }
  }
}

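/// Add data to a CSR matrix
/// @param[out] data The CSR matrix data
/// @param[in] cols The CSR column indices
/// @param[in] row_ptr The pointer to the start of each row in `data`
/// @param[in] x The `m` by `n` dense block of values (row-major) to add
/// to the matrix
/// @param[in] xrows The (local) row indices for `x`
/// @param[in] xcols The (local) column indices for `x`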
template <typename U, typename V, typename W, typename X>
void add_csr(U&& data, const V& cols, const V& row_ptr, const W& x,
             const X& xrows, const X& xcols)
{
  assert(x.size() == xrows.size() * xcols.size());
  for (std::size_t r = 0; r < xrows.size(); ++r)
  {
    // Row index and current data row
    auto row = xrows[r];
    using T = typename W::value_type;
    const T* xr = x.data() + r * xcols.size();

#ifndef NDEBUG
    // Valid rows are [0, num_rows), where num_rows = row_ptr.size() - 1
    if (row >= (int)row_ptr.size() - 1)
      throw std::runtime_error("Local row out of range");
#endif

    // Column indices for row
    auto cit0 = std::next(cols.begin(), row_ptr[row]);
    auto cit1 = std::next(cols.begin(), row_ptr[row + 1]);
    for (std::size_t c = 0; c < xcols.size(); ++c)
    {
      // Find position of column index
      auto it = std::lower_bound(cit0, cit1, xcols[c]);
      assert(it != cit1);
      std::size_t d = std::distance(cols.begin(), it);
      assert(d < data.size());
      data[d] += xr[c];
    }
  }
}
} // namespace impl

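/// Distributed sparse matrix
///
/// The matrix storage format is compressed sparse row (CSR). Ghost rows
/// hold contributions to rows owned by other ranks; their data is sent
/// to the owning ranks by finalize().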
template <typename T, class Allocator = std::allocator<T>>
class MatrixCSR
{
public:
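  /// The value type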
  using value_type = T;

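  /// The allocator type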
  using allocator_type = Allocator;

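  /// Insertion functor for setting values in a matrix. It is typically
  /// used in finite element assembly functions.
  /// @return Function for inserting values into the matrix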
  auto mat_set_values()
  {
    return [&](const xtl::span<const std::int32_t>& rows,
               const xtl::span<const std::int32_t>& cols,
               const xtl::span<const T>& data) -> int
    {
      this->set(data, rows, cols);
      return 0;
    };
  }

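  /// Insertion functor for accumulating values in a matrix. It is
  /// typically used in finite element assembly functions.
  /// @return Function for inserting values into the matrix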
  auto mat_add_values()
  {
    return [&](const xtl::span<const std::int32_t>& rows,
               const xtl::span<const std::int32_t>& cols,
               const xtl::span<const T>& data) -> int
    {
      this->add(data, rows, cols);
      return 0;
    };
  }

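  /// Create a distributed matrix
  /// @param[in] p The sparsity pattern that describes the non-zero
  /// structure of the matrix
  /// @param[in] alloc The memory allocator for the data storage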
  MatrixCSR(const SparsityPattern& p, const Allocator& alloc = Allocator())
      : _index_maps({p.index_map(0),
                     std::make_shared<common::IndexMap>(p.column_index_map())}),
        _bs({p.block_size(0), p.block_size(1)}),
        _data(p.num_nonzeros(), 0, alloc),
        _cols(p.graph().array().begin(), p.graph().array().end()),
        _row_ptr(p.graph().offsets().begin(), p.graph().offsets().end()),
        _comm(p.index_map(0)->comm(common::IndexMap::Direction::reverse)),
        _ghost_row_to_neighbor_rank(_index_maps[0]->ghost_owners())
  {
    // TODO: handle block sizes
    if (_bs[0] > 1 or _bs[1] > 1)
      throw std::runtime_error("Block size not yet supported");

    // Compute off-diagonal offset for each row
    xtl::span<const std::int32_t> num_diag_nnz = p.off_diagonal_offset();
    _off_diagonal_offset.reserve(num_diag_nnz.size());
    std::transform(num_diag_nnz.begin(), num_diag_nnz.end(), _row_ptr.begin(),
                   std::back_inserter(_off_diagonal_offset),
                   std::plus<std::int32_t>());

    // Precompute some data for ghost updates via MPI
    const std::array local_size
        = {_index_maps[0]->size_local(), _index_maps[1]->size_local()};
    const std::array local_range
        = {_index_maps[0]->local_range(), _index_maps[1]->local_range()};

    const std::vector<std::int64_t>& ghosts0 = _index_maps[0]->ghosts();
    const std::vector<std::int64_t>& ghosts1 = _index_maps[1]->ghosts();

    int outdegree(-1);
    {
      int indegree(-1), weighted(-1);
      MPI_Dist_graph_neighbors_count(_comm.comm(), &indegree, &outdegree,
                                     &weighted);
    }

    // Compute size of data to send to each neighbor
    std::vector<std::int32_t> data_per_proc(outdegree, 0);
    for (std::size_t i = 0; i < _ghost_row_to_neighbor_rank.size(); ++i)
    {
      assert(_ghost_row_to_neighbor_rank[i] < data_per_proc.size());
      data_per_proc[_ghost_row_to_neighbor_rank[i]]
          += _row_ptr[local_size[0] + i + 1] - _row_ptr[local_size[0] + i];
    }

    // Compute send displacements for values and indices (x2)
    _val_send_disp.resize(outdegree + 1, 0);
    std::partial_sum(data_per_proc.begin(), data_per_proc.end(),
                     std::next(_val_send_disp.begin()));

    std::vector<int> index_send_disp(outdegree + 1);
    std::transform(_val_send_disp.begin(), _val_send_disp.end(),
                   index_send_disp.begin(), [](int d) { return d * 2; });

    // For each ghost row, pack and send indices to the neighborhood
    std::vector<int> insert_pos(index_send_disp);
    std::vector<std::int64_t> ghost_index_data(index_send_disp.back());
    for (std::size_t i = 0; i < _ghost_row_to_neighbor_rank.size(); ++i)
    {
      const int neighbor_rank = _ghost_row_to_neighbor_rank[i];
      int row_id = local_size[0] + i;
      for (int j = _row_ptr[row_id]; j < _row_ptr[row_id + 1]; ++j)
      {
        // Get position in the send buffer to place data for this neighbour
        const std::int32_t idx_pos = insert_pos[neighbor_rank];

        // Pack send data (row, col) as global indices
        ghost_index_data[idx_pos] = ghosts0[i];
        if (const std::int32_t col_local = _cols[j]; col_local < local_size[1])
          ghost_index_data[idx_pos + 1] = col_local + local_range[1][0];
        else
          ghost_index_data[idx_pos + 1] = ghosts1[col_local - local_size[1]];

        insert_pos[neighbor_rank] += 2;
      }
    }

    // Create and communicate adjacency list to the neighborhood
    const graph::AdjacencyList<std::int64_t> ghost_index_data_out(
        std::move(ghost_index_data), std::move(index_send_disp));
    const graph::AdjacencyList<std::int64_t> ghost_index_data_in
        = dolfinx::MPI::neighbor_all_to_all(_comm.comm(), ghost_index_data_out);

    // Store received displacements for later use, when transferring
    // data values
    _val_recv_disp.resize(ghost_index_data_in.offsets().size());
    std::transform(ghost_index_data_in.offsets().begin(),
                   ghost_index_data_in.offsets().end(), _val_recv_disp.begin(),
                   [](int d) { return d / 2; });

    // Global-to-local map for ghost columns
    std::map<std::int64_t, std::int32_t> global_to_local;
    std::int32_t local_i = local_size[1];
    for (std::int64_t global_i : ghosts1)
      global_to_local.insert({global_i, local_i++});

    // Compute location in which data for each index should be stored
    // when received
    const std::vector<std::int64_t>& ghost_index_array
        = ghost_index_data_in.array();
    for (std::size_t i = 0; i < ghost_index_array.size(); i += 2)
    {
      // Row must be on this process
      const std::int32_t local_row = ghost_index_array[i] - local_range[0][0];
      assert(local_row >= 0 and local_row < local_size[0]);

      // Column may be owned or unowned
      std::int32_t local_col = ghost_index_array[i + 1] - local_range[1][0];
      if (local_col < 0 or local_col >= local_size[1])
      {
        const auto it = global_to_local.find(ghost_index_array[i + 1]);
        assert(it != global_to_local.end());
        local_col = it->second;
      }
      auto cit0 = std::next(_cols.begin(), _row_ptr[local_row]);
      auto cit1 = std::next(_cols.begin(), _row_ptr[local_row + 1]);

      // Find position of column index and insert data
      auto cit = std::lower_bound(cit0, cit1, local_col);
      assert(cit != cit1);
      assert(*cit == local_col);
      std::size_t d = std::distance(_cols.begin(), cit);
      _unpack_pos.push_back(d);
    }
  }

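  /// Move constructor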
  MatrixCSR(MatrixCSR&& A) = default;

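  /// Set all non-zero local entries to a value, including entries in
  /// ghost rows
  /// @param[in] x The value to set non-zero matrix entries to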
  void set(T x) { std::fill(_data.begin(), _data.end(), x); }

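  /// Set values in the matrix
  /// @note Only entries included in the sparsity pattern can be set
  /// @param[in] x The `m` by `n` dense block of values (row-major) to set
  /// in the matrix
  /// @param[in] rows The (local) row indices. Rows must be owned by this
  /// rank; ghost rows are rejected in debug builds.
  /// @param[in] cols The (local) column indices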
  void set(const xtl::span<const T>& x,
           const xtl::span<const std::int32_t>& rows,
           const xtl::span<const std::int32_t>& cols)
  {
    impl::set_csr(_data, _cols, _row_ptr, x, rows, cols,
                  _index_maps[0]->size_local());
  }

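  /// Accumulate values in the matrix
  /// @note Only entries included in the sparsity pattern can be
  /// accumulated into
  /// @param[in] x The `m` by `n` dense block of values (row-major) to add
  /// to the matrix
  /// @param[in] rows The (local) row indices; ghost rows are permitted
  /// @param[in] cols The (local) column indices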
  void add(const xtl::span<const T>& x,
           const xtl::span<const std::int32_t>& rows,
           const xtl::span<const std::int32_t>& cols)
  {
    impl::add_csr(_data, _cols, _row_ptr, x, rows, cols);
  }

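  /// Number of local rows, excluding ghost rows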
  std::int32_t num_owned_rows() const { return _index_maps[0]->size_local(); }

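  /// Number of local rows, including ghost rows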
  std::int32_t num_all_rows() const { return _row_ptr.size() - 1; }

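  /// Copy to a dense matrix
  /// @note Typically used for debugging and testing
  /// @return Dense copy of the matrix, including ghost rows and columns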
  xt::xtensor<T, 2> to_dense() const
  {
    const std::int32_t nrows = num_all_rows();
    const std::int32_t ncols
        = _index_maps[1]->size_local() + _index_maps[1]->num_ghosts();
    xt::xtensor<T, 2> A = xt::zeros<T>({nrows, ncols});
    for (std::int32_t r = 0; r < nrows; ++r)
      for (int j = _row_ptr[r]; j < _row_ptr[r + 1]; ++j)
        A(r, _cols[j]) = _data[j];
    return A;
  }

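  /// Transfer ghost row data to the owning ranks, accumulating received
  /// values on the owned rows and zeroing the ghost rows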
  void finalize()
  {
    finalize_begin();
    finalize_end();
  }

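  /// Begin transfer of ghost row data to owning ranks, where it will be
  /// accumulated into existing owned rows
  /// @note This is a non-blocking operation and must be followed by a
  /// matching call to finalize_end()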
  void finalize_begin()
  {
    const std::int32_t local_size0 = _index_maps[0]->size_local();
    const std::int32_t num_ghosts0 = _index_maps[0]->num_ghosts();

    // For each ghost row, pack values to send to the neighborhood. The
    // send buffer is a member so that it remains alive until the
    // non-blocking communication completes in finalize_end().
    std::vector<int> insert_pos(_val_send_disp);
    _ghost_value_data.resize(_val_send_disp.back());
    for (int i = 0; i < num_ghosts0; ++i)
    {
      const int neighbor_rank = _ghost_row_to_neighbor_rank[i];

      // Get position in send buffer to place data to send to this
      // neighbour
      const std::int32_t val_pos = insert_pos[neighbor_rank];
      std::copy(std::next(_data.data(), _row_ptr[local_size0 + i]),
                std::next(_data.data(), _row_ptr[local_size0 + i + 1]),
                std::next(_ghost_value_data.begin(), val_pos));
      insert_pos[neighbor_rank]
          += _row_ptr[local_size0 + i + 1] - _row_ptr[local_size0 + i];
    }

    _ghost_value_data_in.resize(_val_recv_disp.back());

    // Compute data sizes for send and receive from displacements
    std::vector<int> val_send_count(_val_send_disp.size() - 1);
    std::adjacent_difference(std::next(_val_send_disp.begin()),
                             _val_send_disp.end(), val_send_count.begin());

    std::vector<int> val_recv_count(_val_recv_disp.size() - 1);
    std::adjacent_difference(std::next(_val_recv_disp.begin()),
                             _val_recv_disp.end(), val_recv_count.begin());

    int status = MPI_Ineighbor_alltoallv(
        _ghost_value_data.data(), val_send_count.data(), _val_send_disp.data(),
        dolfinx::MPI::mpi_type<T>(), _ghost_value_data_in.data(),
        val_recv_count.data(), _val_recv_disp.data(),
        dolfinx::MPI::mpi_type<T>(), _comm.comm(), &_request);
    assert(status == MPI_SUCCESS);
  }

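  /// End transfer of ghost row data to owning ranks
  /// @note Must be preceded by a call to finalize_begin()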
  void finalize_end()
  {
    int status = MPI_Wait(&_request, MPI_STATUS_IGNORE);
    assert(status == MPI_SUCCESS);

    // Add received values to the owned rows
    assert(_ghost_value_data_in.size() == _unpack_pos.size());
    for (std::size_t i = 0; i < _ghost_value_data_in.size(); ++i)
      _data[_unpack_pos[i]] += _ghost_value_data_in[i];

    // Set ghost row data to zero
    const std::int32_t local_size0 = _index_maps[0]->size_local();
    std::fill(std::next(_data.begin(), _row_ptr[local_size0]), _data.end(), 0);
  }

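  /// Compute the Frobenius norm squared (summed across all ranks)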
  double norm_squared() const
  {
    const std::size_t num_owned_rows = _index_maps[0]->size_local();
    assert(num_owned_rows < _row_ptr.size());

    const double norm_sq_local = std::accumulate(
        _data.cbegin(), std::next(_data.cbegin(), _row_ptr[num_owned_rows]),
        double(0), [](double norm, T y) { return norm + std::norm(y); });
    double norm_sq;
    MPI_Allreduce(&norm_sq_local, &norm_sq, 1, MPI_DOUBLE, MPI_SUM,
                  _comm.comm());
    return norm_sq;
  }

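  /// Index maps for the row and column space. The row IndexMap contains
  /// ghost entries for rows that can be inserted into from this rank.
  /// @return Row (0) and column (1) index maps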
  const std::array<std::shared_ptr<const common::IndexMap>, 2>&
  index_maps() const
  {
    return _index_maps;
  }

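  /// Get local data values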
  std::vector<T>& values() { return _data; }

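  /// Get local values (const version)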
  const std::vector<T>& values() const { return _data; }

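  /// Get local row pointers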
  const std::vector<std::int32_t>& row_ptr() const { return _row_ptr; }

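  /// Get local column indices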
  const std::vector<std::int32_t>& cols() const { return _cols; }

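  /// Get the start of off-diagonal (unowned columns) on each row,
  /// allowing the matrix to be split (virtually) into two parts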
  const std::vector<std::int32_t>& off_diag_offset() const
  {
    return _off_diagonal_offset;
  }

private:
  // Maps describing the data layout for rows and columns
  std::array<std::shared_ptr<const common::IndexMap>, 2> _index_maps;

  // Block sizes
  std::array<int, 2> _bs;

  // Matrix data
  std::vector<T, Allocator> _data;
  std::vector<std::int32_t> _cols, _row_ptr;

  // Start of off-diagonal (unowned columns) on each row
  std::vector<std::int32_t> _off_diagonal_offset;

  // Neighborhood communicator (ghost -> owner communicator for rows)
  dolfinx::MPI::Comm _comm;

  // -- Precomputed data for finalize/update

  // Request in non-blocking communication
  MPI_Request _request;

  // Position in _data to add received data
  std::vector<int> _unpack_pos;

  // Displacements for alltoall for each neighbor when sending and
  // receiving
  std::vector<int> _val_send_disp, _val_recv_disp;

  // Neighborhood rank that owns each ghost row
  std::vector<int> _ghost_row_to_neighbor_rank;

  // Temporary stores for finalize data during non-blocking communication
  std::vector<T> _ghost_value_data;
  std::vector<T> _ghost_value_data_in;
};

} // namespace dolfinx::la
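Usage: a minimal sketch of assembly-style use of MatrixCSR. The function
and variable names below are illustrative and not part of DOLFINx; it
assumes a SparsityPattern `pattern` that has already been assembled.

#include <cstdint>
#include <dolfinx/la/MatrixCSR.h>
#include <vector>

void example(const dolfinx::la::SparsityPattern& pattern)
{
  // Create a distributed CSR matrix from the assembled sparsity pattern
  dolfinx::la::MatrixCSR<double> A(pattern);

  // Accumulate a 2x2 dense block (row-major) at local rows {0, 1} and
  // columns {0, 1}. The (row, col) pairs must exist in the pattern.
  std::vector<double> block = {1.0, 2.0, 3.0, 4.0};
  std::vector<std::int32_t> rows = {0, 1};
  std::vector<std::int32_t> cols = {0, 1};
  A.add(block, rows, cols);

  // Send ghost-row contributions to the owning ranks and zero ghost rows
  A.finalize();

  // Frobenius norm squared, reduced over all ranks
  double nrm2 = A.norm_squared();
}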