Note: this is documentation for an old release. View the latest documentation at docs.fenicsproject.org/dolfinx/v0.9.0/cpp/doxygen/dd/d65/MatrixCSR_8h_source.html
DOLFINx  0.5.1
DOLFINx C++ interface
MatrixCSR.h
1 // Copyright (C) 2021-2022 Garth N. Wells and Chris N. Richardson
2 //
3 // This file is part of DOLFINx (https://www.fenicsproject.org)
4 //
5 // SPDX-License-Identifier: LGPL-3.0-or-later
6 
7 #pragma once
8 
9 #include "SparsityPattern.h"
10 #include <dolfinx/common/IndexMap.h>
11 #include <dolfinx/common/MPI.h>
12 #include <dolfinx/graph/AdjacencyList.h>
13 #include <mpi.h>
14 #include <numeric>
15 #include <span>
16 #include <utility>
17 #include <vector>
18 
19 namespace dolfinx::la
20 {
21 
22 namespace impl
23 {
36 template <typename U, typename V, typename W, typename X>
37 void set_csr(U&& data, const V& cols, const V& row_ptr, const W& x,
38  const X& xrows, const X& xcols,
39  [[maybe_unused]] typename X::value_type local_size)
40 {
41  assert(x.size() == xrows.size() * xcols.size());
42  for (std::size_t r = 0; r < xrows.size(); ++r)
43  {
44  // Row index and current data row
45  auto row = xrows[r];
46  using T = typename W::value_type;
47  const T* xr = x.data() + r * xcols.size();
48 
49 #ifndef NDEBUG
50  if (row >= local_size)
51  throw std::runtime_error("Local row out of range");
52 #endif
53 
54  // Columns indices for row
55  auto cit0 = std::next(cols.begin(), row_ptr[row]);
56  auto cit1 = std::next(cols.begin(), row_ptr[row + 1]);
57  for (std::size_t c = 0; c < xcols.size(); ++c)
58  {
59  // Find position of column index
60  auto it = std::lower_bound(cit0, cit1, xcols[c]);
61  assert(it != cit1);
62  std::size_t d = std::distance(cols.begin(), it);
63  assert(d < data.size());
64  data[d] = xr[c];
65  }
66  }
67 }
68 
78 template <typename U, typename V, typename W, typename X>
79 void add_csr(U&& data, const V& cols, const V& row_ptr, const W& x,
80  const X& xrows, const X& xcols)
81 {
82  assert(x.size() == xrows.size() * xcols.size());
83  for (std::size_t r = 0; r < xrows.size(); ++r)
84  {
85  // Row index and current data row
86  auto row = xrows[r];
87  using T = typename W::value_type;
88  const T* xr = x.data() + r * xcols.size();
89 
90 #ifndef NDEBUG
91  if (row >= (int)row_ptr.size())
92  throw std::runtime_error("Local row out of range");
93 #endif
94 
95  // Columns indices for row
96  auto cit0 = std::next(cols.begin(), row_ptr[row]);
97  auto cit1 = std::next(cols.begin(), row_ptr[row + 1]);
98  for (std::size_t c = 0; c < xcols.size(); ++c)
99  {
100  // Find position of column index
101  auto it = std::lower_bound(cit0, cit1, xcols[c]);
102  assert(it != cit1);
103  std::size_t d = std::distance(cols.begin(), it);
104  assert(d < data.size());
105  data[d] += xr[c];
106  }
107  }
108 }
109 } // namespace impl
110 
125 template <typename T, class Allocator = std::allocator<T>>
127 {
128 public:
130  using value_type = T;
131 
133  using allocator_type = Allocator;
134 
141  {
142  return [&](const std::span<const std::int32_t>& rows,
143  const std::span<const std::int32_t>& cols,
144  const std::span<const T>& data) -> int
145  {
146  this->set(data, rows, cols);
147  return 0;
148  };
149  }
150 
156  {
157  return [&](const std::span<const std::int32_t>& rows,
158  const std::span<const std::int32_t>& cols,
159  const std::span<const T>& data) -> int
160  {
161  this->add(data, rows, cols);
162  return 0;
163  };
164  }
165 
170  MatrixCSR(const SparsityPattern& p, const Allocator& alloc = Allocator())
171  : _index_maps({p.index_map(0),
172  std::make_shared<common::IndexMap>(p.column_index_map())}),
173  _bs({p.block_size(0), p.block_size(1)}),
174  _data(p.num_nonzeros(), 0, alloc),
175  _cols(p.graph().array().begin(), p.graph().array().end()),
176  _row_ptr(p.graph().offsets().begin(), p.graph().offsets().end()),
177  _comm(MPI_COMM_NULL)
178  {
179  // TODO: handle block sizes
180  if (_bs[0] > 1 or _bs[1] > 1)
181  throw std::runtime_error("Block size not yet supported");
182 
183  // Compute off-diagonal offset for each row
184  std::span<const std::int32_t> num_diag_nnz = p.off_diagonal_offset();
185  _off_diagonal_offset.reserve(num_diag_nnz.size());
186  std::transform(num_diag_nnz.begin(), num_diag_nnz.end(), _row_ptr.begin(),
187  std::back_inserter(_off_diagonal_offset), std::plus{});
188 
189  // Some short-hand
190  const std::array local_size
191  = {_index_maps[0]->size_local(), _index_maps[1]->size_local()};
192  const std::array local_range
193  = {_index_maps[0]->local_range(), _index_maps[1]->local_range()};
194  const std::vector<std::int64_t>& ghosts1 = _index_maps[1]->ghosts();
195 
196  const std::vector<std::int64_t>& ghosts0 = _index_maps[0]->ghosts();
197  const std::vector<int>& src_ranks = _index_maps[0]->src();
198  const std::vector<int>& dest_ranks = _index_maps[0]->dest();
199 
200  // Create neigbourhood communicator (owner <- ghost)
201  MPI_Comm comm;
202  MPI_Dist_graph_create_adjacent(_index_maps[0]->comm(), dest_ranks.size(),
203  dest_ranks.data(), MPI_UNWEIGHTED,
204  src_ranks.size(), src_ranks.data(),
205  MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm);
206  _comm = dolfinx::MPI::Comm(comm, false);
207 
208  // Build map from ghost row index position to owning (neighborhood)
209  // rank
210  _ghost_row_to_rank.reserve(_index_maps[0]->owners().size());
211  for (int r : _index_maps[0]->owners())
212  {
213  auto it = std::lower_bound(src_ranks.begin(), src_ranks.end(), r);
214  assert(it != src_ranks.end() and *it == r);
215  int pos = std::distance(src_ranks.begin(), it);
216  _ghost_row_to_rank.push_back(pos);
217  }
218 
219  // Compute size of data to send to each neighbor
220  std::vector<std::int32_t> data_per_proc(src_ranks.size(), 0);
221  for (std::size_t i = 0; i < _ghost_row_to_rank.size(); ++i)
222  {
223  assert(_ghost_row_to_rank[i] < data_per_proc.size());
224  std::size_t pos = local_size[0] + i;
225  data_per_proc[_ghost_row_to_rank[i]] += _row_ptr[pos + 1] - _row_ptr[pos];
226  }
227 
228  // Compute send displacements
229  _val_send_disp.resize(src_ranks.size() + 1, 0);
230  std::partial_sum(data_per_proc.begin(), data_per_proc.end(),
231  std::next(_val_send_disp.begin()));
232 
233  // For each ghost row, pack and send indices to neighborhood
234  std::vector<std::int64_t> ghost_index_data(2 * _val_send_disp.back());
235  {
236  std::vector<int> insert_pos = _val_send_disp;
237  for (std::size_t i = 0; i < _ghost_row_to_rank.size(); ++i)
238  {
239  const int rank = _ghost_row_to_rank[i];
240  int row_id = local_size[0] + i;
241  for (int j = _row_ptr[row_id]; j < _row_ptr[row_id + 1]; ++j)
242  {
243  // Get position in send buffer
244  const std::int32_t idx_pos = 2 * insert_pos[rank];
245 
246  // Pack send data (row, col) as global indices
247  ghost_index_data[idx_pos] = ghosts0[i];
248  if (std::int32_t col_local = _cols[j]; col_local < local_size[1])
249  ghost_index_data[idx_pos + 1] = col_local + local_range[1][0];
250  else
251  ghost_index_data[idx_pos + 1] = ghosts1[col_local - local_size[1]];
252 
253  insert_pos[rank] += 1;
254  }
255  }
256  }
257 
258  // Communicate data with neighborhood
259  std::vector<std::int64_t> ghost_index_array;
260  std::vector<int> recv_disp;
261  {
262  std::vector<int> send_sizes;
263  std::transform(data_per_proc.begin(), data_per_proc.end(),
264  std::back_inserter(send_sizes),
265  [](auto x) { return 2 * x; });
266 
267  std::vector<int> recv_sizes(dest_ranks.size());
268  send_sizes.reserve(1);
269  recv_sizes.reserve(1);
270  MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, recv_sizes.data(), 1,
271  MPI_INT, _comm.comm());
272 
273  // Build send/recv displacement
274  std::vector<int> send_disp = {0};
275  std::partial_sum(send_sizes.begin(), send_sizes.end(),
276  std::back_inserter(send_disp));
277  recv_disp = {0};
278  std::partial_sum(recv_sizes.begin(), recv_sizes.end(),
279  std::back_inserter(recv_disp));
280 
281  ghost_index_array.resize(recv_disp.back());
282  MPI_Neighbor_alltoallv(ghost_index_data.data(), send_sizes.data(),
283  send_disp.data(), MPI_INT64_T,
284  ghost_index_array.data(), recv_sizes.data(),
285  recv_disp.data(), MPI_INT64_T, _comm.comm());
286  }
287 
288  // Store receive displacements for future use, when transferring
289  // data values
290  _val_recv_disp.resize(recv_disp.size());
291  std::transform(recv_disp.begin(), recv_disp.end(), _val_recv_disp.begin(),
292  [](int d) { return d / 2; });
293 
294  // Global-to-local map for ghost columns
295  std::vector<std::pair<std::int64_t, std::int32_t>> global_to_local;
296  global_to_local.reserve(ghosts1.size());
297  std::int32_t local_i = local_size[1];
298  for (std::int64_t idx : ghosts1)
299  global_to_local.push_back({idx, global_to_local.size() + local_size[1]});
300  std::sort(global_to_local.begin(), global_to_local.end());
301 
302  // Compute location in which data for each index should be stored
303  // when received
304  for (std::size_t i = 0; i < ghost_index_array.size(); i += 2)
305  {
306  // Row must be on this process
307  const std::int32_t local_row = ghost_index_array[i] - local_range[0][0];
308  assert(local_row >= 0 and local_row < local_size[0]);
309 
310  // Column may be owned or unowned
311  std::int32_t local_col = ghost_index_array[i + 1] - local_range[1][0];
312  if (local_col < 0 or local_col >= local_size[1])
313  {
314  const auto it = std::lower_bound(
315  global_to_local.begin(), global_to_local.end(),
316  std::pair(ghost_index_array[i + 1], -1),
317  [](auto& a, auto& b) { return a.first < b.first; });
318  assert(it != global_to_local.end()
319  and it->first == ghost_index_array[i + 1]);
320  local_col = it->second;
321  }
322  auto cit0 = std::next(_cols.begin(), _row_ptr[local_row]);
323  auto cit1 = std::next(_cols.begin(), _row_ptr[local_row + 1]);
324 
325  // Find position of column index and insert data
326  auto cit = std::lower_bound(cit0, cit1, local_col);
327  assert(cit != cit1);
328  assert(*cit == local_col);
329  std::size_t d = std::distance(_cols.begin(), cit);
330  _unpack_pos.push_back(d);
331  }
332  }
333 
336  MatrixCSR(MatrixCSR&& A) = default;
337 
341  void set(T x) { std::fill(_data.begin(), _data.end(), x); }
342 
355  void set(const std::span<const T>& x,
356  const std::span<const std::int32_t>& rows,
357  const std::span<const std::int32_t>& cols)
358  {
359  impl::set_csr(_data, _cols, _row_ptr, x, rows, cols,
360  _index_maps[0]->size_local());
361  }
362 
375  void add(const std::span<const T>& x,
376  const std::span<const std::int32_t>& rows,
377  const std::span<const std::int32_t>& cols)
378  {
379  impl::add_csr(_data, _cols, _row_ptr, x, rows, cols);
380  }
381 
383  std::int32_t num_owned_rows() const { return _index_maps[0]->size_local(); }
384 
386  std::int32_t num_all_rows() const { return _row_ptr.size() - 1; }
387 
395  std::vector<T> to_dense() const
396  {
397  const std::size_t nrows = num_all_rows();
398  const std::size_t ncols
399  = _index_maps[1]->size_local() + _index_maps[1]->num_ghosts();
400  std::vector<T> A(nrows * ncols);
401  for (std::size_t r = 0; r < nrows; ++r)
402  for (std::int32_t j = _row_ptr[r]; j < _row_ptr[r + 1]; ++j)
403  A[r * ncols + _cols[j]] = _data[j];
404 
405  return A;
406  }
407 
411  void finalize()
412  {
413  finalize_begin();
414  finalize_end();
415  }
416 
425  {
426  const std::int32_t local_size0 = _index_maps[0]->size_local();
427  const std::int32_t num_ghosts0 = _index_maps[0]->num_ghosts();
428 
429  // For each ghost row, pack and send values to send to neighborhood
430  std::vector<int> insert_pos = _val_send_disp;
431  std::vector<T> ghost_value_data(_val_send_disp.back());
432  for (int i = 0; i < num_ghosts0; ++i)
433  {
434  const int rank = _ghost_row_to_rank[i];
435 
436  // Get position in send buffer to place data to send to this
437  // neighbour
438  const std::int32_t val_pos = insert_pos[rank];
439  std::copy(std::next(_data.data(), _row_ptr[local_size0 + i]),
440  std::next(_data.data(), _row_ptr[local_size0 + i + 1]),
441  std::next(ghost_value_data.begin(), val_pos));
442  insert_pos[rank]
443  += _row_ptr[local_size0 + i + 1] - _row_ptr[local_size0 + i];
444  }
445 
446  _ghost_value_data_in.resize(_val_recv_disp.back());
447 
448  // Compute data sizes for send and receive from displacements
449  std::vector<int> val_send_count(_val_send_disp.size() - 1);
450  std::adjacent_difference(std::next(_val_send_disp.begin()),
451  _val_send_disp.end(), val_send_count.begin());
452 
453  std::vector<int> val_recv_count(_val_recv_disp.size() - 1);
454  std::adjacent_difference(std::next(_val_recv_disp.begin()),
455  _val_recv_disp.end(), val_recv_count.begin());
456 
457  int status = MPI_Ineighbor_alltoallv(
458  ghost_value_data.data(), val_send_count.data(), _val_send_disp.data(),
459  dolfinx::MPI::mpi_type<T>(), _ghost_value_data_in.data(),
460  val_recv_count.data(), _val_recv_disp.data(),
461  dolfinx::MPI::mpi_type<T>(), _comm.comm(), &_request);
462  assert(status == MPI_SUCCESS);
463  }
464 
471  {
472  int status = MPI_Wait(&_request, MPI_STATUS_IGNORE);
473  assert(status == MPI_SUCCESS);
474 
475  // Add to local rows
476  assert(_ghost_value_data_in.size() == _unpack_pos.size());
477  for (std::size_t i = 0; i < _ghost_value_data_in.size(); ++i)
478  _data[_unpack_pos[i]] += _ghost_value_data_in[i];
479 
480  // Set ghost row data to zero
481  const std::int32_t local_size0 = _index_maps[0]->size_local();
482  std::fill(std::next(_data.begin(), _row_ptr[local_size0]), _data.end(), 0);
483  }
484 
486  double norm_squared() const
487  {
488  const std::size_t num_owned_rows = _index_maps[0]->size_local();
489  assert(num_owned_rows < _row_ptr.size());
490 
491  const double norm_sq_local = std::accumulate(
492  _data.cbegin(), std::next(_data.cbegin(), _row_ptr[num_owned_rows]),
493  double(0), [](double norm, T y) { return norm + std::norm(y); });
494  double norm_sq;
495  MPI_Allreduce(&norm_sq_local, &norm_sq, 1, MPI_DOUBLE, MPI_SUM,
496  _comm.comm());
497  return norm_sq;
498  }
499 
506  const std::array<std::shared_ptr<const common::IndexMap>, 2>&
507  index_maps() const
508  {
509  return _index_maps;
510  }
511 
514  std::vector<T>& values() { return _data; }
515 
518  const std::vector<T>& values() const { return _data; }
519 
522  const std::vector<std::int32_t>& row_ptr() const { return _row_ptr; }
523 
526  const std::vector<std::int32_t>& cols() const { return _cols; }
527 
535  const std::vector<std::int32_t>& off_diag_offset() const
536  {
537  return _off_diagonal_offset;
538  }
539 
540 private:
541  // Maps for the distribution of the ows and columns
542  std::array<std::shared_ptr<const common::IndexMap>, 2> _index_maps;
543 
544  // Block sizes
545  std::array<int, 2> _bs;
546 
547  // Matrix data
548  std::vector<T, Allocator> _data;
549  std::vector<std::int32_t> _cols, _row_ptr;
550 
551  // Start of off-diagonal (unowned columns) on each row
552  std::vector<std::int32_t> _off_diagonal_offset;
553 
554  // Neighborhood communicator (ghost->owner communicator for rows)
555  dolfinx::MPI::Comm _comm;
556 
557  // -- Precomputed data for finalize/update
558 
559  // Request in non-blocking communication
560  MPI_Request _request;
561 
562  // Position in _data to add received data
563  std::vector<int> _unpack_pos;
564 
565  // Displacements for alltoall for each neighbor when sending and
566  // receiving
567  std::vector<int> _val_send_disp, _val_recv_disp;
568 
569  // Ownership of each row, by neighbor (for the neighbourhood defined
570  // on _comm)
571  std::vector<int> _ghost_row_to_rank;
572 
573  // Temporary store for finalize data during non-blocking communication
574  std::vector<T> _ghost_value_data_in;
575 };
576 
577 } // namespace dolfinx::la
A duplicate MPI communicator and manage lifetime of the communicator.
Definition: MPI.h:41
Distributed sparse matrix.
Definition: MatrixCSR.h:127
auto mat_set_values()
Insertion functor for setting values in matrix. It is typically used in finite element assembly funct...
Definition: MatrixCSR.h:140
void set(const std::span< const T > &x, const std::span< const std::int32_t > &rows, const std::span< const std::int32_t > &cols)
Set values in the matrix.
Definition: MatrixCSR.h:355
std::vector< T > to_dense() const
Copy to a dense matrix.
Definition: MatrixCSR.h:395
Allocator allocator_type
The allocator type.
Definition: MatrixCSR.h:133
void finalize()
Transfer ghost row data to the owning ranks accumulating received values on the owned rows,...
Definition: MatrixCSR.h:411
std::int32_t num_owned_rows() const
Number of local rows excluding ghost rows.
Definition: MatrixCSR.h:383
MatrixCSR(const SparsityPattern &p, const Allocator &alloc=Allocator())
Create a distributed matrix.
Definition: MatrixCSR.h:170
const std::vector< std::int32_t > & cols() const
Get local column indices.
Definition: MatrixCSR.h:526
auto mat_add_values()
Insertion functor for accumulating values in matrix. It is typically used in finite element assembly ...
Definition: MatrixCSR.h:155
void set(T x)
Set all non-zero local entries to a value including entries in ghost rows.
Definition: MatrixCSR.h:341
MatrixCSR(MatrixCSR &&A)=default
Move constructor.
void finalize_end()
End transfer of ghost row data to owning ranks.
Definition: MatrixCSR.h:470
const std::array< std::shared_ptr< const common::IndexMap >, 2 > & index_maps() const
Index maps for the row and column space. The row IndexMap contains ghost entries for rows which may b...
Definition: MatrixCSR.h:507
const std::vector< std::int32_t > & row_ptr() const
Get local row pointers.
Definition: MatrixCSR.h:522
const std::vector< T > & values() const
Get local values (const version)
Definition: MatrixCSR.h:518
void add(const std::span< const T > &x, const std::span< const std::int32_t > &rows, const std::span< const std::int32_t > &cols)
Accumulate values in the matrix.
Definition: MatrixCSR.h:375
std::vector< T > & values()
Get local data values.
Definition: MatrixCSR.h:514
std::int32_t num_all_rows() const
Number of local rows including ghost rows.
Definition: MatrixCSR.h:386
double norm_squared() const
Compute the Frobenius norm squared.
Definition: MatrixCSR.h:486
T value_type
The value type.
Definition: MatrixCSR.h:130
void finalize_begin()
Begin transfer of ghost row data to owning ranks, where it will be accumulated into existing owned ro...
Definition: MatrixCSR.h:424
const std::vector< std::int32_t > & off_diag_offset() const
Get the start of off-diagonal (unowned columns) on each row, allowing the matrix to be split (virtual...
Definition: MatrixCSR.h:535
This class provides a sparsity pattern data structure that can be used to initialize sparse matrices....
Definition: SparsityPattern.h:34
std::shared_ptr< const common::IndexMap > index_map(int dim) const
Index map for given dimension dimension. Returns the index map for rows and columns that will be set ...
Definition: SparsityPattern.cpp:194
int block_size(int dim) const
Return index map block size for dimension dim.
Definition: SparsityPattern.cpp:225
const graph::AdjacencyList< std::int32_t > & graph() const
Sparsity pattern graph after assembly. Uses local indices for the columns.
Definition: SparsityPattern.cpp:415
std::int64_t num_nonzeros() const
Number of nonzeros on this rank after assembly, including ghost rows.
Definition: SparsityPattern.cpp:394
common::IndexMap column_index_map() const
Builds the index map for columns after assembly of the sparsity pattern.
Definition: SparsityPattern.cpp:214
std::span< const int > off_diagonal_offset() const
Row-wise start of off-diagonal (unowned columns) on each row.
Definition: SparsityPattern.cpp:422
int size(MPI_Comm comm)
Return size of the group (number of processes) associated with the communicator.
Definition: MPI.cpp:84
int rank(MPI_Comm comm)
Return process rank for the communicator.
Definition: MPI.cpp:76
constexpr std::array< std::int64_t, 2 > local_range(int rank, std::int64_t N, int size)
Return local range for the calling process, partitioning the global [0, N - 1] range across all ranks...
Definition: MPI.h:83
Linear algebra interface.
Definition: sparsitybuild.h:15
auto norm(const Vector< T, Allocator > &a, Norm type=Norm::l2)
Compute the norm of the vector.
Definition: Vector.h:252