DOLFINx  0.4.0
DOLFINx C++ interface
MatrixCSR.h
// Copyright (C) 2021-2022 Garth N. Wells and Chris N. Richardson
//
// This file is part of DOLFINx (https://www.fenicsproject.org)
//
// SPDX-License-Identifier: LGPL-3.0-or-later

#pragma once

#include "SparsityPattern.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <complex>
#include <cstdint>
#include <dolfinx/common/MPI.h>
#include <dolfinx/graph/AdjacencyList.h>
#include <map>
#include <memory>
#include <mpi.h>
#include <numeric>
#include <stdexcept>
#include <utility>
#include <vector>
#include <xtensor/xtensor.hpp>
#include <xtl/xspan.hpp>

namespace dolfinx::la
{

namespace impl
{
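/// Set a dense block of values in a CSR matrix: data[xrows, xcols] = x.
/// All entries being set must already exist in the sparsity pattern.
/// @param[out] data CSR matrix data array
/// @param[in] cols CSR column indices
/// @param[in] row_ptr Offset of each row in `data` and `cols`
/// @param[in] x Dense block of values (row-major) to set
/// @param[in] xrows Local row indices of the block
/// @param[in] xcols Local column indices of the block
/// @param[in] local_size Number of rows that may be set (ghost rows
/// cannot be set)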
template <typename U, typename V, typename W, typename X>
void set_csr(U&& data, const V& cols, const V& row_ptr, const W& x,
             const X& xrows, const X& xcols,
             [[maybe_unused]] typename X::value_type local_size)
{
  assert(x.size() == xrows.size() * xcols.size());
  for (std::size_t r = 0; r < xrows.size(); ++r)
  {
    // Row index and current data row
    auto row = xrows[r];
    using T = typename W::value_type;
    const T* xr = x.data() + r * xcols.size();

#ifndef NDEBUG
    if (row >= local_size)
      throw std::runtime_error("Local row out of range");
#endif

    // Column indices for row
    auto cit0 = std::next(cols.begin(), row_ptr[row]);
    auto cit1 = std::next(cols.begin(), row_ptr[row + 1]);
    for (std::size_t c = 0; c < xcols.size(); ++c)
    {
      // Find position of column index. The entry must exist in the
      // sparsity pattern, otherwise data would be written to the wrong
      // position.
      auto it = std::lower_bound(cit0, cit1, xcols[c]);
      assert(it != cit1);
      assert(*it == xcols[c]);
      std::size_t d = std::distance(cols.begin(), it);
      assert(d < data.size());
      data[d] = xr[c];
    }
  }
}

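/// Add a dense block of values to a CSR matrix: data[xrows, xcols] += x.
/// All entries being accumulated must already exist in the sparsity
/// pattern. Ghost rows may be added to; their values are sent to the
/// owning rank by MatrixCSR::finalize.
/// @param[out] data CSR matrix data array
/// @param[in] cols CSR column indices
/// @param[in] row_ptr Offset of each row in `data` and `cols`
/// @param[in] x Dense block of values (row-major) to add
/// @param[in] xrows Local row indices of the block
/// @param[in] xcols Local column indices of the block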
template <typename U, typename V, typename W, typename X>
void add_csr(U&& data, const V& cols, const V& row_ptr, const W& x,
             const X& xrows, const X& xcols)
{
  assert(x.size() == xrows.size() * xcols.size());
  for (std::size_t r = 0; r < xrows.size(); ++r)
  {
    // Row index and current data row
    auto row = xrows[r];
    using T = typename W::value_type;
    const T* xr = x.data() + r * xcols.size();

#ifndef NDEBUG
    // row_ptr has (number of rows + 1) entries, so the largest valid
    // row index is row_ptr.size() - 2
    if (row >= int(row_ptr.size()) - 1)
      throw std::runtime_error("Local row out of range");
#endif

    // Column indices for row
    auto cit0 = std::next(cols.begin(), row_ptr[row]);
    auto cit1 = std::next(cols.begin(), row_ptr[row + 1]);
    for (std::size_t c = 0; c < xcols.size(); ++c)
    {
      // Find position of column index. The entry must exist in the
      // sparsity pattern.
      auto it = std::lower_bound(cit0, cit1, xcols[c]);
      assert(it != cit1);
      assert(*it == xcols[c]);
      std::size_t d = std::distance(cols.begin(), it);
      assert(d < data.size());
      data[d] += xr[c];
    }
  }
}
} // namespace impl

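/// Distributed sparse matrix
///
/// The matrix storage is compressed sparse row (CSR), and the matrix is
/// distributed row-wise across MPI ranks. Values can be accumulated into
/// ghost rows and are sent to the owning rank when finalize() is called.
///
/// A minimal usage sketch, assuming an already assembled
/// la::SparsityPattern `sp` (the names `sp`, `rows`, `cols` and `vals`
/// are illustrative, not part of this header):
/// \code{.cpp}
/// la::MatrixCSR<double> A(sp);
/// std::vector<std::int32_t> rows = {0, 1}, cols = {0, 1};
/// std::vector<double> vals = {1.0, 2.0, 3.0, 4.0};
/// A.add(vals, rows, cols); // accumulate a 2x2 dense block
/// A.finalize();            // send ghost-row contributions to owners
/// \endcode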
template <typename T, class Allocator = std::allocator<T>>
class MatrixCSR
{
public:
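  /// The value type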
  using value_type = T;

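  /// The allocator type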
  using allocator_type = Allocator;

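  /// Insertion functor for setting values in a matrix. It is typically
  /// used in finite element assembly functions.
  /// @return Function for inserting values into the matrix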
  auto mat_set_values()
  {
    return [&](const xtl::span<const std::int32_t>& rows,
               const xtl::span<const std::int32_t>& cols,
               const xtl::span<const T>& data) -> int
    {
      this->set(data, rows, cols);
      return 0;
    };
  }

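  /// Insertion functor for accumulating values in a matrix. It is
  /// typically used in finite element assembly functions.
  /// @return Function for accumulating values in the matrix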
  auto mat_add_values()
  {
    return [&](const xtl::span<const std::int32_t>& rows,
               const xtl::span<const std::int32_t>& cols,
               const xtl::span<const T>& data) -> int
    {
      this->add(data, rows, cols);
      return 0;
    };
  }

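  /// Create a distributed matrix
  /// @param[in] p The sparsity pattern that defines the non-zero
  /// structure of the matrix
  /// @param[in] alloc Allocator for the matrix data storage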
  MatrixCSR(const SparsityPattern& p, const Allocator& alloc = Allocator())
      : _index_maps({p.index_map(0),
                     std::make_shared<common::IndexMap>(p.column_index_map())}),
        _bs({p.block_size(0), p.block_size(1)}),
        _data(p.num_nonzeros(), 0, alloc),
        _cols(p.graph().array().begin(), p.graph().array().end()),
        _row_ptr(p.graph().offsets().begin(), p.graph().offsets().end()),
        _comm(p.index_map(0)->comm(common::IndexMap::Direction::reverse))
  {
    // TODO: handle block sizes
    if (_bs[0] > 1 or _bs[1] > 1)
      throw std::runtime_error("Block size not yet supported");

    // Compute off-diagonal offset for each row
    xtl::span<const std::int32_t> num_diag_nnz = p.off_diagonal_offset();
    _off_diagonal_offset.reserve(num_diag_nnz.size());
    std::transform(num_diag_nnz.begin(), num_diag_nnz.end(), _row_ptr.begin(),
                   std::back_inserter(_off_diagonal_offset),
                   std::plus<std::int32_t>());

    // Precompute some data for ghost updates via MPI
    const std::array local_size
        = {_index_maps[0]->size_local(), _index_maps[1]->size_local()};
    const std::array local_range
        = {_index_maps[0]->local_range(), _index_maps[1]->local_range()};
    const std::int32_t num_ghosts0 = _index_maps[0]->num_ghosts();
    const std::vector<int> ghost_owners0 = _index_maps[0]->ghost_owner_rank();
    const std::vector<std::int64_t>& ghosts0 = _index_maps[0]->ghosts();
    const std::vector<std::int64_t>& ghosts1 = _index_maps[1]->ghosts();

    const std::vector<int> dest_ranks
        = dolfinx::MPI::neighbors(_comm.comm())[1];
    const int num_neighbors = dest_ranks.size();

    // Global-to-local ranks for neighborhood
    std::map<int, std::int32_t> dest_proc_to_neighbor;
    for (std::size_t i = 0; i < dest_ranks.size(); ++i)
      dest_proc_to_neighbor.insert({dest_ranks[i], i});

    // Ownership of each ghost row using neighbor rank
    _ghost_row_to_neighbor_rank.resize(num_ghosts0, -1);
    for (int i = 0; i < num_ghosts0; ++i)
    {
      const auto it = dest_proc_to_neighbor.find(ghost_owners0[i]);
      assert(it != dest_proc_to_neighbor.end());
      _ghost_row_to_neighbor_rank[i] = it->second;
    }

    // Compute size of data to send to each neighbor
    std::vector<std::int32_t> data_per_proc(num_neighbors, 0);
    for (int i = 0; i < num_ghosts0; ++i)
    {
      assert(_ghost_row_to_neighbor_rank[i] < (int)data_per_proc.size());
      data_per_proc[_ghost_row_to_neighbor_rank[i]]
          += _row_ptr[local_size[0] + i + 1] - _row_ptr[local_size[0] + i];
    }

    // Compute send displacements for values and indices (x2)
    _val_send_disp.resize(num_neighbors + 1, 0);
    std::partial_sum(data_per_proc.begin(), data_per_proc.end(),
                     std::next(_val_send_disp.begin()));

    std::vector<int> index_send_disp(num_neighbors + 1);
    std::transform(_val_send_disp.begin(), _val_send_disp.end(),
                   index_send_disp.begin(), [](int d) { return d * 2; });

    // For each ghost row, pack and send indices to neighborhood
    std::vector<int> insert_pos(index_send_disp);
    std::vector<std::int64_t> ghost_index_data(index_send_disp.back());
    for (int i = 0; i < num_ghosts0; ++i)
    {
      const int neighbor_rank = _ghost_row_to_neighbor_rank[i];
      int row_id = local_size[0] + i;
      for (int j = _row_ptr[row_id]; j < _row_ptr[row_id + 1]; ++j)
      {
        // Get position in send buffer to place data for this neighbour
        const std::int32_t idx_pos = insert_pos[neighbor_rank];

        // Pack send data (row, col) as global indices
        ghost_index_data[idx_pos] = ghosts0[i];
        if (const std::int32_t col_local = _cols[j]; col_local < local_size[1])
          ghost_index_data[idx_pos + 1] = col_local + local_range[1][0];
        else
          ghost_index_data[idx_pos + 1] = ghosts1[col_local - local_size[1]];

        insert_pos[neighbor_rank] += 2;
      }
    }

    // Create and communicate adjacency list to neighborhood
    const graph::AdjacencyList<std::int64_t> ghost_index_data_out(
        std::move(ghost_index_data), std::move(index_send_disp));
    const graph::AdjacencyList<std::int64_t> ghost_index_data_in
        = dolfinx::MPI::neighbor_all_to_all(_comm.comm(), ghost_index_data_out);

    // Store received offsets for future use, when transferring data values
    _val_recv_disp.resize(ghost_index_data_in.offsets().size());
    std::transform(ghost_index_data_in.offsets().begin(),
                   ghost_index_data_in.offsets().end(), _val_recv_disp.begin(),
                   [](int d) { return d / 2; });

    // Global-to-local map for ghost columns
    std::map<std::int64_t, std::int32_t> global_to_local;
    std::int32_t local_i = local_size[1];
    for (std::int64_t global_i : ghosts1)
      global_to_local.insert({global_i, local_i++});

    // Compute location in which data for each index should be stored
    // when received
    const std::vector<std::int64_t>& ghost_index_array
        = ghost_index_data_in.array();
    for (std::size_t i = 0; i < ghost_index_array.size(); i += 2)
    {
      // Row must be on this process
      const std::int32_t local_row = ghost_index_array[i] - local_range[0][0];
      assert(local_row >= 0 and local_row < local_size[0]);

      // Column may be owned or unowned
      std::int32_t local_col = ghost_index_array[i + 1] - local_range[1][0];
      if (local_col < 0 or local_col >= local_size[1])
      {
        const auto it = global_to_local.find(ghost_index_array[i + 1]);
        assert(it != global_to_local.end());
        local_col = it->second;
      }
      auto cit0 = std::next(_cols.begin(), _row_ptr[local_row]);
      auto cit1 = std::next(_cols.begin(), _row_ptr[local_row + 1]);

      // Find position of column index and insert data
      auto cit = std::lower_bound(cit0, cit1, local_col);
      assert(cit != cit1);
      assert(*cit == local_col);
      std::size_t d = std::distance(_cols.begin(), cit);
      _unpack_pos.push_back(d);
    }
  }

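  /// Move constructor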
  MatrixCSR(MatrixCSR&& A) = default;

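  /// Set all non-zero local entries to a value, including entries in
  /// ghost rows
  /// @param[in] x The value to set all non-zero entries to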
  void set(T x) { std::fill(_data.begin(), _data.end(), x); }

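  /// Set values in the matrix. Entries must already exist in the
  /// sparsity pattern, and only owned (non-ghost) rows may be set.
  /// @param[in] x Dense block of values (row-major) to set
  /// @param[in] rows Local row indices of the block
  /// @param[in] cols Local column indices of the block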
  void set(const xtl::span<const T>& x,
           const xtl::span<const std::int32_t>& rows,
           const xtl::span<const std::int32_t>& cols)
  {
    impl::set_csr(_data, _cols, _row_ptr, x, rows, cols,
                  _index_maps[0]->size_local());
  }

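  /// Accumulate values in the matrix. Entries must already exist in the
  /// sparsity pattern; ghost rows may be added to.
  /// @param[in] x Dense block of values (row-major) to add
  /// @param[in] rows Local row indices of the block
  /// @param[in] cols Local column indices of the block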
  void add(const xtl::span<const T>& x,
           const xtl::span<const std::int32_t>& rows,
           const xtl::span<const std::int32_t>& cols)
  {
    impl::add_csr(_data, _cols, _row_ptr, x, rows, cols);
  }

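  /// Number of local rows excluding ghost rows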
  std::int32_t num_owned_rows() const { return _index_maps[0]->size_local(); }

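  /// Number of local rows including ghost rows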
  std::int32_t num_all_rows() const { return _row_ptr.size() - 1; }

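  /// Copy to a dense matrix
  /// @return Dense copy of the local part of the matrix, including
  /// ghost rows and ghost columns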
  xt::xtensor<T, 2> to_dense() const
  {
    const std::int32_t nrows = num_all_rows();
    const std::int32_t ncols
        = _index_maps[1]->size_local() + _index_maps[1]->num_ghosts();
    xt::xtensor<T, 2> A = xt::zeros<T>({nrows, ncols});
    for (std::int32_t r = 0; r < nrows; ++r)
      for (int j = _row_ptr[r]; j < _row_ptr[r + 1]; ++j)
        A(r, _cols[j]) = _data[j];
    return A;
  }

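  /// Transfer ghost row data to the owning ranks, accumulating received
  /// values on the owned rows and zeroing the ghost rows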
  void finalize()
  {
    finalize_begin();
    finalize_end();
  }

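  /// Begin transfer of ghost row data to owning ranks, where it will be
  /// accumulated into existing owned rows
  /// @note This is a non-blocking operation and must be followed by a
  /// call to finalize_end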
  void finalize_begin()
  {
    const std::int32_t local_size0 = _index_maps[0]->size_local();
    const std::int32_t num_ghosts0 = _index_maps[0]->num_ghosts();

    // For each ghost row, pack values to send to the neighborhood. The
    // send buffer is a class member because it must remain valid until
    // the MPI_Wait in finalize_end.
    std::vector<int> insert_pos(_val_send_disp);
    _ghost_value_data_out.resize(_val_send_disp.back());
    for (int i = 0; i < num_ghosts0; ++i)
    {
      const int neighbor_rank = _ghost_row_to_neighbor_rank[i];

      // Get position in send buffer to place data to send to this neighbour
      const std::int32_t val_pos = insert_pos[neighbor_rank];
      std::copy(std::next(_data.data(), _row_ptr[local_size0 + i]),
                std::next(_data.data(), _row_ptr[local_size0 + i + 1]),
                std::next(_ghost_value_data_out.begin(), val_pos));
      insert_pos[neighbor_rank]
          += _row_ptr[local_size0 + i + 1] - _row_ptr[local_size0 + i];
    }

    _ghost_value_data_in.resize(_val_recv_disp.back());

    // Compute data sizes for send and receive from displacements
    std::vector<int> val_send_count(_val_send_disp.size() - 1);
    std::adjacent_difference(std::next(_val_send_disp.begin()),
                             _val_send_disp.end(), val_send_count.begin());

    std::vector<int> val_recv_count(_val_recv_disp.size() - 1);
    std::adjacent_difference(std::next(_val_recv_disp.begin()),
                             _val_recv_disp.end(), val_recv_count.begin());

    [[maybe_unused]] int status = MPI_Ineighbor_alltoallv(
        _ghost_value_data_out.data(), val_send_count.data(),
        _val_send_disp.data(), dolfinx::MPI::mpi_type<T>(),
        _ghost_value_data_in.data(), val_recv_count.data(),
        _val_recv_disp.data(), dolfinx::MPI::mpi_type<T>(), _comm.comm(),
        &_request);
    assert(status == MPI_SUCCESS);
  }

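  /// End transfer of ghost row data to owning ranks. Waits on the
  /// communication started by finalize_begin, adds the received values
  /// to the owned rows and zeros the ghost rows.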
  void finalize_end()
  {
    [[maybe_unused]] int status = MPI_Wait(&_request, MPI_STATUS_IGNORE);
    assert(status == MPI_SUCCESS);

    // Add to local rows
    assert(_ghost_value_data_in.size() == _unpack_pos.size());
    for (std::size_t i = 0; i < _ghost_value_data_in.size(); ++i)
      _data[_unpack_pos[i]] += _ghost_value_data_in[i];

    // Set ghost row data to zero
    const std::int32_t local_size0 = _index_maps[0]->size_local();
    std::fill(std::next(_data.begin(), _row_ptr[local_size0]), _data.end(), 0);
  }

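  /// Compute the Frobenius norm squared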
  double norm_squared() const
  {
    const std::size_t num_owned_rows = _index_maps[0]->size_local();
    assert(num_owned_rows < _row_ptr.size());

    const double norm_sq_local = std::accumulate(
        _data.cbegin(), std::next(_data.cbegin(), _row_ptr[num_owned_rows]),
        double(0), [](double norm, T y) { return norm + std::norm(y); });
    double norm_sq;
    MPI_Allreduce(&norm_sq_local, &norm_sq, 1, MPI_DOUBLE, MPI_SUM,
                  _comm.comm());
    return norm_sq;
  }

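  /// Index maps for the row and column space. The row IndexMap contains
  /// ghost entries for rows which may be inserted into, and the column
  /// IndexMap contains all local and ghost columns that may exist in
  /// the owned rows.
  /// @return Row (0) and column (1) index maps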
  const std::array<std::shared_ptr<const common::IndexMap>, 2>&
  index_maps() const
  {
    return _index_maps;
  }

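  /// Get local data values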
  std::vector<T>& values() { return _data; }

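  /// Get local values (const version)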
  const std::vector<T>& values() const { return _data; }

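  /// Get local row pointers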
  const std::vector<std::int32_t>& row_ptr() const { return _row_ptr; }

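  /// Get local column indices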
  const std::vector<std::int32_t>& cols() const { return _cols; }

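  /// Get the start of off-diagonal (unowned columns) on each row,
  /// allowing the matrix to be split (virtually) into two parts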
  const std::vector<std::int32_t>& off_diag_offset() const
  {
    return _off_diagonal_offset;
  }

private:
  // Maps describing the data layout for rows and columns
  std::array<std::shared_ptr<const common::IndexMap>, 2> _index_maps;

  // Block sizes
  std::array<int, 2> _bs;

  // Matrix data
  std::vector<T, Allocator> _data;
  std::vector<std::int32_t> _cols, _row_ptr;

  // Start of off-diagonal (unowned columns) on each row
  std::vector<std::int32_t> _off_diagonal_offset;

  // Neighborhood communicator (ghost->owner communicator for rows)
  dolfinx::MPI::Comm _comm;

  // -- Precomputed data for finalize/update

  // Request in non-blocking communication
  MPI_Request _request;

  // Position in _data to add received data
  std::vector<int> _unpack_pos;

  // Displacements for alltoall for each neighbor when sending and
  // receiving
  std::vector<int> _val_send_disp, _val_recv_disp;

  // Ownership of each row, by neighbor
  std::vector<int> _ghost_row_to_neighbor_rank;

  // Temporary stores for finalize data during non-blocking communication.
  // The send buffer must remain valid until the MPI_Wait in finalize_end,
  // so it is stored as a member rather than as a local variable in
  // finalize_begin.
  std::vector<T> _ghost_value_data_out, _ghost_value_data_in;
};

} // namespace dolfinx::la