#include "SparsityPattern.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <complex>
#include <cstdint>
#include <dolfinx/common/IndexMap.h>
#include <dolfinx/common/MPI.h>
#include <dolfinx/graph/AdjacencyList.h>
#include <functional>
#include <map>
#include <memory>
#include <numeric>
#include <stdexcept>
#include <utility>
#include <vector>
#include <xtensor/xtensor.hpp>
#include <xtl/xspan.hpp>
namespace dolfinx::la
{
namespace impl
{
/// Set data in a CSR matrix
template <typename U, typename V, typename W, typename X>
void set_csr(U&& data, const V& cols, const V& row_ptr, const W& x,
             const X& xrows, const X& xcols,
             [[maybe_unused]] typename X::value_type local_size)
{
  assert(x.size() == xrows.size() * xcols.size());
  for (std::size_t r = 0; r < xrows.size(); ++r)
  {
    // Row index and pointer to the r-th row of the dense block x
    auto row = xrows[r];
    using T = typename W::value_type;
    const T* xr = x.data() + r * xcols.size();

    if (row >= local_size)
      throw std::runtime_error("Local row out of range");

    // Column indices for this row of the CSR matrix
    auto cit0 = std::next(cols.begin(), row_ptr[row]);
    auto cit1 = std::next(cols.begin(), row_ptr[row + 1]);
    for (std::size_t c = 0; c < xcols.size(); ++c)
    {
      // Binary-search for the column in the (sorted) row and set the value
      auto it = std::lower_bound(cit0, cit1, xcols[c]);
      assert(it != cit1);
      std::size_t d = std::distance(cols.begin(), it);
      assert(d < data.size());
      data[d] = xr[c];
    }
  }
}
/// Add data to a CSR matrix
template <typename U, typename V, typename W, typename X>
void add_csr(U&& data, const V& cols, const V& row_ptr, const W& x,
             const X& xrows, const X& xcols)
{
  assert(x.size() == xrows.size() * xcols.size());
  for (std::size_t r = 0; r < xrows.size(); ++r)
  {
    // Row index and pointer to the r-th row of the dense block x
    auto row = xrows[r];
    using T = typename W::value_type;
    const T* xr = x.data() + r * xcols.size();

    if (row >= (int)row_ptr.size())
      throw std::runtime_error("Local row out of range");

    // Column indices for this row of the CSR matrix
    auto cit0 = std::next(cols.begin(), row_ptr[row]);
    auto cit1 = std::next(cols.begin(), row_ptr[row + 1]);
    for (std::size_t c = 0; c < xcols.size(); ++c)
    {
      // Binary-search for the column in the (sorted) row and accumulate
      auto it = std::lower_bound(cit0, cit1, xcols[c]);
      assert(it != cit1);
      std::size_t d = std::distance(cols.begin(), it);
      assert(d < data.size());
      data[d] += xr[c];
    }
  }
}
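Because set_csr and add_csr are templated on their container types, they can be exercised in isolation with plain std::vector storage, without any MPI machinery. A minimal sketch (not part of the source; it assumes the two kernels are reachable as dolfinx::la::impl, as in the listing above):

#include <cassert>
#include <cstdint>
#include <vector>

int main()
{
  // 3x3 CSR sparsity: row 0 -> {0, 1}, row 1 -> {1}, row 2 -> {0, 2}
  std::vector<std::int32_t> cols = {0, 1, 1, 0, 2};
  std::vector<std::int32_t> row_ptr = {0, 2, 3, 5};
  std::vector<double> data(cols.size(), 0.0);

  // Accumulate a 2x1 dense block into rows {0, 2} x column {0}
  std::vector<std::int32_t> xrows = {0, 2};
  std::vector<std::int32_t> xcols = {0};
  std::vector<double> x = {1.0, 2.0};
  dolfinx::la::impl::add_csr(data, cols, row_ptr, x, xrows, xcols);
  assert(data[0] == 1.0 and data[3] == 2.0);

  // set_csr overwrites instead; the final argument bounds the row indices
  dolfinx::la::impl::set_csr(data, cols, row_ptr, x, xrows, xcols,
                             std::int32_t(3));
  return 0;
}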
} // namespace impl

/// Distributed sparse matrix
template <typename T, class Allocator = std::allocator<T>>
class MatrixCSR
{
public:
  /// The value type
  using value_type = T;

  /// The allocator type
  using allocator_type = Allocator;
  /// Insertion functor for setting values in the matrix. It is typically
  /// used in finite element assembly functions.
  auto mat_set_values()
  {
    return [&](const xtl::span<const std::int32_t>& rows,
               const xtl::span<const std::int32_t>& cols,
               const xtl::span<const T>& data) -> int
    {
      this->set(data, rows, cols);
      return 0;
    };
  }
  /// Insertion functor for accumulating values in the matrix. It is
  /// typically used in finite element assembly functions.
  auto mat_add_values()
  {
    return [&](const xtl::span<const std::int32_t>& rows,
               const xtl::span<const std::int32_t>& cols,
               const xtl::span<const T>& data) -> int
    {
      this->add(data, rows, cols);
      return 0;
    };
  }
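The functor simply forwards to this->set/this->add, so it can be handed to any routine expecting a (rows, cols, values) callback. A hypothetical scatter of one element matrix (the helper name assemble_one_cell and the index choices are illustrative; it assumes the pattern of A contains the 2x2 block (0,1)x(0,1)):

#include <cstdint>
#include <vector>

void assemble_one_cell(dolfinx::la::MatrixCSR<double>& A)
{
  auto add_fn = A.mat_add_values();
  std::vector<std::int32_t> rows = {0, 1};
  std::vector<std::int32_t> cols = {0, 1};
  std::vector<double> Ae = {1.0, 2.0,
                            3.0, 4.0}; // 2x2 element matrix, row-major
  add_fn(rows, cols, Ae);              // same effect as A.add(Ae, rows, cols)
}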
  /// Create a distributed matrix
  MatrixCSR(const SparsityPattern& p, const Allocator& alloc = Allocator())
      : _index_maps({p.index_map(0),
                     std::make_shared<common::IndexMap>(p.column_index_map())}),
        _bs({p.block_size(0), p.block_size(1)}),
        _data(p.num_nonzeros(), 0, alloc),
        _cols(p.graph().array().begin(), p.graph().array().end()),
        _row_ptr(p.graph().offsets().begin(), p.graph().offsets().end()),
        _comm(p.index_map(0)->comm(common::IndexMap::Direction::reverse))
  {
    // Block sizes > 1 are not yet handled
    if (_bs[0] > 1 or _bs[1] > 1)
      throw std::runtime_error("Block size not yet supported");
    // Compute the absolute offset of the off-diagonal (unowned columns)
    // block in each row: row start plus the owned-column count per row
    xtl::span<const int> num_diag_nnz = p.off_diagonal_offset();
    _off_diagonal_offset.reserve(num_diag_nnz.size());
    std::transform(num_diag_nnz.begin(), num_diag_nnz.end(), _row_ptr.begin(),
                   std::back_inserter(_off_diagonal_offset),
                   std::plus<std::int32_t>());
    // Precompute some data for ghost-row updates
    const std::array local_size
        = {_index_maps[0]->size_local(), _index_maps[1]->size_local()};
    const std::array local_range
        = {_index_maps[0]->local_range(), _index_maps[1]->local_range()};
    const std::int32_t num_ghosts0 = _index_maps[0]->num_ghosts();
    const std::vector<int> ghost_owners0 = _index_maps[0]->ghost_owner_rank();
    const std::vector<std::int64_t>& ghosts0 = _index_maps[0]->ghosts();
    const std::vector<std::int64_t>& ghosts1 = _index_maps[1]->ghosts();

    // Ranks that own rows that are ghosted on this rank (destinations on
    // the reverse-mode neighborhood communicator)
    const std::vector<int> dest_ranks
        = dolfinx::MPI::neighbors(_comm.comm())[1];
    const int num_neighbors = dest_ranks.size();
    // Map the owning rank of each ghost row to its neighborhood index
    std::map<int, std::int32_t> dest_proc_to_neighbor;
    for (std::size_t i = 0; i < dest_ranks.size(); ++i)
      dest_proc_to_neighbor.insert({dest_ranks[i], i});

    _ghost_row_to_neighbor_rank.resize(num_ghosts0, -1);
    for (int i = 0; i < num_ghosts0; ++i)
    {
      const auto it = dest_proc_to_neighbor.find(ghost_owners0[i]);
      assert(it != dest_proc_to_neighbor.end());
      _ghost_row_to_neighbor_rank[i] = it->second;
    }
    // Count the number of matrix entries to send to each neighbor
    std::vector<std::int32_t> data_per_proc(num_neighbors, 0);
    for (int i = 0; i < num_ghosts0; ++i)
    {
      assert(_ghost_row_to_neighbor_rank[i] < (int)data_per_proc.size());
      data_per_proc[_ghost_row_to_neighbor_rank[i]]
          += _row_ptr[local_size[0] + i + 1] - _row_ptr[local_size[0] + i];
    }

    // Compute value send displacements
    _val_send_disp.resize(num_neighbors + 1, 0);
    std::partial_sum(data_per_proc.begin(), data_per_proc.end(),
                     std::next(_val_send_disp.begin()));

    // Two (global) indices are sent for each value
    std::vector<int> index_send_disp(num_neighbors + 1);
    std::transform(_val_send_disp.begin(), _val_send_disp.end(),
                   index_send_disp.begin(), [](int d) { return d * 2; });
    // Pack global (row, column) index pairs for each ghost-row entry
    std::vector<int> insert_pos(index_send_disp);
    std::vector<std::int64_t> ghost_index_data(index_send_disp.back());
    for (int i = 0; i < num_ghosts0; ++i)
    {
      const int neighbor_rank = _ghost_row_to_neighbor_rank[i];
      int row_id = local_size[0] + i;
      for (int j = _row_ptr[row_id]; j < _row_ptr[row_id + 1]; ++j)
      {
        // Get the position in the send buffer
        const std::int32_t idx_pos = insert_pos[neighbor_rank];

        // Pack the global row and column indices
        ghost_index_data[idx_pos] = ghosts0[i];
        if (const std::int32_t col_local = _cols[j]; col_local < local_size[1])
          ghost_index_data[idx_pos + 1] = col_local + local_range[1][0];
        else
          ghost_index_data[idx_pos + 1] = ghosts1[col_local - local_size[1]];

        insert_pos[neighbor_rank] += 2;
      }
    }

    // Send the packed (row, column) pairs to the owning ranks
    const graph::AdjacencyList<std::int64_t> ghost_index_data_out(
        std::move(ghost_index_data), std::move(index_send_disp));
    const graph::AdjacencyList<std::int64_t> ghost_index_data_in
        = dolfinx::MPI::neighbor_all_to_all(_comm.comm(), ghost_index_data_out);
    // Store receive displacements for later use when transferring values
    _val_recv_disp.resize(ghost_index_data_in.offsets().size());
    std::transform(ghost_index_data_in.offsets().begin(),
                   ghost_index_data_in.offsets().end(), _val_recv_disp.begin(),
                   [](int d) { return d / 2; });

    // Build a map from global to local column indices
    std::map<std::int64_t, std::int32_t> global_to_local;
    std::int32_t local_i = local_size[1];
    for (std::int64_t global_i : ghosts1)
      global_to_local.insert({global_i, local_i++});
    // Compute the position in _data for each incoming (row, column) pair,
    // so received values can be accumulated directly
    const std::vector<std::int64_t>& ghost_index_array
        = ghost_index_data_in.array();
    for (std::size_t i = 0; i < ghost_index_array.size(); i += 2)
    {
      // Convert the global row index to a local owned row
      const std::int32_t local_row = ghost_index_array[i] - local_range[0][0];
      assert(local_row >= 0 and local_row < local_size[0]);

      // Convert the global column index to a local index (owned or ghost)
      std::int32_t local_col = ghost_index_array[i + 1] - local_range[1][0];
      if (local_col < 0 or local_col >= local_size[1])
      {
        const auto it = global_to_local.find(ghost_index_array[i + 1]);
        assert(it != global_to_local.end());
        local_col = it->second;
      }
      auto cit0 = std::next(_cols.begin(), _row_ptr[local_row]);
      auto cit1 = std::next(_cols.begin(), _row_ptr[local_row + 1]);

      // Find the position of the column index and store it for unpacking
      auto cit = std::lower_bound(cit0, cit1, local_col);
      assert(cit != cit1);
      assert(*cit == local_col);
      std::size_t d = std::distance(_cols.begin(), cit);
      _unpack_pos.push_back(d);
    }
  }

  /// Move constructor
  MatrixCSR(MatrixCSR&& A) = default;
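A matrix is always built from an assembled SparsityPattern, which fixes the nonzero structure before any values exist. A sketch, assuming the IndexMap and SparsityPattern constructor signatures implied by the calls above; the helper name make_matrix and the 100-row size are illustrative:

#include <memory>
#include <mpi.h>

dolfinx::la::MatrixCSR<double> make_matrix(MPI_Comm comm)
{
  // One index map shared by rows and columns (square matrix), block size 1
  auto map = std::make_shared<const dolfinx::common::IndexMap>(comm, 100);
  dolfinx::la::SparsityPattern p(comm, {{map, map}}, {{1, 1}});
  // ... p.insert(rows, cols) for every coupled pair of dofs ...
  p.assemble();
  return dolfinx::la::MatrixCSR<double>(p); // allocates p.num_nonzeros() values
}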
  /// Set all non-zero local entries to a value, including entries in
  /// ghost rows
  void set(T x) { std::fill(_data.begin(), _data.end(), x); }
  /// Set values in the matrix
  void set(const xtl::span<const T>& x,
           const xtl::span<const std::int32_t>& rows,
           const xtl::span<const std::int32_t>& cols)
  {
    impl::set_csr(_data, _cols, _row_ptr, x, rows, cols,
                  _index_maps[0]->size_local());
  }
  /// Accumulate values in the matrix
  void add(const xtl::span<const T>& x,
           const xtl::span<const std::int32_t>& rows,
           const xtl::span<const std::int32_t>& cols)
  {
    impl::add_csr(_data, _cols, _row_ptr, x, rows, cols);
  }

  /// Number of local rows excluding ghost rows
  std::int32_t num_owned_rows() const { return _index_maps[0]->size_local(); }

  /// Number of local rows including ghost rows
  std::int32_t num_all_rows() const { return _row_ptr.size() - 1; }
  /// Copy to a dense matrix
  xt::xtensor<T, 2> to_dense() const
  {
    const std::size_t nrows = num_all_rows();
    const std::size_t ncols
        = _index_maps[1]->size_local() + _index_maps[1]->num_ghosts();
    xt::xtensor<T, 2> A = xt::zeros<T>({nrows, ncols});
    for (std::size_t r = 0; r < nrows; ++r)
      for (int j = _row_ptr[r]; j < _row_ptr[r + 1]; ++j)
        A(r, _cols[j]) = _data[j];
    return A;
  }
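Since the dense copy includes ghost rows and columns, to_dense is mainly a debugging aid for small problems. An illustrative check (print_info is hypothetical and assumes A has at least one row and one column):

#include <iostream>

void print_info(const dolfinx::la::MatrixCSR<double>& A)
{
  xt::xtensor<double, 2> D = A.to_dense(); // includes ghost rows/columns
  std::cout << "dense copy: " << D.shape(0) << " x " << D.shape(1) << "\n";
  std::cout << "A(0, 0) = " << D(0, 0) << "\n";
}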
  /// Transfer ghost row data to the owning ranks, accumulating received
  /// values on the owned rows and zeroing the ghost rows
  void finalize()
  {
    finalize_begin();
    finalize_end();
  }

  /// Begin transfer of ghost row data to the owning ranks, where it will
  /// be accumulated into existing owned rows
  void finalize_begin()
  {
    const std::int32_t local_size0 = _index_maps[0]->size_local();
    const std::int32_t num_ghosts0 = _index_maps[0]->num_ghosts();

    // Pack ghost-row data into the send buffer, ordered by neighbor
    std::vector<int> insert_pos(_val_send_disp);
    std::vector<T> ghost_value_data(_val_send_disp.back());
    for (int i = 0; i < num_ghosts0; ++i)
    {
      const int neighbor_rank = _ghost_row_to_neighbor_rank[i];

      // Get the position in the send buffer for this ghost row's data
      const std::int32_t val_pos = insert_pos[neighbor_rank];
      std::copy(std::next(_data.data(), _row_ptr[local_size0 + i]),
                std::next(_data.data(), _row_ptr[local_size0 + i + 1]),
                std::next(ghost_value_data.begin(), val_pos));
      insert_pos[neighbor_rank]
          += _row_ptr[local_size0 + i + 1] - _row_ptr[local_size0 + i];
    }

    _ghost_value_data_in.resize(_val_recv_disp.back());

    // Compute per-neighbor send/receive counts from the displacements
    std::vector<int> val_send_count(_val_send_disp.size() - 1);
    std::adjacent_difference(std::next(_val_send_disp.begin()),
                             _val_send_disp.end(), val_send_count.begin());

    std::vector<int> val_recv_count(_val_recv_disp.size() - 1);
    std::adjacent_difference(std::next(_val_recv_disp.begin()),
                             _val_recv_disp.end(), val_recv_count.begin());

    // Non-blocking send of ghost values to the owning ranks
    int status = MPI_Ineighbor_alltoallv(
        ghost_value_data.data(), val_send_count.data(), _val_send_disp.data(),
        dolfinx::MPI::mpi_type<T>(), _ghost_value_data_in.data(),
        val_recv_count.data(), _val_recv_disp.data(),
        dolfinx::MPI::mpi_type<T>(), _comm.comm(), &_request);
    assert(status == MPI_SUCCESS);
  }
  /// End transfer of ghost row data to the owning ranks
  void finalize_end()
  {
    int status = MPI_Wait(&_request, MPI_STATUS_IGNORE);
    assert(status == MPI_SUCCESS);

    // Add the received ghost values to the owned rows, then zero the
    // ghost rows
    assert(_ghost_value_data_in.size() == _unpack_pos.size());
    for (std::size_t i = 0; i < _ghost_value_data_in.size(); ++i)
      _data[_unpack_pos[i]] += _ghost_value_data_in[i];

    const std::int32_t local_size0 = _index_maps[0]->size_local();
    std::fill(std::next(_data.begin(), _row_ptr[local_size0]), _data.end(), 0);
  }
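The begin/end split lets communication overlap other work (finalize simply calls both back to back). A sketch of the tail of an assembly loop; the helper name and the assumption that entry (0, 0) exists in the pattern of A on this rank are illustrative:

#include <cmath>
#include <cstdint>
#include <vector>

double assemble_and_norm(dolfinx::la::MatrixCSR<double>& A)
{
  std::vector<std::int32_t> r0 = {0}, c0 = {0};
  std::vector<double> v0 = {1.0};
  A.add(v0, r0, c0);   // local accumulation (owned or ghost rows)
  A.finalize_begin();  // start non-blocking ghost-row reduction
  // ... independent work can overlap the communication here ...
  A.finalize_end();    // accumulate received values; zero ghost rows
  return std::sqrt(A.norm_squared()); // collective over A's communicator
}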
  /// Compute the Frobenius norm squared (collective operation)
  double norm_squared() const
  {
    const std::size_t num_owned_rows = _index_maps[0]->size_local();
    const double norm_sq_local = std::accumulate(
        _data.cbegin(), std::next(_data.cbegin(), _row_ptr[num_owned_rows]),
        double(0), [](double norm, T y) { return norm + std::norm(y); });
    double norm_sq = 0.0;
    MPI_Allreduce(&norm_sq_local, &norm_sq, 1, MPI_DOUBLE, MPI_SUM,
                  _comm.comm());
    return norm_sq;
  }
  /// Index maps for the row and column space. The row IndexMap includes
  /// ghost rows that this rank may contribute to.
  const std::array<std::shared_ptr<const common::IndexMap>, 2>&
  index_maps() const
  {
    return _index_maps;
  }

  /// Get local data values
  std::vector<T>& values() { return _data; }

  /// Get local values (const version)
  const std::vector<T>& values() const { return _data; }

  /// Get local row pointers
  const std::vector<std::int32_t>& row_ptr() const { return _row_ptr; }

  /// Get local column indices
  const std::vector<std::int32_t>& cols() const { return _cols; }

  /// Get the start of the off-diagonal (unowned columns) block on each
  /// row, allowing the matrix to be split (virtually) into owned and
  /// unowned column blocks
  const std::vector<std::int32_t>& off_diag_offset() const
  {
    return _off_diagonal_offset;
  }
private:
  // Maps for the distribution of the rows and columns
  std::array<std::shared_ptr<const common::IndexMap>, 2> _index_maps;
  // Block sizes
  std::array<int, 2> _bs;
  // Matrix data, column indices and row pointers
  std::vector<T, Allocator> _data;
  std::vector<std::int32_t> _cols, _row_ptr;
  // Start of off-diagonal (unowned columns) on each row
  std::vector<std::int32_t> _off_diagonal_offset;
  // Neighborhood communicator (ghost rows -> owning ranks)
  dolfinx::MPI::Comm _comm;
  // Request for the non-blocking finalize communication
  MPI_Request _request;
  // Positions in _data at which to accumulate received ghost values
  std::vector<int> _unpack_pos;
  // Value send/receive displacements, per neighbor
  std::vector<int> _val_send_disp, _val_recv_disp;
  // Neighborhood rank that owns each ghost row
  std::vector<int> _ghost_row_to_neighbor_rank;
  // Buffer for values received during finalize
  std::vector<T> _ghost_value_data_in;
};
} // namespace dolfinx::la
Referenced symbols (descriptions from the generated documentation):

dolfinx::MPI::Comm
  A duplicate MPI communicator; manages the lifetime of the communicator.
dolfinx::MPI::neighbors(MPI_Comm comm)
  Return the list of neighbor ranks (sources and destinations) for a
  neighborhood communicator.
dolfinx::MPI::neighbor_all_to_all(MPI_Comm comm, const graph::AdjacencyList<T>& send_data)
  Send in_values[n0] to neighbor process n0 and receive values from
  neighbor process n1 in out_values[n1].
dolfinx::MPI::local_range(int rank, std::int64_t N, int size)
  Return the local range for the calling process, partitioning the global
  [0, N - 1] range across all ranks.

dolfinx::la
  Linear algebra interface (namespace).

dolfinx::la::MatrixCSR
  Distributed sparse matrix.
  - value_type: the value type.
  - allocator_type: the allocator type.
  - MatrixCSR(const SparsityPattern& p, const Allocator& alloc = Allocator()):
    create a distributed matrix.
  - MatrixCSR(MatrixCSR&& A) = default: move constructor.
  - mat_set_values(): insertion functor for setting values in the matrix,
    typically used in finite element assembly functions.
  - mat_add_values(): insertion functor for accumulating values in the
    matrix, typically used in finite element assembly functions.
  - set(T x): set all non-zero local entries to a value, including entries
    in ghost rows.
  - set(x, rows, cols): set values in the matrix.
  - add(x, rows, cols): accumulate values in the matrix.
  - num_owned_rows(): number of local rows excluding ghost rows.
  - num_all_rows(): number of local rows including ghost rows.
  - values(): get local data values (const and non-const versions).
  - row_ptr(): get local row pointers.
  - cols(): get local column indices.
  - off_diag_offset(): start of the off-diagonal (unowned columns) block on
    each row, allowing the matrix to be split (virtually) into owned and
    unowned parts.
  - to_dense(): copy to a dense matrix.
  - finalize(): transfer ghost row data to the owning ranks, accumulating
    received values on the owned rows.
  - finalize_begin(): begin transfer of ghost row data to owning ranks,
    where it will be accumulated into existing owned rows.
  - finalize_end(): end transfer of ghost row data to owning ranks.
  - norm_squared(): compute the Frobenius norm squared.

dolfinx::la::SparsityPattern
  Sparsity pattern data structure that can be used to initialize sparse
  matrices.
  - index_map(int dim): index map for the given dimension; returns the map
    for the rows (dim = 0) or columns (dim = 1).
  - column_index_map(): builds the index map for columns after assembly of
    the sparsity pattern.
  - block_size(int dim): index map block size for dimension dim.
  - graph(): sparsity pattern graph after assembly; uses local indices for
    the columns.
  - num_nonzeros(): number of nonzeros on this rank after assembly,
    including ghost rows.
  - off_diagonal_offset(): row-wise start of the off-diagonal (unowned
    columns) block on each row.
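For reference, MPI::local_range from the list above computes the contiguous partition of a global index range that underlies the ownership splits used throughout this header. A small self-contained sketch (the exact remainder policy shown in the comment is an assumption):

#include <array>
#include <cstdint>
#include <dolfinx/common/MPI.h>
#include <iostream>

int main()
{
  // Partition 10 global indices over 3 ranks
  for (int rank = 0; rank < 3; ++rank)
  {
    std::array<std::int64_t, 2> r = dolfinx::MPI::local_range(rank, 10, 3);
    std::cout << "rank " << rank << ": [" << r[0] << ", " << r[1] << ")\n";
  }
  return 0; // expected [0, 4), [4, 7), [7, 10) if early ranks take the remainder
}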