9 #include "SparsityPattern.h"
10 #include <dolfinx/common/MPI.h>
11 #include <dolfinx/graph/AdjacencyList.h>
15 #include <xtensor/xtensor.hpp>
16 #include <xtl/xspan.hpp>
35 template <
typename U,
typename V,
typename W,
typename X>
36 void set_csr(U&& data,
const V& cols,
const V& row_ptr,
const W& x,
37 const X& xrows,
const X& xcols,
38 [[maybe_unused]]
typename X::value_type local_size)
40 assert(x.size() == xrows.size() * xcols.size());
41 for (std::size_t r = 0; r < xrows.size(); ++r)
45 using T =
typename W::value_type;
46 const T* xr = x.data() + r * xcols.size();
49 if (row >= local_size)
50 throw std::runtime_error(
"Local row out of range");
54 auto cit0 = std::next(cols.begin(), row_ptr[row]);
55 auto cit1 = std::next(cols.begin(), row_ptr[row + 1]);
56 for (std::size_t c = 0; c < xcols.size(); ++c)
59 auto it = std::lower_bound(cit0, cit1, xcols[c]);
61 std::size_t d = std::distance(cols.begin(), it);
62 assert(d < data.size());
77 template <
typename U,
typename V,
typename W,
typename X>
78 void add_csr(U&& data,
const V& cols,
const V& row_ptr,
const W& x,
79 const X& xrows,
const X& xcols)
81 assert(x.size() == xrows.size() * xcols.size());
82 for (std::size_t r = 0; r < xrows.size(); ++r)
86 using T =
typename W::value_type;
87 const T* xr = x.data() + r * xcols.size();
90 if (row >= (
int)row_ptr.size())
91 throw std::runtime_error(
"Local row out of range");
95 auto cit0 = std::next(cols.begin(), row_ptr[row]);
96 auto cit1 = std::next(cols.begin(), row_ptr[row + 1]);
97 for (std::size_t c = 0; c < xcols.size(); ++c)
100 auto it = std::lower_bound(cit0, cit1, xcols[c]);
102 std::size_t d = std::distance(cols.begin(), it);
103 assert(d < data.size());
124 template <
typename T,
class Allocator = std::allocator<T>>
141 return [&](
const xtl::span<const std::int32_t>& rows,
142 const xtl::span<const std::int32_t>&
cols,
143 const xtl::span<const T>& data) ->
int
156 return [&](
const xtl::span<const std::int32_t>& rows,
157 const xtl::span<const std::int32_t>&
cols,
158 const xtl::span<const T>& data) ->
int
174 _cols(p.
graph().array().begin(), p.
graph().array().end()),
175 _row_ptr(p.
graph().offsets().begin(), p.
graph().offsets().end()),
176 _comm(p.
index_map(0)->comm(common::IndexMap::Direction::reverse)),
177 _ghost_row_to_neighbor_rank(_index_maps[0]->ghost_owners())
180 if (_bs[0] > 1 or _bs[1] > 1)
181 throw std::runtime_error(
"Block size not yet supported");
185 _off_diagonal_offset.reserve(num_diag_nnz.size());
186 std::transform(num_diag_nnz.begin(), num_diag_nnz.end(), _row_ptr.begin(),
187 std::back_inserter(_off_diagonal_offset),
188 std::plus<std::int32_t>());
191 const std::array local_size
192 = {_index_maps[0]->size_local(), _index_maps[1]->size_local()};
193 const std::array local_range
194 = {_index_maps[0]->local_range(), _index_maps[1]->local_range()};
196 const std::vector<std::int64_t>& ghosts0 = _index_maps[0]->ghosts();
197 const std::vector<std::int64_t>& ghosts1 = _index_maps[1]->ghosts();
201 int indegree(-1), weighted(-1);
202 MPI_Dist_graph_neighbors_count(_comm.comm(), &indegree, &outdegree,
207 std::vector<std::int32_t> data_per_proc(outdegree, 0);
208 for (std::size_t i = 0; i < _ghost_row_to_neighbor_rank.size(); ++i)
210 assert(_ghost_row_to_neighbor_rank[i] < data_per_proc.size());
211 data_per_proc[_ghost_row_to_neighbor_rank[i]]
212 += _row_ptr[local_size[0] + i + 1] - _row_ptr[local_size[0] + i];
216 _val_send_disp.resize(outdegree + 1, 0);
217 std::partial_sum(data_per_proc.begin(), data_per_proc.end(),
218 std::next(_val_send_disp.begin()));
220 std::vector<int> index_send_disp(outdegree + 1);
221 std::transform(_val_send_disp.begin(), _val_send_disp.end(),
222 index_send_disp.begin(), [](
int d) { return d * 2; });
225 std::vector<int> insert_pos(index_send_disp);
226 std::vector<std::int64_t> ghost_index_data(index_send_disp.back());
227 for (std::size_t i = 0; i < _ghost_row_to_neighbor_rank.size(); ++i)
229 const int neighbor_rank = _ghost_row_to_neighbor_rank[i];
230 int row_id = local_size[0] + i;
231 for (
int j = _row_ptr[row_id]; j < _row_ptr[row_id + 1]; ++j)
234 const std::int32_t idx_pos = insert_pos[neighbor_rank];
237 ghost_index_data[idx_pos] = ghosts0[i];
238 if (
const std::int32_t col_local = _cols[j]; col_local < local_size[1])
239 ghost_index_data[idx_pos + 1] = col_local + local_range[1][0];
241 ghost_index_data[idx_pos + 1] = ghosts1[col_local - local_size[1]];
243 insert_pos[neighbor_rank] += 2;
248 const graph::AdjacencyList<std::int64_t> ghost_index_data_out(
249 std::move(ghost_index_data), std::move(index_send_disp));
250 const graph::AdjacencyList<std::int64_t> ghost_index_data_in
254 _val_recv_disp.resize(ghost_index_data_in.offsets().size());
255 std::transform(ghost_index_data_in.offsets().begin(),
256 ghost_index_data_in.offsets().end(), _val_recv_disp.begin(),
257 [](
int d) { return d / 2; });
260 std::map<std::int64_t, std::int32_t> global_to_local;
261 std::int32_t local_i = local_size[1];
262 for (std::int64_t global_i : ghosts1)
263 global_to_local.insert({global_i, local_i++});
267 const std::vector<std::int64_t>& ghost_index_array
268 = ghost_index_data_in.array();
269 for (std::size_t i = 0; i < ghost_index_array.size(); i += 2)
272 const std::int32_t local_row = ghost_index_array[i] -
local_range[0][0];
273 assert(local_row >= 0 and local_row < local_size[0]);
276 std::int32_t local_col = ghost_index_array[i + 1] -
local_range[1][0];
277 if (local_col < 0 or local_col >= local_size[1])
279 const auto it = global_to_local.find(ghost_index_array[i + 1]);
280 assert(it != global_to_local.end());
281 local_col = it->second;
283 auto cit0 = std::next(_cols.begin(), _row_ptr[local_row]);
284 auto cit1 = std::next(_cols.begin(), _row_ptr[local_row + 1]);
287 auto cit = std::lower_bound(cit0, cit1, local_col);
289 assert(*cit == local_col);
290 std::size_t d = std::distance(_cols.begin(), cit);
291 _unpack_pos.push_back(d);
302 void set(T x) { std::fill(_data.begin(), _data.end(), x); }
316 void set(
const xtl::span<const T>& x,
317 const xtl::span<const std::int32_t>& rows,
318 const xtl::span<const std::int32_t>&
cols)
320 impl::set_csr(_data, _cols, _row_ptr, x, rows,
cols,
321 _index_maps[0]->size_local());
336 void add(
const xtl::span<const T>& x,
337 const xtl::span<const std::int32_t>& rows,
338 const xtl::span<const std::int32_t>&
cols)
340 impl::add_csr(_data, _cols, _row_ptr, x, rows,
cols);
358 const std::int32_t ncols
359 = _index_maps[1]->size_local() + _index_maps[1]->num_ghosts();
360 xt::xtensor<T, 2> A = xt::zeros<T>({nrows, ncols});
361 for (std::size_t r = 0; r < nrows; ++r)
362 for (
int j = _row_ptr[r]; j < _row_ptr[r + 1]; ++j)
363 A(r, _cols[j]) = _data[j];
385 const std::int32_t local_size0 = _index_maps[0]->size_local();
386 const std::int32_t num_ghosts0 = _index_maps[0]->num_ghosts();
389 std::vector<int> insert_pos(_val_send_disp);
390 std::vector<T> ghost_value_data(_val_send_disp.back());
391 for (
int i = 0; i < num_ghosts0; ++i)
393 const int neighbor_rank = _ghost_row_to_neighbor_rank[i];
396 const std::int32_t val_pos = insert_pos[neighbor_rank];
397 std::copy(std::next(_data.data(), _row_ptr[local_size0 + i]),
398 std::next(_data.data(), _row_ptr[local_size0 + i + 1]),
399 std::next(ghost_value_data.begin(), val_pos));
400 insert_pos[neighbor_rank]
401 += _row_ptr[local_size0 + i + 1] - _row_ptr[local_size0 + i];
404 _ghost_value_data_in.resize(_val_recv_disp.back());
407 std::vector<int> val_send_count(_val_send_disp.size() - 1);
408 std::adjacent_difference(std::next(_val_send_disp.begin()),
409 _val_send_disp.end(), val_send_count.begin());
411 std::vector<int> val_recv_count(_val_recv_disp.size() - 1);
412 std::adjacent_difference(std::next(_val_recv_disp.begin()),
413 _val_recv_disp.end(), val_recv_count.begin());
415 int status = MPI_Ineighbor_alltoallv(
416 ghost_value_data.data(), val_send_count.data(), _val_send_disp.data(),
417 dolfinx::MPI::mpi_type<T>(), _ghost_value_data_in.data(),
418 val_recv_count.data(), _val_recv_disp.data(),
419 dolfinx::MPI::mpi_type<T>(), _comm.comm(), &_request);
420 assert(status == MPI_SUCCESS);
430 int status = MPI_Wait(&_request, MPI_STATUS_IGNORE);
431 assert(status == MPI_SUCCESS);
434 assert(_ghost_value_data_in.size() == _unpack_pos.size());
435 for (std::size_t i = 0; i < _ghost_value_data_in.size(); ++i)
436 _data[_unpack_pos[i]] += _ghost_value_data_in[i];
439 const std::int32_t local_size0 = _index_maps[0]->size_local();
440 std::fill(std::next(_data.begin(), _row_ptr[local_size0]), _data.end(), 0);
449 const double norm_sq_local = std::accumulate(
450 _data.cbegin(), std::next(_data.cbegin(), _row_ptr[
num_owned_rows]),
451 double(0), [](
double norm, T y) { return norm + std::norm(y); });
453 MPI_Allreduce(&norm_sq_local, &norm_sq, 1, MPI_DOUBLE, MPI_SUM,
464 const std::array<std::shared_ptr<const common::IndexMap>, 2>&
472 std::vector<T>&
values() {
return _data; }
476 const std::vector<T>&
values()
const {
return _data; }
480 const std::vector<std::int32_t>&
row_ptr()
const {
return _row_ptr; }
484 const std::vector<std::int32_t>&
cols()
const {
return _cols; }
495 return _off_diagonal_offset;
500 std::array<std::shared_ptr<const common::IndexMap>, 2> _index_maps;
503 std::array<int, 2> _bs;
506 std::vector<T, Allocator> _data;
507 std::vector<std::int32_t> _cols, _row_ptr;
510 std::vector<std::int32_t> _off_diagonal_offset;
518 MPI_Request _request;
521 std::vector<int> _unpack_pos;
525 std::vector<int> _val_send_disp, _val_recv_disp;
528 std::vector<int> _ghost_row_to_neighbor_rank;
531 std::vector<T> _ghost_value_data_in;
A duplicate MPI communicator and manage lifetime of the communicator.
Definition: MPI.h:40
Distributed sparse matrix.
Definition: MatrixCSR.h:126
auto mat_set_values()
Insertion functor for setting values in matrix. It is typically used in finite element assembly funct...
Definition: MatrixCSR.h:139
Allocator allocator_type
The allocator type.
Definition: MatrixCSR.h:132
void add(const xtl::span< const T > &x, const xtl::span< const std::int32_t > &rows, const xtl::span< const std::int32_t > &cols)
Accumulate values in the matrix.
Definition: MatrixCSR.h:336
void finalize()
Transfer ghost row data to the owning ranks accumulating received values on the owned rows,...
Definition: MatrixCSR.h:370
std::int32_t num_owned_rows() const
Number of local rows excluding ghost rows.
Definition: MatrixCSR.h:344
MatrixCSR(const SparsityPattern &p, const Allocator &alloc=Allocator())
Create a distributed matrix.
Definition: MatrixCSR.h:169
const std::vector< std::int32_t > & cols() const
Get local column indices.
Definition: MatrixCSR.h:484
auto mat_add_values()
Insertion functor for accumulating values in matrix. It is typically used in finite element assembly ...
Definition: MatrixCSR.h:154
void set(T x)
Set all non-zero local entries to a value including entries in ghost rows.
Definition: MatrixCSR.h:302
MatrixCSR(MatrixCSR &&A)=default
Move constructor.
void finalize_end()
End transfer of ghost row data to owning ranks.
Definition: MatrixCSR.h:428
void set(const xtl::span< const T > &x, const xtl::span< const std::int32_t > &rows, const xtl::span< const std::int32_t > &cols)
Set values in the matrix.
Definition: MatrixCSR.h:316
const std::array< std::shared_ptr< const common::IndexMap >, 2 > & index_maps() const
Index maps for the row and column space. The row IndexMap contains ghost entries for rows which may b...
Definition: MatrixCSR.h:465
const std::vector< std::int32_t > & row_ptr() const
Get local row pointers.
Definition: MatrixCSR.h:480
const std::vector< T > & values() const
Get local values (const version)
Definition: MatrixCSR.h:476
xt::xtensor< T, 2 > to_dense() const
Copy to a dense matrix.
Definition: MatrixCSR.h:355
std::vector< T > & values()
Get local data values.
Definition: MatrixCSR.h:472
std::int32_t num_all_rows() const
Number of local rows including ghost rows.
Definition: MatrixCSR.h:347
double norm_squared() const
Compute the Frobenius norm squared.
Definition: MatrixCSR.h:444
T value_type
The value type.
Definition: MatrixCSR.h:129
void finalize_begin()
Begin transfer of ghost row data to owning ranks, where it will be accumulated into existing owned ro...
Definition: MatrixCSR.h:383
const std::vector< std::int32_t > & off_diag_offset() const
Get the start of off-diagonal (unowned columns) on each row, allowing the matrix to be split (virtual...
Definition: MatrixCSR.h:493
This class provides a sparsity pattern data structure that can be used to initialize sparse matrices....
Definition: SparsityPattern.h:34
std::shared_ptr< const common::IndexMap > index_map(int dim) const
Index map for given dimension dimension. Returns the index map for rows and columns that will be set ...
Definition: SparsityPattern.cpp:208
int block_size(int dim) const
Return index map block size for dimension dim.
Definition: SparsityPattern.cpp:245
const graph::AdjacencyList< std::int32_t > & graph() const
Sparsity pattern graph after assembly. Uses local indices for the columns.
Definition: SparsityPattern.cpp:424
std::int64_t num_nonzeros() const
Number of nonzeros on this rank after assembly, including ghost rows.
Definition: SparsityPattern.cpp:403
common::IndexMap column_index_map() const
Builds the index map for columns after assembly of the sparsity pattern.
Definition: SparsityPattern.cpp:228
xtl::span< const int > off_diagonal_offset() const
Row-wise start of off-diagonal (unowned columns) on each row.
Definition: SparsityPattern.cpp:431
graph::AdjacencyList< T > neighbor_all_to_all(MPI_Comm comm, const graph::AdjacencyList< T > &send_data)
Send in_values[n0] to neighbor process n0 and receive values from neighbor process n1 in out_values[n...
Definition: MPI.h:682
constexpr std::array< std::int64_t, 2 > local_range(int rank, std::int64_t N, int size)
Return local range for the calling process, partitioning the global [0, N - 1] range across all ranks...
Definition: MPI.h:82
Linear algebra interface.
Definition: sparsitybuild.h:13