DOLFINx 0.11.0.0
DOLFINx C++
Loading...
Searching...
No Matches
MatrixCSR.h
1// Copyright (C) 2021-2022 Garth N. Wells and Chris N. Richardson
2//
3// This file is part of DOLFINx (https://www.fenicsproject.org)
4//
5// SPDX-License-Identifier: LGPL-3.0-or-later
6
7#pragma once
8
9#include "SparsityPattern.h"
10#include "Vector.h"
11#include "matrix_csr_impl.h"
12#include <algorithm>
13#include <dolfinx/common/IndexMap.h>
14#include <dolfinx/common/MPI.h>
15#include <dolfinx/graph/AdjacencyList.h>
16#include <mpi.h>
17#include <numeric>
18#include <span>
19#include <utility>
20#include <vector>
21
22// Define requirements on sparsity pattern required for MatrixCSR constructor
23// allowing alternative implentations that can provide these essentials.
24template <typename T>
25concept SparsityImplementation = requires(T sp, int i) {
26 { sp.graph() };
27 requires std::forward_iterator<typename decltype(sp.graph().first)::iterator>;
28 requires std::convertible_to<std::int32_t,
29 typename decltype(sp.graph().first)::value_type>;
30 requires std::forward_iterator<
31 typename decltype(sp.graph().second)::iterator>;
32 requires std::convertible_to<
33 std::int64_t, typename decltype(sp.graph().second)::value_type>;
34
35 { sp.block_size(i) } -> std::same_as<int>;
36 {
37 sp.index_map(i)
38 } -> std::same_as<std::shared_ptr<const dolfinx::common::IndexMap>>;
39};
40
41namespace dolfinx::la
42{
44enum class BlockMode : int
45{
46 compact = 0,
51};
52
66template <typename Scalar, typename Container = std::vector<Scalar>,
67 typename ColContainer = std::vector<std::int32_t>,
68 typename RowPtrContainer = std::vector<std::int64_t>>
69class MatrixCSR
70{
71 static_assert(std::is_same_v<typename Container::value_type, Scalar>);
72 static_assert(std::is_integral_v<typename ColContainer::value_type>);
73 static_assert(std::is_integral_v<typename RowPtrContainer::value_type>);
74
75 template <typename, typename, typename, typename>
76 friend class MatrixCSR;
77
78public:
80 using value_type = Scalar;
81
83 using container_type = Container;
84
86 using column_container_type = ColContainer;
87
89 using rowptr_container_type = RowPtrContainer;
90
114 template <int BS0 = 1, int BS1 = 1>
116 {
117 if ((BS0 != _bs[0] and BS0 > 1 and _bs[0] > 1)
118 or (BS1 != _bs[1] and BS1 > 1 and _bs[1] > 1))
119 {
120 throw std::runtime_error(
121 "Cannot insert blocks of different size than matrix block size");
122 }
123
124 return [&](std::span<const std::int32_t> rows,
125 std::span<const std::int32_t> cols,
126 std::span<const value_type> data) -> int
127 {
128 this->set<BS0, BS1>(data, rows, cols);
129 return 0;
130 };
131 }
132
156 template <int BS0 = 1, int BS1 = 1>
158 {
159 if ((BS0 != _bs[0] and BS0 > 1 and _bs[0] > 1)
160 or (BS1 != _bs[1] and BS1 > 1 and _bs[1] > 1))
161 {
162 throw std::runtime_error(
163 "Cannot insert blocks of different size than matrix block size");
164 }
165
166 return [&](std::span<const std::int32_t> rows,
167 std::span<const std::int32_t> cols,
168 std::span<const value_type> data) -> int
169 {
170 this->add<BS0, BS1>(data, rows, cols);
171 return 0;
172 };
173 }
174
198 template <SparsityImplementation T>
199 MatrixCSR(const T& p, BlockMode mode = BlockMode::compact);
200
203 MatrixCSR(MatrixCSR&& A) = default;
204
207 MatrixCSR(const MatrixCSR& A) = default;
208
217 template <typename Scalar0, typename Container0, typename ColContainer0,
218 typename RowPtrContainer0>
219 explicit MatrixCSR(
220 const MatrixCSR<Scalar0, Container0, ColContainer0, RowPtrContainer0>& A)
221 : _index_maps(A._index_maps), _block_mode(A.block_mode()),
222 _bs(A.block_size()), _data(A._data.begin(), A._data.end()),
223 _cols(A.cols().begin(), A.cols().end()),
224 _row_ptr(A.row_ptr().begin(), A.row_ptr().end()),
225 _off_diagonal_offset(A.off_diag_offset().begin(),
226 A.off_diag_offset().end()),
227 _comm(A.comm()), _request(MPI_REQUEST_NULL), _unpack_pos(A._unpack_pos),
228 _val_send_disp(A._val_send_disp), _val_recv_disp(A._val_recv_disp),
229 _ghost_row_to_rank(A._ghost_row_to_rank)
230 {
231 }
232
237 [[deprecated("Use std::ranges::fill(A.values(), v) instead.")]]
239 {
240 std::ranges::fill(_data, x);
241 }
242
259 template <int BS0, int BS1>
260 void set(std::span<const value_type> x, std::span<const std::int32_t> rows,
261 std::span<const std::int32_t> cols)
262 {
263 auto set_fn = [](value_type& y, const value_type& x) { y = x; };
264
265 std::int32_t num_rows
266 = _index_maps[0]->size_local() + _index_maps[0]->num_ghosts();
267 assert(x.size() == rows.size() * cols.size() * BS0 * BS1);
268 if (_bs[0] == BS0 and _bs[1] == BS1)
269 {
270 impl::insert_csr<BS0, BS1>(_data, _cols, _row_ptr, x, rows, cols, set_fn,
271 num_rows);
272 }
273 else if (_bs[0] == 1 and _bs[1] == 1)
274 {
275 // Set blocked data in a regular CSR matrix (_bs[0]=1, _bs[1]=1)
276 // with correct sparsity
277 impl::insert_blocked_csr<BS0, BS1>(_data, _cols, _row_ptr, x, rows, cols,
278 set_fn, num_rows);
279 }
280 else
281 {
282 assert(BS0 == 1 and BS1 == 1);
283 // Set non-blocked data in a blocked CSR matrix (BS0=1, BS1=1)
284 impl::insert_nonblocked_csr(_data, _cols, _row_ptr, x, rows, cols, set_fn,
285 num_rows, _bs[0], _bs[1]);
286 }
287 }
288
304 template <int BS0 = 1, int BS1 = 1>
305 void add(std::span<const value_type> x, std::span<const std::int32_t> rows,
306 std::span<const std::int32_t> cols)
307 {
308 auto add_fn = [](value_type& y, const value_type& x) { y += x; };
309
310 assert(x.size() == rows.size() * cols.size() * BS0 * BS1);
311 if (_bs[0] == BS0 and _bs[1] == BS1)
312 {
313 impl::insert_csr<BS0, BS1>(_data, _cols, _row_ptr, x, rows, cols, add_fn,
314 _row_ptr.size());
315 }
316 else if (_bs[0] == 1 and _bs[1] == 1)
317 {
318 // Add blocked data to a regular CSR matrix (_bs[0]=1, _bs[1]=1)
319 impl::insert_blocked_csr<BS0, BS1>(_data, _cols, _row_ptr, x, rows, cols,
320 add_fn, _row_ptr.size());
321 }
322 else
323 {
324 assert(BS0 == 1 and BS1 == 1);
325 // Add non-blocked data to a blocked CSR matrix (BS0=1, BS1=1)
326 impl::insert_nonblocked_csr(_data, _cols, _row_ptr, x, rows, cols, add_fn,
327 _row_ptr.size(), _bs[0], _bs[1]);
328 }
329 }
330
332 std::int32_t num_owned_rows() const { return _index_maps[0]->size_local(); }
333
335 std::int32_t num_all_rows() const { return _row_ptr.size() - 1; }
336
346 std::vector<value_type> to_dense() const
347 {
348 const std::size_t nrows = num_all_rows();
349 const std::size_t ncols = _index_maps[1]->size_global();
350 std::vector<value_type> A(nrows * ncols * _bs[0] * _bs[1], 0.0);
351 for (std::size_t r = 0; r < nrows; ++r)
352 {
353 for (std::int32_t j = _row_ptr[r]; j < _row_ptr[r + 1]; ++j)
354 {
355 for (int i0 = 0; i0 < _bs[0]; ++i0)
356 {
357 for (int i1 = 0; i1 < _bs[1]; ++i1)
358 {
359 std::array<std::int32_t, 1> local_col{_cols[j]};
360 std::array<std::int64_t, 1> global_col{0};
361 _index_maps[1]->local_to_global(local_col, global_col);
362 A[(r * _bs[0] + i0) * ncols * _bs[1] + global_col[0] * _bs[1] + i1]
363 = _data[j * _bs[0] * _bs[1] + i0 * _bs[1] + i1];
364 }
365 }
366 }
367 }
368
369 return A;
370 }
371
379 {
382 }
383
394 {
395 const std::int32_t local_size0 = _index_maps[0]->size_local();
396 const std::int32_t num_ghosts0 = _index_maps[0]->num_ghosts();
397 const int bs2 = _bs[0] * _bs[1];
398
399 // For each ghost row, pack and send values to send to neighborhood
400 std::vector<int> insert_pos = _val_send_disp;
401 _ghost_value_data.resize(_val_send_disp.back());
402 for (int i = 0; i < num_ghosts0; ++i)
403 {
404 int rank = _ghost_row_to_rank[i];
405
406 // Get position in send buffer to place data to send to this
407 // neighbour
408 std::int32_t val_pos = insert_pos[rank];
409 std::copy(std::next(_data.data(), _row_ptr[local_size0 + i] * bs2),
410 std::next(_data.data(), _row_ptr[local_size0 + i + 1] * bs2),
411 std::next(_ghost_value_data.begin(), val_pos));
412 insert_pos[rank]
413 += bs2 * (_row_ptr[local_size0 + i + 1] - _row_ptr[local_size0 + i]);
414 }
415
416 _ghost_value_data_in.resize(_val_recv_disp.back());
417
418 // Compute data sizes for send and receive from displacements
419 std::vector<int> val_send_count(_val_send_disp.size() - 1);
420 std::adjacent_difference(std::next(_val_send_disp.begin()),
421 _val_send_disp.end(), val_send_count.begin());
422
423 std::vector<int> val_recv_count(_val_recv_disp.size() - 1);
424 std::adjacent_difference(std::next(_val_recv_disp.begin()),
425 _val_recv_disp.end(), val_recv_count.begin());
426
427 int status = MPI_Ineighbor_alltoallv(
428 _ghost_value_data.data(), val_send_count.data(), _val_send_disp.data(),
429 dolfinx::MPI::mpi_t<value_type>, _ghost_value_data_in.data(),
430 val_recv_count.data(), _val_recv_disp.data(),
431 dolfinx::MPI::mpi_t<value_type>, _comm.comm(), &_request);
432 dolfinx::MPI::check_error(_comm.comm(), status);
433 }
434
441 {
442 int status = MPI_Wait(&_request, MPI_STATUS_IGNORE);
443 dolfinx::MPI::check_error(_comm.comm(), status);
444
445 _ghost_value_data.clear();
446 _ghost_value_data.shrink_to_fit();
447
448 // Add to local rows
449 int bs2 = _bs[0] * _bs[1];
450 assert(_ghost_value_data_in.size() == _unpack_pos.size() * bs2);
451 for (std::size_t i = 0; i < _unpack_pos.size(); ++i)
452 for (int j = 0; j < bs2; ++j)
453 _data[_unpack_pos[i] * bs2 + j] += _ghost_value_data_in[i * bs2 + j];
454
455 _ghost_value_data_in.clear();
456 _ghost_value_data_in.shrink_to_fit();
457
458 // Set ghost row data to zero
459 std::int32_t local_size0 = _index_maps[0]->size_local();
460 std::fill(std::next(_data.begin(), _row_ptr[local_size0] * bs2),
461 _data.end(), 0);
462 }
463
467 double squared_norm() const
468 {
469 const std::size_t num_owned_rows = _index_maps[0]->size_local();
470 const int bs2 = _bs[0] * _bs[1];
471 assert(num_owned_rows < _row_ptr.size());
472 double norm_sq_local = std::accumulate(
473 _data.cbegin(),
474 std::next(_data.cbegin(), _row_ptr[num_owned_rows] * bs2), double(0),
475 [](auto norm, value_type y) { return norm + std::norm(y); });
476 double norm_sq;
477 MPI_Allreduce(&norm_sq_local, &norm_sq, 1, MPI_DOUBLE, MPI_SUM,
478 _comm.comm());
479 return norm_sq;
480 }
481
492 void mult(Vector<value_type>& x, Vector<value_type>& y) const;
493
525
527 MPI_Comm comm() const { return _comm.comm(); }
528
536 std::shared_ptr<const common::IndexMap> index_map(int dim) const
537 {
538 return _index_maps.at(dim);
539 }
540
543 container_type& values() { return _data; }
544
547 const container_type& values() const { return _data; }
548
551 const rowptr_container_type& row_ptr() const { return _row_ptr; }
552
555 const column_container_type& cols() const { return _cols; }
556
567 {
568 return _off_diagonal_offset;
569 }
570
573 std::array<int, 2> block_size() const { return _bs; }
574
576 BlockMode block_mode() const { return _block_mode; }
577
578private:
579 // Parallel distribution of the rows and columns
580 std::array<std::shared_ptr<const common::IndexMap>, 2> _index_maps;
581
582 // Block mode (compact or expanded)
583 BlockMode _block_mode;
584
585 // Block sizes
586 std::array<int, 2> _bs;
587
588 // Matrix data
589 container_type _data;
591 rowptr_container_type _row_ptr;
592
593 // Start of off-diagonal (unowned columns) on each row
594 rowptr_container_type _off_diagonal_offset;
595
596 // Communicator with neighborhood (ghost->owner communicator for rows)
597 dolfinx::MPI::Comm _comm;
598
599 // -- Precomputed data for scatter_rev/update
600
601 // Request in non-blocking communication
602 MPI_Request _request;
603
604 // Position in _data to add received data
605 std::vector<std::size_t> _unpack_pos;
606
607 // Displacements for alltoall for each neighbor when sending and
608 // receiving
609 std::vector<int> _val_send_disp, _val_recv_disp;
610
611 // Ownership of each row, by neighbor (for the neighbourhood defined
612 // on _comm)
613 std::vector<int> _ghost_row_to_rank;
614
615 // Temporary stores for data during non-blocking communication
616 container_type _ghost_value_data;
617 container_type _ghost_value_data_in;
618};
619//-----------------------------------------------------------------------------
620
622template <typename U, typename V, typename W, typename X>
623template <SparsityImplementation SparsityType>
624MatrixCSR<U, V, W, X>::MatrixCSR(const SparsityType& p, BlockMode mode)
625 : _index_maps({p.index_map(0), p.index_map(1)}), _block_mode(mode),
626 _bs({p.block_size(0), p.block_size(1)}),
627 _data(p.graph().first.size() * _bs[0] * _bs[1], 0),
628 _cols(p.graph().first.begin(), p.graph().first.end()),
629 _row_ptr(p.graph().second.begin(), p.graph().second.end()),
630 _comm(MPI_COMM_NULL)
631{
632 if (_block_mode == BlockMode::expanded)
633 {
634 // Rebuild IndexMaps
635 for (int i = 0; i < 2; ++i)
636 {
637 auto im = _index_maps[i];
638 std::int32_t size_local = im->size_local() * _bs[i];
639 std::span ghost_i = im->ghosts();
640 std::vector<std::int64_t> ghosts;
641 const std::vector<int> ghost_owner_i(im->owners().begin(),
642 im->owners().end());
643 std::vector<int> src_rank;
644 for (std::size_t j = 0; j < ghost_i.size(); ++j)
645 {
646 for (int k = 0; k < _bs[i]; ++k)
647 {
648 ghosts.push_back(ghost_i[j] * _bs[i] + k);
649 src_rank.push_back(ghost_owner_i[j]);
650 }
651 }
652
653 std::array<std::vector<int>, 2> src_dest0
654 = {std::vector(_index_maps[i]->src().begin(),
655 _index_maps[i]->src().end()),
656 std::vector(_index_maps[i]->dest().begin(),
657 _index_maps[i]->dest().end())};
658 _index_maps[i] = std::make_shared<common::IndexMap>(
659 _index_maps[i]->comm(), size_local, src_dest0, ghosts, src_rank);
660 }
661
662 // Convert sparsity pattern and set _bs to 1
663
664 column_container_type new_cols;
665 new_cols.reserve(_data.size());
666 rowptr_container_type new_row_ptr{0};
667 new_row_ptr.reserve(_row_ptr.size() * _bs[0]);
668 std::span<const std::int32_t> num_diag_nnz = p.off_diagonal_offsets();
669 for (std::size_t i = 0; i < _row_ptr.size() - 1; ++i)
670 {
671 // Repeat row _bs[0] times
672 for (int q0 = 0; q0 < _bs[0]; ++q0)
673 {
674 _off_diagonal_offset.push_back(new_row_ptr.back()
675 + num_diag_nnz[i] * _bs[1]);
676 for (auto j = _row_ptr[i]; j < _row_ptr[i + 1]; ++j)
677 {
678 for (int q1 = 0; q1 < _bs[1]; ++q1)
679 new_cols.push_back(_cols[j] * _bs[1] + q1);
680 }
681 new_row_ptr.push_back(new_cols.size());
682 }
683 }
684 _cols = new_cols;
685 _row_ptr = new_row_ptr;
686 _bs[0] = 1;
687 _bs[1] = 1;
688 }
689 else
690 {
691 // Compute off-diagonal offset for each row (compact)
692 std::span<const std::int32_t> num_diag_nnz = p.off_diagonal_offsets();
693 _off_diagonal_offset.reserve(num_diag_nnz.size());
694 std::ranges::transform(num_diag_nnz, _row_ptr,
695 std::back_inserter(_off_diagonal_offset),
696 std::plus{});
697 }
698
699 // Some short-hand
700 std::array local_size
701 = {_index_maps[0]->size_local(), _index_maps[1]->size_local()};
702 std::array local_range
703 = {_index_maps[0]->local_range(), _index_maps[1]->local_range()};
704 std::span ghosts1 = _index_maps[1]->ghosts();
705
706 std::span ghosts0 = _index_maps[0]->ghosts();
707 std::span src_ranks = _index_maps[0]->src();
708 std::span dest_ranks = _index_maps[0]->dest();
709
710 // Create neighbourhood communicator (owner <- ghost)
711 MPI_Comm comm;
712 MPI_Dist_graph_create_adjacent(_index_maps[0]->comm(), dest_ranks.size(),
713 dest_ranks.data(), MPI_UNWEIGHTED,
714 src_ranks.size(), src_ranks.data(),
715 MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm);
716 _comm = dolfinx::MPI::Comm(comm, false);
717
718 // Build map from ghost row index position to owning (neighborhood)
719 // rank
720 _ghost_row_to_rank.reserve(_index_maps[0]->owners().size());
721 for (int r : _index_maps[0]->owners())
722 {
723 auto it = std::ranges::lower_bound(src_ranks, r);
724 assert(it != src_ranks.end() and *it == r);
725 std::size_t pos = std::distance(src_ranks.begin(), it);
726 _ghost_row_to_rank.push_back(pos);
727 }
728
729 // Compute size of data to send to each neighbor
730 std::vector<std::int32_t> data_per_proc(src_ranks.size(), 0);
731 for (std::size_t i = 0; i < _ghost_row_to_rank.size(); ++i)
732 {
733 assert(_ghost_row_to_rank[i] < (int)data_per_proc.size());
734 std::size_t pos = local_size[0] + i;
735 data_per_proc[_ghost_row_to_rank[i]] += _row_ptr[pos + 1] - _row_ptr[pos];
736 }
737
738 // Compute send displacements
739 _val_send_disp.resize(src_ranks.size() + 1, 0);
740 std::partial_sum(data_per_proc.begin(), data_per_proc.end(),
741 std::next(_val_send_disp.begin()));
742
743 // For each ghost row, pack and send indices to neighborhood
744 std::vector<std::int64_t> ghost_index_data(2 * _val_send_disp.back());
745 {
746 std::vector<int> insert_pos = _val_send_disp;
747 for (std::size_t i = 0; i < _ghost_row_to_rank.size(); ++i)
748 {
749 int rank = _ghost_row_to_rank[i];
750 std::int32_t row_id = local_size[0] + i;
751 for (int j = _row_ptr[row_id]; j < _row_ptr[row_id + 1]; ++j)
752 {
753 // Get position in send buffer
754 std::int32_t idx_pos = 2 * insert_pos[rank];
755
756 // Pack send data (row, col) as global indices
757 ghost_index_data[idx_pos] = ghosts0[i];
758 if (std::int32_t col_local = _cols[j]; col_local < local_size[1])
759 ghost_index_data[idx_pos + 1] = col_local + local_range[1][0];
760 else
761 ghost_index_data[idx_pos + 1] = ghosts1[col_local - local_size[1]];
762
763 insert_pos[rank] += 1;
764 }
765 }
766 }
767
768 // Communicate data with neighborhood
769 std::vector<std::int64_t> ghost_index_array;
770 std::vector<int> recv_disp;
771 {
772 std::vector<int> send_sizes;
773 std::ranges::transform(data_per_proc, std::back_inserter(send_sizes),
774 [](auto x) { return 2 * x; });
775
776 std::vector<int> recv_sizes(dest_ranks.size());
777 send_sizes.reserve(1);
778 recv_sizes.reserve(1);
779 MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, recv_sizes.data(), 1,
780 MPI_INT, _comm.comm());
781
782 // Build send/recv displacement
783 std::vector<int> send_disp{0};
784 std::partial_sum(send_sizes.begin(), send_sizes.end(),
785 std::back_inserter(send_disp));
786 recv_disp = {0};
787 std::partial_sum(recv_sizes.begin(), recv_sizes.end(),
788 std::back_inserter(recv_disp));
789
790 ghost_index_array.resize(recv_disp.back());
791 MPI_Neighbor_alltoallv(ghost_index_data.data(), send_sizes.data(),
792 send_disp.data(), MPI_INT64_T,
793 ghost_index_array.data(), recv_sizes.data(),
794 recv_disp.data(), MPI_INT64_T, _comm.comm());
795 }
796
797 // Store receive displacements for future use, when transferring
798 // data values
799 _val_recv_disp.resize(recv_disp.size());
800 int bs2 = _bs[0] * _bs[1];
801 std::ranges::transform(recv_disp, _val_recv_disp.begin(),
802 [&bs2](auto d) { return bs2 * d / 2; });
803 std::ranges::transform(_val_send_disp, _val_send_disp.begin(),
804 [&bs2](auto d) { return d * bs2; });
805
806 // Global-to-local map for ghost columns
807 std::vector<std::pair<std::int64_t, std::int32_t>> global_to_local;
808 global_to_local.reserve(ghosts1.size());
809 for (std::int64_t idx : ghosts1)
810 global_to_local.push_back({idx, global_to_local.size() + local_size[1]});
811 std::ranges::sort(global_to_local);
812
813 // Compute location in which data for each index should be stored
814 // when received
815 for (std::size_t i = 0; i < ghost_index_array.size(); i += 2)
816 {
817 // Row must be on this process
818 std::int32_t local_row = ghost_index_array[i] - local_range[0][0];
819 assert(local_row >= 0 and local_row < local_size[0]);
820
821 // Column may be owned or unowned
822 std::int32_t local_col = ghost_index_array[i + 1] - local_range[1][0];
823 if (local_col < 0 or local_col >= local_size[1])
824 {
825 auto it = std::ranges::lower_bound(
826 global_to_local, std::pair(ghost_index_array[i + 1], -1),
827 [](auto a, auto b) { return a.first < b.first; });
828 assert(it != global_to_local.end()
829 and it->first == ghost_index_array[i + 1]);
830 local_col = it->second;
831 }
832 auto cit0 = std::next(_cols.begin(), _row_ptr[local_row]);
833 auto cit1 = std::next(_cols.begin(), _row_ptr[local_row + 1]);
834
835 // Find position of column index and insert data
836 auto cit = std::lower_bound(cit0, cit1, local_col);
837 assert(cit != cit1);
838 assert(*cit == local_col);
839 std::size_t d = std::distance(_cols.begin(), cit);
840 _unpack_pos.push_back(d);
841 }
842
843 _unpack_pos.shrink_to_fit();
844}
845//-----------------------------------------------------------------------------
846
847// The matrix A is distributed across P processes by blocks of rows:
848// A = | A_0 |
849// | A_1 |
850// | ... |
851// | A_P-1 |
852//
853// Each submatrix A_i is owned by a single process "i" and can be further
854// decomposed into diagonal (Ai[0]) and off diagonal (Ai[1]) blocks:
855// Ai = |Ai[0] Ai[1]|
856//
857// If A is square, the diagonal block Ai[0] is also square and contains
858// only owned columns and rows. The block Ai[1] contains ghost columns
859// (unowned dofs).
860
861// Likewise, a local vector x can be decomposed into owned and ghost blocks:
862// xi = | x[0] |
863// | x[1] |
864//
865// So the product y = Ax can be computed into two separate steps:
866// y[0] = |Ai[0] Ai[1]| | x[0] | = Ai[0] x[0] + Ai[1] x[1]
867// | x[1] |
868//
871template <typename Scalar, typename V, typename W, typename X>
873 la::Vector<Scalar>& y) const
874{
875 // start communication (update ghosts)
877
878 std::int32_t nrowslocal = num_owned_rows();
879 std::span<const std::int64_t> Arow_ptr(row_ptr().data(), nrowslocal + 1);
880 std::span<const std::int32_t> Acols(cols().data(), Arow_ptr[nrowslocal]);
881 std::span<const std::int64_t> Aoff_diag_offset(off_diag_offset().data(),
882 nrowslocal);
883 std::span<const Scalar> Avalues(values().data(),
884 Arow_ptr[nrowslocal] * _bs[0] * _bs[1]);
885
886 std::span<const Scalar> _x = x.array();
887 std::span<Scalar> _y = y.array();
888
889 std::span<const std::int64_t> Arow_begin(Arow_ptr.data(), nrowslocal);
890 std::span<const std::int64_t> Arow_end(Arow_ptr.data() + 1, nrowslocal);
891
892 // First stage: spmv - diagonal
893 // yi[0] += Ai[0] * xi[0]
894 if (_bs[1] == 1)
895 {
896 impl::spmv<Scalar, 1>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, _y,
897 _bs[0], 1);
898 }
899 else
900 {
901 impl::spmv<Scalar, -1>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, _y,
902 _bs[0], _bs[1]);
903 }
904
905 // finalize ghost update
906 x.scatter_fwd_end();
907
908 // Second stage: spmv - off-diagonal
909 // yi[0] += Ai[1] * xi[1]
910 if (_bs[1] == 1)
911 {
912 impl::spmv<Scalar, 1>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y,
913 _bs[0], 1);
914 }
915 else
916 {
917 impl::spmv<Scalar, -1>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y,
918 _bs[0], _bs[1]);
919 }
920}
921
924template <typename Scalar, typename V, typename W, typename X>
926 la::Vector<Scalar>& y) const
927{
928 std::int32_t nrowslocal = num_owned_rows();
929 std::span<const std::int64_t> Arow_ptr(row_ptr().data(), nrowslocal + 1);
930 std::span<const std::int32_t> Acols(cols().data(), Arow_ptr[nrowslocal]);
931 std::span<const std::int64_t> Aoff_diag_offset(off_diag_offset().data(),
932 nrowslocal);
933 std::span<const Scalar> Avalues(values().data(),
934 Arow_ptr[nrowslocal] * _bs[0] * _bs[1]);
935
936 std::span<const Scalar> _x = x.array();
937 std::span<Scalar> _y = y.array();
938
939 std::span<const std::int64_t> Arow_begin(Arow_ptr.data(), nrowslocal);
940 std::span<const std::int64_t> Arow_end(Arow_ptr.data() + 1, nrowslocal);
941
942 // Compute ghost region contribution and scatter back. Zero only the
943 // ghost portion of y so the caller's owned values are preserved (multT
944 // accumulates).
945 int ncolslocal = index_map(1)->size_local();
946 std::fill(std::next(_y.begin(), ncolslocal * _bs[1]), _y.end(), Scalar(0));
947 if (_bs[1] == 1)
948 impl::spmvT<Scalar, 1>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y,
949 _bs[0], 1);
950 else
951 impl::spmvT<Scalar, -1>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y,
952 _bs[0], _bs[1]);
953
954 y.scatter_rev(std::plus<Scalar>{});
955
956 if (_bs[1] == 1)
957 impl::spmvT<Scalar, 1>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, _y,
958 _bs[0], 1);
959 else
960 impl::spmvT<Scalar, -1>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x,
961 _y, _bs[0], _bs[1]);
962}
963
964} // namespace dolfinx::la
A duplicate MPI communicator and manage lifetime of the communicator.
Definition MPI.h:42
const container_type & values() const
Get local values (const version).
Definition MatrixCSR.h:547
std::shared_ptr< const common::IndexMap > index_map(int dim) const
Index map for the row or column space.
Definition MatrixCSR.h:536
const rowptr_container_type & off_diag_offset() const
Get the start of off-diagonal (unowned columns) on each row, allowing the matrix to be split (virtual...
Definition MatrixCSR.h:566
void set(std::span< const value_type > x, std::span< const std::int32_t > rows, std::span< const std::int32_t > cols)
Set values in the matrix.
Definition MatrixCSR.h:260
MatrixCSR(const MatrixCSR< Scalar0, Container0, ColContainer0, RowPtrContainer0 > &A)
Copy-convert matrix, possibly using to different container types.
Definition MatrixCSR.h:219
RowPtrContainer rowptr_container_type
Row pointer container type.
Definition MatrixCSR.h:89
void scatter_rev_end()
End transfer of ghost row data to owning ranks.
Definition MatrixCSR.h:440
container_type & values()
Get local data values.
Definition MatrixCSR.h:543
auto mat_add_values()
Insertion functor for adding values to a matrix. It is typically used in finite element assembly func...
Definition MatrixCSR.h:157
BlockMode block_mode() const
Get 'block mode'.
Definition MatrixCSR.h:576
void add(std::span< const value_type > x, std::span< const std::int32_t > rows, std::span< const std::int32_t > cols)
Accumulate values in the matrix.
Definition MatrixCSR.h:305
std::int32_t num_owned_rows() const
Number of local rows excluding ghost rows.
Definition MatrixCSR.h:332
ColContainer column_container_type
Column index container type.
Definition MatrixCSR.h:86
void mult(Vector< value_type > &x, Vector< value_type > &y) const
Compute the product y += Ax.
Definition MatrixCSR.h:872
MatrixCSR(MatrixCSR &&A)=default
MatrixCSR(const T &p, BlockMode mode=BlockMode::compact)
Create a distributed matrix.
double squared_norm() const
Compute the Frobenius norm squared across all processes.
Definition MatrixCSR.h:467
void scatter_rev()
Transfer ghost row data to the owning ranks accumulating received values on the owned rows,...
Definition MatrixCSR.h:378
void multT(Vector< value_type > &x, Vector< value_type > &y) const
Compute the product y += A^T x.
Definition MatrixCSR.h:925
Container container_type
Matrix entries container type.
Definition MatrixCSR.h:83
Scalar value_type
Scalar type.
Definition MatrixCSR.h:80
void scatter_rev_begin()
Begin transfer of ghost row data to owning ranks, where it will be accumulated into existing owned ro...
Definition MatrixCSR.h:393
const column_container_type & cols() const
Definition MatrixCSR.h:555
void set(value_type x)
Set all non-zero local entries to a value, including entries in ghost rows.
Definition MatrixCSR.h:238
std::array< int, 2 > block_size() const
Get block sizes.
Definition MatrixCSR.h:573
std::int32_t num_all_rows() const
Number of local rows including ghost rows.
Definition MatrixCSR.h:335
const rowptr_container_type & row_ptr() const
Get local row pointers.
Definition MatrixCSR.h:551
std::vector< value_type > to_dense() const
Copy to a dense matrix.
Definition MatrixCSR.h:346
MPI_Comm comm() const
Get MPI communicator that matrix is defined on.
Definition MatrixCSR.h:527
MatrixCSR(const MatrixCSR &A)=default
auto mat_set_values()
Insertion functor for setting values in a matrix. It is typically used in finite element assembly fun...
Definition MatrixCSR.h:115
A vector that can be distributed across processes.
Definition Vector.h:50
container_type & array()
Get the process-local part of the vector.
Definition Vector.h:387
void scatter_fwd_begin(U pack, GetPtr get_ptr)
Begin scatter (send) of local data that is ghosted on other processes.
Definition Vector.h:219
void scatter_fwd_end(U unpack)
End scatter (send) of local data values that are ghosted on other processes.
Definition Vector.h:256
void scatter_rev(BinaryOperation op)
Scatter (send) of ghost data values to the owning process and assign/accumulate into the owned data e...
Definition Vector.h:372
Definition MatrixCSR.h:25
MPI_Datatype mpi_t
Retrieves the MPI data type associated to the provided type.
Definition MPI.h:280
void check_error(MPI_Comm comm, int code)
Check MPI error code. If the error code is not equal to MPI_SUCCESS, then std::abort is called.
Definition MPI.cpp:80
int size(MPI_Comm comm)
Definition MPI.cpp:72
int rank(MPI_Comm comm)
Return process rank for the communicator.
Definition MPI.cpp:64
constexpr std::array< std::int64_t, 2 > local_range(int rank, std::int64_t N, int size)
Return local range for the calling process, partitioning the global [0, N - 1] range across all ranks...
Definition MPI.h:89
Linear algebra interface.
Definition dolfinx_la.h:7
BlockMode
Modes for representing block structured matrices.
Definition MatrixCSR.h:45
@ expanded
Definition MatrixCSR.h:48
auto norm(const V &x, Norm type=Norm::l2)
Compute the norm of the vector.
Definition Vector.h:477