DOLFINx 0.11.0.0
DOLFINx C++
Loading...
Searching...
No Matches
matmul.h
1// Copyright (C) 2026 Chris Richardson
2//
3// This file is part of DOLFINx (https://www.fenicsproject.org)
4//
5// SPDX-License-Identifier: LGPL-3.0-or-later
6
7#pragma once
8
9#include <dolfinx/common/IndexMap.h>
10
11#include <dolfinx/la/MatrixCSR.h>
12#include <mpi.h>
13
14#include <algorithm>
15#include <numeric>
16#include <span>
17#include <vector>
18
19namespace dolfinx::la
20{
21
35namespace impl
36{
37
50{
52 std::shared_ptr<const common::IndexMap> _row_map;
53
57 std::shared_ptr<const common::IndexMap> _col_map;
58
61 std::span<const std::int32_t> _cols;
62
65 std::span<const std::int64_t> _offsets;
66
73 std::span<const std::int32_t> _off_diag;
74
76 std::array<int, 2> _bs;
77
79 std::shared_ptr<const common::IndexMap> index_map(int dim) const
80 {
81 return dim == 0 ? _row_map : _col_map;
82 }
83
85 int block_size(int i) const { return _bs[i]; }
86
88 std::pair<std::span<const std::int32_t>, std::span<const std::int64_t>>
89 graph() const
90 {
91 return {_cols, _offsets};
92 }
93
95 std::span<const std::int32_t> off_diagonal_offsets() const
96 {
97 return _off_diag;
98 }
99};
100
107template <typename T>
108std::tuple<std::shared_ptr<common::IndexMap>, std::vector<std::int32_t>,
109 std::vector<std::int32_t>, std::vector<T>>
112{
113 // The ghost columns of A are global row indices into B.
114 auto col_map_A = A.index_map(1); // column IndexMap of A
115 auto row_map_B = B.index_map(0); // row IndexMap of B
116
117 // Create neighborhood comms for col_map_A
118 std::span<const int> src = col_map_A->src();
119 std::span<const int> dest = col_map_A->dest();
120 MPI_Comm comm = col_map_A->comm();
121
122 // Serial fast-path: with a single rank there are no ghost columns and
123 // no MPI communication to perform.
124 // No-neighbour parallel fast-path: A has no ghost columns (src empty) and
125 // no rank ghosts our A-owned columns (dest empty), so no remote rows of B
126 // are needed and no rank is waiting for us to participate in a collective.
127 // Symmetric topology guarantees the early return is safe without any global
128 // synchronisation (same argument as for transpose()).
129 // Use col_map_B — not col_map_A — for new_col_map so that any existing
130 // ghost columns in B are preserved in the product's column map.
131
132 int comm_size = dolfinx::MPI::size(col_map_A->comm());
133 if (comm_size == 1 or (src.empty() && dest.empty()))
134 {
135 auto col_map_B = B.index_map(1);
136 auto new_col_map = std::make_shared<dolfinx::common::IndexMap>(
137 col_map_B->comm(), col_map_B->size_local(), col_map_B->ghosts(),
138 col_map_B->owners());
139 return {new_col_map, std::vector<std::int32_t>{0},
140 std::vector<std::int32_t>{}, std::vector<T>{}};
141 }
142
143 // src is guaranteed sorted (Scatterer asserts this), so use binary search
144 // rather than a heap-allocated map.
145 auto rank_to_nbr = [&src](int r) -> int
146 {
147 auto it = std::lower_bound(src.begin(), src.end(), r);
148 assert(it != src.end() && *it == r);
149 return static_cast<int>(std::distance(src.begin(), it));
150 };
151 MPI_Comm neigh_comm_fwd;
152 MPI_Comm neigh_comm_rev;
153 MPI_Dist_graph_create_adjacent(comm, dest.size(), dest.data(), MPI_UNWEIGHTED,
154 src.size(), src.data(), MPI_UNWEIGHTED,
155 MPI_INFO_NULL, false, &neigh_comm_fwd);
156 MPI_Dist_graph_create_adjacent(comm, src.size(), src.data(), MPI_UNWEIGHTED,
157 dest.size(), dest.data(), MPI_UNWEIGHTED,
158 MPI_INFO_NULL, false, &neigh_comm_rev);
159
160 // Global indices of the rows of B we need (= ghost cols of A)
161 std::span<const std::int64_t> required_globals_map = col_map_A->ghosts();
162
163 // Owning rank for each ghost col of A (= source rank for each needed row)
164 std::span<const int> ghost_owners = col_map_A->owners();
165
166 std::vector<int> send_count(src.size(), 0);
167 std::vector<int> recv_count(dest.size(), 0);
168
169 for (int gh : ghost_owners)
170 ++send_count[rank_to_nbr(gh)];
171 MPI_Neighbor_alltoall(send_count.data(), 1, MPI_INT, recv_count.data(), 1,
172 MPI_INT, neigh_comm_fwd);
173
174 // Send and recv displacements
175 std::vector<int> send_disp(src.size() + 1, 0);
176 std::partial_sum(send_count.begin(), send_count.end(),
177 std::next(send_disp.begin()));
178 std::vector<int> recv_disp(dest.size() + 1, 0);
179 std::partial_sum(recv_count.begin(), recv_count.end(),
180 std::next(recv_disp.begin()));
181
182 // perm[i] = position in the send buffer (and received data) of original
183 // ghost column i, so we can invert the ordering after receiving.
184 std::vector<std::int32_t> perm(ghost_owners.size());
185 std::vector<std::int64_t> required_globals(required_globals_map.size());
186 for (std::size_t i = 0; i < ghost_owners.size(); ++i)
187 {
188 int pos = rank_to_nbr(ghost_owners[i]);
189 perm[i] = send_disp[pos]; // position before increment
190 required_globals[send_disp[pos]++] = required_globals_map[i];
191 }
192
193 // Reset send_disp
194 send_disp[0] = 0;
195 std::partial_sum(send_count.begin(), send_count.end(),
196 std::next(send_disp.begin()));
197
198 // Send the global row indices we need; receive the indices others need from
199 // us send buffer = required_globals (already ordered by src[])
200 std::vector<std::int64_t> recv_row_globals(recv_disp.back());
201
202 MPI_Neighbor_alltoallv(required_globals.data(), send_count.data(),
203 send_disp.data(), MPI_INT64_T, recv_row_globals.data(),
204 recv_count.data(), recv_disp.data(), MPI_INT64_T,
205 neigh_comm_fwd);
206
207 // Convert received global row indices to local row indices in B.
208 // The received indices are global rows of B that remote ranks need.
209 // They must all be owned by this rank (by construction).
210 std::vector<std::int32_t> recv_row_locals(recv_row_globals.size());
211 row_map_B->global_to_local(recv_row_globals, recv_row_locals);
212
213 // Compute size of send_data and communicate
214 auto row_ptr_B = B.row_ptr();
215
216 std::vector<int> send_entry_count(dest.size(), 0);
217 std::vector<int> send_entry_disp(dest.size() + 1, 0);
218 std::vector<int> recv_entry_count(src.size(), 0);
219 std::vector<int> recv_entry_disp(src.size() + 1, 0);
220 assert(recv_count.size() == dest.size());
221 for (std::size_t p = 0; p < recv_count.size(); ++p)
222 {
223 for (std::int32_t i = recv_disp[p]; i < recv_disp[p + 1]; ++i)
224 {
225 int r = recv_row_locals[i];
226 send_entry_count[p] += row_ptr_B[r + 1] - row_ptr_B[r];
227 }
228 }
229 std::partial_sum(send_entry_count.begin(), send_entry_count.end(),
230 std::next(send_entry_disp.begin()));
231
232 MPI_Neighbor_alltoall(send_entry_count.data(), 1, MPI_INT,
233 recv_entry_count.data(), 1, MPI_INT, neigh_comm_rev);
234
235 std::partial_sum(recv_entry_count.begin(), recv_entry_count.end(),
236 std::next(recv_entry_disp.begin()));
237
238 // Pack and send data values
239 auto values_B = B.values();
240 std::vector<T> send_vals(send_entry_disp.back());
241 std::vector<T> recv_vals(recv_entry_disp.back());
242 std::size_t k = 0;
243 for (int r : recv_row_locals)
244 {
245 std::size_t row_size = row_ptr_B[r + 1] - row_ptr_B[r];
246 std::copy(std::next(values_B.begin(), row_ptr_B[r]),
247 std::next(values_B.begin(), row_ptr_B[r + 1]),
248 std::next(send_vals.begin(), k));
249 k += row_size;
250 }
251 MPI_Datatype mpi_T = dolfinx::MPI::mpi_t<T>;
252 MPI_Neighbor_alltoallv(send_vals.data(), send_entry_count.data(),
253 send_entry_disp.data(), mpi_T, recv_vals.data(),
254 recv_entry_count.data(), recv_entry_disp.data(), mpi_T,
255 neigh_comm_rev);
256
257 // Pack and send column indices
258 auto cols_B = B.cols();
259 auto col_map_B = B.index_map(1);
260 const std::int32_t num_owned_cols_B = col_map_B->size_local();
261 std::span<const int> ghost_owners_B = col_map_B->owners();
262
263 // Send column indices (global indexing) and their owner ranks
264 std::vector<std::int64_t> send_col_data(send_entry_disp.back());
265 std::vector<std::int64_t> recv_col_data(recv_entry_disp.back());
266 std::vector<int> send_col_owners(send_entry_disp.back());
267 std::vector<int> recv_col_owners(recv_entry_disp.back());
268
269 // Send row sizes so receivers can reconstruct the CSR row_ptr
270 std::vector<int> send_row_size(recv_disp.back());
271 std::vector<int> recv_row_size(send_disp.back());
272
273 k = 0;
274 int rank = dolfinx::MPI::rank(col_map_A->comm());
275 for (std::size_t p = 0; p < dest.size(); ++p)
276 {
277 for (std::int32_t i = recv_disp[p]; i < recv_disp[p + 1]; ++i)
278 {
279 int r = recv_row_locals[i];
280 std::size_t row_size = row_ptr_B[r + 1] - row_ptr_B[r];
281 col_map_B->local_to_global(
282 std::span(std::next(cols_B.begin(), row_ptr_B[r]), row_size),
283 std::span(std::next(send_col_data.begin(), k), row_size));
284 for (std::size_t j = 0; j < row_size; ++j)
285 {
286 std::int32_t local_col = cols_B[row_ptr_B[r] + j];
287 send_col_owners[k + j]
288 = (local_col < num_owned_cols_B)
289 ? rank
290 : ghost_owners_B[local_col - num_owned_cols_B];
291 }
292 k += row_size;
293 send_row_size[i] = row_size;
294 }
295 }
296
297 MPI_Neighbor_alltoallv(send_col_data.data(), send_entry_count.data(),
298 send_entry_disp.data(), MPI_INT64_T,
299 recv_col_data.data(), recv_entry_count.data(),
300 recv_entry_disp.data(), MPI_INT64_T, neigh_comm_rev);
301 MPI_Neighbor_alltoallv(send_col_owners.data(), send_entry_count.data(),
302 send_entry_disp.data(), MPI_INT,
303 recv_col_owners.data(), recv_entry_count.data(),
304 recv_entry_disp.data(), MPI_INT, neigh_comm_rev);
305 MPI_Neighbor_alltoallv(send_row_size.data(), recv_count.data(),
306 recv_disp.data(), MPI_INT, recv_row_size.data(),
307 send_count.data(), send_disp.data(), MPI_INT,
308 neigh_comm_rev);
309
310 MPI_Comm_free(&neigh_comm_fwd);
311 MPI_Comm_free(&neigh_comm_rev);
312
313 // Build CSR row pointer for the received ghost rows.
314 // recv_vals and recv_col_data are the values and (global) column indices;
315 // recv_row_size[i] is the number of entries in ghost row i.
316 std::vector<std::int32_t> ghost_row_ptr(recv_row_size.size() + 1, 0);
317 std::partial_sum(recv_row_size.begin(), recv_row_size.end(),
318 std::next(ghost_row_ptr.begin()));
319
320 // ghost_row_ptr, recv_vals, recv_col_data now form a CSR structure for
321 // the ghost rows of B needed by this rank.
322
323 // Collect ghost column indices with their owners.
324 // Start from the received ghost rows, then add existing ghosts of col_map_B.
325 // Exclude anything in the locally owned range of col_map_B, then deduplicate.
326 auto [col_B_local_start, col_B_local_end] = col_map_B->local_range();
327 auto is_local = [col_B_local_start, col_B_local_end](std::int64_t idx)
328 { return idx >= col_B_local_start && idx < col_B_local_end; };
329
330 // Merge received (global_col, owner) pairs with the existing ghosts.
331 std::vector<std::pair<std::int64_t, int>> col_owner_pairs;
332 col_owner_pairs.reserve(recv_col_data.size() + col_map_B->ghosts().size());
333 for (std::size_t i = 0; i < recv_col_data.size(); ++i)
334 if (!is_local(recv_col_data[i]))
335 col_owner_pairs.push_back({recv_col_data[i], recv_col_owners[i]});
336
337 std::span<const std::int64_t> existing_ghosts = col_map_B->ghosts();
338 std::span<const int> existing_ghost_owners = col_map_B->owners();
339 for (std::size_t i = 0; i < existing_ghosts.size(); ++i)
340 col_owner_pairs.push_back({existing_ghosts[i], existing_ghost_owners[i]});
341
342 // Sort by global index and deduplicate.
343 std::sort(col_owner_pairs.begin(), col_owner_pairs.end());
344 col_owner_pairs.erase(
345 std::unique(col_owner_pairs.begin(), col_owner_pairs.end()),
346 col_owner_pairs.end());
347
348 std::vector<std::int64_t> unique_cols(col_owner_pairs.size());
349 std::vector<int> unique_col_owners(col_owner_pairs.size());
350 for (std::size_t i = 0; i < col_owner_pairs.size(); ++i)
351 {
352 unique_cols[i] = col_owner_pairs[i].first;
353 unique_col_owners[i] = col_owner_pairs[i].second;
354 }
355
356 // Build the extended column IndexMap for B.
357 auto new_col_map = std::make_shared<dolfinx::common::IndexMap>(
358 comm, col_map_B->size_local(), unique_cols, unique_col_owners);
359
360 // Convert received global column indices to local indices using new_col_map.
361 std::vector<std::int32_t> recv_col_local(recv_col_data.size());
362 new_col_map->global_to_local(recv_col_data, recv_col_local);
363
364 // The received data is in send-buffer order (grouped by src rank).
365 // Reorder it back to the original ghost column order of col_map_A so that
366 // sparsity_pattern and matmul can index by (j - num_local_rows_B).
367 const std::size_t num_ghosts = perm.size();
368 std::vector<std::int32_t> orig_ghost_row_ptr(num_ghosts + 1, 0);
369 for (std::size_t i = 0; i < num_ghosts; ++i)
370 orig_ghost_row_ptr[i + 1] = recv_row_size[perm[i]];
371 std::partial_sum(orig_ghost_row_ptr.begin(), orig_ghost_row_ptr.end(),
372 orig_ghost_row_ptr.begin());
373
374 std::vector<std::int32_t> orig_recv_col_local(recv_col_local.size());
375 std::vector<T> orig_recv_vals(recv_vals.size());
376 for (std::size_t i = 0; i < num_ghosts; ++i)
377 {
378 std::int32_t src_start = ghost_row_ptr[perm[i]];
379 std::int32_t src_end = ghost_row_ptr[perm[i] + 1];
380 std::int32_t dst_start = orig_ghost_row_ptr[i];
381 std::copy(std::next(recv_col_local.begin(), src_start),
382 std::next(recv_col_local.begin(), src_end),
383 std::next(orig_recv_col_local.begin(), dst_start));
384 std::copy(std::next(recv_vals.begin(), src_start),
385 std::next(recv_vals.begin(), src_end),
386 std::next(orig_recv_vals.begin(), dst_start));
387 }
388
389 return {new_col_map, orig_ghost_row_ptr, orig_recv_col_local, orig_recv_vals};
390}
391
409template <typename T>
410std::tuple<std::vector<std::int64_t>, std::vector<std::int32_t>,
411 std::vector<std::int32_t>, std::vector<T>>
413 std::shared_ptr<const common::IndexMap> new_col_map,
414 std::span<const std::int32_t> ghost_row_ptr,
415 std::span<const std::int32_t> ghost_cols, std::span<const T> ghost_vals)
416{
417 auto col_map_B = B.index_map(1);
418 const std::int32_t num_local_rows_A = A.index_map(0)->size_local();
419 const std::int32_t num_local_rows_B = B.index_map(0)->size_local();
420 const std::int32_t num_owned_cols_B = col_map_B->size_local();
421 const std::int32_t num_owned_cols_C = new_col_map->size_local();
422
423 auto row_ptr_A = A.row_ptr();
424 auto offdiag_ptr_A = A.off_diag_offset();
425 auto cols_A = A.cols();
426 auto vals_A = A.values();
427 auto row_ptr_B = B.row_ptr();
428 auto cols_B = B.cols();
429 auto vals_B = B.values();
430
431 // Remap B's ghost column indices into new_col_map's index space.
432 // Owned column indices (0..num_owned_cols_B-1) are identical in both maps.
433 std::span<const std::int64_t> b_ghost_globals = col_map_B->ghosts();
434 std::vector<std::int32_t> b_ghost_remap(b_ghost_globals.size());
435 new_col_map->global_to_local(b_ghost_globals, b_ghost_remap);
436
437 const std::int32_t num_cols_C
438 = new_col_map->size_local()
439 + static_cast<std::int32_t>(new_col_map->ghosts().size());
440
441 std::vector<std::int64_t> row_ptr_C = {0};
442 row_ptr_C.reserve(num_local_rows_A + 1);
443 std::vector<std::int32_t> off_diag_offsets_C;
444 off_diag_offsets_C.reserve(num_local_rows_A);
445 std::vector<std::int32_t> cols_C;
446 std::vector<T> vals_C;
447
448 // Dense accumulator: acc[k] holds the running sum for column k.
449 // in_row[k] marks whether column k was touched in the current row.
450 // Both are reset via row_cols at the end of each row (O(nnz/row), not
451 // O(num_cols_C)).
452 std::vector<T> acc(num_cols_C, T(0));
453 std::vector<bool> in_row(num_cols_C, false);
454 std::vector<std::int32_t> row_cols;
455
456 for (std::int32_t i = 0; i < num_local_rows_A; ++i)
457 {
458 row_cols.clear();
459
460 // Local columns of A → local rows of B.
461 for (std::int32_t ka = row_ptr_A[i]; ka < offdiag_ptr_A[i]; ++ka)
462 {
463 const std::int32_t j = cols_A[ka];
464 const T a = vals_A[ka];
465 for (std::int32_t kb = row_ptr_B[j]; kb < row_ptr_B[j + 1]; ++kb)
466 {
467 const std::int32_t c = cols_B[kb];
468 const std::int32_t k
469 = c < num_owned_cols_B ? c : b_ghost_remap[c - num_owned_cols_B];
470 T acc_val = a * vals_B[kb];
471 if (!in_row[k] and acc_val != T(0))
472 {
473 in_row[k] = true;
474 row_cols.push_back(k);
475 }
476 acc[k] += acc_val;
477 }
478 }
479
480 // Ghost columns of A → ghost rows of B.
481 for (std::int32_t ka = offdiag_ptr_A[i]; ka < row_ptr_A[i + 1]; ++ka)
482 {
483 const std::int32_t j = cols_A[ka];
484 const T a = vals_A[ka];
485 const std::int32_t g = j - num_local_rows_B;
486 for (std::int32_t kb = ghost_row_ptr[g]; kb < ghost_row_ptr[g + 1]; ++kb)
487 {
488 const std::int32_t k = ghost_cols[kb];
489 T acc_val = a * ghost_vals[kb];
490 if (!in_row[k] and acc_val != T(0))
491 {
492 in_row[k] = true;
493 row_cols.push_back(k);
494 }
495 acc[k] += acc_val;
496 }
497 }
498
499 // Remove zeros produced by exact cancellation in the SpGEMM.
500 auto pos = row_cols.begin();
501 while (pos != row_cols.end())
502 {
503 if (acc[*pos] == T(0))
504 {
505 in_row[*pos] = false;
506 pos = row_cols.erase(pos);
507 }
508 else
509 ++pos;
510 }
511
512 // Sort touched indices to satisfy the CSR sorted-column invariant
513 // required by off_diag_offset and MatrixCSR.
514 std::sort(row_cols.begin(), row_cols.end());
515
516 // Diagonal block boundary: count entries in the owned column range.
517 // row_cols is sorted, so a single lower_bound suffices.
518 off_diag_offsets_C.push_back(static_cast<std::int32_t>(
519 std::lower_bound(row_cols.begin(), row_cols.end(), num_owned_cols_C)
520 - row_cols.begin()));
521
522 // Flush accumulator to output and reset both acc and in_row.
523 for (const std::int32_t c : row_cols)
524 {
525 cols_C.push_back(c);
526 vals_C.push_back(acc[c]);
527 acc[c] = T(0);
528 in_row[c] = false;
529 }
530 row_ptr_C.push_back(static_cast<std::int64_t>(cols_C.size()));
531 }
532
533 return {std::move(row_ptr_C), std::move(off_diag_offsets_C),
534 std::move(cols_C), std::move(vals_C)};
535}
536
537} // namespace impl
538
548template <typename T>
551{
552 dolfinx::common::Timer t_spgemm("MatrixCSR SpGEMM");
553
554 std::array<int, 2> bs{1, 1};
555 if (A.block_size() != B.block_size() or A.block_size() != bs)
556 {
557 throw std::runtime_error("Currently matmul only supports block size=1");
558 }
559
560 // Fetch ghost rows of B needed to multiply against A's ghost columns.
561 spdlog::debug("Fetch remote rows of B in C=A*B");
562 auto [new_col_map, ghost_row_ptr, ghost_cols, ghost_vals]
563 = impl::fetch_ghost_rows(A, B);
564
565 // Single pass: compute sparsity, off-diagonal boundaries, and values.
566 spdlog::debug("Compute sparsity and values of C=A*B");
567 auto [C_row_ptr, C_off_diag_offsets, C_cols, C_vals_vec] = impl::matmul(
568 A, B, new_col_map, std::span<const std::int32_t>(ghost_row_ptr),
569 std::span<const std::int32_t>(ghost_cols),
570 std::span<const T>(ghost_vals));
571
572 auto C_row_map = std::make_shared<common::IndexMap>(
573 A.index_map(0)->comm(), A.index_map(0)->size_local());
574 impl::Sparsity sp{C_row_map, new_col_map, C_cols,
575 C_row_ptr, C_off_diag_offsets, bs};
577 std::copy(C_vals_vec.begin(), C_vals_vec.end(), C.values().begin());
578
579 return C;
580}
581
582} // namespace dolfinx::la
Timer for measuring and logging elapsed time durations.
Definition Timer.h:40
Distributed sparse matrix using compressed sparse row storage.
Definition MatrixCSR.h:70
std::shared_ptr< const common::IndexMap > index_map(int dim) const
Index map for the row or column space.
Definition MatrixCSR.h:536
const rowptr_container_type & off_diag_offset() const
Get the start of off-diagonal (unowned columns) on each row, allowing the matrix to be split (virtual...
Definition MatrixCSR.h:566
container_type & values()
Get local data values.
Definition MatrixCSR.h:543
const column_container_type & cols() const
Definition MatrixCSR.h:555
std::array< int, 2 > block_size() const
Get block sizes.
Definition MatrixCSR.h:573
const rowptr_container_type & row_ptr() const
Get local row pointers.
Definition MatrixCSR.h:551
MPI_Datatype mpi_t
Retrieves the MPI data type associated to the provided type.
Definition MPI.h:280
int size(MPI_Comm comm)
Definition MPI.cpp:72
int rank(MPI_Comm comm)
Return process rank for the communicator.
Definition MPI.cpp:64
Fetch the rows of B that correspond to the ghost columns of A.
Definition matmul.h:36
std::tuple< std::shared_ptr< common::IndexMap >, std::vector< std::int32_t >, std::vector< std::int32_t >, std::vector< T > > fetch_ghost_rows(const dolfinx::la::MatrixCSR< T > &A, const dolfinx::la::MatrixCSR< T > &B)
Fetch the rows of Matrix B which are referenced by the ghost columns of Matrix A.
Definition matmul.h:110
std::tuple< std::vector< std::int64_t >, std::vector< std::int32_t >, std::vector< std::int32_t >, std::vector< T > > matmul(const dolfinx::la::MatrixCSR< T > &A, const dolfinx::la::MatrixCSR< T > &B, std::shared_ptr< const common::IndexMap > new_col_map, std::span< const std::int32_t > ghost_row_ptr, std::span< const std::int32_t > ghost_cols, std::span< const T > ghost_vals)
Compute the sparsity pattern and values of C = A*B in a single pass.
Definition matmul.h:412
Linear algebra interface.
Definition dolfinx_la.h:7
dolfinx::la::MatrixCSR< T > matmul(const dolfinx::la::MatrixCSR< T > &A, const dolfinx::la::MatrixCSR< T > &B)
Compute C = A*B as a distributed MatrixCSR.
Definition matmul.h:549
Lightweight sparsity descriptor satisfying the SparsityImplementation concept required by the MatrixC...
Definition matmul.h:50
std::shared_ptr< const common::IndexMap > index_map(int dim) const
Return the row (dim == 0) or column (dim == 1) IndexMap.
Definition matmul.h:79
int block_size(int i) const
Return the block size.
Definition matmul.h:85
std::span< const std::int32_t > off_diagonal_offsets() const
Return the per-row diagonal block sizes (see _off_diag).
Definition matmul.h:95
std::pair< std::span< const std::int32_t >, std::span< const std::int64_t > > graph() const
Return the CSR graph as (column_indices, row_pointers).
Definition matmul.h:89