118 std::span<const int> src = col_map_A->src();
119 std::span<const int> dest = col_map_A->dest();
120 MPI_Comm comm = col_map_A->comm();
133 if (comm_size == 1 or (src.empty() && dest.empty()))
136 auto new_col_map = std::make_shared<dolfinx::common::IndexMap>(
137 col_map_B->comm(), col_map_B->size_local(), col_map_B->ghosts(),
138 col_map_B->owners());
139 return {new_col_map, std::vector<std::int32_t>{0},
140 std::vector<std::int32_t>{}, std::vector<T>{}};
145 auto rank_to_nbr = [&src](
int r) ->
int
147 auto it = std::lower_bound(src.begin(), src.end(), r);
148 assert(it != src.end() && *it == r);
149 return static_cast<int>(std::distance(src.begin(), it));
151 MPI_Comm neigh_comm_fwd;
152 MPI_Comm neigh_comm_rev;
153 MPI_Dist_graph_create_adjacent(comm, dest.size(), dest.data(), MPI_UNWEIGHTED,
154 src.size(), src.data(), MPI_UNWEIGHTED,
155 MPI_INFO_NULL,
false, &neigh_comm_fwd);
156 MPI_Dist_graph_create_adjacent(comm, src.size(), src.data(), MPI_UNWEIGHTED,
157 dest.size(), dest.data(), MPI_UNWEIGHTED,
158 MPI_INFO_NULL,
false, &neigh_comm_rev);
161 std::span<const std::int64_t> required_globals_map = col_map_A->ghosts();
164 std::span<const int> ghost_owners = col_map_A->owners();
166 std::vector<int> send_count(src.size(), 0);
167 std::vector<int> recv_count(dest.size(), 0);
169 for (
int gh : ghost_owners)
170 ++send_count[rank_to_nbr(gh)];
171 MPI_Neighbor_alltoall(send_count.data(), 1, MPI_INT, recv_count.data(), 1,
172 MPI_INT, neigh_comm_fwd);
175 std::vector<int> send_disp(src.size() + 1, 0);
176 std::partial_sum(send_count.begin(), send_count.end(),
177 std::next(send_disp.begin()));
178 std::vector<int> recv_disp(dest.size() + 1, 0);
179 std::partial_sum(recv_count.begin(), recv_count.end(),
180 std::next(recv_disp.begin()));
184 std::vector<std::int32_t> perm(ghost_owners.size());
185 std::vector<std::int64_t> required_globals(required_globals_map.size());
186 for (std::size_t i = 0; i < ghost_owners.size(); ++i)
188 int pos = rank_to_nbr(ghost_owners[i]);
189 perm[i] = send_disp[pos];
190 required_globals[send_disp[pos]++] = required_globals_map[i];
195 std::partial_sum(send_count.begin(), send_count.end(),
196 std::next(send_disp.begin()));
200 std::vector<std::int64_t> recv_row_globals(recv_disp.back());
202 MPI_Neighbor_alltoallv(required_globals.data(), send_count.data(),
203 send_disp.data(), MPI_INT64_T, recv_row_globals.data(),
204 recv_count.data(), recv_disp.data(), MPI_INT64_T,
210 std::vector<std::int32_t> recv_row_locals(recv_row_globals.size());
211 row_map_B->global_to_local(recv_row_globals, recv_row_locals);
216 std::vector<int> send_entry_count(dest.size(), 0);
217 std::vector<int> send_entry_disp(dest.size() + 1, 0);
218 std::vector<int> recv_entry_count(src.size(), 0);
219 std::vector<int> recv_entry_disp(src.size() + 1, 0);
220 assert(recv_count.size() == dest.size());
221 for (std::size_t p = 0; p < recv_count.size(); ++p)
223 for (std::int32_t i = recv_disp[p]; i < recv_disp[p + 1]; ++i)
225 int r = recv_row_locals[i];
226 send_entry_count[p] += row_ptr_B[r + 1] - row_ptr_B[r];
229 std::partial_sum(send_entry_count.begin(), send_entry_count.end(),
230 std::next(send_entry_disp.begin()));
232 MPI_Neighbor_alltoall(send_entry_count.data(), 1, MPI_INT,
233 recv_entry_count.data(), 1, MPI_INT, neigh_comm_rev);
235 std::partial_sum(recv_entry_count.begin(), recv_entry_count.end(),
236 std::next(recv_entry_disp.begin()));
239 auto values_B = B.
values();
240 std::vector<T> send_vals(send_entry_disp.back());
241 std::vector<T> recv_vals(recv_entry_disp.back());
243 for (
int r : recv_row_locals)
245 std::size_t row_size = row_ptr_B[r + 1] - row_ptr_B[r];
246 std::copy(std::next(values_B.begin(), row_ptr_B[r]),
247 std::next(values_B.begin(), row_ptr_B[r + 1]),
248 std::next(send_vals.begin(), k));
252 MPI_Neighbor_alltoallv(send_vals.data(), send_entry_count.data(),
253 send_entry_disp.data(), mpi_T, recv_vals.data(),
254 recv_entry_count.data(), recv_entry_disp.data(), mpi_T,
258 auto cols_B = B.
cols();
260 const std::int32_t num_owned_cols_B = col_map_B->size_local();
261 std::span<const int> ghost_owners_B = col_map_B->owners();
264 std::vector<std::int64_t> send_col_data(send_entry_disp.back());
265 std::vector<std::int64_t> recv_col_data(recv_entry_disp.back());
266 std::vector<int> send_col_owners(send_entry_disp.back());
267 std::vector<int> recv_col_owners(recv_entry_disp.back());
270 std::vector<int> send_row_size(recv_disp.back());
271 std::vector<int> recv_row_size(send_disp.back());
275 for (std::size_t p = 0; p < dest.size(); ++p)
277 for (std::int32_t i = recv_disp[p]; i < recv_disp[p + 1]; ++i)
279 int r = recv_row_locals[i];
280 std::size_t row_size = row_ptr_B[r + 1] - row_ptr_B[r];
281 col_map_B->local_to_global(
282 std::span(std::next(cols_B.begin(), row_ptr_B[r]), row_size),
283 std::span(std::next(send_col_data.begin(), k), row_size));
284 for (std::size_t j = 0; j < row_size; ++j)
286 std::int32_t local_col = cols_B[row_ptr_B[r] + j];
287 send_col_owners[k + j]
288 = (local_col < num_owned_cols_B)
290 : ghost_owners_B[local_col - num_owned_cols_B];
293 send_row_size[i] = row_size;
297 MPI_Neighbor_alltoallv(send_col_data.data(), send_entry_count.data(),
298 send_entry_disp.data(), MPI_INT64_T,
299 recv_col_data.data(), recv_entry_count.data(),
300 recv_entry_disp.data(), MPI_INT64_T, neigh_comm_rev);
301 MPI_Neighbor_alltoallv(send_col_owners.data(), send_entry_count.data(),
302 send_entry_disp.data(), MPI_INT,
303 recv_col_owners.data(), recv_entry_count.data(),
304 recv_entry_disp.data(), MPI_INT, neigh_comm_rev);
305 MPI_Neighbor_alltoallv(send_row_size.data(), recv_count.data(),
306 recv_disp.data(), MPI_INT, recv_row_size.data(),
307 send_count.data(), send_disp.data(), MPI_INT,
310 MPI_Comm_free(&neigh_comm_fwd);
311 MPI_Comm_free(&neigh_comm_rev);
316 std::vector<std::int32_t> ghost_row_ptr(recv_row_size.size() + 1, 0);
317 std::partial_sum(recv_row_size.begin(), recv_row_size.end(),
318 std::next(ghost_row_ptr.begin()));
326 auto [col_B_local_start, col_B_local_end] = col_map_B->local_range();
327 auto is_local = [col_B_local_start, col_B_local_end](std::int64_t idx)
328 {
return idx >= col_B_local_start && idx < col_B_local_end; };
331 std::vector<std::pair<std::int64_t, int>> col_owner_pairs;
332 col_owner_pairs.reserve(recv_col_data.size() + col_map_B->ghosts().size());
333 for (std::size_t i = 0; i < recv_col_data.size(); ++i)
334 if (!is_local(recv_col_data[i]))
335 col_owner_pairs.push_back({recv_col_data[i], recv_col_owners[i]});
337 std::span<const std::int64_t> existing_ghosts = col_map_B->ghosts();
338 std::span<const int> existing_ghost_owners = col_map_B->owners();
339 for (std::size_t i = 0; i < existing_ghosts.size(); ++i)
340 col_owner_pairs.push_back({existing_ghosts[i], existing_ghost_owners[i]});
343 std::sort(col_owner_pairs.begin(), col_owner_pairs.end());
344 col_owner_pairs.erase(
345 std::unique(col_owner_pairs.begin(), col_owner_pairs.end()),
346 col_owner_pairs.end());
348 std::vector<std::int64_t> unique_cols(col_owner_pairs.size());
349 std::vector<int> unique_col_owners(col_owner_pairs.size());
350 for (std::size_t i = 0; i < col_owner_pairs.size(); ++i)
352 unique_cols[i] = col_owner_pairs[i].first;
353 unique_col_owners[i] = col_owner_pairs[i].second;
357 auto new_col_map = std::make_shared<dolfinx::common::IndexMap>(
358 comm, col_map_B->size_local(), unique_cols, unique_col_owners);
361 std::vector<std::int32_t> recv_col_local(recv_col_data.size());
362 new_col_map->global_to_local(recv_col_data, recv_col_local);
367 const std::size_t num_ghosts = perm.size();
368 std::vector<std::int32_t> orig_ghost_row_ptr(num_ghosts + 1, 0);
369 for (std::size_t i = 0; i < num_ghosts; ++i)
370 orig_ghost_row_ptr[i + 1] = recv_row_size[perm[i]];
371 std::partial_sum(orig_ghost_row_ptr.begin(), orig_ghost_row_ptr.end(),
372 orig_ghost_row_ptr.begin());
374 std::vector<std::int32_t> orig_recv_col_local(recv_col_local.size());
375 std::vector<T> orig_recv_vals(recv_vals.size());
376 for (std::size_t i = 0; i < num_ghosts; ++i)
378 std::int32_t src_start = ghost_row_ptr[perm[i]];
379 std::int32_t src_end = ghost_row_ptr[perm[i] + 1];
380 std::int32_t dst_start = orig_ghost_row_ptr[i];
381 std::copy(std::next(recv_col_local.begin(), src_start),
382 std::next(recv_col_local.begin(), src_end),
383 std::next(orig_recv_col_local.begin(), dst_start));
384 std::copy(std::next(recv_vals.begin(), src_start),
385 std::next(recv_vals.begin(), src_end),
386 std::next(orig_recv_vals.begin(), dst_start));
389 return {new_col_map, orig_ghost_row_ptr, orig_recv_col_local, orig_recv_vals};
413 std::shared_ptr<const common::IndexMap> new_col_map,
414 std::span<const std::int32_t> ghost_row_ptr,
415 std::span<const std::int32_t> ghost_cols, std::span<const T> ghost_vals)
418 const std::int32_t num_local_rows_A = A.
index_map(0)->size_local();
419 const std::int32_t num_local_rows_B = B.
index_map(0)->size_local();
420 const std::int32_t num_owned_cols_B = col_map_B->size_local();
421 const std::int32_t num_owned_cols_C = new_col_map->size_local();
425 auto cols_A = A.
cols();
428 auto cols_B = B.
cols();
433 std::span<const std::int64_t> b_ghost_globals = col_map_B->ghosts();
434 std::vector<std::int32_t> b_ghost_remap(b_ghost_globals.size());
435 new_col_map->global_to_local(b_ghost_globals, b_ghost_remap);
437 const std::int32_t num_cols_C
438 = new_col_map->size_local()
439 +
static_cast<std::int32_t
>(new_col_map->ghosts().size());
441 std::vector<std::int64_t> row_ptr_C = {0};
442 row_ptr_C.reserve(num_local_rows_A + 1);
443 std::vector<std::int32_t> off_diag_offsets_C;
444 off_diag_offsets_C.reserve(num_local_rows_A);
445 std::vector<std::int32_t> cols_C;
446 std::vector<T> vals_C;
452 std::vector<T> acc(num_cols_C, T(0));
453 std::vector<bool> in_row(num_cols_C,
false);
454 std::vector<std::int32_t> row_cols;
456 for (std::int32_t i = 0; i < num_local_rows_A; ++i)
461 for (std::int32_t ka = row_ptr_A[i]; ka < offdiag_ptr_A[i]; ++ka)
463 const std::int32_t j = cols_A[ka];
464 const T a = vals_A[ka];
465 for (std::int32_t kb = row_ptr_B[j]; kb < row_ptr_B[j + 1]; ++kb)
467 const std::int32_t c = cols_B[kb];
469 = c < num_owned_cols_B ? c : b_ghost_remap[c - num_owned_cols_B];
470 T acc_val = a * vals_B[kb];
471 if (!in_row[k] and acc_val != T(0))
474 row_cols.push_back(k);
481 for (std::int32_t ka = offdiag_ptr_A[i]; ka < row_ptr_A[i + 1]; ++ka)
483 const std::int32_t j = cols_A[ka];
484 const T a = vals_A[ka];
485 const std::int32_t g = j - num_local_rows_B;
486 for (std::int32_t kb = ghost_row_ptr[g]; kb < ghost_row_ptr[g + 1]; ++kb)
488 const std::int32_t k = ghost_cols[kb];
489 T acc_val = a * ghost_vals[kb];
490 if (!in_row[k] and acc_val != T(0))
493 row_cols.push_back(k);
500 auto pos = row_cols.begin();
501 while (pos != row_cols.end())
503 if (acc[*pos] == T(0))
505 in_row[*pos] =
false;
506 pos = row_cols.erase(pos);
514 std::sort(row_cols.begin(), row_cols.end());
518 off_diag_offsets_C.push_back(
static_cast<std::int32_t
>(
519 std::lower_bound(row_cols.begin(), row_cols.end(), num_owned_cols_C)
520 - row_cols.begin()));
523 for (
const std::int32_t c : row_cols)
526 vals_C.push_back(acc[c]);
530 row_ptr_C.push_back(
static_cast<std::int64_t
>(cols_C.size()));
533 return {std::move(row_ptr_C), std::move(off_diag_offsets_C),
534 std::move(cols_C), std::move(vals_C)};