25namespace dolfinx::fem::impl
28using mdspan2_t = MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan<
30 MDSPAN_IMPL_STANDARD_NAMESPACE::dextents<std::size_t, 2>>;
60template <dolfinx::scalar T>
63 std::span<
const scalar_value_type_t<T>> x,
64 std::span<const std::int32_t> cells,
65 std::tuple<mdspan2_t,
int, std::span<const std::int32_t>> dofmap0,
67 std::tuple<mdspan2_t,
int, std::span<const std::int32_t>> dofmap1,
69 std::span<const std::int8_t> bc1,
FEkernel<T> auto kernel,
70 std::span<const T> coeffs,
int cstride, std::span<const T> constants,
71 std::span<const std::uint32_t> cell_info0,
72 std::span<const std::uint32_t> cell_info1)
77 const auto [dmap0, bs0, cells0] = dofmap0;
78 const auto [dmap1, bs1, cells1] = dofmap1;
81 const int num_dofs0 = dmap0.extent(1);
82 const int num_dofs1 = dmap1.extent(1);
83 const int ndim0 = bs0 * num_dofs0;
84 const int ndim1 = bs1 * num_dofs1;
85 std::vector<T> Ae(ndim0 * ndim1);
87 std::vector<scalar_value_type_t<T>> coordinate_dofs(3 * x_dofmap.extent(1));
90 assert(cells0.size() == cells.size());
91 assert(cells1.size() == cells.size());
92 for (std::size_t index = 0; index < cells.size(); ++index)
96 std::int32_t c = cells[index];
97 std::int32_t c0 = cells0[index];
98 std::int32_t c1 = cells1[index];
101 auto x_dofs = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan(
102 x_dofmap, c, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent);
103 for (std::size_t i = 0; i < x_dofs.size(); ++i)
105 std::copy_n(std::next(x.begin(), 3 * x_dofs[i]), 3,
106 std::next(coordinate_dofs.begin(), 3 * i));
110 std::ranges::fill(Ae, 0);
111 kernel(Ae.data(), coeffs.data() + index * cstride, constants.data(),
112 coordinate_dofs.data(),
nullptr,
nullptr);
115 P0(_Ae, cell_info0, c0, ndim1);
116 P1T(_Ae, cell_info1, c1, ndim0);
119 auto dofs0 = std::span(dmap0.data_handle() + c0 * num_dofs0, num_dofs0);
120 auto dofs1 = std::span(dmap1.data_handle() + c1 * num_dofs1, num_dofs1);
124 for (
int i = 0; i < num_dofs0; ++i)
126 for (
int k = 0; k < bs0; ++k)
128 if (bc0[bs0 * dofs0[i] + k])
131 const int row = bs0 * i + k;
132 std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0);
140 for (
int j = 0; j < num_dofs1; ++j)
142 for (
int k = 0; k < bs1; ++k)
144 if (bc1[bs1 * dofs1[j] + k])
147 const int col = bs1 * j + k;
148 for (
int row = 0; row < ndim0; ++row)
149 Ae[row * ndim1 + col] = 0;
155 mat_set(dofs0, dofs1, Ae);
194template <dolfinx::scalar T>
195void assemble_exterior_facets(
197 std::span<
const scalar_value_type_t<T>> x,
int num_facets_per_cell,
198 std::span<const std::int32_t> facets,
199 std::tuple<mdspan2_t,
int, std::span<const std::int32_t>> dofmap0,
201 std::tuple<mdspan2_t,
int, std::span<const std::int32_t>> dofmap1,
203 std::span<const std::int8_t> bc1,
FEkernel<T> auto kernel,
204 std::span<const T> coeffs,
int cstride, std::span<const T> constants,
205 std::span<const std::uint32_t> cell_info0,
206 std::span<const std::uint32_t> cell_info1,
207 std::span<const std::uint8_t> perms)
212 const auto [dmap0, bs0, facets0] = dofmap0;
213 const auto [dmap1, bs1, facets1] = dofmap1;
216 std::vector<scalar_value_type_t<T>> coordinate_dofs(3 * x_dofmap.extent(1));
217 const int num_dofs0 = dmap0.extent(1);
218 const int num_dofs1 = dmap1.extent(1);
219 const int ndim0 = bs0 * num_dofs0;
220 const int ndim1 = bs1 * num_dofs1;
221 std::vector<T> Ae(ndim0 * ndim1);
222 std::span<T> _Ae(Ae);
223 assert(facets.size() % 2 == 0);
224 assert(facets0.size() == facets.size());
225 assert(facets1.size() == facets.size());
226 for (std::size_t index = 0; index < facets.size(); index += 2)
231 std::int32_t
cell = facets[index];
232 std::int32_t local_facet = facets[index + 1];
233 std::int32_t cell0 = facets0[index];
234 std::int32_t cell1 = facets1[index];
237 auto x_dofs = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan(
238 x_dofmap,
cell, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent);
239 for (std::size_t i = 0; i < x_dofs.size(); ++i)
241 std::copy_n(std::next(x.begin(), 3 * x_dofs[i]), 3,
242 std::next(coordinate_dofs.begin(), 3 * i));
247 = perms.empty() ? 0 : perms[
cell * num_facets_per_cell + local_facet];
250 std::ranges::fill(Ae, 0);
251 kernel(Ae.data(), coeffs.data() + index / 2 * cstride, constants.data(),
252 coordinate_dofs.data(), &local_facet, &perm);
254 P0(_Ae, cell_info0, cell0, ndim1);
255 P1T(_Ae, cell_info1, cell1, ndim0);
258 auto dofs0 = std::span(dmap0.data_handle() + cell0 * num_dofs0, num_dofs0);
259 auto dofs1 = std::span(dmap1.data_handle() + cell1 * num_dofs1, num_dofs1);
262 for (
int i = 0; i < num_dofs0; ++i)
264 for (
int k = 0; k < bs0; ++k)
266 if (bc0[bs0 * dofs0[i] + k])
269 const int row = bs0 * i + k;
270 std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0);
277 for (
int j = 0; j < num_dofs1; ++j)
279 for (
int k = 0; k < bs1; ++k)
281 if (bc1[bs1 * dofs1[j] + k])
284 const int col = bs1 * j + k;
285 for (
int row = 0; row < ndim0; ++row)
286 Ae[row * ndim1 + col] = 0;
292 mat_set(dofs0, dofs1, Ae);
333template <dolfinx::scalar T>
334void assemble_interior_facets(
336 std::span<
const scalar_value_type_t<T>> x,
int num_facets_per_cell,
337 std::span<const std::int32_t> facets,
338 std::tuple<
const DofMap&,
int, std::span<const std::int32_t>> dofmap0,
340 std::tuple<
const DofMap&,
int, std::span<const std::int32_t>> dofmap1,
342 std::span<const std::int8_t> bc1,
FEkernel<T> auto kernel,
343 std::span<const T> coeffs,
int cstride, std::span<const int> offsets,
344 std::span<const T> constants, std::span<const std::uint32_t> cell_info0,
345 std::span<const std::uint32_t> cell_info1,
346 std::span<const std::uint8_t> perms)
351 const auto [dmap0, bs0, facets0] = dofmap0;
352 const auto [dmap1, bs1, facets1] = dofmap1;
355 using X = scalar_value_type_t<T>;
356 std::vector<X> coordinate_dofs(2 * x_dofmap.extent(1) * 3);
357 std::span<X> cdofs0(coordinate_dofs.data(), x_dofmap.extent(1) * 3);
358 std::span<X> cdofs1(coordinate_dofs.data() + x_dofmap.extent(1) * 3,
359 x_dofmap.extent(1) * 3);
361 std::vector<T> Ae, be;
362 std::vector<T> coeff_array(2 * offsets.back());
363 assert(offsets.back() == cstride);
366 std::vector<std::int32_t> dmapjoint0, dmapjoint1;
367 assert(facets.size() % 4 == 0);
368 assert(facets0.size() == facets.size());
369 assert(facets1.size() == facets.size());
370 for (std::size_t index = 0; index < facets.size(); index += 4)
374 std::array cells{facets[index], facets[index + 2]};
375 std::array cells0{facets0[index], facets0[index + 2]};
376 std::array cells1{facets1[index], facets1[index + 2]};
379 std::array local_facet{facets[index + 1], facets[index + 3]};
382 auto x_dofs0 = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan(
383 x_dofmap, cells[0], MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent);
384 for (std::size_t i = 0; i < x_dofs0.size(); ++i)
386 std::copy_n(std::next(x.begin(), 3 * x_dofs0[i]), 3,
387 std::next(cdofs0.begin(), 3 * i));
389 auto x_dofs1 = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan(
390 x_dofmap, cells[1], MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent);
391 for (std::size_t i = 0; i < x_dofs1.size(); ++i)
393 std::copy_n(std::next(x.begin(), 3 * x_dofs1[i]), 3,
394 std::next(cdofs1.begin(), 3 * i));
398 std::span<const std::int32_t> dmap0_cell0 = dmap0.cell_dofs(cells0[0]);
399 std::span<const std::int32_t> dmap0_cell1 = dmap0.cell_dofs(cells0[1]);
400 dmapjoint0.resize(dmap0_cell0.size() + dmap0_cell1.size());
401 std::ranges::copy(dmap0_cell0, dmapjoint0.begin());
402 std::ranges::copy(dmap0_cell1,
403 std::next(dmapjoint0.begin(), dmap0_cell0.size()));
405 std::span<const std::int32_t> dmap1_cell0 = dmap1.cell_dofs(cells1[0]);
406 std::span<const std::int32_t> dmap1_cell1 = dmap1.cell_dofs(cells1[1]);
407 dmapjoint1.resize(dmap1_cell0.size() + dmap1_cell1.size());
408 std::ranges::copy(dmap1_cell0, dmapjoint1.begin());
409 std::ranges::copy(dmap1_cell1,
410 std::next(dmapjoint1.begin(), dmap1_cell0.size()));
412 const int num_rows = bs0 * dmapjoint0.size();
413 const int num_cols = bs1 * dmapjoint1.size();
416 Ae.resize(num_rows * num_cols);
417 std::ranges::fill(Ae, 0);
421 ? std::array<std::uint8_t, 2>{0, 0}
423 perms[cells[0] * num_facets_per_cell + local_facet[0]],
424 perms[cells[1] * num_facets_per_cell + local_facet[1]]};
425 kernel(Ae.data(), coeffs.data() + index / 2 * cstride, constants.data(),
426 coordinate_dofs.data(), local_facet.data(), perm.data());
435 std::span<T> _Ae(Ae);
436 std::span<T> sub_Ae0 = _Ae.subspan(bs0 * dmap0_cell0.size() * num_cols,
437 bs0 * dmap0_cell1.size() * num_cols);
439 P0(_Ae, cell_info0, cells0[0], num_cols);
440 P0(sub_Ae0, cell_info0, cells0[1], num_cols);
441 P1T(_Ae, cell_info1, cells1[0], num_rows);
443 for (
int row = 0; row < num_rows; ++row)
447 std::span<T> sub_Ae1 = _Ae.subspan(
448 row * num_cols + bs1 * dmap1_cell0.size(), bs1 * dmap1_cell1.size());
449 P1T(sub_Ae1, cell_info1, cells1[1], 1);
455 for (std::size_t i = 0; i < dmapjoint0.size(); ++i)
457 for (
int k = 0; k < bs0; ++k)
459 if (bc0[bs0 * dmapjoint0[i] + k])
462 std::fill_n(std::next(Ae.begin(), num_cols * (bs0 * i + k)),
470 for (std::size_t j = 0; j < dmapjoint1.size(); ++j)
472 for (
int k = 0; k < bs1; ++k)
474 if (bc1[bs1 * dmapjoint1[j] + k])
477 for (
int m = 0; m < num_rows; ++m)
478 Ae[m * num_cols + bs1 * j + k] = 0;
484 mat_set(dmapjoint0, dmapjoint1, Ae);
493template <dolfinx::scalar T, std::
floating_po
int U>
496 std::span<
const scalar_value_type_t<T>> x, std::span<const T> constants,
497 const std::map<std::pair<IntegralType, int>,
498 std::pair<std::span<const T>,
int>>& coefficients,
499 std::span<const std::int8_t> bc0, std::span<const std::int8_t> bc1)
502 std::shared_ptr<const mesh::Mesh<U>> mesh = a.mesh();
505 auto mesh0 = a.function_spaces().at(0)->mesh();
508 auto mesh1 = a.function_spaces().at(1)->mesh();
512 std::shared_ptr<const fem::DofMap> dofmap0
513 = a.function_spaces().at(0)->dofmap();
514 std::shared_ptr<const fem::DofMap> dofmap1
515 = a.function_spaces().at(1)->dofmap();
518 auto dofs0 = dofmap0->map();
519 const int bs0 = dofmap0->bs();
520 auto dofs1 = dofmap1->map();
521 const int bs1 = dofmap1->bs();
523 auto element0 = a.function_spaces().at(0)->element();
525 auto element1 = a.function_spaces().at(1)->element();
530 = element1->template dof_transformation_right_fn<T>(
533 std::span<const std::uint32_t> cell_info0;
534 std::span<const std::uint32_t> cell_info1;
535 if (element0->needs_dof_transformations()
536 or element1->needs_dof_transformations() or a.needs_facet_permutations())
538 mesh0->topology_mutable()->create_entity_permutations();
539 mesh1->topology_mutable()->create_entity_permutations();
540 cell_info0 = std::span(mesh0->topology()->get_cell_permutation_info());
541 cell_info1 = std::span(mesh1->topology()->get_cell_permutation_info());
549 impl::assemble_cells(
551 {dofs0, bs0, a.domain(IntegralType::cell, i, *mesh0)}, P0,
552 {dofs1, bs1, a.domain(IntegralType::cell, i, *mesh1)}, P1T, bc0, bc1,
553 fn, coeffs, cstride, constants, cell_info0, cell_info1);
556 std::span<const std::uint8_t> perms;
557 if (a.needs_facet_permutations())
559 mesh->topology_mutable()->create_entity_permutations();
560 perms = std::span(mesh->topology()->get_facet_permutations());
564 int num_facets_per_cell
570 auto& [coeffs, cstride]
572 impl::assemble_exterior_facets(
573 mat_set, x_dofmap, x, num_facets_per_cell,
575 {dofs0, bs0, a.domain(IntegralType::exterior_facet, i, *mesh0)}, P0,
576 {dofs1, bs1, a.domain(IntegralType::exterior_facet, i, *mesh1)}, P1T,
577 bc0, bc1, fn, coeffs, cstride, constants, cell_info0, cell_info1,
583 const std::vector<int> c_offsets = a.coefficient_offsets();
586 auto& [coeffs, cstride]
588 impl::assemble_interior_facets(
589 mat_set, x_dofmap, x, num_facets_per_cell,
591 {*dofmap0, bs0, a.domain(IntegralType::interior_facet, i, *mesh0)}, P0,
592 {*dofmap1, bs1, a.domain(IntegralType::interior_facet, i, *mesh1)}, P1T,
593 bc0, bc1, fn, coeffs, cstride, c_offsets, constants, cell_info0,