25namespace dolfinx::fem::impl
 
   28using mdspan2_t = MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan<
 
   30    MDSPAN_IMPL_STANDARD_NAMESPACE::dextents<std::size_t, 2>>;
 
   60template <dolfinx::scalar T>
 
   63    std::span<
const scalar_value_type_t<T>> x,
 
   64    std::span<const std::int32_t> cells,
 
   65    std::tuple<mdspan2_t, 
int, std::span<const std::int32_t>> dofmap0,
 
   67    std::tuple<mdspan2_t, 
int, std::span<const std::int32_t>> dofmap1,
 
   69    std::span<const std::int8_t> bc1, 
FEkernel<T> auto kernel,
 
   70    std::span<const T> coeffs, 
int cstride, std::span<const T> constants,
 
   71    std::span<const std::uint32_t> cell_info0,
 
   72    std::span<const std::uint32_t> cell_info1)
 
   77  const auto [dmap0, bs0, cells0] = dofmap0;
 
   78  const auto [dmap1, bs1, cells1] = dofmap1;
 
   81  const int num_dofs0 = dmap0.extent(1);
 
   82  const int num_dofs1 = dmap1.extent(1);
 
   83  const int ndim0 = bs0 * num_dofs0;
 
   84  const int ndim1 = bs1 * num_dofs1;
 
   85  std::vector<T> Ae(ndim0 * ndim1);
 
   87  std::vector<scalar_value_type_t<T>> coordinate_dofs(3 * x_dofmap.extent(1));
 
   90  assert(cells0.size() == cells.size());
 
   91  assert(cells1.size() == cells.size());
 
   92  for (std::size_t index = 0; index < cells.size(); ++index)
 
   96    std::int32_t c = cells[index];
 
   97    std::int32_t c0 = cells0[index];
 
   98    std::int32_t c1 = cells1[index];
 
  101    auto x_dofs = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan(
 
  102        x_dofmap, c, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent);
 
  103    for (std::size_t i = 0; i < x_dofs.size(); ++i)
 
  105      std::copy_n(std::next(x.begin(), 3 * x_dofs[i]), 3,
 
  106                  std::next(coordinate_dofs.begin(), 3 * i));
 
  110    std::ranges::fill(Ae, 0);
 
  111    kernel(Ae.data(), coeffs.data() + index * cstride, constants.data(),
 
  112           coordinate_dofs.data(), 
nullptr, 
nullptr);
 
  115    P0(_Ae, cell_info0, c0, ndim1);  
 
  116    P1T(_Ae, cell_info1, c1, ndim0); 
 
  119    auto dofs0 = std::span(dmap0.data_handle() + c0 * num_dofs0, num_dofs0);
 
  120    auto dofs1 = std::span(dmap1.data_handle() + c1 * num_dofs1, num_dofs1);
 
  124      for (
int i = 0; i < num_dofs0; ++i)
 
  126        for (
int k = 0; k < bs0; ++k)
 
  128          if (bc0[bs0 * dofs0[i] + k])
 
  131            const int row = bs0 * i + k;
 
  132            std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0);
 
  140      for (
int j = 0; j < num_dofs1; ++j)
 
  142        for (
int k = 0; k < bs1; ++k)
 
  144          if (bc1[bs1 * dofs1[j] + k])
 
  147            const int col = bs1 * j + k;
 
  148            for (
int row = 0; row < ndim0; ++row)
 
  149              Ae[row * ndim1 + col] = 0;
 
  155    mat_set(dofs0, dofs1, Ae);
 
  194template <dolfinx::scalar T>
 
  195void assemble_exterior_facets(
 
  197    std::span<
const scalar_value_type_t<T>> x, 
int num_facets_per_cell,
 
  198    std::span<const std::int32_t> facets,
 
  199    std::tuple<mdspan2_t, 
int, std::span<const std::int32_t>> dofmap0,
 
  201    std::tuple<mdspan2_t, 
int, std::span<const std::int32_t>> dofmap1,
 
  203    std::span<const std::int8_t> bc1, 
FEkernel<T> auto kernel,
 
  204    std::span<const T> coeffs, 
int cstride, std::span<const T> constants,
 
  205    std::span<const std::uint32_t> cell_info0,
 
  206    std::span<const std::uint32_t> cell_info1,
 
  207    std::span<const std::uint8_t> perms)
 
  212  const auto [dmap0, bs0, facets0] = dofmap0;
 
  213  const auto [dmap1, bs1, facets1] = dofmap1;
 
  216  std::vector<scalar_value_type_t<T>> coordinate_dofs(3 * x_dofmap.extent(1));
 
  217  const int num_dofs0 = dmap0.extent(1);
 
  218  const int num_dofs1 = dmap1.extent(1);
 
  219  const int ndim0 = bs0 * num_dofs0;
 
  220  const int ndim1 = bs1 * num_dofs1;
 
  221  std::vector<T> Ae(ndim0 * ndim1);
 
  222  std::span<T> _Ae(Ae);
 
  223  assert(facets.size() % 2 == 0);
 
  224  assert(facets0.size() == facets.size());
 
  225  assert(facets1.size() == facets.size());
 
  226  for (std::size_t index = 0; index < facets.size(); index += 2)
 
  231    std::int32_t 
cell = facets[index];
 
  232    std::int32_t local_facet = facets[index + 1];
 
  233    std::int32_t cell0 = facets0[index];
 
  234    std::int32_t cell1 = facets1[index];
 
  237    auto x_dofs = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan(
 
  238        x_dofmap, 
cell, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent);
 
  239    for (std::size_t i = 0; i < x_dofs.size(); ++i)
 
  241      std::copy_n(std::next(x.begin(), 3 * x_dofs[i]), 3,
 
  242                  std::next(coordinate_dofs.begin(), 3 * i));
 
  247        = perms.empty() ? 0 : perms[
cell * num_facets_per_cell + local_facet];
 
  250    std::ranges::fill(Ae, 0);
 
  251    kernel(Ae.data(), coeffs.data() + index / 2 * cstride, constants.data(),
 
  252           coordinate_dofs.data(), &local_facet, &perm);
 
  254    P0(_Ae, cell_info0, cell0, ndim1);
 
  255    P1T(_Ae, cell_info1, cell1, ndim0);
 
  258    auto dofs0 = std::span(dmap0.data_handle() + cell0 * num_dofs0, num_dofs0);
 
  259    auto dofs1 = std::span(dmap1.data_handle() + cell1 * num_dofs1, num_dofs1);
 
  262      for (
int i = 0; i < num_dofs0; ++i)
 
  264        for (
int k = 0; k < bs0; ++k)
 
  266          if (bc0[bs0 * dofs0[i] + k])
 
  269            const int row = bs0 * i + k;
 
  270            std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0);
 
  277      for (
int j = 0; j < num_dofs1; ++j)
 
  279        for (
int k = 0; k < bs1; ++k)
 
  281          if (bc1[bs1 * dofs1[j] + k])
 
  284            const int col = bs1 * j + k;
 
  285            for (
int row = 0; row < ndim0; ++row)
 
  286              Ae[row * ndim1 + col] = 0;
 
  292    mat_set(dofs0, dofs1, Ae);
 
  333template <dolfinx::scalar T>
 
  334void assemble_interior_facets(
 
  336    std::span<
const scalar_value_type_t<T>> x, 
int num_facets_per_cell,
 
  337    std::span<const std::int32_t> facets,
 
  338    std::tuple<
const DofMap&, 
int, std::span<const std::int32_t>> dofmap0,
 
  340    std::tuple<
const DofMap&, 
int, std::span<const std::int32_t>> dofmap1,
 
  342    std::span<const std::int8_t> bc1, 
FEkernel<T> auto kernel,
 
  343    std::span<const T> coeffs, 
int cstride, std::span<const int> offsets,
 
  344    std::span<const T> constants, std::span<const std::uint32_t> cell_info0,
 
  345    std::span<const std::uint32_t> cell_info1,
 
  346    std::span<const std::uint8_t> perms)
 
  351  const auto [dmap0, bs0, facets0] = dofmap0;
 
  352  const auto [dmap1, bs1, facets1] = dofmap1;
 
  355  using X = scalar_value_type_t<T>;
 
  356  std::vector<X> coordinate_dofs(2 * x_dofmap.extent(1) * 3);
 
  357  std::span<X> cdofs0(coordinate_dofs.data(), x_dofmap.extent(1) * 3);
 
  358  std::span<X> cdofs1(coordinate_dofs.data() + x_dofmap.extent(1) * 3,
 
  359                      x_dofmap.extent(1) * 3);
 
  361  std::vector<T> Ae, be;
 
  362  std::vector<T> coeff_array(2 * offsets.back());
 
  363  assert(offsets.back() == cstride);
 
  366  std::vector<std::int32_t> dmapjoint0, dmapjoint1;
 
  367  assert(facets.size() % 4 == 0);
 
  368  assert(facets0.size() == facets.size());
 
  369  assert(facets1.size() == facets.size());
 
  370  for (std::size_t index = 0; index < facets.size(); index += 4)
 
  374    std::array cells{facets[index], facets[index + 2]};
 
  375    std::array cells0{facets0[index], facets0[index + 2]};
 
  376    std::array cells1{facets1[index], facets1[index + 2]};
 
  379    std::array local_facet{facets[index + 1], facets[index + 3]};
 
  382    auto x_dofs0 = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan(
 
  383        x_dofmap, cells[0], MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent);
 
  384    for (std::size_t i = 0; i < x_dofs0.size(); ++i)
 
  386      std::copy_n(std::next(x.begin(), 3 * x_dofs0[i]), 3,
 
  387                  std::next(cdofs0.begin(), 3 * i));
 
  389    auto x_dofs1 = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan(
 
  390        x_dofmap, cells[1], MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent);
 
  391    for (std::size_t i = 0; i < x_dofs1.size(); ++i)
 
  393      std::copy_n(std::next(x.begin(), 3 * x_dofs1[i]), 3,
 
  394                  std::next(cdofs1.begin(), 3 * i));
 
  398    std::span<const std::int32_t> dmap0_cell0 = dmap0.cell_dofs(cells0[0]);
 
  399    std::span<const std::int32_t> dmap0_cell1 = dmap0.cell_dofs(cells0[1]);
 
  400    dmapjoint0.resize(dmap0_cell0.size() + dmap0_cell1.size());
 
  401    std::ranges::copy(dmap0_cell0, dmapjoint0.begin());
 
  402    std::ranges::copy(dmap0_cell1,
 
  403                      std::next(dmapjoint0.begin(), dmap0_cell0.size()));
 
  405    std::span<const std::int32_t> dmap1_cell0 = dmap1.cell_dofs(cells1[0]);
 
  406    std::span<const std::int32_t> dmap1_cell1 = dmap1.cell_dofs(cells1[1]);
 
  407    dmapjoint1.resize(dmap1_cell0.size() + dmap1_cell1.size());
 
  408    std::ranges::copy(dmap1_cell0, dmapjoint1.begin());
 
  409    std::ranges::copy(dmap1_cell1,
 
  410                      std::next(dmapjoint1.begin(), dmap1_cell0.size()));
 
  412    const int num_rows = bs0 * dmapjoint0.size();
 
  413    const int num_cols = bs1 * dmapjoint1.size();
 
  416    Ae.resize(num_rows * num_cols);
 
  417    std::ranges::fill(Ae, 0);
 
  421              ? std::array<std::uint8_t, 2>{0, 0}
 
  423                    perms[cells[0] * num_facets_per_cell + local_facet[0]],
 
  424                    perms[cells[1] * num_facets_per_cell + local_facet[1]]};
 
  425    kernel(Ae.data(), coeffs.data() + index / 2 * cstride, constants.data(),
 
  426           coordinate_dofs.data(), local_facet.data(), perm.data());
 
  435    std::span<T> _Ae(Ae);
 
  436    std::span<T> sub_Ae0 = _Ae.subspan(bs0 * dmap0_cell0.size() * num_cols,
 
  437                                       bs0 * dmap0_cell1.size() * num_cols);
 
  439    P0(_Ae, cell_info0, cells0[0], num_cols);
 
  440    P0(sub_Ae0, cell_info0, cells0[1], num_cols);
 
  441    P1T(_Ae, cell_info1, cells1[0], num_rows);
 
  443    for (
int row = 0; row < num_rows; ++row)
 
  447      std::span<T> sub_Ae1 = _Ae.subspan(
 
  448          row * num_cols + bs1 * dmap1_cell0.size(), bs1 * dmap1_cell1.size());
 
  449      P1T(sub_Ae1, cell_info1, cells1[1], 1);
 
  455      for (std::size_t i = 0; i < dmapjoint0.size(); ++i)
 
  457        for (
int k = 0; k < bs0; ++k)
 
  459          if (bc0[bs0 * dmapjoint0[i] + k])
 
  462            std::fill_n(std::next(Ae.begin(), num_cols * (bs0 * i + k)),
 
  470      for (std::size_t j = 0; j < dmapjoint1.size(); ++j)
 
  472        for (
int k = 0; k < bs1; ++k)
 
  474          if (bc1[bs1 * dmapjoint1[j] + k])
 
  477            for (
int m = 0; m < num_rows; ++m)
 
  478              Ae[m * num_cols + bs1 * j + k] = 0;
 
  484    mat_set(dmapjoint0, dmapjoint1, Ae);
 
  493template <dolfinx::scalar T, std::
floating_po
int U>
 
  496    std::span<
const scalar_value_type_t<T>> x, std::span<const T> constants,
 
  497    const std::map<std::pair<IntegralType, int>,
 
  498                   std::pair<std::span<const T>, 
int>>& coefficients,
 
  499    std::span<const std::int8_t> bc0, std::span<const std::int8_t> bc1)
 
  502  std::shared_ptr<const mesh::Mesh<U>> mesh = a.mesh();
 
  505  auto mesh0 = a.function_spaces().at(0)->mesh();
 
  508  auto mesh1 = a.function_spaces().at(1)->mesh();
 
  512  std::shared_ptr<const fem::DofMap> dofmap0
 
  513      = a.function_spaces().at(0)->dofmap();
 
  514  std::shared_ptr<const fem::DofMap> dofmap1
 
  515      = a.function_spaces().at(1)->dofmap();
 
  518  auto dofs0 = dofmap0->map();
 
  519  const int bs0 = dofmap0->bs();
 
  520  auto dofs1 = dofmap1->map();
 
  521  const int bs1 = dofmap1->bs();
 
  523  auto element0 = a.function_spaces().at(0)->element();
 
  525  auto element1 = a.function_spaces().at(1)->element();
 
  530      = element1->template dof_transformation_right_fn<T>(
 
  533  std::span<const std::uint32_t> cell_info0;
 
  534  std::span<const std::uint32_t> cell_info1;
 
  535  if (element0->needs_dof_transformations()
 
  536      or element1->needs_dof_transformations() or a.needs_facet_permutations())
 
  538    mesh0->topology_mutable()->create_entity_permutations();
 
  539    mesh1->topology_mutable()->create_entity_permutations();
 
  540    cell_info0 = std::span(mesh0->topology()->get_cell_permutation_info());
 
  541    cell_info1 = std::span(mesh1->topology()->get_cell_permutation_info());
 
  549    impl::assemble_cells(
 
  551        {dofs0, bs0, a.domain(IntegralType::cell, i, *mesh0)}, P0,
 
  552        {dofs1, bs1, a.domain(IntegralType::cell, i, *mesh1)}, P1T, bc0, bc1,
 
  553        fn, coeffs, cstride, constants, cell_info0, cell_info1);
 
  556  std::span<const std::uint8_t> perms;
 
  557  if (a.needs_facet_permutations())
 
  559    mesh->topology_mutable()->create_entity_permutations();
 
  560    perms = std::span(mesh->topology()->get_facet_permutations());
 
  564  int num_facets_per_cell
 
  570    auto& [coeffs, cstride]
 
  572    impl::assemble_exterior_facets(
 
  573        mat_set, x_dofmap, x, num_facets_per_cell,
 
  575        {dofs0, bs0, a.domain(IntegralType::exterior_facet, i, *mesh0)}, P0,
 
  576        {dofs1, bs1, a.domain(IntegralType::exterior_facet, i, *mesh1)}, P1T,
 
  577        bc0, bc1, fn, coeffs, cstride, constants, cell_info0, cell_info1,
 
  583    const std::vector<int> c_offsets = a.coefficient_offsets();
 
  586    auto& [coeffs, cstride]
 
  588    impl::assemble_interior_facets(
 
  589        mat_set, x_dofmap, x, num_facets_per_cell,
 
  591        {*dofmap0, bs0, a.domain(IntegralType::interior_facet, i, *mesh0)}, P0,
 
  592        {*dofmap1, bs1, a.domain(IntegralType::interior_facet, i, *mesh1)}, P1T,
 
  593        bc0, bc1, fn, coeffs, cstride, c_offsets, constants, cell_info0,