Copyright (C) 2022 Igor A. Baratta
This file is part of DOLFINx (https://www.fenicsproject.org)
SPDX-License-Identifier:    LGPL-3.0-or-later

Matrix-free conjugate gradient (CG) solver

This demo illustrates how to:

Solve a linear partial differential equation using a matrix-free CG solver
Create and apply Dirichlet boundary conditions
Compute errors

\[\begin{align*} - \nabla^{2} u &= f \quad {\rm in} \ \Omega, \\ u &= u_D \quad {\rm on} \ \Gamma_{D} \end{align*}\]

where

\[\begin{align*} u_D &= 1 + x^2 + 2y^2, \\ f = -6 \end{align*}\]

Note

This demo illustrates the use of a matrix-free Conjugate Gradient solver. Many practical problems will also require a preconditioner to create an efficient solver.

UFL form file

The UFL file is implemented in demo_poisson_matrix_free/poisson.py.

UFL form implemented in python

UFL input for the Matrix-free Poisson Demo

from basix.ufl import element
from ufl import (
    Coefficient,
    Constant,
    FunctionSpace,
    Mesh,
    TestFunction,
    TrialFunction,
    action,
    dx,
    grad,
    inner,
)

coord_element = element("Lagrange", "triangle", 1, shape=(2,))
mesh = Mesh(coord_element)

# Function Space
e = element("Lagrange", "triangle", 2)
V = FunctionSpace(mesh, e)

# Trial and test functions
u = TrialFunction(V)
v = TestFunction(V)

# Constant RHS
f = Constant(V)

# Bilinear and linear forms according to the variational
# formulation of the equations:
a = inner(grad(u), grad(v)) * dx
L = inner(f, v) * dx

# Linear form representing the action of the form `a`` on the
# coefficient `ui`:`
ui = Coefficient(V)
M = action(a, ui)

# Form to compute the L2 norm of the error
usol = Coefficient(V)
uexact = Coefficient(V)
E = inner(usol - uexact, usol - uexact) * dx

forms = [M, L, E]

C++ program

#include "poisson.h"
#include <algorithm>
#include <basix/finite-element.h>
#include <cmath>
#include <complex>
#include <concepts>
#include <dolfinx.h>
#include <dolfinx/common/types.h>
#include <dolfinx/fem/Constant.h>
#include <memory>
#include <petscsystypes.h>

using namespace dolfinx;

namespace linalg
{
/// @brief Compute vector r = alpha * x + y.
/// @param[out] r
/// @param[in] alpha
/// @param[in] x
/// @param[in] y
void axpy(auto&& r, auto alpha, auto&& x, auto&& y)
{
  std::ranges::transform(x.array(), y.array(), r.mutable_array().begin(),
                         [alpha](auto x, auto y) { return alpha * x + y; });
}

/// @brief Solve problem A.x = b using the conjugate gradient (CG)
/// method.
///
/// @param[in, out] x Solution vector, may be set to an initial guess
/// hence no zeroed.
/// @param[in] b Right-hand side vector.
/// @param[in] action Function that computes the action of the linear
/// operator on a vector.
/// @param[in] kmax Maximum number of iterations
/// @param[in] rtol Relative tolerances for convergence
/// @return Number of CG iterations.
/// @pre The ghost values of `x` and `b` must be updated before this
/// function is called.
int cg(auto& x, auto& b, auto action, int kmax = 50, double rtol = 1e-8)
{
  using T = typename std::decay_t<decltype(x)>::value_type;

  // Create working vectors
  la::Vector r(b), y(b);

  // Compute initial residual r0 = b - Ax0
  action(x, y);
  axpy(r, T(-1), y, b);

  // Create p work vector
  la::Vector p(r);

  // Iterations of CG
  auto rnorm0 = la::squared_norm(r);
  auto rtol2 = rtol * rtol;
  auto rnorm = rnorm0;
  int k = 0;
  while (k < kmax)
  {
    ++k;

    // Compute y = A p
    action(p, y);

    // Compute alpha = r.r/p.y
    T alpha = rnorm / la::inner_product(p, y);

    // Update x (x <- x + alpha*p)
    axpy(x, alpha, p, x);

    // Update r (r <- r - alpha*y)
    axpy(r, -alpha, y, r);

    // Update residual norm
    auto rnorm_new = la::squared_norm(r);
    T beta = rnorm_new / rnorm;
    rnorm = rnorm_new;

    if (rnorm / rnorm0 < rtol2)
      break;

    // Update p (p <- beta * p + r)
    axpy(p, beta, p, r);
  }

  return k;
}
} // namespace linalg

template <typename T, std::floating_point U>
void solver(MPI_Comm comm)
{
  // Create mesh and function space
  auto mesh = std::make_shared<mesh::Mesh<U>>(mesh::create_rectangle<U>(
      comm, {{{0.0, 0.0}, {1.0, 1.0}}}, {10, 10}, mesh::CellType::triangle,
      mesh::create_cell_partitioner(mesh::GhostMode::none)));
  auto element = basix::create_element<U>(
      basix::element::family::P, basix::cell::type::triangle, 2,
      basix::element::lagrange_variant::unset,
      basix::element::dpc_variant::unset, false);
  auto V = std::make_shared<fem::FunctionSpace<U>>(
      fem::create_functionspace(mesh, element, {}));

  // Prepare and set Constants for the bilinear form
  auto f = std::make_shared<fem::Constant<T>>(-6.0);

  // Define variational forms
  auto L = std::make_shared<fem::Form<T, U>>(
      fem::create_form<T>(*form_poisson_L, {V}, {}, {{"f", f}}, {}, {}));

  // Action of the bilinear form "a" on a function ui
  auto ui = std::make_shared<fem::Function<T, U>>(V);
  auto M = std::make_shared<fem::Form<T, U>>(
      fem::create_form<T>(*form_poisson_M, {V}, {{"ui", ui}}, {{}}, {}, {}));

  // Define boundary condition
  auto u_D = std::make_shared<fem::Function<T, U>>(V);
  u_D->interpolate(
      [](auto x) -> std::pair<std::vector<T>, std::vector<std::size_t>>
      {
        std::vector<T> f;
        for (std::size_t p = 0; p < x.extent(1); ++p)
          f.push_back(1 + x(0, p) * x(0, p) + 2 * x(1, p) * x(1, p));
        return {f, {f.size()}};
      });

  mesh->topology_mutable()->create_connectivity(1, 2);
  const std::vector<std::int32_t> facets
      = mesh::exterior_facet_indices(*mesh->topology());
  std::vector<std::int32_t> bdofs = fem::locate_dofs_topological(
      *V->mesh()->topology_mutable(), *V->dofmap(), 1, facets);
  auto bc = std::make_shared<const fem::DirichletBC<T>>(u_D, bdofs);

  // Assemble RHS vector
  la::Vector<T> b(V->dofmap()->index_map, V->dofmap()->index_map_bs());
  fem::assemble_vector(b.mutable_array(), *L);

  // Apply lifting to account for Dirichlet boundary condition
  // b <- b - A * x_bc
  bc->set(ui->x()->mutable_array(), std::nullopt, T(-1));
  fem::assemble_vector(b.mutable_array(), *M);

  // Communicate ghost values
  b.scatter_rev(std::plus<T>());

  // Set BC dofs to zero (effectively zeroes columns of A)
  bc->set(b.mutable_array(), std::nullopt, T(0));

  b.scatter_fwd();

  // Pack coefficients and constants
  auto coeff = fem::allocate_coefficient_storage(*M);
  std::vector<T> constants = fem::pack_constants(*M);

  // Create function for computing the action of A on x (y = Ax)
  auto action = [&M, &ui, &bc, &coeff, &constants](auto& x, auto& y)
  {
    // Zero y
    y.set(0.0);

    // Update coefficient ui (just copy data from x to ui)
    std::ranges::copy(x.array(), ui->x()->mutable_array().begin());

    // Compute action of A on x
    fem::pack_coefficients(*M, coeff);
    fem::assemble_vector(y.mutable_array(), *M, std::span<const T>(constants),
                         fem::make_coefficients_span(coeff));

    // Set BC dofs to zero (effectively zeroes rows of A)
    bc->set(y.mutable_array(), std::nullopt, T(0));

    // Accumulate ghost values
    y.scatter_rev(std::plus<T>());

    // Update ghost values
    y.scatter_fwd();
  };

  // Compute solution using the CG method
  auto u = std::make_shared<fem::Function<T>>(V);
  int num_it = linalg::cg(*u->x(), b, action, 200, 1e-6);

  // Set BC values in the solution vectors
  bc->set(u->x()->mutable_array(), std::nullopt, T(1));

  // Compute L2 error (squared) of the solution vector e = (u - u_d, u
  // - u_d)*dx
  auto E = std::make_shared<fem::Form<T>>(fem::create_form<T, U>(
      *form_poisson_E, {}, {{"uexact", u_D}, {"usol", u}}, {}, {}, {}, mesh));
  T error = fem::assemble_scalar(*E);
  if (dolfinx::MPI::rank(comm) == 0)
  {
    std::cout << "Number of CG iterations " << num_it << std::endl;
    std::cout << "Finite element error (L2 norm (squared)) " << std::abs(error)
              << std::endl;
  }
}

/// Main program
int main(int argc, char* argv[])
{
  using T = PetscScalar;
  using U = typename dolfinx::scalar_value_type_t<T>;
  init_logging(argc, argv);
  MPI_Init(&argc, &argv);
  solver<T, U>(MPI_COMM_WORLD);
  MPI_Finalize();
  return 0;
}