//! xad-rs 0.2.0
//!
//! Automatic differentiation library for Rust — forward/reverse mode AD, a Rust port of
//! the C++ XAD library (<https://github.com/auto-differentiation/xad>).
//! Hessian computation via reverse-mode AD.
//!
//! The Hessian matrix `H[i][j] = d²f / (d x_i d x_j)` for a scalar-valued function.

use crate::areal::AReal;
use crate::tape::{Tape, TapeStorage};

/// Compute the Hessian of a scalar-valued function using repeated reverse-mode passes
/// with finite-difference perturbation of the gradient.
///
/// Row `i` is obtained by bumping input `i` by `eps`, re-running the reverse-mode
/// gradient, and forward-differencing: `H[i][j] ≈ (grad_j(x + eps*e_i) - grad_j(x)) / eps`.
///
/// `func` takes AReal inputs and returns a single AReal output (recorded on an
/// internally managed tape). Returns a symmetric `n x n` matrix.
// The symmetrization loop is naturally index-based because it swaps paired
// entries `[i][j]` ↔ `[j][i]`, which cannot be expressed with an iterator
// without extra allocation or unsafe.
#[allow(clippy::needless_range_loop)]
pub fn compute_hessian<T, F>(inputs: &[T], func: F) -> Vec<Vec<T>>
where
    T: TapeStorage,
    F: Fn(&[AReal<T>]) -> AReal<T>,
{
    let dim = inputs.len();
    // Fixed forward-difference step; inv factor hoisted so each entry is one multiply.
    let step = T::from(1e-7).unwrap();
    let step_inv = T::one() / step;
    let half = T::from(0.5).unwrap();

    // Gradient at the unperturbed point, shared by every row.
    let grad0 = compute_gradient(inputs, &func);

    // Row i: forward-difference of the gradient along basis direction e_i.
    let mut hess: Vec<Vec<T>> = (0..dim)
        .map(|i| {
            let mut bumped = inputs.to_vec();
            bumped[i] += step;
            compute_gradient(&bumped, &func)
                .iter()
                .zip(&grad0)
                .map(|(&g_bumped, &g_base)| (g_bumped - g_base) * step_inv)
                .collect()
        })
        .collect();

    // Average each off-diagonal pair so the returned matrix is exactly symmetric.
    for row in 0..dim {
        for col in (row + 1)..dim {
            let mean = (hess[row][col] + hess[col][row]) * half;
            hess[row][col] = mean;
            hess[col][row] = mean;
        }
    }

    hess
}

/// Compute the gradient of a scalar-valued function using reverse-mode AD.
///
/// Records one evaluation of `func` on a fresh tape, seeds the output adjoint
/// with 1, runs a single reverse sweep, and reads `df/dx_i` off the inputs.
fn compute_gradient<T, F>(inputs: &[T], func: &F) -> Vec<T>
where
    T: TapeStorage,
    F: Fn(&[AReal<T>]) -> AReal<T>,
{
    // Fresh tape per call; `true` is presumably a recording/activation flag —
    // TODO(review): confirm against Tape::new docs.
    let mut tape = Tape::<T>::new(true);
    tape.activate();

    // Lift the plain values into active reals and register them as independents.
    let mut actives: Vec<AReal<T>> = inputs.iter().map(|&x| AReal::new(x)).collect();
    AReal::register_input(&mut actives, &mut tape);

    // Evaluate the function so its operations are recorded, then mark the
    // scalar result as the dependent variable.
    let mut result = func(&actives);
    AReal::register_output(std::slice::from_mut(&mut result), &mut tape);

    // Seed d(output)/d(output) = 1 and propagate adjoints backwards.
    result.set_adjoint(&mut tape, T::one());
    tape.compute_adjoints();

    // After the sweep, each input's adjoint holds the partial derivative.
    let gradient: Vec<T> = actives.iter().map(|a| a.adjoint(&tape)).collect();

    Tape::<T>::deactivate_all();
    gradient
}