use crate::core::constraint::BoxConstraints;
use crate::core::math::{DenseMatrixFromFn, VectorIndex, VectorLen};
use crate::core::parallel::{MaybeSend, MaybeSync, try_map_range_with, try_map_slice_with};
use crate::core::problem::{CostFunction, Gradient, Hessian, Jacobian, Residual};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum Method {
Forward,
Central,
}
#[derive(Debug, Clone, Copy)]
pub struct FiniteDiff<P> {
problem: P,
gradient_method: Method,
jacobian_method: Method,
hessian_method: Method,
function_precision: f64,
fixed_step: Option<f64>,
}
impl<P> FiniteDiff<P> {
pub fn new(problem: P) -> Self {
Self {
problem,
gradient_method: Method::Central,
jacobian_method: Method::Forward,
hessian_method: Method::Central,
function_precision: f64::EPSILON,
fixed_step: None,
}
}
pub fn gradient_method(mut self, method: Method) -> Self {
self.gradient_method = method;
self
}
pub fn jacobian_method(mut self, method: Method) -> Self {
self.jacobian_method = method;
self
}
pub fn hessian_method(mut self, method: Method) -> Self {
self.hessian_method = method;
self
}
pub fn function_precision(mut self, epsfcn: f64) -> Self {
self.function_precision = epsfcn;
self
}
pub fn with_step(mut self, h: f64) -> Self {
self.fixed_step = Some(h);
self
}
pub fn get_ref(&self) -> &P {
&self.problem
}
pub fn into_inner(self) -> P {
self.problem
}
}
impl<P: CostFunction> CostFunction for FiniteDiff<P> {
type Param = P::Param;
type Output = P::Output;
type Error = P::Error;
fn cost(&self, param: &Self::Param) -> Result<Self::Output, Self::Error> {
self.problem.cost(param)
}
}
impl<P: Residual> Residual for FiniteDiff<P> {
type Param = P::Param;
type Output = P::Output;
type Error = P::Error;
fn residual(&self, param: &Self::Param) -> Result<Self::Output, Self::Error> {
self.problem.residual(param)
}
}
impl<P: BoxConstraints> BoxConstraints for FiniteDiff<P> {
fn lower(&self) -> &Self::Param {
self.problem.lower()
}
fn upper(&self) -> &Self::Param {
self.problem.upper()
}
}
impl<P, V> Gradient for FiniteDiff<P>
where
P: CostFunction<Param = V, Output = f64> + MaybeSync,
V: Clone + VectorLen + VectorIndex + MaybeSync,
<P as CostFunction>::Error: MaybeSend,
{
type Gradient = V;
fn gradient(&self, param: &V) -> Result<V, P::Error> {
match self.gradient_method {
Method::Forward => forward_difference_gradient(
&self.problem,
param,
self.function_precision,
self.fixed_step,
),
Method::Central => central_difference_gradient(
&self.problem,
param,
self.function_precision,
self.fixed_step,
),
}
}
}
impl<P, V> Jacobian for FiniteDiff<P>
where
P: Residual<Param = V, Output = V> + MaybeSync,
V: Clone + VectorLen + VectorIndex + DenseMatrixFromFn + MaybeSync + MaybeSend,
<P as Residual>::Error: MaybeSend,
{
type Jacobian = <V as DenseMatrixFromFn>::Matrix;
fn jacobian(&self, param: &V) -> Result<Self::Jacobian, P::Error> {
match self.jacobian_method {
Method::Forward => forward_difference_jacobian(
&self.problem,
param,
self.function_precision,
self.fixed_step,
),
Method::Central => central_difference_jacobian(
&self.problem,
param,
self.function_precision,
self.fixed_step,
),
}
}
}
impl<P, V> Hessian for FiniteDiff<P>
where
P: CostFunction<Param = V, Output = f64> + MaybeSync,
V: Clone + VectorLen + VectorIndex + DenseMatrixFromFn + MaybeSync,
<P as CostFunction>::Error: MaybeSend,
{
type Hessian = <V as DenseMatrixFromFn>::Matrix;
fn hessian(&self, param: &V) -> Result<Self::Hessian, P::Error> {
match self.hessian_method {
Method::Forward => forward_difference_hessian(
&self.problem,
param,
self.function_precision,
self.fixed_step,
),
Method::Central => central_difference_hessian(
&self.problem,
param,
self.function_precision,
self.fixed_step,
),
}
}
}
fn nr_step(xj: f64, scale: f64, fixed_step: Option<f64>) -> f64 {
if let Some(h) = fixed_step {
return h;
}
let h = scale * xj.abs().max(1.0);
let reround = (xj + h) - xj;
if reround == 0.0 { h } else { reround }
}
pub fn central_difference_gradient<P, V>(
problem: &P,
x: &V,
function_precision: f64,
fixed_step: Option<f64>,
) -> Result<V, P::Error>
where
P: CostFunction<Param = V, Output = f64> + MaybeSync,
V: Clone + VectorLen + VectorIndex + MaybeSync,
P::Error: MaybeSend,
{
let n = x.vec_len();
let scale = function_precision.max(f64::EPSILON).cbrt();
let cols = try_map_range_with(
n,
|| x.clone(),
|probe, j| {
let xj = x.get_scalar(j);
let h = nr_step(xj, scale, fixed_step);
probe.set_scalar(j, xj + h);
let fp = problem.cost(probe)?;
probe.set_scalar(j, xj - h);
let fm = problem.cost(probe)?;
probe.set_scalar(j, xj);
Ok((fp - fm) / (2.0 * h))
},
)?;
let mut grad = x.clone();
for (j, g) in cols.into_iter().enumerate() {
grad.set_scalar(j, g);
}
Ok(grad)
}
pub fn forward_difference_gradient<P, V>(
problem: &P,
x: &V,
function_precision: f64,
fixed_step: Option<f64>,
) -> Result<V, P::Error>
where
P: CostFunction<Param = V, Output = f64> + MaybeSync,
V: Clone + VectorLen + VectorIndex + MaybeSync,
P::Error: MaybeSend,
{
let n = x.vec_len();
let scale = function_precision.max(f64::EPSILON).sqrt();
let f0 = problem.cost(x)?;
let cols = try_map_range_with(
n,
|| x.clone(),
|probe, j| {
let xj = x.get_scalar(j);
let h = nr_step(xj, scale, fixed_step);
probe.set_scalar(j, xj + h);
let fp = problem.cost(probe)?;
probe.set_scalar(j, xj);
Ok((fp - f0) / h)
},
)?;
let mut grad = x.clone();
for (j, g) in cols.into_iter().enumerate() {
grad.set_scalar(j, g);
}
Ok(grad)
}
pub fn forward_difference_jacobian<P, V>(
problem: &P,
x: &V,
function_precision: f64,
fixed_step: Option<f64>,
) -> Result<<V as DenseMatrixFromFn>::Matrix, P::Error>
where
P: Residual<Param = V, Output = V> + MaybeSync,
V: Clone + VectorLen + VectorIndex + DenseMatrixFromFn + MaybeSync + MaybeSend,
P::Error: MaybeSend,
{
let n = x.vec_len();
let r0 = problem.residual(x)?;
let m = r0.vec_len();
let eps = function_precision.max(f64::EPSILON).sqrt();
let columns = try_map_range_with(
n,
|| x.clone(),
|probe, j| {
let xj = x.get_scalar(j);
let h = match fixed_step {
Some(h) => h,
None => {
let h = eps * xj.abs();
if h == 0.0 { eps } else { h }
}
};
probe.set_scalar(j, xj + h);
let mut col = problem.residual(probe)?;
probe.set_scalar(j, xj);
for i in 0..m {
col.set_scalar(i, (col.get_scalar(i) - r0.get_scalar(i)) / h);
}
Ok(col)
},
)?;
Ok(V::dense_from_fn(m, n, |i, j| columns[j].get_scalar(i)))
}
pub fn central_difference_jacobian<P, V>(
problem: &P,
x: &V,
function_precision: f64,
fixed_step: Option<f64>,
) -> Result<<V as DenseMatrixFromFn>::Matrix, P::Error>
where
P: Residual<Param = V, Output = V> + MaybeSync,
V: Clone + VectorLen + VectorIndex + DenseMatrixFromFn + MaybeSync + MaybeSend,
P::Error: MaybeSend,
{
let n = x.vec_len();
let m = problem.residual(x)?.vec_len();
let scale = function_precision.max(f64::EPSILON).cbrt();
let columns = try_map_range_with(
n,
|| x.clone(),
|probe, j| {
let xj = x.get_scalar(j);
let h = nr_step(xj, scale, fixed_step);
probe.set_scalar(j, xj + h);
let mut col = problem.residual(probe)?;
probe.set_scalar(j, xj - h);
let rm = problem.residual(probe)?;
probe.set_scalar(j, xj);
for i in 0..m {
col.set_scalar(i, (col.get_scalar(i) - rm.get_scalar(i)) / (2.0 * h));
}
Ok(col)
},
)?;
Ok(V::dense_from_fn(m, n, |i, j| columns[j].get_scalar(i)))
}
pub fn central_difference_hessian<P, V>(
problem: &P,
x: &V,
function_precision: f64,
fixed_step: Option<f64>,
) -> Result<<V as DenseMatrixFromFn>::Matrix, P::Error>
where
P: CostFunction<Param = V, Output = f64> + MaybeSync,
V: Clone + VectorLen + VectorIndex + DenseMatrixFromFn + MaybeSync,
P::Error: MaybeSend,
{
let n = x.vec_len();
let scale = function_precision.max(f64::EPSILON).powf(0.25);
let f0 = problem.cost(x)?;
let h: Vec<f64> = (0..n)
.map(|j| nr_step(x.get_scalar(j), scale, fixed_step))
.collect();
let pairs: Vec<(usize, usize)> = (0..n).flat_map(|i| (i..n).map(move |j| (i, j))).collect();
let values = try_map_slice_with(
&pairs,
|| x.clone(),
|probe, &(i, j)| {
let xi = x.get_scalar(i);
if i == j {
probe.set_scalar(i, xi + h[i]);
let fp = problem.cost(probe)?;
probe.set_scalar(i, xi - h[i]);
let fm = problem.cost(probe)?;
probe.set_scalar(i, xi);
Ok((fp - 2.0 * f0 + fm) / (h[i] * h[i]))
} else {
let xj = x.get_scalar(j);
probe.set_scalar(i, xi + h[i]);
probe.set_scalar(j, xj + h[j]);
let fpp = problem.cost(probe)?;
probe.set_scalar(j, xj - h[j]);
let fpm = problem.cost(probe)?;
probe.set_scalar(i, xi - h[i]);
let fmm = problem.cost(probe)?;
probe.set_scalar(j, xj + h[j]);
let fmp = problem.cost(probe)?;
probe.set_scalar(i, xi);
probe.set_scalar(j, xj);
Ok((fpp - fpm - fmp + fmm) / (4.0 * h[i] * h[j]))
}
},
)?;
let mut hess = vec![0.0_f64; n * n];
for (&(i, j), &v) in pairs.iter().zip(&values) {
hess[i * n + j] = v;
hess[j * n + i] = v;
}
Ok(V::dense_from_fn(n, n, |i, j| hess[i * n + j]))
}
pub fn forward_difference_hessian<P, V>(
problem: &P,
x: &V,
function_precision: f64,
fixed_step: Option<f64>,
) -> Result<<V as DenseMatrixFromFn>::Matrix, P::Error>
where
P: CostFunction<Param = V, Output = f64> + MaybeSync,
V: Clone + VectorLen + VectorIndex + DenseMatrixFromFn + MaybeSync,
P::Error: MaybeSend,
{
let n = x.vec_len();
let scale = function_precision.max(f64::EPSILON).powf(0.25);
let f0 = problem.cost(x)?;
let h: Vec<f64> = (0..n)
.map(|j| nr_step(x.get_scalar(j), scale, fixed_step))
.collect();
let fi = try_map_range_with(
n,
|| x.clone(),
|probe, k| {
let xk = x.get_scalar(k);
probe.set_scalar(k, xk + h[k]);
let f = problem.cost(probe)?;
probe.set_scalar(k, xk);
Ok(f)
},
)?;
let pairs: Vec<(usize, usize)> = (0..n).flat_map(|i| (i..n).map(move |j| (i, j))).collect();
let values = try_map_slice_with(
&pairs,
|| x.clone(),
|probe, &(i, j)| {
let xi = x.get_scalar(i);
let cross = if i == j {
probe.set_scalar(i, xi + 2.0 * h[i]);
let c = problem.cost(probe)?;
probe.set_scalar(i, xi);
c
} else {
let xj = x.get_scalar(j);
probe.set_scalar(i, xi + h[i]);
probe.set_scalar(j, xj + h[j]);
let c = problem.cost(probe)?;
probe.set_scalar(i, xi);
probe.set_scalar(j, xj);
c
};
Ok((cross - fi[i] - fi[j] + f0) / (h[i] * h[j]))
},
)?;
let mut hess = vec![0.0_f64; n * n];
for (&(i, j), &v) in pairs.iter().zip(&values) {
hess[i * n + j] = v;
hess[j * n + i] = v;
}
Ok(V::dense_from_fn(n, n, |i, j| hess[i * n + j]))
}
#[cfg(test)]
mod tests {
use super::*;
struct DiagQuadratic {
a: Vec<f64>,
}
impl CostFunction for DiagQuadratic {
type Param = Vec<f64>;
type Output = f64;
type Error = std::convert::Infallible;
fn cost(&self, x: &Vec<f64>) -> Result<f64, std::convert::Infallible> {
Ok(x.iter().zip(&self.a).map(|(xi, ai)| ai * xi * xi).sum())
}
}
fn approx(a: f64, b: f64, tol: f64) -> bool {
(a - b).abs() <= tol
}
#[test]
fn central_gradient_matches_analytic_quadratic() {
let a = vec![1.0, 2.0, 0.5];
let x = vec![-1.2, 0.7, 3.0];
let g = FiniteDiff::new(DiagQuadratic { a: a.clone() })
.gradient(&x)
.unwrap();
for i in 0..3 {
let want = 2.0 * a[i] * x[i];
assert!(approx(g[i], want, 1e-7), "i={i} got {} want {want}", g[i]);
}
}
#[test]
fn forward_gradient_matches_analytic_quadratic() {
let a = vec![1.0, 2.0, 0.5];
let x = vec![-1.2, 0.7, 3.0];
let g = FiniteDiff::new(DiagQuadratic { a: a.clone() })
.gradient_method(Method::Forward)
.gradient(&x)
.unwrap();
for i in 0..3 {
let want = 2.0 * a[i] * x[i];
assert!(approx(g[i], want, 1e-5), "i={i} got {} want {want}", g[i]);
}
}
#[test]
fn gradient_handles_zero_coordinate_and_singletons() {
let g = FiniteDiff::new(DiagQuadratic { a: vec![3.0] })
.gradient(&vec![0.0])
.unwrap();
assert!(approx(g[0], 0.0, 1e-9), "got {}", g[0]);
assert_eq!(g.len(), 1);
}
#[test]
fn gradient_is_bitwise_reproducible_across_calls() {
let a: Vec<f64> = (0..16).map(|k| 0.5 + k as f64).collect();
let x: Vec<f64> = (0..16).map(|k| -2.0 + 0.37 * k as f64).collect();
let fd = FiniteDiff::new(DiagQuadratic { a });
let g1 = fd.gradient(&x).unwrap();
let g2 = fd.gradient(&x).unwrap();
assert_eq!(g1, g2, "finite-difference gradient is not reproducible");
for (k, gk) in g1.iter().enumerate() {
let want = 2.0 * (0.5 + k as f64) * x[k];
assert!(approx(*gk, want, 1e-6), "k={k} got {gk} want {want}");
}
}
#[test]
fn gradient_of_empty_param_is_empty() {
let g = FiniteDiff::new(DiagQuadratic { a: vec![] })
.gradient(&Vec::<f64>::new())
.unwrap();
assert!(g.is_empty());
}
#[test]
fn defaults_are_central_gradient_forward_jacobian() {
let fd = FiniteDiff::new(DiagQuadratic { a: vec![1.0] });
assert_eq!(fd.gradient_method, Method::Central);
assert_eq!(fd.jacobian_method, Method::Forward);
assert_eq!(fd.hessian_method, Method::Central);
}
#[cfg(feature = "problems")]
mod with_problems {
use super::*;
use crate::problems::rosenbrock::{Rosenbrock, rosenbrock_gradient};
use crate::problems::sphere::{Sphere, sphere_gradient};
#[test]
fn central_gradient_matches_sphere_analytic() {
let x = vec![-1.2, 1.0, 0.7, 0.4];
let g = FiniteDiff::new(Sphere::<Vec<f64>>::new())
.gradient(&x)
.unwrap();
let mut want = vec![0.0; x.len()];
sphere_gradient(&x, &mut want);
for i in 0..x.len() {
assert!(approx(g[i], want[i], 1e-8), "i={i} {} vs {}", g[i], want[i]);
}
}
#[test]
fn central_gradient_matches_rosenbrock_analytic() {
let x = vec![-1.2, 1.0, 0.7, 0.4];
let g = FiniteDiff::new(Rosenbrock::<Vec<f64>>::new())
.gradient(&x)
.unwrap();
let mut want = vec![0.0; x.len()];
rosenbrock_gradient(&x, &mut want);
for i in 0..x.len() {
assert!(approx(g[i], want[i], 1e-4), "i={i} {} vs {}", g[i], want[i]);
}
}
}
#[cfg(feature = "problems")]
#[test]
fn box_constrained_is_forwarded() {
use crate::core::constraint::BoxConstraints;
use crate::problems::rastrigin::RastriginBoxed;
fn _assert_constrained_and_differentiable<T: BoxConstraints + Gradient>() {}
_assert_constrained_and_differentiable::<FiniteDiff<RastriginBoxed<Vec<f64>>>>();
let lower = vec![-5.12, -5.12];
let upper = vec![5.12, 5.12];
let p = RastriginBoxed::new(lower.clone(), upper.clone());
let wrapped = FiniteDiff::new(p);
assert_eq!(wrapped.lower(), &lower);
assert_eq!(wrapped.upper(), &upper);
}
#[cfg(all(feature = "problems", feature = "nalgebra"))]
mod nalgebra_matrix {
use super::*;
use crate::problems::rosenbrock::{Rosenbrock, RosenbrockResiduals};
use crate::problems::sphere::Sphere;
use nalgebra::DVector;
#[test]
fn forward_jacobian_matches_analytic() {
let x = DVector::from_vec(vec![-1.2, 1.0]);
let analytic = RosenbrockResiduals::<DVector<f64>>::new()
.jacobian(&x)
.unwrap();
let fd = FiniteDiff::new(RosenbrockResiduals::<DVector<f64>>::new())
.jacobian(&x)
.unwrap();
assert_eq!(fd.shape(), (2, 2));
for i in 0..2 {
for j in 0..2 {
assert!(
approx(fd[(i, j)], analytic[(i, j)], 1e-5),
"({i},{j}) {} vs {}",
fd[(i, j)],
analytic[(i, j)]
);
}
}
}
#[test]
fn central_jacobian_is_tighter_than_forward() {
let x = DVector::from_vec(vec![-1.2, 1.0]);
let analytic = RosenbrockResiduals::<DVector<f64>>::new()
.jacobian(&x)
.unwrap();
let central = FiniteDiff::new(RosenbrockResiduals::<DVector<f64>>::new())
.jacobian_method(Method::Central)
.jacobian(&x)
.unwrap();
for i in 0..2 {
for j in 0..2 {
assert!(approx(central[(i, j)], analytic[(i, j)], 1e-7));
}
}
}
#[test]
fn hessian_of_sphere_is_two_times_identity() {
let x = DVector::from_vec(vec![0.5, -1.5, 2.0]);
let h = FiniteDiff::new(Sphere::<DVector<f64>>::new())
.hessian(&x)
.unwrap();
assert_eq!(h.shape(), (3, 3));
for i in 0..3 {
for j in 0..3 {
let want = if i == j { 2.0 } else { 0.0 };
assert!(approx(h[(i, j)], want, 1e-3), "({i},{j}) {}", h[(i, j)]);
}
}
}
#[test]
fn hessian_matches_rosenbrock_analytic_2d() {
let x = DVector::from_vec(vec![-1.2, 1.0]);
let h = FiniteDiff::new(Rosenbrock::<DVector<f64>>::new())
.hessian(&x)
.unwrap();
let (x0, x1) = (x[0], x[1]);
let want = [
[2.0 - 400.0 * (x1 - 3.0 * x0 * x0), -400.0 * x0],
[-400.0 * x0, 200.0],
];
for i in 0..2 {
for j in 0..2 {
let rel = (h[(i, j)] - want[i][j]).abs() / want[i][j].abs().max(1.0);
assert!(rel < 1e-3, "({i},{j}) {} vs {}", h[(i, j)], want[i][j]);
assert!(approx(h[(i, j)], h[(j, i)], 1e-9));
}
}
}
}
#[cfg(all(feature = "problems", feature = "faer"))]
mod faer_matrix {
use super::*;
use crate::problems::rosenbrock::RosenbrockResiduals;
use crate::problems::sphere::Sphere;
use faer::Col;
#[test]
fn forward_jacobian_matches_analytic() {
let x = Col::<f64>::from_fn(2, |i| [-1.2, 1.0][i]);
let analytic = RosenbrockResiduals::<Col<f64>>::new().jacobian(&x).unwrap();
let fd = FiniteDiff::new(RosenbrockResiduals::<Col<f64>>::new())
.jacobian(&x)
.unwrap();
assert_eq!((fd.nrows(), fd.ncols()), (2, 2));
for i in 0..2 {
for j in 0..2 {
assert!(
approx(fd[(i, j)], analytic[(i, j)], 1e-5),
"({i},{j}) {} vs {}",
fd[(i, j)],
analytic[(i, j)]
);
}
}
}
#[test]
fn hessian_of_sphere_is_two_times_identity() {
let x = Col::<f64>::from_fn(3, |i| [0.5, -1.5, 2.0][i]);
let h = FiniteDiff::new(Sphere::<Col<f64>>::new())
.hessian(&x)
.unwrap();
assert_eq!((h.nrows(), h.ncols()), (3, 3));
for i in 0..3 {
for j in 0..3 {
let want = if i == j { 2.0 } else { 0.0 };
assert!(approx(h[(i, j)], want, 1e-3), "({i},{j}) {}", h[(i, j)]);
}
}
}
}
}