use crate::matrix::vector::Vector;
use crate::traits::FloatScalar;
use crate::Matrix;
use super::line_search::backtracking_armijo;
use super::{MinimizeResult, OptimError};
#[derive(Debug, Clone, Copy)]
pub struct BfgsSettings<T> {
pub grad_tol: T,
pub f_tol: T,
pub x_tol: T,
pub max_iter: usize,
pub armijo_c1: T,
pub armijo_rho: T,
pub max_ls_iter: usize,
}
impl Default for BfgsSettings<f64> {
fn default() -> Self {
Self {
grad_tol: 1e-8,
f_tol: 1e-12,
x_tol: 1e-12,
max_iter: 200,
armijo_c1: 1e-4,
armijo_rho: 0.5,
max_ls_iter: 40,
}
}
}
impl Default for BfgsSettings<f32> {
fn default() -> Self {
Self {
grad_tol: 1e-4,
f_tol: 1e-6,
x_tol: 1e-6,
max_iter: 200,
armijo_c1: 1e-4,
armijo_rho: 0.5,
max_ls_iter: 40,
}
}
}
pub fn minimize_bfgs<T: FloatScalar, const N: usize>(
mut f: impl FnMut(&Vector<T, N>) -> T,
mut grad: impl FnMut(&Vector<T, N>) -> Vector<T, N>,
x0: &Vector<T, N>,
settings: &BfgsSettings<T>,
) -> Result<MinimizeResult<T, N>, OptimError> {
let mut x = *x0;
let mut fx = f(&x);
let mut g = grad(&x);
let mut f_evals = 1usize;
let mut grad_evals = 1usize;
let mut h: Matrix<T, N, N> = Matrix::eye();
for iter in 0..settings.max_iter {
let g_norm = g.norm();
if g_norm < settings.grad_tol {
return Ok(MinimizeResult {
x,
fx,
grad_norm: g_norm,
iterations: iter,
f_evals,
grad_evals,
});
}
let p = -h * g;
let grad_dot_p = g.dot(&p);
let (p, h_reset) = if grad_dot_p >= T::zero() {
(-g, true)
} else {
(p, false)
};
let grad_dot_p = if h_reset { -g.dot(&g) } else { grad_dot_p };
let (alpha, f_new, ls_evals) = backtracking_armijo(
fx,
grad_dot_p,
&x,
&p,
&mut f,
settings.armijo_c1,
settings.armijo_rho,
settings.max_ls_iter,
)?;
f_evals += ls_evals;
let s = p * alpha; let x_new = x + s;
let g_new = grad(&x_new);
grad_evals += 1;
let y = g_new - g; let ys = y.dot(&s);
if s.norm() < settings.x_tol * (T::one() + x.norm()) {
return Ok(MinimizeResult {
x: x_new,
fx: f_new,
grad_norm: g_new.norm(),
iterations: iter + 1,
f_evals,
grad_evals,
});
}
if (fx - f_new).abs() < settings.f_tol * (T::one() + fx.abs()) {
return Ok(MinimizeResult {
x: x_new,
fx: f_new,
grad_norm: g_new.norm(),
iterations: iter + 1,
f_evals,
grad_evals,
});
}
if h_reset {
h = Matrix::eye();
}
if ys > T::epsilon() {
let rho = T::one() / ys;
let hy = h * y;
let yhy = y.dot(&hy);
let factor = (T::one() + rho * yhy) * rho;
for i in 0..N {
for j in 0..N {
h[(i, j)] = h[(i, j)] + factor * s[i] * s[j]
- rho * (hy[i] * s[j] + s[i] * hy[j]);
}
}
}
x = x_new;
fx = f_new;
g = g_new;
}
Err(OptimError::MaxIterations)
}