use nalgebra::{DMatrix, DVector};
use crate::core::problem::{CostFunction, Gradient};
use crate::core::solver::Solver;
use crate::core::state::QuasiNewtonState;
use crate::core::termination::TerminationReason;
use crate::line_search::{LineSearch, Wolfe};
pub struct BFGS<S = Wolfe> {
line_search: S,
epsilon: f64,
}
impl Default for BFGS<Wolfe> {
fn default() -> Self {
Self::new()
}
}
impl BFGS<Wolfe> {
pub fn new() -> Self {
Self {
line_search: Wolfe::new(),
epsilon: 1e-10,
}
}
}
impl<S> BFGS<S> {
pub fn with_line_search(line_search: S) -> Self {
Self {
line_search,
epsilon: 1e-10,
}
}
pub fn epsilon(mut self, epsilon: f64) -> Self {
assert!(epsilon >= 0.0, "epsilon must be ≥ 0");
self.epsilon = epsilon;
self
}
}
impl<P, S> Solver<P, QuasiNewtonState<DVector<f64>, DMatrix<f64>>> for BFGS<S>
where
P: CostFunction<Param = DVector<f64>, Output = f64>
+ Gradient<Param = DVector<f64>, Gradient = DVector<f64>>,
S: LineSearch<P, DVector<f64>>,
{
fn init(
&mut self,
problem: &P,
mut state: QuasiNewtonState<DVector<f64>, DMatrix<f64>>,
) -> QuasiNewtonState<DVector<f64>, DMatrix<f64>> {
state.cost = Some(problem.cost(&state.param));
state.gradient = Some(problem.gradient(&state.param));
state.cost_evals += 1;
state.gradient_evals += 1;
state
}
fn next_iter(
&mut self,
problem: &P,
mut state: QuasiNewtonState<DVector<f64>, DMatrix<f64>>,
) -> (
QuasiNewtonState<DVector<f64>, DMatrix<f64>>,
Option<TerminationReason>,
) {
let g = state
.gradient
.take()
.expect("gradient not set: Solver::init must run before next_iter");
let cost_old = state
.cost
.expect("cost not set: Solver::init must run before next_iter");
let direction = -(&state.inverse_hessian * &g);
let step = self
.line_search
.next(problem, &state.param, cost_old, &g, &direction);
state.cost_evals += step.cost_evals;
state.gradient_evals += step.gradient_evals;
if !(step.alpha.is_finite() && step.alpha > 0.0) {
state.gradient = Some(g);
state.cost = Some(cost_old);
return (state, Some(TerminationReason::SolverConverged));
}
let s = step.alpha * &direction;
state.param += &s;
let g_new = problem.gradient(&state.param);
state.gradient_evals += 1;
let y = &g_new - &g;
let sy = s.dot(&y);
let s_norm = s.norm();
let y_norm = y.norm();
if sy > self.epsilon * s_norm * y_norm {
if !state.initial_scaling_done {
let yy = y.dot(&y);
if yy > 0.0 {
let scale = sy / yy;
let n = state.param.len();
state.inverse_hessian = DMatrix::identity(n, n) * scale;
}
state.initial_scaling_done = true;
}
let rho = 1.0 / sy;
let hy = &state.inverse_hessian * &y;
let yhy = y.dot(&hy);
let coef = rho * (1.0 + rho * yhy);
state.inverse_hessian.ger(coef, &s, &s, 1.0);
state.inverse_hessian.ger(-rho, &s, &hy, 1.0);
state.inverse_hessian.ger(-rho, &hy, &s, 1.0);
}
state.cost = Some(problem.cost(&state.param));
state.cost_evals += 1;
state.gradient = Some(g_new);
(state, None)
}
}