#[cfg(feature = "nalgebra")]
use crate::core::inner::WarmStart;
use crate::core::math::{
Dot, GeneralRankOneUpdate, MatVec, MatrixIdentity, NegInPlace, NormSquared, Scalar,
ScaleInPlace, ScaledAdd, VectorLen,
};
use crate::core::problem::{CostFunction, Gradient, Problem};
use crate::core::solver::Solver;
use crate::core::state::QuasiNewtonState;
use crate::core::termination::TerminationReason;
use crate::line_search::{LineSearch, Wolfe};
pub struct Bfgs<S = Wolfe, F = f64> {
line_search: S,
epsilon: F,
}
impl Default for Bfgs<Wolfe> {
fn default() -> Self {
Self::new()
}
}
impl Bfgs<Wolfe> {
pub fn new() -> Self {
Self {
line_search: Wolfe::new(),
epsilon: 1e-10,
}
}
}
impl<S, F: Scalar> Bfgs<S, F> {
pub fn with_line_search(line_search: S) -> Self {
Self {
line_search,
epsilon: F::from_f64(1e-10).unwrap(),
}
}
pub fn with_epsilon(mut self, epsilon: F) -> Self {
assert!(epsilon >= F::zero(), "epsilon must be ≥ 0");
self.epsilon = epsilon;
self
}
}
impl<P, S, V, M, F> Solver<P, QuasiNewtonState<V, M, F>> for Bfgs<S, F>
where
F: Scalar,
P: CostFunction<Param = V, Output = F> + Gradient<Gradient = V>,
S: LineSearch<P, V, F, Error = P::Error>,
V: Clone + Dot<F> + NormSquared<F> + ScaledAdd<F> + ScaleInPlace<F> + NegInPlace + VectorLen,
M: MatVec<V> + MatrixIdentity + ScaleInPlace<F> + GeneralRankOneUpdate<V, F>,
{
type Error = P::Error;
fn init(
&mut self,
problem: &mut Problem<P>,
mut state: QuasiNewtonState<V, M, F>,
) -> Result<QuasiNewtonState<V, M, F>, Self::Error> {
let (cost, grad) = problem.cost_and_gradient(&state.param)?;
state.cost = Some(cost);
state.gradient = Some(grad);
Ok(state)
}
fn next_iter(
&mut self,
problem: &mut Problem<P>,
mut state: QuasiNewtonState<V, M, F>,
) -> Result<(QuasiNewtonState<V, M, F>, Option<TerminationReason>), Self::Error> {
let g = state
.gradient
.take()
.expect("gradient not set: Solver::init must run before next_iter");
let cost_old = state
.cost
.expect("cost not set: Solver::init must run before next_iter");
let mut direction = state.inverse_hessian.matvec(&g);
direction.neg_in_place();
let alpha = self
.line_search
.next(problem, &state.param, cost_old, &g, &direction)?;
if !(alpha.is_finite() && alpha > F::zero()) {
state.gradient = Some(g);
state.cost = Some(cost_old);
return Ok((state, Some(TerminationReason::SolverConverged)));
}
let mut s = direction;
s.scale_in_place(alpha);
state.param.scaled_add(F::one(), &s);
let (cost_new, g_new) = problem.cost_and_gradient(&state.param)?;
let mut y = g_new.clone();
y.scaled_add(-F::one(), &g);
let sy = s.dot(&y);
let s_norm = s.norm_squared().sqrt();
let y_norm = y.norm_squared().sqrt();
if sy > self.epsilon * s_norm * y_norm {
if !state.initial_scaling_done {
let yy = y.dot(&y);
if yy > F::zero() {
let scale = sy / yy;
let n = state.param.vec_len();
let mut h0 = M::identity(n);
h0.scale_in_place(scale);
state.inverse_hessian = h0;
}
state.initial_scaling_done = true;
}
let rho = F::one() / sy;
let hy = state.inverse_hessian.matvec(&y);
let yhy = y.dot(&hy);
let coef = rho * (F::one() + rho * yhy);
state.inverse_hessian.general_rank_one_update(coef, &s, &s);
state.inverse_hessian.general_rank_one_update(-rho, &s, &hy);
state.inverse_hessian.general_rank_one_update(-rho, &hy, &s);
}
state.cost = Some(cost_new);
state.gradient = Some(g_new);
Ok((state, None))
}
}
#[cfg(feature = "nalgebra")]
impl<S, F> WarmStart<nalgebra::DVector<F>> for Bfgs<S, F>
where
F: Scalar + nalgebra::Scalar + num_traits::Zero,
{
type State = QuasiNewtonState<nalgebra::DVector<F>, nalgebra::DMatrix<F>, F>;
fn seed(&self, x: &nalgebra::DVector<F>) -> Self::State {
QuasiNewtonState::<nalgebra::DVector<F>, nalgebra::DMatrix<F>, F>::new(x.clone())
}
}