pub mod dogleg;
pub mod steihaug;
pub use dogleg::Dogleg;
pub use steihaug::Steihaug;
use crate::core::inner::InitialState;
use crate::core::math::{Dot, MatVec, NegInPlace, NormSquared, Scalar, ScaleInPlace, ScaledAdd};
use crate::core::problem::{CostFunction, Gradient, Hessian, Problem};
use crate::core::solver::Solver;
use crate::core::state::BasicState;
use crate::core::termination::TerminationReason;
pub(crate) struct Step<V, F> {
pub(crate) d: V,
pub(crate) predicted_reduction: F,
pub(crate) hit_boundary: bool,
}
pub(crate) trait Subproblem<V, M, F> {
fn solve(&self, gradient: &V, hessian: &M, radius: F) -> Step<V, F>;
}
pub(crate) fn model_decrease<V, M, F>(g: &V, b: &M, d: &V) -> F
where
F: Scalar,
V: Dot<F>,
M: MatVec<V>,
{
let bd = b.matvec(d);
let half = F::from_f64(0.5).unwrap();
-g.dot(d) - half * d.dot(&bd)
}
pub(crate) fn tau_to_boundary<V, F>(z: &V, d: &V, radius: F) -> F
where
F: Scalar,
V: Dot<F>,
{
let dd = d.dot(d);
let zd = z.dot(d);
let zz = z.dot(z);
let rr = radius * radius;
let disc = zd * zd - dd * (zz - rr);
let disc = if disc < F::zero() { F::zero() } else { disc };
(-zd + disc.sqrt()) / dd
}
#[derive(Debug, Clone, Copy, Default)]
pub struct CauchyPoint;
impl<V, M, F> Subproblem<V, M, F> for CauchyPoint
where
F: Scalar,
V: Clone + Dot<F> + NormSquared<F> + ScaleInPlace<F> + NegInPlace,
M: MatVec<V>,
{
fn solve(&self, g: &V, b: &M, radius: F) -> Step<V, F> {
let g_norm = g.norm_squared().sqrt();
if g_norm <= F::zero() {
let mut d = g.clone();
d.scale_in_place(F::zero());
return Step {
d,
predicted_reduction: F::zero(),
hit_boundary: false,
};
}
let bg = b.matvec(g);
let gbg = g.dot(&bg);
let tau = if gbg <= F::zero() {
F::one()
} else {
let t = g_norm * g_norm * g_norm / (radius * gbg);
if t < F::one() { t } else { F::one() }
};
let mut d = g.clone();
d.scale_in_place(-(tau * radius / g_norm));
let predicted_reduction = model_decrease(g, b, &d);
Step {
d,
predicted_reduction,
hit_boundary: tau >= F::one(),
}
}
}
pub struct TrustRegion<Sub = Steihaug, F = f64> {
subproblem: Sub,
radius: F,
initial_radius: F,
max_radius: F,
eta: F,
max_inner: u32,
}
impl Default for TrustRegion<Steihaug> {
fn default() -> Self {
Self::new()
}
}
impl TrustRegion<Steihaug> {
pub fn new() -> Self {
Self::with_subproblem(Steihaug::new())
}
}
impl<Sub, F: Scalar> TrustRegion<Sub, F> {
pub fn with_subproblem(subproblem: Sub) -> Self {
Self {
subproblem,
radius: F::one(),
initial_radius: F::one(),
max_radius: F::from_f64(100.0).unwrap(),
eta: F::from_f64(0.125).unwrap(),
max_inner: 10,
}
}
pub fn with_radius(mut self, radius: F) -> Self {
assert!(radius > F::zero(), "initial radius must be > 0");
self.initial_radius = radius;
self.radius = radius;
self
}
pub fn with_max_radius(mut self, max_radius: F) -> Self {
assert!(max_radius > F::zero(), "max radius must be > 0");
self.max_radius = max_radius;
self
}
pub fn with_eta(mut self, eta: F) -> Self {
assert!(
eta >= F::zero() && eta < F::from_f64(0.25).unwrap(),
"eta must be in [0, 1/4)"
);
self.eta = eta;
self
}
pub fn with_max_inner_attempts(mut self, n: u32) -> Self {
assert!(n >= 1, "max inner attempts must be ≥ 1");
self.max_inner = n;
self
}
}
impl<Sub, V, F> InitialState<V> for TrustRegion<Sub, F>
where
F: Scalar,
V: Clone,
{
type State = BasicState<V, F>;
fn seed(&self, x: &V) -> Self::State {
BasicState::new(x.clone())
}
}
impl<P, Sub, V, M, F> Solver<P, BasicState<V, F>> for TrustRegion<Sub, F>
where
F: Scalar,
P: CostFunction<Param = V, Output = F> + Gradient<Gradient = V> + Hessian<Hessian = M>,
V: Clone + ScaledAdd<F> + NormSquared<F>,
Sub: Subproblem<V, M, F>,
{
type Error = P::Error;
fn init(
&mut self,
problem: &mut Problem<P>,
mut state: BasicState<V, F>,
) -> Result<BasicState<V, F>, Self::Error> {
self.radius = self.initial_radius;
let (cost, grad) = problem.cost_and_gradient(&state.param)?;
state.cost = Some(cost);
state.gradient = Some(grad);
Ok(state)
}
fn next_iter(
&mut self,
problem: &mut Problem<P>,
mut state: BasicState<V, F>,
) -> Result<(BasicState<V, F>, Option<TerminationReason>), Self::Error> {
let g = state
.gradient
.take()
.expect("gradient not set: Solver::init must run before next_iter");
let cost_old = state
.cost
.expect("cost not set: Solver::init must run before next_iter");
let b = problem.hessian(&state.param)?;
let quarter = F::from_f64(0.25).unwrap();
let three_quarters = F::from_f64(0.75).unwrap();
let two = F::from_f64(2.0).unwrap();
for _ in 0..self.max_inner {
let step = self.subproblem.solve(&g, &b, self.radius);
if step.predicted_reduction <= F::zero() {
state.gradient = Some(g);
return Ok((state, Some(TerminationReason::SolverConverged)));
}
let mut trial = state.param.clone();
trial.scaled_add(F::one(), &step.d);
let cost_trial = problem.cost(&trial)?;
let rho = (cost_old - cost_trial) / step.predicted_reduction;
let step_norm = step.d.norm_squared().sqrt();
if rho < quarter || !rho.is_finite() {
self.radius = quarter * step_norm;
} else if rho > three_quarters && step.hit_boundary {
let grown = two * self.radius;
self.radius = if grown < self.max_radius {
grown
} else {
self.max_radius
};
}
if rho > self.eta {
state.param = trial;
state.cost = Some(cost_trial);
let g_new = problem.gradient(&state.param)?;
state.gradient = Some(g_new);
return Ok((state, None));
}
}
state.gradient = Some(g);
Ok((state, None))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{BasicState, Executor, GradientTolerance};
struct Quadratic;
impl CostFunction for Quadratic {
type Param = Vec<f64>;
type Output = f64;
type Error = std::convert::Infallible;
fn cost(&self, x: &Vec<f64>) -> Result<f64, Self::Error> {
Ok(0.5 * (x[0] * x[0] + 100.0 * x[1] * x[1]))
}
}
impl Gradient for Quadratic {
type Gradient = Vec<f64>;
fn gradient(&self, x: &Vec<f64>) -> Result<Vec<f64>, Self::Error> {
Ok(vec![x[0], 100.0 * x[1]])
}
}
impl Hessian for Quadratic {
type Hessian = crate::core::math::DenseMatrix<f64>;
fn hessian(&self, _x: &Vec<f64>) -> Result<Self::Hessian, Self::Error> {
Ok(crate::core::math::DenseMatrix::from_row_slice(
2,
2,
&[1.0, 0.0, 0.0, 100.0],
))
}
}
struct Rosenbrock;
impl CostFunction for Rosenbrock {
type Param = Vec<f64>;
type Output = f64;
type Error = std::convert::Infallible;
fn cost(&self, x: &Vec<f64>) -> Result<f64, Self::Error> {
Ok((1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0].powi(2)).powi(2))
}
}
impl Gradient for Rosenbrock {
type Gradient = Vec<f64>;
fn gradient(&self, x: &Vec<f64>) -> Result<Vec<f64>, Self::Error> {
Ok(vec![
-2.0 * (1.0 - x[0]) - 400.0 * x[0] * (x[1] - x[0].powi(2)),
200.0 * (x[1] - x[0].powi(2)),
])
}
}
impl Hessian for Rosenbrock {
type Hessian = crate::core::math::DenseMatrix<f64>;
fn hessian(&self, x: &Vec<f64>) -> Result<Self::Hessian, Self::Error> {
let h11 = 2.0 + 1200.0 * x[0] * x[0] - 400.0 * x[1];
let h12 = -400.0 * x[0];
Ok(crate::core::math::DenseMatrix::from_row_slice(
2,
2,
&[h11, h12, h12, 200.0],
))
}
}
#[test]
fn cauchy_point_minimizes_quadratic() {
let result = Executor::new(
Quadratic,
TrustRegion::with_subproblem(CauchyPoint),
BasicState::new(vec![5.0, 1.0]),
)
.max_iter(500)
.terminate_on(GradientTolerance(1e-8))
.run()
.unwrap();
assert!(result.cost() < 1e-8, "cost = {}", result.cost());
}
#[test]
fn steihaug_minimizes_quadratic() {
let result = Executor::new(
Quadratic,
TrustRegion::with_subproblem(Steihaug::new()),
BasicState::new(vec![5.0, 1.0]),
)
.max_iter(100)
.terminate_on(GradientTolerance(1e-10))
.run()
.unwrap();
assert!(result.cost() < 1e-16, "cost = {}", result.cost());
}
#[test]
fn dogleg_minimizes_quadratic() {
let result = Executor::new(
Quadratic,
TrustRegion::with_subproblem(Dogleg),
BasicState::new(vec![5.0, 1.0]),
)
.max_iter(100)
.terminate_on(GradientTolerance(1e-10))
.run()
.unwrap();
assert!(result.cost() < 1e-16, "cost = {}", result.cost());
}
#[test]
fn steihaug_minimizes_rosenbrock() {
let result = Executor::new(
Rosenbrock,
TrustRegion::new(),
BasicState::new(vec![-1.2, 1.0]),
)
.max_iter(200)
.terminate_on(GradientTolerance(1e-8))
.run()
.unwrap();
assert!(result.cost() < 1e-10, "cost = {}", result.cost());
}
#[test]
fn dogleg_minimizes_rosenbrock() {
let result = Executor::new(
Rosenbrock,
TrustRegion::with_subproblem(Dogleg),
BasicState::new(vec![-1.2, 1.0]),
)
.max_iter(500)
.terminate_on(GradientTolerance(1e-8))
.run()
.unwrap();
assert!(result.cost() < 1e-10, "cost = {}", result.cost());
}
}