#![cfg(feature = "nalgebra")]
use basin::problems::Rosenbrock;
use basin::{
Backtracking, BasicState, CostFunction, Executor, Gradient, GradientDescent, GradientTolerance,
QuasiNewtonState, TerminationReason, BFGS,
};
use nalgebra::{DMatrix, DVector};
#[test]
fn bfgs_converges_on_rosenbrock() {
let problem = Rosenbrock::<DVector<f64>>::default();
let initial = DVector::from_vec(vec![-1.2, 1.0]);
let result = Executor::new(
problem,
BFGS::new(),
QuasiNewtonState::<DVector<f64>, DMatrix<f64>>::new(initial),
)
.max_iter(100)
.run();
assert!(
result.cost() < 1e-8,
"expected near-zero cost, got {}",
result.cost()
);
assert!(
(result.param()[0] - 1.0).abs() < 1e-4,
"x[0] = {}",
result.param()[0]
);
assert!(
(result.param()[1] - 1.0).abs() < 1e-4,
"x[1] = {}",
result.param()[1]
);
}
#[test]
fn bfgs_terminates_on_gradient_tolerance() {
let problem = Rosenbrock::<DVector<f64>>::default();
let initial = DVector::from_vec(vec![-1.2, 1.0]);
let result = Executor::new(
problem,
BFGS::new(),
QuasiNewtonState::<DVector<f64>, DMatrix<f64>>::new(initial),
)
.max_iter(200)
.terminate_on(GradientTolerance(1e-6))
.run();
assert_eq!(result.reason, TerminationReason::GradientTolerance);
assert!(result.cost() < 1e-10, "cost = {}", result.cost());
}
#[test]
fn bfgs_converges_faster_than_gd_with_backtracking() {
let initial = DVector::from_vec(vec![-1.2, 1.0]);
let bfgs_result = Executor::new(
Rosenbrock::<DVector<f64>>::default(),
BFGS::new(),
QuasiNewtonState::<DVector<f64>, DMatrix<f64>>::new(initial.clone()),
)
.max_iter(500)
.terminate_on(GradientTolerance(1e-6))
.run();
let gd_result = Executor::new(
Rosenbrock::<DVector<f64>>::default(),
GradientDescent::with_line_search(Backtracking::new()),
BasicState::new(initial),
)
.max_iter(500)
.terminate_on(GradientTolerance(1e-6))
.run();
assert!(
bfgs_result.cost() < gd_result.cost(),
"BFGS cost {} not better than GD cost {}",
bfgs_result.cost(),
gd_result.cost()
);
assert!(
bfgs_result.iter() < gd_result.iter(),
"BFGS iters {} not fewer than GD iters {}",
bfgs_result.iter(),
gd_result.iter()
);
}
struct Quadratic {
diag: Vec<f64>,
}
impl CostFunction for Quadratic {
type Param = DVector<f64>;
type Output = f64;
fn cost(&self, x: &DVector<f64>) -> f64 {
let mut c = 0.0;
for (i, xi) in x.iter().enumerate() {
c += 0.5 * self.diag[i] * xi * xi - xi;
}
c
}
}
impl Gradient for Quadratic {
type Param = DVector<f64>;
type Gradient = DVector<f64>;
fn gradient(&self, x: &DVector<f64>) -> DVector<f64> {
let mut g = DVector::zeros(x.len());
for (i, xi) in x.iter().enumerate() {
g[i] = self.diag[i] * xi - 1.0;
}
g
}
}
#[test]
fn bfgs_on_5d_quadratic_converges_quickly() {
let problem = Quadratic {
diag: vec![1.0, 2.0, 3.0, 4.0, 5.0],
};
let initial = DVector::from_element(5, 0.0);
let result = Executor::new(
problem,
BFGS::new(),
QuasiNewtonState::<DVector<f64>, DMatrix<f64>>::new(initial),
)
.max_iter(50)
.terminate_on(GradientTolerance(1e-8))
.run();
assert_eq!(result.reason, TerminationReason::GradientTolerance);
let expected_cost = -0.5 * (1.0 + 0.5 + 1.0 / 3.0 + 0.25 + 0.2);
assert!(
(result.cost() - expected_cost).abs() < 1e-10,
"cost = {}, expected {}",
result.cost(),
expected_cost
);
assert!(
result.iter() <= 15,
"expected convergence in ≤ 15 iters, got {}",
result.iter()
);
}
#[test]
fn bfgs_terminates_via_converged_when_at_machine_precision() {
let problem = Quadratic {
diag: vec![1.0, 2.0, 3.0, 4.0, 5.0],
};
let initial = DVector::from_element(5, 0.0);
let result = Executor::new(
problem,
BFGS::new(),
QuasiNewtonState::<DVector<f64>, DMatrix<f64>>::new(initial),
)
.max_iter(200)
.terminate_on(GradientTolerance(1e-30))
.run();
assert_eq!(result.reason, TerminationReason::SolverConverged);
let expected_cost = -0.5 * (1.0 + 0.5 + 1.0 / 3.0 + 0.25 + 0.2);
assert!(
(result.cost() - expected_cost).abs() < 1e-10,
"cost = {}, expected {}",
result.cost(),
expected_cost
);
}