use ndarray::{Array1, ArrayView1};
use crate::geometry::manifold::{GeometryResult, RiemannianManifold, check_len, quad_form};
const STEIHAUG_CG_FORCING_FACTOR: f64 = 1.0e-2;
pub trait RiemannianObjective {
fn value_gradient(&mut self, point: ArrayView1<'_, f64>) -> GeometryResult<(f64, Array1<f64>)>;
fn hessian_vector_product(
&mut self,
point: ArrayView1<'_, f64>,
tangent: ArrayView1<'_, f64>,
) -> GeometryResult<Option<Array1<f64>>> {
check_len("hessian_vector_product tangent", tangent.len(), point.len())?;
Ok(None)
}
}
fn g_inner(
manifold: &dyn RiemannianManifold,
point: ArrayView1<'_, f64>,
a: ArrayView1<'_, f64>,
b: ArrayView1<'_, f64>,
) -> GeometryResult<f64> {
let g = manifold.metric_tensor(point)?;
Ok(quad_form(g.view(), a, b))
}
fn g_norm(
manifold: &dyn RiemannianManifold,
point: ArrayView1<'_, f64>,
a: ArrayView1<'_, f64>,
) -> GeometryResult<f64> {
Ok(g_inner(manifold, point, a, a)?.max(0.0).sqrt())
}
fn relative_stationarity(grad_norm: f64, grad0_norm: f64) -> f64 {
if !grad_norm.is_finite() || !grad0_norm.is_finite() {
return f64::INFINITY;
}
grad_norm / grad0_norm.max(1.0)
}
#[derive(Debug, Clone, PartialEq)]
pub struct RiemannianTrustRegion {
pub radius: f64,
pub max_radius: f64,
pub max_iter: usize,
pub grad_tol: f64,
}
impl Default for RiemannianTrustRegion {
fn default() -> Self {
Self {
radius: 1.0,
max_radius: 1.0e6,
max_iter: 64,
grad_tol: 1.0e-8,
}
}
}
impl RiemannianTrustRegion {
pub fn minimize(
&self,
manifold: &dyn RiemannianManifold,
objective: &mut dyn RiemannianObjective,
initial: ArrayView1<'_, f64>,
) -> GeometryResult<Array1<f64>> {
const ETA1: f64 = 0.1; const ETA_SHRINK: f64 = 0.25; const ETA_EXPAND: f64 = 0.75; const SHRINK: f64 = 0.25;
const EXPAND: f64 = 2.0;
let mut x = initial.to_owned();
let d = manifold.ambient_dim();
check_len("trust-region initial point", x.len(), d)?;
let mut delta = self.radius.min(self.max_radius);
let mut grad0_norm: Option<f64> = None;
for _ in 0..self.max_iter {
let (f_curr, grad_e) = objective.value_gradient(x.view())?;
let grad = manifold.riemannian_gradient(x.view(), grad_e.view())?;
let grad_norm = g_norm(manifold, x.view(), grad.view())?;
let grad0 = *grad0_norm.get_or_insert(grad_norm);
if relative_stationarity(grad_norm, grad0) <= self.grad_tol {
break;
}
let (step, predicted_reduction, hit_boundary) =
self.solve_subproblem(manifold, objective, x.view(), grad.view(), delta)?;
if !(predicted_reduction > 0.0) {
delta *= SHRINK;
if delta <= self.grad_tol * self.grad_tol {
break;
}
continue;
}
let trial_x = manifold.retract(x.view(), step.view())?;
let f_trial = objective.value_gradient(trial_x.view())?.0;
let actual_reduction = f_curr - f_trial;
let rho = if f_trial.is_finite() {
actual_reduction / predicted_reduction
} else {
f64::NEG_INFINITY
};
if rho < ETA_SHRINK {
delta *= SHRINK;
} else if rho > ETA_EXPAND && hit_boundary {
delta = (delta * EXPAND).min(self.max_radius);
}
if rho > ETA1 && f_trial.is_finite() {
x = trial_x;
}
}
Ok(x)
}
fn solve_subproblem(
&self,
manifold: &dyn RiemannianManifold,
objective: &mut dyn RiemannianObjective,
x: ArrayView1<'_, f64>,
grad: ArrayView1<'_, f64>,
delta: f64,
) -> GeometryResult<(Array1<f64>, f64, bool)> {
const BOUNDARY_FRAC: f64 = 0.9;
let has_hessian = objective.hessian_vector_product(x, grad)?.is_some();
if !has_hessian {
return self.cauchy_point(manifold, x, grad, delta);
}
let n = grad.len();
let mut z = Array1::<f64>::zeros(n); let mut r = grad.to_owned(); let mut p = -&r; let r0_norm = g_norm(manifold, x, r.view())?;
let tol = (STEIHAUG_CG_FORCING_FACTOR * r0_norm).min(r0_norm * r0_norm);
let max_cg = 2 * n + 1;
for _ in 0..max_cg {
let hp = objective.hessian_vector_product(x, p.view())?.ok_or(
crate::geometry::manifold::GeometryError::Unsupported(
"Hessian–vector product became unavailable mid-subproblem",
),
)?;
let php = g_inner(manifold, x, p.view(), hp.view())?;
if php <= 0.0 {
let (tau, _) = boundary_tau(manifold, x, z.view(), p.view(), delta)?;
let eta = &z + &(&p * tau);
let red = model_reduction(manifold, objective, x, grad, eta.view())?;
return Ok((eta, red, true));
}
let rr = g_inner(manifold, x, r.view(), r.view())?;
let alpha = rr / php;
let z_next = &z + &(&p * alpha);
if g_norm(manifold, x, z_next.view())? >= delta {
let (tau, _) = boundary_tau(manifold, x, z.view(), p.view(), delta)?;
let eta = &z + &(&p * tau);
let red = model_reduction(manifold, objective, x, grad, eta.view())?;
return Ok((eta, red, true));
}
z = z_next;
let r_next = &r + &(&hp * alpha);
let r_next_norm = g_norm(manifold, x, r_next.view())?;
if r_next_norm <= tol {
let red = model_reduction(manifold, objective, x, grad, z.view())?;
let hit = g_norm(manifold, x, z.view())? >= BOUNDARY_FRAC * delta;
return Ok((z, red, hit));
}
let rr_next = g_inner(manifold, x, r_next.view(), r_next.view())?;
let beta = rr_next / rr;
p = &(-&r_next) + &(&p * beta);
r = r_next;
}
let red = model_reduction(manifold, objective, x, grad, z.view())?;
let hit = g_norm(manifold, x, z.view())? >= BOUNDARY_FRAC * delta;
Ok((z, red, hit))
}
fn cauchy_point(
&self,
manifold: &dyn RiemannianManifold,
x: ArrayView1<'_, f64>,
grad: ArrayView1<'_, f64>,
delta: f64,
) -> GeometryResult<(Array1<f64>, f64, bool)> {
let grad_norm = g_norm(manifold, x, grad.view())?;
if grad_norm <= 0.0 {
return Ok((Array1::<f64>::zeros(grad.len()), 0.0, false));
}
let tau = delta / grad_norm;
let step = &grad.to_owned() * (-tau);
let predicted = tau * grad_norm * grad_norm;
Ok((step, predicted, true))
}
}
fn boundary_tau(
manifold: &dyn RiemannianManifold,
x: ArrayView1<'_, f64>,
z: ArrayView1<'_, f64>,
p: ArrayView1<'_, f64>,
delta: f64,
) -> GeometryResult<(f64, f64)> {
let pp = g_inner(manifold, x, p, p)?;
let zp = g_inner(manifold, x, z, p)?;
let zz = g_inner(manifold, x, z, z)?;
if pp <= 0.0 {
return Ok((0.0, zz.max(0.0).sqrt()));
}
let c = zz - delta * delta;
let disc = (zp * zp - pp * c).max(0.0);
let tau = (-zp + disc.sqrt()) / pp;
let tau = tau.max(0.0);
Ok((tau, delta))
}
fn model_reduction(
manifold: &dyn RiemannianManifold,
objective: &mut dyn RiemannianObjective,
x: ArrayView1<'_, f64>,
grad: ArrayView1<'_, f64>,
eta: ArrayView1<'_, f64>,
) -> GeometryResult<f64> {
let lin = g_inner(manifold, x, grad, eta)?;
let heta = objective.hessian_vector_product(x, eta)?.ok_or(
crate::geometry::manifold::GeometryError::Unsupported(
"Hessian–vector product unavailable while scoring the model",
),
)?;
let quad = g_inner(manifold, x, eta, heta.view())?;
Ok(-lin - 0.5 * quad)
}
#[derive(Debug, Clone, PartialEq)]
pub struct RiemannianLBFGS {
pub history: usize,
pub step_size: f64,
pub max_iter: usize,
pub grad_tol: f64,
}
impl Default for RiemannianLBFGS {
fn default() -> Self {
Self {
history: 10,
step_size: 1.0,
max_iter: 100,
grad_tol: 1.0e-8,
}
}
}
#[derive(Clone)]
struct SecantPair {
base: Array1<f64>,
s: Array1<f64>,
y: Array1<f64>,
}
impl RiemannianLBFGS {
pub fn minimize(
&self,
manifold: &dyn RiemannianManifold,
objective: &mut dyn RiemannianObjective,
initial: ArrayView1<'_, f64>,
) -> GeometryResult<Array1<f64>> {
let mut x = initial.to_owned();
let d = manifold.ambient_dim();
check_len("L-BFGS initial point", x.len(), d)?;
let mut history: Vec<SecantPair> = Vec::new();
let (mut f_curr, grad_e0) = objective.value_gradient(x.view())?;
let mut grad = manifold.riemannian_gradient(x.view(), grad_e0.view())?;
let grad0_norm = g_norm(manifold, x.view(), grad.view())?;
let armijo_c: f64 = 1.0e-4;
let alpha_min: f64 = 1.0e-16;
let alpha_max: f64 = 1.0e16;
let initial_step = if self.step_size.is_finite() && self.step_size > 0.0 {
self.step_size
} else {
1.0
};
for _ in 0..self.max_iter {
let grad_norm = g_norm(manifold, x.view(), grad.view())?;
if relative_stationarity(grad_norm, grad0_norm) <= self.grad_tol {
break;
}
let direction = two_loop(manifold, x.view(), grad.view(), &history)?;
let direction = -&direction;
let slope = g_inner(manifold, x.view(), grad.view(), direction.view())?;
let (direction, slope) = if slope < 0.0 {
(direction, slope)
} else {
let sd = -&grad;
let s_sd = g_inner(manifold, x.view(), grad.view(), sd.view())?;
(sd, s_sd)
};
let old_x = x.clone();
let old_grad = grad.clone();
let mut alpha = initial_step;
let mut best_alpha = 0.0;
let mut best_f = f_curr;
let mut best_x = x.clone();
let mut best_grad = grad.clone();
loop {
let step = &direction * alpha;
let trial_x = manifold.retract(x.view(), step.view())?;
let (f_trial, g_trial_e) = objective.value_gradient(trial_x.view())?;
let armijo_rhs = f_curr + armijo_c * alpha * slope;
let accepted = f_trial.is_finite() && f_trial <= armijo_rhs;
if accepted && f_trial < best_f {
best_alpha = alpha;
best_f = f_trial;
best_x = trial_x;
best_grad = manifold.riemannian_gradient(best_x.view(), g_trial_e.view())?;
if alpha >= alpha_max {
break;
}
alpha *= 2.0;
} else {
break;
}
}
if best_alpha == 0.0 {
alpha = initial_step;
while alpha > alpha_min {
let step = &direction * alpha;
let trial_x = manifold.retract(x.view(), step.view())?;
let (f_trial, g_trial_e) = objective.value_gradient(trial_x.view())?;
let armijo_rhs = f_curr + armijo_c * alpha * slope;
if f_trial.is_finite() && f_trial <= armijo_rhs {
best_alpha = alpha;
best_f = f_trial;
best_x = trial_x;
best_grad =
manifold.riemannian_gradient(best_x.view(), g_trial_e.view())?;
break;
}
alpha *= 0.5;
}
}
if best_alpha == 0.0 {
break;
}
let eta = &direction * best_alpha;
x = best_x;
f_curr = best_f;
grad = best_grad;
let path = transport_path(&old_x, &x);
let s = manifold.parallel_transport(path.view(), eta.view())?;
let transported_old_grad = manifold.parallel_transport(path.view(), old_grad.view())?;
let y = &grad - &transported_old_grad;
let sy = g_inner(manifold, x.view(), s.view(), y.view())?;
if sy > 1.0e-14 {
history.push(SecantPair {
base: x.clone(),
s,
y,
});
if history.len() > self.history {
history.remove(0);
}
}
}
Ok(x)
}
}
fn transport_path(p_from: &Array1<f64>, p_to: &Array1<f64>) -> ndarray::Array2<f64> {
let d = p_from.len();
let mut path = ndarray::Array2::<f64>::zeros((2, d));
path.row_mut(0).assign(p_from);
path.row_mut(1).assign(p_to);
path
}
fn two_loop(
manifold: &dyn RiemannianManifold,
x: ArrayView1<'_, f64>,
grad: ArrayView1<'_, f64>,
history: &[SecantPair],
) -> GeometryResult<Array1<f64>> {
let mut s_cur: Vec<Array1<f64>> = Vec::with_capacity(history.len());
let mut y_cur: Vec<Array1<f64>> = Vec::with_capacity(history.len());
for pair in history {
let path = transport_path(&pair.base, &x.to_owned());
s_cur.push(manifold.parallel_transport(path.view(), pair.s.view())?);
y_cur.push(manifold.parallel_transport(path.view(), pair.y.view())?);
}
let mut q = grad.to_owned();
let mut alpha = vec![0.0; history.len()];
let mut rho = vec![0.0; history.len()];
for i in (0..history.len()).rev() {
let sy = g_inner(manifold, x, s_cur[i].view(), y_cur[i].view())?;
rho[i] = 1.0 / sy;
alpha[i] = rho[i] * g_inner(manifold, x, s_cur[i].view(), q.view())?;
q = &q - &(&y_cur[i] * alpha[i]);
}
let mut r = q;
if let (Some(s), Some(y)) = (s_cur.last(), y_cur.last()) {
let yy = g_inner(manifold, x, y.view(), y.view())?;
if yy > 1.0e-14 {
let sy = g_inner(manifold, x, s.view(), y.view())?;
r = &r * (sy / yy);
}
}
for i in 0..history.len() {
let beta = rho[i] * g_inner(manifold, x, y_cur[i].view(), r.view())?;
r = &r + &(&s_cur[i] * (alpha[i] - beta));
}
Ok(r)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::geometry::EuclideanManifold;
use ndarray::{Array1, ArrayView1};
struct Square;
impl RiemannianObjective for Square {
fn value_gradient(
&mut self,
point: ArrayView1<'_, f64>,
) -> GeometryResult<(f64, Array1<f64>)> {
let x = point[0];
Ok((x * x, Array1::from_vec(vec![2.0 * x])))
}
fn hessian_vector_product(
&mut self,
point: ArrayView1<'_, f64>,
tangent: ArrayView1<'_, f64>,
) -> GeometryResult<Option<Array1<f64>>> {
assert!(point.iter().all(|value| value.is_finite()));
check_len("hessian_vector_product tangent", tangent.len(), point.len())?;
Ok(Some(&tangent.to_owned() * 2.0))
}
}
struct SquareGradOnly;
impl RiemannianObjective for SquareGradOnly {
fn value_gradient(
&mut self,
point: ArrayView1<'_, f64>,
) -> GeometryResult<(f64, Array1<f64>)> {
let x = point[0];
Ok((x * x, Array1::from_vec(vec![2.0 * x])))
}
}
struct Quadratic {
a: ndarray::Array2<f64>,
b: Array1<f64>,
}
impl RiemannianObjective for Quadratic {
fn value_gradient(
&mut self,
point: ArrayView1<'_, f64>,
) -> GeometryResult<(f64, Array1<f64>)> {
let ax = self.a.dot(&point.to_owned());
let val = 0.5 * point.dot(&ax) - self.b.dot(&point.to_owned());
let grad = &ax - &self.b;
Ok((val, grad))
}
fn hessian_vector_product(
&mut self,
point: ArrayView1<'_, f64>,
tangent: ArrayView1<'_, f64>,
) -> GeometryResult<Option<Array1<f64>>> {
assert!(point.iter().all(|value| value.is_finite()));
check_len("hessian_vector_product tangent", tangent.len(), point.len())?;
Ok(Some(self.a.dot(&tangent.to_owned())))
}
}
#[test]
fn trust_region_converges_on_square_steihaug() {
let manifold = EuclideanManifold::new(1);
let tr = RiemannianTrustRegion {
radius: 1.0,
max_radius: 1.0e6,
max_iter: 100,
grad_tol: 1.0e-12,
};
let mut obj = Square;
let x0 = Array1::from_vec(vec![0.1]);
let x = tr
.minimize(&manifold, &mut obj, x0.view())
.expect("TR runs");
assert!(
x[0].abs() < 1.0e-6,
"trust region must converge to 0, got {}",
x[0]
);
}
#[test]
fn trust_region_never_increases_objective() {
let manifold = EuclideanManifold::new(1);
let tr = RiemannianTrustRegion {
radius: 1.0,
max_radius: 1.0e6,
max_iter: 1,
grad_tol: 1.0e-12,
};
let mut obj = Square;
let x0 = Array1::from_vec(vec![0.1]);
let f0 = obj.value_gradient(x0.view()).unwrap().0;
let x1 = tr
.minimize(&manifold, &mut obj, x0.view())
.expect("TR runs");
let f1 = obj.value_gradient(x1.view()).unwrap().0;
assert!(f1 <= f0, "objective increased: {f0} -> {f1}");
assert!(x1[0].abs() <= x0[0].abs() + 1e-15, "moved away from min");
}
#[test]
fn trust_region_cauchy_point_converges() {
let manifold = EuclideanManifold::new(1);
let tr = RiemannianTrustRegion {
radius: 1.0,
max_radius: 1.0e6,
max_iter: 500,
grad_tol: 1.0e-12,
};
let mut obj = SquareGradOnly;
let x0 = Array1::from_vec(vec![0.1]);
let x = tr
.minimize(&manifold, &mut obj, x0.view())
.expect("TR runs");
assert!(
x[0].abs() < 1.0e-6,
"Cauchy-point trust region must converge to 0, got {}",
x[0]
);
}
#[test]
fn trust_region_solves_spd_quadratic() {
let manifold = EuclideanManifold::new(3);
let a = ndarray::array![[4.0, 1.0, 0.0], [1.0, 3.0, 1.0], [0.0, 1.0, 2.0],];
let b = Array1::from_vec(vec![1.0, 2.0, -1.0]);
let x_ref = crate::geometry::manifold::inverse(&a).unwrap().dot(&b);
let mut obj = Quadratic { a, b };
let tr = RiemannianTrustRegion {
radius: 1.0,
max_radius: 1.0e6,
max_iter: 200,
grad_tol: 1.0e-12,
};
let x0 = Array1::from_vec(vec![0.0, 0.0, 0.0]);
let x = tr
.minimize(&manifold, &mut obj, x0.view())
.expect("TR runs");
for i in 0..3 {
assert!(
(x[i] - x_ref[i]).abs() < 1.0e-6,
"component {i}: got {}, want {}",
x[i],
x_ref[i]
);
}
}
#[test]
fn lbfgs_reduces_euclidean_quadratic() {
let manifold = EuclideanManifold::new(3);
let a = ndarray::array![[5.0, 1.0, 0.5], [1.0, 4.0, 1.0], [0.5, 1.0, 3.0],];
let b = Array1::from_vec(vec![2.0, -1.0, 0.5]);
let x_ref = crate::geometry::manifold::inverse(&a).unwrap().dot(&b);
let mut obj = Quadratic { a, b };
let lbfgs = RiemannianLBFGS {
history: 10,
step_size: 1.0,
max_iter: 200,
grad_tol: 1.0e-10,
};
let x0 = Array1::from_vec(vec![0.0, 0.0, 0.0]);
let f0 = obj.value_gradient(x0.view()).unwrap().0;
let x = lbfgs
.minimize(&manifold, &mut obj, x0.view())
.expect("L-BFGS runs");
let f1 = obj.value_gradient(x.view()).unwrap().0;
assert!(f1 < f0, "L-BFGS did not reduce the quadratic: {f0} -> {f1}");
for i in 0..3 {
assert!(
(x[i] - x_ref[i]).abs() < 1.0e-6,
"component {i}: got {}, want {}",
x[i],
x_ref[i]
);
}
}
}