use crate::error::OptimizeError;
use crate::unconstrained::result::OptimizeResult;
use crate::unconstrained::utils::finite_difference_gradient;
use crate::unconstrained::Bounds;
use scirs2_core::ndarray::{Array1, ArrayView1};
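/// Oracle for Hessian-vector products: given a point `x` and a direction
/// `v`, returns an approximation of `H(x) v` without ever forming the
/// Hessian matrix itself.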
pub trait HVPFunction: Send + Sync {
fn hvp(&self, x: &[f64], v: &[f64]) -> Result<Vec<f64>, OptimizeError>;
}
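/// Matrix-free HVP oracle that approximates `H(x) v` by central
/// differences of finite-difference gradients of `fun`. `step` is the
/// inner gradient step; `with_default_step` uses sqrt(machine epsilon).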
pub struct FiniteDiffHVP<F>
where
F: Fn(&ArrayView1<f64>) -> f64 + Send + Sync,
{
pub fun: F,
pub step: f64,
}
impl<F> FiniteDiffHVP<F>
where
F: Fn(&ArrayView1<f64>) -> f64 + Send + Sync,
{
pub fn new(fun: F, step: f64) -> Self {
Self { fun, step }
}
pub fn with_default_step(fun: F) -> Self {
Self {
fun,
step: f64::EPSILON.sqrt(),
}
}
}
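// Central-difference HVP: with the unit direction vhat = v / ||v||,
//   H v ~= ||v|| * (grad f(x + eps * vhat) - grad f(x - eps * vhat)) / (2 * eps),
// where eps = cbrt(machine epsilon) * max(||x||, 1). The inner gradients
// are themselves central differences with step `self.step`.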
impl<F> HVPFunction for FiniteDiffHVP<F>
where
F: Fn(&ArrayView1<f64>) -> f64 + Send + Sync,
{
fn hvp(&self, x: &[f64], v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
let n = x.len();
if v.len() != n {
return Err(OptimizeError::ValueError(
"x and v must have the same length".to_string(),
));
}
let v_norm = v.iter().map(|vi| vi * vi).sum::<f64>().sqrt();
if v_norm < 1e-300 {
return Ok(vec![0.0; n]);
}
let x_norm = x.iter().map(|xi| xi * xi).sum::<f64>().sqrt().max(1.0);
let outer_step = f64::EPSILON.cbrt();
let eps = outer_step * x_norm;
let mut x_plus = Array1::from(x.to_vec());
let mut x_minus = Array1::from(x.to_vec());
for i in 0..n {
x_plus[i] += eps * v[i] / v_norm;
x_minus[i] -= eps * v[i] / v_norm;
}
let grad_plus = finite_diff_grad_internal(&self.fun, &x_plus.view(), self.step)?;
let grad_minus = finite_diff_grad_internal(&self.fun, &x_minus.view(), self.step)?;
let hvp: Vec<f64> = grad_plus
.iter()
.zip(grad_minus.iter())
.map(|(gp, gm)| v_norm * (gp - gm) / (2.0 * eps))
.collect();
Ok(hvp)
}
}
fn finite_diff_grad_internal<F>(
fun: &F,
x: &ArrayView1<f64>,
step: f64,
) -> Result<Vec<f64>, OptimizeError>
where
F: Fn(&ArrayView1<f64>) -> f64,
{
let n = x.len();
let mut grad = vec![0.0; n];
let mut x_tmp = x.to_owned();
for i in 0..n {
let h = step * (1.0 + x[i].abs());
let orig = x_tmp[i];
x_tmp[i] = orig + h;
let fp = fun(&x_tmp.view());
x_tmp[i] = orig - h;
let fm = fun(&x_tmp.view());
x_tmp[i] = orig;
if !fp.is_finite() || !fm.is_finite() {
return Err(OptimizeError::ComputationError(
"Non-finite function value in HVP gradient computation".to_string(),
));
}
grad[i] = (fp - fm) / (2.0 * h);
}
Ok(grad)
}
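/// Options for the truncated-CG inner solve. `eta` is the forcing term:
/// CG stops once the residual norm drops below `eta * ||g||`. The default
/// `trust_radius` of 1e10 effectively disables the trust-region boundary.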
#[derive(Debug, Clone)]
pub struct TruncatedCGOptions {
pub eta: f64,
pub max_iter: Option<usize>,
pub min_step_norm: f64,
pub use_negative_curvature_detection: bool,
pub trust_radius: f64,
}
impl Default for TruncatedCGOptions {
fn default() -> Self {
Self {
eta: 0.5,
max_iter: None,
min_step_norm: 1e-12,
use_negative_curvature_detection: true,
trust_radius: 1e10,
}
}
}
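/// Outcome of one truncated-CG solve: the (possibly truncated) step, the
/// final residual norm, the number of HVP calls, and whether negative
/// curvature forced early termination.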
#[derive(Debug, Clone)]
pub struct TruncatedCGResult {
pub step: Array1<f64>,
pub residual_norm: f64,
pub num_hvp: usize,
pub converged: bool,
pub hit_negative_curvature: bool,
}
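/// Steihaug-Toint truncated conjugate-gradient solver for the Newton
/// system `H p = -g`, using only Hessian-vector products.
///
/// A minimal usage sketch (import paths assumed):
///
/// ```ignore
/// let hvp = FiniteDiffHVP::with_default_step(|x: &ArrayView1<f64>| x.dot(x));
/// let cg = TruncatedCG::default_solver();
/// // For f(x) = x.x the Hessian is 2I, so the Newton step is -g / 2.
/// let result = cg.solve(&[1.0, 2.0], &[2.0, 4.0], &hvp).expect("CG failed");
/// ```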
pub struct TruncatedCG {
pub options: TruncatedCGOptions,
}
impl TruncatedCG {
pub fn new(options: TruncatedCGOptions) -> Self {
Self { options }
}
pub fn default_solver() -> Self {
Self {
options: TruncatedCGOptions::default(),
}
}
pub fn solve<H: HVPFunction>(
&self,
x: &[f64],
g: &[f64],
hvp_fn: &H,
) -> Result<TruncatedCGResult, OptimizeError> {
let n = g.len();
let max_iter = self.options.max_iter.unwrap_or(n.max(1));
let g_norm = g.iter().map(|gi| gi * gi).sum::<f64>().sqrt();
if g_norm < 1e-300 {
return Ok(TruncatedCGResult {
step: Array1::zeros(n),
residual_norm: 0.0,
num_hvp: 0,
converged: true,
hit_negative_curvature: false,
});
}
let tol = self.options.eta * g_norm;
let delta = self.options.trust_radius;
let mut p = vec![0.0f64; n];
let mut r: Vec<f64> = g.iter().map(|gi| -gi).collect();
let mut d = r.clone();
let mut r_dot_r: f64 = r.iter().map(|ri| ri * ri).sum();
let mut num_hvp = 0;
let mut hit_neg_curv = false;
for _ in 0..max_iter {
let hd = hvp_fn.hvp(x, &d)?;
num_hvp += 1;
let d_hd: f64 = d.iter().zip(hd.iter()).map(|(di, hdi)| di * hdi).sum();
if self.options.use_negative_curvature_detection && d_hd <= 0.0 {
hit_neg_curv = true;
if delta < 1e10 {
// Steihaug-Toint: follow the current direction to the trust-region
// boundary, i.e. the tau >= 0 with ||p + tau * d|| = delta.
let d_norm_sq: f64 = d.iter().map(|di| di * di).sum();
let p_dot_d: f64 = p.iter().zip(d.iter()).map(|(pi, di)| pi * di).sum();
let p_norm_sq: f64 = p.iter().map(|pi| pi * pi).sum();
let disc = p_dot_d * p_dot_d + d_norm_sq * (delta * delta - p_norm_sq);
let tau = if disc >= 0.0 {
(-p_dot_d + disc.sqrt()) / d_norm_sq.max(1e-300)
} else {
0.0
};
for i in 0..n {
p[i] += tau * d[i];
}
} else if num_hvp == 1 {
// Negative curvature on the very first iteration with an effectively
// infinite trust region: fall back to the steepest-descent direction
// (d == -g here) rather than returning a zero step.
p = d.clone();
}
break;
}
// Curvature is strictly positive here when detection is enabled; the
// guard only protects against division by zero when it is disabled.
let alpha = r_dot_r / d_hd.max(1e-300);
let p_new: Vec<f64> = p.iter().zip(d.iter()).map(|(pi, di)| pi + alpha * di).collect();
let p_new_norm_sq: f64 = p_new.iter().map(|pi| pi * pi).sum();
// Trust-region boundary check: truncate the step if the CG iterate
// would leave the region of radius `delta`.
if self.options.use_negative_curvature_detection && p_new_norm_sq >= delta * delta {
let d_norm_sq: f64 = d.iter().map(|di| di * di).sum();
let p_dot_d: f64 = p.iter().zip(d.iter()).map(|(pi, di)| pi * di).sum();
let p_norm_sq: f64 = p.iter().map(|pi| pi * pi).sum();
let disc = p_dot_d * p_dot_d + d_norm_sq * (delta * delta - p_norm_sq);
let tau = if disc >= 0.0 {
(-p_dot_d + disc.sqrt()) / d_norm_sq.max(1e-300)
} else {
0.0
};
for i in 0..n {
p[i] += tau * d[i];
}
break;
}
p = p_new;
let mut r_new = vec![0.0f64; n];
for i in 0..n {
r_new[i] = r[i] - alpha * hd[i];
}
let r_new_dot_r_new: f64 = r_new.iter().map(|ri| ri * ri).sum();
let r_norm = r_new_dot_r_new.sqrt();
if r_norm <= tol {
r = r_new;
r_dot_r = r_new_dot_r_new;
break;
}
let beta = r_new_dot_r_new / r_dot_r.max(1e-300);
for i in 0..n {
d[i] = r_new[i] + beta * d[i];
}
r = r_new;
r_dot_r = r_new_dot_r_new;
}
let residual_norm = r.iter().map(|ri| ri * ri).sum::<f64>().sqrt();
// Converged only if the forcing-term tolerance was actually met; hitting
// negative curvature or the iteration cap does not count as convergence.
let converged = residual_norm <= tol;
Ok(TruncatedCGResult {
step: Array1::from(p),
residual_norm,
num_hvp,
converged,
hit_negative_curvature: hit_neg_curv,
})
}
}
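/// Result of a Newton-type minimisation run, shared by `NewtonCG` and
/// `DampedNewton` (the latter reports `n_hvp == 0`).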
#[derive(Debug, Clone)]
pub struct NewtonCGResult {
pub x: Array1<f64>,
pub f_val: f64,
pub gradient: Array1<f64>,
pub grad_norm_history: Vec<f64>,
pub n_iter: usize,
pub n_fev: usize,
pub n_hvp: usize,
pub converged: bool,
pub message: String,
}
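/// Options for `NewtonCG`: gradient tolerance `gtol`, relative
/// function-change tolerance `ftol`, finite-difference step `eps`, Armijo
/// parameters `c1` and `backtrack_rho`, and optional box `bounds`.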
#[derive(Debug, Clone)]
pub struct NewtonCGOptions {
pub gtol: f64,
pub ftol: f64,
pub max_iter: usize,
pub eps: f64,
pub cg_options: TruncatedCGOptions,
pub use_line_search: bool,
pub c1: f64,
pub backtrack_rho: f64,
pub bounds: Option<Bounds>,
}
impl Default for NewtonCGOptions {
fn default() -> Self {
Self {
gtol: 1e-5,
ftol: 1e-8,
max_iter: 200,
eps: f64::EPSILON.sqrt(),
cg_options: TruncatedCGOptions::default(),
use_line_search: true,
c1: 1e-4,
backtrack_rho: 0.5,
bounds: None,
}
}
}
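/// Line-search Newton-CG (truncated Newton): each outer iteration solves
/// the Newton system approximately with `TruncatedCG`, then applies an
/// Armijo backtracking line search along the resulting direction.
///
/// A minimal usage sketch (import paths assumed):
///
/// ```ignore
/// let f = |x: &ArrayView1<f64>| x[0] * x[0] + 4.0 * x[1] * x[1];
/// let hvp = FiniteDiffHVP::with_default_step(f);
/// let result = NewtonCG::default_solver()
///     .minimize(f, &[2.0, 1.0], &hvp)
///     .expect("Newton-CG failed");
/// assert!(result.converged);
/// ```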
pub struct NewtonCG {
pub options: NewtonCGOptions,
}
impl NewtonCG {
pub fn new(options: NewtonCGOptions) -> Self {
Self { options }
}
pub fn default_solver() -> Self {
Self {
options: NewtonCGOptions::default(),
}
}
pub fn minimize<F, H>(
&self,
mut fun: F,
x0: &[f64],
hvp_fn: &H,
) -> Result<NewtonCGResult, OptimizeError>
where
F: FnMut(&ArrayView1<f64>) -> f64,
H: HVPFunction,
{
let n = x0.len();
let opts = &self.options;
let cg_solver = TruncatedCG::new(opts.cg_options.clone());
let mut x = Array1::from(x0.to_vec());
if let Some(ref b) = opts.bounds {
b.project(x.as_slice_mut().ok_or_else(|| {
OptimizeError::ComputationError("Cannot get mutable slice".to_string())
})?);
}
let mut n_fev = 0usize;
let mut n_hvp = 0usize;
let mut f = {
n_fev += 1;
fun(&x.view())
};
let mut g = {
// finite_difference_gradient is assumed to use central differences,
// hence the 2 * n extra function evaluations counted per gradient.
let grad = finite_difference_gradient(&mut fun, &x.view(), opts.eps)?;
n_fev += 2 * n;
grad
};
let mut grad_norm_history = Vec::new();
let mut iter = 0usize;
let mut converged = false;
let mut message = "Maximum iterations reached".to_string();
loop {
let g_norm = g.dot(&g).sqrt();
grad_norm_history.push(g_norm);
if g_norm <= opts.gtol {
converged = true;
message = "Gradient norm below tolerance".to_string();
break;
}
if iter >= opts.max_iter {
break;
}
let x_slice = x.as_slice().ok_or_else(|| {
OptimizeError::ComputationError("Cannot get slice from x".to_string())
})?;
let g_slice = g.as_slice().ok_or_else(|| {
OptimizeError::ComputationError("Cannot get slice from g".to_string())
})?;
let cg_result = cg_solver.solve(x_slice, g_slice, hvp_fn)?;
n_hvp += cg_result.num_hvp;
let mut step = cg_result.step;
if let Some(ref b) = opts.bounds {
project_step_bounds(&mut step, &x, b);
}
let step_norm = step.dot(&step).sqrt();
if step_norm < opts.cg_options.min_step_norm {
converged = true;
message = "Step size too small, converged".to_string();
break;
}
let slope = g.dot(&step);
let alpha = if opts.use_line_search {
backtrack_line_search(
&mut fun,
&x,
&step,
f,
slope,
opts.c1,
opts.backtrack_rho,
opts.bounds.as_ref(),
&mut n_fev,
)
} else {
1.0
};
let mut x_new = &x + &(alpha * &step);
if let Some(ref b) = opts.bounds {
b.project(x_new.as_slice_mut().ok_or_else(|| {
OptimizeError::ComputationError("Cannot get mutable slice".to_string())
})?);
}
let f_new = {
n_fev += 1;
fun(&x_new.view())
};
let g_new = {
let grad = finite_difference_gradient(&mut fun, &x_new.view(), opts.eps)?;
n_fev += 2 * n;
grad
};
if (f - f_new).abs() < opts.ftol * (1.0 + f.abs()) {
x = x_new;
f = f_new;
g = g_new;
converged = true;
message = "Function value change below tolerance".to_string();
break;
}
x = x_new;
f = f_new;
g = g_new;
iter += 1;
}
Ok(NewtonCGResult {
x,
f_val: f,
gradient: g,
grad_norm_history,
n_iter: iter,
n_fev,
n_hvp,
converged,
message,
})
}
}
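/// Damped Newton method with an explicit finite-difference Hessian: each
/// iteration solves `(H + lambda * I) p = -g` via `regularised_solve` and
/// applies Armijo backtracking. Each iteration costs O(n^2) function
/// evaluations, so this is intended for small n where a dense Hessian is
/// affordable.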
pub struct DampedNewton {
pub gtol: f64,
pub max_iter: usize,
pub eps: f64,
pub lambda_init: f64,
pub c1: f64,
pub rho: f64,
pub bounds: Option<Bounds>,
}
impl Default for DampedNewton {
fn default() -> Self {
Self {
gtol: 1e-5,
max_iter: 200,
eps: f64::EPSILON.sqrt(),
lambda_init: 1e-4,
c1: 1e-4,
rho: 0.5,
bounds: None,
}
}
}
impl DampedNewton {
pub fn minimize<F>(&self, mut fun: F, x0: &[f64]) -> Result<NewtonCGResult, OptimizeError>
where
F: FnMut(&ArrayView1<f64>) -> f64,
{
let n = x0.len();
let mut x = Array1::from(x0.to_vec());
if let Some(ref b) = self.bounds {
b.project(x.as_slice_mut().ok_or_else(|| {
OptimizeError::ComputationError("Cannot get mutable slice".to_string())
})?);
}
let mut n_fev = 0usize;
let mut f = {
n_fev += 1;
fun(&x.view())
};
let mut g = {
let grad = finite_difference_gradient(&mut fun, &x.view(), self.eps)?;
n_fev += 2 * n;
grad
};
let mut grad_norm_history = Vec::new();
let mut iter = 0usize;
let mut converged = false;
let mut message = "Maximum iterations reached".to_string();
while iter < self.max_iter {
let g_norm = g.dot(&g).sqrt();
grad_norm_history.push(g_norm);
if g_norm <= self.gtol {
converged = true;
message = "Gradient norm below tolerance".to_string();
break;
}
let hess = build_finite_diff_hessian(&mut fun, &x.view(), self.eps, &mut n_fev)?;
let g_slice = g.as_slice().ok_or_else(|| {
OptimizeError::ComputationError("Cannot get slice from g".to_string())
})?;
let mut step = regularised_solve(&hess, g_slice, self.lambda_init);
if let Some(ref b) = self.bounds {
project_step_bounds(&mut step, &x, b);
}
let slope = g.dot(&step);
let alpha = backtrack_line_search(
&mut fun,
&x,
&step,
f,
slope,
self.c1,
self.rho,
self.bounds.as_ref(),
&mut n_fev,
);
let mut x_new = &x + &(alpha * &step);
if let Some(ref b) = self.bounds {
b.project(x_new.as_slice_mut().ok_or_else(|| {
OptimizeError::ComputationError("Cannot get mutable slice".to_string())
})?);
}
let f_new = {
n_fev += 1;
fun(&x_new.view())
};
let g_new = {
let grad = finite_difference_gradient(&mut fun, &x_new.view(), self.eps)?;
n_fev += 2 * n;
grad
};
x = x_new;
f = f_new;
g = g_new;
iter += 1;
}
Ok(NewtonCGResult {
x,
f_val: f,
gradient: g,
grad_norm_history,
n_iter: iter,
n_fev,
n_hvp: 0,
converged,
message,
})
}
}
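/// Armijo backtracking: starting from alpha = 1, shrink by `rho` until the
/// sufficient-decrease condition
///   f(x + alpha * step) <= f(x) + c1 * alpha * grad_f(x)^T step
/// holds, projecting each trial point onto `bounds` when present.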
fn backtrack_line_search<F>(
fun: &mut F,
x: &Array1<f64>,
step: &Array1<f64>,
f0: f64,
slope: f64,
c1: f64,
rho: f64,
bounds: Option<&Bounds>,
n_fev: &mut usize,
) -> f64
where
F: FnMut(&ArrayView1<f64>) -> f64,
{
// Not a descent direction: return an essentially zero step length so the
// caller makes no progress instead of increasing f.
if slope >= 0.0 {
return 1e-14;
}
let mut alpha = 1.0;
let max_steps = 60;
for _ in 0..max_steps {
let mut x_trial = x + alpha * step;
if let Some(b) = bounds {
if let Some(s) = x_trial.as_slice_mut() {
b.project(s);
}
}
*n_fev += 1;
let f_trial = fun(&x_trial.view());
if f_trial <= f0 + c1 * alpha * slope {
return alpha;
}
alpha *= rho;
if alpha < 1e-14 {
break;
}
}
alpha
}
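/// Zeroes step components that point out of the feasible box at (nearly)
/// active bounds, so the line search does not waste evaluations pushing
/// against a constraint.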
fn project_step_bounds(step: &mut Array1<f64>, x: &Array1<f64>, bounds: &Bounds) {
for i in 0..x.len() {
if let Some(lb) = bounds.lower[i] {
if (x[i] - lb).abs() < 1e-12 && step[i] < 0.0 {
step[i] = 0.0;
}
}
if let Some(ub) = bounds.upper[i] {
if (x[i] - ub).abs() < 1e-12 && step[i] > 0.0 {
step[i] = 0.0;
}
}
}
}
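/// Dense central-difference Hessian:
///   H[i][i] ~= (f(x + h_i e_i) - 2 f(x) + f(x - h_i e_i)) / h_i^2
///   H[i][j] ~= (f_pp - f_pm - f_mp + f_mm) / (4 h_i h_j)
/// with per-coordinate steps h_i = step * (1 + |x_i|). Costs O(n^2)
/// function evaluations.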
fn build_finite_diff_hessian<F>(
fun: &mut F,
x: &ArrayView1<f64>,
step: f64,
n_fev: &mut usize,
) -> Result<Vec<Vec<f64>>, OptimizeError>
where
F: FnMut(&ArrayView1<f64>) -> f64,
{
let n = x.len();
let mut hess = vec![vec![0.0f64; n]; n];
let mut x_tmp = x.to_owned();
*n_fev += 1;
let f0 = fun(&x.view());
for i in 0..n {
let hi = step * (1.0 + x[i].abs());
x_tmp[i] = x[i] + hi;
*n_fev += 1;
let fp = fun(&x_tmp.view());
x_tmp[i] = x[i] - hi;
*n_fev += 1;
let fm = fun(&x_tmp.view());
x_tmp[i] = x[i];
if !fp.is_finite() || !fm.is_finite() {
return Err(OptimizeError::ComputationError(
"Non-finite value during Hessian computation".to_string(),
));
}
hess[i][i] = (fp - 2.0 * f0 + fm) / (hi * hi);
for j in (i + 1)..n {
let hj = step * (1.0 + x[j].abs());
x_tmp[i] = x[i] + hi;
x_tmp[j] = x[j] + hj;
*n_fev += 1;
let fpp = fun(&x_tmp.view());
x_tmp[i] = x[i] + hi;
x_tmp[j] = x[j] - hj;
*n_fev += 1;
let fpm = fun(&x_tmp.view());
x_tmp[i] = x[i] - hi;
x_tmp[j] = x[j] + hj;
*n_fev += 1;
let fmp = fun(&x_tmp.view());
x_tmp[i] = x[i] - hi;
x_tmp[j] = x[j] - hj;
*n_fev += 1;
let fmm = fun(&x_tmp.view());
x_tmp[i] = x[i];
x_tmp[j] = x[j];
let val = (fpp - fpm - fmp + fmm) / (4.0 * hi * hj);
hess[i][j] = val;
hess[j][i] = val;
}
}
Ok(hess)
}
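/// Solves `(H + lambda * I) p = -g`, increasing lambda tenfold (up to ten
/// times) whenever elimination fails, in the spirit of Levenberg-style
/// damping. Falls back to the steepest-descent direction `-g` if every
/// attempt fails.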
fn regularised_solve(hess: &[Vec<f64>], g: &[f64], lambda_init: f64) -> Array1<f64> {
let n = g.len();
if n == 0 {
return Array1::zeros(0);
}
let mut lambda = lambda_init;
for _ in 0..10 {
let mut a: Vec<Vec<f64>> = (0..n)
.map(|i| {
let mut row = hess[i].clone();
row[i] += lambda;
row
})
.collect();
let mut b: Vec<f64> = g.iter().map(|gi| -gi).collect();
if let Some(sol) = gaussian_elimination(&mut a, &mut b) {
return Array1::from(sol);
}
lambda *= 10.0;
}
Array1::from(g.iter().map(|gi| -gi).collect::<Vec<_>>())
}
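/// Gaussian elimination with partial pivoting; returns `None` when a
/// pivot falls below 1e-14 (matrix numerically singular).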
fn gaussian_elimination(a: &mut [Vec<f64>], b: &mut [f64]) -> Option<Vec<f64>> {
let n = b.len();
for col in 0..n {
let mut max_row = col;
let mut max_val = a[col][col].abs();
for row in (col + 1)..n {
let v = a[row][col].abs();
if v > max_val {
max_val = v;
max_row = row;
}
}
if max_val < 1e-14 {
return None;
}
a.swap(col, max_row);
b.swap(col, max_row);
let pivot = a[col][col];
for row in (col + 1)..n {
let factor = a[row][col] / pivot;
b[row] -= factor * b[col];
for k in col..n {
let v = a[col][k];
a[row][k] -= factor * v;
}
}
}
let mut x = vec![0.0f64; n];
for i in (0..n).rev() {
let mut s = b[i];
for j in (i + 1)..n {
s -= a[i][j] * x[j];
}
if a[i][i].abs() < 1e-14 {
return None;
}
x[i] = s / a[i][i];
}
Some(x)
}
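/// Convenience front-end: runs `NewtonCG` with a finite-difference
/// Hessian-vector-product oracle built from `fun` itself.
///
/// A minimal usage sketch (import paths assumed):
///
/// ```ignore
/// let result = minimize_newton_cg_advanced(
///     |x: &ArrayView1<f64>| (x[0] - 3.0).powi(2) + x[1].powi(2),
///     &[0.0, 0.0],
///     None,
/// )
/// .expect("optimization failed");
/// assert!(result.success);
/// ```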
pub fn minimize_newton_cg_advanced<F>(
fun: F,
x0: &[f64],
options: Option<NewtonCGOptions>,
) -> Result<OptimizeResult<f64>, OptimizeError>
where
F: Fn(&ArrayView1<f64>) -> f64 + Send + Sync + Clone,
{
let opts = options.unwrap_or_default();
let eps = opts.eps;
let fun_clone = fun.clone();
let hvp = FiniteDiffHVP::new(fun_clone, eps);
let solver = NewtonCG::new(opts);
let result = solver.minimize(fun, x0, &hvp)?;
Ok(OptimizeResult {
x: result.x.clone(),
fun: result.f_val,
nit: result.n_iter,
func_evals: result.n_fev,
nfev: result.n_fev,
success: result.converged,
message: result.message,
jacobian: Some(result.gradient),
hessian: None,
})
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
fn rosenbrock(x: &ArrayView1<f64>) -> f64 {
(1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0].powi(2)).powi(2)
}
fn quadratic(x: &ArrayView1<f64>) -> f64 {
x[0] * x[0] + 4.0 * x[1] * x[1]
}
#[test]
fn test_finite_diff_hvp_quadratic() {
let hvp_oracle = FiniteDiffHVP::with_default_step(quadratic);
let x = vec![1.0, 1.0];
let v = vec![1.0, 0.0];
let hv = hvp_oracle.hvp(&x, &v).expect("HVP failed");
assert_abs_diff_eq!(hv[0], 2.0, epsilon = 1e-4);
assert_abs_diff_eq!(hv[1], 0.0, epsilon = 1e-4);
}
#[test]
fn test_truncated_cg_quadratic() {
let hvp_oracle = FiniteDiffHVP::with_default_step(quadratic);
let x = vec![1.0, 1.0];
let g = vec![2.0, 8.0];
let cg = TruncatedCG::default_solver();
let result = cg.solve(&x, &g, &hvp_oracle).expect("CG failed");
assert_abs_diff_eq!(result.step[0], -1.0, epsilon = 1e-3);
assert_abs_diff_eq!(result.step[1], -1.0, epsilon = 1e-3);
}
#[test]
fn test_newton_cg_quadratic() {
let hvp = FiniteDiffHVP::with_default_step(quadratic);
let solver = NewtonCG::default_solver();
let result = solver
.minimize(quadratic, &[2.0, 1.0], &hvp)
.expect("NewtonCG failed");
assert!(result.converged);
assert_abs_diff_eq!(result.x[0], 0.0, epsilon = 1e-5);
assert_abs_diff_eq!(result.x[1], 0.0, epsilon = 1e-5);
}
#[test]
fn test_newton_cg_rosenbrock() {
let hvp = FiniteDiffHVP::with_default_step(rosenbrock);
let opts = NewtonCGOptions {
max_iter: 300,
gtol: 1e-4,
..Default::default()
};
let solver = NewtonCG::new(opts);
let result = solver
.minimize(rosenbrock, &[0.5, 0.5], &hvp)
.expect("Newton-CG failed");
assert!(result.converged);
assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 1e-3);
assert_abs_diff_eq!(result.x[1], 1.0, epsilon = 1e-3);
}
#[test]
fn test_damped_newton_quadratic() {
let solver = DampedNewton::default();
let result = solver
.minimize(quadratic, &[2.0, 1.0])
.expect("DampedNewton failed");
assert!(result.converged);
assert_abs_diff_eq!(result.x[0], 0.0, epsilon = 1e-4);
assert_abs_diff_eq!(result.x[1], 0.0, epsilon = 1e-4);
}
#[test]
fn test_newton_cg_with_bounds() {
let f = |x: &ArrayView1<f64>| (x[0] + 1.0).powi(2) + (x[1] + 1.0).powi(2);
let hvp = FiniteDiffHVP::with_default_step(f);
let opts = NewtonCGOptions {
bounds: Some(Bounds::new(&[(Some(0.0), None), (Some(0.0), None)])),
..Default::default()
};
let solver = NewtonCG::new(opts);
let result = solver.minimize(f, &[0.5, 0.5], &hvp).expect("failed");
assert_abs_diff_eq!(result.x[0], 0.0, epsilon = 0.1);
assert_abs_diff_eq!(result.x[1], 0.0, epsilon = 0.1);
}
}