use super::types::{LbfgsBConfig, OptResult};
use crate::error::OptimizeError;
/// Project `x` onto the box `[lo, hi]` by clamping each coordinate.
/// All three slices are expected to have the same length (extra elements
/// in the longer slices are ignored, as with any `zip`).
pub fn project(x: &[f64], lo: &[f64], hi: &[f64]) -> Vec<f64> {
    let bounds = lo.iter().zip(hi.iter());
    x.iter()
        .zip(bounds)
        .map(|(&xi, (&li, &ui))| xi.clamp(li, ui))
        .collect()
}
/// Sup-norm of the projected gradient: `max_i |x_i - P(x_i - g_i)|`, where
/// `P` clamps into `[lo, hi]`. This is zero exactly at a first-order
/// stationary point of the box-constrained problem, so it serves as the
/// convergence measure.
pub fn projected_grad_norm(x: &[f64], g: &[f64], lo: &[f64], hi: &[f64]) -> f64 {
    let mut norm = 0.0_f64;
    for (((&xi, &gi), &li), &ui) in x.iter().zip(g).zip(lo).zip(hi) {
        let projected = (xi - gi).clamp(li, ui);
        norm = norm.max((xi - projected).abs());
    }
    norm
}
/// Two-loop L-BFGS recursion: returns H * g, where H is the inverse-Hessian
/// approximation built from the correction pairs (`s_hist`, `y_hist`), their
/// precomputed `rho = 1/(s·y)` values, and the initial scaling H0 = gamma*I.
/// With an empty history this is simply `gamma * g`.
pub fn hv_product(
    g: &[f64],
    s_hist: &[Vec<f64>],
    y_hist: &[Vec<f64>],
    rho_hist: &[f64],
    gamma: f64,
) -> Vec<f64> {
    let m = s_hist.len();
    let mut q = g.to_vec();
    let mut alpha = vec![0.0_f64; m];
    // First loop: newest to oldest, q <- q - alpha_i * y_i.
    for i in (0..m).rev() {
        let dot_sq: f64 = s_hist[i].iter().zip(&q).map(|(a, b)| a * b).sum();
        alpha[i] = rho_hist[i] * dot_sq;
        for (qj, yj) in q.iter_mut().zip(&y_hist[i]) {
            *qj -= alpha[i] * yj;
        }
    }
    // Apply the initial scaling H0 = gamma * I.
    let mut r: Vec<f64> = q.into_iter().map(|qi| gamma * qi).collect();
    // Second loop: oldest to newest, r <- r + (alpha_i - beta_i) * s_i.
    for i in 0..m {
        let dot_yr: f64 = y_hist[i].iter().zip(&r).map(|(a, b)| a * b).sum();
        let beta = rho_hist[i] * dot_yr;
        for (rj, sj) in r.iter_mut().zip(&s_hist[i]) {
            *rj += sj * (alpha[i] - beta);
        }
    }
    r
}
/// Minimizer of the cubic Hermite interpolant matching (a, fa, dfa) and
/// (b, fb, dfb), clamped strictly inside the interval [a, b].
///
/// Falls back to bisection when the interpolant has no real interior
/// minimizer (`d2_sq < 0`) or the arithmetic degenerates: `a == b` divides
/// by zero in the `d1` term, and a vanishing `dfb - dfa + 2*d2` denominator
/// yields NaN/inf — the original code propagated that NaN into the line
/// search via `clamp`.
fn cubic_min(a: f64, fa: f64, dfa: f64, b: f64, fb: f64, dfb: f64) -> f64 {
    let mid = 0.5 * (a + b);
    if (b - a).abs() < 1e-300 {
        // Degenerate (zero-width) interval: the d1 formula divides by b - a.
        return mid;
    }
    let d1 = dfa + dfb - 3.0 * (fb - fa) / (b - a);
    let d2_sq = d1 * d1 - dfa * dfb;
    if d2_sq < 0.0 {
        // Complex roots: no interior minimizer of the cubic; bisect.
        return mid;
    }
    let d2 = d2_sq.sqrt();
    let t = b - (b - a) * (dfb + d2 - d1) / (dfb - dfa + 2.0 * d2);
    if !t.is_finite() {
        // Zero denominator or overflow: bisect instead of returning NaN/inf.
        return mid;
    }
    // Keep the trial point strictly inside the bracket so the zoom loop
    // always makes progress.
    let margin = 1e-10 * (a - b).abs();
    t.clamp(a.min(b) + margin, a.max(b) - margin)
}
/// Minimizer of the quadratic interpolant through (a, fa) with slope dfa at
/// `a` and value fb at `b`, clamped into [a, b]. Falls back to the midpoint
/// when the curvature term is numerically zero.
fn quadratic_min(a: f64, fa: f64, dfa: f64, b: f64, fb: f64) -> f64 {
    let step = b - a;
    let curvature = 2.0 * (fb - fa - dfa * step);
    if curvature.abs() < 1e-14 {
        return 0.5 * (a + b);
    }
    let trial = a - dfa * step * step / curvature;
    let (low, high) = if a < b { (a, b) } else { (b, a) };
    trial.clamp(low, high)
}
/// Zoom phase of the strong-Wolfe line search (Nocedal & Wright, Alg. 3.6):
/// interpolates inside the bracket [a_lo, a_hi] until a step satisfying both
/// Wolfe conditions is found or the bracket collapses, then returns the best
/// lower endpoint.
///
/// Trial points are projected onto the box [lo, hi] before evaluation, so the
/// search runs along the projected path. `f0`/`g0` are the objective value
/// and directional derivative at step 0; `c1`/`c2` are the Wolfe constants.
///
/// The directional derivative at `a_hi` is evaluated lazily at most once and
/// then carried along as the bracket endpoints move; the previous version
/// spent one full objective+gradient evaluation per iteration recomputing it.
fn wolfe_zoom<F>(
    f_and_g: &F,
    x: &[f64],
    d: &[f64],
    lo: &[f64],
    hi: &[f64],
    f0: f64,
    g0: f64,
    mut a_lo: f64,
    mut f_lo: f64,
    mut df_lo: f64,
    mut a_hi: f64,
    mut f_hi: f64,
    c1: f64,
    c2: f64,
    max_iter: usize,
) -> Result<f64, OptimizeError>
where
    F: Fn(&[f64]) -> (f64, Vec<f64>),
{
    let n = x.len();
    // Directional derivative g(P(x + a*d)) · d at step length `a`.
    let dir_deriv = |a: f64| -> f64 {
        let xt: Vec<f64> = (0..n)
            .map(|i| (x[i] + a * d[i]).clamp(lo[i], hi[i]))
            .collect();
        let (_, gt) = f_and_g(&xt);
        gt.iter().zip(d.iter()).map(|(gi, di)| gi * di).sum()
    };
    // Cached derivative at a_hi; None until first needed.
    let mut df_hi: Option<f64> = None;
    for _ in 0..max_iter {
        let a_j = if (a_hi - a_lo).abs() < 1e-14 {
            // Bracket is numerically a point: bisect.
            0.5 * (a_lo + a_hi)
        } else {
            let dfh = match df_hi {
                Some(v) => v,
                None => {
                    let v = dir_deriv(a_hi);
                    df_hi = Some(v);
                    v
                }
            };
            cubic_min(a_lo, f_lo, df_lo, a_hi, f_hi, dfh)
        };
        let x_j: Vec<f64> = (0..n)
            .map(|i| (x[i] + a_j * d[i]).clamp(lo[i], hi[i]))
            .collect();
        let (f_j, g_j) = f_and_g(&x_j);
        let df_j: f64 = g_j.iter().zip(d.iter()).map(|(gi, di)| gi * di).sum();
        if f_j > f0 + c1 * a_j * g0 || f_j >= f_lo {
            // Sufficient decrease failed (or no improvement over a_lo):
            // shrink the bracket from above.
            a_hi = a_j;
            f_hi = f_j;
            df_hi = Some(df_j);
        } else {
            if df_j.abs() <= -c2 * g0 {
                // Both strong-Wolfe conditions hold: accept a_j.
                return Ok(a_j);
            }
            if df_j * (a_hi - a_lo) >= 0.0 {
                // Slope points back toward a_lo: the old a_lo becomes the
                // upper end of the bracket.
                a_hi = a_lo;
                f_hi = f_lo;
                df_hi = Some(df_lo);
            }
            a_lo = a_j;
            f_lo = f_j;
            df_lo = df_j;
        }
        if (a_hi - a_lo).abs() < 1e-14 * a_lo.abs().max(1.0) {
            // Bracket collapsed; further interpolation cannot help.
            break;
        }
    }
    // Iterations exhausted or bracket collapsed: return the best known step.
    Ok(a_lo)
}
pub fn wolfe_line_search<F>(
f_and_g: &F,
x: &[f64],
d: &[f64],
lo: &[f64],
hi: &[f64],
f0: f64,
g0: f64,
c1: f64,
c2: f64,
alpha_init: f64,
max_iter: usize,
) -> Result<f64, OptimizeError>
where
F: Fn(&[f64]) -> (f64, Vec<f64>),
{
let n = x.len();
let mut a_prev = 0.0_f64;
let mut a_curr = alpha_init;
let mut f_prev = f0;
for i in 0..max_iter {
let x_curr: Vec<f64> = (0..n)
.map(|i| (x[i] + a_curr * d[i]).clamp(lo[i], hi[i]))
.collect();
let (f_curr, g_curr) = f_and_g(&x_curr);
let df_curr: f64 = g_curr.iter().zip(d.iter()).map(|(gi, di)| gi * di).sum();
if f_curr > f0 + c1 * a_curr * g0 || (i > 0 && f_curr >= f_prev) {
return wolfe_zoom(
f_and_g,
x,
d,
lo,
hi,
f0,
g0,
a_prev,
f_prev,
{
let x_p: Vec<f64> = (0..n)
.map(|j| (x[j] + a_prev * d[j]).clamp(lo[j], hi[j]))
.collect();
let (_, gp) = f_and_g(&x_p);
gp.iter().zip(d.iter()).map(|(gi, di)| gi * di).sum::<f64>()
},
a_curr,
f_curr,
c1,
c2,
30,
);
}
if df_curr.abs() <= -c2 * g0 {
return Ok(a_curr); }
if df_curr >= 0.0 {
return wolfe_zoom(
f_and_g, x, d, lo, hi, f0, g0, a_curr, f_curr, df_curr, a_prev, f_prev, c1, c2, 30,
);
}
a_prev = a_curr;
f_prev = f_curr;
a_curr = (a_curr * 2.0).min(1e10);
}
Ok(a_curr)
}
/// Generalized Cauchy point for the box-constrained problem: walks the
/// piecewise-linear projected steepest-descent path x(t) = P(x - t*g),
/// visiting the breakpoints (the t at which each coordinate reaches its
/// bound) in increasing order, and stops at the first segment whose local
/// quadratic model has its minimizer inside the segment.
///
/// Returns the Cauchy point `xc` together with a `free` mask: `free[i]` is
/// false for coordinates treated as fixed at a bound at the returned point.
/// Curvature along the path is estimated through `hv_product` over the
/// L-BFGS history (`s_hist`, `y_hist`, `rho_hist`, `gamma`).
pub fn cauchy_point(
    x: &[f64],
    g: &[f64],
    lo: &[f64],
    hi: &[f64],
    s_hist: &[Vec<f64>],
    y_hist: &[Vec<f64>],
    rho_hist: &[f64],
    gamma: f64,
) -> (Vec<f64>, Vec<bool>) {
    let n = x.len();
    // Breakpoints: t_i > 0 at which coordinate i hits the bound it moves
    // toward under d = -g. Coordinates with zero gradient never move and
    // produce no breakpoint.
    let mut breakpoints: Vec<(f64, usize)> = (0..n)
        .filter_map(|i| {
            let ti = if g[i] < 0.0 {
                (x[i] - hi[i]) / g[i]
            } else if g[i] > 0.0 {
                (x[i] - lo[i]) / g[i]
            } else {
                return None;
            };
            if ti > 0.0 {
                Some((ti, i))
            } else {
                None
            }
        })
        .collect();
    // Visit breakpoints in increasing t order; NaN-safe comparison treats
    // incomparable values as equal rather than panicking.
    breakpoints.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
    let mut xc = x.to_vec();
    let mut free: Vec<bool> = vec![true; n];
    for i in 0..n {
        if g[i] == 0.0 {
            free[i] = false;
        }
    }
    let mut t_prev = 0.0_f64;
    // Search direction restricted to free coordinates (steepest descent).
    let d: Vec<f64> = (0..n).map(|i| if free[i] { -g[i] } else { 0.0 }).collect();
    // NOTE(review): fp0 (the initial directional derivative) is computed but
    // never used — the per-segment slope `fp` below is recomputed from
    // scratch each iteration instead of being updated incrementally.
    let fp0: f64 = d.iter().zip(g.iter()).map(|(di, gi)| di * gi).sum();
    let _ = fp0;
    for &(t_j, idx) in &breakpoints {
        // NOTE(review): dt (segment length) is currently unused.
        let dt = t_j - t_prev;
        // Model slope over the current segment, restricted to free coords.
        let fp: f64 = (0..n)
            .filter(|&i| free[i])
            .map(|i| g[i] * d[i])
            .sum::<f64>();
        let d_free: Vec<f64> = (0..n).map(|i| if free[i] { d[i] } else { 0.0 }).collect();
        let hv = hv_product(&d_free, s_hist, y_hist, rho_hist, gamma);
        let fpp: f64 = d_free
            .iter()
            .zip(hv.iter())
            .map(|(di, hi)| di * hi)
            .sum::<f64>();
        // NOTE(review): hv_product applies the INVERSE Hessian approximation,
        // so the sum above is d' H^{-1} d; it is inverted here via
        // ||d||^2 / (d' H^{-1} d) as a Rayleigh-quotient-style estimate of
        // the true curvature d' B d, with 1/gamma as the fallback when the
        // quotient degenerates — confirm this is the intended model.
        let fpp = if fpp < 1e-14 {
            1.0 / gamma
        } else {
            let dn2: f64 = d_free.iter().map(|di| di * di).sum();
            if dn2 > 1e-14 {
                dn2 / fpp
            } else {
                1.0 / gamma
            }
        };
        if fpp > 0.0 {
            // Unconstrained minimizer of the segment's 1-D quadratic model.
            let t_star = t_prev - fp / fpp;
            if t_star < t_j {
                // Minimizer lies inside this segment: stop here.
                for i in 0..n {
                    if free[i] {
                        xc[i] = (x[i] + t_star * d[i]).clamp(lo[i], hi[i]);
                    }
                }
                // Rebuild the free mask relative to t_star: first mark every
                // breakpoint past t_prev as fixed, then re-free those whose
                // breakpoint lies beyond t_star (order of the two passes
                // matters for breakpoints in (t_prev, t_star]).
                for bp in &breakpoints {
                    if bp.0 > t_prev {
                        free[bp.1] = false;
                    }
                }
                for &(tb, bi) in &breakpoints {
                    if tb > t_star {
                        free[bi] = true;
                    }
                }
                return (xc, free);
            }
        }
        // Advance to the breakpoint; coordinate `idx` hits its bound there
        // and becomes fixed.
        for i in 0..n {
            if free[i] {
                xc[i] = (x[i] + t_j * d[i]).clamp(lo[i], hi[i]);
            }
        }
        free[idx] = false;
        t_prev = t_j;
    }
    // All breakpoints passed: every moving coordinate ended at a bound.
    (xc, free)
}
/// L-BFGS-B optimizer: limited-memory BFGS with box (bound) constraints.
/// All behavior (memory size, tolerance, iteration limits, Wolfe constants)
/// is driven by `config`.
pub struct LbfgsBOptimizer {
    pub config: LbfgsBConfig,
}
impl LbfgsBOptimizer {
    /// Create an optimizer with an explicit configuration.
    pub fn new(config: LbfgsBConfig) -> Self {
        Self { config }
    }
    /// Create an optimizer with the default `LbfgsBConfig`.
    pub fn default_config() -> Self {
        Self {
            config: LbfgsBConfig::default(),
        }
    }
    /// Minimize `f_and_g` subject to `lo <= x <= hi`, starting from `x0`
    /// (which is projected into the box first).
    ///
    /// `f_and_g` returns the objective value and its gradient at a point.
    /// Convergence is declared when the projected-gradient sup-norm drops
    /// below `config.tol`.
    ///
    /// # Errors
    /// Returns `OptimizeError::ValueError` when the bound vectors have the
    /// wrong length or any `lo[i] > hi[i]`.
    pub fn minimize<F>(
        &self,
        f_and_g: &F,
        x0: &[f64],
        lo: &[f64],
        hi: &[f64],
    ) -> Result<OptResult, OptimizeError>
    where
        F: Fn(&[f64]) -> (f64, Vec<f64>),
    {
        let n = x0.len();
        if lo.len() != n || hi.len() != n {
            return Err(OptimizeError::ValueError(format!(
                "Bound vectors must have length {}, got lo={} hi={}",
                n,
                lo.len(),
                hi.len()
            )));
        }
        for i in 0..n {
            if lo[i] > hi[i] {
                return Err(OptimizeError::ValueError(format!(
                    "lo[{}]={} > hi[{}]={}",
                    i, lo[i], i, hi[i]
                )));
            }
        }
        let cfg = &self.config;
        let m = cfg.m;
        let mut x = project(x0, lo, hi);
        let (mut f_val, mut g) = f_and_g(&x);
        // L-BFGS correction pairs (most recent last) and their 1/(s·y).
        let mut s_hist: Vec<Vec<f64>> = Vec::with_capacity(m);
        let mut y_hist: Vec<Vec<f64>> = Vec::with_capacity(m);
        let mut rho_hist: Vec<f64> = Vec::with_capacity(m);
        // Initial inverse-Hessian scaling H0 = gamma * I.
        let mut gamma = 1.0_f64;
        let mut n_iter = 0usize;
        let mut converged = false;
        for iter in 0..cfg.max_iter {
            n_iter = iter;
            let pg_norm = projected_grad_norm(&x, &g, lo, hi);
            if pg_norm < cfg.tol {
                converged = true;
                break;
            }
            // Zero out direction components that would push a coordinate
            // already at a bound further into it. (This was duplicated
            // verbatim for both direction choices in the previous version.)
            let clip_active = |dir: &mut [f64]| {
                for i in 0..n {
                    let at_lo = x[i] <= lo[i] + 1e-12;
                    let at_hi = x[i] >= hi[i] - 1e-12;
                    if (at_lo && dir[i] < 0.0) || (at_hi && dir[i] > 0.0) {
                        dir[i] = 0.0;
                    }
                }
            };
            // Quasi-Newton direction d = -H*g via the two-loop recursion.
            let hg = hv_product(&g, &s_hist, &y_hist, &rho_hist, gamma);
            let mut d: Vec<f64> = hg.iter().map(|v| -v).collect();
            clip_active(&mut d);
            let mut slope: f64 = g.iter().zip(d.iter()).map(|(gi, di)| gi * di).sum();
            if slope >= 0.0 {
                // Clipped quasi-Newton direction is not a descent direction;
                // fall back to clipped steepest descent.
                d = g.iter().map(|gi| -gi).collect();
                clip_active(&mut d);
                slope = g.iter().zip(d.iter()).map(|(gi, di)| gi * di).sum();
            }
            if slope >= 0.0 {
                // No feasible descent direction remains: stop.
                converged = pg_norm < cfg.tol;
                break;
            }
            // A failed line search falls back to a tiny step so the outer
            // iteration can still make progress.
            let alpha = match wolfe_line_search(
                f_and_g,
                &x,
                &d,
                lo,
                hi,
                f_val,
                slope,
                cfg.c1,
                cfg.c2,
                cfg.alpha_init,
                cfg.max_ls_iter,
            ) {
                Ok(a) => a,
                Err(_) => 1e-8,
            };
            let x_new: Vec<f64> = (0..n)
                .map(|i| (x[i] + alpha * d[i]).clamp(lo[i], hi[i]))
                .collect();
            let (f_new, g_new) = f_and_g(&x_new);
            let s: Vec<f64> = (0..n).map(|i| x_new[i] - x[i]).collect();
            let y: Vec<f64> = (0..n).map(|i| g_new[i] - g[i]).collect();
            let sy: f64 = s.iter().zip(y.iter()).map(|(si, yi)| si * yi).sum();
            // Curvature condition: only store pairs with sufficiently
            // positive s·y so the inverse-Hessian stays positive definite.
            if sy > 1e-14 * s.iter().map(|si| si * si).sum::<f64>().sqrt() {
                if s_hist.len() == m {
                    // Memory full: drop the oldest pair.
                    s_hist.remove(0);
                    y_hist.remove(0);
                    rho_hist.remove(0);
                }
                let yy: f64 = y.iter().map(|yi| yi * yi).sum();
                // Standard (s·y)/(y·y) scaling for H0; keep the previous
                // gamma if y is numerically zero.
                gamma = if yy > 1e-14 { sy / yy } else { gamma };
                rho_hist.push(1.0 / sy);
                s_hist.push(s);
                y_hist.push(y);
            }
            x = x_new;
            f_val = f_new;
            g = g_new;
        }
        let grad_norm = g.iter().map(|gi| gi * gi).sum::<f64>().sqrt();
        Ok(OptResult {
            x,
            f_val,
            grad_norm,
            n_iter,
            converged,
        })
    }
}
// Unit tests: analytic objectives with known minima exercise the optimizer,
// the Wolfe line search, and the two-loop recursion.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::second_order::types::LbfgsBConfig;
    // Rosenbrock function and its analytic gradient; minimum at (1, 1).
    fn rosenbrock(x: &[f64]) -> (f64, Vec<f64>) {
        let f = (1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0].powi(2)).powi(2);
        let g0 = -2.0 * (1.0 - x[0]) - 400.0 * x[0] * (x[1] - x[0].powi(2));
        let g1 = 200.0 * (x[1] - x[0].powi(2));
        (f, vec![g0, g1])
    }
    // Separable convex quadratic f(x) = 0.5 * ||x||^2 with gradient x.
    fn quadratic(x: &[f64]) -> (f64, Vec<f64>) {
        let f: f64 = x.iter().map(|xi| 0.5 * xi * xi).sum();
        let g: Vec<f64> = x.to_vec();
        (f, g)
    }
    // Unconstrained quadratic: the optimizer should reach the origin.
    #[test]
    fn test_lbfgsb_quadratic() {
        let opt = LbfgsBOptimizer::default_config();
        let x0 = vec![3.0, -2.0, 1.5];
        let lo = vec![f64::NEG_INFINITY; 3];
        let hi = vec![f64::INFINITY; 3];
        let result = opt
            .minimize(&quadratic, &x0, &lo, &hi)
            .expect("minimize failed");
        for xi in &result.x {
            assert!(xi.abs() < 1e-5, "Expected x≈0, got {}", xi);
        }
        assert!(result.converged);
    }
    // Unconstrained Rosenbrock: a harder, ill-conditioned valley; the
    // tolerance on the solution is deliberately loose.
    #[test]
    fn test_lbfgsb_rosenbrock() {
        let mut cfg = LbfgsBConfig::default();
        cfg.max_iter = 2000;
        cfg.tol = 1e-5;
        let opt = LbfgsBOptimizer::new(cfg);
        let x0 = vec![-1.0, 1.0];
        let lo = vec![f64::NEG_INFINITY; 2];
        let hi = vec![f64::INFINITY; 2];
        let result = opt
            .minimize(&rosenbrock, &x0, &lo, &hi)
            .expect("minimize failed");
        assert!(
            (result.x[0] - 1.0).abs() < 0.05 && (result.x[1] - 1.0).abs() < 0.05,
            "Rosenbrock solution wrong: {:?}",
            result.x
        );
    }
    // Every iterate returned must respect the box constraints.
    #[test]
    fn test_lbfgsb_box_constraint() {
        let opt = LbfgsBOptimizer::default_config();
        let x0 = vec![3.0, 3.0];
        let lo = vec![-1.0, -1.0];
        let hi = vec![1.0, 1.0];
        let result = opt
            .minimize(&quadratic, &x0, &lo, &hi)
            .expect("minimize failed");
        for (xi, (li, ui)) in result.x.iter().zip(lo.iter().zip(hi.iter())) {
            assert!(
                *xi >= *li - 1e-9 && *xi <= *ui + 1e-9,
                "Bound violated: {}",
                xi
            );
        }
    }
    // The unconstrained minimizer (-2) lies outside the box, so the solution
    // should sit at the active lower bound x = 0.
    #[test]
    fn test_lbfgsb_active_constraint() {
        let opt = LbfgsBOptimizer::default_config();
        let x0 = vec![1.0];
        let lo = vec![0.0];
        let hi = vec![f64::INFINITY];
        let obj = |x: &[f64]| -> (f64, Vec<f64>) {
            let f = (x[0] + 2.0).powi(2);
            let g = vec![2.0 * (x[0] + 2.0)];
            (f, g)
        };
        let result = opt.minimize(&obj, &x0, &lo, &hi).expect("minimize failed");
        assert!(
            result.x[0] < 0.01,
            "Active bound not satisfied: x={}",
            result.x[0]
        );
    }
    // The step returned by the line search must satisfy both strong-Wolfe
    // conditions on a simple 1-D quadratic.
    #[test]
    fn test_wolfe_conditions_satisfied() {
        let obj = |x: &[f64]| -> (f64, Vec<f64>) {
            let f = 0.5 * x[0] * x[0];
            let g = vec![x[0]];
            (f, g)
        };
        let x = vec![2.0];
        let d = vec![-2.0];
        let lo = vec![f64::NEG_INFINITY];
        let hi = vec![f64::INFINITY];
        let f0 = 2.0;
        let g0 = -4.0_f64;
        let c1 = 1e-4;
        let c2 = 0.9;
        let alpha = wolfe_line_search(&obj, &x, &d, &lo, &hi, f0, g0, c1, c2, 1.0, 30)
            .expect("line search failed");
        let x_new: Vec<f64> = x
            .iter()
            .zip(d.iter())
            .map(|(xi, di)| xi + alpha * di)
            .collect();
        let (f_new, g_new) = obj(&x_new);
        // Sufficient decrease (Armijo) condition.
        assert!(f_new <= f0 + c1 * alpha * g0, "Wolfe c1 violated");
        let slope_new: f64 = g_new.iter().zip(d.iter()).map(|(gi, di)| gi * di).sum();
        // Curvature condition.
        assert!(slope_new.abs() <= -c2 * g0, "Wolfe c2 violated");
    }
    // Smoke test: the two-loop recursion returns a finite vector of the
    // right length for a single correction pair.
    #[test]
    fn test_two_loop_recursion() {
        let n = 4;
        let s = vec![1.0, 0.0, 0.0, 0.0];
        let y = vec![3.0, 0.0, 0.0, 0.0];
        let rho = 1.0 / 3.0;
        let g = vec![1.0; n];
        let result = hv_product(&g, &[s], &[y], &[rho], 1.0);
        assert_eq!(result.len(), n);
        assert!(result.iter().all(|v| v.is_finite()));
    }
    // Both small and large history sizes should solve an anisotropic
    // quadratic to high accuracy.
    #[test]
    fn test_lbfgsb_memory_m() {
        let obj = |x: &[f64]| -> (f64, Vec<f64>) {
            let f = 0.5 * (x[0] * x[0] + 4.0 * x[1] * x[1]);
            let g = vec![x[0], 4.0 * x[1]];
            (f, g)
        };
        let mut cfg5 = LbfgsBConfig::default();
        cfg5.m = 5;
        cfg5.max_iter = 500;
        let opt5 = LbfgsBOptimizer::new(cfg5);
        let mut cfg10 = LbfgsBConfig::default();
        cfg10.m = 10;
        cfg10.max_iter = 500;
        let opt10 = LbfgsBOptimizer::new(cfg10);
        let x0 = vec![3.0, 2.0];
        let lo = vec![f64::NEG_INFINITY; 2];
        let hi = vec![f64::INFINITY; 2];
        let r5 = opt5.minimize(&obj, &x0, &lo, &hi).expect("m=5 failed");
        let r10 = opt10.minimize(&obj, &x0, &lo, &hi).expect("m=10 failed");
        assert!(r5.f_val.abs() < 1e-8, "m=5 solution wrong: {}", r5.f_val);
        assert!(r10.f_val.abs() < 1e-8, "m=10 solution wrong: {}", r10.f_val);
    }
    // Sanity checks on directional-derivative signs, plus a line search run
    // that must produce a positive finite step.
    #[test]
    fn test_lbfgsb_gradient_check() {
        let obj = |x: &[f64]| -> (f64, Vec<f64>) {
            let f = (x[0] - 1.0).powi(2) + (x[1] + 2.0).powi(2);
            let g = vec![2.0 * (x[0] - 1.0), 2.0 * (x[1] + 2.0)];
            (f, g)
        };
        let x = vec![0.0, 0.0];
        let lo = vec![f64::NEG_INFINITY; 2];
        let hi = vec![f64::INFINITY; 2];
        let (f0, g0_vec) = obj(&x);
        let d_desc: Vec<f64> = g0_vec.iter().map(|v| -v).collect();
        let g0_desc: f64 = g0_vec
            .iter()
            .zip(d_desc.iter())
            .map(|(gi, di)| gi * di)
            .sum();
        assert!(
            g0_desc < 0.0,
            "Descent slope should be negative, got {}",
            g0_desc
        );
        let d_asc: Vec<f64> = g0_vec.clone();
        let g0_asc: f64 = g0_vec
            .iter()
            .zip(d_asc.iter())
            .map(|(gi, di)| gi * di)
            .sum();
        assert!(
            g0_asc > 0.0,
            "Ascent slope should be positive, got {}",
            g0_asc
        );
        let alpha = wolfe_line_search(&obj, &x, &d_desc, &lo, &hi, f0, g0_desc, 1e-4, 0.9, 1.0, 30)
            .expect("ls failed");
        assert!(
            alpha > 0.0 && alpha.is_finite(),
            "Step size invalid: {}",
            alpha
        );
    }
    // The generalized Cauchy point must stay inside the box.
    #[test]
    fn test_cauchy_point_feasible() {
        let x = vec![0.5, 0.5];
        let g = vec![1.0, -1.0];
        let lo = vec![0.0, 0.0];
        let hi = vec![1.0, 1.0];
        let (xc, _free) = cauchy_point(&x, &g, &lo, &hi, &[], &[], &[], 1.0);
        for (xi, (li, ui)) in xc.iter().zip(lo.iter().zip(hi.iter())) {
            assert!(*xi >= *li - 1e-10, "Cauchy point violates lower bound");
            assert!(*xi <= *ui + 1e-10, "Cauchy point violates upper bound");
        }
    }
}