use crate::error::OptimizeError;
use crate::unconstrained::result::OptimizeResult;
use crate::unconstrained::utils::finite_difference_gradient;
use crate::unconstrained::Bounds;
use scirs2_core::ndarray::{Array1, ArrayView1};
/// Helpers for computing projected gradients under box constraints.
pub struct ProjectedGradient;

impl ProjectedGradient {
    /// Returns the gradient with components that point out of the feasible
    /// box zeroed out (a coordinate counts as "on a bound" within 1e-12).
    ///
    /// At a lower bound only non-positive components survive (a descent step
    /// along `-g` then increases `x`, which stays feasible); at an upper
    /// bound only non-negative ones; a coordinate pinned on both sides
    /// contributes 0; free coordinates pass through unchanged.
    ///
    /// Fix: the lower-bound branch previously computed
    /// `g.min(0.0).abs() * g.signum() * (-1.0)`, which equals `-g.min(0.0)`
    /// — the sign was flipped relative to the `g.max(0.0)` upper-bound
    /// branch and to `optimality_measure`'s convention. It now keeps the
    /// sign: `g.min(0.0)`.
    pub fn project(
        x: &[f64],
        g: &[f64],
        lower: &[Option<f64>],
        upper: &[Option<f64>],
    ) -> Vec<f64> {
        let n = x.len();
        let mut pg = vec![0.0f64; n];
        for i in 0..n {
            let at_lb = lower[i].map_or(false, |l| (x[i] - l).abs() < 1e-12);
            let at_ub = upper[i].map_or(false, |u| (x[i] - u).abs() < 1e-12);
            pg[i] = if at_lb && at_ub {
                0.0
            } else if at_lb {
                g[i].min(0.0)
            } else if at_ub {
                g[i].max(0.0)
            } else {
                g[i]
            };
        }
        pg
    }

    /// Infinity norm of `x - P(x - g)`, where `P` clamps componentwise into
    /// the box. This is the standard first-order optimality measure for
    /// bound constraints: it is 0 exactly at a KKT point.
    pub fn optimality_measure(
        x: &[f64],
        g: &[f64],
        lower: &[Option<f64>],
        upper: &[Option<f64>],
    ) -> f64 {
        let n = x.len();
        let mut max_val = 0.0f64;
        for i in 0..n {
            let x_minus_g = x[i] - g[i];
            // Clamp the gradient step back into [lower, upper].
            let proj = match (lower[i], upper[i]) {
                (Some(l), Some(u)) => x_minus_g.max(l).min(u),
                (Some(l), None) => x_minus_g.max(l),
                (None, Some(u)) => x_minus_g.min(u),
                (None, None) => x_minus_g,
            };
            let val = (x[i] - proj).abs();
            if val > max_val {
                max_val = val;
            }
        }
        max_val
    }
}
/// Computation of the generalized Cauchy point along the projected
/// steepest-descent path (first stage of an L-BFGS-B iteration).
pub struct CauchyPoint;

impl CauchyPoint {
    /// Walks the piecewise-linear path `P(x - t*g)` segment by segment and
    /// returns `(x_cauchy, free)`, where `free[i]` is true for coordinates
    /// that did not hit a bound (the active set is its complement).
    ///
    /// The curvature along each segment uses `theta * |g_free|^2` plus the
    /// scalar heuristic from `l_bfgs_correction_scalar`.
    ///
    /// Cleanup: removed the dead `dt`/`bt` bindings and documented the
    /// deliberately-empty degenerate-curvature branch.
    #[allow(clippy::too_many_arguments)]
    pub fn compute(
        x: &[f64],
        g: &[f64],
        lower: &[Option<f64>],
        upper: &[Option<f64>],
        theta: f64,
        s_vecs: &[Vec<f64>],
        y_vecs: &[Vec<f64>],
    ) -> (Vec<f64>, Vec<bool>) {
        let n = x.len();
        // Breakpoint t_i: step length at which coordinate i hits its bound
        // when moving along -g (infinite if unbounded in that direction).
        let d: Vec<f64> = (0..n)
            .map(|i| {
                if g[i] < 0.0 {
                    upper[i].map_or(f64::INFINITY, |u| (u - x[i]) / (-g[i]).max(1e-300))
                } else if g[i] > 0.0 {
                    lower[i].map_or(f64::INFINITY, |l| (x[i] - l) / g[i].max(1e-300))
                } else {
                    f64::INFINITY
                }
            })
            .collect();
        let mut break_pts: Vec<(f64, usize)> = Vec::with_capacity(n);
        for (i, &ti) in d.iter().enumerate() {
            if ti.is_finite() && ti >= 0.0 {
                break_pts.push((ti, i));
            }
        }
        // Examine breakpoints in increasing order of step length.
        break_pts.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
        let mut xc = x.to_vec();
        let mut active = vec![false; n];
        let mut t_prev = 0.0;
        for &(t, fixed_var) in &break_pts {
            // Directional derivative and curvature of the quadratic model,
            // restricted to the still-free coordinates (direction = -g).
            let g_dot_d: f64 = (0..n)
                .filter(|&i| !active[i])
                .map(|i| g[i] * (-g[i]))
                .sum();
            let d_hd: f64 = {
                let g_free_sq: f64 = (0..n)
                    .filter(|&i| !active[i])
                    .map(|i| g[i] * g[i])
                    .sum();
                theta * g_free_sq
                    + l_bfgs_correction_scalar(g, s_vecs, y_vecs, theta, &active)
            };
            if d_hd < 1e-300 {
                // Degenerate (or NaN) curvature: no interior minimizer on
                // this segment; fall through and pin this breakpoint's
                // variable below.
            } else {
                let t_min = t_prev - g_dot_d / d_hd;
                if t_min <= t {
                    // The 1-D minimizer lies inside this segment: stop here.
                    let t_star = t_min.max(t_prev);
                    for i in 0..n {
                        if !active[i] {
                            xc[i] = x[i] - g[i] * t_star;
                            if let Some(l) = lower[i] {
                                xc[i] = xc[i].max(l);
                            }
                            if let Some(u) = upper[i] {
                                xc[i] = xc[i].min(u);
                            }
                        }
                    }
                    // Coordinates whose breakpoints were already passed are
                    // pinned at the bound the descent direction runs into.
                    for &(_, bi) in break_pts.iter().filter(|&&(bt, _)| bt <= t_star) {
                        active[bi] = true;
                        xc[bi] = if g[bi] > 0.0 {
                            lower[bi].unwrap_or(x[bi])
                        } else {
                            upper[bi].unwrap_or(x[bi])
                        };
                    }
                    let free = active.iter().map(|a| !a).collect();
                    return (xc, free);
                }
            }
            // Minimizer lies beyond this breakpoint: fix the variable at its
            // bound and continue with the next segment.
            active[fixed_var] = true;
            xc[fixed_var] = if g[fixed_var] > 0.0 {
                lower[fixed_var].unwrap_or(x[fixed_var])
            } else {
                upper[fixed_var].unwrap_or(x[fixed_var])
            };
            t_prev = t;
        }
        // No interior minimizer found: evaluate the path at the last
        // breakpoint for the coordinates that never hit a bound.
        for i in 0..n {
            if !active[i] {
                xc[i] = x[i] - g[i] * t_prev;
                if let Some(l) = lower[i] {
                    xc[i] = xc[i].max(l);
                }
                if let Some(u) = upper[i] {
                    xc[i] = xc[i].min(u);
                }
            }
        }
        let free = active.iter().map(|a| !a).collect();
        (xc, free)
    }
}
/// Scalar curvature correction used by `CauchyPoint::compute`.
///
/// NOTE(review): this is a cheap diagonal heuristic, not the exact
/// `d' W M W' d` term of the reference L-BFGS-B algorithm. It rescales the
/// free-gradient squared norm by the latest-pair scaling `s.y / y.y`
/// relative to `theta`, and returns 0 when no history is stored.
///
/// Cleanup: removed the unused locals `n` shadowing and `m` (the original
/// computed `m` and immediately discarded it via `let _ = m;`).
fn l_bfgs_correction_scalar(
    g: &[f64],
    s_vecs: &[Vec<f64>],
    y_vecs: &[Vec<f64>],
    theta: f64,
    active: &[bool],
) -> f64 {
    if s_vecs.is_empty() {
        return 0.0;
    }
    let n = g.len();
    // Estimate the inverse-Hessian scale from the most recent (s, y) pair;
    // fall back to 1/theta when the pair has non-positive curvature.
    let mut h_diag = 1.0 / theta;
    if let (Some(s_last), Some(y_last)) = (s_vecs.last(), y_vecs.last()) {
        let sy: f64 = s_last.iter().zip(y_last.iter()).map(|(s, y)| s * y).sum();
        let yy: f64 = y_last.iter().map(|y| y * y).sum();
        if sy > 0.0 && yy > 0.0 {
            h_diag = sy / yy;
        }
    }
    // Squared gradient norm over the free (non-active) coordinates only.
    let g_free_sq: f64 = (0..n)
        .filter(|&i| !active[i])
        .map(|i| g[i] * g[i])
        .sum();
    g_free_sq * (1.0 / h_diag - theta) * 0.5
}
/// Approximate minimization of the quadratic model over the free variables
/// (second stage of an L-BFGS-B iteration).
pub struct SubspaceMinimization;

impl SubspaceMinimization {
    /// Starting from the Cauchy point, takes one quasi-Newton step over the
    /// free coordinates and clips the result back into the box. Returns the
    /// Cauchy point unchanged when no variable is free.
    ///
    /// Fix: the reduced gradient was assembled with
    /// `free_indices.iter().position(|&j| j == i).unwrap_or(0)`, an O(n^2)
    /// self-lookup (the position of `free_indices[k]` is always `k`);
    /// replaced with direct indexing — identical values, linear time.
    #[allow(clippy::too_many_arguments)]
    pub fn minimize(
        x_cauchy: &[f64],
        x: &[f64],
        g: &[f64],
        free: &[bool],
        lower: &[Option<f64>],
        upper: &[Option<f64>],
        theta: f64,
        s_vecs: &[Vec<f64>],
        y_vecs: &[Vec<f64>],
        rho_vals: &[f64],
    ) -> Vec<f64> {
        let n = x.len();
        let free_indices: Vec<usize> = (0..n).filter(|&i| free[i]).collect();
        if free_indices.is_empty() {
            // Every variable is pinned at a bound: nothing to minimize over.
            return x_cauchy.to_vec();
        }
        // Reduced gradient at the Cauchy point: g + B * (x_c - x), kept only
        // on the free coordinates (zero elsewhere in the full-size vector).
        let delta: Vec<f64> = (0..n).map(|i| x_cauchy[i] - x[i]).collect();
        let h_delta = l_bfgs_product(&delta, s_vecs, y_vecs, rho_vals, theta);
        let mut r_full = vec![0.0f64; n];
        for &fi in &free_indices {
            r_full[fi] = g[fi] + h_delta[fi];
        }
        // Quasi-Newton step -H * r via the two-loop recursion.
        let step_full = l_bfgs_two_loop_neg(&r_full, s_vecs, y_vecs, rho_vals, theta);
        let mut x_new = x_cauchy.to_vec();
        for &fi in &free_indices {
            x_new[fi] += step_full[fi];
            // Project each updated coordinate back into its bounds.
            if let Some(l) = lower[fi] {
                x_new[fi] = x_new[fi].max(l);
            }
            if let Some(u) = upper[fi] {
                x_new[fi] = x_new[fi].min(u);
            }
        }
        x_new
    }
}
/// Classic L-BFGS two-loop recursion, returning the NEGATED product
/// `-(H * q_in)` so the caller receives a descent step directly.
///
/// `H` is the implicit inverse-Hessian approximation built from the stored
/// `(s, y, rho)` triples; the initial scaling is `s.y / y.y` from the newest
/// pair, falling back to `1/theta` when no usable history exists.
fn l_bfgs_two_loop_neg(
    q_in: &[f64],
    s_vecs: &[Vec<f64>],
    y_vecs: &[Vec<f64>],
    rho_vals: &[f64],
    theta: f64,
) -> Vec<f64> {
    // Guard against ragged history lengths by using the common prefix.
    let pairs = s_vecs.len().min(y_vecs.len()).min(rho_vals.len());
    let mut q = q_in.to_vec();
    let mut alphas = vec![0.0f64; pairs];
    // Backward pass: newest correction pair first.
    for k in (0..pairs).rev() {
        let a = rho_vals[k] * dot(&s_vecs[k], &q);
        alphas[k] = a;
        for (qi, yi) in q.iter_mut().zip(y_vecs[k].iter()) {
            *qi -= a * yi;
        }
    }
    // Initial inverse-Hessian scaling gamma = s'y / y'y.
    let gamma = if pairs == 0 {
        1.0 / theta
    } else {
        let sy = dot(&s_vecs[pairs - 1], &y_vecs[pairs - 1]);
        let yy = dot(&y_vecs[pairs - 1], &y_vecs[pairs - 1]);
        if yy > 1e-300 { sy / yy } else { 1.0 / theta }
    };
    let mut r: Vec<f64> = q.iter().map(|qi| gamma * qi).collect();
    // Forward pass: oldest correction pair first.
    for k in 0..pairs {
        let beta = rho_vals[k] * dot(&y_vecs[k], &r);
        for (ri, si) in r.iter_mut().zip(s_vecs[k].iter()) {
            *ri += si * (alphas[k] - beta);
        }
    }
    // Negate so the result is a step direction rather than H*q.
    r.iter().map(|ri| -ri).collect()
}
/// Forward Hessian-vector product `B * v` for the limited-memory model,
/// starting from the diagonal term `theta * v` and adding one rank-two
/// update per stored `(s, y)` pair. Pairs with numerically vanishing `s.y`
/// are skipped.
fn l_bfgs_product(
    v: &[f64],
    s_vecs: &[Vec<f64>],
    y_vecs: &[Vec<f64>],
    rho_vals: &[f64],
    theta: f64,
) -> Vec<f64> {
    // Use the common prefix in case the history vectors have ragged lengths.
    let pairs = s_vecs.len().min(y_vecs.len()).min(rho_vals.len());
    // Diagonal term theta * v seeds the accumulator.
    let mut out: Vec<f64> = v.iter().map(|vi| theta * vi).collect();
    for k in 0..pairs {
        let s = &s_vecs[k];
        let y = &y_vecs[k];
        let sy: f64 = dot(s, y);
        // Degenerate curvature pair: contributes nothing meaningful.
        if sy.abs() < 1e-300 {
            continue;
        }
        let sv: f64 = dot(s, v);
        let yv: f64 = dot(y, v);
        let rho = rho_vals[k];
        for (i, oi) in out.iter_mut().enumerate() {
            *oi += rho * y[i] * yv - theta * rho * sy * s[i] * sv / sy.max(1e-300);
        }
    }
    out
}
/// Dense dot product of two equal-length `f64` slices (sequential
/// left-to-right accumulation, matching the previous `sum()` order).
#[inline]
fn dot(a: &[f64], b: &[f64]) -> f64 {
    let mut acc = 0.0;
    for (&ai, &bi) in a.iter().zip(b.iter()) {
        acc += ai * bi;
    }
    acc
}
/// Outcome of an L-BFGS-B run (see [`LBFGSB::minimize`]).
#[derive(Debug, Clone)]
pub struct LBFGSBResult {
    /// Final iterate (lies inside the configured bounds).
    pub x: Array1<f64>,
    /// Objective value at `x`.
    pub f_val: f64,
    /// Finite-difference gradient at `x`.
    pub gradient: Array1<f64>,
    /// Infinity norm of the projected gradient at `x` (optimality measure).
    pub proj_grad_norm: f64,
    /// Number of outer iterations performed.
    pub n_iter: usize,
    /// Number of objective-function evaluations, including those used for
    /// finite-difference gradients and line searches.
    pub n_fev: usize,
    /// True when a convergence criterion was met before `max_iter`.
    pub converged: bool,
    /// Human-readable termination reason.
    pub message: String,
}
/// Tuning knobs for the L-BFGS-B solver.
#[derive(Debug, Clone)]
pub struct LBFGSBOptions {
    /// Number of (s, y) correction pairs kept in the limited memory.
    pub memory: usize,
    /// Convergence tolerance on the projected-gradient norm.
    pub pgtol: f64,
    /// Relative function-change tolerance factor; the effective tolerance
    /// is `factr * machine_epsilon` (see `minimize`).
    pub factr: f64,
    /// Step size for finite-difference gradient estimation.
    pub eps: f64,
    /// Maximum number of outer iterations.
    pub max_iter: usize,
    /// Maximum number of backtracking line-search trials per iteration.
    pub max_ls_steps: usize,
    /// Armijo sufficient-decrease coefficient for the line search.
    pub c1: f64,
    /// Backtracking contraction factor (step is multiplied by this on each
    /// rejected trial; expected in (0, 1)).
    pub backtrack_rho: f64,
    /// Optional box constraints; `None` means fully unconstrained.
    pub bounds: Option<Bounds>,
}
impl Default for LBFGSBOptions {
    /// Conservative defaults; NOTE(review): memory = 10, pgtol = 1e-5 and
    /// factr = 1e7 mirror widely used L-BFGS-B defaults — confirm they suit
    /// this crate's problem scale.
    fn default() -> Self {
        Self {
            memory: 10,
            pgtol: 1e-5,
            factr: 1e7,
            // sqrt(machine epsilon): customary finite-difference step size.
            eps: f64::EPSILON.sqrt(),
            max_iter: 500,
            max_ls_steps: 30,
            c1: 1e-4,
            backtrack_rho: 0.5,
            bounds: None,
        }
    }
}
/// Box-constrained limited-memory BFGS (L-BFGS-B) solver.
pub struct LBFGSB {
    /// Configuration used by [`LBFGSB::minimize`].
    pub options: LBFGSBOptions,
}
impl LBFGSB {
    /// Creates a solver with the given options.
    pub fn new(options: LBFGSBOptions) -> Self {
        Self { options }
    }

    /// Creates a solver configured with [`LBFGSBOptions::default`].
    pub fn default_solver() -> Self {
        Self {
            options: LBFGSBOptions::default(),
        }
    }

    /// Minimizes `fun` from `x0` subject to the configured box constraints.
    ///
    /// Gradients are estimated by `finite_difference_gradient` with step
    /// `opts.eps`, so no analytic gradient is required. Each outer iteration
    /// computes a generalized Cauchy point, an approximate subspace step,
    /// and a backtracking line search; convergence is declared on either a
    /// small projected gradient or a small relative function change.
    ///
    /// # Errors
    /// Propagates any `OptimizeError` from the finite-difference helper.
    pub fn minimize<F>(&self, mut fun: F, x0: &[f64]) -> Result<LBFGSBResult, OptimizeError>
    where
        F: FnMut(&ArrayView1<f64>) -> f64,
    {
        let opts = &self.options;
        let n = x0.len();
        let m = opts.memory;
        let machine_eps = f64::EPSILON;
        // Function-change stopping tolerance: ftol = factr * eps_machine.
        let ftol = opts.factr * machine_eps;
        // Absent bounds expand to per-coordinate (None, None), i.e. fully
        // unconstrained.
        let (lower, upper) = opts
            .bounds
            .as_ref()
            .map(|b| (b.lower.clone(), b.upper.clone()))
            .unwrap_or_else(|| (vec![None; n], vec![None; n]));
        // Make the starting point feasible before the first evaluation.
        let mut x: Vec<f64> = x0.to_vec();
        project_point(&mut x, &lower, &upper);
        let mut n_fev = 0usize;
        let x_arr = Array1::from(x.clone());
        let mut f = {
            n_fev += 1;
            fun(&x_arr.view())
        };
        let mut g = {
            let xa = Array1::from(x.clone());
            let grad = finite_difference_gradient(&mut fun, &xa.view(), opts.eps)?;
            // NOTE(review): counts 2 evaluations per coordinate — assumes a
            // central-difference scheme; confirm against the helper.
            n_fev += 2 * n;
            grad.to_vec()
        };
        // Limited-memory history: s = x_{k+1}-x_k, y = g_{k+1}-g_k,
        // rho = 1/(s.y). `theta` scales the initial Hessian model.
        let mut s_vecs: Vec<Vec<f64>> = Vec::with_capacity(m);
        let mut y_vecs: Vec<Vec<f64>> = Vec::with_capacity(m);
        let mut rho_vals: Vec<f64> = Vec::with_capacity(m);
        let mut theta = 1.0f64;
        let mut iter = 0usize;
        let mut converged = false;
        let mut message = "Maximum iterations reached".to_string();
        loop {
            // First-order optimality: infinity norm of x - P(x - g).
            let pg_norm = ProjectedGradient::optimality_measure(&x, &g, &lower, &upper);
            if pg_norm <= opts.pgtol {
                converged = true;
                message = "Projected gradient norm below tolerance".to_string();
                break;
            }
            if iter >= opts.max_iter {
                break;
            }
            // Stage 1: generalized Cauchy point and the resulting free set.
            let (x_cauchy, free) = CauchyPoint::compute(&x, &g, &lower, &upper, theta, &s_vecs, &y_vecs);
            // Stage 2: approximate subspace minimization over free variables.
            let x_bar = SubspaceMinimization::minimize(
                &x_cauchy,
                &x,
                &g,
                &free,
                &lower,
                &upper,
                theta,
                &s_vecs,
                &y_vecs,
                &rho_vals,
            );
            // Stage 3: line search along d = x_bar - x.
            let d: Vec<f64> = (0..n).map(|i| x_bar[i] - x[i]).collect();
            let slope: f64 = dot(&g, &d);
            if slope >= 0.0 {
                // The subspace step is not a descent direction; fall back to
                // the projected steepest-descent direction instead.
                let pg: Vec<f64> = (0..n).map(|i| {
                    let at_lb = lower[i].map_or(false, |l| (x[i] - l).abs() < 1e-12);
                    let at_ub = upper[i].map_or(false, |u| (x[i] - u).abs() < 1e-12);
                    // Zero out components that would leave the box.
                    if at_lb && g[i] > 0.0 { 0.0 }
                    else if at_ub && g[i] < 0.0 { 0.0 }
                    else { -g[i] }
                }).collect();
                let pg_norm_inner = dot(&pg, &pg).sqrt();
                if pg_norm_inner < 1e-12 {
                    converged = true;
                    message = "Projected gradient is zero, at constrained optimum".to_string();
                    break;
                }
                // NOTE(review): the slope passed here, -g.pg, is >= 0 — the
                // opposite sign convention from the descent-branch call
                // below; `projected_backtrack` normalizes via slope.abs().
                let alpha = projected_backtrack(
                    &mut fun,
                    &x,
                    &pg,
                    f,
                    -dot(&g, &pg),
                    &lower,
                    &upper,
                    opts.c1,
                    opts.backtrack_rho,
                    opts.max_ls_steps,
                    &mut n_fev,
                );
                let mut x_new: Vec<f64> = (0..n).map(|i| x[i] + alpha * pg[i]).collect();
                project_point(&mut x_new, &lower, &upper);
                let x_new_arr = Array1::from(x_new.clone());
                let f_new = {
                    n_fev += 1;
                    fun(&x_new_arr.view())
                };
                let s: Vec<f64> = (0..n).map(|i| x_new[i] - x[i]).collect();
                let g_new = {
                    let xa = Array1::from(x_new.clone());
                    let grad = finite_difference_gradient(&mut fun, &xa.view(), opts.eps)?;
                    n_fev += 2 * n;
                    grad.to_vec()
                };
                let y: Vec<f64> = (0..n).map(|i| g_new[i] - g[i]).collect();
                let sy = dot(&s, &y);
                // Keep only curvature pairs with sufficiently positive s.y.
                // NOTE(review): this branch uses an absolute threshold
                // (1e-10) while the branch below uses a relative one
                // (machine_eps * |y|^2) — confirm the asymmetry is intended.
                if sy > 1e-10 {
                    update_lbfgs_history(&mut s_vecs, &mut y_vecs, &mut rho_vals, s, y, sy, m);
                    // theta = y.y / s.y, the standard Hessian scaling; the
                    // expects are safe because a pair was just pushed.
                    theta = dot(&y_vecs.last().expect("unexpected None or Err"), &y_vecs.last().expect("unexpected None or Err"))
                        / dot(&s_vecs.last().expect("unexpected None or Err"), &y_vecs.last().expect("unexpected None or Err")).max(1e-300);
                }
                // Relative function-change stopping test.
                if (f - f_new).abs() < ftol * (1.0 + f.abs()) {
                    x = x_new;
                    f = f_new;
                    g = g_new;
                    converged = true;
                    message = "Function value change below tolerance".to_string();
                    break;
                }
                x = x_new;
                f = f_new;
                g = g_new;
                iter += 1;
                continue;
            }
            // Normal path: backtracking search along the subspace direction.
            let alpha = projected_backtrack(
                &mut fun,
                &x,
                &d,
                f,
                slope,
                &lower,
                &upper,
                opts.c1,
                opts.backtrack_rho,
                opts.max_ls_steps,
                &mut n_fev,
            );
            let mut x_new: Vec<f64> = (0..n).map(|i| x[i] + alpha * d[i]).collect();
            project_point(&mut x_new, &lower, &upper);
            let x_new_arr = Array1::from(x_new.clone());
            let f_new = {
                n_fev += 1;
                fun(&x_new_arr.view())
            };
            let s: Vec<f64> = (0..n).map(|i| x_new[i] - x[i]).collect();
            let g_new = {
                let xa = Array1::from(x_new.clone());
                let grad = finite_difference_gradient(&mut fun, &xa.view(), opts.eps)?;
                n_fev += 2 * n;
                grad.to_vec()
            };
            let y: Vec<f64> = (0..n).map(|i| g_new[i] - g[i]).collect();
            let sy = dot(&s, &y);
            // Relative curvature test: accept the pair only when s.y clears
            // machine_eps * |y|^2.
            if sy > machine_eps * dot(&y, &y) {
                update_lbfgs_history(&mut s_vecs, &mut y_vecs, &mut rho_vals, s, y, sy, m);
                // theta = y.y / s.y from the freshly stored pair.
                if let (Some(y_last), Some(s_last)) = (y_vecs.last(), s_vecs.last()) {
                    let yy = dot(y_last, y_last);
                    let sy_last = dot(s_last, y_last);
                    if sy_last > 1e-300 {
                        theta = yy / sy_last;
                    }
                }
            }
            // Relative function-change stopping test.
            if (f - f_new).abs() < ftol * (1.0 + f.abs()) {
                x = x_new;
                f = f_new;
                g = g_new;
                converged = true;
                message = "Function value change below tolerance".to_string();
                break;
            }
            x = x_new;
            f = f_new;
            g = g_new;
            iter += 1;
        }
        // Report optimality at whatever point we stopped at.
        let pg_final = ProjectedGradient::optimality_measure(&x, &g, &lower, &upper);
        Ok(LBFGSBResult {
            x: Array1::from(x),
            f_val: f,
            gradient: Array1::from(g),
            proj_grad_norm: pg_final,
            n_iter: iter,
            n_fev,
            converged,
            message,
        })
    }
}
/// Clamps `x` in place into the box defined by `lower`/`upper`
/// (componentwise; `None` means unbounded on that side). Comparisons are
/// written so NaN entries pass through unchanged.
///
/// Fix: parameter type relaxed from `&mut Vec<f64>` to `&mut [f64]`
/// (clippy `ptr_arg`); existing `&mut vec` call sites coerce automatically.
fn project_point(x: &mut [f64], lower: &[Option<f64>], upper: &[Option<f64>]) {
    for i in 0..x.len() {
        if let Some(l) = lower[i] {
            if x[i] < l {
                x[i] = l;
            }
        }
        if let Some(u) = upper[i] {
            if x[i] > u {
                x[i] = u;
            }
        }
    }
}
/// Backtracking (Armijo) line search along `d`, projecting each trial point
/// into the bounds before evaluation.
///
/// `slope` is the directional derivative of `f` along `d`; call sites in
/// this file pass it with inconsistent signs, so the sufficient-decrease
/// test uses `slope.abs()`. Returns the accepted step length, or the last
/// (tiny) alpha tried when no trial satisfies the condition.
///
/// Fix: the acceptance test was `f_trial <= f0 + c1 * alpha * slope.abs()`,
/// which accepts steps that INCREASE f (the threshold sat above f0). The
/// Armijo condition requires decrease: `f_trial <= f0 - c1 * alpha * |slope|`.
#[allow(clippy::too_many_arguments)]
fn projected_backtrack<F>(
    fun: &mut F,
    x: &[f64],
    d: &[f64],
    f0: f64,
    slope: f64,
    lower: &[Option<f64>],
    upper: &[Option<f64>],
    c1: f64,
    rho: f64,
    max_steps: usize,
    n_fev: &mut usize,
) -> f64
where
    F: FnMut(&ArrayView1<f64>) -> f64,
{
    let n = x.len();
    let mut alpha = 1.0f64;
    for _ in 0..max_steps {
        // Trial point: step along d, then clamp back into the box.
        let mut x_trial: Vec<f64> = (0..n).map(|i| x[i] + alpha * d[i]).collect();
        project_point(&mut x_trial, lower, upper);
        let x_arr = Array1::from(x_trial);
        *n_fev += 1;
        let f_trial = fun(&x_arr.view());
        // Armijo sufficient decrease: f must drop below f0 by a fraction of
        // the predicted linear decrease.
        if f_trial <= f0 - c1 * alpha * slope.abs() {
            return alpha;
        }
        alpha *= rho;
        // Give up once the step is numerically negligible.
        if alpha < 1e-14 {
            break;
        }
    }
    alpha
}
/// Appends the correction pair `(s, y)` to the limited-memory history,
/// evicting the oldest pair first when `m` pairs are already stored.
///
/// `sy` is the precomputed inner product `s . y`; the stored curvature
/// scalar is `rho = 1 / max(sy, 1e-300)` to guard against division by zero.
fn update_lbfgs_history(
    s_vecs: &mut Vec<Vec<f64>>,
    y_vecs: &mut Vec<Vec<f64>>,
    rho_vals: &mut Vec<f64>,
    s: Vec<f64>,
    y: Vec<f64>,
    sy: f64,
    m: usize,
) {
    // Drop the oldest entry of each parallel history when the window is full.
    let window_full = s_vecs.len() >= m;
    if window_full {
        s_vecs.remove(0);
        y_vecs.remove(0);
        rho_vals.remove(0);
    }
    let rho = 1.0 / sy.max(1e-300);
    s_vecs.push(s);
    y_vecs.push(y);
    rho_vals.push(rho);
}
/// Convenience wrapper: runs the [`LBFGSB`] solver and adapts its result
/// into the crate-wide [`OptimizeResult`] shape.
///
/// `options` of `None` selects [`LBFGSBOptions::default`].
///
/// Fix: dropped the redundant `result.x.clone()` — `result` is consumed
/// here, so every field (including `x` and `gradient`) can be moved out
/// without an extra allocation.
///
/// # Errors
/// Propagates any `OptimizeError` raised by the solver.
pub fn minimize_lbfgsb_advanced<F>(
    fun: F,
    x0: &[f64],
    options: Option<LBFGSBOptions>,
) -> Result<OptimizeResult<f64>, OptimizeError>
where
    F: FnMut(&ArrayView1<f64>) -> f64,
{
    let opts = options.unwrap_or_default();
    let solver = LBFGSB::new(opts);
    let result = solver.minimize(fun, x0)?;
    Ok(OptimizeResult {
        x: result.x,
        fun: result.f_val,
        nit: result.n_iter,
        func_evals: result.n_fev,
        nfev: result.n_fev,
        success: result.converged,
        message: result.message,
        jacobian: Some(result.gradient),
        hessian: None,
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;

    /// Separable convex quadratic with unconstrained minimum at (1, 2).
    fn quadratic(x: &ArrayView1<f64>) -> f64 {
        (x[0] - 1.0).powi(2) + 4.0 * (x[1] - 2.0).powi(2)
    }

    /// Classic Rosenbrock banana function; global minimum at (1, 1).
    fn rosenbrock(x: &ArrayView1<f64>) -> f64 {
        (1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0].powi(2)).powi(2)
    }

    /// A zero gradient must yield a zero optimality measure.
    #[test]
    fn test_projected_gradient_optimality() {
        let x = vec![1.0, 2.0];
        let g = vec![0.0, 0.0];
        let lower = vec![None, None];
        let upper = vec![None, None];
        let opt = ProjectedGradient::optimality_measure(&x, &g, &lower, &upper);
        assert_abs_diff_eq!(opt, 0.0, epsilon = 1e-12);
    }

    /// Unconstrained quadratic: solver should reach (1, 2).
    #[test]
    fn test_lbfgsb_unconstrained_quadratic() {
        let mut opts = LBFGSBOptions::default();
        opts.pgtol = 1e-6;
        let solver = LBFGSB::new(opts);
        let result = solver.minimize(quadratic, &[0.0, 0.0]).expect("L-BFGS-B failed");
        assert!(result.converged);
        assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 1e-4);
        assert_abs_diff_eq!(result.x[1], 2.0, epsilon = 1e-4);
    }

    /// With x1 <= 1 the unconstrained minimum (1, 2) is infeasible, so the
    /// solution should be pinned at the upper bound x1 = 1.
    #[test]
    fn test_lbfgsb_bounded_quadratic() {
        let mut opts = LBFGSBOptions::default();
        opts.bounds = Some(Bounds::new(&[(None, None), (None, Some(1.0))]));
        let solver = LBFGSB::new(opts);
        let result = solver.minimize(quadratic, &[0.0, 0.0]).expect("L-BFGS-B failed");
        assert!(result.converged || result.n_iter > 0);
        assert_abs_diff_eq!(result.x[1], 1.0, epsilon = 0.1);
    }

    /// Harder curved-valley problem; looser tolerance on the minimizer.
    #[test]
    fn test_lbfgsb_rosenbrock() {
        let mut opts = LBFGSBOptions::default();
        opts.max_iter = 500;
        opts.pgtol = 1e-4;
        let solver = LBFGSB::new(opts);
        let result = solver.minimize(rosenbrock, &[0.5, 0.5]).expect("L-BFGS-B failed");
        assert!(result.converged);
        assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 1e-2);
        assert_abs_diff_eq!(result.x[1], 1.0, epsilon = 1e-2);
    }

    /// Sanity check: the Cauchy point stays inside the [0, 1]^2 box even
    /// with no quasi-Newton history.
    #[test]
    fn test_cauchy_point_trivial() {
        let x = vec![0.5, 0.5];
        let g = vec![1.0, 1.0];
        let lower = vec![Some(0.0), Some(0.0)];
        let upper = vec![Some(1.0), Some(1.0)];
        let (xc, _) = CauchyPoint::compute(&x, &g, &lower, &upper, 1.0, &[], &[]);
        assert!(xc[0] >= 0.0);
        assert!(xc[1] >= 0.0);
        assert!(xc[0] <= 1.0);
        assert!(xc[1] <= 1.0);
    }
}