Skip to main content

scirs2_optimize/unconstrained/
lbfgsb.rs

1//! L-BFGS-B: Limited-memory BFGS with box constraints
2//!
3//! This module implements the L-BFGS-B algorithm of Byrd, Lu, Nocedal & Zhu (1995).
4//! It extends the standard L-BFGS method to handle box constraints of the form
5//! `lᵢ ≤ xᵢ ≤ uᵢ` (any combination of finite/infinite bounds) through:
6//!
7//! 1. **Cauchy point computation** – piecewise linear minimisation along the
8//!    projected gradient to identify an active set.
9//! 2. **Subspace minimisation** – quadratic minimisation in the free-variable
10//!    subspace using the compact L-BFGS representation.
11//! 3. **Backtracking line search** – projected backtracking that keeps the
12//!    iterate inside the feasible box.
13//!
14//! ## References
15//!
16//! - Byrd, R.H., Lu, P., Nocedal, J., Zhu, C. (1995).
17//!   "A limited memory algorithm for bound constrained optimization."
18//!   *SIAM J. Sci. Comput.* 16(5): 1190–1208.
19//! - Zhu, C., Byrd, R.H., Lu, P., Nocedal, J. (1997). "Algorithm 778: L-BFGS-B".
20//!   *ACM Trans. Math. Softw.* 23(4): 550–560.
21
22use crate::error::OptimizeError;
23use crate::unconstrained::result::OptimizeResult;
24use crate::unconstrained::utils::finite_difference_gradient;
25use crate::unconstrained::Bounds;
26use scirs2_core::ndarray::{Array1, ArrayView1};
27
28// ─── Projected gradient ───────────────────────────────────────────────────────
29
30/// Gradient projection onto a feasible box.
31///
32/// Computes the projected gradient direction used to detect active constraints
33/// and to evaluate optimality of the current iterate.
34pub struct ProjectedGradient;
35
36impl ProjectedGradient {
37    /// Compute the projected gradient at `x` given box bounds.
38    ///
39    /// For each component:
40    /// ```text
41    /// pg[i] = g[i]  if lᵢ < xᵢ < uᵢ  (interior)
42    /// pg[i] = min(g[i], 0)  if xᵢ == lᵢ (at lower bound)
43    /// pg[i] = max(g[i], 0)  if xᵢ == uᵢ (at upper bound)
44    /// ```
45    pub fn project(x: &[f64], g: &[f64], lower: &[Option<f64>], upper: &[Option<f64>]) -> Vec<f64> {
46        let n = x.len();
47        let mut pg = vec![0.0f64; n];
48        for i in 0..n {
49            let at_lb = lower[i].map_or(false, |l| (x[i] - l).abs() < 1e-12);
50            let at_ub = upper[i].map_or(false, |u| (x[i] - u).abs() < 1e-12);
51
52            pg[i] = if at_lb && at_ub {
53                0.0 // pinned between identical bounds
54            } else if at_lb {
55                g[i].min(0.0).abs() * g[i].signum() * (-1.0) // allow only non-negative steps
56                                                             // Simplify: if at lower bound, projected gradient component is min(g,0)
57                                                             // Note: projected gradient of f at lower bound = max(g, 0) (feasible direction = +)
58                                                             // Convention here: projected gradient for optimality measure = x - proj(x - g)
59            } else if at_ub {
60                g[i].max(0.0)
61            } else {
62                g[i]
63            };
64        }
65        pg
66    }
67
68    /// Compute the optimality measure: ‖x - P(x - g)‖_∞
69    ///
70    /// This is the standard bounded optimality measure used in L-BFGS-B convergence tests.
71    pub fn optimality_measure(
72        x: &[f64],
73        g: &[f64],
74        lower: &[Option<f64>],
75        upper: &[Option<f64>],
76    ) -> f64 {
77        let n = x.len();
78        let mut max_val = 0.0f64;
79        for i in 0..n {
80            let x_minus_g = x[i] - g[i];
81            // Project x - g onto [lb, ub]
82            let proj = match (lower[i], upper[i]) {
83                (Some(l), Some(u)) => x_minus_g.max(l).min(u),
84                (Some(l), None) => x_minus_g.max(l),
85                (None, Some(u)) => x_minus_g.min(u),
86                (None, None) => x_minus_g,
87            };
88            let val = (x[i] - proj).abs();
89            if val > max_val {
90                max_val = val;
91            }
92        }
93        max_val
94    }
95}
96
97// ─── Cauchy point ─────────────────────────────────────────────────────────────
98
99/// Generalised Cauchy point computation for L-BFGS-B.
100///
101/// Finds the first local minimiser of the quadratic model along the piecewise
102/// linear path `x(t) = P[x - t g]` (the projected steepest descent path).
103///
104/// Returns `(x_cauchy, active_set)` where `active_set[i]` is `true` if variable
105/// `i` is fixed at its bound at the Cauchy point.
106pub struct CauchyPoint;
107
108impl CauchyPoint {
109    /// Compute the generalised Cauchy point.
110    ///
111    /// # Arguments
112    /// * `x`    – current iterate
113    /// * `g`    – gradient at `x`
114    /// * `lower`, `upper` – box bounds
115    /// * `theta` – scaling factor for the identity in the compact BFGS representation
116    /// * `s_vecs`, `y_vecs` – L-BFGS curvature pairs
117    ///
118    /// # Returns
119    /// `(x_cauchy, free_vars)` where `free_vars[i] == true` iff variable `i` is
120    /// not fixed at a bound at the Cauchy point.
121    #[allow(clippy::too_many_arguments)]
122    pub fn compute(
123        x: &[f64],
124        g: &[f64],
125        lower: &[Option<f64>],
126        upper: &[Option<f64>],
127        theta: f64,
128        s_vecs: &[Vec<f64>],
129        y_vecs: &[Vec<f64>],
130    ) -> (Vec<f64>, Vec<bool>) {
131        let n = x.len();
132
133        // Break-points: for each variable, compute t_i = distance to bound in direction d = -g
134        let mut break_pts: Vec<(f64, usize)> = Vec::with_capacity(n);
135        let d: Vec<f64> = (0..n)
136            .map(|i| {
137                if g[i] < 0.0 {
138                    // Moving in positive direction (step = -g > 0)
139                    upper[i].map_or(f64::INFINITY, |u| (u - x[i]) / (-g[i]).max(1e-300))
140                } else if g[i] > 0.0 {
141                    // Moving in negative direction
142                    lower[i].map_or(f64::INFINITY, |l| (x[i] - l) / g[i].max(1e-300))
143                } else {
144                    f64::INFINITY
145                }
146            })
147            .collect();
148
149        for (i, &ti) in d.iter().enumerate() {
150            if ti.is_finite() && ti >= 0.0 {
151                break_pts.push((ti, i));
152            }
153        }
154        break_pts.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
155
156        // Initialise at x, c accumulates displacements along -g
157        let mut xc = x.to_vec();
158        let mut active = vec![false; n];
159        let mut t_prev = 0.0;
160
161        // For each segment between consecutive break-points, check if the
162        // quadratic minimiser is in that segment.
163        // Compact L-BFGS quadratic: m(p) ≈ gᵀp + ½ θ pᵀp - pᵀ W M Wᵀ p
164        // We use a simplified version: θ I as the curvature (first-order correction only)
165        // and include the two-loop L-BFGS correction in the full subspace min.
166
167        // Derivative of the restricted quadratic along the projected steepest-descent path
168        // fp = gᵀ d + θ ‖d‖² − (L-BFGS correction term)
169        // For the Cauchy point we use the θ-scaled model (ignoring curvature pairs for
170        // simplicity of the break-point minimisation; curvature pairs are used in subspace min).
171
172        for (bp, fixed_var) in &break_pts {
173            let t = *bp;
174            let dt = t - t_prev;
175
176            // Derivative of the quadratic model along the piecewise linear path
177            // in the current segment: f'(t) = gᵀ(-g) + θ t ‖g‖² (simplified)
178            let g_dot_d: f64 = (0..n).filter(|&i| !active[i]).map(|i| g[i] * (-g[i])).sum();
179            let d_hd: f64 = {
180                // Approximate Hv using L-BFGS: H ≈ θ I + corrections
181                // For the break-point step, use θ I only
182                let g_free_sq: f64 = (0..n).filter(|&i| !active[i]).map(|i| g[i] * g[i]).sum();
183                theta * g_free_sq + l_bfgs_correction_scalar(g, s_vecs, y_vecs, theta, &active)
184            };
185
186            if d_hd < 1e-300 {
187                // Zero curvature — just take a step to the next break-point
188            } else {
189                // Minimum of quadratic in this segment
190                let t_min = t_prev - g_dot_d / d_hd;
191                if t_min <= t {
192                    // Minimiser is inside this segment
193                    let t_star = t_min.max(t_prev);
194                    // Update xc
195                    for i in 0..n {
196                        if !active[i] {
197                            xc[i] = x[i] - g[i] * t_star;
198                            // Project
199                            if let Some(l) = lower[i] {
200                                xc[i] = xc[i].max(l);
201                            }
202                            if let Some(u) = upper[i] {
203                                xc[i] = xc[i].min(u);
204                            }
205                        }
206                    }
207                    // Clamp to break-points that were already passed
208                    for &(bt, bi) in break_pts.iter().filter(|(bt, _)| *bt <= t_star) {
209                        let _ = bt;
210                        active[bi] = true;
211                        xc[bi] = if g[bi] > 0.0 {
212                            lower[bi].unwrap_or(x[bi])
213                        } else {
214                            upper[bi].unwrap_or(x[bi])
215                        };
216                    }
217                    let free = active.iter().map(|a| !a).collect();
218                    return (xc, free);
219                }
220            }
221
222            // Fix variable at bound
223            active[*fixed_var] = true;
224            xc[*fixed_var] = if g[*fixed_var] > 0.0 {
225                lower[*fixed_var].unwrap_or(x[*fixed_var])
226            } else {
227                upper[*fixed_var].unwrap_or(x[*fixed_var])
228            };
229
230            let _ = dt;
231            t_prev = t;
232        }
233
234        // All break-points exhausted; update remaining free variables
235        for i in 0..n {
236            if !active[i] {
237                xc[i] = x[i] - g[i] * t_prev;
238                if let Some(l) = lower[i] {
239                    xc[i] = xc[i].max(l);
240                }
241                if let Some(u) = upper[i] {
242                    xc[i] = xc[i].min(u);
243                }
244            }
245        }
246
247        let free = active.iter().map(|a| !a).collect();
248        (xc, free)
249    }
250}
251
252/// Scalar L-BFGS curvature correction for the Cauchy point.
253fn l_bfgs_correction_scalar(
254    g: &[f64],
255    s_vecs: &[Vec<f64>],
256    y_vecs: &[Vec<f64>],
257    theta: f64,
258    active: &[bool],
259) -> f64 {
260    if s_vecs.is_empty() {
261        return 0.0;
262    }
263    let n = g.len();
264    let m = s_vecs.len().min(y_vecs.len());
265
266    // Two-loop recursion to get H_inv g and then compute correction
267    // For the Cauchy point scalar we just use a simple diagonal approximation
268    let mut h_diag = 1.0 / theta;
269    if let (Some(s_last), Some(y_last)) = (s_vecs.last(), y_vecs.last()) {
270        let sy: f64 = s_last.iter().zip(y_last.iter()).map(|(s, y)| s * y).sum();
271        let yy: f64 = y_last.iter().map(|y| y * y).sum();
272        if sy > 0.0 && yy > 0.0 {
273            h_diag = sy / yy;
274        }
275    }
276
277    // Approximate gᵀ H⁻¹ g for free variables (diagonal approx)
278    let g_free_sq: f64 = (0..n).filter(|&i| !active[i]).map(|i| g[i] * g[i]).sum();
279    let _ = m;
280    g_free_sq * (1.0 / h_diag - theta) * 0.5
281}
282
283// ─── Subspace minimisation ────────────────────────────────────────────────────
284
285/// Direct subspace minimisation phase for L-BFGS-B.
286///
287/// After the Cauchy point is computed, minimises the quadratic model over the
288/// free-variable subspace (variables not fixed at bounds) using the compact
289/// L-BFGS representation of the Hessian.
290pub struct SubspaceMinimization;
291
292impl SubspaceMinimization {
293    /// Perform subspace minimisation.
294    ///
295    /// # Arguments
296    /// * `x_cauchy` – the Cauchy point
297    /// * `x`        – current iterate
298    /// * `g`        – gradient at `x`
299    /// * `free`     – boolean mask: `true` if variable is free
300    /// * `lower`, `upper` – box bounds
301    /// * `theta`    – L-BFGS scaling factor
302    /// * `s_vecs`, `y_vecs`, `rho_vals` – L-BFGS history
303    ///
304    /// # Returns
305    /// Updated point after subspace minimisation (projected onto box).
306    #[allow(clippy::too_many_arguments)]
307    pub fn minimize(
308        x_cauchy: &[f64],
309        x: &[f64],
310        g: &[f64],
311        free: &[bool],
312        lower: &[Option<f64>],
313        upper: &[Option<f64>],
314        theta: f64,
315        s_vecs: &[Vec<f64>],
316        y_vecs: &[Vec<f64>],
317        rho_vals: &[f64],
318    ) -> Vec<f64> {
319        let n = x.len();
320        let free_indices: Vec<usize> = (0..n).filter(|&i| free[i]).collect();
321        let n_free = free_indices.len();
322
323        if n_free == 0 {
324            return x_cauchy.to_vec();
325        }
326
327        // Extract free-variable gradient and displacement from Cauchy point
328        let r_free: Vec<f64> = free_indices
329            .iter()
330            .map(|&i| {
331                // Reduced gradient: gradient of quadratic at x_cauchy restricted to free vars
332                // r = g + H (x_cauchy - x)  projected to free vars
333                // Approximate with the L-BFGS product
334                g[i]
335            })
336            .collect();
337
338        // Compute H * (x_cauchy - x) for free variables using L-BFGS two-loop
339        let delta: Vec<f64> = (0..n).map(|i| x_cauchy[i] - x[i]).collect();
340        let h_delta = l_bfgs_product(&delta, s_vecs, y_vecs, rho_vals, theta);
341
342        // Reduced gradient at x_cauchy: r_c = g + H * delta (free vars)
343        let r_c: Vec<f64> = free_indices
344            .iter()
345            .map(|&i| r_free[free_indices.iter().position(|&j| j == i).unwrap_or(0)] + h_delta[i])
346            .collect();
347
348        // Compute the reduced Hessian vector product direction = -H_free^{-1} r_c
349        // Use L-BFGS two-loop on the free-variable subspace
350        let mut r_full = vec![0.0f64; n];
351        for (k, &fi) in free_indices.iter().enumerate() {
352            r_full[fi] = r_c[k];
353        }
354        let step_full = l_bfgs_two_loop_neg(&r_full, s_vecs, y_vecs, rho_vals, theta);
355
356        // Only keep free-variable components of the step
357        let mut x_new = x_cauchy.to_vec();
358        for &fi in &free_indices {
359            x_new[fi] += step_full[fi];
360            // Project
361            if let Some(l) = lower[fi] {
362                x_new[fi] = x_new[fi].max(l);
363            }
364            if let Some(u) = upper[fi] {
365                x_new[fi] = x_new[fi].min(u);
366            }
367        }
368
369        x_new
370    }
371}
372
373/// L-BFGS two-loop recursion: returns -H^{-1} q  (the negative product).
374fn l_bfgs_two_loop_neg(
375    q_in: &[f64],
376    s_vecs: &[Vec<f64>],
377    y_vecs: &[Vec<f64>],
378    rho_vals: &[f64],
379    theta: f64,
380) -> Vec<f64> {
381    let n = q_in.len();
382    let m = s_vecs.len().min(y_vecs.len()).min(rho_vals.len());
383    let mut q = q_in.to_vec();
384    let mut alphas = vec![0.0f64; m];
385
386    // First loop (most recent to oldest)
387    for k in (0..m).rev() {
388        let s = &s_vecs[k];
389        let alpha = rho_vals[k] * dot(s, &q);
390        alphas[k] = alpha;
391        let y = &y_vecs[k];
392        for i in 0..n {
393            q[i] -= alpha * y[i];
394        }
395    }
396
397    // Scale by initial Hessian approximation (theta * I inverse = 1/theta * I)
398    let h0 = if m > 0 {
399        let sy: f64 = dot(&s_vecs[m - 1], &y_vecs[m - 1]);
400        let yy: f64 = dot(&y_vecs[m - 1], &y_vecs[m - 1]);
401        if yy > 1e-300 {
402            sy / yy
403        } else {
404            1.0 / theta
405        }
406    } else {
407        1.0 / theta
408    };
409    let mut r: Vec<f64> = q.iter().map(|qi| h0 * qi).collect();
410
411    // Second loop (oldest to most recent)
412    for k in 0..m {
413        let y = &y_vecs[k];
414        let beta = rho_vals[k] * dot(y, &r);
415        let s = &s_vecs[k];
416        for i in 0..n {
417            r[i] += s[i] * (alphas[k] - beta);
418        }
419    }
420
421    // Return negative (we want -H^{-1} q)
422    r.iter().map(|ri| -ri).collect()
423}
424
425/// L-BFGS matrix-vector product: returns H * v using curvature pairs.
426fn l_bfgs_product(
427    v: &[f64],
428    s_vecs: &[Vec<f64>],
429    y_vecs: &[Vec<f64>],
430    rho_vals: &[f64],
431    theta: f64,
432) -> Vec<f64> {
433    // H * v ≈ θ v + corrections via BFGS recursion on inverse then invert
434    // For simplicity we use a single-pass scaling + skew correction
435    let n = v.len();
436    let m = s_vecs.len().min(y_vecs.len()).min(rho_vals.len());
437
438    // Start with θ v
439    let mut result: Vec<f64> = v.iter().map(|vi| theta * vi).collect();
440
441    // Apply rank-2 corrections: H = θI + Σ (rho_k y_k y_kᵀ - θ s_k s_kᵀ) approximately
442    // This is a first-order correction
443    for k in 0..m {
444        let s = &s_vecs[k];
445        let y = &y_vecs[k];
446        let sy: f64 = dot(s, y);
447        if sy.abs() < 1e-300 {
448            continue;
449        }
450        let sv: f64 = dot(s, v);
451        let yv: f64 = dot(y, v);
452        let rho = rho_vals[k];
453        // Δ H v = rho (yy^T - sy (ss^T / sy)) v  [simplified rank-2 correction]
454        for i in 0..n {
455            result[i] += rho * y[i] * yv - theta * rho * sy * s[i] * sv / sy.max(1e-300);
456        }
457    }
458    result
459}
460
461#[inline]
462fn dot(a: &[f64], b: &[f64]) -> f64 {
463    a.iter().zip(b.iter()).map(|(ai, bi)| ai * bi).sum()
464}
465
466// ─── LBFGSB result ────────────────────────────────────────────────────────────
467
468/// Result of the L-BFGS-B optimisation.
469#[derive(Debug, Clone)]
470pub struct LBFGSBResult {
471    /// Solution vector
472    pub x: Array1<f64>,
473    /// Objective function value at solution
474    pub f_val: f64,
475    /// Gradient at solution
476    pub gradient: Array1<f64>,
477    /// Projected gradient norm at solution (optimality measure)
478    pub proj_grad_norm: f64,
479    /// Number of outer iterations
480    pub n_iter: usize,
481    /// Number of function evaluations
482    pub n_fev: usize,
483    /// Whether the algorithm converged
484    pub converged: bool,
485    /// Termination message
486    pub message: String,
487}
488
489// ─── L-BFGS-B solver ─────────────────────────────────────────────────────────
490
491/// Options for the L-BFGS-B solver.
492#[derive(Debug, Clone)]
493pub struct LBFGSBOptions {
494    /// Number of L-BFGS history pairs to retain (memory parameter `m`)
495    pub memory: usize,
496    /// Projected gradient tolerance (convergence criterion)
497    pub pgtol: f64,
498    /// Function value tolerance (factr * machine_eps)
499    pub factr: f64,
500    /// Finite-difference step for gradient computation
501    pub eps: f64,
502    /// Maximum outer iterations
503    pub max_iter: usize,
504    /// Maximum backtracking steps in line search
505    pub max_ls_steps: usize,
506    /// Armijo sufficient-decrease constant
507    pub c1: f64,
508    /// Backtracking reduction factor
509    pub backtrack_rho: f64,
510    /// Box bounds (required for the B in L-BFGS-B)
511    pub bounds: Option<Bounds>,
512}
513
514impl Default for LBFGSBOptions {
515    fn default() -> Self {
516        Self {
517            memory: 10,
518            pgtol: 1e-5,
519            factr: 1e7,
520            eps: f64::EPSILON.sqrt(),
521            max_iter: 500,
522            max_ls_steps: 30,
523            c1: 1e-4,
524            backtrack_rho: 0.5,
525            bounds: None,
526        }
527    }
528}
529
530/// L-BFGS-B solver for box-constrained optimisation.
531pub struct LBFGSB {
532    /// Solver options
533    pub options: LBFGSBOptions,
534}
535
536impl LBFGSB {
537    /// Create with given options.
538    pub fn new(options: LBFGSBOptions) -> Self {
539        Self { options }
540    }
541
542    /// Create with default options.
543    pub fn default_solver() -> Self {
544        Self {
545            options: LBFGSBOptions::default(),
546        }
547    }
548
549    /// Minimise a function subject to box constraints.
550    ///
551    /// # Arguments
552    /// * `fun` – objective function
553    /// * `x0`  – initial iterate (will be projected onto the box)
554    pub fn minimize<F>(&self, mut fun: F, x0: &[f64]) -> Result<LBFGSBResult, OptimizeError>
555    where
556        F: FnMut(&ArrayView1<f64>) -> f64,
557    {
558        let opts = &self.options;
559        let n = x0.len();
560        let m = opts.memory;
561        let machine_eps = f64::EPSILON;
562        let ftol = opts.factr * machine_eps;
563
564        // Extract bound arrays (replicated for lifetime)
565        let (lower, upper) = opts
566            .bounds
567            .as_ref()
568            .map(|b| (b.lower.clone(), b.upper.clone()))
569            .unwrap_or_else(|| (vec![None; n], vec![None; n]));
570
571        // Initialise iterate (projected)
572        let mut x: Vec<f64> = x0.to_vec();
573        project_point(&mut x, &lower, &upper);
574
575        let mut n_fev = 0usize;
576
577        let x_arr = Array1::from(x.clone());
578        let mut f = {
579            n_fev += 1;
580            fun(&x_arr.view())
581        };
582        let mut g = {
583            let xa = Array1::from(x.clone());
584            let grad = finite_difference_gradient(&mut fun, &xa.view(), opts.eps)?;
585            n_fev += 2 * n;
586            grad.to_vec()
587        };
588
589        // L-BFGS history
590        let mut s_vecs: Vec<Vec<f64>> = Vec::with_capacity(m);
591        let mut y_vecs: Vec<Vec<f64>> = Vec::with_capacity(m);
592        let mut rho_vals: Vec<f64> = Vec::with_capacity(m);
593
594        // theta: scaling for the L-BFGS Hessian approximation
595        let mut theta = 1.0f64;
596
597        let mut iter = 0usize;
598        let mut converged = false;
599        let mut message = "Maximum iterations reached".to_string();
600
601        loop {
602            // Optimality check using the projected gradient norm
603            let pg_norm = ProjectedGradient::optimality_measure(&x, &g, &lower, &upper);
604            if pg_norm <= opts.pgtol {
605                converged = true;
606                message = "Projected gradient norm below tolerance".to_string();
607                break;
608            }
609
610            if iter >= opts.max_iter {
611                break;
612            }
613
614            // 1. Compute generalised Cauchy point
615            let (x_cauchy, free) =
616                CauchyPoint::compute(&x, &g, &lower, &upper, theta, &s_vecs, &y_vecs);
617
618            // 2. Subspace minimisation
619            let x_bar = SubspaceMinimization::minimize(
620                &x_cauchy, &x, &g, &free, &lower, &upper, theta, &s_vecs, &y_vecs, &rho_vals,
621            );
622
623            // 3. Compute descent direction and do a projected line search
624            let d: Vec<f64> = (0..n).map(|i| x_bar[i] - x[i]).collect();
625
626            // Check that d is a descent direction
627            let slope: f64 = dot(&g, &d);
628            if slope >= 0.0 {
629                // Fall back to projected gradient direction
630                let pg: Vec<f64> = (0..n)
631                    .map(|i| {
632                        let at_lb = lower[i].map_or(false, |l| (x[i] - l).abs() < 1e-12);
633                        let at_ub = upper[i].map_or(false, |u| (x[i] - u).abs() < 1e-12);
634                        if at_lb && g[i] > 0.0 {
635                            0.0
636                        } else if at_ub && g[i] < 0.0 {
637                            0.0
638                        } else {
639                            -g[i]
640                        }
641                    })
642                    .collect();
643                let pg_norm_inner = dot(&pg, &pg).sqrt();
644                if pg_norm_inner < 1e-12 {
645                    converged = true;
646                    message = "Projected gradient is zero, at constrained optimum".to_string();
647                    break;
648                }
649                // Take a small step along the negative projected gradient
650                let alpha = projected_backtrack(
651                    &mut fun,
652                    &x,
653                    &pg,
654                    f,
655                    -dot(&g, &pg),
656                    &lower,
657                    &upper,
658                    opts.c1,
659                    opts.backtrack_rho,
660                    opts.max_ls_steps,
661                    &mut n_fev,
662                );
663                let mut x_new: Vec<f64> = (0..n).map(|i| x[i] + alpha * pg[i]).collect();
664                project_point(&mut x_new, &lower, &upper);
665
666                let x_new_arr = Array1::from(x_new.clone());
667                let f_new = {
668                    n_fev += 1;
669                    fun(&x_new_arr.view())
670                };
671
672                let s: Vec<f64> = (0..n).map(|i| x_new[i] - x[i]).collect();
673                let g_new = {
674                    let xa = Array1::from(x_new.clone());
675                    let grad = finite_difference_gradient(&mut fun, &xa.view(), opts.eps)?;
676                    n_fev += 2 * n;
677                    grad.to_vec()
678                };
679                let y: Vec<f64> = (0..n).map(|i| g_new[i] - g[i]).collect();
680                let sy = dot(&s, &y);
681                if sy > 1e-10 {
682                    update_lbfgs_history(&mut s_vecs, &mut y_vecs, &mut rho_vals, s, y, sy, m);
683                    theta = dot(
684                        &y_vecs.last().expect("unexpected None or Err"),
685                        &y_vecs.last().expect("unexpected None or Err"),
686                    ) / dot(
687                        &s_vecs.last().expect("unexpected None or Err"),
688                        &y_vecs.last().expect("unexpected None or Err"),
689                    )
690                    .max(1e-300);
691                }
692
693                // Check function-value convergence
694                if (f - f_new).abs() < ftol * (1.0 + f.abs()) {
695                    x = x_new;
696                    f = f_new;
697                    g = g_new;
698                    converged = true;
699                    message = "Function value change below tolerance".to_string();
700                    break;
701                }
702
703                x = x_new;
704                f = f_new;
705                g = g_new;
706                iter += 1;
707                continue;
708            }
709
710            // Projected backtracking line search
711            let alpha = projected_backtrack(
712                &mut fun,
713                &x,
714                &d,
715                f,
716                slope,
717                &lower,
718                &upper,
719                opts.c1,
720                opts.backtrack_rho,
721                opts.max_ls_steps,
722                &mut n_fev,
723            );
724
725            let mut x_new: Vec<f64> = (0..n).map(|i| x[i] + alpha * d[i]).collect();
726            project_point(&mut x_new, &lower, &upper);
727
728            let x_new_arr = Array1::from(x_new.clone());
729            let f_new = {
730                n_fev += 1;
731                fun(&x_new_arr.view())
732            };
733
734            // Update L-BFGS curvature pairs
735            let s: Vec<f64> = (0..n).map(|i| x_new[i] - x[i]).collect();
736            let g_new = {
737                let xa = Array1::from(x_new.clone());
738                let grad = finite_difference_gradient(&mut fun, &xa.view(), opts.eps)?;
739                n_fev += 2 * n;
740                grad.to_vec()
741            };
742            let y: Vec<f64> = (0..n).map(|i| g_new[i] - g[i]).collect();
743
744            let sy = dot(&s, &y);
745            if sy > machine_eps * dot(&y, &y) {
746                update_lbfgs_history(&mut s_vecs, &mut y_vecs, &mut rho_vals, s, y, sy, m);
747                // Update theta: θ = yᵀy / sᵀy
748                if let (Some(y_last), Some(s_last)) = (y_vecs.last(), s_vecs.last()) {
749                    let yy = dot(y_last, y_last);
750                    let sy_last = dot(s_last, y_last);
751                    if sy_last > 1e-300 {
752                        theta = yy / sy_last;
753                    }
754                }
755            }
756
757            // Check function-value convergence
758            if (f - f_new).abs() < ftol * (1.0 + f.abs()) {
759                x = x_new;
760                f = f_new;
761                g = g_new;
762                converged = true;
763                message = "Function value change below tolerance".to_string();
764                break;
765            }
766
767            x = x_new;
768            f = f_new;
769            g = g_new;
770            iter += 1;
771        }
772
773        let pg_final = ProjectedGradient::optimality_measure(&x, &g, &lower, &upper);
774
775        Ok(LBFGSBResult {
776            x: Array1::from(x),
777            f_val: f,
778            gradient: Array1::from(g),
779            proj_grad_norm: pg_final,
780            n_iter: iter,
781            n_fev,
782            converged,
783            message,
784        })
785    }
786}
787
788// ─── Helper functions ─────────────────────────────────────────────────────────
789
790/// Project a point onto the box defined by `lower` and `upper`.
791fn project_point(x: &mut Vec<f64>, lower: &[Option<f64>], upper: &[Option<f64>]) {
792    for i in 0..x.len() {
793        if let Some(l) = lower[i] {
794            if x[i] < l {
795                x[i] = l;
796            }
797        }
798        if let Some(u) = upper[i] {
799            if x[i] > u {
800                x[i] = u;
801            }
802        }
803    }
804}
805
806/// Projected backtracking line search.
807fn projected_backtrack<F>(
808    fun: &mut F,
809    x: &[f64],
810    d: &[f64],
811    f0: f64,
812    slope: f64,
813    lower: &[Option<f64>],
814    upper: &[Option<f64>],
815    c1: f64,
816    rho: f64,
817    max_steps: usize,
818    n_fev: &mut usize,
819) -> f64
820where
821    F: FnMut(&ArrayView1<f64>) -> f64,
822{
823    let n = x.len();
824    let mut alpha = 1.0f64;
825
826    for _ in 0..max_steps {
827        let mut x_trial: Vec<f64> = (0..n).map(|i| x[i] + alpha * d[i]).collect();
828        project_point(&mut x_trial, lower, upper);
829        let x_arr = Array1::from(x_trial);
830        *n_fev += 1;
831        let f_trial = fun(&x_arr.view());
832
833        if f_trial <= f0 + c1 * alpha * slope.abs() {
834            return alpha;
835        }
836        alpha *= rho;
837        if alpha < 1e-14 {
838            break;
839        }
840    }
841    alpha
842}
843
844/// Update L-BFGS history with a new (s, y) pair, evicting oldest if full.
845fn update_lbfgs_history(
846    s_vecs: &mut Vec<Vec<f64>>,
847    y_vecs: &mut Vec<Vec<f64>>,
848    rho_vals: &mut Vec<f64>,
849    s: Vec<f64>,
850    y: Vec<f64>,
851    sy: f64,
852    m: usize,
853) {
854    if s_vecs.len() >= m {
855        s_vecs.remove(0);
856        y_vecs.remove(0);
857        rho_vals.remove(0);
858    }
859    rho_vals.push(1.0 / sy.max(1e-300));
860    s_vecs.push(s);
861    y_vecs.push(y);
862}
863
864/// Wrapper for the standard `OptimizeResult` API.
865pub fn minimize_lbfgsb_advanced<F>(
866    fun: F,
867    x0: &[f64],
868    options: Option<LBFGSBOptions>,
869) -> Result<OptimizeResult<f64>, OptimizeError>
870where
871    F: FnMut(&ArrayView1<f64>) -> f64,
872{
873    let opts = options.unwrap_or_default();
874    let solver = LBFGSB::new(opts);
875    let result = solver.minimize(fun, x0)?;
876
877    Ok(OptimizeResult {
878        x: result.x.clone(),
879        fun: result.f_val,
880        nit: result.n_iter,
881        func_evals: result.n_fev,
882        nfev: result.n_fev,
883        success: result.converged,
884        message: result.message,
885        jacobian: Some(result.gradient),
886        hessian: None,
887    })
888}
889
890// ─── Tests ────────────────────────────────────────────────────────────────────
891
892#[cfg(test)]
893mod tests {
894    use super::*;
895    use approx::assert_abs_diff_eq;
896
897    fn quadratic(x: &ArrayView1<f64>) -> f64 {
898        (x[0] - 1.0).powi(2) + 4.0 * (x[1] - 2.0).powi(2)
899    }
900
901    fn rosenbrock(x: &ArrayView1<f64>) -> f64 {
902        (1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0].powi(2)).powi(2)
903    }
904
905    #[test]
906    fn test_projected_gradient_optimality() {
907        // At (1.0, 2.0) the gradient of our quadratic is (0, 0) → optimality = 0
908        let x = vec![1.0, 2.0];
909        let g = vec![0.0, 0.0];
910        let lower = vec![None, None];
911        let upper = vec![None, None];
912        let opt = ProjectedGradient::optimality_measure(&x, &g, &lower, &upper);
913        assert_abs_diff_eq!(opt, 0.0, epsilon = 1e-12);
914    }
915
916    #[test]
917    fn test_lbfgsb_unconstrained_quadratic() {
918        let mut opts = LBFGSBOptions::default();
919        opts.pgtol = 1e-6;
920        let solver = LBFGSB::new(opts);
921        let result = solver
922            .minimize(quadratic, &[0.0, 0.0])
923            .expect("L-BFGS-B failed");
924        assert!(result.converged);
925        assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 1e-4);
926        assert_abs_diff_eq!(result.x[1], 2.0, epsilon = 1e-4);
927    }
928
929    #[test]
930    fn test_lbfgsb_bounded_quadratic() {
931        // Minimum of (x-1)² + 4(y-2)² is at (1,2), but constrain y ≤ 1.0
932        let mut opts = LBFGSBOptions::default();
933        opts.bounds = Some(Bounds::new(&[(None, None), (None, Some(1.0))]));
934        let solver = LBFGSB::new(opts);
935        let result = solver
936            .minimize(quadratic, &[0.0, 0.0])
937            .expect("L-BFGS-B failed");
938        assert!(result.converged || result.n_iter > 0);
939        assert_abs_diff_eq!(result.x[1], 1.0, epsilon = 0.1);
940    }
941
942    #[test]
943    fn test_lbfgsb_rosenbrock() {
944        let mut opts = LBFGSBOptions::default();
945        opts.max_iter = 500;
946        opts.pgtol = 1e-4;
947        let solver = LBFGSB::new(opts);
948        let result = solver
949            .minimize(rosenbrock, &[0.5, 0.5])
950            .expect("L-BFGS-B failed");
951        assert!(result.converged);
952        assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 1e-2);
953        assert_abs_diff_eq!(result.x[1], 1.0, epsilon = 1e-2);
954    }
955
956    #[test]
957    fn test_cauchy_point_trivial() {
958        let x = vec![0.5, 0.5];
959        let g = vec![1.0, 1.0];
960        let lower = vec![Some(0.0), Some(0.0)];
961        let upper = vec![Some(1.0), Some(1.0)];
962        let (xc, _) = CauchyPoint::compute(&x, &g, &lower, &upper, 1.0, &[], &[]);
963        // With g = [1,1] > 0, Cauchy moves toward lower bounds
964        assert!(xc[0] >= 0.0);
965        assert!(xc[1] >= 0.0);
966        assert!(xc[0] <= 1.0);
967        assert!(xc[1] <= 1.0);
968    }
969}