Skip to main content

numra_optim/
lbfgsb.rs

1//! L-BFGS-B: bound-constrained limited-memory BFGS.
2//!
3//! Implements a projected L-BFGS method for minimization with simple box
4//! constraints on variables: `lo_i <= x_i <= hi_i`.
5//!
6//! Author: Moussa Leblouba
7//! Date: 8 February 2026
8//! Modified: 2 May 2026
9
10use numra_core::Scalar;
11
12use crate::error::OptimError;
13use crate::lbfgs::two_loop_recursion;
14use crate::types::{IterationRecord, OptimOptions, OptimResult, OptimStatus};
15use std::collections::VecDeque;
16
17/// Options for L-BFGS-B.
18#[derive(Clone, Debug)]
19pub struct LbfgsBOptions<S: Scalar> {
20    pub base: OptimOptions<S>,
21    pub memory: usize,
22}
23
24impl<S: Scalar> Default for LbfgsBOptions<S> {
25    fn default() -> Self {
26        Self {
27            base: OptimOptions::default(),
28            memory: 10,
29        }
30    }
31}
32
33impl<S: Scalar> LbfgsBOptions<S> {
34    pub fn memory(mut self, m: usize) -> Self {
35        self.memory = m;
36        self
37    }
38    pub fn max_iter(mut self, n: usize) -> Self {
39        self.base.max_iter = n;
40        self
41    }
42    pub fn gtol(mut self, tol: S) -> Self {
43        self.base.gtol = tol;
44        self
45    }
46}
47
48/// Project `x` onto the feasible box defined by `bounds`.
49pub fn project<S: Scalar>(x: &mut [S], bounds: &[Option<(S, S)>]) {
50    for (xi, bi) in x.iter_mut().zip(bounds.iter()) {
51        if let Some((lo, hi)) = bi {
52            *xi = xi.clamp(*lo, *hi);
53        }
54    }
55}
56
57/// Compute the projected gradient norm: ||P(x - g) - x||_inf.
58pub fn projected_gradient_norm<S: Scalar>(x: &[S], g: &[S], bounds: &[Option<(S, S)>]) -> S {
59    let mut norm = S::ZERO;
60    for i in 0..x.len() {
61        let mut xi_step = x[i] - g[i];
62        if let Some((lo, hi)) = bounds[i] {
63            xi_step = xi_step.clamp(lo, hi);
64        }
65        norm = norm.max((xi_step - x[i]).abs());
66    }
67    norm
68}
69
70/// Minimize `f` subject to box constraints using projected L-BFGS.
71///
72/// Uses the L-BFGS two-loop recursion for search direction and
73/// projected Armijo backtracking for step acceptance.
74pub fn lbfgsb_minimize<S: Scalar, F, G>(
75    f: F,
76    grad: G,
77    x0: &[S],
78    bounds: &[Option<(S, S)>],
79    opts: &LbfgsBOptions<S>,
80) -> Result<OptimResult<S>, OptimError>
81where
82    F: Fn(&[S]) -> S,
83    G: Fn(&[S], &mut [S]),
84{
85    let start = std::time::Instant::now();
86    let n = x0.len();
87    if n != bounds.len() {
88        return Err(OptimError::DimensionMismatch {
89            expected: n,
90            actual: bounds.len(),
91        });
92    }
93
94    let m = opts.memory;
95    let mut x = x0.to_vec();
96    project(&mut x, bounds);
97
98    let mut g_buf = vec![S::ZERO; n];
99    let mut fval = f(&x);
100    grad(&x, &mut g_buf);
101    let mut n_feval = 1_usize;
102    let mut n_geval = 1_usize;
103
104    let mut s_hist: VecDeque<Vec<S>> = VecDeque::with_capacity(m);
105    let mut y_hist: VecDeque<Vec<S>> = VecDeque::with_capacity(m);
106    let mut rho_hist: VecDeque<S> = VecDeque::with_capacity(m);
107
108    let c1 = S::from_f64(1e-4);
109    let mut f_prev;
110    let mut history = Vec::new();
111
112    for iter in 0..opts.base.max_iter {
113        f_prev = fval;
114        let pg_norm = projected_gradient_norm(&x, &g_buf, bounds);
115        if pg_norm < opts.base.gtol {
116            let active = active_bounds_at(&x, bounds);
117            return Ok((OptimResult {
118                active_bounds: active,
119                history,
120                ..OptimResult::unconstrained(
121                    x,
122                    fval,
123                    g_buf,
124                    iter,
125                    n_feval,
126                    n_geval,
127                    true,
128                    format!(
129                        "Converged: projected gradient norm {:.2e}",
130                        pg_norm.to_f64()
131                    ),
132                    OptimStatus::GradientConverged,
133                )
134            })
135            .with_wall_time(start));
136        }
137
138        // Compute L-BFGS direction d = -H*g (already negated by two_loop_recursion)
139        let d = two_loop_recursion::<S>(&g_buf, &s_hist, &y_hist, &rho_hist);
140
141        // Projected backtracking line search along d
142        // Find alpha such that f(P(x + alpha*d)) <= f(x) + c1 * g^T * (P(x+alpha*d) - x)
143        let mut alpha = S::ONE;
144        let mut x_new = vec![S::ZERO; n];
145        let mut f_new;
146        let mut accepted = false;
147
148        for _ls in 0..30 {
149            for i in 0..n {
150                x_new[i] = x[i] + alpha * d[i];
151            }
152            project(&mut x_new, bounds);
153
154            f_new = f(&x_new);
155            n_feval += 1;
156
157            // Directional derivative along projected step
158            let dg: S = g_buf
159                .iter()
160                .zip(x_new.iter().zip(x.iter()))
161                .map(|(gi, (xn, xo))| *gi * (*xn - *xo))
162                .sum();
163
164            if f_new <= fval + c1 * dg {
165                accepted = true;
166                fval = f_new;
167                break;
168            }
169
170            alpha *= S::from_f64(0.5);
171        }
172
173        if !accepted {
174            // Fall back to projected gradient step with backtracking
175            alpha = S::ONE;
176            for _ls in 0..30 {
177                for i in 0..n {
178                    x_new[i] = x[i] - alpha * g_buf[i];
179                }
180                project(&mut x_new, bounds);
181
182                f_new = f(&x_new);
183                n_feval += 1;
184
185                let dg: S = g_buf
186                    .iter()
187                    .zip(x_new.iter().zip(x.iter()))
188                    .map(|(gi, (xn, xo))| *gi * (*xn - *xo))
189                    .sum();
190
191                if f_new <= fval + c1 * dg {
192                    fval = f_new;
193                    accepted = true;
194                    break;
195                }
196                alpha *= S::from_f64(0.5);
197            }
198
199            if !accepted {
200                // Take the smallest projected gradient step anyway
201                fval = f(&x_new);
202                n_feval += 1;
203            }
204        }
205
206        // Compute step
207        let s: Vec<S> = x_new.iter().zip(x.iter()).map(|(a, b)| *a - *b).collect();
208        let s_norm: S = s.iter().copied().map(|si| si * si).sum::<S>().sqrt();
209
210        history.push(IterationRecord {
211            iteration: iter,
212            objective: fval,
213            gradient_norm: pg_norm,
214            step_size: alpha,
215            constraint_violation: S::ZERO,
216        });
217
218        // Check step convergence
219        let x_norm: S = x.iter().copied().map(|xi| xi * xi).sum::<S>().sqrt();
220        if s_norm < opts.base.xtol * (S::ONE + x_norm) {
221            let active = active_bounds_at(&x_new, bounds);
222            grad(&x_new, &mut g_buf);
223            n_geval += 1;
224            return Ok((OptimResult {
225                active_bounds: active,
226                history,
227                ..OptimResult::unconstrained(
228                    x_new,
229                    fval,
230                    g_buf,
231                    iter + 1,
232                    n_feval,
233                    n_geval,
234                    true,
235                    "step size below tolerance".into(),
236                    OptimStatus::StepConverged,
237                )
238            })
239            .with_wall_time(start));
240        }
241
242        // Check function convergence
243        let f_change = (f_prev - fval).abs();
244        if f_change < opts.base.ftol * (S::ONE + f_prev.abs()) && iter > 0 {
245            grad(&x_new, &mut g_buf);
246            n_geval += 1;
247            let active = active_bounds_at(&x_new, bounds);
248            return Ok((OptimResult {
249                active_bounds: active,
250                history,
251                ..OptimResult::unconstrained(
252                    x_new,
253                    fval,
254                    g_buf,
255                    iter + 1,
256                    n_feval,
257                    n_geval,
258                    true,
259                    "function change below tolerance".into(),
260                    OptimStatus::FunctionConverged,
261                )
262            })
263            .with_wall_time(start));
264        }
265
266        x = x_new;
267
268        // New gradient
269        let mut g_new = vec![S::ZERO; n];
270        grad(&x, &mut g_new);
271        n_geval += 1;
272
273        // L-BFGS update
274        let y: Vec<S> = g_new
275            .iter()
276            .zip(g_buf.iter())
277            .map(|(gn, go)| *gn - *go)
278            .collect();
279        let sy: S = s.iter().zip(y.iter()).map(|(si, yi)| *si * *yi).sum();
280
281        if sy > S::from_f64(1e-16) {
282            if s_hist.len() == m {
283                s_hist.pop_front();
284                y_hist.pop_front();
285                rho_hist.pop_front();
286            }
287            rho_hist.push_back(S::ONE / sy);
288            s_hist.push_back(s);
289            y_hist.push_back(y);
290        }
291
292        g_buf = g_new;
293    }
294
295    let active = active_bounds_at(&x, bounds);
296    Ok((OptimResult {
297        active_bounds: active,
298        history,
299        ..OptimResult::unconstrained(
300            x,
301            fval,
302            g_buf,
303            opts.base.max_iter,
304            n_feval,
305            n_geval,
306            false,
307            format!("Maximum iterations ({}) reached", opts.base.max_iter),
308            OptimStatus::MaxIterations,
309        )
310    })
311    .with_wall_time(start))
312}
313
314/// Return indices of variables that are at their bounds.
315fn active_bounds_at<S: Scalar>(x: &[S], bounds: &[Option<(S, S)>]) -> Vec<usize> {
316    let tol = S::from_f64(1e-12);
317    let mut active = Vec::new();
318    for (i, bi) in bounds.iter().enumerate() {
319        if let Some((lo, hi)) = bi {
320            if (x[i] - *lo).abs() < tol || (x[i] - *hi).abs() < tol {
321                active.push(i);
322            }
323        }
324    }
325    active
326}
327
328#[cfg(test)]
329mod tests {
330    use super::*;
331
332    #[test]
333    fn test_project() {
334        let mut x = vec![-2.0, 5.0, 0.5];
335        let bounds = vec![Some((0.0, 1.0)), Some((0.0, 3.0)), None];
336        project(&mut x, &bounds);
337        assert_eq!(x, vec![0.0, 3.0, 0.5]);
338    }
339
340    #[test]
341    fn test_projected_gradient_norm() {
342        let x = [0.0, 1.0];
343        let g = [-1.0, 2.0];
344        let bounds = vec![Some((0.0, 1.0)), Some((0.0, 1.0))];
345        let pgn = projected_gradient_norm(&x, &g, &bounds);
346        assert!((pgn - 1.0).abs() < 1e-14);
347    }
348
349    #[test]
350    fn test_lbfgsb_quadratic_at_boundary() {
351        // f(x) = (x0-2)^2 + (x1-2)^2, min at (2,2)
352        // bounds: 0 <= x0 <= 1, 0 <= x1 <= 3
353        // Constrained min: (1, 2)
354        let f = |x: &[f64]| (x[0] - 2.0).powi(2) + (x[1] - 2.0).powi(2);
355        let g = |x: &[f64], grad: &mut [f64]| {
356            grad[0] = 2.0 * (x[0] - 2.0);
357            grad[1] = 2.0 * (x[1] - 2.0);
358        };
359        let bounds = vec![Some((0.0, 1.0)), Some((0.0, 3.0))];
360        let opts = LbfgsBOptions {
361            base: OptimOptions::default().gtol(1e-10),
362            memory: 5,
363        };
364
365        let result = lbfgsb_minimize(f, g, &[0.5, 0.5], &bounds, &opts).unwrap();
366        assert!(result.converged, "did not converge: {}", result.message);
367        assert!((result.x[0] - 1.0).abs() < 1e-6, "x0={}", result.x[0]);
368        assert!((result.x[1] - 2.0).abs() < 1e-6, "x1={}", result.x[1]);
369        assert!(result.active_bounds.contains(&0));
370    }
371
372    #[test]
373    fn test_lbfgsb_rosenbrock_bounded() {
374        // Rosenbrock with bounds: 0 <= x0 <= 2, 0 <= x1 <= 2
375        // Unconstrained min at (1,1), which is inside bounds
376        let f = |x: &[f64]| (1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0] * x[0]).powi(2);
377        let g = |x: &[f64], grad: &mut [f64]| {
378            grad[0] = -2.0 * (1.0 - x[0]) - 400.0 * x[0] * (x[1] - x[0] * x[0]);
379            grad[1] = 200.0 * (x[1] - x[0] * x[0]);
380        };
381        let bounds = vec![Some((0.0, 2.0)), Some((0.0, 2.0))];
382        let opts = LbfgsBOptions {
383            base: OptimOptions::default().gtol(1e-6).max_iter(2000),
384            memory: 10,
385        };
386
387        let result = lbfgsb_minimize(f, g, &[0.1, 0.1], &bounds, &opts).unwrap();
388        assert!(result.converged, "did not converge: {}", result.message);
389        assert!(result.x[0] >= -1e-10 && result.x[0] <= 2.0 + 1e-10);
390        assert!(result.x[1] >= -1e-10 && result.x[1] <= 2.0 + 1e-10);
391        assert!((result.x[0] - 1.0).abs() < 1e-2, "x0={}", result.x[0]);
392        assert!((result.x[1] - 1.0).abs() < 1e-2, "x1={}", result.x[1]);
393    }
394
395    #[test]
396    fn test_lbfgsb_sphere_all_bounded() {
397        // f(x) = sum(x_i^2), n=5, all bounded [1, 10]
398        // Minimum at x = [1,1,1,1,1] (all at lower bound)
399        let f = |x: &[f64]| x.iter().map(|xi| xi * xi).sum::<f64>();
400        let g = |x: &[f64], grad: &mut [f64]| {
401            for i in 0..x.len() {
402                grad[i] = 2.0 * x[i];
403            }
404        };
405        let bounds: Vec<Option<(f64, f64)>> = vec![Some((1.0, 10.0)); 5];
406        let opts = LbfgsBOptions::default().gtol(1e-10);
407
408        let result = lbfgsb_minimize(f, g, &[5.0; 5], &bounds, &opts).unwrap();
409        assert!(result.converged, "did not converge: {}", result.message);
410        for &xi in &result.x {
411            assert!((xi - 1.0).abs() < 1e-6, "x_i={}", xi);
412        }
413    }
414}