Skip to main content

scivex_optim/minimize/
lbfgsb.rs

1//! L-BFGS-B: Limited-memory BFGS with box constraints.
2//!
3//! Extends L-BFGS to handle per-variable lower and upper bounds. Uses
4//! projected gradient steps with a limited-memory (m=10) two-loop
5//! recursion for the approximate inverse Hessian.
6
7use scivex_core::{Float, Tensor};
8
9use crate::error::Result;
10
11use super::{MinimizeOptions, MinimizeResult};
12
13/// Per-variable box constraint.
14///
15/// # Examples
16///
17/// ```
18/// # use scivex_optim::minimize::Bounds;
19/// let b = Bounds::both(0.0_f64, 1.0);
20/// assert_eq!(b.lower, Some(0.0));
21/// assert_eq!(b.upper, Some(1.0));
22/// ```
23#[cfg_attr(
24    feature = "serde-support",
25    derive(serde::Serialize, serde::Deserialize)
26)]
27#[derive(Debug, Clone)]
28pub struct Bounds<T> {
29    /// Optional lower bound.
30    pub lower: Option<T>,
31    /// Optional upper bound.
32    pub upper: Option<T>,
33}
34
35impl<T> Bounds<T> {
36    /// No bounds.
37    ///
38    /// # Examples
39    ///
40    /// ```
41    /// # use scivex_optim::minimize::Bounds;
42    /// let b = Bounds::<f64>::none();
43    /// assert!(b.lower.is_none());
44    /// assert!(b.upper.is_none());
45    /// ```
46    pub fn none() -> Self {
47        Self {
48            lower: None,
49            upper: None,
50        }
51    }
52
53    /// Lower bound only.
54    ///
55    /// # Examples
56    ///
57    /// ```
58    /// # use scivex_optim::minimize::Bounds;
59    /// let b = Bounds::lower(0.0_f64);
60    /// assert_eq!(b.lower, Some(0.0));
61    /// ```
62    pub fn lower(lb: T) -> Self {
63        Self {
64            lower: Some(lb),
65            upper: None,
66        }
67    }
68
69    /// Upper bound only.
70    ///
71    /// # Examples
72    ///
73    /// ```
74    /// # use scivex_optim::minimize::Bounds;
75    /// let b = Bounds::upper(10.0_f64);
76    /// assert_eq!(b.upper, Some(10.0));
77    /// ```
78    pub fn upper(ub: T) -> Self {
79        Self {
80            lower: None,
81            upper: Some(ub),
82        }
83    }
84
85    /// Both lower and upper bounds.
86    ///
87    /// # Examples
88    ///
89    /// ```
90    /// # use scivex_optim::minimize::Bounds;
91    /// let b = Bounds::both(-1.0_f64, 1.0);
92    /// assert_eq!(b.lower, Some(-1.0));
93    /// assert_eq!(b.upper, Some(1.0));
94    /// ```
95    pub fn both(lb: T, ub: T) -> Self {
96        Self {
97            lower: Some(lb),
98            upper: Some(ub),
99        }
100    }
101}
102
103/// Project `x` onto the feasible box defined by `bounds`.
104fn project<T: Float>(x: &mut [T], bounds: &[Bounds<T>]) {
105    for (xi, b) in x.iter_mut().zip(bounds.iter()) {
106        #[allow(clippy::collapsible_if)]
107        if let Some(lb) = b.lower {
108            if *xi < lb {
109                *xi = lb;
110            }
111        }
112        #[allow(clippy::collapsible_if)]
113        if let Some(ub) = b.upper {
114            if *xi > ub {
115                *xi = ub;
116            }
117        }
118    }
119}
120
121/// Minimize `f` using L-BFGS-B (bounded limited-memory BFGS).
122///
123/// Uses a two-loop recursion with `m=10` stored correction pairs, combined
124/// with projected gradient steps for per-variable box constraints.
125///
126/// # Examples
127///
128/// ```
129/// # use scivex_core::Tensor;
130/// # use scivex_optim::minimize::{lbfgsb, Bounds, MinimizeOptions};
131/// // Minimize f(x) = (x-5)^2 with x in [0, 3]
132/// let result = lbfgsb(
133///     |x: &Tensor<f64>| { let v = x.as_slice()[0] - 5.0; v * v },
134///     |x: &Tensor<f64>| { Tensor::from_vec(vec![2.0 * (x.as_slice()[0] - 5.0)], vec![1]).unwrap() },
135///     &Tensor::from_vec(vec![1.0], vec![1]).unwrap(),
136///     &[Bounds::both(0.0, 3.0)],
137///     &MinimizeOptions::default(),
138/// ).unwrap();
139/// assert!((result.x.as_slice()[0] - 3.0).abs() < 1e-6); // clipped to bound
140/// ```
141pub fn lbfgsb<T, F, G>(
142    f: F,
143    grad: G,
144    x0: &Tensor<T>,
145    bounds: &[Bounds<T>],
146    options: &MinimizeOptions<T>,
147) -> Result<MinimizeResult<T>>
148where
149    T: Float,
150    F: Fn(&Tensor<T>) -> T,
151    G: Fn(&Tensor<T>) -> Tensor<T>,
152{
153    let n = x0.numel();
154    let mut state = LbfgsbState::new(&f, &grad, x0, bounds, n);
155
156    for iter in 0..options.max_iter {
157        let pg_norm = projected_gradient_norm(&state.x, &state.g, bounds);
158        if pg_norm < options.gtol {
159            return Ok(state.into_result(iter, true));
160        }
161
162        let d = lbfgs_direction(&state.g, &state.s_hist, &state.y_hist, &state.rho_hist, n);
163        let p: Vec<T> = d.iter().map(|&v| -v).collect();
164
165        let (alpha, fx_new, ls_evals) = projected_line_search(
166            &f,
167            &LineSearchInput {
168                x: &state.x,
169                p: &p,
170                fx: state.fx,
171                g: &state.g,
172                bounds,
173                n,
174            },
175        );
176        state.f_evals += ls_evals;
177
178        let (step_norm, f_change) = state.step(&grad, &p, alpha, fx_new, bounds, n);
179
180        if step_norm < options.xtol
181            || (f_change < options.ftol && f_change < options.ftol * state.fx.abs().max(T::one()))
182        {
183            return Ok(state.into_result(iter + 1, true));
184        }
185    }
186
187    Ok(state.into_result(options.max_iter, false))
188}
189
190struct LbfgsbState<T: Float> {
191    x: Vec<T>,
192    x_tensor: Tensor<T>,
193    fx: T,
194    g: Vec<T>,
195    g_tensor: Tensor<T>,
196    f_evals: usize,
197    g_evals: usize,
198    s_hist: Vec<Vec<T>>,
199    y_hist: Vec<Vec<T>>,
200    rho_hist: Vec<T>,
201}
202
203impl<T: Float> LbfgsbState<T> {
204    fn new<F, G>(f: &F, grad: &G, x0: &Tensor<T>, bounds: &[Bounds<T>], n: usize) -> Self
205    where
206        F: Fn(&Tensor<T>) -> T,
207        G: Fn(&Tensor<T>) -> Tensor<T>,
208    {
209        let mut x = x0.as_slice().to_vec();
210        project(&mut x, bounds);
211        let x_tensor = Tensor::from_vec(x.clone(), vec![n]).expect("projected x0 length matches n");
212        let fx = f(&x_tensor);
213        let g_tensor = grad(&x_tensor);
214        let g = g_tensor.as_slice().to_vec();
215        Self {
216            x,
217            x_tensor,
218            fx,
219            g,
220            g_tensor,
221            f_evals: 1,
222            g_evals: 1,
223            s_hist: Vec::with_capacity(10),
224            y_hist: Vec::with_capacity(10),
225            rho_hist: Vec::with_capacity(10),
226        }
227    }
228
229    fn step<G: Fn(&Tensor<T>) -> Tensor<T>>(
230        &mut self,
231        grad: &G,
232        p: &[T],
233        alpha: T,
234        fx_new: T,
235        bounds: &[Bounds<T>],
236        n: usize,
237    ) -> (T, T) {
238        let mut x_new = vec![T::zero(); n];
239        for j in 0..n {
240            x_new[j] = self.x[j] + alpha * p[j];
241        }
242        project(&mut x_new, bounds);
243
244        let x_new_tensor =
245            Tensor::from_vec(x_new.clone(), vec![n]).expect("new x length matches n");
246        let g_new_tensor = grad(&x_new_tensor);
247        self.g_evals += 1;
248        let g_new = g_new_tensor.as_slice().to_vec();
249
250        let mut sy = T::zero();
251        let mut s_k = vec![T::zero(); n];
252        let mut y_k = vec![T::zero(); n];
253        for j in 0..n {
254            s_k[j] = x_new[j] - self.x[j];
255            y_k[j] = g_new[j] - self.g[j];
256            sy += s_k[j] * y_k[j];
257        }
258
259        let step_norm: T = s_k.iter().map(|&v| v * v).sum::<T>().sqrt();
260        let f_change = (self.fx - fx_new).abs();
261
262        if sy > T::epsilon() {
263            if self.s_hist.len() == 10 {
264                self.s_hist.remove(0);
265                self.y_hist.remove(0);
266                self.rho_hist.remove(0);
267            }
268            self.s_hist.push(s_k);
269            self.y_hist.push(y_k);
270            self.rho_hist.push(T::one() / sy);
271        }
272
273        self.x = x_new;
274        self.x_tensor = x_new_tensor;
275        self.fx = fx_new;
276        self.g = g_new;
277        self.g_tensor = g_new_tensor;
278
279        (step_norm, f_change)
280    }
281
282    fn into_result(self, iterations: usize, converged: bool) -> MinimizeResult<T> {
283        MinimizeResult {
284            x: self.x_tensor,
285            f_val: self.fx,
286            grad: Some(self.g_tensor),
287            iterations,
288            f_evals: self.f_evals,
289            g_evals: self.g_evals,
290            converged,
291        }
292    }
293}
294
295/// Compute the norm of the projected gradient.
296///
297/// For each component, the projected gradient is zero if:
298/// - x_i is at the lower bound and g_i > 0
299/// - x_i is at the upper bound and g_i < 0
300fn projected_gradient_norm<T: Float>(x: &[T], g: &[T], bounds: &[Bounds<T>]) -> T {
301    let mut norm_sq = T::zero();
302    for i in 0..x.len() {
303        let gi = g[i];
304        let at_lower = bounds[i]
305            .lower
306            .is_some_and(|lb| x[i] <= lb && gi > T::zero());
307        let at_upper = bounds[i]
308            .upper
309            .is_some_and(|ub| x[i] >= ub && gi < T::zero());
310        if !at_lower && !at_upper {
311            norm_sq += gi * gi;
312        }
313    }
314    norm_sq.sqrt()
315}
316
317/// L-BFGS two-loop recursion to approximate H_inv * g.
318fn lbfgs_direction<T: Float>(
319    g: &[T],
320    s_history: &[Vec<T>],
321    y_history: &[Vec<T>],
322    rho_history: &[T],
323    n: usize,
324) -> Vec<T> {
325    let k = s_history.len();
326    if k == 0 {
327        // No history: use gradient directly (steepest descent)
328        return g.to_vec();
329    }
330
331    let mut q = g.to_vec();
332    let mut alphas = vec![T::zero(); k];
333
334    // First loop: backward
335    for i in (0..k).rev() {
336        let mut si_q = T::zero();
337        for j in 0..n {
338            si_q += s_history[i][j] * q[j];
339        }
340        alphas[i] = rho_history[i] * si_q;
341        for j in 0..n {
342            q[j] -= alphas[i] * y_history[i][j];
343        }
344    }
345
346    // Initial Hessian approximation: gamma * I
347    // gamma = s_{k-1}^T y_{k-1} / (y_{k-1}^T y_{k-1})
348    let last = k - 1;
349    let mut sy = T::zero();
350    let mut yy = T::zero();
351    for j in 0..n {
352        sy += s_history[last][j] * y_history[last][j];
353        yy += y_history[last][j] * y_history[last][j];
354    }
355    let gamma = if yy > T::epsilon() { sy / yy } else { T::one() };
356
357    let mut r: Vec<T> = q.iter().map(|&v| gamma * v).collect();
358
359    // Second loop: forward
360    for i in 0..k {
361        let mut yi_r = T::zero();
362        for j in 0..n {
363            yi_r += y_history[i][j] * r[j];
364        }
365        let beta = rho_history[i] * yi_r;
366        for j in 0..n {
367            r[j] += (alphas[i] - beta) * s_history[i][j];
368        }
369    }
370
371    r
372}
373
374struct LineSearchInput<'a, T> {
375    x: &'a [T],
376    p: &'a [T],
377    fx: T,
378    g: &'a [T],
379    bounds: &'a [Bounds<T>],
380    n: usize,
381}
382
383/// Projected backtracking line search with Armijo condition.
384fn projected_line_search<T, F>(f: &F, input: &LineSearchInput<'_, T>) -> (T, T, usize)
385where
386    T: Float,
387    F: Fn(&Tensor<T>) -> T,
388{
389    let x = input.x;
390    let p = input.p;
391    let fx = input.fx;
392    let g = input.g;
393    let bounds = input.bounds;
394    let n = input.n;
395
396    let c1 = T::from_f64(1e-4);
397    let shrink = T::from_f64(0.5);
398
399    let dg: T = g.iter().zip(p.iter()).map(|(&gi, &pi)| gi * pi).sum();
400
401    let mut alpha = T::one();
402    let mut evals = 0usize;
403    let max_ls = 20usize;
404
405    for _ in 0..max_ls {
406        let mut x_trial = vec![T::zero(); n];
407        for j in 0..n {
408            x_trial[j] = x[j] + alpha * p[j];
409        }
410        project(&mut x_trial, bounds);
411
412        let t = Tensor::from_vec(x_trial, vec![n]).expect("trial point length matches n");
413        let f_trial = f(&t);
414        evals += 1;
415
416        if f_trial <= fx + c1 * alpha * dg {
417            return (alpha, f_trial, evals);
418        }
419        alpha *= shrink;
420    }
421
422    // Return last attempt
423    let mut x_trial = vec![T::zero(); n];
424    for j in 0..n {
425        x_trial[j] = x[j] + alpha * p[j];
426    }
427    project(&mut x_trial, bounds);
428    let t = Tensor::from_vec(x_trial, vec![n]).expect("trial point length matches n");
429    let f_trial = f(&t);
430    evals += 1;
431    (alpha, f_trial, evals)
432}
433
434#[cfg(test)]
435mod tests {
436    use super::*;
437
438    #[test]
439    fn test_lbfgsb_unconstrained_quadratic() {
440        let f = |x: &Tensor<f64>| {
441            let s = x.as_slice();
442            s[0] * s[0] + s[1] * s[1]
443        };
444        let grad = |x: &Tensor<f64>| {
445            let s = x.as_slice();
446            Tensor::from_vec(vec![2.0 * s[0], 2.0 * s[1]], vec![2]).unwrap()
447        };
448
449        let x0 = Tensor::from_vec(vec![5.0, 5.0], vec![2]).unwrap();
450        let bounds = vec![Bounds::none(), Bounds::none()];
451        let result = lbfgsb(f, grad, &x0, &bounds, &MinimizeOptions::default()).unwrap();
452        assert!(result.converged);
453        let s = result.x.as_slice();
454        assert!(s[0].abs() < 1e-6, "x = {}", s[0]);
455        assert!(s[1].abs() < 1e-6, "y = {}", s[1]);
456    }
457
458    #[test]
459    fn test_lbfgsb_active_bounds() {
460        // f(x, y) = (x - 3)^2 + (y - 3)^2, bounds: x >= 1, y >= 2
461        // Unconstrained min at (3, 3), which is feasible.
462        let f = |x: &Tensor<f64>| {
463            let s = x.as_slice();
464            (s[0] - 3.0).powi(2) + (s[1] - 3.0).powi(2)
465        };
466        let grad = |x: &Tensor<f64>| {
467            let s = x.as_slice();
468            Tensor::from_vec(vec![2.0 * (s[0] - 3.0), 2.0 * (s[1] - 3.0)], vec![2]).unwrap()
469        };
470
471        let x0 = Tensor::from_vec(vec![1.0, 2.0], vec![2]).unwrap();
472        let bounds = vec![Bounds::lower(1.0), Bounds::lower(2.0)];
473        let result = lbfgsb(f, grad, &x0, &bounds, &MinimizeOptions::default()).unwrap();
474        assert!(result.converged);
475        let s = result.x.as_slice();
476        assert!((s[0] - 3.0).abs() < 1e-4, "x = {}", s[0]);
477        assert!((s[1] - 3.0).abs() < 1e-4, "y = {}", s[1]);
478    }
479
480    #[test]
481    fn test_lbfgsb_active_bound_at_solution() {
482        // f(x) = (x - (-1))^2, bound: x >= 0
483        // Unconstrained min at -1, but bound forces solution at 0.
484        let f = |x: &Tensor<f64>| {
485            let s = x.as_slice();
486            (s[0] + 1.0).powi(2)
487        };
488        let grad = |x: &Tensor<f64>| {
489            let s = x.as_slice();
490            Tensor::from_vec(vec![2.0 * (s[0] + 1.0)], vec![1]).unwrap()
491        };
492
493        let x0 = Tensor::from_vec(vec![5.0], vec![1]).unwrap();
494        let bounds = vec![Bounds::lower(0.0)];
495        let result = lbfgsb(f, grad, &x0, &bounds, &MinimizeOptions::default()).unwrap();
496        assert!(result.converged);
497        let s = result.x.as_slice();
498        assert!(s[0].abs() < 1e-6, "x = {}, expected 0.0", s[0]);
499    }
500
501    #[test]
502    fn test_lbfgsb_all_bounded() {
503        // f(x, y) = x^2 + y^2, bounds: 1 <= x <= 2, 1 <= y <= 2
504        // Min at (1, 1) since origin is outside bounds.
505        let f = |x: &Tensor<f64>| {
506            let s = x.as_slice();
507            s[0] * s[0] + s[1] * s[1]
508        };
509        let grad = |x: &Tensor<f64>| {
510            let s = x.as_slice();
511            Tensor::from_vec(vec![2.0 * s[0], 2.0 * s[1]], vec![2]).unwrap()
512        };
513
514        let x0 = Tensor::from_vec(vec![1.5, 1.5], vec![2]).unwrap();
515        let bounds = vec![Bounds::both(1.0, 2.0), Bounds::both(1.0, 2.0)];
516        let result = lbfgsb(f, grad, &x0, &bounds, &MinimizeOptions::default()).unwrap();
517        assert!(result.converged);
518        let s = result.x.as_slice();
519        assert!((s[0] - 1.0).abs() < 1e-4, "x = {}", s[0]);
520        assert!((s[1] - 1.0).abs() < 1e-4, "y = {}", s[1]);
521    }
522}