Skip to main content

numra_optim/
bfgs.rs

1//! BFGS quasi-Newton optimizer.
2//!
3//! Implements the Broyden-Fletcher-Goldfarb-Shanno (BFGS) algorithm for
4//! unconstrained minimization. The method maintains an approximation to the
5//! inverse Hessian matrix and updates it using rank-2 corrections derived
6//! from gradient differences.
7//!
8//! Line search is performed using the strong Wolfe conditions via
9//! [`numra_nonlinear::line_search::wolfe_line_search`].
10//!
11//! Author: Moussa Leblouba
12//! Date: 8 February 2026
13//! Modified: 2 May 2026
14
15use numra_core::Scalar;
16
17use crate::error::OptimError;
18use crate::types::{IterationRecord, OptimOptions, OptimResult, OptimStatus};
19use numra_nonlinear::line_search::{wolfe_line_search, WolfeOptions};
20
21/// Inner product of two slices.
22fn dot<S: Scalar>(a: &[S], b: &[S]) -> S {
23    a.iter().zip(b.iter()).map(|(ai, bi)| *ai * *bi).sum()
24}
25
26/// Euclidean norm of a slice.
27fn norm<S: Scalar>(a: &[S]) -> S {
28    dot(a, a).sqrt()
29}
30
31/// BFGS quasi-Newton optimizer (struct API).
32///
33/// # Example
34///
35/// ```
36/// use numra_optim::{Bfgs, OptimOptions};
37///
38/// let bfgs = Bfgs::new(OptimOptions::default().gtol(1e-10));
39/// let f = |x: &[f64]| x[0] * x[0] + x[1] * x[1];
40/// let g = |x: &[f64], grad: &mut [f64]| { grad[0] = 2.0 * x[0]; grad[1] = 2.0 * x[1]; };
41/// let result = bfgs.minimize(f, g, &[3.0, 4.0]).unwrap();
42/// assert!(result.converged);
43/// ```
44pub struct Bfgs<S: Scalar> {
45    pub options: OptimOptions<S>,
46}
47
48impl<S: Scalar> Bfgs<S> {
49    /// Create a new BFGS optimizer with the given options.
50    pub fn new(options: OptimOptions<S>) -> Self {
51        Self { options }
52    }
53
54    /// Minimize `f` starting from `x0`.
55    ///
56    /// `grad` writes the gradient of `f` at `x` into the provided buffer.
57    pub fn minimize<F, G>(&self, f: F, grad: G, x0: &[S]) -> Result<OptimResult<S>, OptimError>
58    where
59        F: Fn(&[S]) -> S,
60        G: Fn(&[S], &mut [S]),
61    {
62        bfgs_minimize(f, grad, x0, &self.options)
63    }
64}
65
66/// Minimize `f` using the BFGS quasi-Newton method.
67///
68/// # Arguments
69///
70/// * `f` - Objective function mapping `&[S]` to `S`.
71/// * `grad` - Gradient of `f`. Takes `(x, g)` and writes the gradient into `g`.
72/// * `x0` - Initial guess.
73/// * `opts` - Optimizer options (tolerances, max iterations, etc.).
74///
75/// # Returns
76///
77/// An [`OptimResult`] containing the minimizer, minimum value, final gradient,
78/// iteration count, and convergence status.
79pub fn bfgs_minimize<S: Scalar, F, G>(
80    f: F,
81    grad: G,
82    x0: &[S],
83    opts: &OptimOptions<S>,
84) -> Result<OptimResult<S>, OptimError>
85where
86    F: Fn(&[S]) -> S,
87    G: Fn(&[S], &mut [S]),
88{
89    let start = std::time::Instant::now();
90    let n = x0.len();
91    if n == 0 {
92        return Err(OptimError::DimensionMismatch {
93            expected: 1,
94            actual: 0,
95        });
96    }
97
98    // Current iterate
99    let mut x = x0.to_vec();
100    let mut f_val = f(&x);
101    let mut n_feval: usize = 1;
102    let mut n_geval: usize = 0;
103
104    // Current gradient
105    let mut g = vec![S::ZERO; n];
106    grad(&x, &mut g);
107    n_geval += 1;
108
109    // Check if already at minimum
110    let g_norm = norm(&g);
111    if g_norm <= opts.gtol {
112        return Ok(OptimResult::unconstrained(
113            x,
114            f_val,
115            g,
116            0,
117            n_feval,
118            n_geval,
119            true,
120            "gradient norm below tolerance at start".into(),
121            OptimStatus::GradientConverged,
122        )
123        .with_wall_time(start));
124    }
125
126    // Initialize inverse Hessian approximation as identity (row-major flat vec)
127    let mut h_inv = vec![S::ZERO; n * n];
128    for i in 0..n {
129        h_inv[i * n + i] = S::ONE;
130    }
131
132    let mut history = Vec::new();
133
134    // Wolfe line search options (quasi-Newton defaults)
135    let wolfe_opts = WolfeOptions::default();
136
137    // Working buffers
138    let mut d = vec![S::ZERO; n]; // search direction
139    let mut g_new = vec![S::ZERO; n];
140    let mut s = vec![S::ZERO; n]; // step: x_new - x
141    let mut y = vec![S::ZERO; n]; // gradient difference: g_new - g
142    let mut hy = vec![S::ZERO; n]; // H_inv * y
143
144    for iter in 0..opts.max_iter {
145        // Compute search direction: d = -H_inv * g
146        for i in 0..n {
147            let mut sum = S::ZERO;
148            for j in 0..n {
149                sum += h_inv[i * n + j] * g[j];
150            }
151            d[i] = -sum;
152        }
153
154        // Wolfe line search
155        let ls_result = wolfe_line_search(&f, &grad, &x, &d, f_val, &g, &wolfe_opts)?;
156        let alpha = ls_result.step;
157        let f_new = ls_result.f_new;
158        n_feval += ls_result.n_eval;
159
160        // Compute step s = alpha * d and update x
161        for i in 0..n {
162            s[i] = alpha * d[i];
163            x[i] += s[i];
164        }
165
166        // Evaluate gradient at new point
167        grad(&x, &mut g_new);
168        n_geval += 1;
169
170        // Compute gradient difference y = g_new - g
171        for i in 0..n {
172            y[i] = g_new[i] - g[i];
173        }
174
175        // Check convergence: gradient norm
176        let g_new_norm = norm(&g_new);
177
178        history.push(IterationRecord {
179            iteration: iter,
180            objective: f_new,
181            gradient_norm: g_new_norm,
182            step_size: alpha,
183            constraint_violation: S::ZERO,
184        });
185
186        if g_new_norm <= opts.gtol {
187            g.copy_from_slice(&g_new);
188            return Ok((OptimResult {
189                history,
190                ..OptimResult::unconstrained(
191                    x,
192                    f_new,
193                    g,
194                    iter + 1,
195                    n_feval,
196                    n_geval,
197                    true,
198                    "gradient norm below tolerance".into(),
199                    OptimStatus::GradientConverged,
200                )
201            })
202            .with_wall_time(start));
203        }
204
205        // Check convergence: function value change
206        let f_change = (f_new - f_val).abs();
207        if f_change <= opts.ftol * (S::ONE + f_val.abs()) {
208            g.copy_from_slice(&g_new);
209            return Ok((OptimResult {
210                history,
211                ..OptimResult::unconstrained(
212                    x,
213                    f_new,
214                    g,
215                    iter + 1,
216                    n_feval,
217                    n_geval,
218                    true,
219                    "function change below tolerance".into(),
220                    OptimStatus::FunctionConverged,
221                )
222            })
223            .with_wall_time(start));
224        }
225
226        // Check convergence: step size
227        let s_norm = norm(&s);
228        let x_norm = norm(&x);
229        if s_norm <= opts.xtol * (S::ONE + x_norm) {
230            g.copy_from_slice(&g_new);
231            return Ok((OptimResult {
232                history,
233                ..OptimResult::unconstrained(
234                    x,
235                    f_new,
236                    g,
237                    iter + 1,
238                    n_feval,
239                    n_geval,
240                    true,
241                    "step size below tolerance".into(),
242                    OptimStatus::StepConverged,
243                )
244            })
245            .with_wall_time(start));
246        }
247
248        // BFGS update of inverse Hessian
249        let sy = dot(&s, &y);
250        if sy > S::from_f64(1e-16) {
251            let rho = S::ONE / sy;
252
253            // Compute hy = H_inv * y
254            for i in 0..n {
255                let mut sum = S::ZERO;
256                for j in 0..n {
257                    sum += h_inv[i * n + j] * y[j];
258                }
259                hy[i] = sum;
260            }
261
262            let yhy = dot(&y, &hy);
263
264            // Sherman-Morrison-Woodbury BFGS update:
265            // H_new = (I - rho*s*y^T) * H * (I - rho*y*s^T) + rho*s*s^T
266            // Expanded: H_new = H + rho*(1 + rho*y^T*H*y)*s*s^T - rho*(H*y*s^T + s*y^T*H)
267            for i in 0..n {
268                for j in 0..n {
269                    h_inv[i * n + j] +=
270                        rho * ((S::ONE + rho * yhy) * s[i] * s[j] - hy[i] * s[j] - s[i] * hy[j]);
271                }
272            }
273        }
274
275        // Update state for next iteration
276        f_val = f_new;
277        g.copy_from_slice(&g_new);
278    }
279
280    Ok((OptimResult {
281        history,
282        ..OptimResult::unconstrained(
283            x,
284            f_val,
285            g,
286            opts.max_iter,
287            n_feval,
288            n_geval,
289            false,
290            "maximum iterations reached".into(),
291            OptimStatus::MaxIterations,
292        )
293    })
294    .with_wall_time(start))
295}
296
297#[cfg(test)]
298mod tests {
299    use super::*;
300    use approx::assert_abs_diff_eq;
301
302    #[test]
303    fn test_bfgs_quadratic() {
304        // f(x) = x0^2 + 4*x1^2, minimum at (0, 0)
305        let f = |x: &[f64]| x[0] * x[0] + 4.0 * x[1] * x[1];
306        let grad = |x: &[f64], g: &mut [f64]| {
307            g[0] = 2.0 * x[0];
308            g[1] = 8.0 * x[1];
309        };
310
311        let x0 = [5.0, 3.0];
312        let opts = OptimOptions::default().gtol(1e-10);
313        let result = bfgs_minimize(f, grad, &x0, &opts).unwrap();
314
315        assert!(result.converged, "should converge: {}", result.message);
316        assert_abs_diff_eq!(result.x[0], 0.0, epsilon = 1e-8);
317        assert_abs_diff_eq!(result.x[1], 0.0, epsilon = 1e-8);
318        assert_abs_diff_eq!(result.f, 0.0, epsilon = 1e-14);
319        assert!(
320            result.wall_time_secs > 0.0,
321            "wall_time_secs should be positive"
322        );
323    }
324
325    #[test]
326    fn test_bfgs_rosenbrock() {
327        // f(x) = (1 - x0)^2 + 100*(x1 - x0^2)^2, minimum at (1, 1)
328        let f = |x: &[f64]| {
329            let a = 1.0 - x[0];
330            let b = x[1] - x[0] * x[0];
331            a * a + 100.0 * b * b
332        };
333        let grad = |x: &[f64], g: &mut [f64]| {
334            g[0] = -2.0 * (1.0 - x[0]) - 400.0 * x[0] * (x[1] - x[0] * x[0]);
335            g[1] = 200.0 * (x[1] - x[0] * x[0]);
336        };
337
338        let x0 = [-1.0, 1.0];
339        let opts = OptimOptions::default().gtol(1e-8).max_iter(2000);
340        let result = bfgs_minimize(f, grad, &x0, &opts).unwrap();
341
342        assert!(result.converged, "should converge: {}", result.message);
343        assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 1e-5);
344        assert_abs_diff_eq!(result.x[1], 1.0, epsilon = 1e-5);
345    }
346
347    #[test]
348    fn test_bfgs_struct_api() {
349        // Simple quadratic via the Bfgs struct API
350        let f = |x: &[f64]| x[0] * x[0] + x[1] * x[1];
351        let grad = |x: &[f64], g: &mut [f64]| {
352            g[0] = 2.0 * x[0];
353            g[1] = 2.0 * x[1];
354        };
355
356        let bfgs = Bfgs::new(OptimOptions::default().gtol(1e-10));
357        let result = bfgs.minimize(f, grad, &[3.0, 4.0]).unwrap();
358
359        assert!(result.converged);
360        assert_abs_diff_eq!(result.x[0], 0.0, epsilon = 1e-8);
361        assert_abs_diff_eq!(result.x[1], 0.0, epsilon = 1e-8);
362        assert!(result.iterations > 0);
363        assert!(result.n_feval > 0);
364        assert!(result.n_geval > 0);
365    }
366
367    #[test]
368    fn test_bfgs_history() {
369        let result = bfgs_minimize(
370            |x: &[f64]| x[0] * x[0] + x[1] * x[1],
371            |x: &[f64], g: &mut [f64]| {
372                g[0] = 2.0 * x[0];
373                g[1] = 2.0 * x[1];
374            },
375            &[5.0, 3.0],
376            &OptimOptions::default(),
377        )
378        .unwrap();
379        assert!(!result.history.is_empty(), "history should not be empty");
380        // Objective should decrease
381        for w in result.history.windows(2) {
382            assert!(
383                w[1].objective <= w[0].objective + 1e-10,
384                "objective should decrease: {} -> {}",
385                w[0].objective,
386                w[1].objective
387            );
388        }
389    }
390
391    #[test]
392    fn test_bfgs_high_dim() {
393        // f(x) = sum(x_i^2), n=20, start from [1, 2, ..., 20]
394        let n = 20;
395        let f = |x: &[f64]| x.iter().copied().map(|xi| xi * xi).sum::<f64>();
396        let grad = |x: &[f64], g: &mut [f64]| {
397            for i in 0..x.len() {
398                g[i] = 2.0 * x[i];
399            }
400        };
401
402        let x0: Vec<f64> = (1..=n).map(|i| i as f64).collect();
403        let opts = OptimOptions::default().gtol(1e-10);
404        let result = bfgs_minimize(f, grad, &x0, &opts).unwrap();
405
406        assert!(result.converged, "should converge: {}", result.message);
407        for i in 0..n {
408            assert_abs_diff_eq!(result.x[i], 0.0, epsilon = 1e-6);
409        }
410        assert_abs_diff_eq!(result.f, 0.0, epsilon = 1e-10);
411    }
412}