// scirs2_optimize/unconstrained/bfgs.rs

//! BFGS (Broyden-Fletcher-Goldfarb-Shanno) algorithm for unconstrained optimization
3use crate::error::OptimizeError;
4use crate::unconstrained::line_search::backtracking_line_search;
5use crate::unconstrained::result::OptimizeResult;
6use crate::unconstrained::utils::{array_diff_norm, check_convergence, finite_difference_gradient};
7use crate::unconstrained::Options;
8use scirs2_core::ndarray::{Array1, Array2, ArrayView1, Axis};
9
10/// Implements the BFGS algorithm with optional bounds support
11#[allow(dead_code)]
12pub fn minimize_bfgs<F, S>(
13    mut fun: F,
14    x0: Array1<f64>,
15    options: &Options,
16) -> Result<OptimizeResult<S>, OptimizeError>
17where
18    F: FnMut(&ArrayView1<f64>) -> S + Clone,
19    S: Into<f64> + Clone,
20{
21    // Get options or use defaults
22    let ftol = options.ftol;
23    let gtol = options.gtol;
24    let max_iter = options.max_iter;
25    let eps = options.eps;
26    let bounds = options.bounds.as_ref();
27
28    // Initialize variables
29    let n = x0.len();
30    let mut x = x0.to_owned();
31
32    // Ensure initial point is within bounds
33    if let Some(bounds) = bounds {
34        bounds.project(x.as_slice_mut().unwrap());
35    }
36
37    let mut f = fun(&x.view()).into();
38
39    // Calculate initial gradient using finite differences
40    let mut g = finite_difference_gradient(&mut fun, &x.view(), eps)?;
41
42    // Initialize approximation of inverse Hessian with identity matrix
43    let mut h_inv = Array2::eye(n);
44
45    // Initialize counters
46    let mut iter = 0;
47    let mut nfev = 1 + n; // Initial evaluation plus gradient calculations
48
49    // Main loop
50    while iter < max_iter {
51        // Check convergence on gradient
52        if g.mapv(|gi| gi.abs()).sum() < gtol {
53            break;
54        }
55
56        // Compute search direction
57        let mut p = -h_inv.dot(&g);
58
59        // Project search direction for bounded optimization
60        if let Some(bounds) = bounds {
61            for i in 0..n {
62                let mut can_decrease = true;
63                let mut can_increase = true;
64
65                // Check if at boundary
66                if let Some(lb) = bounds.lower[i] {
67                    if x[i] <= lb + eps {
68                        can_decrease = false;
69                    }
70                }
71                if let Some(ub) = bounds.upper[i] {
72                    if x[i] >= ub - eps {
73                        can_increase = false;
74                    }
75                }
76
77                // Project gradient component
78                if (g[i] > 0.0 && !can_decrease) || (g[i] < 0.0 && !can_increase) {
79                    p[i] = 0.0;
80                }
81            }
82
83            // If no movement is possible, we're at a constrained optimum
84            if p.mapv(|pi| pi.abs()).sum() < 1e-10 {
85                break;
86            }
87        }
88
89        // Line search
90        let alpha_init = 1.0;
91        let (alpha, f_new) = backtracking_line_search(
92            &mut fun,
93            &x.view(),
94            f,
95            &p.view(),
96            &g.view(),
97            alpha_init,
98            0.0001,
99            0.5,
100            bounds,
101        );
102
103        nfev += 1; // Count line search evaluations
104
105        // Update position
106        let s = alpha * &p;
107        let x_new = &x + &s;
108
109        // Check step size convergence
110        if array_diff_norm(&x_new.view(), &x.view()) < options.xtol {
111            x = x_new;
112            break;
113        }
114
115        // Calculate new gradient
116        let g_new = finite_difference_gradient(&mut fun, &x_new.view(), eps)?;
117        nfev += n;
118
119        // Gradient difference
120        let y = &g_new - &g;
121
122        // Check convergence on function value
123        if check_convergence(
124            f - f_new,
125            0.0,
126            g_new.mapv(|x| x.abs()).sum(),
127            ftol,
128            0.0,
129            gtol,
130        ) {
131            x = x_new;
132            g = g_new;
133            break;
134        }
135
136        // Update inverse Hessian approximation using BFGS formula
137        let s_dot_y = s.dot(&y);
138        if s_dot_y > 1e-10 {
139            let rho = 1.0 / s_dot_y;
140            let i_mat = Array2::eye(n);
141
142            // Compute (I - ρ y s^T)
143            let y_col = y.clone().insert_axis(Axis(1));
144            let s_row = s.clone().insert_axis(Axis(0));
145            let y_s_t = y_col.dot(&s_row);
146            let term1 = &i_mat - &(&y_s_t * rho);
147
148            // Compute (I - ρ s y^T)
149            let s_col = s.clone().insert_axis(Axis(1));
150            let y_row = y.clone().insert_axis(Axis(0));
151            let s_y_t = s_col.dot(&y_row);
152            let term2 = &i_mat - &(&s_y_t * rho);
153
154            // Update H_inv = (I - ρ y s^T) H (I - ρ s y^T) + ρ s s^T
155            let term3 = term1.dot(&h_inv);
156            h_inv = term3.dot(&term2) + rho * s_col.dot(&s_row);
157        }
158
159        // Update variables for next iteration
160        x = x_new;
161        f = f_new;
162        g = g_new;
163
164        iter += 1;
165    }
166
167    // Final check for bounds
168    if let Some(bounds) = bounds {
169        bounds.project(x.as_slice_mut().unwrap());
170    }
171
172    // Use original function for final value
173    let final_fun = fun(&x.view());
174
175    // Create and return result
176    Ok(OptimizeResult {
177        x,
178        fun: final_fun,
179        nit: iter,
180        func_evals: nfev,
181        nfev,
182        success: iter < max_iter,
183        message: if iter < max_iter {
184            "Optimization terminated successfully.".to_string()
185        } else {
186            "Maximum iterations reached.".to_string()
187        },
188        jacobian: Some(g),
189        hessian: None,
190    })
191}
192
#[cfg(test)]
mod tests {
    use super::*;
    use crate::unconstrained::Bounds;
    use approx::assert_abs_diff_eq;

    /// Convex quadratic f(x) = 0.5 xᵀAx + bᵀx with a known analytic minimizer.
    #[test]
    fn test_bfgs_quadratic() {
        let objective = |x: &ArrayView1<f64>| -> f64 {
            let a = Array2::from_shape_vec((2, 2), vec![2.0, 0.0, 0.0, 3.0]).unwrap();
            let b = Array1::from_vec(vec![-4.0, -6.0]);
            0.5 * x.dot(&a.dot(x)) + b.dot(x)
        };

        let start = Array1::from_vec(vec![0.0, 0.0]);
        let opts = Options::default();
        let result = minimize_bfgs(objective, start, &opts).unwrap();

        assert!(result.success);
        // The minimizer solves A x = -b, giving x = (2, 2).
        assert_abs_diff_eq!(result.x[0], 2.0, epsilon = 1e-6);
        assert_abs_diff_eq!(result.x[1], 2.0, epsilon = 1e-6);
    }

    /// The classic banana-valley function; needs a generous iteration budget.
    #[test]
    fn test_bfgs_rosenbrock() {
        let rosenbrock = |x: &ArrayView1<f64>| -> f64 {
            let a = 1.0;
            let b = 100.0;
            (a - x[0]).powi(2) + b * (x[1] - x[0].powi(2)).powi(2)
        };

        let mut opts = Options::default();
        opts.max_iter = 2000; // Rosenbrock converges slowly from the origin.

        let result = minimize_bfgs(rosenbrock, Array1::from_vec(vec![0.0, 0.0]), &opts).unwrap();

        assert!(result.success);
        assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 3e-3);
        assert_abs_diff_eq!(result.x[1], 1.0, epsilon = 5e-3);
    }

    /// When the unconstrained optimum lies outside the box, the solver should
    /// stop at the nearest feasible corner.
    #[test]
    fn test_bfgs_with_bounds() {
        let objective =
            |x: &ArrayView1<f64>| -> f64 { (x[0] - 2.0).powi(2) + (x[1] - 3.0).powi(2) };

        let mut opts = Options::default();
        // Restrict the feasible region to the unit box [0, 1] x [0, 1].
        opts.bounds = Some(Bounds::new(&[
            (Some(0.0), Some(1.0)),
            (Some(0.0), Some(1.0)),
        ]));

        let result = minimize_bfgs(objective, Array1::from_vec(vec![0.0, 0.0]), &opts).unwrap();

        assert!(result.success);
        // The unconstrained optimum (2, 3) is infeasible, so we expect (1, 1).
        assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 1e-6);
        assert_abs_diff_eq!(result.x[1], 1.0, epsilon = 1e-6);
    }
}