// scirs2_optimize/unconstrained/quasi_newton.rs

1//! Quasi-Newton algorithms with different update formulas (SR1, DFP, BFGS)
2
3use crate::error::OptimizeError;
4use crate::unconstrained::line_search::backtracking_line_search;
5use crate::unconstrained::result::OptimizeResult;
6use crate::unconstrained::utils::{array_diff_norm, check_convergence, finite_difference_gradient};
7use crate::unconstrained::Options;
8use scirs2_core::ndarray::{Array1, Array2, ArrayView1, Axis};
9
/// Update formula for Quasi-Newton methods
///
/// Selects how the curvature approximation is refreshed each iteration in
/// `minimize_quasi_newton`. SR1 maintains a Hessian approximation `B`;
/// DFP and BFGS maintain an inverse-Hessian approximation `H`.
#[derive(Debug, Clone, Copy)]
pub enum UpdateFormula {
    /// Symmetric Rank-1 (SR1) update — rank-one correction of the Hessian approximation
    SR1,
    /// Davidon-Fletcher-Powell (DFP) update — rank-two correction of the inverse Hessian
    DFP,
    /// Broyden-Fletcher-Goldfarb-Shanno (BFGS) update — rank-two correction of the inverse Hessian
    BFGS,
}
20
/// Implements quasi-Newton algorithm with different update formulas
///
/// Minimizes `fun` starting at `x0`, approximating curvature according to the
/// chosen `update_formula`:
///
/// * [`UpdateFormula::SR1`] maintains a Hessian approximation `B` and obtains
///   the search direction by explicitly inverting `B` (resetting `B` to the
///   identity if the inversion reports a singular matrix).
/// * [`UpdateFormula::DFP`] and [`UpdateFormula::BFGS`] maintain an inverse
///   Hessian approximation `H`, giving the direction `p = -H g` directly.
///
/// Gradients are estimated by finite differences with step `options.eps`; the
/// step length comes from a backtracking line search. If `options.bounds` is
/// set, the initial point and the final point are projected into the feasible
/// box and search-direction components pointing out of the box at active
/// bounds are zeroed.
///
/// # Errors
/// Propagates any [`OptimizeError`] raised by the finite-difference gradient
/// evaluation.
#[allow(dead_code)]
pub fn minimize_quasi_newton<F, S>(
    mut fun: F,
    x0: Array1<f64>,
    options: &Options,
    update_formula: UpdateFormula,
) -> Result<OptimizeResult<S>, OptimizeError>
where
    F: FnMut(&ArrayView1<f64>) -> S + Clone,
    S: Into<f64> + Clone,
{
    // Get options or use defaults
    let ftol = options.ftol;
    let gtol = options.gtol;
    let max_iter = options.max_iter;
    let eps = options.eps; // finite-difference step size
    let bounds = options.bounds.as_ref();

    // Initialize variables
    let n = x0.len();
    let mut x = x0.to_owned();

    // Ensure initial point is within bounds
    if let Some(bounds) = bounds {
        bounds.project(x.as_slice_mut().unwrap());
    }

    let mut f = fun(&x.view()).into();

    // Calculate initial gradient using finite differences
    let mut g = finite_difference_gradient(&mut fun, &x.view(), eps)?;

    // Initialize approximation based on the update formula. Exactly one of
    // these is Some for the whole run, matching the formula chosen below.
    let mut h_inv: Option<Array2<f64>> = None;
    let mut b_mat: Option<Array2<f64>> = None;

    match update_formula {
        UpdateFormula::SR1 => {
            // SR1 works with Hessian approximation B, not inverse
            b_mat = Some(Array2::eye(n));
        }
        UpdateFormula::DFP | UpdateFormula::BFGS => {
            // DFP and BFGS work with inverse Hessian approximation H
            h_inv = Some(Array2::eye(n));
        }
    }

    // Initialize counters
    let mut iter = 0;
    // NOTE(review): assumes the finite-difference gradient costs n function
    // evaluations — confirm against finite_difference_gradient's scheme.
    let mut nfev = 1 + n; // Initial evaluation plus gradient calculations

    // Main loop
    while iter < max_iter {
        // Check convergence on gradient (L1 norm of the gradient vs gtol)
        if g.mapv(|gi| gi.abs()).sum() < gtol {
            break;
        }

        // Compute search direction
        let mut p = match update_formula {
            UpdateFormula::SR1 => {
                // For SR1, we need to solve B * p = -g
                // Using LU decomposition or approximation
                let b = b_mat.as_ref().unwrap();
                // Simple approach: use the inverse directly (not efficient for large problems)
                // In practice, should use linear solver
                match invert_matrix(b) {
                    Ok(b_inv) => -b_inv.dot(&g),
                    Err(_) => {
                        // If matrix is singular, reset to identity and fall
                        // back to steepest descent for this iteration
                        b_mat = Some(Array2::eye(n));
                        -&g
                    }
                }
            }
            UpdateFormula::DFP | UpdateFormula::BFGS => {
                let h = h_inv.as_ref().unwrap();
                -h.dot(&g)
            }
        };

        // Project search direction for bounded optimization: zero out any
        // component that would push an already-active coordinate further
        // out of its bound.
        if let Some(bounds) = bounds {
            for i in 0..n {
                let mut can_decrease = true;
                let mut can_increase = true;

                // Check if at boundary (within eps of the bound)
                if let Some(lb) = bounds.lower[i] {
                    if x[i] <= lb + eps {
                        can_decrease = false;
                    }
                }
                if let Some(ub) = bounds.upper[i] {
                    if x[i] >= ub - eps {
                        can_increase = false;
                    }
                }

                // Project gradient component
                if (g[i] > 0.0 && !can_decrease) || (g[i] < 0.0 && !can_increase) {
                    p[i] = 0.0;
                }
            }

            // If no movement is possible, we're at a constrained optimum
            if p.mapv(|pi| pi.abs()).sum() < 1e-10 {
                break;
            }
        }

        // Line search along p; c1 = 1e-4 (Armijo), backtracking factor 0.5
        let alpha_init = 1.0;
        let (alpha, f_new) = backtracking_line_search(
            &mut fun,
            &x.view(),
            f,
            &p.view(),
            &g.view(),
            alpha_init,
            0.0001,
            0.5,
            bounds,
        );

        // NOTE(review): backtracking may evaluate fun several times; this
        // counts only one evaluation, so nfev can undercount.
        nfev += 1; // Count line search evaluations

        // Update position
        let s = alpha * &p;
        let x_new = &x + &s;

        // Check step size convergence (gradient left stale on purpose:
        // no new gradient has been computed at x_new yet)
        if array_diff_norm(&x_new.view(), &x.view()) < options.xtol {
            x = x_new;
            break;
        }

        // Calculate new gradient
        let g_new = finite_difference_gradient(&mut fun, &x_new.view(), eps)?;
        nfev += n;

        // Gradient difference (the "y" vector of the secant condition)
        let y = &g_new - &g;

        // Check convergence on function value.
        // NOTE(review): argument order assumed to be (f_delta, x_delta,
        // g_norm, ftol, xtol, gtol) — confirm against check_convergence.
        if check_convergence(
            f - f_new,
            0.0,
            g_new.mapv(|x| x.abs()).sum(),
            ftol,
            0.0,
            gtol,
        ) {
            x = x_new;
            g = g_new;
            break;
        }

        // Update approximation based on the chosen formula
        match update_formula {
            UpdateFormula::SR1 => {
                update_sr1(&mut b_mat, &s, &y);
            }
            UpdateFormula::DFP => {
                update_dfp(&mut h_inv, &s, &y);
            }
            UpdateFormula::BFGS => {
                update_bfgs(&mut h_inv, &s, &y, n);
            }
        }

        // Update variables for next iteration
        x = x_new;
        f = f_new;
        g = g_new;

        iter += 1;
    }

    // Final check for bounds
    if let Some(bounds) = bounds {
        bounds.project(x.as_slice_mut().unwrap());
    }

    // Use original function for final value (returns S, not f64, so the
    // caller sees the unconverted result type)
    let final_fun = fun(&x.view());

    // Create and return result
    Ok(OptimizeResult {
        x,
        fun: final_fun,
        nit: iter,
        func_evals: nfev,
        nfev,
        success: iter < max_iter,
        message: if iter < max_iter {
            format!(
                "Optimization terminated successfully using {} update.",
                update_formula.name()
            )
        } else {
            "Maximum iterations reached.".to_string()
        },
        jacobian: Some(g),
        hessian: None,
    })
}
229
230/// SR1 update formula for Hessian approximation B
231#[allow(dead_code)]
232fn update_sr1(b_mat: &mut Option<Array2<f64>>, s: &Array1<f64>, y: &Array1<f64>) {
233    if let Some(b) = b_mat.as_mut() {
234        let bs = b.dot(s);
235        let v = y - &bs;
236        let denom = v.dot(s);
237
238        // SR1 update with safeguard against division by zero
239        let v_norm = v.mapv(|x| x.powi(2)).sum().sqrt();
240        let s_norm = s.mapv(|x| x.powi(2)).sum().sqrt();
241        if denom.abs() > 1e-8 * v_norm * s_norm {
242            let v_col = v.clone().insert_axis(Axis(1));
243            let v_row = v.clone().insert_axis(Axis(0));
244            *b = &*b + (v_col.dot(&v_row) / denom);
245        }
246    }
247}
248
249/// DFP update formula for inverse Hessian approximation H
250#[allow(dead_code)]
251fn update_dfp(h_inv: &mut Option<Array2<f64>>, s: &Array1<f64>, y: &Array1<f64>) {
252    if let Some(h) = h_inv.as_mut() {
253        let s_dot_y = s.dot(y);
254
255        if s_dot_y > 1e-10 {
256            // DFP formula: H_{k+1} = H_k - (H_k y y^T H_k)/(y^T H_k y) + (s s^T)/(y^T s)
257            let hy = h.dot(y);
258            let ythy = y.dot(&hy);
259
260            if ythy > 1e-10 {
261                // Term 1: - (H_k y y^T H_k)/(y^T H_k y)
262                let hy_col = hy.clone().insert_axis(Axis(1));
263                let hy_row = hy.clone().insert_axis(Axis(0));
264                let term1 = hy_col.dot(&hy_row) / ythy;
265
266                // Term 2: + (s s^T)/(y^T s)
267                let s_col = s.clone().insert_axis(Axis(1));
268                let s_row = s.clone().insert_axis(Axis(0));
269                let term2 = s_col.dot(&s_row) / s_dot_y;
270
271                *h = &*h - &term1 + &term2;
272            }
273        }
274    }
275}
276
277/// BFGS update formula for inverse Hessian approximation H
278#[allow(dead_code)]
279fn update_bfgs(h_inv: &mut Option<Array2<f64>>, s: &Array1<f64>, y: &Array1<f64>, n: usize) {
280    if let Some(h) = h_inv.as_mut() {
281        let s_dot_y = s.dot(y);
282
283        if s_dot_y > 1e-10 {
284            let rho = 1.0 / s_dot_y;
285            let i_mat = Array2::eye(n);
286
287            // Compute (I - ρ y s^T)
288            let y_col = y.clone().insert_axis(Axis(1));
289            let s_row = s.clone().insert_axis(Axis(0));
290            let y_s_t = y_col.dot(&s_row);
291            let term1 = &i_mat - &(&y_s_t * rho);
292
293            // Compute (I - ρ s y^T)
294            let s_col = s.clone().insert_axis(Axis(1));
295            let y_row = y.clone().insert_axis(Axis(0));
296            let s_y_t = s_col.dot(&y_row);
297            let term2 = &i_mat - &(&s_y_t * rho);
298
299            // Update H_inv = (I - ρ y s^T) H (I - ρ s y^T) + ρ s s^T
300            let term3 = term1.dot(h);
301            *h = term3.dot(&term2) + rho * s_col.dot(&s_row);
302        }
303    }
304}
305
306/// Simple matrix inversion using LU decomposition (for small matrices)
307#[allow(dead_code)]
308fn invert_matrix(mat: &Array2<f64>) -> Result<Array2<f64>, OptimizeError> {
309    let n = mat.nrows();
310    if n != mat.ncols() {
311        return Err(OptimizeError::ValueError(
312            "Matrix must be square".to_string(),
313        ));
314    }
315
316    // For small matrices, use Gauss-Jordan elimination
317    // For production code, should use a proper linear algebra library
318    let mut aug = Array2::zeros((n, 2 * n));
319
320    // Create augmented matrix [A | I]
321    for i in 0..n {
322        for j in 0..n {
323            aug[[i, j]] = mat[[i, j]];
324            if i == j {
325                aug[[i, j + n]] = 1.0;
326            }
327        }
328    }
329
330    // Gauss-Jordan elimination
331    for i in 0..n {
332        // Find pivot
333        let mut max_row = i;
334        for k in (i + 1)..n {
335            if aug[[k, i]].abs() > aug[[max_row, i]].abs() {
336                max_row = k;
337            }
338        }
339
340        // Swap rows
341        if max_row != i {
342            for j in 0..(2 * n) {
343                let temp = aug[[i, j]];
344                aug[[i, j]] = aug[[max_row, j]];
345                aug[[max_row, j]] = temp;
346            }
347        }
348
349        // Check for singular matrix
350        if aug[[i, i]].abs() < 1e-10 {
351            return Err(OptimizeError::ValueError("Matrix is singular".to_string()));
352        }
353
354        // Scale pivot row
355        let pivot = aug[[i, i]];
356        for j in 0..(2 * n) {
357            aug[[i, j]] /= pivot;
358        }
359
360        // Eliminate column
361        for k in 0..n {
362            if k != i {
363                let factor = aug[[k, i]];
364                for j in 0..(2 * n) {
365                    aug[[k, j]] -= factor * aug[[i, j]];
366                }
367            }
368        }
369    }
370
371    // Extract inverse from augmented matrix
372    let mut inv = Array2::zeros((n, n));
373    for i in 0..n {
374        for j in 0..n {
375            inv[[i, j]] = aug[[i, j + n]];
376        }
377    }
378
379    Ok(inv)
380}
381
382impl UpdateFormula {
383    /// Get the name of the update formula
384    pub fn name(&self) -> &'static str {
385        match self {
386            UpdateFormula::SR1 => "SR1",
387            UpdateFormula::DFP => "DFP",
388            UpdateFormula::BFGS => "BFGS",
389        }
390    }
391}
392
393/// Convenience functions for specific quasi-Newton methods
394#[allow(dead_code)]
395pub fn minimize_sr1<F, S>(
396    fun: F,
397    x0: Array1<f64>,
398    options: &Options,
399) -> Result<OptimizeResult<S>, OptimizeError>
400where
401    F: FnMut(&ArrayView1<f64>) -> S + Clone,
402    S: Into<f64> + Clone,
403{
404    minimize_quasi_newton(fun, x0, options, UpdateFormula::SR1)
405}
406
407#[allow(dead_code)]
408pub fn minimize_dfp<F, S>(
409    fun: F,
410    x0: Array1<f64>,
411    options: &Options,
412) -> Result<OptimizeResult<S>, OptimizeError>
413where
414    F: FnMut(&ArrayView1<f64>) -> S + Clone,
415    S: Into<f64> + Clone,
416{
417    minimize_quasi_newton(fun, x0, options, UpdateFormula::DFP)
418}
419
#[cfg(test)]
mod tests {
    use super::*;
    use crate::unconstrained::Bounds;
    use approx::assert_abs_diff_eq;

    /// Convex quadratic 0.5 x^T A x + b^T x with A = diag(2, 3) and
    /// b = (-4, -6); the unique minimizer is x = A^{-1}(-b) = (2, 2).
    fn diagonal_quadratic(x: &ArrayView1<f64>) -> f64 {
        let a = Array2::from_shape_vec((2, 2), vec![2.0, 0.0, 0.0, 3.0]).unwrap();
        let b = Array1::from_vec(vec![-4.0, -6.0]);
        0.5 * x.dot(&a.dot(x)) + b.dot(x)
    }

    /// Classic Rosenbrock function (a = 1, b = 100); minimum at (1, 1).
    fn rosenbrock(x: &ArrayView1<f64>) -> f64 {
        (1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0].powi(2)).powi(2)
    }

    #[test]
    fn test_sr1_quadratic() {
        let result = minimize_sr1(
            diagonal_quadratic,
            Array1::from_vec(vec![0.0, 0.0]),
            &Options::default(),
        )
        .unwrap();

        assert!(result.success);
        assert_abs_diff_eq!(result.x[0], 2.0, epsilon = 1e-5);
        assert_abs_diff_eq!(result.x[1], 2.0, epsilon = 1e-5);
    }

    #[test]
    fn test_dfp_quadratic() {
        let result = minimize_dfp(
            diagonal_quadratic,
            Array1::from_vec(vec![0.0, 0.0]),
            &Options::default(),
        )
        .unwrap();

        assert!(result.success);
        assert_abs_diff_eq!(result.x[0], 2.0, epsilon = 1e-5);
        assert_abs_diff_eq!(result.x[1], 2.0, epsilon = 1e-5);
    }

    #[test]
    fn test_sr1_rosenbrock() {
        let mut options = Options::default();
        options.max_iter = 5000; // More iterations for Rosenbrock with SR1
        options.gtol = 1e-4; // Relaxed gradient tolerance for SR1

        let result =
            minimize_sr1(rosenbrock, Array1::from_vec(vec![0.0, 0.0]), &options).unwrap();

        // SR1 can be less stable than BFGS, so accept either convergence to
        // the optimum or substantial progress from the initial value (~101).
        let near_optimum =
            (result.x[0] - 1.0).abs() < 1e-1 && (result.x[1] - 1.0).abs() < 1e-1;
        if !near_optimum {
            assert!(
                result.fun < 10.0,
                "Function value {} should be much better than initial ~101",
                result.fun
            );
        }
    }

    #[test]
    fn test_dfp_rosenbrock() {
        let mut options = Options::default();
        options.max_iter = 2000; // More iterations for Rosenbrock

        let result =
            minimize_dfp(rosenbrock, Array1::from_vec(vec![0.0, 0.0]), &options).unwrap();

        assert!(result.success);
        assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 1e-2);
        assert_abs_diff_eq!(result.x[1], 1.0, epsilon = 1e-2);
    }

    #[test]
    fn test_dfp_with_bounds() {
        // Unconstrained optimum (2, 3) lies outside the box [0, 1]^2, so
        // the constrained solution is the corner (1, 1).
        let objective =
            |x: &ArrayView1<f64>| -> f64 { (x[0] - 2.0).powi(2) + (x[1] - 3.0).powi(2) };

        let mut options = Options::default();
        options.bounds = Some(Bounds::new(&[
            (Some(0.0), Some(1.0)),
            (Some(0.0), Some(1.0)),
        ]));

        let result =
            minimize_dfp(objective, Array1::from_vec(vec![0.0, 0.0]), &options).unwrap();

        assert!(result.success);
        assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 1e-6);
        assert_abs_diff_eq!(result.x[1], 1.0, epsilon = 1e-6);
    }
}