scirs2_optimize/unconstrained/
quasi_newton.rs1use crate::error::OptimizeError;
4use crate::unconstrained::line_search::backtracking_line_search;
5use crate::unconstrained::result::OptimizeResult;
6use crate::unconstrained::utils::{array_diff_norm, check_convergence, finite_difference_gradient};
7use crate::unconstrained::Options;
8use scirs2_core::ndarray::{Array1, Array2, ArrayView1, Axis};
9
10#[derive(Debug, Clone, Copy)]
12pub enum UpdateFormula {
13 SR1,
15 DFP,
17 BFGS,
19}
20
21#[allow(dead_code)]
23pub fn minimize_quasi_newton<F, S>(
24 mut fun: F,
25 x0: Array1<f64>,
26 options: &Options,
27 update_formula: UpdateFormula,
28) -> Result<OptimizeResult<S>, OptimizeError>
29where
30 F: FnMut(&ArrayView1<f64>) -> S + Clone,
31 S: Into<f64> + Clone,
32{
33 let ftol = options.ftol;
35 let gtol = options.gtol;
36 let max_iter = options.max_iter;
37 let eps = options.eps;
38 let bounds = options.bounds.as_ref();
39
40 let n = x0.len();
42 let mut x = x0.to_owned();
43
44 if let Some(bounds) = bounds {
46 bounds.project(x.as_slice_mut().unwrap());
47 }
48
49 let mut f = fun(&x.view()).into();
50
51 let mut g = finite_difference_gradient(&mut fun, &x.view(), eps)?;
53
54 let mut h_inv: Option<Array2<f64>> = None;
56 let mut b_mat: Option<Array2<f64>> = None;
57
58 match update_formula {
59 UpdateFormula::SR1 => {
60 b_mat = Some(Array2::eye(n));
62 }
63 UpdateFormula::DFP | UpdateFormula::BFGS => {
64 h_inv = Some(Array2::eye(n));
66 }
67 }
68
69 let mut iter = 0;
71 let mut nfev = 1 + n; while iter < max_iter {
75 if g.mapv(|gi| gi.abs()).sum() < gtol {
77 break;
78 }
79
80 let mut p = match update_formula {
82 UpdateFormula::SR1 => {
83 let b = b_mat.as_ref().unwrap();
86 match invert_matrix(b) {
89 Ok(b_inv) => -b_inv.dot(&g),
90 Err(_) => {
91 b_mat = Some(Array2::eye(n));
93 -&g
94 }
95 }
96 }
97 UpdateFormula::DFP | UpdateFormula::BFGS => {
98 let h = h_inv.as_ref().unwrap();
99 -h.dot(&g)
100 }
101 };
102
103 if let Some(bounds) = bounds {
105 for i in 0..n {
106 let mut can_decrease = true;
107 let mut can_increase = true;
108
109 if let Some(lb) = bounds.lower[i] {
111 if x[i] <= lb + eps {
112 can_decrease = false;
113 }
114 }
115 if let Some(ub) = bounds.upper[i] {
116 if x[i] >= ub - eps {
117 can_increase = false;
118 }
119 }
120
121 if (g[i] > 0.0 && !can_decrease) || (g[i] < 0.0 && !can_increase) {
123 p[i] = 0.0;
124 }
125 }
126
127 if p.mapv(|pi| pi.abs()).sum() < 1e-10 {
129 break;
130 }
131 }
132
133 let alpha_init = 1.0;
135 let (alpha, f_new) = backtracking_line_search(
136 &mut fun,
137 &x.view(),
138 f,
139 &p.view(),
140 &g.view(),
141 alpha_init,
142 0.0001,
143 0.5,
144 bounds,
145 );
146
147 nfev += 1; let s = alpha * &p;
151 let x_new = &x + &s;
152
153 if array_diff_norm(&x_new.view(), &x.view()) < options.xtol {
155 x = x_new;
156 break;
157 }
158
159 let g_new = finite_difference_gradient(&mut fun, &x_new.view(), eps)?;
161 nfev += n;
162
163 let y = &g_new - &g;
165
166 if check_convergence(
168 f - f_new,
169 0.0,
170 g_new.mapv(|x| x.abs()).sum(),
171 ftol,
172 0.0,
173 gtol,
174 ) {
175 x = x_new;
176 g = g_new;
177 break;
178 }
179
180 match update_formula {
182 UpdateFormula::SR1 => {
183 update_sr1(&mut b_mat, &s, &y);
184 }
185 UpdateFormula::DFP => {
186 update_dfp(&mut h_inv, &s, &y);
187 }
188 UpdateFormula::BFGS => {
189 update_bfgs(&mut h_inv, &s, &y, n);
190 }
191 }
192
193 x = x_new;
195 f = f_new;
196 g = g_new;
197
198 iter += 1;
199 }
200
201 if let Some(bounds) = bounds {
203 bounds.project(x.as_slice_mut().unwrap());
204 }
205
206 let final_fun = fun(&x.view());
208
209 Ok(OptimizeResult {
211 x,
212 fun: final_fun,
213 nit: iter,
214 func_evals: nfev,
215 nfev,
216 success: iter < max_iter,
217 message: if iter < max_iter {
218 format!(
219 "Optimization terminated successfully using {} update.",
220 update_formula.name()
221 )
222 } else {
223 "Maximum iterations reached.".to_string()
224 },
225 jacobian: Some(g),
226 hessian: None,
227 })
228}
229
230#[allow(dead_code)]
232fn update_sr1(b_mat: &mut Option<Array2<f64>>, s: &Array1<f64>, y: &Array1<f64>) {
233 if let Some(b) = b_mat.as_mut() {
234 let bs = b.dot(s);
235 let v = y - &bs;
236 let denom = v.dot(s);
237
238 let v_norm = v.mapv(|x| x.powi(2)).sum().sqrt();
240 let s_norm = s.mapv(|x| x.powi(2)).sum().sqrt();
241 if denom.abs() > 1e-8 * v_norm * s_norm {
242 let v_col = v.clone().insert_axis(Axis(1));
243 let v_row = v.clone().insert_axis(Axis(0));
244 *b = &*b + (v_col.dot(&v_row) / denom);
245 }
246 }
247}
248
249#[allow(dead_code)]
251fn update_dfp(h_inv: &mut Option<Array2<f64>>, s: &Array1<f64>, y: &Array1<f64>) {
252 if let Some(h) = h_inv.as_mut() {
253 let s_dot_y = s.dot(y);
254
255 if s_dot_y > 1e-10 {
256 let hy = h.dot(y);
258 let ythy = y.dot(&hy);
259
260 if ythy > 1e-10 {
261 let hy_col = hy.clone().insert_axis(Axis(1));
263 let hy_row = hy.clone().insert_axis(Axis(0));
264 let term1 = hy_col.dot(&hy_row) / ythy;
265
266 let s_col = s.clone().insert_axis(Axis(1));
268 let s_row = s.clone().insert_axis(Axis(0));
269 let term2 = s_col.dot(&s_row) / s_dot_y;
270
271 *h = &*h - &term1 + &term2;
272 }
273 }
274 }
275}
276
277#[allow(dead_code)]
279fn update_bfgs(h_inv: &mut Option<Array2<f64>>, s: &Array1<f64>, y: &Array1<f64>, n: usize) {
280 if let Some(h) = h_inv.as_mut() {
281 let s_dot_y = s.dot(y);
282
283 if s_dot_y > 1e-10 {
284 let rho = 1.0 / s_dot_y;
285 let i_mat = Array2::eye(n);
286
287 let y_col = y.clone().insert_axis(Axis(1));
289 let s_row = s.clone().insert_axis(Axis(0));
290 let y_s_t = y_col.dot(&s_row);
291 let term1 = &i_mat - &(&y_s_t * rho);
292
293 let s_col = s.clone().insert_axis(Axis(1));
295 let y_row = y.clone().insert_axis(Axis(0));
296 let s_y_t = s_col.dot(&y_row);
297 let term2 = &i_mat - &(&s_y_t * rho);
298
299 let term3 = term1.dot(h);
301 *h = term3.dot(&term2) + rho * s_col.dot(&s_row);
302 }
303 }
304}
305
306#[allow(dead_code)]
308fn invert_matrix(mat: &Array2<f64>) -> Result<Array2<f64>, OptimizeError> {
309 let n = mat.nrows();
310 if n != mat.ncols() {
311 return Err(OptimizeError::ValueError(
312 "Matrix must be square".to_string(),
313 ));
314 }
315
316 let mut aug = Array2::zeros((n, 2 * n));
319
320 for i in 0..n {
322 for j in 0..n {
323 aug[[i, j]] = mat[[i, j]];
324 if i == j {
325 aug[[i, j + n]] = 1.0;
326 }
327 }
328 }
329
330 for i in 0..n {
332 let mut max_row = i;
334 for k in (i + 1)..n {
335 if aug[[k, i]].abs() > aug[[max_row, i]].abs() {
336 max_row = k;
337 }
338 }
339
340 if max_row != i {
342 for j in 0..(2 * n) {
343 let temp = aug[[i, j]];
344 aug[[i, j]] = aug[[max_row, j]];
345 aug[[max_row, j]] = temp;
346 }
347 }
348
349 if aug[[i, i]].abs() < 1e-10 {
351 return Err(OptimizeError::ValueError("Matrix is singular".to_string()));
352 }
353
354 let pivot = aug[[i, i]];
356 for j in 0..(2 * n) {
357 aug[[i, j]] /= pivot;
358 }
359
360 for k in 0..n {
362 if k != i {
363 let factor = aug[[k, i]];
364 for j in 0..(2 * n) {
365 aug[[k, j]] -= factor * aug[[i, j]];
366 }
367 }
368 }
369 }
370
371 let mut inv = Array2::zeros((n, n));
373 for i in 0..n {
374 for j in 0..n {
375 inv[[i, j]] = aug[[i, j + n]];
376 }
377 }
378
379 Ok(inv)
380}
381
382impl UpdateFormula {
383 pub fn name(&self) -> &'static str {
385 match self {
386 UpdateFormula::SR1 => "SR1",
387 UpdateFormula::DFP => "DFP",
388 UpdateFormula::BFGS => "BFGS",
389 }
390 }
391}
392
393#[allow(dead_code)]
395pub fn minimize_sr1<F, S>(
396 fun: F,
397 x0: Array1<f64>,
398 options: &Options,
399) -> Result<OptimizeResult<S>, OptimizeError>
400where
401 F: FnMut(&ArrayView1<f64>) -> S + Clone,
402 S: Into<f64> + Clone,
403{
404 minimize_quasi_newton(fun, x0, options, UpdateFormula::SR1)
405}
406
407#[allow(dead_code)]
408pub fn minimize_dfp<F, S>(
409 fun: F,
410 x0: Array1<f64>,
411 options: &Options,
412) -> Result<OptimizeResult<S>, OptimizeError>
413where
414 F: FnMut(&ArrayView1<f64>) -> S + Clone,
415 S: Into<f64> + Clone,
416{
417 minimize_quasi_newton(fun, x0, options, UpdateFormula::DFP)
418}
419
420#[cfg(test)]
421mod tests {
422 use super::*;
423 use crate::unconstrained::Bounds;
424 use approx::assert_abs_diff_eq;
425
426 #[test]
427 fn test_sr1_quadratic() {
428 let quadratic = |x: &ArrayView1<f64>| -> f64 {
429 let a = Array2::from_shape_vec((2, 2), vec![2.0, 0.0, 0.0, 3.0]).unwrap();
430 let b = Array1::from_vec(vec![-4.0, -6.0]);
431 0.5 * x.dot(&a.dot(x)) + b.dot(x)
432 };
433
434 let x0 = Array1::from_vec(vec![0.0, 0.0]);
435 let options = Options::default();
436
437 let result = minimize_sr1(quadratic, x0, &options).unwrap();
438
439 assert!(result.success);
440 assert_abs_diff_eq!(result.x[0], 2.0, epsilon = 1e-5);
442 assert_abs_diff_eq!(result.x[1], 2.0, epsilon = 1e-5);
443 }
444
445 #[test]
446 fn test_dfp_quadratic() {
447 let quadratic = |x: &ArrayView1<f64>| -> f64 {
448 let a = Array2::from_shape_vec((2, 2), vec![2.0, 0.0, 0.0, 3.0]).unwrap();
449 let b = Array1::from_vec(vec![-4.0, -6.0]);
450 0.5 * x.dot(&a.dot(x)) + b.dot(x)
451 };
452
453 let x0 = Array1::from_vec(vec![0.0, 0.0]);
454 let options = Options::default();
455
456 let result = minimize_dfp(quadratic, x0, &options).unwrap();
457
458 assert!(result.success);
459 assert_abs_diff_eq!(result.x[0], 2.0, epsilon = 1e-5);
461 assert_abs_diff_eq!(result.x[1], 2.0, epsilon = 1e-5);
462 }
463
464 #[test]
465 fn test_sr1_rosenbrock() {
466 let rosenbrock = |x: &ArrayView1<f64>| -> f64 {
467 let a = 1.0;
468 let b = 100.0;
469 (a - x[0]).powi(2) + b * (x[1] - x[0].powi(2)).powi(2)
470 };
471
472 let x0 = Array1::from_vec(vec![0.0, 0.0]);
473 let mut options = Options::default();
474 options.max_iter = 5000; options.gtol = 1e-4; let result = minimize_sr1(rosenbrock, x0, &options).unwrap();
478
479 let function_value = result.fun;
481
482 if (result.x[0] - 1.0).abs() < 1e-1 && (result.x[1] - 1.0).abs() < 1e-1 {
484 } else {
486 assert!(
488 function_value < 10.0,
489 "Function value {} should be much better than initial ~101",
490 function_value
491 );
492 }
493 }
494
495 #[test]
496 fn test_dfp_rosenbrock() {
497 let rosenbrock = |x: &ArrayView1<f64>| -> f64 {
498 let a = 1.0;
499 let b = 100.0;
500 (a - x[0]).powi(2) + b * (x[1] - x[0].powi(2)).powi(2)
501 };
502
503 let x0 = Array1::from_vec(vec![0.0, 0.0]);
504 let mut options = Options::default();
505 options.max_iter = 2000; let result = minimize_dfp(rosenbrock, x0, &options).unwrap();
508
509 assert!(result.success);
510 assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 1e-2);
511 assert_abs_diff_eq!(result.x[1], 1.0, epsilon = 1e-2);
512 }
513
514 #[test]
515 fn test_dfp_with_bounds() {
516 let quadratic =
517 |x: &ArrayView1<f64>| -> f64 { (x[0] - 2.0).powi(2) + (x[1] - 3.0).powi(2) };
518
519 let x0 = Array1::from_vec(vec![0.0, 0.0]);
520 let mut options = Options::default();
521
522 let bounds = Bounds::new(&[(Some(0.0), Some(1.0)), (Some(0.0), Some(1.0))]);
524 options.bounds = Some(bounds);
525
526 let result = minimize_dfp(quadratic, x0, &options).unwrap();
527
528 assert!(result.success);
529 assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 1e-6);
531 assert_abs_diff_eq!(result.x[1], 1.0, epsilon = 1e-6);
532 }
533}