// scirs2_optimize/unconstrained/lbfgs.rs
use crate::error::OptimizeError;
use crate::unconstrained::result::OptimizeResult;
use crate::unconstrained::{Bounds, Options};
use scirs2_core::ndarray::{Array1, ArrayView1};
8#[allow(dead_code)]
10pub fn minimize_lbfgsb<F, S>(
11 mut fun: F,
12 x0: Array1<f64>,
13 options: &Options,
14) -> Result<OptimizeResult<S>, OptimizeError>
15where
16 F: FnMut(&ArrayView1<f64>) -> S,
17 S: Into<f64> + Clone,
18{
19 let m = 10; let factr = 1e7; let pgtol = options.gtol;
23 let max_iter = options.max_iter;
24 let eps = options.eps;
25 let bounds = options.bounds.as_ref();
26
27 let eps_mach = 2.22e-16;
29 let ftol = factr * eps_mach;
30
31 let n = x0.len();
33 let mut x = x0.to_owned();
34
35 if let Some(bounds) = bounds {
37 bounds.project(x.as_slice_mut().unwrap());
38 }
39
40 let mut nfev = 0;
42
43 nfev += 1;
45 let mut f = fun(&x.view()).into();
46
47 let mut g = Array1::zeros(n);
49 calculate_gradient(&mut fun, &x, &mut g, eps, bounds, &mut nfev);
50
51 let mut iter = 0;
53 let mut converged = false;
54
55 let mut s_vectors: Vec<Array1<f64>> = Vec::with_capacity(m);
57 let mut y_vectors: Vec<Array1<f64>> = Vec::with_capacity(m);
58 let mut rho_values: Vec<f64> = Vec::with_capacity(m);
59
60 while iter < max_iter {
62 let g_old = g.clone();
64 let f_old = f;
65
66 let mut free_vars = vec![true; n];
68 if let Some(bounds) = bounds {
69 for i in 0..n {
70 if let Some(lb) = bounds.lower[i] {
73 if (x[i] - lb).abs() < 1e-10 && g[i] > 0.0 {
74 free_vars[i] = false;
75 continue;
76 }
77 }
78 if let Some(ub) = bounds.upper[i] {
81 if (x[i] - ub).abs() < 1e-10 && g[i] < 0.0 {
82 free_vars[i] = false;
83 continue;
84 }
85 }
86 }
87 }
88
89 let mut search_direction = Array1::zeros(n);
91
92 for i in 0..n {
94 if free_vars[i] {
95 search_direction[i] = -g[i];
96 }
97 }
98
99 let mut alpha_values = Vec::with_capacity(s_vectors.len());
101
102 for i in (0..s_vectors.len()).rev() {
104 let rho_i = rho_values[i];
105 let s_i = &s_vectors[i];
106 let y_i = &y_vectors[i];
107
108 let alpha_i = rho_i * s_i.dot(&search_direction);
109 alpha_values.push(alpha_i);
110
111 search_direction = &search_direction - &(alpha_i * y_i);
112 }
113
114 if !s_vectors.is_empty() {
116 let y_last = &y_vectors[s_vectors.len() - 1];
117 let s_last = &s_vectors[s_vectors.len() - 1];
118
119 let ys = y_last.dot(s_last);
120 let yy = y_last.dot(y_last);
121
122 if ys > 0.0 && yy > 0.0 {
123 let gamma = ys / yy;
124 search_direction = &search_direction * gamma;
125 }
126 }
127
128 for (i, &alpha_i) in alpha_values.iter().enumerate() {
130 let idx = s_vectors.len() - 1 - i;
131 let rho_i = rho_values[idx];
132 let s_i = &s_vectors[idx];
133 let y_i = &y_vectors[idx];
134
135 let beta_i = rho_i * y_i.dot(&search_direction);
136 search_direction = &search_direction + &(s_i * (alpha_i - beta_i));
137 }
138
139 search_direction = -search_direction;
141
142 for i in 0..n {
144 if !free_vars[i] {
145 search_direction[i] = 0.0;
146 }
147 }
148
149 if search_direction.dot(&search_direction) < 1e-10 {
151 for i in 0..n {
152 search_direction[i] = -g[i];
153 }
154 project_direction(&mut search_direction, &x, bounds);
155 }
156
157 let (alpha, f_new) =
159 lbfgsb_line_search(&mut fun, &x, &search_direction, f, bounds, &mut nfev);
160
161 if alpha < 1e-10 {
163 converged = true;
164 break;
165 }
166
167 let mut x_new = &x + &(&search_direction * alpha);
169
170 if let Some(bounds) = bounds {
172 let mut x_new_vec = x_new.to_vec();
173 bounds.project(&mut x_new_vec);
174 x_new = Array1::from_vec(x_new_vec);
175 }
176
177 calculate_gradient(&mut fun, &x_new, &mut g, eps, bounds, &mut nfev);
179
180 let s_k = &x_new - &x;
182 let y_k = &g - &g_old;
183
184 let s_dot_y = s_k.dot(&y_k);
186
187 if s_dot_y > 0.0 {
188 if s_vectors.len() == m {
190 s_vectors.remove(0);
192 y_vectors.remove(0);
193 rho_values.remove(0);
194 }
195
196 s_vectors.push(s_k);
198 y_vectors.push(y_k);
199 rho_values.push(1.0 / s_dot_y);
200 }
201
202 x = x_new;
204 f = f_new;
205
206 let pg_norm = projected_gradient_norm(&x, &g, bounds);
209
210 if pg_norm < pgtol {
212 converged = true;
213 break;
214 }
215
216 let f_change = (f_old - f).abs();
218 if f_change < ftol * (1.0 + f.abs()) {
219 converged = true;
220 break;
221 }
222
223 iter += 1;
224 }
225
226 let final_fun = fun(&x.view());
228
229 Ok(OptimizeResult {
231 x,
232 fun: final_fun,
233 nit: iter,
234 func_evals: nfev,
235 nfev,
236 success: converged,
237 message: if converged {
238 "Optimization terminated successfully.".to_string()
239 } else {
240 "Maximum iterations reached.".to_string()
241 },
242 jacobian: Some(g),
243 hessian: None,
244 })
245}
246
#[allow(dead_code)]
/// Minimizes a scalar function without constraints using the L-BFGS
/// algorithm (limited-memory BFGS, standard two-loop recursion), with
/// finite-difference gradients.
///
/// The step size comes from a cascade of fixed trial steps, each refined by
/// Armijo backtracking; if none of them lowers the objective, a tiny
/// steepest-descent step is taken as a fallback.
///
/// # Arguments
/// * `fun` - objective; called with a view of the current point.
/// * `x0` - initial guess.
/// * `options` - solver settings (`ftol`, `gtol`, `max_iter`, `eps`).
///
/// # Errors
/// Currently always returns `Ok`; the `Result` is kept for interface
/// compatibility with the other minimizers in this module.
pub fn minimize_lbfgs<F, S>(
    mut fun: F,
    x0: Array1<f64>,
    options: &Options,
) -> Result<OptimizeResult<S>, OptimizeError>
where
    F: FnMut(&ArrayView1<f64>) -> S,
    S: Into<f64> + Clone,
{
    // History length for the limited-memory Hessian approximation.
    let m = 10;
    let ftol = options.ftol;
    let gtol = options.gtol;
    let max_iter = options.max_iter;
    let eps = options.eps;

    let n = x0.len();
    let mut x = x0.to_owned();

    // Objective-evaluation counter (gradient probes included).
    let mut nfev = 0;

    nfev += 1;
    let mut f = fun(&x.view()).into();

    let mut g = Array1::zeros(n);

    let mut g_old = Array1::zeros(n);
    calculate_gradient(&mut fun, &x, &mut g, eps, None, &mut nfev);

    let mut iter = 0;
    let mut converged = false;

    // Curvature-pair history: s_k = x_{k+1}-x_k, y_k = g_{k+1}-g_k,
    // rho_k = 1/(s_k . y_k).
    let mut s_vectors: Vec<Array1<f64>> = Vec::with_capacity(m);
    let mut y_vectors: Vec<Array1<f64>> = Vec::with_capacity(m);
    let mut rho_values: Vec<f64> = Vec::with_capacity(m);

    while iter < max_iter {
        // Gradient-norm stopping test.
        let g_norm = g.dot(&g).sqrt();
        if g_norm < gtol {
            converged = true;
            break;
        }

        g_old.assign(&g);
        let f_old = f;

        // Two-loop recursion: seed with g, apply the implicit inverse
        // Hessian, then negate at the end to obtain d = -H*g.
        let mut search_direction = g.clone();

        let mut alpha_values = Vec::with_capacity(s_vectors.len());

        // First loop: newest to oldest pair.
        for i in (0..s_vectors.len()).rev() {
            let rho_i = rho_values[i];
            let s_i = &s_vectors[i];
            let y_i = &y_vectors[i];

            let alpha_i = rho_i * s_i.dot(&search_direction);
            alpha_values.push(alpha_i);

            search_direction = &search_direction - &(alpha_i * y_i);
        }

        // Scale by gamma = (y.s)/(y.y), the standard initial inverse-Hessian
        // guess, once at least one pair is available.
        if !s_vectors.is_empty() {
            let y_last = &y_vectors[s_vectors.len() - 1];
            let s_last = &s_vectors[s_vectors.len() - 1];

            let ys = y_last.dot(s_last);
            let yy = y_last.dot(y_last);

            if ys > 0.0 && yy > 0.0 {
                let gamma = ys / yy;
                search_direction = &search_direction * gamma;
            }
        }

        // Second loop: oldest to newest pair.
        for (i, &alpha_i) in alpha_values.iter().enumerate() {
            let idx = s_vectors.len() - 1 - i;
            let rho_i = rho_values[idx];
            let s_i = &s_vectors[idx];
            let y_i = &y_vectors[idx];

            let beta_i = rho_i * y_i.dot(&search_direction);
            search_direction = &search_direction + &(s_i * (alpha_i - beta_i));
        }

        // Negate to obtain the descent direction d = -H*g.
        search_direction = -search_direction;

        // Line search: try a cascade of fixed initial steps, each refined by
        // Armijo backtracking, and accept the first that lowers f.
        let c1 = 1e-4;
        let initial_steps = [1.0, 0.5, 0.1, 0.01, 0.001];
        let mut found_good_step = false;
        let mut alpha;
        let mut x_new = x.clone();
        let mut f_new = f;

        // Directional derivative along d; negative iff d is a descent
        // direction.
        let g_dot_p = g.dot(&search_direction);

        if g_dot_p < 0.0 {
            for &init_alpha in &initial_steps {
                alpha = init_alpha;
                x_new = &x + &(&search_direction * alpha);
                f_new = fun(&x_new.view()).into();
                nfev += 1;

                if f_new < f {
                    found_good_step = true;
                    break;
                }

                // Armijo backtracking from this trial step (at most 16
                // halvings).
                let rho = 0.5;
                let mut backtrack_iter = 0;

                while f_new > f + c1 * alpha * g_dot_p && backtrack_iter < 16 {
                    alpha *= rho;
                    x_new = &x + &(&search_direction * alpha);
                    f_new = fun(&x_new.view()).into();
                    nfev += 1;
                    backtrack_iter += 1;

                    if f_new < f {
                        found_good_step = true;
                        break;
                    }
                }

                if found_good_step {
                    break;
                }
            }
        }

        // Fallback: tiny steepest-descent step of fixed Euclidean length.
        // (g is nonzero here because the gradient-norm test above did not
        // trigger, so the division is safe.)
        if !found_good_step {
            let small_step = 1e-4;
            search_direction = -g.clone();
            alpha = small_step / g.dot(&g).sqrt();
            x_new = &x + &(&search_direction * alpha);
            f_new = fun(&x_new.view()).into();
            nfev += 1;
        }

        let s_k = &x_new - &x;

        // No measurable movement: accept the point and stop.
        if s_k.iter().all(|&si| si.abs() < 1e-10) {
            x = x_new;
            converged = true;
            break;
        }

        x = x_new;

        calculate_gradient(&mut fun, &x, &mut g, eps, None, &mut nfev);

        let y_k = &g - &g_old;

        let s_dot_y = s_k.dot(&y_k);

        // Keep only positive-curvature pairs so the implicit inverse Hessian
        // stays positive definite.
        if s_dot_y > 0.0 {
            if s_vectors.len() == m {
                // Evict the oldest pair once the window is full.
                s_vectors.remove(0);
                y_vectors.remove(0);
                rho_values.remove(0);
            }

            s_vectors.push(s_k);
            y_vectors.push(y_k);
            rho_values.push(1.0 / s_dot_y);
        }

        f = f_new;

        // Relative function-decrease stopping test.
        let f_change = (f_old - f).abs();
        if f_change < ftol * (1.0 + f.abs()) {
            converged = true;
            break;
        }

        iter += 1;
    }

    // Final evaluation in the caller's value type `S`.
    // NOTE(review): this extra call is not counted in `nfev` — confirm that
    // is intentional.
    let final_fun = fun(&x.view());

    Ok(OptimizeResult {
        x,
        fun: final_fun,
        nit: iter,
        func_evals: nfev,
        nfev,
        success: converged,
        message: if converged {
            "Optimization terminated successfully.".to_string()
        } else {
            "Maximum iterations reached.".to_string()
        },
        jacobian: Some(g),
        hessian: None,
    })
}
483
484#[allow(dead_code)]
486fn calculate_gradient<F, S>(
487 fun: &mut F,
488 x: &Array1<f64>,
489 g: &mut Array1<f64>,
490 eps: f64,
491 bounds: Option<&Bounds>,
492 nfev: &mut usize,
493) where
494 F: FnMut(&ArrayView1<f64>) -> S,
495 S: Into<f64>,
496{
497 let n = x.len();
498 let f_x = fun(&x.view()).into();
499 *nfev += 1;
500
501 for i in 0..n {
502 let mut x_h = x.clone();
504
505 if let Some(bounds) = bounds {
507 let eps_i = eps * (1.0 + x[i].abs());
508 if let Some(ub) = bounds.upper[i] {
509 if x[i] >= ub - eps_i {
510 x_h[i] = x[i] - eps_i;
512 *nfev += 1;
513 let f_h = fun(&x_h.view()).into();
514 g[i] = (f_x - f_h) / eps_i;
515 continue;
516 }
517 }
518 if let Some(lb) = bounds.lower[i] {
519 if x[i] <= lb + eps_i {
520 x_h[i] = x[i] + eps_i;
522 *nfev += 1;
523 let f_h = fun(&x_h.view()).into();
524 g[i] = (f_h - f_x) / eps_i;
525 continue;
526 }
527 }
528 }
529
530 let eps_i = eps * (1.0 + x[i].abs());
532 x_h[i] = x[i] + eps_i;
533 *nfev += 1;
534 let f_p = fun(&x_h.view()).into();
535
536 x_h[i] = x[i] - eps_i;
537 *nfev += 1;
538 let f_m = fun(&x_h.view()).into();
539
540 g[i] = (f_p - f_m) / (2.0 * eps_i);
541 }
542}
543
544#[allow(dead_code)]
547fn projected_gradient_norm(x: &Array1<f64>, g: &Array1<f64>, bounds: Option<&Bounds>) -> f64 {
548 let n = x.len();
549 let mut pg = Array1::zeros(n);
550
551 for i in 0..n {
552 let xi = x[i];
553 let gi = g[i];
554
555 if let Some(bounds) = bounds {
556 if let Some(lb) = bounds.lower[i] {
558 if (xi - lb).abs() < 1e-10 && gi > 0.0 {
559 pg[i] = 0.0;
561 continue;
562 }
563 }
564
565 if let Some(ub) = bounds.upper[i] {
566 if (xi - ub).abs() < 1e-10 && gi < 0.0 {
567 pg[i] = 0.0;
569 continue;
570 }
571 }
572 }
573
574 pg[i] = gi;
576 }
577
578 pg.mapv(|pgi| pgi.powi(2)).sum().sqrt()
580}
581
582#[allow(dead_code)]
585fn project_direction(direction: &mut Array1<f64>, x: &Array1<f64>, bounds: Option<&Bounds>) {
586 if bounds.is_none() {
587 return; }
589
590 let bounds = bounds.unwrap();
591
592 for i in 0..x.len() {
593 let xi = x[i];
594
595 if let Some(lb) = bounds.lower[i] {
597 if (xi - lb).abs() < 1e-10 && direction[i] < 0.0 {
598 direction[i] = 0.0;
600 }
601 }
602
603 if let Some(ub) = bounds.upper[i] {
604 if (xi - ub).abs() < 1e-10 && direction[i] > 0.0 {
605 direction[i] = 0.0;
607 }
608 }
609 }
610}
611
#[allow(dead_code)]
/// Backtracking (Armijo) line search along `direction`, aware of box bounds.
///
/// Returns the accepted step `alpha` and the objective value at the
/// (projected) trial point. The initial step is placed just inside the
/// feasible interval from `compute_line_bounds` and halved until sufficient
/// decrease is observed or the step becomes negligible.
fn lbfgsb_line_search<F, S>(
    fun: &mut F,
    x: &Array1<f64>,
    direction: &Array1<f64>,
    f_x: f64,
    bounds: Option<&Bounds>,
    nfev: &mut usize,
) -> (f64, f64)
where
    F: FnMut(&ArrayView1<f64>) -> S,
    S: Into<f64>,
{
    // Feasible step interval along the ray x + alpha * direction.
    let (a_min, a_max) = compute_line_bounds(x, direction, bounds);

    // Armijo constant, backtracking factor, and the initial step: the unit
    // step, shrunk to just inside the feasible interval when that caps it.
    let c1 = 1e-4;
    let rho = 0.5;
    let mut alpha = if a_max < 1.0 { a_max * 0.99 } else { 1.0 };

    // Degenerate interval: take the largest feasible (possibly zero) step
    // and report the objective there.
    if a_max <= 0.0 || a_min >= a_max {
        alpha = if a_max > 0.0 { a_max } else { 0.0 };
        let x_new = x + alpha * direction;
        *nfev += 1;
        let f_new = fun(&x_new.view()).into();
        return (alpha, f_new);
    }

    // NOTE(review): -||d||^2 equals the true directional derivative g.d only
    // when d = -g; for L-BFGS directions it is a surrogate (always negative,
    // so the Armijo test below remains usable) — confirm this is intended.
    let slope = -direction.mapv(|di| di.powi(2)).sum();

    // Trial evaluation, projecting the trial point back onto the bounds.
    let mut f_line = |alpha: f64| {
        let mut x_new = x + alpha * direction;

        if let Some(bounds) = bounds {
            bounds.project(x_new.as_slice_mut().unwrap());
        }

        *nfev += 1;
        fun(&x_new.view()).into()
    };

    let mut f_new = f_line(alpha);

    // Halve the step until sufficient decrease holds or we hit the interval
    // floor; bail out entirely once the step is negligible.
    while f_new > f_x + c1 * alpha * slope && alpha > a_min + 1e-16 {
        alpha *= rho;

        if alpha < a_min {
            alpha = a_min;
        }

        f_new = f_line(alpha);

        if alpha < 1e-10 {
            break;
        }
    }

    (alpha, f_new)
}
683
684#[allow(dead_code)]
686fn compute_line_bounds(
687 x: &Array1<f64>,
688 direction: &Array1<f64>,
689 bounds: Option<&Bounds>,
690) -> (f64, f64) {
691 if bounds.is_none() {
692 return (f64::NEG_INFINITY, f64::INFINITY);
693 }
694
695 let bounds = bounds.unwrap();
696 let mut a_min = f64::NEG_INFINITY;
697 let mut a_max = f64::INFINITY;
698
699 for i in 0..x.len() {
700 let xi = x[i];
701 let di = direction[i];
702
703 if di.abs() < 1e-16 {
704 continue;
705 }
706
707 if let Some(lb) = bounds.lower[i] {
709 let a_lb = (lb - xi) / di;
710 if di > 0.0 {
711 a_min = a_min.max(a_lb);
712 } else {
713 a_max = a_max.min(a_lb);
714 }
715 }
716
717 if let Some(ub) = bounds.upper[i] {
719 let a_ub = (ub - xi) / di;
720 if di > 0.0 {
721 a_max = a_max.min(a_ub);
722 } else {
723 a_min = a_min.max(a_ub);
724 }
725 }
726 }
727
728 if a_min > a_max {
729 (0.0, 0.0)
730 } else {
731 (a_min, a_max)
732 }
733}
734
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;

    /// The unconstrained solver should drive a convex quadratic to its
    /// minimizer at the origin.
    #[test]
    fn test_lbfgs_quadratic() {
        let objective = |p: &ArrayView1<f64>| -> f64 { p[0] * p[0] + 4.0 * p[1] * p[1] };

        let start = Array1::from_vec(vec![2.0, 1.0]);
        let mut options = Options::default();
        options.max_iter = 100;

        let res = minimize_lbfgs(objective, start, &options).unwrap();

        assert!(res.success);
        assert_abs_diff_eq!(res.x[0], 0.0, epsilon = 1e-2);
        assert_abs_diff_eq!(res.x[1], 0.0, epsilon = 2e-1);
    }

    /// With the unconstrained minimum at (2, 3) outside the box [0, 1]^2,
    /// the bounded solver must stay feasible and stop on a box face.
    #[test]
    fn test_lbfgsb_with_bounds() {
        let objective =
            |p: &ArrayView1<f64>| -> f64 { (p[0] - 2.0).powi(2) + (p[1] - 3.0).powi(2) };

        let start = Array1::from_vec(vec![0.0, 0.0]);
        let mut options = Options::default();
        options.bounds = Some(Bounds::new(&[(Some(0.0), Some(1.0)), (Some(0.0), Some(1.0))]));

        let res = minimize_lbfgsb(objective, start, &options).unwrap();

        // Feasibility: both coordinates must respect the box.
        assert!(res.x[0] >= 0.0 && res.x[0] <= 1.0);
        assert!(res.x[1] >= 0.0 && res.x[1] <= 1.0);

        // The final iterate should sit near one edge of each interval.
        assert!(
            res.x[0] >= 0.9 || res.x[0].abs() < 0.1,
            "x[0] = {} should be near 0 or 1",
            res.x[0]
        );
        assert!(
            res.x[1] >= 0.9 || res.x[1].abs() < 0.1,
            "x[1] = {} should be near 0 or 1",
            res.x[1]
        );
    }
}