use scirs2_core::ndarray::{Array1, Array2};
use scirs2_core::random::rngs::StdRng;
use scirs2_core::random::SeedableRng;
use sklears_core::types::Float;

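/// Limited-memory BFGS (L-BFGS) optimizer for smooth, unconstrained problems.
///
/// Usage sketch (hypothetical quadratic objective; marked `ignore` since the
/// surrounding crate paths are not spelled out here):
///
/// ```ignore
/// use scirs2_core::ndarray::{array, Array1};
///
/// let opt = LbfgsOptimizer::default();
/// // Minimize f(x) = ||x||^2, whose gradient is 2x.
/// let f = |x: &Array1<Float>| x.dot(x);
/// let grad = |x: &Array1<Float>| 2.0 * x;
/// let x_min = opt.minimize(f, grad, array![1.0, -2.0]).unwrap();
/// ```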
pub struct LbfgsOptimizer {
    /// Maximum number of outer iterations.
    pub max_iter: usize,
    /// Convergence tolerance on the gradient norm.
    pub tol: Float,
    /// Number of correction pairs kept in the limited-memory history.
    pub m: usize,
    /// Maximum number of backtracking line-search steps.
    pub max_ls: usize,
}

impl Default for LbfgsOptimizer {
    fn default() -> Self {
        Self {
            max_iter: 100,
            tol: 1e-4,
            m: 10,
            max_ls: 20,
        }
    }
}

impl LbfgsOptimizer {
    pub fn minimize<F, G>(
        &self,
        f: F,
        grad_f: G,
        x0: Array1<Float>,
    ) -> Result<Array1<Float>, String>
    where
        F: Fn(&Array1<Float>) -> Float,
        G: Fn(&Array1<Float>) -> Array1<Float>,
    {
        let mut x = x0;
        let mut f_k = f(&x);
        let mut g_k = grad_f(&x);

        // Limited-memory history of the last `m` curvature pairs (s, y) and
        // their scalars rho = 1 / (s^T y).
        let mut s_list: Vec<Array1<Float>> = Vec::with_capacity(self.m);
        let mut y_list: Vec<Array1<Float>> = Vec::with_capacity(self.m);
        let mut rho_list: Vec<Float> = Vec::with_capacity(self.m);

        for _iter in 0..self.max_iter {
            // Convergence check on the L1 norm of the gradient.
            let g_norm = g_k.mapv(Float::abs).sum();
            if g_norm < self.tol {
                return Ok(x);
            }

            // Search direction from the two-loop recursion.
            let p = self.compute_direction(&g_k, &s_list, &y_list, &rho_list);

            // Backtracking (Armijo) line search along p.
            let alpha = self.line_search(&f, &x, &p, f_k, &g_k)?;

            let x_new = &x + alpha * &p;
            let f_new = f(&x_new);
            let g_new = grad_f(&x_new);

            let s = &x_new - &x;
            let y = &g_new - &g_k;

            // Guard against division by (near-)zero curvature.
            let rho = 1.0 / s.dot(&y).max(1e-10);

            // Keep only the most recent `m` correction pairs.
            if s_list.len() == self.m {
                s_list.remove(0);
                y_list.remove(0);
                rho_list.remove(0);
            }

            s_list.push(s);
            y_list.push(y);
            rho_list.push(rho);

            x = x_new;
            f_k = f_new;
            g_k = g_new;
        }

        Ok(x)
    }

    /// L-BFGS two-loop recursion: approximates -H_k * g using the stored
    /// (s, y, rho) history, scaled by gamma = s^T y / y^T y.
    fn compute_direction(
        &self,
        g: &Array1<Float>,
        s_list: &[Array1<Float>],
        y_list: &[Array1<Float>],
        rho_list: &[Float],
    ) -> Array1<Float> {
        let mut q = g.clone();
        let k = s_list.len();
        let mut alpha = vec![0.0; k];

        // First loop: newest to oldest.
        for i in (0..k).rev() {
            alpha[i] = rho_list[i] * s_list[i].dot(&q);
            q = q - alpha[i] * &y_list[i];
        }

        // Scale by the initial inverse-Hessian estimate gamma * I.
        let mut r = if k > 0 {
            let gamma = s_list[k - 1].dot(&y_list[k - 1]) / y_list[k - 1].dot(&y_list[k - 1]);
            gamma * q
        } else {
            q
        };

        // Second loop: oldest to newest.
        for i in 0..k {
            let beta = rho_list[i] * y_list[i].dot(&r);
            r = r + (alpha[i] - beta) * &s_list[i];
        }

        // Descent direction.
        -r
    }

    /// Backtracking line search enforcing the Armijo sufficient-decrease
    /// condition f(x + alpha * p) <= f(x) + c1 * alpha * g^T p.
    fn line_search<F>(
        &self,
        f: &F,
        x: &Array1<Float>,
        p: &Array1<Float>,
        f_k: Float,
        g_k: &Array1<Float>,
    ) -> Result<Float, String>
    where
        F: Fn(&Array1<Float>) -> Float,
    {
        let c1 = 1e-4;
        let rho = 0.5;
        let mut alpha = 1.0;

        let gp = g_k.dot(p);

        for _ in 0..self.max_ls {
            let x_new = x + alpha * p;
            let f_new = f(&x_new);

            if f_new <= f_k + c1 * alpha * gp {
                return Ok(alpha);
            }

            alpha *= rho;
        }

        Err("Line search failed to find suitable step size".to_string())
    }
}

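/// Stochastic Average Gradient (SAG) optimizer for finite-sum objectives of
/// the form `f(x) = (1/n) * sum_i f_i(x)`.
///
/// Usage sketch (hypothetical per-sample squared loss; `xs`, `ys`,
/// `n_features`, and `n_samples` are assumed to be in scope):
///
/// ```ignore
/// let opt = SagOptimizer { random_state: Some(42), ..Default::default() };
/// let f_i = |w: &Array1<Float>, i: usize| 0.5 * (xs.row(i).dot(w) - ys[i]).powi(2);
/// let grad_i = |w: &Array1<Float>, i: usize| {
///     let r = xs.row(i).dot(w) - ys[i];
///     xs.row(i).to_owned() * r
/// };
/// let w = opt.minimize(f_i, grad_i, Array1::zeros(n_features), n_samples).unwrap();
/// ```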
pub struct SagOptimizer {
    /// Maximum number of passes over the data.
    pub max_epochs: usize,
    /// Convergence tolerance on the averaged gradient norm.
    pub tol: Float,
    /// Step size; if `None`, a default of 0.01 is used.
    pub learning_rate: Option<Float>,
    /// Seed for the per-epoch shuffling of sample indices.
    pub random_state: Option<u64>,
}

impl Default for SagOptimizer {
    fn default() -> Self {
        Self {
            max_epochs: 100,
            tol: 1e-4,
            learning_rate: None,
            random_state: None,
        }
    }
}

impl SagOptimizer {
    pub fn minimize<F, G>(
        &self,
        _f_i: F,
        grad_f_i: G,
        x0: Array1<Float>,
        n_samples: usize,
    ) -> Result<Array1<Float>, String>
    where
        F: Fn(&Array1<Float>, usize) -> Float,
        G: Fn(&Array1<Float>, usize) -> Array1<Float>,
    {
        let n_features = x0.len();
        let mut x = x0;

        // Per-sample gradient table and the running sum of stored gradients.
        let mut gradient_memory = Array2::zeros((n_samples, n_features));
        let mut gradient_sum = Array1::zeros(n_features);
        let mut seen = vec![false; n_samples];

        let alpha = self.learning_rate.unwrap_or(0.01);

        let mut rng = match self.random_state {
            Some(seed) => StdRng::seed_from_u64(seed),
            None => StdRng::from_rng(&mut scirs2_core::random::thread_rng()),
        };

        use scirs2_core::random::SliceRandomExt;

        for _epoch in 0..self.max_epochs {
            // Visit the samples in a fresh random order each epoch.
            let mut indices: Vec<usize> = (0..n_samples).collect();
            indices.shuffle(&mut rng);

            for &i in &indices {
                let grad_i = grad_f_i(&x, i);

                // Replace the stored gradient for sample i in the running sum.
                if seen[i] {
                    gradient_sum = &gradient_sum - &gradient_memory.row(i) + &grad_i;
                } else {
                    gradient_sum = &gradient_sum + &grad_i;
                    seen[i] = true;
                }

                gradient_memory.row_mut(i).assign(&grad_i);

                // SAG step: move along the average of the stored gradients.
                x = &x - alpha * &gradient_sum / n_samples as Float;
            }

            let avg_grad_norm = gradient_sum.mapv(Float::abs).sum() / n_samples as Float;
            if avg_grad_norm < self.tol {
                return Ok(x);
            }
        }

        Ok(x)
    }
}

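/// SAGA optimizer: a variance-reduced stochastic gradient method that, unlike
/// SAG, supports composite objectives via a proximal step for a non-smooth
/// regularizer `g`.
///
/// Usage sketch (hypothetical L1-regularized problem; `f_i`, `grad_i`,
/// `n_features`, and `n_samples` are assumed to be defined as in the SAG
/// example above):
///
/// ```ignore
/// let opt = SagaOptimizer::default();
/// // Proximal operator of 0.1 * ||w||_1, scaled by the step size.
/// let prox = |w: &Array1<Float>, step: Float| proximal::prox_l1(w, 0.1 * step);
/// let w = opt.minimize(f_i, grad_i, prox, Array1::zeros(n_features), n_samples).unwrap();
/// ```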
pub struct SagaOptimizer {
    /// Maximum number of passes over the data.
    pub max_epochs: usize,
    /// Convergence tolerance on the averaged gradient norm.
    pub tol: Float,
    /// Step size; if `None`, a default of 0.01 is used.
    pub learning_rate: Option<Float>,
    /// Seed for the per-epoch shuffling of sample indices.
    pub random_state: Option<u64>,
}

impl Default for SagaOptimizer {
    fn default() -> Self {
        Self {
            max_epochs: 100,
            tol: 1e-4,
            learning_rate: None,
            random_state: None,
        }
    }
}

impl SagaOptimizer {
    pub fn minimize<F, GradF, ProxG>(
        &self,
        _f_i: F,
        grad_f_i: GradF,
        prox_g: ProxG,
        x0: Array1<Float>,
        n_samples: usize,
    ) -> Result<Array1<Float>, String>
    where
        F: Fn(&Array1<Float>, usize) -> Float,
        GradF: Fn(&Array1<Float>, usize) -> Array1<Float>,
        ProxG: Fn(&Array1<Float>, Float) -> Array1<Float>,
    {
        let n_features = x0.len();
        let mut x = x0;

        // Initialize the gradient table and its average at the starting point.
        let mut gradient_memory = Array2::zeros((n_samples, n_features));
        let mut gradient_avg = Array1::zeros(n_features);

        for i in 0..n_samples {
            let grad_i = grad_f_i(&x, i);
            gradient_memory.row_mut(i).assign(&grad_i);
            gradient_avg = &gradient_avg + &grad_i / n_samples as Float;
        }

        let alpha = self.learning_rate.unwrap_or(0.01);

        let mut rng = match self.random_state {
            Some(seed) => StdRng::seed_from_u64(seed),
            None => StdRng::from_rng(&mut scirs2_core::random::thread_rng()),
        };

        use scirs2_core::random::SliceRandomExt;

        for _epoch in 0..self.max_epochs {
            let mut indices: Vec<usize> = (0..n_samples).collect();
            indices.shuffle(&mut rng);

            for &i in &indices {
                let grad_old = gradient_memory.row(i).to_owned();
                let grad_new = grad_f_i(&x, i);

                // SAGA gradient estimate: use the table average *before* the
                // table is updated, which keeps the estimate unbiased.
                let v = &grad_new - &grad_old + &gradient_avg;

                // Update the stored gradient and the running average.
                gradient_memory.row_mut(i).assign(&grad_new);
                gradient_avg = &gradient_avg + (&grad_new - &grad_old) / n_samples as Float;

                // Gradient step followed by the proximal step for g.
                let x_intermediate = &x - alpha * &v;
                x = prox_g(&x_intermediate, alpha);
            }

            let grad_norm = gradient_avg.mapv(Float::abs).sum();
            if grad_norm < self.tol {
                return Ok(x);
            }
        }

        Ok(x)
    }
}

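/// Proximal gradient (ISTA-style) optimizer, with optional FISTA-style
/// acceleration, for composite objectives `f(x) + g(x)` where `f` is smooth
/// and `g` has an inexpensive proximal operator.
///
/// Usage sketch (hypothetical lasso-type problem; `a: Array2<Float>` and
/// `b: Array1<Float>` are assumed to be in scope):
///
/// ```ignore
/// let opt = ProximalGradientOptimizer::default();
/// let f = |x: &Array1<Float>| { let r = a.dot(x) - &b; 0.5 * r.dot(&r) };
/// let grad = |x: &Array1<Float>| a.t().dot(&(a.dot(x) - &b));
/// // Proximal operator of 0.5 * ||x||_1, scaled by the step size.
/// let prox = |x: &Array1<Float>, t: Float| proximal::prox_l1(x, 0.5 * t);
/// let x = opt.minimize(f, grad, prox, Array1::zeros(b.len())).unwrap();
/// ```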
pub struct ProximalGradientOptimizer {
    /// Maximum number of iterations.
    pub max_iter: usize,
    /// Convergence tolerance on the iterate change and relative objective change.
    pub tol: Float,
    /// Initial step size; if `None`, 1.0 is used.
    pub step_size: Option<Float>,
    /// Whether to adapt the step size with a backtracking line search.
    pub use_line_search: bool,
    /// Whether to apply FISTA-style acceleration.
    pub accelerated: bool,
    /// Backtracking shrink factor for the step size.
    pub beta: Float,
    /// Slack parameter in the backtracking sufficient-decrease test.
    pub sigma: Float,
}

impl Default for ProximalGradientOptimizer {
    fn default() -> Self {
        Self {
            max_iter: 1000,
            tol: 1e-6,
            step_size: None,
            use_line_search: true,
            accelerated: false,
            beta: 0.5,
            sigma: 0.01,
        }
    }
}

impl ProximalGradientOptimizer {
    /// Returns an optimizer configured with FISTA-style acceleration enabled.
    pub fn accelerated() -> Self {
        Self {
            accelerated: true,
            ..Default::default()
        }
    }

    pub fn minimize<F, GradF, ProxG>(
        &self,
        f: F,
        grad_f: GradF,
        prox_g: ProxG,
        x0: Array1<Float>,
    ) -> Result<Array1<Float>, String>
    where
        F: Fn(&Array1<Float>) -> Float,
        GradF: Fn(&Array1<Float>) -> Array1<Float>,
        ProxG: Fn(&Array1<Float>, Float) -> Array1<Float>,
    {
        let mut x = x0.clone();
        // Extrapolation point and momentum parameter for the accelerated variant.
        let mut y = x0;
        let mut t = 1.0;
        let mut step_size = self.step_size.unwrap_or(1.0);

        let mut f_prev = f(&x);

        for iter in 0..self.max_iter {
            // The gradient is evaluated at the extrapolated point when accelerated.
            let current_point = if self.accelerated { &y } else { &x };

            let grad = grad_f(current_point);

            if self.use_line_search {
                step_size = self.backtracking_line_search(
                    &f,
                    &grad_f,
                    &prox_g,
                    current_point,
                    &grad,
                    step_size,
                )?;
            }

            // Forward (gradient) step followed by the proximal (backward) step.
            let x_new = prox_g(&(current_point - step_size * &grad), step_size);

            let diff_norm = (&x_new - &x).mapv(Float::abs).sum();
            if diff_norm < self.tol {
                return Ok(x_new);
            }

            if self.accelerated {
                // FISTA momentum update: t_{k+1} = (1 + sqrt(1 + 4 t_k^2)) / 2.
                let t_new: Float = (1.0_f64 + (1.0_f64 + 4.0_f64 * t * t).sqrt()) / 2.0_f64;
                let beta = (t - 1.0) / t_new;
                y = &x_new + beta * (&x_new - &x);
                t = t_new;
            }

            x = x_new;

            // Stop once the relative objective change stagnates.
            let f_curr = f(&x);
            if (f_curr - f_prev).abs() < self.tol * f_prev.abs().max(1.0) && iter > 10 {
                return Ok(x);
            }
            f_prev = f_curr;
        }

        Ok(x)
    }

    /// Backtracking line search for the proximal gradient step: shrink the
    /// step size until the smooth part satisfies a quadratic upper bound at
    /// the prospective iterate.
    fn backtracking_line_search<F, GradF, ProxG>(
        &self,
        f: &F,
        _grad_f: &GradF,
        prox_g: &ProxG,
        x: &Array1<Float>,
        grad: &Array1<Float>,
        mut step_size: Float,
    ) -> Result<Float, String>
    where
        F: Fn(&Array1<Float>) -> Float,
        GradF: Fn(&Array1<Float>) -> Array1<Float>,
        ProxG: Fn(&Array1<Float>, Float) -> Array1<Float>,
    {
        let f_x = f(x);
        let max_iter = 50;

        for _ in 0..max_iter {
            let x_new = prox_g(&(x - step_size * grad), step_size);
            let f_new = f(&x_new);

            let diff = &x_new - x;
            // Quadratic model of f around x with curvature 1 / step_size.
            let quad_approx = f_x + grad.dot(&diff) + diff.dot(&diff) / (2.0 * step_size);

            if f_new <= quad_approx + self.sigma * step_size * grad.dot(grad) {
                return Ok(step_size);
            }

            step_size *= self.beta;

            if step_size < 1e-16 {
                return Err("Step size became too small in line search".to_string());
            }
        }

        Ok(step_size)
    }
}

/// Alias for [`ProximalGradientOptimizer`]; use
/// [`ProximalGradientOptimizer::accelerated`] to obtain FISTA-style behavior.
pub type FistaOptimizer = ProximalGradientOptimizer;

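/// Gradient descent with Nesterov momentum and an optional adaptive
/// learning-rate heuristic.
///
/// Usage sketch (hypothetical smooth quadratic):
///
/// ```ignore
/// let opt = NesterovAcceleratedGradient { learning_rate: 0.1, ..Default::default() };
/// let f = |x: &Array1<Float>| x.dot(x);
/// let grad = |x: &Array1<Float>| 2.0 * x;
/// let x_min = opt.minimize(f, grad, array![1.0, -1.0]).unwrap();
/// ```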
pub struct NesterovAcceleratedGradient {
    /// Maximum number of iterations.
    pub max_iter: usize,
    /// Convergence tolerance on the gradient norm and relative objective change.
    pub tol: Float,
    /// Initial learning rate.
    pub learning_rate: Float,
    /// Whether to adapt the learning rate based on objective progress.
    pub adaptive_lr: bool,
    /// Momentum coefficient for the velocity update.
    pub momentum: Float,
    /// Multiplicative learning-rate decay per iteration (non-adaptive mode).
    pub lr_decay: Float,
}

impl Default for NesterovAcceleratedGradient {
    fn default() -> Self {
        Self {
            max_iter: 1000,
            tol: 1e-6,
            learning_rate: 0.01,
            adaptive_lr: false,
            momentum: 0.9,
            lr_decay: 0.999,
        }
    }
}

impl NesterovAcceleratedGradient {
    /// Returns an optimizer with the adaptive learning-rate heuristic enabled.
    pub fn adaptive() -> Self {
        Self {
            adaptive_lr: true,
            ..Default::default()
        }
    }

    pub fn minimize<F, GradF>(
        &self,
        f: F,
        grad_f: GradF,
        x0: Array1<Float>,
    ) -> Result<Array1<Float>, String>
    where
        F: Fn(&Array1<Float>) -> Float,
        GradF: Fn(&Array1<Float>) -> Array1<Float>,
    {
        let mut x = x0.clone();
        // Velocity (momentum) vector and current learning rate.
        let mut v = Array1::zeros(x0.len());
        let mut lr = self.learning_rate;

        let mut f_prev = f(&x);

        for iter in 0..self.max_iter {
            // Nesterov look-ahead point: evaluate the gradient after the
            // momentum step rather than at the current iterate.
            let y = &x + self.momentum * &v;

            let grad = grad_f(&y);

            let grad_norm = grad.mapv(Float::abs).sum();
            if grad_norm < self.tol {
                return Ok(x);
            }

            v = self.momentum * &v - lr * &grad;
            x = &x + &v;

            if self.adaptive_lr {
                let f_curr = f(&x);
                if f_curr > f_prev + 1e-10 {
                    // Objective increased: shrink the learning rate, undo the
                    // step, and reset the momentum.
                    lr *= 0.9;
                    x = &x - &v;
                    v = Array1::zeros(x0.len());
                    continue;
                } else if f_curr < f_prev - 0.01 * lr * grad_norm {
                    // Good progress: cautiously grow the learning rate.
                    lr = (lr * 1.005).min(self.learning_rate * 1.5);
                }

                if (f_curr - f_prev).abs() < self.tol * f_prev.abs().max(1.0) {
                    return Ok(x);
                }
                f_prev = f_curr;
            } else {
                lr *= self.lr_decay;

                // Check the relative objective change every 10 iterations.
                if iter % 10 == 0 {
                    let f_curr = f(&x);
                    if (f_curr - f_prev).abs() < self.tol * f_prev.abs().max(1.0) {
                        return Ok(x);
                    }
                    f_prev = f_curr;
                }
            }
        }

        Ok(x)
    }
}

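/// Proximal operators for common regularizers. Callers in this file pass the
/// combined value `step_size * lambda` as `alpha_lambda`.
///
/// Usage sketch of the soft-thresholding (L1) operator:
///
/// ```ignore
/// use scirs2_core::ndarray::array;
///
/// let x = array![2.0, -0.2, 0.7];
/// // Threshold each entry by 0.5: yields [1.5, 0.0, 0.2].
/// let shrunk = proximal::prox_l1(&x, 0.5);
/// ```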
pub mod proximal {
    use super::*;

    /// Soft-thresholding: proximal operator of `alpha_lambda * ||x||_1`.
    pub fn prox_l1(x: &Array1<Float>, alpha_lambda: Float) -> Array1<Float> {
        x.mapv(|xi| soft_threshold(xi, alpha_lambda))
    }

    /// Proximal operator of the squared L2 penalty `(alpha_lambda / 2) * ||x||_2^2`.
    pub fn prox_l2(x: &Array1<Float>, alpha_lambda: Float) -> Array1<Float> {
        x / (1.0 + alpha_lambda)
    }

    /// Elastic-net proximal operator: soft-thresholding followed by L2 shrinkage.
    pub fn prox_elastic_net(x: &Array1<Float>, alpha: Float, l1_ratio: Float) -> Array1<Float> {
        let l1_prox = prox_l1(x, alpha * l1_ratio);
        prox_l2(&l1_prox, alpha * (1.0 - l1_ratio))
    }

    #[inline]
    fn soft_threshold(x: Float, lambda: Float) -> Float {
        if x > lambda {
            x - lambda
        } else if x < -lambda {
            x + lambda
        } else {
            0.0
        }
    }
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_lbfgs_quadratic() {
        // f(x) = 0.5 * (2 x0^2 + 4 x1^2) - x0 - 2 x1, minimized at (0.5, 0.5).
        let f = |x: &Array1<Float>| -> Float {
            0.5 * (2.0 * x[0] * x[0] + 4.0 * x[1] * x[1]) - x[0] - 2.0 * x[1]
        };

        let grad_f =
            |x: &Array1<Float>| -> Array1<Float> { array![2.0 * x[0] - 1.0, 4.0 * x[1] - 2.0] };

        let optimizer = LbfgsOptimizer::default();
        let x0 = array![0.0, 0.0];

        let result = optimizer.minimize(f, grad_f, x0).unwrap();

        assert_abs_diff_eq!(result[0], 0.5, epsilon = 1e-6);
        assert_abs_diff_eq!(result[1], 0.5, epsilon = 1e-6);
    }

    #[test]
    fn test_proximal_operators() {
        use proximal::*;

        let x = array![2.0, -1.5, 0.5, -0.3];

        // Soft-thresholding by 0.5 shrinks each entry toward zero.
        let prox_x = prox_l1(&x, 0.5);
        assert_abs_diff_eq!(prox_x[0], 1.5);
        assert_abs_diff_eq!(prox_x[1], -1.0);
        assert_abs_diff_eq!(prox_x[2], 0.0);
        assert_abs_diff_eq!(prox_x[3], 0.0);

        // L2 prox divides each entry by (1 + 0.5).
        let prox_x = prox_l2(&x, 0.5);
        assert_abs_diff_eq!(prox_x[0], 2.0 / 1.5);
        assert_abs_diff_eq!(prox_x[1], -1.5 / 1.5);
    }

    #[test]
    fn test_proximal_gradient_lasso() {
        use proximal::*;

        // Lasso with an identity design: the solution is soft_threshold(b, lambda).
        let a = array![[1.0, 0.0], [0.0, 1.0]];
        let b = array![2.0, 1.0];
        let lambda = 0.5;

        let f = |x: &Array1<Float>| -> Float {
            let residual = a.dot(x) - &b;
            0.5 * residual.dot(&residual)
        };

        let grad_f = |x: &Array1<Float>| -> Array1<Float> {
            let residual = a.dot(x) - &b;
            a.t().dot(&residual)
        };

        let prox_g = |x: &Array1<Float>, t: Float| -> Array1<Float> { prox_l1(x, lambda * t) };

        let optimizer = ProximalGradientOptimizer::default();
        let x0 = array![0.0, 0.0];

        let result = optimizer.minimize(f, grad_f, prox_g, x0).unwrap();

        // Expected solution is approximately (1.5, 0.5).
        assert!(result[0] > 1.0 && result[0] < 2.0);
        assert!(result[1] > 0.0 && result[1] < 1.0);
    }

    #[test]
    fn test_fista_accelerated() {
        use proximal::*;

        // Same lasso problem as above, solved with the accelerated variant.
        let a = array![[1.0, 0.0], [0.0, 1.0]];
        let b = array![2.0, 1.0];
        let lambda = 0.5;

        let f = |x: &Array1<Float>| -> Float {
            let residual = a.dot(x) - &b;
            0.5 * residual.dot(&residual)
        };

        let grad_f = |x: &Array1<Float>| -> Array1<Float> {
            let residual = a.dot(x) - &b;
            a.t().dot(&residual)
        };

        let prox_g = |x: &Array1<Float>, t: Float| -> Array1<Float> { prox_l1(x, lambda * t) };

        let optimizer = ProximalGradientOptimizer::accelerated();
        let x0 = array![0.0, 0.0];

        let result = optimizer.minimize(f, grad_f, prox_g, x0).unwrap();

        assert!(result[0] > 1.0 && result[0] < 2.0);
        assert!(result[1] > 0.0 && result[1] < 1.0);
    }

    #[test]
    fn test_nesterov_accelerated_gradient() {
        // f(x) = 0.5 * (4 x0^2 + x1^2) - 2 x0 - x1, minimized at (0.5, 1.0).
        let f = |x: &Array1<Float>| -> Float {
            0.5 * (4.0 * x[0] * x[0] + x[1] * x[1]) - 2.0 * x[0] - x[1]
        };

        let grad_f = |x: &Array1<Float>| -> Array1<Float> { array![4.0 * x[0] - 2.0, x[1] - 1.0] };

        let optimizer = NesterovAcceleratedGradient {
            learning_rate: 0.1,
            max_iter: 100,
            tol: 1e-8,
            ..Default::default()
        };
        let x0 = array![0.0, 0.0];

        let result = optimizer.minimize(f, grad_f, x0).unwrap();

        assert_abs_diff_eq!(result[0], 0.5, epsilon = 1e-4);
        assert_abs_diff_eq!(result[1], 1.0, epsilon = 1e-4);
    }

    #[test]
    fn test_nesterov_adaptive() {
        // Simple convex bowl f(x) = x0^2 + x1^2, minimized at the origin.
        let f = |x: &Array1<Float>| -> Float { x[0] * x[0] + x[1] * x[1] };
        let grad_f = |x: &Array1<Float>| -> Array1<Float> { array![2.0 * x[0], 2.0 * x[1]] };

        let mut optimizer = NesterovAcceleratedGradient::adaptive();
        optimizer.max_iter = 500;
        optimizer.tol = 1e-3;
        optimizer.learning_rate = 0.01;
        optimizer.momentum = 0.9;

        let x0 = array![1.0, 1.0];
        let result = optimizer.minimize(f, grad_f, x0).unwrap();

        assert_abs_diff_eq!(result[0], 0.0, epsilon = 0.1);
        assert_abs_diff_eq!(result[1], 0.0, epsilon = 0.1);
    }
}