use crate::error::OptimizeError;
use crate::unconstrained::result::OptimizeResult;
use scirs2_core::ndarray::{Array1, ArrayView1, ArrayView2, ScalarOperand};
use scirs2_core::numeric::Float;
use scirs2_core::random::seq::SliceRandom;
use std::collections::HashMap;

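/// L1-regularized (lasso) solver based on proximal gradient descent (ISTA),
/// with optional FISTA acceleration. Minimizes `f(x) + lambda * ||x||_1` given
/// only a closure for the gradient of the smooth part `f`.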
#[derive(Debug, Clone)]
pub struct LassoOptimizer<F: Float> {
    /// L1 regularization strength.
    pub lambda: F,
    /// Step size for the gradient step.
    pub learning_rate: F,
    /// Maximum number of iterations.
    pub max_iter: usize,
    /// Convergence tolerance on the change between iterates.
    pub tol: F,
    /// Use FISTA (Nesterov) acceleration instead of plain ISTA.
    pub accelerated: bool,
}

impl<F: Float + ScalarOperand> LassoOptimizer<F> {
    /// Create a plain ISTA (unaccelerated proximal gradient) optimizer.
    pub fn new(lambda: F, learning_rate: F) -> Self {
        Self {
            lambda,
            learning_rate,
            max_iter: 1000,
            tol: F::from(1e-6).unwrap(),
            accelerated: false,
        }
    }

    /// Create a FISTA-accelerated optimizer (Nesterov momentum on the iterates).
    pub fn fista(lambda: F, learning_rate: F) -> Self {
        Self {
            lambda,
            learning_rate,
            max_iter: 1000,
            tol: F::from(1e-6).unwrap(),
            accelerated: true,
        }
    }

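    /// Scalar soft-thresholding operator, the proximal map of `threshold * |.|`:
    /// `S_t(x) = sign(x) * max(|x| - t, 0)`.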
    fn soft_threshold(&self, x: F, threshold: F) -> F {
        if x > threshold {
            x - threshold
        } else if x < -threshold {
            x + threshold
        } else {
            F::zero()
        }
    }

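    /// Proximal operator of `lambda * step_size * ||x||_1`, applied elementwise
    /// via soft-thresholding with threshold `lambda * step_size`.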
    fn prox_l1(&self, x: &Array1<F>, step_size: F) -> Array1<F> {
        let threshold = self.lambda * step_size;
        x.mapv(|xi| self.soft_threshold(xi, threshold))
    }

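    /// Run proximal gradient descent from `x0`.
    ///
    /// Each iteration takes a gradient step followed by the L1 proximal map,
    /// `x_{k+1} = prox_{lambda * eta}(x_k - eta * grad f(x_k))`, where `eta` is the
    /// learning rate. When `accelerated` is set, the gradient is evaluated at the
    /// extrapolated point `y_k` and the FISTA momentum updates
    /// `t_{k+1} = (1 + sqrt(1 + 4 t_k^2)) / 2` and
    /// `y_{k+1} = x_{k+1} + ((t_k - 1) / t_{k+1}) * (x_{k+1} - x_k)` are applied.
    /// Iteration stops when the L1 change between iterates drops below `tol`.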
    pub fn minimize<G>(
        &self,
        mut grad_fn: G,
        x0: &Array1<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError>
    where
        G: FnMut(&ArrayView1<F>) -> Array1<F>,
        F: Into<f64> + Copy,
    {
        let mut x = x0.clone();
        let mut y = x0.clone();
        let mut t = F::one();
        let _prev_loss = F::infinity();

        for iter in 0..self.max_iter {
            let grad = if self.accelerated {
                grad_fn(&y.view())
            } else {
                grad_fn(&x.view())
            };

            let x_new = if self.accelerated {
                &y - &(&grad * self.learning_rate)
            } else {
                &x - &(&grad * self.learning_rate)
            };

            let x_prox = self.prox_l1(&x_new, self.learning_rate);

            if self.accelerated {
                let t_new = (F::one() + (F::one() + F::from(4).unwrap() * t * t).sqrt())
                    / F::from(2).unwrap();
                let beta = (t - F::one()) / t_new;
                y = &x_prox + &((&x_prox - &x) * beta);
                t = t_new;
            }

            let change = (&x_prox - &x).mapv(|xi| xi.abs()).sum();
            if change < self.tol {
                return Ok(OptimizeResult {
                    x: x_prox.mapv(|v| v.into()),
                    fun: F::zero(),
                    nit: iter,
                    func_evals: iter,
                    nfev: iter,
                    success: true,
                    message: "Optimization terminated successfully.".to_string(),
                    jacobian: Some(grad.mapv(|v| v.into())),
                    hessian: None,
                });
            }

            x = x_prox;
        }

        Err(OptimizeError::ConvergenceError(
            "Maximum iterations reached".to_string(),
        ))
    }
}

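/// Group lasso solver using proximal gradient descent. `groups[i]` gives the
/// group index of feature `i`; the penalty `lambda * sum_g ||x_g||_2` drives
/// entire groups of coefficients to zero together.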
#[derive(Debug, Clone)]
pub struct GroupLassoOptimizer<F: Float> {
    /// Group regularization strength.
    pub lambda: F,
    /// Step size for the gradient step.
    pub learning_rate: F,
    /// Maximum number of iterations.
    pub max_iter: usize,
    /// Convergence tolerance on the change between iterates.
    pub tol: F,
    /// Group index for each feature (`groups[i]` is the group of feature `i`).
    pub groups: Vec<usize>,
}

impl<F: Float + ScalarOperand> GroupLassoOptimizer<F> {
    /// Create a group lasso optimizer with the given group assignment.
    pub fn new(lambda: F, learning_rate: F, groups: Vec<usize>) -> Self {
        Self {
            lambda,
            learning_rate,
            max_iter: 1000,
            tol: F::from(1e-6).unwrap(),
            groups,
        }
    }

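    /// Block soft-thresholding: for each group `g`,
    /// `x_g <- x_g * max(1 - t / ||x_g||_2, 0)` with `t = lambda * step_size`,
    /// so groups whose norm falls below the threshold are zeroed out entirely.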
    fn prox_group_l1(&self, x: &Array1<F>, step_size: F) -> Array1<F> {
        let mut result = x.clone();
        let threshold = self.lambda * step_size;

        let mut groups_map: HashMap<usize, Vec<usize>> = HashMap::new();
        for (feature_idx, &group_idx) in self.groups.iter().enumerate() {
            groups_map.entry(group_idx).or_default().push(feature_idx);
        }

        for (_, feature_indices) in groups_map {
            let group_norm = feature_indices
                .iter()
                .map(|&idx| x[idx] * x[idx])
                .fold(F::zero(), |acc, x| acc + x)
                .sqrt();

            if group_norm > threshold {
                let scale = (group_norm - threshold) / group_norm;
                for &idx in &feature_indices {
                    result[idx] = result[idx] * scale;
                }
            } else {
                for &idx in &feature_indices {
                    result[idx] = F::zero();
                }
            }
        }

        result
    }

    pub fn minimize<G>(
        &self,
        mut grad_fn: G,
        x0: &Array1<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError>
    where
        G: FnMut(&ArrayView1<F>) -> Array1<F>,
        F: Into<f64> + Copy,
    {
        let mut x = x0.clone();

        for iter in 0..self.max_iter {
            let grad = grad_fn(&x.view());

            let x_new = &x - &(&grad * self.learning_rate);

            let x_prox = self.prox_group_l1(&x_new, self.learning_rate);

            let change = (&x_prox - &x).mapv(|xi| xi.abs()).sum();
            if change < self.tol {
                return Ok(OptimizeResult {
                    x: x_prox.mapv(|v| v.into()),
                    fun: F::zero(),
                    nit: iter,
                    func_evals: iter,
                    nfev: iter,
                    success: true,
                    message: "Optimization terminated successfully.".to_string(),
                    jacobian: Some(grad.mapv(|v| v.into())),
                    hessian: None,
                });
            }

            x = x_prox;
        }

        Err(OptimizeError::ConvergenceError(
            "Maximum iterations reached".to_string(),
        ))
    }
}

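/// Elastic net solver using proximal gradient descent. Combines an L1 penalty
/// `lambda1 * ||x||_1` with an L2 penalty `(lambda2 / 2) * ||x||_2^2`.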
#[derive(Debug, Clone)]
pub struct ElasticNetOptimizer<F: Float> {
    /// L1 regularization strength.
    pub lambda1: F,
    /// L2 regularization strength.
    pub lambda2: F,
    /// Step size for the gradient step.
    pub learning_rate: F,
    /// Maximum number of iterations.
    pub max_iter: usize,
    /// Convergence tolerance on the change between iterates.
    pub tol: F,
}

impl<F: Float + ScalarOperand> ElasticNetOptimizer<F> {
    /// Create an elastic net optimizer with L1 strength `lambda1` and L2 strength `lambda2`.
    pub fn new(lambda1: F, lambda2: F, learning_rate: F) -> Self {
        Self {
            lambda1,
            lambda2,
            learning_rate,
            max_iter: 1000,
            tol: F::from(1e-6).unwrap(),
        }
    }

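    /// Proximal operator of the elastic net penalty for step size `t`:
    /// soft-threshold each entry at `lambda1 * t`, then shrink by `1 / (1 + lambda2 * t)`.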
    fn prox_elastic_net(&self, x: &Array1<F>, step_size: F) -> Array1<F> {
        let l1_threshold = self.lambda1 * step_size;
        let l2_factor = F::one() / (F::one() + self.lambda2 * step_size);

        x.mapv(|xi| {
            let soft_thresh = if xi > l1_threshold {
                xi - l1_threshold
            } else if xi < -l1_threshold {
                xi + l1_threshold
            } else {
                F::zero()
            };
            soft_thresh * l2_factor
        })
    }

    pub fn minimize<G>(
        &self,
        mut grad_fn: G,
        x0: &Array1<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError>
    where
        G: FnMut(&ArrayView1<F>) -> Array1<F>,
        F: Into<f64> + Copy,
    {
        let mut x = x0.clone();

        for iter in 0..self.max_iter {
            let grad = grad_fn(&x.view());

            let x_new = &x - &(&grad * self.learning_rate);

            let x_prox = self.prox_elastic_net(&x_new, self.learning_rate);

            let change = (&x_prox - &x).mapv(|xi| xi.abs()).sum();
            if change < self.tol {
                return Ok(OptimizeResult {
                    x: x_prox.mapv(|v| v.into()),
                    fun: F::zero(),
                    nit: iter,
                    func_evals: iter,
                    nfev: iter,
                    success: true,
                    message: "Optimization terminated successfully.".to_string(),
                    jacobian: Some(grad.mapv(|v| v.into())),
                    hessian: None,
                });
            }

            x = x_prox;
        }

        Err(OptimizeError::ConvergenceError(
            "Maximum iterations reached".to_string(),
        ))
    }
}

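/// ADMM (Alternating Direction Method of Multipliers) solver for lasso-type
/// problems, splitting the objective as `f(x) + lambda * ||z||_1` subject to
/// `x = z`, with penalty parameter `rho` and scaled dual variable `u`.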
#[derive(Debug, Clone)]
pub struct ADMMOptimizer<F: Float> {
    /// Augmented Lagrangian penalty parameter.
    pub rho: F,
    /// Primal residual tolerance.
    pub eps_pri: F,
    /// Dual residual tolerance.
    pub eps_dual: F,
    /// Maximum number of iterations.
    pub max_iter: usize,
}

impl<F: Float + ScalarOperand> ADMMOptimizer<F> {
    /// Create an ADMM optimizer with penalty parameter `rho` and default tolerances.
    pub fn new(rho: F) -> Self {
        Self {
            rho,
            eps_pri: F::from(1e-3).unwrap(),
            eps_dual: F::from(1e-3).unwrap(),
            max_iter: 1000,
        }
    }

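    /// Solve `min_x loss(x) + lambda * ||x||_1` with ADMM. Each iteration performs an
    /// approximate x-update (a single gradient step on the augmented Lagrangian), a
    /// z-update by soft-thresholding `x + u` at `lambda / rho`, and the dual update
    /// `u <- u + x - z`, stopping once the primal residual `||x - z||` and the dual
    /// residual `rho * ||z - z_old||` fall below their respective tolerances.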
    pub fn solve_lasso<LossGrad, Data>(
        &self,
        loss_grad: LossGrad,
        lambda: F,
        x0: &Array1<F>,
        data: &Data,
    ) -> Result<OptimizeResult<F>, OptimizeError>
    where
        LossGrad: Fn(&ArrayView1<F>, &Data) -> Array1<F>,
        Data: Clone,
        F: Into<f64> + Copy,
    {
        let n = x0.len();
        let mut x = x0.clone();
        let mut z = Array1::zeros(n);
        let mut u = Array1::zeros(n);

        for iter in 0..self.max_iter {
            // x-update: one gradient step on the augmented Lagrangian.
            let grad_loss = loss_grad(&x.view(), data);
            let grad_augmented = &grad_loss + &((&x - &z + &u) * self.rho);

            let grad_norm = grad_augmented.mapv(|g| g * g).sum().sqrt();
            let lr = if grad_norm > F::epsilon() {
                F::one() / (F::one() + self.rho)
            } else {
                F::from(0.1).unwrap()
            };
            x = &x - &(&grad_augmented * lr);

            // z-update: soft-threshold x + u at lambda / rho.
            let z_old = z.clone();
            let threshold = lambda / self.rho;
            z = (&x + &u).mapv(|xi| {
                if xi > threshold {
                    xi - threshold
                } else if xi < -threshold {
                    xi + threshold
                } else {
                    F::zero()
                }
            });

            // Dual update.
            u = &u + &x - &z;

            // Primal and dual residuals for the stopping test.
            let r_norm = (&x - &z).mapv(|xi| xi * xi).sum().sqrt();
            let s_norm = ((&z - &z_old) * self.rho).mapv(|xi| xi * xi).sum().sqrt();
            let x_norm = x.mapv(|xi| xi * xi).sum().sqrt();
            let z_norm = z.mapv(|xi| xi * xi).sum().sqrt();
            let u_norm = u.mapv(|xi| xi * xi).sum().sqrt();

            let eps_pri_thresh =
                self.eps_pri * (F::sqrt(F::from(n).unwrap()) + F::max(x_norm, z_norm));
            let eps_dual_thresh = self.eps_dual * F::sqrt(F::from(n).unwrap()) * self.rho * u_norm;

            if r_norm < eps_pri_thresh && s_norm < eps_dual_thresh {
                return Ok(OptimizeResult {
                    x: x.mapv(|v| v.into()),
                    fun: F::zero(),
                    nit: iter,
                    func_evals: iter,
                    nfev: iter,
                    success: true,
                    message: "ADMM converged successfully.".to_string(),
                    jacobian: Some(grad_loss.mapv(|v| v.into())),
                    hessian: None,
                });
            }
        }

        Err(OptimizeError::ConvergenceError(
            "Maximum iterations reached".to_string(),
        ))
    }
}

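/// Cyclic or randomized coordinate descent: each sweep updates one coordinate at a
/// time using a caller-supplied partial derivative of the objective.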
#[derive(Debug, Clone)]
pub struct CoordinateDescentOptimizer<F: Float> {
    /// Maximum number of coordinate sweeps.
    pub max_iter: usize,
    /// Convergence tolerance on the change per sweep.
    pub tol: F,
    /// Visit coordinates in a random order on every sweep.
    pub random: bool,
}

impl<F: Float + ScalarOperand> Default for CoordinateDescentOptimizer<F> {
    fn default() -> Self {
        Self {
            max_iter: 1000,
            tol: F::from(1e-6).unwrap(),
            random: false,
        }
    }
}

impl<F: Float + ScalarOperand> CoordinateDescentOptimizer<F> {
    /// Create a cyclic coordinate descent optimizer with default settings.
    pub fn new() -> Self {
        Self::default()
    }

    /// Create a coordinate descent optimizer that shuffles the coordinate order each sweep.
    pub fn random() -> Self {
        Self {
            max_iter: 1000,
            tol: F::from(1e-6).unwrap(),
            random: true,
        }
    }

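    /// Minimize `obj_fn` by coordinate-wise gradient steps. On every sweep the
    /// coordinates are visited in order (or in a random permutation when `random`
    /// is set), each updated as `x[i] <- x[i] - lr * d obj / d x[i]` with a fixed
    /// step size, until the L1 change over a full sweep drops below `tol`.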
    pub fn minimize<Obj, Grad1D>(
        &self,
        obj_fn: Obj,
        grad_1d_fn: Grad1D,
        x0: &Array1<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError>
    where
        Obj: Fn(&ArrayView1<F>) -> F,
        Grad1D: Fn(&ArrayView1<F>, usize) -> F,
        F: Into<f64> + Copy,
    {
        let mut x = x0.clone();
        let n = x.len();

        for iter in 0..self.max_iter {
            let x_old = x.clone();

            let coords: Vec<usize> = if self.random {
                let mut coords: Vec<usize> = (0..n).collect();
                coords.shuffle(&mut scirs2_core::random::rng());
                coords
            } else {
                (0..n).collect()
            };

            for &i in &coords {
                let grad_i = grad_1d_fn(&x.view(), i);

                // Fixed step size for each coordinate update.
                let lr = F::from(0.01).unwrap();
                x[i] = x[i] - lr * grad_i;
            }

515
516 let change = (&x - &x_old).mapv(|xi| xi.abs()).sum();
518 if change < self.tol {
519 let final_obj = obj_fn(&x.view());
520 return Ok(OptimizeResult {
521 x: x.mapv(|v| v.into()),
522 fun: final_obj,
523 nit: iter,
524 func_evals: iter * n,
525 nfev: iter * n,
526 success: true,
527 message: "Coordinate descent converged successfully.".to_string(),
528 jacobian: Some(Array1::zeros(n)), hessian: None,
530 });
531 }
532 }
533
534 Err(OptimizeError::ConvergenceError(
535 "Maximum iterations reached".to_string(),
536 ))
537 }
538}
539
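/// Convenience wrappers for common regularized least-squares problems. Each one
/// minimizes `||A x - b||_2^2` plus the corresponding penalty, using the gradient
/// `2 * A^T (A x - b)` of the squared loss and a zero vector as the initial guess.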
pub mod ml_problems {
    use super::*;

    /// Lasso regression: `min_x ||A x - b||_2^2 + lambda * ||x||_1`.
    pub fn lasso_regression<F: Float + ScalarOperand + Into<f64> + Copy>(
        a: &ArrayView2<F>,
        b: &ArrayView1<F>,
        lambda: F,
        learning_rate: Option<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError> {
        let lr = learning_rate.unwrap_or_else(|| F::from(0.01).unwrap());
        let optimizer = LassoOptimizer::new(lambda, lr);

        let n = a.ncols();
        let x0 = Array1::zeros(n);

        let grad_fn = |x: &ArrayView1<F>| -> Array1<F> {
            let residual = a.dot(x) - b;
            a.t().dot(&residual) * F::from(2).unwrap()
        };

        optimizer.minimize(grad_fn, &x0)
    }

    /// Group lasso regression: `min_x ||A x - b||_2^2 + lambda * sum_g ||x_g||_2`.
    pub fn group_lasso_regression<F: Float + ScalarOperand + Into<f64> + Copy>(
        a: &ArrayView2<F>,
        b: &ArrayView1<F>,
        lambda: F,
        groups: Vec<usize>,
        learning_rate: Option<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError> {
        let lr = learning_rate.unwrap_or_else(|| F::from(0.01).unwrap());
        let optimizer = GroupLassoOptimizer::new(lambda, lr, groups);

        let n = a.ncols();
        let x0 = Array1::zeros(n);

        let grad_fn = |x: &ArrayView1<F>| -> Array1<F> {
            let residual = a.dot(x) - b;
            a.t().dot(&residual) * F::from(2).unwrap()
        };

        optimizer.minimize(grad_fn, &x0)
    }

    /// Elastic net regression: `min_x ||A x - b||_2^2 + lambda1 * ||x||_1 + (lambda2 / 2) * ||x||_2^2`.
    pub fn elastic_net_regression<F: Float + ScalarOperand + Into<f64> + Copy>(
        a: &ArrayView2<F>,
        b: &ArrayView1<F>,
        lambda1: F,
        lambda2: F,
        learning_rate: Option<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError> {
        let lr = learning_rate.unwrap_or_else(|| F::from(0.01).unwrap());
        let optimizer = ElasticNetOptimizer::new(lambda1, lambda2, lr);

        let n = a.ncols();
        let x0 = Array1::zeros(n);

        let grad_fn = |x: &ArrayView1<F>| -> Array1<F> {
            let residual = a.dot(x) - b;
            a.t().dot(&residual) * F::from(2).unwrap()
        };

        optimizer.minimize(grad_fn, &x0)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_lasso_soft_threshold() {
        let optimizer = LassoOptimizer::new(1.0, 0.1);

        assert_abs_diff_eq!(optimizer.soft_threshold(2.0, 1.0), 1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(optimizer.soft_threshold(-2.0, 1.0), -1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(optimizer.soft_threshold(0.5, 1.0), 0.0, epsilon = 1e-10);
    }

    #[test]
    fn test_lasso_prox_operator() {
        let optimizer = LassoOptimizer::new(1.0, 0.1);
        let x = array![2.0, -2.0, 0.5, -0.5];
        let result = optimizer.prox_l1(&x, 1.0);

        assert_abs_diff_eq!(result[0], 1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(result[1], -1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(result[2], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(result[3], 0.0, epsilon = 1e-10);
    }

    #[test]
    fn test_elastic_net_prox_operator() {
        let optimizer = ElasticNetOptimizer::new(1.0, 1.0, 0.1);
        let x = array![3.0, -3.0, 0.5];
        let result = optimizer.prox_elastic_net(&x, 1.0);

        assert!(result[0] > 0.0 && result[0] < 2.0);
        assert!(result[1] < 0.0 && result[1] > -2.0);
        assert_abs_diff_eq!(result[2], 0.0, epsilon = 1e-10);
    }

    #[test]
    fn test_group_lasso_simple() {
        let optimizer = GroupLassoOptimizer::new(1.0, 0.1, vec![0, 0, 1, 1]);
        let x = array![1.0, 1.0, 0.1, 0.1];
        let result = optimizer.prox_group_l1(&x, 1.0);

        assert!(result[0] > 0.0 && result[0] < 1.0);
        assert!(result[1] > 0.0 && result[1] < 1.0);

        assert_abs_diff_eq!(result[2], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(result[3], 0.0, epsilon = 1e-10);
    }

    #[test]
    fn test_coordinate_descent_optimizer() {
        let optimizer = CoordinateDescentOptimizer::new();

        let obj_fn = |x: &ArrayView1<f64>| x.mapv(|xi| xi * xi).sum();
        let grad_1d_fn = |x: &ArrayView1<f64>, i: usize| 2.0 * x[i];

        let x0 = array![1.0, 1.0];
        let result = optimizer.minimize(obj_fn, grad_1d_fn, &x0).unwrap();

        assert!(result.x[0].abs() < 0.1);
        assert!(result.x[1].abs() < 0.1);
        assert!(result.success);
    }

    #[test]
    fn test_admm_optimizer() {
        let optimizer = ADMMOptimizer::new(1.0);

        let loss_grad = |x: &ArrayView1<f64>, _data: &()| x.mapv(|xi| 2.0 * xi);

        let x0 = array![2.0];
        let lambda = 1.0;
        let result = optimizer.solve_lasso(loss_grad, lambda, &x0, &()).unwrap();

        assert!(result.x[0].abs() < 1.0);
        assert!(result.success);
    }

    #[test]
    fn test_ml_lasso_regression() {
        let a = array![[1.0, 0.0], [1.0, 0.0], [1.0, 0.0]];
        let b = array![1.0, 1.1, 0.9];
        let lambda = 0.1;

        let result = ml_problems::lasso_regression(&a.view(), &b.view(), lambda, None).unwrap();

        assert!(result.x[0] > 0.5);
        assert!(result.x[1].abs() < 0.1);
    }
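
    // A minimal smoke test for the FISTA variant (sketch): on the separable
    // objective x^2 + 0.1 * |x| the minimizer is the origin, so the accelerated
    // solver should land close to zero well within the default iteration budget.
    #[test]
    fn test_lasso_fista_quadratic() {
        let optimizer = LassoOptimizer::fista(0.1, 0.1);

        // Gradient of the smooth part f(x) = sum_i x_i^2.
        let grad_fn = |x: &ArrayView1<f64>| x.mapv(|xi| 2.0 * xi);

        let x0 = array![2.0, -2.0];
        let result = optimizer.minimize(grad_fn, &x0).unwrap();

        assert!(result.x[0].abs() < 0.1);
        assert!(result.x[1].abs() < 0.1);
        assert!(result.success);
    }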
}