//! Proximal-gradient optimizers for sparse, regularized problems: lasso (ISTA/FISTA),
//! group lasso, elastic net, ADMM for L1 problems, and coordinate descent.

use crate::error::OptimizeError;
use crate::unconstrained::result::OptimizeResult;
use ndarray::{Array1, ArrayView1, ArrayView2, ScalarOperand};
use num_traits::Float;
use std::collections::HashMap;

/// Proximal gradient (ISTA/FISTA) optimizer for L1-regularized problems.
#[derive(Debug, Clone)]
pub struct LassoOptimizer<F: Float> {
    /// L1 regularization strength.
    pub lambda: F,
    /// Step size for the gradient step.
    pub learning_rate: F,
    /// Maximum number of iterations.
    pub max_iter: usize,
    /// Convergence tolerance on the change in the iterate.
    pub tol: F,
    /// If `true`, use Nesterov acceleration (FISTA); otherwise plain ISTA.
    pub accelerated: bool,
}

impl<F: Float + ScalarOperand> LassoOptimizer<F> {
    /// Create a plain proximal-gradient (ISTA) optimizer.
    pub fn new(lambda: F, learning_rate: F) -> Self {
        Self {
            lambda,
            learning_rate,
            max_iter: 1000,
            tol: F::from(1e-6).unwrap(),
            accelerated: false,
        }
    }

    /// Create an accelerated (FISTA) optimizer.
    pub fn fista(lambda: F, learning_rate: F) -> Self {
        Self {
            lambda,
            learning_rate,
            max_iter: 1000,
            tol: F::from(1e-6).unwrap(),
            accelerated: true,
        }
    }

    /// Soft-thresholding operator: `sign(x) * max(|x| - threshold, 0)`.
    fn soft_threshold(&self, x: F, threshold: F) -> F {
        if x > threshold {
            x - threshold
        } else if x < -threshold {
            x + threshold
        } else {
            F::zero()
        }
    }

    /// Proximal operator of the scaled L1 norm, applied elementwise.
    fn prox_l1(&self, x: &Array1<F>, step_size: F) -> Array1<F> {
        let threshold = self.lambda * step_size;
        x.mapv(|xi| self.soft_threshold(xi, threshold))
    }

    /// Minimize `f(x) + lambda * ||x||_1` given the gradient of the smooth part `f`.
    pub fn minimize<G>(
        &self,
        mut grad_fn: G,
        x0: &Array1<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError>
    where
        G: FnMut(&ArrayView1<F>) -> Array1<F>,
        F: Into<f64> + Copy,
    {
        let mut x = x0.clone();
        let mut y = x0.clone();
        let mut t = F::one();

        for iter in 0..self.max_iter {
            // Gradient of the smooth part, at the extrapolated point when accelerated.
            let grad = if self.accelerated {
                grad_fn(&y.view())
            } else {
                grad_fn(&x.view())
            };

            // Gradient step.
            let x_new = if self.accelerated {
                &y - &(&grad * self.learning_rate)
            } else {
                &x - &(&grad * self.learning_rate)
            };

            // Proximal step (soft thresholding).
            let x_prox = self.prox_l1(&x_new, self.learning_rate);

            // FISTA momentum update.
            if self.accelerated {
                let t_new = (F::one() + (F::one() + F::from(4).unwrap() * t * t).sqrt())
                    / F::from(2).unwrap();
                let beta = (t - F::one()) / t_new;
                y = &x_prox + &((&x_prox - &x) * beta);
                t = t_new;
            }

            // Stop when the iterate barely changes.
            let change = (&x_prox - &x).mapv(|xi| xi.abs()).sum();
            if change < self.tol {
                return Ok(OptimizeResult {
                    x: x_prox.mapv(|v| v.into()),
                    // The objective value is not tracked here; report zero.
                    fun: F::zero(),
                    iterations: iter,
                    nit: iter,
                    func_evals: iter,
                    nfev: iter,
                    success: true,
                    message: "Optimization terminated successfully.".to_string(),
                    jacobian: Some(grad.mapv(|v| v.into())),
                    hessian: None,
                });
            }

            x = x_prox;
        }

        Err(OptimizeError::ConvergenceError(
            "Maximum iterations reached".to_string(),
        ))
    }
}

/// Proximal gradient optimizer with a group-lasso (sum of group L2 norms) penalty.
#[derive(Debug, Clone)]
pub struct GroupLassoOptimizer<F: Float> {
    /// Group regularization strength.
    pub lambda: F,
    /// Step size for the gradient step.
    pub learning_rate: F,
    /// Maximum number of iterations.
    pub max_iter: usize,
    /// Convergence tolerance on the change in the iterate.
    pub tol: F,
    /// Group index for each feature: `groups[i]` is the group of feature `i`.
    pub groups: Vec<usize>,
}

impl<F: Float + ScalarOperand> GroupLassoOptimizer<F> {
    pub fn new(lambda: F, learning_rate: F, groups: Vec<usize>) -> Self {
        Self {
            lambda,
            learning_rate,
            max_iter: 1000,
            tol: F::from(1e-6).unwrap(),
            groups,
        }
    }

    /// Proximal operator of the group-L1 penalty: block soft thresholding that
    /// shrinks each group's L2 norm by `lambda * step_size`.
    fn prox_group_l1(&self, x: &Array1<F>, step_size: F) -> Array1<F> {
        let mut result = x.clone();
        let threshold = self.lambda * step_size;

        // Collect the feature indices belonging to each group.
        let mut groups_map: HashMap<usize, Vec<usize>> = HashMap::new();
        for (feature_idx, &group_idx) in self.groups.iter().enumerate() {
            groups_map.entry(group_idx).or_default().push(feature_idx);
        }

        for (_, feature_indices) in groups_map {
            let group_norm = feature_indices
                .iter()
                .map(|&idx| x[idx] * x[idx])
                .fold(F::zero(), |acc, x| acc + x)
                .sqrt();

            if group_norm > threshold {
                // Shrink the whole group toward zero.
                let scale = (group_norm - threshold) / group_norm;
                for &idx in &feature_indices {
                    result[idx] = result[idx] * scale;
                }
            } else {
                // Below the threshold, the whole group is zeroed out.
                for &idx in &feature_indices {
                    result[idx] = F::zero();
                }
            }
        }

        result
    }

    /// Minimize `f(x) + lambda * sum_g ||x_g||_2` given the gradient of the smooth part `f`.
    pub fn minimize<G>(
        &self,
        mut grad_fn: G,
        x0: &Array1<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError>
    where
        G: FnMut(&ArrayView1<F>) -> Array1<F>,
        F: Into<f64> + Copy,
    {
        let mut x = x0.clone();

        for iter in 0..self.max_iter {
            let grad = grad_fn(&x.view());

            // Gradient step followed by the group proximal step.
            let x_new = &x - &(&grad * self.learning_rate);
            let x_prox = self.prox_group_l1(&x_new, self.learning_rate);

            let change = (&x_prox - &x).mapv(|xi| xi.abs()).sum();
            if change < self.tol {
                return Ok(OptimizeResult {
                    x: x_prox.mapv(|v| v.into()),
                    fun: F::zero(),
                    iterations: iter,
                    nit: iter,
                    func_evals: iter,
                    nfev: iter,
                    success: true,
                    message: "Optimization terminated successfully.".to_string(),
                    jacobian: Some(grad.mapv(|v| v.into())),
                    hessian: None,
                });
            }

            x = x_prox;
        }

        Err(OptimizeError::ConvergenceError(
            "Maximum iterations reached".to_string(),
        ))
    }
}

/// Proximal gradient optimizer with an elastic-net (L1 + L2) penalty.
#[derive(Debug, Clone)]
pub struct ElasticNetOptimizer<F: Float> {
    /// L1 regularization strength.
    pub lambda1: F,
    /// L2 (ridge) regularization strength.
    pub lambda2: F,
    /// Step size for the gradient step.
    pub learning_rate: F,
    /// Maximum number of iterations.
    pub max_iter: usize,
    /// Convergence tolerance on the change in the iterate.
    pub tol: F,
}

impl<F: Float + ScalarOperand> ElasticNetOptimizer<F> {
    pub fn new(lambda1: F, lambda2: F, learning_rate: F) -> Self {
        Self {
            lambda1,
            lambda2,
            learning_rate,
            max_iter: 1000,
            tol: F::from(1e-6).unwrap(),
        }
    }

    /// Proximal operator of the elastic-net penalty: soft thresholding for the L1
    /// part followed by multiplicative shrinkage for the L2 part.
    fn prox_elastic_net(&self, x: &Array1<F>, step_size: F) -> Array1<F> {
        let l1_threshold = self.lambda1 * step_size;
        let l2_factor = F::one() / (F::one() + self.lambda2 * step_size);

        x.mapv(|xi| {
            let soft_thresh = if xi > l1_threshold {
                xi - l1_threshold
            } else if xi < -l1_threshold {
                xi + l1_threshold
            } else {
                F::zero()
            };
            soft_thresh * l2_factor
        })
    }

    /// Minimize `f(x) + lambda1 * ||x||_1 + (lambda2 / 2) * ||x||^2` given the
    /// gradient of the smooth part `f`.
    pub fn minimize<G>(
        &self,
        mut grad_fn: G,
        x0: &Array1<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError>
    where
        G: FnMut(&ArrayView1<F>) -> Array1<F>,
        F: Into<f64> + Copy,
    {
        let mut x = x0.clone();

        for iter in 0..self.max_iter {
            let grad = grad_fn(&x.view());

            // Gradient step followed by the elastic-net proximal step.
            let x_new = &x - &(&grad * self.learning_rate);
            let x_prox = self.prox_elastic_net(&x_new, self.learning_rate);

            let change = (&x_prox - &x).mapv(|xi| xi.abs()).sum();
            if change < self.tol {
                return Ok(OptimizeResult {
                    x: x_prox.mapv(|v| v.into()),
                    fun: F::zero(),
                    iterations: iter,
                    nit: iter,
                    func_evals: iter,
                    nfev: iter,
                    success: true,
                    message: "Optimization terminated successfully.".to_string(),
                    jacobian: Some(grad.mapv(|v| v.into())),
                    hessian: None,
                });
            }

            x = x_prox;
        }

        Err(OptimizeError::ConvergenceError(
            "Maximum iterations reached".to_string(),
        ))
    }
}

/// ADMM solver for L1-regularized problems.
#[derive(Debug, Clone)]
pub struct ADMMOptimizer<F: Float> {
    /// Augmented Lagrangian penalty parameter.
    pub rho: F,
    /// Tolerance on the primal residual.
    pub eps_pri: F,
    /// Tolerance on the dual residual.
    pub eps_dual: F,
    /// Maximum number of iterations.
    pub max_iter: usize,
}

impl<F: Float + ScalarOperand> ADMMOptimizer<F> {
    pub fn new(rho: F) -> Self {
        Self {
            rho,
            eps_pri: F::from(1e-3).unwrap(),
            eps_dual: F::from(1e-3).unwrap(),
            max_iter: 1000,
        }
    }

    /// Solve `loss(x) + lambda * ||x||_1` with ADMM, given the gradient of the
    /// smooth loss. Splitting: `x` carries the loss, `z` carries the L1 penalty,
    /// and `u` is the scaled dual variable.
    pub fn solve_lasso<LossGrad, Data>(
        &self,
        loss_grad: LossGrad,
        lambda: F,
        x0: &Array1<F>,
        data: &Data,
    ) -> Result<OptimizeResult<F>, OptimizeError>
    where
        LossGrad: Fn(&ArrayView1<F>, &Data) -> Array1<F>,
        Data: Clone,
        F: Into<f64> + Copy,
    {
        let n = x0.len();
        let mut x = x0.clone();
        let mut z = Array1::zeros(n);
        let mut u = Array1::zeros(n);

        for iter in 0..self.max_iter {
            // x-update: a single gradient step on the augmented Lagrangian
            // (an inexact minimization that keeps the solver matrix-free).
            let grad_loss = loss_grad(&x.view(), data);
            let grad_augmented = &grad_loss + &((&x - &z + &u) * self.rho);

            let grad_norm = grad_augmented.mapv(|g| g * g).sum().sqrt();
            let lr = if grad_norm > F::epsilon() {
                F::one() / (F::one() + self.rho)
            } else {
                F::from(0.1).unwrap()
            };
            x = &x - &(&grad_augmented * lr);

            // z-update: soft thresholding with threshold lambda / rho.
            let z_old = z.clone();
            let threshold = lambda / self.rho;
            z = (&x + &u).mapv(|xi| {
                if xi > threshold {
                    xi - threshold
                } else if xi < -threshold {
                    xi + threshold
                } else {
                    F::zero()
                }
            });

            // Dual update.
            u = &u + &x - &z;

            // Primal and dual residual norms for the stopping criterion.
            let r_norm = (&x - &z).mapv(|xi| xi * xi).sum().sqrt();
            let s_norm = ((&z - &z_old) * self.rho).mapv(|xi| xi * xi).sum().sqrt();
            let x_norm = x.mapv(|xi| xi * xi).sum().sqrt();
            let z_norm = z.mapv(|xi| xi * xi).sum().sqrt();
            let u_norm = u.mapv(|xi| xi * xi).sum().sqrt();

            let eps_pri_thresh =
                self.eps_pri * (F::sqrt(F::from(n).unwrap()) + F::max(x_norm, z_norm));
            let eps_dual_thresh = self.eps_dual * F::sqrt(F::from(n).unwrap()) * self.rho * u_norm;

            if r_norm < eps_pri_thresh && s_norm < eps_dual_thresh {
                return Ok(OptimizeResult {
                    x: x.mapv(|v| v.into()),
                    fun: F::zero(),
                    iterations: iter,
                    nit: iter,
                    func_evals: iter,
                    nfev: iter,
                    success: true,
                    message: "ADMM converged successfully.".to_string(),
                    jacobian: Some(grad_loss.mapv(|v| v.into())),
                    hessian: None,
                });
            }
        }

        Err(OptimizeError::ConvergenceError(
            "Maximum iterations reached".to_string(),
        ))
    }
}

/// Coordinate descent optimizer with a fixed per-coordinate step size.
#[derive(Debug, Clone)]
pub struct CoordinateDescentOptimizer<F: Float> {
    /// Maximum number of full sweeps over the coordinates.
    pub max_iter: usize,
    /// Convergence tolerance on the change in the iterate per sweep.
    pub tol: F,
    /// If `true`, visit coordinates in a random order each sweep.
    pub random: bool,
}

impl<F: Float + ScalarOperand> Default for CoordinateDescentOptimizer<F> {
    fn default() -> Self {
        Self {
            max_iter: 1000,
            tol: F::from(1e-6).unwrap(),
            random: false,
        }
    }
}

impl<F: Float + ScalarOperand> CoordinateDescentOptimizer<F> {
    /// Create an optimizer that sweeps the coordinates in cyclic order.
    pub fn new() -> Self {
        Self::default()
    }

    /// Create an optimizer that shuffles the coordinate order on every sweep.
    pub fn random() -> Self {
        Self {
            max_iter: 1000,
            tol: F::from(1e-6).unwrap(),
            random: true,
        }
    }

    /// Minimize a smooth objective by per-coordinate gradient steps.
    pub fn minimize<Obj, Grad1D>(
        &self,
        obj_fn: Obj,
        grad_1d_fn: Grad1D,
        x0: &Array1<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError>
    where
        Obj: Fn(&ArrayView1<F>) -> F,
        Grad1D: Fn(&ArrayView1<F>, usize) -> F,
        F: Into<f64> + Copy,
    {
        let mut x = x0.clone();
        let n = x.len();

        for iter in 0..self.max_iter {
            let x_old = x.clone();

            // Coordinate visiting order: shuffled if `random`, otherwise cyclic.
            let coords: Vec<usize> = if self.random {
                use rand::{rng, seq::SliceRandom};
                let mut coords: Vec<usize> = (0..n).collect();
                coords.shuffle(&mut rng());
                coords
            } else {
                (0..n).collect()
            };

            for &i in &coords {
                let grad_i = grad_1d_fn(&x.view(), i);

                // Fixed step size for the per-coordinate gradient step.
                let lr = F::from(0.01).unwrap();
                x[i] = x[i] - lr * grad_i;
            }

            let change = (&x - &x_old).mapv(|xi| xi.abs()).sum();
            if change < self.tol {
                let final_obj = obj_fn(&x.view());
                return Ok(OptimizeResult {
                    x: x.mapv(|v| v.into()),
                    fun: final_obj,
                    iterations: iter,
                    nit: iter,
                    func_evals: iter * n,
                    nfev: iter * n,
                    success: true,
                    message: "Coordinate descent converged successfully.".to_string(),
                    // The full gradient is not assembled here; report zeros.
                    jacobian: Some(Array1::zeros(n)),
                    hessian: None,
                });
            }
        }

        Err(OptimizeError::ConvergenceError(
            "Maximum iterations reached".to_string(),
        ))
    }
}

/// Ready-made gradient setups for common regularized regression problems.
pub mod ml_problems {
    use super::*;

    /// Lasso regression: minimize `||a x - b||^2 + lambda * ||x||_1`.
    pub fn lasso_regression<F: Float + ScalarOperand + Into<f64> + Copy>(
        a: &ArrayView2<F>,
        b: &ArrayView1<F>,
        lambda: F,
        learning_rate: Option<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError> {
        let lr = learning_rate.unwrap_or_else(|| F::from(0.01).unwrap());
        let optimizer = LassoOptimizer::new(lambda, lr);

        let n = a.ncols();
        let x0 = Array1::zeros(n);

        // Gradient of the squared-error loss: 2 * a^T (a x - b).
        let grad_fn = |x: &ArrayView1<F>| -> Array1<F> {
            let residual = a.dot(x) - b;
            a.t().dot(&residual) * F::from(2).unwrap()
        };

        optimizer.minimize(grad_fn, &x0)
    }

    /// Group lasso regression: minimize `||a x - b||^2 + lambda * sum_g ||x_g||_2`.
    pub fn group_lasso_regression<F: Float + ScalarOperand + Into<f64> + Copy>(
        a: &ArrayView2<F>,
        b: &ArrayView1<F>,
        lambda: F,
        groups: Vec<usize>,
        learning_rate: Option<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError> {
        let lr = learning_rate.unwrap_or_else(|| F::from(0.01).unwrap());
        let optimizer = GroupLassoOptimizer::new(lambda, lr, groups);

        let n = a.ncols();
        let x0 = Array1::zeros(n);

        let grad_fn = |x: &ArrayView1<F>| -> Array1<F> {
            let residual = a.dot(x) - b;
            a.t().dot(&residual) * F::from(2).unwrap()
        };

        optimizer.minimize(grad_fn, &x0)
    }

    /// Elastic net regression: minimize
    /// `||a x - b||^2 + lambda1 * ||x||_1 + (lambda2 / 2) * ||x||^2`.
    pub fn elastic_net_regression<F: Float + ScalarOperand + Into<f64> + Copy>(
        a: &ArrayView2<F>,
        b: &ArrayView1<F>,
        lambda1: F,
        lambda2: F,
        learning_rate: Option<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError> {
        let lr = learning_rate.unwrap_or_else(|| F::from(0.01).unwrap());
        let optimizer = ElasticNetOptimizer::new(lambda1, lambda2, lr);

        let n = a.ncols();
        let x0 = Array1::zeros(n);

        let grad_fn = |x: &ArrayView1<F>| -> Array1<F> {
            let residual = a.dot(x) - b;
            a.t().dot(&residual) * F::from(2).unwrap()
        };

        optimizer.minimize(grad_fn, &x0)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use ndarray::array;

    #[test]
    fn test_lasso_soft_threshold() {
        let optimizer = LassoOptimizer::new(1.0, 0.1);

        assert_abs_diff_eq!(optimizer.soft_threshold(2.0, 1.0), 1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(optimizer.soft_threshold(-2.0, 1.0), -1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(optimizer.soft_threshold(0.5, 1.0), 0.0, epsilon = 1e-10);
    }
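
    // A minimal end-to-end check of the plain (ISTA) solver on a one-dimensional
    // problem: minimize x^2 plus an L1 penalty strong enough that the solution is
    // exactly zero. The gradient closure, starting point, and thresholds below are
    // illustrative choices, not part of the original test suite.
    #[test]
    fn test_lasso_ista_minimize() {
        let optimizer = LassoOptimizer::new(0.5, 0.1);

        // Gradient of the smooth part f(x) = x^2.
        let grad_fn = |x: &ArrayView1<f64>| x.mapv(|xi| 2.0 * xi);

        let x0 = array![2.0];
        let result = optimizer.minimize(grad_fn, &x0).unwrap();

        // Soft thresholding should drive the iterate to (numerically) zero.
        assert!(result.x[0].abs() < 1e-6);
        assert!(result.success);
    }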

    #[test]
    fn test_lasso_prox_operator() {
        let optimizer = LassoOptimizer::new(1.0, 0.1);
        let x = array![2.0, -2.0, 0.5, -0.5];
        let result = optimizer.prox_l1(&x, 1.0);

        assert_abs_diff_eq!(result[0], 1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(result[1], -1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(result[2], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(result[3], 0.0, epsilon = 1e-10);
    }
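
    // Same one-dimensional problem as the ISTA check above, run through the
    // accelerated (FISTA) constructor to exercise the momentum branch of
    // `minimize`. The values are illustrative.
    #[test]
    fn test_lasso_fista_minimize() {
        let optimizer = LassoOptimizer::fista(0.5, 0.1);

        let grad_fn = |x: &ArrayView1<f64>| x.mapv(|xi| 2.0 * xi);

        let x0 = array![2.0];
        let result = optimizer.minimize(grad_fn, &x0).unwrap();

        // The L1 penalty dominates, so the minimizer is (numerically) zero.
        assert!(result.x[0].abs() < 1e-3);
        assert!(result.success);
    }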

    #[test]
    fn test_elastic_net_prox_operator() {
        let optimizer = ElasticNetOptimizer::new(1.0, 1.0, 0.1);
        let x = array![3.0, -3.0, 0.5];
        let result = optimizer.prox_elastic_net(&x, 1.0);

        assert!(result[0] > 0.0 && result[0] < 2.0);
        assert!(result[1] < 0.0 && result[1] > -2.0);
        assert_abs_diff_eq!(result[2], 0.0, epsilon = 1e-10);
    }
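
    // Sketch test for the elastic-net convenience wrapper, mirroring the lasso
    // regression test below: only the first column of `a` carries signal, so its
    // coefficient should stay large while the second stays near zero. The penalty
    // weights are illustrative.
    #[test]
    fn test_ml_elastic_net_regression() {
        let a = array![[1.0, 0.0], [1.0, 0.0], [1.0, 0.0]];
        let b = array![1.0, 1.1, 0.9];

        let result =
            ml_problems::elastic_net_regression(&a.view(), &b.view(), 0.1, 0.1, None).unwrap();

        assert!(result.x[0] > 0.5);
        assert!(result.x[1].abs() < 0.1);
    }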

    #[test]
    fn test_group_lasso_simple() {
        let optimizer = GroupLassoOptimizer::new(1.0, 0.1, vec![0, 0, 1, 1]);
        let x = array![1.0, 1.0, 0.1, 0.1];
        let result = optimizer.prox_group_l1(&x, 1.0);

        assert!(result[0] > 0.0 && result[0] < 1.0);
        assert!(result[1] > 0.0 && result[1] < 1.0);

        assert_abs_diff_eq!(result[2], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(result[3], 0.0, epsilon = 1e-10);
    }
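
    // Sketch test for the group-lasso wrapper on the same toy design: features 0
    // and 1 are placed in separate groups, and only feature 0 predicts `b`, so the
    // second group should be zeroed out. Group assignment and penalty are
    // illustrative choices.
    #[test]
    fn test_ml_group_lasso_regression() {
        let a = array![[1.0, 0.0], [1.0, 0.0], [1.0, 0.0]];
        let b = array![1.0, 1.1, 0.9];
        let groups = vec![0, 1];

        let result =
            ml_problems::group_lasso_regression(&a.view(), &b.view(), 0.1, groups, None).unwrap();

        assert!(result.x[0] > 0.5);
        assert!(result.x[1].abs() < 0.1);
    }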

    #[test]
    fn test_coordinate_descent_optimizer() {
        let optimizer = CoordinateDescentOptimizer::new();

        let obj_fn = |x: &ArrayView1<f64>| x.mapv(|xi| xi * xi).sum();
        let grad_1d_fn = |x: &ArrayView1<f64>, i: usize| 2.0 * x[i];

        let x0 = array![1.0, 1.0];
        let result = optimizer.minimize(obj_fn, grad_1d_fn, &x0).unwrap();

        assert!(result.x[0].abs() < 0.1);
        assert!(result.x[1].abs() < 0.1);
        assert!(result.success);
    }
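
    // Sketch exercising the shuffled coordinate order; the objective is separable,
    // so the randomized sweep should reach the same minimum as the cyclic version
    // above. The problem and tolerances are illustrative.
    #[test]
    fn test_coordinate_descent_random() {
        let optimizer = CoordinateDescentOptimizer::random();

        let obj_fn = |x: &ArrayView1<f64>| x.mapv(|xi| xi * xi).sum();
        let grad_1d_fn = |x: &ArrayView1<f64>, i: usize| 2.0 * x[i];

        let x0 = array![1.0, -1.0];
        let result = optimizer.minimize(obj_fn, grad_1d_fn, &x0).unwrap();

        assert!(result.x[0].abs() < 0.1);
        assert!(result.x[1].abs() < 0.1);
        assert!(result.success);
    }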

    #[test]
    fn test_admm_optimizer() {
        let optimizer = ADMMOptimizer::new(1.0);

        let loss_grad = |x: &ArrayView1<f64>, _data: &()| x.mapv(|xi| 2.0 * xi);

        let x0 = array![2.0];
        let lambda = 1.0;
        let result = optimizer.solve_lasso(loss_grad, lambda, &x0, &()).unwrap();

        assert!(result.x[0].abs() < 1.0);
        assert!(result.success);
    }

    #[test]
    fn test_ml_lasso_regression() {
        let a = array![[1.0, 0.0], [1.0, 0.0], [1.0, 0.0]];
        let b = array![1.0, 1.1, 0.9];
        let lambda = 0.1;

        let result = ml_problems::lasso_regression(&a.view(), &b.view(), lambda, None).unwrap();

        assert!(result.x[0] > 0.5);
        assert!(result.x[1].abs() < 0.1);
    }
}