scirs2_optimize/ml_optimizers.rs

//! Specialized optimizers for machine learning applications
//!
//! This module provides optimization algorithms specifically designed for
//! machine learning tasks, including regularized optimization, feature selection,
//! and deep learning optimizers with advanced features.

use crate::error::OptimizeError;
use crate::unconstrained::result::OptimizeResult;
use scirs2_core::ndarray::{Array1, ArrayView1, ArrayView2, ScalarOperand};
use scirs2_core::numeric::Float;
use scirs2_core::random::seq::SliceRandom;
use std::collections::HashMap;

/// L1 regularization (Lasso) optimizer using proximal gradient descent
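///
/// # Example
///
/// A minimal sketch of proximal gradient descent with an illustrative gradient
/// closure (for the smooth part f(x) = ||x - 1||^2); the values are arbitrary
/// and the snippet is not compiled as a doctest:
///
/// ```ignore
/// use scirs2_core::ndarray::{array, Array1, ArrayView1};
///
/// // Gradient of the smooth loss; the L1 term is handled by the proximal step.
/// let grad_fn = |x: &ArrayView1<f64>| -> Array1<f64> { x.mapv(|xi| 2.0 * (xi - 1.0)) };
///
/// let optimizer = LassoOptimizer::new(0.1, 0.05);
/// let result = optimizer.minimize(grad_fn, &array![0.0, 0.0]);
/// ```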
#[derive(Debug, Clone)]
pub struct LassoOptimizer<F: Float> {
    /// L1 regularization parameter (lambda)
    pub lambda: F,
    /// Learning rate
    pub learning_rate: F,
    /// Maximum number of iterations
    pub max_iter: usize,
    /// Convergence tolerance
    pub tol: F,
    /// Whether to use accelerated proximal gradient (FISTA)
    pub accelerated: bool,
}

impl<F: Float + ScalarOperand> LassoOptimizer<F> {
    /// Create a new Lasso optimizer
    pub fn new(lambda: F, learning_rate: F) -> Self {
        Self {
            lambda,
            learning_rate,
            max_iter: 1000,
            tol: F::from(1e-6).expect("Failed to convert constant to float"),
            accelerated: false,
        }
    }

    /// Create a new accelerated Lasso optimizer (FISTA)
    pub fn fista(lambda: F, learning_rate: F) -> Self {
        Self {
            lambda,
            learning_rate,
            max_iter: 1000,
            tol: F::from(1e-6).expect("Failed to convert constant to float"),
            accelerated: true,
        }
    }

    /// Soft thresholding operator for L1 proximal operator
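    /// Computes sign(x) * max(|x| - threshold, 0).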
    fn soft_threshold(&self, x: F, threshold: F) -> F {
        if x > threshold {
            x - threshold
        } else if x < -threshold {
            x + threshold
        } else {
            F::zero()
        }
    }

    /// Proximal operator for L1 regularization
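    /// With step size t, this is elementwise soft thresholding with threshold
    /// self.lambda * t, i.e. the proximal map of t * lambda * ||x||_1.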
    fn prox_l1(&self, x: &Array1<F>, step_size: F) -> Array1<F> {
        let threshold = self.lambda * step_size;
        x.mapv(|xi| self.soft_threshold(xi, threshold))
    }

    /// Optimize using proximal gradient descent
    pub fn minimize<G>(
        &self,
        mut grad_fn: G,
        x0: &Array1<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError>
    where
        G: FnMut(&ArrayView1<F>) -> Array1<F>,
        F: Into<f64> + Copy,
    {
        let mut x = x0.clone();
        let mut y = x0.clone(); // For FISTA acceleration
        let mut t = F::one(); // FISTA parameter

        for iter in 0..self.max_iter {
            // Compute gradient
            let grad = if self.accelerated {
                grad_fn(&y.view())
            } else {
                grad_fn(&x.view())
            };

            // Gradient step
            let x_new = if self.accelerated {
                &y - &(&grad * self.learning_rate)
            } else {
                &x - &(&grad * self.learning_rate)
            };

            // Proximal step (L1 regularization)
            let x_prox = self.prox_l1(&x_new, self.learning_rate);

            // FISTA acceleration
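            // Standard FISTA updates (x_prox is x_{k+1}, x still holds x_k here):
            //   t_{k+1} = (1 + sqrt(1 + 4 t_k^2)) / 2
            //   y_{k+1} = x_{k+1} + ((t_k - 1) / t_{k+1}) * (x_{k+1} - x_k)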
            if self.accelerated {
                let four = F::from(4).expect("Failed to convert constant to float");
                let two = F::from(2).expect("Failed to convert constant to float");
                let t_new = (F::one() + (F::one() + four * t * t).sqrt()) / two;
                let beta = (t - F::one()) / t_new;
                y = &x_prox + &((&x_prox - &x) * beta);
                t = t_new;
            }

            // Check convergence
            let change = (&x_prox - &x).mapv(|xi| xi.abs()).sum();
            if change < self.tol {
                return Ok(OptimizeResult {
                    x: x_prox.mapv(|v| v.into()),
                    fun: F::zero(),
                    nit: iter,
                    func_evals: iter,
                    nfev: iter,
                    success: true,
                    message: "Optimization terminated successfully.".to_string(),
                    jacobian: Some(grad.mapv(|v| v.into())),
                    hessian: None,
                });
            }

            x = x_prox;
        }

        Err(OptimizeError::ConvergenceError(
            "Maximum iterations reached".to_string(),
        ))
    }
}

/// Group Lasso optimizer for grouped feature selection
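///
/// # Example
///
/// A minimal sketch; `groups` maps each coefficient index to its group index
/// (two groups of two features here) and the gradient closure is illustrative.
/// The snippet is not compiled as a doctest:
///
/// ```ignore
/// use scirs2_core::ndarray::{array, Array1, ArrayView1};
///
/// let optimizer = GroupLassoOptimizer::new(0.1, 0.05, vec![0, 0, 1, 1]);
/// let grad_fn = |x: &ArrayView1<f64>| -> Array1<f64> { x.mapv(|xi| 2.0 * xi) };
/// let result = optimizer.minimize(grad_fn, &array![1.0, 1.0, 0.1, 0.1]);
/// ```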
#[derive(Debug, Clone)]
pub struct GroupLassoOptimizer<F: Float> {
    /// Group Lasso regularization parameter
    pub lambda: F,
    /// Learning rate
    pub learning_rate: F,
    /// Maximum number of iterations
    pub max_iter: usize,
    /// Convergence tolerance
    pub tol: F,
    /// Group structure: maps feature index to group index
    pub groups: Vec<usize>,
}

impl<F: Float + ScalarOperand> GroupLassoOptimizer<F> {
    /// Create a new Group Lasso optimizer
    pub fn new(lambda: F, learning_rate: F, groups: Vec<usize>) -> Self {
        Self {
            lambda,
            learning_rate,
            max_iter: 1000,
            tol: F::from(1e-6).expect("Failed to convert constant to float"),
            groups,
        }
    }

    /// Proximal operator for group L1 regularization
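    /// Each group g is scaled by max(0, 1 - lambda * step_size / ||x_g||_2),
    /// which zeros out groups whose norm falls at or below the threshold.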
    fn prox_group_l1(&self, x: &Array1<F>, step_size: F) -> Array1<F> {
        let mut result = x.clone();
        let threshold = self.lambda * step_size;

        // Group variables by group index
        let mut groups_map: HashMap<usize, Vec<usize>> = HashMap::new();
        for (feature_idx, &group_idx) in self.groups.iter().enumerate() {
            groups_map.entry(group_idx).or_default().push(feature_idx);
        }

        // Apply group soft thresholding
        for feature_indices in groups_map.into_values() {
            // Compute group norm
            let group_norm = feature_indices
                .iter()
                .map(|&idx| x[idx] * x[idx])
                .fold(F::zero(), |acc, x| acc + x)
                .sqrt();

            if group_norm > threshold {
                let scale = (group_norm - threshold) / group_norm;
                for &idx in &feature_indices {
                    result[idx] = result[idx] * scale;
                }
            } else {
                // Zero out the entire group
                for &idx in &feature_indices {
                    result[idx] = F::zero();
                }
            }
        }

        result
    }

    /// Optimize using proximal gradient descent
    pub fn minimize<G>(
        &self,
        mut grad_fn: G,
        x0: &Array1<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError>
    where
        G: FnMut(&ArrayView1<F>) -> Array1<F>,
        F: Into<f64> + Copy,
    {
        let mut x = x0.clone();

        for iter in 0..self.max_iter {
            // Compute gradient
            let grad = grad_fn(&x.view());

            // Gradient step
            let x_new = &x - &(&grad * self.learning_rate);

            // Proximal step (group L1 regularization)
            let x_prox = self.prox_group_l1(&x_new, self.learning_rate);

            // Check convergence
            let change = (&x_prox - &x).mapv(|xi| xi.abs()).sum();
            if change < self.tol {
                return Ok(OptimizeResult {
                    x: x_prox.mapv(|v| v.into()),
                    fun: F::zero(),
                    nit: iter,
                    func_evals: iter,
                    nfev: iter,
                    success: true,
                    message: "Optimization terminated successfully.".to_string(),
                    jacobian: Some(grad.mapv(|v| v.into())),
                    hessian: None,
                });
            }

            x = x_prox;
        }

        Err(OptimizeError::ConvergenceError(
            "Maximum iterations reached".to_string(),
        ))
    }
}

/// Elastic Net optimizer (L1 + L2 regularization)
#[derive(Debug, Clone)]
pub struct ElasticNetOptimizer<F: Float> {
    /// L1 regularization parameter
    pub lambda1: F,
    /// L2 regularization parameter
    pub lambda2: F,
    /// Learning rate
    pub learning_rate: F,
    /// Maximum number of iterations
    pub max_iter: usize,
    /// Convergence tolerance
    pub tol: F,
}

impl<F: Float + ScalarOperand> ElasticNetOptimizer<F> {
    /// Create a new Elastic Net optimizer
    pub fn new(lambda1: F, lambda2: F, learning_rate: F) -> Self {
        Self {
            lambda1,
            lambda2,
            learning_rate,
            max_iter: 1000,
            tol: F::from(1e-6).expect("Failed to convert constant to float"),
        }
    }

    /// Proximal operator for elastic net regularization
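    /// Applies the L1 soft threshold with lambda1 * step_size, then scales the
    /// result by 1 / (1 + lambda2 * step_size) to account for the L2 term.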
    fn prox_elastic_net(&self, x: &Array1<F>, step_size: F) -> Array1<F> {
        let l1_threshold = self.lambda1 * step_size;
        let l2_factor = F::one() / (F::one() + self.lambda2 * step_size);

        x.mapv(|xi| {
            let soft_thresh = if xi > l1_threshold {
                xi - l1_threshold
            } else if xi < -l1_threshold {
                xi + l1_threshold
            } else {
                F::zero()
            };
            soft_thresh * l2_factor
        })
    }

    /// Optimize using proximal gradient descent
    pub fn minimize<G>(
        &self,
        mut grad_fn: G,
        x0: &Array1<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError>
    where
        G: FnMut(&ArrayView1<F>) -> Array1<F>,
        F: Into<f64> + Copy,
    {
        let mut x = x0.clone();

        for iter in 0..self.max_iter {
            // Compute gradient
            let grad = grad_fn(&x.view());

            // Gradient step
            let x_new = &x - &(&grad * self.learning_rate);

            // Proximal step (elastic net regularization)
            let x_prox = self.prox_elastic_net(&x_new, self.learning_rate);

            // Check convergence
            let change = (&x_prox - &x).mapv(|xi| xi.abs()).sum();
            if change < self.tol {
                return Ok(OptimizeResult {
                    x: x_prox.mapv(|v| v.into()),
                    fun: F::zero(),
                    nit: iter,
                    func_evals: iter,
                    nfev: iter,
                    success: true,
                    message: "Optimization terminated successfully.".to_string(),
                    jacobian: Some(grad.mapv(|v| v.into())),
                    hessian: None,
                });
            }

            x = x_prox;
        }

        Err(OptimizeError::ConvergenceError(
            "Maximum iterations reached".to_string(),
        ))
    }
}

/// ADMM (Alternating Direction Method of Multipliers) optimizer for general constrained problems
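///
/// For the Lasso problem min f(x) + lambda * ||z||_1 subject to x = z, each
/// iteration performs:
///
/// * x-update: a gradient step on f(x) + (rho/2) * ||x - z + u||^2
/// * z-update: soft thresholding of x + u with threshold lambda / rho
/// * u-update: u = u + x - z (scaled dual variable)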
#[derive(Debug, Clone)]
pub struct ADMMOptimizer<F: Float> {
    /// Penalty parameter
    pub rho: F,
    /// Primal residual tolerance
    pub eps_pri: F,
    /// Dual residual tolerance
    pub eps_dual: F,
    /// Maximum number of iterations
    pub max_iter: usize,
}

impl<F: Float + ScalarOperand> ADMMOptimizer<F> {
    /// Create a new ADMM optimizer
    pub fn new(rho: F) -> Self {
        Self {
            rho,
            eps_pri: F::from(1e-3).expect("Failed to convert constant to float"),
            eps_dual: F::from(1e-3).expect("Failed to convert constant to float"),
            max_iter: 1000,
        }
    }

    /// Solve Lasso using ADMM
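    ///
    /// # Example
    ///
    /// A minimal sketch mirroring the unit test below; the gradient is for the
    /// loss f(x) = ||x||^2 and `data` is unused. The snippet is not compiled as
    /// a doctest:
    ///
    /// ```ignore
    /// use scirs2_core::ndarray::{array, ArrayView1};
    ///
    /// let optimizer = ADMMOptimizer::new(1.0);
    /// let loss_grad = |x: &ArrayView1<f64>, _data: &()| x.mapv(|xi| 2.0 * xi);
    /// let result = optimizer.solve_lasso(loss_grad, 1.0, &array![2.0], &());
    /// ```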
    pub fn solve_lasso<LossGrad, Data>(
        &self,
        loss_grad: LossGrad,
        lambda: F,
        x0: &Array1<F>,
        data: &Data,
    ) -> Result<OptimizeResult<F>, OptimizeError>
    where
        LossGrad: Fn(&ArrayView1<F>, &Data) -> Array1<F>,
        Data: Clone,
        F: Into<f64> + Copy,
    {
        let n = x0.len();
        let mut x = x0.clone();
        let mut z = Array1::zeros(n);
        let mut u = Array1::zeros(n); // Scaled dual variable

        for iter in 0..self.max_iter {
            // x-update: minimize loss + (rho/2)||x - z + u||^2
            let grad_loss = loss_grad(&x.view(), data);
            let grad_augmented = &grad_loss + &((&x - &z + &u) * self.rho);

            // Step size tied to the penalty parameter; fall back to a small
            // constant step when the augmented gradient is negligible
            let grad_norm = grad_augmented.mapv(|g| g * g).sum().sqrt();
            let lr = if grad_norm > F::epsilon() {
                F::one() / (F::one() + self.rho)
            } else {
                F::from(0.1).expect("Failed to convert constant to float")
            };
            x = &x - &(&grad_augmented * lr);

            // z-update: soft thresholding
            let z_old = z.clone();
            let threshold = lambda / self.rho;
            z = (&x + &u).mapv(|xi| {
                if xi > threshold {
                    xi - threshold
                } else if xi < -threshold {
                    xi + threshold
                } else {
                    F::zero()
                }
            });

            // u-update: dual variable update
            u = &u + &x - &z;

            // Check convergence
            let r_norm = (&x - &z).mapv(|xi| xi * xi).sum().sqrt(); // Primal residual
            let s_norm = ((&z - &z_old) * self.rho).mapv(|xi| xi * xi).sum().sqrt(); // Dual residual

            let x_norm = x.mapv(|xi| xi * xi).sum().sqrt();
            let z_norm = z.mapv(|xi| xi * xi).sum().sqrt();
            let u_norm = u.mapv(|xi| xi * xi).sum().sqrt();

            let eps_pri_thresh = self.eps_pri
                * (F::sqrt(F::from(n).expect("Failed to convert to float"))
                    + F::max(x_norm, z_norm));
            let eps_dual_thresh = self.eps_dual
                * (F::sqrt(F::from(n).expect("Failed to convert to float"))
                    + self.rho * u_norm);

            if r_norm < eps_pri_thresh && s_norm < eps_dual_thresh {
                return Ok(OptimizeResult {
                    x: x.mapv(|v| v.into()),
                    fun: F::zero(),
                    nit: iter,
                    func_evals: iter,
                    nfev: iter,
                    success: true,
                    message: "ADMM converged successfully.".to_string(),
                    jacobian: Some(grad_loss.mapv(|v| v.into())),
                    hessian: None,
                });
            }
        }

        Err(OptimizeError::ConvergenceError(
            "Maximum iterations reached".to_string(),
        ))
    }
}

/// Coordinate Descent optimizer for separable problems
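///
/// # Example
///
/// A minimal sketch on the quadratic f(x) = x1^2 + x2^2 (mirrors the unit test
/// below); the snippet is not compiled as a doctest:
///
/// ```ignore
/// use scirs2_core::ndarray::{array, ArrayView1};
///
/// let optimizer = CoordinateDescentOptimizer::new();
/// let obj_fn = |x: &ArrayView1<f64>| x.mapv(|xi| xi * xi).sum();
/// let grad_1d_fn = |x: &ArrayView1<f64>, i: usize| 2.0 * x[i];
/// let result = optimizer.minimize(obj_fn, grad_1d_fn, &array![1.0, 1.0]);
/// ```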
#[derive(Debug, Clone)]
pub struct CoordinateDescentOptimizer<F: Float> {
    /// Maximum number of iterations
    pub max_iter: usize,
    /// Convergence tolerance
    pub tol: F,
    /// Whether to use random coordinate selection
    pub random: bool,
}

impl<F: Float + ScalarOperand> Default for CoordinateDescentOptimizer<F> {
    fn default() -> Self {
        Self {
            max_iter: 1000,
            tol: F::from(1e-6).expect("Failed to convert constant to float"),
            random: false,
        }
    }
}

impl<F: Float + ScalarOperand> CoordinateDescentOptimizer<F> {
    /// Create a new coordinate descent optimizer
    pub fn new() -> Self {
        Self::default()
    }

    /// Create a new random coordinate descent optimizer
    pub fn random() -> Self {
        Self {
            max_iter: 1000,
            tol: F::from(1e-6).expect("Failed to convert constant to float"),
            random: true,
        }
    }

    /// Minimize using coordinate descent
    pub fn minimize<Obj, Grad1D>(
        &self,
        obj_fn: Obj,
        grad_1d_fn: Grad1D,
        x0: &Array1<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError>
    where
        Obj: Fn(&ArrayView1<F>) -> F,
        Grad1D: Fn(&ArrayView1<F>, usize) -> F, // Partial derivative w.r.t. coordinate i
        F: Into<f64> + Copy,
    {
        let mut x = x0.clone();
        let n = x.len();

        for iter in 0..self.max_iter {
            let x_old = x.clone();

            // Choose coordinate order
            let coords: Vec<usize> = if self.random {
                let mut coords: Vec<usize> = (0..n).collect();
                coords.shuffle(&mut scirs2_core::random::rng());
                coords
            } else {
                (0..n).collect()
            };

            // Update each coordinate
            for &i in &coords {
                let grad_i = grad_1d_fn(&x.view(), i);

                // Simple gradient step for coordinate i
                let lr = F::from(0.01).expect("Failed to convert constant to float");
                x[i] = x[i] - lr * grad_i;
            }

            // Check convergence
            let change = (&x - &x_old).mapv(|xi| xi.abs()).sum();
            if change < self.tol {
                let final_obj = obj_fn(&x.view());
                return Ok(OptimizeResult {
                    x: x.mapv(|v| v.into()),
                    fun: final_obj,
                    nit: iter,
                    func_evals: iter * n,
                    nfev: iter * n,
                    success: true,
                    message: "Coordinate descent converged successfully.".to_string(),
                    jacobian: Some(Array1::zeros(n)), // Could compute full gradient here
                    hessian: None,
                });
            }
        }

        Err(OptimizeError::ConvergenceError(
            "Maximum iterations reached".to_string(),
        ))
    }
}

/// Convenience functions for common ML optimization problems
pub mod ml_problems {
    use super::*;

    /// Solve Lasso regression: min ||Ax - b||^2 + lambda ||x||_1
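    ///
    /// The gradient used internally is 2 * A^T (A x - b).
    ///
    /// # Example
    ///
    /// A minimal sketch mirroring the unit test below; the snippet is not
    /// compiled as a doctest:
    ///
    /// ```ignore
    /// use scirs2_core::ndarray::array;
    ///
    /// let a = array![[1.0, 0.0], [1.0, 0.0], [1.0, 0.0]];
    /// let b = array![1.0, 1.1, 0.9];
    /// let result = lasso_regression(&a.view(), &b.view(), 0.1, None);
    /// ```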
    pub fn lasso_regression<F: Float + ScalarOperand + Into<f64> + Copy>(
        a: &ArrayView2<F>,
        b: &ArrayView1<F>,
        lambda: F,
        learning_rate: Option<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError> {
        let lr = learning_rate
            .unwrap_or_else(|| F::from(0.01).expect("Failed to convert constant to float"));
        let optimizer = LassoOptimizer::new(lambda, lr);

        let n = a.ncols();
        let x0 = Array1::zeros(n);

        let grad_fn = |x: &ArrayView1<F>| -> Array1<F> {
            let residual = a.dot(x) - b;
            a.t().dot(&residual) * F::from(2).expect("Failed to convert constant to float")
        };

        optimizer.minimize(grad_fn, &x0)
    }

    /// Solve Group Lasso regression
    pub fn group_lasso_regression<F: Float + ScalarOperand + Into<f64> + Copy>(
        a: &ArrayView2<F>,
        b: &ArrayView1<F>,
        lambda: F,
        groups: Vec<usize>,
        learning_rate: Option<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError> {
        let lr = learning_rate
            .unwrap_or_else(|| F::from(0.01).expect("Failed to convert constant to float"));
        let optimizer = GroupLassoOptimizer::new(lambda, lr, groups);

        let n = a.ncols();
        let x0 = Array1::zeros(n);

        let grad_fn = |x: &ArrayView1<F>| -> Array1<F> {
            let residual = a.dot(x) - b;
            a.t().dot(&residual) * F::from(2).expect("Failed to convert constant to float")
        };

        optimizer.minimize(grad_fn, &x0)
    }

    /// Solve Elastic Net regression: min ||Ax - b||^2 + lambda1 ||x||_1 + lambda2 ||x||_2^2
    pub fn elastic_net_regression<F: Float + ScalarOperand + Into<f64> + Copy>(
        a: &ArrayView2<F>,
        b: &ArrayView1<F>,
        lambda1: F,
        lambda2: F,
        learning_rate: Option<F>,
    ) -> Result<OptimizeResult<F>, OptimizeError> {
        let lr = learning_rate
            .unwrap_or_else(|| F::from(0.01).expect("Failed to convert constant to float"));
        let optimizer = ElasticNetOptimizer::new(lambda1, lambda2, lr);

        let n = a.ncols();
        let x0 = Array1::zeros(n);

        let grad_fn = |x: &ArrayView1<F>| -> Array1<F> {
            let residual = a.dot(x) - b;
            a.t().dot(&residual) * F::from(2).expect("Failed to convert constant to float")
        };

        optimizer.minimize(grad_fn, &x0)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_lasso_soft_threshold() {
        let optimizer = LassoOptimizer::new(1.0, 0.1);

        // Test soft thresholding
        assert_abs_diff_eq!(optimizer.soft_threshold(2.0, 1.0), 1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(optimizer.soft_threshold(-2.0, 1.0), -1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(optimizer.soft_threshold(0.5, 1.0), 0.0, epsilon = 1e-10);
    }

    #[test]
    fn test_lasso_prox_operator() {
        let optimizer = LassoOptimizer::new(1.0, 0.1);
        let x = array![2.0, -2.0, 0.5, -0.5];
        let result = optimizer.prox_l1(&x, 1.0);

        assert_abs_diff_eq!(result[0], 1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(result[1], -1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(result[2], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(result[3], 0.0, epsilon = 1e-10);
    }

    #[test]
    fn test_elastic_net_prox_operator() {
        let optimizer = ElasticNetOptimizer::new(1.0, 1.0, 0.1);
        let x = array![3.0, -3.0, 0.5];
        let result = optimizer.prox_elastic_net(&x, 1.0);

        // With L2 regularization, should shrink more than pure L1
        assert!(result[0] > 0.0 && result[0] < 2.0);
        assert!(result[1] < 0.0 && result[1] > -2.0);
        assert_abs_diff_eq!(result[2], 0.0, epsilon = 1e-10);
    }

    #[test]
    fn test_group_lasso_simple() {
        let optimizer = GroupLassoOptimizer::new(1.0, 0.1, vec![0, 0, 1, 1]);
        let x = array![1.0, 1.0, 0.1, 0.1];
        let result = optimizer.prox_group_l1(&x, 1.0);

        // First group (0,1) should be shrunk but not zeroed
        assert!(result[0] > 0.0 && result[0] < 1.0);
        assert!(result[1] > 0.0 && result[1] < 1.0);

        // Second group (2,3) should be zeroed (small norm)
        assert_abs_diff_eq!(result[2], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(result[3], 0.0, epsilon = 1e-10);
    }

    #[test]
    fn test_coordinate_descent_optimizer() {
        let optimizer = CoordinateDescentOptimizer::new();

        // Simple quadratic: f(x) = x1^2 + x2^2
        let obj_fn = |x: &ArrayView1<f64>| x.mapv(|xi| xi * xi).sum();
        let grad_1d_fn = |x: &ArrayView1<f64>, i: usize| 2.0 * x[i];

        let x0 = array![1.0, 1.0];
        let result = optimizer
            .minimize(obj_fn, grad_1d_fn, &x0)
            .expect("Operation failed");

        // Should converge to origin
        assert!(result.x[0].abs() < 0.1);
        assert!(result.x[1].abs() < 0.1);
        assert!(result.success);
    }

    #[test]
    fn test_admm_optimizer() {
        let optimizer = ADMMOptimizer::new(1.0);

        // Simple test: minimize x^2 + lambda|x|
        let loss_grad = |x: &ArrayView1<f64>, _data: &()| x.mapv(|xi| 2.0 * xi);

        let x0 = array![2.0];
        let lambda = 1.0;
        let result = optimizer
            .solve_lasso(loss_grad, lambda, &x0, &())
            .expect("Operation failed");

        // Should converge to a sparse solution
        assert!(result.x[0].abs() < 1.0); // Should be shrunk
        assert!(result.success);
    }

    #[test]
    fn test_ml_lasso_regression() {
        // Simple 2D problem: y = x1 + noise, x2 should be zero
        let a = array![[1.0, 0.0], [1.0, 0.0], [1.0, 0.0]];
        let b = array![1.0, 1.1, 0.9];
        let lambda = 0.1;

        let result = ml_problems::lasso_regression(&a.view(), &b.view(), lambda, None)
            .expect("Operation failed");

        // x1 should be close to 1, x2 should be close to 0
        assert!(result.x[0] > 0.5);
        assert!(result.x[1].abs() < 0.1);
    }
}