sklears_ensemble/regularized.rs

//! Regularized Ensemble Methods
//!
//! This module provides ensemble methods with L1/L2 regularization for ensemble weights,
//! dropout techniques for robustness, and advanced weight optimization strategies.
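//!
//! # Example
//!
//! A minimal usage sketch (marked `ignore` so it is not compiled as a doctest; the
//! `sklears_ensemble::regularized` import path is an assumption about how this module is
//! exposed, and the tiny data set mirrors the unit tests at the bottom of this file):
//!
//! ```ignore
//! use scirs2_core::ndarray::Array2;
//! use sklears_core::{prelude::Predict, traits::Fit};
//! use sklears_ensemble::regularized::{RegularizedEnsembleRegressor, WeightOptimizer};
//!
//! let x = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
//! let y = vec![1.0, 2.0, 3.0];
//!
//! // `fit` consumes the unfitted regressor and returns the fitted model.
//! let model = RegularizedEnsembleRegressor::builder()
//!     .n_estimators(3)
//!     .alpha_l1(0.1)
//!     .alpha_l2(0.1)
//!     .weight_optimizer(WeightOptimizer::CoordinateDescent)
//!     .build()
//!     .fit(&x, &y)
//!     .unwrap();
//!
//! let predictions = model.predict(&x).unwrap();
//! assert_eq!(predictions.len(), 3);
//! ```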

use crate::gradient_boosting::{GradientBoostingRegressor, TrainedGradientBoostingRegressor};
// NOTE: random number generation goes through scirs2_core::random rather than rand_chacha
// (SciRS2 policy), hence the small RngCore-based helpers below.
use scirs2_core::ndarray::{Array1, Array2};
use sklears_core::{
    error::Result as SklResult,
    prelude::{Predict, SklearsError},
    traits::{Estimator, Fit},
};

/// Helper function to generate an approximately uniform random f64 in [0, 1] from
/// scirs2_core::random::RngCore
fn gen_f64(rng: &mut impl scirs2_core::random::RngCore) -> f64 {
    let mut bytes = [0u8; 8];
    rng.fill_bytes(&mut bytes);
    // Interpret the random bytes as an unsigned integer and scale into [0, 1].
    u64::from_le_bytes(bytes) as f64 / u64::MAX as f64
}

/// Helper function to generate random value in range from scirs2_core::random::RngCore
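///
/// Maps the raw 64-bit draw into the range with a modulo, which carries a slight bias when
/// the range length does not evenly divide 2^64; that is acceptable for picking sample
/// indices here. Panics if the range is empty.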
fn gen_range_usize(
    rng: &mut impl scirs2_core::random::RngCore,
    range: std::ops::Range<usize>,
) -> usize {
    let mut bytes = [0u8; 8];
    rng.fill_bytes(&mut bytes);
    let val = u64::from_le_bytes(bytes);
    range.start + (val as usize % (range.end - range.start))
}

// Use concrete type instead of trait object for simplicity
type BoxedEstimator = Box<TrainedGradientBoostingRegressor>;

/// Configuration for regularized ensemble methods
#[derive(Debug, Clone)]
pub struct RegularizedEnsembleConfig {
    /// Number of base estimators
    pub n_estimators: usize,
    /// L1 regularization strength (Lasso)
    pub alpha_l1: f64,
    /// L2 regularization strength (Ridge)
    pub alpha_l2: f64,
    /// Elastic net mixing parameter (0 = Ridge, 1 = Lasso)
    pub l1_ratio: f64,
    /// Weight optimization algorithm
    pub weight_optimizer: WeightOptimizer,
    /// Dropout probability for ensemble training
    pub dropout_probability: f64,
    /// Whether to use noise injection for robustness
    pub noise_injection: bool,
    /// Noise variance for injection
    pub noise_variance: f64,
    /// Maximum number of optimization iterations
    pub max_iterations: usize,
    /// Convergence tolerance for weight optimization
    pub tolerance: f64,
    /// Learning rate for gradient-based optimization
    pub learning_rate: f64,
    /// Weight decay factor
    pub weight_decay: f64,
    /// Random seed for reproducibility
    pub random_state: Option<u64>,
}

impl Default for RegularizedEnsembleConfig {
    fn default() -> Self {
        Self {
            n_estimators: 10,
            alpha_l1: 0.1,
            alpha_l2: 0.1,
            l1_ratio: 0.5,
            weight_optimizer: WeightOptimizer::CoordinateDescent,
            dropout_probability: 0.1,
            noise_injection: false,
            noise_variance: 0.01,
            max_iterations: 1000,
            tolerance: 1e-6,
            learning_rate: 0.01,
            weight_decay: 0.0001,
            random_state: None,
        }
    }
}

/// Weight optimization algorithms for ensemble weights
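///
/// `CoordinateDescent`, `SGD`, `Adam`, and `ProximalGradient` have dedicated implementations
/// in this module; the remaining variants currently fall back to coordinate descent
/// (see `optimize_ensemble_weights`).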
#[derive(Debug, Clone, PartialEq)]
pub enum WeightOptimizer {
    /// Coordinate descent for elastic net
    CoordinateDescent,
    /// Stochastic gradient descent
    SGD,
    /// Adam optimizer
    Adam,
    /// RMSprop optimizer
    RMSprop,
    /// Limited-memory BFGS
    LBFGS,
    /// Proximal gradient method
    ProximalGradient,
    /// Alternating direction method of multipliers
    ADMM,
}

/// Regularized ensemble classifier with L1/L2 regularization
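///
/// Note: within this module only [`RegularizedEnsembleRegressor`] implements `Fit`/`Predict`;
/// this type holds the corresponding state for classification but has no training logic here.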
pub struct RegularizedEnsembleClassifier {
    config: RegularizedEnsembleConfig,
    base_estimators: Vec<BoxedEstimator>,
    ensemble_weights: Vec<f64>,
    feature_weights: Option<Vec<f64>>,
    optimizer_state: OptimizerState,
    regularization_path: Vec<RegularizationStep>,
    is_fitted: bool,
}

/// Regularized ensemble regressor with L1/L2 regularization
pub struct RegularizedEnsembleRegressor {
    config: RegularizedEnsembleConfig,
    base_estimators: Vec<BoxedEstimator>,
    ensemble_weights: Vec<f64>,
    feature_weights: Option<Vec<f64>>,
    optimizer_state: OptimizerState,
    regularization_path: Vec<RegularizationStep>,
    is_fitted: bool,
}

/// State for optimization algorithms
#[derive(Debug, Clone)]
pub struct OptimizerState {
    /// Momentum terms for optimizers that use them
    pub momentum: Vec<f64>,
    /// Second moment estimates (for Adam)
    pub variance: Vec<f64>,
    /// Iteration counter
    pub iteration: usize,
    /// Current loss value
    pub current_loss: f64,
    /// Previous loss value for convergence checking
    pub previous_loss: f64,
    /// Gradient history for L-BFGS
    pub gradient_history: Vec<Vec<f64>>,
    /// Weight history for L-BFGS
    pub weight_history: Vec<Vec<f64>>,
}

impl Default for OptimizerState {
    fn default() -> Self {
        Self {
            momentum: Vec::new(),
            variance: Vec::new(),
            iteration: 0,
            current_loss: f64::INFINITY,
            previous_loss: f64::INFINITY,
            gradient_history: Vec::new(),
            weight_history: Vec::new(),
        }
    }
}

/// Information about each regularization step
#[derive(Debug, Clone)]
pub struct RegularizationStep {
    /// Alpha value used in this step
    pub alpha: f64,
    /// L1 ratio used in this step
    pub l1_ratio: f64,
    /// Weights at this step
    pub weights: Vec<f64>,
    /// Loss value at this step
    pub loss: f64,
    /// Number of non-zero weights (sparsity)
    pub n_nonzero: usize,
}

/// Dropout ensemble for robustness training
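///
/// Each base estimator is trained on a random feature mask drawn with `dropout_probability`;
/// kept features are rescaled by `1 / (1 - dropout_probability)` (inverted dropout), and
/// prediction averages the per-estimator outputs obtained under the same masks.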
pub struct DropoutEnsemble {
    config: RegularizedEnsembleConfig,
    estimators: Vec<BoxedEstimator>,
    dropout_masks: Vec<Vec<bool>>,
    rng: scirs2_core::random::CoreRandom<scirs2_core::random::rngs::StdRng>,
}

impl RegularizedEnsembleConfig {
    pub fn builder() -> RegularizedEnsembleConfigBuilder {
        RegularizedEnsembleConfigBuilder::default()
    }
}

#[derive(Default)]
pub struct RegularizedEnsembleConfigBuilder {
    config: RegularizedEnsembleConfig,
}

impl RegularizedEnsembleConfigBuilder {
    pub fn n_estimators(mut self, n_estimators: usize) -> Self {
        self.config.n_estimators = n_estimators;
        self
    }

    pub fn alpha_l1(mut self, alpha: f64) -> Self {
        self.config.alpha_l1 = alpha;
        self
    }

    pub fn alpha_l2(mut self, alpha: f64) -> Self {
        self.config.alpha_l2 = alpha;
        self
    }

    pub fn l1_ratio(mut self, ratio: f64) -> Self {
        self.config.l1_ratio = ratio.clamp(0.0, 1.0);
        self
    }

    pub fn weight_optimizer(mut self, optimizer: WeightOptimizer) -> Self {
        self.config.weight_optimizer = optimizer;
        self
    }

    pub fn dropout_probability(mut self, prob: f64) -> Self {
        self.config.dropout_probability = prob.clamp(0.0, 1.0);
        self
    }

    pub fn noise_injection(mut self, inject: bool) -> Self {
        self.config.noise_injection = inject;
        self
    }

    pub fn max_iterations(mut self, max_iter: usize) -> Self {
        self.config.max_iterations = max_iter;
        self
    }

    pub fn tolerance(mut self, tol: f64) -> Self {
        self.config.tolerance = tol;
        self
    }

    pub fn learning_rate(mut self, lr: f64) -> Self {
        self.config.learning_rate = lr;
        self
    }

    pub fn random_state(mut self, seed: u64) -> Self {
        self.config.random_state = Some(seed);
        self
    }

    pub fn build(self) -> RegularizedEnsembleConfig {
        self.config
    }
}

impl RegularizedEnsembleRegressor {
    pub fn new(config: RegularizedEnsembleConfig) -> Self {
        Self {
            config,
            base_estimators: Vec::new(),
            ensemble_weights: Vec::new(),
            feature_weights: None,
            optimizer_state: OptimizerState::default(),
            regularization_path: Vec::new(),
            is_fitted: false,
        }
    }

    pub fn builder() -> RegularizedEnsembleRegressorBuilder {
        RegularizedEnsembleRegressorBuilder::new()
    }

    /// Train base estimators
    #[allow(non_snake_case)]
    fn train_base_estimators(&mut self, X: &Array2<f64>, y: &Vec<f64>) -> SklResult<()> {
        self.base_estimators.clear();

        for i in 0..self.config.n_estimators {
            // Add noise injection if enabled
            let (X_train, y_train) = if self.config.noise_injection {
                self.inject_noise(X, y, i)?
            } else {
                (X.clone(), y.clone())
            };

            // Create and train base estimator
            let y_train_array = Array1::from_vec(y_train.clone());
            let estimator = GradientBoostingRegressor::builder()
                .n_estimators(50)
                .learning_rate(0.1)
                .max_depth(4)
                .build()
                .fit(&X_train, &y_train_array)?;

            self.base_estimators.push(Box::new(estimator));
        }

        // Initialize weights uniformly
        self.ensemble_weights =
            vec![1.0 / self.config.n_estimators as f64; self.config.n_estimators];

        Ok(())
    }

    /// Inject noise into training data for robustness
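    ///
    /// The RNG is seeded with `random_state` (default 42) plus the estimator index, so each
    /// base estimator sees a different perturbation. Feature noise is zero-mean and scaled by
    /// `noise_variance.sqrt()`; target noise is ten times smaller.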
    #[allow(non_snake_case)]
    fn inject_noise(
        &self,
        X: &Array2<f64>,
        y: &Vec<f64>,
        seed_offset: usize,
    ) -> SklResult<(Array2<f64>, Vec<f64>)> {
        let seed = self.config.random_state.unwrap_or(42) + seed_offset as u64;
        let mut rng = scirs2_core::random::seeded_rng(seed);

        let shape = X.shape();
        let (n_samples, n_features) = (shape[0], shape[1]);
        let noise_std = self.config.noise_variance.sqrt();

        let mut X_noisy = X.clone();
        let mut y_noisy = y.clone();

        // Add zero-mean noise to features (uniform in [-noise_std, noise_std] as a
        // light-weight stand-in for Gaussian noise)
        for i in 0..n_samples {
            for j in 0..n_features {
                let noise = (gen_f64(&mut rng) - 0.5) * 2.0 * noise_std;
                X_noisy[[i, j]] += noise;
            }

            // Add smaller zero-mean noise to targets
            let target_noise = (gen_f64(&mut rng) - 0.5) * 2.0 * noise_std * 0.1;
            y_noisy[i] += target_noise;
        }

        Ok((X_noisy, y_noisy))
    }

    /// Optimize ensemble weights with regularization
    #[allow(non_snake_case)]
    fn optimize_ensemble_weights(&mut self, X: &Array2<f64>, y: &Vec<f64>) -> SklResult<()> {
        // Get predictions from all base estimators
        let base_predictions = self.get_base_predictions(X)?;

        // Initialize optimizer state
        self.optimizer_state = OptimizerState {
            momentum: vec![0.0; self.config.n_estimators],
            variance: vec![0.0; self.config.n_estimators],
            ..OptimizerState::default()
        };

        // Optimize weights based on selected algorithm
        match self.config.weight_optimizer {
            WeightOptimizer::CoordinateDescent => {
                self.coordinate_descent_optimization(&base_predictions, y)?;
            }
            WeightOptimizer::SGD => {
                self.sgd_optimization(&base_predictions, y)?;
            }
            WeightOptimizer::Adam => {
                self.adam_optimization(&base_predictions, y)?;
            }
            WeightOptimizer::ProximalGradient => {
                self.proximal_gradient_optimization(&base_predictions, y)?;
            }
            _ => {
                // Default to coordinate descent for optimizers without a dedicated implementation
                self.coordinate_descent_optimization(&base_predictions, y)?;
            }
        }

        Ok(())
    }

    /// Get predictions from all base estimators
    #[allow(non_snake_case)]
    fn get_base_predictions(&self, X: &Array2<f64>) -> SklResult<Vec<Vec<f64>>> {
        let mut predictions = Vec::new();

        for estimator in &self.base_estimators {
            let pred = estimator.predict(X)?;
            predictions.push(pred.to_vec());
        }

        Ok(predictions)
    }

    /// Coordinate descent optimization for elastic net
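    ///
    /// Each weight is updated in turn with the elastic-net coordinate update used below:
    ///
    /// `w_j = S( sum_i p_j[i] * (y[i] - yhat_without_j[i]), n * alpha_l1 * l1_ratio )
    ///        / ( sum_i p_j[i]^2 + n * alpha_l2 * (1 - l1_ratio) )`
    ///
    /// where `p_j` are estimator j's predictions, `yhat_without_j` is the ensemble prediction
    /// excluding estimator j, `n` is the number of samples, and `S` is the soft-threshold
    /// operator defined further down.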
    fn coordinate_descent_optimization(
        &mut self,
        predictions: &[Vec<f64>],
        y: &[f64],
    ) -> SklResult<()> {
        let n_samples = y.len();
        let n_estimators = self.config.n_estimators;

        for _iteration in 0..self.config.max_iterations {
            let mut max_weight_change: f64 = 0.0;

            for j in 0..n_estimators {
                let old_weight = self.ensemble_weights[j];

                // Compute partial residual
                let mut partial_residual = 0.0;
                for i in 0..n_samples {
                    let mut ensemble_pred = 0.0;
                    for k in 0..n_estimators {
                        if k != j {
                            ensemble_pred += self.ensemble_weights[k] * predictions[k][i];
                        }
                    }
                    partial_residual += predictions[j][i] * (y[i] - ensemble_pred);
                }

                // Compute L2 norm squared for this estimator
                let mut l2_norm_sq = 0.0;
                for i in 0..n_samples {
                    l2_norm_sq += predictions[j][i] * predictions[j][i];
                }

                // Soft thresholding for L1 regularization
                let l1_penalty = self.config.alpha_l1 * self.config.l1_ratio;
                let l2_penalty = self.config.alpha_l2 * (1.0 - self.config.l1_ratio);

                let numerator =
                    self.soft_threshold(partial_residual, l1_penalty * n_samples as f64);
                let denominator = l2_norm_sq + l2_penalty * n_samples as f64;

                self.ensemble_weights[j] = if denominator > 0.0 {
                    numerator / denominator
                } else {
                    0.0
                };

                let weight_change = (self.ensemble_weights[j] - old_weight).abs();
                max_weight_change = max_weight_change.max(weight_change);
            }

            // Check convergence
            if max_weight_change < self.config.tolerance {
                break;
            }

            // Record regularization step
            let loss = self.compute_loss(predictions, y);
            let n_nonzero = self
                .ensemble_weights
                .iter()
                .filter(|&&w| w.abs() > 1e-10)
                .count();

            self.regularization_path.push(RegularizationStep {
                alpha: self.config.alpha_l1 + self.config.alpha_l2,
                l1_ratio: self.config.l1_ratio,
                weights: self.ensemble_weights.clone(),
                loss,
                n_nonzero,
            });
        }

        Ok(())
    }

    /// Stochastic gradient descent optimization
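    ///
    /// One training example is sampled per iteration; the per-weight gradient is
    /// `error * p_j[i]` plus the L1 (`alpha_l1 * l1_ratio * sign(w_j)`) and
    /// L2 (`alpha_l2 * (1 - l1_ratio) * w_j`) terms. Updates use a momentum buffer
    /// (`m_j = 0.9 * m_j + learning_rate * g_j; w_j -= m_j`) followed by weight decay.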
    fn sgd_optimization(&mut self, predictions: &[Vec<f64>], y: &[f64]) -> SklResult<()> {
        let mut rng = scirs2_core::random::seeded_rng(self.config.random_state.unwrap_or(42));

        let n_samples = y.len();
        let n_estimators = self.config.n_estimators;

        for iteration in 0..self.config.max_iterations {
            // Randomly sample a single training example
            let sample_idx = gen_range_usize(&mut rng, 0..n_samples);

            // Compute prediction for this sample
            let mut pred = 0.0;
            for j in 0..n_estimators {
                pred += self.ensemble_weights[j] * predictions[j][sample_idx];
            }

            let error = pred - y[sample_idx];

            // Compute gradients and update weights
            for j in 0..n_estimators {
                let gradient = error * predictions[j][sample_idx];

                // Add L1 and L2 regularization terms
                let l1_grad =
                    self.config.alpha_l1 * self.config.l1_ratio * self.ensemble_weights[j].signum();
                let l2_grad =
                    self.config.alpha_l2 * (1.0 - self.config.l1_ratio) * self.ensemble_weights[j];

                let total_gradient = gradient + l1_grad + l2_grad;

                // Update with momentum
                self.optimizer_state.momentum[j] = 0.9 * self.optimizer_state.momentum[j]
                    + self.config.learning_rate * total_gradient;
                self.ensemble_weights[j] -= self.optimizer_state.momentum[j];
            }

            // Apply weight decay
            for weight in &mut self.ensemble_weights {
                *weight *= 1.0 - self.config.weight_decay;
            }

            // Check convergence periodically
            if iteration % 100 == 0 {
                let loss = self.compute_loss(predictions, y);
                if (self.optimizer_state.previous_loss - loss).abs() < self.config.tolerance {
                    break;
                }
                self.optimizer_state.previous_loss = loss;
            }
        }

        Ok(())
    }

    /// Adam optimization algorithm
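    ///
    /// Uses the standard Adam updates on a single randomly sampled example per iteration:
    ///
    /// `m_j = beta1 * m_j + (1 - beta1) * g_j`
    /// `v_j = beta2 * v_j + (1 - beta2) * g_j^2`
    /// `w_j -= learning_rate * m_hat / (sqrt(v_hat) + epsilon)`
    ///
    /// with bias-corrected `m_hat = m_j / (1 - beta1^t)` and `v_hat = v_j / (1 - beta2^t)`,
    /// `beta1 = 0.9`, `beta2 = 0.999`, `epsilon = 1e-8`, and the same elastic-net gradient
    /// terms as the SGD optimizer.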
    fn adam_optimization(&mut self, predictions: &[Vec<f64>], y: &[f64]) -> SklResult<()> {
        let mut rng = scirs2_core::random::seeded_rng(self.config.random_state.unwrap_or(42));

        let n_samples = y.len();
        let n_estimators = self.config.n_estimators;
        let beta1 = 0.9;
        let beta2 = 0.999;
        let epsilon = 1e-8;

        for iteration in 0..self.config.max_iterations {
            // Randomly sample a single training example
            let sample_idx = gen_range_usize(&mut rng, 0..n_samples);

            // Compute prediction for this sample
            let mut pred = 0.0;
            for j in 0..n_estimators {
                pred += self.ensemble_weights[j] * predictions[j][sample_idx];
            }

            let error = pred - y[sample_idx];

            // Compute gradients and update weights using Adam
            for j in 0..n_estimators {
                let gradient = error * predictions[j][sample_idx];

                // Add regularization terms
                let l1_grad =
                    self.config.alpha_l1 * self.config.l1_ratio * self.ensemble_weights[j].signum();
                let l2_grad =
                    self.config.alpha_l2 * (1.0 - self.config.l1_ratio) * self.ensemble_weights[j];

                let total_gradient = gradient + l1_grad + l2_grad;

                // Update biased first moment estimate
                self.optimizer_state.momentum[j] =
                    beta1 * self.optimizer_state.momentum[j] + (1.0 - beta1) * total_gradient;

                // Update biased second raw moment estimate
                self.optimizer_state.variance[j] = beta2 * self.optimizer_state.variance[j]
                    + (1.0 - beta2) * total_gradient * total_gradient;

                // Compute bias-corrected first and second moment estimates
                let m_hat =
                    self.optimizer_state.momentum[j] / (1.0 - beta1.powi((iteration + 1) as i32));
                let v_hat =
                    self.optimizer_state.variance[j] / (1.0 - beta2.powi((iteration + 1) as i32));

                // Update weights
                self.ensemble_weights[j] -=
                    self.config.learning_rate * m_hat / (v_hat.sqrt() + epsilon);
            }

            // Check convergence periodically
            if iteration % 100 == 0 {
                let loss = self.compute_loss(predictions, y);
                if (self.optimizer_state.previous_loss - loss).abs() < self.config.tolerance {
                    break;
                }
                self.optimizer_state.previous_loss = loss;
            }
        }

        Ok(())
    }

    /// Proximal gradient optimization
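    ///
    /// Each iteration takes a full-batch gradient step on the MSE term, applies the
    /// soft-threshold proximal operator for the L1 penalty (threshold
    /// `learning_rate * alpha_l1 * l1_ratio`), and then shrinks all weights by
    /// `1 / (1 + learning_rate * alpha_l2 * (1 - l1_ratio))` for the L2 penalty.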
    fn proximal_gradient_optimization(
        &mut self,
        predictions: &[Vec<f64>],
        y: &[f64],
    ) -> SklResult<()> {
        let n_samples = y.len();
        let n_estimators = self.config.n_estimators;

        for _iteration in 0..self.config.max_iterations {
            // Compute full gradient
            let mut gradients = vec![0.0; n_estimators];

            for i in 0..n_samples {
                let mut pred = 0.0;
                for j in 0..n_estimators {
                    pred += self.ensemble_weights[j] * predictions[j][i];
                }

                let error = pred - y[i];

                for j in 0..n_estimators {
                    gradients[j] += error * predictions[j][i] / n_samples as f64;
                }
            }

            // Gradient step
            for j in 0..n_estimators {
                self.ensemble_weights[j] -= self.config.learning_rate * gradients[j];
            }

            // Proximal operator for L1 regularization
            let threshold = self.config.learning_rate * self.config.alpha_l1 * self.config.l1_ratio;
            for weight in &mut self.ensemble_weights {
                *weight = Self::soft_threshold_static(*weight, threshold);
            }

            // L2 regularization (closed form)
            let l2_shrinkage = 1.0
                / (1.0
                    + self.config.learning_rate
                        * self.config.alpha_l2
                        * (1.0 - self.config.l1_ratio));
            for weight in &mut self.ensemble_weights {
                *weight *= l2_shrinkage;
            }
        }

        Ok(())
    }

    /// Soft thresholding operator for L1 regularization
    fn soft_threshold(&self, x: f64, threshold: f64) -> f64 {
        Self::soft_threshold_static(x, threshold)
    }

    /// Static version of soft thresholding operator for L1 regularization
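    ///
    /// `S(x, t) = sign(x) * max(|x| - t, 0)`: values within `[-t, t]` are set to zero,
    /// everything else is shrunk toward zero by `t`.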
    fn soft_threshold_static(x: f64, threshold: f64) -> f64 {
        if x > threshold {
            x - threshold
        } else if x < -threshold {
            x + threshold
        } else {
            0.0
        }
    }

    /// Compute loss function (MSE + regularization)
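    ///
    /// `L = (1/n) * sum_i (yhat_i - y_i)^2 + alpha_l1 * l1_ratio * sum_j |w_j|
    ///      + alpha_l2 * (1 - l1_ratio) * sum_j w_j^2 / 2`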
    fn compute_loss(&self, predictions: &[Vec<f64>], y: &[f64]) -> f64 {
        let n_samples = y.len();
        let mut mse = 0.0;

        for i in 0..n_samples {
            let mut pred = 0.0;
            for j in 0..self.config.n_estimators {
                pred += self.ensemble_weights[j] * predictions[j][i];
            }
            let error = pred - y[i];
            mse += error * error;
        }
        mse /= n_samples as f64;

        // Add regularization terms
        let l1_penalty = self.config.alpha_l1
            * self.config.l1_ratio
            * self.ensemble_weights.iter().map(|&w| w.abs()).sum::<f64>();
        let l2_penalty = self.config.alpha_l2
            * (1.0 - self.config.l1_ratio)
            * self.ensemble_weights.iter().map(|&w| w * w).sum::<f64>()
            / 2.0;

        mse + l1_penalty + l2_penalty
    }

    /// Get ensemble weights
    pub fn get_ensemble_weights(&self) -> &[f64] {
        &self.ensemble_weights
    }

    /// Get regularization path
    pub fn get_regularization_path(&self) -> &[RegularizationStep] {
        &self.regularization_path
    }

    /// Get sparsity level (fraction of zero weights)
    pub fn get_sparsity(&self) -> f64 {
        let n_nonzero = self
            .ensemble_weights
            .iter()
            .filter(|&&w| w.abs() > 1e-10)
            .count();
        1.0 - (n_nonzero as f64 / self.ensemble_weights.len() as f64)
    }
}

impl DropoutEnsemble {
    pub fn new(config: RegularizedEnsembleConfig) -> Self {
        let rng = scirs2_core::random::seeded_rng(config.random_state.unwrap_or(42));

        Self {
            config,
            estimators: Vec::new(),
            dropout_masks: Vec::new(),
            rng,
        }
    }

    /// Train ensemble with dropout
    #[allow(non_snake_case)]
    pub fn fit(&mut self, X: &Array2<f64>, y: &Vec<f64>) -> SklResult<()> {
        self.estimators.clear();
        self.dropout_masks.clear();

        for _ in 0..self.config.n_estimators {
            // Generate dropout mask
            let mut mask = Vec::new();
            for _ in 0..X.shape()[1] {
                mask.push(gen_f64(&mut self.rng) > self.config.dropout_probability);
            }

            // Apply dropout to features
            let X_dropout = self.apply_dropout_mask(X, &mask)?;

            // Train estimator on dropout data
            let y_array = Array1::from_vec(y.clone());
            let estimator = GradientBoostingRegressor::builder()
                .n_estimators(50)
                .learning_rate(0.1)
                .max_depth(4)
                .build()
                .fit(&X_dropout, &y_array)?;

            self.estimators.push(Box::new(estimator));
            self.dropout_masks.push(mask);
        }

        Ok(())
    }

    /// Apply dropout mask to features
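    ///
    /// Dropped features are zeroed; kept features are rescaled by
    /// `1 / (1 - dropout_probability)` so the expected feature value is preserved
    /// (inverted-dropout scaling).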
    #[allow(non_snake_case)]
    fn apply_dropout_mask(&self, X: &Array2<f64>, mask: &[bool]) -> SklResult<Array2<f64>> {
        let shape = X.shape();
        let (n_samples, n_features) = (shape[0], shape[1]);
        let mut X_dropout = X.clone();

        for i in 0..n_samples {
            for j in 0..n_features {
                if !mask[j] {
                    X_dropout[[i, j]] = 0.0;
                } else {
                    // Scale by the keep probability to maintain expected values
                    X_dropout[[i, j]] /= 1.0 - self.config.dropout_probability;
                }
            }
        }

        Ok(X_dropout)
    }

    /// Predict with dropout ensemble
    #[allow(non_snake_case)]
    pub fn predict(&self, X: &Array2<f64>) -> SklResult<Vec<f64>> {
        if self.estimators.is_empty() {
            return Err(SklearsError::NotFitted {
                operation: "prediction".to_string(),
            });
        }

        let n_samples = X.shape()[0];
        let mut predictions = vec![0.0; n_samples];

        for (estimator, mask) in self.estimators.iter().zip(self.dropout_masks.iter()) {
            let X_masked = self.apply_dropout_mask(X, mask)?;
            let pred = estimator.predict(&X_masked)?;

            for (i, &p) in pred.iter().enumerate() {
                predictions[i] += p;
            }
        }

        // Average predictions
        for p in &mut predictions {
            *p /= self.estimators.len() as f64;
        }

        Ok(predictions)
    }
}

pub struct RegularizedEnsembleRegressorBuilder {
    config: RegularizedEnsembleConfig,
}

impl Default for RegularizedEnsembleRegressorBuilder {
    fn default() -> Self {
        Self::new()
    }
}

impl RegularizedEnsembleRegressorBuilder {
    pub fn new() -> Self {
        Self {
            config: RegularizedEnsembleConfig::default(),
        }
    }

    pub fn config(mut self, config: RegularizedEnsembleConfig) -> Self {
        self.config = config;
        self
    }

    pub fn n_estimators(mut self, n_estimators: usize) -> Self {
        self.config.n_estimators = n_estimators;
        self
    }

    pub fn alpha_l1(mut self, alpha: f64) -> Self {
        self.config.alpha_l1 = alpha;
        self
    }

    pub fn alpha_l2(mut self, alpha: f64) -> Self {
        self.config.alpha_l2 = alpha;
        self
    }

    pub fn weight_optimizer(mut self, optimizer: WeightOptimizer) -> Self {
        self.config.weight_optimizer = optimizer;
        self
    }

    pub fn build(self) -> RegularizedEnsembleRegressor {
        RegularizedEnsembleRegressor::new(self.config)
    }
}

impl Estimator for RegularizedEnsembleRegressor {
    type Config = RegularizedEnsembleConfig;
    type Error = SklearsError;
    type Float = f64;

    fn config(&self) -> &Self::Config {
        &self.config
    }
}

impl Fit<Array2<f64>, Vec<f64>> for RegularizedEnsembleRegressor {
    type Fitted = Self;

    #[allow(non_snake_case)]
    fn fit(mut self, X: &Array2<f64>, y: &Vec<f64>) -> SklResult<Self::Fitted> {
        // Train base estimators
        self.train_base_estimators(X, y)?;

        // Optimize ensemble weights with regularization
        self.optimize_ensemble_weights(X, y)?;

        self.is_fitted = true;
        Ok(self)
    }
}

impl Predict<Array2<f64>, Vec<f64>> for RegularizedEnsembleRegressor {
    #[allow(non_snake_case)]
    fn predict(&self, X: &Array2<f64>) -> SklResult<Vec<f64>> {
        if !self.is_fitted {
            return Err(SklearsError::NotFitted {
                operation: "prediction".to_string(),
            });
        }

        let base_predictions = self.get_base_predictions(X)?;
        let n_samples = X.shape()[0];
        let mut predictions = vec![0.0; n_samples];

        for i in 0..n_samples {
            for j in 0..self.config.n_estimators {
                predictions[i] += self.ensemble_weights[j] * base_predictions[j][i];
            }
        }

        Ok(predictions)
    }
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    #[test]
    fn test_regularized_config() {
        let config = RegularizedEnsembleConfig::builder()
            .n_estimators(5)
            .alpha_l1(0.1)
            .alpha_l2(0.2)
            .l1_ratio(0.7)
            .build();

        assert_eq!(config.n_estimators, 5);
        assert_eq!(config.alpha_l1, 0.1);
        assert_eq!(config.alpha_l2, 0.2);
        assert_eq!(config.l1_ratio, 0.7);
    }

    #[test]
    fn test_soft_threshold() {
        let config = RegularizedEnsembleConfig::default();
        let ensemble = RegularizedEnsembleRegressor::new(config);

        assert_eq!(ensemble.soft_threshold(1.0, 0.5), 0.5);
        assert_eq!(ensemble.soft_threshold(-1.0, 0.5), -0.5);
        assert_eq!(ensemble.soft_threshold(0.3, 0.5), 0.0);
        assert_eq!(ensemble.soft_threshold(-0.3, 0.5), 0.0);
    }

    #[test]
    fn test_regularized_ensemble_basic() {
        let config = RegularizedEnsembleConfig::builder()
            .n_estimators(3)
            .alpha_l1(0.1)
            .alpha_l2(0.1)
            .max_iterations(10)
            .random_state(42)
            .build();

        let ensemble = RegularizedEnsembleRegressor::new(config);

        // Test basic configuration
        assert_eq!(ensemble.config.n_estimators, 3);
        assert_eq!(ensemble.config.alpha_l1, 0.1);
        assert_eq!(ensemble.config.alpha_l2, 0.1);
        assert!(!ensemble.is_fitted);
    }

    #[test]
    #[allow(non_snake_case)]
    fn test_dropout_ensemble() {
        let config = RegularizedEnsembleConfig::builder()
            .n_estimators(3)
            .dropout_probability(0.2)
            .random_state(42)
            .build();

        let mut dropout_ensemble = DropoutEnsemble::new(config);

        let X = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = vec![1.0, 2.0, 3.0];

        dropout_ensemble.fit(&X, &y).unwrap();

        let X_test = Array2::from_shape_vec((1, 2), vec![7.0, 8.0]).unwrap();

        let predictions = dropout_ensemble.predict(&X_test).unwrap();
        assert_eq!(predictions.len(), 1);
    }

    #[test]
    fn test_sparsity_calculation() {
        let config = RegularizedEnsembleConfig::default();
        let mut ensemble = RegularizedEnsembleRegressor::new(config);

        // Set some weights to zero
        ensemble.ensemble_weights = vec![0.5, 0.0, 0.3, 0.0, 0.2];

        let sparsity = ensemble.get_sparsity();
        assert_eq!(sparsity, 0.4); // 2 out of 5 weights are zero
    }
}