use crate::gradient_boosting::{GradientBoostingRegressor, TrainedGradientBoostingRegressor};
use scirs2_core::ndarray::{Array1, Array2};
use sklears_core::{
    error::Result as SklResult,
    prelude::{Predict, SklearsError},
    traits::{Estimator, Fit},
};

/// Draws a pseudo-uniform `f64` in `[0, 1)` from raw RNG bytes.
fn gen_f64(rng: &mut impl scirs2_core::random::RngCore) -> f64 {
    let mut bytes = [0u8; 8];
    rng.fill_bytes(&mut bytes);
    // Keep the top 53 bits so the value maps exactly onto an f64 mantissa;
    // reinterpreting raw bytes as an f64 would produce NaNs and infinities.
    (u64::from_le_bytes(bytes) >> 11) as f64 / (1u64 << 53) as f64
}

/// Draws a pseudo-uniform `usize` from `range` (assumes a non-empty range).
fn gen_range_usize(
    rng: &mut impl scirs2_core::random::RngCore,
    range: std::ops::Range<usize>,
) -> usize {
    let mut bytes = [0u8; 8];
    rng.fill_bytes(&mut bytes);
    let val = u64::from_le_bytes(bytes);
    // Modulo mapping has a slight bias for ranges that do not divide 2^64,
    // which is acceptable for sampling training indices here.
    range.start + (val as usize % (range.end - range.start))
}

/// Convenience alias for a boxed, trained base estimator.
type BoxedEstimator = Box<TrainedGradientBoostingRegressor>;

/// Configuration for regularized ensemble learning.
#[derive(Debug, Clone)]
pub struct RegularizedEnsembleConfig {
    /// Number of base estimators in the ensemble.
    pub n_estimators: usize,
    /// L1 (lasso) regularization strength.
    pub alpha_l1: f64,
    /// L2 (ridge) regularization strength.
    pub alpha_l2: f64,
    /// Elastic-net mixing parameter between L1 and L2 penalties, in [0, 1].
    pub l1_ratio: f64,
    /// Optimizer used to fit the ensemble weights.
    pub weight_optimizer: WeightOptimizer,
    /// Probability of dropping a feature in `DropoutEnsemble`.
    pub dropout_probability: f64,
    /// Whether to inject noise into training data for implicit regularization.
    pub noise_injection: bool,
    /// Variance of the injected noise.
    pub noise_variance: f64,
    /// Maximum number of optimization iterations.
    pub max_iterations: usize,
    /// Convergence tolerance for weight optimization.
    pub tolerance: f64,
    /// Learning rate for gradient-based optimizers.
    pub learning_rate: f64,
    /// Multiplicative weight decay applied during SGD.
    pub weight_decay: f64,
    /// Seed for reproducible randomness.
    pub random_state: Option<u64>,
}

impl Default for RegularizedEnsembleConfig {
    fn default() -> Self {
        Self {
            n_estimators: 10,
            alpha_l1: 0.1,
            alpha_l2: 0.1,
            l1_ratio: 0.5,
            weight_optimizer: WeightOptimizer::CoordinateDescent,
            dropout_probability: 0.1,
            noise_injection: false,
            noise_variance: 0.01,
            max_iterations: 1000,
            tolerance: 1e-6,
            learning_rate: 0.01,
            weight_decay: 0.0001,
            random_state: None,
        }
    }
}

/// Optimization algorithms for fitting the ensemble weights.
#[derive(Debug, Clone, PartialEq)]
pub enum WeightOptimizer {
    /// Cyclic coordinate descent with elastic-net soft thresholding.
    CoordinateDescent,
    /// Stochastic gradient descent with momentum.
    SGD,
    /// Adam adaptive-moment optimizer.
    Adam,
    /// RMSprop adaptive optimizer.
    RMSprop,
    /// Limited-memory BFGS.
    LBFGS,
    /// Proximal gradient (ISTA-style) updates.
    ProximalGradient,
    /// Alternating direction method of multipliers.
    ADMM,
}

/// Ensemble classifier with elastic-net regularized combination weights.
pub struct RegularizedEnsembleClassifier {
    config: RegularizedEnsembleConfig,
    base_estimators: Vec<BoxedEstimator>,
    ensemble_weights: Vec<f64>,
    feature_weights: Option<Vec<f64>>,
    optimizer_state: OptimizerState,
    regularization_path: Vec<RegularizationStep>,
    is_fitted: bool,
}

/// Ensemble regressor with elastic-net regularized combination weights.
pub struct RegularizedEnsembleRegressor {
    config: RegularizedEnsembleConfig,
    base_estimators: Vec<BoxedEstimator>,
    ensemble_weights: Vec<f64>,
    feature_weights: Option<Vec<f64>>,
    optimizer_state: OptimizerState,
    regularization_path: Vec<RegularizationStep>,
    is_fitted: bool,
}

/// Mutable state shared by the weight optimizers.
#[derive(Debug, Clone)]
pub struct OptimizerState {
    /// First-moment (momentum) estimates, one per estimator.
    pub momentum: Vec<f64>,
    /// Second-moment (variance) estimates, one per estimator.
    pub variance: Vec<f64>,
    /// Current iteration count.
    pub iteration: usize,
    /// Loss at the current iteration.
    pub current_loss: f64,
    /// Loss at the previous convergence check.
    pub previous_loss: f64,
    /// History of gradients, if recorded.
    pub gradient_history: Vec<Vec<f64>>,
    /// History of weights, if recorded.
    pub weight_history: Vec<Vec<f64>>,
}

impl Default for OptimizerState {
    fn default() -> Self {
        Self {
            momentum: Vec::new(),
            variance: Vec::new(),
            iteration: 0,
            current_loss: f64::INFINITY,
            previous_loss: f64::INFINITY,
            gradient_history: Vec::new(),
            weight_history: Vec::new(),
        }
    }
}

/// One point along the regularization path recorded during optimization.
#[derive(Debug, Clone)]
pub struct RegularizationStep {
    /// Combined regularization strength at this step.
    pub alpha: f64,
    /// Elastic-net mixing parameter at this step.
    pub l1_ratio: f64,
    /// Snapshot of the ensemble weights.
    pub weights: Vec<f64>,
    /// Objective value at this step.
    pub loss: f64,
    /// Number of weights with non-negligible magnitude.
    pub n_nonzero: usize,
}

/// Ensemble whose members are trained on randomly masked feature subsets.
pub struct DropoutEnsemble {
    config: RegularizedEnsembleConfig,
    estimators: Vec<BoxedEstimator>,
    dropout_masks: Vec<Vec<bool>>,
    rng: scirs2_core::random::CoreRandom<scirs2_core::random::rngs::StdRng>,
}

impl RegularizedEnsembleConfig {
    pub fn builder() -> RegularizedEnsembleConfigBuilder {
        RegularizedEnsembleConfigBuilder::default()
    }
}

#[derive(Default)]
pub struct RegularizedEnsembleConfigBuilder {
    config: RegularizedEnsembleConfig,
}

impl RegularizedEnsembleConfigBuilder {
    pub fn n_estimators(mut self, n_estimators: usize) -> Self {
        self.config.n_estimators = n_estimators;
        self
    }

    pub fn alpha_l1(mut self, alpha: f64) -> Self {
        self.config.alpha_l1 = alpha;
        self
    }

    pub fn alpha_l2(mut self, alpha: f64) -> Self {
        self.config.alpha_l2 = alpha;
        self
    }

    pub fn l1_ratio(mut self, ratio: f64) -> Self {
        self.config.l1_ratio = ratio.clamp(0.0, 1.0);
        self
    }

    pub fn weight_optimizer(mut self, optimizer: WeightOptimizer) -> Self {
        self.config.weight_optimizer = optimizer;
        self
    }

    pub fn dropout_probability(mut self, prob: f64) -> Self {
        self.config.dropout_probability = prob.clamp(0.0, 1.0);
        self
    }

    pub fn noise_injection(mut self, inject: bool) -> Self {
        self.config.noise_injection = inject;
        self
    }

    pub fn max_iterations(mut self, max_iter: usize) -> Self {
        self.config.max_iterations = max_iter;
        self
    }

    pub fn tolerance(mut self, tol: f64) -> Self {
        self.config.tolerance = tol;
        self
    }

    pub fn learning_rate(mut self, lr: f64) -> Self {
        self.config.learning_rate = lr;
        self
    }

    pub fn random_state(mut self, seed: u64) -> Self {
        self.config.random_state = Some(seed);
        self
    }

    pub fn build(self) -> RegularizedEnsembleConfig {
        self.config
    }
}

impl RegularizedEnsembleRegressor {
    pub fn new(config: RegularizedEnsembleConfig) -> Self {
        Self {
            config,
            base_estimators: Vec::new(),
            ensemble_weights: Vec::new(),
            feature_weights: None,
            optimizer_state: OptimizerState::default(),
            regularization_path: Vec::new(),
            is_fitted: false,
        }
    }

    pub fn builder() -> RegularizedEnsembleRegressorBuilder {
        RegularizedEnsembleRegressorBuilder::new()
    }

    /// Trains the base gradient-boosting estimators, optionally on noise-injected data.
    #[allow(non_snake_case)]
    fn train_base_estimators(&mut self, X: &Array2<f64>, y: &Vec<f64>) -> SklResult<()> {
        self.base_estimators.clear();

        for i in 0..self.config.n_estimators {
            let (X_train, y_train) = if self.config.noise_injection {
                self.inject_noise(X, y, i)?
            } else {
                (X.clone(), y.clone())
            };

            let y_train_array = Array1::from_vec(y_train);
            let estimator = GradientBoostingRegressor::builder()
                .n_estimators(50)
                .learning_rate(0.1)
                .max_depth(4)
                .build()
                .fit(&X_train, &y_train_array)?;

            self.base_estimators.push(Box::new(estimator));
        }

        // Start from uniform ensemble weights; the optimizer refines them later.
        self.ensemble_weights =
            vec![1.0 / self.config.n_estimators as f64; self.config.n_estimators];

        Ok(())
    }

    /// Adds small random perturbations to features and targets for regularization.
    #[allow(non_snake_case)]
    fn inject_noise(
        &self,
        X: &Array2<f64>,
        y: &Vec<f64>,
        seed_offset: usize,
    ) -> SklResult<(Array2<f64>, Vec<f64>)> {
        let seed = self.config.random_state.unwrap_or(42) + seed_offset as u64;
        let mut rng = scirs2_core::random::seeded_rng(seed);

        let shape = X.shape();
        let (n_samples, n_features) = (shape[0], shape[1]);
        let noise_std = self.config.noise_variance.sqrt();

        let mut X_noisy = X.clone();
        let mut y_noisy = y.clone();

        for i in 0..n_samples {
            for j in 0..n_features {
                // Zero-centered uniform noise scaled by the configured std;
                // uncentered noise would bias every feature upward.
                let noise = (gen_f64(&mut rng) - 0.5) * noise_std;
                X_noisy[[i, j]] += noise;
            }

            // Perturb targets more gently than features.
            let target_noise = (gen_f64(&mut rng) - 0.5) * noise_std * 0.1;
            y_noisy[i] += target_noise;
        }

        Ok((X_noisy, y_noisy))
    }

    /// Optimizes the ensemble weights with the configured algorithm.
    #[allow(non_snake_case)]
    fn optimize_ensemble_weights(&mut self, X: &Array2<f64>, y: &Vec<f64>) -> SklResult<()> {
        let base_predictions = self.get_base_predictions(X)?;

        self.optimizer_state = OptimizerState::default();
        self.optimizer_state.momentum = vec![0.0; self.config.n_estimators];
        self.optimizer_state.variance = vec![0.0; self.config.n_estimators];

        match self.config.weight_optimizer {
            WeightOptimizer::CoordinateDescent => {
                self.coordinate_descent_optimization(&base_predictions, y)?;
            }
            WeightOptimizer::SGD => {
                self.sgd_optimization(&base_predictions, y)?;
            }
            WeightOptimizer::Adam => {
                self.adam_optimization(&base_predictions, y)?;
            }
            WeightOptimizer::ProximalGradient => {
                self.proximal_gradient_optimization(&base_predictions, y)?;
            }
            _ => {
                // RMSprop, LBFGS, and ADMM are not implemented yet;
                // fall back to coordinate descent.
                self.coordinate_descent_optimization(&base_predictions, y)?;
            }
        }

        Ok(())
    }

    /// Collects each base estimator's predictions on `X`.
    #[allow(non_snake_case)]
    fn get_base_predictions(&self, X: &Array2<f64>) -> SklResult<Vec<Vec<f64>>> {
        let mut predictions = Vec::new();

        for estimator in &self.base_estimators {
            let pred = estimator.predict(X)?;
            predictions.push(pred.to_vec());
        }

        Ok(predictions)
    }

    /// Cyclic coordinate descent with elastic-net soft thresholding.
    fn coordinate_descent_optimization(
        &mut self,
        predictions: &[Vec<f64>],
        y: &[f64],
    ) -> SklResult<()> {
        let n_samples = y.len();
        let n_estimators = self.config.n_estimators;

        for _iteration in 0..self.config.max_iterations {
            let mut max_weight_change: f64 = 0.0;

            for j in 0..n_estimators {
                let old_weight = self.ensemble_weights[j];

                // Correlation of estimator j with the residual that excludes it.
                let mut partial_residual = 0.0;
                for i in 0..n_samples {
                    let mut ensemble_pred = 0.0;
                    for k in 0..n_estimators {
                        if k != j {
                            ensemble_pred += self.ensemble_weights[k] * predictions[k][i];
                        }
                    }
                    partial_residual += predictions[j][i] * (y[i] - ensemble_pred);
                }

                let mut l2_norm_sq = 0.0;
                for i in 0..n_samples {
                    l2_norm_sq += predictions[j][i] * predictions[j][i];
                }

                let l1_penalty = self.config.alpha_l1 * self.config.l1_ratio;
                let l2_penalty = self.config.alpha_l2 * (1.0 - self.config.l1_ratio);

                // Closed-form elastic-net coordinate update:
                // w_j = S(r_j, n * lambda_1) / (||p_j||^2 + n * lambda_2).
                let numerator =
                    self.soft_threshold(partial_residual, l1_penalty * n_samples as f64);
                let denominator = l2_norm_sq + l2_penalty * n_samples as f64;

                self.ensemble_weights[j] = if denominator > 0.0 {
                    numerator / denominator
                } else {
                    0.0
                };

                let weight_change = (self.ensemble_weights[j] - old_weight).abs();
                max_weight_change = max_weight_change.max(weight_change);
            }

            if max_weight_change < self.config.tolerance {
                break;
            }

            // Record the path so callers can inspect how sparsity evolves.
            let loss = self.compute_loss(predictions, y);
            let n_nonzero = self
                .ensemble_weights
                .iter()
                .filter(|&&w| w.abs() > 1e-10)
                .count();

            self.regularization_path.push(RegularizationStep {
                alpha: self.config.alpha_l1 + self.config.alpha_l2,
                l1_ratio: self.config.l1_ratio,
                weights: self.ensemble_weights.clone(),
                loss,
                n_nonzero,
            });
        }

        Ok(())
    }

    /// Stochastic gradient descent with momentum, one sample per step.
    fn sgd_optimization(&mut self, predictions: &[Vec<f64>], y: &[f64]) -> SklResult<()> {
        let mut rng = scirs2_core::random::seeded_rng(self.config.random_state.unwrap_or(42));

        let n_samples = y.len();
        let n_estimators = self.config.n_estimators;

        for iteration in 0..self.config.max_iterations {
            let sample_idx = gen_range_usize(&mut rng, 0..n_samples);

            let mut pred = 0.0;
            for j in 0..n_estimators {
                pred += self.ensemble_weights[j] * predictions[j][sample_idx];
            }

            let error = pred - y[sample_idx];

            for j in 0..n_estimators {
                let gradient = error * predictions[j][sample_idx];

                // Subgradients of the elastic-net penalty.
                let l1_grad =
                    self.config.alpha_l1 * self.config.l1_ratio * self.ensemble_weights[j].signum();
                let l2_grad =
                    self.config.alpha_l2 * (1.0 - self.config.l1_ratio) * self.ensemble_weights[j];

                let total_gradient = gradient + l1_grad + l2_grad;

                // Classical momentum update.
                self.optimizer_state.momentum[j] = 0.9 * self.optimizer_state.momentum[j]
                    + self.config.learning_rate * total_gradient;
                self.ensemble_weights[j] -= self.optimizer_state.momentum[j];
            }

            // Multiplicative weight decay.
            for weight in &mut self.ensemble_weights {
                *weight *= 1.0 - self.config.weight_decay;
            }

            // Check convergence on the full loss every 100 iterations.
            if iteration % 100 == 0 {
                let loss = self.compute_loss(predictions, y);
                if (self.optimizer_state.previous_loss - loss).abs() < self.config.tolerance {
                    break;
                }
                self.optimizer_state.previous_loss = loss;
            }
        }

        Ok(())
    }

    /// Adam optimizer with bias-corrected first and second moment estimates.
    fn adam_optimization(&mut self, predictions: &[Vec<f64>], y: &[f64]) -> SklResult<()> {
        let mut rng = scirs2_core::random::seeded_rng(self.config.random_state.unwrap_or(42));

        let n_samples = y.len();
        let n_estimators = self.config.n_estimators;
        let beta1 = 0.9;
        let beta2 = 0.999;
        let epsilon = 1e-8;

        for iteration in 0..self.config.max_iterations {
            let sample_idx = gen_range_usize(&mut rng, 0..n_samples);

            let mut pred = 0.0;
            for j in 0..n_estimators {
                pred += self.ensemble_weights[j] * predictions[j][sample_idx];
            }

            let error = pred - y[sample_idx];

            for j in 0..n_estimators {
                let gradient = error * predictions[j][sample_idx];

                let l1_grad =
                    self.config.alpha_l1 * self.config.l1_ratio * self.ensemble_weights[j].signum();
                let l2_grad =
                    self.config.alpha_l2 * (1.0 - self.config.l1_ratio) * self.ensemble_weights[j];

                let total_gradient = gradient + l1_grad + l2_grad;

                // Exponential moving averages of the gradient and its square.
                self.optimizer_state.momentum[j] =
                    beta1 * self.optimizer_state.momentum[j] + (1.0 - beta1) * total_gradient;
                self.optimizer_state.variance[j] = beta2 * self.optimizer_state.variance[j]
                    + (1.0 - beta2) * total_gradient * total_gradient;

                // Bias correction compensates for zero-initialized moments.
                let m_hat =
                    self.optimizer_state.momentum[j] / (1.0 - beta1.powi((iteration + 1) as i32));
                let v_hat =
                    self.optimizer_state.variance[j] / (1.0 - beta2.powi((iteration + 1) as i32));

                self.ensemble_weights[j] -=
                    self.config.learning_rate * m_hat / (v_hat.sqrt() + epsilon);
            }

            if iteration % 100 == 0 {
                let loss = self.compute_loss(predictions, y);
                if (self.optimizer_state.previous_loss - loss).abs() < self.config.tolerance {
                    break;
                }
                self.optimizer_state.previous_loss = loss;
            }
        }

        Ok(())
    }

    /// Proximal gradient (ISTA-style) optimization for the elastic-net objective.
    fn proximal_gradient_optimization(
        &mut self,
        predictions: &[Vec<f64>],
        y: &[f64],
    ) -> SklResult<()> {
        let n_samples = y.len();
        let n_estimators = self.config.n_estimators;

        for _iteration in 0..self.config.max_iterations {
            // Full gradient of the mean-squared-error term.
            let mut gradients = vec![0.0; n_estimators];

            for i in 0..n_samples {
                let mut pred = 0.0;
                for j in 0..n_estimators {
                    pred += self.ensemble_weights[j] * predictions[j][i];
                }

                let error = pred - y[i];

                for j in 0..n_estimators {
                    gradients[j] += error * predictions[j][i] / n_samples as f64;
                }
            }

            // Gradient step on the smooth part of the objective.
            for j in 0..n_estimators {
                self.ensemble_weights[j] -= self.config.learning_rate * gradients[j];
            }

            // Proximal step for the L1 penalty: soft thresholding.
            let threshold =
                self.config.learning_rate * self.config.alpha_l1 * self.config.l1_ratio;
            for weight in &mut self.ensemble_weights {
                *weight = Self::soft_threshold_static(*weight, threshold);
            }

            // Proximal step for the L2 penalty: uniform shrinkage.
            let l2_shrinkage = 1.0
                / (1.0
                    + self.config.learning_rate
                        * self.config.alpha_l2
                        * (1.0 - self.config.l1_ratio));
            for weight in &mut self.ensemble_weights {
                *weight *= l2_shrinkage;
            }
        }

        Ok(())
    }

    fn soft_threshold(&self, x: f64, threshold: f64) -> f64 {
        Self::soft_threshold_static(x, threshold)
    }

    /// Soft-thresholding operator: S(x, t) = sign(x) * max(|x| - t, 0).
    fn soft_threshold_static(x: f64, threshold: f64) -> f64 {
        if x > threshold {
            x - threshold
        } else if x < -threshold {
            x + threshold
        } else {
            0.0
        }
    }

    /// Elastic-net objective:
    /// MSE + alpha_l1 * r * ||w||_1 + alpha_l2 * (1 - r) * ||w||_2^2 / 2,
    /// where r is `l1_ratio`.
    fn compute_loss(&self, predictions: &[Vec<f64>], y: &[f64]) -> f64 {
        let n_samples = y.len();
        let mut mse = 0.0;

        for i in 0..n_samples {
            let mut pred = 0.0;
            for j in 0..self.config.n_estimators {
                pred += self.ensemble_weights[j] * predictions[j][i];
            }
            let error = pred - y[i];
            mse += error * error;
        }
        mse /= n_samples as f64;

        let l1_penalty = self.config.alpha_l1
            * self.config.l1_ratio
            * self.ensemble_weights.iter().map(|&w| w.abs()).sum::<f64>();
        let l2_penalty = self.config.alpha_l2
            * (1.0 - self.config.l1_ratio)
            * self.ensemble_weights.iter().map(|&w| w * w).sum::<f64>()
            / 2.0;

        mse + l1_penalty + l2_penalty
    }

    /// Returns the fitted ensemble weights.
    pub fn get_ensemble_weights(&self) -> &[f64] {
        &self.ensemble_weights
    }

    /// Returns the recorded regularization path.
    pub fn get_regularization_path(&self) -> &[RegularizationStep] {
        &self.regularization_path
    }

    /// Fraction of ensemble weights that are (numerically) zero.
    pub fn get_sparsity(&self) -> f64 {
        let n_nonzero = self
            .ensemble_weights
            .iter()
            .filter(|&&w| w.abs() > 1e-10)
            .count();
        1.0 - (n_nonzero as f64 / self.ensemble_weights.len() as f64)
    }
}

impl DropoutEnsemble {
    pub fn new(config: RegularizedEnsembleConfig) -> Self {
        let rng = scirs2_core::random::seeded_rng(config.random_state.unwrap_or(42));

        Self {
            config,
            estimators: Vec::new(),
            dropout_masks: Vec::new(),
            rng,
        }
    }

    #[allow(non_snake_case)]
    pub fn fit(&mut self, X: &Array2<f64>, y: &Vec<f64>) -> SklResult<()> {
        self.estimators.clear();
        self.dropout_masks.clear();

        for _ in 0..self.config.n_estimators {
            // Sample a feature mask: `true` keeps the feature.
            let mut mask = Vec::new();
            for _ in 0..X.shape()[1] {
                mask.push(gen_f64(&mut self.rng) > self.config.dropout_probability);
            }

            let X_dropout = self.apply_dropout_mask(X, &mask)?;

            let y_array = Array1::from_vec(y.clone());
            let estimator = GradientBoostingRegressor::builder()
                .n_estimators(50)
                .learning_rate(0.1)
                .max_depth(4)
                .build()
                .fit(&X_dropout, &y_array)?;

            self.estimators.push(Box::new(estimator));
            self.dropout_masks.push(mask);
        }

        Ok(())
    }

    /// Zeroes dropped features and rescales kept ones (inverted dropout).
    #[allow(non_snake_case)]
    fn apply_dropout_mask(&self, X: &Array2<f64>, mask: &[bool]) -> SklResult<Array2<f64>> {
        let shape = X.shape();
        let (n_samples, n_features) = (shape[0], shape[1]);
        let mut X_dropout = X.clone();

        for i in 0..n_samples {
            for j in 0..n_features {
                if !mask[j] {
                    X_dropout[[i, j]] = 0.0;
                } else {
                    // Rescale so the expected feature magnitude is unchanged.
                    X_dropout[[i, j]] /= 1.0 - self.config.dropout_probability;
                }
            }
        }

        Ok(X_dropout)
    }

    #[allow(non_snake_case)]
    pub fn predict(&self, X: &Array2<f64>) -> SklResult<Vec<f64>> {
        if self.estimators.is_empty() {
            return Err(SklearsError::NotFitted {
                operation: "prediction".to_string(),
            });
        }

        let n_samples = X.shape()[0];
        let mut predictions = vec![0.0; n_samples];

        // Each estimator predicts on inputs masked the same way it was trained.
        for (estimator, mask) in self.estimators.iter().zip(self.dropout_masks.iter()) {
            let X_masked = self.apply_dropout_mask(X, mask)?;
            let pred = estimator.predict(&X_masked)?;

            for (i, &p) in pred.iter().enumerate() {
                predictions[i] += p;
            }
        }

        // Average the per-estimator predictions.
        for p in &mut predictions {
            *p /= self.estimators.len() as f64;
        }

        Ok(predictions)
    }
}

pub struct RegularizedEnsembleRegressorBuilder {
    config: RegularizedEnsembleConfig,
}

impl Default for RegularizedEnsembleRegressorBuilder {
    fn default() -> Self {
        Self::new()
    }
}

impl RegularizedEnsembleRegressorBuilder {
    pub fn new() -> Self {
        Self {
            config: RegularizedEnsembleConfig::default(),
        }
    }

    pub fn config(mut self, config: RegularizedEnsembleConfig) -> Self {
        self.config = config;
        self
    }

    pub fn n_estimators(mut self, n_estimators: usize) -> Self {
        self.config.n_estimators = n_estimators;
        self
    }

    pub fn alpha_l1(mut self, alpha: f64) -> Self {
        self.config.alpha_l1 = alpha;
        self
    }

    pub fn alpha_l2(mut self, alpha: f64) -> Self {
        self.config.alpha_l2 = alpha;
        self
    }

    pub fn weight_optimizer(mut self, optimizer: WeightOptimizer) -> Self {
        self.config.weight_optimizer = optimizer;
        self
    }

    pub fn build(self) -> RegularizedEnsembleRegressor {
        RegularizedEnsembleRegressor::new(self.config)
    }
}

impl Estimator for RegularizedEnsembleRegressor {
    type Config = RegularizedEnsembleConfig;
    type Error = SklearsError;
    type Float = f64;

    fn config(&self) -> &Self::Config {
        &self.config
    }
}

impl Fit<Array2<f64>, Vec<f64>> for RegularizedEnsembleRegressor {
    type Fitted = Self;

    #[allow(non_snake_case)]
    fn fit(mut self, X: &Array2<f64>, y: &Vec<f64>) -> SklResult<Self::Fitted> {
        // Stage 1: fit the base learners.
        self.train_base_estimators(X, y)?;

        // Stage 2: fit the regularized combination weights.
        self.optimize_ensemble_weights(X, y)?;

        self.is_fitted = true;
        Ok(self)
    }
}

impl Predict<Array2<f64>, Vec<f64>> for RegularizedEnsembleRegressor {
    #[allow(non_snake_case)]
    fn predict(&self, X: &Array2<f64>) -> SklResult<Vec<f64>> {
        if !self.is_fitted {
            return Err(SklearsError::NotFitted {
                operation: "prediction".to_string(),
            });
        }

        // Weighted combination of the base estimators' predictions.
        let base_predictions = self.get_base_predictions(X)?;
        let n_samples = X.shape()[0];
        let mut predictions = vec![0.0; n_samples];

        for i in 0..n_samples {
            for j in 0..self.config.n_estimators {
                predictions[i] += self.ensemble_weights[j] * base_predictions[j][i];
            }
        }

        Ok(predictions)
    }
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    #[test]
    fn test_regularized_config() {
        let config = RegularizedEnsembleConfig::builder()
            .n_estimators(5)
            .alpha_l1(0.1)
            .alpha_l2(0.2)
            .l1_ratio(0.7)
            .build();

        assert_eq!(config.n_estimators, 5);
        assert_eq!(config.alpha_l1, 0.1);
        assert_eq!(config.alpha_l2, 0.2);
        assert_eq!(config.l1_ratio, 0.7);
    }

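    // Sanity-check sketch for the RNG helper: with the mantissa-based
    // conversion in `gen_f64`, every draw should land in [0, 1). It assumes
    // only that `seeded_rng` is deterministic, as the other tests here do.
    #[test]
    fn test_gen_f64_in_unit_interval() {
        let mut rng = scirs2_core::random::seeded_rng(7);
        for _ in 0..1000 {
            let v = gen_f64(&mut rng);
            assert!((0.0..1.0).contains(&v));
        }
    }
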
    #[test]
    fn test_soft_threshold() {
        let config = RegularizedEnsembleConfig::default();
        let ensemble = RegularizedEnsembleRegressor::new(config);

        assert_eq!(ensemble.soft_threshold(1.0, 0.5), 0.5);
        assert_eq!(ensemble.soft_threshold(-1.0, 0.5), -0.5);
        assert_eq!(ensemble.soft_threshold(0.3, 0.5), 0.0);
        assert_eq!(ensemble.soft_threshold(-0.3, 0.5), 0.0);
    }

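    // Minimal sketch pinning down the builder's clamping: `l1_ratio` and
    // `dropout_probability` are clamped into [0, 1] for out-of-range inputs.
    #[test]
    fn test_builder_clamps_ratios() {
        let config = RegularizedEnsembleConfig::builder()
            .l1_ratio(1.5)
            .dropout_probability(-0.2)
            .build();

        assert_eq!(config.l1_ratio, 1.0);
        assert_eq!(config.dropout_probability, 0.0);
    }
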
    #[test]
    fn test_regularized_ensemble_basic() {
        let config = RegularizedEnsembleConfig::builder()
            .n_estimators(3)
            .alpha_l1(0.1)
            .alpha_l2(0.1)
            .max_iterations(10)
            .random_state(42)
            .build();

        let ensemble = RegularizedEnsembleRegressor::new(config);

        assert_eq!(ensemble.config.n_estimators, 3);
        assert_eq!(ensemble.config.alpha_l1, 0.1);
        assert_eq!(ensemble.config.alpha_l2, 0.1);
        assert!(!ensemble.is_fitted);
    }

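    // End-to-end usage sketch: fit the regularized ensemble on a tiny dataset
    // and predict on it. This assumes the gradient-boosting base learner
    // accepts such small inputs, as the dropout test below already does.
    #[test]
    #[allow(non_snake_case)]
    fn test_regularized_ensemble_fit_predict() {
        let config = RegularizedEnsembleConfig::builder()
            .n_estimators(2)
            .max_iterations(10)
            .random_state(42)
            .build();

        let X =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0]).unwrap();
        let y = vec![1.0, 2.0, 3.0, 4.0];

        let fitted = RegularizedEnsembleRegressor::new(config).fit(&X, &y).unwrap();
        let predictions = fitted.predict(&X).unwrap();

        assert_eq!(predictions.len(), 4);
        assert_eq!(fitted.get_ensemble_weights().len(), 2);
    }
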
    #[test]
    #[allow(non_snake_case)]
    fn test_dropout_ensemble() {
        let config = RegularizedEnsembleConfig::builder()
            .n_estimators(3)
            .dropout_probability(0.2)
            .random_state(42)
            .build();

        let mut dropout_ensemble = DropoutEnsemble::new(config);

        let X = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = vec![1.0, 2.0, 3.0];

        dropout_ensemble.fit(&X, &y).unwrap();

        let X_test = Array2::from_shape_vec((1, 2), vec![7.0, 8.0]).unwrap();

        let predictions = dropout_ensemble.predict(&X_test).unwrap();
        assert_eq!(predictions.len(), 1);
    }

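    // Worked example for `compute_loss`: with weights chosen so the weighted
    // predictions match `y` exactly, the MSE term vanishes and the loss is
    // just the two penalty terms, hand-computed under the default l1_ratio of 0.5.
    #[test]
    fn test_compute_loss_penalties() {
        let config = RegularizedEnsembleConfig::builder()
            .n_estimators(2)
            .alpha_l1(0.1)
            .alpha_l2(0.1)
            .build();
        let mut ensemble = RegularizedEnsembleRegressor::new(config);
        ensemble.ensemble_weights = vec![0.5, 0.5];

        // 0.5 * 1.0 + 0.5 * 3.0 = 2.0 and 0.5 * 2.0 + 0.5 * 4.0 = 3.0 match y.
        let predictions = vec![vec![1.0, 2.0], vec![3.0, 4.0]];
        let y = vec![2.0, 3.0];

        let loss = ensemble.compute_loss(&predictions, &y);
        // l1 = 0.1 * 0.5 * (0.5 + 0.5) = 0.05
        // l2 = 0.1 * 0.5 * (0.25 + 0.25) / 2 = 0.0125
        assert!((loss - 0.0625).abs() < 1e-12);
    }
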
    #[test]
    fn test_sparsity_calculation() {
        let config = RegularizedEnsembleConfig::default();
        let mut ensemble = RegularizedEnsembleRegressor::new(config);

        // Two of the five weights are exactly zero.
        ensemble.ensemble_weights = vec![0.5, 0.0, 0.3, 0.0, 0.2];

        let sparsity = ensemble.get_sparsity();
        assert_eq!(sparsity, 0.4);
    }
}