use crate::decision_tree::{
    self, ClassificationCriterion, Node, build_classification_tree_with_feature_subset,
    build_regression_tree_with_feature_subset,
};
use ferrolearn_core::error::FerroError;
use ferrolearn_core::introspection::{HasClasses, HasFeatureImportances};
use ferrolearn_core::pipeline::{FittedPipelineEstimator, PipelineEstimator};
use ferrolearn_core::traits::{Fit, Predict};
use ndarray::{Array1, Array2};
use num_traits::{Float, FromPrimitive, ToPrimitive};
use rand::SeedableRng;
use rand::rngs::StdRng;
use rand::seq::index::sample as rand_sample_indices;
use rayon::prelude::*;

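/// Bagging (bootstrap aggregating) ensemble classifier built on decision trees.
///
/// Each of the `n_estimators` trees is trained on a random subset of the rows
/// (a bootstrap sample by default) and, optionally, a random subset of the
/// features. Predictions are made by majority vote across the trees, and the
/// trees are built in parallel via rayon.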
#[derive(Debug, Clone)]
pub struct BaggingClassifier<F> {
    /// Number of trees in the ensemble.
    pub n_estimators: usize,
    /// Fraction of samples drawn for each tree, in `(0.0, 1.0]`.
    pub max_samples: f64,
    /// Fraction of features drawn for each tree, in `(0.0, 1.0]`.
    pub max_features: f64,
    /// Whether samples are drawn with replacement.
    pub bootstrap: bool,
    /// Whether features are drawn with replacement.
    pub bootstrap_features: bool,
    /// Seed for reproducible tree construction.
    pub random_state: Option<u64>,
    /// Maximum depth of each tree (`None` for unlimited).
    pub max_depth: Option<usize>,
    _marker: std::marker::PhantomData<F>,
}

impl<F: Float> BaggingClassifier<F> {
    /// Creates a classifier with the default parameters (10 trees, full
    /// bootstrap samples, all features).
    #[must_use]
    pub fn new() -> Self {
        Self {
            n_estimators: 10,
            max_samples: 1.0,
            max_features: 1.0,
            bootstrap: true,
            bootstrap_features: false,
            random_state: None,
            max_depth: None,
            _marker: std::marker::PhantomData,
        }
    }

    #[must_use]
    pub fn with_n_estimators(mut self, n: usize) -> Self {
        self.n_estimators = n;
        self
    }

    #[must_use]
    pub fn with_max_samples(mut self, frac: f64) -> Self {
        self.max_samples = frac;
        self
    }

    #[must_use]
    pub fn with_max_features(mut self, frac: f64) -> Self {
        self.max_features = frac;
        self
    }

    #[must_use]
    pub fn with_bootstrap(mut self, bootstrap: bool) -> Self {
        self.bootstrap = bootstrap;
        self
    }

    #[must_use]
    pub fn with_bootstrap_features(mut self, bootstrap_features: bool) -> Self {
        self.bootstrap_features = bootstrap_features;
        self
    }

    #[must_use]
    pub fn with_random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }

    #[must_use]
    pub fn with_max_depth(mut self, max_depth: Option<usize>) -> Self {
        self.max_depth = max_depth;
        self
    }
}

impl<F: Float> Default for BaggingClassifier<F> {
    fn default() -> Self {
        Self::new()
    }
}

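/// A fitted [`BaggingClassifier`], produced by [`Fit::fit`].
///
/// Holds the trained trees, the feature subsets each tree was grown on, and
/// the class labels observed during fitting.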
#[derive(Debug, Clone)]
pub struct FittedBaggingClassifier<F> {
    /// The fitted trees, one flat node vector per estimator.
    trees: Vec<Vec<Node<F>>>,
    /// The feature indices each tree was trained on.
    feature_indices: Vec<Vec<usize>>,
    /// The distinct class labels seen during fitting, in ascending order.
    classes: Vec<usize>,
    /// Number of features in the training data.
    n_features: usize,
    /// Feature importances aggregated across the trees.
    feature_importances: Array1<F>,
}

impl<F: Float + Send + Sync + 'static> FittedBaggingClassifier<F> {
    #[must_use]
    pub fn trees(&self) -> &[Vec<Node<F>>] {
        &self.trees
    }

    #[must_use]
    pub fn n_features(&self) -> usize {
        self.n_features
    }

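    /// Returns the mean classification accuracy of `self.predict(x)` against `y`.
    ///
    /// # Errors
    ///
    /// Returns [`FerroError::ShapeMismatch`] if `y` does not contain one label
    /// per row of `x`.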
    pub fn score(&self, x: &Array2<F>, y: &Array1<usize>) -> Result<F, FerroError> {
        if x.nrows() != y.len() {
            return Err(FerroError::ShapeMismatch {
                expected: vec![x.nrows()],
                actual: vec![y.len()],
                context: "y length must match number of samples in X".into(),
            });
        }
        let preds = self.predict(x)?;
        Ok(crate::mean_accuracy(&preds, y))
    }

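    /// Returns class-membership probabilities of shape `(n_samples, n_classes)`,
    /// averaged over all trees. Leaves that carry a class distribution contribute
    /// it directly; plain leaves contribute a hard vote for their class.
    ///
    /// # Errors
    ///
    /// Returns [`FerroError::ShapeMismatch`] if `x` does not have the same number
    /// of features as the training data.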
    pub fn predict_proba(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
        if x.ncols() != self.n_features {
            return Err(FerroError::ShapeMismatch {
                expected: vec![self.n_features],
                actual: vec![x.ncols()],
                context: "number of features must match fitted model".into(),
            });
        }
        let n_samples = x.nrows();
        let n_classes = self.classes.len();
        let n_trees_f = F::from(self.trees.len()).unwrap();
        let mut proba = Array2::<F>::zeros((n_samples, n_classes));

        for i in 0..n_samples {
            let row = x.row(i);
            for (t, tree_nodes) in self.trees.iter().enumerate() {
                // Project the row onto the feature subset this tree was trained on.
                let feat_idx = &self.feature_indices[t];
                let sub_row: Vec<F> = feat_idx.iter().map(|&fi| row[fi]).collect();
                let sub_view = ndarray::Array1::from(sub_row);
                let leaf_idx = decision_tree::traverse(tree_nodes, &sub_view.view());
                match &tree_nodes[leaf_idx] {
                    // Soft vote: accumulate the leaf's class distribution.
                    Node::Leaf {
                        class_distribution: Some(dist),
                        ..
                    } => {
                        for (j, &p) in dist.iter().enumerate().take(n_classes) {
                            proba[[i, j]] = proba[[i, j]] + p;
                        }
                    }
                    // Hard vote: the leaf only stores a class label.
                    Node::Leaf { value, .. } => {
                        let class_idx = value.to_f64().map_or(0, |f| f.round() as usize);
                        if class_idx < n_classes {
                            proba[[i, class_idx]] = proba[[i, class_idx]] + F::one();
                        }
                    }
                    _ => {}
                }
            }
            // Average the accumulated votes over the number of trees.
            for j in 0..n_classes {
                proba[[i, j]] = proba[[i, j]] / n_trees_f;
            }
        }
        Ok(proba)
    }

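    /// Returns the element-wise logarithm of the probabilities from
    /// [`Self::predict_proba`].
    ///
    /// # Errors
    ///
    /// Propagates any error from [`Self::predict_proba`].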
    pub fn predict_log_proba(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
        let proba = self.predict_proba(x)?;
        Ok(crate::log_proba(&proba))
    }
}

impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, Array1<usize>> for BaggingClassifier<F> {
    type Fitted = FittedBaggingClassifier<F>;
    type Error = FerroError;

    fn fit(
        &self,
        x: &Array2<F>,
        y: &Array1<usize>,
    ) -> Result<FittedBaggingClassifier<F>, FerroError> {
        let (n_samples, n_features) = x.dim();

        if n_samples != y.len() {
            return Err(FerroError::ShapeMismatch {
                expected: vec![n_samples],
                actual: vec![y.len()],
                context: "y length must match number of samples in X".into(),
            });
        }
        if n_samples == 0 {
            return Err(FerroError::InsufficientSamples {
                required: 1,
                actual: 0,
                context: "BaggingClassifier requires at least one sample".into(),
            });
        }
        if self.n_estimators == 0 {
            return Err(FerroError::InvalidParameter {
                name: "n_estimators".into(),
                reason: "must be at least 1".into(),
            });
        }
        if self.max_samples <= 0.0 || self.max_samples > 1.0 {
            return Err(FerroError::InvalidParameter {
                name: "max_samples".into(),
                reason: "must be in (0.0, 1.0]".into(),
            });
        }
        if self.max_features <= 0.0 || self.max_features > 1.0 {
            return Err(FerroError::InvalidParameter {
                name: "max_features".into(),
                reason: "must be in (0.0, 1.0]".into(),
            });
        }

        // Collect the distinct class labels and remap y onto 0..n_classes.
        let mut classes: Vec<usize> = y.iter().copied().collect();
        classes.sort_unstable();
        classes.dedup();
        let n_classes = classes.len();

        let y_mapped: Vec<usize> = y
            .iter()
            .map(|&c| classes.iter().position(|&cl| cl == c).unwrap())
            .collect();

        let n_sample_draw = ((n_samples as f64) * self.max_samples).ceil().max(1.0) as usize;
        let n_feature_draw = ((n_features as f64) * self.max_features).ceil().max(1.0) as usize;
        let n_feature_draw = n_feature_draw.min(n_features);

        let params = decision_tree::TreeParams {
            max_depth: self.max_depth,
            min_samples_split: 2,
            min_samples_leaf: 1,
        };
        let bootstrap = self.bootstrap;
        let bootstrap_features = self.bootstrap_features;

        // Derive one seed per tree so that parallel tree construction stays
        // deterministic when a random_state is given.
        let tree_seeds: Vec<u64> = if let Some(seed) = self.random_state {
            let mut master_rng = StdRng::seed_from_u64(seed);
            (0..self.n_estimators)
                .map(|_| {
                    use rand::RngCore;
                    master_rng.next_u64()
                })
                .collect()
        } else {
            (0..self.n_estimators)
                .map(|_| {
                    use rand::RngCore;
                    rand::rng().next_u64()
                })
                .collect()
        };

        let results: Vec<(Vec<Node<F>>, Vec<usize>)> = tree_seeds
            .par_iter()
            .map(|&seed| {
                let mut rng = StdRng::seed_from_u64(seed);

                // Draw row indices with replacement (bootstrap) or without.
                let sample_indices: Vec<usize> = if bootstrap {
                    (0..n_sample_draw)
                        .map(|_| {
                            use rand::RngCore;
                            (rng.next_u64() as usize) % n_samples
                        })
                        .collect()
                } else {
                    rand_sample_indices(&mut rng, n_samples, n_sample_draw).into_vec()
                };

                // Draw feature indices with replacement, without, or keep them all.
                let feat_indices: Vec<usize> = if bootstrap_features {
                    (0..n_feature_draw)
                        .map(|_| {
                            use rand::RngCore;
                            (rng.next_u64() as usize) % n_features
                        })
                        .collect()
                } else if n_feature_draw == n_features {
                    (0..n_features).collect()
                } else {
                    rand_sample_indices(&mut rng, n_features, n_feature_draw).into_vec()
                };

                let tree = build_classification_tree_with_feature_subset(
                    x,
                    &y_mapped,
                    n_classes,
                    &sample_indices,
                    &feat_indices,
                    &params,
                    ClassificationCriterion::Gini,
                );

                (tree, feat_indices)
            })
            .collect();

        let (trees, feature_indices): (Vec<_>, Vec<_>) = results.into_iter().unzip();
        let feature_importances = decision_tree::aggregate_tree_importances(
            &trees,
            Some(&feature_indices),
            None,
            n_features,
        );

        Ok(FittedBaggingClassifier {
            trees,
            feature_indices,
            classes,
            n_features,
            feature_importances,
        })
    }
}

impl<F: Float + Send + Sync + 'static> HasFeatureImportances<F> for FittedBaggingClassifier<F> {
    fn feature_importances(&self) -> &Array1<F> {
        &self.feature_importances
    }
}

impl<F: Float + Send + Sync + 'static> Predict<Array2<F>> for FittedBaggingClassifier<F> {
    type Output = Array1<usize>;
    type Error = FerroError;

    fn predict(&self, x: &Array2<F>) -> Result<Array1<usize>, FerroError> {
        if x.ncols() != self.n_features {
            return Err(FerroError::ShapeMismatch {
                expected: vec![self.n_features],
                actual: vec![x.ncols()],
                context: "number of features must match fitted model".into(),
            });
        }

        let n_samples = x.nrows();
        let n_classes = self.classes.len();
        let mut predictions = Array1::zeros(n_samples);

        for i in 0..n_samples {
            let row = x.row(i);
            let mut votes = vec![0usize; n_classes];

            for (t, tree_nodes) in self.trees.iter().enumerate() {
                // Project the row onto the feature subset this tree was trained on.
                let feat_idx = &self.feature_indices[t];
                let sub_row: Vec<F> = feat_idx.iter().map(|&fi| row[fi]).collect();
                let sub_view = ndarray::Array1::from(sub_row);

                let leaf_idx = decision_tree::traverse(tree_nodes, &sub_view.view());
                if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] {
                    let class_idx = value.to_f64().map(|f| f.round() as usize).unwrap_or(0);
                    if class_idx < n_classes {
                        votes[class_idx] += 1;
                    }
                }
            }

            // Majority vote, mapped back to the original class label.
            let winner = votes
                .iter()
                .enumerate()
                .max_by_key(|&(_, &count)| count)
                .map(|(idx, _)| idx)
                .unwrap_or(0);
            predictions[i] = self.classes[winner];
        }

        Ok(predictions)
    }
}

impl<F: Float + Send + Sync + 'static> HasClasses for FittedBaggingClassifier<F> {
    fn classes(&self) -> &[usize] {
        &self.classes
    }

    fn n_classes(&self) -> usize {
        self.classes.len()
    }
}

impl<F: Float + ToPrimitive + FromPrimitive + Send + Sync + 'static> PipelineEstimator<F>
    for BaggingClassifier<F>
{
    fn fit_pipeline(
        &self,
        x: &Array2<F>,
        y: &Array1<F>,
    ) -> Result<Box<dyn FittedPipelineEstimator<F>>, FerroError> {
        let y_usize: Array1<usize> = y.mapv(|v| v.to_usize().unwrap_or(0));
        let fitted = self.fit(x, &y_usize)?;
        Ok(Box::new(FittedBaggingClassifierPipelineAdapter(fitted)))
    }
}

/// Adapter that exposes a [`FittedBaggingClassifier`] through the float-valued
/// pipeline API by converting predicted class labels to `F`.
struct FittedBaggingClassifierPipelineAdapter<F: Float + Send + Sync + 'static>(
    FittedBaggingClassifier<F>,
);

impl<F: Float + ToPrimitive + FromPrimitive + Send + Sync + 'static> FittedPipelineEstimator<F>
    for FittedBaggingClassifierPipelineAdapter<F>
{
    fn predict_pipeline(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
        let preds = self.0.predict(x)?;
        Ok(preds.mapv(|v| F::from_usize(v).unwrap_or_else(F::nan)))
    }
}

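/// Bagging (bootstrap aggregating) ensemble regressor built on decision trees.
///
/// Each of the `n_estimators` trees is trained on a random subset of the rows
/// (a bootstrap sample by default) and, optionally, a random subset of the
/// features. Predictions are the mean of the per-tree predictions, and the
/// trees are built in parallel via rayon.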
#[derive(Debug, Clone)]
pub struct BaggingRegressor<F> {
    /// Number of trees in the ensemble.
    pub n_estimators: usize,
    /// Fraction of samples drawn for each tree, in `(0.0, 1.0]`.
    pub max_samples: f64,
    /// Fraction of features drawn for each tree, in `(0.0, 1.0]`.
    pub max_features: f64,
    /// Whether samples are drawn with replacement.
    pub bootstrap: bool,
    /// Whether features are drawn with replacement.
    pub bootstrap_features: bool,
    /// Seed for reproducible tree construction.
    pub random_state: Option<u64>,
    /// Maximum depth of each tree (`None` for unlimited).
    pub max_depth: Option<usize>,
    _marker: std::marker::PhantomData<F>,
}

impl<F: Float> BaggingRegressor<F> {
    /// Creates a regressor with the default parameters (10 trees, full
    /// bootstrap samples, all features).
    #[must_use]
    pub fn new() -> Self {
        Self {
            n_estimators: 10,
            max_samples: 1.0,
            max_features: 1.0,
            bootstrap: true,
            bootstrap_features: false,
            random_state: None,
            max_depth: None,
            _marker: std::marker::PhantomData,
        }
    }

    #[must_use]
    pub fn with_n_estimators(mut self, n: usize) -> Self {
        self.n_estimators = n;
        self
    }

    #[must_use]
    pub fn with_max_samples(mut self, frac: f64) -> Self {
        self.max_samples = frac;
        self
    }

    #[must_use]
    pub fn with_max_features(mut self, frac: f64) -> Self {
        self.max_features = frac;
        self
    }

    #[must_use]
    pub fn with_bootstrap(mut self, bootstrap: bool) -> Self {
        self.bootstrap = bootstrap;
        self
    }

    #[must_use]
    pub fn with_bootstrap_features(mut self, bootstrap_features: bool) -> Self {
        self.bootstrap_features = bootstrap_features;
        self
    }

    #[must_use]
    pub fn with_random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }

    #[must_use]
    pub fn with_max_depth(mut self, max_depth: Option<usize>) -> Self {
        self.max_depth = max_depth;
        self
    }
}

impl<F: Float> Default for BaggingRegressor<F> {
    fn default() -> Self {
        Self::new()
    }
}

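/// A fitted [`BaggingRegressor`], produced by [`Fit::fit`].
///
/// Holds the trained trees and the feature subsets each tree was grown on.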
#[derive(Debug, Clone)]
pub struct FittedBaggingRegressor<F> {
    /// The fitted trees, one flat node vector per estimator.
    trees: Vec<Vec<Node<F>>>,
    /// The feature indices each tree was trained on.
    feature_indices: Vec<Vec<usize>>,
    /// Number of features in the training data.
    n_features: usize,
    /// Feature importances aggregated across the trees.
    feature_importances: Array1<F>,
}

impl<F: Float + Send + Sync + 'static> HasFeatureImportances<F> for FittedBaggingRegressor<F> {
    fn feature_importances(&self) -> &Array1<F> {
        &self.feature_importances
    }
}

impl<F: Float + Send + Sync + 'static> FittedBaggingRegressor<F> {
    #[must_use]
    pub fn trees(&self) -> &[Vec<Node<F>>] {
        &self.trees
    }

    #[must_use]
    pub fn n_features(&self) -> usize {
        self.n_features
    }

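    /// Returns the coefficient of determination (R²) of `self.predict(x)` against `y`.
    ///
    /// # Errors
    ///
    /// Returns [`FerroError::ShapeMismatch`] if `y` does not contain one target
    /// per row of `x`.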
    pub fn score(&self, x: &Array2<F>, y: &Array1<F>) -> Result<F, FerroError> {
        if x.nrows() != y.len() {
            return Err(FerroError::ShapeMismatch {
                expected: vec![x.nrows()],
                actual: vec![y.len()],
                context: "y length must match number of samples in X".into(),
            });
        }
        let preds = self.predict(x)?;
        Ok(crate::r2_score(&preds, y))
    }
}

impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, Array1<F>> for BaggingRegressor<F> {
    type Fitted = FittedBaggingRegressor<F>;
    type Error = FerroError;

    fn fit(&self, x: &Array2<F>, y: &Array1<F>) -> Result<FittedBaggingRegressor<F>, FerroError> {
        let (n_samples, n_features) = x.dim();

        if n_samples != y.len() {
            return Err(FerroError::ShapeMismatch {
                expected: vec![n_samples],
                actual: vec![y.len()],
                context: "y length must match number of samples in X".into(),
            });
        }
        if n_samples == 0 {
            return Err(FerroError::InsufficientSamples {
                required: 1,
                actual: 0,
                context: "BaggingRegressor requires at least one sample".into(),
            });
        }
        if self.n_estimators == 0 {
            return Err(FerroError::InvalidParameter {
                name: "n_estimators".into(),
                reason: "must be at least 1".into(),
            });
        }
        if self.max_samples <= 0.0 || self.max_samples > 1.0 {
            return Err(FerroError::InvalidParameter {
                name: "max_samples".into(),
                reason: "must be in (0.0, 1.0]".into(),
            });
        }
        if self.max_features <= 0.0 || self.max_features > 1.0 {
            return Err(FerroError::InvalidParameter {
                name: "max_features".into(),
                reason: "must be in (0.0, 1.0]".into(),
            });
        }

        let n_sample_draw = ((n_samples as f64) * self.max_samples).ceil().max(1.0) as usize;
        let n_feature_draw = ((n_features as f64) * self.max_features).ceil().max(1.0) as usize;
        let n_feature_draw = n_feature_draw.min(n_features);

        let params = decision_tree::TreeParams {
            max_depth: self.max_depth,
            min_samples_split: 2,
            min_samples_leaf: 1,
        };
        let bootstrap = self.bootstrap;
        let bootstrap_features = self.bootstrap_features;

        // Derive one seed per tree so that parallel tree construction stays
        // deterministic when a random_state is given.
        let tree_seeds: Vec<u64> = if let Some(seed) = self.random_state {
            let mut master_rng = StdRng::seed_from_u64(seed);
            (0..self.n_estimators)
                .map(|_| {
                    use rand::RngCore;
                    master_rng.next_u64()
                })
                .collect()
        } else {
            (0..self.n_estimators)
                .map(|_| {
                    use rand::RngCore;
                    rand::rng().next_u64()
                })
                .collect()
        };

        let results: Vec<(Vec<Node<F>>, Vec<usize>)> = tree_seeds
            .par_iter()
            .map(|&seed| {
                let mut rng = StdRng::seed_from_u64(seed);

                // Draw row indices with replacement (bootstrap) or without.
                let sample_indices: Vec<usize> = if bootstrap {
                    (0..n_sample_draw)
                        .map(|_| {
                            use rand::RngCore;
                            (rng.next_u64() as usize) % n_samples
                        })
                        .collect()
                } else {
                    rand_sample_indices(&mut rng, n_samples, n_sample_draw).into_vec()
                };

                // Draw feature indices with replacement, without, or keep them all.
                let feat_indices: Vec<usize> = if bootstrap_features {
                    (0..n_feature_draw)
                        .map(|_| {
                            use rand::RngCore;
                            (rng.next_u64() as usize) % n_features
                        })
                        .collect()
                } else if n_feature_draw == n_features {
                    (0..n_features).collect()
                } else {
                    rand_sample_indices(&mut rng, n_features, n_feature_draw).into_vec()
                };

                let tree = build_regression_tree_with_feature_subset(
                    x,
                    y,
                    &sample_indices,
                    &feat_indices,
                    &params,
                );

                (tree, feat_indices)
            })
            .collect();

        let (trees, feature_indices): (Vec<_>, Vec<_>) = results.into_iter().unzip();
        let feature_importances = decision_tree::aggregate_tree_importances(
            &trees,
            Some(&feature_indices),
            None,
            n_features,
        );

        Ok(FittedBaggingRegressor {
            trees,
            feature_indices,
            n_features,
            feature_importances,
        })
    }
}

impl<F: Float + Send + Sync + 'static> Predict<Array2<F>> for FittedBaggingRegressor<F> {
    type Output = Array1<F>;
    type Error = FerroError;

    fn predict(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
        if x.ncols() != self.n_features {
            return Err(FerroError::ShapeMismatch {
                expected: vec![self.n_features],
                actual: vec![x.ncols()],
                context: "number of features must match fitted model".into(),
            });
        }

        let n_samples = x.nrows();
        let n_trees_f = F::from(self.trees.len()).unwrap();
        let mut predictions = Array1::zeros(n_samples);

        for i in 0..n_samples {
            let row = x.row(i);
            let mut sum = F::zero();

            for (t, tree_nodes) in self.trees.iter().enumerate() {
                // Project the row onto the feature subset this tree was trained on.
                let feat_idx = &self.feature_indices[t];
                let sub_row: Vec<F> = feat_idx.iter().map(|&fi| row[fi]).collect();
                let sub_view = ndarray::Array1::from(sub_row);

                let leaf_idx = decision_tree::traverse(tree_nodes, &sub_view.view());
                if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] {
                    sum = sum + value;
                }
            }

            // Ensemble prediction is the mean of the per-tree leaf values.
            predictions[i] = sum / n_trees_f;
        }

        Ok(predictions)
    }
}

impl<F: Float + Send + Sync + 'static> PipelineEstimator<F> for BaggingRegressor<F> {
    fn fit_pipeline(
        &self,
        x: &Array2<F>,
        y: &Array1<F>,
    ) -> Result<Box<dyn FittedPipelineEstimator<F>>, FerroError> {
        let fitted = self.fit(x, y)?;
        Ok(Box::new(fitted))
    }
}

impl<F: Float + Send + Sync + 'static> FittedPipelineEstimator<F> for FittedBaggingRegressor<F> {
    fn predict_pipeline(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
        self.predict(x)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::array;

    #[test]
    fn test_bagging_classifier_simple() {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(20)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
        for i in 0..4 {
            assert_eq!(preds[i], 0);
        }
        for i in 4..8 {
            assert_eq!(preds[i], 1);
        }
    }

    #[test]
    fn test_bagging_classifier_reproducibility() {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(10)
            .with_random_state(123);

        let fitted1 = model.fit(&x, &y).unwrap();
        let fitted2 = model.fit(&x, &y).unwrap();

        let preds1 = fitted1.predict(&x).unwrap();
        let preds2 = fitted2.predict(&x).unwrap();

        assert_eq!(preds1, preds2);
    }

    #[test]
    fn test_bagging_classifier_has_classes() {
        let x = Array2::from_shape_vec(
            (6, 2),
            vec![1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0],
        )
        .unwrap();
        let y = array![0, 0, 0, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(5)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();

        assert_eq!(fitted.classes(), &[0, 1]);
        assert_eq!(fitted.n_classes(), 2);
    }

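    // Sanity checks for `predict_proba`, `score`, and `feature_importances`: a
    // minimal sketch that only asserts shapes and ranges, since exact values
    // depend on the random trees. The importances length is assumed here to
    // match the number of input features.
    #[test]
    fn test_bagging_classifier_proba_score_sanity() {
        let x = Array2::from_shape_vec(
            (6, 2),
            vec![1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0],
        )
        .unwrap();
        let y = array![0, 0, 0, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(10)
            .with_random_state(7);
        let fitted = model.fit(&x, &y).unwrap();

        // One probability row per sample, one column per class.
        let proba = fitted.predict_proba(&x).unwrap();
        assert_eq!(proba.dim(), (6, 2));

        // Mean accuracy always lies in [0, 1].
        let acc = fitted.score(&x, &y).unwrap();
        assert!((0.0..=1.0).contains(&acc));

        assert_eq!(fitted.feature_importances().len(), 2);
    }
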
    #[test]
    fn test_bagging_classifier_feature_subsample() {
        let x = Array2::from_shape_vec(
            (8, 4),
            vec![
                1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0,
                5.0, 0.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 7.0, 0.0, 0.0, 0.0, 8.0, 0.0, 0.0, 0.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(20)
            .with_max_features(0.5)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
    }

    #[test]
    fn test_bagging_classifier_no_bootstrap() {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(10)
            .with_bootstrap(false)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
        for i in 0..4 {
            assert_eq!(preds[i], 0);
        }
        for i in 4..8 {
            assert_eq!(preds[i], 1);
        }
    }

    #[test]
    fn test_bagging_classifier_shape_mismatch() {
        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![0, 0, 1];

        let model = BaggingClassifier::<f64>::new();
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_bagging_classifier_empty_data() {
        let x = Array2::<f64>::zeros((0, 2));
        let y = Array1::<usize>::zeros(0);

        let model = BaggingClassifier::<f64>::new();
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_bagging_classifier_invalid_max_samples() {
        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![0, 0, 1, 1];

        let model = BaggingClassifier::<f64>::new().with_max_samples(0.0);
        assert!(model.fit(&x, &y).is_err());

        let model = BaggingClassifier::<f64>::new().with_max_samples(1.5);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_bagging_classifier_predict_shape_mismatch() {
        let x_train = Array2::from_shape_vec(
            (6, 2),
            vec![1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0],
        )
        .unwrap();
        let y_train = array![0, 0, 0, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(5)
            .with_random_state(42);
        let fitted = model.fit(&x_train, &y_train).unwrap();

        let x_bad = Array2::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        assert!(fitted.predict(&x_bad).is_err());
    }

    #[test]
    fn test_bagging_classifier_multiclass() {
        let x = Array2::from_shape_vec((9, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
            .unwrap();
        let y = array![0, 0, 0, 1, 1, 1, 2, 2, 2];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(20)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 9);
        assert_eq!(fitted.n_classes(), 3);
    }

    #[test]
    fn test_bagging_classifier_with_max_depth() {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(20)
            .with_max_depth(Some(2))
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
    }

    #[test]
    fn test_bagging_regressor_simple() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);

        let model = BaggingRegressor::<f64>::new()
            .with_n_estimators(20)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 6);
        for i in 0..6 {
            assert!((preds[i] - y[i]).abs() < 2.0);
        }
    }

    #[test]
    fn test_bagging_regressor_reproducibility() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);

        let model = BaggingRegressor::<f64>::new()
            .with_n_estimators(10)
            .with_random_state(123);

        let fitted1 = model.fit(&x, &y).unwrap();
        let fitted2 = model.fit(&x, &y).unwrap();

        let preds1 = fitted1.predict(&x).unwrap();
        let preds2 = fitted2.predict(&x).unwrap();

        assert_eq!(preds1, preds2);
    }

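    // Sanity checks for the regressor's `score` and `feature_importances`: a
    // minimal sketch. R² by the standard definition cannot exceed 1.0, and the
    // importances length is assumed here to match the number of input features.
    #[test]
    fn test_bagging_regressor_score_importances_sanity() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);

        let model = BaggingRegressor::<f64>::new()
            .with_n_estimators(10)
            .with_random_state(7);
        let fitted = model.fit(&x, &y).unwrap();

        let r2 = fitted.score(&x, &y).unwrap();
        assert!(r2 <= 1.0);

        assert_eq!(fitted.feature_importances().len(), 1);
    }
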
    #[test]
    fn test_bagging_regressor_shape_mismatch() {
        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = Array1::from(vec![1.0, 2.0, 3.0]);

        let model = BaggingRegressor::<f64>::new();
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_bagging_regressor_empty_data() {
        let x = Array2::<f64>::zeros((0, 2));
        let y = Array1::<f64>::zeros(0);

        let model = BaggingRegressor::<f64>::new();
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_bagging_regressor_predict_shape_mismatch() {
        let x_train = Array2::from_shape_vec(
            (6, 2),
            vec![1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0],
        )
        .unwrap();
        let y_train = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);

        let model = BaggingRegressor::<f64>::new()
            .with_n_estimators(5)
            .with_random_state(42);
        let fitted = model.fit(&x_train, &y_train).unwrap();

        let x_bad = Array2::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        assert!(fitted.predict(&x_bad).is_err());
    }

    #[test]
    fn test_bagging_regressor_feature_subsample() {
        let x = Array2::from_shape_vec(
            (8, 4),
            vec![
                1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0,
                5.0, 0.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 7.0, 0.0, 0.0, 0.0, 8.0, 0.0, 0.0, 0.0,
            ],
        )
        .unwrap();
        let y = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);

        let model = BaggingRegressor::<f64>::new()
            .with_n_estimators(20)
            .with_max_features(0.5)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
    }

    #[test]
    fn test_bagging_regressor_with_max_depth() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);

        let model = BaggingRegressor::<f64>::new()
            .with_n_estimators(20)
            .with_max_depth(Some(2))
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 6);
    }

    #[test]
    fn test_bagging_classifier_default() {
        let model = BaggingClassifier::<f64>::default();
        assert_eq!(model.n_estimators, 10);
        assert!((model.max_samples - 1.0).abs() < f64::EPSILON);
        assert!((model.max_features - 1.0).abs() < f64::EPSILON);
        assert!(model.bootstrap);
        assert!(!model.bootstrap_features);
        assert!(model.random_state.is_none());
        assert!(model.max_depth.is_none());
    }

    #[test]
    fn test_bagging_regressor_default() {
        let model = BaggingRegressor::<f64>::default();
        assert_eq!(model.n_estimators, 10);
        assert!((model.max_samples - 1.0).abs() < f64::EPSILON);
        assert!((model.max_features - 1.0).abs() < f64::EPSILON);
        assert!(model.bootstrap);
        assert!(!model.bootstrap_features);
        assert!(model.random_state.is_none());
        assert!(model.max_depth.is_none());
    }

    #[test]
    fn test_bagging_classifier_zero_estimators() {
        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![0, 0, 1, 1];

        let model = BaggingClassifier::<f64>::new().with_n_estimators(0);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_bagging_regressor_zero_estimators() {
        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = Array1::from(vec![1.0, 2.0, 3.0, 4.0]);

        let model = BaggingRegressor::<f64>::new().with_n_estimators(0);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_bagging_classifier_bootstrap_features() {
        let x = Array2::from_shape_vec(
            (8, 4),
            vec![
                1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0,
                5.0, 0.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 7.0, 0.0, 0.0, 0.0, 8.0, 0.0, 0.0, 0.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(10)
            .with_max_features(0.5)
            .with_bootstrap_features(true)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
    }

    #[test]
    fn test_bagging_regressor_no_bootstrap() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);

        let model = BaggingRegressor::<f64>::new()
            .with_n_estimators(10)
            .with_bootstrap(false)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 6);
    }

    #[test]
    fn test_bagging_classifier_max_samples_subsample() {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(20)
            .with_max_samples(0.5)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
    }
}