use crate::decision_tree::{
    self, ClassificationCriterion, Node, build_classification_tree_with_feature_subset,
    build_regression_tree_with_feature_subset,
};
use ferrolearn_core::error::FerroError;
use ferrolearn_core::introspection::{HasClasses, HasFeatureImportances};
use ferrolearn_core::pipeline::{FittedPipelineEstimator, PipelineEstimator};
use ferrolearn_core::traits::{Fit, Predict};
use ndarray::{Array1, Array2};
use num_traits::{Float, FromPrimitive, ToPrimitive};
use rand::SeedableRng;
use rand::rngs::StdRng;
use rand::seq::index::sample as rand_sample_indices;
use rayon::prelude::*;

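/// Bagging (bootstrap aggregating) ensemble classifier built on decision trees.
///
/// Each of the `n_estimators` trees is trained on a random subset of the rows
/// (a bootstrap sample by default) and, optionally, a random subset of the
/// features. Predictions are made by majority vote across the trees, and the
/// trees are built in parallel via rayon.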
#[derive(Debug, Clone)]
pub struct BaggingClassifier<F> {
    /// Number of trees in the ensemble.
    pub n_estimators: usize,
    /// Fraction of samples drawn for each tree, in `(0.0, 1.0]`.
    pub max_samples: f64,
    /// Fraction of features drawn for each tree, in `(0.0, 1.0]`.
    pub max_features: f64,
    /// Whether samples are drawn with replacement.
    pub bootstrap: bool,
    /// Whether features are drawn with replacement.
    pub bootstrap_features: bool,
    /// Seed for reproducible tree construction.
    pub random_state: Option<u64>,
    /// Maximum depth of each tree (`None` for unlimited).
    pub max_depth: Option<usize>,
    _marker: std::marker::PhantomData<F>,
}

impl<F: Float> BaggingClassifier<F> {
    /// Creates a classifier with the default parameters (10 trees, full
    /// bootstrap samples, all features).
    #[must_use]
    pub fn new() -> Self {
        Self {
            n_estimators: 10,
            max_samples: 1.0,
            max_features: 1.0,
            bootstrap: true,
            bootstrap_features: false,
            random_state: None,
            max_depth: None,
            _marker: std::marker::PhantomData,
        }
    }

    #[must_use]
    pub fn with_n_estimators(mut self, n: usize) -> Self {
        self.n_estimators = n;
        self
    }

    #[must_use]
    pub fn with_max_samples(mut self, frac: f64) -> Self {
        self.max_samples = frac;
        self
    }

    #[must_use]
    pub fn with_max_features(mut self, frac: f64) -> Self {
        self.max_features = frac;
        self
    }

    #[must_use]
    pub fn with_bootstrap(mut self, bootstrap: bool) -> Self {
        self.bootstrap = bootstrap;
        self
    }

    #[must_use]
    pub fn with_bootstrap_features(mut self, bootstrap_features: bool) -> Self {
        self.bootstrap_features = bootstrap_features;
        self
    }

    #[must_use]
    pub fn with_random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }

    #[must_use]
    pub fn with_max_depth(mut self, max_depth: Option<usize>) -> Self {
        self.max_depth = max_depth;
        self
    }
}

impl<F: Float> Default for BaggingClassifier<F> {
    fn default() -> Self {
        Self::new()
    }
}

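/// A fitted [`BaggingClassifier`], produced by [`Fit::fit`].
///
/// Holds the trained trees, the feature subsets each tree was grown on, and
/// the class labels observed during fitting.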
#[derive(Debug, Clone)]
pub struct FittedBaggingClassifier<F> {
    /// The fitted trees, one flat node vector per estimator.
    trees: Vec<Vec<Node<F>>>,
    /// The feature indices each tree was trained on.
    feature_indices: Vec<Vec<usize>>,
    /// The distinct class labels seen during fitting, in ascending order.
    classes: Vec<usize>,
    /// Number of features in the training data.
    n_features: usize,
    /// Feature importances aggregated across the trees.
    feature_importances: Array1<F>,
}

impl<F: Float + Send + Sync + 'static> FittedBaggingClassifier<F> {
    #[must_use]
    pub fn trees(&self) -> &[Vec<Node<F>>] {
        &self.trees
    }

    #[must_use]
    pub fn n_features(&self) -> usize {
        self.n_features
    }

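    /// Returns the mean classification accuracy of `self.predict(x)` against `y`.
    ///
    /// # Errors
    ///
    /// Returns [`FerroError::ShapeMismatch`] if `y` does not contain one label
    /// per row of `x`.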
    pub fn score(&self, x: &Array2<F>, y: &Array1<usize>) -> Result<F, FerroError> {
        if x.nrows() != y.len() {
            return Err(FerroError::ShapeMismatch {
                expected: vec![x.nrows()],
                actual: vec![y.len()],
                context: "y length must match number of samples in X".into(),
            });
        }
        let preds = self.predict(x)?;
        Ok(crate::mean_accuracy(&preds, y))
    }

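    /// Returns class-membership probabilities of shape `(n_samples, n_classes)`,
    /// averaged over all trees. Leaves that carry a class distribution contribute
    /// it directly; plain leaves contribute a hard vote for their class.
    ///
    /// # Errors
    ///
    /// Returns [`FerroError::ShapeMismatch`] if `x` does not have the same number
    /// of features as the training data.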
    pub fn predict_proba(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
        if x.ncols() != self.n_features {
            return Err(FerroError::ShapeMismatch {
                expected: vec![self.n_features],
                actual: vec![x.ncols()],
                context: "number of features must match fitted model".into(),
            });
        }
        let n_samples = x.nrows();
        let n_classes = self.classes.len();
        let n_trees_f = F::from(self.trees.len()).unwrap();
        let mut proba = Array2::<F>::zeros((n_samples, n_classes));

        for i in 0..n_samples {
            let row = x.row(i);
            for (t, tree_nodes) in self.trees.iter().enumerate() {
                // Project the row onto the feature subset this tree was trained on.
                let feat_idx = &self.feature_indices[t];
                let sub_row: Vec<F> = feat_idx.iter().map(|&fi| row[fi]).collect();
                let sub_view = ndarray::Array1::from(sub_row);
                let leaf_idx = decision_tree::traverse(tree_nodes, &sub_view.view());
                match &tree_nodes[leaf_idx] {
                    // Soft vote: accumulate the leaf's class distribution.
                    Node::Leaf {
                        class_distribution: Some(dist),
                        ..
                    } => {
                        for (j, &p) in dist.iter().enumerate().take(n_classes) {
                            proba[[i, j]] = proba[[i, j]] + p;
                        }
                    }
                    // Hard vote: the leaf only stores a class label.
                    Node::Leaf { value, .. } => {
                        let class_idx = value.to_f64().map_or(0, |f| f.round() as usize);
                        if class_idx < n_classes {
                            proba[[i, class_idx]] = proba[[i, class_idx]] + F::one();
                        }
                    }
                    _ => {}
                }
            }
            // Average the accumulated votes over the number of trees.
            for j in 0..n_classes {
                proba[[i, j]] = proba[[i, j]] / n_trees_f;
            }
        }
        Ok(proba)
    }

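    /// Returns the element-wise logarithm of the probabilities from
    /// [`Self::predict_proba`].
    ///
    /// # Errors
    ///
    /// Propagates any error from [`Self::predict_proba`].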
    pub fn predict_log_proba(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
        let proba = self.predict_proba(x)?;
        Ok(crate::log_proba(&proba))
    }
}

impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, Array1<usize>> for BaggingClassifier<F> {
    type Fitted = FittedBaggingClassifier<F>;
    type Error = FerroError;

    fn fit(
        &self,
        x: &Array2<F>,
        y: &Array1<usize>,
    ) -> Result<FittedBaggingClassifier<F>, FerroError> {
        let (n_samples, n_features) = x.dim();

        if n_samples != y.len() {
            return Err(FerroError::ShapeMismatch {
                expected: vec![n_samples],
                actual: vec![y.len()],
                context: "y length must match number of samples in X".into(),
            });
        }
        if n_samples == 0 {
            return Err(FerroError::InsufficientSamples {
                required: 1,
                actual: 0,
                context: "BaggingClassifier requires at least one sample".into(),
            });
        }
        if self.n_estimators == 0 {
            return Err(FerroError::InvalidParameter {
                name: "n_estimators".into(),
                reason: "must be at least 1".into(),
            });
        }
        if self.max_samples <= 0.0 || self.max_samples > 1.0 {
            return Err(FerroError::InvalidParameter {
                name: "max_samples".into(),
                reason: "must be in (0.0, 1.0]".into(),
            });
        }
        if self.max_features <= 0.0 || self.max_features > 1.0 {
            return Err(FerroError::InvalidParameter {
                name: "max_features".into(),
                reason: "must be in (0.0, 1.0]".into(),
            });
        }

        // Collect the distinct class labels and remap y onto 0..n_classes.
        let mut classes: Vec<usize> = y.iter().copied().collect();
        classes.sort_unstable();
        classes.dedup();
        let n_classes = classes.len();

        let y_mapped: Vec<usize> = y
            .iter()
            .map(|&c| classes.iter().position(|&cl| cl == c).unwrap())
            .collect();

        let n_sample_draw = ((n_samples as f64) * self.max_samples).ceil().max(1.0) as usize;
        let n_feature_draw = ((n_features as f64) * self.max_features).ceil().max(1.0) as usize;
        let n_feature_draw = n_feature_draw.min(n_features);

        let params = decision_tree::TreeParams {
            max_depth: self.max_depth,
            min_samples_split: 2,
            min_samples_leaf: 1,
        };
        let bootstrap = self.bootstrap;
        let bootstrap_features = self.bootstrap_features;

        // Derive one seed per tree so that parallel tree construction stays
        // deterministic when a random_state is given.
        let tree_seeds: Vec<u64> = if let Some(seed) = self.random_state {
            let mut master_rng = StdRng::seed_from_u64(seed);
            (0..self.n_estimators)
                .map(|_| {
                    use rand::RngCore;
                    master_rng.next_u64()
                })
                .collect()
        } else {
            (0..self.n_estimators)
                .map(|_| {
                    use rand::RngCore;
                    rand::rng().next_u64()
                })
                .collect()
        };

        let results: Vec<(Vec<Node<F>>, Vec<usize>)> = tree_seeds
            .par_iter()
            .map(|&seed| {
                let mut rng = StdRng::seed_from_u64(seed);

                // Draw row indices with replacement (bootstrap) or without.
                let sample_indices: Vec<usize> = if bootstrap {
                    (0..n_sample_draw)
                        .map(|_| {
                            use rand::RngCore;
                            (rng.next_u64() as usize) % n_samples
                        })
                        .collect()
                } else {
                    rand_sample_indices(&mut rng, n_samples, n_sample_draw).into_vec()
                };

                // Draw feature indices with replacement, without, or keep them all.
                let feat_indices: Vec<usize> = if bootstrap_features {
                    (0..n_feature_draw)
                        .map(|_| {
                            use rand::RngCore;
                            (rng.next_u64() as usize) % n_features
                        })
                        .collect()
                } else if n_feature_draw == n_features {
                    (0..n_features).collect()
                } else {
                    rand_sample_indices(&mut rng, n_features, n_feature_draw).into_vec()
                };

                let tree = build_classification_tree_with_feature_subset(
                    x,
                    &y_mapped,
                    n_classes,
                    &sample_indices,
                    &feat_indices,
                    &params,
                    ClassificationCriterion::Gini,
                );

                (tree, feat_indices)
            })
            .collect();

        let (trees, feature_indices): (Vec<_>, Vec<_>) = results.into_iter().unzip();
        let feature_importances = decision_tree::aggregate_tree_importances(
            &trees,
            Some(&feature_indices),
            None,
            n_features,
        );

        Ok(FittedBaggingClassifier {
            trees,
            feature_indices,
            classes,
            n_features,
            feature_importances,
        })
    }
}

impl<F: Float + Send + Sync + 'static> HasFeatureImportances<F> for FittedBaggingClassifier<F> {
    fn feature_importances(&self) -> &Array1<F> {
        &self.feature_importances
    }
}

impl<F: Float + Send + Sync + 'static> Predict<Array2<F>> for FittedBaggingClassifier<F> {
    type Output = Array1<usize>;
    type Error = FerroError;

    fn predict(&self, x: &Array2<F>) -> Result<Array1<usize>, FerroError> {
        if x.ncols() != self.n_features {
            return Err(FerroError::ShapeMismatch {
                expected: vec![self.n_features],
                actual: vec![x.ncols()],
                context: "number of features must match fitted model".into(),
            });
        }

        let n_samples = x.nrows();
        let n_classes = self.classes.len();
        let mut predictions = Array1::zeros(n_samples);

        for i in 0..n_samples {
            let row = x.row(i);
            let mut votes = vec![0usize; n_classes];

            for (t, tree_nodes) in self.trees.iter().enumerate() {
                // Project the row onto the feature subset this tree was trained on.
                let feat_idx = &self.feature_indices[t];
                let sub_row: Vec<F> = feat_idx.iter().map(|&fi| row[fi]).collect();
                let sub_view = ndarray::Array1::from(sub_row);

                let leaf_idx = decision_tree::traverse(tree_nodes, &sub_view.view());
                if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] {
                    let class_idx = value.to_f64().map(|f| f.round() as usize).unwrap_or(0);
                    if class_idx < n_classes {
                        votes[class_idx] += 1;
                    }
                }
            }

            // Majority vote, mapped back to the original class label.
            let winner = votes
                .iter()
                .enumerate()
                .max_by_key(|&(_, &count)| count)
                .map(|(idx, _)| idx)
                .unwrap_or(0);
            predictions[i] = self.classes[winner];
        }

        Ok(predictions)
    }
}

impl<F: Float + Send + Sync + 'static> HasClasses for FittedBaggingClassifier<F> {
    fn classes(&self) -> &[usize] {
        &self.classes
    }

    fn n_classes(&self) -> usize {
        self.classes.len()
    }
}

impl<F: Float + ToPrimitive + FromPrimitive + Send + Sync + 'static> PipelineEstimator<F>
    for BaggingClassifier<F>
{
    fn fit_pipeline(
        &self,
        x: &Array2<F>,
        y: &Array1<F>,
    ) -> Result<Box<dyn FittedPipelineEstimator<F>>, FerroError> {
        let y_usize: Array1<usize> = y.mapv(|v| v.to_usize().unwrap_or(0));
        let fitted = self.fit(x, &y_usize)?;
        Ok(Box::new(FittedBaggingClassifierPipelineAdapter(fitted)))
    }
}

/// Adapter that exposes a [`FittedBaggingClassifier`] through the float-valued
/// pipeline API by converting predicted class labels to `F`.
struct FittedBaggingClassifierPipelineAdapter<F: Float + Send + Sync + 'static>(
    FittedBaggingClassifier<F>,
);

impl<F: Float + ToPrimitive + FromPrimitive + Send + Sync + 'static> FittedPipelineEstimator<F>
    for FittedBaggingClassifierPipelineAdapter<F>
{
    fn predict_pipeline(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
        let preds = self.0.predict(x)?;
        Ok(preds.mapv(|v| F::from_usize(v).unwrap_or_else(F::nan)))
    }
}

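/// Bagging (bootstrap aggregating) ensemble regressor built on decision trees.
///
/// Each of the `n_estimators` trees is trained on a random subset of the rows
/// (a bootstrap sample by default) and, optionally, a random subset of the
/// features. Predictions are the mean of the per-tree predictions, and the
/// trees are built in parallel via rayon.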
#[derive(Debug, Clone)]
pub struct BaggingRegressor<F> {
    /// Number of trees in the ensemble.
    pub n_estimators: usize,
    /// Fraction of samples drawn for each tree, in `(0.0, 1.0]`.
    pub max_samples: f64,
    /// Fraction of features drawn for each tree, in `(0.0, 1.0]`.
    pub max_features: f64,
    /// Whether samples are drawn with replacement.
    pub bootstrap: bool,
    /// Whether features are drawn with replacement.
    pub bootstrap_features: bool,
    /// Seed for reproducible tree construction.
    pub random_state: Option<u64>,
    /// Maximum depth of each tree (`None` for unlimited).
    pub max_depth: Option<usize>,
    _marker: std::marker::PhantomData<F>,
}

impl<F: Float> BaggingRegressor<F> {
    /// Creates a regressor with the default parameters (10 trees, full
    /// bootstrap samples, all features).
    #[must_use]
    pub fn new() -> Self {
        Self {
            n_estimators: 10,
            max_samples: 1.0,
            max_features: 1.0,
            bootstrap: true,
            bootstrap_features: false,
            random_state: None,
            max_depth: None,
            _marker: std::marker::PhantomData,
        }
    }

    #[must_use]
    pub fn with_n_estimators(mut self, n: usize) -> Self {
        self.n_estimators = n;
        self
    }

    #[must_use]
    pub fn with_max_samples(mut self, frac: f64) -> Self {
        self.max_samples = frac;
        self
    }

    #[must_use]
    pub fn with_max_features(mut self, frac: f64) -> Self {
        self.max_features = frac;
        self
    }

    #[must_use]
    pub fn with_bootstrap(mut self, bootstrap: bool) -> Self {
        self.bootstrap = bootstrap;
        self
    }

    #[must_use]
    pub fn with_bootstrap_features(mut self, bootstrap_features: bool) -> Self {
        self.bootstrap_features = bootstrap_features;
        self
    }

    #[must_use]
    pub fn with_random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }

    #[must_use]
    pub fn with_max_depth(mut self, max_depth: Option<usize>) -> Self {
        self.max_depth = max_depth;
        self
    }
}

impl<F: Float> Default for BaggingRegressor<F> {
    fn default() -> Self {
        Self::new()
    }
}

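/// A fitted [`BaggingRegressor`], produced by [`Fit::fit`].
///
/// Holds the trained trees and the feature subsets each tree was grown on.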
#[derive(Debug, Clone)]
pub struct FittedBaggingRegressor<F> {
    /// The fitted trees, one flat node vector per estimator.
    trees: Vec<Vec<Node<F>>>,
    /// The feature indices each tree was trained on.
    feature_indices: Vec<Vec<usize>>,
    /// Number of features in the training data.
    n_features: usize,
    /// Feature importances aggregated across the trees.
    feature_importances: Array1<F>,
}

impl<F: Float + Send + Sync + 'static> HasFeatureImportances<F> for FittedBaggingRegressor<F> {
    fn feature_importances(&self) -> &Array1<F> {
        &self.feature_importances
    }
}

impl<F: Float + Send + Sync + 'static> FittedBaggingRegressor<F> {
    #[must_use]
    pub fn trees(&self) -> &[Vec<Node<F>>] {
        &self.trees
    }

    #[must_use]
    pub fn n_features(&self) -> usize {
        self.n_features
    }

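    /// Returns the coefficient of determination (R²) of `self.predict(x)` against `y`.
    ///
    /// # Errors
    ///
    /// Returns [`FerroError::ShapeMismatch`] if `y` does not contain one target
    /// per row of `x`.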
    pub fn score(&self, x: &Array2<F>, y: &Array1<F>) -> Result<F, FerroError> {
        if x.nrows() != y.len() {
            return Err(FerroError::ShapeMismatch {
                expected: vec![x.nrows()],
                actual: vec![y.len()],
                context: "y length must match number of samples in X".into(),
            });
        }
        let preds = self.predict(x)?;
        Ok(crate::r2_score(&preds, y))
    }
}

impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, Array1<F>> for BaggingRegressor<F> {
    type Fitted = FittedBaggingRegressor<F>;
    type Error = FerroError;

    fn fit(&self, x: &Array2<F>, y: &Array1<F>) -> Result<FittedBaggingRegressor<F>, FerroError> {
        let (n_samples, n_features) = x.dim();

        if n_samples != y.len() {
            return Err(FerroError::ShapeMismatch {
                expected: vec![n_samples],
                actual: vec![y.len()],
                context: "y length must match number of samples in X".into(),
            });
        }
        if n_samples == 0 {
            return Err(FerroError::InsufficientSamples {
                required: 1,
                actual: 0,
                context: "BaggingRegressor requires at least one sample".into(),
            });
        }
        if self.n_estimators == 0 {
            return Err(FerroError::InvalidParameter {
                name: "n_estimators".into(),
                reason: "must be at least 1".into(),
            });
        }
        if self.max_samples <= 0.0 || self.max_samples > 1.0 {
            return Err(FerroError::InvalidParameter {
                name: "max_samples".into(),
                reason: "must be in (0.0, 1.0]".into(),
            });
        }
        if self.max_features <= 0.0 || self.max_features > 1.0 {
            return Err(FerroError::InvalidParameter {
                name: "max_features".into(),
                reason: "must be in (0.0, 1.0]".into(),
            });
        }

        let n_sample_draw = ((n_samples as f64) * self.max_samples).ceil().max(1.0) as usize;
        let n_feature_draw = ((n_features as f64) * self.max_features).ceil().max(1.0) as usize;
        let n_feature_draw = n_feature_draw.min(n_features);

        let params = decision_tree::TreeParams {
            max_depth: self.max_depth,
            min_samples_split: 2,
            min_samples_leaf: 1,
        };
        let bootstrap = self.bootstrap;
        let bootstrap_features = self.bootstrap_features;

        // Derive one seed per tree so that parallel tree construction stays
        // deterministic when a random_state is given.
        let tree_seeds: Vec<u64> = if let Some(seed) = self.random_state {
            let mut master_rng = StdRng::seed_from_u64(seed);
            (0..self.n_estimators)
                .map(|_| {
                    use rand::RngCore;
                    master_rng.next_u64()
                })
                .collect()
        } else {
            (0..self.n_estimators)
                .map(|_| {
                    use rand::RngCore;
                    rand::rng().next_u64()
                })
                .collect()
        };

        let results: Vec<(Vec<Node<F>>, Vec<usize>)> = tree_seeds
            .par_iter()
            .map(|&seed| {
                let mut rng = StdRng::seed_from_u64(seed);

                // Draw row indices with replacement (bootstrap) or without.
                let sample_indices: Vec<usize> = if bootstrap {
                    (0..n_sample_draw)
                        .map(|_| {
                            use rand::RngCore;
                            (rng.next_u64() as usize) % n_samples
                        })
                        .collect()
                } else {
                    rand_sample_indices(&mut rng, n_samples, n_sample_draw).into_vec()
                };

                // Draw feature indices with replacement, without, or keep them all.
                let feat_indices: Vec<usize> = if bootstrap_features {
                    (0..n_feature_draw)
                        .map(|_| {
                            use rand::RngCore;
                            (rng.next_u64() as usize) % n_features
                        })
                        .collect()
                } else if n_feature_draw == n_features {
                    (0..n_features).collect()
                } else {
                    rand_sample_indices(&mut rng, n_features, n_feature_draw).into_vec()
                };

                let tree = build_regression_tree_with_feature_subset(
                    x,
                    y,
                    &sample_indices,
                    &feat_indices,
                    &params,
                );

                (tree, feat_indices)
            })
            .collect();

        let (trees, feature_indices): (Vec<_>, Vec<_>) = results.into_iter().unzip();
        let feature_importances = decision_tree::aggregate_tree_importances(
            &trees,
            Some(&feature_indices),
            None,
            n_features,
        );

        Ok(FittedBaggingRegressor {
            trees,
            feature_indices,
            n_features,
            feature_importances,
        })
    }
}

impl<F: Float + Send + Sync + 'static> Predict<Array2<F>> for FittedBaggingRegressor<F> {
    type Output = Array1<F>;
    type Error = FerroError;

    fn predict(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
        if x.ncols() != self.n_features {
            return Err(FerroError::ShapeMismatch {
                expected: vec![self.n_features],
                actual: vec![x.ncols()],
                context: "number of features must match fitted model".into(),
            });
        }

        let n_samples = x.nrows();
        let n_trees_f = F::from(self.trees.len()).unwrap();
        let mut predictions = Array1::zeros(n_samples);

        for i in 0..n_samples {
            let row = x.row(i);
            let mut sum = F::zero();

            for (t, tree_nodes) in self.trees.iter().enumerate() {
                // Project the row onto the feature subset this tree was trained on.
                let feat_idx = &self.feature_indices[t];
                let sub_row: Vec<F> = feat_idx.iter().map(|&fi| row[fi]).collect();
                let sub_view = ndarray::Array1::from(sub_row);

                let leaf_idx = decision_tree::traverse(tree_nodes, &sub_view.view());
                if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] {
                    sum = sum + value;
                }
            }

            // Ensemble prediction is the mean of the per-tree leaf values.
            predictions[i] = sum / n_trees_f;
        }

        Ok(predictions)
    }
}

impl<F: Float + Send + Sync + 'static> PipelineEstimator<F> for BaggingRegressor<F> {
    fn fit_pipeline(
        &self,
        x: &Array2<F>,
        y: &Array1<F>,
    ) -> Result<Box<dyn FittedPipelineEstimator<F>>, FerroError> {
        let fitted = self.fit(x, y)?;
        Ok(Box::new(fitted))
    }
}

impl<F: Float + Send + Sync + 'static> FittedPipelineEstimator<F> for FittedBaggingRegressor<F> {
    fn predict_pipeline(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
        self.predict(x)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::array;

    #[test]
    fn test_bagging_classifier_simple() {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(20)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
        for i in 0..4 {
            assert_eq!(preds[i], 0);
        }
        for i in 4..8 {
            assert_eq!(preds[i], 1);
        }
    }

    #[test]
    fn test_bagging_classifier_reproducibility() {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(10)
            .with_random_state(123);

        let fitted1 = model.fit(&x, &y).unwrap();
        let fitted2 = model.fit(&x, &y).unwrap();

        let preds1 = fitted1.predict(&x).unwrap();
        let preds2 = fitted2.predict(&x).unwrap();

        assert_eq!(preds1, preds2);
    }

    #[test]
    fn test_bagging_classifier_has_classes() {
        let x = Array2::from_shape_vec(
            (6, 2),
            vec![1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0],
        )
        .unwrap();
        let y = array![0, 0, 0, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(5)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();

        assert_eq!(fitted.classes(), &[0, 1]);
        assert_eq!(fitted.n_classes(), 2);
    }

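    // Sanity checks for `predict_proba`, `score`, and `feature_importances`: a
    // minimal sketch that only asserts shapes and ranges, since exact values
    // depend on the random trees. The importances length is assumed here to
    // match the number of input features.
    #[test]
    fn test_bagging_classifier_proba_score_sanity() {
        let x = Array2::from_shape_vec(
            (6, 2),
            vec![1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0],
        )
        .unwrap();
        let y = array![0, 0, 0, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(10)
            .with_random_state(7);
        let fitted = model.fit(&x, &y).unwrap();

        // One probability row per sample, one column per class.
        let proba = fitted.predict_proba(&x).unwrap();
        assert_eq!(proba.dim(), (6, 2));

        // Mean accuracy always lies in [0, 1].
        let acc = fitted.score(&x, &y).unwrap();
        assert!((0.0..=1.0).contains(&acc));

        assert_eq!(fitted.feature_importances().len(), 2);
    }
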
    #[test]
    fn test_bagging_classifier_feature_subsample() {
        let x = Array2::from_shape_vec(
            (8, 4),
            vec![
                1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0,
                5.0, 0.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 7.0, 0.0, 0.0, 0.0, 8.0, 0.0, 0.0, 0.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(20)
            .with_max_features(0.5)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
    }

    #[test]
    fn test_bagging_classifier_no_bootstrap() {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(10)
            .with_bootstrap(false)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
        for i in 0..4 {
            assert_eq!(preds[i], 0);
        }
        for i in 4..8 {
            assert_eq!(preds[i], 1);
        }
    }

    #[test]
    fn test_bagging_classifier_shape_mismatch() {
        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![0, 0, 1];

        let model = BaggingClassifier::<f64>::new();
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_bagging_classifier_empty_data() {
        let x = Array2::<f64>::zeros((0, 2));
        let y = Array1::<usize>::zeros(0);

        let model = BaggingClassifier::<f64>::new();
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_bagging_classifier_invalid_max_samples() {
        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![0, 0, 1, 1];

        let model = BaggingClassifier::<f64>::new().with_max_samples(0.0);
        assert!(model.fit(&x, &y).is_err());

        let model = BaggingClassifier::<f64>::new().with_max_samples(1.5);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_bagging_classifier_predict_shape_mismatch() {
        let x_train = Array2::from_shape_vec(
            (6, 2),
            vec![1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0],
        )
        .unwrap();
        let y_train = array![0, 0, 0, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(5)
            .with_random_state(42);
        let fitted = model.fit(&x_train, &y_train).unwrap();

        let x_bad = Array2::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        assert!(fitted.predict(&x_bad).is_err());
    }

    #[test]
    fn test_bagging_classifier_multiclass() {
        let x = Array2::from_shape_vec((9, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
            .unwrap();
        let y = array![0, 0, 0, 1, 1, 1, 2, 2, 2];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(20)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 9);
        assert_eq!(fitted.n_classes(), 3);
    }

    #[test]
    fn test_bagging_classifier_with_max_depth() {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(20)
            .with_max_depth(Some(2))
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
    }

    #[test]
    fn test_bagging_regressor_simple() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);

        let model = BaggingRegressor::<f64>::new()
            .with_n_estimators(20)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 6);
        for i in 0..6 {
            assert!((preds[i] - y[i]).abs() < 2.0);
        }
    }

    #[test]
    fn test_bagging_regressor_reproducibility() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);

        let model = BaggingRegressor::<f64>::new()
            .with_n_estimators(10)
            .with_random_state(123);

        let fitted1 = model.fit(&x, &y).unwrap();
        let fitted2 = model.fit(&x, &y).unwrap();

        let preds1 = fitted1.predict(&x).unwrap();
        let preds2 = fitted2.predict(&x).unwrap();

        assert_eq!(preds1, preds2);
    }

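    // Sanity checks for the regressor's `score` and `feature_importances`: a
    // minimal sketch. R² by the standard definition cannot exceed 1.0, and the
    // importances length is assumed here to match the number of input features.
    #[test]
    fn test_bagging_regressor_score_importances_sanity() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);

        let model = BaggingRegressor::<f64>::new()
            .with_n_estimators(10)
            .with_random_state(7);
        let fitted = model.fit(&x, &y).unwrap();

        let r2 = fitted.score(&x, &y).unwrap();
        assert!(r2 <= 1.0);

        assert_eq!(fitted.feature_importances().len(), 1);
    }
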
    #[test]
    fn test_bagging_regressor_shape_mismatch() {
        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = Array1::from(vec![1.0, 2.0, 3.0]);

        let model = BaggingRegressor::<f64>::new();
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_bagging_regressor_empty_data() {
        let x = Array2::<f64>::zeros((0, 2));
        let y = Array1::<f64>::zeros(0);

        let model = BaggingRegressor::<f64>::new();
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_bagging_regressor_predict_shape_mismatch() {
        let x_train = Array2::from_shape_vec(
            (6, 2),
            vec![1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0],
        )
        .unwrap();
        let y_train = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);

        let model = BaggingRegressor::<f64>::new()
            .with_n_estimators(5)
            .with_random_state(42);
        let fitted = model.fit(&x_train, &y_train).unwrap();

        let x_bad = Array2::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        assert!(fitted.predict(&x_bad).is_err());
    }

    #[test]
    fn test_bagging_regressor_feature_subsample() {
        let x = Array2::from_shape_vec(
            (8, 4),
            vec![
                1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0,
                5.0, 0.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 7.0, 0.0, 0.0, 0.0, 8.0, 0.0, 0.0, 0.0,
            ],
        )
        .unwrap();
        let y = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);

        let model = BaggingRegressor::<f64>::new()
            .with_n_estimators(20)
            .with_max_features(0.5)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
    }

    #[test]
    fn test_bagging_regressor_with_max_depth() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);

        let model = BaggingRegressor::<f64>::new()
            .with_n_estimators(20)
            .with_max_depth(Some(2))
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 6);
    }

    #[test]
    fn test_bagging_classifier_default() {
        let model = BaggingClassifier::<f64>::default();
        assert_eq!(model.n_estimators, 10);
        assert!((model.max_samples - 1.0).abs() < f64::EPSILON);
        assert!((model.max_features - 1.0).abs() < f64::EPSILON);
        assert!(model.bootstrap);
        assert!(!model.bootstrap_features);
        assert!(model.random_state.is_none());
        assert!(model.max_depth.is_none());
    }

    #[test]
    fn test_bagging_regressor_default() {
        let model = BaggingRegressor::<f64>::default();
        assert_eq!(model.n_estimators, 10);
        assert!((model.max_samples - 1.0).abs() < f64::EPSILON);
        assert!((model.max_features - 1.0).abs() < f64::EPSILON);
        assert!(model.bootstrap);
        assert!(!model.bootstrap_features);
        assert!(model.random_state.is_none());
        assert!(model.max_depth.is_none());
    }

    #[test]
    fn test_bagging_classifier_zero_estimators() {
        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![0, 0, 1, 1];

        let model = BaggingClassifier::<f64>::new().with_n_estimators(0);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_bagging_regressor_zero_estimators() {
        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = Array1::from(vec![1.0, 2.0, 3.0, 4.0]);

        let model = BaggingRegressor::<f64>::new().with_n_estimators(0);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_bagging_classifier_bootstrap_features() {
        let x = Array2::from_shape_vec(
            (8, 4),
            vec![
                1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0,
                5.0, 0.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 7.0, 0.0, 0.0, 0.0, 8.0, 0.0, 0.0, 0.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(10)
            .with_max_features(0.5)
            .with_bootstrap_features(true)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
    }

    #[test]
    fn test_bagging_regressor_no_bootstrap() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);

        let model = BaggingRegressor::<f64>::new()
            .with_n_estimators(10)
            .with_bootstrap(false)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 6);
    }

    #[test]
    fn test_bagging_classifier_max_samples_subsample() {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = BaggingClassifier::<f64>::new()
            .with_n_estimators(20)
            .with_max_samples(0.5)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
    }
}