// ferrolearn_tree/gradient_boosting.rs

//! Gradient boosting classifiers and regressors.
//!
//! This module provides [`GradientBoostingClassifier`] and [`GradientBoostingRegressor`],
//! which build ensembles of decision trees sequentially. Each tree fits the negative
//! gradient (pseudo-residuals) of the loss function, progressively reducing prediction error.
//!
//! # Regression Losses
//!
//! - **`LeastSquares`** (L2): mean squared error; pseudo-residuals are `y - F(x)`.
//! - **`Lad`** (L1): least absolute deviation; pseudo-residuals are `sign(y - F(x))`.
//! - **`Huber`**: a blend of L2 (for small residuals) and L1 (for large residuals),
//!   controlled by the `alpha` quantile parameter (default 0.9).
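//!
//! For instance, Huber loss and its quantile threshold are selected through the
//! builder. A minimal sketch (it assumes `RegressionLoss` is re-exported at the
//! crate root alongside the estimators; see the full fit/predict example below):
//!
//! ```
//! use ferrolearn_tree::{GradientBoostingRegressor, RegressionLoss};
//!
//! // Huber: quadratic inside the 0.9-quantile of absolute residuals, linear outside.
//! let model = GradientBoostingRegressor::<f64>::new()
//!     .with_loss(RegressionLoss::Huber)
//!     .with_huber_alpha(0.9);
//! ```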
//!
//! # Classification Loss
//!
//! - **`LogLoss`**: binary and multiclass logistic loss. For binary classification a
//!   single model is trained on log-odds; for *K*-class problems *K* trees are built
//!   per boosting round (one-vs-rest in probability space via softmax).
//!
//! # Examples
//!
//! ```
//! use ferrolearn_tree::GradientBoostingRegressor;
//! use ferrolearn_core::{Fit, Predict};
//! use ndarray::{array, Array2};
//!
//! let x = Array2::from_shape_vec((8, 1), vec![
//!     1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
//! ]).unwrap();
//! let y = array![1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0];
//!
//! let model = GradientBoostingRegressor::<f64>::new()
//!     .with_n_estimators(50)
//!     .with_learning_rate(0.1)
//!     .with_random_state(42);
//! let fitted = model.fit(&x, &y).unwrap();
//! let preds = fitted.predict(&x).unwrap();
//! assert_eq!(preds.len(), 8);
//! ```
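//!
//! Classification follows the same builder pattern. A minimal binary sketch,
//! mirroring the unit tests at the bottom of this module (it assumes
//! `GradientBoostingClassifier` is re-exported at the crate root like the
//! regressor):
//!
//! ```
//! use ferrolearn_tree::GradientBoostingClassifier;
//! use ferrolearn_core::{Fit, Predict};
//! use ndarray::{array, Array2};
//!
//! let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
//! // Class labels are `usize`.
//! let y = array![0_usize, 0, 0, 1, 1, 1];
//!
//! let model = GradientBoostingClassifier::<f64>::new()
//!     .with_n_estimators(20)
//!     .with_random_state(0);
//! let fitted = model.fit(&x, &y).unwrap();
//! let preds = fitted.predict(&x).unwrap();
//! assert_eq!(preds.len(), 6);
//! ```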

use ferrolearn_core::error::FerroError;
use ferrolearn_core::introspection::{HasClasses, HasFeatureImportances};
use ferrolearn_core::pipeline::{FittedPipelineEstimator, PipelineEstimator};
use ferrolearn_core::traits::{Fit, Predict};
use ndarray::{Array1, Array2};
use num_traits::{Float, FromPrimitive, ToPrimitive};
use rand::SeedableRng;
use rand::rngs::StdRng;
use rand::seq::index::sample as rand_sample_indices;

use crate::decision_tree::{
    self, Node, build_regression_tree_with_feature_subset, compute_feature_importances,
};

// ---------------------------------------------------------------------------
// Regression loss enum
// ---------------------------------------------------------------------------

/// Loss function for gradient boosting regression.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegressionLoss {
    /// Least squares (L2) loss.
    LeastSquares,
    /// Least absolute deviation (L1) loss.
    Lad,
    /// Huber loss: L2 for small residuals, L1 for large residuals.
    Huber,
}

/// Loss function for gradient boosting classification.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ClassificationLoss {
    /// Log-loss (logistic / cross-entropy) for binary and multiclass.
    LogLoss,
}

// ---------------------------------------------------------------------------
// GradientBoostingRegressor
// ---------------------------------------------------------------------------

/// Gradient boosting regressor.
///
/// Builds an additive model in a forward stage-wise fashion, fitting each
/// regression tree to the negative gradient of the loss function evaluated
/// on the current ensemble prediction.
///
/// # Type Parameters
///
/// - `F`: The floating-point type (`f32` or `f64`).
#[derive(Debug, Clone)]
pub struct GradientBoostingRegressor<F> {
    /// Number of boosting stages (trees).
    pub n_estimators: usize,
    /// Learning rate (shrinkage) applied to each tree's contribution.
    pub learning_rate: f64,
    /// Maximum depth of each tree.
    pub max_depth: Option<usize>,
    /// Minimum number of samples required to split an internal node.
    pub min_samples_split: usize,
    /// Minimum number of samples required in a leaf node.
    pub min_samples_leaf: usize,
    /// Fraction of samples to use for fitting each tree (stochastic boosting).
    pub subsample: f64,
    /// Loss function.
    pub loss: RegressionLoss,
    /// Alpha quantile for Huber loss (only used when `loss == Huber`).
    pub huber_alpha: f64,
    /// Random seed for reproducibility.
    pub random_state: Option<u64>,
    _marker: std::marker::PhantomData<F>,
}

impl<F: Float> GradientBoostingRegressor<F> {
    /// Create a new `GradientBoostingRegressor` with default settings.
    ///
    /// Defaults: `n_estimators = 100`, `learning_rate = 0.1`,
    /// `max_depth = Some(3)`, `min_samples_split = 2`,
    /// `min_samples_leaf = 1`, `subsample = 1.0`,
    /// `loss = LeastSquares`, `huber_alpha = 0.9`.
    #[must_use]
    pub fn new() -> Self {
        Self {
            n_estimators: 100,
            learning_rate: 0.1,
            max_depth: Some(3),
            min_samples_split: 2,
            min_samples_leaf: 1,
            subsample: 1.0,
            loss: RegressionLoss::LeastSquares,
            huber_alpha: 0.9,
            random_state: None,
            _marker: std::marker::PhantomData,
        }
    }

    /// Set the number of boosting stages.
    #[must_use]
    pub fn with_n_estimators(mut self, n: usize) -> Self {
        self.n_estimators = n;
        self
    }

    /// Set the learning rate (shrinkage).
    #[must_use]
    pub fn with_learning_rate(mut self, lr: f64) -> Self {
        self.learning_rate = lr;
        self
    }

    /// Set the maximum tree depth.
    #[must_use]
    pub fn with_max_depth(mut self, d: Option<usize>) -> Self {
        self.max_depth = d;
        self
    }

    /// Set the minimum number of samples to split a node.
    #[must_use]
    pub fn with_min_samples_split(mut self, n: usize) -> Self {
        self.min_samples_split = n;
        self
    }

    /// Set the minimum number of samples in a leaf.
    #[must_use]
    pub fn with_min_samples_leaf(mut self, n: usize) -> Self {
        self.min_samples_leaf = n;
        self
    }

    /// Set the subsample ratio (fraction of training data per tree).
    #[must_use]
    pub fn with_subsample(mut self, ratio: f64) -> Self {
        self.subsample = ratio;
        self
    }

    /// Set the loss function.
    #[must_use]
    pub fn with_loss(mut self, loss: RegressionLoss) -> Self {
        self.loss = loss;
        self
    }

    /// Set the alpha quantile for Huber loss.
    #[must_use]
    pub fn with_huber_alpha(mut self, alpha: f64) -> Self {
        self.huber_alpha = alpha;
        self
    }

    /// Set the random seed for reproducibility.
    #[must_use]
    pub fn with_random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }
}

impl<F: Float> Default for GradientBoostingRegressor<F> {
    fn default() -> Self {
        Self::new()
    }
}

// ---------------------------------------------------------------------------
// FittedGradientBoostingRegressor
// ---------------------------------------------------------------------------

/// A fitted gradient boosting regressor.
///
/// Stores the initial prediction (intercept) and the sequence of fitted trees.
/// Predictions are computed as `init + learning_rate * sum(tree_predictions)`.
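///
/// A short sketch of that decomposition via the accessors below (assuming the
/// crate-root re-exports used in the module-level examples):
///
/// ```
/// use ferrolearn_tree::GradientBoostingRegressor;
/// use ferrolearn_core::Fit;
/// use ndarray::{array, Array2};
///
/// let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
/// let y = array![1.0, 2.0, 3.0, 4.0];
/// let fitted = GradientBoostingRegressor::<f64>::new()
///     .with_n_estimators(10)
///     .with_random_state(0)
///     .fit(&x, &y)
///     .unwrap();
/// // One tree per boosting round; for L2 loss `init` is the target mean.
/// assert_eq!(fitted.trees().len(), 10);
/// assert!((fitted.init() - 2.5).abs() < 1e-12);
/// ```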
#[derive(Debug, Clone)]
pub struct FittedGradientBoostingRegressor<F> {
    /// Initial prediction (mean of training targets for L2 loss, median for L1/Huber).
    init: F,
    /// Learning rate used during training.
    learning_rate: F,
    /// Sequence of fitted trees (one per boosting round).
    trees: Vec<Vec<Node<F>>>,
    /// Number of features.
    n_features: usize,
    /// Per-feature importance scores (normalised).
    feature_importances: Array1<F>,
}

impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, Array1<F>> for GradientBoostingRegressor<F> {
    type Fitted = FittedGradientBoostingRegressor<F>;
    type Error = FerroError;

    /// Fit the gradient boosting regressor.
    ///
    /// # Errors
    ///
    /// Returns [`FerroError::ShapeMismatch`] if `x` and `y` have different
    /// numbers of samples.
    /// Returns [`FerroError::InsufficientSamples`] if there are no samples.
    /// Returns [`FerroError::InvalidParameter`] for invalid hyperparameters.
    fn fit(
        &self,
        x: &Array2<F>,
        y: &Array1<F>,
    ) -> Result<FittedGradientBoostingRegressor<F>, FerroError> {
        let (n_samples, n_features) = x.dim();

        if n_samples != y.len() {
            return Err(FerroError::ShapeMismatch {
                expected: vec![n_samples],
                actual: vec![y.len()],
                context: "y length must match number of samples in X".into(),
            });
        }
        if n_samples == 0 {
            return Err(FerroError::InsufficientSamples {
                required: 1,
                actual: 0,
                context: "GradientBoostingRegressor requires at least one sample".into(),
            });
        }
        if self.n_estimators == 0 {
            return Err(FerroError::InvalidParameter {
                name: "n_estimators".into(),
                reason: "must be at least 1".into(),
            });
        }
        if self.learning_rate <= 0.0 {
            return Err(FerroError::InvalidParameter {
                name: "learning_rate".into(),
                reason: "must be positive".into(),
            });
        }
        if self.subsample <= 0.0 || self.subsample > 1.0 {
            return Err(FerroError::InvalidParameter {
                name: "subsample".into(),
                reason: "must be in (0, 1]".into(),
            });
        }

        let lr = F::from(self.learning_rate).unwrap();
        let params = decision_tree::TreeParams {
            max_depth: self.max_depth,
            min_samples_split: self.min_samples_split,
            min_samples_leaf: self.min_samples_leaf,
        };

        // Initial prediction.
        let init = match self.loss {
            RegressionLoss::LeastSquares => {
                let sum: F = y.iter().copied().fold(F::zero(), |a, b| a + b);
                sum / F::from(n_samples).unwrap()
            }
            RegressionLoss::Lad | RegressionLoss::Huber => median_f(y),
        };

        // Current predictions for each sample.
        let mut f_vals = Array1::from_elem(n_samples, init);

        let all_features: Vec<usize> = (0..n_features).collect();
        let subsample_size = ((self.subsample * n_samples as f64).ceil() as usize)
            .max(1)
            .min(n_samples);

        let mut rng = if let Some(seed) = self.random_state {
            StdRng::seed_from_u64(seed)
        } else {
            use rand::RngCore;
            StdRng::seed_from_u64(rand::rng().next_u64())
        };

        let mut trees = Vec::with_capacity(self.n_estimators);

        for _ in 0..self.n_estimators {
            // Compute pseudo-residuals (negative gradient).
            let residuals = compute_regression_residuals(y, &f_vals, self.loss, self.huber_alpha);

            // Subsample indices.
            let sample_indices = if subsample_size < n_samples {
                rand_sample_indices(&mut rng, n_samples, subsample_size).into_vec()
            } else {
                (0..n_samples).collect()
            };

            // Build a regression tree on the pseudo-residuals.
            let tree = build_regression_tree_with_feature_subset(
                x,
                &residuals,
                &sample_indices,
                &all_features,
                &params,
            );

            // Update predictions.
            for i in 0..n_samples {
                let row = x.row(i);
                let leaf_idx = decision_tree::traverse(&tree, &row);
                if let Node::Leaf { value, .. } = tree[leaf_idx] {
                    f_vals[i] = f_vals[i] + lr * value;
                }
            }

            trees.push(tree);
        }

        // Compute feature importances across all trees.
        let mut total_importances = Array1::<F>::zeros(n_features);
        for tree_nodes in &trees {
            let tree_imp = compute_feature_importances(tree_nodes, n_features, n_samples);
            total_importances = total_importances + tree_imp;
        }
        let imp_sum: F = total_importances
            .iter()
            .copied()
            .fold(F::zero(), |a, b| a + b);
        if imp_sum > F::zero() {
            total_importances.mapv_inplace(|v| v / imp_sum);
        }

        Ok(FittedGradientBoostingRegressor {
            init,
            learning_rate: lr,
            trees,
            n_features,
            feature_importances: total_importances,
        })
    }
}

impl<F: Float + Send + Sync + 'static> FittedGradientBoostingRegressor<F> {
    /// Returns the initial prediction (intercept) of the boosted model.
    #[must_use]
    pub fn init(&self) -> F {
        self.init
    }

    /// Returns the learning rate used during training.
    #[must_use]
    pub fn learning_rate(&self) -> F {
        self.learning_rate
    }

    /// Returns a reference to the sequence of fitted trees.
    #[must_use]
    pub fn trees(&self) -> &[Vec<Node<F>>] {
        &self.trees
    }

    /// Returns the number of features the model was trained on.
    #[must_use]
    pub fn n_features(&self) -> usize {
        self.n_features
    }
}

impl<F: Float + Send + Sync + 'static> Predict<Array2<F>> for FittedGradientBoostingRegressor<F> {
    type Output = Array1<F>;
    type Error = FerroError;

    /// Predict target values.
    ///
    /// # Errors
    ///
    /// Returns [`FerroError::ShapeMismatch`] if the number of features does
    /// not match the fitted model.
    fn predict(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
        if x.ncols() != self.n_features {
            return Err(FerroError::ShapeMismatch {
                expected: vec![self.n_features],
                actual: vec![x.ncols()],
                context: "number of features must match fitted model".into(),
            });
        }

        let n_samples = x.nrows();
        let mut predictions = Array1::from_elem(n_samples, self.init);

        for i in 0..n_samples {
            let row = x.row(i);
            for tree_nodes in &self.trees {
                let leaf_idx = decision_tree::traverse(tree_nodes, &row);
                if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] {
                    predictions[i] = predictions[i] + self.learning_rate * value;
                }
            }
        }

        Ok(predictions)
    }
}

impl<F: Float + Send + Sync + 'static> HasFeatureImportances<F>
    for FittedGradientBoostingRegressor<F>
{
    fn feature_importances(&self) -> &Array1<F> {
        &self.feature_importances
    }
}

// Pipeline integration.
impl<F: Float + Send + Sync + 'static> PipelineEstimator<F> for GradientBoostingRegressor<F> {
    fn fit_pipeline(
        &self,
        x: &Array2<F>,
        y: &Array1<F>,
    ) -> Result<Box<dyn FittedPipelineEstimator<F>>, FerroError> {
        let fitted = self.fit(x, y)?;
        Ok(Box::new(fitted))
    }
}

impl<F: Float + Send + Sync + 'static> FittedPipelineEstimator<F>
    for FittedGradientBoostingRegressor<F>
{
    fn predict_pipeline(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
        self.predict(x)
    }
}

// ---------------------------------------------------------------------------
// GradientBoostingClassifier
// ---------------------------------------------------------------------------

/// Gradient boosting classifier.
///
/// For binary classification a single model is trained on log-odds residuals.
/// For multiclass (*K* classes), *K* regression trees are built per boosting
/// round (one-vs-rest in probability space via softmax).
///
/// # Type Parameters
///
/// - `F`: The floating-point type (`f32` or `f64`).
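///
/// # Examples
///
/// A multiclass sketch mirroring the unit tests (it assumes the crate-root
/// re-export used in the module-level examples):
///
/// ```
/// use ferrolearn_tree::GradientBoostingClassifier;
/// use ferrolearn_core::{Fit, Predict};
/// use ndarray::{array, Array2};
///
/// let x = Array2::from_shape_vec((9, 1), vec![
///     1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
/// ]).unwrap();
/// let y = array![0_usize, 0, 0, 1, 1, 1, 2, 2, 2];
///
/// let fitted = GradientBoostingClassifier::<f64>::new()
///     .with_n_estimators(30)
///     .with_random_state(42)
///     .fit(&x, &y)
///     .unwrap();
/// // One tree per class per round: trees()[k] holds the trees for class k.
/// assert_eq!(fitted.trees().len(), 3);
/// ```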
#[derive(Debug, Clone)]
pub struct GradientBoostingClassifier<F> {
    /// Number of boosting stages.
    pub n_estimators: usize,
    /// Learning rate (shrinkage).
    pub learning_rate: f64,
    /// Maximum depth of each tree.
    pub max_depth: Option<usize>,
    /// Minimum number of samples required to split an internal node.
    pub min_samples_split: usize,
    /// Minimum number of samples required in a leaf node.
    pub min_samples_leaf: usize,
    /// Fraction of samples to use for fitting each tree.
    pub subsample: f64,
    /// Classification loss function.
    pub loss: ClassificationLoss,
    /// Random seed for reproducibility.
    pub random_state: Option<u64>,
    _marker: std::marker::PhantomData<F>,
}

impl<F: Float> GradientBoostingClassifier<F> {
    /// Create a new `GradientBoostingClassifier` with default settings.
    ///
    /// Defaults: `n_estimators = 100`, `learning_rate = 0.1`,
    /// `max_depth = Some(3)`, `min_samples_split = 2`,
    /// `min_samples_leaf = 1`, `subsample = 1.0`,
    /// `loss = LogLoss`.
    #[must_use]
    pub fn new() -> Self {
        Self {
            n_estimators: 100,
            learning_rate: 0.1,
            max_depth: Some(3),
            min_samples_split: 2,
            min_samples_leaf: 1,
            subsample: 1.0,
            loss: ClassificationLoss::LogLoss,
            random_state: None,
            _marker: std::marker::PhantomData,
        }
    }

    /// Set the number of boosting stages.
    #[must_use]
    pub fn with_n_estimators(mut self, n: usize) -> Self {
        self.n_estimators = n;
        self
    }

    /// Set the learning rate (shrinkage).
    #[must_use]
    pub fn with_learning_rate(mut self, lr: f64) -> Self {
        self.learning_rate = lr;
        self
    }

    /// Set the maximum tree depth.
    #[must_use]
    pub fn with_max_depth(mut self, d: Option<usize>) -> Self {
        self.max_depth = d;
        self
    }

    /// Set the minimum number of samples to split a node.
    #[must_use]
    pub fn with_min_samples_split(mut self, n: usize) -> Self {
        self.min_samples_split = n;
        self
    }

    /// Set the minimum number of samples in a leaf.
    #[must_use]
    pub fn with_min_samples_leaf(mut self, n: usize) -> Self {
        self.min_samples_leaf = n;
        self
    }

    /// Set the subsample ratio.
    #[must_use]
    pub fn with_subsample(mut self, ratio: f64) -> Self {
        self.subsample = ratio;
        self
    }

    /// Set the random seed for reproducibility.
    #[must_use]
    pub fn with_random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }
}

impl<F: Float> Default for GradientBoostingClassifier<F> {
    fn default() -> Self {
        Self::new()
    }
}

// ---------------------------------------------------------------------------
// FittedGradientBoostingClassifier
// ---------------------------------------------------------------------------

/// A fitted gradient boosting classifier.
///
/// For binary classification, stores a single sequence of trees predicting log-odds.
/// For multiclass, stores `K` sequences of trees (one per class).
#[derive(Debug, Clone)]
pub struct FittedGradientBoostingClassifier<F> {
    /// Sorted unique class labels.
    classes: Vec<usize>,
    /// Initial predictions per class (log-odds or log-prior).
    init: Vec<F>,
    /// Learning rate.
    learning_rate: F,
    /// Trees: for binary, `trees[0]` has all trees. For multiclass,
    /// `trees[k]` has trees for class k.
    trees: Vec<Vec<Vec<Node<F>>>>,
    /// Number of features.
    n_features: usize,
    /// Per-feature importance scores (normalised).
    feature_importances: Array1<F>,
}

impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, Array1<usize>>
    for GradientBoostingClassifier<F>
{
    type Fitted = FittedGradientBoostingClassifier<F>;
    type Error = FerroError;

    /// Fit the gradient boosting classifier.
    ///
    /// # Errors
    ///
    /// Returns [`FerroError::ShapeMismatch`] if `x` and `y` have different
    /// numbers of samples.
    /// Returns [`FerroError::InsufficientSamples`] if there are no samples.
    /// Returns [`FerroError::InvalidParameter`] for invalid hyperparameters.
    fn fit(
        &self,
        x: &Array2<F>,
        y: &Array1<usize>,
    ) -> Result<FittedGradientBoostingClassifier<F>, FerroError> {
        let (n_samples, n_features) = x.dim();

        if n_samples != y.len() {
            return Err(FerroError::ShapeMismatch {
                expected: vec![n_samples],
                actual: vec![y.len()],
                context: "y length must match number of samples in X".into(),
            });
        }
        if n_samples == 0 {
            return Err(FerroError::InsufficientSamples {
                required: 1,
                actual: 0,
                context: "GradientBoostingClassifier requires at least one sample".into(),
            });
        }
        if self.n_estimators == 0 {
            return Err(FerroError::InvalidParameter {
                name: "n_estimators".into(),
                reason: "must be at least 1".into(),
            });
        }
        if self.learning_rate <= 0.0 {
            return Err(FerroError::InvalidParameter {
                name: "learning_rate".into(),
                reason: "must be positive".into(),
            });
        }
        if self.subsample <= 0.0 || self.subsample > 1.0 {
            return Err(FerroError::InvalidParameter {
                name: "subsample".into(),
                reason: "must be in (0, 1]".into(),
            });
        }

        // Determine unique classes.
        let mut classes: Vec<usize> = y.iter().copied().collect();
        classes.sort_unstable();
        classes.dedup();
        let n_classes = classes.len();

        if n_classes < 2 {
            return Err(FerroError::InvalidParameter {
                name: "y".into(),
                reason: "need at least 2 distinct classes".into(),
            });
        }

        let y_mapped: Vec<usize> = y
            .iter()
            .map(|&c| classes.iter().position(|&cl| cl == c).unwrap())
            .collect();

        let lr = F::from(self.learning_rate).unwrap();
        let params = decision_tree::TreeParams {
            max_depth: self.max_depth,
            min_samples_split: self.min_samples_split,
            min_samples_leaf: self.min_samples_leaf,
        };

        let all_features: Vec<usize> = (0..n_features).collect();
        let subsample_size = ((self.subsample * n_samples as f64).ceil() as usize)
            .max(1)
            .min(n_samples);

        let mut rng = if let Some(seed) = self.random_state {
            StdRng::seed_from_u64(seed)
        } else {
            use rand::RngCore;
            StdRng::seed_from_u64(rand::rng().next_u64())
        };

        if n_classes == 2 {
            // Binary classification: single model on log-odds.
            self.fit_binary(
                x,
                &y_mapped,
                n_samples,
                n_features,
                &classes,
                lr,
                &params,
                &all_features,
                subsample_size,
                &mut rng,
            )
        } else {
            // Multiclass: K trees per round.
            self.fit_multiclass(
                x,
                &y_mapped,
                n_samples,
                n_features,
                n_classes,
                &classes,
                lr,
                &params,
                &all_features,
                subsample_size,
                &mut rng,
            )
        }
    }
}

impl<F: Float + Send + Sync + 'static> GradientBoostingClassifier<F> {
    /// Fit binary classification (log-loss on log-odds).
    #[allow(clippy::too_many_arguments)]
    fn fit_binary(
        &self,
        x: &Array2<F>,
        y_mapped: &[usize],
        n_samples: usize,
        n_features: usize,
        classes: &[usize],
        lr: F,
        params: &decision_tree::TreeParams,
        all_features: &[usize],
        subsample_size: usize,
        rng: &mut StdRng,
    ) -> Result<FittedGradientBoostingClassifier<F>, FerroError> {
        // Positive-class proportion for the initial log-odds.
        let pos_count = y_mapped.iter().filter(|&&c| c == 1).count();
        let p = F::from(pos_count).unwrap() / F::from(n_samples).unwrap();
        let eps = F::from(1e-15).unwrap();
        let p_clipped = p.max(eps).min(F::one() - eps);
        let init_val = (p_clipped / (F::one() - p_clipped)).ln();

        let mut f_vals = Array1::from_elem(n_samples, init_val);
        let mut trees_seq: Vec<Vec<Node<F>>> = Vec::with_capacity(self.n_estimators);

        for _ in 0..self.n_estimators {
            // Compute probabilities from current log-odds.
            let probs: Vec<F> = f_vals.iter().map(|&fv| sigmoid(fv)).collect();

            // Pseudo-residuals: y - p.
            let mut residuals = Array1::zeros(n_samples);
            for i in 0..n_samples {
                let yi = F::from(y_mapped[i]).unwrap();
                residuals[i] = yi - probs[i];
            }

            // Subsample.
            let sample_indices = if subsample_size < n_samples {
                rand_sample_indices(rng, n_samples, subsample_size).into_vec()
            } else {
                (0..n_samples).collect()
            };

            // Build tree on residuals.
            let tree = build_regression_tree_with_feature_subset(
                x,
                &residuals,
                &sample_indices,
                all_features,
                params,
            );

            // Update f_vals.
            for i in 0..n_samples {
                let row = x.row(i);
                let leaf_idx = decision_tree::traverse(&tree, &row);
                if let Node::Leaf { value, .. } = tree[leaf_idx] {
                    f_vals[i] = f_vals[i] + lr * value;
                }
            }

            trees_seq.push(tree);
        }

        // Feature importances.
        let mut total_importances = Array1::<F>::zeros(n_features);
        for tree_nodes in &trees_seq {
            let tree_imp = compute_feature_importances(tree_nodes, n_features, n_samples);
            total_importances = total_importances + tree_imp;
        }
        let imp_sum: F = total_importances
            .iter()
            .copied()
            .fold(F::zero(), |a, b| a + b);
        if imp_sum > F::zero() {
            total_importances.mapv_inplace(|v| v / imp_sum);
        }

        Ok(FittedGradientBoostingClassifier {
            classes: classes.to_vec(),
            init: vec![init_val],
            learning_rate: lr,
            trees: vec![trees_seq],
            n_features,
            feature_importances: total_importances,
        })
    }

    /// Fit multiclass classification (K trees per round, softmax).
    #[allow(clippy::too_many_arguments)]
    fn fit_multiclass(
        &self,
        x: &Array2<F>,
        y_mapped: &[usize],
        n_samples: usize,
        n_features: usize,
        n_classes: usize,
        classes: &[usize],
        lr: F,
        params: &decision_tree::TreeParams,
        all_features: &[usize],
        subsample_size: usize,
        rng: &mut StdRng,
    ) -> Result<FittedGradientBoostingClassifier<F>, FerroError> {
        // Initial log-prior for each class.
        let mut class_counts = vec![0usize; n_classes];
        for &c in y_mapped {
            class_counts[c] += 1;
        }
        let n_f = F::from(n_samples).unwrap();
        let eps = F::from(1e-15).unwrap();
        let init_vals: Vec<F> = class_counts
            .iter()
            .map(|&cnt| {
                let p = (F::from(cnt).unwrap() / n_f).max(eps);
                p.ln()
            })
            .collect();

        // f_vals[k][i] = current raw score for class k, sample i.
        let mut f_vals: Vec<Array1<F>> = init_vals
            .iter()
            .map(|&init| Array1::from_elem(n_samples, init))
            .collect();

        let mut trees_per_class: Vec<Vec<Vec<Node<F>>>> = (0..n_classes)
            .map(|_| Vec::with_capacity(self.n_estimators))
            .collect();

        for _ in 0..self.n_estimators {
            // Compute softmax probabilities.
            let probs = softmax_matrix(&f_vals, n_samples, n_classes);

            // Subsample.
            let sample_indices = if subsample_size < n_samples {
                rand_sample_indices(rng, n_samples, subsample_size).into_vec()
            } else {
                (0..n_samples).collect()
            };

            // For each class, compute residuals and fit a tree.
            for k in 0..n_classes {
                let mut residuals = Array1::zeros(n_samples);
                for i in 0..n_samples {
                    let yi_k = if y_mapped[i] == k {
                        F::one()
                    } else {
                        F::zero()
                    };
                    residuals[i] = yi_k - probs[k][i];
                }

                let tree = build_regression_tree_with_feature_subset(
                    x,
                    &residuals,
                    &sample_indices,
                    all_features,
                    params,
                );

                // Update f_vals for class k.
                for (i, fv) in f_vals[k].iter_mut().enumerate() {
                    let row = x.row(i);
                    let leaf_idx = decision_tree::traverse(&tree, &row);
                    if let Node::Leaf { value, .. } = tree[leaf_idx] {
                        *fv = *fv + lr * value;
                    }
                }

                trees_per_class[k].push(tree);
            }
        }

        // Feature importances aggregated across all classes and rounds.
        let mut total_importances = Array1::<F>::zeros(n_features);
        for class_trees in &trees_per_class {
            for tree_nodes in class_trees {
                let tree_imp = compute_feature_importances(tree_nodes, n_features, n_samples);
                total_importances = total_importances + tree_imp;
            }
        }
        let imp_sum: F = total_importances
            .iter()
            .copied()
            .fold(F::zero(), |a, b| a + b);
        if imp_sum > F::zero() {
            total_importances.mapv_inplace(|v| v / imp_sum);
        }

        Ok(FittedGradientBoostingClassifier {
            classes: classes.to_vec(),
            init: init_vals,
            learning_rate: lr,
            trees: trees_per_class,
            n_features,
            feature_importances: total_importances,
        })
    }
}

impl<F: Float + Send + Sync + 'static> FittedGradientBoostingClassifier<F> {
    /// Returns the initial predictions per class (log-odds or log-prior).
    #[must_use]
    pub fn init(&self) -> &[F] {
        &self.init
    }

    /// Returns the learning rate used during training.
    #[must_use]
    pub fn learning_rate(&self) -> F {
        self.learning_rate
    }

    /// Returns a reference to the tree ensemble.
    ///
    /// For binary classification, `trees()[0]` contains all trees.
    /// For multiclass, `trees()[k]` contains trees for class `k`.
    #[must_use]
    pub fn trees(&self) -> &[Vec<Vec<Node<F>>>] {
        &self.trees
    }

    /// Returns the number of features the model was trained on.
    #[must_use]
    pub fn n_features(&self) -> usize {
        self.n_features
    }
}

impl<F: Float + Send + Sync + 'static> Predict<Array2<F>> for FittedGradientBoostingClassifier<F> {
    type Output = Array1<usize>;
    type Error = FerroError;

    /// Predict class labels.
    ///
    /// # Errors
    ///
    /// Returns [`FerroError::ShapeMismatch`] if the number of features does
    /// not match the fitted model.
    fn predict(&self, x: &Array2<F>) -> Result<Array1<usize>, FerroError> {
        if x.ncols() != self.n_features {
            return Err(FerroError::ShapeMismatch {
                expected: vec![self.n_features],
                actual: vec![x.ncols()],
                context: "number of features must match fitted model".into(),
            });
        }

        let n_samples = x.nrows();
        let n_classes = self.classes.len();

        if n_classes == 2 {
            // Binary: single log-odds model.
            let init = self.init[0];
            let mut predictions = Array1::zeros(n_samples);
            for i in 0..n_samples {
                let row = x.row(i);
                let mut f_val = init;
                for tree_nodes in &self.trees[0] {
                    let leaf_idx = decision_tree::traverse(tree_nodes, &row);
                    if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] {
                        f_val = f_val + self.learning_rate * value;
                    }
                }
                let prob = sigmoid(f_val);
                let class_idx = if prob >= F::from(0.5).unwrap() { 1 } else { 0 };
                predictions[i] = self.classes[class_idx];
            }
            Ok(predictions)
        } else {
            // Multiclass: K models, argmax of softmax.
            let mut predictions = Array1::zeros(n_samples);
            for i in 0..n_samples {
                let row = x.row(i);
                let mut scores = Vec::with_capacity(n_classes);
                for k in 0..n_classes {
                    let mut f_val = self.init[k];
                    for tree_nodes in &self.trees[k] {
                        let leaf_idx = decision_tree::traverse(tree_nodes, &row);
                        if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] {
                            f_val = f_val + self.learning_rate * value;
                        }
                    }
                    scores.push(f_val);
                }
                let best_k = scores
                    .iter()
                    .enumerate()
                    .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
                    .map_or(0, |(k, _)| k);
                predictions[i] = self.classes[best_k];
            }
            Ok(predictions)
        }
    }
}

impl<F: Float + Send + Sync + 'static> HasFeatureImportances<F>
    for FittedGradientBoostingClassifier<F>
{
    fn feature_importances(&self) -> &Array1<F> {
        &self.feature_importances
    }
}

impl<F: Float + Send + Sync + 'static> HasClasses for FittedGradientBoostingClassifier<F> {
    fn classes(&self) -> &[usize] {
        &self.classes
    }

    fn n_classes(&self) -> usize {
        self.classes.len()
    }
}

// Pipeline integration.
impl<F: Float + ToPrimitive + FromPrimitive + Send + Sync + 'static> PipelineEstimator<F>
    for GradientBoostingClassifier<F>
{
    fn fit_pipeline(
        &self,
        x: &Array2<F>,
        y: &Array1<F>,
    ) -> Result<Box<dyn FittedPipelineEstimator<F>>, FerroError> {
        let y_usize: Array1<usize> = y.mapv(|v| v.to_usize().unwrap_or(0));
        let fitted = self.fit(x, &y_usize)?;
        Ok(Box::new(FittedGbcPipelineAdapter(fitted)))
    }
}

/// Pipeline adapter for `FittedGradientBoostingClassifier<F>`.
struct FittedGbcPipelineAdapter<F: Float + Send + Sync + 'static>(
    FittedGradientBoostingClassifier<F>,
);

impl<F: Float + ToPrimitive + FromPrimitive + Send + Sync + 'static> FittedPipelineEstimator<F>
    for FittedGbcPipelineAdapter<F>
{
    fn predict_pipeline(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
        let preds = self.0.predict(x)?;
        Ok(preds.mapv(|v| F::from_usize(v).unwrap_or_else(F::nan)))
    }
}

// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------

/// Sigmoid function: 1 / (1 + exp(-x)).
fn sigmoid<F: Float>(x: F) -> F {
    F::one() / (F::one() + (-x).exp())
}

/// Compute softmax probabilities for each class across all samples.
///
/// Returns `probs[k][i]` = probability of class k for sample i.
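///
/// For sample `i`, with stabiliser `m_i = max_k f_vals[k][i]`, this computes
/// `probs[k][i] = exp(f_vals[k][i] - m_i) / Σ_j exp(f_vals[j][i] - m_i)`.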
fn softmax_matrix<F: Float>(
    f_vals: &[Array1<F>],
    n_samples: usize,
    n_classes: usize,
) -> Vec<Vec<F>> {
    let mut probs: Vec<Vec<F>> = vec![vec![F::zero(); n_samples]; n_classes];

    for i in 0..n_samples {
        // Find max for numerical stability.
        let max_val = (0..n_classes)
            .map(|k| f_vals[k][i])
            .fold(F::neg_infinity(), |a, b| if b > a { b } else { a });

        let mut sum = F::zero();
        let mut exps = vec![F::zero(); n_classes];
        for k in 0..n_classes {
            exps[k] = (f_vals[k][i] - max_val).exp();
            sum = sum + exps[k];
        }

        let eps = F::from(1e-15).unwrap();
        if sum < eps {
            sum = eps;
        }

        for k in 0..n_classes {
            probs[k][i] = exps[k] / sum;
        }
    }

    probs
}

/// Compute the median of an Array1.
fn median_f<F: Float>(arr: &Array1<F>) -> F {
    let mut sorted: Vec<F> = arr.iter().copied().collect();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let n = sorted.len();
    if n == 0 {
        return F::zero();
    }
    if n % 2 == 1 {
        sorted[n / 2]
    } else {
        (sorted[n / 2 - 1] + sorted[n / 2]) / F::from(2.0).unwrap()
    }
}

/// Compute the quantile of a slice at level `alpha` (0..1).
fn quantile_f<F: Float>(vals: &[F], alpha: f64) -> F {
    if vals.is_empty() {
        return F::zero();
    }
    let mut sorted: Vec<F> = vals.to_vec();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let idx = ((sorted.len() as f64 - 1.0) * alpha).round() as usize;
    let idx = idx.min(sorted.len() - 1);
    sorted[idx]
}

/// Compute pseudo-residuals (negative gradient) for regression losses.
fn compute_regression_residuals<F: Float>(
    y: &Array1<F>,
    f_vals: &Array1<F>,
    loss: RegressionLoss,
    huber_alpha: f64,
) -> Array1<F> {
    let n = y.len();
    match loss {
        RegressionLoss::LeastSquares => {
            // negative gradient of 0.5*(y - f)^2 is (y - f)
            let mut residuals = Array1::zeros(n);
            for i in 0..n {
                residuals[i] = y[i] - f_vals[i];
            }
            residuals
        }
        RegressionLoss::Lad => {
            // negative gradient of |y - f| is sign(y - f)
            let mut residuals = Array1::zeros(n);
            for i in 0..n {
                let diff = y[i] - f_vals[i];
                residuals[i] = if diff > F::zero() {
                    F::one()
                } else if diff < F::zero() {
                    -F::one()
                } else {
                    F::zero()
                };
            }
            residuals
        }
        RegressionLoss::Huber => {
            // Compute residuals and delta from quantile.
            let raw_residuals: Vec<F> = (0..n).map(|i| (y[i] - f_vals[i]).abs()).collect();
            let delta = quantile_f(&raw_residuals, huber_alpha);

            let mut residuals = Array1::zeros(n);
            for i in 0..n {
                let diff = y[i] - f_vals[i];
                if diff.abs() <= delta {
                    residuals[i] = diff;
                } else if diff > F::zero() {
                    residuals[i] = delta;
                } else {
                    residuals[i] = -delta;
                }
            }
            residuals
        }
    }
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use ndarray::array;

    // -- Regressor tests --

    #[test]
    fn test_gbr_simple_least_squares() {
        let x =
            Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0];

        let model = GradientBoostingRegressor::<f64>::new()
            .with_n_estimators(50)
            .with_learning_rate(0.1)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
        for i in 0..4 {
            assert!(preds[i] < 3.0, "Expected ~1.0, got {}", preds[i]);
        }
        for i in 4..8 {
            assert!(preds[i] > 3.0, "Expected ~5.0, got {}", preds[i]);
        }
    }

    #[test]
    fn test_gbr_lad_loss() {
        let x =
            Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0];

        let model = GradientBoostingRegressor::<f64>::new()
            .with_n_estimators(50)
            .with_loss(RegressionLoss::Lad)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
        // LAD should still separate the two groups.
        for i in 0..4 {
            assert!(preds[i] < 3.5, "LAD expected <3.5, got {}", preds[i]);
        }
        for i in 4..8 {
            assert!(preds[i] > 2.5, "LAD expected >2.5, got {}", preds[i]);
        }
    }

    #[test]
    fn test_gbr_huber_loss() {
        let x =
            Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0];

        let model = GradientBoostingRegressor::<f64>::new()
            .with_n_estimators(50)
            .with_loss(RegressionLoss::Huber)
            .with_huber_alpha(0.9)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
    }

    #[test]
    fn test_gbr_reproducibility() {
        let x =
            Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0];

        let model = GradientBoostingRegressor::<f64>::new()
            .with_n_estimators(20)
            .with_random_state(123);

        let fitted1 = model.fit(&x, &y).unwrap();
        let fitted2 = model.fit(&x, &y).unwrap();

        let preds1 = fitted1.predict(&x).unwrap();
        let preds2 = fitted2.predict(&x).unwrap();

        for (p1, p2) in preds1.iter().zip(preds2.iter()) {
            assert_relative_eq!(*p1, *p2, epsilon = 1e-10);
        }
    }

    #[test]
    fn test_gbr_feature_importances() {
        let x = Array2::from_shape_vec(
            (10, 3),
            vec![
                1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 4.0, 0.0, 0.0, 5.0, 0.0, 0.0, 6.0,
                0.0, 0.0, 7.0, 0.0, 0.0, 8.0, 0.0, 0.0, 9.0, 0.0, 0.0, 10.0, 0.0, 0.0,
            ],
        )
        .unwrap();
        let y = array![1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0, 5.0];

        let model = GradientBoostingRegressor::<f64>::new()
            .with_n_estimators(20)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let importances = fitted.feature_importances();

        assert_eq!(importances.len(), 3);
        // First feature should be most important.
        assert!(importances[0] > importances[1]);
        assert!(importances[0] > importances[2]);
    }

    #[test]
    fn test_gbr_shape_mismatch_fit() {
        let x = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = array![1.0, 2.0];

        let model = GradientBoostingRegressor::<f64>::new().with_n_estimators(5);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_gbr_shape_mismatch_predict() {
        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![1.0, 2.0, 3.0, 4.0];

        let model = GradientBoostingRegressor::<f64>::new()
            .with_n_estimators(5)
            .with_random_state(0);
        let fitted = model.fit(&x, &y).unwrap();

        let x_bad = Array2::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        assert!(fitted.predict(&x_bad).is_err());
    }

    #[test]
    fn test_gbr_empty_data() {
        let x = Array2::<f64>::zeros((0, 2));
        let y = Array1::<f64>::zeros(0);

        let model = GradientBoostingRegressor::<f64>::new().with_n_estimators(5);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_gbr_zero_estimators() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let y = array![1.0, 2.0, 3.0, 4.0];

        let model = GradientBoostingRegressor::<f64>::new().with_n_estimators(0);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_gbr_invalid_learning_rate() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let y = array![1.0, 2.0, 3.0, 4.0];

        let model = GradientBoostingRegressor::<f64>::new()
            .with_n_estimators(5)
            .with_learning_rate(0.0);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_gbr_invalid_subsample() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let y = array![1.0, 2.0, 3.0, 4.0];

        let model = GradientBoostingRegressor::<f64>::new()
            .with_n_estimators(5)
            .with_subsample(0.0);
        assert!(model.fit(&x, &y).is_err());

        let model2 = GradientBoostingRegressor::<f64>::new()
            .with_n_estimators(5)
            .with_subsample(1.5);
        assert!(model2.fit(&x, &y).is_err());
    }

    #[test]
    fn test_gbr_subsample() {
        let x =
            Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0];

        let model = GradientBoostingRegressor::<f64>::new()
            .with_n_estimators(50)
            .with_subsample(0.5)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
    }

    #[test]
    fn test_gbr_pipeline_integration() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let y = array![1.0, 2.0, 3.0, 4.0];

        let model = GradientBoostingRegressor::<f64>::new()
            .with_n_estimators(10)
            .with_random_state(42);
        let fitted = model.fit_pipeline(&x, &y).unwrap();
        let preds = fitted.predict_pipeline(&x).unwrap();
        assert_eq!(preds.len(), 4);
    }

    #[test]
    fn test_gbr_f32_support() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0f32, 2.0, 3.0, 4.0]).unwrap();
        let y = Array1::from_vec(vec![1.0f32, 2.0, 3.0, 4.0]);

        let model = GradientBoostingRegressor::<f32>::new()
            .with_n_estimators(10)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();
        assert_eq!(preds.len(), 4);
    }

    #[test]
    fn test_gbr_max_depth() {
        let x =
            Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0];

        let model = GradientBoostingRegressor::<f64>::new()
            .with_n_estimators(20)
            .with_max_depth(Some(1))
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();
        assert_eq!(preds.len(), 8);
    }

    #[test]
    fn test_gbr_default_trait() {
        let model = GradientBoostingRegressor::<f64>::default();
        assert_eq!(model.n_estimators, 100);
        assert!((model.learning_rate - 0.1).abs() < 1e-10);
    }

    // -- Classifier tests --

    #[test]
    fn test_gbc_binary_simple() {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = GradientBoostingClassifier::<f64>::new()
            .with_n_estimators(50)
            .with_learning_rate(0.1)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
        for i in 0..4 {
            assert_eq!(preds[i], 0, "Expected 0 at index {}, got {}", i, preds[i]);
        }
        for i in 4..8 {
            assert_eq!(preds[i], 1, "Expected 1 at index {}, got {}", i, preds[i]);
        }
    }

    #[test]
    fn test_gbc_multiclass() {
        let x = Array2::from_shape_vec((9, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
            .unwrap();
        let y = array![0, 0, 0, 1, 1, 1, 2, 2, 2];

        let model = GradientBoostingClassifier::<f64>::new()
            .with_n_estimators(50)
            .with_learning_rate(0.1)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 9);
        // The training data should be mostly classified correctly.
1486        let correct = preds.iter().zip(y.iter()).filter(|(p, t)| p == t).count();
1487        assert!(
1488            correct >= 6,
1489            "Expected at least 6/9 correct, got {correct}/9"
1490        );
1491    }
1492
1493    #[test]
1494    fn test_gbc_has_classes() {
1495        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
1496        let y = array![0, 1, 2, 0, 1, 2];
1497
1498        let model = GradientBoostingClassifier::<f64>::new()
1499            .with_n_estimators(5)
1500            .with_random_state(0);
1501        let fitted = model.fit(&x, &y).unwrap();
1502
1503        assert_eq!(fitted.classes(), &[0, 1, 2]);
1504        assert_eq!(fitted.n_classes(), 3);
1505    }
1506
1507    #[test]
1508    fn test_gbc_reproducibility() {
1509        let x = Array2::from_shape_vec(
1510            (8, 2),
1511            vec![
1512                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
1513            ],
1514        )
1515        .unwrap();
1516        let y = array![0, 0, 0, 0, 1, 1, 1, 1];
1517
1518        let model = GradientBoostingClassifier::<f64>::new()
1519            .with_n_estimators(10)
1520            .with_random_state(42);
1521
1522        let fitted1 = model.fit(&x, &y).unwrap();
1523        let fitted2 = model.fit(&x, &y).unwrap();
1524
1525        let preds1 = fitted1.predict(&x).unwrap();
1526        let preds2 = fitted2.predict(&x).unwrap();
1527        assert_eq!(preds1, preds2);
1528    }
1529
1530    #[test]
1531    fn test_gbc_feature_importances() {
1532        let x = Array2::from_shape_vec(
1533            (10, 3),
1534            vec![
1535                1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 4.0, 0.0, 0.0, 5.0, 0.0, 0.0, 6.0,
1536                0.0, 0.0, 7.0, 0.0, 0.0, 8.0, 0.0, 0.0, 9.0, 0.0, 0.0, 10.0, 0.0, 0.0,
1537            ],
1538        )
1539        .unwrap();
1540        let y = array![0, 0, 0, 0, 0, 1, 1, 1, 1, 1];
1541
1542        let model = GradientBoostingClassifier::<f64>::new()
1543            .with_n_estimators(20)
1544            .with_random_state(42);
1545        let fitted = model.fit(&x, &y).unwrap();
1546        let importances = fitted.feature_importances();
1547
1548        assert_eq!(importances.len(), 3);
1549        assert!(importances[0] > importances[1]);
1550        assert!(importances[0] > importances[2]);
1551    }

    #[test]
    fn test_gbc_shape_mismatch_fit() {
        let x = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = array![0, 1];

        let model = GradientBoostingClassifier::<f64>::new().with_n_estimators(5);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_gbc_shape_mismatch_predict() {
        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![0, 0, 1, 1];

        let model = GradientBoostingClassifier::<f64>::new()
            .with_n_estimators(5)
            .with_random_state(0);
        let fitted = model.fit(&x, &y).unwrap();

        let x_bad = Array2::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        assert!(fitted.predict(&x_bad).is_err());
    }
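
    // Hedged companion to the mismatch test above (hypothetical test):
    // assuming predict only validates the feature count against the fitted
    // model, a different number of rows should be accepted without error.
    #[test]
    fn test_gbc_predict_different_row_count() {
        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![0, 0, 1, 1];

        let model = GradientBoostingClassifier::<f64>::new()
            .with_n_estimators(5)
            .with_random_state(0);
        let fitted = model.fit(&x, &y).unwrap();

        let x_new = Array2::from_shape_vec((2, 2), vec![1.5, 2.5, 6.5, 7.5]).unwrap();
        let preds = fitted.predict(&x_new).unwrap();
        assert_eq!(preds.len(), 2);
    }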

    #[test]
    fn test_gbc_empty_data() {
        let x = Array2::<f64>::zeros((0, 2));
        let y = Array1::<usize>::zeros(0);

        let model = GradientBoostingClassifier::<f64>::new().with_n_estimators(5);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_gbc_single_class() {
        let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap();
        let y = array![0, 0, 0];

        let model = GradientBoostingClassifier::<f64>::new().with_n_estimators(5);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_gbc_zero_estimators() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let y = array![0, 0, 1, 1];

        let model = GradientBoostingClassifier::<f64>::new().with_n_estimators(0);
        assert!(model.fit(&x, &y).is_err());
    }
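
    // Hedged boundary check (hypothetical test): one estimator is the
    // smallest valid ensemble, just above the rejected zero case, and
    // should fit and predict without error under that assumption.
    #[test]
    fn test_gbc_one_estimator() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let y = array![0, 0, 1, 1];

        let model = GradientBoostingClassifier::<f64>::new()
            .with_n_estimators(1)
            .with_random_state(0);
        let fitted = model.fit(&x, &y).unwrap();
        assert_eq!(fitted.predict(&x).unwrap().len(), 4);
    }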

    #[test]
    fn test_gbc_pipeline_integration() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = Array1::from_vec(vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0]);

        let model = GradientBoostingClassifier::<f64>::new()
            .with_n_estimators(10)
            .with_random_state(42);
        let fitted = model.fit_pipeline(&x, &y).unwrap();
        let preds = fitted.predict_pipeline(&x).unwrap();
        assert_eq!(preds.len(), 6);
    }

    #[test]
    fn test_gbc_f32_support() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = array![0, 0, 0, 1, 1, 1];

        let model = GradientBoostingClassifier::<f32>::new()
            .with_n_estimators(10)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();
        assert_eq!(preds.len(), 6);
    }

    #[test]
    fn test_gbc_subsample() {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = GradientBoostingClassifier::<f64>::new()
            .with_n_estimators(20)
            .with_subsample(0.5)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();
        assert_eq!(preds.len(), 8);
    }
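
    // Hedged sketch (hypothetical test): with row subsampling the RNG
    // actually drives which rows each tree sees, so this is the case where
    // a fixed seed matters most. Assuming fit draws its sample indices from
    // the seeded StdRng, two fits with the same seed should agree exactly.
    #[test]
    fn test_gbc_subsample_reproducibility() {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = GradientBoostingClassifier::<f64>::new()
            .with_n_estimators(20)
            .with_subsample(0.5)
            .with_random_state(7);
        let preds1 = model.fit(&x, &y).unwrap().predict(&x).unwrap();
        let preds2 = model.fit(&x, &y).unwrap().predict(&x).unwrap();
        assert_eq!(preds1, preds2);
    }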

    #[test]
    fn test_gbc_default_trait() {
        let model = GradientBoostingClassifier::<f64>::default();
        assert_eq!(model.n_estimators, 100);
        assert!((model.learning_rate - 0.1).abs() < 1e-10);
    }

    #[test]
    fn test_gbc_non_contiguous_labels() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = array![10, 10, 10, 20, 20, 20];

        let model = GradientBoostingClassifier::<f64>::new()
            .with_n_estimators(20)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 6);
        for &p in &preds {
            assert!(p == 10 || p == 20);
        }
    }
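
    // Hedged extension of the label-mapping check above (hypothetical
    // test): assuming fit maps arbitrary label values to internal class
    // indices and predict maps them back, the same should hold on the
    // multiclass path with three non-contiguous labels.
    #[test]
    fn test_gbc_non_contiguous_labels_multiclass() {
        let x = Array2::from_shape_vec((9, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
            .unwrap();
        let y = array![5, 5, 5, 17, 17, 17, 99, 99, 99];

        let model = GradientBoostingClassifier::<f64>::new()
            .with_n_estimators(20)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(fitted.classes(), &[5, 17, 99]);
        for &p in &preds {
            assert!(p == 5 || p == 17 || p == 99);
        }
    }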

    // -- Helper tests --

    #[test]
    fn test_sigmoid() {
        assert_relative_eq!(sigmoid(0.0f64), 0.5, epsilon = 1e-10);
        assert!(sigmoid(10.0f64) > 0.999);
        assert!(sigmoid(-10.0f64) < 0.001);
    }
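
    // Hedged property check (hypothetical test): the standard logistic
    // function satisfies sigmoid(x) + sigmoid(-x) = 1; assuming sigmoid
    // here is 1 / (1 + e^(-x)), the identity should hold to rounding error.
    #[test]
    fn test_sigmoid_symmetry() {
        for &x in &[0.5f64, 1.0, 3.0, 7.5] {
            assert_relative_eq!(sigmoid(x) + sigmoid(-x), 1.0, epsilon = 1e-10);
        }
    }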

    #[test]
    fn test_median_f_odd() {
        let arr = array![3.0, 1.0, 2.0];
        assert_relative_eq!(median_f(&arr), 2.0, epsilon = 1e-10);
    }

    #[test]
    fn test_median_f_even() {
        let arr = array![4.0, 1.0, 3.0, 2.0];
        assert_relative_eq!(median_f(&arr), 2.5, epsilon = 1e-10);
    }

    #[test]
    fn test_median_f_empty() {
        let arr = Array1::<f64>::zeros(0);
        assert_relative_eq!(median_f(&arr), 0.0, epsilon = 1e-10);
    }

    #[test]
    fn test_quantile_f() {
        let vals = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let q90 = quantile_f(&vals, 0.9);
        assert!((4.0..=5.0).contains(&q90));
    }
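
    // Hedged endpoint check (hypothetical test): assuming quantile_f picks
    // the sorted value at index round((n - 1) * q), as the Huber residual
    // test below works through, q = 0.0 and q = 1.0 should return the
    // minimum and maximum respectively, even on unsorted input.
    #[test]
    fn test_quantile_f_endpoints() {
        let vals = vec![3.0, 1.0, 5.0, 2.0, 4.0];
        assert_relative_eq!(quantile_f(&vals, 0.0), 1.0, epsilon = 1e-10);
        assert_relative_eq!(quantile_f(&vals, 1.0), 5.0, epsilon = 1e-10);
    }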

    #[test]
    fn test_regression_residuals_least_squares() {
        let y = array![1.0, 2.0, 3.0];
        let f = array![0.5, 2.5, 2.0];
        let r = compute_regression_residuals(&y, &f, RegressionLoss::LeastSquares, 0.9);
        assert_relative_eq!(r[0], 0.5, epsilon = 1e-10);
        assert_relative_eq!(r[1], -0.5, epsilon = 1e-10);
        assert_relative_eq!(r[2], 1.0, epsilon = 1e-10);
    }

    #[test]
    fn test_regression_residuals_lad() {
        let y = array![1.0, 2.0, 3.0];
        let f = array![0.5, 2.5, 3.0];
        let r = compute_regression_residuals(&y, &f, RegressionLoss::Lad, 0.9);
        assert_relative_eq!(r[0], 1.0, epsilon = 1e-10);
        assert_relative_eq!(r[1], -1.0, epsilon = 1e-10);
        assert_relative_eq!(r[2], 0.0, epsilon = 1e-10);
    }

    #[test]
    fn test_regression_residuals_huber() {
        let y = array![1.0, 2.0, 10.0, 3.0, 4.0];
        let f = array![1.5, 2.5, 2.0, 3.5, 4.5];
        // Absolute residuals: [0.5, 0.5, 8.0, 0.5, 0.5].
        // With alpha = 0.9 the quantile index is round((n - 1) * alpha)
        // = round(4 * 0.9) = 4, so delta = sorted[4] = 8.0: every residual
        // falls within delta and is treated as L2.
        let r = compute_regression_residuals(&y, &f, RegressionLoss::Huber, 0.9);
        // All pseudo-residuals should therefore be plain y - f.
        assert_relative_eq!(r[0], -0.5, epsilon = 1e-10);
        assert_relative_eq!(r[1], -0.5, epsilon = 1e-10);
        assert_relative_eq!(r[2], 8.0, epsilon = 1e-10);
        assert_relative_eq!(r[3], -0.5, epsilon = 1e-10);
        assert_relative_eq!(r[4], -0.5, epsilon = 1e-10);

        // Lower alpha to trigger clipping: with alpha = 0.1 the quantile
        // index is round(4 * 0.1) = 0, so delta = sorted[0] = 0.5 and the
        // 8.0 residual is clipped.
        let r2 = compute_regression_residuals(&y, &f, RegressionLoss::Huber, 0.1);
        assert_relative_eq!(r2[0], -0.5, epsilon = 1e-10);
        // Third residual: diff = 8.0 > delta = 0.5, so it is clipped to delta.
        assert_relative_eq!(r2[2], 0.5, epsilon = 1e-10);
    }
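
    // Hedged sign check (hypothetical test): mirrors the clipping case
    // above with the large residual negated. Assuming the Huber
    // pseudo-residual for |y - f| > delta is delta * sign(y - f), the
    // clipped value should come back as -delta.
    #[test]
    fn test_regression_residuals_huber_negative_clip() {
        let y = array![1.0, 2.0, -6.0, 3.0, 4.0];
        let f = array![1.5, 2.5, 2.0, 3.5, 4.5];
        // Absolute residuals: [0.5, 0.5, 8.0, 0.5, 0.5]; alpha = 0.1 again
        // gives delta = sorted[0] = 0.5.
        let r = compute_regression_residuals(&y, &f, RegressionLoss::Huber, 0.1);
        // diff = -8.0, |diff| > delta, so the residual is clipped to -delta.
        assert_relative_eq!(r[2], -0.5, epsilon = 1e-10);
    }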

    #[test]
    fn test_gbc_multiclass_4_classes() {
        let x = Array2::from_shape_vec(
            (12, 1),
            vec![
                1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3];

        let model = GradientBoostingClassifier::<f64>::new()
            .with_n_estimators(50)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 12);
        assert_eq!(fitted.n_classes(), 4);
    }

    #[test]
    fn test_gbc_invalid_learning_rate() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let y = array![0, 0, 1, 1];

        let model = GradientBoostingClassifier::<f64>::new()
            .with_n_estimators(5)
            .with_learning_rate(-0.1);
        assert!(model.fit(&x, &y).is_err());
    }
}