Skip to main content

ferrolearn_tree/
lib.rs

1//! # ferrolearn-tree
2//!
3//! Decision tree and ensemble tree models for the ferrolearn machine learning framework.
4//!
5//! This crate provides implementations of:
6//!
7//! - **[`DecisionTreeClassifier`]** / **[`DecisionTreeRegressor`]** — CART decision trees
8//!   with configurable splitting criteria, depth limits, and minimum sample constraints.
9//! - **[`RandomForestClassifier`]** / **[`RandomForestRegressor`]** — Bootstrap-aggregated
10//!   ensembles of decision trees with random feature subsets, built in parallel via `rayon`.
11//! - **[`GradientBoostingClassifier`]** / **[`GradientBoostingRegressor`]** — Gradient boosting
12//!   ensembles that sequentially fit trees to the negative gradient of a loss function.
13//! - **[`HistGradientBoostingClassifier`]** / **[`HistGradientBoostingRegressor`]** —
14//!   Histogram-based gradient boosting with O(n_bins) split finding, subtraction trick,
15//!   native NaN support, and optional best-first (leaf-wise) growth.
16//! - **[`AdaBoostClassifier`]** — Adaptive Boosting using decision tree stumps with
17//!   SAMME and SAMME.R algorithms.
18//! - **[`ExtraTreeClassifier`]** / **[`ExtraTreeRegressor`]** — Extremely randomized
19//!   trees where split thresholds are chosen randomly rather than via exhaustive search.
20//! - **[`ExtraTreesClassifier`]** / **[`ExtraTreesRegressor`]** — Ensembles of
21//!   extremely randomized trees with Rayon parallel fitting. No bootstrap by default.
22//! - **[`IsolationForest`]** — Anomaly detection via random isolation trees.
23//! - **[`VotingClassifier`]** / **[`VotingRegressor`]** — Ensembles of decision trees
24//!   with varying hyperparameters, aggregated by majority vote or averaging.
25//! - **[`RandomTreesEmbedding`]** — Unsupervised feature transformation via one-hot
26//!   encoded leaf indices across an ensemble of randomly built trees.
27//!
28//! # Design
29//!
30//! Each model follows the compile-time safety pattern:
31//!
32//! - The unfitted struct (e.g., `DecisionTreeClassifier<F>`) holds hyperparameters
33//!   and implements [`Fit`](ferrolearn_core::Fit).
34//! - Calling `fit()` produces a new fitted type (e.g., `FittedDecisionTreeClassifier<F>`)
35//!   that implements [`Predict`](ferrolearn_core::Predict).
36//! - Calling `predict()` on an unfitted model is a compile-time error.
37//!
38//! # Pipeline Integration
39//!
40//! All models implement [`PipelineEstimator`](ferrolearn_core::pipeline::PipelineEstimator)
41//! for `f64`, allowing them to be used as the final step in a
42//! [`Pipeline`](ferrolearn_core::pipeline::Pipeline).
43//!
44//! # Float Generics
45//!
46//! All models are generic over `F: num_traits::Float + Send + Sync + 'static`,
47//! supporting both `f32` and `f64`.
48
49pub mod adaboost;
50pub mod adaboost_regressor;
51pub mod bagging;
52pub mod decision_tree;
53pub mod extra_tree;
54pub mod extra_trees_ensemble;
55pub mod gradient_boosting;
56pub mod hist_gradient_boosting;
57pub mod isolation_forest;
58pub mod random_forest;
59pub mod random_trees_embedding;
60pub mod voting;
61
62// Re-export the main types at the crate root.
63pub use adaboost::{AdaBoostAlgorithm, AdaBoostClassifier, FittedAdaBoostClassifier};
64pub use adaboost_regressor::{AdaBoostLoss, AdaBoostRegressor, FittedAdaBoostRegressor};
65pub use bagging::{
66    BaggingClassifier, BaggingRegressor, FittedBaggingClassifier, FittedBaggingRegressor,
67};
68pub use decision_tree::{
69    ClassificationCriterion, DecisionTreeClassifier, DecisionTreeRegressor,
70    FittedDecisionTreeClassifier, FittedDecisionTreeRegressor, Node, RegressionCriterion,
71};
72pub use extra_tree::{
73    ExtraTreeClassifier, ExtraTreeRegressor, FittedExtraTreeClassifier, FittedExtraTreeRegressor,
74};
75pub use extra_trees_ensemble::{
76    ExtraTreesClassifier, ExtraTreesRegressor, FittedExtraTreesClassifier,
77    FittedExtraTreesRegressor,
78};
79pub use gradient_boosting::{
80    ClassificationLoss, FittedGradientBoostingClassifier, FittedGradientBoostingRegressor,
81    GradientBoostingClassifier, GradientBoostingRegressor, RegressionLoss,
82};
83pub use hist_gradient_boosting::{
84    FittedHistGradientBoostingClassifier, FittedHistGradientBoostingRegressor,
85    HistClassificationLoss, HistGradientBoostingClassifier, HistGradientBoostingRegressor,
86    HistNode, HistRegressionLoss,
87};
88pub use isolation_forest::{FittedIsolationForest, IsolationForest};
89pub use random_forest::{
90    FittedRandomForestClassifier, FittedRandomForestRegressor, MaxFeatures, RandomForestClassifier,
91    RandomForestRegressor,
92};
93pub use random_trees_embedding::{FittedRandomTreesEmbedding, RandomTreesEmbedding};
94pub use voting::{
95    FittedVotingClassifier, FittedVotingRegressor, VotingClassifier, VotingRegressor,
96};
97
98use ndarray::{Array1, Array2};
99use num_traits::Float;
100
101/// Element-wise natural log of a probability matrix, used as the body of
102/// every classifier `predict_log_proba` method in this crate. Clamps
103/// values below `1e-300` so `ln(0)` never produces `-inf` / `NaN`.
104pub(crate) fn log_proba<F: Float>(proba: &Array2<F>) -> Array2<F> {
105    let eps = F::from(1e-300).unwrap();
106    proba.mapv(|p| if p > eps { p.ln() } else { eps.ln() })
107}
108
109/// Mean accuracy: `(sum(predictions == targets)) / n`.
110///
111/// Used as the body of every classifier `score(&self, x, y)` method in
112/// this crate to mirror sklearn's `ClassifierMixin.score`.
113pub(crate) fn mean_accuracy<F: Float>(predictions: &Array1<usize>, targets: &Array1<usize>) -> F {
114    let n = targets.len();
115    if n == 0 {
116        return F::zero();
117    }
118    let correct = predictions
119        .iter()
120        .zip(targets.iter())
121        .filter(|(p, t)| p == t)
122        .count();
123    F::from(correct).unwrap() / F::from(n).unwrap()
124}
125
126/// R² coefficient of determination: `1 - SSres / SStot`.
127///
128/// Used as the body of every regressor `score(&self, x, y)` method in
129/// this crate to mirror sklearn's `RegressorMixin.score`. Constant-y
130/// returns `1.0` if predictions are also constant-perfect, else
131/// `F::neg_infinity()` to flag the genuine miss.
132pub(crate) fn r2_score<F: Float>(y_pred: &Array1<F>, y_true: &Array1<F>) -> F {
133    let n = y_true.len();
134    if n == 0 {
135        return F::zero();
136    }
137    let mean = y_true.iter().copied().fold(F::zero(), |a, b| a + b) / F::from(n).unwrap();
138    let mut ss_res = F::zero();
139    let mut ss_tot = F::zero();
140    for i in 0..n {
141        let r = y_true[i] - y_pred[i];
142        let t = y_true[i] - mean;
143        ss_res = ss_res + r * r;
144        ss_tot = ss_tot + t * t;
145    }
146    if ss_tot == F::zero() {
147        if ss_res == F::zero() {
148            F::one()
149        } else {
150            F::neg_infinity()
151        }
152    } else {
153        F::one() - ss_res / ss_tot
154    }
155}