// ferrolearn_tree/lib.rs
1//! # ferrolearn-tree
2//!
3//! Decision tree and ensemble tree models for the ferrolearn machine learning framework.
4//!
5//! This crate provides implementations of:
6//!
7//! - **[`DecisionTreeClassifier`]** / **[`DecisionTreeRegressor`]** — CART decision trees
8//! with configurable splitting criteria, depth limits, and minimum sample constraints.
9//! - **[`RandomForestClassifier`]** / **[`RandomForestRegressor`]** — Bootstrap-aggregated
10//! ensembles of decision trees with random feature subsets, built in parallel via `rayon`.
11//! - **[`GradientBoostingClassifier`]** / **[`GradientBoostingRegressor`]** — Gradient boosting
12//! ensembles that sequentially fit trees to the negative gradient of a loss function.
13//! - **[`HistGradientBoostingClassifier`]** / **[`HistGradientBoostingRegressor`]** —
14//! Histogram-based gradient boosting with O(n_bins) split finding, subtraction trick,
15//! native NaN support, and optional best-first (leaf-wise) growth.
16//! - **[`AdaBoostClassifier`]** — Adaptive Boosting using decision tree stumps with
17//! SAMME and SAMME.R algorithms.
18//! - **[`ExtraTreeClassifier`]** / **[`ExtraTreeRegressor`]** — Extremely randomized
19//! trees where split thresholds are chosen randomly rather than via exhaustive search.
20//! - **[`ExtraTreesClassifier`]** / **[`ExtraTreesRegressor`]** — Ensembles of
21//! extremely randomized trees with Rayon parallel fitting. No bootstrap by default.
22//! - **[`IsolationForest`]** — Anomaly detection via random isolation trees.
23//! - **[`VotingClassifier`]** / **[`VotingRegressor`]** — Ensembles of decision trees
24//! with varying hyperparameters, aggregated by majority vote or averaging.
25//! - **[`RandomTreesEmbedding`]** — Unsupervised feature transformation via one-hot
26//! encoded leaf indices across an ensemble of randomly built trees.
27//!
28//! # Design
29//!
30//! Each model follows the compile-time safety pattern:
31//!
32//! - The unfitted struct (e.g., `DecisionTreeClassifier<F>`) holds hyperparameters
33//! and implements [`Fit`](ferrolearn_core::Fit).
34//! - Calling `fit()` produces a new fitted type (e.g., `FittedDecisionTreeClassifier<F>`)
35//! that implements [`Predict`](ferrolearn_core::Predict).
36//! - Calling `predict()` on an unfitted model is a compile-time error.
37//!
38//! # Pipeline Integration
39//!
40//! All models implement [`PipelineEstimator`](ferrolearn_core::pipeline::PipelineEstimator)
41//! for `f64`, allowing them to be used as the final step in a
42//! [`Pipeline`](ferrolearn_core::pipeline::Pipeline).
43//!
44//! # Float Generics
45//!
46//! All models are generic over `F: num_traits::Float + Send + Sync + 'static`,
47//! supporting both `f32` and `f64`.
48
// Submodules, one per model family (kept in alphabetical order).
pub mod adaboost;
pub mod adaboost_regressor;
pub mod bagging;
pub mod decision_tree;
pub mod extra_tree;
pub mod extra_trees_ensemble;
pub mod gradient_boosting;
pub mod hist_gradient_boosting;
pub mod isolation_forest;
pub mod random_forest;
pub mod random_trees_embedding;
pub mod voting;

// Re-export the main types at the crate root.
// Each group re-exports the unfitted estimator, its `Fitted*` counterpart,
// and any hyperparameter enums (criteria, losses, feature strategies).
pub use adaboost::{AdaBoostAlgorithm, AdaBoostClassifier, FittedAdaBoostClassifier};
pub use adaboost_regressor::{AdaBoostLoss, AdaBoostRegressor, FittedAdaBoostRegressor};
pub use bagging::{
    BaggingClassifier, BaggingRegressor, FittedBaggingClassifier, FittedBaggingRegressor,
};
pub use decision_tree::{
    ClassificationCriterion, DecisionTreeClassifier, DecisionTreeRegressor,
    FittedDecisionTreeClassifier, FittedDecisionTreeRegressor, Node, RegressionCriterion,
};
pub use extra_tree::{
    ExtraTreeClassifier, ExtraTreeRegressor, FittedExtraTreeClassifier, FittedExtraTreeRegressor,
};
pub use extra_trees_ensemble::{
    ExtraTreesClassifier, ExtraTreesRegressor, FittedExtraTreesClassifier,
    FittedExtraTreesRegressor,
};
pub use gradient_boosting::{
    ClassificationLoss, FittedGradientBoostingClassifier, FittedGradientBoostingRegressor,
    GradientBoostingClassifier, GradientBoostingRegressor, RegressionLoss,
};
pub use hist_gradient_boosting::{
    FittedHistGradientBoostingClassifier, FittedHistGradientBoostingRegressor,
    HistClassificationLoss, HistGradientBoostingClassifier, HistGradientBoostingRegressor,
    HistNode, HistRegressionLoss,
};
pub use isolation_forest::{FittedIsolationForest, IsolationForest};
pub use random_forest::{
    FittedRandomForestClassifier, FittedRandomForestRegressor, MaxFeatures, RandomForestClassifier,
    RandomForestRegressor,
};
pub use random_trees_embedding::{FittedRandomTreesEmbedding, RandomTreesEmbedding};
pub use voting::{
    FittedVotingClassifier, FittedVotingRegressor, VotingClassifier, VotingRegressor,
};
97
98use ndarray::{Array1, Array2};
99use num_traits::Float;
100
101/// Element-wise natural log of a probability matrix, used as the body of
102/// every classifier `predict_log_proba` method in this crate. Clamps
103/// values below `1e-300` so `ln(0)` never produces `-inf` / `NaN`.
104pub(crate) fn log_proba<F: Float>(proba: &Array2<F>) -> Array2<F> {
105 let eps = F::from(1e-300).unwrap();
106 proba.mapv(|p| if p > eps { p.ln() } else { eps.ln() })
107}
108
109/// Mean accuracy: `(sum(predictions == targets)) / n`.
110///
111/// Used as the body of every classifier `score(&self, x, y)` method in
112/// this crate to mirror sklearn's `ClassifierMixin.score`.
113pub(crate) fn mean_accuracy<F: Float>(predictions: &Array1<usize>, targets: &Array1<usize>) -> F {
114 let n = targets.len();
115 if n == 0 {
116 return F::zero();
117 }
118 let correct = predictions
119 .iter()
120 .zip(targets.iter())
121 .filter(|(p, t)| p == t)
122 .count();
123 F::from(correct).unwrap() / F::from(n).unwrap()
124}
125
126/// R² coefficient of determination: `1 - SSres / SStot`.
127///
128/// Used as the body of every regressor `score(&self, x, y)` method in
129/// this crate to mirror sklearn's `RegressorMixin.score`. Constant-y
130/// returns `1.0` if predictions are also constant-perfect, else
131/// `F::neg_infinity()` to flag the genuine miss.
132pub(crate) fn r2_score<F: Float>(y_pred: &Array1<F>, y_true: &Array1<F>) -> F {
133 let n = y_true.len();
134 if n == 0 {
135 return F::zero();
136 }
137 let mean = y_true.iter().copied().fold(F::zero(), |a, b| a + b) / F::from(n).unwrap();
138 let mut ss_res = F::zero();
139 let mut ss_tot = F::zero();
140 for i in 0..n {
141 let r = y_true[i] - y_pred[i];
142 let t = y_true[i] - mean;
143 ss_res = ss_res + r * r;
144 ss_tot = ss_tot + t * t;
145 }
146 if ss_tot == F::zero() {
147 if ss_res == F::zero() {
148 F::one()
149 } else {
150 F::neg_infinity()
151 }
152 } else {
153 F::one() - ss_res / ss_tot
154 }
155}