gbrt_rs/tree/mod.rs
//! Decision tree implementations for gradient boosting.
//!
//! This module provides the complete decision tree infrastructure for gradient boosting,
//! including tree structures, split finding algorithms, split criteria, and recursive
//! tree construction. Trees built by this module are specifically designed to fit
//! **gradients and hessians** rather than raw target values, enabling second-order
//! optimization.
//!
//!
10//!
11//! The tree module follows a modular design with clear separation of concerns:
12//!
13//! - [`types`]: Core tree data structures (`Tree`, `TreeNode`)
14//! - [`decision_tree`]: High-level decision tree interface
15//! - [`node`]: Tree construction logic (`TreeBuilder`)
16//! - [`splitter`]: Split finding algorithms (`BestSplitter`)
17//! - [`criterion`]: Split quality evaluation (`MSECriterion`, `FriedmanMSECriterion`)
18//!
19//! # Key Components
20//!
21//! - [`DecisionTree`]: Main entry point for building trees in boosting
22//! - [`TreeBuilder`]: Recursive tree construction with pluggable components
23//! - [`BestSplitter`]: Exhaustive split search with histogram approximation
24//! - [`MSECriterion`]: Basic MSE split criterion with L2 regularization
25//! - [`FriedmanMSECriterion`]: Enhanced criterion with L1/L2 regularization (XGBoost-style)
26//!
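//! As a rough sketch of how these pieces compose (constructor and method names below
//! are illustrative assumptions, not the exact API; see the submodules for the real
//! signatures):
//!
//! ```ignore
//! // Hypothetical wiring: pick a criterion by name, plug it into a builder
//! // together with a splitter, and let the tree fit gradients and hessians.
//! let criterion = create_criterion("friedman_mse")?;
//! let splitter = BestSplitter::default();
//! let builder = TreeBuilder::new(splitter, criterion);
//! let tree = DecisionTree::with_builder(builder).fit(&features, &gradients, &hessians)?;
//! ```
//!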
//! # Tree Structure
//!
//! Trees are immutable after construction and store comprehensive statistics
//! at each node for diagnostics and feature importance calculation.

mod types;
mod decision_tree;
mod node;
mod splitter;
mod criterion;

/// Core tree data structures
pub use types::{
    // Complete decision tree with metadata and prediction methods.
    Tree,
    // Recursive enum representing internal splits or leaf nodes.
    TreeNode,
};

/// High-level tree interface
pub use decision_tree::{
    // User-facing decision tree builder for gradient boosting.
    DecisionTree,
};

/// Tree construction
pub use node::{
    // Flexible tree builder with pluggable splitters and criteria.
    TreeBuilder,
};

/// Split finding
pub use splitter::{
    // Trait for pluggable split-finding algorithms.
    Splitter,
    // Default exhaustive split finder with histogram approximation.
    BestSplitter,
    // Candidate split with precomputed statistics.
    SplitCandidate,
    // Errors from split finding operations.
    SplitterError,
};

/// Split criteria
pub use criterion::{
    // Trait for evaluating split quality (gain calculation).
    SplitCriterion,
    // Basic MSE criterion with L2 regularization.
    MSECriterion,
    // Enhanced MSE criterion with L1/L2 regularization and gamma pruning.
    FriedmanMSECriterion,
    // Factory function for creating criteria from string names.
    create_criterion,
    // Errors from criterion computation.
    CriterionError,
};

/// Errors that can occur during tree construction and manipulation.
///
/// This enum aggregates errors from all tree-related submodules, providing
/// a unified error type for tree operations. It uses the `#[from]` attribute
/// to enable automatic conversion from submodule-specific errors.
///
/// # Error Variants
///
/// - `SplitterError`: Failed to find an optimal split (e.g., insufficient samples)
/// - `CriterionError`: Error computing split gain or leaf values
/// - `DataError`: Invalid training data structure
/// - `FeatureMatrixError`: Problem accessing feature values
/// - `BuildingError`: General tree construction failure
/// - `ConfigError`: Invalid tree configuration
///
/// # Conversions
///
/// `TreeError` can be converted to `CoreError` for integration with the
/// main boosting pipeline.
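///
/// # Example
///
/// A sketch of `?`-based propagation (not compiled; `find_best_split` and the
/// `FeatureMatrix` parameter are hypothetical names used only to illustrate the
/// `#[from]` conversions):
///
/// ```ignore
/// fn choose_split(splitter: &BestSplitter, data: &FeatureMatrix) -> TreeResult<SplitCandidate> {
///     // A returned `SplitterError` is lifted into `TreeError` automatically by `?`.
///     let candidate = splitter.find_best_split(data)?;
///     Ok(candidate)
/// }
/// ```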
#[derive(thiserror::Error, Debug)]
pub enum TreeError {
    /// Split finding failed (e.g., no valid splits, insufficient samples).
    #[error("Splitter error: {0}")]
    SplitterError(#[from] SplitterError),

    /// Split criterion computation failed (e.g., numerical instability).
    #[error("Criterion error: {0}")]
    CriterionError(#[from] CriterionError),

    /// Training data is invalid or corrupted.
    #[error("Data error: {0}")]
    DataError(#[from] crate::data::DataError),

    /// Feature matrix access error (e.g., index out of bounds).
    #[error("Feature matrix error: {0}")]
    FeatureMatrixError(#[from] crate::data::FeatureMatrixError),

    /// General tree construction failure.
    #[error("Tree building error: {0}")]
    BuildingError(String),

    /// Invalid tree hyperparameters or configuration.
    #[error("Invalid tree configuration: {0}")]
    ConfigError(String),
}

/// Result type for tree operations.
///
/// This is a convenient type alias for `Result<T, TreeError>`.
pub type TreeResult<T> = std::result::Result<T, TreeError>;

/// Conversion from tree errors to core library errors.
///
/// This `From` implementation enables seamless error propagation from
/// tree construction up to the main [`crate::core::CoreError`] type used
/// by the gradient booster.
///
/// All tree errors are converted to `CoreError::TrainingError` with a
/// descriptive message.
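///
/// A sketch of the conversion (not compiled as a doctest):
///
/// ```ignore
/// // Any `TreeError` flows into the boosting pipeline as a `TrainingError`.
/// let err = TreeError::BuildingError("example failure".to_string());
/// let core: crate::core::CoreError = err.into();
/// assert!(matches!(core, crate::core::CoreError::TrainingError(_)));
/// ```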
impl From<TreeError> for crate::core::CoreError {
    fn from(err: TreeError) -> Self {
        crate::core::CoreError::TrainingError(err.to_string())
    }
}