gbrt_rs/tree/
mod.rs

//! Decision tree implementations for gradient boosting.
//!
//! This module provides the complete decision tree infrastructure for gradient boosting,
//! including tree structures, split finding algorithms, split criteria, and recursive
//! tree construction. Trees built by this module are specifically designed to fit
//! **gradients and hessians** rather than raw target values, enabling second-order
//! optimization.
//!
//! # Architecture
//!
//! The tree module follows a modular design with a clear separation of concerns:
//!
//! - [`types`]: Core tree data structures (`Tree`, `TreeNode`)
//! - [`decision_tree`]: High-level decision tree interface
//! - [`node`]: Tree construction logic (`TreeBuilder`)
//! - [`splitter`]: Split finding algorithms (`BestSplitter`)
//! - [`criterion`]: Split quality evaluation (`MSECriterion`, `FriedmanMSECriterion`)
//!
//! # Key Components
//!
//! - [`DecisionTree`]: Main entry point for building trees in boosting
//! - [`TreeBuilder`]: Recursive tree construction with pluggable components
//! - [`BestSplitter`]: Exhaustive split search with histogram approximation
//! - [`MSECriterion`]: Basic MSE split criterion with L2 regularization
//! - [`FriedmanMSECriterion`]: Enhanced criterion with L1/L2 regularization (XGBoost-style)
//!
//! # Tree Structure
//!
//! Trees are immutable after construction and store comprehensive statistics
//! at each node for diagnostics and feature importance calculation.
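//!
//! # Example
//!
//! A minimal sketch of how the pieces fit together. The constructor and method
//! names below are illustrative assumptions, not the exact API; see
//! [`DecisionTree`], [`TreeBuilder`], and [`BestSplitter`] for the real signatures.
//!
//! ```ignore
//! // Assemble a builder from pluggable components (hypothetical constructors).
//! let criterion = FriedmanMSECriterion::default();
//! let splitter = BestSplitter::default();
//! let builder = TreeBuilder::new(splitter, criterion);
//!
//! // Trees are fit to per-sample gradients and hessians, not raw targets.
//! let tree: Tree = builder.build(&features, &gradients, &hessians)?;
//! let prediction = tree.predict(&features_row);
//! ```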

mod types;
mod decision_tree;
mod node;
mod splitter;
mod criterion;

/// Core tree data structures
pub use types::{
    // Complete decision tree with metadata and prediction methods.
    Tree,
    // Recursive enum representing internal splits or leaf nodes.
    TreeNode,
};

/// High-level tree interface
pub use decision_tree::{
    // User-facing decision tree builder for gradient boosting.
    DecisionTree,
};

/// Tree construction
pub use node::{
    // Flexible tree builder with pluggable splitters and criteria.
    TreeBuilder,
};

/// Split finding
pub use splitter::{
    // Trait for pluggable split-finding algorithms.
    Splitter,
    // Default exhaustive split finder with histogram approximation.
    BestSplitter,
    // Candidate split with precomputed statistics.
    SplitCandidate,
    // Errors from split finding operations.
    SplitterError,
};

/// Split criteria
pub use criterion::{
    // Trait for evaluating split quality (gain calculation).
    SplitCriterion,
    // Basic MSE criterion with L2 regularization.
    MSECriterion,
    // Enhanced MSE criterion with L1/L2 regularization and gamma pruning.
    FriedmanMSECriterion,
    // Factory function for creating criteria from string names.
    create_criterion,
    // Errors from criterion computation.
    CriterionError,
};

/// Errors that can occur during tree construction and manipulation.
///
/// This enum aggregates errors from all tree-related submodules, providing
/// a unified error type for tree operations. It uses the `#[from]` attribute
/// to enable automatic conversion from submodule-specific errors.
///
/// # Error Variants
///
/// - `SplitterError`: Failed to find an optimal split (e.g., insufficient samples)
/// - `CriterionError`: Error computing split gain or leaf values
/// - `DataError`: Invalid training data structure
/// - `FeatureMatrixError`: Problem accessing feature values
/// - `BuildingError`: General tree construction failure
/// - `ConfigError`: Invalid tree configuration
///
/// # Conversions
///
/// `TreeError` can be converted to `CoreError` for integration with the
/// main boosting pipeline.
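///
/// # Example
///
/// A minimal sketch of how `#[from]` lets `?` lift a submodule error into
/// `TreeError`. The `find_best_split` call and its arguments are hypothetical,
/// shown only to illustrate the automatic conversion:
///
/// ```ignore
/// fn grow_node(splitter: &BestSplitter, node_samples: &[usize]) -> TreeResult<SplitCandidate> {
///     // A `SplitterError` returned here becomes `TreeError::SplitterError`
///     // through the `From` impl generated by `#[from]`.
///     let split = splitter.find_best_split(node_samples)?;
///     Ok(split)
/// }
/// ```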
#[derive(thiserror::Error, Debug)]
pub enum TreeError {
    /// Split finding failed (e.g., no valid splits, insufficient samples).
    #[error("Splitter error: {0}")]
    SplitterError(#[from] SplitterError),

    /// Split criterion computation failed (e.g., numerical instability).
    #[error("Criterion error: {0}")]
    CriterionError(#[from] CriterionError),

    /// Training data is invalid or corrupted.
    #[error("Data error: {0}")]
    DataError(#[from] crate::data::DataError),

    /// Feature matrix access error (e.g., index out of bounds).
    #[error("Feature matrix error: {0}")]
    FeatureMatrixError(#[from] crate::data::FeatureMatrixError),

    /// General tree construction failure.
    #[error("Tree building error: {0}")]
    BuildingError(String),

    /// Invalid tree hyperparameters or configuration.
    #[error("Invalid tree configuration: {0}")]
    ConfigError(String),
}

/// Result type for tree operations.
///
/// This is a convenient type alias for `Result<T, TreeError>`.
pub type TreeResult<T> = std::result::Result<T, TreeError>;

/// Conversion from tree errors to core library errors.
///
/// This `From` implementation enables seamless error propagation from
/// tree construction up to the main [`crate::core::CoreError`] type used
/// by the gradient booster.
///
/// All tree errors are converted to `CoreError::TrainingError` with a
/// descriptive message.
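///
/// # Example
///
/// A sketch of propagating a tree error into a `CoreError`-returning caller.
/// The `build_tree` helper and its argument are hypothetical, shown only to
/// illustrate that `?` applies this `From` impl at the boundary:
///
/// ```ignore
/// fn train_stage(gradients: &[f64]) -> Result<Tree, crate::core::CoreError> {
///     // Any `TreeError` returned by `build_tree` is converted to
///     // `CoreError::TrainingError(..)` by `?` via this impl.
///     let tree = build_tree(gradients)?;
///     Ok(tree)
/// }
/// ```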
impl From<TreeError> for crate::core::CoreError {
    fn from(err: TreeError) -> Self {
        crate::core::CoreError::TrainingError(err.to_string())
    }
}