Skip to main content

irithyll_core/tree/
builder.rs

1//! Incremental tree construction with histogram-based splitting.
2//!
3//! [`TreeConfig`] defines all hyperparameters for a single streaming decision tree:
4//! depth limits, regularization, binning granularity, and the Hoeffding bound
5//! confidence parameter that controls when splits are committed.
6
7use alloc::vec::Vec;
8
9use crate::feature::FeatureType;
10use crate::tree::leaf_model::LeafModelType;
11
12/// Configuration for a single streaming decision tree.
13///
14/// These parameters control tree growth, regularization, and the statistical
15/// confidence required before committing a split decision.
16///
17/// # Defaults
18///
19/// | Parameter                | Default |
20/// |--------------------------|---------|
21/// | `max_depth`              | 6       |
22/// | `n_bins`                 | 64      |
23/// | `lambda`                 | 1.0     |
24/// | `gamma`                  | 0.0     |
25/// | `grace_period`           | 200     |
26/// | `delta`                  | 1e-7    |
27/// | `feature_subsample_rate` | 1.0     |
28#[derive(Debug, Clone)]
29pub struct TreeConfig {
30    /// Maximum tree depth. Deeper trees capture more interactions but risk
31    /// overfitting on streaming data. Default: 6.
32    pub max_depth: usize,
33
34    /// Number of histogram bins per feature. More bins give finer split
35    /// resolution at the cost of memory and slower convergence. Default: 64.
36    pub n_bins: usize,
37
38    /// L2 regularization parameter (lambda). Penalizes large leaf weights,
39    /// helping prevent overfitting. Appears in the denominator of the
40    /// leaf weight formula: w = -G / (H + lambda). Default: 1.0.
41    pub lambda: f64,
42
43    /// Minimum split gain threshold (gamma). A candidate split must achieve
44    /// gain > gamma to be accepted. Higher values produce more conservative
45    /// trees. Default: 0.0.
46    pub gamma: f64,
47
48    /// Minimum number of samples a leaf must accumulate before evaluating
49    /// potential splits. Also controls when bin edges are computed from
50    /// observed feature values. Default: 200.
51    pub grace_period: usize,
52
53    /// Hoeffding bound confidence parameter (delta). Smaller values require
54    /// more statistical evidence before committing a split, producing more
55    /// conservative but more reliable trees. The bound guarantees that the
56    /// chosen split is within epsilon of optimal with probability 1-delta.
57    /// Default: 1e-7.
58    pub delta: f64,
59
60    /// Fraction of features to consider at each split evaluation. 1.0 means
61    /// all features are evaluated; smaller values introduce randomness
62    /// (similar to random forest feature bagging). Default: 1.0.
63    pub feature_subsample_rate: f64,
64
65    /// Random seed for feature subsampling. Default: 42.
66    ///
67    /// Set by the ensemble orchestrator to ensure deterministic, diverse
68    /// behavior across trees (typically `config.seed ^ step_index`).
69    pub seed: u64,
70
71    /// Per-sample decay factor for leaf statistics.
72    ///
73    /// Computed from `leaf_half_life` as `exp(-ln(2) / half_life)`.
74    /// When `Some(alpha)`, leaf gradient/hessian sums and histogram bins
75    /// are decayed by `alpha` before each new accumulation.
76    /// `None` (default) means no decay.
77    pub leaf_decay_alpha: Option<f64>,
78
79    /// Re-evaluation interval for max-depth leaves.
80    ///
81    /// When `Some(n)`, leaves at max depth will re-evaluate potential splits
82    /// every `n` samples, allowing the tree to adapt its structure over time.
83    /// `None` (default) disables re-evaluation.
84    pub split_reeval_interval: Option<usize>,
85
86    /// Per-feature type declarations (continuous vs categorical).
87    ///
88    /// When `Some`, categorical features use one-bin-per-category binning and
89    /// Fisher optimal binary partitioning. `None` (default) treats all features
90    /// as continuous.
91    pub feature_types: Option<Vec<FeatureType>>,
92
93    /// Per-leaf gradient clipping threshold in standard deviations.
94    ///
95    /// When `Some(sigma)`, leaf-level EWMA gradient statistics are tracked and
96    /// incoming gradients are clamped to `mean ± sigma * std_dev`.
97    /// `None` (default) disables clipping.
98    pub gradient_clip_sigma: Option<f64>,
99
100    /// Per-feature monotonic constraints: +1 = increasing, -1 = decreasing, 0 = free.
101    ///
102    /// Candidate splits violating monotonicity are rejected.
103    /// `None` (default) means no constraints.
104    pub monotone_constraints: Option<Vec<i8>>,
105
106    /// Maximum absolute leaf output value.
107    ///
108    /// When `Some(max)`, leaf predictions are clamped to `[-max, max]`.
109    /// Prevents runaway leaf weights from causing prediction explosions
110    /// in feedback loops. `None` (default) means no clamping.
111    pub max_leaf_output: Option<f64>,
112
113    /// Per-leaf adaptive output bound (sigma multiplier).
114    ///
115    /// When `Some(k)`, each leaf tracks EWMA of its own output weight and
116    /// clamps predictions to `|output_mean| + k * output_std`.
117    /// `None` (default) disables adaptive bounds.
118    pub adaptive_leaf_bound: Option<f64>,
119
120    /// Minimum hessian sum before a leaf produces non-zero output.
121    ///
122    /// When `Some(min_h)`, leaves with `hess_sum < min_h` return 0.0.
123    /// Prevents post-replacement spikes from fresh leaves with insufficient
124    /// samples. `None` (default) means all leaves contribute immediately.
125    pub min_hessian_sum: Option<f64>,
126
127    /// Leaf prediction model type.
128    ///
129    /// Controls how each leaf computes its prediction:
130    /// - [`ClosedForm`](LeafModelType::ClosedForm) (default): constant leaf weight
131    ///   `w = -G / (H + lambda)`.
132    /// - [`Linear`](LeafModelType::Linear): online ridge regression with AdaGrad
133    ///   optimization, learning a local `w . x + b` surface. Optional `decay` for
134    ///   concept drift. Recommended for low-depth trees (depth 2--4).
135    /// - [`MLP`](LeafModelType::MLP): single hidden layer neural network per leaf.
136    ///   Optional `decay` for concept drift.
137    /// - [`Adaptive`](LeafModelType::Adaptive): starts as closed-form, auto-promotes
138    ///   to a more complex model when the Hoeffding bound (using [`delta`](Self::delta))
139    ///   confirms it is statistically superior. No arbitrary thresholds.
140    pub leaf_model_type: LeafModelType,
141}
142
143impl Default for TreeConfig {
144    fn default() -> Self {
145        Self {
146            max_depth: 6,
147            n_bins: 64,
148            lambda: 1.0,
149            gamma: 0.0,
150            grace_period: 200,
151            delta: 1e-7,
152            feature_subsample_rate: 1.0,
153            seed: 42,
154            leaf_decay_alpha: None,
155            split_reeval_interval: None,
156            feature_types: None,
157            gradient_clip_sigma: None,
158            monotone_constraints: None,
159            max_leaf_output: None,
160            adaptive_leaf_bound: None,
161            min_hessian_sum: None,
162            leaf_model_type: LeafModelType::default(),
163        }
164    }
165}
166
167impl TreeConfig {
168    /// Create a new `TreeConfig` with default parameters.
169    ///
170    /// Equivalent to `TreeConfig::default()`, but provided as a named
171    /// constructor for clarity in builder chains.
172    pub fn new() -> Self {
173        Self::default()
174    }
175
176    /// Set the maximum tree depth.
177    #[inline]
178    pub fn max_depth(mut self, max_depth: usize) -> Self {
179        self.max_depth = max_depth;
180        self
181    }
182
183    /// Set the number of histogram bins per feature.
184    #[inline]
185    pub fn n_bins(mut self, n_bins: usize) -> Self {
186        self.n_bins = n_bins;
187        self
188    }
189
190    /// Set the L2 regularization parameter (lambda).
191    #[inline]
192    pub fn lambda(mut self, lambda: f64) -> Self {
193        self.lambda = lambda;
194        self
195    }
196
197    /// Set the minimum split gain threshold (gamma).
198    #[inline]
199    pub fn gamma(mut self, gamma: f64) -> Self {
200        self.gamma = gamma;
201        self
202    }
203
204    /// Set the grace period (minimum samples before evaluating splits).
205    #[inline]
206    pub fn grace_period(mut self, grace_period: usize) -> Self {
207        self.grace_period = grace_period;
208        self
209    }
210
211    /// Set the Hoeffding bound confidence parameter (delta).
212    #[inline]
213    pub fn delta(mut self, delta: f64) -> Self {
214        self.delta = delta;
215        self
216    }
217
218    /// Set the feature subsample rate.
219    #[inline]
220    pub fn feature_subsample_rate(mut self, rate: f64) -> Self {
221        self.feature_subsample_rate = rate.clamp(0.0, 1.0);
222        self
223    }
224
225    /// Set the random seed for feature subsampling.
226    #[inline]
227    pub fn seed(mut self, seed: u64) -> Self {
228        self.seed = seed;
229        self
230    }
231
232    /// Set the per-sample decay factor for leaf statistics.
233    #[inline]
234    pub fn leaf_decay_alpha(mut self, alpha: f64) -> Self {
235        self.leaf_decay_alpha = Some(alpha);
236        self
237    }
238
239    /// Optionally set the per-sample decay factor for leaf statistics.
240    #[inline]
241    pub fn leaf_decay_alpha_opt(mut self, alpha: Option<f64>) -> Self {
242        self.leaf_decay_alpha = alpha;
243        self
244    }
245
246    /// Set the re-evaluation interval for max-depth leaves.
247    #[inline]
248    pub fn split_reeval_interval(mut self, interval: usize) -> Self {
249        self.split_reeval_interval = Some(interval);
250        self
251    }
252
253    /// Optionally set the re-evaluation interval for max-depth leaves.
254    #[inline]
255    pub fn split_reeval_interval_opt(mut self, interval: Option<usize>) -> Self {
256        self.split_reeval_interval = interval;
257        self
258    }
259
260    /// Set the per-feature type declarations.
261    #[inline]
262    pub fn feature_types(mut self, types: Vec<FeatureType>) -> Self {
263        self.feature_types = Some(types);
264        self
265    }
266
267    /// Optionally set the per-feature type declarations.
268    #[inline]
269    pub fn feature_types_opt(mut self, types: Option<Vec<FeatureType>>) -> Self {
270        self.feature_types = types;
271        self
272    }
273
274    /// Set the gradient clipping threshold in standard deviations.
275    #[inline]
276    pub fn gradient_clip_sigma(mut self, sigma: f64) -> Self {
277        self.gradient_clip_sigma = Some(sigma);
278        self
279    }
280
281    /// Optionally set the gradient clipping threshold.
282    #[inline]
283    pub fn gradient_clip_sigma_opt(mut self, sigma: Option<f64>) -> Self {
284        self.gradient_clip_sigma = sigma;
285        self
286    }
287
288    /// Set per-feature monotonic constraints.
289    #[inline]
290    pub fn monotone_constraints(mut self, constraints: Vec<i8>) -> Self {
291        self.monotone_constraints = Some(constraints);
292        self
293    }
294
295    /// Optionally set per-feature monotonic constraints.
296    #[inline]
297    pub fn monotone_constraints_opt(mut self, constraints: Option<Vec<i8>>) -> Self {
298        self.monotone_constraints = constraints;
299        self
300    }
301
302    /// Set the maximum absolute leaf output value.
303    #[inline]
304    pub fn max_leaf_output(mut self, max: f64) -> Self {
305        self.max_leaf_output = Some(max);
306        self
307    }
308
309    /// Optionally set the maximum absolute leaf output value.
310    #[inline]
311    pub fn max_leaf_output_opt(mut self, max: Option<f64>) -> Self {
312        self.max_leaf_output = max;
313        self
314    }
315
316    /// Optionally set per-leaf adaptive output bound.
317    #[inline]
318    pub fn adaptive_leaf_bound_opt(mut self, k: Option<f64>) -> Self {
319        self.adaptive_leaf_bound = k;
320        self
321    }
322
323    /// Set the minimum hessian sum for leaf output.
324    #[inline]
325    pub fn min_hessian_sum(mut self, min_h: f64) -> Self {
326        self.min_hessian_sum = Some(min_h);
327        self
328    }
329
330    /// Optionally set the minimum hessian sum for leaf output.
331    #[inline]
332    pub fn min_hessian_sum_opt(mut self, min_h: Option<f64>) -> Self {
333        self.min_hessian_sum = min_h;
334        self
335    }
336
337    /// Set the leaf prediction model type.
338    ///
339    /// [`LeafModelType::Linear`] is recommended for low-depth configurations
340    /// (depth 2--4) where per-leaf linear models significantly reduce
341    /// approximation error compared to constant leaves.
342    ///
343    /// [`LeafModelType::Adaptive`] automatically selects between closed-form and
344    /// a trainable model per leaf, using the Hoeffding bound for promotion.
345    #[inline]
346    pub fn leaf_model_type(mut self, lmt: LeafModelType) -> Self {
347        self.leaf_model_type = lmt;
348        self
349    }
350}
351
#[cfg(test)]
mod tests {
    use super::*;

    /// True when `actual` lies strictly within `f64::EPSILON` of `expected`.
    fn close(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < f64::EPSILON
    }

    /// The documented defaults are what `Default` actually produces.
    #[test]
    fn default_values() {
        let TreeConfig {
            max_depth,
            n_bins,
            lambda,
            gamma,
            grace_period,
            delta,
            feature_subsample_rate,
            ..
        } = TreeConfig::default();

        assert_eq!(max_depth, 6);
        assert_eq!(n_bins, 64);
        assert!(close(lambda, 1.0));
        assert!(close(gamma, 0.0));
        assert_eq!(grace_period, 200);
        assert!(close(delta, 1e-7));
        assert!(close(feature_subsample_rate, 1.0));
    }

    /// `new()` and `default()` agree on every core hyperparameter.
    #[test]
    fn new_equals_default() {
        let from_new = TreeConfig::new();
        let from_default = TreeConfig::default();

        assert_eq!(from_new.max_depth, from_default.max_depth);
        assert_eq!(from_new.n_bins, from_default.n_bins);
        assert!(close(from_new.lambda, from_default.lambda));
        assert!(close(from_new.gamma, from_default.gamma));
        assert_eq!(from_new.grace_period, from_default.grace_period);
        assert!(close(from_new.delta, from_default.delta));
        assert!(close(
            from_new.feature_subsample_rate,
            from_default.feature_subsample_rate
        ));
    }

    /// Chained setters each land in their own field.
    #[test]
    fn builder_chain() {
        let built = TreeConfig::new()
            .max_depth(10)
            .n_bins(128)
            .lambda(0.5)
            .gamma(0.1)
            .grace_period(500)
            .delta(1e-3)
            .feature_subsample_rate(0.8);

        assert_eq!(built.max_depth, 10);
        assert_eq!(built.n_bins, 128);
        assert!(close(built.lambda, 0.5));
        assert!(close(built.gamma, 0.1));
        assert_eq!(built.grace_period, 500);
        assert!(close(built.delta, 1e-3));
        assert!(close(built.feature_subsample_rate, 0.8));
    }

    /// Out-of-range subsample rates are pulled back into `[0.0, 1.0]`.
    #[test]
    fn feature_subsample_rate_clamped() {
        for (requested, stored) in [(1.5, 1.0), (-0.3, 0.0)] {
            let cfg = TreeConfig::new().feature_subsample_rate(requested);
            assert!(close(cfg.feature_subsample_rate, stored));
        }
    }

    /// The plain setter wraps the cap in `Some`.
    #[test]
    fn max_leaf_output_builder() {
        let stored = TreeConfig::new().max_leaf_output(1.5).max_leaf_output;
        assert_eq!(stored, Some(1.5));
    }

    /// The plain setter wraps the threshold in `Some`.
    #[test]
    fn min_hessian_sum_builder() {
        let stored = TreeConfig::new().min_hessian_sum(10.0).min_hessian_sum;
        assert_eq!(stored, Some(10.0));
    }

    /// Both output guards are off unless explicitly configured.
    #[test]
    fn max_leaf_output_default_none() {
        let defaults = TreeConfig::default();
        assert!(defaults.max_leaf_output.is_none());
        assert!(defaults.min_hessian_sum.is_none());
    }
}