irithyll_core/tree/builder.rs
1//! Incremental tree construction with histogram-based splitting.
2//!
3//! [`TreeConfig`] defines all hyperparameters for a single streaming decision tree:
4//! depth limits, regularization, binning granularity, and the Hoeffding bound
5//! confidence parameter that controls when splits are committed.
6
7use alloc::vec::Vec;
8
9use crate::feature::FeatureType;
10use crate::tree::leaf_model::LeafModelType;
11
12/// Configuration for a single streaming decision tree.
13///
14/// These parameters control tree growth, regularization, and the statistical
15/// confidence required before committing a split decision.
16///
17/// # Defaults
18///
19/// | Parameter | Default |
20/// |--------------------------|---------|
21/// | `max_depth` | 6 |
22/// | `n_bins` | 64 |
23/// | `lambda` | 1.0 |
24/// | `gamma` | 0.0 |
25/// | `grace_period` | 200 |
26/// | `delta` | 1e-7 |
27/// | `feature_subsample_rate` | 1.0 |
28#[derive(Debug, Clone)]
29pub struct TreeConfig {
30 /// Maximum tree depth. Deeper trees capture more interactions but risk
31 /// overfitting on streaming data. Default: 6.
32 pub max_depth: usize,
33
34 /// Number of histogram bins per feature. More bins give finer split
35 /// resolution at the cost of memory and slower convergence. Default: 64.
36 pub n_bins: usize,
37
38 /// L2 regularization parameter (lambda). Penalizes large leaf weights,
39 /// helping prevent overfitting. Appears in the denominator of the
40 /// leaf weight formula: w = -G / (H + lambda). Default: 1.0.
41 pub lambda: f64,
42
43 /// Minimum split gain threshold (gamma). A candidate split must achieve
44 /// gain > gamma to be accepted. Higher values produce more conservative
45 /// trees. Default: 0.0.
46 pub gamma: f64,
47
48 /// Minimum number of samples a leaf must accumulate before evaluating
49 /// potential splits. Also controls when bin edges are computed from
50 /// observed feature values. Default: 200.
51 pub grace_period: usize,
52
53 /// Hoeffding bound confidence parameter (delta). Smaller values require
54 /// more statistical evidence before committing a split, producing more
55 /// conservative but more reliable trees. The bound guarantees that the
56 /// chosen split is within epsilon of optimal with probability 1-delta.
57 /// Default: 1e-7.
58 pub delta: f64,
59
60 /// Fraction of features to consider at each split evaluation. 1.0 means
61 /// all features are evaluated; smaller values introduce randomness
62 /// (similar to random forest feature bagging). Default: 1.0.
63 pub feature_subsample_rate: f64,
64
65 /// Random seed for feature subsampling. Default: 42.
66 ///
67 /// Set by the ensemble orchestrator to ensure deterministic, diverse
68 /// behavior across trees (typically `config.seed ^ step_index`).
69 pub seed: u64,
70
71 /// Per-sample decay factor for leaf statistics.
72 ///
73 /// Computed from `leaf_half_life` as `exp(-ln(2) / half_life)`.
74 /// When `Some(alpha)`, leaf gradient/hessian sums and histogram bins
75 /// are decayed by `alpha` before each new accumulation.
76 /// `None` (default) means no decay.
77 pub leaf_decay_alpha: Option<f64>,
78
79 /// Re-evaluation interval for max-depth leaves.
80 ///
81 /// When `Some(n)`, leaves at max depth will re-evaluate potential splits
82 /// every `n` samples, allowing the tree to adapt its structure over time.
83 /// `None` (default) disables re-evaluation.
84 pub split_reeval_interval: Option<usize>,
85
86 /// Per-feature type declarations (continuous vs categorical).
87 ///
88 /// When `Some`, categorical features use one-bin-per-category binning and
89 /// Fisher optimal binary partitioning. `None` (default) treats all features
90 /// as continuous.
91 pub feature_types: Option<Vec<FeatureType>>,
92
93 /// Per-leaf gradient clipping threshold in standard deviations.
94 ///
95 /// When `Some(sigma)`, leaf-level EWMA gradient statistics are tracked and
96 /// incoming gradients are clamped to `mean ± sigma * std_dev`.
97 /// `None` (default) disables clipping.
98 pub gradient_clip_sigma: Option<f64>,
99
100 /// Per-feature monotonic constraints: +1 = increasing, -1 = decreasing, 0 = free.
101 ///
102 /// Candidate splits violating monotonicity are rejected.
103 /// `None` (default) means no constraints.
104 pub monotone_constraints: Option<Vec<i8>>,
105
106 /// Maximum absolute leaf output value.
107 ///
108 /// When `Some(max)`, leaf predictions are clamped to `[-max, max]`.
109 /// Prevents runaway leaf weights from causing prediction explosions
110 /// in feedback loops. `None` (default) means no clamping.
111 pub max_leaf_output: Option<f64>,
112
113 /// Per-leaf adaptive output bound (sigma multiplier).
114 ///
115 /// When `Some(k)`, each leaf tracks EWMA of its own output weight and
116 /// clamps predictions to `|output_mean| + k * output_std`.
117 /// `None` (default) disables adaptive bounds.
118 pub adaptive_leaf_bound: Option<f64>,
119
120 /// Per-split information criterion (Lunde-Kleppe-Skaug 2020).
121 ///
122 /// When `Some(cir_factor)`, replaces `max_depth` with a per-split
123 /// generalization test. Each candidate split must have
124 /// `gain > cir_factor * sigma^2_g / n * n_features`.
125 /// `max_depth * 2` becomes a hard safety ceiling.
126 ///
127 /// Typical: 7.5 (<=10 features), 9.0 (<=50), 11.0 (<=200).
128 /// `None` (default) uses static `max_depth` only.
129 pub adaptive_depth: Option<f64>,
130
131 /// Minimum hessian sum before a leaf produces non-zero output.
132 ///
133 /// When `Some(min_h)`, leaves with `hess_sum < min_h` return 0.0.
134 /// Prevents post-replacement spikes from fresh leaves with insufficient
135 /// samples. `None` (default) means all leaves contribute immediately.
136 pub min_hessian_sum: Option<f64>,
137
138 /// Leaf prediction model type.
139 ///
140 /// Controls how each leaf computes its prediction:
141 /// - [`ClosedForm`](LeafModelType::ClosedForm) (default): constant leaf weight
142 /// `w = -G / (H + lambda)`.
143 /// - [`Linear`](LeafModelType::Linear): online ridge regression with AdaGrad
144 /// optimization, learning a local `w . x + b` surface. Optional `decay` for
145 /// concept drift. Recommended for low-depth trees (depth 2--4).
146 /// - [`MLP`](LeafModelType::MLP): single hidden layer neural network per leaf.
147 /// Optional `decay` for concept drift.
148 /// - [`Adaptive`](LeafModelType::Adaptive): starts as closed-form, auto-promotes
149 /// to a more complex model when the Hoeffding bound (using [`delta`](Self::delta))
150 /// confirms it is statistically superior. No arbitrary thresholds.
151 pub leaf_model_type: LeafModelType,
152
153 /// Hoeffding bound range parameter (R).
154 ///
155 /// The Hoeffding bound is `epsilon = sqrt(R^2 * ln(1/delta) / (2n))`.
156 /// R is an upper bound on the range of the gain function. The default
157 /// R=1.0 is conservative but over-estimates when targets have low
158 /// variance. Set to `sqrt(target_variance)` for data-proportional bounds.
159 ///
160 /// `None` (default) uses R=1.0.
161 pub hoeffding_r: Option<f64>,
162}
163
164impl Default for TreeConfig {
165 fn default() -> Self {
166 Self {
167 max_depth: 6,
168 n_bins: 64,
169 lambda: 1.0,
170 gamma: 0.0,
171 grace_period: 200,
172 delta: 1e-7,
173 feature_subsample_rate: 1.0,
174 seed: 42,
175 leaf_decay_alpha: None,
176 split_reeval_interval: None,
177 feature_types: None,
178 gradient_clip_sigma: None,
179 monotone_constraints: None,
180 max_leaf_output: None,
181 adaptive_leaf_bound: None,
182 adaptive_depth: None,
183 min_hessian_sum: None,
184 leaf_model_type: LeafModelType::default(),
185 hoeffding_r: None,
186 }
187 }
188}
189
190impl TreeConfig {
191 /// Create a new `TreeConfig` with default parameters.
192 ///
193 /// Equivalent to `TreeConfig::default()`, but provided as a named
194 /// constructor for clarity in builder chains.
195 pub fn new() -> Self {
196 Self::default()
197 }
198
199 /// Set the maximum tree depth.
200 #[inline]
201 pub fn max_depth(mut self, max_depth: usize) -> Self {
202 self.max_depth = max_depth;
203 self
204 }
205
206 /// Set the number of histogram bins per feature.
207 #[inline]
208 pub fn n_bins(mut self, n_bins: usize) -> Self {
209 self.n_bins = n_bins;
210 self
211 }
212
213 /// Set the L2 regularization parameter (lambda).
214 #[inline]
215 pub fn lambda(mut self, lambda: f64) -> Self {
216 self.lambda = lambda;
217 self
218 }
219
220 /// Set the minimum split gain threshold (gamma).
221 #[inline]
222 pub fn gamma(mut self, gamma: f64) -> Self {
223 self.gamma = gamma;
224 self
225 }
226
227 /// Set the grace period (minimum samples before evaluating splits).
228 #[inline]
229 pub fn grace_period(mut self, grace_period: usize) -> Self {
230 self.grace_period = grace_period;
231 self
232 }
233
234 /// Set the Hoeffding bound confidence parameter (delta).
235 #[inline]
236 pub fn delta(mut self, delta: f64) -> Self {
237 self.delta = delta;
238 self
239 }
240
241 /// Set the feature subsample rate.
242 #[inline]
243 pub fn feature_subsample_rate(mut self, rate: f64) -> Self {
244 self.feature_subsample_rate = rate.clamp(0.0, 1.0);
245 self
246 }
247
248 /// Set the random seed for feature subsampling.
249 #[inline]
250 pub fn seed(mut self, seed: u64) -> Self {
251 self.seed = seed;
252 self
253 }
254
255 /// Set the per-sample decay factor for leaf statistics.
256 #[inline]
257 pub fn leaf_decay_alpha(mut self, alpha: f64) -> Self {
258 self.leaf_decay_alpha = Some(alpha);
259 self
260 }
261
262 /// Optionally set the per-sample decay factor for leaf statistics.
263 #[inline]
264 pub fn leaf_decay_alpha_opt(mut self, alpha: Option<f64>) -> Self {
265 self.leaf_decay_alpha = alpha;
266 self
267 }
268
269 /// Set the re-evaluation interval for max-depth leaves.
270 #[inline]
271 pub fn split_reeval_interval(mut self, interval: usize) -> Self {
272 self.split_reeval_interval = Some(interval);
273 self
274 }
275
276 /// Optionally set the re-evaluation interval for max-depth leaves.
277 #[inline]
278 pub fn split_reeval_interval_opt(mut self, interval: Option<usize>) -> Self {
279 self.split_reeval_interval = interval;
280 self
281 }
282
283 /// Set the per-feature type declarations.
284 #[inline]
285 pub fn feature_types(mut self, types: Vec<FeatureType>) -> Self {
286 self.feature_types = Some(types);
287 self
288 }
289
290 /// Optionally set the per-feature type declarations.
291 #[inline]
292 pub fn feature_types_opt(mut self, types: Option<Vec<FeatureType>>) -> Self {
293 self.feature_types = types;
294 self
295 }
296
297 /// Set the gradient clipping threshold in standard deviations.
298 #[inline]
299 pub fn gradient_clip_sigma(mut self, sigma: f64) -> Self {
300 self.gradient_clip_sigma = Some(sigma);
301 self
302 }
303
304 /// Optionally set the gradient clipping threshold.
305 #[inline]
306 pub fn gradient_clip_sigma_opt(mut self, sigma: Option<f64>) -> Self {
307 self.gradient_clip_sigma = sigma;
308 self
309 }
310
311 /// Set per-feature monotonic constraints.
312 #[inline]
313 pub fn monotone_constraints(mut self, constraints: Vec<i8>) -> Self {
314 self.monotone_constraints = Some(constraints);
315 self
316 }
317
318 /// Optionally set per-feature monotonic constraints.
319 #[inline]
320 pub fn monotone_constraints_opt(mut self, constraints: Option<Vec<i8>>) -> Self {
321 self.monotone_constraints = constraints;
322 self
323 }
324
325 /// Set the maximum absolute leaf output value.
326 #[inline]
327 pub fn max_leaf_output(mut self, max: f64) -> Self {
328 self.max_leaf_output = Some(max);
329 self
330 }
331
332 /// Optionally set the maximum absolute leaf output value.
333 #[inline]
334 pub fn max_leaf_output_opt(mut self, max: Option<f64>) -> Self {
335 self.max_leaf_output = max;
336 self
337 }
338
339 /// Optionally set per-leaf adaptive output bound.
340 #[inline]
341 pub fn adaptive_leaf_bound_opt(mut self, k: Option<f64>) -> Self {
342 self.adaptive_leaf_bound = k;
343 self
344 }
345
346 /// Set the per-split information criterion factor (Lunde-Kleppe-Skaug 2020).
347 ///
348 /// When enabled, each candidate split must pass a generalization test
349 /// before being committed. `max_depth * 2` becomes a hard safety ceiling.
350 /// Typical: 7.5 (<=10 features), 9.0 (<=50), 11.0 (<=200).
351 #[inline]
352 pub fn adaptive_depth(mut self, factor: f64) -> Self {
353 self.adaptive_depth = Some(factor);
354 self
355 }
356
357 /// Optionally set the per-split information criterion factor.
358 #[inline]
359 pub fn adaptive_depth_opt(mut self, factor: Option<f64>) -> Self {
360 self.adaptive_depth = factor;
361 self
362 }
363
364 /// Set the minimum hessian sum for leaf output.
365 #[inline]
366 pub fn min_hessian_sum(mut self, min_h: f64) -> Self {
367 self.min_hessian_sum = Some(min_h);
368 self
369 }
370
371 /// Optionally set the minimum hessian sum for leaf output.
372 #[inline]
373 pub fn min_hessian_sum_opt(mut self, min_h: Option<f64>) -> Self {
374 self.min_hessian_sum = min_h;
375 self
376 }
377
378 /// Set the leaf prediction model type.
379 ///
380 /// [`LeafModelType::Linear`] is recommended for low-depth configurations
381 /// (depth 2--4) where per-leaf linear models significantly reduce
382 /// approximation error compared to constant leaves.
383 ///
384 /// [`LeafModelType::Adaptive`] automatically selects between closed-form and
385 /// a trainable model per leaf, using the Hoeffding bound for promotion.
386 #[inline]
387 pub fn leaf_model_type(mut self, lmt: LeafModelType) -> Self {
388 self.leaf_model_type = lmt;
389 self
390 }
391
392 /// Set the Hoeffding bound range parameter (R).
393 ///
394 /// Controls `epsilon = sqrt(R^2 * ln(1/delta) / (2n))`. Set to
395 /// `sqrt(target_variance)` for data-proportional split thresholds.
396 #[inline]
397 pub fn hoeffding_r(mut self, r: f64) -> Self {
398 self.hoeffding_r = Some(r);
399 self
400 }
401
402 /// Optionally set the Hoeffding bound range parameter.
403 #[inline]
404 pub fn hoeffding_r_opt(mut self, r: Option<f64>) -> Self {
405 self.hoeffding_r = r;
406 self
407 }
408}
409
410#[cfg(test)]
411mod tests {
412 use super::*;
413
414 #[test]
415 fn default_values() {
416 let cfg = TreeConfig::default();
417 assert_eq!(cfg.max_depth, 6);
418 assert_eq!(cfg.n_bins, 64);
419 assert!((cfg.lambda - 1.0).abs() < f64::EPSILON);
420 assert!((cfg.gamma - 0.0).abs() < f64::EPSILON);
421 assert_eq!(cfg.grace_period, 200);
422 assert!((cfg.delta - 1e-7).abs() < f64::EPSILON);
423 assert!((cfg.feature_subsample_rate - 1.0).abs() < f64::EPSILON);
424 }
425
426 #[test]
427 fn new_equals_default() {
428 let a = TreeConfig::new();
429 let b = TreeConfig::default();
430 assert_eq!(a.max_depth, b.max_depth);
431 assert_eq!(a.n_bins, b.n_bins);
432 assert!((a.lambda - b.lambda).abs() < f64::EPSILON);
433 assert!((a.gamma - b.gamma).abs() < f64::EPSILON);
434 assert_eq!(a.grace_period, b.grace_period);
435 assert!((a.delta - b.delta).abs() < f64::EPSILON);
436 assert!((a.feature_subsample_rate - b.feature_subsample_rate).abs() < f64::EPSILON);
437 }
438
439 #[test]
440 fn builder_chain() {
441 let cfg = TreeConfig::new()
442 .max_depth(10)
443 .n_bins(128)
444 .lambda(0.5)
445 .gamma(0.1)
446 .grace_period(500)
447 .delta(1e-3)
448 .feature_subsample_rate(0.8);
449
450 assert_eq!(cfg.max_depth, 10);
451 assert_eq!(cfg.n_bins, 128);
452 assert!((cfg.lambda - 0.5).abs() < f64::EPSILON);
453 assert!((cfg.gamma - 0.1).abs() < f64::EPSILON);
454 assert_eq!(cfg.grace_period, 500);
455 assert!((cfg.delta - 1e-3).abs() < f64::EPSILON);
456 assert!((cfg.feature_subsample_rate - 0.8).abs() < f64::EPSILON);
457 }
458
459 #[test]
460 fn feature_subsample_rate_clamped() {
461 let cfg = TreeConfig::new().feature_subsample_rate(1.5);
462 assert!((cfg.feature_subsample_rate - 1.0).abs() < f64::EPSILON);
463
464 let cfg = TreeConfig::new().feature_subsample_rate(-0.3);
465 assert!((cfg.feature_subsample_rate - 0.0).abs() < f64::EPSILON);
466 }
467
468 #[test]
469 fn max_leaf_output_builder() {
470 let cfg = TreeConfig::new().max_leaf_output(1.5);
471 assert_eq!(cfg.max_leaf_output, Some(1.5));
472 }
473
474 #[test]
475 fn min_hessian_sum_builder() {
476 let cfg = TreeConfig::new().min_hessian_sum(10.0);
477 assert_eq!(cfg.min_hessian_sum, Some(10.0));
478 }
479
480 #[test]
481 fn max_leaf_output_default_none() {
482 let cfg = TreeConfig::default();
483 assert!(cfg.max_leaf_output.is_none());
484 assert!(cfg.min_hessian_sum.is_none());
485 }
486}