1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
use crate::ensemble::SGBT;
use crate::loss::Loss;
impl<L: Loss> SGBT<L> {
/// Predict the raw output for a feature vector.
///
/// Once per-feature bandwidths have been auto-calibrated (from median split
/// threshold gaps), each tree is evaluated with sigmoid-blended soft routing;
/// a feature that has never been split on carries bandwidth = infinity and is
/// hard-routed. Before the first calibration the bandwidth table is empty and
/// every tree falls back to plain hard routing.
pub fn predict(&self, features: &[f64]) -> f64 {
    let lr = self.config.learning_rate;
    if self.auto_bandwidths.is_empty() {
        // No calibration data yet: hard-routed traversal for all trees.
        self.steps
            .iter()
            .fold(self.base_prediction, |acc, step| acc + lr * step.predict(features))
    } else {
        // Soft routing with per-feature auto-calibrated bandwidths.
        self.steps.iter().fold(self.base_prediction, |acc, step| {
            acc + lr * step.predict_smooth_auto(features, &self.auto_bandwidths)
        })
    }
}
/// Predict using sigmoid-blended soft routing with an explicit bandwidth.
///
/// A single bandwidth is applied uniformly to every feature. For the
/// auto-calibrated per-feature variant, use [`predict()`](SGBT::predict),
/// which switches to smooth routing once bandwidths are available.
pub fn predict_smooth(&self, features: &[f64], bandwidth: f64) -> f64 {
    let lr = self.config.learning_rate;
    self.steps.iter().fold(self.base_prediction, |acc, step| {
        acc + lr * step.predict_smooth(features, bandwidth)
    })
}
/// Per-feature auto-calibrated bandwidths used by `predict()`.
///
/// Returns an empty slice before the first training sample. Each entry is
/// keyed by feature index; a value of `f64::INFINITY` marks a feature with
/// no splits, which is therefore hard-routed.
pub fn auto_bandwidths(&self) -> &[f64] {
    self.auto_bandwidths.as_slice()
}
/// Predict with parent-leaf linear interpolation.
///
/// Each leaf's prediction is blended with its parent's preserved prediction,
/// weighted by sample count, so freshly created leaves do not emit stale
/// values.
pub fn predict_interpolated(&self, features: &[f64]) -> f64 {
    let lr = self.config.learning_rate;
    self.steps.iter().fold(self.base_prediction, |acc, step| {
        acc + lr * step.predict_interpolated(features)
    })
}
/// Predict with sibling-based interpolation for feature-continuous predictions.
///
/// Near a split threshold, the left and right subtree predictions are blended
/// linearly by the feature's distance from the threshold, using the
/// auto-calibrated bandwidths as the interpolation margin. The result varies
/// continuously with the features, removing step-function artifacts.
pub fn predict_sibling_interpolated(&self, features: &[f64]) -> f64 {
    let lr = self.config.learning_rate;
    self.steps.iter().fold(self.base_prediction, |acc, step| {
        acc + lr * step.predict_sibling_interpolated(features, &self.auto_bandwidths)
    })
}
/// Predict with graduated active-shadow blending.
///
/// During tree replacement, smoothly transitions between the active and
/// shadow trees to avoid prediction dips. Requires `shadow_warmup` to be
/// configured; when that feature is disabled this behaves like `predict()`.
pub fn predict_graduated(&self, features: &[f64]) -> f64 {
    let lr = self.config.learning_rate;
    self.steps.iter().fold(self.base_prediction, |acc, step| {
        acc + lr * step.predict_graduated(features)
    })
}
/// Predict with graduated blending + sibling interpolation (premium path).
///
/// Combines the graduated active-shadow handoff (no prediction dips during
/// tree replacement) with feature-continuous sibling interpolation (no
/// step-function artifacts at split boundaries).
pub fn predict_graduated_sibling_interpolated(&self, features: &[f64]) -> f64 {
    let lr = self.config.learning_rate;
    self.steps.iter().fold(self.base_prediction, |acc, step| {
        acc + lr * step.predict_graduated_sibling_interpolated(features, &self.auto_bandwidths)
    })
}
/// Predict with the loss transform applied (e.g., sigmoid for logistic loss).
pub fn predict_transformed(&self, features: &[f64]) -> f64 {
    let raw = self.predict(features);
    self.loss.predict_transform(raw)
}
/// Predict probability (alias for `predict_transformed`).
pub fn predict_proba(&self, features: &[f64]) -> f64 {
    self.loss.predict_transform(self.predict(features))
}
/// Predict with confidence estimation.
///
/// Returns `(prediction, confidence)` where confidence = 1 / sqrt(sum_variance).
/// Higher confidence indicates more certain predictions (leaves have seen
/// more hessian mass). Confidence of 0.0 means the model has no information.
///
/// The variance per tree is estimated as `1 / (H_sum + lambda)` at the
/// leaf where the sample lands. The ensemble variance is the sum of
/// per-tree variances (scaled by learning_rate²), and confidence is
/// the reciprocal of the standard deviation.
pub fn predict_with_confidence(&self, features: &[f64]) -> (f64, f64) {
let mut pred = self.base_prediction;
let mut total_variance = 0.0;
let lr2 = self.config.learning_rate * self.config.learning_rate;
for step in &self.steps {
let (value, variance) = step.predict_with_variance(features);
pred += self.config.learning_rate * value;
total_variance += lr2 * variance;
}
let confidence = if total_variance > 0.0 && total_variance.is_finite() {
1.0 / total_variance.sqrt()
} else {
0.0
};
(pred, confidence)
}
/// Batch prediction: one raw prediction per row of `feature_matrix`.
pub fn predict_batch(&self, feature_matrix: &[Vec<f64>]) -> Vec<f64> {
    let mut predictions = Vec::with_capacity(feature_matrix.len());
    for row in feature_matrix {
        predictions.push(self.predict(row));
    }
    predictions
}
}