sklears_multioutput/ensemble.rs

//! Ensemble methods for multi-output learning

// Use SciRS2-Core for arrays and random number generation (SciRS2 Policy)
use scirs2_core::ndarray::{Array1, Array2, ArrayView2};
use scirs2_core::random::{thread_rng, Rng};
use sklears_core::{
    error::{Result as SklResult, SklearsError},
    traits::{Estimator, Fit, Predict, Untrained},
    types::Float,
};

/// Gradient Boosting Multi-Output
///
/// A gradient boosting implementation that can handle multiple output variables simultaneously.
/// This implementation builds an additive model of weak learners (one decision stump per
/// target at each stage) in a forward stage-wise fashion, optimizing a squared-error loss
/// for all targets jointly.
///
/// # Mathematical Foundation
///
/// For multi-target regression, gradient boosting minimizes:
/// - L(y, F(x)) = Σ_i Σ_j (y_ij - F_j(x_i))^2 for all samples i and targets j
/// - F_j(x) = F_0j + Σ_m ν * h_mj(x), where h_mj is the weak learner for target j at
///   stage m and ν is the learning rate
/// - At each stage, fit weak learners to the negative gradients: -∂L/∂F_j
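/// - For squared loss, -∂L/∂F_j(x_i) ∝ y_ij - F_j(x_i), so each stage simply fits the
///   current residuals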
///
/// # Examples
///
/// ```
/// use sklears_multioutput::GradientBoostingMultiOutput;
/// use sklears_core::traits::{Predict, Fit};
/// // Use SciRS2-Core for arrays and random number generation (SciRS2 Policy)
/// use scirs2_core::ndarray::array;
///
/// let X = array![[1.0, 2.0], [2.0, 3.0], [3.0, 1.0], [4.0, 4.0]];
/// let y = array![[1.5, 2.5], [2.5, 3.5], [3.5, 1.5], [4.5, 4.5]];
///
/// let gbm = GradientBoostingMultiOutput::new()
///     .n_estimators(50)
///     .learning_rate(0.1)
///     .max_depth(3);
///
/// let fitted = gbm.fit(&X.view(), &y.view()).unwrap();
/// let predictions = fitted.predict(&X.view()).unwrap();
/// ```
#[derive(Debug, Clone)]
pub struct GradientBoostingMultiOutput<S = Untrained> {
    state: S,
    /// Number of boosting stages to perform
    n_estimators: usize,
    /// Learning rate that shrinks the contribution of each weak learner
    learning_rate: Float,
    /// Maximum depth of the individual estimators
    /// (currently unused: the weak learners are fixed depth-1 stumps)
    max_depth: usize,
    /// Minimum number of samples required to split an internal node
    /// (currently enforced only as a minimum total sample count in `fit`)
    min_samples_split: usize,
    /// Random state for reproducibility
    /// (currently not used to seed the RNG; see the note in `fit`)
    random_state: Option<u64>,
}

/// Trained state for Gradient Boosting Multi-Output
#[derive(Debug, Clone)]
pub struct GradientBoostingMultiOutputTrained {
    /// Initial predictions (mean of targets)
    pub initial_predictions: Array1<Float>,
    /// Weak learners for each stage and target
    pub estimators: Vec<Vec<WeakLearner>>,
    /// Number of features
    pub n_features: usize,
    /// Number of targets
    pub n_targets: usize,
}

/// A simple weak learner (decision stump) for gradient boosting
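///
/// A stump partitions samples on a single feature and predicts a constant per side:
/// h(x) = left_value if x[feature_idx] <= threshold, otherwise right_value.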
#[derive(Debug, Clone)]
pub struct WeakLearner {
    /// Feature index to split on
    feature_idx: usize,
    /// Threshold value for splitting
    threshold: Float,
    /// Prediction for left branch (feature <= threshold)
    left_value: Float,
    /// Prediction for right branch (feature > threshold)
    right_value: Float,
}

impl GradientBoostingMultiOutput<Untrained> {
    /// Create a new GradientBoostingMultiOutput instance
    pub fn new() -> Self {
        Self {
            state: Untrained,
            n_estimators: 100,
            learning_rate: 0.1,
            max_depth: 3,
            min_samples_split: 2,
            random_state: None,
        }
    }

    /// Set the number of estimators
    pub fn n_estimators(mut self, n_estimators: usize) -> Self {
        self.n_estimators = n_estimators;
        self
    }

    /// Set the learning rate
    pub fn learning_rate(mut self, learning_rate: Float) -> Self {
        self.learning_rate = learning_rate;
        self
    }

    /// Set the maximum depth (stored but currently unused; see the field docs)
    pub fn max_depth(mut self, max_depth: usize) -> Self {
        self.max_depth = max_depth;
        self
    }

    /// Set the minimum samples to split
    pub fn min_samples_split(mut self, min_samples_split: usize) -> Self {
        self.min_samples_split = min_samples_split;
        self
    }

    /// Set the random state (stored but currently unused; see the note in `fit`)
    pub fn random_state(mut self, random_state: Option<u64>) -> Self {
        self.random_state = random_state;
        self
    }
}

impl Default for GradientBoostingMultiOutput<Untrained> {
    fn default() -> Self {
        Self::new()
    }
}

impl Estimator for GradientBoostingMultiOutput<Untrained> {
    type Config = ();
    type Error = SklearsError;
    type Float = Float;

    fn config(&self) -> &Self::Config {
        &()
    }
}

impl Fit<ArrayView2<'_, Float>, ArrayView2<'_, Float>> for GradientBoostingMultiOutput<Untrained> {
    type Fitted = GradientBoostingMultiOutput<GradientBoostingMultiOutputTrained>;

    fn fit(self, x: &ArrayView2<'_, Float>, y: &ArrayView2<'_, Float>) -> SklResult<Self::Fitted> {
        let (n_samples, n_features) = x.dim();
        let (y_samples, n_targets) = y.dim();

        if n_samples != y_samples {
            return Err(SklearsError::InvalidInput(format!(
                "Number of samples in X ({}) and y ({}) must match",
                n_samples, y_samples
            )));
        }

        if n_samples < self.min_samples_split {
            return Err(SklearsError::InvalidInput(format!(
                "Not enough samples ({}) to perform gradient boosting (min_samples_split = {})",
                n_samples, self.min_samples_split
            )));
        }

        // Initialize random number generator
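        // NOTE: `random_state` is stored but not yet used to seed the RNG, so
        // fits are not reproducible across runs even when a seed is supplied.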
        let mut rng = thread_rng();

        // Initialize predictions with the mean of each target
        let mut initial_predictions = Array1::<Float>::zeros(n_targets);
        for target_idx in 0..n_targets {
            let target_sum: Float = y.column(target_idx).sum();
            initial_predictions[target_idx] = target_sum / n_samples as Float;
        }

        // Initialize current predictions
        let mut current_predictions = Array2::<Float>::zeros((n_samples, n_targets));
        for target_idx in 0..n_targets {
            for sample_idx in 0..n_samples {
                current_predictions[[sample_idx, target_idx]] = initial_predictions[target_idx];
            }
        }

        let mut estimators = Vec::new();

        // Boosting iterations
        for _stage in 0..self.n_estimators {
            let mut stage_estimators = Vec::new();

            // Train one weak learner for each target
            for target_idx in 0..n_targets {
                // Compute negative gradients (residuals for squared loss)
                let mut residuals = Array1::<Float>::zeros(n_samples);
                for sample_idx in 0..n_samples {
                    residuals[sample_idx] =
                        y[[sample_idx, target_idx]] - current_predictions[[sample_idx, target_idx]];
                }

                // Train weak learner on residuals
                let weak_learner = self.train_weak_learner(x, &residuals, &mut rng)?;

                // Make predictions with this weak learner
                let weak_predictions = self.predict_weak_learner(&weak_learner, x);

                // Update current predictions
                for sample_idx in 0..n_samples {
                    current_predictions[[sample_idx, target_idx]] +=
                        self.learning_rate * weak_predictions[sample_idx];
                }

                stage_estimators.push(weak_learner);
            }

            estimators.push(stage_estimators);
        }

        Ok(GradientBoostingMultiOutput {
            state: GradientBoostingMultiOutputTrained {
                initial_predictions,
                estimators,
                n_features,
                n_targets,
            },
            n_estimators: self.n_estimators,
            learning_rate: self.learning_rate,
            max_depth: self.max_depth,
            min_samples_split: self.min_samples_split,
            random_state: self.random_state,
        })
    }
}

impl GradientBoostingMultiOutput<Untrained> {
    fn train_weak_learner(
        &self,
        x: &ArrayView2<'_, Float>,
        residuals: &Array1<Float>,
        rng: &mut scirs2_core::random::CoreRandom,
    ) -> SklResult<WeakLearner> {
        let (n_samples, n_features) = x.dim();

        if n_samples < 2 {
            return Err(SklearsError::InvalidInput(
                "Need at least 2 samples to train a weak learner".to_string(),
            ));
        }

        let mut best_loss = Float::INFINITY;
        let mut best_feature = 0;
        let mut best_threshold = 0.0;
        let mut best_left_value = 0.0;
        let mut best_right_value = 0.0;

        // Try random features and thresholds
        let n_trials = (n_features * 10).min(100);
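        // Randomized split search: rather than scanning every feature/threshold
        // pair exhaustively, sample candidate splits and keep the one with the
        // lowest squared-error loss. This bounds the cost of fitting each stump.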
        for _ in 0..n_trials {
            let feature_idx = rng.gen_range(0..n_features);

            // Get feature values and find min/max
            let feature_values: Vec<Float> = (0..n_samples).map(|i| x[[i, feature_idx]]).collect();

            let min_val = feature_values
                .iter()
                .cloned()
                .fold(Float::INFINITY, Float::min);
            let max_val = feature_values
                .iter()
                .cloned()
                .fold(Float::NEG_INFINITY, Float::max);

            if (max_val - min_val).abs() < 1e-10 {
                continue; // Skip if all values are (nearly) the same
            }

            // Try random thresholds
            for _ in 0..10 {
                let threshold = min_val + rng.gen::<Float>() * (max_val - min_val);

                // Split samples based on threshold
                let mut left_residuals = Vec::new();
                let mut right_residuals = Vec::new();

                for sample_idx in 0..n_samples {
                    if x[[sample_idx, feature_idx]] <= threshold {
                        left_residuals.push(residuals[sample_idx]);
                    } else {
                        right_residuals.push(residuals[sample_idx]);
                    }
                }

                if left_residuals.is_empty() || right_residuals.is_empty() {
                    continue;
                }

                // Compute predictions for each split (the mean residual minimizes squared error)
                let left_value =
                    left_residuals.iter().sum::<Float>() / left_residuals.len() as Float;
                let right_value =
                    right_residuals.iter().sum::<Float>() / right_residuals.len() as Float;

                // Compute loss (sum of squared errors within each branch)
                let left_loss: Float = left_residuals
                    .iter()
                    .map(|&r| (r - left_value).powi(2))
                    .sum();
                let right_loss: Float = right_residuals
                    .iter()
                    .map(|&r| (r - right_value).powi(2))
                    .sum();
                let total_loss = left_loss + right_loss;

                if total_loss < best_loss {
                    best_loss = total_loss;
                    best_feature = feature_idx;
                    best_threshold = threshold;
                    best_left_value = left_value;
                    best_right_value = right_value;
                }
            }
        }

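        // If no valid split was found (e.g. every sampled feature was constant),
        // the defaults below form a no-op stump that predicts 0 for all samples.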
        Ok(WeakLearner {
            feature_idx: best_feature,
            threshold: best_threshold,
            left_value: best_left_value,
            right_value: best_right_value,
        })
    }

    fn predict_weak_learner(
        &self,
        learner: &WeakLearner,
        x: &ArrayView2<'_, Float>,
    ) -> Array1<Float> {
        let n_samples = x.nrows();
        let mut predictions = Array1::<Float>::zeros(n_samples);

        for sample_idx in 0..n_samples {
            if x[[sample_idx, learner.feature_idx]] <= learner.threshold {
                predictions[sample_idx] = learner.left_value;
            } else {
                predictions[sample_idx] = learner.right_value;
            }
        }

        predictions
    }
}

impl Predict<ArrayView2<'_, Float>, Array2<Float>>
    for GradientBoostingMultiOutput<GradientBoostingMultiOutputTrained>
{
    fn predict(&self, x: &ArrayView2<'_, Float>) -> SklResult<Array2<Float>> {
        let (n_samples, n_features) = x.dim();

        if n_features != self.state.n_features {
            return Err(SklearsError::InvalidInput(format!(
                "Expected {} features, got {}",
                self.state.n_features, n_features
            )));
        }

        let mut predictions = Array2::<Float>::zeros((n_samples, self.state.n_targets));

        // Initialize with base predictions
        for target_idx in 0..self.state.n_targets {
            for sample_idx in 0..n_samples {
                predictions[[sample_idx, target_idx]] = self.state.initial_predictions[target_idx];
            }
        }

        // Add contributions from all stages
        for stage_estimators in &self.state.estimators {
            for (target_idx, weak_learner) in stage_estimators.iter().enumerate() {
                for sample_idx in 0..n_samples {
                    let prediction =
                        if x[[sample_idx, weak_learner.feature_idx]] <= weak_learner.threshold {
                            weak_learner.left_value
                        } else {
                            weak_learner.right_value
                        };

                    predictions[[sample_idx, target_idx]] += self.learning_rate * prediction;
                }
            }
        }

        Ok(predictions)
    }
}

impl GradientBoostingMultiOutput<GradientBoostingMultiOutputTrained> {
    /// Get the feature importance scores
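    ///
    /// Importance is measured as the fraction of stumps that split on each
    /// feature, so the scores sum to 1 whenever at least one split was made.
    ///
    /// # Examples
    ///
    /// ```
    /// use sklears_multioutput::GradientBoostingMultiOutput;
    /// use sklears_core::traits::Fit;
    /// use scirs2_core::ndarray::array;
    ///
    /// let x = array![[1.0, 2.0], [2.0, 3.0], [3.0, 1.0], [4.0, 4.0]];
    /// let y = array![[1.5, 2.5], [2.5, 3.5], [3.5, 1.5], [4.5, 4.5]];
    ///
    /// let fitted = GradientBoostingMultiOutput::new()
    ///     .n_estimators(10)
    ///     .fit(&x.view(), &y.view())
    ///     .unwrap();
    ///
    /// let importances = fitted.feature_importances();
    /// assert!((importances.sum() - 1.0).abs() < 1e-9);
    /// ```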
    pub fn feature_importances(&self) -> Array1<Float> {
        let mut importances = Array1::<Float>::zeros(self.state.n_features);

        for stage_estimators in &self.state.estimators {
            for weak_learner in stage_estimators {
                // Simple importance: how often each feature is used for a split
                importances[weak_learner.feature_idx] += 1.0;
            }
        }

        // Normalize so the importances sum to 1
        let total = importances.sum();
        if total > 0.0 {
            importances /= total;
        }

        importances
    }

    /// Get the training loss history
    ///
    /// Not yet implemented: recording the loss would require storing it during
    /// `fit`, so this currently returns an empty vector as a placeholder.
    pub fn training_loss_history(&self) -> Vec<Float> {
        Vec::new()
    }
}
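
// A minimal smoke-test sketch: it fits the booster on a tiny dataset and checks
// only shapes and invariants, since the unseeded randomized split search makes
// exact prediction values non-deterministic.
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;
    use sklears_core::traits::{Fit, Predict};

    #[test]
    fn fit_predict_shapes_and_importances() {
        let x = array![[1.0, 2.0], [2.0, 3.0], [3.0, 1.0], [4.0, 4.0]];
        let y = array![[1.5, 2.5], [2.5, 3.5], [3.5, 1.5], [4.5, 4.5]];

        let fitted = GradientBoostingMultiOutput::new()
            .n_estimators(5)
            .fit(&x.view(), &y.view())
            .unwrap();

        // Predictions must have one row per sample and one column per target.
        let predictions = fitted.predict(&x.view()).unwrap();
        assert_eq!(predictions.dim(), (4, 2));

        // Importances are non-negative and normalized when any split was made.
        let importances = fitted.feature_importances();
        assert!(importances.iter().all(|&v| v >= 0.0));
        assert!((importances.sum() - 1.0).abs() < 1e-9);
    }
}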