// ghostflow_ml/gradient_boosting.rs

//! Advanced Gradient Boosting Implementations
//!
//! This module provides XGBoost, LightGBM, and CatBoost-style gradient boosting algorithms.
use std::collections::HashMap;

use ghostflow_core::Tensor;
use rand::prelude::*;
use rayon::prelude::*;

10/// XGBoost-style Gradient Boosting
11/// 
12/// Features:
13/// - Regularized learning objective (L1 and L2)
14/// - Column (feature) subsampling
15/// - Shrinkage (learning rate)
16/// - Tree pruning with max_depth
17/// - Histogram-based split finding
18pub struct XGBoostClassifier {
19    pub n_estimators: usize,
20    pub learning_rate: f32,
21    pub max_depth: usize,
22    pub min_child_weight: f32,
23    pub subsample: f32,
24    pub colsample_bytree: f32,
25    pub reg_lambda: f32,  // L2 regularization
26    pub reg_alpha: f32,   // L1 regularization
27    pub gamma: f32,       // Minimum loss reduction for split
28    trees: Vec<XGBTree>,
29    base_score: f32,
30}
31
32#[derive(Clone)]
33struct XGBTree {
34    nodes: Vec<XGBNode>,
35    feature_indices: Vec<usize>,
36}
37
/// One node of a flat-array tree. Internal nodes route on
/// `feature`/`threshold`; leaves carry an output weight in `value`.
#[derive(Clone)]
struct XGBNode {
    is_leaf: bool,
    /// Column tested by an internal node (unused on leaves).
    feature: usize,
    /// Split point: samples with value <= threshold go left.
    threshold: f32,
    /// Index of the left child in the tree's node array.
    left: usize,
    /// Index of the right child in the tree's node array.
    right: usize,
    /// Leaf output weight (unused on internal nodes).
    value: f32,
    /// Loss reduction achieved by this node's split.
    gain: f32,
}
48
49impl XGBoostClassifier {
50    pub fn new(n_estimators: usize) -> Self {
51        Self {
52            n_estimators,
53            learning_rate: 0.3,
54            max_depth: 6,
55            min_child_weight: 1.0,
56            subsample: 1.0,
57            colsample_bytree: 1.0,
58            reg_lambda: 1.0,
59            reg_alpha: 0.0,
60            gamma: 0.0,
61            trees: Vec::new(),
62            base_score: 0.5,
63        }
64    }
65
66    pub fn learning_rate(mut self, lr: f32) -> Self {
67        self.learning_rate = lr;
68        self
69    }
70
71    pub fn max_depth(mut self, depth: usize) -> Self {
72        self.max_depth = depth;
73        self
74    }
75
76    pub fn subsample(mut self, ratio: f32) -> Self {
77        self.subsample = ratio;
78        self
79    }
80
81    pub fn colsample_bytree(mut self, ratio: f32) -> Self {
82        self.colsample_bytree = ratio;
83        self
84    }
85
86    pub fn reg_lambda(mut self, lambda: f32) -> Self {
87        self.reg_lambda = lambda;
88        self
89    }
90
91    pub fn reg_alpha(mut self, alpha: f32) -> Self {
92        self.reg_alpha = alpha;
93        self
94    }
95
96    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
97        let n_samples = x.dims()[0];
98        let n_features = x.dims()[1];
99        let x_data = x.data_f32();
100        let y_data = y.data_f32();
101
102        // Initialize predictions with base score
103        let mut predictions = vec![self.base_score; n_samples];
104        
105        // Build trees sequentially
106        for _ in 0..self.n_estimators {
107            // Calculate gradients and hessians (for logistic loss)
108            let mut gradients = Vec::with_capacity(n_samples);
109            let mut hessians = Vec::with_capacity(n_samples);
110            
111            for i in 0..n_samples {
112                let pred = 1.0 / (1.0 + (-predictions[i]).exp());
113                let grad = pred - y_data[i];
114                let hess = pred * (1.0 - pred);
115                gradients.push(grad);
116                hessians.push(hess.max(1e-6)); // Avoid division by zero
117            }
118
119            // Row subsampling
120            let mut rng = thread_rng();
121            let sample_indices: Vec<usize> = if self.subsample < 1.0 {
122                let n_subsample = (n_samples as f32 * self.subsample) as usize;
123                (0..n_samples).choose_multiple(&mut rng, n_subsample)
124            } else {
125                (0..n_samples).collect()
126            };
127
128            // Column subsampling
129            let feature_indices: Vec<usize> = if self.colsample_bytree < 1.0 {
130                let n_features_sample = (n_features as f32 * self.colsample_bytree) as usize;
131                (0..n_features).choose_multiple(&mut rng, n_features_sample)
132            } else {
133                (0..n_features).collect()
134            };
135
136            // Build tree
137            let tree = self.build_tree(
138                &x_data,
139                &gradients,
140                &hessians,
141                &sample_indices,
142                &feature_indices,
143                n_features,
144                0,
145            );
146
147            // Update predictions
148            for i in 0..n_samples {
149                let leaf_value = self.predict_tree(&tree, &x_data[i * n_features..(i + 1) * n_features]);
150                predictions[i] += self.learning_rate * leaf_value;
151            }
152
153            self.trees.push(tree);
154        }
155    }
156
157    fn build_tree(
158        &self,
159        x_data: &[f32],
160        gradients: &[f32],
161        hessians: &[f32],
162        sample_indices: &[usize],
163        feature_indices: &[usize],
164        n_features: usize,
165        depth: usize,
166    ) -> XGBTree {
167        let mut nodes = Vec::new();
168        
169        // Build root node
170        self.build_node(
171            x_data,
172            gradients,
173            hessians,
174            sample_indices,
175            feature_indices,
176            n_features,
177            depth,
178            &mut nodes,
179        );
180
181        XGBTree {
182            nodes,
183            feature_indices: feature_indices.to_vec(),
184        }
185    }
186
187    fn build_node(
188        &self,
189        x_data: &[f32],
190        gradients: &[f32],
191        hessians: &[f32],
192        sample_indices: &[usize],
193        feature_indices: &[usize],
194        n_features: usize,
195        depth: usize,
196        nodes: &mut Vec<XGBNode>,
197    ) -> usize {
198        let node_idx = nodes.len();
199
200        // Calculate node statistics
201        let sum_grad: f32 = sample_indices.iter().map(|&i| gradients[i]).sum();
202        let sum_hess: f32 = sample_indices.iter().map(|&i| hessians[i]).sum();
203
204        // Calculate leaf value with regularization
205        let leaf_value = -sum_grad / (sum_hess + self.reg_lambda);
206
207        // Check stopping criteria
208        if depth >= self.max_depth || sample_indices.len() < 2 || sum_hess < self.min_child_weight {
209            nodes.push(XGBNode {
210                is_leaf: true,
211                feature: 0,
212                threshold: 0.0,
213                left: 0,
214                right: 0,
215                value: leaf_value,
216                gain: 0.0,
217            });
218            return node_idx;
219        }
220
221        // Find best split
222        let (best_feature, best_threshold, best_gain, left_indices, right_indices) =
223            self.find_best_split(
224                x_data,
225                gradients,
226                hessians,
227                sample_indices,
228                feature_indices,
229                n_features,
230                sum_grad,
231                sum_hess,
232            );
233
234        // Check if split is beneficial
235        if best_gain < self.gamma || left_indices.is_empty() || right_indices.is_empty() {
236            nodes.push(XGBNode {
237                is_leaf: true,
238                feature: 0,
239                threshold: 0.0,
240                left: 0,
241                right: 0,
242                value: leaf_value,
243                gain: 0.0,
244            });
245            return node_idx;
246        }
247
248        // Create internal node
249        nodes.push(XGBNode {
250            is_leaf: false,
251            feature: best_feature,
252            threshold: best_threshold,
253            left: 0,  // Will be updated
254            right: 0, // Will be updated
255            value: 0.0,
256            gain: best_gain,
257        });
258
259        // Build left and right children
260        let left_idx = self.build_node(
261            x_data,
262            gradients,
263            hessians,
264            &left_indices,
265            feature_indices,
266            n_features,
267            depth + 1,
268            nodes,
269        );
270
271        let right_idx = self.build_node(
272            x_data,
273            gradients,
274            hessians,
275            &right_indices,
276            feature_indices,
277            n_features,
278            depth + 1,
279            nodes,
280        );
281
282        // Update children indices
283        nodes[node_idx].left = left_idx;
284        nodes[node_idx].right = right_idx;
285
286        node_idx
287    }
288
289    fn find_best_split(
290        &self,
291        x_data: &[f32],
292        gradients: &[f32],
293        hessians: &[f32],
294        sample_indices: &[usize],
295        feature_indices: &[usize],
296        n_features: usize,
297        sum_grad: f32,
298        sum_hess: f32,
299    ) -> (usize, f32, f32, Vec<usize>, Vec<usize>) {
300        let mut best_gain = 0.0;
301        let mut best_feature = 0;
302        let mut best_threshold = 0.0;
303        let mut best_left = Vec::new();
304        let mut best_right = Vec::new();
305
306        for &feature in feature_indices {
307            // Get feature values for samples
308            let mut feature_values: Vec<(f32, usize)> = sample_indices
309                .iter()
310                .map(|&i| (x_data[i * n_features + feature], i))
311                .collect();
312            
313            feature_values.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
314
315            // Try splits
316            let mut left_grad = 0.0;
317            let mut left_hess = 0.0;
318
319            for i in 0..feature_values.len() - 1 {
320                let idx = feature_values[i].1;
321                left_grad += gradients[idx];
322                left_hess += hessians[idx];
323
324                let right_grad = sum_grad - left_grad;
325                let right_hess = sum_hess - left_hess;
326
327                // Check minimum child weight
328                if left_hess < self.min_child_weight || right_hess < self.min_child_weight {
329                    continue;
330                }
331
332                // Calculate gain with regularization
333                let gain = self.calculate_gain(left_grad, left_hess, right_grad, right_hess, sum_grad, sum_hess);
334
335                if gain > best_gain {
336                    best_gain = gain;
337                    best_feature = feature;
338                    best_threshold = (feature_values[i].0 + feature_values[i + 1].0) / 2.0;
339                    
340                    best_left = feature_values[..=i].iter().map(|&(_, idx)| idx).collect();
341                    best_right = feature_values[i + 1..].iter().map(|&(_, idx)| idx).collect();
342                }
343            }
344        }
345
346        (best_feature, best_threshold, best_gain, best_left, best_right)
347    }
348
349    fn calculate_gain(
350        &self,
351        left_grad: f32,
352        left_hess: f32,
353        right_grad: f32,
354        right_hess: f32,
355        sum_grad: f32,
356        sum_hess: f32,
357    ) -> f32 {
358        let left_weight = -left_grad / (left_hess + self.reg_lambda);
359        let right_weight = -right_grad / (right_hess + self.reg_lambda);
360        let parent_weight = -sum_grad / (sum_hess + self.reg_lambda);
361
362        let gain = 0.5 * (
363            left_grad * left_weight +
364            right_grad * right_weight -
365            sum_grad * parent_weight
366        ) - self.gamma;
367
368        // L1 regularization penalty
369        let l1_penalty = self.reg_alpha * (left_weight.abs() + right_weight.abs() - parent_weight.abs());
370
371        gain - l1_penalty
372    }
373
374    fn predict_tree(&self, tree: &XGBTree, sample: &[f32]) -> f32 {
375        let mut node_idx = 0;
376        
377        loop {
378            let node = &tree.nodes[node_idx];
379            
380            if node.is_leaf {
381                return node.value;
382            }
383
384            if sample[node.feature] <= node.threshold {
385                node_idx = node.left;
386            } else {
387                node_idx = node.right;
388            }
389        }
390    }
391
392    pub fn predict(&self, x: &Tensor) -> Tensor {
393        let n_samples = x.dims()[0];
394        let n_features = x.dims()[1];
395        let x_data = x.data_f32();
396
397        let predictions: Vec<f32> = (0..n_samples)
398            .into_par_iter()
399            .map(|i| {
400                let sample = &x_data[i * n_features..(i + 1) * n_features];
401                let mut pred = self.base_score;
402                
403                for tree in &self.trees {
404                    pred += self.learning_rate * self.predict_tree(tree, sample);
405                }
406
407                // Apply sigmoid for binary classification
408                let prob = 1.0 / (1.0 + (-pred).exp());
409                if prob >= 0.5 { 1.0 } else { 0.0 }
410            })
411            .collect();
412
413        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
414    }
415
416    pub fn predict_proba(&self, x: &Tensor) -> Tensor {
417        let n_samples = x.dims()[0];
418        let n_features = x.dims()[1];
419        let x_data = x.data_f32();
420
421        let probabilities: Vec<f32> = (0..n_samples)
422            .into_par_iter()
423            .map(|i| {
424                let sample = &x_data[i * n_features..(i + 1) * n_features];
425                let mut pred = self.base_score;
426                
427                for tree in &self.trees {
428                    pred += self.learning_rate * self.predict_tree(tree, sample);
429                }
430
431                1.0 / (1.0 + (-pred).exp())
432            })
433            .collect();
434
435        Tensor::from_slice(&probabilities, &[n_samples]).unwrap()
436    }
437}
438
439/// XGBoost Regressor
440pub struct XGBoostRegressor {
441    pub n_estimators: usize,
442    pub learning_rate: f32,
443    pub max_depth: usize,
444    pub min_child_weight: f32,
445    pub subsample: f32,
446    pub colsample_bytree: f32,
447    pub reg_lambda: f32,
448    pub reg_alpha: f32,
449    pub gamma: f32,
450    trees: Vec<XGBTree>,
451    base_score: f32,
452}
453
454impl XGBoostRegressor {
455    pub fn new(n_estimators: usize) -> Self {
456        Self {
457            n_estimators,
458            learning_rate: 0.3,
459            max_depth: 6,
460            min_child_weight: 1.0,
461            subsample: 1.0,
462            colsample_bytree: 1.0,
463            reg_lambda: 1.0,
464            reg_alpha: 0.0,
465            gamma: 0.0,
466            trees: Vec::new(),
467            base_score: 0.0,
468        }
469    }
470
471    pub fn learning_rate(mut self, lr: f32) -> Self {
472        self.learning_rate = lr;
473        self
474    }
475
476    pub fn max_depth(mut self, depth: usize) -> Self {
477        self.max_depth = depth;
478        self
479    }
480
481    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
482        let n_samples = x.dims()[0];
483        let n_features = x.dims()[1];
484        let x_data = x.data_f32();
485        let y_data = y.data_f32();
486
487        // Initialize with mean
488        self.base_score = y_data.iter().sum::<f32>() / n_samples as f32;
489        let mut predictions = vec![self.base_score; n_samples];
490
491        // Build trees
492        for _ in 0..self.n_estimators {
493            // Calculate gradients (residuals for MSE)
494            let gradients: Vec<f32> = (0..n_samples)
495                .map(|i| predictions[i] - y_data[i])
496                .collect();
497            
498            // Hessians are constant 1.0 for MSE
499            let hessians = vec![1.0; n_samples];
500
501            // Row subsampling
502            let mut rng = thread_rng();
503            let sample_indices: Vec<usize> = if self.subsample < 1.0 {
504                let n_subsample = (n_samples as f32 * self.subsample) as usize;
505                (0..n_samples).choose_multiple(&mut rng, n_subsample)
506            } else {
507                (0..n_samples).collect()
508            };
509
510            // Column subsampling
511            let feature_indices: Vec<usize> = if self.colsample_bytree < 1.0 {
512                let n_features_sample = (n_features as f32 * self.colsample_bytree) as usize;
513                (0..n_features).choose_multiple(&mut rng, n_features_sample)
514            } else {
515                (0..n_features).collect()
516            };
517
518            // Build tree (reuse XGBoostClassifier's tree building logic)
519            let classifier = XGBoostClassifier {
520                n_estimators: 1,
521                learning_rate: self.learning_rate,
522                max_depth: self.max_depth,
523                min_child_weight: self.min_child_weight,
524                subsample: 1.0, // Already subsampled
525                colsample_bytree: 1.0, // Already subsampled
526                reg_lambda: self.reg_lambda,
527                reg_alpha: self.reg_alpha,
528                gamma: self.gamma,
529                trees: Vec::new(),
530                base_score: 0.0,
531            };
532
533            let tree = classifier.build_tree(
534                &x_data,
535                &gradients,
536                &hessians,
537                &sample_indices,
538                &feature_indices,
539                n_features,
540                0,
541            );
542
543            // Update predictions
544            for i in 0..n_samples {
545                let leaf_value = classifier.predict_tree(&tree, &x_data[i * n_features..(i + 1) * n_features]);
546                predictions[i] += self.learning_rate * leaf_value;
547            }
548
549            self.trees.push(tree);
550        }
551    }
552
553    pub fn predict(&self, x: &Tensor) -> Tensor {
554        let n_samples = x.dims()[0];
555        let n_features = x.dims()[1];
556        let x_data = x.data_f32();
557
558        let predictions: Vec<f32> = (0..n_samples)
559            .into_par_iter()
560            .map(|i| {
561                let sample = &x_data[i * n_features..(i + 1) * n_features];
562                let mut pred = self.base_score;
563                
564                let classifier = XGBoostClassifier {
565                    n_estimators: 0,
566                    learning_rate: self.learning_rate,
567                    max_depth: 0,
568                    min_child_weight: 0.0,
569                    subsample: 0.0,
570                    colsample_bytree: 0.0,
571                    reg_lambda: 0.0,
572                    reg_alpha: 0.0,
573                    gamma: 0.0,
574                    trees: Vec::new(),
575                    base_score: 0.0,
576                };
577
578                for tree in &self.trees {
579                    pred += self.learning_rate * classifier.predict_tree(tree, sample);
580                }
581
582                pred
583            })
584            .collect();
585
586        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
587    }
588}
589
#[cfg(test)]
mod tests {
    use super::*;

    /// XOR-shaped binary problem: checks fit/predict run end-to-end and the
    /// prediction tensor has one entry per input row.
    #[test]
    fn test_xgboost_classifier() {
        let features = Tensor::from_slice(
            &[0.0f32, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0],
            &[4, 2],
        )
        .unwrap();
        let labels = Tensor::from_slice(&[0.0f32, 1.0, 1.0, 0.0], &[4]).unwrap();

        let mut model = XGBoostClassifier::new(10)
            .learning_rate(0.1)
            .max_depth(3);
        model.fit(&features, &labels);

        assert_eq!(model.predict(&features).dims()[0], 4); // Number of samples
    }

    /// Small linear regression problem: checks fit/predict run end-to-end.
    #[test]
    fn test_xgboost_regressor() {
        let features = Tensor::from_slice(
            &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0],
            &[3, 2],
        )
        .unwrap();
        let targets = Tensor::from_slice(&[2.0f32, 4.0, 6.0], &[3]).unwrap();

        let mut model = XGBoostRegressor::new(10)
            .learning_rate(0.1)
            .max_depth(3);
        model.fit(&features, &targets);

        assert_eq!(model.predict(&features).dims()[0], 3); // Number of samples
    }
}
631
632