1use ferrolearn_core::error::FerroError;
42use ferrolearn_core::introspection::{HasClasses, HasFeatureImportances};
43use ferrolearn_core::pipeline::{FittedPipelineEstimator, PipelineEstimator};
44use ferrolearn_core::traits::{Fit, Predict};
45use ndarray::{Array1, Array2};
46use num_traits::{Float, FromPrimitive, ToPrimitive};
47use rand::SeedableRng;
48use rand::rngs::StdRng;
49use rand::seq::index::sample as rand_sample_indices;
50
51use crate::decision_tree::{
52 self, Node, build_regression_tree_with_feature_subset, compute_feature_importances,
53};
54
/// Loss functions available for [`GradientBoostingRegressor`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegressionLoss {
    /// Squared-error loss; pseudo-residuals are the plain residuals `y - f`.
    LeastSquares,
    /// Least-absolute-deviation loss; pseudo-residuals are the sign of `y - f`.
    Lad,
    /// Huber loss: residuals are clamped at a quantile-derived delta
    /// (see `huber_alpha`), giving robustness to outliers.
    Huber,
}
69
/// Loss functions available for [`GradientBoostingClassifier`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ClassificationLoss {
    /// Log-loss: logistic deviance for binary problems, softmax
    /// cross-entropy for multiclass.
    LogLoss,
}
76
/// Gradient-boosted regression-tree ensemble (configuration / unfitted state).
///
/// Defaults mirror [`GradientBoostingRegressor::new`]: 100 trees of depth 3,
/// learning rate 0.1, squared-error loss, no row subsampling.
#[derive(Debug, Clone)]
pub struct GradientBoostingRegressor<F> {
    /// Number of boosting rounds (trees to fit).
    pub n_estimators: usize,
    /// Shrinkage applied to each tree's contribution.
    pub learning_rate: f64,
    /// Maximum tree depth handed to the tree builder; `None` for no explicit
    /// limit (exact semantics defined by `decision_tree::TreeParams`).
    pub max_depth: Option<usize>,
    /// Minimum number of samples required to split an internal node.
    pub min_samples_split: usize,
    /// Minimum number of samples required in a leaf.
    pub min_samples_leaf: usize,
    /// Fraction of rows drawn (without replacement) per round, in `(0, 1]`.
    pub subsample: f64,
    /// Loss whose negative gradient each tree is fitted to.
    pub loss: RegressionLoss,
    /// Quantile of absolute residuals used as the Huber transition point.
    pub huber_alpha: f64,
    /// Seed for reproducible subsampling; `None` seeds from the thread RNG.
    pub random_state: Option<u64>,
    // Ties the unfitted estimator to its float type without storing an F.
    _marker: std::marker::PhantomData<F>,
}
112
113impl<F: Float> GradientBoostingRegressor<F> {
114 #[must_use]
121 pub fn new() -> Self {
122 Self {
123 n_estimators: 100,
124 learning_rate: 0.1,
125 max_depth: Some(3),
126 min_samples_split: 2,
127 min_samples_leaf: 1,
128 subsample: 1.0,
129 loss: RegressionLoss::LeastSquares,
130 huber_alpha: 0.9,
131 random_state: None,
132 _marker: std::marker::PhantomData,
133 }
134 }
135
136 #[must_use]
138 pub fn with_n_estimators(mut self, n: usize) -> Self {
139 self.n_estimators = n;
140 self
141 }
142
143 #[must_use]
145 pub fn with_learning_rate(mut self, lr: f64) -> Self {
146 self.learning_rate = lr;
147 self
148 }
149
150 #[must_use]
152 pub fn with_max_depth(mut self, d: Option<usize>) -> Self {
153 self.max_depth = d;
154 self
155 }
156
157 #[must_use]
159 pub fn with_min_samples_split(mut self, n: usize) -> Self {
160 self.min_samples_split = n;
161 self
162 }
163
164 #[must_use]
166 pub fn with_min_samples_leaf(mut self, n: usize) -> Self {
167 self.min_samples_leaf = n;
168 self
169 }
170
171 #[must_use]
173 pub fn with_subsample(mut self, ratio: f64) -> Self {
174 self.subsample = ratio;
175 self
176 }
177
178 #[must_use]
180 pub fn with_loss(mut self, loss: RegressionLoss) -> Self {
181 self.loss = loss;
182 self
183 }
184
185 #[must_use]
187 pub fn with_huber_alpha(mut self, alpha: f64) -> Self {
188 self.huber_alpha = alpha;
189 self
190 }
191
192 #[must_use]
194 pub fn with_random_state(mut self, seed: u64) -> Self {
195 self.random_state = Some(seed);
196 self
197 }
198}
199
impl<F: Float> Default for GradientBoostingRegressor<F> {
    /// Equivalent to [`GradientBoostingRegressor::new`].
    fn default() -> Self {
        Self::new()
    }
}
205
/// Trained state produced by fitting a [`GradientBoostingRegressor`].
#[derive(Debug, Clone)]
pub struct FittedGradientBoostingRegressor<F> {
    // Constant base prediction (mean for least squares, median for LAD/Huber).
    init: F,
    // Shrinkage applied to each tree's leaf value at prediction time.
    learning_rate: F,
    // One index-addressed node list per boosting round, in fit order.
    trees: Vec<Vec<Node<F>>>,
    // Column count of the training matrix; re-validated at predict time.
    n_features: usize,
    // Impurity importances summed over all trees; normalized to sum to 1
    // whenever the raw total is positive.
    feature_importances: Array1<F>,
}
227
228impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, Array1<F>> for GradientBoostingRegressor<F> {
229 type Fitted = FittedGradientBoostingRegressor<F>;
230 type Error = FerroError;
231
232 fn fit(
241 &self,
242 x: &Array2<F>,
243 y: &Array1<F>,
244 ) -> Result<FittedGradientBoostingRegressor<F>, FerroError> {
245 let (n_samples, n_features) = x.dim();
246
247 if n_samples != y.len() {
248 return Err(FerroError::ShapeMismatch {
249 expected: vec![n_samples],
250 actual: vec![y.len()],
251 context: "y length must match number of samples in X".into(),
252 });
253 }
254 if n_samples == 0 {
255 return Err(FerroError::InsufficientSamples {
256 required: 1,
257 actual: 0,
258 context: "GradientBoostingRegressor requires at least one sample".into(),
259 });
260 }
261 if self.n_estimators == 0 {
262 return Err(FerroError::InvalidParameter {
263 name: "n_estimators".into(),
264 reason: "must be at least 1".into(),
265 });
266 }
267 if self.learning_rate <= 0.0 {
268 return Err(FerroError::InvalidParameter {
269 name: "learning_rate".into(),
270 reason: "must be positive".into(),
271 });
272 }
273 if self.subsample <= 0.0 || self.subsample > 1.0 {
274 return Err(FerroError::InvalidParameter {
275 name: "subsample".into(),
276 reason: "must be in (0, 1]".into(),
277 });
278 }
279
280 let lr = F::from(self.learning_rate).unwrap();
281 let params = decision_tree::TreeParams {
282 max_depth: self.max_depth,
283 min_samples_split: self.min_samples_split,
284 min_samples_leaf: self.min_samples_leaf,
285 };
286
287 let init = match self.loss {
289 RegressionLoss::LeastSquares => {
290 let sum: F = y.iter().copied().fold(F::zero(), |a, b| a + b);
291 sum / F::from(n_samples).unwrap()
292 }
293 RegressionLoss::Lad | RegressionLoss::Huber => median_f(y),
294 };
295
296 let mut f_vals = Array1::from_elem(n_samples, init);
298
299 let all_features: Vec<usize> = (0..n_features).collect();
300 let subsample_size = ((self.subsample * n_samples as f64).ceil() as usize)
301 .max(1)
302 .min(n_samples);
303
304 let mut rng = if let Some(seed) = self.random_state {
305 StdRng::seed_from_u64(seed)
306 } else {
307 use rand::RngCore;
308 StdRng::seed_from_u64(rand::rng().next_u64())
309 };
310
311 let mut trees = Vec::with_capacity(self.n_estimators);
312
313 for _ in 0..self.n_estimators {
314 let residuals = compute_regression_residuals(y, &f_vals, self.loss, self.huber_alpha);
316
317 let sample_indices = if subsample_size < n_samples {
319 rand_sample_indices(&mut rng, n_samples, subsample_size).into_vec()
320 } else {
321 (0..n_samples).collect()
322 };
323
324 let tree = build_regression_tree_with_feature_subset(
326 x,
327 &residuals,
328 &sample_indices,
329 &all_features,
330 ¶ms,
331 );
332
333 for i in 0..n_samples {
335 let row = x.row(i);
336 let leaf_idx = decision_tree::traverse(&tree, &row);
337 if let Node::Leaf { value, .. } = tree[leaf_idx] {
338 f_vals[i] = f_vals[i] + lr * value;
339 }
340 }
341
342 trees.push(tree);
343 }
344
345 let mut total_importances = Array1::<F>::zeros(n_features);
347 for tree_nodes in &trees {
348 let tree_imp = compute_feature_importances(tree_nodes, n_features, n_samples);
349 total_importances = total_importances + tree_imp;
350 }
351 let imp_sum: F = total_importances
352 .iter()
353 .copied()
354 .fold(F::zero(), |a, b| a + b);
355 if imp_sum > F::zero() {
356 total_importances.mapv_inplace(|v| v / imp_sum);
357 }
358
359 Ok(FittedGradientBoostingRegressor {
360 init,
361 learning_rate: lr,
362 trees,
363 n_features,
364 feature_importances: total_importances,
365 })
366 }
367}
368
impl<F: Float + Send + Sync + 'static> FittedGradientBoostingRegressor<F> {
    /// The constant base prediction the ensemble starts from.
    #[must_use]
    pub fn init(&self) -> F {
        self.init
    }

    /// The shrinkage factor applied to every tree's contribution.
    #[must_use]
    pub fn learning_rate(&self) -> F {
        self.learning_rate
    }

    /// The fitted trees, one node list per boosting round, in fit order.
    #[must_use]
    pub fn trees(&self) -> &[Vec<Node<F>>] {
        &self.trees
    }

    /// Number of features the model was fitted on.
    #[must_use]
    pub fn n_features(&self) -> usize {
        self.n_features
    }
}
394
395impl<F: Float + Send + Sync + 'static> Predict<Array2<F>> for FittedGradientBoostingRegressor<F> {
396 type Output = Array1<F>;
397 type Error = FerroError;
398
399 fn predict(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
406 if x.ncols() != self.n_features {
407 return Err(FerroError::ShapeMismatch {
408 expected: vec![self.n_features],
409 actual: vec![x.ncols()],
410 context: "number of features must match fitted model".into(),
411 });
412 }
413
414 let n_samples = x.nrows();
415 let mut predictions = Array1::from_elem(n_samples, self.init);
416
417 for i in 0..n_samples {
418 let row = x.row(i);
419 for tree_nodes in &self.trees {
420 let leaf_idx = decision_tree::traverse(tree_nodes, &row);
421 if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] {
422 predictions[i] = predictions[i] + self.learning_rate * value;
423 }
424 }
425 }
426
427 Ok(predictions)
428 }
429}
430
impl<F: Float + Send + Sync + 'static> HasFeatureImportances<F>
    for FittedGradientBoostingRegressor<F>
{
    /// Normalized impurity-based importances accumulated over all trees.
    fn feature_importances(&self) -> &Array1<F> {
        &self.feature_importances
    }
}
438
439impl<F: Float + Send + Sync + 'static> PipelineEstimator<F> for GradientBoostingRegressor<F> {
441 fn fit_pipeline(
442 &self,
443 x: &Array2<F>,
444 y: &Array1<F>,
445 ) -> Result<Box<dyn FittedPipelineEstimator<F>>, FerroError> {
446 let fitted = self.fit(x, y)?;
447 Ok(Box::new(fitted))
448 }
449}
450
impl<F: Float + Send + Sync + 'static> FittedPipelineEstimator<F>
    for FittedGradientBoostingRegressor<F>
{
    /// Pipeline-facing prediction; delegates directly to [`Predict::predict`].
    fn predict_pipeline(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
        self.predict(x)
    }
}
458
/// Gradient-boosted classification-tree ensemble (configuration / unfitted).
///
/// Binary problems use a single log-odds ensemble; problems with more than
/// two classes fit one ensemble per class against softmax residuals.
#[derive(Debug, Clone)]
pub struct GradientBoostingClassifier<F> {
    /// Number of boosting rounds (trees per class ensemble).
    pub n_estimators: usize,
    /// Shrinkage applied to each tree's contribution.
    pub learning_rate: f64,
    /// Maximum tree depth handed to the tree builder; `None` for no explicit
    /// limit (exact semantics defined by `decision_tree::TreeParams`).
    pub max_depth: Option<usize>,
    /// Minimum number of samples required to split an internal node.
    pub min_samples_split: usize,
    /// Minimum number of samples required in a leaf.
    pub min_samples_leaf: usize,
    /// Fraction of rows drawn (without replacement) per round, in `(0, 1]`.
    pub subsample: f64,
    /// Classification loss (currently only log-loss).
    pub loss: ClassificationLoss,
    /// Seed for reproducible subsampling; `None` seeds from the thread RNG.
    pub random_state: Option<u64>,
    // Ties the unfitted estimator to its float type without storing an F.
    _marker: std::marker::PhantomData<F>,
}
492
493impl<F: Float> GradientBoostingClassifier<F> {
494 #[must_use]
501 pub fn new() -> Self {
502 Self {
503 n_estimators: 100,
504 learning_rate: 0.1,
505 max_depth: Some(3),
506 min_samples_split: 2,
507 min_samples_leaf: 1,
508 subsample: 1.0,
509 loss: ClassificationLoss::LogLoss,
510 random_state: None,
511 _marker: std::marker::PhantomData,
512 }
513 }
514
515 #[must_use]
517 pub fn with_n_estimators(mut self, n: usize) -> Self {
518 self.n_estimators = n;
519 self
520 }
521
522 #[must_use]
524 pub fn with_learning_rate(mut self, lr: f64) -> Self {
525 self.learning_rate = lr;
526 self
527 }
528
529 #[must_use]
531 pub fn with_max_depth(mut self, d: Option<usize>) -> Self {
532 self.max_depth = d;
533 self
534 }
535
536 #[must_use]
538 pub fn with_min_samples_split(mut self, n: usize) -> Self {
539 self.min_samples_split = n;
540 self
541 }
542
543 #[must_use]
545 pub fn with_min_samples_leaf(mut self, n: usize) -> Self {
546 self.min_samples_leaf = n;
547 self
548 }
549
550 #[must_use]
552 pub fn with_subsample(mut self, ratio: f64) -> Self {
553 self.subsample = ratio;
554 self
555 }
556
557 #[must_use]
559 pub fn with_random_state(mut self, seed: u64) -> Self {
560 self.random_state = Some(seed);
561 self
562 }
563}
564
impl<F: Float> Default for GradientBoostingClassifier<F> {
    /// Equivalent to [`GradientBoostingClassifier::new`].
    fn default() -> Self {
        Self::new()
    }
}
570
/// Trained state produced by fitting a [`GradientBoostingClassifier`].
#[derive(Debug, Clone)]
pub struct FittedGradientBoostingClassifier<F> {
    // Sorted, deduplicated class labels as seen in training `y`.
    classes: Vec<usize>,
    // Base scores: a single log-odds entry for binary problems, one
    // (clipped) log-prior per class otherwise.
    init: Vec<F>,
    // Shrinkage applied to each leaf value at prediction time.
    learning_rate: F,
    // trees[k][m]: round-`m` tree of class ensemble `k`
    // (exactly one ensemble in the binary case).
    trees: Vec<Vec<Vec<Node<F>>>>,
    // Column count of the training matrix; re-validated at predict time.
    n_features: usize,
    // Impurity importances summed over every tree of every ensemble;
    // normalized to sum to 1 whenever the raw total is positive.
    feature_importances: Array1<F>,
}
595
596impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, Array1<usize>>
597 for GradientBoostingClassifier<F>
598{
599 type Fitted = FittedGradientBoostingClassifier<F>;
600 type Error = FerroError;
601
602 fn fit(
611 &self,
612 x: &Array2<F>,
613 y: &Array1<usize>,
614 ) -> Result<FittedGradientBoostingClassifier<F>, FerroError> {
615 let (n_samples, n_features) = x.dim();
616
617 if n_samples != y.len() {
618 return Err(FerroError::ShapeMismatch {
619 expected: vec![n_samples],
620 actual: vec![y.len()],
621 context: "y length must match number of samples in X".into(),
622 });
623 }
624 if n_samples == 0 {
625 return Err(FerroError::InsufficientSamples {
626 required: 1,
627 actual: 0,
628 context: "GradientBoostingClassifier requires at least one sample".into(),
629 });
630 }
631 if self.n_estimators == 0 {
632 return Err(FerroError::InvalidParameter {
633 name: "n_estimators".into(),
634 reason: "must be at least 1".into(),
635 });
636 }
637 if self.learning_rate <= 0.0 {
638 return Err(FerroError::InvalidParameter {
639 name: "learning_rate".into(),
640 reason: "must be positive".into(),
641 });
642 }
643 if self.subsample <= 0.0 || self.subsample > 1.0 {
644 return Err(FerroError::InvalidParameter {
645 name: "subsample".into(),
646 reason: "must be in (0, 1]".into(),
647 });
648 }
649
650 let mut classes: Vec<usize> = y.iter().copied().collect();
652 classes.sort_unstable();
653 classes.dedup();
654 let n_classes = classes.len();
655
656 if n_classes < 2 {
657 return Err(FerroError::InvalidParameter {
658 name: "y".into(),
659 reason: "need at least 2 distinct classes".into(),
660 });
661 }
662
663 let y_mapped: Vec<usize> = y
664 .iter()
665 .map(|&c| classes.iter().position(|&cl| cl == c).unwrap())
666 .collect();
667
668 let lr = F::from(self.learning_rate).unwrap();
669 let params = decision_tree::TreeParams {
670 max_depth: self.max_depth,
671 min_samples_split: self.min_samples_split,
672 min_samples_leaf: self.min_samples_leaf,
673 };
674
675 let all_features: Vec<usize> = (0..n_features).collect();
676 let subsample_size = ((self.subsample * n_samples as f64).ceil() as usize)
677 .max(1)
678 .min(n_samples);
679
680 let mut rng = if let Some(seed) = self.random_state {
681 StdRng::seed_from_u64(seed)
682 } else {
683 use rand::RngCore;
684 StdRng::seed_from_u64(rand::rng().next_u64())
685 };
686
687 if n_classes == 2 {
688 self.fit_binary(
690 x,
691 &y_mapped,
692 n_samples,
693 n_features,
694 &classes,
695 lr,
696 ¶ms,
697 &all_features,
698 subsample_size,
699 &mut rng,
700 )
701 } else {
702 self.fit_multiclass(
704 x,
705 &y_mapped,
706 n_samples,
707 n_features,
708 n_classes,
709 &classes,
710 lr,
711 ¶ms,
712 &all_features,
713 subsample_size,
714 &mut rng,
715 )
716 }
717 }
718}
719
impl<F: Float + Send + Sync + 'static> GradientBoostingClassifier<F> {
    /// Binary log-loss fit: a single tree ensemble models the raw score
    /// (log-odds) of the class at index 1 of `classes`.
    ///
    /// Each round fits a tree to `y - sigmoid(F(x))` — the negative
    /// gradient of the log-loss — and adds it scaled by `lr`.
    #[allow(clippy::too_many_arguments)]
    fn fit_binary(
        &self,
        x: &Array2<F>,
        y_mapped: &[usize],
        n_samples: usize,
        n_features: usize,
        classes: &[usize],
        lr: F,
        params: &decision_tree::TreeParams,
        all_features: &[usize],
        subsample_size: usize,
        rng: &mut StdRng,
    ) -> Result<FittedGradientBoostingClassifier<F>, FerroError> {
        // Base score: log-odds of the positive-class frequency, clipped
        // away from 0 and 1 so the logit stays finite.
        let pos_count = y_mapped.iter().filter(|&&c| c == 1).count();
        let p = F::from(pos_count).unwrap() / F::from(n_samples).unwrap();
        let eps = F::from(1e-15).unwrap();
        let p_clipped = p.max(eps).min(F::one() - eps);
        let init_val = (p_clipped / (F::one() - p_clipped)).ln();

        let mut f_vals = Array1::from_elem(n_samples, init_val);
        let mut trees_seq: Vec<Vec<Node<F>>> = Vec::with_capacity(self.n_estimators);

        for _ in 0..self.n_estimators {
            // Current probability estimate for every row.
            let probs: Vec<F> = f_vals.iter().map(|&fv| sigmoid(fv)).collect();

            // Negative gradient of log-loss w.r.t. the raw score: y - p.
            let mut residuals = Array1::zeros(n_samples);
            for i in 0..n_samples {
                let yi = F::from(y_mapped[i]).unwrap();
                residuals[i] = yi - probs[i];
            }

            // Optional stochastic row subsample (without replacement).
            let sample_indices = if subsample_size < n_samples {
                rand_sample_indices(rng, n_samples, subsample_size).into_vec()
            } else {
                (0..n_samples).collect()
            };

            let tree = build_regression_tree_with_feature_subset(
                x,
                &residuals,
                &sample_indices,
                all_features,
                params,
            );

            // Advance the raw score for ALL rows so the next round's
            // residuals reflect the full current model.
            for i in 0..n_samples {
                let row = x.row(i);
                let leaf_idx = decision_tree::traverse(&tree, &row);
                if let Node::Leaf { value, .. } = tree[leaf_idx] {
                    f_vals[i] = f_vals[i] + lr * value;
                }
            }

            trees_seq.push(tree);
        }

        // Sum per-tree impurity importances, then normalize to sum to 1.
        let mut total_importances = Array1::<F>::zeros(n_features);
        for tree_nodes in &trees_seq {
            let tree_imp = compute_feature_importances(tree_nodes, n_features, n_samples);
            total_importances = total_importances + tree_imp;
        }
        let imp_sum: F = total_importances
            .iter()
            .copied()
            .fold(F::zero(), |a, b| a + b);
        if imp_sum > F::zero() {
            total_importances.mapv_inplace(|v| v / imp_sum);
        }

        Ok(FittedGradientBoostingClassifier {
            classes: classes.to_vec(),
            init: vec![init_val],
            learning_rate: lr,
            trees: vec![trees_seq],
            n_features,
            feature_importances: total_importances,
        })
    }

    /// Multiclass softmax fit: one tree ensemble per class, all advanced
    /// each round against softmax pseudo-residuals (one-hot minus prob).
    ///
    /// The same row subsample is shared by all class trees within a round.
    #[allow(clippy::too_many_arguments)]
    fn fit_multiclass(
        &self,
        x: &Array2<F>,
        y_mapped: &[usize],
        n_samples: usize,
        n_features: usize,
        n_classes: usize,
        classes: &[usize],
        lr: F,
        params: &decision_tree::TreeParams,
        all_features: &[usize],
        subsample_size: usize,
        rng: &mut StdRng,
    ) -> Result<FittedGradientBoostingClassifier<F>, FerroError> {
        // Base scores: log of the (clipped) empirical class priors.
        let mut class_counts = vec![0usize; n_classes];
        for &c in y_mapped {
            class_counts[c] += 1;
        }
        let n_f = F::from(n_samples).unwrap();
        let eps = F::from(1e-15).unwrap();
        let init_vals: Vec<F> = class_counts
            .iter()
            .map(|&cnt| {
                let p = (F::from(cnt).unwrap() / n_f).max(eps);
                p.ln()
            })
            .collect();

        // f_vals[k][i]: raw score of class k for row i.
        let mut f_vals: Vec<Array1<F>> = init_vals
            .iter()
            .map(|&init| Array1::from_elem(n_samples, init))
            .collect();

        let mut trees_per_class: Vec<Vec<Vec<Node<F>>>> = (0..n_classes)
            .map(|_| Vec::with_capacity(self.n_estimators))
            .collect();

        for _ in 0..self.n_estimators {
            let probs = softmax_matrix(&f_vals, n_samples, n_classes);

            // One shared subsample per round (without replacement).
            let sample_indices = if subsample_size < n_samples {
                rand_sample_indices(rng, n_samples, subsample_size).into_vec()
            } else {
                (0..n_samples).collect()
            };

            for k in 0..n_classes {
                // Negative gradient of softmax log-loss: 1{y=k} - p_k.
                let mut residuals = Array1::zeros(n_samples);
                for i in 0..n_samples {
                    let yi_k = if y_mapped[i] == k {
                        F::one()
                    } else {
                        F::zero()
                    };
                    residuals[i] = yi_k - probs[k][i];
                }

                let tree = build_regression_tree_with_feature_subset(
                    x,
                    &residuals,
                    &sample_indices,
                    all_features,
                    params,
                );

                // Advance class-k scores for all rows.
                for (i, fv) in f_vals[k].iter_mut().enumerate() {
                    let row = x.row(i);
                    let leaf_idx = decision_tree::traverse(&tree, &row);
                    if let Node::Leaf { value, .. } = tree[leaf_idx] {
                        *fv = *fv + lr * value;
                    }
                }

                trees_per_class[k].push(tree);
            }
        }

        // Sum importances over every tree of every class ensemble; normalize.
        let mut total_importances = Array1::<F>::zeros(n_features);
        for class_trees in &trees_per_class {
            for tree_nodes in class_trees {
                let tree_imp = compute_feature_importances(tree_nodes, n_features, n_samples);
                total_importances = total_importances + tree_imp;
            }
        }
        let imp_sum: F = total_importances
            .iter()
            .copied()
            .fold(F::zero(), |a, b| a + b);
        if imp_sum > F::zero() {
            total_importances.mapv_inplace(|v| v / imp_sum);
        }

        Ok(FittedGradientBoostingClassifier {
            classes: classes.to_vec(),
            init: init_vals,
            learning_rate: lr,
            trees: trees_per_class,
            n_features,
            feature_importances: total_importances,
        })
    }
}
920
impl<F: Float + Send + Sync + 'static> FittedGradientBoostingClassifier<F> {
    /// Base scores: `[log_odds]` for binary models, per-class log-priors
    /// otherwise.
    #[must_use]
    pub fn init(&self) -> &[F] {
        &self.init
    }

    /// The shrinkage factor applied to every tree's contribution.
    #[must_use]
    pub fn learning_rate(&self) -> F {
        self.learning_rate
    }

    /// The fitted trees: `trees()[k][m]` is round `m` of class ensemble `k`
    /// (a single ensemble in the binary case).
    #[must_use]
    pub fn trees(&self) -> &[Vec<Vec<Node<F>>>] {
        &self.trees
    }

    /// Number of features the model was fitted on.
    #[must_use]
    pub fn n_features(&self) -> usize {
        self.n_features
    }
}
949
950impl<F: Float + Send + Sync + 'static> Predict<Array2<F>> for FittedGradientBoostingClassifier<F> {
951 type Output = Array1<usize>;
952 type Error = FerroError;
953
954 fn predict(&self, x: &Array2<F>) -> Result<Array1<usize>, FerroError> {
961 if x.ncols() != self.n_features {
962 return Err(FerroError::ShapeMismatch {
963 expected: vec![self.n_features],
964 actual: vec![x.ncols()],
965 context: "number of features must match fitted model".into(),
966 });
967 }
968
969 let n_samples = x.nrows();
970 let n_classes = self.classes.len();
971
972 if n_classes == 2 {
973 let init = self.init[0];
975 let mut predictions = Array1::zeros(n_samples);
976 for i in 0..n_samples {
977 let row = x.row(i);
978 let mut f_val = init;
979 for tree_nodes in &self.trees[0] {
980 let leaf_idx = decision_tree::traverse(tree_nodes, &row);
981 if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] {
982 f_val = f_val + self.learning_rate * value;
983 }
984 }
985 let prob = sigmoid(f_val);
986 let class_idx = if prob >= F::from(0.5).unwrap() { 1 } else { 0 };
987 predictions[i] = self.classes[class_idx];
988 }
989 Ok(predictions)
990 } else {
991 let mut predictions = Array1::zeros(n_samples);
993 for i in 0..n_samples {
994 let row = x.row(i);
995 let mut scores = Vec::with_capacity(n_classes);
996 for k in 0..n_classes {
997 let mut f_val = self.init[k];
998 for tree_nodes in &self.trees[k] {
999 let leaf_idx = decision_tree::traverse(tree_nodes, &row);
1000 if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] {
1001 f_val = f_val + self.learning_rate * value;
1002 }
1003 }
1004 scores.push(f_val);
1005 }
1006 let best_k = scores
1007 .iter()
1008 .enumerate()
1009 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
1010 .map_or(0, |(k, _)| k);
1011 predictions[i] = self.classes[best_k];
1012 }
1013 Ok(predictions)
1014 }
1015 }
1016}
1017
impl<F: Float + Send + Sync + 'static> HasFeatureImportances<F>
    for FittedGradientBoostingClassifier<F>
{
    /// Normalized impurity-based importances accumulated over every tree of
    /// every class ensemble.
    fn feature_importances(&self) -> &Array1<F> {
        &self.feature_importances
    }
}
1025
impl<F: Float + Send + Sync + 'static> HasClasses for FittedGradientBoostingClassifier<F> {
    /// Sorted unique class labels seen during fitting.
    fn classes(&self) -> &[usize] {
        &self.classes
    }

    /// Number of distinct classes (at least 2 for any fitted model).
    fn n_classes(&self) -> usize {
        self.classes.len()
    }
}
1035
impl<F: Float + ToPrimitive + FromPrimitive + Send + Sync + 'static> PipelineEstimator<F>
    for GradientBoostingClassifier<F>
{
    /// Pipeline-facing fit: converts float labels to `usize` class ids,
    /// fits, and wraps the result in an adapter that converts back.
    fn fit_pipeline(
        &self,
        x: &Array2<F>,
        y: &Array1<F>,
    ) -> Result<Box<dyn FittedPipelineEstimator<F>>, FerroError> {
        // NOTE(review): labels that `to_usize` cannot represent silently
        // collapse to class 0 — confirm this lossy fallback is intended
        // rather than returning an InvalidParameter error.
        let y_usize: Array1<usize> = y.mapv(|v| v.to_usize().unwrap_or(0));
        let fitted = self.fit(x, &y_usize)?;
        Ok(Box::new(FittedGbcPipelineAdapter(fitted)))
    }
}
1050
// Newtype adapter exposing the usize-label classifier through the
// float-typed pipeline interface.
struct FittedGbcPipelineAdapter<F: Float + Send + Sync + 'static>(
    FittedGradientBoostingClassifier<F>,
);
1055
impl<F: Float + ToPrimitive + FromPrimitive + Send + Sync + 'static> FittedPipelineEstimator<F>
    for FittedGbcPipelineAdapter<F>
{
    /// Predicts class ids and converts them back to `F` for the pipeline;
    /// ids not representable in `F` become NaN.
    fn predict_pipeline(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
        let preds = self.0.predict(x)?;
        Ok(preds.mapv(|v| F::from_usize(v).unwrap_or_else(F::nan)))
    }
}
1064
1065fn sigmoid<F: Float>(x: F) -> F {
1071 F::one() / (F::one() + (-x).exp())
1072}
1073
1074fn softmax_matrix<F: Float>(
1078 f_vals: &[Array1<F>],
1079 n_samples: usize,
1080 n_classes: usize,
1081) -> Vec<Vec<F>> {
1082 let mut probs: Vec<Vec<F>> = vec![vec![F::zero(); n_samples]; n_classes];
1083
1084 for i in 0..n_samples {
1085 let max_val = (0..n_classes)
1087 .map(|k| f_vals[k][i])
1088 .fold(F::neg_infinity(), |a, b| if b > a { b } else { a });
1089
1090 let mut sum = F::zero();
1091 let mut exps = vec![F::zero(); n_classes];
1092 for k in 0..n_classes {
1093 exps[k] = (f_vals[k][i] - max_val).exp();
1094 sum = sum + exps[k];
1095 }
1096
1097 let eps = F::from(1e-15).unwrap();
1098 if sum < eps {
1099 sum = eps;
1100 }
1101
1102 for k in 0..n_classes {
1103 probs[k][i] = exps[k] / sum;
1104 }
1105 }
1106
1107 probs
1108}
1109
1110fn median_f<F: Float>(arr: &Array1<F>) -> F {
1112 let mut sorted: Vec<F> = arr.iter().copied().collect();
1113 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
1114 let n = sorted.len();
1115 if n == 0 {
1116 return F::zero();
1117 }
1118 if n % 2 == 1 {
1119 sorted[n / 2]
1120 } else {
1121 (sorted[n / 2 - 1] + sorted[n / 2]) / F::from(2.0).unwrap()
1122 }
1123}
1124
1125fn quantile_f<F: Float>(vals: &[F], alpha: f64) -> F {
1127 if vals.is_empty() {
1128 return F::zero();
1129 }
1130 let mut sorted: Vec<F> = vals.to_vec();
1131 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
1132 let idx = ((sorted.len() as f64 - 1.0) * alpha).round() as usize;
1133 let idx = idx.min(sorted.len() - 1);
1134 sorted[idx]
1135}
1136
1137fn compute_regression_residuals<F: Float>(
1139 y: &Array1<F>,
1140 f_vals: &Array1<F>,
1141 loss: RegressionLoss,
1142 huber_alpha: f64,
1143) -> Array1<F> {
1144 let n = y.len();
1145 match loss {
1146 RegressionLoss::LeastSquares => {
1147 let mut residuals = Array1::zeros(n);
1149 for i in 0..n {
1150 residuals[i] = y[i] - f_vals[i];
1151 }
1152 residuals
1153 }
1154 RegressionLoss::Lad => {
1155 let mut residuals = Array1::zeros(n);
1157 for i in 0..n {
1158 let diff = y[i] - f_vals[i];
1159 residuals[i] = if diff > F::zero() {
1160 F::one()
1161 } else if diff < F::zero() {
1162 -F::one()
1163 } else {
1164 F::zero()
1165 };
1166 }
1167 residuals
1168 }
1169 RegressionLoss::Huber => {
1170 let raw_residuals: Vec<F> = (0..n).map(|i| (y[i] - f_vals[i]).abs()).collect();
1172 let delta = quantile_f(&raw_residuals, huber_alpha);
1173
1174 let mut residuals = Array1::zeros(n);
1175 for i in 0..n {
1176 let diff = y[i] - f_vals[i];
1177 if diff.abs() <= delta {
1178 residuals[i] = diff;
1179 } else if diff > F::zero() {
1180 residuals[i] = delta;
1181 } else {
1182 residuals[i] = -delta;
1183 }
1184 }
1185 residuals
1186 }
1187 }
1188}
1189
1190#[cfg(test)]
1195mod tests {
1196 use super::*;
1197 use approx::assert_relative_eq;
1198 use ndarray::array;
1199
1200 #[test]
1203 fn test_gbr_simple_least_squares() {
1204 let x =
1205 Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
1206 let y = array![1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0];
1207
1208 let model = GradientBoostingRegressor::<f64>::new()
1209 .with_n_estimators(50)
1210 .with_learning_rate(0.1)
1211 .with_random_state(42);
1212 let fitted = model.fit(&x, &y).unwrap();
1213 let preds = fitted.predict(&x).unwrap();
1214
1215 assert_eq!(preds.len(), 8);
1216 for i in 0..4 {
1217 assert!(preds[i] < 3.0, "Expected ~1.0, got {}", preds[i]);
1218 }
1219 for i in 4..8 {
1220 assert!(preds[i] > 3.0, "Expected ~5.0, got {}", preds[i]);
1221 }
1222 }
1223
1224 #[test]
1225 fn test_gbr_lad_loss() {
1226 let x =
1227 Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
1228 let y = array![1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0];
1229
1230 let model = GradientBoostingRegressor::<f64>::new()
1231 .with_n_estimators(50)
1232 .with_loss(RegressionLoss::Lad)
1233 .with_random_state(42);
1234 let fitted = model.fit(&x, &y).unwrap();
1235 let preds = fitted.predict(&x).unwrap();
1236
1237 assert_eq!(preds.len(), 8);
1238 for i in 0..4 {
1240 assert!(preds[i] < 3.5, "LAD expected <3.5, got {}", preds[i]);
1241 }
1242 for i in 4..8 {
1243 assert!(preds[i] > 2.5, "LAD expected >2.5, got {}", preds[i]);
1244 }
1245 }
1246
1247 #[test]
1248 fn test_gbr_huber_loss() {
1249 let x =
1250 Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
1251 let y = array![1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0];
1252
1253 let model = GradientBoostingRegressor::<f64>::new()
1254 .with_n_estimators(50)
1255 .with_loss(RegressionLoss::Huber)
1256 .with_huber_alpha(0.9)
1257 .with_random_state(42);
1258 let fitted = model.fit(&x, &y).unwrap();
1259 let preds = fitted.predict(&x).unwrap();
1260
1261 assert_eq!(preds.len(), 8);
1262 }
1263
1264 #[test]
1265 fn test_gbr_reproducibility() {
1266 let x =
1267 Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
1268 let y = array![1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0];
1269
1270 let model = GradientBoostingRegressor::<f64>::new()
1271 .with_n_estimators(20)
1272 .with_random_state(123);
1273
1274 let fitted1 = model.fit(&x, &y).unwrap();
1275 let fitted2 = model.fit(&x, &y).unwrap();
1276
1277 let preds1 = fitted1.predict(&x).unwrap();
1278 let preds2 = fitted2.predict(&x).unwrap();
1279
1280 for (p1, p2) in preds1.iter().zip(preds2.iter()) {
1281 assert_relative_eq!(*p1, *p2, epsilon = 1e-10);
1282 }
1283 }
1284
1285 #[test]
1286 fn test_gbr_feature_importances() {
1287 let x = Array2::from_shape_vec(
1288 (10, 3),
1289 vec![
1290 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 4.0, 0.0, 0.0, 5.0, 0.0, 0.0, 6.0,
1291 0.0, 0.0, 7.0, 0.0, 0.0, 8.0, 0.0, 0.0, 9.0, 0.0, 0.0, 10.0, 0.0, 0.0,
1292 ],
1293 )
1294 .unwrap();
1295 let y = array![1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0, 5.0];
1296
1297 let model = GradientBoostingRegressor::<f64>::new()
1298 .with_n_estimators(20)
1299 .with_random_state(42);
1300 let fitted = model.fit(&x, &y).unwrap();
1301 let importances = fitted.feature_importances();
1302
1303 assert_eq!(importances.len(), 3);
1304 assert!(importances[0] > importances[1]);
1306 assert!(importances[0] > importances[2]);
1307 }
1308
1309 #[test]
1310 fn test_gbr_shape_mismatch_fit() {
1311 let x = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
1312 let y = array![1.0, 2.0];
1313
1314 let model = GradientBoostingRegressor::<f64>::new().with_n_estimators(5);
1315 assert!(model.fit(&x, &y).is_err());
1316 }
1317
1318 #[test]
1319 fn test_gbr_shape_mismatch_predict() {
1320 let x =
1321 Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
1322 let y = array![1.0, 2.0, 3.0, 4.0];
1323
1324 let model = GradientBoostingRegressor::<f64>::new()
1325 .with_n_estimators(5)
1326 .with_random_state(0);
1327 let fitted = model.fit(&x, &y).unwrap();
1328
1329 let x_bad = Array2::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
1330 assert!(fitted.predict(&x_bad).is_err());
1331 }
1332
1333 #[test]
1334 fn test_gbr_empty_data() {
1335 let x = Array2::<f64>::zeros((0, 2));
1336 let y = Array1::<f64>::zeros(0);
1337
1338 let model = GradientBoostingRegressor::<f64>::new().with_n_estimators(5);
1339 assert!(model.fit(&x, &y).is_err());
1340 }
1341
1342 #[test]
1343 fn test_gbr_zero_estimators() {
1344 let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
1345 let y = array![1.0, 2.0, 3.0, 4.0];
1346
1347 let model = GradientBoostingRegressor::<f64>::new().with_n_estimators(0);
1348 assert!(model.fit(&x, &y).is_err());
1349 }
1350
1351 #[test]
1352 fn test_gbr_invalid_learning_rate() {
1353 let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
1354 let y = array![1.0, 2.0, 3.0, 4.0];
1355
1356 let model = GradientBoostingRegressor::<f64>::new()
1357 .with_n_estimators(5)
1358 .with_learning_rate(0.0);
1359 assert!(model.fit(&x, &y).is_err());
1360 }
1361
1362 #[test]
1363 fn test_gbr_invalid_subsample() {
1364 let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
1365 let y = array![1.0, 2.0, 3.0, 4.0];
1366
1367 let model = GradientBoostingRegressor::<f64>::new()
1368 .with_n_estimators(5)
1369 .with_subsample(0.0);
1370 assert!(model.fit(&x, &y).is_err());
1371
1372 let model2 = GradientBoostingRegressor::<f64>::new()
1373 .with_n_estimators(5)
1374 .with_subsample(1.5);
1375 assert!(model2.fit(&x, &y).is_err());
1376 }
1377
1378 #[test]
1379 fn test_gbr_subsample() {
1380 let x =
1381 Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
1382 let y = array![1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0];
1383
1384 let model = GradientBoostingRegressor::<f64>::new()
1385 .with_n_estimators(50)
1386 .with_subsample(0.5)
1387 .with_random_state(42);
1388 let fitted = model.fit(&x, &y).unwrap();
1389 let preds = fitted.predict(&x).unwrap();
1390
1391 assert_eq!(preds.len(), 8);
1392 }
1393
1394 #[test]
1395 fn test_gbr_pipeline_integration() {
1396 let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
1397 let y = array![1.0, 2.0, 3.0, 4.0];
1398
1399 let model = GradientBoostingRegressor::<f64>::new()
1400 .with_n_estimators(10)
1401 .with_random_state(42);
1402 let fitted = model.fit_pipeline(&x, &y).unwrap();
1403 let preds = fitted.predict_pipeline(&x).unwrap();
1404 assert_eq!(preds.len(), 4);
1405 }
1406
1407 #[test]
1408 fn test_gbr_f32_support() {
1409 let x = Array2::from_shape_vec((4, 1), vec![1.0f32, 2.0, 3.0, 4.0]).unwrap();
1410 let y = Array1::from_vec(vec![1.0f32, 2.0, 3.0, 4.0]);
1411
1412 let model = GradientBoostingRegressor::<f32>::new()
1413 .with_n_estimators(10)
1414 .with_random_state(42);
1415 let fitted = model.fit(&x, &y).unwrap();
1416 let preds = fitted.predict(&x).unwrap();
1417 assert_eq!(preds.len(), 4);
1418 }
1419
1420 #[test]
1421 fn test_gbr_max_depth() {
1422 let x =
1423 Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
1424 let y = array![1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0];
1425
1426 let model = GradientBoostingRegressor::<f64>::new()
1427 .with_n_estimators(20)
1428 .with_max_depth(Some(1))
1429 .with_random_state(42);
1430 let fitted = model.fit(&x, &y).unwrap();
1431 let preds = fitted.predict(&x).unwrap();
1432 assert_eq!(preds.len(), 8);
1433 }
1434
1435 #[test]
1436 fn test_gbr_default_trait() {
1437 let model = GradientBoostingRegressor::<f64>::default();
1438 assert_eq!(model.n_estimators, 100);
1439 assert!((model.learning_rate - 0.1).abs() < 1e-10);
1440 }
1441
1442 #[test]
1445 fn test_gbc_binary_simple() {
1446 let x = Array2::from_shape_vec(
1447 (8, 2),
1448 vec![
1449 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
1450 ],
1451 )
1452 .unwrap();
1453 let y = array![0, 0, 0, 0, 1, 1, 1, 1];
1454
1455 let model = GradientBoostingClassifier::<f64>::new()
1456 .with_n_estimators(50)
1457 .with_learning_rate(0.1)
1458 .with_random_state(42);
1459 let fitted = model.fit(&x, &y).unwrap();
1460 let preds = fitted.predict(&x).unwrap();
1461
1462 assert_eq!(preds.len(), 8);
1463 for i in 0..4 {
1464 assert_eq!(preds[i], 0, "Expected 0 at index {}, got {}", i, preds[i]);
1465 }
1466 for i in 4..8 {
1467 assert_eq!(preds[i], 1, "Expected 1 at index {}, got {}", i, preds[i]);
1468 }
1469 }
1470
1471 #[test]
1472 fn test_gbc_multiclass() {
1473 let x = Array2::from_shape_vec((9, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
1474 .unwrap();
1475 let y = array![0, 0, 0, 1, 1, 1, 2, 2, 2];
1476
1477 let model = GradientBoostingClassifier::<f64>::new()
1478 .with_n_estimators(50)
1479 .with_learning_rate(0.1)
1480 .with_random_state(42);
1481 let fitted = model.fit(&x, &y).unwrap();
1482 let preds = fitted.predict(&x).unwrap();
1483
1484 assert_eq!(preds.len(), 9);
1485 let correct = preds.iter().zip(y.iter()).filter(|(p, t)| p == t).count();
1487 assert!(
1488 correct >= 6,
1489 "Expected at least 6/9 correct, got {correct}/9"
1490 );
1491 }
1492
1493 #[test]
1494 fn test_gbc_has_classes() {
1495 let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
1496 let y = array![0, 1, 2, 0, 1, 2];
1497
1498 let model = GradientBoostingClassifier::<f64>::new()
1499 .with_n_estimators(5)
1500 .with_random_state(0);
1501 let fitted = model.fit(&x, &y).unwrap();
1502
1503 assert_eq!(fitted.classes(), &[0, 1, 2]);
1504 assert_eq!(fitted.n_classes(), 3);
1505 }
1506
1507 #[test]
1508 fn test_gbc_reproducibility() {
1509 let x = Array2::from_shape_vec(
1510 (8, 2),
1511 vec![
1512 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
1513 ],
1514 )
1515 .unwrap();
1516 let y = array![0, 0, 0, 0, 1, 1, 1, 1];
1517
1518 let model = GradientBoostingClassifier::<f64>::new()
1519 .with_n_estimators(10)
1520 .with_random_state(42);
1521
1522 let fitted1 = model.fit(&x, &y).unwrap();
1523 let fitted2 = model.fit(&x, &y).unwrap();
1524
1525 let preds1 = fitted1.predict(&x).unwrap();
1526 let preds2 = fitted2.predict(&x).unwrap();
1527 assert_eq!(preds1, preds2);
1528 }
1529
1530 #[test]
1531 fn test_gbc_feature_importances() {
1532 let x = Array2::from_shape_vec(
1533 (10, 3),
1534 vec![
1535 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 4.0, 0.0, 0.0, 5.0, 0.0, 0.0, 6.0,
1536 0.0, 0.0, 7.0, 0.0, 0.0, 8.0, 0.0, 0.0, 9.0, 0.0, 0.0, 10.0, 0.0, 0.0,
1537 ],
1538 )
1539 .unwrap();
1540 let y = array![0, 0, 0, 0, 0, 1, 1, 1, 1, 1];
1541
1542 let model = GradientBoostingClassifier::<f64>::new()
1543 .with_n_estimators(20)
1544 .with_random_state(42);
1545 let fitted = model.fit(&x, &y).unwrap();
1546 let importances = fitted.feature_importances();
1547
1548 assert_eq!(importances.len(), 3);
1549 assert!(importances[0] > importances[1]);
1550 assert!(importances[0] > importances[2]);
1551 }
1552
1553 #[test]
1554 fn test_gbc_shape_mismatch_fit() {
1555 let x = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
1556 let y = array![0, 1];
1557
1558 let model = GradientBoostingClassifier::<f64>::new().with_n_estimators(5);
1559 assert!(model.fit(&x, &y).is_err());
1560 }
1561
1562 #[test]
1563 fn test_gbc_shape_mismatch_predict() {
1564 let x =
1565 Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
1566 let y = array![0, 0, 1, 1];
1567
1568 let model = GradientBoostingClassifier::<f64>::new()
1569 .with_n_estimators(5)
1570 .with_random_state(0);
1571 let fitted = model.fit(&x, &y).unwrap();
1572
1573 let x_bad = Array2::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
1574 assert!(fitted.predict(&x_bad).is_err());
1575 }
1576
1577 #[test]
1578 fn test_gbc_empty_data() {
1579 let x = Array2::<f64>::zeros((0, 2));
1580 let y = Array1::<usize>::zeros(0);
1581
1582 let model = GradientBoostingClassifier::<f64>::new().with_n_estimators(5);
1583 assert!(model.fit(&x, &y).is_err());
1584 }
1585
1586 #[test]
1587 fn test_gbc_single_class() {
1588 let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap();
1589 let y = array![0, 0, 0];
1590
1591 let model = GradientBoostingClassifier::<f64>::new().with_n_estimators(5);
1592 assert!(model.fit(&x, &y).is_err());
1593 }
1594
1595 #[test]
1596 fn test_gbc_zero_estimators() {
1597 let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
1598 let y = array![0, 0, 1, 1];
1599
1600 let model = GradientBoostingClassifier::<f64>::new().with_n_estimators(0);
1601 assert!(model.fit(&x, &y).is_err());
1602 }
1603
1604 #[test]
1605 fn test_gbc_pipeline_integration() {
1606 let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
1607 let y = Array1::from_vec(vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0]);
1608
1609 let model = GradientBoostingClassifier::<f64>::new()
1610 .with_n_estimators(10)
1611 .with_random_state(42);
1612 let fitted = model.fit_pipeline(&x, &y).unwrap();
1613 let preds = fitted.predict_pipeline(&x).unwrap();
1614 assert_eq!(preds.len(), 6);
1615 }
1616
1617 #[test]
1618 fn test_gbc_f32_support() {
1619 let x = Array2::from_shape_vec((6, 1), vec![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
1620 let y = array![0, 0, 0, 1, 1, 1];
1621
1622 let model = GradientBoostingClassifier::<f32>::new()
1623 .with_n_estimators(10)
1624 .with_random_state(42);
1625 let fitted = model.fit(&x, &y).unwrap();
1626 let preds = fitted.predict(&x).unwrap();
1627 assert_eq!(preds.len(), 6);
1628 }
1629
1630 #[test]
1631 fn test_gbc_subsample() {
1632 let x = Array2::from_shape_vec(
1633 (8, 2),
1634 vec![
1635 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
1636 ],
1637 )
1638 .unwrap();
1639 let y = array![0, 0, 0, 0, 1, 1, 1, 1];
1640
1641 let model = GradientBoostingClassifier::<f64>::new()
1642 .with_n_estimators(20)
1643 .with_subsample(0.5)
1644 .with_random_state(42);
1645 let fitted = model.fit(&x, &y).unwrap();
1646 let preds = fitted.predict(&x).unwrap();
1647 assert_eq!(preds.len(), 8);
1648 }
1649
1650 #[test]
1651 fn test_gbc_default_trait() {
1652 let model = GradientBoostingClassifier::<f64>::default();
1653 assert_eq!(model.n_estimators, 100);
1654 assert!((model.learning_rate - 0.1).abs() < 1e-10);
1655 }
1656
1657 #[test]
1658 fn test_gbc_non_contiguous_labels() {
1659 let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
1660 let y = array![10, 10, 10, 20, 20, 20];
1661
1662 let model = GradientBoostingClassifier::<f64>::new()
1663 .with_n_estimators(20)
1664 .with_random_state(42);
1665 let fitted = model.fit(&x, &y).unwrap();
1666 let preds = fitted.predict(&x).unwrap();
1667
1668 assert_eq!(preds.len(), 6);
1669 for &p in &preds {
1670 assert!(p == 10 || p == 20);
1671 }
1672 }
1673
1674 #[test]
1677 fn test_sigmoid() {
1678 assert_relative_eq!(sigmoid(0.0f64), 0.5, epsilon = 1e-10);
1679 assert!(sigmoid(10.0f64) > 0.999);
1680 assert!(sigmoid(-10.0f64) < 0.001);
1681 }
1682
1683 #[test]
1684 fn test_median_f_odd() {
1685 let arr = array![3.0, 1.0, 2.0];
1686 assert_relative_eq!(median_f(&arr), 2.0, epsilon = 1e-10);
1687 }
1688
1689 #[test]
1690 fn test_median_f_even() {
1691 let arr = array![4.0, 1.0, 3.0, 2.0];
1692 assert_relative_eq!(median_f(&arr), 2.5, epsilon = 1e-10);
1693 }
1694
1695 #[test]
1696 fn test_median_f_empty() {
1697 let arr = Array1::<f64>::zeros(0);
1698 assert_relative_eq!(median_f(&arr), 0.0, epsilon = 1e-10);
1699 }
1700
1701 #[test]
1702 fn test_quantile_f() {
1703 let vals = vec![1.0, 2.0, 3.0, 4.0, 5.0];
1704 let q90 = quantile_f(&vals, 0.9);
1705 assert!((4.0..=5.0).contains(&q90));
1706 }
1707
1708 #[test]
1709 fn test_regression_residuals_least_squares() {
1710 let y = array![1.0, 2.0, 3.0];
1711 let f = array![0.5, 2.5, 2.0];
1712 let r = compute_regression_residuals(&y, &f, RegressionLoss::LeastSquares, 0.9);
1713 assert_relative_eq!(r[0], 0.5, epsilon = 1e-10);
1714 assert_relative_eq!(r[1], -0.5, epsilon = 1e-10);
1715 assert_relative_eq!(r[2], 1.0, epsilon = 1e-10);
1716 }
1717
1718 #[test]
1719 fn test_regression_residuals_lad() {
1720 let y = array![1.0, 2.0, 3.0];
1721 let f = array![0.5, 2.5, 3.0];
1722 let r = compute_regression_residuals(&y, &f, RegressionLoss::Lad, 0.9);
1723 assert_relative_eq!(r[0], 1.0, epsilon = 1e-10);
1724 assert_relative_eq!(r[1], -1.0, epsilon = 1e-10);
1725 assert_relative_eq!(r[2], 0.0, epsilon = 1e-10);
1726 }
1727
1728 #[test]
1729 fn test_regression_residuals_huber() {
1730 let y = array![1.0, 2.0, 10.0, 3.0, 4.0];
1731 let f = array![1.5, 2.5, 2.0, 3.5, 4.5];
1732 let r = compute_regression_residuals(&y, &f, RegressionLoss::Huber, 0.9);
1736 assert_relative_eq!(r[0], -0.5, epsilon = 1e-10);
1738 assert_relative_eq!(r[1], -0.5, epsilon = 1e-10);
1739 assert_relative_eq!(r[2], 8.0, epsilon = 1e-10);
1740 assert_relative_eq!(r[3], -0.5, epsilon = 1e-10);
1741 assert_relative_eq!(r[4], -0.5, epsilon = 1e-10);
1742
1743 let r2 = compute_regression_residuals(&y, &f, RegressionLoss::Huber, 0.1);
1747 assert_relative_eq!(r2[0], -0.5, epsilon = 1e-10);
1748 assert_relative_eq!(r2[2], 0.5, epsilon = 1e-10);
1750 }
1751
1752 #[test]
1753 fn test_gbc_multiclass_4_classes() {
1754 let x = Array2::from_shape_vec(
1755 (12, 1),
1756 vec![
1757 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
1758 ],
1759 )
1760 .unwrap();
1761 let y = array![0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3];
1762
1763 let model = GradientBoostingClassifier::<f64>::new()
1764 .with_n_estimators(50)
1765 .with_random_state(42);
1766 let fitted = model.fit(&x, &y).unwrap();
1767 let preds = fitted.predict(&x).unwrap();
1768
1769 assert_eq!(preds.len(), 12);
1770 assert_eq!(fitted.n_classes(), 4);
1771 }
1772
1773 #[test]
1774 fn test_gbc_invalid_learning_rate() {
1775 let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
1776 let y = array![0, 0, 1, 1];
1777
1778 let model = GradientBoostingClassifier::<f64>::new()
1779 .with_n_estimators(5)
1780 .with_learning_rate(-0.1);
1781 assert!(model.fit(&x, &y).is_err());
1782 }
1783}