1use ferrolearn_core::error::FerroError;
32use ferrolearn_core::introspection::HasClasses;
33use ferrolearn_core::pipeline::{FittedPipelineEstimator, PipelineEstimator};
34use ferrolearn_core::traits::{Fit, Predict};
35use ndarray::{Array1, Array2};
36use num_traits::{Float, FromPrimitive, ToPrimitive};
37use serde::{Deserialize, Serialize};
38
39use crate::decision_tree::{
40 ClassificationCriterion, DecisionTreeClassifier, DecisionTreeRegressor,
41 FittedDecisionTreeClassifier, FittedDecisionTreeRegressor,
42};
43
/// Hard-voting ensemble of decision-tree classifiers.
///
/// One `DecisionTreeClassifier` is trained per entry in `max_depths` on the
/// full training set; final predictions are decided by majority vote.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VotingClassifier<F> {
    /// One tree is trained per entry; `None` means unlimited depth.
    pub max_depths: Vec<Option<usize>>,
    /// Minimum number of samples required to split an internal node (shared by all trees).
    pub min_samples_split: usize,
    /// Minimum number of samples required at a leaf node (shared by all trees).
    pub min_samples_leaf: usize,
    /// Split-quality criterion shared by every tree.
    pub criterion: ClassificationCriterion,
    /// Ties the float type `F` to the estimator without storing a value.
    _marker: std::marker::PhantomData<F>,
}
73
74impl<F: Float> VotingClassifier<F> {
75 #[must_use]
80 pub fn new() -> Self {
81 Self {
82 max_depths: vec![Some(2), Some(4), Some(6), None],
83 min_samples_split: 2,
84 min_samples_leaf: 1,
85 criterion: ClassificationCriterion::Gini,
86 _marker: std::marker::PhantomData,
87 }
88 }
89
90 #[must_use]
94 pub fn with_max_depths(mut self, max_depths: Vec<Option<usize>>) -> Self {
95 self.max_depths = max_depths;
96 self
97 }
98
99 #[must_use]
101 pub fn with_min_samples_split(mut self, min_samples_split: usize) -> Self {
102 self.min_samples_split = min_samples_split;
103 self
104 }
105
106 #[must_use]
108 pub fn with_min_samples_leaf(mut self, min_samples_leaf: usize) -> Self {
109 self.min_samples_leaf = min_samples_leaf;
110 self
111 }
112
113 #[must_use]
115 pub fn with_criterion(mut self, criterion: ClassificationCriterion) -> Self {
116 self.criterion = criterion;
117 self
118 }
119}
120
impl<F: Float> Default for VotingClassifier<F> {
    /// Equivalent to [`VotingClassifier::new`].
    fn default() -> Self {
        Self::new()
    }
}
126
/// A fitted voting classifier produced by [`VotingClassifier`]'s `fit`.
#[derive(Debug, Clone)]
pub struct FittedVotingClassifier<F> {
    /// One fitted tree per `max_depths` entry, in configuration order.
    trees: Vec<FittedDecisionTreeClassifier<F>>,
    /// Sorted, deduplicated class labels observed in the training targets.
    classes: Vec<usize>,
}
142
143impl<F: Float + Send + Sync + 'static> FittedVotingClassifier<F> {
144 #[must_use]
146 pub fn n_estimators(&self) -> usize {
147 self.trees.len()
148 }
149
150 pub fn score(&self, x: &Array2<F>, y: &Array1<usize>) -> Result<F, FerroError> {
158 if x.nrows() != y.len() {
159 return Err(FerroError::ShapeMismatch {
160 expected: vec![x.nrows()],
161 actual: vec![y.len()],
162 context: "y length must match number of samples in X".into(),
163 });
164 }
165 let preds = self.predict(x)?;
166 Ok(crate::mean_accuracy(&preds, y))
167 }
168
169 pub fn predict_proba(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
179 let n_samples = x.nrows();
180 let n_classes = self.classes.len();
181 let n_trees_f = F::from(self.trees.len()).unwrap();
182 let mut proba = Array2::<F>::zeros((n_samples, n_classes));
183
184 for tree in &self.trees {
185 let tree_proba = tree.predict_proba(x)?;
186 for i in 0..n_samples {
187 for j in 0..n_classes {
188 proba[[i, j]] = proba[[i, j]] + tree_proba[[i, j]];
189 }
190 }
191 }
192 for i in 0..n_samples {
193 for j in 0..n_classes {
194 proba[[i, j]] = proba[[i, j]] / n_trees_f;
195 }
196 }
197 Ok(proba)
198 }
199
200 pub fn predict_log_proba(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
207 let proba = self.predict_proba(x)?;
208 Ok(crate::log_proba(&proba))
209 }
210}
211
212impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, Array1<usize>> for VotingClassifier<F> {
213 type Fitted = FittedVotingClassifier<F>;
214 type Error = FerroError;
215
216 fn fit(
225 &self,
226 x: &Array2<F>,
227 y: &Array1<usize>,
228 ) -> Result<FittedVotingClassifier<F>, FerroError> {
229 let n_samples = x.nrows();
230
231 if n_samples != y.len() {
232 return Err(FerroError::ShapeMismatch {
233 expected: vec![n_samples],
234 actual: vec![y.len()],
235 context: "y length must match number of samples in X".into(),
236 });
237 }
238 if n_samples == 0 {
239 return Err(FerroError::InsufficientSamples {
240 required: 1,
241 actual: 0,
242 context: "VotingClassifier requires at least one sample".into(),
243 });
244 }
245 if self.max_depths.is_empty() {
246 return Err(FerroError::InvalidParameter {
247 name: "max_depths".into(),
248 reason: "must contain at least one entry".into(),
249 });
250 }
251
252 let mut classes: Vec<usize> = y.iter().copied().collect();
254 classes.sort_unstable();
255 classes.dedup();
256
257 let mut trees = Vec::with_capacity(self.max_depths.len());
258 for &max_depth in &self.max_depths {
259 let tree = DecisionTreeClassifier::<F>::new()
260 .with_max_depth(max_depth)
261 .with_min_samples_split(self.min_samples_split)
262 .with_min_samples_leaf(self.min_samples_leaf)
263 .with_criterion(self.criterion);
264 let fitted = tree.fit(x, y)?;
265 trees.push(fitted);
266 }
267
268 Ok(FittedVotingClassifier { trees, classes })
269 }
270}
271
272impl<F: Float + Send + Sync + 'static> Predict<Array2<F>> for FittedVotingClassifier<F> {
273 type Output = Array1<usize>;
274 type Error = FerroError;
275
276 fn predict(&self, x: &Array2<F>) -> Result<Array1<usize>, FerroError> {
283 let n_samples = x.nrows();
284 let n_classes = self.classes.len();
285
286 let all_preds: Vec<Array1<usize>> = self
288 .trees
289 .iter()
290 .map(|tree| tree.predict(x))
291 .collect::<Result<Vec<_>, _>>()?;
292
293 let mut predictions = Array1::zeros(n_samples);
294 for i in 0..n_samples {
295 let mut votes = vec![0usize; n_classes];
296 for tree_preds in &all_preds {
297 let pred = tree_preds[i];
298 if let Some(class_idx) = self.classes.iter().position(|&c| c == pred) {
299 votes[class_idx] += 1;
300 }
301 }
302 let winner = votes
303 .iter()
304 .enumerate()
305 .max_by_key(|&(_, &count)| count)
306 .map_or(0, |(idx, _)| idx);
307 predictions[i] = self.classes[winner];
308 }
309
310 Ok(predictions)
311 }
312}
313
314impl<F: Float + Send + Sync + 'static> HasClasses for FittedVotingClassifier<F> {
315 fn classes(&self) -> &[usize] {
316 &self.classes
317 }
318
319 fn n_classes(&self) -> usize {
320 self.classes.len()
321 }
322}
323
impl<F: Float + ToPrimitive + FromPrimitive + Send + Sync + 'static> PipelineEstimator<F>
    for VotingClassifier<F>
{
    /// Pipeline adapter: casts the float labels to `usize`, fits the ensemble,
    /// and boxes the result behind the pipeline trait object.
    fn fit_pipeline(
        &self,
        x: &Array2<F>,
        y: &Array1<F>,
    ) -> Result<Box<dyn FittedPipelineEstimator<F>>, FerroError> {
        // NOTE(review): labels that cannot convert to usize (negative, NaN,
        // out of range) are silently coerced to class 0 — confirm this lossy
        // best-effort fallback is intended for pipeline use.
        let y_usize: Array1<usize> = y.mapv(|v| v.to_usize().unwrap_or(0));
        let fitted = self.fit(x, &y_usize)?;
        Ok(Box::new(FittedVotingClassifierPipelineAdapter(fitted)))
    }
}
338
339struct FittedVotingClassifierPipelineAdapter<F: Float + Send + Sync + 'static>(
341 FittedVotingClassifier<F>,
342);
343
344impl<F: Float + ToPrimitive + FromPrimitive + Send + Sync + 'static> FittedPipelineEstimator<F>
345 for FittedVotingClassifierPipelineAdapter<F>
346{
347 fn predict_pipeline(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
348 let preds = self.0.predict(x)?;
349 Ok(preds.mapv(|v| F::from_usize(v).unwrap_or_else(F::nan)))
350 }
351}
352
/// Averaging ensemble of decision-tree regressors.
///
/// One `DecisionTreeRegressor` is trained per entry in `max_depths` on the
/// full training set; final predictions are the mean over all trees.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VotingRegressor<F> {
    /// One tree is trained per entry; `None` means unlimited depth.
    pub max_depths: Vec<Option<usize>>,
    /// Minimum number of samples required to split an internal node (shared by all trees).
    pub min_samples_split: usize,
    /// Minimum number of samples required at a leaf node (shared by all trees).
    pub min_samples_leaf: usize,
    /// Ties the float type `F` to the estimator without storing a value.
    _marker: std::marker::PhantomData<F>,
}
399
400impl<F: Float> VotingRegressor<F> {
401 #[must_use]
406 pub fn new() -> Self {
407 Self {
408 max_depths: vec![Some(2), Some(4), Some(6), None],
409 min_samples_split: 2,
410 min_samples_leaf: 1,
411 _marker: std::marker::PhantomData,
412 }
413 }
414
415 #[must_use]
417 pub fn with_max_depths(mut self, max_depths: Vec<Option<usize>>) -> Self {
418 self.max_depths = max_depths;
419 self
420 }
421
422 #[must_use]
424 pub fn with_min_samples_split(mut self, min_samples_split: usize) -> Self {
425 self.min_samples_split = min_samples_split;
426 self
427 }
428
429 #[must_use]
431 pub fn with_min_samples_leaf(mut self, min_samples_leaf: usize) -> Self {
432 self.min_samples_leaf = min_samples_leaf;
433 self
434 }
435}
436
impl<F: Float> Default for VotingRegressor<F> {
    /// Equivalent to [`VotingRegressor::new`].
    fn default() -> Self {
        Self::new()
    }
}
442
/// A fitted voting regressor produced by [`VotingRegressor`]'s `fit`.
#[derive(Debug, Clone)]
pub struct FittedVotingRegressor<F> {
    /// One fitted tree per `max_depths` entry, in configuration order.
    trees: Vec<FittedDecisionTreeRegressor<F>>,
}
456
457impl<F: Float + Send + Sync + 'static> FittedVotingRegressor<F> {
458 #[must_use]
460 pub fn n_estimators(&self) -> usize {
461 self.trees.len()
462 }
463
464 pub fn score(&self, x: &Array2<F>, y: &Array1<F>) -> Result<F, FerroError> {
472 if x.nrows() != y.len() {
473 return Err(FerroError::ShapeMismatch {
474 expected: vec![x.nrows()],
475 actual: vec![y.len()],
476 context: "y length must match number of samples in X".into(),
477 });
478 }
479 let preds = self.predict(x)?;
480 Ok(crate::r2_score(&preds, y))
481 }
482}
483
484impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, Array1<F>> for VotingRegressor<F> {
485 type Fitted = FittedVotingRegressor<F>;
486 type Error = FerroError;
487
488 fn fit(&self, x: &Array2<F>, y: &Array1<F>) -> Result<FittedVotingRegressor<F>, FerroError> {
497 let n_samples = x.nrows();
498
499 if n_samples != y.len() {
500 return Err(FerroError::ShapeMismatch {
501 expected: vec![n_samples],
502 actual: vec![y.len()],
503 context: "y length must match number of samples in X".into(),
504 });
505 }
506 if n_samples == 0 {
507 return Err(FerroError::InsufficientSamples {
508 required: 1,
509 actual: 0,
510 context: "VotingRegressor requires at least one sample".into(),
511 });
512 }
513 if self.max_depths.is_empty() {
514 return Err(FerroError::InvalidParameter {
515 name: "max_depths".into(),
516 reason: "must contain at least one entry".into(),
517 });
518 }
519
520 let mut trees = Vec::with_capacity(self.max_depths.len());
521 for &max_depth in &self.max_depths {
522 let tree = DecisionTreeRegressor::<F>::new()
523 .with_max_depth(max_depth)
524 .with_min_samples_split(self.min_samples_split)
525 .with_min_samples_leaf(self.min_samples_leaf);
526 let fitted = tree.fit(x, y)?;
527 trees.push(fitted);
528 }
529
530 Ok(FittedVotingRegressor { trees })
531 }
532}
533
534impl<F: Float + Send + Sync + 'static> Predict<Array2<F>> for FittedVotingRegressor<F> {
535 type Output = Array1<F>;
536 type Error = FerroError;
537
538 fn predict(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
545 let n_samples = x.nrows();
546 let n_trees_f = F::from(self.trees.len()).unwrap();
547
548 let all_preds: Vec<Array1<F>> = self
549 .trees
550 .iter()
551 .map(|tree| tree.predict(x))
552 .collect::<Result<Vec<_>, _>>()?;
553
554 let mut predictions = Array1::zeros(n_samples);
555 for i in 0..n_samples {
556 let mut sum = F::zero();
557 for tree_preds in &all_preds {
558 sum = sum + tree_preds[i];
559 }
560 predictions[i] = sum / n_trees_f;
561 }
562
563 Ok(predictions)
564 }
565}
566
567impl<F: Float + Send + Sync + 'static> PipelineEstimator<F> for VotingRegressor<F> {
569 fn fit_pipeline(
570 &self,
571 x: &Array2<F>,
572 y: &Array1<F>,
573 ) -> Result<Box<dyn FittedPipelineEstimator<F>>, FerroError> {
574 let fitted = self.fit(x, y)?;
575 Ok(Box::new(fitted))
576 }
577}
578
impl<F: Float + Send + Sync + 'static> FittedPipelineEstimator<F> for FittedVotingRegressor<F> {
    /// Pipeline prediction: the regressor already outputs `Array1<F>`, so
    /// this delegates directly to [`Predict::predict`] with no conversion.
    fn predict_pipeline(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
        self.predict(x)
    }
}
584
#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::array;

    /// Builds 8 two-feature samples that are linearly separable:
    /// the first four rows are class 0, the last four are class 1.
    fn make_classification_data() -> (Array2<f64>, Array1<usize>) {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];
        (x, y)
    }

    /// Builds 6 two-feature samples whose targets roughly track the first
    /// feature, split into a low cluster and a high cluster.
    fn make_regression_data() -> (Array2<f64>, Array1<f64>) {
        let x = Array2::from_shape_vec(
            (6, 2),
            vec![1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0],
        )
        .unwrap();
        let y = array![1.0, 2.0, 3.0, 5.0, 6.0, 7.0];
        (x, y)
    }

    // ---- VotingClassifier ----

    // Default configuration matches the documented values.
    #[test]
    fn test_voting_classifier_default() {
        let model = VotingClassifier::<f64>::new();
        assert_eq!(model.max_depths.len(), 4);
        assert_eq!(model.min_samples_split, 2);
        assert_eq!(model.min_samples_leaf, 1);
    }

    // Every builder method stores its argument.
    #[test]
    fn test_voting_classifier_builder() {
        let model = VotingClassifier::<f64>::new()
            .with_max_depths(vec![Some(1), Some(3)])
            .with_min_samples_split(5)
            .with_min_samples_leaf(2)
            .with_criterion(ClassificationCriterion::Entropy);
        assert_eq!(model.max_depths.len(), 2);
        assert_eq!(model.min_samples_split, 5);
        assert_eq!(model.min_samples_leaf, 2);
        assert_eq!(model.criterion, ClassificationCriterion::Entropy);
    }

    // The ensemble perfectly separates the two clusters on training data.
    #[test]
    fn test_voting_classifier_fit_predict() {
        let (x, y) = make_classification_data();
        let model = VotingClassifier::<f64>::new();
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
        for i in 0..4 {
            assert_eq!(preds[i], 0, "sample {i} should be class 0");
        }
        for i in 4..8 {
            assert_eq!(preds[i], 1, "sample {i} should be class 1");
        }
    }

    // HasClasses exposes the sorted, deduplicated label set.
    #[test]
    fn test_voting_classifier_has_classes() {
        let (x, y) = make_classification_data();
        let model = VotingClassifier::<f64>::new();
        let fitted = model.fit(&x, &y).unwrap();
        assert_eq!(fitted.classes(), &[0, 1]);
        assert_eq!(fitted.n_classes(), 2);
    }

    // One tree is trained per max_depths entry.
    #[test]
    fn test_voting_classifier_n_estimators() {
        let (x, y) = make_classification_data();
        let model = VotingClassifier::<f64>::new().with_max_depths(vec![Some(2), Some(4), None]);
        let fitted = model.fit(&x, &y).unwrap();
        assert_eq!(fitted.n_estimators(), 3);
    }

    // Fitting with zero samples is rejected.
    #[test]
    fn test_voting_classifier_empty_data_error() {
        let x = Array2::<f64>::zeros((0, 2));
        let y = Array1::<usize>::zeros(0);
        let model = VotingClassifier::<f64>::new();
        let result = model.fit(&x, &y);
        assert!(result.is_err());
    }

    // Mismatched x/y lengths are rejected.
    #[test]
    fn test_voting_classifier_shape_mismatch_error() {
        let x = Array2::<f64>::zeros((5, 2));
        let y = Array1::<usize>::zeros(3);
        let model = VotingClassifier::<f64>::new();
        let result = model.fit(&x, &y);
        assert!(result.is_err());
    }

    // An empty max_depths list is rejected (would yield an empty ensemble).
    #[test]
    fn test_voting_classifier_empty_depths_error() {
        let (x, y) = make_classification_data();
        let model = VotingClassifier::<f64>::new().with_max_depths(vec![]);
        let result = model.fit(&x, &y);
        assert!(result.is_err());
    }

    // Three well-separated clusters exercise the multiclass vote path.
    #[test]
    fn test_voting_classifier_multiclass() {
        let x = Array2::from_shape_vec(
            (9, 2),
            vec![
                1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 4.0, 4.0, 5.0, 4.0, 4.0, 5.0, 8.0, 8.0, 9.0, 8.0,
                8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 1, 1, 1, 2, 2, 2];

        let model = VotingClassifier::<f64>::new();
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();
        assert_eq!(preds.len(), 9);
        assert_eq!(fitted.n_classes(), 3);
    }

    // ---- VotingRegressor ----

    // Default configuration matches the documented values.
    #[test]
    fn test_voting_regressor_default() {
        let model = VotingRegressor::<f64>::new();
        assert_eq!(model.max_depths.len(), 4);
        assert_eq!(model.min_samples_split, 2);
        assert_eq!(model.min_samples_leaf, 1);
    }

    // Every builder method stores its argument.
    #[test]
    fn test_voting_regressor_builder() {
        let model = VotingRegressor::<f64>::new()
            .with_max_depths(vec![Some(1), Some(5)])
            .with_min_samples_split(3)
            .with_min_samples_leaf(2);
        assert_eq!(model.max_depths.len(), 2);
        assert_eq!(model.min_samples_split, 3);
        assert_eq!(model.min_samples_leaf, 2);
    }

    // Averaged predictions stay near the training targets.
    #[test]
    fn test_voting_regressor_fit_predict() {
        let (x, y) = make_regression_data();
        let model = VotingRegressor::<f64>::new();
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 6);
        for i in 0..6 {
            let err = (preds[i] - y[i]).abs();
            assert!(
                err < 3.0,
                "prediction {:.2} should be close to target {:.2}",
                preds[i],
                y[i]
            );
        }
    }

    // One tree is trained per max_depths entry.
    #[test]
    fn test_voting_regressor_n_estimators() {
        let (x, y) = make_regression_data();
        let model = VotingRegressor::<f64>::new().with_max_depths(vec![Some(2), None]);
        let fitted = model.fit(&x, &y).unwrap();
        assert_eq!(fitted.n_estimators(), 2);
    }

    // Fitting with zero samples is rejected.
    #[test]
    fn test_voting_regressor_empty_data_error() {
        let x = Array2::<f64>::zeros((0, 2));
        let y = Array1::<f64>::zeros(0);
        let model = VotingRegressor::<f64>::new();
        let result = model.fit(&x, &y);
        assert!(result.is_err());
    }

    // Mismatched x/y lengths are rejected.
    #[test]
    fn test_voting_regressor_shape_mismatch_error() {
        let x = Array2::<f64>::zeros((5, 2));
        let y = Array1::<f64>::zeros(3);
        let model = VotingRegressor::<f64>::new();
        let result = model.fit(&x, &y);
        assert!(result.is_err());
    }

    // An empty max_depths list is rejected (would yield an empty ensemble).
    #[test]
    fn test_voting_regressor_empty_depths_error() {
        let (x, y) = make_regression_data();
        let model = VotingRegressor::<f64>::new().with_max_depths(vec![]);
        let result = model.fit(&x, &y);
        assert!(result.is_err());
    }

    // A single depth-unlimited tree memorizes the training data, so the
    // "average" of one tree must reproduce the targets exactly.
    #[test]
    fn test_voting_regressor_averaging() {
        let (x, y) = make_regression_data();
        let model = VotingRegressor::<f64>::new().with_max_depths(vec![None]);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        for i in 0..6 {
            assert!(
                (preds[i] - y[i]).abs() < 1e-10,
                "single unlimited tree should overfit training data"
            );
        }
    }

    // Smoke test of the f32 instantiation of the classifier.
    #[test]
    fn test_voting_classifier_f32() {
        let x = Array2::<f32>::from_shape_vec(
            (6, 2),
            vec![1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0],
        )
        .unwrap();
        let y = array![0, 0, 0, 1, 1, 1];
        let model = VotingClassifier::<f32>::new();
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();
        assert_eq!(preds.len(), 6);
    }

    // Smoke test of the f32 instantiation of the regressor.
    #[test]
    fn test_voting_regressor_f32() {
        let x = Array2::<f32>::from_shape_vec(
            (6, 2),
            vec![1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0],
        )
        .unwrap();
        let y = array![1.0_f32, 2.0, 3.0, 5.0, 6.0, 7.0];
        let model = VotingRegressor::<f32>::new();
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();
        assert_eq!(preds.len(), 6);
    }
}