use crate::decision_tree::{
    self, ClassificationCriterion, Node, build_classification_tree_with_feature_subset,
};
use ferrolearn_core::error::FerroError;
use ferrolearn_core::introspection::{HasClasses, HasFeatureImportances};
use ferrolearn_core::pipeline::{FittedPipelineEstimator, PipelineEstimator};
use ferrolearn_core::traits::{Fit, Predict};
use ndarray::{Array1, Array2};
use num_traits::{Float, FromPrimitive, ToPrimitive};

/// Boosting variant used by [`AdaBoostClassifier`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AdaBoostAlgorithm {
    /// Discrete SAMME: each weak learner votes with a weight derived from its training error.
    Samme,
    /// SAMME.R: each weak learner contributes class probability estimates.
    SammeR,
}

/// AdaBoost classifier built from depth-1 decision trees (stumps).
#[derive(Debug, Clone)]
pub struct AdaBoostClassifier<F> {
    /// Number of boosting rounds (default: 50).
    pub n_estimators: usize,
    /// Shrinkage applied to each estimator's contribution (default: 1.0, must be positive).
    pub learning_rate: f64,
    /// Boosting variant to use (default: [`AdaBoostAlgorithm::Samme`]).
    pub algorithm: AdaBoostAlgorithm,
    /// Optional random seed (stored; the current resampling scheme is deterministic).
    pub random_state: Option<u64>,
    _marker: std::marker::PhantomData<F>,
}

impl<F: Float> AdaBoostClassifier<F> {
    /// Creates a classifier with the default hyperparameters.
    #[must_use]
    pub fn new() -> Self {
        Self {
            n_estimators: 50,
            learning_rate: 1.0,
            algorithm: AdaBoostAlgorithm::Samme,
            random_state: None,
            _marker: std::marker::PhantomData,
        }
    }

    /// Sets the number of boosting rounds.
    #[must_use]
    pub fn with_n_estimators(mut self, n: usize) -> Self {
        self.n_estimators = n;
        self
    }

    /// Sets the learning rate; values must be positive (validated in `fit`).
    #[must_use]
    pub fn with_learning_rate(mut self, lr: f64) -> Self {
        self.learning_rate = lr;
        self
    }

    /// Sets the boosting algorithm.
    #[must_use]
    pub fn with_algorithm(mut self, algo: AdaBoostAlgorithm) -> Self {
        self.algorithm = algo;
        self
    }

    /// Sets the random seed.
    #[must_use]
    pub fn with_random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }
}

impl<F: Float> Default for AdaBoostClassifier<F> {
    fn default() -> Self {
        Self::new()
    }
}

/// A fitted AdaBoost ensemble of decision stumps.
#[derive(Debug, Clone)]
pub struct FittedAdaBoostClassifier<F> {
    /// Original class labels, sorted ascending.
    classes: Vec<usize>,
    /// Flattened node arrays, one per boosted stump.
    estimators: Vec<Vec<Node<F>>>,
    /// Weight of each estimator in the final vote (all ones for SAMME.R).
    estimator_weights: Vec<F>,
    /// Number of features seen during fitting.
    n_features: usize,
    /// Number of distinct classes.
    n_classes: usize,
    /// Algorithm used during fitting.
    algorithm: AdaBoostAlgorithm,
    /// Impurity-based feature importances aggregated over all estimators.
    feature_importances: Array1<F>,
}

impl<F: Float + Send + Sync + 'static> HasFeatureImportances<F> for FittedAdaBoostClassifier<F> {
    fn feature_importances(&self) -> &Array1<F> {
        &self.feature_importances
    }
}

impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, Array1<usize>> for AdaBoostClassifier<F> {
    type Fitted = FittedAdaBoostClassifier<F>;
    type Error = FerroError;

    /// Fits the boosted ensemble on `x` and integer class labels `y`.
    ///
    /// # Errors
    ///
    /// Returns an error if `x` and `y` have mismatched lengths, if `x` is empty,
    /// if `n_estimators` is zero, if `learning_rate` is not positive, or if `y`
    /// contains fewer than two distinct classes.
    fn fit(
        &self,
        x: &Array2<F>,
        y: &Array1<usize>,
    ) -> Result<FittedAdaBoostClassifier<F>, FerroError> {
        let (n_samples, n_features) = x.dim();

        if n_samples != y.len() {
            return Err(FerroError::ShapeMismatch {
                expected: vec![n_samples],
                actual: vec![y.len()],
                context: "y length must match number of samples in X".into(),
            });
        }
        if n_samples == 0 {
            return Err(FerroError::InsufficientSamples {
                required: 1,
                actual: 0,
                context: "AdaBoostClassifier requires at least one sample".into(),
            });
        }
        if self.n_estimators == 0 {
            return Err(FerroError::InvalidParameter {
                name: "n_estimators".into(),
                reason: "must be at least 1".into(),
            });
        }
        if self.learning_rate <= 0.0 {
            return Err(FerroError::InvalidParameter {
                name: "learning_rate".into(),
                reason: "must be positive".into(),
            });
        }

        // Collect the distinct class labels and map y onto contiguous indices 0..n_classes.
        let mut classes: Vec<usize> = y.iter().copied().collect();
        classes.sort_unstable();
        classes.dedup();
        let n_classes = classes.len();

        if n_classes < 2 {
            return Err(FerroError::InvalidParameter {
                name: "y".into(),
                reason: "need at least 2 distinct classes".into(),
            });
        }

        let y_mapped: Vec<usize> = y
            .iter()
            .map(|&c| classes.iter().position(|&cl| cl == c).unwrap())
            .collect();

        match self.algorithm {
            AdaBoostAlgorithm::Samme => {
                self.fit_samme(x, &y_mapped, n_samples, n_features, n_classes, &classes)
            }
            AdaBoostAlgorithm::SammeR => {
                self.fit_samme_r(x, &y_mapped, n_samples, n_features, n_classes, &classes)
            }
        }
    }
}

impl<F: Float + Send + Sync + 'static> AdaBoostClassifier<F> {
    /// Fits the ensemble with the discrete SAMME algorithm.
    fn fit_samme(
        &self,
        x: &Array2<F>,
        y_mapped: &[usize],
        n_samples: usize,
        n_features: usize,
        n_classes: usize,
        classes: &[usize],
    ) -> Result<FittedAdaBoostClassifier<F>, FerroError> {
        let lr = F::from(self.learning_rate).unwrap();
        let n_f = F::from(n_samples).unwrap();
        let eps = F::from(1e-10).unwrap();

        // Start from uniform sample weights.
        let mut weights = vec![F::one() / n_f; n_samples];

        let all_features: Vec<usize> = (0..n_features).collect();
        let stump_params = decision_tree::TreeParams {
            max_depth: Some(1),
            min_samples_split: 2,
            min_samples_leaf: 1,
        };

        let mut estimators = Vec::with_capacity(self.n_estimators);
        let mut estimator_weights = Vec::with_capacity(self.n_estimators);

        for _ in 0..self.n_estimators {
            // Fit a stump on a weighted resample of the training data.
            let indices = resample_weighted(&weights, n_samples);

            let tree = build_classification_tree_with_feature_subset(
                x,
                y_mapped,
                n_classes,
                &indices,
                &all_features,
                &stump_params,
                ClassificationCriterion::Gini,
            );

            // Weighted training error of this stump.
            let mut weighted_error = F::zero();
            let mut preds = vec![0usize; n_samples];
            for i in 0..n_samples {
                let row = x.row(i);
                let leaf_idx = decision_tree::traverse(&tree, &row);
                if let Node::Leaf { value, .. } = tree[leaf_idx] {
                    preds[i] = value.to_f64().map_or(0, |f| f.round() as usize);
                }
                if preds[i] != y_mapped[i] {
                    weighted_error = weighted_error + weights[i];
                }
            }

            let weight_sum: F = weights.iter().copied().fold(F::zero(), |a, b| a + b);
            let err = if weight_sum > F::zero() {
                weighted_error / weight_sum
            } else {
                F::from(0.5).unwrap()
            };

            // Stop once the stump is no better than random guessing over n_classes.
            if err >= F::one() - F::one() / F::from(n_classes).unwrap() {
                if estimators.is_empty() {
                    // Keep at least one estimator so the fitted model can predict.
                    estimators.push(tree);
                    estimator_weights.push(F::one());
                }
                break;
            }

            // SAMME estimator weight: lr * (ln((1 - err) / err) + ln(n_classes - 1)).
            let alpha = lr * ((F::one() - err).max(eps) / err.max(eps)).ln()
                + lr * (F::from(n_classes - 1).unwrap()).ln();

            // Up-weight the misclassified samples.
            for i in 0..n_samples {
                if preds[i] != y_mapped[i] {
                    weights[i] = weights[i] * alpha.exp();
                }
            }

            // Renormalize the sample weights.
            let new_sum: F = weights.iter().copied().fold(F::zero(), |a, b| a + b);
            if new_sum > F::zero() {
                for w in &mut weights {
                    *w = *w / new_sum;
                }
            }

            estimators.push(tree);
            estimator_weights.push(alpha);
        }

        let feature_importances = decision_tree::aggregate_tree_importances(
            &estimators,
            None,
            Some(&estimator_weights),
            n_features,
        );

        Ok(FittedAdaBoostClassifier {
            classes: classes.to_vec(),
            estimators,
            estimator_weights,
            n_features,
            n_classes,
            algorithm: AdaBoostAlgorithm::Samme,
            feature_importances,
        })
    }

    /// Fits the ensemble with SAMME.R, which uses class probability estimates
    /// from each stump instead of hard votes.
    fn fit_samme_r(
        &self,
        x: &Array2<F>,
        y_mapped: &[usize],
        n_samples: usize,
        n_features: usize,
        n_classes: usize,
        classes: &[usize],
    ) -> Result<FittedAdaBoostClassifier<F>, FerroError> {
        let lr = F::from(self.learning_rate).unwrap();
        let n_f = F::from(n_samples).unwrap();
        let eps = F::from(1e-10).unwrap();
        let k_f = F::from(n_classes).unwrap();

        // Start from uniform sample weights.
        let mut weights = vec![F::one() / n_f; n_samples];

        let all_features: Vec<usize> = (0..n_features).collect();
        let stump_params = decision_tree::TreeParams {
            max_depth: Some(1),
            min_samples_split: 2,
            min_samples_leaf: 1,
        };

        let mut estimators = Vec::with_capacity(self.n_estimators);
        let mut estimator_weights = Vec::with_capacity(self.n_estimators);

        for _ in 0..self.n_estimators {
            let indices = resample_weighted(&weights, n_samples);

            let tree = build_classification_tree_with_feature_subset(
                x,
                y_mapped,
                n_classes,
                &indices,
                &all_features,
                &stump_params,
                ClassificationCriterion::Gini,
            );

            // Per-sample class probability estimates from the stump's leaf distributions.
            let mut proba = vec![vec![F::zero(); n_classes]; n_samples];
            for (i, proba_row) in proba.iter_mut().enumerate() {
                let row = x.row(i);
                let leaf_idx = decision_tree::traverse(&tree, &row);
                if let Node::Leaf {
                    class_distribution: Some(ref dist),
                    ..
                } = tree[leaf_idx]
                {
                    for (k, &p) in dist.iter().enumerate() {
                        proba_row[k] = p.max(eps);
                    }
                } else {
                    // No stored distribution: fall back to a uniform estimate.
                    for val in proba_row.iter_mut() {
                        *val = F::one() / k_f;
                    }
                }
                // Renormalize after clamping the probabilities away from zero.
                let row_sum: F = proba_row.iter().copied().fold(F::zero(), |a, b| a + b);
                if row_sum > F::zero() {
                    for val in proba_row.iter_mut() {
                        *val = *val / row_sum;
                    }
                }
            }

            // SAMME.R weight update: w_i *= exp(-lr * (K - 1) / K * ln p_i(true class)),
            // so samples whose true class receives low probability gain weight.
            let factor = lr * (k_f - F::one()) / k_f;
            let mut any_update = false;

            for i in 0..n_samples {
                let p_correct = proba[i][y_mapped[i]].max(eps);
                let exponent = -factor * p_correct.ln();
                weights[i] = weights[i] * exponent.exp();
                if exponent.abs() > eps {
                    any_update = true;
                }
            }

            // Renormalize the sample weights.
            let new_sum: F = weights.iter().copied().fold(F::zero(), |a, b| a + b);
            if new_sum > F::zero() {
                for w in &mut weights {
                    *w = *w / new_sum;
                }
            }

            estimators.push(tree);
            // SAMME.R estimators all carry unit weight; their influence comes from the
            // log-probabilities accumulated at prediction time.
            estimator_weights.push(F::one());
            if !any_update {
                // Every sample already gets its true class with probability ~1; stop early.
                break;
            }
        }

        let feature_importances = decision_tree::aggregate_tree_importances(
            &estimators,
            None,
            Some(&estimator_weights),
            n_features,
        );

        Ok(FittedAdaBoostClassifier {
            classes: classes.to_vec(),
            estimators,
            estimator_weights,
            n_features,
            n_classes,
            algorithm: AdaBoostAlgorithm::SammeR,
            feature_importances,
        })
    }
}

impl<F: Float + Send + Sync + 'static> Predict<Array2<F>> for FittedAdaBoostClassifier<F> {
    type Output = Array1<usize>;
    type Error = FerroError;

    /// Predicts class labels for each row of `x`.
    ///
    /// # Errors
    ///
    /// Returns an error if `x` does not have the same number of features as the
    /// data the model was fitted on.
    fn predict(&self, x: &Array2<F>) -> Result<Array1<usize>, FerroError> {
        if x.ncols() != self.n_features {
            return Err(FerroError::ShapeMismatch {
                expected: vec![self.n_features],
                actual: vec![x.ncols()],
                context: "number of features must match fitted model".into(),
            });
        }

        let n_samples = x.nrows();

        match self.algorithm {
            AdaBoostAlgorithm::Samme => self.predict_samme(x, n_samples),
            AdaBoostAlgorithm::SammeR => self.predict_samme_r(x, n_samples),
        }
    }
}

impl<F: Float + Send + Sync + 'static> FittedAdaBoostClassifier<F> {
    /// Predicts via a weighted vote over the discrete output of each stump.
    fn predict_samme(&self, x: &Array2<F>, n_samples: usize) -> Result<Array1<usize>, FerroError> {
        let mut predictions = Array1::zeros(n_samples);

        for i in 0..n_samples {
            let row = x.row(i);
            let mut class_scores = vec![F::zero(); self.n_classes];

            for (t, tree_nodes) in self.estimators.iter().enumerate() {
                let leaf_idx = decision_tree::traverse(tree_nodes, &row);
                if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] {
                    let class_idx = value.to_f64().map_or(0, |f| f.round() as usize);
                    if class_idx < self.n_classes {
                        class_scores[class_idx] =
                            class_scores[class_idx] + self.estimator_weights[t];
                    }
                }
            }

            let best = class_scores
                .iter()
                .enumerate()
                .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
                .map_or(0, |(k, _)| k);
            predictions[i] = self.classes[best];
        }

        Ok(predictions)
    }

    /// Predicts by accumulating the SAMME.R per-class log-probability contributions.
    fn predict_samme_r(
        &self,
        x: &Array2<F>,
        n_samples: usize,
    ) -> Result<Array1<usize>, FerroError> {
        let eps = F::from(1e-10).unwrap();
        let k_f = F::from(self.n_classes).unwrap();
        let k_minus_1 = k_f - F::one();

        let mut predictions = Array1::zeros(n_samples);

        for i in 0..n_samples {
            let row = x.row(i);
            let mut accumulated = vec![F::zero(); self.n_classes];

            for tree_nodes in &self.estimators {
                let leaf_idx = decision_tree::traverse(tree_nodes, &row);
                if let Node::Leaf {
                    class_distribution: Some(ref dist),
                    ..
                } = tree_nodes[leaf_idx]
                {
                    // Each estimator contributes (K - 1) * (log p_k - mean log p).
                    let log_probs: Vec<F> = dist.iter().map(|&p| p.max(eps).ln()).collect();
                    let mean_log: F = log_probs.iter().copied().fold(F::zero(), |a, b| a + b) / k_f;

                    for k in 0..self.n_classes {
                        accumulated[k] = accumulated[k] + k_minus_1 * (log_probs[k] - mean_log);
                    }
                } else if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] {
                    // No stored distribution: fall back to a unit vote for the leaf's class.
                    let class_idx = value.to_f64().map_or(0, |f| f.round() as usize);
                    if class_idx < self.n_classes {
                        accumulated[class_idx] = accumulated[class_idx] + F::one();
                    }
                }
            }

            let best = accumulated
                .iter()
                .enumerate()
                .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
                .map_or(0, |(k, _)| k);
            predictions[i] = self.classes[best];
        }

        Ok(predictions)
    }

    /// Returns the mean accuracy of `self.predict(x)` against `y`.
    ///
    /// # Errors
    ///
    /// Returns an error if `x` and `y` have mismatched lengths or if prediction fails.
    pub fn score(&self, x: &Array2<F>, y: &Array1<usize>) -> Result<F, FerroError> {
        if x.nrows() != y.len() {
            return Err(FerroError::ShapeMismatch {
                expected: vec![x.nrows()],
                actual: vec![y.len()],
                context: "y length must match number of samples in X".into(),
            });
        }
        let preds = self.predict(x)?;
        Ok(crate::mean_accuracy(&preds, y))
    }

    /// Returns class probability estimates with shape `(n_samples, n_classes)`.
    ///
    /// For SAMME the per-class vote totals are normalized to sum to one; for
    /// SAMME.R a softmax is applied to the accumulated decision scores.
    ///
    /// # Errors
    ///
    /// Returns an error if `x` does not have the same number of features as the
    /// data the model was fitted on.
    pub fn predict_proba(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
        if x.ncols() != self.n_features {
            return Err(FerroError::ShapeMismatch {
                expected: vec![self.n_features],
                actual: vec![x.ncols()],
                context: "number of features must match fitted model".into(),
            });
        }
        let n_samples = x.nrows();
        let n_classes = self.n_classes;
        let mut proba = Array2::<F>::zeros((n_samples, n_classes));

        match self.algorithm {
            AdaBoostAlgorithm::Samme => {
                for i in 0..n_samples {
                    let row = x.row(i);
                    let mut scores = vec![F::zero(); n_classes];
                    for (t, tree_nodes) in self.estimators.iter().enumerate() {
                        let leaf_idx = decision_tree::traverse(tree_nodes, &row);
                        if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] {
                            let class_idx = value.to_f64().map_or(0, |f| f.round() as usize);
                            if class_idx < n_classes {
                                scores[class_idx] = scores[class_idx] + self.estimator_weights[t];
                            }
                        }
                    }
                    // Normalize vote totals; fall back to uniform if all scores are zero.
                    let total: F = scores.iter().copied().fold(F::zero(), |a, b| a + b);
                    if total > F::zero() {
                        for k in 0..n_classes {
                            proba[[i, k]] = scores[k] / total;
                        }
                    } else {
                        let u = F::one() / F::from(n_classes).unwrap();
                        for k in 0..n_classes {
                            proba[[i, k]] = u;
                        }
                    }
                }
            }
            AdaBoostAlgorithm::SammeR => {
                let eps = F::from(1e-10).unwrap();
                let k_f = F::from(n_classes).unwrap();
                let k_minus_1 = k_f - F::one();
                for i in 0..n_samples {
                    let row = x.row(i);
                    let mut accumulated = vec![F::zero(); n_classes];
                    for tree_nodes in &self.estimators {
                        let leaf_idx = decision_tree::traverse(tree_nodes, &row);
                        if let Node::Leaf {
                            class_distribution: Some(ref dist),
                            ..
                        } = tree_nodes[leaf_idx]
                        {
                            let log_probs: Vec<F> =
                                dist.iter().map(|&p| p.max(eps).ln()).collect();
                            let mean_log: F =
                                log_probs.iter().copied().fold(F::zero(), |a, b| a + b) / k_f;
                            for k in 0..n_classes {
                                accumulated[k] =
                                    accumulated[k] + k_minus_1 * (log_probs[k] - mean_log);
                            }
                        } else if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] {
                            let class_idx = value.to_f64().map_or(0, |f| f.round() as usize);
                            if class_idx < n_classes {
                                accumulated[class_idx] = accumulated[class_idx] + F::one();
                            }
                        }
                    }
                    // Numerically stable softmax over the accumulated decision scores.
                    let max_score = accumulated
                        .iter()
                        .copied()
                        .fold(F::neg_infinity(), |a, b| if b > a { b } else { a });
                    let mut sum_exp = F::zero();
                    for k in 0..n_classes {
                        let e = (accumulated[k] - max_score).exp();
                        proba[[i, k]] = e;
                        sum_exp = sum_exp + e;
                    }
                    if sum_exp > F::zero() {
                        for k in 0..n_classes {
                            proba[[i, k]] = proba[[i, k]] / sum_exp;
                        }
                    }
                }
            }
        }
        Ok(proba)
    }

    /// Returns the natural logarithm of [`Self::predict_proba`].
    ///
    /// # Errors
    ///
    /// Returns an error if `x` does not have the same number of features as the
    /// data the model was fitted on.
    pub fn predict_log_proba(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
        let proba = self.predict_proba(x)?;
        Ok(crate::log_proba(&proba))
    }

    /// Returns the raw per-class decision scores with shape `(n_samples, n_classes)`.
    ///
    /// For SAMME each entry is the sum of estimator weights voting for that class;
    /// for SAMME.R it is the accumulated log-probability contribution.
    ///
    /// # Errors
    ///
    /// Returns an error if `x` does not have the same number of features as the
    /// data the model was fitted on.
    pub fn decision_function(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
        if x.ncols() != self.n_features {
            return Err(FerroError::ShapeMismatch {
                expected: vec![self.n_features],
                actual: vec![x.ncols()],
                context: "number of features must match fitted model".into(),
            });
        }
        let n_samples = x.nrows();
        let n_classes = self.n_classes;
        let mut out = Array2::<F>::zeros((n_samples, n_classes));

        match self.algorithm {
            AdaBoostAlgorithm::Samme => {
                for i in 0..n_samples {
                    let row = x.row(i);
                    for (t, tree_nodes) in self.estimators.iter().enumerate() {
                        let leaf_idx = decision_tree::traverse(tree_nodes, &row);
                        if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] {
                            let class_idx = value.to_f64().map_or(0, |f| f.round() as usize);
                            if class_idx < n_classes {
                                out[[i, class_idx]] =
                                    out[[i, class_idx]] + self.estimator_weights[t];
                            }
                        }
                    }
                }
            }
            AdaBoostAlgorithm::SammeR => {
                let eps = F::from(1e-10).unwrap();
                let k_f = F::from(n_classes).unwrap();
                let k_minus_1 = k_f - F::one();
                for i in 0..n_samples {
                    let row = x.row(i);
                    for tree_nodes in &self.estimators {
                        let leaf_idx = decision_tree::traverse(tree_nodes, &row);
                        if let Node::Leaf {
                            class_distribution: Some(ref dist),
                            ..
                        } = tree_nodes[leaf_idx]
                        {
                            let log_probs: Vec<F> =
                                dist.iter().map(|&p| p.max(eps).ln()).collect();
                            let mean_log: F =
                                log_probs.iter().copied().fold(F::zero(), |a, b| a + b) / k_f;
                            for k in 0..n_classes {
                                out[[i, k]] =
                                    out[[i, k]] + k_minus_1 * (log_probs[k] - mean_log);
                            }
                        } else if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] {
                            let class_idx = value.to_f64().map_or(0, |f| f.round() as usize);
                            if class_idx < n_classes {
                                out[[i, class_idx]] = out[[i, class_idx]] + F::one();
                            }
                        }
                    }
                }
            }
        }
        Ok(out)
    }
}

impl<F: Float + Send + Sync + 'static> HasClasses for FittedAdaBoostClassifier<F> {
    fn classes(&self) -> &[usize] {
        &self.classes
    }

    fn n_classes(&self) -> usize {
        self.classes.len()
    }
}

impl<F: Float + ToPrimitive + FromPrimitive + Send + Sync + 'static> PipelineEstimator<F>
    for AdaBoostClassifier<F>
{
    fn fit_pipeline(
        &self,
        x: &Array2<F>,
        y: &Array1<F>,
    ) -> Result<Box<dyn FittedPipelineEstimator<F>>, FerroError> {
        // Pipeline targets arrive as floats; convert them to integer class labels.
        let y_usize: Array1<usize> = y.mapv(|v| v.to_usize().unwrap_or(0));
        let fitted = self.fit(x, &y_usize)?;
        Ok(Box::new(FittedAdaBoostPipelineAdapter(fitted)))
    }
}

/// Adapter exposing a fitted AdaBoost model through the pipeline prediction interface.
struct FittedAdaBoostPipelineAdapter<F: Float + Send + Sync + 'static>(FittedAdaBoostClassifier<F>);

impl<F: Float + ToPrimitive + FromPrimitive + Send + Sync + 'static> FittedPipelineEstimator<F>
    for FittedAdaBoostPipelineAdapter<F>
{
    fn predict_pipeline(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
        let preds = self.0.predict(x)?;
        Ok(preds.mapv(|v| F::from_usize(v).unwrap_or_else(F::nan)))
    }
}

/// Deterministic systematic (low-variance) resampling: returns `n` indices drawn
/// proportionally to `weights` by stepping evenly through the cumulative weight sum.
fn resample_weighted<F: Float>(weights: &[F], n: usize) -> Vec<usize> {
    if weights.is_empty() {
        return Vec::new();
    }

    // Cumulative sum of the weights.
    let mut cumsum = Vec::with_capacity(weights.len());
    let mut running = F::zero();
    for &w in weights {
        running = running + w;
        cumsum.push(running);
    }

    // Degenerate weights: fall back to the identity sample.
    let total = running;
    if total <= F::zero() {
        return (0..n).collect();
    }

    // Place n evenly spaced thresholds, starting at half a step.
    let mut indices = Vec::with_capacity(n);
    let step = total / F::from(n).unwrap();
    let mut threshold = step / F::from(2.0).unwrap();
    let mut j = 0;

    for _ in 0..n {
        while j < cumsum.len() - 1 && cumsum[j] < threshold {
            j += 1;
        }
        indices.push(j);
        threshold = threshold + step;
    }

    indices
}

#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::array;

    #[test]
    fn test_adaboost_sammer_binary_simple() {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(50)
            .with_algorithm(AdaBoostAlgorithm::SammeR)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
        for i in 0..4 {
            assert_eq!(preds[i], 0);
        }
        for i in 4..8 {
            assert_eq!(preds[i], 1);
        }
    }

    #[test]
    fn test_adaboost_sammer_multiclass() {
        let x = Array2::from_shape_vec((9, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
            .unwrap();
        let y = array![0, 0, 0, 1, 1, 1, 2, 2, 2];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(50)
            .with_algorithm(AdaBoostAlgorithm::SammeR)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 9);
        let correct = preds.iter().zip(y.iter()).filter(|(p, t)| p == t).count();
        assert!(
            correct >= 5,
            "Expected at least 5/9 correct, got {correct}/9"
        );
    }

    #[test]
    fn test_adaboost_samme_binary_simple() {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(50)
            .with_algorithm(AdaBoostAlgorithm::Samme)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 8);
        for i in 0..4 {
            assert_eq!(preds[i], 0);
        }
        for i in 4..8 {
            assert_eq!(preds[i], 1);
        }
    }

    #[test]
    fn test_adaboost_samme_multiclass() {
        let x = Array2::from_shape_vec((9, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
            .unwrap();
        let y = array![0, 0, 0, 1, 1, 1, 2, 2, 2];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(50)
            .with_algorithm(AdaBoostAlgorithm::Samme)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 9);
        let correct = preds.iter().zip(y.iter()).filter(|(p, t)| p == t).count();
        assert!(
            correct >= 5,
            "Expected at least 5/9 correct for SAMME multiclass, got {correct}/9"
        );
    }

    #[test]
    fn test_adaboost_has_classes() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = array![0, 1, 2, 0, 1, 2];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(5)
            .with_random_state(0);
        let fitted = model.fit(&x, &y).unwrap();

        assert_eq!(fitted.classes(), &[0, 1, 2]);
        assert_eq!(fitted.n_classes(), 3);
    }

    #[test]
    fn test_adaboost_reproducibility() {
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(10)
            .with_random_state(42);

        let fitted1 = model.fit(&x, &y).unwrap();
        let fitted2 = model.fit(&x, &y).unwrap();

        let preds1 = fitted1.predict(&x).unwrap();
        let preds2 = fitted2.predict(&x).unwrap();
        assert_eq!(preds1, preds2);
    }
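
    // Sanity-check sketch: both algorithms should produce row-normalized probabilities,
    // since `predict_proba` above divides SAMME vote totals by their sum (or falls back
    // to uniform) and applies a softmax to the SAMME.R decision scores.
    #[test]
    fn test_adaboost_predict_proba_rows_sum_to_one() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = array![0, 0, 0, 1, 1, 1];

        for &algo in &[AdaBoostAlgorithm::Samme, AdaBoostAlgorithm::SammeR] {
            let model = AdaBoostClassifier::<f64>::new()
                .with_n_estimators(10)
                .with_algorithm(algo)
                .with_random_state(42);
            let fitted = model.fit(&x, &y).unwrap();
            let proba = fitted.predict_proba(&x).unwrap();

            assert_eq!(proba.dim(), (6, 2));
            for row in proba.outer_iter() {
                let sum: f64 = row.sum();
                assert!((sum - 1.0).abs() < 1e-6, "rows should sum to 1, got {sum}");
            }
        }
    }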

    #[test]
    fn test_adaboost_shape_mismatch_fit() {
        let x = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = array![0, 1];

        let model = AdaBoostClassifier::<f64>::new().with_n_estimators(5);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_adaboost_shape_mismatch_predict() {
        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![0, 0, 1, 1];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(5)
            .with_random_state(0);
        let fitted = model.fit(&x, &y).unwrap();

        let x_bad = Array2::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        assert!(fitted.predict(&x_bad).is_err());
    }

    #[test]
    fn test_adaboost_empty_data() {
        let x = Array2::<f64>::zeros((0, 2));
        let y = Array1::<usize>::zeros(0);

        let model = AdaBoostClassifier::<f64>::new().with_n_estimators(5);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_adaboost_single_class() {
        let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap();
        let y = array![0, 0, 0];

        let model = AdaBoostClassifier::<f64>::new().with_n_estimators(5);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_adaboost_zero_estimators() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let y = array![0, 0, 1, 1];

        let model = AdaBoostClassifier::<f64>::new().with_n_estimators(0);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_adaboost_invalid_learning_rate() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let y = array![0, 0, 1, 1];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(5)
            .with_learning_rate(0.0);
        assert!(model.fit(&x, &y).is_err());
    }

    #[test]
    fn test_adaboost_pipeline_integration() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = Array1::from_vec(vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0]);

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(10)
            .with_random_state(42);
        let fitted = model.fit_pipeline(&x, &y).unwrap();
        let preds = fitted.predict_pipeline(&x).unwrap();
        assert_eq!(preds.len(), 6);
    }

    #[test]
    fn test_adaboost_f32_support() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = array![0, 0, 0, 1, 1, 1];

        let model = AdaBoostClassifier::<f32>::new()
            .with_n_estimators(10)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();
        assert_eq!(preds.len(), 6);
    }

    #[test]
    fn test_adaboost_default_trait() {
        let model = AdaBoostClassifier::<f64>::default();
        assert_eq!(model.n_estimators, 50);
        assert!((model.learning_rate - 1.0).abs() < 1e-10);
        assert_eq!(model.algorithm, AdaBoostAlgorithm::Samme);
    }

    #[test]
    fn test_adaboost_non_contiguous_labels() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = array![10, 10, 10, 20, 20, 20];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(20)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 6);
        for &p in &preds {
            assert!(p == 10 || p == 20);
        }
    }

    #[test]
    fn test_adaboost_sammer_learning_rate_effect() {
        let x =
            Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(50)
            .with_algorithm(AdaBoostAlgorithm::SammeR)
            .with_learning_rate(0.1)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();
        assert_eq!(preds.len(), 8);
    }

    #[test]
    fn test_adaboost_samme_learning_rate_effect() {
        let x =
            Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(50)
            .with_algorithm(AdaBoostAlgorithm::Samme)
            .with_learning_rate(0.5)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();
        assert_eq!(preds.len(), 8);
    }

    #[test]
    fn test_adaboost_many_features() {
        let x = Array2::from_shape_vec(
            (8, 4),
            vec![
                1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0,
                5.0, 0.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 7.0, 0.0, 0.0, 0.0, 8.0, 0.0, 0.0, 0.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 0, 1, 1, 1, 1];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(20)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();
        assert_eq!(preds.len(), 8);
    }

    #[test]
    fn test_adaboost_4_classes() {
        let x = Array2::from_shape_vec(
            (12, 1),
            vec![
                1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
            ],
        )
        .unwrap();
        let y = array![0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(50)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();

        assert_eq!(preds.len(), 12);
        assert_eq!(fitted.n_classes(), 4);
    }

    #[test]
    fn test_resample_weighted_uniform() {
        let weights = vec![0.25, 0.25, 0.25, 0.25];
        let indices = resample_weighted(&weights, 4);
        assert_eq!(indices, vec![0, 1, 2, 3]);
    }

    #[test]
    fn test_resample_weighted_skewed() {
        let weights = vec![0.0, 0.0, 0.0, 1.0];
        let indices = resample_weighted(&weights, 4);
        assert_eq!(indices.len(), 4);
        for &idx in &indices {
            assert_eq!(idx, 3);
        }
    }

    #[test]
    fn test_resample_weighted_empty() {
        let weights: Vec<f64> = Vec::new();
        let indices = resample_weighted(&weights, 0);
        assert!(indices.is_empty());
    }

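    // A sketch of one more property check for the helper: when more indices are requested
    // than there are weights, the systematic resampler still allocates them proportionally,
    // so two equal weights and four draws should yield each index twice.
    #[test]
    fn test_resample_weighted_oversample() {
        let weights = vec![0.5, 0.5];
        let indices = resample_weighted(&weights, 4);
        assert_eq!(indices, vec![0, 0, 1, 1]);
    }
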
    #[test]
    fn test_adaboost_sammer_single_estimator() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = array![0, 0, 0, 1, 1, 1];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(1)
            .with_algorithm(AdaBoostAlgorithm::SammeR)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();
        assert_eq!(preds.len(), 6);
    }

    #[test]
    fn test_adaboost_samme_single_estimator() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = array![0, 0, 0, 1, 1, 1];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(1)
            .with_algorithm(AdaBoostAlgorithm::Samme)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();
        let preds = fitted.predict(&x).unwrap();
        assert_eq!(preds.len(), 6);
    }

    #[test]
    fn test_adaboost_negative_learning_rate() {
        let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let y = array![0, 0, 1, 1];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(5)
            .with_learning_rate(-0.1);
        assert!(model.fit(&x, &y).is_err());
    }
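
    // Two additional sanity-check sketches. They assume the behaviour implemented above:
    // `score` reports mean accuracy, `feature_importances` has one entry per input feature,
    // and for SAMME the argmax of `decision_function` matches `predict`, since both
    // accumulate the same per-class vote totals.
    #[test]
    fn test_adaboost_score_and_feature_importances() {
        let x = Array2::from_shape_vec(
            (6, 2),
            vec![1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 4.0, 0.0, 5.0, 0.0, 6.0, 0.0],
        )
        .unwrap();
        let y = array![0, 0, 0, 1, 1, 1];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(10)
            .with_random_state(0);
        let fitted = model.fit(&x, &y).unwrap();

        // The data is perfectly separable along the first feature.
        let acc = fitted.score(&x, &y).unwrap();
        assert!(acc > 0.99);

        assert_eq!(fitted.feature_importances().len(), 2);
    }

    #[test]
    fn test_adaboost_decision_function_matches_predict() {
        let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let y = array![0, 0, 0, 1, 1, 1];

        let model = AdaBoostClassifier::<f64>::new()
            .with_n_estimators(10)
            .with_random_state(42);
        let fitted = model.fit(&x, &y).unwrap();

        let preds = fitted.predict(&x).unwrap();
        let scores = fitted.decision_function(&x).unwrap();
        assert_eq!(scores.dim(), (6, 2));

        for i in 0..6 {
            let best = (0..2)
                .max_by(|&a, &b| scores[[i, a]].partial_cmp(&scores[[i, b]]).unwrap())
                .unwrap();
            assert_eq!(preds[i], fitted.classes()[best]);
        }
    }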
}