use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
use sklears_core::{
    error::Result as SklResult,
    prelude::{Predict, SklearsError},
    traits::{Estimator, Fit, Untrained},
    types::{Float, FloatBounds},
};

use crate::PipelinePredictor;

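/// AdaBoost classifier using the SAMME weight-update scheme.
///
/// Each round fits a base estimator, computes its weighted training error
/// `err_t`, and assigns it the vote weight
/// `alpha_t = learning_rate * (ln((1 - err_t) / err_t) + ln(K - 1))`
/// (with `K` classes); misclassified samples are up-weighted by
/// `exp(alpha_t)` before the next round.
///
/// A minimal usage sketch (assuming some `PipelinePredictor` implementation,
/// e.g. the crate's `MockPredictor` test helper):
///
/// ```ignore
/// let clf = AdaBoostClassifier::new()
///     .n_estimators(10)
///     .base_estimator(Box::new(MockPredictor::new()));
/// let fitted = clf.fit(&x.view(), &Some(&y.view()))?;
/// let labels = fitted.predict(&x.view())?;
/// ```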
pub struct AdaBoostClassifier<S = Untrained> {
    state: S,
    base_estimators: Vec<Box<dyn PipelinePredictor>>,
    n_estimators: usize,
    learning_rate: f64,
    algorithm: AdaBoostAlgorithm,
    random_state: Option<u64>,
}

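/// Boosting variant. Only the discrete `SAMME` update is implemented in
/// `fit`; `SAMMER` (SAMME.R) is accepted as configuration but currently
/// trains with the same discrete update.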
#[derive(Debug, Clone)]
pub enum AdaBoostAlgorithm {
    /// Discrete SAMME: boosts on predicted class labels.
    SAMME,
    /// SAMME.R: boosts on class probabilities.
    SAMMER,
}

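/// State of a fitted [`AdaBoostClassifier`]: the boosted estimators together
/// with their vote weights (alphas) and per-round weighted training errors.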
pub struct AdaBoostTrained {
    fitted_estimators: Vec<Box<dyn PipelinePredictor>>,
    estimator_weights: Array1<f64>,
    estimator_errors: Array1<f64>,
    classes: Array1<f64>,
    n_features_in: usize,
    feature_names_in: Option<Vec<String>>,
}

impl AdaBoostClassifier<Untrained> {
    /// Creates a classifier with the default configuration
    /// (50 estimators, learning rate 1.0, SAMME).
    #[must_use]
    pub fn new() -> Self {
        Self {
            state: Untrained,
            base_estimators: Vec::new(),
            n_estimators: 50,
            learning_rate: 1.0,
            algorithm: AdaBoostAlgorithm::SAMME,
            random_state: None,
        }
    }

    /// Sets the number of boosting rounds.
    #[must_use]
    pub fn n_estimators(mut self, n_estimators: usize) -> Self {
        self.n_estimators = n_estimators;
        self
    }

    /// Sets the shrinkage applied to each estimator's vote weight.
    #[must_use]
    pub fn learning_rate(mut self, learning_rate: f64) -> Self {
        self.learning_rate = learning_rate;
        self
    }

    /// Selects the boosting variant.
    #[must_use]
    pub fn algorithm(mut self, algorithm: AdaBoostAlgorithm) -> Self {
        self.algorithm = algorithm;
        self
    }

    /// Adds a base estimator; the last one added is reused for any
    /// remaining rounds.
    #[must_use]
    pub fn base_estimator(mut self, estimator: Box<dyn PipelinePredictor>) -> Self {
        self.base_estimators.push(estimator);
        self
    }

    /// Sets the random seed.
    #[must_use]
    pub fn random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }
}

impl Default for AdaBoostClassifier<Untrained> {
    fn default() -> Self {
        Self::new()
    }
}

impl Estimator for AdaBoostClassifier<Untrained> {
    type Config = ();
    type Error = SklearsError;
    type Float = Float;

    fn config(&self) -> &Self::Config {
        &()
    }
}

impl Fit<ArrayView2<'_, Float>, Option<&ArrayView1<'_, Float>>> for AdaBoostClassifier<Untrained> {
    type Fitted = AdaBoostClassifier<AdaBoostTrained>;

    fn fit(
        self,
        x: &ArrayView2<'_, Float>,
        y: &Option<&ArrayView1<'_, Float>>,
    ) -> SklResult<Self::Fitted> {
        if let Some(y_values) = y.as_ref() {
            let n_samples = x.nrows();
            // Start from uniform sample weights.
            let mut sample_weights = Array1::from_elem(n_samples, 1.0 / n_samples as f64);

            let mut fitted_estimators = Vec::new();
            let mut estimator_weights = Array1::zeros(self.n_estimators);
            let mut estimator_errors = Array1::zeros(self.n_estimators);

            // Sorted, deduplicated class labels.
            let mut classes: Vec<f64> = y_values.to_vec();
            classes.sort_by(|a, b| a.partial_cmp(b).unwrap());
            classes.dedup();
            let classes = Array1::from(classes);
            let n_classes = classes.len();

            for t in 0..self.n_estimators {
                let mut estimator = if t < self.base_estimators.len() {
                    self.base_estimators[t].clone_predictor()
                } else if let Some(last) = self.base_estimators.last() {
                    last.clone_predictor()
                } else {
                    return Err(SklearsError::InvalidInput(
                        "No base estimators provided".to_string(),
                    ));
                };

                // NOTE: `PipelinePredictor::fit` takes no sample weights, so the
                // boosted weights only enter through the error/alpha computation.
                estimator.fit(x, y_values)?;

                let predictions = estimator.predict(x)?;

                // Weighted training error of this round's estimator.
                let mut error = 0.0;
                for i in 0..n_samples {
                    if (predictions[i] - y_values[i]).abs() > 1e-10 {
                        error += sample_weights[i];
                    }
                }

                estimator_errors[t] = error;

                // Perfect fit: keep this estimator and stop boosting.
                if error <= 0.0 {
                    estimator_weights[t] = 1.0;
                    fitted_estimators.push(estimator);
                    break;
                }

                // No better than random guessing among K classes: stop.
                if error >= 1.0 - 1.0 / n_classes as f64 {
                    break;
                }

                // SAMME estimator weight.
                let alpha = self.learning_rate
                    * (((1.0 - error) / error).ln() + (n_classes as f64 - 1.0).ln());
                estimator_weights[t] = alpha;

                // Up-weight misclassified samples.
                for i in 0..n_samples {
                    if (predictions[i] - y_values[i]).abs() > 1e-10 {
                        sample_weights[i] *= alpha.exp();
                    }
                }

                // Renormalize so the weights stay a distribution.
                let weight_sum: f64 = sample_weights.sum();
                if weight_sum > 0.0 {
                    sample_weights.mapv_inplace(|w| w / weight_sum);
                }

                fitted_estimators.push(estimator);
            }

            Ok(AdaBoostClassifier {
                state: AdaBoostTrained {
                    fitted_estimators,
                    estimator_weights,
                    estimator_errors,
                    classes,
                    n_features_in: x.ncols(),
                    feature_names_in: None,
                },
                base_estimators: Vec::new(),
                n_estimators: self.n_estimators,
                learning_rate: self.learning_rate,
                algorithm: self.algorithm,
                random_state: self.random_state,
            })
        } else {
            Err(SklearsError::InvalidInput(
                "Target values required for AdaBoost".to_string(),
            ))
        }
    }
}

impl AdaBoostClassifier<AdaBoostTrained> {
    /// Predicts class labels by weighted majority vote over the boosted
    /// estimators.
    pub fn predict(&self, x: &ArrayView2<'_, Float>) -> SklResult<Array1<f64>> {
        let n_samples = x.nrows();
        let n_classes = self.state.classes.len();
        let mut class_predictions = Array2::zeros((n_samples, n_classes));

        // Accumulate each estimator's weight into the column of the class
        // it predicts.
        for (estimator, &weight) in self
            .state
            .fitted_estimators
            .iter()
            .zip(self.state.estimator_weights.iter())
        {
            let predictions = estimator.predict(x)?;

            for i in 0..n_samples {
                let pred_class = predictions[i];
                for (j, &class_val) in self.state.classes.iter().enumerate() {
                    if (pred_class - class_val).abs() < 1e-10 {
                        class_predictions[[i, j]] += weight;
                        break;
                    }
                }
            }
        }

        // Pick the class with the largest accumulated weight per sample.
        let mut final_predictions = Array1::zeros(n_samples);
        for i in 0..n_samples {
            let mut max_weight = f64::NEG_INFINITY;
            let mut best_class = self.state.classes[0];

            for j in 0..n_classes {
                if class_predictions[[i, j]] > max_weight {
                    max_weight = class_predictions[[i, j]];
                    best_class = self.state.classes[j];
                }
            }

            final_predictions[i] = best_class;
        }

        Ok(final_predictions)
    }

    /// Returns the fitted base estimators.
    #[must_use]
    pub fn estimators(&self) -> &[Box<dyn PipelinePredictor>] {
        &self.state.fitted_estimators
    }

    /// Returns the per-round estimator vote weights (alphas).
    #[must_use]
    pub fn estimator_weights(&self) -> &Array1<f64> {
        &self.state.estimator_weights
    }

    /// Returns the per-round weighted training errors.
    #[must_use]
    pub fn estimator_errors(&self) -> &Array1<f64> {
        &self.state.estimator_errors
    }
}

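/// Gradient boosting regressor built by stage-wise additive modeling: each
/// stage fits a base estimator to the pseudo-residuals of the current
/// ensemble and adds it with step size `learning_rate`, i.e.
/// `F_m(x) = F_{m-1}(x) + learning_rate * h_m(x)`.
///
/// Tree-shape and subsampling parameters are stored as configuration; the
/// fitting loop delegates the actual learning to the provided base
/// estimators.
///
/// A minimal usage sketch (assuming some `PipelinePredictor` implementation,
/// e.g. the crate's `MockPredictor` test helper):
///
/// ```ignore
/// let gb = GradientBoostingRegressor::new()
///     .n_estimators(100)
///     .learning_rate(0.1)
///     .base_estimator(Box::new(MockPredictor::new()));
/// let fitted = gb.fit(&x.view(), &Some(&y.view()))?;
/// let y_hat = fitted.predict(&x.view())?;
/// ```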
pub struct GradientBoostingRegressor<S = Untrained> {
    state: S,
    base_estimators: Vec<Box<dyn PipelinePredictor>>,
    n_estimators: usize,
    learning_rate: f64,
    max_depth: Option<usize>,
    min_samples_split: usize,
    min_samples_leaf: usize,
    subsample: f64,
    loss_function: LossFunction,
    random_state: Option<u64>,
}

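/// Loss functions for gradient boosting. The pseudo-residual fed to each
/// stage is the negative gradient of the loss with respect to the current
/// prediction `F`: least squares gives `y - F`, least absolute deviation
/// gives `sign(y - F)`, Huber clips `y - F` to `[-delta, delta]`, and the
/// quantile (pinball) loss gives `alpha` when `y > F` and `alpha - 1`
/// otherwise.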
#[derive(Debug, Clone)]
pub enum LossFunction {
    /// Squared error (L2).
    LeastSquares,
    /// Absolute error (L1).
    LeastAbsoluteDeviation,
    /// Huber loss: quadratic within `delta`, linear beyond it.
    Huber { delta: f64 },
    /// Pinball loss for the `alpha`-quantile.
    Quantile { alpha: f64 },
}

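/// State of a fitted [`GradientBoostingRegressor`]: the staged estimators,
/// the constant initial prediction, and the training loss after each stage.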
pub struct GradientBoostingTrained {
    fitted_estimators: Vec<Box<dyn PipelinePredictor>>,
    initial_prediction: f64,
    loss_function: LossFunction,
    n_features_in: usize,
    feature_names_in: Option<Vec<String>>,
    train_score: Vec<f64>,
}

impl GradientBoostingRegressor<Untrained> {
    /// Creates a regressor with the default configuration
    /// (100 estimators, learning rate 0.1, least-squares loss).
    #[must_use]
    pub fn new() -> Self {
        Self {
            state: Untrained,
            base_estimators: Vec::new(),
            n_estimators: 100,
            learning_rate: 0.1,
            max_depth: Some(3),
            min_samples_split: 2,
            min_samples_leaf: 1,
            subsample: 1.0,
            loss_function: LossFunction::LeastSquares,
            random_state: None,
        }
    }

    /// Sets the number of boosting stages.
    #[must_use]
    pub fn n_estimators(mut self, n_estimators: usize) -> Self {
        self.n_estimators = n_estimators;
        self
    }

    /// Sets the shrinkage applied to each stage's contribution.
    #[must_use]
    pub fn learning_rate(mut self, learning_rate: f64) -> Self {
        self.learning_rate = learning_rate;
        self
    }

    /// Sets the maximum depth for the base learners.
    #[must_use]
    pub fn max_depth(mut self, max_depth: Option<usize>) -> Self {
        self.max_depth = max_depth;
        self
    }

    /// Sets the minimum number of samples required to split a node.
    #[must_use]
    pub fn min_samples_split(mut self, min_samples_split: usize) -> Self {
        self.min_samples_split = min_samples_split;
        self
    }

    /// Sets the minimum number of samples required at a leaf node.
    #[must_use]
    pub fn min_samples_leaf(mut self, min_samples_leaf: usize) -> Self {
        self.min_samples_leaf = min_samples_leaf;
        self
    }

    /// Sets the subsampling fraction for each stage.
    #[must_use]
    pub fn subsample(mut self, subsample: f64) -> Self {
        self.subsample = subsample;
        self
    }

    /// Selects the loss function to optimize.
    #[must_use]
    pub fn loss_function(mut self, loss_function: LossFunction) -> Self {
        self.loss_function = loss_function;
        self
    }

    /// Adds a base estimator; the last one added is reused for any
    /// remaining stages.
    #[must_use]
    pub fn base_estimator(mut self, estimator: Box<dyn PipelinePredictor>) -> Self {
        self.base_estimators.push(estimator);
        self
    }

    /// Sets the random seed.
    #[must_use]
    pub fn random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }
}

impl Default for GradientBoostingRegressor<Untrained> {
    fn default() -> Self {
        Self::new()
    }
}

impl Estimator for GradientBoostingRegressor<Untrained> {
    type Config = ();
    type Error = SklearsError;
    type Float = Float;

    fn config(&self) -> &Self::Config {
        &()
    }
}

impl Fit<ArrayView2<'_, Float>, Option<&ArrayView1<'_, Float>>>
    for GradientBoostingRegressor<Untrained>
{
    type Fitted = GradientBoostingRegressor<GradientBoostingTrained>;

    fn fit(
        self,
        x: &ArrayView2<'_, Float>,
        y: &Option<&ArrayView1<'_, Float>>,
    ) -> SklResult<Self::Fitted> {
        if let Some(y_values) = y.as_ref() {
            let n_samples = x.nrows();

            // The constant initial model. The mean minimizes least squares and
            // serves as a simple starting point for the other losses as well.
            let initial_prediction = y_values.mean().unwrap_or(0.0);

            let mut current_predictions = Array1::from_elem(n_samples, initial_prediction);
            let mut fitted_estimators = Vec::new();
            let mut train_scores = Vec::new();

            for t in 0..self.n_estimators {
                // Pseudo-residuals: the negative gradient of the loss at the
                // current predictions.
                let residuals = self.calculate_residuals(y_values, &current_predictions)?;

                let mut estimator = if t < self.base_estimators.len() {
                    self.base_estimators[t].clone_predictor()
                } else if let Some(last) = self.base_estimators.last() {
                    last.clone_predictor()
                } else {
                    return Err(SklearsError::InvalidInput(
                        "No base estimators provided".to_string(),
                    ));
                };

                // Fit this stage to the pseudo-residuals.
                estimator.fit(x, &residuals.view())?;

                let predictions = estimator.predict(x)?;

                // Take a shrunken step along the new stage's predictions.
                for i in 0..n_samples {
                    current_predictions[i] += self.learning_rate * predictions[i];
                }

                // Track the training loss after this stage.
                let score = self.calculate_loss(y_values, &current_predictions)?;
                train_scores.push(score);

                fitted_estimators.push(estimator);
            }

            Ok(GradientBoostingRegressor {
                state: GradientBoostingTrained {
                    fitted_estimators,
                    initial_prediction,
                    loss_function: self.loss_function.clone(),
                    n_features_in: x.ncols(),
                    feature_names_in: None,
                    train_score: train_scores,
                },
                base_estimators: Vec::new(),
                n_estimators: self.n_estimators,
                learning_rate: self.learning_rate,
                max_depth: self.max_depth,
                min_samples_split: self.min_samples_split,
                min_samples_leaf: self.min_samples_leaf,
                subsample: self.subsample,
                loss_function: self.loss_function,
                random_state: self.random_state,
            })
        } else {
            Err(SklearsError::InvalidInput(
                "Target values required for Gradient Boosting".to_string(),
            ))
        }
    }
}

impl GradientBoostingRegressor<Untrained> {
    /// Computes the pseudo-residuals: the negative gradient of the configured
    /// loss with respect to the current predictions.
    fn calculate_residuals(
        &self,
        y_true: &ArrayView1<'_, Float>,
        y_pred: &Array1<f64>,
    ) -> SklResult<Array1<f64>> {
        let mut residuals = Array1::zeros(y_true.len());

        match self.loss_function {
            LossFunction::LeastSquares => {
                // r = y - F
                for i in 0..y_true.len() {
                    residuals[i] = y_true[i] - y_pred[i];
                }
            }
            LossFunction::LeastAbsoluteDeviation => {
                // r = sign(y - F)
                for i in 0..y_true.len() {
                    let diff = y_true[i] - y_pred[i];
                    residuals[i] = if diff > 0.0 {
                        1.0
                    } else if diff < 0.0 {
                        -1.0
                    } else {
                        0.0
                    };
                }
            }
            LossFunction::Huber { delta } => {
                // r = clip(y - F, -delta, delta)
                for i in 0..y_true.len() {
                    let diff = y_true[i] - y_pred[i];
                    if diff.abs() <= delta {
                        residuals[i] = diff;
                    } else {
                        residuals[i] = delta * diff.signum();
                    }
                }
            }
            LossFunction::Quantile { alpha } => {
                // Negative gradient of the pinball loss:
                // r = alpha if y > F, else alpha - 1.
                for i in 0..y_true.len() {
                    residuals[i] = if y_true[i] > y_pred[i] {
                        alpha
                    } else {
                        alpha - 1.0
                    };
                }
            }
        }

        Ok(residuals)
    }

    /// Evaluates the configured loss, averaged over the samples.
    fn calculate_loss(
        &self,
        y_true: &ArrayView1<'_, Float>,
        y_pred: &Array1<f64>,
    ) -> SklResult<f64> {
        let mut loss = 0.0;
        let n = y_true.len();

        match self.loss_function {
            LossFunction::LeastSquares => {
                for i in 0..n {
                    let diff = y_true[i] - y_pred[i];
                    loss += diff * diff;
                }
                loss /= n as f64;
            }
            LossFunction::LeastAbsoluteDeviation => {
                for i in 0..n {
                    loss += (y_true[i] - y_pred[i]).abs();
                }
                loss /= n as f64;
            }
            LossFunction::Huber { delta } => {
                for i in 0..n {
                    let diff = (y_true[i] - y_pred[i]).abs();
                    if diff <= delta {
                        loss += 0.5 * diff * diff;
                    } else {
                        loss += delta * (diff - 0.5 * delta);
                    }
                }
                loss /= n as f64;
            }
            LossFunction::Quantile { alpha } => {
                for i in 0..n {
                    let diff = y_true[i] - y_pred[i];
                    if diff >= 0.0 {
                        loss += alpha * diff;
                    } else {
                        loss += (alpha - 1.0) * diff;
                    }
                }
                loss /= n as f64;
            }
        }

        Ok(loss)
    }
}

impl GradientBoostingRegressor<GradientBoostingTrained> {
    /// Predicts by summing the shrunken contributions of all stages on top of
    /// the constant initial prediction.
    pub fn predict(&self, x: &ArrayView2<'_, Float>) -> SklResult<Array1<f64>> {
        let n_samples = x.nrows();
        let mut predictions = Array1::from_elem(n_samples, self.state.initial_prediction);

        for estimator in &self.state.fitted_estimators {
            let estimator_predictions = estimator.predict(x)?;

            for i in 0..n_samples {
                predictions[i] += self.learning_rate * estimator_predictions[i];
            }
        }

        Ok(predictions)
    }

    /// Returns the fitted stage estimators.
    #[must_use]
    pub fn estimators(&self) -> &[Box<dyn PipelinePredictor>] {
        &self.state.fitted_estimators
    }

    /// Returns the training loss recorded after each stage.
    #[must_use]
    pub fn train_scores(&self) -> &[f64] {
        &self.state.train_score
    }

    /// Returns the constant initial prediction.
    #[must_use]
    pub fn initial_prediction(&self) -> f64 {
        self.state.initial_prediction
    }
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mock::MockPredictor;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_adaboost_creation() {
        let adaboost = AdaBoostClassifier::new()
            .n_estimators(10)
            .learning_rate(0.5)
            .base_estimator(Box::new(MockPredictor::new()));

        assert_eq!(adaboost.n_estimators, 10);
        assert_eq!(adaboost.learning_rate, 0.5);
    }

    #[test]
    fn test_gradient_boosting_creation() {
        let gb = GradientBoostingRegressor::new()
            .n_estimators(50)
            .learning_rate(0.1)
            .max_depth(Some(3))
            .base_estimator(Box::new(MockPredictor::new()));

        assert_eq!(gb.n_estimators, 50);
        assert_eq!(gb.learning_rate, 0.1);
        assert_eq!(gb.max_depth, Some(3));
    }

    #[test]
    fn test_loss_functions() {
        let y_true = array![1.0, 2.0, 3.0];
        let y_pred = array![1.1, 1.9, 3.1];

        let gb = GradientBoostingRegressor::new();
        let loss = gb.calculate_loss(&y_true.view(), &y_pred).unwrap();

        assert!(loss >= 0.0);
        assert!(loss < 1.0);
    }
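
    // Added check (not in the original suite): a minimal sketch verifying that
    // `calculate_residuals` returns the negative gradient of the configured
    // loss, using only APIs defined in this module. The chosen values are
    // exactly representable in binary, so exact equality is safe.
    #[test]
    fn test_residuals_match_loss_gradients() {
        let y_true = array![1.0, 2.0, 3.0];
        let y_pred = array![1.5, 2.0, 2.0];

        // Least squares: r = y - F.
        let gb = GradientBoostingRegressor::new();
        let r = gb.calculate_residuals(&y_true.view(), &y_pred).unwrap();
        assert_eq!(r, array![-0.5, 0.0, 1.0]);

        // Least absolute deviation: r = sign(y - F).
        let gb = GradientBoostingRegressor::new()
            .loss_function(LossFunction::LeastAbsoluteDeviation);
        let r = gb.calculate_residuals(&y_true.view(), &y_pred).unwrap();
        assert_eq!(r, array![-1.0, 0.0, 1.0]);
    }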
}