Skip to main content

sklears_python/
ensemble.rs

//! Python bindings for ensemble methods
//!
//! This module provides PyO3-based Python bindings for sklears ensemble algorithms.
//! It includes wrappers for Gradient Boosting (classifier and regressor), AdaBoost,
//! Voting, and Bagging classifiers.
5
6use crate::linear::common::core_array1_to_py;
7use crate::utils::{numpy_to_ndarray1, numpy_to_ndarray2};
8use numpy::{PyArray1, PyArray2};
9use pyo3::exceptions::{PyRuntimeError, PyValueError};
10use pyo3::prelude::*;
11use pyo3::types::PyList;
12use scirs2_core::ndarray::{Array1, Array2};
13use sklears_core::traits::{Fit, Predict, Trained, Untrained};
14use sklears_ensemble::gradient_boosting::{
15    TrainedGradientBoostingClassifier, TrainedGradientBoostingRegressor,
16};
17use sklears_ensemble::{
18    AdaBoostClassifier, BaggingClassifier, GradientBoostingClassifier, GradientBoostingConfig,
19    GradientBoostingRegressor, LossFunction, VotingClassifier, VotingClassifierConfig,
20    VotingStrategy,
21};
22
23/// Python wrapper for GradientBoostingClassifier
24#[pyclass(name = "GradientBoostingClassifier")]
25pub struct PyGradientBoostingClassifier {
26    inner: Option<GradientBoostingClassifier>,
27    trained: Option<TrainedGradientBoostingClassifier>,
28}
29
30#[pymethods]
31impl PyGradientBoostingClassifier {
32    #[new]
33    #[pyo3(signature = (
34        n_estimators=100,
35        learning_rate=0.1,
36        max_depth=3,
37        min_samples_split=2,
38        min_samples_leaf=1,
39        subsample=1.0,
40        loss="squared_loss",
41        random_state=None,
42        validation_fraction=0.1
43    ))]
44    fn new(
45        n_estimators: usize,
46        learning_rate: f64,
47        max_depth: usize,
48        min_samples_split: usize,
49        min_samples_leaf: usize,
50        subsample: f64,
51        loss: &str,
52        random_state: Option<u64>,
53        validation_fraction: f64,
54    ) -> PyResult<Self> {
55        let loss_function = match loss {
56            "squared_loss" => LossFunction::SquaredLoss,
57            "absolute_loss" => LossFunction::AbsoluteLoss,
58            "huber" => LossFunction::HuberLoss,
59            "quantile" => LossFunction::QuantileLoss,
60            "logistic" => LossFunction::LogisticLoss,
61            "deviance" => LossFunction::DevianceLoss,
62            "exponential" => LossFunction::ExponentialLoss,
63            _ => {
64                return Err(PyValueError::new_err(format!(
65                    "Unknown loss function: {}",
66                    loss
67                )))
68            }
69        };
70
71        let config = GradientBoostingConfig {
72            n_estimators,
73            learning_rate,
74            max_depth,
75            min_samples_split,
76            min_samples_leaf,
77            subsample,
78            loss_function,
79            random_state,
80            validation_fraction,
81            ..Default::default()
82        };
83
84        Ok(Self {
85            inner: Some(GradientBoostingClassifier::new(config)),
86            trained: None,
87        })
88    }
89
90    /// Fit the gradient boosting classifier
91    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
92        let x_array = numpy_to_ndarray2(x)?;
93        let y_array = numpy_to_ndarray1(y)?;
94
95        let model = self.inner.take().ok_or_else(|| {
96            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
97        })?;
98
99        match model.fit(&x_array, &y_array) {
100            Ok(trained_model) => {
101                self.trained = Some(trained_model);
102                Ok(())
103            }
104            Err(e) => Err(PyRuntimeError::new_err(format!(
105                "Failed to fit model: {}",
106                e
107            ))),
108        }
109    }
110
111    /// Make predictions using the fitted model
112    fn predict<'py>(
113        &self,
114        py: Python<'py>,
115        x: &Bound<'py, PyArray2<f64>>,
116    ) -> PyResult<Py<PyArray1<f64>>> {
117        let trained_model = self.trained.as_ref().ok_or_else(|| {
118            PyRuntimeError::new_err("Model must be fitted before making predictions")
119        })?;
120
121        let x_array = numpy_to_ndarray2(x)?;
122
123        let predictions: Array1<f64> =
124            Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
125                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
126        Ok(core_array1_to_py(py, &predictions))
127    }
128
129    /// Get feature importances
130    fn feature_importances_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
131        let trained_model = self.trained.as_ref().ok_or_else(|| {
132            PyRuntimeError::new_err("Model must be fitted before accessing feature importances")
133        })?;
134
135        let importances = trained_model.feature_importances_gain();
136        Ok(core_array1_to_py(py, importances))
137    }
138
139    fn __repr__(&self) -> String {
140        if self.trained.is_some() {
141            "GradientBoostingClassifier(fitted=True)".to_string()
142        } else {
143            "GradientBoostingClassifier(fitted=False)".to_string()
144        }
145    }
146}
147
148/// Python wrapper for GradientBoostingRegressor
149#[pyclass(name = "GradientBoostingRegressor")]
150pub struct PyGradientBoostingRegressor {
151    inner: Option<GradientBoostingRegressor>,
152    trained: Option<TrainedGradientBoostingRegressor>,
153}
154
155#[pymethods]
156impl PyGradientBoostingRegressor {
157    #[new]
158    #[pyo3(signature = (
159        n_estimators=100,
160        learning_rate=0.1,
161        max_depth=3,
162        min_samples_split=2,
163        min_samples_leaf=1,
164        subsample=1.0,
165        loss="squared_loss",
166        random_state=None,
167        validation_fraction=0.1
168    ))]
169    fn new(
170        n_estimators: usize,
171        learning_rate: f64,
172        max_depth: usize,
173        min_samples_split: usize,
174        min_samples_leaf: usize,
175        subsample: f64,
176        loss: &str,
177        random_state: Option<u64>,
178        validation_fraction: f64,
179    ) -> PyResult<Self> {
180        let loss_function = match loss {
181            "squared_loss" => LossFunction::SquaredLoss,
182            "absolute_loss" => LossFunction::AbsoluteLoss,
183            "huber" => LossFunction::HuberLoss,
184            "quantile" => LossFunction::QuantileLoss,
185            _ => {
186                return Err(PyValueError::new_err(format!(
187                    "Unknown loss function for regression: {}",
188                    loss
189                )))
190            }
191        };
192
193        let config = GradientBoostingConfig {
194            n_estimators,
195            learning_rate,
196            max_depth,
197            min_samples_split,
198            min_samples_leaf,
199            subsample,
200            loss_function,
201            random_state,
202            validation_fraction,
203            ..Default::default()
204        };
205
206        Ok(Self {
207            inner: Some(GradientBoostingRegressor::new(config)),
208            trained: None,
209        })
210    }
211
212    /// Fit the gradient boosting regressor
213    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
214        let x_array = numpy_to_ndarray2(x)?;
215        let y_array = numpy_to_ndarray1(y)?;
216
217        let model = self.inner.take().ok_or_else(|| {
218            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
219        })?;
220
221        match model.fit(&x_array, &y_array) {
222            Ok(trained_model) => {
223                self.trained = Some(trained_model);
224                Ok(())
225            }
226            Err(e) => Err(PyRuntimeError::new_err(format!(
227                "Failed to fit model: {}",
228                e
229            ))),
230        }
231    }
232
233    /// Make predictions using the fitted model
234    fn predict<'py>(
235        &self,
236        py: Python<'py>,
237        x: &Bound<'py, PyArray2<f64>>,
238    ) -> PyResult<Py<PyArray1<f64>>> {
239        let trained_model = self.trained.as_ref().ok_or_else(|| {
240            PyRuntimeError::new_err("Model must be fitted before making predictions")
241        })?;
242
243        let x_array = numpy_to_ndarray2(x)?;
244
245        let predictions: Array1<f64> =
246            Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
247                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
248        Ok(core_array1_to_py(py, &predictions))
249    }
250
251    fn __repr__(&self) -> String {
252        if self.trained.is_some() {
253            "GradientBoostingRegressor(fitted=True)".to_string()
254        } else {
255            "GradientBoostingRegressor(fitted=False)".to_string()
256        }
257    }
258}
259
260/// Python wrapper for AdaBoost Classifier
261#[pyclass(name = "AdaBoostClassifier")]
262pub struct PyAdaBoostClassifier {
263    inner: Option<AdaBoostClassifier<Untrained>>,
264    trained: Option<AdaBoostClassifier<Trained>>,
265}
266
267#[pymethods]
268impl PyAdaBoostClassifier {
269    #[new]
270    #[pyo3(signature = (n_estimators=50, learning_rate=1.0, random_state=None))]
271    fn new(n_estimators: usize, learning_rate: f64, random_state: Option<u64>) -> PyResult<Self> {
272        let mut model = AdaBoostClassifier::new()
273            .n_estimators(n_estimators)
274            .learning_rate(learning_rate);
275
276        if let Some(seed) = random_state {
277            model = model.random_state(seed);
278        }
279
280        Ok(Self {
281            inner: Some(model),
282            trained: None,
283        })
284    }
285
286    /// Fit the AdaBoost classifier
287    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
288        let x_array = numpy_to_ndarray2(x)?;
289        let y_array = numpy_to_ndarray1(y)?;
290
291        let model = self.inner.take().ok_or_else(|| {
292            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
293        })?;
294
295        match model.fit(&x_array, &y_array) {
296            Ok(trained_model) => {
297                self.trained = Some(trained_model);
298                Ok(())
299            }
300            Err(e) => Err(PyRuntimeError::new_err(format!(
301                "Failed to fit model: {}",
302                e
303            ))),
304        }
305    }
306
307    /// Make predictions using the fitted model
308    fn predict<'py>(
309        &self,
310        py: Python<'py>,
311        x: &Bound<'py, PyArray2<f64>>,
312    ) -> PyResult<Py<PyArray1<f64>>> {
313        let trained_model = self.trained.as_ref().ok_or_else(|| {
314            PyRuntimeError::new_err("Model must be fitted before making predictions")
315        })?;
316
317        let x_array = numpy_to_ndarray2(x)?;
318
319        let predictions: Array1<f64> =
320            Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
321                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
322        Ok(core_array1_to_py(py, &predictions))
323    }
324
325    fn __repr__(&self) -> String {
326        if self.trained.is_some() {
327            "AdaBoostClassifier(fitted=True)".to_string()
328        } else {
329            "AdaBoostClassifier(fitted=False)".to_string()
330        }
331    }
332}
333
334/// Python wrapper for Voting Classifier
335#[pyclass(name = "VotingClassifier")]
336pub struct PyVotingClassifier {
337    inner: Option<VotingClassifier<Untrained>>,
338    trained: Option<VotingClassifier<Trained>>,
339}
340
341#[pymethods]
342impl PyVotingClassifier {
343    #[new]
344    #[pyo3(signature = (_estimators, voting="hard", weights=None))]
345    fn new(
346        _estimators: &Bound<'_, PyList>,
347        voting: &str,
348        weights: Option<Vec<f64>>,
349    ) -> PyResult<Self> {
350        let voting_strategy = match voting {
351            "hard" => VotingStrategy::Hard,
352            "soft" => VotingStrategy::Soft,
353            _ => {
354                return Err(PyValueError::new_err(format!(
355                    "Unknown voting strategy: {}",
356                    voting
357                )))
358            }
359        };
360
361        let config = VotingClassifierConfig {
362            voting: voting_strategy,
363            weights,
364            ..Default::default()
365        };
366
367        Ok(Self {
368            inner: Some(VotingClassifier::new(config)),
369            trained: None,
370        })
371    }
372
373    /// Fit the voting classifier
374    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
375        let x_array = numpy_to_ndarray2(x)?;
376        let y_array = numpy_to_ndarray1(y)?;
377
378        let model = self.inner.take().ok_or_else(|| {
379            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
380        })?;
381
382        match model.fit(&x_array, &y_array) {
383            Ok(trained_model) => {
384                self.trained = Some(trained_model);
385                Ok(())
386            }
387            Err(e) => Err(PyRuntimeError::new_err(format!(
388                "Failed to fit model: {}",
389                e
390            ))),
391        }
392    }
393
394    /// Make predictions using the fitted model
395    fn predict<'py>(
396        &self,
397        py: Python<'py>,
398        x: &Bound<'py, PyArray2<f64>>,
399    ) -> PyResult<Py<PyArray1<f64>>> {
400        let trained_model = self.trained.as_ref().ok_or_else(|| {
401            PyRuntimeError::new_err("Model must be fitted before making predictions")
402        })?;
403
404        let x_array = numpy_to_ndarray2(x)?;
405
406        let predictions: Array1<f64> =
407            Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
408                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
409        Ok(core_array1_to_py(py, &predictions))
410    }
411
412    fn __repr__(&self) -> String {
413        if self.trained.is_some() {
414            "VotingClassifier(fitted=True)".to_string()
415        } else {
416            "VotingClassifier(fitted=False)".to_string()
417        }
418    }
419}
420
421/// Python wrapper for Bagging Classifier
422#[pyclass(name = "BaggingClassifier")]
423pub struct PyBaggingClassifier {
424    inner: Option<BaggingClassifier<Untrained>>,
425    trained: Option<BaggingClassifier<Trained>>,
426}
427
428#[pymethods]
429impl PyBaggingClassifier {
430    #[new]
431    #[pyo3(signature = (
432        n_estimators=10,
433        max_samples=None,
434        max_features=None,
435        bootstrap=true,
436        bootstrap_features=false,
437        random_state=None
438    ))]
439    fn new(
440        n_estimators: usize,
441        max_samples: Option<usize>,
442        max_features: Option<usize>,
443        bootstrap: bool,
444        bootstrap_features: bool,
445        random_state: Option<u64>,
446    ) -> PyResult<Self> {
447        let mut model = BaggingClassifier::new()
448            .n_estimators(n_estimators)
449            .bootstrap(bootstrap)
450            .bootstrap_features(bootstrap_features);
451
452        if let Some(samples) = max_samples {
453            model = model.max_samples(Some(samples));
454        }
455
456        if let Some(features) = max_features {
457            model = model.max_features(Some(features));
458        }
459
460        if let Some(seed) = random_state {
461            model = model.random_state(seed);
462        }
463
464        Ok(Self {
465            inner: Some(model),
466            trained: None,
467        })
468    }
469
470    /// Fit the bagging classifier
471    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
472        let x_array = numpy_to_ndarray2(x)?;
473        let y_array = numpy_to_ndarray1(y)?;
474
475        // Convert y to integer array for BaggingClassifier (Fit<Array2<Float>, Array1<Int>>)
476        let y_int: Vec<i32> = y_array.iter().map(|&val| val as i32).collect();
477        let y_int_array = Array1::from_vec(y_int);
478
479        let model = self.inner.take().ok_or_else(|| {
480            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
481        })?;
482
483        match model.fit(&x_array, &y_int_array) {
484            Ok(trained_model) => {
485                self.trained = Some(trained_model);
486                Ok(())
487            }
488            Err(e) => Err(PyRuntimeError::new_err(format!(
489                "Failed to fit model: {}",
490                e
491            ))),
492        }
493    }
494
495    /// Make predictions using the fitted model
496    fn predict<'py>(
497        &self,
498        py: Python<'py>,
499        x: &Bound<'py, PyArray2<f64>>,
500    ) -> PyResult<Py<PyArray1<f64>>> {
501        let trained_model = self.trained.as_ref().ok_or_else(|| {
502            PyRuntimeError::new_err("Model must be fitted before making predictions")
503        })?;
504
505        let x_array = numpy_to_ndarray2(x)?;
506
507        let predictions: Array1<i32> =
508            Predict::<Array2<f64>, Array1<i32>>::predict(trained_model, &x_array)
509                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
510        // Convert i32 predictions to f64
511        let predictions_f64: Vec<f64> = predictions.iter().map(|&v| v as f64).collect();
512        Ok(PyArray1::from_vec(py, predictions_f64).unbind())
513    }
514
515    fn __repr__(&self) -> String {
516        if self.trained.is_some() {
517            "BaggingClassifier(fitted=True)".to_string()
518        } else {
519            "BaggingClassifier(fitted=False)".to_string()
520        }
521    }
522}