Skip to main content

sklears_python/
ensemble.rs

1//! Python bindings for ensemble methods
2//!
3//! This module provides PyO3-based Python bindings for sklears ensemble algorithms.
4//! It includes wrappers for Gradient Boosting (classifier and regressor), AdaBoost, Voting, and Bagging classifiers.
5
6use crate::linear::common::core_array1_to_py;
7use crate::utils::{numpy_to_ndarray1, numpy_to_ndarray2};
8use numpy::{PyArray1, PyArray2};
9use pyo3::exceptions::{PyRuntimeError, PyValueError};
10use pyo3::prelude::*;
11use pyo3::types::PyList;
12use scirs2_core::ndarray::{Array1, Array2};
13use sklears_core::traits::{Fit, Predict, Trained, Untrained};
14use sklears_ensemble::gradient_boosting::{
15    TrainedGradientBoostingClassifier, TrainedGradientBoostingRegressor,
16};
17use sklears_ensemble::{
18    AdaBoostClassifier, BaggingClassifier, GradientBoostingClassifier, GradientBoostingConfig,
19    GradientBoostingRegressor, LossFunction, VotingClassifier, VotingClassifierConfig,
20    VotingStrategy,
21};
22
23/// Python wrapper for GradientBoostingClassifier
24#[pyclass(name = "GradientBoostingClassifier")]
25pub struct PyGradientBoostingClassifier {
26    inner: Option<GradientBoostingClassifier>,
27    trained: Option<TrainedGradientBoostingClassifier>,
28}
29
30#[pymethods]
31impl PyGradientBoostingClassifier {
32    #[new]
33    #[allow(clippy::too_many_arguments)] // scikit-learn API compatibility requires matching argument count
34    #[pyo3(signature = (
35        n_estimators=100,
36        learning_rate=0.1,
37        max_depth=3,
38        min_samples_split=2,
39        min_samples_leaf=1,
40        subsample=1.0,
41        loss="squared_loss",
42        random_state=None,
43        validation_fraction=0.1
44    ))]
45    fn new(
46        n_estimators: usize,
47        learning_rate: f64,
48        max_depth: usize,
49        min_samples_split: usize,
50        min_samples_leaf: usize,
51        subsample: f64,
52        loss: &str,
53        random_state: Option<u64>,
54        validation_fraction: f64,
55    ) -> PyResult<Self> {
56        let loss_function = match loss {
57            "squared_loss" => LossFunction::SquaredLoss,
58            "absolute_loss" => LossFunction::AbsoluteLoss,
59            "huber" => LossFunction::HuberLoss,
60            "quantile" => LossFunction::QuantileLoss,
61            "logistic" => LossFunction::LogisticLoss,
62            "deviance" => LossFunction::DevianceLoss,
63            "exponential" => LossFunction::ExponentialLoss,
64            _ => {
65                return Err(PyValueError::new_err(format!(
66                    "Unknown loss function: {}",
67                    loss
68                )))
69            }
70        };
71
72        let config = GradientBoostingConfig {
73            n_estimators,
74            learning_rate,
75            max_depth,
76            min_samples_split,
77            min_samples_leaf,
78            subsample,
79            loss_function,
80            random_state,
81            validation_fraction,
82            ..Default::default()
83        };
84
85        Ok(Self {
86            inner: Some(GradientBoostingClassifier::new(config)),
87            trained: None,
88        })
89    }
90
91    /// Fit the gradient boosting classifier
92    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
93        let x_array = numpy_to_ndarray2(x)?;
94        let y_array = numpy_to_ndarray1(y)?;
95
96        let model = self.inner.take().ok_or_else(|| {
97            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
98        })?;
99
100        match model.fit(&x_array, &y_array) {
101            Ok(trained_model) => {
102                self.trained = Some(trained_model);
103                Ok(())
104            }
105            Err(e) => Err(PyRuntimeError::new_err(format!(
106                "Failed to fit model: {}",
107                e
108            ))),
109        }
110    }
111
112    /// Make predictions using the fitted model
113    fn predict<'py>(
114        &self,
115        py: Python<'py>,
116        x: &Bound<'py, PyArray2<f64>>,
117    ) -> PyResult<Py<PyArray1<f64>>> {
118        let trained_model = self.trained.as_ref().ok_or_else(|| {
119            PyRuntimeError::new_err("Model must be fitted before making predictions")
120        })?;
121
122        let x_array = numpy_to_ndarray2(x)?;
123
124        let predictions: Array1<f64> =
125            Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
126                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
127        Ok(core_array1_to_py(py, &predictions))
128    }
129
130    /// Get feature importances
131    fn feature_importances_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
132        let trained_model = self.trained.as_ref().ok_or_else(|| {
133            PyRuntimeError::new_err("Model must be fitted before accessing feature importances")
134        })?;
135
136        let importances = trained_model.feature_importances_gain();
137        Ok(core_array1_to_py(py, importances))
138    }
139
140    fn __repr__(&self) -> String {
141        if self.trained.is_some() {
142            "GradientBoostingClassifier(fitted=True)".to_string()
143        } else {
144            "GradientBoostingClassifier(fitted=False)".to_string()
145        }
146    }
147}
148
149/// Python wrapper for GradientBoostingRegressor
150#[pyclass(name = "GradientBoostingRegressor")]
151pub struct PyGradientBoostingRegressor {
152    inner: Option<GradientBoostingRegressor>,
153    trained: Option<TrainedGradientBoostingRegressor>,
154}
155
156#[pymethods]
157impl PyGradientBoostingRegressor {
158    #[new]
159    #[allow(clippy::too_many_arguments)] // scikit-learn API compatibility requires matching argument count
160    #[pyo3(signature = (
161        n_estimators=100,
162        learning_rate=0.1,
163        max_depth=3,
164        min_samples_split=2,
165        min_samples_leaf=1,
166        subsample=1.0,
167        loss="squared_loss",
168        random_state=None,
169        validation_fraction=0.1
170    ))]
171    fn new(
172        n_estimators: usize,
173        learning_rate: f64,
174        max_depth: usize,
175        min_samples_split: usize,
176        min_samples_leaf: usize,
177        subsample: f64,
178        loss: &str,
179        random_state: Option<u64>,
180        validation_fraction: f64,
181    ) -> PyResult<Self> {
182        let loss_function = match loss {
183            "squared_loss" => LossFunction::SquaredLoss,
184            "absolute_loss" => LossFunction::AbsoluteLoss,
185            "huber" => LossFunction::HuberLoss,
186            "quantile" => LossFunction::QuantileLoss,
187            _ => {
188                return Err(PyValueError::new_err(format!(
189                    "Unknown loss function for regression: {}",
190                    loss
191                )))
192            }
193        };
194
195        let config = GradientBoostingConfig {
196            n_estimators,
197            learning_rate,
198            max_depth,
199            min_samples_split,
200            min_samples_leaf,
201            subsample,
202            loss_function,
203            random_state,
204            validation_fraction,
205            ..Default::default()
206        };
207
208        Ok(Self {
209            inner: Some(GradientBoostingRegressor::new(config)),
210            trained: None,
211        })
212    }
213
214    /// Fit the gradient boosting regressor
215    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
216        let x_array = numpy_to_ndarray2(x)?;
217        let y_array = numpy_to_ndarray1(y)?;
218
219        let model = self.inner.take().ok_or_else(|| {
220            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
221        })?;
222
223        match model.fit(&x_array, &y_array) {
224            Ok(trained_model) => {
225                self.trained = Some(trained_model);
226                Ok(())
227            }
228            Err(e) => Err(PyRuntimeError::new_err(format!(
229                "Failed to fit model: {}",
230                e
231            ))),
232        }
233    }
234
235    /// Make predictions using the fitted model
236    fn predict<'py>(
237        &self,
238        py: Python<'py>,
239        x: &Bound<'py, PyArray2<f64>>,
240    ) -> PyResult<Py<PyArray1<f64>>> {
241        let trained_model = self.trained.as_ref().ok_or_else(|| {
242            PyRuntimeError::new_err("Model must be fitted before making predictions")
243        })?;
244
245        let x_array = numpy_to_ndarray2(x)?;
246
247        let predictions: Array1<f64> =
248            Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
249                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
250        Ok(core_array1_to_py(py, &predictions))
251    }
252
253    fn __repr__(&self) -> String {
254        if self.trained.is_some() {
255            "GradientBoostingRegressor(fitted=True)".to_string()
256        } else {
257            "GradientBoostingRegressor(fitted=False)".to_string()
258        }
259    }
260}
261
262/// Python wrapper for AdaBoost Classifier
263#[pyclass(name = "AdaBoostClassifier")]
264pub struct PyAdaBoostClassifier {
265    inner: Option<AdaBoostClassifier<Untrained>>,
266    trained: Option<AdaBoostClassifier<Trained>>,
267}
268
269#[pymethods]
270impl PyAdaBoostClassifier {
271    #[new]
272    #[pyo3(signature = (n_estimators=50, learning_rate=1.0, random_state=None))]
273    fn new(n_estimators: usize, learning_rate: f64, random_state: Option<u64>) -> PyResult<Self> {
274        let mut model = AdaBoostClassifier::new()
275            .n_estimators(n_estimators)
276            .learning_rate(learning_rate);
277
278        if let Some(seed) = random_state {
279            model = model.random_state(seed);
280        }
281
282        Ok(Self {
283            inner: Some(model),
284            trained: None,
285        })
286    }
287
288    /// Fit the AdaBoost classifier
289    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
290        let x_array = numpy_to_ndarray2(x)?;
291        let y_array = numpy_to_ndarray1(y)?;
292
293        let model = self.inner.take().ok_or_else(|| {
294            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
295        })?;
296
297        match model.fit(&x_array, &y_array) {
298            Ok(trained_model) => {
299                self.trained = Some(trained_model);
300                Ok(())
301            }
302            Err(e) => Err(PyRuntimeError::new_err(format!(
303                "Failed to fit model: {}",
304                e
305            ))),
306        }
307    }
308
309    /// Make predictions using the fitted model
310    fn predict<'py>(
311        &self,
312        py: Python<'py>,
313        x: &Bound<'py, PyArray2<f64>>,
314    ) -> PyResult<Py<PyArray1<f64>>> {
315        let trained_model = self.trained.as_ref().ok_or_else(|| {
316            PyRuntimeError::new_err("Model must be fitted before making predictions")
317        })?;
318
319        let x_array = numpy_to_ndarray2(x)?;
320
321        let predictions: Array1<f64> =
322            Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
323                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
324        Ok(core_array1_to_py(py, &predictions))
325    }
326
327    fn __repr__(&self) -> String {
328        if self.trained.is_some() {
329            "AdaBoostClassifier(fitted=True)".to_string()
330        } else {
331            "AdaBoostClassifier(fitted=False)".to_string()
332        }
333    }
334}
335
336/// Python wrapper for Voting Classifier
337#[pyclass(name = "VotingClassifier")]
338pub struct PyVotingClassifier {
339    inner: Option<VotingClassifier<Untrained>>,
340    trained: Option<VotingClassifier<Trained>>,
341}
342
343#[pymethods]
344impl PyVotingClassifier {
345    #[new]
346    #[pyo3(signature = (_estimators, voting="hard", weights=None))]
347    fn new(
348        _estimators: &Bound<'_, PyList>,
349        voting: &str,
350        weights: Option<Vec<f64>>,
351    ) -> PyResult<Self> {
352        let voting_strategy = match voting {
353            "hard" => VotingStrategy::Hard,
354            "soft" => VotingStrategy::Soft,
355            _ => {
356                return Err(PyValueError::new_err(format!(
357                    "Unknown voting strategy: {}",
358                    voting
359                )))
360            }
361        };
362
363        let config = VotingClassifierConfig {
364            voting: voting_strategy,
365            weights,
366            ..Default::default()
367        };
368
369        Ok(Self {
370            inner: Some(VotingClassifier::new(config)),
371            trained: None,
372        })
373    }
374
375    /// Fit the voting classifier
376    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
377        let x_array = numpy_to_ndarray2(x)?;
378        let y_array = numpy_to_ndarray1(y)?;
379
380        let model = self.inner.take().ok_or_else(|| {
381            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
382        })?;
383
384        match model.fit(&x_array, &y_array) {
385            Ok(trained_model) => {
386                self.trained = Some(trained_model);
387                Ok(())
388            }
389            Err(e) => Err(PyRuntimeError::new_err(format!(
390                "Failed to fit model: {}",
391                e
392            ))),
393        }
394    }
395
396    /// Make predictions using the fitted model
397    fn predict<'py>(
398        &self,
399        py: Python<'py>,
400        x: &Bound<'py, PyArray2<f64>>,
401    ) -> PyResult<Py<PyArray1<f64>>> {
402        let trained_model = self.trained.as_ref().ok_or_else(|| {
403            PyRuntimeError::new_err("Model must be fitted before making predictions")
404        })?;
405
406        let x_array = numpy_to_ndarray2(x)?;
407
408        let predictions: Array1<f64> =
409            Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
410                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
411        Ok(core_array1_to_py(py, &predictions))
412    }
413
414    fn __repr__(&self) -> String {
415        if self.trained.is_some() {
416            "VotingClassifier(fitted=True)".to_string()
417        } else {
418            "VotingClassifier(fitted=False)".to_string()
419        }
420    }
421}
422
423/// Python wrapper for Bagging Classifier
424#[pyclass(name = "BaggingClassifier")]
425pub struct PyBaggingClassifier {
426    inner: Option<BaggingClassifier<Untrained>>,
427    trained: Option<BaggingClassifier<Trained>>,
428}
429
430#[pymethods]
431impl PyBaggingClassifier {
432    #[new]
433    #[pyo3(signature = (
434        n_estimators=10,
435        max_samples=None,
436        max_features=None,
437        bootstrap=true,
438        bootstrap_features=false,
439        random_state=None
440    ))]
441    fn new(
442        n_estimators: usize,
443        max_samples: Option<usize>,
444        max_features: Option<usize>,
445        bootstrap: bool,
446        bootstrap_features: bool,
447        random_state: Option<u64>,
448    ) -> PyResult<Self> {
449        let mut model = BaggingClassifier::new()
450            .n_estimators(n_estimators)
451            .bootstrap(bootstrap)
452            .bootstrap_features(bootstrap_features);
453
454        if let Some(samples) = max_samples {
455            model = model.max_samples(Some(samples));
456        }
457
458        if let Some(features) = max_features {
459            model = model.max_features(Some(features));
460        }
461
462        if let Some(seed) = random_state {
463            model = model.random_state(seed);
464        }
465
466        Ok(Self {
467            inner: Some(model),
468            trained: None,
469        })
470    }
471
472    /// Fit the bagging classifier
473    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
474        let x_array = numpy_to_ndarray2(x)?;
475        let y_array = numpy_to_ndarray1(y)?;
476
477        // Convert y to integer array for BaggingClassifier (Fit<Array2<Float>, Array1<Int>>)
478        let y_int: Vec<i32> = y_array.iter().map(|&val| val as i32).collect();
479        let y_int_array = Array1::from_vec(y_int);
480
481        let model = self.inner.take().ok_or_else(|| {
482            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
483        })?;
484
485        match model.fit(&x_array, &y_int_array) {
486            Ok(trained_model) => {
487                self.trained = Some(trained_model);
488                Ok(())
489            }
490            Err(e) => Err(PyRuntimeError::new_err(format!(
491                "Failed to fit model: {}",
492                e
493            ))),
494        }
495    }
496
497    /// Make predictions using the fitted model
498    fn predict<'py>(
499        &self,
500        py: Python<'py>,
501        x: &Bound<'py, PyArray2<f64>>,
502    ) -> PyResult<Py<PyArray1<f64>>> {
503        let trained_model = self.trained.as_ref().ok_or_else(|| {
504            PyRuntimeError::new_err("Model must be fitted before making predictions")
505        })?;
506
507        let x_array = numpy_to_ndarray2(x)?;
508
509        let predictions: Array1<i32> =
510            Predict::<Array2<f64>, Array1<i32>>::predict(trained_model, &x_array)
511                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
512        // Convert i32 predictions to f64
513        let predictions_f64: Vec<f64> = predictions.iter().map(|&v| v as f64).collect();
514        Ok(PyArray1::from_vec(py, predictions_f64).unbind())
515    }
516
517    fn __repr__(&self) -> String {
518        if self.trained.is_some() {
519            "BaggingClassifier(fitted=True)".to_string()
520        } else {
521            "BaggingClassifier(fitted=False)".to_string()
522        }
523    }
524}