Skip to main content

sklears_python/
ensemble.rs

//! Python bindings for ensemble methods
//!
//! This module provides PyO3-based Python bindings for sklears ensemble algorithms.
//! It includes wrappers for Gradient Boosting (classifier and regressor), AdaBoost,
//! Voting, and Bagging classifiers.
5
6use crate::utils::{numpy_to_ndarray1, numpy_to_ndarray2};
7use numpy::{IntoPyArray, PyArray1, PyArray2};
8use pyo3::exceptions::{PyRuntimeError, PyValueError};
9use pyo3::prelude::*;
10use pyo3::types::PyList;
11use scirs2_core::ndarray::{Array1, Array2};
12use sklears_core::traits::{Fit, Predict, Trained, Untrained};
13use sklears_ensemble::gradient_boosting::{
14    TrainedGradientBoostingClassifier, TrainedGradientBoostingRegressor,
15};
16use sklears_ensemble::{
17    AdaBoostClassifier, BaggingClassifier, GradientBoostingClassifier, GradientBoostingConfig,
18    GradientBoostingRegressor, LossFunction, VotingClassifier, VotingClassifierConfig,
19    VotingStrategy,
20};
21
22/// Python wrapper for GradientBoostingClassifier
23#[pyclass(name = "GradientBoostingClassifier")]
24pub struct PyGradientBoostingClassifier {
25    inner: Option<GradientBoostingClassifier>,
26    trained: Option<TrainedGradientBoostingClassifier>,
27}
28
29#[pymethods]
30impl PyGradientBoostingClassifier {
31    #[new]
32    #[pyo3(signature = (
33        n_estimators=100,
34        learning_rate=0.1,
35        max_depth=3,
36        min_samples_split=2,
37        min_samples_leaf=1,
38        subsample=1.0,
39        loss="squared_loss",
40        random_state=None,
41        validation_fraction=0.1
42    ))]
43    fn new(
44        n_estimators: usize,
45        learning_rate: f64,
46        max_depth: usize,
47        min_samples_split: usize,
48        min_samples_leaf: usize,
49        subsample: f64,
50        loss: &str,
51        random_state: Option<u64>,
52        validation_fraction: f64,
53    ) -> PyResult<Self> {
54        let loss_function = match loss {
55            "squared_loss" => LossFunction::SquaredLoss,
56            "absolute_loss" => LossFunction::AbsoluteLoss,
57            "huber" => LossFunction::HuberLoss,
58            "quantile" => LossFunction::QuantileLoss,
59            "logistic" => LossFunction::LogisticLoss,
60            "deviance" => LossFunction::DevianceLoss,
61            "exponential" => LossFunction::ExponentialLoss,
62            _ => {
63                return Err(PyValueError::new_err(format!(
64                    "Unknown loss function: {}",
65                    loss
66                )))
67            }
68        };
69
70        let config = GradientBoostingConfig {
71            n_estimators,
72            learning_rate,
73            max_depth,
74            min_samples_split,
75            min_samples_leaf,
76            subsample,
77            loss_function,
78            random_state,
79            validation_fraction,
80            ..Default::default()
81        };
82
83        Ok(Self {
84            inner: Some(GradientBoostingClassifier::new(config)),
85            trained: None,
86        })
87    }
88
89    /// Fit the gradient boosting classifier
90    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
91        let x_array = numpy_to_ndarray2(x)?;
92        let y_array = numpy_to_ndarray1(y)?;
93
94        let model = self.inner.take().ok_or_else(|| {
95            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
96        })?;
97
98        match model.fit(&x_array, &y_array) {
99            Ok(trained_model) => {
100                self.trained = Some(trained_model);
101                Ok(())
102            }
103            Err(e) => Err(PyRuntimeError::new_err(format!(
104                "Failed to fit model: {}",
105                e
106            ))),
107        }
108    }
109
110    /// Make predictions using the fitted model
111    fn predict<'py>(
112        &self,
113        py: Python<'py>,
114        x: &Bound<'py, PyArray2<f64>>,
115    ) -> PyResult<Py<PyArray1<f64>>> {
116        let trained_model = self.trained.as_ref().ok_or_else(|| {
117            PyRuntimeError::new_err("Model must be fitted before making predictions")
118        })?;
119
120        let x_array = numpy_to_ndarray2(x)?;
121
122        let predictions: Array1<f64> =
123            Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
124                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
125        Ok(predictions.into_pyarray(py).unbind())
126    }
127
128    /// Get feature importances
129    fn feature_importances_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
130        let trained_model = self.trained.as_ref().ok_or_else(|| {
131            PyRuntimeError::new_err("Model must be fitted before accessing feature importances")
132        })?;
133
134        let importances = trained_model.feature_importances_gain();
135        Ok(importances.clone().into_pyarray(py).unbind())
136    }
137
138    fn __repr__(&self) -> String {
139        if self.trained.is_some() {
140            "GradientBoostingClassifier(fitted=True)".to_string()
141        } else {
142            "GradientBoostingClassifier(fitted=False)".to_string()
143        }
144    }
145}
146
147/// Python wrapper for GradientBoostingRegressor
148#[pyclass(name = "GradientBoostingRegressor")]
149pub struct PyGradientBoostingRegressor {
150    inner: Option<GradientBoostingRegressor>,
151    trained: Option<TrainedGradientBoostingRegressor>,
152}
153
154#[pymethods]
155impl PyGradientBoostingRegressor {
156    #[new]
157    #[pyo3(signature = (
158        n_estimators=100,
159        learning_rate=0.1,
160        max_depth=3,
161        min_samples_split=2,
162        min_samples_leaf=1,
163        subsample=1.0,
164        loss="squared_loss",
165        random_state=None,
166        validation_fraction=0.1
167    ))]
168    fn new(
169        n_estimators: usize,
170        learning_rate: f64,
171        max_depth: usize,
172        min_samples_split: usize,
173        min_samples_leaf: usize,
174        subsample: f64,
175        loss: &str,
176        random_state: Option<u64>,
177        validation_fraction: f64,
178    ) -> PyResult<Self> {
179        let loss_function = match loss {
180            "squared_loss" => LossFunction::SquaredLoss,
181            "absolute_loss" => LossFunction::AbsoluteLoss,
182            "huber" => LossFunction::HuberLoss,
183            "quantile" => LossFunction::QuantileLoss,
184            _ => {
185                return Err(PyValueError::new_err(format!(
186                    "Unknown loss function for regression: {}",
187                    loss
188                )))
189            }
190        };
191
192        let config = GradientBoostingConfig {
193            n_estimators,
194            learning_rate,
195            max_depth,
196            min_samples_split,
197            min_samples_leaf,
198            subsample,
199            loss_function,
200            random_state,
201            validation_fraction,
202            ..Default::default()
203        };
204
205        Ok(Self {
206            inner: Some(GradientBoostingRegressor::new(config)),
207            trained: None,
208        })
209    }
210
211    /// Fit the gradient boosting regressor
212    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
213        let x_array = numpy_to_ndarray2(x)?;
214        let y_array = numpy_to_ndarray1(y)?;
215
216        let model = self.inner.take().ok_or_else(|| {
217            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
218        })?;
219
220        match model.fit(&x_array, &y_array) {
221            Ok(trained_model) => {
222                self.trained = Some(trained_model);
223                Ok(())
224            }
225            Err(e) => Err(PyRuntimeError::new_err(format!(
226                "Failed to fit model: {}",
227                e
228            ))),
229        }
230    }
231
232    /// Make predictions using the fitted model
233    fn predict<'py>(
234        &self,
235        py: Python<'py>,
236        x: &Bound<'py, PyArray2<f64>>,
237    ) -> PyResult<Py<PyArray1<f64>>> {
238        let trained_model = self.trained.as_ref().ok_or_else(|| {
239            PyRuntimeError::new_err("Model must be fitted before making predictions")
240        })?;
241
242        let x_array = numpy_to_ndarray2(x)?;
243
244        let predictions: Array1<f64> =
245            Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
246                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
247        Ok(predictions.into_pyarray(py).unbind())
248    }
249
250    fn __repr__(&self) -> String {
251        if self.trained.is_some() {
252            "GradientBoostingRegressor(fitted=True)".to_string()
253        } else {
254            "GradientBoostingRegressor(fitted=False)".to_string()
255        }
256    }
257}
258
259/// Python wrapper for AdaBoost Classifier
260#[pyclass(name = "AdaBoostClassifier")]
261pub struct PyAdaBoostClassifier {
262    inner: Option<AdaBoostClassifier<Untrained>>,
263    trained: Option<AdaBoostClassifier<Trained>>,
264}
265
266#[pymethods]
267impl PyAdaBoostClassifier {
268    #[new]
269    #[pyo3(signature = (n_estimators=50, learning_rate=1.0, random_state=None))]
270    fn new(n_estimators: usize, learning_rate: f64, random_state: Option<u64>) -> PyResult<Self> {
271        let mut model = AdaBoostClassifier::new()
272            .n_estimators(n_estimators)
273            .learning_rate(learning_rate);
274
275        if let Some(seed) = random_state {
276            model = model.random_state(seed);
277        }
278
279        Ok(Self {
280            inner: Some(model),
281            trained: None,
282        })
283    }
284
285    /// Fit the AdaBoost classifier
286    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
287        let x_array = numpy_to_ndarray2(x)?;
288        let y_array = numpy_to_ndarray1(y)?;
289
290        let model = self.inner.take().ok_or_else(|| {
291            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
292        })?;
293
294        match model.fit(&x_array, &y_array) {
295            Ok(trained_model) => {
296                self.trained = Some(trained_model);
297                Ok(())
298            }
299            Err(e) => Err(PyRuntimeError::new_err(format!(
300                "Failed to fit model: {}",
301                e
302            ))),
303        }
304    }
305
306    /// Make predictions using the fitted model
307    fn predict<'py>(
308        &self,
309        py: Python<'py>,
310        x: &Bound<'py, PyArray2<f64>>,
311    ) -> PyResult<Py<PyArray1<f64>>> {
312        let trained_model = self.trained.as_ref().ok_or_else(|| {
313            PyRuntimeError::new_err("Model must be fitted before making predictions")
314        })?;
315
316        let x_array = numpy_to_ndarray2(x)?;
317
318        let predictions: Array1<f64> =
319            Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
320                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
321        Ok(predictions.into_pyarray(py).unbind())
322    }
323
324    fn __repr__(&self) -> String {
325        if self.trained.is_some() {
326            "AdaBoostClassifier(fitted=True)".to_string()
327        } else {
328            "AdaBoostClassifier(fitted=False)".to_string()
329        }
330    }
331}
332
333/// Python wrapper for Voting Classifier
334#[pyclass(name = "VotingClassifier")]
335pub struct PyVotingClassifier {
336    inner: Option<VotingClassifier<Untrained>>,
337    trained: Option<VotingClassifier<Trained>>,
338}
339
340#[pymethods]
341impl PyVotingClassifier {
342    #[new]
343    #[pyo3(signature = (_estimators, voting="hard", weights=None))]
344    fn new(
345        _estimators: &Bound<'_, PyList>,
346        voting: &str,
347        weights: Option<Vec<f64>>,
348    ) -> PyResult<Self> {
349        let voting_strategy = match voting {
350            "hard" => VotingStrategy::Hard,
351            "soft" => VotingStrategy::Soft,
352            _ => {
353                return Err(PyValueError::new_err(format!(
354                    "Unknown voting strategy: {}",
355                    voting
356                )))
357            }
358        };
359
360        let config = VotingClassifierConfig {
361            voting: voting_strategy,
362            weights,
363            ..Default::default()
364        };
365
366        Ok(Self {
367            inner: Some(VotingClassifier::new(config)),
368            trained: None,
369        })
370    }
371
372    /// Fit the voting classifier
373    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
374        let x_array = numpy_to_ndarray2(x)?;
375        let y_array = numpy_to_ndarray1(y)?;
376
377        let model = self.inner.take().ok_or_else(|| {
378            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
379        })?;
380
381        match model.fit(&x_array, &y_array) {
382            Ok(trained_model) => {
383                self.trained = Some(trained_model);
384                Ok(())
385            }
386            Err(e) => Err(PyRuntimeError::new_err(format!(
387                "Failed to fit model: {}",
388                e
389            ))),
390        }
391    }
392
393    /// Make predictions using the fitted model
394    fn predict<'py>(
395        &self,
396        py: Python<'py>,
397        x: &Bound<'py, PyArray2<f64>>,
398    ) -> PyResult<Py<PyArray1<f64>>> {
399        let trained_model = self.trained.as_ref().ok_or_else(|| {
400            PyRuntimeError::new_err("Model must be fitted before making predictions")
401        })?;
402
403        let x_array = numpy_to_ndarray2(x)?;
404
405        let predictions: Array1<f64> =
406            Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
407                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
408        Ok(predictions.into_pyarray(py).unbind())
409    }
410
411    fn __repr__(&self) -> String {
412        if self.trained.is_some() {
413            "VotingClassifier(fitted=True)".to_string()
414        } else {
415            "VotingClassifier(fitted=False)".to_string()
416        }
417    }
418}
419
420/// Python wrapper for Bagging Classifier
421#[pyclass(name = "BaggingClassifier")]
422pub struct PyBaggingClassifier {
423    inner: Option<BaggingClassifier<Untrained>>,
424    trained: Option<BaggingClassifier<Trained>>,
425}
426
427#[pymethods]
428impl PyBaggingClassifier {
429    #[new]
430    #[pyo3(signature = (
431        n_estimators=10,
432        max_samples=None,
433        max_features=None,
434        bootstrap=true,
435        bootstrap_features=false,
436        random_state=None
437    ))]
438    fn new(
439        n_estimators: usize,
440        max_samples: Option<usize>,
441        max_features: Option<usize>,
442        bootstrap: bool,
443        bootstrap_features: bool,
444        random_state: Option<u64>,
445    ) -> PyResult<Self> {
446        let mut model = BaggingClassifier::new()
447            .n_estimators(n_estimators)
448            .bootstrap(bootstrap)
449            .bootstrap_features(bootstrap_features);
450
451        if let Some(samples) = max_samples {
452            model = model.max_samples(Some(samples));
453        }
454
455        if let Some(features) = max_features {
456            model = model.max_features(Some(features));
457        }
458
459        if let Some(seed) = random_state {
460            model = model.random_state(seed);
461        }
462
463        Ok(Self {
464            inner: Some(model),
465            trained: None,
466        })
467    }
468
469    /// Fit the bagging classifier
470    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
471        let x_array = numpy_to_ndarray2(x)?;
472        let y_array = numpy_to_ndarray1(y)?;
473
474        // Convert y to integer array for BaggingClassifier (Fit<Array2<Float>, Array1<Int>>)
475        let y_int: Vec<i32> = y_array.iter().map(|&val| val as i32).collect();
476        let y_int_array = Array1::from_vec(y_int);
477
478        let model = self.inner.take().ok_or_else(|| {
479            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
480        })?;
481
482        match model.fit(&x_array, &y_int_array) {
483            Ok(trained_model) => {
484                self.trained = Some(trained_model);
485                Ok(())
486            }
487            Err(e) => Err(PyRuntimeError::new_err(format!(
488                "Failed to fit model: {}",
489                e
490            ))),
491        }
492    }
493
494    /// Make predictions using the fitted model
495    fn predict<'py>(
496        &self,
497        py: Python<'py>,
498        x: &Bound<'py, PyArray2<f64>>,
499    ) -> PyResult<Py<PyArray1<f64>>> {
500        let trained_model = self.trained.as_ref().ok_or_else(|| {
501            PyRuntimeError::new_err("Model must be fitted before making predictions")
502        })?;
503
504        let x_array = numpy_to_ndarray2(x)?;
505
506        let predictions: Array1<i32> =
507            Predict::<Array2<f64>, Array1<i32>>::predict(trained_model, &x_array)
508                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
509        // Convert i32 predictions to f64
510        let predictions_f64: Vec<f64> = predictions.iter().map(|&v| v as f64).collect();
511        Ok(PyArray1::from_vec(py, predictions_f64).unbind())
512    }
513
514    fn __repr__(&self) -> String {
515        if self.trained.is_some() {
516            "BaggingClassifier(fitted=True)".to_string()
517        } else {
518            "BaggingClassifier(fitted=False)".to_string()
519        }
520    }
521}