// sklears_python/tree.rs

1//! Python bindings for tree-based algorithms
2//!
3//! This module provides PyO3-based Python bindings for sklears tree algorithms,
4//! including Decision Trees, Random Forest, and Extra Trees.
5
6use crate::utils::{numpy_to_ndarray1, numpy_to_ndarray2};
7use numpy::{IntoPyArray, PyArray1, PyArray2};
8use pyo3::exceptions::{PyRuntimeError, PyValueError};
9use pyo3::prelude::*;
10use scirs2_core::ndarray::{Array1, Array2};
11use sklears_core::traits::{Fit, Predict, Trained};
12use sklears_tree::random_forest::RandomForestRegressor;
13use sklears_tree::{DecisionTree, MaxFeatures, RandomForestClassifier, SplitCriterion};
14
/// Python wrapper for Decision Tree Classifier
#[pyclass(name = "DecisionTreeClassifier")]
pub struct PyDecisionTreeClassifier {
    // Untrained builder; moved out (`take`n) on the first call to `fit`.
    inner: Option<DecisionTree>,
    // Fitted model; populated once `fit` succeeds.
    trained: Option<DecisionTree<Trained>>,
}
21
22#[pymethods]
23impl PyDecisionTreeClassifier {
24    #[new]
25    #[allow(clippy::too_many_arguments)]
26    #[pyo3(signature = (
27        criterion="gini",
28        _splitter="best",
29        max_depth=None,
30        min_samples_split=2,
31        min_samples_leaf=1,
32        _min_weight_fraction_leaf=0.0,
33        _max_features=None,
34        random_state=None,
35        _max_leaf_nodes=None,
36        _min_impurity_decrease=0.0,
37        _class_weight=None,
38        _ccp_alpha=0.0
39    ))]
40    fn new(
41        criterion: &str,
42        _splitter: &str,
43        max_depth: Option<usize>,
44        min_samples_split: usize,
45        min_samples_leaf: usize,
46        _min_weight_fraction_leaf: f64,
47        _max_features: Option<&str>,
48        random_state: Option<u64>,
49        _max_leaf_nodes: Option<usize>,
50        _min_impurity_decrease: f64,
51        _class_weight: Option<&str>,
52        _ccp_alpha: f64,
53    ) -> PyResult<Self> {
54        let split_criterion = match criterion {
55            "gini" => SplitCriterion::Gini,
56            "entropy" => SplitCriterion::Entropy,
57            "log_loss" => SplitCriterion::LogLoss,
58            _ => {
59                return Err(PyValueError::new_err(format!(
60                    "Unknown criterion: {}",
61                    criterion
62                )))
63            }
64        };
65
66        let mut tree = DecisionTree::new()
67            .criterion(split_criterion)
68            .min_samples_split(min_samples_split)
69            .min_samples_leaf(min_samples_leaf);
70
71        if let Some(depth) = max_depth {
72            tree = tree.max_depth(depth);
73        }
74
75        if let Some(seed) = random_state {
76            tree = tree.random_state(Some(seed));
77        }
78
79        Ok(Self {
80            inner: Some(tree),
81            trained: None,
82        })
83    }
84
85    /// Fit the decision tree classifier
86    fn fit<'py>(
87        &mut self,
88        x: &Bound<'py, PyArray2<f64>>,
89        y: &Bound<'py, PyArray1<f64>>,
90    ) -> PyResult<()> {
91        let x_array = numpy_to_ndarray2(x)?;
92        let y_array = numpy_to_ndarray1(y)?;
93
94        let model = self.inner.take().ok_or_else(|| {
95            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
96        })?;
97
98        match model.fit(&x_array, &y_array) {
99            Ok(trained_model) => {
100                self.trained = Some(trained_model);
101                Ok(())
102            }
103            Err(e) => Err(PyRuntimeError::new_err(format!(
104                "Failed to fit model: {}",
105                e
106            ))),
107        }
108    }
109
110    /// Make predictions using the fitted model
111    fn predict<'py>(
112        &self,
113        py: Python<'py>,
114        x: &Bound<'py, PyArray2<f64>>,
115    ) -> PyResult<Py<PyArray1<f64>>> {
116        let trained_model = self.trained.as_ref().ok_or_else(|| {
117            PyRuntimeError::new_err("Model must be fitted before making predictions")
118        })?;
119
120        let x_array = numpy_to_ndarray2(x)?;
121
122        let predictions: Array1<f64> =
123            Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
124                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
125        Ok(predictions.into_pyarray(py).unbind())
126    }
127
128    /// Get feature importances
129    fn feature_importances_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
130        let trained_model = self.trained.as_ref().ok_or_else(|| {
131            PyRuntimeError::new_err("Model must be fitted before accessing feature importances")
132        })?;
133
134        match trained_model.feature_importances() {
135            Some(importances) => Ok(importances.clone().into_pyarray(py).unbind()),
136            None => Err(PyRuntimeError::new_err("Feature importances not available")),
137        }
138    }
139
140    fn __repr__(&self) -> String {
141        if self.trained.is_some() {
142            "DecisionTreeClassifier(fitted=True)".to_string()
143        } else {
144            "DecisionTreeClassifier(fitted=False)".to_string()
145        }
146    }
147}
148
/// Python wrapper for Decision Tree Regressor
#[pyclass(name = "DecisionTreeRegressor")]
pub struct PyDecisionTreeRegressor {
    // Untrained builder; moved out (`take`n) on the first call to `fit`.
    inner: Option<DecisionTree>,
    // Fitted model; populated once `fit` succeeds.
    trained: Option<DecisionTree<Trained>>,
}
155
156#[pymethods]
157impl PyDecisionTreeRegressor {
158    #[new]
159    #[allow(clippy::too_many_arguments)]
160    #[pyo3(signature = (
161        criterion="squared_error",
162        _splitter="best",
163        max_depth=None,
164        min_samples_split=2,
165        min_samples_leaf=1,
166        _min_weight_fraction_leaf=0.0,
167        _max_features=None,
168        random_state=None,
169        _max_leaf_nodes=None,
170        _min_impurity_decrease=0.0,
171        _ccp_alpha=0.0
172    ))]
173    fn new(
174        criterion: &str,
175        _splitter: &str,
176        max_depth: Option<usize>,
177        min_samples_split: usize,
178        min_samples_leaf: usize,
179        _min_weight_fraction_leaf: f64,
180        _max_features: Option<&str>,
181        random_state: Option<u64>,
182        _max_leaf_nodes: Option<usize>,
183        _min_impurity_decrease: f64,
184        _ccp_alpha: f64,
185    ) -> PyResult<Self> {
186        let split_criterion = match criterion {
187            "squared_error" | "mse" => SplitCriterion::MSE,
188            "mae" | "absolute_error" => SplitCriterion::MAE,
189            _ => {
190                return Err(PyValueError::new_err(format!(
191                    "Unknown criterion: {}",
192                    criterion
193                )))
194            }
195        };
196
197        let mut tree = DecisionTree::new()
198            .criterion(split_criterion)
199            .min_samples_split(min_samples_split)
200            .min_samples_leaf(min_samples_leaf);
201
202        if let Some(depth) = max_depth {
203            tree = tree.max_depth(depth);
204        }
205
206        if let Some(seed) = random_state {
207            tree = tree.random_state(Some(seed));
208        }
209
210        Ok(Self {
211            inner: Some(tree),
212            trained: None,
213        })
214    }
215
216    /// Fit the decision tree regressor
217    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
218        let x_array = numpy_to_ndarray2(x)?;
219        let y_array = numpy_to_ndarray1(y)?;
220
221        let model = self.inner.take().ok_or_else(|| {
222            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
223        })?;
224
225        match model.fit(&x_array, &y_array) {
226            Ok(trained_model) => {
227                self.trained = Some(trained_model);
228                Ok(())
229            }
230            Err(e) => Err(PyRuntimeError::new_err(format!(
231                "Failed to fit model: {}",
232                e
233            ))),
234        }
235    }
236
237    /// Make predictions using the fitted model
238    fn predict<'py>(
239        &self,
240        py: Python<'py>,
241        x: &Bound<'py, PyArray2<f64>>,
242    ) -> PyResult<Py<PyArray1<f64>>> {
243        let trained_model = self.trained.as_ref().ok_or_else(|| {
244            PyRuntimeError::new_err("Model must be fitted before making predictions")
245        })?;
246
247        let x_array = numpy_to_ndarray2(x)?;
248
249        let predictions: Array1<f64> =
250            Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
251                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
252        Ok(predictions.into_pyarray(py).unbind())
253    }
254
255    /// Get feature importances
256    fn feature_importances_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
257        let trained_model = self.trained.as_ref().ok_or_else(|| {
258            PyRuntimeError::new_err("Model must be fitted before accessing feature importances")
259        })?;
260
261        match trained_model.feature_importances() {
262            Some(importances) => Ok(importances.clone().into_pyarray(py).unbind()),
263            None => Err(PyRuntimeError::new_err("Feature importances not available")),
264        }
265    }
266
267    fn __repr__(&self) -> String {
268        if self.trained.is_some() {
269            "DecisionTreeRegressor(fitted=True)".to_string()
270        } else {
271            "DecisionTreeRegressor(fitted=False)".to_string()
272        }
273    }
274}
275
/// Python wrapper for Random Forest Classifier
#[pyclass(name = "RandomForestClassifier")]
pub struct PyRandomForestClassifier {
    // Untrained builder; moved out (`take`n) on the first call to `fit`.
    inner: Option<RandomForestClassifier>,
    // Fitted ensemble; populated once `fit` succeeds.
    trained: Option<RandomForestClassifier<Trained>>,
}
282
283#[pymethods]
284impl PyRandomForestClassifier {
285    #[new]
286    #[allow(clippy::too_many_arguments)]
287    #[pyo3(signature = (
288        n_estimators=100,
289        criterion="gini",
290        max_depth=None,
291        min_samples_split=2,
292        min_samples_leaf=1,
293        _min_weight_fraction_leaf=0.0,
294        max_features="sqrt",
295        _max_leaf_nodes=None,
296        _min_impurity_decrease=0.0,
297        bootstrap=true,
298        _oob_score=false,
299        n_jobs=None,
300        random_state=None,
301        _verbose=0,
302        _warm_start=false,
303        _class_weight=None,
304        _ccp_alpha=0.0,
305        _max_samples=None
306    ))]
307    fn new(
308        n_estimators: usize,
309        criterion: &str,
310        max_depth: Option<usize>,
311        min_samples_split: usize,
312        min_samples_leaf: usize,
313        _min_weight_fraction_leaf: f64,
314        max_features: &str,
315        _max_leaf_nodes: Option<usize>,
316        _min_impurity_decrease: f64,
317        bootstrap: bool,
318        _oob_score: bool,
319        n_jobs: Option<i32>,
320        random_state: Option<u64>,
321        _verbose: i32,
322        _warm_start: bool,
323        _class_weight: Option<&str>,
324        _ccp_alpha: f64,
325        _max_samples: Option<f64>,
326    ) -> PyResult<Self> {
327        let split_criterion = match criterion {
328            "gini" => SplitCriterion::Gini,
329            "entropy" => SplitCriterion::Entropy,
330            "log_loss" => SplitCriterion::LogLoss,
331            _ => {
332                return Err(PyValueError::new_err(format!(
333                    "Unknown criterion: {}",
334                    criterion
335                )))
336            }
337        };
338
339        let max_features_strategy = match max_features {
340            "auto" | "sqrt" => MaxFeatures::Sqrt,
341            "log2" => MaxFeatures::Log2,
342            _ => {
343                return Err(PyValueError::new_err(format!(
344                    "Unknown max_features: {}",
345                    max_features
346                )))
347            }
348        };
349
350        let mut forest = RandomForestClassifier::new()
351            .n_estimators(n_estimators)
352            .criterion(split_criterion)
353            .min_samples_split(min_samples_split)
354            .min_samples_leaf(min_samples_leaf)
355            .max_features(max_features_strategy)
356            .bootstrap(bootstrap);
357
358        if let Some(depth) = max_depth {
359            forest = forest.max_depth(depth);
360        }
361
362        if let Some(seed) = random_state {
363            forest = forest.random_state(seed);
364        }
365
366        if let Some(jobs) = n_jobs {
367            forest = forest.n_jobs(jobs);
368        }
369
370        Ok(Self {
371            inner: Some(forest),
372            trained: None,
373        })
374    }
375
376    /// Fit the random forest classifier
377    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
378        let x_array = numpy_to_ndarray2(x)?;
379        let y_array = numpy_to_ndarray1(y)?;
380
381        let y_int: Array1<i32> = y_array.mapv(|val| val as i32);
382
383        let model = self.inner.take().ok_or_else(|| {
384            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
385        })?;
386
387        match model.fit(&x_array, &y_int) {
388            Ok(trained_model) => {
389                self.trained = Some(trained_model);
390                Ok(())
391            }
392            Err(e) => Err(PyRuntimeError::new_err(format!(
393                "Failed to fit model: {}",
394                e
395            ))),
396        }
397    }
398
399    /// Make predictions using the fitted model
400    fn predict<'py>(
401        &self,
402        py: Python<'py>,
403        x: &Bound<'py, PyArray2<f64>>,
404    ) -> PyResult<Py<PyArray1<f64>>> {
405        let trained_model = self.trained.as_ref().ok_or_else(|| {
406            PyRuntimeError::new_err("Model must be fitted before making predictions")
407        })?;
408
409        let x_array = numpy_to_ndarray2(x)?;
410
411        let predictions: Array1<i32> =
412            Predict::<Array2<f64>, Array1<i32>>::predict(trained_model, &x_array)
413                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
414        let predictions_f64: Vec<f64> = predictions.iter().map(|&v| v as f64).collect();
415        Ok(PyArray1::from_vec(py, predictions_f64).unbind())
416    }
417
418    /// Get feature importances
419    fn feature_importances_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
420        let trained_model = self.trained.as_ref().ok_or_else(|| {
421            PyRuntimeError::new_err("Model must be fitted before accessing feature importances")
422        })?;
423
424        match trained_model.feature_importances() {
425            Ok(importances) => Ok(importances.into_pyarray(py).unbind()),
426            Err(e) => Err(PyRuntimeError::new_err(format!(
427                "Failed to compute feature importances: {}",
428                e
429            ))),
430        }
431    }
432
433    fn __repr__(&self) -> String {
434        if self.trained.is_some() {
435            "RandomForestClassifier(fitted=True)".to_string()
436        } else {
437            "RandomForestClassifier(fitted=False)".to_string()
438        }
439    }
440}
441
/// Python wrapper for Random Forest Regressor
#[pyclass(name = "RandomForestRegressor")]
pub struct PyRandomForestRegressor {
    // Untrained builder; moved out (`take`n) on the first call to `fit`.
    inner: Option<RandomForestRegressor>,
    // Fitted ensemble; populated once `fit` succeeds.
    trained: Option<RandomForestRegressor<Trained>>,
}
448
449#[pymethods]
450impl PyRandomForestRegressor {
451    #[new]
452    #[allow(clippy::too_many_arguments)]
453    #[pyo3(signature = (
454        n_estimators=100,
455        criterion="squared_error",
456        max_depth=None,
457        min_samples_split=2,
458        min_samples_leaf=1,
459        _min_weight_fraction_leaf=0.0,
460        max_features=1.0,
461        _max_leaf_nodes=None,
462        _min_impurity_decrease=0.0,
463        bootstrap=true,
464        _oob_score=false,
465        n_jobs=None,
466        random_state=None,
467        _verbose=0,
468        _warm_start=false,
469        _ccp_alpha=0.0,
470        _max_samples=None
471    ))]
472    fn new(
473        n_estimators: usize,
474        criterion: &str,
475        max_depth: Option<usize>,
476        min_samples_split: usize,
477        min_samples_leaf: usize,
478        _min_weight_fraction_leaf: f64,
479        max_features: f64,
480        _max_leaf_nodes: Option<usize>,
481        _min_impurity_decrease: f64,
482        bootstrap: bool,
483        _oob_score: bool,
484        n_jobs: Option<i32>,
485        random_state: Option<u64>,
486        _verbose: i32,
487        _warm_start: bool,
488        _ccp_alpha: f64,
489        _max_samples: Option<f64>,
490    ) -> PyResult<Self> {
491        let split_criterion = match criterion {
492            "squared_error" | "mse" => SplitCriterion::MSE,
493            "mae" | "absolute_error" => SplitCriterion::MAE,
494            _ => {
495                return Err(PyValueError::new_err(format!(
496                    "Unknown criterion: {}",
497                    criterion
498                )))
499            }
500        };
501
502        let max_features_strategy = if (max_features - 1.0).abs() < f64::EPSILON {
503            MaxFeatures::All
504        } else {
505            MaxFeatures::Fraction(max_features)
506        };
507
508        let mut forest = RandomForestRegressor::new()
509            .n_estimators(n_estimators)
510            .criterion(split_criterion)
511            .min_samples_split(min_samples_split)
512            .min_samples_leaf(min_samples_leaf)
513            .max_features(max_features_strategy)
514            .bootstrap(bootstrap);
515
516        if let Some(depth) = max_depth {
517            forest = forest.max_depth(depth);
518        }
519
520        if let Some(seed) = random_state {
521            forest = forest.random_state(seed);
522        }
523
524        if let Some(jobs) = n_jobs {
525            forest = forest.n_jobs(jobs);
526        }
527
528        Ok(Self {
529            inner: Some(forest),
530            trained: None,
531        })
532    }
533
534    /// Fit the random forest regressor
535    fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
536        let x_array = numpy_to_ndarray2(x)?;
537        let y_array = numpy_to_ndarray1(y)?;
538
539        let model = self.inner.take().ok_or_else(|| {
540            PyRuntimeError::new_err("Model has already been fitted or was not initialized")
541        })?;
542
543        match model.fit(&x_array, &y_array) {
544            Ok(trained_model) => {
545                self.trained = Some(trained_model);
546                Ok(())
547            }
548            Err(e) => Err(PyRuntimeError::new_err(format!(
549                "Failed to fit model: {}",
550                e
551            ))),
552        }
553    }
554
555    /// Make predictions using the fitted model
556    fn predict<'py>(
557        &self,
558        py: Python<'py>,
559        x: &Bound<'py, PyArray2<f64>>,
560    ) -> PyResult<Py<PyArray1<f64>>> {
561        let trained_model = self.trained.as_ref().ok_or_else(|| {
562            PyRuntimeError::new_err("Model must be fitted before making predictions")
563        })?;
564
565        let x_array = numpy_to_ndarray2(x)?;
566
567        let predictions: Array1<f64> =
568            Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
569                .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
570        Ok(predictions.into_pyarray(py).unbind())
571    }
572
573    /// Get feature importances
574    fn feature_importances_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
575        let trained_model = self.trained.as_ref().ok_or_else(|| {
576            PyRuntimeError::new_err("Model must be fitted before accessing feature importances")
577        })?;
578
579        match trained_model.feature_importances() {
580            Ok(importances) => Ok(importances.into_pyarray(py).unbind()),
581            Err(e) => Err(PyRuntimeError::new_err(format!(
582                "Failed to compute feature importances: {}",
583                e
584            ))),
585        }
586    }
587
588    fn __repr__(&self) -> String {
589        if self.trained.is_some() {
590            "RandomForestRegressor(fitted=True)".to_string()
591        } else {
592            "RandomForestRegressor(fitted=False)".to_string()
593        }
594    }
595}