1use crate::utils::{numpy_to_ndarray1, numpy_to_ndarray2};
7use numpy::{IntoPyArray, PyArray1, PyArray2};
8use pyo3::exceptions::{PyRuntimeError, PyValueError};
9use pyo3::prelude::*;
10use scirs2_core::ndarray::{Array1, Array2};
11use sklears_core::traits::{Fit, Predict, Trained};
12use sklears_tree::random_forest::RandomForestRegressor;
13use sklears_tree::{DecisionTree, MaxFeatures, RandomForestClassifier, SplitCriterion};
14
/// Python-visible wrapper for the Rust `DecisionTree` used as a classifier.
///
/// The estimator lives in one of two slots: `inner` holds the configured but
/// unfitted tree, and `fit` moves it out (via `Option::take`) into `trained`
/// on success — so a given instance can be fitted at most once.
#[pyclass(name = "DecisionTreeClassifier")]
pub struct PyDecisionTreeClassifier {
    // Unfitted, configured estimator; consumed by `fit`.
    inner: Option<DecisionTree>,
    // Fitted estimator; present only after a successful `fit`.
    trained: Option<DecisionTree<Trained>>,
}
21
22#[pymethods]
23impl PyDecisionTreeClassifier {
24 #[new]
25 #[allow(clippy::too_many_arguments)]
26 #[pyo3(signature = (
27 criterion="gini",
28 _splitter="best",
29 max_depth=None,
30 min_samples_split=2,
31 min_samples_leaf=1,
32 _min_weight_fraction_leaf=0.0,
33 _max_features=None,
34 random_state=None,
35 _max_leaf_nodes=None,
36 _min_impurity_decrease=0.0,
37 _class_weight=None,
38 _ccp_alpha=0.0
39 ))]
40 fn new(
41 criterion: &str,
42 _splitter: &str,
43 max_depth: Option<usize>,
44 min_samples_split: usize,
45 min_samples_leaf: usize,
46 _min_weight_fraction_leaf: f64,
47 _max_features: Option<&str>,
48 random_state: Option<u64>,
49 _max_leaf_nodes: Option<usize>,
50 _min_impurity_decrease: f64,
51 _class_weight: Option<&str>,
52 _ccp_alpha: f64,
53 ) -> PyResult<Self> {
54 let split_criterion = match criterion {
55 "gini" => SplitCriterion::Gini,
56 "entropy" => SplitCriterion::Entropy,
57 "log_loss" => SplitCriterion::LogLoss,
58 _ => {
59 return Err(PyValueError::new_err(format!(
60 "Unknown criterion: {}",
61 criterion
62 )))
63 }
64 };
65
66 let mut tree = DecisionTree::new()
67 .criterion(split_criterion)
68 .min_samples_split(min_samples_split)
69 .min_samples_leaf(min_samples_leaf);
70
71 if let Some(depth) = max_depth {
72 tree = tree.max_depth(depth);
73 }
74
75 if let Some(seed) = random_state {
76 tree = tree.random_state(Some(seed));
77 }
78
79 Ok(Self {
80 inner: Some(tree),
81 trained: None,
82 })
83 }
84
85 fn fit<'py>(
87 &mut self,
88 x: &Bound<'py, PyArray2<f64>>,
89 y: &Bound<'py, PyArray1<f64>>,
90 ) -> PyResult<()> {
91 let x_array = numpy_to_ndarray2(x)?;
92 let y_array = numpy_to_ndarray1(y)?;
93
94 let model = self.inner.take().ok_or_else(|| {
95 PyRuntimeError::new_err("Model has already been fitted or was not initialized")
96 })?;
97
98 match model.fit(&x_array, &y_array) {
99 Ok(trained_model) => {
100 self.trained = Some(trained_model);
101 Ok(())
102 }
103 Err(e) => Err(PyRuntimeError::new_err(format!(
104 "Failed to fit model: {}",
105 e
106 ))),
107 }
108 }
109
110 fn predict<'py>(
112 &self,
113 py: Python<'py>,
114 x: &Bound<'py, PyArray2<f64>>,
115 ) -> PyResult<Py<PyArray1<f64>>> {
116 let trained_model = self.trained.as_ref().ok_or_else(|| {
117 PyRuntimeError::new_err("Model must be fitted before making predictions")
118 })?;
119
120 let x_array = numpy_to_ndarray2(x)?;
121
122 let predictions: Array1<f64> =
123 Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
124 .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
125 Ok(predictions.into_pyarray(py).unbind())
126 }
127
128 fn feature_importances_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
130 let trained_model = self.trained.as_ref().ok_or_else(|| {
131 PyRuntimeError::new_err("Model must be fitted before accessing feature importances")
132 })?;
133
134 match trained_model.feature_importances() {
135 Some(importances) => Ok(importances.clone().into_pyarray(py).unbind()),
136 None => Err(PyRuntimeError::new_err("Feature importances not available")),
137 }
138 }
139
140 fn __repr__(&self) -> String {
141 if self.trained.is_some() {
142 "DecisionTreeClassifier(fitted=True)".to_string()
143 } else {
144 "DecisionTreeClassifier(fitted=False)".to_string()
145 }
146 }
147}
148
149#[pyclass(name = "DecisionTreeRegressor")]
151pub struct PyDecisionTreeRegressor {
152 inner: Option<DecisionTree>,
153 trained: Option<DecisionTree<Trained>>,
154}
155
156#[pymethods]
157impl PyDecisionTreeRegressor {
158 #[new]
159 #[allow(clippy::too_many_arguments)]
160 #[pyo3(signature = (
161 criterion="squared_error",
162 _splitter="best",
163 max_depth=None,
164 min_samples_split=2,
165 min_samples_leaf=1,
166 _min_weight_fraction_leaf=0.0,
167 _max_features=None,
168 random_state=None,
169 _max_leaf_nodes=None,
170 _min_impurity_decrease=0.0,
171 _ccp_alpha=0.0
172 ))]
173 fn new(
174 criterion: &str,
175 _splitter: &str,
176 max_depth: Option<usize>,
177 min_samples_split: usize,
178 min_samples_leaf: usize,
179 _min_weight_fraction_leaf: f64,
180 _max_features: Option<&str>,
181 random_state: Option<u64>,
182 _max_leaf_nodes: Option<usize>,
183 _min_impurity_decrease: f64,
184 _ccp_alpha: f64,
185 ) -> PyResult<Self> {
186 let split_criterion = match criterion {
187 "squared_error" | "mse" => SplitCriterion::MSE,
188 "mae" | "absolute_error" => SplitCriterion::MAE,
189 _ => {
190 return Err(PyValueError::new_err(format!(
191 "Unknown criterion: {}",
192 criterion
193 )))
194 }
195 };
196
197 let mut tree = DecisionTree::new()
198 .criterion(split_criterion)
199 .min_samples_split(min_samples_split)
200 .min_samples_leaf(min_samples_leaf);
201
202 if let Some(depth) = max_depth {
203 tree = tree.max_depth(depth);
204 }
205
206 if let Some(seed) = random_state {
207 tree = tree.random_state(Some(seed));
208 }
209
210 Ok(Self {
211 inner: Some(tree),
212 trained: None,
213 })
214 }
215
216 fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
218 let x_array = numpy_to_ndarray2(x)?;
219 let y_array = numpy_to_ndarray1(y)?;
220
221 let model = self.inner.take().ok_or_else(|| {
222 PyRuntimeError::new_err("Model has already been fitted or was not initialized")
223 })?;
224
225 match model.fit(&x_array, &y_array) {
226 Ok(trained_model) => {
227 self.trained = Some(trained_model);
228 Ok(())
229 }
230 Err(e) => Err(PyRuntimeError::new_err(format!(
231 "Failed to fit model: {}",
232 e
233 ))),
234 }
235 }
236
237 fn predict<'py>(
239 &self,
240 py: Python<'py>,
241 x: &Bound<'py, PyArray2<f64>>,
242 ) -> PyResult<Py<PyArray1<f64>>> {
243 let trained_model = self.trained.as_ref().ok_or_else(|| {
244 PyRuntimeError::new_err("Model must be fitted before making predictions")
245 })?;
246
247 let x_array = numpy_to_ndarray2(x)?;
248
249 let predictions: Array1<f64> =
250 Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
251 .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
252 Ok(predictions.into_pyarray(py).unbind())
253 }
254
255 fn feature_importances_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
257 let trained_model = self.trained.as_ref().ok_or_else(|| {
258 PyRuntimeError::new_err("Model must be fitted before accessing feature importances")
259 })?;
260
261 match trained_model.feature_importances() {
262 Some(importances) => Ok(importances.clone().into_pyarray(py).unbind()),
263 None => Err(PyRuntimeError::new_err("Feature importances not available")),
264 }
265 }
266
267 fn __repr__(&self) -> String {
268 if self.trained.is_some() {
269 "DecisionTreeRegressor(fitted=True)".to_string()
270 } else {
271 "DecisionTreeRegressor(fitted=False)".to_string()
272 }
273 }
274}
275
/// Python-visible wrapper for the Rust `RandomForestClassifier`.
///
/// `inner` holds the configured but unfitted forest; `fit` takes it out and,
/// on success, stores the result in `trained` — so each instance can be
/// fitted at most once.
#[pyclass(name = "RandomForestClassifier")]
pub struct PyRandomForestClassifier {
    // Unfitted, configured estimator; consumed by `fit`.
    inner: Option<RandomForestClassifier>,
    // Fitted estimator; present only after a successful `fit`.
    trained: Option<RandomForestClassifier<Trained>>,
}
282
283#[pymethods]
284impl PyRandomForestClassifier {
285 #[new]
286 #[allow(clippy::too_many_arguments)]
287 #[pyo3(signature = (
288 n_estimators=100,
289 criterion="gini",
290 max_depth=None,
291 min_samples_split=2,
292 min_samples_leaf=1,
293 _min_weight_fraction_leaf=0.0,
294 max_features="sqrt",
295 _max_leaf_nodes=None,
296 _min_impurity_decrease=0.0,
297 bootstrap=true,
298 _oob_score=false,
299 n_jobs=None,
300 random_state=None,
301 _verbose=0,
302 _warm_start=false,
303 _class_weight=None,
304 _ccp_alpha=0.0,
305 _max_samples=None
306 ))]
307 fn new(
308 n_estimators: usize,
309 criterion: &str,
310 max_depth: Option<usize>,
311 min_samples_split: usize,
312 min_samples_leaf: usize,
313 _min_weight_fraction_leaf: f64,
314 max_features: &str,
315 _max_leaf_nodes: Option<usize>,
316 _min_impurity_decrease: f64,
317 bootstrap: bool,
318 _oob_score: bool,
319 n_jobs: Option<i32>,
320 random_state: Option<u64>,
321 _verbose: i32,
322 _warm_start: bool,
323 _class_weight: Option<&str>,
324 _ccp_alpha: f64,
325 _max_samples: Option<f64>,
326 ) -> PyResult<Self> {
327 let split_criterion = match criterion {
328 "gini" => SplitCriterion::Gini,
329 "entropy" => SplitCriterion::Entropy,
330 "log_loss" => SplitCriterion::LogLoss,
331 _ => {
332 return Err(PyValueError::new_err(format!(
333 "Unknown criterion: {}",
334 criterion
335 )))
336 }
337 };
338
339 let max_features_strategy = match max_features {
340 "auto" | "sqrt" => MaxFeatures::Sqrt,
341 "log2" => MaxFeatures::Log2,
342 _ => {
343 return Err(PyValueError::new_err(format!(
344 "Unknown max_features: {}",
345 max_features
346 )))
347 }
348 };
349
350 let mut forest = RandomForestClassifier::new()
351 .n_estimators(n_estimators)
352 .criterion(split_criterion)
353 .min_samples_split(min_samples_split)
354 .min_samples_leaf(min_samples_leaf)
355 .max_features(max_features_strategy)
356 .bootstrap(bootstrap);
357
358 if let Some(depth) = max_depth {
359 forest = forest.max_depth(depth);
360 }
361
362 if let Some(seed) = random_state {
363 forest = forest.random_state(seed);
364 }
365
366 if let Some(jobs) = n_jobs {
367 forest = forest.n_jobs(jobs);
368 }
369
370 Ok(Self {
371 inner: Some(forest),
372 trained: None,
373 })
374 }
375
376 fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
378 let x_array = numpy_to_ndarray2(x)?;
379 let y_array = numpy_to_ndarray1(y)?;
380
381 let y_int: Array1<i32> = y_array.mapv(|val| val as i32);
382
383 let model = self.inner.take().ok_or_else(|| {
384 PyRuntimeError::new_err("Model has already been fitted or was not initialized")
385 })?;
386
387 match model.fit(&x_array, &y_int) {
388 Ok(trained_model) => {
389 self.trained = Some(trained_model);
390 Ok(())
391 }
392 Err(e) => Err(PyRuntimeError::new_err(format!(
393 "Failed to fit model: {}",
394 e
395 ))),
396 }
397 }
398
399 fn predict<'py>(
401 &self,
402 py: Python<'py>,
403 x: &Bound<'py, PyArray2<f64>>,
404 ) -> PyResult<Py<PyArray1<f64>>> {
405 let trained_model = self.trained.as_ref().ok_or_else(|| {
406 PyRuntimeError::new_err("Model must be fitted before making predictions")
407 })?;
408
409 let x_array = numpy_to_ndarray2(x)?;
410
411 let predictions: Array1<i32> =
412 Predict::<Array2<f64>, Array1<i32>>::predict(trained_model, &x_array)
413 .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
414 let predictions_f64: Vec<f64> = predictions.iter().map(|&v| v as f64).collect();
415 Ok(PyArray1::from_vec(py, predictions_f64).unbind())
416 }
417
418 fn feature_importances_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
420 let trained_model = self.trained.as_ref().ok_or_else(|| {
421 PyRuntimeError::new_err("Model must be fitted before accessing feature importances")
422 })?;
423
424 match trained_model.feature_importances() {
425 Ok(importances) => Ok(importances.into_pyarray(py).unbind()),
426 Err(e) => Err(PyRuntimeError::new_err(format!(
427 "Failed to compute feature importances: {}",
428 e
429 ))),
430 }
431 }
432
433 fn __repr__(&self) -> String {
434 if self.trained.is_some() {
435 "RandomForestClassifier(fitted=True)".to_string()
436 } else {
437 "RandomForestClassifier(fitted=False)".to_string()
438 }
439 }
440}
441
/// Python-visible wrapper for the Rust `RandomForestRegressor`.
///
/// `inner` holds the configured but unfitted forest; `fit` takes it out and,
/// on success, stores the result in `trained` — so each instance can be
/// fitted at most once.
#[pyclass(name = "RandomForestRegressor")]
pub struct PyRandomForestRegressor {
    // Unfitted, configured estimator; consumed by `fit`.
    inner: Option<RandomForestRegressor>,
    // Fitted estimator; present only after a successful `fit`.
    trained: Option<RandomForestRegressor<Trained>>,
}
448
449#[pymethods]
450impl PyRandomForestRegressor {
451 #[new]
452 #[allow(clippy::too_many_arguments)]
453 #[pyo3(signature = (
454 n_estimators=100,
455 criterion="squared_error",
456 max_depth=None,
457 min_samples_split=2,
458 min_samples_leaf=1,
459 _min_weight_fraction_leaf=0.0,
460 max_features=1.0,
461 _max_leaf_nodes=None,
462 _min_impurity_decrease=0.0,
463 bootstrap=true,
464 _oob_score=false,
465 n_jobs=None,
466 random_state=None,
467 _verbose=0,
468 _warm_start=false,
469 _ccp_alpha=0.0,
470 _max_samples=None
471 ))]
472 fn new(
473 n_estimators: usize,
474 criterion: &str,
475 max_depth: Option<usize>,
476 min_samples_split: usize,
477 min_samples_leaf: usize,
478 _min_weight_fraction_leaf: f64,
479 max_features: f64,
480 _max_leaf_nodes: Option<usize>,
481 _min_impurity_decrease: f64,
482 bootstrap: bool,
483 _oob_score: bool,
484 n_jobs: Option<i32>,
485 random_state: Option<u64>,
486 _verbose: i32,
487 _warm_start: bool,
488 _ccp_alpha: f64,
489 _max_samples: Option<f64>,
490 ) -> PyResult<Self> {
491 let split_criterion = match criterion {
492 "squared_error" | "mse" => SplitCriterion::MSE,
493 "mae" | "absolute_error" => SplitCriterion::MAE,
494 _ => {
495 return Err(PyValueError::new_err(format!(
496 "Unknown criterion: {}",
497 criterion
498 )))
499 }
500 };
501
502 let max_features_strategy = if (max_features - 1.0).abs() < f64::EPSILON {
503 MaxFeatures::All
504 } else {
505 MaxFeatures::Fraction(max_features)
506 };
507
508 let mut forest = RandomForestRegressor::new()
509 .n_estimators(n_estimators)
510 .criterion(split_criterion)
511 .min_samples_split(min_samples_split)
512 .min_samples_leaf(min_samples_leaf)
513 .max_features(max_features_strategy)
514 .bootstrap(bootstrap);
515
516 if let Some(depth) = max_depth {
517 forest = forest.max_depth(depth);
518 }
519
520 if let Some(seed) = random_state {
521 forest = forest.random_state(seed);
522 }
523
524 if let Some(jobs) = n_jobs {
525 forest = forest.n_jobs(jobs);
526 }
527
528 Ok(Self {
529 inner: Some(forest),
530 trained: None,
531 })
532 }
533
534 fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
536 let x_array = numpy_to_ndarray2(x)?;
537 let y_array = numpy_to_ndarray1(y)?;
538
539 let model = self.inner.take().ok_or_else(|| {
540 PyRuntimeError::new_err("Model has already been fitted or was not initialized")
541 })?;
542
543 match model.fit(&x_array, &y_array) {
544 Ok(trained_model) => {
545 self.trained = Some(trained_model);
546 Ok(())
547 }
548 Err(e) => Err(PyRuntimeError::new_err(format!(
549 "Failed to fit model: {}",
550 e
551 ))),
552 }
553 }
554
555 fn predict<'py>(
557 &self,
558 py: Python<'py>,
559 x: &Bound<'py, PyArray2<f64>>,
560 ) -> PyResult<Py<PyArray1<f64>>> {
561 let trained_model = self.trained.as_ref().ok_or_else(|| {
562 PyRuntimeError::new_err("Model must be fitted before making predictions")
563 })?;
564
565 let x_array = numpy_to_ndarray2(x)?;
566
567 let predictions: Array1<f64> =
568 Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
569 .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
570 Ok(predictions.into_pyarray(py).unbind())
571 }
572
573 fn feature_importances_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
575 let trained_model = self.trained.as_ref().ok_or_else(|| {
576 PyRuntimeError::new_err("Model must be fitted before accessing feature importances")
577 })?;
578
579 match trained_model.feature_importances() {
580 Ok(importances) => Ok(importances.into_pyarray(py).unbind()),
581 Err(e) => Err(PyRuntimeError::new_err(format!(
582 "Failed to compute feature importances: {}",
583 e
584 ))),
585 }
586 }
587
588 fn __repr__(&self) -> String {
589 if self.trained.is_some() {
590 "RandomForestRegressor(fitted=True)".to_string()
591 } else {
592 "RandomForestRegressor(fitted=False)".to_string()
593 }
594 }
595}