1use crate::linear::common::core_array1_to_py;
7use crate::utils::{numpy_to_ndarray1, numpy_to_ndarray2};
8use numpy::{PyArray1, PyArray2};
9use pyo3::exceptions::{PyRuntimeError, PyValueError};
10use pyo3::prelude::*;
11use scirs2_core::ndarray::{Array1, Array2};
12use sklears_core::traits::{Fit, Predict, Trained};
13use sklears_tree::random_forest::RandomForestRegressor;
14use sklears_tree::{DecisionTree, MaxFeatures, RandomForestClassifier, SplitCriterion};
15
/// Python-facing wrapper around the Rust `DecisionTree` classifier.
///
/// Follows a typestate pattern: `inner` holds the untrained builder until
/// `fit` consumes it (via `Option::take`), after which `trained` holds the
/// fitted model. At most one of the two fields is `Some` at any given time,
/// which is why a second call to `fit` fails with a runtime error.
#[pyclass(name = "DecisionTreeClassifier")]
pub struct PyDecisionTreeClassifier {
    // Untrained configuration; taken (left `None`) by `fit`.
    inner: Option<DecisionTree>,
    // Fitted model; populated by a successful `fit`.
    trained: Option<DecisionTree<Trained>>,
}
22
23#[pymethods]
24impl PyDecisionTreeClassifier {
25 #[new]
26 #[allow(clippy::too_many_arguments)]
27 #[pyo3(signature = (
28 criterion="gini",
29 _splitter="best",
30 max_depth=None,
31 min_samples_split=2,
32 min_samples_leaf=1,
33 _min_weight_fraction_leaf=0.0,
34 _max_features=None,
35 random_state=None,
36 _max_leaf_nodes=None,
37 _min_impurity_decrease=0.0,
38 _class_weight=None,
39 _ccp_alpha=0.0
40 ))]
41 fn new(
42 criterion: &str,
43 _splitter: &str,
44 max_depth: Option<usize>,
45 min_samples_split: usize,
46 min_samples_leaf: usize,
47 _min_weight_fraction_leaf: f64,
48 _max_features: Option<&str>,
49 random_state: Option<u64>,
50 _max_leaf_nodes: Option<usize>,
51 _min_impurity_decrease: f64,
52 _class_weight: Option<&str>,
53 _ccp_alpha: f64,
54 ) -> PyResult<Self> {
55 let split_criterion = match criterion {
56 "gini" => SplitCriterion::Gini,
57 "entropy" => SplitCriterion::Entropy,
58 "log_loss" => SplitCriterion::LogLoss,
59 _ => {
60 return Err(PyValueError::new_err(format!(
61 "Unknown criterion: {}",
62 criterion
63 )))
64 }
65 };
66
67 let mut tree = DecisionTree::new()
68 .criterion(split_criterion)
69 .min_samples_split(min_samples_split)
70 .min_samples_leaf(min_samples_leaf);
71
72 if let Some(depth) = max_depth {
73 tree = tree.max_depth(depth);
74 }
75
76 if let Some(seed) = random_state {
77 tree = tree.random_state(Some(seed));
78 }
79
80 Ok(Self {
81 inner: Some(tree),
82 trained: None,
83 })
84 }
85
86 fn fit<'py>(
88 &mut self,
89 x: &Bound<'py, PyArray2<f64>>,
90 y: &Bound<'py, PyArray1<f64>>,
91 ) -> PyResult<()> {
92 let x_array = numpy_to_ndarray2(x)?;
93 let y_array = numpy_to_ndarray1(y)?;
94
95 let model = self.inner.take().ok_or_else(|| {
96 PyRuntimeError::new_err("Model has already been fitted or was not initialized")
97 })?;
98
99 match model.fit(&x_array, &y_array) {
100 Ok(trained_model) => {
101 self.trained = Some(trained_model);
102 Ok(())
103 }
104 Err(e) => Err(PyRuntimeError::new_err(format!(
105 "Failed to fit model: {}",
106 e
107 ))),
108 }
109 }
110
111 fn predict<'py>(
113 &self,
114 py: Python<'py>,
115 x: &Bound<'py, PyArray2<f64>>,
116 ) -> PyResult<Py<PyArray1<f64>>> {
117 let trained_model = self.trained.as_ref().ok_or_else(|| {
118 PyRuntimeError::new_err("Model must be fitted before making predictions")
119 })?;
120
121 let x_array = numpy_to_ndarray2(x)?;
122
123 let predictions: Array1<f64> =
124 Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
125 .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
126 Ok(core_array1_to_py(py, &predictions))
127 }
128
129 fn feature_importances_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
131 let trained_model = self.trained.as_ref().ok_or_else(|| {
132 PyRuntimeError::new_err("Model must be fitted before accessing feature importances")
133 })?;
134
135 match trained_model.feature_importances() {
136 Some(importances) => Ok(core_array1_to_py(py, importances)),
137 None => Err(PyRuntimeError::new_err("Feature importances not available")),
138 }
139 }
140
141 fn __repr__(&self) -> String {
142 if self.trained.is_some() {
143 "DecisionTreeClassifier(fitted=True)".to_string()
144 } else {
145 "DecisionTreeClassifier(fitted=False)".to_string()
146 }
147 }
148}
149
/// Python-facing wrapper around the Rust `DecisionTree` regressor.
///
/// Follows a typestate pattern: `inner` holds the untrained builder until
/// `fit` consumes it (via `Option::take`), after which `trained` holds the
/// fitted model. At most one of the two fields is `Some` at any given time,
/// which is why a second call to `fit` fails with a runtime error.
#[pyclass(name = "DecisionTreeRegressor")]
pub struct PyDecisionTreeRegressor {
    // Untrained configuration; taken (left `None`) by `fit`.
    inner: Option<DecisionTree>,
    // Fitted model; populated by a successful `fit`.
    trained: Option<DecisionTree<Trained>>,
}
156
157#[pymethods]
158impl PyDecisionTreeRegressor {
159 #[new]
160 #[allow(clippy::too_many_arguments)]
161 #[pyo3(signature = (
162 criterion="squared_error",
163 _splitter="best",
164 max_depth=None,
165 min_samples_split=2,
166 min_samples_leaf=1,
167 _min_weight_fraction_leaf=0.0,
168 _max_features=None,
169 random_state=None,
170 _max_leaf_nodes=None,
171 _min_impurity_decrease=0.0,
172 _ccp_alpha=0.0
173 ))]
174 fn new(
175 criterion: &str,
176 _splitter: &str,
177 max_depth: Option<usize>,
178 min_samples_split: usize,
179 min_samples_leaf: usize,
180 _min_weight_fraction_leaf: f64,
181 _max_features: Option<&str>,
182 random_state: Option<u64>,
183 _max_leaf_nodes: Option<usize>,
184 _min_impurity_decrease: f64,
185 _ccp_alpha: f64,
186 ) -> PyResult<Self> {
187 let split_criterion = match criterion {
188 "squared_error" | "mse" => SplitCriterion::MSE,
189 "mae" | "absolute_error" => SplitCriterion::MAE,
190 _ => {
191 return Err(PyValueError::new_err(format!(
192 "Unknown criterion: {}",
193 criterion
194 )))
195 }
196 };
197
198 let mut tree = DecisionTree::new()
199 .criterion(split_criterion)
200 .min_samples_split(min_samples_split)
201 .min_samples_leaf(min_samples_leaf);
202
203 if let Some(depth) = max_depth {
204 tree = tree.max_depth(depth);
205 }
206
207 if let Some(seed) = random_state {
208 tree = tree.random_state(Some(seed));
209 }
210
211 Ok(Self {
212 inner: Some(tree),
213 trained: None,
214 })
215 }
216
217 fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
219 let x_array = numpy_to_ndarray2(x)?;
220 let y_array = numpy_to_ndarray1(y)?;
221
222 let model = self.inner.take().ok_or_else(|| {
223 PyRuntimeError::new_err("Model has already been fitted or was not initialized")
224 })?;
225
226 match model.fit(&x_array, &y_array) {
227 Ok(trained_model) => {
228 self.trained = Some(trained_model);
229 Ok(())
230 }
231 Err(e) => Err(PyRuntimeError::new_err(format!(
232 "Failed to fit model: {}",
233 e
234 ))),
235 }
236 }
237
238 fn predict<'py>(
240 &self,
241 py: Python<'py>,
242 x: &Bound<'py, PyArray2<f64>>,
243 ) -> PyResult<Py<PyArray1<f64>>> {
244 let trained_model = self.trained.as_ref().ok_or_else(|| {
245 PyRuntimeError::new_err("Model must be fitted before making predictions")
246 })?;
247
248 let x_array = numpy_to_ndarray2(x)?;
249
250 let predictions: Array1<f64> =
251 Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
252 .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
253 Ok(core_array1_to_py(py, &predictions))
254 }
255
256 fn feature_importances_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
258 let trained_model = self.trained.as_ref().ok_or_else(|| {
259 PyRuntimeError::new_err("Model must be fitted before accessing feature importances")
260 })?;
261
262 match trained_model.feature_importances() {
263 Some(importances) => Ok(core_array1_to_py(py, importances)),
264 None => Err(PyRuntimeError::new_err("Feature importances not available")),
265 }
266 }
267
268 fn __repr__(&self) -> String {
269 if self.trained.is_some() {
270 "DecisionTreeRegressor(fitted=True)".to_string()
271 } else {
272 "DecisionTreeRegressor(fitted=False)".to_string()
273 }
274 }
275}
276
/// Python-facing wrapper around the Rust `RandomForestClassifier`.
///
/// Follows a typestate pattern: `inner` holds the untrained builder until
/// `fit` consumes it (via `Option::take`), after which `trained` holds the
/// fitted ensemble. At most one of the two fields is `Some` at any given
/// time, which is why a second call to `fit` fails with a runtime error.
#[pyclass(name = "RandomForestClassifier")]
pub struct PyRandomForestClassifier {
    // Untrained configuration; taken (left `None`) by `fit`.
    inner: Option<RandomForestClassifier>,
    // Fitted ensemble; populated by a successful `fit`.
    trained: Option<RandomForestClassifier<Trained>>,
}
283
284#[pymethods]
285impl PyRandomForestClassifier {
286 #[new]
287 #[allow(clippy::too_many_arguments)]
288 #[pyo3(signature = (
289 n_estimators=100,
290 criterion="gini",
291 max_depth=None,
292 min_samples_split=2,
293 min_samples_leaf=1,
294 _min_weight_fraction_leaf=0.0,
295 max_features="sqrt",
296 _max_leaf_nodes=None,
297 _min_impurity_decrease=0.0,
298 bootstrap=true,
299 _oob_score=false,
300 n_jobs=None,
301 random_state=None,
302 _verbose=0,
303 _warm_start=false,
304 _class_weight=None,
305 _ccp_alpha=0.0,
306 _max_samples=None
307 ))]
308 fn new(
309 n_estimators: usize,
310 criterion: &str,
311 max_depth: Option<usize>,
312 min_samples_split: usize,
313 min_samples_leaf: usize,
314 _min_weight_fraction_leaf: f64,
315 max_features: &str,
316 _max_leaf_nodes: Option<usize>,
317 _min_impurity_decrease: f64,
318 bootstrap: bool,
319 _oob_score: bool,
320 n_jobs: Option<i32>,
321 random_state: Option<u64>,
322 _verbose: i32,
323 _warm_start: bool,
324 _class_weight: Option<&str>,
325 _ccp_alpha: f64,
326 _max_samples: Option<f64>,
327 ) -> PyResult<Self> {
328 let split_criterion = match criterion {
329 "gini" => SplitCriterion::Gini,
330 "entropy" => SplitCriterion::Entropy,
331 "log_loss" => SplitCriterion::LogLoss,
332 _ => {
333 return Err(PyValueError::new_err(format!(
334 "Unknown criterion: {}",
335 criterion
336 )))
337 }
338 };
339
340 let max_features_strategy = match max_features {
341 "auto" | "sqrt" => MaxFeatures::Sqrt,
342 "log2" => MaxFeatures::Log2,
343 _ => {
344 return Err(PyValueError::new_err(format!(
345 "Unknown max_features: {}",
346 max_features
347 )))
348 }
349 };
350
351 let mut forest = RandomForestClassifier::new()
352 .n_estimators(n_estimators)
353 .criterion(split_criterion)
354 .min_samples_split(min_samples_split)
355 .min_samples_leaf(min_samples_leaf)
356 .max_features(max_features_strategy)
357 .bootstrap(bootstrap);
358
359 if let Some(depth) = max_depth {
360 forest = forest.max_depth(depth);
361 }
362
363 if let Some(seed) = random_state {
364 forest = forest.random_state(seed);
365 }
366
367 if let Some(jobs) = n_jobs {
368 forest = forest.n_jobs(jobs);
369 }
370
371 Ok(Self {
372 inner: Some(forest),
373 trained: None,
374 })
375 }
376
377 fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
379 let x_array = numpy_to_ndarray2(x)?;
380 let y_array = numpy_to_ndarray1(y)?;
381
382 let y_int: Array1<i32> = y_array.mapv(|val| val as i32);
383
384 let model = self.inner.take().ok_or_else(|| {
385 PyRuntimeError::new_err("Model has already been fitted or was not initialized")
386 })?;
387
388 match model.fit(&x_array, &y_int) {
389 Ok(trained_model) => {
390 self.trained = Some(trained_model);
391 Ok(())
392 }
393 Err(e) => Err(PyRuntimeError::new_err(format!(
394 "Failed to fit model: {}",
395 e
396 ))),
397 }
398 }
399
400 fn predict<'py>(
402 &self,
403 py: Python<'py>,
404 x: &Bound<'py, PyArray2<f64>>,
405 ) -> PyResult<Py<PyArray1<f64>>> {
406 let trained_model = self.trained.as_ref().ok_or_else(|| {
407 PyRuntimeError::new_err("Model must be fitted before making predictions")
408 })?;
409
410 let x_array = numpy_to_ndarray2(x)?;
411
412 let predictions: Array1<i32> =
413 Predict::<Array2<f64>, Array1<i32>>::predict(trained_model, &x_array)
414 .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
415 let predictions_f64: Vec<f64> = predictions.iter().map(|&v| v as f64).collect();
416 Ok(PyArray1::from_vec(py, predictions_f64).unbind())
417 }
418
419 fn feature_importances_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
421 let trained_model = self.trained.as_ref().ok_or_else(|| {
422 PyRuntimeError::new_err("Model must be fitted before accessing feature importances")
423 })?;
424
425 match trained_model.feature_importances() {
426 Ok(importances) => Ok(core_array1_to_py(py, &importances)),
427 Err(e) => Err(PyRuntimeError::new_err(format!(
428 "Failed to compute feature importances: {}",
429 e
430 ))),
431 }
432 }
433
434 fn __repr__(&self) -> String {
435 if self.trained.is_some() {
436 "RandomForestClassifier(fitted=True)".to_string()
437 } else {
438 "RandomForestClassifier(fitted=False)".to_string()
439 }
440 }
441}
442
/// Python-facing wrapper around the Rust `RandomForestRegressor`.
///
/// Follows a typestate pattern: `inner` holds the untrained builder until
/// `fit` consumes it (via `Option::take`), after which `trained` holds the
/// fitted ensemble. At most one of the two fields is `Some` at any given
/// time, which is why a second call to `fit` fails with a runtime error.
#[pyclass(name = "RandomForestRegressor")]
pub struct PyRandomForestRegressor {
    // Untrained configuration; taken (left `None`) by `fit`.
    inner: Option<RandomForestRegressor>,
    // Fitted ensemble; populated by a successful `fit`.
    trained: Option<RandomForestRegressor<Trained>>,
}
449
450#[pymethods]
451impl PyRandomForestRegressor {
452 #[new]
453 #[allow(clippy::too_many_arguments)]
454 #[pyo3(signature = (
455 n_estimators=100,
456 criterion="squared_error",
457 max_depth=None,
458 min_samples_split=2,
459 min_samples_leaf=1,
460 _min_weight_fraction_leaf=0.0,
461 max_features=1.0,
462 _max_leaf_nodes=None,
463 _min_impurity_decrease=0.0,
464 bootstrap=true,
465 _oob_score=false,
466 n_jobs=None,
467 random_state=None,
468 _verbose=0,
469 _warm_start=false,
470 _ccp_alpha=0.0,
471 _max_samples=None
472 ))]
473 fn new(
474 n_estimators: usize,
475 criterion: &str,
476 max_depth: Option<usize>,
477 min_samples_split: usize,
478 min_samples_leaf: usize,
479 _min_weight_fraction_leaf: f64,
480 max_features: f64,
481 _max_leaf_nodes: Option<usize>,
482 _min_impurity_decrease: f64,
483 bootstrap: bool,
484 _oob_score: bool,
485 n_jobs: Option<i32>,
486 random_state: Option<u64>,
487 _verbose: i32,
488 _warm_start: bool,
489 _ccp_alpha: f64,
490 _max_samples: Option<f64>,
491 ) -> PyResult<Self> {
492 let split_criterion = match criterion {
493 "squared_error" | "mse" => SplitCriterion::MSE,
494 "mae" | "absolute_error" => SplitCriterion::MAE,
495 _ => {
496 return Err(PyValueError::new_err(format!(
497 "Unknown criterion: {}",
498 criterion
499 )))
500 }
501 };
502
503 let max_features_strategy = if (max_features - 1.0).abs() < f64::EPSILON {
504 MaxFeatures::All
505 } else {
506 MaxFeatures::Fraction(max_features)
507 };
508
509 let mut forest = RandomForestRegressor::new()
510 .n_estimators(n_estimators)
511 .criterion(split_criterion)
512 .min_samples_split(min_samples_split)
513 .min_samples_leaf(min_samples_leaf)
514 .max_features(max_features_strategy)
515 .bootstrap(bootstrap);
516
517 if let Some(depth) = max_depth {
518 forest = forest.max_depth(depth);
519 }
520
521 if let Some(seed) = random_state {
522 forest = forest.random_state(seed);
523 }
524
525 if let Some(jobs) = n_jobs {
526 forest = forest.n_jobs(jobs);
527 }
528
529 Ok(Self {
530 inner: Some(forest),
531 trained: None,
532 })
533 }
534
535 fn fit(&mut self, x: &Bound<'_, PyArray2<f64>>, y: &Bound<'_, PyArray1<f64>>) -> PyResult<()> {
537 let x_array = numpy_to_ndarray2(x)?;
538 let y_array = numpy_to_ndarray1(y)?;
539
540 let model = self.inner.take().ok_or_else(|| {
541 PyRuntimeError::new_err("Model has already been fitted or was not initialized")
542 })?;
543
544 match model.fit(&x_array, &y_array) {
545 Ok(trained_model) => {
546 self.trained = Some(trained_model);
547 Ok(())
548 }
549 Err(e) => Err(PyRuntimeError::new_err(format!(
550 "Failed to fit model: {}",
551 e
552 ))),
553 }
554 }
555
556 fn predict<'py>(
558 &self,
559 py: Python<'py>,
560 x: &Bound<'py, PyArray2<f64>>,
561 ) -> PyResult<Py<PyArray1<f64>>> {
562 let trained_model = self.trained.as_ref().ok_or_else(|| {
563 PyRuntimeError::new_err("Model must be fitted before making predictions")
564 })?;
565
566 let x_array = numpy_to_ndarray2(x)?;
567
568 let predictions: Array1<f64> =
569 Predict::<Array2<f64>, Array1<f64>>::predict(trained_model, &x_array)
570 .map_err(|e| PyRuntimeError::new_err(format!("Prediction failed: {}", e)))?;
571 Ok(core_array1_to_py(py, &predictions))
572 }
573
574 fn feature_importances_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
576 let trained_model = self.trained.as_ref().ok_or_else(|| {
577 PyRuntimeError::new_err("Model must be fitted before accessing feature importances")
578 })?;
579
580 match trained_model.feature_importances() {
581 Ok(importances) => Ok(core_array1_to_py(py, &importances)),
582 Err(e) => Err(PyRuntimeError::new_err(format!(
583 "Failed to compute feature importances: {}",
584 e
585 ))),
586 }
587 }
588
589 fn __repr__(&self) -> String {
590 if self.trained.is_some() {
591 "RandomForestRegressor(fitted=True)".to_string()
592 } else {
593 "RandomForestRegressor(fitted=False)".to_string()
594 }
595 }
596}