1use scirs2_core::ndarray::{Array1, Array2};
7use scirs2_core::random::thread_rng;
8use scirs2_core::SliceRandomExt; use sklears_core::error::Result;
10
11#[cfg(feature = "parallel")]
12use rayon::prelude::*;
13
/// Namespace type for the parallel-processing helpers defined in its `impl` block.
///
/// It is a unit struct: it carries no state, and the helpers visible in this
/// file are all associated functions (none of them takes `self`).
pub struct ParallelUtils;
16
17impl ParallelUtils {
18 #[cfg(feature = "parallel")]
20 pub fn maybe_parallel_map<T, U, F>(items: Vec<T>, f: F) -> Vec<U>
21 where
22 T: Send + Sync,
23 U: Send,
24 F: Fn(T) -> U + Sync + Send,
25 {
26 items.into_par_iter().map(f).collect()
27 }
28
29 #[cfg(not(feature = "parallel"))]
31 pub fn maybe_parallel_map<T, U, F>(items: Vec<T>, f: F) -> Vec<U>
32 where
33 F: Fn(T) -> U,
34 {
35 items.into_iter().map(f).collect()
36 }
37
38 #[cfg(feature = "parallel")]
40 pub fn parallel_bootstrap_samples(
41 x: &Array2<f64>,
42 y: &Array1<i32>,
43 n_estimators: usize,
44 n_samples: usize,
45 random_seeds: &[u64],
46 ) -> Vec<(Array2<f64>, Array1<i32>)> {
47 (0..n_estimators)
48 .into_par_iter()
49 .map(|i| {
50 let seed = random_seeds.get(i).copied().unwrap_or(42 + i as u64);
51 Self::bootstrap_sample(x, y, n_samples, seed)
52 })
53 .collect()
54 }
55
56 #[cfg(not(feature = "parallel"))]
58 pub fn parallel_bootstrap_samples(
59 x: &Array2<f64>,
60 y: &Array1<i32>,
61 n_estimators: usize,
62 n_samples: usize,
63 random_seeds: &[u64],
64 ) -> Vec<(Array2<f64>, Array1<i32>)> {
65 (0..n_estimators)
66 .map(|i| {
67 let seed = random_seeds.get(i).copied().unwrap_or(42 + i as u64);
68 Self::bootstrap_sample(x, y, n_samples, seed)
69 })
70 .collect()
71 }
72
73 fn bootstrap_sample(
75 x: &Array2<f64>,
76 y: &Array1<i32>,
77 n_samples: usize,
78 seed: u64,
79 ) -> (Array2<f64>, Array1<i32>) {
80 let mut rng = thread_rng();
81 let original_n_samples = x.nrows();
82 let n_features = x.ncols();
83
84 let mut bootstrap_x = Array2::zeros((n_samples, n_features));
85 let mut bootstrap_y = Array1::zeros(n_samples);
86
87 for i in 0..n_samples {
88 let idx = rng.gen_range(0..original_n_samples);
89 bootstrap_x.row_mut(i).assign(&x.row(idx));
90 bootstrap_y[i] = y[idx];
91 }
92
93 (bootstrap_x, bootstrap_y)
94 }
95
96 #[cfg(feature = "parallel")]
98 pub fn parallel_predict_proba_aggregate(predictions: Vec<Array2<f64>>) -> Result<Array2<f64>> {
99 if predictions.is_empty() {
100 return Err(sklears_core::error::SklearsError::InvalidData {
101 reason: "No predictions to aggregate".to_string(),
102 });
103 }
104
105 let n_samples = predictions[0].nrows();
106 let n_classes = predictions[0].ncols();
107
108 let aggregated: Vec<Vec<f64>> = (0..n_samples)
110 .into_par_iter()
111 .map(|sample_idx| {
112 let mut class_votes = vec![0.0; n_classes];
113 for pred_matrix in &predictions {
114 let row = pred_matrix.row(sample_idx);
115 for (class_idx, &prob) in row.iter().enumerate() {
116 class_votes[class_idx] += prob;
117 }
118 }
119
120 let n_estimators = predictions.len() as f64;
122 class_votes
123 .iter()
124 .map(|&vote| vote / n_estimators)
125 .collect()
126 })
127 .collect();
128
129 let mut result = Array2::zeros((n_samples, n_classes));
131 for (i, row) in aggregated.iter().enumerate() {
132 for (j, &val) in row.iter().enumerate() {
133 result[[i, j]] = val;
134 }
135 }
136
137 Ok(result)
138 }
139
140 #[cfg(not(feature = "parallel"))]
142 pub fn parallel_predict_proba_aggregate(predictions: Vec<Array2<f64>>) -> Result<Array2<f64>> {
143 if predictions.is_empty() {
144 return Err(sklears_core::error::SklearsError::InvalidData {
145 reason: "No predictions to aggregate".to_string(),
146 });
147 }
148
149 let n_samples = predictions[0].nrows();
150 let n_classes = predictions[0].ncols();
151 let n_estimators = predictions.len() as f64;
152
153 let mut result = Array2::zeros((n_samples, n_classes));
154
155 for pred_matrix in predictions {
156 result = result + pred_matrix;
157 }
158
159 result = result / n_estimators;
160 Ok(result)
161 }
162
163 #[cfg(feature = "parallel")]
165 pub fn parallel_permutation_importance<F>(
166 x: &Array2<f64>,
167 y: &Array1<i32>,
168 baseline_score: f64,
169 scoring_fn: F,
170 n_repeats: usize,
171 random_seeds: &[u64],
172 ) -> Result<Array1<f64>>
173 where
174 F: Fn(&Array2<f64>, &Array1<i32>) -> Result<f64> + Sync + Send,
175 {
176 let n_features = x.ncols();
177
178 let importances: Result<Vec<f64>> = (0..n_features)
179 .into_par_iter()
180 .map(|feature_idx| {
181 let mut importance_scores = Vec::new();
182
183 for repeat in 0..n_repeats {
184 let seed = random_seeds
185 .get(repeat)
186 .copied()
187 .unwrap_or(42 + repeat as u64);
188 let mut x_permuted = x.clone();
189 Self::permute_feature(&mut x_permuted, feature_idx, seed)?;
190
191 let permuted_score = scoring_fn(&x_permuted, y)?;
192 importance_scores.push(baseline_score - permuted_score);
193 }
194
195 let avg_importance =
197 importance_scores.iter().sum::<f64>() / importance_scores.len() as f64;
198 Ok(avg_importance)
199 })
200 .collect();
201
202 let importances = importances?;
203 Ok(Array1::from_vec(importances))
204 }
205
206 #[cfg(not(feature = "parallel"))]
208 pub fn parallel_permutation_importance<F>(
209 x: &Array2<f64>,
210 y: &Array1<i32>,
211 baseline_score: f64,
212 scoring_fn: F,
213 n_repeats: usize,
214 random_seeds: &[u64],
215 ) -> Result<Array1<f64>>
216 where
217 F: Fn(&Array2<f64>, &Array1<i32>) -> Result<f64>,
218 {
219 let n_features = x.ncols();
220 let mut importances = Vec::with_capacity(n_features);
221
222 for feature_idx in 0..n_features {
223 let mut importance_scores = Vec::new();
224
225 for repeat in 0..n_repeats {
226 let seed = random_seeds
227 .get(repeat)
228 .copied()
229 .unwrap_or(42 + repeat as u64);
230 let mut x_permuted = x.clone();
231 Self::permute_feature(&mut x_permuted, feature_idx, seed)?;
232
233 let permuted_score = scoring_fn(&x_permuted, y)?;
234 importance_scores.push(baseline_score - permuted_score);
235 }
236
237 let avg_importance =
238 importance_scores.iter().sum::<f64>() / importance_scores.len() as f64;
239 importances.push(avg_importance);
240 }
241
242 Ok(Array1::from_vec(importances))
243 }
244
245 fn permute_feature(x: &mut Array2<f64>, feature_idx: usize, seed: u64) -> Result<()> {
247 let mut rng = thread_rng();
248 let mut column_values: Vec<f64> = x.column(feature_idx).to_vec();
249 column_values.shuffle(&mut rng);
250
251 for (i, &value) in column_values.iter().enumerate() {
252 x[[i, feature_idx]] = value;
253 }
254
255 Ok(())
256 }
257
258 pub fn optimal_n_threads(n_jobs: Option<i32>) -> usize {
260 match n_jobs {
261 Some(n) if n > 0 => n as usize,
262 Some(-1) => num_cpus::get(),
263 Some(n) if n < -1 => (num_cpus::get() as i32 + n + 1).max(1) as usize,
264 _ => 1,
265 }
266 }
267
268 #[cfg(feature = "parallel")]
270 pub fn with_thread_pool<T, F>(n_threads: usize, f: F) -> T
271 where
272 F: FnOnce() -> T + Send,
273 T: Send,
274 {
275 if n_threads <= 1 {
276 f()
277 } else {
278 let pool = rayon::ThreadPoolBuilder::new()
279 .num_threads(n_threads)
280 .build()
281 .unwrap_or_else(|_| rayon::ThreadPoolBuilder::new().build().unwrap());
282 pool.install(f)
283 }
284 }
285
286 #[cfg(not(feature = "parallel"))]
288 pub fn with_thread_pool<T, F>(n_threads: usize, f: F) -> T
289 where
290 F: FnOnce() -> T,
291 {
292 f() }
294
295 #[cfg(feature = "parallel")]
297 pub fn parallel_find_best_split(
298 x: &Array2<f64>,
299 y: &Array1<f64>,
300 sample_indices: &[usize],
301 config: &ParallelFeatureConfig,
302 ) -> Result<Option<FeatureSplit>> {
303 let n_features = x.ncols();
304 let n_samples = sample_indices.len();
305
306 if n_samples < config.min_samples_split {
307 return Ok(None);
308 }
309
310 let feature_indices = Self::select_features(n_features, config)?;
312
313 let feature_splits: Vec<Option<FeatureSplit>> = feature_indices
315 .into_par_iter()
316 .map(|feature_idx| {
317 Self::evaluate_feature_split(x, y, sample_indices, feature_idx, config)
318 .unwrap_or(None)
319 })
320 .collect();
321
322 let mut best_split = None;
324 let mut best_score = f64::NEG_INFINITY;
325
326 for split in feature_splits.into_iter().flatten() {
327 if split.is_valid() && split.quality_score() > best_score {
328 best_score = split.quality_score();
329 best_split = Some(split);
330 }
331 }
332
333 Ok(best_split)
334 }
335
336 #[cfg(not(feature = "parallel"))]
338 pub fn parallel_find_best_split(
339 x: &Array2<f64>,
340 y: &Array1<f64>,
341 sample_indices: &[usize],
342 config: &ParallelFeatureConfig,
343 ) -> Result<Option<FeatureSplit>> {
344 let n_features = x.ncols();
345 let n_samples = sample_indices.len();
346
347 if n_samples < config.min_samples_split {
348 return Ok(None);
349 }
350
351 let feature_indices = Self::select_features(n_features, config)?;
352
353 let mut best_split = None;
354 let mut best_score = f64::NEG_INFINITY;
355
356 for feature_idx in feature_indices {
357 if let Some(split) =
358 Self::evaluate_feature_split(x, y, sample_indices, feature_idx, config)?
359 {
360 if split.is_valid() && split.quality_score() > best_score {
361 best_score = split.quality_score();
362 best_split = Some(split);
363 }
364 }
365 }
366
367 Ok(best_split)
368 }
369
370 #[cfg(feature = "parallel")]
372 pub fn parallel_find_best_classification_split(
373 x: &Array2<f64>,
374 y: &Array1<i32>,
375 sample_indices: &[usize],
376 n_classes: usize,
377 config: &ParallelFeatureConfig,
378 ) -> Result<Option<FeatureSplit>> {
379 let n_features = x.ncols();
380 let n_samples = sample_indices.len();
381
382 if n_samples < config.min_samples_split {
383 return Ok(None);
384 }
385
386 let feature_indices = Self::select_features(n_features, config)?;
387
388 let feature_splits: Vec<Option<FeatureSplit>> = feature_indices
390 .into_par_iter()
391 .map(|feature_idx| {
392 Self::evaluate_classification_split(
393 x,
394 y,
395 sample_indices,
396 feature_idx,
397 n_classes,
398 config,
399 )
400 .unwrap_or(None)
401 })
402 .collect();
403
404 let mut best_split = None;
406 let mut best_score = f64::NEG_INFINITY;
407
408 for split in feature_splits.into_iter().flatten() {
409 if split.is_valid() && split.quality_score() > best_score {
410 best_score = split.quality_score();
411 best_split = Some(split);
412 }
413 }
414
415 Ok(best_split)
416 }
417
418 #[cfg(not(feature = "parallel"))]
420 pub fn parallel_find_best_classification_split(
421 x: &Array2<f64>,
422 y: &Array1<i32>,
423 sample_indices: &[usize],
424 n_classes: usize,
425 config: &ParallelFeatureConfig,
426 ) -> Result<Option<FeatureSplit>> {
427 let n_features = x.ncols();
428 let n_samples = sample_indices.len();
429
430 if n_samples < config.min_samples_split {
431 return Ok(None);
432 }
433
434 let feature_indices = Self::select_features(n_features, config)?;
435
436 let mut best_split = None;
437 let mut best_score = f64::NEG_INFINITY;
438
439 for feature_idx in feature_indices {
440 if let Some(split) = Self::evaluate_classification_split(
441 x,
442 y,
443 sample_indices,
444 feature_idx,
445 n_classes,
446 config,
447 )? {
448 if split.is_valid() && split.quality_score() > best_score {
449 best_score = split.quality_score();
450 best_split = Some(split);
451 }
452 }
453 }
454
455 Ok(best_split)
456 }
457
458 #[cfg(feature = "parallel")]
460 pub fn parallel_compute_feature_stats(
461 x: &Array2<f64>,
462 sample_indices: &[usize],
463 feature_indices: &[usize],
464 ) -> Vec<FeatureStats> {
465 feature_indices
466 .par_iter()
467 .map(|&feature_idx| Self::compute_feature_stats(x, sample_indices, feature_idx))
468 .collect()
469 }
470
471 #[cfg(not(feature = "parallel"))]
473 pub fn parallel_compute_feature_stats(
474 x: &Array2<f64>,
475 sample_indices: &[usize],
476 feature_indices: &[usize],
477 ) -> Vec<FeatureStats> {
478 feature_indices
479 .iter()
480 .map(|&feature_idx| Self::compute_feature_stats(x, sample_indices, feature_idx))
481 .collect()
482 }
483
484 fn select_features(n_features: usize, config: &ParallelFeatureConfig) -> Result<Vec<usize>> {
486 let max_features = config.max_features.unwrap_or(n_features);
487 let n_features_to_use = max_features.min(n_features);
488
489 if n_features_to_use >= n_features {
490 Ok((0..n_features).collect())
492 } else {
493 let mut rng = thread_rng();
495
496 let mut all_features: Vec<usize> = (0..n_features).collect();
497 all_features.shuffle(&mut rng);
498 all_features.truncate(n_features_to_use);
499
500 Ok(all_features)
501 }
502 }
503
504 fn evaluate_feature_split(
506 x: &Array2<f64>,
507 y: &Array1<f64>,
508 sample_indices: &[usize],
509 feature_idx: usize,
510 config: &ParallelFeatureConfig,
511 ) -> Result<Option<FeatureSplit>> {
512 let n_samples = sample_indices.len();
513
514 if n_samples < config.min_samples_split {
515 return Ok(None);
516 }
517
518 let mut feature_target_pairs: Vec<(f64, f64)> = sample_indices
520 .iter()
521 .map(|&idx| (x[[idx, feature_idx]], y[idx]))
522 .collect();
523
524 feature_target_pairs.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
526
527 let mut best_threshold = 0.0;
528 let mut best_impurity_reduction = f64::NEG_INFINITY;
529 let mut best_n_left = 0;
530 let mut best_n_right = 0;
531
532 let total_sum: f64 = feature_target_pairs.iter().map(|(_, target)| target).sum();
534 let total_sum_sq: f64 = feature_target_pairs
535 .iter()
536 .map(|(_, target)| target * target)
537 .sum();
538 let total_variance =
539 (total_sum_sq / n_samples as f64) - (total_sum / n_samples as f64).powi(2);
540
541 for i in 1..n_samples {
543 let current_val = feature_target_pairs[i - 1].0;
544 let next_val = feature_target_pairs[i].0;
545
546 if (next_val - current_val).abs() < 1e-10 {
547 continue; }
549
550 let n_left = i;
551 let n_right = n_samples - i;
552
553 if n_left < config.min_samples_leaf || n_right < config.min_samples_leaf {
554 continue;
555 }
556
557 let left_sum: f64 = feature_target_pairs[..i]
559 .iter()
560 .map(|(_, target)| target)
561 .sum();
562 let left_sum_sq: f64 = feature_target_pairs[..i]
563 .iter()
564 .map(|(_, target)| target * target)
565 .sum();
566 let left_variance = (left_sum_sq / n_left as f64) - (left_sum / n_left as f64).powi(2);
567
568 let right_sum = total_sum - left_sum;
569 let right_sum_sq = total_sum_sq - left_sum_sq;
570 let right_variance =
571 (right_sum_sq / n_right as f64) - (right_sum / n_right as f64).powi(2);
572
573 let weighted_variance = (n_left as f64 / n_samples as f64) * left_variance
575 + (n_right as f64 / n_samples as f64) * right_variance;
576 let impurity_reduction = total_variance - weighted_variance;
577
578 if impurity_reduction > best_impurity_reduction {
579 best_impurity_reduction = impurity_reduction;
580 best_threshold = (current_val + next_val) / 2.0;
581 best_n_left = n_left;
582 best_n_right = n_right;
583 }
584 }
585
586 if best_impurity_reduction > config.min_impurity_decrease {
587 Ok(Some(FeatureSplit::new(
588 feature_idx,
589 best_threshold,
590 best_impurity_reduction,
591 best_n_left,
592 best_n_right,
593 )))
594 } else {
595 Ok(None)
596 }
597 }
598
599 fn evaluate_classification_split(
601 x: &Array2<f64>,
602 y: &Array1<i32>,
603 sample_indices: &[usize],
604 feature_idx: usize,
605 n_classes: usize,
606 config: &ParallelFeatureConfig,
607 ) -> Result<Option<FeatureSplit>> {
608 let n_samples = sample_indices.len();
609
610 if n_samples < config.min_samples_split {
611 return Ok(None);
612 }
613
614 let mut feature_target_pairs: Vec<(f64, i32)> = sample_indices
616 .iter()
617 .map(|&idx| (x[[idx, feature_idx]], y[idx]))
618 .collect();
619
620 feature_target_pairs.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
622
623 let mut best_threshold = 0.0;
624 let mut best_information_gain = f64::NEG_INFINITY;
625 let mut best_n_left = 0;
626 let mut best_n_right = 0;
627
628 let mut class_counts = vec![0; n_classes];
630 for (_, class) in &feature_target_pairs {
631 if *class >= 0 && (*class as usize) < n_classes {
632 class_counts[*class as usize] += 1;
633 }
634 }
635
636 let initial_entropy = Self::calculate_entropy(&class_counts, n_samples);
637
638 for i in 1..n_samples {
640 let current_val = feature_target_pairs[i - 1].0;
641 let next_val = feature_target_pairs[i].0;
642
643 if (next_val - current_val).abs() < 1e-10 {
644 continue;
645 }
646
647 let n_left = i;
648 let n_right = n_samples - i;
649
650 if n_left < config.min_samples_leaf || n_right < config.min_samples_leaf {
651 continue;
652 }
653
654 let mut left_counts = vec![0; n_classes];
656 let mut right_counts = vec![0; n_classes];
657
658 for j in 0..i {
659 let class = feature_target_pairs[j].1;
660 if class >= 0 && (class as usize) < n_classes {
661 left_counts[class as usize] += 1;
662 }
663 }
664
665 for j in i..n_samples {
666 let class = feature_target_pairs[j].1;
667 if class >= 0 && (class as usize) < n_classes {
668 right_counts[class as usize] += 1;
669 }
670 }
671
672 let left_entropy = Self::calculate_entropy(&left_counts, n_left);
674 let right_entropy = Self::calculate_entropy(&right_counts, n_right);
675
676 let weighted_entropy = (n_left as f64 / n_samples as f64) * left_entropy
678 + (n_right as f64 / n_samples as f64) * right_entropy;
679
680 let information_gain = initial_entropy - weighted_entropy;
681
682 if information_gain > best_information_gain {
683 best_information_gain = information_gain;
684 best_threshold = (current_val + next_val) / 2.0;
685 best_n_left = n_left;
686 best_n_right = n_right;
687 }
688 }
689
690 if best_information_gain > config.min_impurity_decrease {
691 Ok(Some(
692 FeatureSplit::new(
693 feature_idx,
694 best_threshold,
695 best_information_gain, best_n_left,
697 best_n_right,
698 )
699 .with_information_gain(best_information_gain),
700 ))
701 } else {
702 Ok(None)
703 }
704 }
705
706 fn calculate_entropy(class_counts: &[usize], total_samples: usize) -> f64 {
708 if total_samples == 0 {
709 return 0.0;
710 }
711
712 let mut entropy = 0.0;
713 for &count in class_counts {
714 if count > 0 {
715 let probability = count as f64 / total_samples as f64;
716 entropy -= probability * probability.log2();
717 }
718 }
719
720 entropy
721 }
722
723 fn compute_feature_stats(
725 x: &Array2<f64>,
726 sample_indices: &[usize],
727 feature_idx: usize,
728 ) -> FeatureStats {
729 let values: Vec<f64> = sample_indices
730 .iter()
731 .map(|&idx| x[[idx, feature_idx]])
732 .collect();
733
734 if values.is_empty() {
735 return FeatureStats::default();
736 }
737
738 let mut sorted_values = values.clone();
739 sorted_values.sort_by(|a, b| a.partial_cmp(b).unwrap());
740
741 let n = values.len();
742 let sum: f64 = values.iter().sum();
743 let mean = sum / n as f64;
744
745 let variance = values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n as f64;
746
747 let std_dev = variance.sqrt();
748 let min = sorted_values[0];
749 let max = sorted_values[n - 1];
750 let median = if n % 2 == 0 {
751 (sorted_values[n / 2 - 1] + sorted_values[n / 2]) / 2.0
752 } else {
753 sorted_values[n / 2]
754 };
755
756 let mut unique_values = sorted_values.clone();
758 unique_values.dedup_by(|a, b| (*a - *b).abs() < 1e-10);
759 let n_unique = unique_values.len();
760
761 FeatureStats {
762 feature_idx,
763 mean,
764 std_dev,
765 min,
766 max,
767 median,
768 n_unique,
769 n_samples: n,
770 }
771 }
772}
773
/// Extension trait that maps a function over the items of a collection,
/// using rayon when the `parallel` feature is enabled.
pub trait ParallelTreeExt<T> {
    /// Consumes `self`, applies `f` to every item and returns the results.
    fn maybe_parallel_process<U, F>(self, f: F) -> Vec<U>
    where
        T: Send + Sync,
        U: Send,
        F: Fn(T) -> U + Sync + Send;
}

/// Blanket implementation for anything that can be turned into an iterator
/// whose iterator type is `Send` and whose items are `Send + Sync`.
impl<I, T> ParallelTreeExt<T> for I
where
    I: IntoIterator<Item = T>,
    I::IntoIter: Send,
    T: Send + Sync,
{
    /// Parallel variant: buffers the items into a `Vec`, then maps `f` over
    /// them with rayon.
    #[cfg(feature = "parallel")]
    fn maybe_parallel_process<U, F>(self, f: F) -> Vec<U>
    where
        T: Send + Sync,
        U: Send,
        F: Fn(T) -> U + Sync + Send,
    {
        let buffered: Vec<T> = self.into_iter().collect();
        buffered.into_par_iter().map(f).collect()
    }

    /// Sequential variant: maps `f` over the items in iteration order.
    #[cfg(not(feature = "parallel"))]
    fn maybe_parallel_process<U, F>(self, f: F) -> Vec<U>
    where
        F: Fn(T) -> U,
    {
        let mut processed = Vec::new();
        processed.extend(self.into_iter().map(f));
        processed
    }
}
812
/// A candidate split of a tree node on a single feature.
#[derive(Debug, Clone)]
pub struct FeatureSplit {
    /// Index of the feature column the split is made on.
    pub feature_idx: usize,
    /// Threshold value separating the left side from the right side.
    pub threshold: f64,
    /// Impurity reduction achieved by the split.
    pub impurity_reduction: f64,
    /// Number of samples on the left side.
    pub n_left: usize,
    /// Number of samples on the right side.
    pub n_right: usize,
    /// Information gain, when one was attached via `with_information_gain`.
    pub information_gain: Option<f64>,
}

impl FeatureSplit {
    /// Creates a split with no information gain attached.
    pub fn new(
        feature_idx: usize,
        threshold: f64,
        impurity_reduction: f64,
        n_left: usize,
        n_right: usize,
    ) -> Self {
        FeatureSplit {
            information_gain: None,
            feature_idx,
            threshold,
            impurity_reduction,
            n_left,
            n_right,
        }
    }

    /// Returns the same split with `information_gain` set to `Some(gain)`.
    pub fn with_information_gain(self, gain: f64) -> Self {
        Self {
            information_gain: Some(gain),
            ..self
        }
    }

    /// A split is valid when both sides hold at least one sample.
    pub fn is_valid(&self) -> bool {
        self.n_left.min(self.n_right) > 0
    }

    /// Score used to rank splits: the information gain when present,
    /// otherwise the impurity reduction.
    pub fn quality_score(&self) -> f64 {
        match self.information_gain {
            Some(gain) => gain,
            None => self.impurity_reduction,
        }
    }
}
865
/// Settings that control how candidate splits are searched and filtered.
#[derive(Debug, Clone)]
pub struct ParallelFeatureConfig {
    /// Nodes with fewer samples than this are not split.
    pub min_samples_split: usize,
    /// A candidate split is rejected when either side would hold fewer
    /// samples than this.
    pub min_samples_leaf: usize,
    /// The best split must score strictly above this value to be returned.
    pub min_impurity_decrease: f64,
    /// Upper bound on the number of features considered per split; `None`
    /// means all features.
    pub max_features: Option<usize>,
    /// Optional seed value.
    /// NOTE(review): no code visible in this file reads this field — confirm
    /// where it is consumed.
    pub random_state: Option<u64>,
}

impl Default for ParallelFeatureConfig {
    /// Defaults: split nodes of at least 2 samples, leaves of at least 1,
    /// no minimum impurity decrease, all features, no seed.
    fn default() -> Self {
        ParallelFeatureConfig {
            max_features: None,
            random_state: None,
            min_impurity_decrease: 0.0,
            min_samples_leaf: 1,
            min_samples_split: 2,
        }
    }
}
892
/// Summary statistics of one feature column over a set of samples.
///
/// The derived `Default` sets every field to zero.
#[derive(Debug, Clone, Default)]
pub struct FeatureStats {
    /// Index of the feature column the statistics describe.
    pub feature_idx: usize,
    /// Arithmetic mean of the values.
    pub mean: f64,
    /// Standard deviation of the values.
    pub std_dev: f64,
    /// Smallest value.
    pub min: f64,
    /// Largest value.
    pub max: f64,
    /// Median value.
    pub median: f64,
    /// Number of distinct values.
    pub n_unique: usize,
    /// Number of samples the statistics were computed over.
    pub n_samples: usize,
}

impl FeatureStats {
    /// A feature is informative when it has more than one distinct value and
    /// a standard deviation above `1e-10`.
    pub fn is_informative(&self) -> bool {
        let has_distinct_values = self.n_unique > 1;
        let has_spread = self.std_dev > 1e-10;
        has_distinct_values && has_spread
    }

    /// Difference between the largest and the smallest value.
    pub fn range(&self) -> f64 {
        let (low, high) = (self.min, self.max);
        high - low
    }

    /// Standard deviation divided by the absolute mean, or infinity when the
    /// absolute mean is not above `1e-10`.
    pub fn coefficient_of_variation(&self) -> f64 {
        let magnitude = self.mean.abs();
        let mean_is_nonzero = magnitude > 1e-10;

        if mean_is_nonzero {
            self.std_dev / magnitude
        } else {
            f64::INFINITY
        }
    }
}