use scirs2_core::ndarray::Array1;
use scirs2_core::random::{Rng, SeedableRng};
use sklears_core::error::SklearsError;
use std::collections::HashMap;

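/// Statistical significance tests for comparing two groups of performance scores.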
#[derive(Debug, Clone, Copy)]
pub enum SignificanceTest {
    /// Student's t-test (assumes equal variances).
    TTest,
    /// Welch's t-test (does not assume equal variances).
    WelchTTest,
    /// Wilcoxon rank-sum test (non-parametric).
    WilcoxonRankSum,
    /// Mann-Whitney U test (equivalent to the rank-sum test).
    MannWhitneyU,
    /// Permutation test with the given number of permutations.
    PermutationTest { n_permutations: usize },
    /// Bootstrap test with the given number of resamples.
    BootstrapTest { n_bootstrap: usize },
}

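/// Effect size measures quantifying the magnitude of a difference between groups.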
#[derive(Debug, Clone, Copy)]
pub enum EffectSizeMeasure {
    /// Standardized mean difference using the pooled standard deviation.
    CohensD,
    /// Standardized mean difference using group B's standard deviation.
    GlassDelta,
    /// Cohen's d with a small-sample bias correction.
    HedgesG,
    /// Non-parametric dominance measure in [-1, 1].
    CliffsDelta,
    /// Probability that a random value from group A exceeds one from group B.
    CommonLanguageEffect,
    /// Alias for the common language effect size.
    ProbabilityOfSuperiority,
}

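/// Methods for constructing confidence intervals on the mean difference.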
#[derive(Debug, Clone, Copy)]
pub enum ConfidenceIntervalType {
    /// Normal-approximation interval.
    Normal,
    /// Bootstrap percentile interval.
    BootstrapPercentile { n_bootstrap: usize },
    /// Bias-corrected and accelerated (BCa) bootstrap interval.
    BootstrapBCa { n_bootstrap: usize },
    /// Interval based on the t-distribution with pooled variance.
    TDistribution,
}

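/// Result of a two-sample significance test.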
#[derive(Debug, Clone)]
pub struct SignificanceTestResult {
    pub test_type: SignificanceTest,
    pub statistic: f64,
    pub p_value: f64,
    pub effect_size: f64,
    pub effect_size_measure: EffectSizeMeasure,
    pub confidence_interval: (f64, f64),
    pub sample_sizes: (usize, usize),
    pub is_significant: bool,
    pub alpha_level: f64,
}

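/// Result of an effect size computation, with confidence interval and interpretation.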
#[derive(Debug, Clone)]
pub struct EffectSizeResult {
    pub measure: EffectSizeMeasure,
    pub value: f64,
    pub confidence_interval: (f64, f64),
    pub interpretation: EffectSizeInterpretation,
    pub sample_sizes: (usize, usize),
}

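/// Qualitative interpretation of an effect size magnitude.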
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EffectSizeInterpretation {
    Negligible,
    Small,
    Medium,
    Large,
    VeryLarge,
}

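/// Full result of comparing several models' cross-validation scores.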
#[derive(Debug, Clone)]
pub struct ModelComparisonResult {
    pub model_names: Vec<String>,
    pub performance_scores: Vec<f64>,
    pub pairwise_comparisons: Vec<PairwiseComparison>,
    pub ranking: Vec<usize>,
    pub best_model_index: usize,
    pub statistical_summary: StatisticalSummary,
}

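/// Statistical comparison between one pair of models.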
#[derive(Debug, Clone)]
pub struct PairwiseComparison {
    pub model_a_index: usize,
    pub model_b_index: usize,
    pub significance_test: SignificanceTestResult,
    pub bayes_factor: Option<f64>,
    pub practical_significance: bool,
}

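/// Aggregate statistics across all compared models.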
#[derive(Debug, Clone)]
pub struct StatisticalSummary {
    pub mean_scores: Vec<f64>,
    pub std_scores: Vec<f64>,
    pub confidence_intervals: Vec<(f64, f64)>,
    pub overall_best_model: usize,
    pub significantly_different_pairs: Vec<(usize, usize)>,
    pub effect_sizes: HashMap<(usize, usize), f64>,
}

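/// Analyzer for statistically comparing model performance scores.
///
/// A minimal usage sketch (illustrative; `names` and `cv_scores` are assumed
/// to be a `Vec<String>` and a matching `Vec<Array1<f64>>` of CV scores):
///
/// ```ignore
/// let analyzer = ComparativeAnalyzer::new()
///     .with_alpha(0.05)
///     .with_correction(MultipleComparisonCorrection::Holm)
///     .with_random_state(42);
/// let result = analyzer.compare_models(names, cv_scores)?;
/// println!("best model: {}", result.model_names[result.best_model_index]);
/// ```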
pub struct ComparativeAnalyzer {
    alpha_level: f64,
    random_state: Option<u64>,
    correction_method: MultipleComparisonCorrection,
}

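/// Corrections for controlling error rates across multiple comparisons.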
#[derive(Debug, Clone, Copy)]
pub enum MultipleComparisonCorrection {
    /// No correction.
    None,
    /// Bonferroni correction (controls FWER; conservative).
    Bonferroni,
    /// Holm step-down procedure (controls FWER; more powerful than Bonferroni).
    Holm,
    /// Benjamini-Hochberg step-up procedure (controls FDR under independence).
    BenjaminiHochberg,
    /// Benjamini-Yekutieli procedure (controls FDR under arbitrary dependence).
    BenjaminiYekutieli,
}

impl ComparativeAnalyzer {
    /// Creates an analyzer with alpha = 0.05 and Holm correction.
    pub fn new() -> Self {
        Self {
            alpha_level: 0.05,
            random_state: None,
            correction_method: MultipleComparisonCorrection::Holm,
        }
    }

    /// Sets the significance level.
    pub fn with_alpha(mut self, alpha: f64) -> Self {
        self.alpha_level = alpha;
        self
    }

    /// Sets the seed used for permutation and bootstrap resampling.
    pub fn with_random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }

    /// Sets the multiple comparison correction method.
    pub fn with_correction(mut self, correction: MultipleComparisonCorrection) -> Self {
        self.correction_method = correction;
        self
    }

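    /// Runs a two-sample significance test and computes the requested effect size.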
    pub fn significance_test(
        &self,
        group_a: &Array1<f64>,
        group_b: &Array1<f64>,
        test_type: SignificanceTest,
        effect_size_measure: EffectSizeMeasure,
    ) -> Result<SignificanceTestResult, SklearsError> {
        let statistic = self.compute_test_statistic(group_a, group_b, test_type)?;
        let p_value = self.compute_p_value(group_a, group_b, test_type, statistic)?;
        let effect_size = self.compute_effect_size(group_a, group_b, effect_size_measure)?;
        let confidence_interval = self.compute_confidence_interval(
            group_a,
            group_b,
            ConfidenceIntervalType::Normal,
            0.95,
        )?;

        // A single comparison, so the correction leaves alpha unchanged.
        let corrected_alpha = self.apply_correction(self.alpha_level, 1);
        let is_significant = p_value < corrected_alpha;

        Ok(SignificanceTestResult {
            test_type,
            statistic,
            p_value,
            effect_size,
            effect_size_measure,
            confidence_interval,
            sample_sizes: (group_a.len(), group_b.len()),
            is_significant,
            alpha_level: corrected_alpha,
        })
    }

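    /// Computes an effect size with a confidence interval and qualitative interpretation.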
    pub fn effect_size(
        &self,
        group_a: &Array1<f64>,
        group_b: &Array1<f64>,
        measure: EffectSizeMeasure,
    ) -> Result<EffectSizeResult, SklearsError> {
        let value = self.compute_effect_size(group_a, group_b, measure)?;
        let confidence_interval =
            self.effect_size_confidence_interval(group_a, group_b, measure)?;
        let interpretation = self.interpret_effect_size(value, measure);

        Ok(EffectSizeResult {
            measure,
            value,
            confidence_interval,
            interpretation,
            sample_sizes: (group_a.len(), group_b.len()),
        })
    }

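    /// Compares several models from their cross-validation scores: ranks them,
    /// runs all pairwise significance tests, and applies the configured
    /// multiple comparison correction.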
    pub fn compare_models(
        &self,
        model_names: Vec<String>,
        cv_scores: Vec<Array1<f64>>,
    ) -> Result<ModelComparisonResult, SklearsError> {
        if model_names.len() != cv_scores.len() {
            return Err(SklearsError::InvalidInput(
                "Number of model names must match number of score arrays".to_string(),
            ));
        }

        let performance_scores: Vec<f64> = cv_scores
            .iter()
            .map(|scores| scores.mean().unwrap_or(0.0))
            .collect();

        // Rank models by mean score, best first.
        let mut ranking: Vec<usize> = (0..model_names.len()).collect();
        ranking.sort_by(|&a, &b| {
            performance_scores[b]
                .partial_cmp(&performance_scores[a])
                .unwrap()
        });
        let best_model_index = ranking[0];

        let mut pairwise_comparisons = Vec::new();
        let n_models = model_names.len();
        let n_comparisons = n_models * (n_models - 1) / 2;

        for i in 0..n_models {
            for j in (i + 1)..n_models {
                let significance_test = self.significance_test(
                    &cv_scores[i],
                    &cv_scores[j],
                    SignificanceTest::WilcoxonRankSum,
                    EffectSizeMeasure::CliffsDelta,
                )?;

                let bayes_factor = self.compute_bayes_factor(&cv_scores[i], &cv_scores[j])?;
                // Heuristic: treat |effect size| > 0.2 as practically significant.
                let practical_significance = significance_test.effect_size.abs() > 0.2;

                pairwise_comparisons.push(PairwiseComparison {
                    model_a_index: i,
                    model_b_index: j,
                    significance_test,
                    bayes_factor: Some(bayes_factor),
                    practical_significance,
                });
            }
        }

        let corrected_comparisons =
            self.apply_multiple_comparison_correction(pairwise_comparisons, n_comparisons);

        let statistical_summary =
            self.compute_statistical_summary(&cv_scores, &corrected_comparisons)?;

        Ok(ModelComparisonResult {
            model_names,
            performance_scores,
            pairwise_comparisons: corrected_comparisons,
            ranking,
            best_model_index,
            statistical_summary,
        })
    }

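    /// Dispatches to the statistic computation for the chosen test.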
    fn compute_test_statistic(
        &self,
        group_a: &Array1<f64>,
        group_b: &Array1<f64>,
        test_type: SignificanceTest,
    ) -> Result<f64, SklearsError> {
        match test_type {
            SignificanceTest::TTest | SignificanceTest::WelchTTest => self.compute_t_statistic(
                group_a,
                group_b,
                matches!(test_type, SignificanceTest::WelchTTest),
            ),
            SignificanceTest::WilcoxonRankSum | SignificanceTest::MannWhitneyU => {
                self.compute_rank_sum_statistic(group_a, group_b)
            }
            SignificanceTest::PermutationTest { n_permutations } => {
                self.compute_permutation_statistic(group_a, group_b, n_permutations)
            }
            SignificanceTest::BootstrapTest { n_bootstrap } => {
                self.compute_bootstrap_statistic(group_a, group_b, n_bootstrap)
            }
        }
    }

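    /// Two-sample t statistic; uses the pooled variance unless `welch` is set.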
    fn compute_t_statistic(
        &self,
        group_a: &Array1<f64>,
        group_b: &Array1<f64>,
        welch: bool,
    ) -> Result<f64, SklearsError> {
        let mean_a = group_a.mean().unwrap_or(0.0);
        let mean_b = group_b.mean().unwrap_or(0.0);
        let n_a = group_a.len() as f64;
        let n_b = group_b.len() as f64;

        let var_a = group_a.iter().map(|&x| (x - mean_a).powi(2)).sum::<f64>() / (n_a - 1.0);
        let var_b = group_b.iter().map(|&x| (x - mean_b).powi(2)).sum::<f64>() / (n_b - 1.0);

        let t_statistic = if welch {
            // Welch: standard error from the separate group variances.
            let se = (var_a / n_a + var_b / n_b).sqrt();
            (mean_a - mean_b) / se
        } else {
            // Student: standard error from the pooled variance.
            let pooled_var = ((n_a - 1.0) * var_a + (n_b - 1.0) * var_b) / (n_a + n_b - 2.0);
            let se = pooled_var.sqrt() * (1.0 / n_a + 1.0 / n_b).sqrt();
            (mean_a - mean_b) / se
        };

        Ok(t_statistic)
    }

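    /// Normal-approximation z statistic for the rank sum of group A.
    /// Note: ties are given consecutive ranks rather than averaged.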
    fn compute_rank_sum_statistic(
        &self,
        group_a: &Array1<f64>,
        group_b: &Array1<f64>,
    ) -> Result<f64, SklearsError> {
        let mut combined: Vec<(f64, usize)> = group_a.iter().map(|&x| (x, 0)).collect();
        combined.extend(group_b.iter().map(|&x| (x, 1)));
        combined.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());

        let mut rank_sum_a = 0.0;
        for (rank, (_, group)) in combined.iter().enumerate() {
            if *group == 0 {
                rank_sum_a += (rank + 1) as f64;
            }
        }

        let n_a = group_a.len() as f64;
        let n_b = group_b.len() as f64;
        let expected_rank_sum = n_a * (n_a + n_b + 1.0) / 2.0;
        let variance = n_a * n_b * (n_a + n_b + 1.0) / 12.0;

        let z_statistic = (rank_sum_a - expected_rank_sum) / variance.sqrt();
        Ok(z_statistic)
    }

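    /// Permutation test for the difference in means. Note: the returned value
    /// is already a p-value, not a raw statistic; `compute_p_value` passes it
    /// through unchanged.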
    fn compute_permutation_statistic(
        &self,
        group_a: &Array1<f64>,
        group_b: &Array1<f64>,
        n_permutations: usize,
    ) -> Result<f64, SklearsError> {
        let observed_diff = group_a.mean().unwrap_or(0.0) - group_b.mean().unwrap_or(0.0);

        let mut rng = if let Some(seed) = self.random_state {
            scirs2_core::random::rngs::StdRng::seed_from_u64(seed)
        } else {
            scirs2_core::random::rngs::StdRng::seed_from_u64(0)
        };

        let mut combined: Vec<f64> = group_a.iter().chain(group_b.iter()).cloned().collect();
        let n_a = group_a.len();

        let mut extreme_count = 0;
        for _ in 0..n_permutations {
            // Fisher-Yates shuffle of the pooled samples.
            for i in (1..combined.len()).rev() {
                let j = rng.gen_range(0..i + 1);
                combined.swap(i, j);
            }

            let perm_mean_a = combined[..n_a].iter().sum::<f64>() / n_a as f64;
            let perm_mean_b = combined[n_a..].iter().sum::<f64>() / (combined.len() - n_a) as f64;
            let perm_diff = perm_mean_a - perm_mean_b;

            if perm_diff.abs() >= observed_diff.abs() {
                extreme_count += 1;
            }
        }

        let p_value = extreme_count as f64 / n_permutations as f64;
        Ok(p_value)
    }

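    /// Bootstrap z-like statistic: observed mean difference divided by its
    /// bootstrap standard error.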
    fn compute_bootstrap_statistic(
        &self,
        group_a: &Array1<f64>,
        group_b: &Array1<f64>,
        n_bootstrap: usize,
    ) -> Result<f64, SklearsError> {
        let observed_diff = group_a.mean().unwrap_or(0.0) - group_b.mean().unwrap_or(0.0);

        let mut rng = if let Some(seed) = self.random_state {
            scirs2_core::random::rngs::StdRng::seed_from_u64(seed)
        } else {
            scirs2_core::random::rngs::StdRng::seed_from_u64(0)
        };

        let mut bootstrap_diffs = Vec::with_capacity(n_bootstrap);

        for _ in 0..n_bootstrap {
            // Resample each group with replacement.
            let boot_a: Vec<f64> = (0..group_a.len())
                .map(|_| group_a[rng.gen_range(0..group_a.len())])
                .collect();
            let boot_b: Vec<f64> = (0..group_b.len())
                .map(|_| group_b[rng.gen_range(0..group_b.len())])
                .collect();

            let boot_mean_a = boot_a.iter().sum::<f64>() / boot_a.len() as f64;
            let boot_mean_b = boot_b.iter().sum::<f64>() / boot_b.len() as f64;
            bootstrap_diffs.push(boot_mean_a - boot_mean_b);
        }

        bootstrap_diffs.sort_by(|a, b| a.partial_cmp(b).unwrap());
        let std_bootstrap = {
            let mean_bootstrap = bootstrap_diffs.iter().sum::<f64>() / bootstrap_diffs.len() as f64;
            let variance = bootstrap_diffs
                .iter()
                .map(|&x| (x - mean_bootstrap).powi(2))
                .sum::<f64>()
                / (bootstrap_diffs.len() - 1) as f64;
            variance.sqrt()
        };

        Ok(observed_diff / std_bootstrap)
    }

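    /// Converts a test statistic into a two-sided p-value.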
    fn compute_p_value(
        &self,
        group_a: &Array1<f64>,
        group_b: &Array1<f64>,
        test_type: SignificanceTest,
        statistic: f64,
    ) -> Result<f64, SklearsError> {
        match test_type {
            SignificanceTest::TTest => {
                let df = group_a.len() + group_b.len() - 2;
                self.t_distribution_p_value(statistic, df)
            }
            SignificanceTest::WelchTTest => {
                let n_a = group_a.len() as f64;
                let n_b = group_b.len() as f64;
                let var_a = group_a
                    .iter()
                    .map(|&x| (x - group_a.mean().unwrap_or(0.0)).powi(2))
                    .sum::<f64>()
                    / (n_a - 1.0);
                let var_b = group_b
                    .iter()
                    .map(|&x| (x - group_b.mean().unwrap_or(0.0)).powi(2))
                    .sum::<f64>()
                    / (n_b - 1.0);

                // Welch-Satterthwaite degrees of freedom.
                let s_a2_n_a = var_a / n_a;
                let s_b2_n_b = var_b / n_b;
                let numerator = (s_a2_n_a + s_b2_n_b).powi(2);
                let denominator = s_a2_n_a.powi(2) / (n_a - 1.0) + s_b2_n_b.powi(2) / (n_b - 1.0);
                let df = (numerator / denominator).floor() as usize;

                self.t_distribution_p_value(statistic, df)
            }
            SignificanceTest::WilcoxonRankSum | SignificanceTest::MannWhitneyU => {
                self.normal_distribution_p_value(statistic)
            }
            SignificanceTest::PermutationTest { .. } => {
                // The permutation "statistic" is already a p-value.
                Ok(statistic)
            }
            SignificanceTest::BootstrapTest { .. } => self.normal_distribution_p_value(statistic),
        }
    }

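    /// Dispatches to the requested effect size measure.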
    fn compute_effect_size(
        &self,
        group_a: &Array1<f64>,
        group_b: &Array1<f64>,
        measure: EffectSizeMeasure,
    ) -> Result<f64, SklearsError> {
        match measure {
            EffectSizeMeasure::CohensD => self.cohens_d(group_a, group_b),
            EffectSizeMeasure::GlassDelta => self.glass_delta(group_a, group_b),
            EffectSizeMeasure::HedgesG => self.hedges_g(group_a, group_b),
            EffectSizeMeasure::CliffsDelta => self.cliffs_delta(group_a, group_b),
            EffectSizeMeasure::CommonLanguageEffect => {
                self.common_language_effect(group_a, group_b)
            }
            EffectSizeMeasure::ProbabilityOfSuperiority => {
                self.probability_of_superiority(group_a, group_b)
            }
        }
    }

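    /// Cohen's d: mean difference divided by the pooled standard deviation.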
    fn cohens_d(&self, group_a: &Array1<f64>, group_b: &Array1<f64>) -> Result<f64, SklearsError> {
        let mean_a = group_a.mean().unwrap_or(0.0);
        let mean_b = group_b.mean().unwrap_or(0.0);
        let n_a = group_a.len() as f64;
        let n_b = group_b.len() as f64;

        let var_a = group_a.iter().map(|&x| (x - mean_a).powi(2)).sum::<f64>() / (n_a - 1.0);
        let var_b = group_b.iter().map(|&x| (x - mean_b).powi(2)).sum::<f64>() / (n_b - 1.0);

        let pooled_std = (((n_a - 1.0) * var_a + (n_b - 1.0) * var_b) / (n_a + n_b - 2.0)).sqrt();

        Ok((mean_a - mean_b) / pooled_std)
    }

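    /// Glass's delta: mean difference divided by group B's standard deviation.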
    fn glass_delta(
        &self,
        group_a: &Array1<f64>,
        group_b: &Array1<f64>,
    ) -> Result<f64, SklearsError> {
        let mean_a = group_a.mean().unwrap_or(0.0);
        let mean_b = group_b.mean().unwrap_or(0.0);
        let n_b = group_b.len() as f64;

        let var_b = group_b.iter().map(|&x| (x - mean_b).powi(2)).sum::<f64>() / (n_b - 1.0);
        let std_b = var_b.sqrt();

        Ok((mean_a - mean_b) / std_b)
    }

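    /// Hedges' g: Cohen's d with a small-sample bias correction.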
    fn hedges_g(&self, group_a: &Array1<f64>, group_b: &Array1<f64>) -> Result<f64, SklearsError> {
        let cohens_d = self.cohens_d(group_a, group_b)?;
        let n_a = group_a.len() as f64;
        let n_b = group_b.len() as f64;

        // Approximate bias correction factor J = 1 - 3 / (4 * df - 1).
        let df = n_a + n_b - 2.0;
        let correction = 1.0 - 3.0 / (4.0 * df - 1.0);

        Ok(cohens_d * correction)
    }

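    /// Cliff's delta: (P(a > b) - P(a < b)) over all cross-group pairs.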
    fn cliffs_delta(
        &self,
        group_a: &Array1<f64>,
        group_b: &Array1<f64>,
    ) -> Result<f64, SklearsError> {
        let mut dominance_count = 0;
        let total_comparisons = group_a.len() * group_b.len();

        for &a in group_a.iter() {
            for &b in group_b.iter() {
                if a > b {
                    dominance_count += 1;
                } else if a < b {
                    dominance_count -= 1;
                }
            }
        }

        Ok(dominance_count as f64 / total_comparisons as f64)
    }

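    /// Common language effect size: fraction of cross-group pairs where a > b.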
    fn common_language_effect(
        &self,
        group_a: &Array1<f64>,
        group_b: &Array1<f64>,
    ) -> Result<f64, SklearsError> {
        let mut superiority_count = 0;
        let total_comparisons = group_a.len() * group_b.len();

        for &a in group_a.iter() {
            for &b in group_b.iter() {
                if a > b {
                    superiority_count += 1;
                }
            }
        }

        Ok(superiority_count as f64 / total_comparisons as f64)
    }

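    /// Probability of superiority is identical to the common language effect size.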
    fn probability_of_superiority(
        &self,
        group_a: &Array1<f64>,
        group_b: &Array1<f64>,
    ) -> Result<f64, SklearsError> {
        self.common_language_effect(group_a, group_b)
    }

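    /// Confidence interval for the difference in means.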
    fn compute_confidence_interval(
        &self,
        group_a: &Array1<f64>,
        group_b: &Array1<f64>,
        ci_type: ConfidenceIntervalType,
        confidence_level: f64,
    ) -> Result<(f64, f64), SklearsError> {
        match ci_type {
            ConfidenceIntervalType::Normal => {
                let mean_diff = group_a.mean().unwrap_or(0.0) - group_b.mean().unwrap_or(0.0);
                let n_a = group_a.len() as f64;
                let n_b = group_b.len() as f64;
                let mean_a = group_a.mean().unwrap_or(0.0);
                let mean_b = group_b.mean().unwrap_or(0.0);

                let var_a =
                    group_a.iter().map(|&x| (x - mean_a).powi(2)).sum::<f64>() / (n_a - 1.0);
                let var_b =
                    group_b.iter().map(|&x| (x - mean_b).powi(2)).sum::<f64>() / (n_b - 1.0);
                let se = (var_a / n_a + var_b / n_b).sqrt();

                let z_critical = self.normal_quantile((1.0 + confidence_level) / 2.0);
                let margin_of_error = z_critical * se;

                Ok((mean_diff - margin_of_error, mean_diff + margin_of_error))
            }
            ConfidenceIntervalType::TDistribution => {
                let mean_diff = group_a.mean().unwrap_or(0.0) - group_b.mean().unwrap_or(0.0);
                let n_a = group_a.len() as f64;
                let n_b = group_b.len() as f64;
                let df = (n_a + n_b - 2.0) as usize;
                let mean_a = group_a.mean().unwrap_or(0.0);
                let mean_b = group_b.mean().unwrap_or(0.0);

                let var_a =
                    group_a.iter().map(|&x| (x - mean_a).powi(2)).sum::<f64>() / (n_a - 1.0);
                let var_b =
                    group_b.iter().map(|&x| (x - mean_b).powi(2)).sum::<f64>() / (n_b - 1.0);
                let pooled_var = ((n_a - 1.0) * var_a + (n_b - 1.0) * var_b) / (n_a + n_b - 2.0);
                let se = pooled_var.sqrt() * (1.0 / n_a + 1.0 / n_b).sqrt();

                let t_critical = self.t_quantile((1.0 + confidence_level) / 2.0, df);
                let margin_of_error = t_critical * se;

                Ok((mean_diff - margin_of_error, mean_diff + margin_of_error))
            }
            _ => {
                // Bootstrap interval types are not yet implemented; fall back
                // to the normal-approximation interval.
                self.compute_confidence_interval(
                    group_a,
                    group_b,
                    ConfidenceIntervalType::Normal,
                    confidence_level,
                )
            }
        }
    }

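    /// Approximate 95% confidence interval for an effect size using a normal
    /// approximation to its sampling distribution.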
    fn effect_size_confidence_interval(
        &self,
        group_a: &Array1<f64>,
        group_b: &Array1<f64>,
        measure: EffectSizeMeasure,
    ) -> Result<(f64, f64), SklearsError> {
        let effect_size = self.compute_effect_size(group_a, group_b, measure)?;
        let n_a = group_a.len() as f64;
        let n_b = group_b.len() as f64;

        let se = match measure {
            // Standard error approximation for standardized mean differences.
            EffectSizeMeasure::CohensD | EffectSizeMeasure::HedgesG => {
                ((n_a + n_b) / (n_a * n_b) + effect_size.powi(2) / (2.0 * (n_a + n_b))).sqrt()
            }
            // Rough default for the remaining measures.
            _ => 0.1,
        };

        let z_critical = self.normal_quantile(0.975);
        let margin_of_error = z_critical * se;

        Ok((effect_size - margin_of_error, effect_size + margin_of_error))
    }

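    /// Maps an effect size to a qualitative interpretation using conventional thresholds.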
    fn interpret_effect_size(
        &self,
        value: f64,
        measure: EffectSizeMeasure,
    ) -> EffectSizeInterpretation {
        let abs_value = value.abs();

        match measure {
            // Conventional thresholds for standardized mean differences.
            EffectSizeMeasure::CohensD | EffectSizeMeasure::HedgesG => {
                if abs_value < 0.2 {
                    EffectSizeInterpretation::Negligible
                } else if abs_value < 0.5 {
                    EffectSizeInterpretation::Small
                } else if abs_value < 0.8 {
                    EffectSizeInterpretation::Medium
                } else if abs_value < 1.2 {
                    EffectSizeInterpretation::Large
                } else {
                    EffectSizeInterpretation::VeryLarge
                }
            }
            // Conventional thresholds for Cliff's delta.
            EffectSizeMeasure::CliffsDelta => {
                if abs_value < 0.147 {
                    EffectSizeInterpretation::Negligible
                } else if abs_value < 0.33 {
                    EffectSizeInterpretation::Small
                } else if abs_value < 0.474 {
                    EffectSizeInterpretation::Medium
                } else {
                    EffectSizeInterpretation::Large
                }
            }
            _ => {
                if abs_value < 0.1 {
                    EffectSizeInterpretation::Negligible
                } else if abs_value < 0.3 {
                    EffectSizeInterpretation::Small
                } else if abs_value < 0.5 {
                    EffectSizeInterpretation::Medium
                } else {
                    EffectSizeInterpretation::Large
                }
            }
        }
    }

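    /// Approximate Bayes factor for the two-sample comparison via the BIC
    /// approximation (Wagenmakers, 2007).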
    fn compute_bayes_factor(
        &self,
        group_a: &Array1<f64>,
        group_b: &Array1<f64>,
    ) -> Result<f64, SklearsError> {
        let t_stat = self.compute_t_statistic(group_a, group_b, false)?;
        let n_a = group_a.len() as f64;
        let n_b = group_b.len() as f64;
        let n = n_a + n_b;
        let df = n - 2.0;

        // BIC approximation: BF10 ~ exp((n * ln(1 + t^2/df) - ln n) / 2),
        // so the factor grows with the t statistic and shrinks with sample size.
        let log_bf = (n * (1.0 + t_stat.powi(2) / df).ln() - n.ln()) / 2.0;

        Ok(log_bf.exp())
    }

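    /// Per-test alpha for a fixed number of comparisons. Stepwise procedures
    /// (Holm, BH, BY) use per-rank thresholds applied in
    /// `apply_multiple_comparison_correction`.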
    fn apply_correction(&self, alpha: f64, n_comparisons: usize) -> f64 {
        match self.correction_method {
            MultipleComparisonCorrection::None => alpha,
            MultipleComparisonCorrection::Bonferroni => alpha / n_comparisons as f64,
            // Stepwise corrections are applied per rank elsewhere.
            MultipleComparisonCorrection::Holm => alpha,
            MultipleComparisonCorrection::BenjaminiHochberg => alpha,
            MultipleComparisonCorrection::BenjaminiYekutieli => alpha,
        }
    }

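    /// Re-evaluates the significance of all pairwise comparisons under the
    /// configured multiple comparison correction.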
    fn apply_multiple_comparison_correction(
        &self,
        mut comparisons: Vec<PairwiseComparison>,
        n_comparisons: usize,
    ) -> Vec<PairwiseComparison> {
        match self.correction_method {
            MultipleComparisonCorrection::None => comparisons,
            MultipleComparisonCorrection::Bonferroni => {
                let corrected_alpha = self.alpha_level / n_comparisons as f64;
                for comparison in comparisons.iter_mut() {
                    comparison.significance_test.is_significant =
                        comparison.significance_test.p_value < corrected_alpha;
                    comparison.significance_test.alpha_level = corrected_alpha;
                }
                comparisons
            }
            MultipleComparisonCorrection::Holm => {
                // Step-down: test the i-th smallest p-value against alpha / (m - i);
                // once one test fails, all larger p-values also fail.
                comparisons.sort_by(|a, b| {
                    a.significance_test
                        .p_value
                        .partial_cmp(&b.significance_test.p_value)
                        .unwrap()
                });
                let mut still_rejecting = true;
                for (i, comparison) in comparisons.iter_mut().enumerate() {
                    let corrected_alpha = self.alpha_level / (n_comparisons - i) as f64;
                    still_rejecting =
                        still_rejecting && comparison.significance_test.p_value < corrected_alpha;
                    comparison.significance_test.is_significant = still_rejecting;
                    comparison.significance_test.alpha_level = corrected_alpha;
                }
                comparisons
            }
            MultipleComparisonCorrection::BenjaminiHochberg => {
                // Step-up: reject all hypotheses up to the largest rank i with
                // p_(i) <= alpha * i / m.
                comparisons.sort_by(|a, b| {
                    a.significance_test
                        .p_value
                        .partial_cmp(&b.significance_test.p_value)
                        .unwrap()
                });
                let mut max_rejected = None;
                for (i, comparison) in comparisons.iter_mut().enumerate() {
                    let corrected_alpha = self.alpha_level * (i + 1) as f64 / n_comparisons as f64;
                    if comparison.significance_test.p_value <= corrected_alpha {
                        max_rejected = Some(i);
                    }
                    comparison.significance_test.alpha_level = corrected_alpha;
                }
                for (i, comparison) in comparisons.iter_mut().enumerate() {
                    comparison.significance_test.is_significant =
                        max_rejected.map_or(false, |k| i <= k);
                }
                comparisons
            }
            MultipleComparisonCorrection::BenjaminiYekutieli => {
                // As Benjamini-Hochberg, with thresholds shrunk by the harmonic
                // factor c(m) to allow arbitrary dependence.
                let c_factor: f64 = (1..=n_comparisons).map(|i| 1.0 / i as f64).sum();
                comparisons.sort_by(|a, b| {
                    a.significance_test
                        .p_value
                        .partial_cmp(&b.significance_test.p_value)
                        .unwrap()
                });
                let mut max_rejected = None;
                for (i, comparison) in comparisons.iter_mut().enumerate() {
                    let corrected_alpha =
                        self.alpha_level * (i + 1) as f64 / (n_comparisons as f64 * c_factor);
                    if comparison.significance_test.p_value <= corrected_alpha {
                        max_rejected = Some(i);
                    }
                    comparison.significance_test.alpha_level = corrected_alpha;
                }
                for (i, comparison) in comparisons.iter_mut().enumerate() {
                    comparison.significance_test.is_significant =
                        max_rejected.map_or(false, |k| i <= k);
                }
                comparisons
            }
        }
    }

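    /// Summary statistics: means, standard deviations, per-model CIs, the best
    /// model, significant pairs, and pairwise effect sizes.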
    fn compute_statistical_summary(
        &self,
        cv_scores: &[Array1<f64>],
        comparisons: &[PairwiseComparison],
    ) -> Result<StatisticalSummary, SklearsError> {
        let mean_scores: Vec<f64> = cv_scores
            .iter()
            .map(|scores| scores.mean().unwrap_or(0.0))
            .collect();
        let std_scores: Vec<f64> = cv_scores
            .iter()
            .enumerate()
            .map(|(i, scores)| {
                let mean = mean_scores[i];
                let variance = scores.iter().map(|&x| (x - mean).powi(2)).sum::<f64>()
                    / (scores.len() - 1) as f64;
                variance.sqrt()
            })
            .collect();

        let confidence_intervals: Vec<(f64, f64)> = cv_scores
            .iter()
            .enumerate()
            .map(|(i, scores)| {
                let mean = mean_scores[i];
                let std = std_scores[i];
                let n = scores.len() as f64;
                let se = std / n.sqrt();
                let t_critical = self.t_quantile(0.975, scores.len() - 1);
                let margin = t_critical * se;
                (mean - margin, mean + margin)
            })
            .collect();

        let overall_best_model = mean_scores
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
            .map(|(i, _)| i)
            .unwrap_or(0);

        let significantly_different_pairs: Vec<(usize, usize)> = comparisons
            .iter()
            .filter(|comp| comp.significance_test.is_significant)
            .map(|comp| (comp.model_a_index, comp.model_b_index))
            .collect();

        let effect_sizes: HashMap<(usize, usize), f64> = comparisons
            .iter()
            .map(|comp| {
                (
                    (comp.model_a_index, comp.model_b_index),
                    comp.significance_test.effect_size,
                )
            })
            .collect();

        Ok(StatisticalSummary {
            mean_scores,
            std_scores,
            confidence_intervals,
            overall_best_model,
            significantly_different_pairs,
            effect_sizes,
        })
    }

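    /// Standard normal quantile function (inverse CDF) using Acklam's
    /// rational approximation.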
    fn normal_quantile(&self, p: f64) -> f64 {
        let a = [
            -3.969683028665376e+01,
            2.209460984245205e+02,
            -2.759285104469687e+02,
            1.383577518672690e+02,
            -3.066479806614716e+01,
            2.506628277459239e+00,
        ];
        let b = [
            -5.447609879822406e+01,
            1.615858368580409e+02,
            -1.556989798598866e+02,
            6.680131188771972e+01,
            -1.328068155288572e+01,
        ];
        let c = [
            -7.784894002430293e-03,
            -3.223964580411365e-01,
            -2.400758277161838e+00,
            -2.549732539343734e+00,
            4.374664141464968e+00,
            2.938163982698783e+00,
        ];
        let d = [
            7.784695709041462e-03,
            3.224671290700398e-01,
            2.445134137142996e+00,
            3.754408661907416e+00,
        ];

        let p_low = 0.02425;
        let p_high = 1.0 - p_low;

        if p < p_low {
            // Lower tail (quantile is negative here).
            let q = (-2.0 * p.ln()).sqrt();
            (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5])
                / ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1.0)
        } else if p <= p_high {
            // Central region.
            let q = p - 0.5;
            let r = q * q;
            (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * q
                / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1.0)
        } else {
            // Upper tail (quantile is positive here).
            let q = (-2.0 * (1.0 - p).ln()).sqrt();
            -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5])
                / ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1.0)
        }
    }

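    /// Approximate t-distribution quantile; falls back to the normal quantile
    /// for df >= 30.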
    fn t_quantile(&self, p: f64, df: usize) -> f64 {
        let z = self.normal_quantile(p);
        let df_f = df as f64;

        if df >= 30 {
            return z;
        }

        // Rough small-sample inflation of the normal quantile.
        let a = 4.0 * df_f;
        let c = (a + z * z) / a;
        let correction = 1.0 + (z * z + 1.0) / a;

        z * c.sqrt() * correction
    }

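    /// Coarse two-sided p-value for a t statistic. For small df this is a
    /// crude step lookup, not an exact CDF evaluation.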
    fn t_distribution_p_value(&self, t: f64, df: usize) -> Result<f64, SklearsError> {
        let abs_t = t.abs();

        if df >= 30 {
            return self.normal_distribution_p_value(abs_t);
        }

        // Coarse bucketed approximation for small df.
        let p = if abs_t > 3.0 {
            0.001
        } else if abs_t > 2.0 {
            0.05
        } else if abs_t > 1.0 {
            0.3
        } else {
            0.6
        };

        Ok(p)
    }

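    /// Two-sided p-value for a z statistic using the Abramowitz & Stegun
    /// polynomial approximation of the normal tail.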
    fn normal_distribution_p_value(&self, z: f64) -> Result<f64, SklearsError> {
        let abs_z = z.abs();

        let p = if abs_z > 6.0 {
            1e-9
        } else {
            let t = 1.0 / (1.0 + 0.2316419 * abs_z);
            let d = 0.3989423 * (-abs_z * abs_z / 2.0).exp();
            d * t * (0.3193815 + t * (-0.3565638 + t * (1.781478 + t * (-1.821256 + t * 1.330274))))
        };

        Ok(2.0 * p)
    }
}

impl Default for ComparativeAnalyzer {
    fn default() -> Self {
        Self::new()
    }
}

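/// Generates human-readable Markdown reports from comparison results.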
pub struct ComparisonReporter;

impl ComparisonReporter {
    /// Renders a Markdown report with the ranking, summary statistics, and
    /// significant pairwise differences.
    pub fn generate_report(comparison: &ModelComparisonResult) -> String {
        let mut report = String::new();

        report.push_str("# Model Comparison Report\n\n");

        report.push_str("## Overall Ranking\n");
        for (rank, &model_idx) in comparison.ranking.iter().enumerate() {
            let model_name = &comparison.model_names[model_idx];
            let score = comparison.performance_scores[model_idx];
            report.push_str(&format!(
                "{}. {} (Score: {:.4})\n",
                rank + 1,
                model_name,
                score
            ));
        }

        report.push_str("\n## Statistical Summary\n");
        for (i, model_name) in comparison.model_names.iter().enumerate() {
            let mean = comparison.statistical_summary.mean_scores[i];
            let std = comparison.statistical_summary.std_scores[i];
            let (ci_low, ci_high) = comparison.statistical_summary.confidence_intervals[i];
            report.push_str(&format!(
                "- {}: Mean = {:.4} ± {:.4}, 95% CI = [{:.4}, {:.4}]\n",
                model_name, mean, std, ci_low, ci_high
            ));
        }

        report.push_str("\n## Significant Pairwise Differences\n");
        for pairwise_comp in &comparison.pairwise_comparisons {
            if pairwise_comp.significance_test.is_significant {
                let model_a = &comparison.model_names[pairwise_comp.model_a_index];
                let model_b = &comparison.model_names[pairwise_comp.model_b_index];
                let p_value = pairwise_comp.significance_test.p_value;
                let effect_size = pairwise_comp.significance_test.effect_size;

                report.push_str(&format!(
                    "- {} vs {}: p = {:.4}, Effect Size = {:.4}\n",
                    model_a, model_b, p_value, effect_size
                ));
            }
        }

        report
    }
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_significance_test() {
        let analyzer = ComparativeAnalyzer::new();
        let group_a = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let group_b = array![2.0, 3.0, 4.0, 5.0, 6.0];

        let result = analyzer
            .significance_test(
                &group_a,
                &group_b,
                SignificanceTest::TTest,
                EffectSizeMeasure::CohensD,
            )
            .unwrap();

        assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
        assert!(!result.statistic.is_nan());
        assert!(!result.effect_size.is_nan());
    }

    #[test]
    fn test_effect_size_cohens_d() {
        let analyzer = ComparativeAnalyzer::new();
        let group_a = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let group_b = array![3.0, 4.0, 5.0, 6.0, 7.0];

        let result = analyzer
            .effect_size(&group_a, &group_b, EffectSizeMeasure::CohensD)
            .unwrap();

        // Group A has the lower mean, so d is negative and clearly non-negligible.
        assert!(result.value < 0.0);
        assert!(result.value.abs() > 0.5);
        assert!(!matches!(
            result.interpretation,
            EffectSizeInterpretation::Negligible
        ));
    }

    #[test]
    fn test_cliffs_delta() {
        let analyzer = ComparativeAnalyzer::new();
        let group_a = array![1.0, 2.0, 3.0];
        let group_b = array![4.0, 5.0, 6.0];

        // Every value in group B dominates group A, so delta is exactly -1.
        let delta = analyzer.cliffs_delta(&group_a, &group_b).unwrap();
        assert_eq!(delta, -1.0);
    }

    #[test]
    fn test_model_comparison() {
        let analyzer = ComparativeAnalyzer::new();
        let model_names = vec!["Model A".to_string(), "Model B".to_string()];
        let cv_scores = vec![
            array![0.8, 0.82, 0.78, 0.81, 0.79],
            array![0.75, 0.77, 0.73, 0.76, 0.74],
        ];

        let result = analyzer.compare_models(model_names, cv_scores).unwrap();

        assert_eq!(result.model_names.len(), 2);
        assert_eq!(result.performance_scores.len(), 2);
        // Model A has the higher mean score and should rank first.
        assert_eq!(result.ranking[0], 0);
        assert_eq!(result.best_model_index, 0);
    }

    #[test]
    fn test_permutation_test() {
        let analyzer = ComparativeAnalyzer::new().with_random_state(42);
        let group_a = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let group_b = array![2.0, 3.0, 4.0, 5.0, 6.0];

        let result = analyzer
            .significance_test(
                &group_a,
                &group_b,
                SignificanceTest::PermutationTest {
                    n_permutations: 1000,
                },
                EffectSizeMeasure::CohensD,
            )
            .unwrap();

        assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
    }

    #[test]
    fn test_confidence_intervals() {
        let analyzer = ComparativeAnalyzer::new();
        let group_a = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let group_b = array![2.0, 3.0, 4.0, 5.0, 6.0];

        let (lower, upper) = analyzer
            .compute_confidence_interval(
                &group_a,
                &group_b,
                ConfidenceIntervalType::TDistribution,
                0.95,
            )
            .unwrap();

        assert!(lower < upper);
        // The interval should bracket the true mean difference of -1.0.
        let mean_diff = 3.0 - 4.0;
        assert!(lower < mean_diff && mean_diff < upper);
    }
}