entrenar_bench/
stats.rs

1//! Statistical analysis utilities.
2
3/// Statistical analyzer for benchmark results.
4pub struct StatisticalAnalyzer;
5
6impl StatisticalAnalyzer {
7    /// Perform Welch's t-test for two independent samples.
8    ///
9    /// Returns the p-value for the null hypothesis that the means are equal.
10    pub fn welch_t_test(sample1: &[f64], sample2: &[f64]) -> TestResult {
11        if sample1.len() < 2 || sample2.len() < 2 {
12            return TestResult {
13                statistic: 0.0,
14                p_value: 1.0,
15                significant: false,
16                effect_size: 0.0,
17            };
18        }
19
20        let n1 = sample1.len() as f64;
21        let n2 = sample2.len() as f64;
22
23        let mean1 = sample1.iter().sum::<f64>() / n1;
24        let mean2 = sample2.iter().sum::<f64>() / n2;
25
26        let var1 = sample1.iter().map(|x| (x - mean1).powi(2)).sum::<f64>() / (n1 - 1.0);
27        let var2 = sample2.iter().map(|x| (x - mean2).powi(2)).sum::<f64>() / (n2 - 1.0);
28
29        let se = ((var1 / n1) + (var2 / n2)).sqrt();
30        if se == 0.0 {
31            return TestResult {
32                statistic: 0.0,
33                p_value: 1.0,
34                significant: false,
35                effect_size: 0.0,
36            };
37        }
38
39        let t = (mean1 - mean2) / se;
40
41        // Welch-Satterthwaite degrees of freedom
42        let df_num = ((var1 / n1) + (var2 / n2)).powi(2);
43        let df_denom = ((var1 / n1).powi(2) / (n1 - 1.0)) + ((var2 / n2).powi(2) / (n2 - 1.0));
44        let df = df_num / df_denom;
45
46        // Approximate p-value using normal distribution for large df
47        let p_value = Self::t_to_p(t.abs(), df);
48
49        // Cohen's d effect size
50        let pooled_std = f64::midpoint(var1, var2).sqrt();
51        let effect_size = if pooled_std > 0.0 {
52            (mean1 - mean2).abs() / pooled_std
53        } else {
54            0.0
55        };
56
57        TestResult {
58            statistic: t,
59            p_value,
60            significant: p_value < 0.05,
61            effect_size,
62        }
63    }
64
65    /// Perform Mann-Whitney U test (non-parametric).
66    pub fn mann_whitney_u(sample1: &[f64], sample2: &[f64]) -> TestResult {
67        if sample1.is_empty() || sample2.is_empty() {
68            return TestResult {
69                statistic: 0.0,
70                p_value: 1.0,
71                significant: false,
72                effect_size: 0.0,
73            };
74        }
75
76        let n1 = sample1.len();
77        let n2 = sample2.len();
78
79        // Count ranks
80        let mut u = 0.0;
81        for &x in sample1 {
82            for &y in sample2 {
83                if x > y {
84                    u += 1.0;
85                } else if (x - y).abs() < 1e-10 {
86                    u += 0.5;
87                }
88            }
89        }
90
91        // Normal approximation for large samples
92        let mu = (n1 * n2) as f64 / 2.0;
93        let sigma = ((n1 * n2 * (n1 + n2 + 1)) as f64 / 12.0).sqrt();
94
95        let z = if sigma > 0.0 { (u - mu) / sigma } else { 0.0 };
96        let p_value = 2.0 * Self::normal_cdf(-z.abs());
97
98        // Effect size: r = z / sqrt(n)
99        let effect_size = z.abs() / ((n1 + n2) as f64).sqrt();
100
101        TestResult {
102            statistic: u,
103            p_value,
104            significant: p_value < 0.05,
105            effect_size,
106        }
107    }
108
109    /// Perform one-way ANOVA.
110    pub fn anova(groups: &[Vec<f64>]) -> TestResult {
111        if groups.len() < 2 {
112            return TestResult {
113                statistic: 0.0,
114                p_value: 1.0,
115                significant: false,
116                effect_size: 0.0,
117            };
118        }
119
120        let k = groups.len() as f64;
121        let n_total: usize = groups.iter().map(Vec::len).sum();
122
123        // Grand mean
124        let grand_mean: f64 = groups.iter().flat_map(|g| g.iter()).sum::<f64>() / n_total as f64;
125
126        // Between-group sum of squares
127        let ss_between: f64 = groups
128            .iter()
129            .map(|g| {
130                let group_mean = g.iter().sum::<f64>() / g.len() as f64;
131                g.len() as f64 * (group_mean - grand_mean).powi(2)
132            })
133            .sum();
134
135        // Within-group sum of squares
136        let ss_within: f64 = groups
137            .iter()
138            .map(|g| {
139                let group_mean = g.iter().sum::<f64>() / g.len() as f64;
140                g.iter().map(|x| (x - group_mean).powi(2)).sum::<f64>()
141            })
142            .sum();
143
144        let df_between = k - 1.0;
145        let df_within = n_total as f64 - k;
146
147        let ms_between = ss_between / df_between;
148        let ms_within = ss_within / df_within;
149
150        let f = if ms_within > 0.0 {
151            ms_between / ms_within
152        } else {
153            0.0
154        };
155
156        // Approximate p-value
157        let p_value = Self::f_to_p(f, df_between, df_within);
158
159        // Eta-squared effect size
160        let effect_size = ss_between / (ss_between + ss_within);
161
162        TestResult {
163            statistic: f,
164            p_value,
165            significant: p_value < 0.05,
166            effect_size,
167        }
168    }
169
170    // Approximate t-distribution CDF using normal approximation
171    fn t_to_p(t: f64, df: f64) -> f64 {
172        // For large df, t-distribution approaches normal
173        if df > 30.0 {
174            2.0 * Self::normal_cdf(-t)
175        } else {
176            // Simple approximation for smaller df
177            let adjusted_t = t * (1.0 + 1.0 / (4.0 * df));
178            2.0 * Self::normal_cdf(-adjusted_t)
179        }
180    }
181
182    // Approximate F-distribution p-value
183    fn f_to_p(f: f64, df1: f64, _df2: f64) -> f64 {
184        // Very rough approximation using chi-square
185        let chi_approx = f * df1;
186        Self::chi_square_p(chi_approx, df1)
187    }
188
189    // Approximate chi-square p-value
190    fn chi_square_p(x: f64, df: f64) -> f64 {
191        // Use normal approximation for large df
192        if df > 30.0 {
193            let z = (2.0 * x).sqrt() - (2.0 * df - 1.0).sqrt();
194            Self::normal_cdf(-z)
195        } else {
196            // Rough approximation
197            let mean = df;
198            let std = (2.0 * df).sqrt();
199            Self::normal_cdf(-(x - mean) / std)
200        }
201    }
202
203    // Standard normal CDF approximation
204    fn normal_cdf(x: f64) -> f64 {
205        // Approximation using error function
206        0.5 * (1.0 + Self::erf(x / std::f64::consts::SQRT_2))
207    }
208
209    // Error function approximation
210    fn erf(x: f64) -> f64 {
211        // Approximation from Abramowitz and Stegun
212        let a1 = 0.254829592;
213        let a2 = -0.284496736;
214        let a3 = 1.421413741;
215        let a4 = -1.453152027;
216        let a5 = 1.061405429;
217        let p = 0.3275911;
218
219        let sign = if x < 0.0 { -1.0 } else { 1.0 };
220        let x = x.abs();
221
222        let t = 1.0 / (1.0 + p * x);
223        let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
224
225        sign * y
226    }
227}
228
229/// Result of a statistical test.
230#[derive(Debug, Clone)]
231pub struct TestResult {
232    /// Test statistic (t, U, F, etc.)
233    pub statistic: f64,
234    /// P-value
235    pub p_value: f64,
236    /// Whether result is significant at α=0.05
237    pub significant: bool,
238    /// Effect size (Cohen's d, eta-squared, etc.)
239    pub effect_size: f64,
240}
241
242impl TestResult {
243    /// Interpret effect size (Cohen's d).
244    pub fn effect_interpretation(&self) -> &'static str {
245        if self.effect_size < 0.2 {
246            "negligible"
247        } else if self.effect_size < 0.5 {
248            "small"
249        } else if self.effect_size < 0.8 {
250            "medium"
251        } else {
252            "large"
253        }
254    }
255}
256
257impl std::fmt::Display for TestResult {
258    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
259        write!(
260            f,
261            "statistic={:.3}, p={:.4}, significant={}, effect_size={:.3} ({})",
262            self.statistic,
263            self.p_value,
264            self.significant,
265            self.effect_size,
266            self.effect_interpretation()
267        )
268    }
269}
270
271/// Calculate confidence interval for a mean.
272pub fn confidence_interval(sample: &[f64], confidence: f64) -> (f64, f64) {
273    if sample.len() < 2 {
274        let mean = sample.first().copied().unwrap_or(0.0);
275        return (mean, mean);
276    }
277
278    let n = sample.len() as f64;
279    let mean = sample.iter().sum::<f64>() / n;
280    let std = (sample.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - 1.0)).sqrt();
281    let se = std / n.sqrt();
282
283    // Z-score for confidence level (approximate)
284    let z = match confidence {
285        c if c >= 0.99 => 2.576,
286        c if c >= 0.95 => 1.96,
287        c if c >= 0.90 => 1.645,
288        _ => 1.96,
289    };
290
291    (mean - z * se, mean + z * se)
292}
293
294#[cfg(test)]
295mod tests {
296    use super::*;
297
298    #[test]
299    fn test_welch_t_test_significant() {
300        let sample1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
301        let sample2 = vec![6.0, 7.0, 8.0, 9.0, 10.0];
302
303        let result = StatisticalAnalyzer::welch_t_test(&sample1, &sample2);
304        assert!(result.significant);
305        assert!(result.p_value < 0.05);
306    }
307
308    #[test]
309    fn test_welch_t_test_not_significant() {
310        let sample1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
311        let sample2 = vec![1.5, 2.5, 3.5, 4.5, 5.5];
312
313        let result = StatisticalAnalyzer::welch_t_test(&sample1, &sample2);
314        // Means are close, should not be significant
315        assert!(result.p_value > 0.01);
316    }
317
318    #[test]
319    fn test_mann_whitney() {
320        let sample1 = vec![1.0, 2.0, 3.0];
321        let sample2 = vec![4.0, 5.0, 6.0];
322
323        let result = StatisticalAnalyzer::mann_whitney_u(&sample1, &sample2);
324        assert!(result.statistic >= 0.0);
325    }
326
327    #[test]
328    fn test_anova() {
329        let groups = vec![
330            vec![1.0, 2.0, 3.0],
331            vec![4.0, 5.0, 6.0],
332            vec![7.0, 8.0, 9.0],
333        ];
334
335        let result = StatisticalAnalyzer::anova(&groups);
336        assert!(result.significant);
337    }
338
339    #[test]
340    fn test_confidence_interval() {
341        let sample = vec![1.0, 2.0, 3.0, 4.0, 5.0];
342        let (lower, upper) = confidence_interval(&sample, 0.95);
343
344        assert!(lower < 3.0); // Mean is 3.0
345        assert!(upper > 3.0);
346    }
347
348    #[test]
349    fn test_effect_interpretation() {
350        let small = TestResult {
351            statistic: 0.0,
352            p_value: 0.0,
353            significant: false,
354            effect_size: 0.3,
355        };
356        assert_eq!(small.effect_interpretation(), "small");
357
358        let large = TestResult {
359            statistic: 0.0,
360            p_value: 0.0,
361            significant: false,
362            effect_size: 1.0,
363        };
364        assert_eq!(large.effect_interpretation(), "large");
365    }
366
367    #[test]
368    fn test_effect_interpretation_all_levels() {
369        // negligible
370        let negligible = TestResult {
371            statistic: 0.0,
372            p_value: 0.0,
373            significant: false,
374            effect_size: 0.1,
375        };
376        assert_eq!(negligible.effect_interpretation(), "negligible");
377
378        // medium
379        let medium = TestResult {
380            statistic: 0.0,
381            p_value: 0.0,
382            significant: false,
383            effect_size: 0.6,
384        };
385        assert_eq!(medium.effect_interpretation(), "medium");
386    }
387
388    #[test]
389    fn test_welch_t_test_small_samples() {
390        let sample1 = vec![1.0];
391        let sample2 = vec![2.0];
392
393        let result = StatisticalAnalyzer::welch_t_test(&sample1, &sample2);
394        assert_eq!(result.p_value, 1.0);
395        assert!(!result.significant);
396    }
397
398    #[test]
399    fn test_mann_whitney_empty_samples() {
400        let result = StatisticalAnalyzer::mann_whitney_u(&[], &[1.0, 2.0]);
401        assert_eq!(result.p_value, 1.0);
402        assert!(!result.significant);
403    }
404
405    #[test]
406    fn test_mann_whitney_ties() {
407        // Sample with ties
408        let sample1 = vec![1.0, 2.0, 3.0];
409        let sample2 = vec![2.0, 3.0, 4.0]; // 2.0 and 3.0 tie with sample1
410
411        let result = StatisticalAnalyzer::mann_whitney_u(&sample1, &sample2);
412        assert!(result.statistic >= 0.0);
413    }
414
415    #[test]
416    fn test_anova_single_group() {
417        let groups = vec![vec![1.0, 2.0, 3.0]];
418
419        let result = StatisticalAnalyzer::anova(&groups);
420        assert_eq!(result.p_value, 1.0);
421    }
422
423    #[test]
424    fn test_anova_identical_groups() {
425        let groups = vec![
426            vec![5.0, 5.0, 5.0],
427            vec![5.0, 5.0, 5.0],
428            vec![5.0, 5.0, 5.0],
429        ];
430
431        let result = StatisticalAnalyzer::anova(&groups);
432        // No variance within or between, F should be 0
433        assert!(!result.significant || result.statistic.abs() < 0.001);
434    }
435
436    #[test]
437    fn test_confidence_interval_single_sample() {
438        let sample = vec![5.0];
439        let (lower, upper) = confidence_interval(&sample, 0.95);
440        assert_eq!(lower, 5.0);
441        assert_eq!(upper, 5.0);
442    }
443
444    #[test]
445    fn test_confidence_interval_99() {
446        let sample = vec![1.0, 2.0, 3.0, 4.0, 5.0];
447        let (lower_95, upper_95) = confidence_interval(&sample, 0.95);
448        let (lower_99, upper_99) = confidence_interval(&sample, 0.99);
449
450        // 99% CI should be wider than 95% CI
451        assert!(lower_99 < lower_95);
452        assert!(upper_99 > upper_95);
453    }
454
455    #[test]
456    fn test_confidence_interval_90() {
457        let sample = vec![1.0, 2.0, 3.0, 4.0, 5.0];
458        let (lower_95, upper_95) = confidence_interval(&sample, 0.95);
459        let (lower_90, upper_90) = confidence_interval(&sample, 0.90);
460
461        // 90% CI should be narrower than 95% CI
462        assert!(lower_90 > lower_95);
463        assert!(upper_90 < upper_95);
464    }
465
466    #[test]
467    fn test_test_result_to_string() {
468        let result = TestResult {
469            statistic: 2.5,
470            p_value: 0.025,
471            significant: true,
472            effect_size: 0.8,
473        };
474
475        let s = result.to_string();
476        assert!(s.contains("statistic=2.5"));
477        assert!(s.contains("p=0.0250"));
478        assert!(s.contains("significant=true"));
479        assert!(s.contains("large"));
480    }
481
482    #[test]
483    fn test_welch_t_test_zero_variance() {
484        // Samples with zero variance
485        let sample1 = vec![5.0, 5.0, 5.0, 5.0, 5.0];
486        let sample2 = vec![10.0, 10.0, 10.0, 10.0, 10.0];
487
488        let result = StatisticalAnalyzer::welch_t_test(&sample1, &sample2);
489        // Should return p=1 due to zero SE
490        assert_eq!(result.p_value, 1.0);
491    }
492
493    #[test]
494    fn test_normal_cdf_properties() {
495        // Normal CDF at 0 should be 0.5
496        let cdf_0 = StatisticalAnalyzer::normal_cdf(0.0);
497        assert!((cdf_0 - 0.5).abs() < 0.01);
498
499        // CDF at large positive should approach 1
500        let cdf_large = StatisticalAnalyzer::normal_cdf(3.0);
501        assert!(cdf_large > 0.99);
502
503        // CDF at large negative should approach 0
504        let cdf_neg = StatisticalAnalyzer::normal_cdf(-3.0);
505        assert!(cdf_neg < 0.01);
506    }
507}