Skip to main content

perfgate_significance/
lib.rs

1//! Statistical significance testing for benchmarking.
2//!
3//! This crate provides Welch's t-test implementation for detecting statistically
4//! significant performance changes between benchmark runs.
5//!
6//! Part of the [perfgate](https://github.com/EffortlessMetrics/perfgate) workspace.
7//!
8//! # Statistical Methodology
9//!
10//! ## Welch's t-test
11//!
12//! Welch's t-test is an adaptation of Student's t-test that is more reliable when
13//! the two samples have unequal variances and/or unequal sample sizes. This makes
14//! it ideal for benchmarking where:
15//!
16//! - Baseline and current runs may have different numbers of samples
17//! - Variance can differ significantly between runs due to system noise
18//! - We want to detect real performance changes, not just noise
19//!
20//! ### Formula
21//!
22//! The test statistic is computed as:
23//!
24//! ```text
25//! t = (mean_1 - mean_2) / sqrt(var_1/n_1 + var_2/n_2)
26//! ```
27//!
28//! The degrees of freedom is approximated using the Welch-Satterthwaite equation:
29//!
30//! ```text
//! df = (var_1/n_1 + var_2/n_2)² / (var_1² / (n_1² · (n_1 - 1)) + var_2² / (n_2² · (n_2 - 1)))
32//! ```
33//!
34//! ### Interpretation
35//!
36//! - The p-value represents the probability of observing a difference as extreme
37//!   as (or more extreme than) the measured difference, assuming no real change.
38//! - A small p-value (≤ alpha, typically 0.05) indicates strong evidence against
39//!   the null hypothesis, suggesting a statistically significant change.
40//!
41//! ## Limitations
42//!
//! - **Minimum samples**: Requires at least `min_samples` in both groups (typically 8)
//!   for reliable results; with fewer samples, the test returns `None`
45//! - **Zero variance**: When all values in a group are identical, the test handles
46//!   this edge case explicitly (returns p-value 1.0 if means are equal, 0.0 otherwise)
47//! - **Assumptions**: Assumes data is approximately normally distributed; for
48//!   highly skewed distributions, consider non-parametric alternatives
49
50use perfgate_types::{Significance, SignificanceTest};
51use statrs::distribution::{ContinuousCDF, StudentsT};
52
53/// Compute statistical significance using Welch's t-test.
54///
55/// Returns `None` if:
56/// - Either sample has fewer than `min_samples` observations
57/// - Either sample has fewer than 2 observations (variance undefined)
58/// - Computed degrees of freedom is non-finite or non-positive
59///
60/// # Arguments
61///
62/// * `baseline` - Baseline metric values
63/// * `current` - Current metric values
64/// * `alpha` - Significance level (typically 0.05)
65/// * `min_samples` - Minimum samples required in each group
66///
67/// # Returns
68///
69/// A `Significance` struct containing:
70/// - `p_value`: Two-tailed p-value from Welch's t-test
71/// - `alpha`: The provided significance threshold
72/// - `significant`: Whether p_value ≤ alpha
73/// - `baseline_samples` / `current_samples`: Sample counts
74///
75/// # Example
76///
77/// ```
78/// use perfgate_significance::compute_significance;
79///
80/// let baseline = vec![100.0, 102.0, 98.0, 101.0, 99.0, 100.0, 101.0, 99.0];
81/// let current = vec![110.0, 112.0, 108.0, 111.0, 109.0, 110.0, 111.0, 109.0];
82///
83/// let result = compute_significance(&baseline, &current, 0.05, 8);
84/// assert!(result.is_some());
85///
86/// let sig = result.unwrap();
87/// assert!(sig.significant); // Clear performance regression
88/// assert!(sig.p_value.unwrap() < 0.05);
89/// ```
90pub fn compute_significance(
91    baseline: &[f64],
92    current: &[f64],
93    alpha: f64,
94    min_samples: usize,
95) -> Option<Significance> {
96    if baseline.len() < min_samples || current.len() < min_samples {
97        return None;
98    }
99
100    if baseline.len() < 2 || current.len() < 2 {
101        return None;
102    }
103
104    let (base_mean, base_var) = mean_and_variance(baseline)?;
105    let (curr_mean, curr_var) = mean_and_variance(current)?;
106
107    let n1 = baseline.len() as f64;
108    let n2 = current.len() as f64;
109    let se2 = (base_var / n1) + (curr_var / n2);
110
111    let p_value = if se2 <= 0.0 {
112        if (base_mean - curr_mean).abs() < f64::EPSILON {
113            1.0
114        } else {
115            0.0
116        }
117    } else {
118        let t = (base_mean - curr_mean) / se2.sqrt();
119        let numerator = se2 * se2;
120        let denom_left = (base_var * base_var) / (n1 * n1 * (n1 - 1.0));
121        let denom_right = (curr_var * curr_var) / (n2 * n2 * (n2 - 1.0));
122        let df = numerator / (denom_left + denom_right);
123
124        if !df.is_finite() || df <= 0.0 {
125            return None;
126        }
127
128        let dist = StudentsT::new(0.0, 1.0, df).ok()?;
129        let tail = 1.0 - dist.cdf(t.abs());
130        (2.0 * tail).clamp(0.0, 1.0)
131    };
132
133    Some(Significance {
134        test: SignificanceTest::WelchT,
135        p_value: Some(p_value),
136        alpha,
137        significant: p_value <= alpha,
138        baseline_samples: baseline.len() as u32,
139        current_samples: current.len() as u32,
140        ci_lower: None, // Could be calculated here if needed
141        ci_upper: None, // Could be calculated here if needed
142    })
143}
144
/// Calculate the arithmetic mean and the unbiased sample variance of a slice.
///
/// Returns `None` if:
/// - `values` is empty
/// - The resulting mean or variance is not finite (NaN or infinity)
///
/// # Arguments
///
/// * `values` - The observations to summarise
///
/// # Returns
///
/// A `(mean, variance)` pair where:
/// - `mean` is the arithmetic mean
/// - `variance` uses the `n - 1` denominator (Bessel's correction)
/// - `variance` is 0.0 when there is exactly one observation
/// - `variance` is never negative (small negative rounding results are
///   clamped to zero)
///
/// # Example
///
/// ```
/// use perfgate_significance::mean_and_variance;
///
/// let values = vec![10.0, 12.0, 14.0, 16.0, 18.0];
/// let (mean, var) = mean_and_variance(&values).unwrap();
///
/// assert!((mean - 14.0).abs() < 1e-10);
/// assert!(var > 0.0); // Sample variance with Bessel's correction
/// ```
pub fn mean_and_variance(values: &[f64]) -> Option<(f64, f64)> {
    if values.is_empty() {
        return None;
    }

    // Single pass with Welford's running update: carry the mean so far and the
    // accumulated sum of squared deviations, pairing each value with its
    // 1-based position in the slice.
    let (mean, sum_sq_dev) = values.iter().zip(1u64..).fold(
        (0.0_f64, 0.0_f64),
        |(running_mean, acc), (&x, count)| {
            let before = x - running_mean;
            let next_mean = running_mean + before / count as f64;
            let after = x - next_mean;
            (next_mean, acc + before * after)
        },
    );

    let count = values.len();
    let variance = if count < 2 {
        0.0
    } else {
        sum_sq_dev / (count as f64 - 1.0)
    };

    (mean.is_finite() && variance.is_finite()).then(|| (mean, variance.max(0.0)))
}
200
// Unit, property-based and edge-case tests for `compute_significance` and
// `mean_and_variance`. All inputs are deterministic (or generated by proptest),
// so a failure can always be reproduced.
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use proptest::prelude::*;

    /// Two constant groups 10 units apart are reported as a significant change.
    #[test]
    fn significance_detects_clear_regression() {
        let baseline = vec![100.0; 20];
        let current = vec![110.0; 20];

        let result = compute_significance(&baseline, &current, 0.05, 8).unwrap();

        assert!(result.significant);
        assert!(result.p_value.unwrap() < 0.001);
        assert_eq!(result.test, SignificanceTest::WelchT);
    }

    /// A group below `min_samples` makes the whole test return `None`.
    #[test]
    fn significance_returns_none_for_insufficient_samples() {
        let baseline = vec![100.0, 101.0, 102.0];
        let current = vec![100.0, 101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0];

        let result = compute_significance(&baseline, &current, 0.05, 8);

        assert!(result.is_none());
    }

    /// One observation per group is rejected even when `min_samples` is 1.
    #[test]
    fn significance_returns_none_for_single_sample() {
        let baseline = vec![100.0];
        let current = vec![100.0];

        let result = compute_significance(&baseline, &current, 0.05, 1);

        assert!(result.is_none());
    }

    /// Zero variance with equal means yields p = 1.0.
    #[test]
    fn significance_handles_zero_variance_equal_means() {
        let baseline = vec![100.0; 10];
        let current = vec![100.0; 10];

        let result = compute_significance(&baseline, &current, 0.05, 8).unwrap();

        assert!(!result.significant);
        assert_relative_eq!(result.p_value.unwrap(), 1.0);
    }

    /// Zero variance with different means yields p = 0.0.
    #[test]
    fn significance_handles_zero_variance_different_means() {
        let baseline = vec![100.0; 10];
        let current = vec![110.0; 10];

        let result = compute_significance(&baseline, &current, 0.05, 8).unwrap();

        assert!(result.significant);
        assert_relative_eq!(result.p_value.unwrap(), 0.0);
    }

    /// A 0.5 shift is not significant when the spread within each group is larger.
    #[test]
    fn significance_not_significant_for_noisy_data() {
        let baseline: Vec<f64> = (0..20).map(|i| 100.0 + (i as f64 % 5.0) - 2.5).collect();
        let current: Vec<f64> = (0..20).map(|i| 100.5 + (i as f64 % 5.0) - 2.5).collect();

        let result = compute_significance(&baseline, &current, 0.05, 8).unwrap();

        assert!(
            !result.significant,
            "Expected not significant due to high variance"
        );
    }

    /// The result records the size of each input group.
    #[test]
    fn significance_sample_counts_recorded() {
        let baseline = vec![100.0; 15];
        let current = vec![100.0; 12];

        let result = compute_significance(&baseline, &current, 0.05, 8).unwrap();

        assert_eq!(result.baseline_samples, 15);
        assert_eq!(result.current_samples, 12);
    }

    /// `alpha` changes only the `significant` flag, never the p-value.
    #[test]
    fn significance_respects_alpha_threshold() {
        let baseline = vec![100.0, 101.0, 99.0, 100.0, 101.0, 99.0, 100.0, 101.0];
        let current = vec![102.0, 103.0, 101.0, 102.0, 103.0, 101.0, 102.0, 103.0];

        let result_strict = compute_significance(&baseline, &current, 0.01, 8).unwrap();
        let result_lenient = compute_significance(&baseline, &current, 0.10, 8).unwrap();

        assert_eq!(result_strict.p_value, result_lenient.p_value);
        assert!(
            result_lenient.significant || !result_strict.significant,
            "lenient threshold should be more likely to be significant"
        );
    }

    #[test]
    fn mean_and_variance_empty_returns_none() {
        assert!(mean_and_variance(&[]).is_none());
    }

    #[test]
    fn mean_and_variance_single_element() {
        let (mean, var) = mean_and_variance(&[42.0]).unwrap();

        assert_relative_eq!(mean, 42.0);
        assert_relative_eq!(var, 0.0);
    }

    #[test]
    fn mean_and_variance_two_elements() {
        let (mean, var) = mean_and_variance(&[10.0, 20.0]).unwrap();

        assert_relative_eq!(mean, 15.0);
        assert_relative_eq!(var, 50.0);
    }

    #[test]
    fn mean_and_variance_uniform_values() {
        let (mean, var) = mean_and_variance(&[100.0; 10]).unwrap();

        assert_relative_eq!(mean, 100.0);
        assert_relative_eq!(var, 0.0);
    }

    #[test]
    fn mean_and_variance_known_values() {
        let values = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let (mean, var) = mean_and_variance(&values).unwrap();

        assert_relative_eq!(mean, 5.0);
        assert_relative_eq!(var, 32.0 / 7.0);
    }

    /// Identical large samples with non-zero variance give p ≈ 1.
    #[test]
    fn significance_large_samples() {
        let baseline: Vec<f64> = (0..1000).map(|i| 100.0 + (i as f64 % 10.0)).collect();
        let current: Vec<f64> = (0..1000).map(|i| 100.0 + (i as f64 % 10.0)).collect();

        let result = compute_significance(&baseline, &current, 0.05, 8).unwrap();

        assert_relative_eq!(result.p_value.unwrap(), 1.0, epsilon = 1e-10);
        assert!(!result.significant);
    }

    /// Noisy samples centred on the same value always produce a p-value in [0, 1].
    #[test]
    fn significance_with_small_real_difference() {
        // Disjoint seed ranges keep the two groups from sharing any draw.
        let baseline: Vec<f64> = (0..50u64)
            .map(|i| 100.0 + pseudo_normal(i, 0.0, 1.0))
            .collect();
        let current: Vec<f64> = (0..50u64)
            .map(|i| 100.0 + pseudo_normal(1_000 + i, 0.0, 1.0))
            .collect();

        let result = compute_significance(&baseline, &current, 0.05, 8).unwrap();

        assert!(result.p_value.unwrap() >= 0.0 && result.p_value.unwrap() <= 1.0);
    }

    /// Deterministic stand-in for a normal sampler.
    ///
    /// Maps `seed` to one draw from a normal distribution with the given `mean`
    /// and standard deviation `std`, using SplitMix64 bit mixing followed by the
    /// Box-Muller transform. The same seed always yields the same value, so
    /// tests built on it are reproducible.
    fn pseudo_normal(seed: u64, mean: f64, std: f64) -> f64 {
        // SplitMix64 output function applied to a single state value.
        fn mix(mut z: u64) -> u64 {
            z = z.wrapping_add(0x9E37_79B9_7F4A_7C15);
            z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
            z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
            z ^ (z >> 31)
        }

        // Top 53 bits mapped into (0, 1]; excluding 0 keeps `ln` finite below.
        fn unit(bits: u64) -> f64 {
            ((bits >> 11) as f64 + 1.0) / (1u64 << 53) as f64
        }

        let u1 = unit(mix(seed));
        let u2 = unit(mix(seed ^ 0xD6E8_FEB8_6659_FD93));
        let z = (-2.0 * u1.ln()).sqrt() * (std::f64::consts::TAU * u2).cos();

        mean + std * z
    }

    mod property_tests {
        use super::*;

        proptest! {
            // Any accepted input yields a p-value in [0, 1] and consistent metadata.
            #[test]
            fn prop_p_value_bounds(
                baseline in prop::collection::vec(0.0f64..1000.0, 8..100),
                current in prop::collection::vec(0.0f64..1000.0, 8..100),
                alpha in 0.01f64..0.5,
            ) {
                let result = compute_significance(&baseline, &current, alpha, 8);

                if let Some(sig) = result {
                    prop_assert!(sig.p_value.unwrap() >= 0.0, "p-value must be >= 0");
                    prop_assert!(sig.p_value.unwrap() <= 1.0, "p-value must be <= 1");
                    prop_assert_eq!(sig.baseline_samples, baseline.len() as u32);
                    prop_assert_eq!(sig.current_samples, current.len() as u32);
                    prop_assert_eq!(sig.significant, sig.p_value.unwrap() <= sig.alpha);
                }
            }

            // Whenever a result is returned for finite input it is finite and non-negative.
            #[test]
            fn prop_mean_and_variance_finite(values in prop::collection::vec(any::<f64>(), 1..100)) {
                let result = mean_and_variance(&values);

                if values.iter().all(|v| v.is_finite())
                    && let Some((mean, var)) = result
                {
                    prop_assert!(mean.is_finite(), "mean must be finite");
                    prop_assert!(var.is_finite(), "variance must be finite");
                    prop_assert!(var >= 0.0, "variance must be non-negative");
                }
            }

            // A sample compared with itself has p ≈ 1 and is never significant.
            #[test]
            fn prop_identical_samples_p_value_one(
                values in prop::collection::vec(0.0f64..1000.0, 8..50)
            ) {
                let result = compute_significance(&values, &values, 0.05, 8);

                if let Some(sig) = result {
                    prop_assert!(
                        (sig.p_value.unwrap() - 1.0).abs() < 1e-10,
                        "identical samples should have p-value ≈ 1, got {}",
                        sig.p_value.unwrap()
                    );
                    prop_assert!(!sig.significant, "identical samples should not be significant");
                }
            }

            // A shift of at least 50 on values spanning at most 90 is always detected.
            #[test]
            fn prop_shifted_samples_significant(
                values in prop::collection::vec(10.0f64..100.0, 20..50)
                    .prop_filter("values must have variance", |v| {
                        let mean: f64 = v.iter().sum::<f64>() / v.len() as f64;
                        let var: f64 = v.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / v.len() as f64;
                        var > 0.01
                    }),
                shift in 50.0f64..100.0,
            ) {
                let current: Vec<f64> = values.iter().map(|v| v + shift).collect();

                let result = compute_significance(&values, &current, 0.05, 8);

                if let Some(sig) = result {
                    prop_assert!(sig.significant, "large shift should be significant");
                    prop_assert!(sig.p_value.unwrap() < 0.001, "large shift should have small p-value");
                }
            }

            // The same inputs always produce the same result.
            #[test]
            fn prop_significance_deterministic(
                baseline in prop::collection::vec(0.0f64..1000.0, 8..30),
                current in prop::collection::vec(0.0f64..1000.0, 8..30),
            ) {
                let result1 = compute_significance(&baseline, &current, 0.05, 8);
                let result2 = compute_significance(&baseline, &current, 0.05, 8);

                prop_assert_eq!(result1, result2, "significance test should be deterministic");
            }

            // Integer-valued metrics converted to f64 also give p-values in [0, 1].
            #[test]
            fn prop_u64_p_value_in_range(
                baseline in prop::collection::vec(1u64..10000u64, 5..50),
                current in prop::collection::vec(1u64..10000u64, 5..50),
            ) {
                let baseline_f64: Vec<f64> = baseline.iter().map(|&v| v as f64).collect();
                let current_f64: Vec<f64> = current.iter().map(|&v| v as f64).collect();
                if let Some(sig) = compute_significance(&baseline_f64, &current_f64, 0.05, 5) {
                    prop_assert!(sig.p_value.unwrap() >= 0.0, "p-value must be >= 0");
                    prop_assert!(sig.p_value.unwrap() <= 1.0, "p-value must be <= 1");
                }
            }

            #[test]
            fn prop_u64_identical_distributions_not_significant(
                values in prop::collection::vec(1u64..10000u64, 5..50),
            ) {
                let values_f64: Vec<f64> = values.iter().map(|&v| v as f64).collect();
                if let Some(sig) = compute_significance(&values_f64, &values_f64, 0.05, 5) {
                    prop_assert!(!sig.significant, "identical distributions should not be significant");
                }
            }

            #[test]
            fn prop_u64_significance_deterministic(
                baseline in prop::collection::vec(1u64..10000u64, 5..50),
                current in prop::collection::vec(1u64..10000u64, 5..50),
            ) {
                let baseline_f64: Vec<f64> = baseline.iter().map(|&v| v as f64).collect();
                let current_f64: Vec<f64> = current.iter().map(|&v| v as f64).collect();
                let r1 = compute_significance(&baseline_f64, &current_f64, 0.05, 5);
                let r2 = compute_significance(&baseline_f64, &current_f64, 0.05, 5);
                prop_assert_eq!(r1, r2, "significance test must be deterministic");
            }

            #[test]
            fn prop_u64_very_different_distributions_significant(
                values in prop::collection::vec(1u64..10000u64, 5..50),
            ) {
                let baseline_f64: Vec<f64> = values.iter().map(|&v| v as f64).collect();
                // Offset must dwarf std_dev of uniform(1,10000) ≈ 2887 to guarantee significance.
                let current_f64: Vec<f64> = values.iter().map(|&v| v as f64 + 1_000_000.0).collect();
                if let Some(sig) = compute_significance(&baseline_f64, &current_f64, 0.05, 5) {
                    prop_assert!(sig.significant, "very different distributions must be significant");
                }
            }

            // The one-pass result matches a two-pass computation with the n - 1 denominator.
            #[test]
            fn prop_variance_bessel_correction(values in prop::collection::vec(0.0f64..100.0, 3..50)) {
                let result = mean_and_variance(&values);

                if let Some((mean, var)) = result {
                    let n = values.len() as f64;
                    let expected_mean: f64 = values.iter().sum::<f64>() / n;
                    let pop_var: f64 = values.iter()
                        .map(|v| (v - expected_mean).powi(2))
                        .sum::<f64>() / n;

                    if values.len() > 1 {
                        let sample_var = pop_var * n / (n - 1.0);
                        prop_assert!(
                            (var - sample_var).abs() < 1e-10 || (var < 1e-10 && sample_var < 1e-10),
                            "sample variance should use Bessel's correction"
                        );
                    }

                    prop_assert!((mean - expected_mean).abs() < 1e-10);
                }
            }
        }
    }

    mod edge_cases {
        use super::*;

        /// Values near 1e15 keep enough precision for a 1000-unit shift to be detected.
        #[test]
        fn handles_very_large_values() {
            let baseline = vec![
                1e15,
                1e15 + 1.0,
                1e15 - 1.0,
                1e15,
                1e15 + 0.5,
                1e15 - 0.5,
                1e15,
                1e15,
            ];
            let current = vec![1e15 + 1000.0; 8];

            let result = compute_significance(&baseline, &current, 0.05, 8);

            // The baseline has non-zero spread and the shift is far larger than
            // that spread, so a result must exist and must be significant.
            let sig = result.expect("large but finite inputs should produce a result");
            let p = sig.p_value.unwrap();
            assert!((0.0..=1.0).contains(&p), "p-value out of range: {p}");
            assert!(sig.significant);
        }

        #[test]
        fn handles_very_small_values() {
            let baseline = vec![1e-15, 2e-15, 1.5e-15, 1e-15, 2e-15, 1.5e-15, 1e-15, 2e-15];
            let current = vec![1e-10; 8];

            let result = compute_significance(&baseline, &current, 0.05, 8);

            assert!(result.is_some());
            if let Some(sig) = result {
                assert!(sig.significant);
            }
        }

        #[test]
        fn handles_negative_values() {
            let baseline = vec![-100.0, -102.0, -98.0, -101.0, -99.0, -100.0, -101.0, -99.0];
            let current = vec![
                -110.0, -112.0, -108.0, -111.0, -109.0, -110.0, -111.0, -109.0,
            ];

            let result = compute_significance(&baseline, &current, 0.05, 8);

            assert!(result.is_some());
            let sig = result.unwrap();
            assert!(sig.significant);
        }

        #[test]
        fn handles_mixed_sign_values() {
            let baseline = vec![-50.0, -25.0, 0.0, 25.0, 50.0, 75.0, 100.0, 125.0];
            let current = vec![-100.0, -75.0, -50.0, -25.0, 0.0, 25.0, 50.0, 75.0];

            let result = compute_significance(&baseline, &current, 0.05, 8);

            assert!(result.is_some());
        }

        #[test]
        fn exactly_min_samples() {
            let baseline = vec![100.0; 8];
            let current = vec![110.0; 8];

            let result = compute_significance(&baseline, &current, 0.05, 8);

            assert!(result.is_some());
        }

        #[test]
        fn one_below_min_samples() {
            let baseline = vec![100.0; 7];
            let current = vec![110.0; 8];

            let result = compute_significance(&baseline, &current, 0.05, 8);

            assert!(result.is_none());
        }

        #[test]
        fn unequal_sample_sizes() {
            let baseline = vec![100.0; 20];
            let current = vec![110.0; 8];

            let result = compute_significance(&baseline, &current, 0.05, 8);

            assert!(result.is_some());
            let sig = result.unwrap();
            assert_eq!(sig.baseline_samples, 20);
            assert_eq!(sig.current_samples, 8);
        }

        #[test]
        fn alpha_boundary_p_value_equal() {
            let baseline = vec![100.0; 10];
            let current = vec![100.0; 10];

            let result = compute_significance(&baseline, &current, 0.05, 8).unwrap();

            assert_eq!(result.p_value.unwrap(), 1.0);
            assert!(!result.significant);
        }

        #[test]
        fn identical_samples_with_variance_p_value_one() {
            let samples = vec![10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0];
            let result = compute_significance(&samples, &samples, 0.05, 8).unwrap();

            assert_relative_eq!(result.p_value.unwrap(), 1.0, epsilon = 1e-10);
            assert!(!result.significant);
        }

        #[test]
        fn single_sample_returns_none_even_with_min_one() {
            let result = compute_significance(&[42.0], &[99.0], 0.05, 1);

            assert!(result.is_none(), "n<2 means variance is undefined");
        }

        #[test]
        fn zero_variance_both_groups_same_value() {
            let baseline = vec![7.0; 10];
            let current = vec![7.0; 10];

            let sig = compute_significance(&baseline, &current, 0.05, 2).unwrap();

            assert_relative_eq!(sig.p_value.unwrap(), 1.0);
            assert!(!sig.significant);
        }

        #[test]
        fn zero_variance_different_constant_values() {
            let baseline = vec![5.0; 10];
            let current = vec![50.0; 10];

            let sig = compute_significance(&baseline, &current, 0.05, 2).unwrap();

            assert_relative_eq!(sig.p_value.unwrap(), 0.0);
            assert!(sig.significant);
        }

        #[test]
        fn large_sample_size_identical() {
            let samples: Vec<f64> = (0..2000).map(|i| (i as f64).sin() * 100.0).collect();
            let result = compute_significance(&samples, &samples, 0.05, 8).unwrap();

            assert_relative_eq!(result.p_value.unwrap(), 1.0, epsilon = 1e-10);
            assert!(!result.significant);
            assert_eq!(result.baseline_samples, 2000);
            assert_eq!(result.current_samples, 2000);
        }

        #[test]
        fn large_sample_size_with_small_shift() {
            let baseline: Vec<f64> = (0..1500).map(|i| 100.0 + (i as f64 % 7.0)).collect();
            let current: Vec<f64> = baseline.iter().map(|v| v + 0.5).collect();

            let result = compute_significance(&baseline, &current, 0.05, 8).unwrap();

            assert!(result.significant, "large n should detect even tiny shifts");
        }

        #[test]
        fn extreme_difference_large_vs_small() {
            let baseline = vec![1e-10; 10];
            let current = vec![1e10; 10];

            let sig = compute_significance(&baseline, &current, 0.05, 2).unwrap();

            assert!(sig.significant);
            assert_relative_eq!(sig.p_value.unwrap(), 0.0);
        }

        #[test]
        fn extreme_difference_large_vs_tiny_with_variance() {
            let baseline = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
            let current = vec![1e8, 1e8 + 1.0, 1e8 + 2.0, 1e8 + 3.0, 1e8, 1e8, 1e8, 1e8];

            let sig = compute_significance(&baseline, &current, 0.05, 8).unwrap();

            assert!(sig.significant);
            assert!(sig.p_value.unwrap() < 0.001);
        }

        #[test]
        fn all_zeros_both_groups() {
            let baseline = vec![0.0; 10];
            let current = vec![0.0; 10];

            let sig = compute_significance(&baseline, &current, 0.05, 2).unwrap();

            assert_relative_eq!(sig.p_value.unwrap(), 1.0);
            assert!(!sig.significant);
        }

        #[test]
        fn all_zeros_vs_nonzero() {
            let baseline = vec![0.0; 10];
            let current = vec![5.0; 10];

            let sig = compute_significance(&baseline, &current, 0.05, 2).unwrap();

            assert_relative_eq!(sig.p_value.unwrap(), 0.0);
            assert!(sig.significant);
        }
    }
}