Skip to main content

perfgate_stats/
lib.rs

1//! Statistical functions for benchmarking analysis.
2//!
3//! This crate provides pure statistical functions with no I/O dependencies.
4//! It is designed to be used by `perfgate-domain` and can be independently
5//! tested and versioned.
6//!
7//! Part of the [perfgate](https://github.com/EffortlessMetrics/perfgate) workspace.
8//!
9//! # Overview
10//!
11//! The crate provides:
12//! - Summary statistics (median, min, max) for `u64` and `f64` slices
13//! - Percentile calculation
14//! - Mean and variance computation
15//! - Trend analysis with linear regression and drift classification
16
17pub mod trend;
18
19pub use perfgate_error::StatsError;
20
21use perfgate_types::{F64Summary, U64Summary};
22use std::cmp::Ordering;
23
24/// Compute min, max, and median for a `u64` slice.
25///
26/// # Errors
27///
28/// Returns [`StatsError::NoSamples`] if the slice is empty.
29///
30/// # Examples
31///
32/// ```
33/// use perfgate_stats::summarize_u64;
34///
35/// let s = summarize_u64(&[10, 30, 20]).unwrap();
36/// assert_eq!(s.median, 20);
37/// assert_eq!(s.min, 10);
38/// assert_eq!(s.max, 30);
39/// ```
40pub fn summarize_u64(values: &[u64]) -> Result<U64Summary, StatsError> {
41    if values.is_empty() {
42        return Err(StatsError::NoSamples);
43    }
44    let mut v = values.to_vec();
45    v.sort_unstable();
46    let min = *v.first().unwrap();
47    let max = *v.last().unwrap();
48    let median = median_u64_sorted(&v);
49
50    let f64_vals: Vec<f64> = values.iter().map(|&x| x as f64).collect();
51    let (mean, stddev) = if let Some((m, var)) = mean_and_variance(&f64_vals) {
52        (Some(m), Some(var.sqrt()))
53    } else {
54        (None, None)
55    };
56
57    Ok(U64Summary {
58        median,
59        min,
60        max,
61        mean,
62        stddev,
63    })
64}
65
66/// Compute min, max, and median for an `f64` slice.
67///
68/// # Errors
69///
70/// Returns [`StatsError::NoSamples`] if the slice is empty.
71///
72/// # Examples
73///
74/// ```
75/// use perfgate_stats::summarize_f64;
76///
77/// let s = summarize_f64(&[1.0, 3.0, 2.0]).unwrap();
78/// assert_eq!(s.median, 2.0);
79/// assert_eq!(s.min, 1.0);
80/// assert_eq!(s.max, 3.0);
81/// ```
82pub fn summarize_f64(values: &[f64]) -> Result<F64Summary, StatsError> {
83    if values.is_empty() {
84        return Err(StatsError::NoSamples);
85    }
86    let mut v = values.to_vec();
87    v.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
88    let min = *v.first().unwrap();
89    let max = *v.last().unwrap();
90    let median = median_f64_sorted(&v);
91
92    let (mean, stddev) = if let Some((m, var)) = mean_and_variance(values) {
93        (Some(m), Some(var.sqrt()))
94    } else {
95        (None, None)
96    };
97
98    Ok(F64Summary {
99        median,
100        min,
101        max,
102        mean,
103        stddev,
104    })
105}
106
107pub fn median_u64_sorted(sorted: &[u64]) -> u64 {
108    debug_assert!(!sorted.is_empty());
109    let n = sorted.len();
110    let mid = n / 2;
111    if n % 2 == 1 {
112        sorted[mid]
113    } else {
114        (sorted[mid - 1] / 2) + (sorted[mid] / 2) + ((sorted[mid - 1] % 2 + sorted[mid] % 2) / 2)
115    }
116}
117
118pub fn median_f64_sorted(sorted: &[f64]) -> f64 {
119    debug_assert!(!sorted.is_empty());
120    let n = sorted.len();
121    let mid = n / 2;
122    if n % 2 == 1 {
123        sorted[mid]
124    } else {
125        (sorted[mid - 1] + sorted[mid]) / 2.0
126    }
127}
128
129/// Compute the `q`-th percentile (0.0–1.0) using linear interpolation.
130///
131/// Returns `None` if `values` is empty.
132///
133/// # Examples
134///
135/// ```
136/// use perfgate_stats::percentile;
137///
138/// let p50 = percentile(vec![1.0, 2.0, 3.0, 4.0, 5.0], 0.5).unwrap();
139/// assert_eq!(p50, 3.0);
140///
141/// let p0 = percentile(vec![10.0, 20.0, 30.0], 0.0).unwrap();
142/// assert_eq!(p0, 10.0);
143/// ```
144pub fn percentile(mut values: Vec<f64>, q: f64) -> Option<f64> {
145    if values.is_empty() {
146        return None;
147    }
148
149    values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
150
151    if values.len() == 1 {
152        return Some(values[0]);
153    }
154
155    let rank = q.clamp(0.0, 1.0) * (values.len() as f64 - 1.0);
156    let lower = rank.floor() as usize;
157    let upper = rank.ceil() as usize;
158
159    if lower == upper {
160        return Some(values[lower]);
161    }
162
163    let weight = rank - lower as f64;
164    Some(values[lower] + (values[upper] - values[lower]) * weight)
165}
166
167/// Compute sample mean and unbiased variance (Welford's algorithm).
168///
169/// Returns `None` if `values` is empty or the result is non-finite.
170/// Variance uses Bessel's correction (nāˆ’1 denominator).
171///
172/// # Examples
173///
174/// ```
175/// use perfgate_stats::mean_and_variance;
176///
177/// let (mean, var) = mean_and_variance(&[1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
178/// assert!((mean - 3.0).abs() < 1e-10);
179/// assert!((var - 2.5).abs() < 1e-10);
180///
181/// // Single element: variance is 0
182/// let (mean, var) = mean_and_variance(&[42.0]).unwrap();
183/// assert_eq!(mean, 42.0);
184/// assert_eq!(var, 0.0);
185/// ```
186pub fn mean_and_variance(values: &[f64]) -> Option<(f64, f64)> {
187    if values.is_empty() {
188        return None;
189    }
190
191    // Welford's online one-pass algorithm for numerical stability
192    let mut n: u64 = 0;
193    let mut mean = 0.0_f64;
194    let mut m2 = 0.0_f64;
195
196    for &x in values {
197        n += 1;
198        let delta = x - mean;
199        mean += delta / n as f64;
200        let delta2 = x - mean;
201        m2 += delta * delta2;
202    }
203
204    let var = if n > 1 { m2 / (n as f64 - 1.0) } else { 0.0 };
205
206    if mean.is_finite() && var.is_finite() {
207        Some((mean, var.max(0.0)))
208    } else {
209        None
210    }
211}
212
213#[cfg(test)]
214mod tests {
215    use super::*;
216
217    #[test]
218    fn summarize_u64_empty_returns_error() {
219        let result = summarize_u64(&[]);
220        assert!(matches!(result, Err(StatsError::NoSamples)));
221    }
222
223    #[test]
224    fn summarize_f64_empty_returns_error() {
225        let result = summarize_f64(&[]);
226        assert!(matches!(result, Err(StatsError::NoSamples)));
227    }
228
229    #[test]
230    fn summarize_u64_single_element() {
231        let summary = summarize_u64(&[42]).unwrap();
232        assert_eq!(summary.median, 42);
233        assert_eq!(summary.min, 42);
234        assert_eq!(summary.max, 42);
235    }
236
237    #[test]
238    fn summarize_f64_single_element() {
239        let summary = summarize_f64(&[42.0]).unwrap();
240        assert_eq!(summary.median, 42.0);
241        assert_eq!(summary.min, 42.0);
242        assert_eq!(summary.max, 42.0);
243    }
244
245    #[test]
246    fn summarize_u64_two_elements() {
247        let summary = summarize_u64(&[10, 20]).unwrap();
248        assert_eq!(summary.median, 15);
249        assert_eq!(summary.min, 10);
250        assert_eq!(summary.max, 20);
251    }
252
253    #[test]
254    fn summarize_f64_two_elements() {
255        let summary = summarize_f64(&[10.0, 20.0]).unwrap();
256        assert_eq!(summary.median, 15.0);
257        assert_eq!(summary.min, 10.0);
258        assert_eq!(summary.max, 20.0);
259    }
260
261    #[test]
262    fn summarize_u64_odd_length() {
263        let summary = summarize_u64(&[10, 30, 20]).unwrap();
264        assert_eq!(summary.median, 20);
265        assert_eq!(summary.min, 10);
266        assert_eq!(summary.max, 30);
267    }
268
269    #[test]
270    fn summarize_f64_odd_length() {
271        let summary = summarize_f64(&[10.0, 30.0, 20.0]).unwrap();
272        assert_eq!(summary.median, 20.0);
273        assert_eq!(summary.min, 10.0);
274        assert_eq!(summary.max, 30.0);
275    }
276
277    #[test]
278    fn summarize_u64_even_length_median_rounds_down() {
279        let summary = summarize_u64(&[10, 20, 30, 40]).unwrap();
280        assert_eq!(summary.median, 25);
281    }
282
283    #[test]
284    fn summarize_u64_large_values_no_overflow() {
285        let values = [u64::MAX, u64::MAX - 1];
286        let summary = summarize_u64(&values).unwrap();
287        assert_eq!(summary.min, u64::MAX - 1);
288        assert_eq!(summary.max, u64::MAX);
289        assert_eq!(summary.median, u64::MAX - 1);
290    }
291
292    #[test]
293    fn percentile_empty_returns_none() {
294        assert!(percentile(vec![], 0.5).is_none());
295    }
296
297    #[test]
298    fn percentile_single_element() {
299        let p = percentile(vec![42.0], 0.5).unwrap();
300        assert_eq!(p, 42.0);
301    }
302
303    #[test]
304    fn percentile_zero_is_min() {
305        let p = percentile(vec![1.0, 2.0, 3.0, 4.0, 5.0], 0.0).unwrap();
306        assert_eq!(p, 1.0);
307    }
308
309    #[test]
310    fn percentile_one_is_max() {
311        let p = percentile(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1.0).unwrap();
312        assert_eq!(p, 5.0);
313    }
314
315    #[test]
316    fn percentile_half_is_median_odd() {
317        let p = percentile(vec![1.0, 2.0, 3.0, 4.0, 5.0], 0.5).unwrap();
318        assert_eq!(p, 3.0);
319    }
320
321    #[test]
322    fn mean_and_variance_empty_returns_none() {
323        assert!(mean_and_variance(&[]).is_none());
324    }
325
326    #[test]
327    fn mean_and_variance_single_element() {
328        let (mean, var) = mean_and_variance(&[42.0]).unwrap();
329        assert_eq!(mean, 42.0);
330        assert_eq!(var, 0.0);
331    }
332
333    #[test]
334    fn mean_and_variance_basic() {
335        let (mean, var) = mean_and_variance(&[1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
336        assert!((mean - 3.0).abs() < 1e-10);
337        assert!((var - 2.5).abs() < 1e-10);
338    }
339}
340
341#[cfg(test)]
342mod property_tests {
343    use super::*;
344    use proptest::prelude::*;
345
346    fn expected_median_u64(sorted: &[u64]) -> u64 {
347        let n = sorted.len();
348        let mid = n / 2;
349        if n % 2 == 1 {
350            sorted[mid]
351        } else {
352            let a = sorted[mid - 1] as u128;
353            let b = sorted[mid] as u128;
354            ((a + b) / 2) as u64
355        }
356    }
357
358    fn finite_f64_strategy() -> impl Strategy<Value = f64> {
359        -1e100f64..1e100f64
360    }
361
362    fn large_u64_strategy() -> impl Strategy<Value = u64> {
363        let min_val = u64::MAX - (u64::MAX / 10);
364        min_val..=u64::MAX
365    }
366
367    proptest! {
368        #[test]
369        fn prop_summarize_u64_ordering(values in prop::collection::vec(any::<u64>(), 1..100)) {
370            let summary = summarize_u64(&values).expect("non-empty vec should succeed");
371            prop_assert!(summary.min <= summary.median);
372            prop_assert!(summary.median <= summary.max);
373        }
374
375        #[test]
376        fn prop_summarize_u64_correctness(values in prop::collection::vec(any::<u64>(), 1..100)) {
377            let summary = summarize_u64(&values).expect("non-empty vec should succeed");
378            let mut sorted = values.clone();
379            sorted.sort_unstable();
380            prop_assert_eq!(summary.min, *sorted.first().unwrap());
381            prop_assert_eq!(summary.max, *sorted.last().unwrap());
382            prop_assert_eq!(summary.median, expected_median_u64(&sorted));
383        }
384
385        #[test]
386        fn prop_summarize_u64_single_element(value: u64) {
387            let summary = summarize_u64(&[value]).unwrap();
388            prop_assert_eq!(summary.min, value);
389            prop_assert_eq!(summary.max, value);
390            prop_assert_eq!(summary.median, value);
391        }
392
393        #[test]
394        fn prop_summarize_f64_ordering(values in prop::collection::vec(finite_f64_strategy(), 1..100)) {
395            let summary = summarize_f64(&values).expect("non-empty vec should succeed");
396            prop_assert!(summary.min <= summary.median);
397            prop_assert!(summary.median <= summary.max);
398        }
399
400        #[test]
401        fn prop_median_u64_overflow_handling(values in prop::collection::vec(large_u64_strategy(), 2..50)) {
402            let summary = summarize_u64(&values).expect("non-empty vec should succeed");
403            let mut sorted = values.clone();
404            sorted.sort_unstable();
405            let expected = expected_median_u64(&sorted);
406            prop_assert_eq!(summary.median, expected);
407        }
408
409        #[test]
410        fn prop_percentile_bounds(values in prop::collection::vec(finite_f64_strategy(), 1..50)) {
411            let min_val = values.iter().cloned().fold(f64::INFINITY, f64::min);
412            let max_val = values.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
413            let p0 = percentile(values.clone(), 0.0).unwrap();
414            let p100 = percentile(values.clone(), 1.0).unwrap();
415            let p50 = percentile(values.clone(), 0.5).unwrap();
416            prop_assert!((p0 - min_val).abs() < f64::EPSILON);
417            prop_assert!((p100 - max_val).abs() < f64::EPSILON);
418            prop_assert!(p50 >= min_val && p50 <= max_val);
419        }
420
421        #[test]
422        fn prop_mean_and_variance_correctness(values in prop::collection::vec(finite_f64_strategy(), 1..50)) {
423            let result = mean_and_variance(&values);
424            prop_assert!(result.is_some());
425            let (mean, var) = result.unwrap();
426            let expected_mean: f64 = values.iter().sum::<f64>() / values.len() as f64;
427            let mean_tol = expected_mean.abs().max(1.0) * 1e-9;
428            prop_assert!((mean - expected_mean).abs() < mean_tol,
429                "mean diff {} exceeds tolerance {}", (mean - expected_mean).abs(), mean_tol);
430            if values.len() > 1 {
431                let expected_var: f64 = values.iter()
432                    .map(|v| (v - expected_mean).powi(2))
433                    .sum::<f64>() / (values.len() - 1) as f64;
434                let var_tol = expected_var.abs().max(1.0) * 1e-6;
435                prop_assert!((var - expected_var).abs() < var_tol,
436                    "var diff {} exceeds tolerance {}", (var - expected_var).abs(), var_tol);
437            } else {
438                prop_assert_eq!(var, 0.0);
439            }
440        }
441
442        #[test]
443        fn prop_mean_and_variance_finite(values in prop::collection::vec(finite_f64_strategy(), 1..50)) {
444            let (mean, var) = mean_and_variance(&values).unwrap();
445            prop_assert!(mean.is_finite());
446            prop_assert!(var.is_finite());
447            prop_assert!(var >= 0.0);
448        }
449
450        #[test]
451        fn prop_p95_gte_median(values in prop::collection::vec(finite_f64_strategy(), 2..100)) {
452            let p50 = percentile(values.clone(), 0.5).unwrap();
453            let p95 = percentile(values, 0.95).unwrap();
454            prop_assert!(p95 >= p50, "p95 ({}) should be >= median ({})", p95, p50);
455        }
456
457        #[test]
458        fn prop_mean_equals_sum_over_count(values in prop::collection::vec(finite_f64_strategy(), 1..50)) {
459            if let Some((mean, _)) = mean_and_variance(&values) {
460                let expected = values.iter().sum::<f64>() / values.len() as f64;
461                if expected.is_finite() {
462                    let tol = expected.abs().max(1.0) * 1e-10;
463                    prop_assert!((mean - expected).abs() < tol,
464                        "mean={}, expected={}, diff={}", mean, expected, (mean - expected).abs());
465                }
466            }
467        }
468
469        #[test]
470        fn prop_summarize_u64_preserves_input(values in prop::collection::vec(any::<u64>(), 1..100)) {
471            let summary = summarize_u64(&values).unwrap();
472            prop_assert_eq!(summary.min, *values.iter().min().unwrap());
473            prop_assert_eq!(summary.max, *values.iter().max().unwrap());
474        }
475    }
476}