Skip to main content

perfgate_stats/
lib.rs

1//! Statistical functions for benchmarking analysis.
2//!
3//! This crate provides pure statistical functions with no I/O dependencies.
4//! It is designed to be used by `perfgate-domain` and can be independently
5//! tested and versioned.
6//!
7//! # Overview
8//!
9//! The crate provides:
//! - Summary statistics (median, min, max, mean, standard deviation) for `u64` and `f64` slices
11//! - Percentile calculation
12//! - Mean and variance computation
13
14pub use perfgate_error::StatsError;
15
16use perfgate_types::{F64Summary, U64Summary};
17use std::cmp::Ordering;
18
19/// Compute min, max, and median for a `u64` slice.
20///
21/// # Errors
22///
23/// Returns [`StatsError::NoSamples`] if the slice is empty.
24///
25/// # Examples
26///
27/// ```
28/// use perfgate_stats::summarize_u64;
29///
30/// let s = summarize_u64(&[10, 30, 20]).unwrap();
31/// assert_eq!(s.median, 20);
32/// assert_eq!(s.min, 10);
33/// assert_eq!(s.max, 30);
34/// ```
35pub fn summarize_u64(values: &[u64]) -> Result<U64Summary, StatsError> {
36    if values.is_empty() {
37        return Err(StatsError::NoSamples);
38    }
39    let mut v = values.to_vec();
40    v.sort_unstable();
41    let min = *v.first().unwrap();
42    let max = *v.last().unwrap();
43    let median = median_u64_sorted(&v);
44
45    let f64_vals: Vec<f64> = values.iter().map(|&x| x as f64).collect();
46    let (mean, stddev) = if let Some((m, var)) = mean_and_variance(&f64_vals) {
47        (Some(m), Some(var.sqrt()))
48    } else {
49        (None, None)
50    };
51
52    Ok(U64Summary {
53        median,
54        min,
55        max,
56        mean,
57        stddev,
58    })
59}
60
61/// Compute min, max, and median for an `f64` slice.
62///
63/// # Errors
64///
65/// Returns [`StatsError::NoSamples`] if the slice is empty.
66///
67/// # Examples
68///
69/// ```
70/// use perfgate_stats::summarize_f64;
71///
72/// let s = summarize_f64(&[1.0, 3.0, 2.0]).unwrap();
73/// assert_eq!(s.median, 2.0);
74/// assert_eq!(s.min, 1.0);
75/// assert_eq!(s.max, 3.0);
76/// ```
77pub fn summarize_f64(values: &[f64]) -> Result<F64Summary, StatsError> {
78    if values.is_empty() {
79        return Err(StatsError::NoSamples);
80    }
81    let mut v = values.to_vec();
82    v.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
83    let min = *v.first().unwrap();
84    let max = *v.last().unwrap();
85    let median = median_f64_sorted(&v);
86
87    let (mean, stddev) = if let Some((m, var)) = mean_and_variance(values) {
88        (Some(m), Some(var.sqrt()))
89    } else {
90        (None, None)
91    };
92
93    Ok(F64Summary {
94        median,
95        min,
96        max,
97        mean,
98        stddev,
99    })
100}
101
/// Median of an already-sorted, non-empty `u64` slice.
///
/// For an odd number of samples this is the middle element. For an even
/// number it is the average of the two middle elements rounded down; the sum
/// is formed in `u128` so it cannot overflow.
///
/// The slice is not checked for sortedness.
///
/// # Panics
///
/// Panics if `sorted` is empty.
pub fn median_u64_sorted(sorted: &[u64]) -> u64 {
    debug_assert!(!sorted.is_empty());
    let upper = sorted.len() / 2;
    if sorted.len() % 2 != 0 {
        return sorted[upper];
    }
    // Widen before adding so that two values near `u64::MAX` cannot overflow.
    let lo = u128::from(sorted[upper - 1]);
    let hi = u128::from(sorted[upper]);
    ((lo + hi) / 2) as u64
}
112
/// Median of an already-sorted, non-empty `f64` slice.
///
/// Returns the middle element for an odd number of samples and the
/// arithmetic mean of the two middle elements for an even number.
///
/// The slice is not checked for sortedness.
///
/// # Panics
///
/// Panics if `sorted` is empty.
pub fn median_f64_sorted(sorted: &[f64]) -> f64 {
    debug_assert!(!sorted.is_empty());
    let half = sorted.len() / 2;
    match sorted.len() % 2 {
        1 => sorted[half],
        _ => (sorted[half - 1] + sorted[half]) / 2.0,
    }
}
123
/// Compute the `q`-th percentile (0.0–1.0) using linear interpolation.
///
/// `q` is clamped to `[0.0, 1.0]`. The rank `q * (len - 1)` is located in the
/// sorted samples and, when it falls between two of them, the result is
/// interpolated linearly between the neighbours.
///
/// Samples are sorted with [`f64::total_cmp`], which is a genuine total order
/// even when NaN is present: a positive NaN sorts after every other value and
/// a negative NaN before every other value.
///
/// Returns `None` if `values` is empty.
///
/// # Examples
///
/// ```
/// use perfgate_stats::percentile;
///
/// let p50 = percentile(vec![1.0, 2.0, 3.0, 4.0, 5.0], 0.5).unwrap();
/// assert_eq!(p50, 3.0);
///
/// let p0 = percentile(vec![10.0, 20.0, 30.0], 0.0).unwrap();
/// assert_eq!(p0, 10.0);
/// ```
pub fn percentile(mut values: Vec<f64>, q: f64) -> Option<f64> {
    if values.is_empty() {
        return None;
    }

    // `partial_cmp(..).unwrap_or(Equal)` is not a total order once a NaN is
    // present, which leaves the sort result unspecified and may make the sort
    // panic. `total_cmp` orders every bit pattern consistently. Values that
    // compare equal under it are bit-identical, so an unstable sort suffices.
    values.sort_unstable_by(f64::total_cmp);

    if values.len() == 1 {
        return Some(values[0]);
    }

    // Fractional index into the sorted samples. A NaN `q` survives `clamp`,
    // and the float-to-integer casts below turn a NaN rank into index 0, so
    // the smallest sample is returned in that case.
    let rank = q.clamp(0.0, 1.0) * (values.len() as f64 - 1.0);
    let lower = rank.floor() as usize;
    let upper = rank.ceil() as usize;

    // The rank landed exactly on a sample; no interpolation needed.
    if lower == upper {
        return Some(values[lower]);
    }

    let weight = rank - lower as f64;
    Some(values[lower] + (values[upper] - values[lower]) * weight)
}
161
/// Compute the sample mean and unbiased sample variance in one pass
/// (Welford's algorithm).
///
/// Returns `None` if `values` is empty or if the mean or variance is not
/// finite. The variance uses Bessel's correction (`n - 1` denominator), is
/// `0.0` for a single sample, and is never reported as negative.
///
/// # Examples
///
/// ```
/// use perfgate_stats::mean_and_variance;
///
/// let (mean, var) = mean_and_variance(&[1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
/// assert!((mean - 3.0).abs() < 1e-10);
/// assert!((var - 2.5).abs() < 1e-10);
///
/// // Single element: variance is 0
/// let (mean, var) = mean_and_variance(&[42.0]).unwrap();
/// assert_eq!(mean, 42.0);
/// assert_eq!(var, 0.0);
/// ```
pub fn mean_and_variance(values: &[f64]) -> Option<(f64, f64)> {
    if values.is_empty() {
        return None;
    }

    // Welford update: carry the running mean and the running sum of squared
    // deviations (`m2`), pairing each sample with its 1-based position.
    let (mean, m2) = values.iter().zip(1u64..).fold(
        (0.0_f64, 0.0_f64),
        |(running_mean, running_m2), (&x, k)| {
            let before = x - running_mean;
            let next_mean = running_mean + before / k as f64;
            let after = x - next_mean;
            (next_mean, running_m2 + before * after)
        },
    );

    let count = values.len();
    let var = if count > 1 {
        m2 / (count as f64 - 1.0)
    } else {
        0.0
    };

    // Clamp at zero so callers can take a square root of the result.
    (mean.is_finite() && var.is_finite()).then(|| (mean, var.max(0.0)))
}
207
/// Example-based unit tests for the summary, percentile, and mean/variance
/// functions.
#[cfg(test)]
mod tests {
    use super::*;

    // --- empty input -----------------------------------------------------

    #[test]
    fn summarize_u64_empty_returns_error() {
        let result = summarize_u64(&[]);
        assert!(matches!(result, Err(StatsError::NoSamples)));
    }

    #[test]
    fn summarize_f64_empty_returns_error() {
        let result = summarize_f64(&[]);
        assert!(matches!(result, Err(StatsError::NoSamples)));
    }

    // --- single element --------------------------------------------------

    #[test]
    fn summarize_u64_single_element() {
        let summary = summarize_u64(&[42]).unwrap();
        assert_eq!(summary.median, 42);
        assert_eq!(summary.min, 42);
        assert_eq!(summary.max, 42);
    }

    #[test]
    fn summarize_f64_single_element() {
        let summary = summarize_f64(&[42.0]).unwrap();
        assert_eq!(summary.median, 42.0);
        assert_eq!(summary.min, 42.0);
        assert_eq!(summary.max, 42.0);
    }

    // --- two elements ----------------------------------------------------

    #[test]
    fn summarize_u64_two_elements() {
        let summary = summarize_u64(&[10, 20]).unwrap();
        assert_eq!(summary.median, 15);
        assert_eq!(summary.min, 10);
        assert_eq!(summary.max, 20);
    }

    #[test]
    fn summarize_f64_two_elements() {
        let summary = summarize_f64(&[10.0, 20.0]).unwrap();
        assert_eq!(summary.median, 15.0);
        assert_eq!(summary.min, 10.0);
        assert_eq!(summary.max, 20.0);
    }

    // --- odd length, unsorted input --------------------------------------

    #[test]
    fn summarize_u64_odd_length() {
        let summary = summarize_u64(&[10, 30, 20]).unwrap();
        assert_eq!(summary.median, 20);
        assert_eq!(summary.min, 10);
        assert_eq!(summary.max, 30);
    }

    #[test]
    fn summarize_f64_odd_length() {
        let summary = summarize_f64(&[10.0, 30.0, 20.0]).unwrap();
        assert_eq!(summary.median, 20.0);
        assert_eq!(summary.min, 10.0);
        assert_eq!(summary.max, 30.0);
    }

    // --- even-length integer median --------------------------------------

    #[test]
    fn summarize_u64_even_length_median_rounds_down() {
        // The middle pair (20, 21) has an odd sum, so the exact average 20.5
        // is not representable and must be rounded down to 20.
        let summary = summarize_u64(&[10, 20, 21, 40]).unwrap();
        assert_eq!(summary.median, 20);
    }

    #[test]
    fn summarize_u64_even_length_median_exact() {
        // The middle pair (20, 30) averages to exactly 25.
        let summary = summarize_u64(&[10, 20, 30, 40]).unwrap();
        assert_eq!(summary.median, 25);
    }

    #[test]
    fn summarize_u64_large_values_no_overflow() {
        // Adding the two middle values directly would overflow `u64`.
        let values = [u64::MAX, u64::MAX - 1];
        let summary = summarize_u64(&values).unwrap();
        assert_eq!(summary.min, u64::MAX - 1);
        assert_eq!(summary.max, u64::MAX);
        assert_eq!(summary.median, u64::MAX - 1);
    }

    // --- percentile ------------------------------------------------------

    #[test]
    fn percentile_empty_returns_none() {
        assert!(percentile(vec![], 0.5).is_none());
    }

    #[test]
    fn percentile_single_element() {
        let p = percentile(vec![42.0], 0.5).unwrap();
        assert_eq!(p, 42.0);
    }

    #[test]
    fn percentile_zero_is_min() {
        let p = percentile(vec![1.0, 2.0, 3.0, 4.0, 5.0], 0.0).unwrap();
        assert_eq!(p, 1.0);
    }

    #[test]
    fn percentile_one_is_max() {
        let p = percentile(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1.0).unwrap();
        assert_eq!(p, 5.0);
    }

    #[test]
    fn percentile_half_is_median_odd() {
        let p = percentile(vec![1.0, 2.0, 3.0, 4.0, 5.0], 0.5).unwrap();
        assert_eq!(p, 3.0);
    }

    #[test]
    fn percentile_interpolates_between_samples() {
        // Rank 0.25 lies a quarter of the way from 10.0 to 20.0.
        let p = percentile(vec![10.0, 20.0], 0.25).unwrap();
        assert_eq!(p, 12.5);
    }

    #[test]
    fn percentile_clamps_out_of_range_q() {
        assert_eq!(percentile(vec![1.0, 2.0, 3.0], -1.0).unwrap(), 1.0);
        assert_eq!(percentile(vec![1.0, 2.0, 3.0], 2.0).unwrap(), 3.0);
    }

    // --- mean and variance -----------------------------------------------

    #[test]
    fn mean_and_variance_empty_returns_none() {
        assert!(mean_and_variance(&[]).is_none());
    }

    #[test]
    fn mean_and_variance_single_element() {
        let (mean, var) = mean_and_variance(&[42.0]).unwrap();
        assert_eq!(mean, 42.0);
        assert_eq!(var, 0.0);
    }

    #[test]
    fn mean_and_variance_basic() {
        let (mean, var) = mean_and_variance(&[1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
        assert!((mean - 3.0).abs() < 1e-10);
        assert!((var - 2.5).abs() < 1e-10);
    }
}
335
/// Property-based tests (proptest) checking the statistics functions against
/// straightforward reference computations on generated inputs.
#[cfg(test)]
mod property_tests {
    use super::*;
    use proptest::prelude::*;

    /// Reference median for a sorted `u64` slice, averaging in `u128`.
    fn expected_median_u64(sorted: &[u64]) -> u64 {
        let mid = sorted.len() / 2;
        if sorted.len() % 2 == 1 {
            return sorted[mid];
        }
        let pair_sum = sorted[mid - 1] as u128 + sorted[mid] as u128;
        (pair_sum / 2) as u64
    }

    /// Finite floats in a bounded range.
    fn finite_f64_strategy() -> impl Strategy<Value = f64> {
        -1e100f64..1e100f64
    }

    /// Values in the top tenth of the `u64` range.
    fn large_u64_strategy() -> impl Strategy<Value = u64> {
        let floor = u64::MAX - (u64::MAX / 10);
        floor..=u64::MAX
    }

    proptest! {
        // min <= median <= max for any non-empty u64 input.
        #[test]
        fn prop_summarize_u64_ordering(values in prop::collection::vec(any::<u64>(), 1..100)) {
            let s = summarize_u64(&values).expect("non-empty vec should succeed");
            prop_assert!(s.min <= s.median);
            prop_assert!(s.median <= s.max);
        }

        // All three order statistics match a reference computed from a sorted copy.
        #[test]
        fn prop_summarize_u64_correctness(values in prop::collection::vec(any::<u64>(), 1..100)) {
            let s = summarize_u64(&values).expect("non-empty vec should succeed");
            let mut reference = values.clone();
            reference.sort_unstable();
            prop_assert_eq!(s.min, *reference.first().unwrap());
            prop_assert_eq!(s.max, *reference.last().unwrap());
            prop_assert_eq!(s.median, expected_median_u64(&reference));
        }

        // A single sample is its own min, max, and median.
        #[test]
        fn prop_summarize_u64_single_element(value in any::<u64>()) {
            let s = summarize_u64(&[value]).unwrap();
            prop_assert_eq!(s.min, value);
            prop_assert_eq!(s.max, value);
            prop_assert_eq!(s.median, value);
        }

        // min <= median <= max for any non-empty finite f64 input.
        #[test]
        fn prop_summarize_f64_ordering(values in prop::collection::vec(finite_f64_strategy(), 1..100)) {
            let s = summarize_f64(&values).expect("non-empty vec should succeed");
            prop_assert!(s.min <= s.median);
            prop_assert!(s.median <= s.max);
        }

        // The median stays correct when the middle pair would overflow a u64 sum.
        #[test]
        fn prop_median_u64_overflow_handling(values in prop::collection::vec(large_u64_strategy(), 2..50)) {
            let s = summarize_u64(&values).expect("non-empty vec should succeed");
            let mut reference = values.clone();
            reference.sort_unstable();
            let want = expected_median_u64(&reference);
            prop_assert_eq!(s.median, want);
        }

        // p0 is the minimum, p100 the maximum, and p50 lies between them.
        #[test]
        fn prop_percentile_bounds(values in prop::collection::vec(finite_f64_strategy(), 1..50)) {
            let lo = values.iter().copied().fold(f64::INFINITY, f64::min);
            let hi = values.iter().copied().fold(f64::NEG_INFINITY, f64::max);
            let p0 = percentile(values.clone(), 0.0).unwrap();
            let p100 = percentile(values.clone(), 1.0).unwrap();
            let p50 = percentile(values.clone(), 0.5).unwrap();
            prop_assert!((p0 - lo).abs() < f64::EPSILON);
            prop_assert!((p100 - hi).abs() < f64::EPSILON);
            prop_assert!(p50 >= lo && p50 <= hi);
        }

        // One-pass mean/variance agrees with a naive two-pass computation.
        #[test]
        fn prop_mean_and_variance_correctness(values in prop::collection::vec(finite_f64_strategy(), 1..50)) {
            let outcome = mean_and_variance(&values);
            prop_assert!(outcome.is_some());
            let (mean, var) = outcome.unwrap();

            let count = values.len();
            let expected_mean: f64 = values.iter().sum::<f64>() / count as f64;
            let mean_tol = expected_mean.abs().max(1.0) * 1e-9;
            prop_assert!((mean - expected_mean).abs() < mean_tol,
                "mean diff {} exceeds tolerance {}", (mean - expected_mean).abs(), mean_tol);

            if count > 1 {
                let squared_deviations: f64 = values.iter()
                    .map(|v| (v - expected_mean).powi(2))
                    .sum::<f64>();
                let expected_var: f64 = squared_deviations / (count - 1) as f64;
                let var_tol = expected_var.abs().max(1.0) * 1e-6;
                prop_assert!((var - expected_var).abs() < var_tol,
                    "var diff {} exceeds tolerance {}", (var - expected_var).abs(), var_tol);
            } else {
                prop_assert_eq!(var, 0.0);
            }
        }

        // Results are finite and the variance is non-negative.
        #[test]
        fn prop_mean_and_variance_finite(values in prop::collection::vec(finite_f64_strategy(), 1..50)) {
            let (mean, var) = mean_and_variance(&values).unwrap();
            prop_assert!(mean.is_finite());
            prop_assert!(var.is_finite());
            prop_assert!(var >= 0.0);
        }

        // Percentiles are monotone in `q`: p95 is never below p50.
        #[test]
        fn prop_p95_gte_median(values in prop::collection::vec(finite_f64_strategy(), 2..100)) {
            let p50 = percentile(values.clone(), 0.5).unwrap();
            let p95 = percentile(values, 0.95).unwrap();
            prop_assert!(p95 >= p50, "p95 ({}) should be >= median ({})", p95, p50);
        }

        // The reported mean matches sum / count whenever that quotient is finite.
        #[test]
        fn prop_mean_equals_sum_over_count(values in prop::collection::vec(finite_f64_strategy(), 1..50)) {
            if let Some((mean, _)) = mean_and_variance(&values) {
                let expected = values.iter().sum::<f64>() / values.len() as f64;
                if expected.is_finite() {
                    let tol = expected.abs().max(1.0) * 1e-10;
                    prop_assert!((mean - expected).abs() < tol,
                        "mean={}, expected={}, diff={}", mean, expected, (mean - expected).abs());
                }
            }
        }

        // min and max match the extremes of the caller's slice.
        #[test]
        fn prop_summarize_u64_preserves_input(values in prop::collection::vec(any::<u64>(), 1..100)) {
            let s = summarize_u64(&values).unwrap();
            prop_assert_eq!(s.min, *values.iter().min().unwrap());
            prop_assert_eq!(s.max, *values.iter().max().unwrap());
        }
    }
}