Skip to main content

perfgate_stats/
lib.rs

1//! Statistical functions for benchmarking analysis.
2//!
3//! This crate provides pure statistical functions with no I/O dependencies.
4//! It is designed to be used by `perfgate-domain` and can be independently
5//! tested and versioned.
6//!
7//! # Overview
8//!
9//! The crate provides:
10//! - Summary statistics (median, min, max) for `u64` and `f64` slices
11//! - Percentile calculation
12//! - Mean and variance computation
13
14pub use perfgate_error::StatsError;
15
16use perfgate_types::{F64Summary, U64Summary};
17use std::cmp::Ordering;
18
19/// Compute min, max, and median for a `u64` slice.
20///
21/// # Errors
22///
23/// Returns [`StatsError::NoSamples`] if the slice is empty.
24///
25/// # Examples
26///
27/// ```
28/// use perfgate_stats::summarize_u64;
29///
30/// let s = summarize_u64(&[10, 30, 20]).unwrap();
31/// assert_eq!(s.median, 20);
32/// assert_eq!(s.min, 10);
33/// assert_eq!(s.max, 30);
34/// ```
35pub fn summarize_u64(values: &[u64]) -> Result<U64Summary, StatsError> {
36    if values.is_empty() {
37        return Err(StatsError::NoSamples);
38    }
39    let mut v = values.to_vec();
40    v.sort_unstable();
41    let min = *v.first().unwrap();
42    let max = *v.last().unwrap();
43    let median = median_u64_sorted(&v);
44    Ok(U64Summary { median, min, max })
45}
46
47/// Compute min, max, and median for an `f64` slice.
48///
49/// # Errors
50///
51/// Returns [`StatsError::NoSamples`] if the slice is empty.
52///
53/// # Examples
54///
55/// ```
56/// use perfgate_stats::summarize_f64;
57///
58/// let s = summarize_f64(&[1.0, 3.0, 2.0]).unwrap();
59/// assert_eq!(s.median, 2.0);
60/// assert_eq!(s.min, 1.0);
61/// assert_eq!(s.max, 3.0);
62/// ```
63pub fn summarize_f64(values: &[f64]) -> Result<F64Summary, StatsError> {
64    if values.is_empty() {
65        return Err(StatsError::NoSamples);
66    }
67    let mut v = values.to_vec();
68    v.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
69    let min = *v.first().unwrap();
70    let max = *v.last().unwrap();
71    let median = median_f64_sorted(&v);
72    Ok(F64Summary { median, min, max })
73}
74
/// Median of an already-sorted `u64` slice.
///
/// For an odd number of elements this is the middle element. For an even
/// number it is the average of the two middle elements, rounded down, and
/// computed without overflowing `u64`.
///
/// The slice is assumed to be sorted; this is not checked.
///
/// # Panics
///
/// Panics if `sorted` is empty.
pub fn median_u64_sorted(sorted: &[u64]) -> u64 {
    debug_assert!(!sorted.is_empty());
    let len = sorted.len();
    let upper = len / 2;
    match len % 2 {
        1 => sorted[upper],
        _ => {
            // Widen to u128 so the sum of two u64 values cannot overflow.
            let wide_sum = u128::from(sorted[upper - 1]) + u128::from(sorted[upper]);
            // The halved sum never exceeds the larger operand, so it fits in u64.
            (wide_sum / 2) as u64
        }
    }
}
85
/// Median of an already-sorted `f64` slice.
///
/// For an odd number of elements this is the middle element. For an even
/// number it is the arithmetic mean of the two middle elements. The mean is
/// computed so that two finite neighbours never overflow to infinity (for
/// example `[f64::MAX, f64::MAX]` yields `f64::MAX`).
///
/// The slice is assumed to be sorted; this is not checked.
///
/// # Panics
///
/// Panics if `sorted` is empty.
pub fn median_f64_sorted(sorted: &[f64]) -> f64 {
    debug_assert!(!sorted.is_empty());
    let n = sorted.len();
    let mid = n / 2;
    if n % 2 == 1 {
        return sorted[mid];
    }

    let (lo, hi) = (sorted[mid - 1], sorted[mid]);
    let sum = lo + hi;
    if sum.is_finite() {
        // Common case: identical to the straightforward `(lo + hi) / 2`.
        sum / 2.0
    } else {
        // The sum overflowed (or an operand was already non-finite). Halving
        // first keeps finite inputs finite; infinities and NaN propagate as
        // they did before.
        lo / 2.0 + hi / 2.0
    }
}
96
/// Compute the `q`-th percentile (0.0–1.0) using linear interpolation.
///
/// The values are sorted, `q` is clamped to `[0.0, 1.0]`, and the result is
/// interpolated linearly between the two elements surrounding the fractional
/// rank `q * (len - 1)`.
///
/// Sorting uses [`f64::total_cmp`], so the outcome is deterministic even if
/// `values` contains NaN: a NaN with a positive sign orders after every
/// number, a NaN with a negative sign orders before every number.
///
/// Returns `None` if `values` is empty.
///
/// # Examples
///
/// ```
/// use perfgate_stats::percentile;
///
/// let p50 = percentile(vec![1.0, 2.0, 3.0, 4.0, 5.0], 0.5).unwrap();
/// assert_eq!(p50, 3.0);
///
/// let p0 = percentile(vec![10.0, 20.0, 30.0], 0.0).unwrap();
/// assert_eq!(p0, 10.0);
/// ```
pub fn percentile(mut values: Vec<f64>, q: f64) -> Option<f64> {
    if values.is_empty() {
        return None;
    }

    // `sort_by` requires a total order. Mapping incomparable pairs (NaN) to
    // `Equal` is not one: the resulting order is unspecified and the sort is
    // allowed to panic. `total_cmp` is total over every `f64` bit pattern.
    values.sort_by(|a, b| -> Ordering { a.total_cmp(b) });

    if values.len() == 1 {
        return Some(values[0]);
    }

    // Fractional index into the sorted values; always within [0, len - 1]
    // after clamping, so the casts below cannot go out of bounds.
    let rank = q.clamp(0.0, 1.0) * (values.len() as f64 - 1.0);
    let lower = rank.floor() as usize;
    let upper = rank.ceil() as usize;

    // The rank landed exactly on an element: no interpolation needed.
    if lower == upper {
        return Some(values[lower]);
    }

    let weight = rank - lower as f64;
    Some(values[lower] + (values[upper] - values[lower]) * weight)
}
134
/// Compute the sample mean and the unbiased sample variance in one pass.
///
/// Uses Welford's online algorithm, which updates a running mean and a
/// running sum of squared deviations per element. The variance uses Bessel's
/// correction (an `n − 1` denominator) and is reported as `0.0` for a single
/// sample. The returned variance is never negative.
///
/// Returns `None` if `values` is empty or if the mean or variance comes out
/// non-finite.
///
/// # Examples
///
/// ```
/// use perfgate_stats::mean_and_variance;
///
/// let (mean, var) = mean_and_variance(&[1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
/// assert!((mean - 3.0).abs() < 1e-10);
/// assert!((var - 2.5).abs() < 1e-10);
///
/// // Single element: variance is 0
/// let (mean, var) = mean_and_variance(&[42.0]).unwrap();
/// assert_eq!(mean, 42.0);
/// assert_eq!(var, 0.0);
/// ```
pub fn mean_and_variance(values: &[f64]) -> Option<(f64, f64)> {
    if values.is_empty() {
        return None;
    }

    // Welford update: fold (running mean, running sum of squared deviations).
    let (mean, sq_dev_sum) = values.iter().enumerate().fold(
        (0.0_f64, 0.0_f64),
        |(running_mean, running_m2), (idx, &sample)| {
            let seen = (idx + 1) as f64;
            let dev_before = sample - running_mean;
            let updated_mean = running_mean + dev_before / seen;
            let dev_after = sample - updated_mean;
            (updated_mean, running_m2 + dev_before * dev_after)
        },
    );

    let var = match values.len() {
        // A single sample has no spread to estimate.
        1 => 0.0,
        count => sq_dev_sum / (count as f64 - 1.0),
    };

    // Clamp tiny negative values caused by rounding; reject overflow/NaN.
    (mean.is_finite() && var.is_finite()).then(|| (mean, var.max(0.0)))
}
180
/// Example-based unit tests covering the edge cases of each public function.
#[cfg(test)]
mod tests {
    use super::*;

    // --- summarize_*: empty input -------------------------------------------

    #[test]
    fn summarize_u64_empty_returns_error() {
        let result = summarize_u64(&[]);
        assert!(matches!(result, Err(StatsError::NoSamples)));
    }

    #[test]
    fn summarize_f64_empty_returns_error() {
        let result = summarize_f64(&[]);
        assert!(matches!(result, Err(StatsError::NoSamples)));
    }

    // --- summarize_*: one, two, and odd numbers of samples ------------------

    #[test]
    fn summarize_u64_single_element() {
        let summary = summarize_u64(&[42]).unwrap();
        assert_eq!(summary.median, 42);
        assert_eq!(summary.min, 42);
        assert_eq!(summary.max, 42);
    }

    #[test]
    fn summarize_f64_single_element() {
        let summary = summarize_f64(&[42.0]).unwrap();
        assert_eq!(summary.median, 42.0);
        assert_eq!(summary.min, 42.0);
        assert_eq!(summary.max, 42.0);
    }

    #[test]
    fn summarize_u64_two_elements() {
        let summary = summarize_u64(&[10, 20]).unwrap();
        assert_eq!(summary.median, 15);
        assert_eq!(summary.min, 10);
        assert_eq!(summary.max, 20);
    }

    #[test]
    fn summarize_f64_two_elements() {
        let summary = summarize_f64(&[10.0, 20.0]).unwrap();
        assert_eq!(summary.median, 15.0);
        assert_eq!(summary.min, 10.0);
        assert_eq!(summary.max, 20.0);
    }

    #[test]
    fn summarize_u64_odd_length() {
        let summary = summarize_u64(&[10, 30, 20]).unwrap();
        assert_eq!(summary.median, 20);
        assert_eq!(summary.min, 10);
        assert_eq!(summary.max, 30);
    }

    #[test]
    fn summarize_f64_odd_length() {
        let summary = summarize_f64(&[10.0, 30.0, 20.0]).unwrap();
        assert_eq!(summary.median, 20.0);
        assert_eq!(summary.min, 10.0);
        assert_eq!(summary.max, 30.0);
    }

    // --- summarize_u64: even-length median ----------------------------------

    #[test]
    fn summarize_u64_even_length_median_rounds_down() {
        // Exact midpoint: (20 + 30) / 2 == 25, no rounding involved.
        let summary = summarize_u64(&[10, 20, 30, 40]).unwrap();
        assert_eq!(summary.median, 25);

        // Odd sum: (20 + 31) / 2 == 25.5, which must round down to 25.
        let summary = summarize_u64(&[10, 20, 31, 40]).unwrap();
        assert_eq!(summary.median, 25);
    }

    #[test]
    fn summarize_u64_large_values_no_overflow() {
        // A naive `(a + b) / 2` would overflow u64 here.
        let values = [u64::MAX, u64::MAX - 1];
        let summary = summarize_u64(&values).unwrap();
        assert_eq!(summary.min, u64::MAX - 1);
        assert_eq!(summary.max, u64::MAX);
        assert_eq!(summary.median, u64::MAX - 1);
    }

    // --- percentile ---------------------------------------------------------

    #[test]
    fn percentile_empty_returns_none() {
        assert!(percentile(vec![], 0.5).is_none());
    }

    #[test]
    fn percentile_single_element() {
        let p = percentile(vec![42.0], 0.5).unwrap();
        assert_eq!(p, 42.0);
    }

    #[test]
    fn percentile_zero_is_min() {
        let p = percentile(vec![1.0, 2.0, 3.0, 4.0, 5.0], 0.0).unwrap();
        assert_eq!(p, 1.0);
    }

    #[test]
    fn percentile_one_is_max() {
        let p = percentile(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1.0).unwrap();
        assert_eq!(p, 5.0);
    }

    #[test]
    fn percentile_half_is_median_odd() {
        let p = percentile(vec![1.0, 2.0, 3.0, 4.0, 5.0], 0.5).unwrap();
        assert_eq!(p, 3.0);
    }

    #[test]
    fn percentile_interpolates_between_neighbours() {
        // rank = 0.5 * (2 - 1) = 0.5, halfway between the two samples.
        let p = percentile(vec![1.0, 2.0], 0.5).unwrap();
        assert_eq!(p, 1.5);

        // rank = 0.25 * (4 - 1) = 0.75, three quarters of the way from 10 to 20.
        let p = percentile(vec![10.0, 20.0, 30.0, 40.0], 0.25).unwrap();
        assert_eq!(p, 17.5);
    }

    #[test]
    fn percentile_clamps_out_of_range_q() {
        let below = percentile(vec![1.0, 2.0, 3.0], -1.0).unwrap();
        assert_eq!(below, 1.0);

        let above = percentile(vec![1.0, 2.0, 3.0], 2.0).unwrap();
        assert_eq!(above, 3.0);
    }

    // --- mean_and_variance --------------------------------------------------

    #[test]
    fn mean_and_variance_empty_returns_none() {
        assert!(mean_and_variance(&[]).is_none());
    }

    #[test]
    fn mean_and_variance_single_element() {
        let (mean, var) = mean_and_variance(&[42.0]).unwrap();
        assert_eq!(mean, 42.0);
        assert_eq!(var, 0.0);
    }

    #[test]
    fn mean_and_variance_basic() {
        let (mean, var) = mean_and_variance(&[1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
        assert!((mean - 3.0).abs() < 1e-10);
        assert!((var - 2.5).abs() < 1e-10);
    }
}
308
/// Property-based tests (proptest) checking invariants over generated inputs.
#[cfg(test)]
mod property_tests {
    use super::*;
    use proptest::prelude::*;

    /// Reference median for a sorted `u64` slice, computed in `u128` so the
    /// sum of the two middle elements cannot overflow.
    fn expected_median_u64(sorted: &[u64]) -> u64 {
        let upper = sorted.len() / 2;
        match sorted.len() % 2 {
            1 => sorted[upper],
            _ => {
                let wide_sum = u128::from(sorted[upper - 1]) + u128::from(sorted[upper]);
                (wide_sum / 2) as u64
            }
        }
    }

    /// Finite floats in a range wide enough to stress precision but far from
    /// the overflow boundary.
    fn finite_f64_strategy() -> impl Strategy<Value = f64> {
        -1e100f64..1e100f64
    }

    /// Values in the top tenth of the `u64` range, where naive sums overflow.
    fn large_u64_strategy() -> impl Strategy<Value = u64> {
        (u64::MAX - (u64::MAX / 10))..=u64::MAX
    }

    proptest! {
        /// min <= median <= max for any non-empty `u64` input.
        #[test]
        fn prop_summarize_u64_ordering(values in prop::collection::vec(any::<u64>(), 1..100)) {
            let stats = summarize_u64(&values).expect("non-empty vec should succeed");
            prop_assert!(stats.min <= stats.median);
            prop_assert!(stats.median <= stats.max);
        }

        /// The summary matches values read straight off a sorted copy.
        #[test]
        fn prop_summarize_u64_correctness(values in prop::collection::vec(any::<u64>(), 1..100)) {
            let stats = summarize_u64(&values).expect("non-empty vec should succeed");
            let mut reference = values.clone();
            reference.sort_unstable();
            prop_assert_eq!(stats.min, reference[0]);
            prop_assert_eq!(stats.max, reference[reference.len() - 1]);
            prop_assert_eq!(stats.median, expected_median_u64(&reference));
        }

        /// A single sample is its own min, max, and median.
        #[test]
        fn prop_summarize_u64_single_element(value: u64) {
            let stats = summarize_u64(&[value]).unwrap();
            prop_assert_eq!(stats.min, value);
            prop_assert_eq!(stats.max, value);
            prop_assert_eq!(stats.median, value);
        }

        /// min <= median <= max for any non-empty finite `f64` input.
        #[test]
        fn prop_summarize_f64_ordering(values in prop::collection::vec(finite_f64_strategy(), 1..100)) {
            let stats = summarize_f64(&values).expect("non-empty vec should succeed");
            prop_assert!(stats.min <= stats.median);
            prop_assert!(stats.median <= stats.max);
        }

        /// The median stays correct for values near `u64::MAX`.
        #[test]
        fn prop_median_u64_overflow_handling(values in prop::collection::vec(large_u64_strategy(), 2..50)) {
            let stats = summarize_u64(&values).expect("non-empty vec should succeed");
            let mut reference = values.clone();
            reference.sort_unstable();
            let want = expected_median_u64(&reference);
            prop_assert_eq!(stats.median, want);
        }

        /// p0 is the minimum, p100 the maximum, and p50 lies between them.
        #[test]
        fn prop_percentile_bounds(values in prop::collection::vec(finite_f64_strategy(), 1..50)) {
            let lowest = values.iter().copied().fold(f64::INFINITY, f64::min);
            let highest = values.iter().copied().fold(f64::NEG_INFINITY, f64::max);
            let at = |q: f64| percentile(values.clone(), q).unwrap();
            let p0 = at(0.0);
            let p100 = at(1.0);
            let p50 = at(0.5);
            prop_assert!((p0 - lowest).abs() < f64::EPSILON);
            prop_assert!((p100 - highest).abs() < f64::EPSILON);
            prop_assert!(p50 >= lowest && p50 <= highest);
        }

        /// Mean and variance agree with a straightforward two-pass computation.
        #[test]
        fn prop_mean_and_variance_correctness(values in prop::collection::vec(finite_f64_strategy(), 1..50)) {
            let outcome = mean_and_variance(&values);
            prop_assert!(outcome.is_some());
            let (mean, var) = outcome.unwrap();

            let count = values.len();
            let expected_mean: f64 = values.iter().sum::<f64>() / count as f64;
            let mean_tol = expected_mean.abs().max(1.0) * 1e-9;
            let mean_diff = (mean - expected_mean).abs();
            prop_assert!(mean_diff < mean_tol,
                "mean diff {} exceeds tolerance {}", mean_diff, mean_tol);

            if count == 1 {
                prop_assert_eq!(var, 0.0);
            } else {
                let squared_devs: f64 = values.iter()
                    .map(|v| (v - expected_mean).powi(2))
                    .sum::<f64>();
                let expected_var: f64 = squared_devs / (count - 1) as f64;
                let var_tol = expected_var.abs().max(1.0) * 1e-6;
                let var_diff = (var - expected_var).abs();
                prop_assert!(var_diff < var_tol,
                    "var diff {} exceeds tolerance {}", var_diff, var_tol);
            }
        }

        /// Finite inputs in range yield a finite mean and a finite, non-negative variance.
        #[test]
        fn prop_mean_and_variance_finite(values in prop::collection::vec(finite_f64_strategy(), 1..50)) {
            let (mean, var) = mean_and_variance(&values).unwrap();
            prop_assert!(mean.is_finite());
            prop_assert!(var.is_finite());
            prop_assert!(var >= 0.0);
        }

        /// Percentiles are monotone in `q`: p95 is never below the median.
        #[test]
        fn prop_p95_gte_median(values in prop::collection::vec(finite_f64_strategy(), 2..100)) {
            let median = percentile(values.clone(), 0.5).unwrap();
            let p95 = percentile(values, 0.95).unwrap();
            prop_assert!(p95 >= median, "p95 ({}) should be >= median ({})", p95, median);
        }

        /// The one-pass mean matches `sum / count` within a tight tolerance.
        #[test]
        fn prop_mean_equals_sum_over_count(values in prop::collection::vec(finite_f64_strategy(), 1..50)) {
            if let Some((mean, _)) = mean_and_variance(&values) {
                let naive = values.iter().sum::<f64>() / values.len() as f64;
                if naive.is_finite() {
                    let tol = naive.abs().max(1.0) * 1e-10;
                    prop_assert!((mean - naive).abs() < tol,
                        "mean={}, expected={}, diff={}", mean, naive, (mean - naive).abs());
                }
            }
        }

        /// min and max match the extremes of the untouched input.
        #[test]
        fn prop_summarize_u64_preserves_input(values in prop::collection::vec(any::<u64>(), 1..100)) {
            let stats = summarize_u64(&values).unwrap();
            let smallest = values.iter().copied().min().unwrap();
            let largest = values.iter().copied().max().unwrap();
            prop_assert_eq!(stats.min, smallest);
            prop_assert_eq!(stats.max, largest);
        }
    }
}