// u_numflow — stats.rs

//! Descriptive statistics with numerical stability guarantees.
//!
//! All functions in this module handle edge cases explicitly and use
//! numerically stable algorithms to avoid catastrophic cancellation.
//!
//! # Algorithms
//!
//! - **Mean**: compensated (Kahan–Neumaier) summation for O(ε) error
//!   independent of n.
//! - **Variance/StdDev**: Welford's online algorithm.
//!   Reference: Welford (1962), "Note on a Method for Calculating
//!   Corrected Sums of Squares and Products", *Technometrics* 4(3).
//! - **Quantile**: R-7 linear interpolation (default in R, Python, Excel).
//!   Reference: Hyndman & Fan (1996), "Sample Quantiles in Statistical
//!   Packages", *The American Statistician* 50(4).

16/// Computes the arithmetic mean using Kahan compensated summation.
17///
18/// # Algorithm
19/// Kahan summation accumulates a compensation term to recover lost
20/// low-order bits, achieving O(ε) total error independent of `n`.
21///
22/// # Complexity
23/// Time: O(n), Space: O(1)
24///
25/// # Returns
26/// - `None` if `data` is empty or contains any NaN/Inf.
27///
28/// # Examples
29/// ```
30/// use u_numflow::stats::mean;
31/// let v = [1.0, 2.0, 3.0, 4.0, 5.0];
32/// assert!((mean(&v).unwrap() - 3.0).abs() < 1e-15);
33/// ```
34pub fn mean(data: &[f64]) -> Option<f64> {
35    if data.is_empty() {
36        return None;
37    }
38    if !data.iter().all(|x| x.is_finite()) {
39        return None;
40    }
41    Some(kahan_sum(data) / data.len() as f64)
42}
43
44/// Computes the sample variance using Welford's online algorithm.
45///
46/// Returns the **sample** (unbiased) variance with Bessel's correction
47/// (denominator `n − 1`).
48///
49/// # Algorithm
50/// Welford's method maintains a running mean and sum of squared deviations,
51/// avoiding catastrophic cancellation inherent in the naive formula
52/// `Var = E[X²] − (E[X])²`.
53///
54/// Reference: Welford (1962), *Technometrics* 4(3), pp. 419–420.
55///
56/// # Complexity
57/// Time: O(n), Space: O(1)
58///
59/// # Returns
60/// - `None` if `data.len() < 2` or contains NaN/Inf.
61///
62/// # Examples
63/// ```
64/// use u_numflow::stats::variance;
65/// let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
66/// assert!((variance(&v).unwrap() - 4.571428571428571).abs() < 1e-10);
67/// ```
68pub fn variance(data: &[f64]) -> Option<f64> {
69    if data.len() < 2 {
70        return None;
71    }
72    if !data.iter().all(|x| x.is_finite()) {
73        return None;
74    }
75    let mut acc = WelfordAccumulator::new();
76    for &x in data {
77        acc.update(x);
78    }
79    acc.sample_variance()
80}
81
82/// Computes the population variance using Welford's online algorithm.
83///
84/// Returns the **population** variance (denominator `n`).
85///
86/// # Returns
87/// - `None` if `data` is empty or contains NaN/Inf.
88///
89/// # Examples
90/// ```
91/// use u_numflow::stats::population_variance;
92/// let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
93/// assert!((population_variance(&v).unwrap() - 4.0).abs() < 1e-10);
94/// ```
95pub fn population_variance(data: &[f64]) -> Option<f64> {
96    if data.is_empty() {
97        return None;
98    }
99    if !data.iter().all(|x| x.is_finite()) {
100        return None;
101    }
102    let mut acc = WelfordAccumulator::new();
103    for &x in data {
104        acc.update(x);
105    }
106    acc.population_variance()
107}
108
109/// Computes the sample standard deviation.
110///
111/// Equivalent to `sqrt(variance(data))`.
112///
113/// # Returns
114/// - `None` if `data.len() < 2` or contains NaN/Inf.
115///
116/// # Examples
117/// ```
118/// use u_numflow::stats::std_dev;
119/// let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
120/// let sd = std_dev(&v).unwrap();
121/// assert!((sd - 2.138089935299395).abs() < 1e-10);
122/// ```
123pub fn std_dev(data: &[f64]) -> Option<f64> {
124    variance(data).map(f64::sqrt)
125}
126
127/// Computes the population standard deviation.
128///
129/// Equivalent to `sqrt(population_variance(data))`.
130///
131/// # Returns
132/// - `None` if `data` is empty or contains NaN/Inf.
133pub fn population_std_dev(data: &[f64]) -> Option<f64> {
134    population_variance(data).map(f64::sqrt)
135}
136
137/// Returns the minimum value in the slice.
138///
139/// # Returns
140/// - `None` if `data` is empty or contains NaN.
141///
142/// # Examples
143/// ```
144/// use u_numflow::stats::min;
145/// assert_eq!(min(&[3.0, 1.0, 4.0, 1.0, 5.0]), Some(1.0));
146/// ```
147pub fn min(data: &[f64]) -> Option<f64> {
148    if data.is_empty() {
149        return None;
150    }
151    data.iter().copied().try_fold(f64::INFINITY, |acc, x| {
152        if x.is_nan() {
153            None
154        } else {
155            Some(acc.min(x))
156        }
157    })
158}
159
160/// Returns the maximum value in the slice.
161///
162/// # Returns
163/// - `None` if `data` is empty or contains NaN.
164///
165/// # Examples
166/// ```
167/// use u_numflow::stats::max;
168/// assert_eq!(max(&[3.0, 1.0, 4.0, 1.0, 5.0]), Some(5.0));
169/// ```
170pub fn max(data: &[f64]) -> Option<f64> {
171    if data.is_empty() {
172        return None;
173    }
174    data.iter().copied().try_fold(f64::NEG_INFINITY, |acc, x| {
175        if x.is_nan() {
176            None
177        } else {
178            Some(acc.max(x))
179        }
180    })
181}
182
183/// Computes the median of `data` without mutating the input.
184///
185/// Internally clones and sorts the data, then returns the middle element
186/// (or the average of the two middle elements for even-length data).
187///
188/// # Complexity
189/// Time: O(n log n), Space: O(n)
190///
191/// # Returns
192/// - `None` if `data` is empty or contains NaN.
193///
194/// # Examples
195/// ```
196/// use u_numflow::stats::median;
197/// assert_eq!(median(&[3.0, 1.0, 2.0]), Some(2.0));
198/// assert_eq!(median(&[4.0, 1.0, 3.0, 2.0]), Some(2.5));
199/// ```
200pub fn median(data: &[f64]) -> Option<f64> {
201    if data.is_empty() {
202        return None;
203    }
204    if data.iter().any(|x| x.is_nan()) {
205        return None;
206    }
207    let mut sorted = data.to_vec();
208    sorted.sort_unstable_by(|a, b| a.partial_cmp(b).expect("NaN filtered above"));
209    let n = sorted.len();
210    if n % 2 == 1 {
211        Some(sorted[n / 2])
212    } else {
213        Some((sorted[n / 2 - 1] + sorted[n / 2]) / 2.0)
214    }
215}
216
217/// Computes the `p`-th quantile using the R-7 linear interpolation method.
218///
219/// This is the default quantile method in R, NumPy, and Excel.
220///
221/// # Algorithm
222/// For sorted data `x[0..n]` and quantile `p ∈ [0, 1]`:
223/// 1. Compute `h = (n − 1) × p`
224/// 2. Let `j = ⌊h⌋` and `g = h − j`
225/// 3. Return `(1 − g) × x[j] + g × x[j+1]`
226///
227/// Reference: Hyndman & Fan (1996), *The American Statistician* 50(4), pp. 361–365.
228///
229/// # Complexity
230/// Time: O(n log n) (dominated by sort), Space: O(n)
231///
232/// # Panics
233/// Does not panic; returns `None` for invalid inputs.
234///
235/// # Returns
236/// - `None` if `data` is empty, `p` is outside `[0, 1]`, or data contains NaN.
237///
238/// # Examples
239/// ```
240/// use u_numflow::stats::quantile;
241/// let data = [1.0, 2.0, 3.0, 4.0, 5.0];
242/// assert_eq!(quantile(&data, 0.0), Some(1.0));
243/// assert_eq!(quantile(&data, 1.0), Some(5.0));
244/// assert_eq!(quantile(&data, 0.5), Some(3.0));
245/// ```
246pub fn quantile(data: &[f64], p: f64) -> Option<f64> {
247    if data.is_empty() || !(0.0..=1.0).contains(&p) {
248        return None;
249    }
250    if data.iter().any(|x| x.is_nan()) {
251        return None;
252    }
253    let mut sorted = data.to_vec();
254    sorted.sort_unstable_by(|a, b| a.partial_cmp(b).expect("NaN filtered above"));
255    quantile_sorted(&sorted, p)
256}
257
258/// Computes the `p`-th quantile on **pre-sorted** data (R-7 method).
259///
260/// This avoids the O(n log n) sort when calling multiple quantiles on
261/// the same dataset. The caller must guarantee that `sorted_data` is
262/// sorted in non-decreasing order.
263///
264/// # Returns
265/// - `None` if `sorted_data` is empty or `p` is outside `[0, 1]`.
266pub fn quantile_sorted(sorted_data: &[f64], p: f64) -> Option<f64> {
267    let n = sorted_data.len();
268    if n == 0 || !(0.0..=1.0).contains(&p) {
269        return None;
270    }
271    if n == 1 {
272        return Some(sorted_data[0]);
273    }
274
275    let h = (n - 1) as f64 * p;
276    let j = h.floor() as usize;
277    let g = h - h.floor();
278
279    if j + 1 >= n {
280        Some(sorted_data[n - 1])
281    } else {
282        Some((1.0 - g) * sorted_data[j] + g * sorted_data[j + 1])
283    }
284}
285
286// ---------------------------------------------------------------------------
287// Kahan compensated summation
288// ---------------------------------------------------------------------------
289
290/// Neumaier compensated summation for O(ε) error independent of `n`.
291///
292/// This is an improved variant of Kahan summation that also handles the
293/// case where the addend is larger in magnitude than the running sum.
294///
295/// # Algorithm
296/// Maintains a running compensation variable `c`. At each step, the
297/// branch ensures the smaller operand's low-order bits are captured.
298///
299/// Reference: Neumaier (1974), "Rundungsfehleranalyse einiger Verfahren
300/// zur Summation endlicher Summen", *Zeitschrift für Angewandte
301/// Mathematik und Mechanik* 54(1), pp. 39–51.
302///
303/// # Complexity
304/// Time: O(n), Space: O(1)
305pub fn kahan_sum(data: &[f64]) -> f64 {
306    let mut sum = 0.0_f64;
307    let mut c = 0.0_f64;
308    for &x in data {
309        let t = sum + x;
310        if sum.abs() >= x.abs() {
311            c += (sum - t) + x;
312        } else {
313            c += (x - t) + sum;
314        }
315        sum = t;
316    }
317    sum + c
318}
319
320// ---------------------------------------------------------------------------
321// Welford online accumulator
322// ---------------------------------------------------------------------------
323
324/// Streaming accumulator for mean and variance using Welford's algorithm.
325///
326/// Computes running mean, variance, and standard deviation in a single
327/// pass with O(1) memory and guaranteed numerical stability.
328///
329/// # Algorithm
330/// For each new sample `x_k`:
331/// ```text
332/// δ₁ = x_k − μ_{k−1}
333/// μ_k = μ_{k−1} + δ₁ / k
334/// δ₂ = x_k − μ_k
335/// M₂_k = M₂_{k−1} + δ₁ × δ₂
336/// ```
337///
338/// Reference: Welford (1962), *Technometrics* 4(3), pp. 419–420.
339///
340/// # Examples
341/// ```
342/// use u_numflow::stats::WelfordAccumulator;
343/// let mut acc = WelfordAccumulator::new();
344/// for &x in &[2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0] {
345///     acc.update(x);
346/// }
347/// assert!((acc.mean().unwrap() - 5.0).abs() < 1e-15);
348/// assert!((acc.sample_variance().unwrap() - 4.571428571428571).abs() < 1e-10);
349/// ```
350#[derive(Debug, Clone)]
351pub struct WelfordAccumulator {
352    count: u64,
353    mean_acc: f64,
354    m2: f64,
355}
356
357impl WelfordAccumulator {
358    /// Creates a new empty accumulator.
359    pub fn new() -> Self {
360        Self {
361            count: 0,
362            mean_acc: 0.0,
363            m2: 0.0,
364        }
365    }
366
367    /// Feeds a new sample into the accumulator.
368    pub fn update(&mut self, value: f64) {
369        self.count += 1;
370        let delta = value - self.mean_acc;
371        self.mean_acc += delta / self.count as f64;
372        let delta2 = value - self.mean_acc;
373        self.m2 += delta * delta2;
374    }
375
376    /// Returns the number of samples seen so far.
377    pub fn count(&self) -> u64 {
378        self.count
379    }
380
381    /// Returns the running mean, or `None` if no samples have been added.
382    pub fn mean(&self) -> Option<f64> {
383        if self.count == 0 {
384            None
385        } else {
386            Some(self.mean_acc)
387        }
388    }
389
390    /// Returns the sample variance (n − 1 denominator), or `None` if fewer
391    /// than 2 samples have been added.
392    pub fn sample_variance(&self) -> Option<f64> {
393        if self.count < 2 {
394            None
395        } else {
396            Some(self.m2 / (self.count - 1) as f64)
397        }
398    }
399
400    /// Returns the population variance (n denominator), or `None` if no
401    /// samples have been added.
402    pub fn population_variance(&self) -> Option<f64> {
403        if self.count == 0 {
404            None
405        } else {
406            Some(self.m2 / self.count as f64)
407        }
408    }
409
410    /// Returns the sample standard deviation, or `None` if fewer than 2
411    /// samples have been added.
412    pub fn sample_std_dev(&self) -> Option<f64> {
413        self.sample_variance().map(f64::sqrt)
414    }
415
416    /// Returns the population standard deviation, or `None` if no samples
417    /// have been added.
418    pub fn population_std_dev(&self) -> Option<f64> {
419        self.population_variance().map(f64::sqrt)
420    }
421
422    /// Merges another accumulator into this one (parallel-friendly).
423    ///
424    /// Uses Chan's parallel algorithm for combining partial aggregates.
425    ///
426    /// Reference: Chan, Golub & LeVeque (1979), "Updating Formulae and a
427    /// Pairwise Algorithm for Computing Sample Variances".
428    pub fn merge(&mut self, other: &WelfordAccumulator) {
429        if other.count == 0 {
430            return;
431        }
432        if self.count == 0 {
433            *self = other.clone();
434            return;
435        }
436        let total = self.count + other.count;
437        let delta = other.mean_acc - self.mean_acc;
438        let new_mean = self.mean_acc + delta * (other.count as f64 / total as f64);
439        let new_m2 = self.m2
440            + other.m2
441            + delta * delta * (self.count as f64 * other.count as f64 / total as f64);
442        self.count = total;
443        self.mean_acc = new_mean;
444        self.m2 = new_m2;
445    }
446}
447
448impl Default for WelfordAccumulator {
449    fn default() -> Self {
450        Self::new()
451    }
452}
453
454// ---------------------------------------------------------------------------
455// Tests
456// ---------------------------------------------------------------------------
457
458#[cfg(test)]
459mod tests {
460    use super::*;
461
462    // --- mean ---
463
464    #[test]
465    fn test_mean_basic() {
466        assert_eq!(mean(&[1.0, 2.0, 3.0, 4.0, 5.0]), Some(3.0));
467    }
468
469    #[test]
470    fn test_mean_single() {
471        assert_eq!(mean(&[42.0]), Some(42.0));
472    }
473
474    #[test]
475    fn test_mean_empty() {
476        assert_eq!(mean(&[]), None);
477    }
478
479    #[test]
480    fn test_mean_nan() {
481        assert_eq!(mean(&[1.0, f64::NAN, 3.0]), None);
482    }
483
484    #[test]
485    fn test_mean_inf() {
486        assert_eq!(mean(&[1.0, f64::INFINITY, 3.0]), None);
487    }
488
489    // --- variance ---
490
491    #[test]
492    fn test_variance_basic() {
493        let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
494        let var = variance(&v).unwrap();
495        assert!((var - 4.571428571428571).abs() < 1e-10);
496    }
497
498    #[test]
499    fn test_variance_constant() {
500        let v = [5.0; 100];
501        assert!((variance(&v).unwrap()).abs() < 1e-15);
502    }
503
504    #[test]
505    fn test_variance_single() {
506        assert_eq!(variance(&[1.0]), None);
507    }
508
509    #[test]
510    fn test_variance_empty() {
511        assert_eq!(variance(&[]), None);
512    }
513
514    #[test]
515    fn test_population_variance() {
516        let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
517        let var = population_variance(&v).unwrap();
518        assert!((var - 4.0).abs() < 1e-10);
519    }
520
521    // --- std_dev ---
522
523    #[test]
524    fn test_std_dev() {
525        let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
526        let sd = std_dev(&v).unwrap();
527        let expected = 4.571428571428571_f64.sqrt();
528        assert!((sd - expected).abs() < 1e-10);
529    }
530
531    // --- min / max ---
532
533    #[test]
534    fn test_min_max() {
535        let v = [3.0, 1.0, 4.0, 1.0, 5.0, 9.0, 2.0, 6.0];
536        assert_eq!(min(&v), Some(1.0));
537        assert_eq!(max(&v), Some(9.0));
538    }
539
540    #[test]
541    fn test_min_max_empty() {
542        assert_eq!(min(&[]), None);
543        assert_eq!(max(&[]), None);
544    }
545
546    #[test]
547    fn test_min_max_nan() {
548        assert_eq!(min(&[1.0, f64::NAN]), None);
549        assert_eq!(max(&[1.0, f64::NAN]), None);
550    }
551
552    // --- median ---
553
554    #[test]
555    fn test_median_odd() {
556        assert_eq!(median(&[3.0, 1.0, 2.0]), Some(2.0));
557    }
558
559    #[test]
560    fn test_median_even() {
561        assert_eq!(median(&[4.0, 1.0, 3.0, 2.0]), Some(2.5));
562    }
563
564    #[test]
565    fn test_median_single() {
566        assert_eq!(median(&[7.0]), Some(7.0));
567    }
568
569    #[test]
570    fn test_median_empty() {
571        assert_eq!(median(&[]), None);
572    }
573
574    // --- quantile ---
575
576    #[test]
577    fn test_quantile_extremes() {
578        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
579        assert_eq!(quantile(&data, 0.0), Some(1.0));
580        assert_eq!(quantile(&data, 1.0), Some(5.0));
581    }
582
583    #[test]
584    fn test_quantile_median() {
585        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
586        assert_eq!(quantile(&data, 0.5), Some(3.0));
587    }
588
589    #[test]
590    fn test_quantile_interpolation() {
591        let data = [1.0, 2.0, 3.0, 4.0];
592        // h = (4-1)*0.25 = 0.75, j=0, g=0.75
593        // (1-0.75)*1.0 + 0.75*2.0 = 0.25 + 1.5 = 1.75
594        let q = quantile(&data, 0.25).unwrap();
595        assert!((q - 1.75).abs() < 1e-15);
596    }
597
598    #[test]
599    fn test_quantile_invalid_p() {
600        assert_eq!(quantile(&[1.0, 2.0], -0.1), None);
601        assert_eq!(quantile(&[1.0, 2.0], 1.1), None);
602    }
603
604    #[test]
605    fn test_quantile_empty() {
606        assert_eq!(quantile(&[], 0.5), None);
607    }
608
609    #[test]
610    fn test_quantile_single() {
611        assert_eq!(quantile(&[42.0], 0.0), Some(42.0));
612        assert_eq!(quantile(&[42.0], 0.5), Some(42.0));
613        assert_eq!(quantile(&[42.0], 1.0), Some(42.0));
614    }
615
616    // --- kahan_sum ---
617
618    #[test]
619    fn test_kahan_sum_basic() {
620        let v = [1.0, 2.0, 3.0];
621        assert!((kahan_sum(&v) - 6.0).abs() < 1e-15);
622    }
623
624    #[test]
625    fn test_kahan_sum_precision() {
626        // Sum of 1e16 + 1.0 + (-1e16) with naive sum loses the 1.0
627        let v = [1e16, 1.0, -1e16];
628        let result = kahan_sum(&v);
629        assert!(
630            (result - 1.0).abs() < 1e-10,
631            "Kahan sum should preserve the 1.0: got {result}"
632        );
633    }
634
635    // --- WelfordAccumulator ---
636
637    #[test]
638    fn test_welford_empty() {
639        let acc = WelfordAccumulator::new();
640        assert_eq!(acc.count(), 0);
641        assert_eq!(acc.mean(), None);
642        assert_eq!(acc.sample_variance(), None);
643    }
644
645    #[test]
646    fn test_welford_single() {
647        let mut acc = WelfordAccumulator::new();
648        acc.update(5.0);
649        assert_eq!(acc.mean(), Some(5.0));
650        assert_eq!(acc.sample_variance(), None);
651        assert_eq!(acc.population_variance(), Some(0.0));
652    }
653
654    #[test]
655    fn test_welford_matches_batch() {
656        let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
657        let mut acc = WelfordAccumulator::new();
658        for &x in &data {
659            acc.update(x);
660        }
661        let batch_mean = mean(&data).unwrap();
662        let batch_var = variance(&data).unwrap();
663        assert!((acc.mean().unwrap() - batch_mean).abs() < 1e-14);
664        assert!((acc.sample_variance().unwrap() - batch_var).abs() < 1e-10);
665    }
666
667    #[test]
668    fn test_welford_merge() {
669        let data_a = [1.0, 2.0, 3.0, 4.0];
670        let data_b = [5.0, 6.0, 7.0, 8.0];
671        let data_all: Vec<f64> = data_a.iter().chain(data_b.iter()).copied().collect();
672
673        let mut acc_a = WelfordAccumulator::new();
674        for &x in &data_a {
675            acc_a.update(x);
676        }
677        let mut acc_b = WelfordAccumulator::new();
678        for &x in &data_b {
679            acc_b.update(x);
680        }
681        acc_a.merge(&acc_b);
682
683        let expected_mean = mean(&data_all).unwrap();
684        let expected_var = variance(&data_all).unwrap();
685
686        assert!((acc_a.mean().unwrap() - expected_mean).abs() < 1e-14);
687        assert!((acc_a.sample_variance().unwrap() - expected_var).abs() < 1e-10);
688    }
689
690    // --- numerical stability ---
691
692    #[test]
693    fn test_variance_large_offset() {
694        // Data with large mean: [1e9 + 1, 1e9 + 2, ..., 1e9 + 5]
695        // Naive algorithm would suffer catastrophic cancellation.
696        let data: Vec<f64> = (1..=5).map(|i| 1e9 + i as f64).collect();
697        let var = variance(&data).unwrap();
698        // True variance of [1,2,3,4,5] = 2.5
699        assert!(
700            (var - 2.5).abs() < 1e-5,
701            "Variance of offset data should be ~2.5, got {var}"
702        );
703    }
704}
705
706#[cfg(test)]
707mod proptests {
708    use super::*;
709    use proptest::prelude::*;
710
711    /// Strategy for generating finite f64 vectors of reasonable size.
712    fn finite_vec(min_len: usize, max_len: usize) -> impl Strategy<Value = Vec<f64>> {
713        proptest::collection::vec(
714            prop::num::f64::NORMAL.prop_filter("finite", |x| x.is_finite() && x.abs() < 1e12),
715            min_len..=max_len,
716        )
717    }
718
719    proptest! {
720        #![proptest_config(ProptestConfig::with_cases(500))]
721
722        // --- Variance is non-negative ---
723        #[test]
724        fn variance_non_negative(data in finite_vec(2, 100)) {
725            let var = variance(&data).unwrap();
726            prop_assert!(var >= 0.0, "variance must be >= 0, got {}", var);
727        }
728
729        // --- Variance of constant is zero ---
730        #[test]
731        fn variance_of_constant_is_zero(
732            value in prop::num::f64::NORMAL.prop_filter("finite", |x| x.is_finite()),
733            n in 2_usize..50,
734        ) {
735            let data = vec![value; n];
736            let var = variance(&data).unwrap();
737            prop_assert!(var.abs() < 1e-10, "variance of constant should be ~0, got {}", var);
738        }
739
740        // --- std_dev = sqrt(variance) ---
741        #[test]
742        fn std_dev_is_sqrt_of_variance(data in finite_vec(2, 100)) {
743            let var = variance(&data).unwrap();
744            let sd = std_dev(&data).unwrap();
745            let diff = (sd * sd - var).abs();
746            prop_assert!(diff < 1e-10 * var.max(1.0), "sd² should equal variance");
747        }
748
749        // --- Mean linearity: mean(a*x + b) = a*mean(x) + b ---
750        #[test]
751        fn mean_linearity(
752            data in finite_vec(1, 100),
753            a in -100.0_f64..100.0,
754            b in -100.0_f64..100.0,
755        ) {
756            prop_assume!(a.is_finite() && b.is_finite());
757            let m = mean(&data).unwrap();
758            let transformed: Vec<f64> = data.iter().map(|&x| a * x + b).collect();
759            if let Some(mt) = mean(&transformed) {
760                let expected = a * m + b;
761                let tol = 1e-8 * expected.abs().max(1.0);
762                prop_assert!(
763                    (mt - expected).abs() < tol,
764                    "mean(a*x+b)={} != a*mean(x)+b={}",
765                    mt, expected
766                );
767            }
768        }
769
770        // --- quantile(0) = min, quantile(1) = max ---
771        #[test]
772        fn quantile_extremes_are_min_max(data in finite_vec(1, 100)) {
773            let q0 = quantile(&data, 0.0).unwrap();
774            let q1 = quantile(&data, 1.0).unwrap();
775            let mn = min(&data).unwrap();
776            let mx = max(&data).unwrap();
777            prop_assert!((q0 - mn).abs() < 1e-15, "quantile(0) should be min");
778            prop_assert!((q1 - mx).abs() < 1e-15, "quantile(1) should be max");
779        }
780
781        // --- Quantiles are monotonic ---
782        #[test]
783        fn quantiles_monotonic(
784            data in finite_vec(2, 100),
785            p1 in 0.0_f64..=1.0,
786            p2 in 0.0_f64..=1.0,
787        ) {
788            let (lo, hi) = if p1 <= p2 { (p1, p2) } else { (p2, p1) };
789            let q_lo = quantile(&data, lo).unwrap();
790            let q_hi = quantile(&data, hi).unwrap();
791            prop_assert!(q_lo <= q_hi + 1e-15, "quantiles should be monotonic");
792        }
793
794        // --- median = quantile(0.5) ---
795        #[test]
796        fn median_equals_quantile_half(data in finite_vec(1, 100)) {
797            let med = median(&data).unwrap();
798            let q50 = quantile(&data, 0.5).unwrap();
799            prop_assert!(
800                (med - q50).abs() < 1e-14,
801                "median={} != quantile(0.5)={}",
802                med, q50
803            );
804        }
805
806        // --- Welford merge produces same result as sequential ---
807        #[test]
808        fn welford_merge_equals_sequential(
809            data_a in finite_vec(1, 50),
810            data_b in finite_vec(1, 50),
811        ) {
812            let mut sequential = WelfordAccumulator::new();
813            for &x in data_a.iter().chain(data_b.iter()) {
814                sequential.update(x);
815            }
816
817            let mut acc_a = WelfordAccumulator::new();
818            for &x in &data_a { acc_a.update(x); }
819            let mut acc_b = WelfordAccumulator::new();
820            for &x in &data_b { acc_b.update(x); }
821            acc_a.merge(&acc_b);
822
823            let seq_mean = sequential.mean().unwrap();
824            let mrg_mean = acc_a.mean().unwrap();
825            prop_assert!(
826                (seq_mean - mrg_mean).abs() < 1e-10 * seq_mean.abs().max(1.0),
827                "merged mean should match sequential"
828            );
829
830            if sequential.count() >= 2 {
831                let seq_var = sequential.sample_variance().unwrap();
832                let mrg_var = acc_a.sample_variance().unwrap();
833                prop_assert!(
834                    (seq_var - mrg_var).abs() < 1e-8 * seq_var.max(1.0),
835                    "merged variance should match sequential"
836                );
837            }
838        }
839    }
840}