// u_numflow/stats.rs

//! Descriptive statistics with numerical stability guarantees.
//!
//! All functions in this module handle edge cases explicitly and use
//! numerically stable algorithms to avoid catastrophic cancellation.
//!
//! # Algorithms
//!
//! - **Mean**: Kahan compensated summation for O(ε) error independent of n.
//! - **Variance/StdDev**: Welford's online algorithm.
//!   Reference: Welford (1962), "Note on a Method for Calculating
//!   Corrected Sums of Squares and Products", *Technometrics* 4(3).
//! - **Quantile**: R-7 linear interpolation (default in R, Python, Excel).
//!   Reference: Hyndman & Fan (1996), "Sample Quantiles in Statistical
//!   Packages", *The American Statistician* 50(4).

16/// Computes the arithmetic mean using Kahan compensated summation.
17///
18/// # Algorithm
19/// Kahan summation accumulates a compensation term to recover lost
20/// low-order bits, achieving O(ε) total error independent of `n`.
21///
22/// # Complexity
23/// Time: O(n), Space: O(1)
24///
25/// # Returns
26/// - `None` if `data` is empty or contains any NaN/Inf.
27///
28/// # Examples
29/// ```
30/// use u_numflow::stats::mean;
31/// let v = [1.0, 2.0, 3.0, 4.0, 5.0];
32/// assert!((mean(&v).unwrap() - 3.0).abs() < 1e-15);
33/// ```
34pub fn mean(data: &[f64]) -> Option<f64> {
35    if data.is_empty() {
36        return None;
37    }
38    if !data.iter().all(|x| x.is_finite()) {
39        return None;
40    }
41    Some(kahan_sum(data) / data.len() as f64)
42}
43
44/// Computes the sample variance using Welford's online algorithm.
45///
46/// Returns the **sample** (unbiased) variance with Bessel's correction
47/// (denominator `n − 1`).
48///
49/// # Algorithm
50/// Welford's method maintains a running mean and sum of squared deviations,
51/// avoiding catastrophic cancellation inherent in the naive formula
52/// `Var = E[X²] − (E[X])²`.
53///
54/// Reference: Welford (1962), *Technometrics* 4(3), pp. 419–420.
55///
56/// # Complexity
57/// Time: O(n), Space: O(1)
58///
59/// # Returns
60/// - `None` if `data.len() < 2` or contains NaN/Inf.
61///
62/// # Examples
63/// ```
64/// use u_numflow::stats::variance;
65/// let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
66/// assert!((variance(&v).unwrap() - 4.571428571428571).abs() < 1e-10);
67/// ```
68pub fn variance(data: &[f64]) -> Option<f64> {
69    if data.len() < 2 {
70        return None;
71    }
72    if !data.iter().all(|x| x.is_finite()) {
73        return None;
74    }
75    let mut acc = WelfordAccumulator::new();
76    for &x in data {
77        acc.update(x);
78    }
79    acc.sample_variance()
80}
81
82/// Computes the population variance using Welford's online algorithm.
83///
84/// Returns the **population** variance (denominator `n`).
85///
86/// # Returns
87/// - `None` if `data` is empty or contains NaN/Inf.
88///
89/// # Examples
90/// ```
91/// use u_numflow::stats::population_variance;
92/// let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
93/// assert!((population_variance(&v).unwrap() - 4.0).abs() < 1e-10);
94/// ```
95pub fn population_variance(data: &[f64]) -> Option<f64> {
96    if data.is_empty() {
97        return None;
98    }
99    if !data.iter().all(|x| x.is_finite()) {
100        return None;
101    }
102    let mut acc = WelfordAccumulator::new();
103    for &x in data {
104        acc.update(x);
105    }
106    acc.population_variance()
107}
108
109/// Computes the sample standard deviation.
110///
111/// Equivalent to `sqrt(variance(data))`.
112///
113/// # Returns
114/// - `None` if `data.len() < 2` or contains NaN/Inf.
115///
116/// # Examples
117/// ```
118/// use u_numflow::stats::std_dev;
119/// let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
120/// let sd = std_dev(&v).unwrap();
121/// assert!((sd - 2.138089935299395).abs() < 1e-10);
122/// ```
123pub fn std_dev(data: &[f64]) -> Option<f64> {
124    variance(data).map(f64::sqrt)
125}
126
127/// Computes the population standard deviation.
128///
129/// Equivalent to `sqrt(population_variance(data))`.
130///
131/// # Returns
132/// - `None` if `data` is empty or contains NaN/Inf.
133pub fn population_std_dev(data: &[f64]) -> Option<f64> {
134    population_variance(data).map(f64::sqrt)
135}
136
/// Returns the minimum value in the slice.
///
/// # Returns
/// - `None` if `data` is empty or contains NaN.
///
/// # Examples
/// ```
/// use u_numflow::stats::min;
/// assert_eq!(min(&[3.0, 1.0, 4.0, 1.0, 5.0]), Some(1.0));
/// ```
pub fn min(data: &[f64]) -> Option<f64> {
    if data.is_empty() {
        return None;
    }
    // Single pass: bail out on the first NaN, otherwise track the smallest.
    let mut best = f64::INFINITY;
    for &x in data {
        if x.is_nan() {
            return None;
        }
        best = best.min(x);
    }
    Some(best)
}
159
/// Returns the maximum value in the slice.
///
/// # Returns
/// - `None` if `data` is empty or contains NaN.
///
/// # Examples
/// ```
/// use u_numflow::stats::max;
/// assert_eq!(max(&[3.0, 1.0, 4.0, 1.0, 5.0]), Some(5.0));
/// ```
pub fn max(data: &[f64]) -> Option<f64> {
    if data.is_empty() {
        return None;
    }
    // Single pass: bail out on the first NaN, otherwise track the largest.
    let mut best = f64::NEG_INFINITY;
    for &x in data {
        if x.is_nan() {
            return None;
        }
        best = best.max(x);
    }
    Some(best)
}
182
/// Computes the median of `data` without mutating the input.
///
/// Internally clones and sorts the data, then returns the middle element
/// (or the average of the two middle elements for even-length data).
///
/// # Complexity
/// Time: O(n log n), Space: O(n)
///
/// # Returns
/// - `None` if `data` is empty or contains NaN.
///
/// # Examples
/// ```
/// use u_numflow::stats::median;
/// assert_eq!(median(&[3.0, 1.0, 2.0]), Some(2.0));
/// assert_eq!(median(&[4.0, 1.0, 3.0, 2.0]), Some(2.5));
/// ```
pub fn median(data: &[f64]) -> Option<f64> {
    if data.is_empty() || data.iter().any(|x| x.is_nan()) {
        return None;
    }
    let mut values = data.to_vec();
    // total_cmp gives a total order; NaN was already rejected above.
    values.sort_unstable_by(f64::total_cmp);
    let mid = values.len() / 2;
    if values.len() % 2 == 1 {
        Some(values[mid])
    } else {
        Some((values[mid - 1] + values[mid]) / 2.0)
    }
}
216
217/// Computes the `p`-th quantile using the R-7 linear interpolation method.
218///
219/// This is the default quantile method in R, NumPy, and Excel.
220///
221/// # Algorithm
222/// For sorted data `x[0..n]` and quantile `p ∈ [0, 1]`:
223/// 1. Compute `h = (n − 1) × p`
224/// 2. Let `j = ⌊h⌋` and `g = h − j`
225/// 3. Return `(1 − g) × x[j] + g × x[j+1]`
226///
227/// Reference: Hyndman & Fan (1996), *The American Statistician* 50(4), pp. 361–365.
228///
229/// # Complexity
230/// Time: O(n log n) (dominated by sort), Space: O(n)
231///
232/// # Panics
233/// Does not panic; returns `None` for invalid inputs.
234///
235/// # Returns
236/// - `None` if `data` is empty, `p` is outside `[0, 1]`, or data contains NaN.
237///
238/// # Examples
239/// ```
240/// use u_numflow::stats::quantile;
241/// let data = [1.0, 2.0, 3.0, 4.0, 5.0];
242/// assert_eq!(quantile(&data, 0.0), Some(1.0));
243/// assert_eq!(quantile(&data, 1.0), Some(5.0));
244/// assert_eq!(quantile(&data, 0.5), Some(3.0));
245/// ```
246pub fn quantile(data: &[f64], p: f64) -> Option<f64> {
247    if data.is_empty() || !(0.0..=1.0).contains(&p) {
248        return None;
249    }
250    if data.iter().any(|x| x.is_nan()) {
251        return None;
252    }
253    let mut sorted = data.to_vec();
254    sorted.sort_unstable_by(|a, b| a.partial_cmp(b).expect("NaN filtered above"));
255    quantile_sorted(&sorted, p)
256}
257
/// Computes the `p`-th quantile on **pre-sorted** data (R-7 method).
///
/// This avoids the O(n log n) sort when calling multiple quantiles on
/// the same dataset. The caller must guarantee that `sorted_data` is
/// sorted in non-decreasing order.
///
/// # Returns
/// - `None` if `sorted_data` is empty or `p` is outside `[0, 1]`.
pub fn quantile_sorted(sorted_data: &[f64], p: f64) -> Option<f64> {
    if sorted_data.is_empty() || !(0.0..=1.0).contains(&p) {
        return None;
    }
    let n = sorted_data.len();
    if n == 1 {
        return Some(sorted_data[0]);
    }

    // R-7: h = (n − 1)p, split into integer part j and fraction g.
    let h = (n - 1) as f64 * p;
    let j = h.floor() as usize;
    let g = h - h.floor();

    // When j is the last index (p = 1 or rounding), no upper neighbour
    // exists and the maximum is returned directly.
    match sorted_data.get(j + 1) {
        Some(&upper) => Some((1.0 - g) * sorted_data[j] + g * upper),
        None => Some(sorted_data[n - 1]),
    }
}
285
286/// Computes Fisher's adjusted sample skewness (G₁) with bias correction.
287///
288/// # Formula
289/// ```text
290/// G₁ = [√(n(n−1)) / (n−2)] × (m₃ / m₂^{3/2})
291/// ```
292/// where `m₂`, `m₃` are the biased second and third central moments.
293///
294/// This matches Excel `SKEW()` and `scipy.stats.skew(bias=False)`.
295///
296/// # Algorithm
297/// Two-pass: first computes the mean (Kahan sum), then accumulates
298/// central moments in a single sweep.
299///
300/// Reference: Joanes & Gill (1998), "Comparing measures of sample skewness
301/// and kurtosis", *The Statistician* 47(1), pp. 183–189.
302///
303/// # Complexity
304/// Time: O(n), Space: O(1)
305///
306/// # Returns
307/// - `None` if `data.len() < 3`, data contains NaN/Inf, or variance is zero.
308///
309/// # Examples
310/// ```
311/// use u_numflow::stats::skewness;
312/// // Symmetric data → skewness = 0
313/// let sym = [1.0, 2.0, 3.0, 4.0, 5.0];
314/// assert!(skewness(&sym).unwrap().abs() < 1e-14);
315///
316/// // Right-skewed data → positive skewness
317/// let right = [1.0, 2.0, 3.0, 4.0, 50.0];
318/// assert!(skewness(&right).unwrap() > 0.0);
319/// ```
320pub fn skewness(data: &[f64]) -> Option<f64> {
321    let n = data.len();
322    if n < 3 {
323        return None;
324    }
325    if !data.iter().all(|x| x.is_finite()) {
326        return None;
327    }
328    let nf = n as f64;
329    let m = kahan_sum(data) / nf;
330    let mut sum2 = 0.0;
331    let mut sum3 = 0.0;
332    for &x in data {
333        let d = x - m;
334        let d2 = d * d;
335        sum2 += d2;
336        sum3 += d2 * d;
337    }
338    let m2 = sum2 / nf;
339    if m2 == 0.0 {
340        return None;
341    }
342    let m3 = sum3 / nf;
343    let g1 = m3 / m2.powf(1.5);
344    let correction = (nf * (nf - 1.0)).sqrt() / (nf - 2.0);
345    Some(correction * g1)
346}
347
348/// Computes Fisher's excess kurtosis (G₂) with bias correction.
349///
350/// # Formula
351/// ```text
352/// G₂ = [n(n+1) / ((n−1)(n−2)(n−3))] × Σ[(xᵢ−x̄)/s]⁴ − [3(n−1)² / ((n−2)(n−3))]
353/// ```
354/// where `s` is the sample standard deviation (n−1 denominator).
355///
356/// This matches Excel `KURT()` and `scipy.stats.kurtosis(bias=False)`.
357/// Returns **0** for a normal distribution, positive for heavy tails
358/// (leptokurtic), negative for light tails (platykurtic).
359///
360/// # Algorithm
361/// Two-pass: first computes the mean (Kahan sum), then accumulates
362/// central moments in a single sweep.
363///
364/// Reference: Joanes & Gill (1998), "Comparing measures of sample skewness
365/// and kurtosis", *The Statistician* 47(1), pp. 183–189.
366///
367/// # Complexity
368/// Time: O(n), Space: O(1)
369///
370/// # Returns
371/// - `None` if `data.len() < 4`, data contains NaN/Inf, or variance is zero.
372///
373/// # Examples
374/// ```
375/// use u_numflow::stats::kurtosis;
376/// // Uniform-ish data → negative excess kurtosis
377/// let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
378/// let k = kurtosis(&data).unwrap();
379/// assert!(k < 0.0); // platykurtic
380/// ```
381pub fn kurtosis(data: &[f64]) -> Option<f64> {
382    let n = data.len();
383    if n < 4 {
384        return None;
385    }
386    if !data.iter().all(|x| x.is_finite()) {
387        return None;
388    }
389    let nf = n as f64;
390    let m = kahan_sum(data) / nf;
391    let mut sum2 = 0.0;
392    let mut sum4 = 0.0;
393    for &x in data {
394        let d = x - m;
395        let d2 = d * d;
396        sum2 += d2;
397        sum4 += d2 * d2;
398    }
399    // Sample variance s² = sum2 / (n-1)
400    let s2 = sum2 / (nf - 1.0);
401    if s2 == 0.0 {
402        return None;
403    }
404    let s4 = s2 * s2;
405    // Σ[(xᵢ − x̄)/s]⁴
406    let sum_z4 = sum4 / s4;
407    let a = nf * (nf + 1.0) / ((nf - 1.0) * (nf - 2.0) * (nf - 3.0));
408    let b = 3.0 * (nf - 1.0) * (nf - 1.0) / ((nf - 2.0) * (nf - 3.0));
409    Some(a * sum_z4 - b)
410}
411
412/// Computes the sample covariance between two datasets.
413///
414/// # Formula
415/// ```text
416/// Cov(X, Y) = Σ(xᵢ − x̄)(yᵢ − ȳ) / (n − 1)
417/// ```
418///
419/// Uses Bessel's correction (n−1 denominator) for an unbiased estimator.
420///
421/// # Complexity
422/// Time: O(n), Space: O(1)
423///
424/// # Returns
425/// - `None` if `x.len() != y.len()`, `n < 2`, or data contains NaN/Inf.
426///
427/// # Examples
428/// ```
429/// use u_numflow::stats::covariance;
430/// let x = [1.0, 2.0, 3.0, 4.0, 5.0];
431/// let y = [2.0, 4.0, 6.0, 8.0, 10.0];
432/// let cov = covariance(&x, &y).unwrap();
433/// assert!((cov - 5.0).abs() < 1e-14); // perfect positive covariance
434/// ```
435pub fn covariance(x: &[f64], y: &[f64]) -> Option<f64> {
436    let n = x.len();
437    if n != y.len() || n < 2 {
438        return None;
439    }
440    if !x.iter().chain(y.iter()).all(|v| v.is_finite()) {
441        return None;
442    }
443    let nf = n as f64;
444    let mean_x = kahan_sum(x) / nf;
445    let mean_y = kahan_sum(y) / nf;
446    let mut sum = 0.0;
447    for i in 0..n {
448        sum += (x[i] - mean_x) * (y[i] - mean_y);
449    }
450    Some(sum / (nf - 1.0))
451}
452
// ---------------------------------------------------------------------------
// Kahan compensated summation
// ---------------------------------------------------------------------------
456
/// Neumaier compensated summation for O(ε) error independent of `n`.
///
/// This is an improved variant of Kahan summation that also handles the
/// case where the addend is larger in magnitude than the running sum.
///
/// # Algorithm
/// Carries a running compensation alongside the sum; at each step the
/// branch picks whichever operand is smaller in magnitude and banks its
/// lost low-order bits into the compensation.
///
/// Reference: Neumaier (1974), "Rundungsfehleranalyse einiger Verfahren
/// zur Summation endlicher Summen", *Zeitschrift für Angewandte
/// Mathematik und Mechanik* 54(1), pp. 39–51.
///
/// # Complexity
/// Time: O(n), Space: O(1)
pub fn kahan_sum(data: &[f64]) -> f64 {
    let (total, compensation) = data.iter().fold((0.0_f64, 0.0_f64), |(sum, comp), &x| {
        let t = sum + x;
        // Whichever operand is smaller in magnitude loses bits in `t`;
        // recover them exactly and bank them in the compensation.
        let lost = if sum.abs() >= x.abs() {
            (sum - t) + x
        } else {
            (x - t) + sum
        };
        (t, comp + lost)
    });
    total + compensation
}
486
// ---------------------------------------------------------------------------
// Welford online accumulator
// ---------------------------------------------------------------------------
490
/// Streaming accumulator for mean, variance, skewness, and kurtosis.
///
/// Computes running descriptive statistics in a single pass with O(1)
/// memory and guaranteed numerical stability, using the extended
/// Welford algorithm for higher-order moments.
///
/// # Algorithm
/// Maintains central moment sums M₂, M₃, M₄ incrementally. The update
/// order (M₄ → M₃ → M₂) preserves correctness since each uses the
/// *previous* values of lower moments.
///
/// References:
/// - Welford (1962), *Technometrics* 4(3), pp. 419–420.
/// - Pébay (2008), "Formulas for Robust, One-Pass Parallel Computation
///   of Covariances and Arbitrary-Order Statistical Moments",
///   Sandia Report SAND2008-6212.
/// - Terriberry (2007), "Computing Higher-Order Moments Online".
///
/// # Examples
/// ```
/// use u_numflow::stats::WelfordAccumulator;
/// let mut acc = WelfordAccumulator::new();
/// for &x in &[2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0] {
///     acc.update(x);
/// }
/// assert!((acc.mean().unwrap() - 5.0).abs() < 1e-15);
/// assert!((acc.sample_variance().unwrap() - 4.571428571428571).abs() < 1e-10);
/// ```
#[derive(Debug, Clone)]
pub struct WelfordAccumulator {
    // Number of samples ingested so far.
    count: u64,
    // Running arithmetic mean of all samples seen.
    mean_acc: f64,
    // Sum of squared deviations from the mean (n × biased 2nd moment).
    m2: f64,
    // Sum of cubed deviations (n × biased 3rd moment); feeds skewness.
    m3: f64,
    // Sum of 4th-power deviations (n × biased 4th moment); feeds kurtosis.
    m4: f64,
}
527
528impl WelfordAccumulator {
529    /// Creates a new empty accumulator.
530    pub fn new() -> Self {
531        Self {
532            count: 0,
533            mean_acc: 0.0,
534            m2: 0.0,
535            m3: 0.0,
536            m4: 0.0,
537        }
538    }
539
540    /// Feeds a new sample into the accumulator.
541    ///
542    /// Updates M₂, M₃, M₄ in the correct order (M₄ → M₃ → M₂) so that
543    /// each uses the *previous* values of lower-order moments.
544    ///
545    /// The first sample is handled as a special case: all moments remain
546    /// zero and only the mean is initialized. This avoids intermediate
547    /// overflow when `delta² > f64::MAX` (e.g., `value ≈ 1e166`).
548    pub fn update(&mut self, value: f64) {
549        let n1 = self.count;
550        self.count += 1;
551
552        if n1 == 0 {
553            // First sample: mean = value, all moments stay zero.
554            self.mean_acc = value;
555            return;
556        }
557
558        let n = self.count as f64;
559        let delta = value - self.mean_acc;
560        let delta_n = delta / n;
561        let delta_n2 = delta_n * delta_n;
562        let term1 = delta * delta_n * n1 as f64;
563
564        // M₄ before M₃ before M₂ — order matters!
565        self.m4 += term1 * delta_n2 * (n * n - 3.0 * n + 3.0) + 6.0 * delta_n2 * self.m2
566            - 4.0 * delta_n * self.m3;
567        self.m3 += term1 * delta_n * (n - 2.0) - 3.0 * delta_n * self.m2;
568        self.m2 += term1;
569        self.mean_acc += delta_n;
570    }
571
572    /// Returns the number of samples seen so far.
573    pub fn count(&self) -> u64 {
574        self.count
575    }
576
577    /// Returns the running mean, or `None` if no samples have been added.
578    pub fn mean(&self) -> Option<f64> {
579        if self.count == 0 {
580            None
581        } else {
582            Some(self.mean_acc)
583        }
584    }
585
586    /// Returns the sample variance (n − 1 denominator), or `None` if fewer
587    /// than 2 samples have been added.
588    pub fn sample_variance(&self) -> Option<f64> {
589        if self.count < 2 {
590            None
591        } else {
592            Some(self.m2 / (self.count - 1) as f64)
593        }
594    }
595
596    /// Returns the population variance (n denominator), or `None` if no
597    /// samples have been added.
598    pub fn population_variance(&self) -> Option<f64> {
599        if self.count == 0 {
600            None
601        } else {
602            Some(self.m2 / self.count as f64)
603        }
604    }
605
606    /// Returns the sample standard deviation, or `None` if fewer than 2
607    /// samples have been added.
608    pub fn sample_std_dev(&self) -> Option<f64> {
609        self.sample_variance().map(f64::sqrt)
610    }
611
612    /// Returns the population standard deviation, or `None` if no samples
613    /// have been added.
614    pub fn population_std_dev(&self) -> Option<f64> {
615        self.population_variance().map(f64::sqrt)
616    }
617
618    /// Returns Fisher's adjusted sample skewness (G₁), or `None` if
619    /// fewer than 3 samples have been added or variance is zero.
620    ///
621    /// Uses the same bias correction as Excel `SKEW()`.
622    pub fn skewness(&self) -> Option<f64> {
623        if self.count < 3 {
624            return None;
625        }
626        let n = self.count as f64;
627        if self.m2 == 0.0 {
628            return None;
629        }
630        // Biased skewness: g₁ = √n × M₃ / M₂^(3/2)
631        let g1 = n.sqrt() * self.m3 / self.m2.powf(1.5);
632        // Bias correction: √(n(n−1)) / (n−2)
633        let correction = (n * (n - 1.0)).sqrt() / (n - 2.0);
634        Some(correction * g1)
635    }
636
637    /// Returns Fisher's excess kurtosis (G₂) with bias correction, or
638    /// `None` if fewer than 4 samples have been added or variance is zero.
639    ///
640    /// Uses the same bias correction as Excel `KURT()`.
641    /// Returns 0 for a normal distribution, positive for heavy tails.
642    pub fn kurtosis(&self) -> Option<f64> {
643        if self.count < 4 {
644            return None;
645        }
646        let n = self.count as f64;
647        if self.m2 == 0.0 {
648            return None;
649        }
650        // Biased excess kurtosis: g₂ = n × M₄ / M₂² − 3
651        let g2 = n * self.m4 / (self.m2 * self.m2) - 3.0;
652        // Unbiased (Fisher G₂): [(n−1)/((n−2)(n−3))] × [(n+1)×g₂ + 6]
653        let correction = (n - 1.0) / ((n - 2.0) * (n - 3.0));
654        Some(correction * ((n + 1.0) * g2 + 6.0))
655    }
656
657    /// Merges another accumulator into this one (parallel-friendly).
658    ///
659    /// Uses Chan's parallel algorithm extended to higher-order moments.
660    ///
661    /// References:
662    /// - Chan, Golub & LeVeque (1979), "Updating Formulae and a
663    ///   Pairwise Algorithm for Computing Sample Variances".
664    /// - Pébay (2008), SAND2008-6212 (M₃, M₄ merge formulas).
665    pub fn merge(&mut self, other: &WelfordAccumulator) {
666        if other.count == 0 {
667            return;
668        }
669        if self.count == 0 {
670            *self = other.clone();
671            return;
672        }
673        let na = self.count as f64;
674        let nb = other.count as f64;
675        let total = self.count + other.count;
676        let n = total as f64;
677        let delta = other.mean_acc - self.mean_acc;
678        let delta2 = delta * delta;
679        let delta3 = delta2 * delta;
680        let delta4 = delta2 * delta2;
681
682        let new_mean = self.mean_acc + delta * (nb / n);
683
684        let new_m2 = self.m2 + other.m2 + delta2 * na * nb / n;
685
686        let new_m3 = self.m3
687            + other.m3
688            + delta3 * na * nb * (na - nb) / (n * n)
689            + 3.0 * delta * (na * other.m2 - nb * self.m2) / n;
690
691        let new_m4 = self.m4
692            + other.m4
693            + delta4 * na * nb * (na * na - na * nb + nb * nb) / (n * n * n)
694            + 6.0 * delta2 * (na * na * other.m2 + nb * nb * self.m2) / (n * n)
695            + 4.0 * delta * (na * other.m3 - nb * self.m3) / n;
696
697        self.count = total;
698        self.mean_acc = new_mean;
699        self.m2 = new_m2;
700        self.m3 = new_m3;
701        self.m4 = new_m4;
702    }
703}
704
impl Default for WelfordAccumulator {
    /// Equivalent to [`WelfordAccumulator::new`]: an accumulator with
    /// zero samples and all moment sums at zero.
    fn default() -> Self {
        Self::new()
    }
}
710
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
714
715#[cfg(test)]
716mod tests {
717    use super::*;
718
719    // --- mean ---
720
721    #[test]
722    fn test_mean_basic() {
723        assert_eq!(mean(&[1.0, 2.0, 3.0, 4.0, 5.0]), Some(3.0));
724    }
725
726    #[test]
727    fn test_mean_single() {
728        assert_eq!(mean(&[42.0]), Some(42.0));
729    }
730
731    #[test]
732    fn test_mean_empty() {
733        assert_eq!(mean(&[]), None);
734    }
735
736    #[test]
737    fn test_mean_nan() {
738        assert_eq!(mean(&[1.0, f64::NAN, 3.0]), None);
739    }
740
741    #[test]
742    fn test_mean_inf() {
743        assert_eq!(mean(&[1.0, f64::INFINITY, 3.0]), None);
744    }
745
746    // --- variance ---
747
748    #[test]
749    fn test_variance_basic() {
750        let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
751        let var = variance(&v).unwrap();
752        assert!((var - 4.571428571428571).abs() < 1e-10);
753    }
754
755    #[test]
756    fn test_variance_constant() {
757        let v = [5.0; 100];
758        assert!((variance(&v).unwrap()).abs() < 1e-15);
759    }
760
761    #[test]
762    fn test_variance_single() {
763        assert_eq!(variance(&[1.0]), None);
764    }
765
766    #[test]
767    fn test_variance_empty() {
768        assert_eq!(variance(&[]), None);
769    }
770
771    #[test]
772    fn test_population_variance() {
773        let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
774        let var = population_variance(&v).unwrap();
775        assert!((var - 4.0).abs() < 1e-10);
776    }
777
778    // --- std_dev ---
779
780    #[test]
781    fn test_std_dev() {
782        let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
783        let sd = std_dev(&v).unwrap();
784        let expected = 4.571428571428571_f64.sqrt();
785        assert!((sd - expected).abs() < 1e-10);
786    }
787
788    // --- min / max ---
789
790    #[test]
791    fn test_min_max() {
792        let v = [3.0, 1.0, 4.0, 1.0, 5.0, 9.0, 2.0, 6.0];
793        assert_eq!(min(&v), Some(1.0));
794        assert_eq!(max(&v), Some(9.0));
795    }
796
797    #[test]
798    fn test_min_max_empty() {
799        assert_eq!(min(&[]), None);
800        assert_eq!(max(&[]), None);
801    }
802
803    #[test]
804    fn test_min_max_nan() {
805        assert_eq!(min(&[1.0, f64::NAN]), None);
806        assert_eq!(max(&[1.0, f64::NAN]), None);
807    }
808
809    // --- median ---
810
811    #[test]
812    fn test_median_odd() {
813        assert_eq!(median(&[3.0, 1.0, 2.0]), Some(2.0));
814    }
815
816    #[test]
817    fn test_median_even() {
818        assert_eq!(median(&[4.0, 1.0, 3.0, 2.0]), Some(2.5));
819    }
820
821    #[test]
822    fn test_median_single() {
823        assert_eq!(median(&[7.0]), Some(7.0));
824    }
825
826    #[test]
827    fn test_median_empty() {
828        assert_eq!(median(&[]), None);
829    }
830
831    // --- quantile ---
832
833    #[test]
834    fn test_quantile_extremes() {
835        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
836        assert_eq!(quantile(&data, 0.0), Some(1.0));
837        assert_eq!(quantile(&data, 1.0), Some(5.0));
838    }
839
840    #[test]
841    fn test_quantile_median() {
842        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
843        assert_eq!(quantile(&data, 0.5), Some(3.0));
844    }
845
846    #[test]
847    fn test_quantile_interpolation() {
848        let data = [1.0, 2.0, 3.0, 4.0];
849        // h = (4-1)*0.25 = 0.75, j=0, g=0.75
850        // (1-0.75)*1.0 + 0.75*2.0 = 0.25 + 1.5 = 1.75
851        let q = quantile(&data, 0.25).unwrap();
852        assert!((q - 1.75).abs() < 1e-15);
853    }
854
855    #[test]
856    fn test_quantile_invalid_p() {
857        assert_eq!(quantile(&[1.0, 2.0], -0.1), None);
858        assert_eq!(quantile(&[1.0, 2.0], 1.1), None);
859    }
860
861    #[test]
862    fn test_quantile_empty() {
863        assert_eq!(quantile(&[], 0.5), None);
864    }
865
866    #[test]
867    fn test_quantile_single() {
868        assert_eq!(quantile(&[42.0], 0.0), Some(42.0));
869        assert_eq!(quantile(&[42.0], 0.5), Some(42.0));
870        assert_eq!(quantile(&[42.0], 1.0), Some(42.0));
871    }
872
873    // --- kahan_sum ---
874
875    #[test]
876    fn test_kahan_sum_basic() {
877        let v = [1.0, 2.0, 3.0];
878        assert!((kahan_sum(&v) - 6.0).abs() < 1e-15);
879    }
880
881    #[test]
882    fn test_kahan_sum_precision() {
883        // Sum of 1e16 + 1.0 + (-1e16) with naive sum loses the 1.0
884        let v = [1e16, 1.0, -1e16];
885        let result = kahan_sum(&v);
886        assert!(
887            (result - 1.0).abs() < 1e-10,
888            "Kahan sum should preserve the 1.0: got {result}"
889        );
890    }
891
892    // --- WelfordAccumulator ---
893
894    #[test]
895    fn test_welford_empty() {
896        let acc = WelfordAccumulator::new();
897        assert_eq!(acc.count(), 0);
898        assert_eq!(acc.mean(), None);
899        assert_eq!(acc.sample_variance(), None);
900    }
901
902    #[test]
903    fn test_welford_single() {
904        let mut acc = WelfordAccumulator::new();
905        acc.update(5.0);
906        assert_eq!(acc.mean(), Some(5.0));
907        assert_eq!(acc.sample_variance(), None);
908        assert_eq!(acc.population_variance(), Some(0.0));
909    }
910
911    #[test]
912    fn test_welford_matches_batch() {
913        let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
914        let mut acc = WelfordAccumulator::new();
915        for &x in &data {
916            acc.update(x);
917        }
918        let batch_mean = mean(&data).unwrap();
919        let batch_var = variance(&data).unwrap();
920        assert!((acc.mean().unwrap() - batch_mean).abs() < 1e-14);
921        assert!((acc.sample_variance().unwrap() - batch_var).abs() < 1e-10);
922    }
923
924    #[test]
925    fn test_welford_merge() {
926        let data_a = [1.0, 2.0, 3.0, 4.0];
927        let data_b = [5.0, 6.0, 7.0, 8.0];
928        let data_all: Vec<f64> = data_a.iter().chain(data_b.iter()).copied().collect();
929
930        let mut acc_a = WelfordAccumulator::new();
931        for &x in &data_a {
932            acc_a.update(x);
933        }
934        let mut acc_b = WelfordAccumulator::new();
935        for &x in &data_b {
936            acc_b.update(x);
937        }
938        acc_a.merge(&acc_b);
939
940        let expected_mean = mean(&data_all).unwrap();
941        let expected_var = variance(&data_all).unwrap();
942
943        assert!((acc_a.mean().unwrap() - expected_mean).abs() < 1e-14);
944        assert!((acc_a.sample_variance().unwrap() - expected_var).abs() < 1e-10);
945    }
946
947    // --- skewness ---
948
949    #[test]
950    fn test_skewness_symmetric() {
951        // Symmetric data → skewness = 0
952        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
953        assert!(skewness(&data).unwrap().abs() < 1e-14);
954    }
955
956    #[test]
957    fn test_skewness_right_skewed() {
958        // Right-skewed: one large outlier
959        let data = [1.0, 2.0, 3.0, 4.0, 50.0];
960        let s = skewness(&data).unwrap();
961        assert!(s > 0.0, "expected positive skewness, got {s}");
962    }
963
964    #[test]
965    fn test_skewness_left_skewed() {
966        // Left-skewed: one small outlier
967        let data = [-50.0, 1.0, 2.0, 3.0, 4.0];
968        let s = skewness(&data).unwrap();
969        assert!(s < 0.0, "expected negative skewness, got {s}");
970    }
971
972    #[test]
973    fn test_skewness_edge_cases() {
974        assert_eq!(skewness(&[]), None);
975        assert_eq!(skewness(&[1.0]), None);
976        assert_eq!(skewness(&[1.0, 2.0]), None);
977        // Constant data → zero variance → None
978        assert_eq!(skewness(&[5.0, 5.0, 5.0]), None);
979        // NaN → None
980        assert_eq!(skewness(&[1.0, f64::NAN, 3.0]), None);
981    }
982
983    #[test]
984    fn test_skewness_known_value() {
985        // Data: [1, 2, 3, 4, 8]
986        // Manual computation:
987        //   n=5, mean=3.6
988        //   d = [-2.6, -1.6, -0.6, 0.4, 4.4]
989        //   m2 = (6.76+2.56+0.36+0.16+19.36)/5 = 5.84
990        //   m3 = (-17.576-4.096-0.216+0.064+85.184)/5 = 12.672
991        //   g1 = 12.672 / 5.84^1.5 ≈ 0.8982
992        //   correction = sqrt(20)/3 ≈ 1.4907
993        //   G1 ≈ 1.3388
994        let data = [1.0, 2.0, 3.0, 4.0, 8.0];
995        let s = skewness(&data).unwrap();
996        assert!(
997            (s - 1.339).abs() < 0.01,
998            "expected skewness ≈ 1.34, got {s}"
999        );
1000    }
1001
1002    // --- kurtosis ---
1003
1004    #[test]
1005    fn test_kurtosis_uniform_negative() {
1006        // Uniform-ish data should have negative excess kurtosis (platykurtic)
1007        let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
1008        let k = kurtosis(&data).unwrap();
1009        assert!(k < 0.0, "uniform data should be platykurtic, got {k}");
1010    }
1011
1012    #[test]
1013    fn test_kurtosis_edge_cases() {
1014        assert_eq!(kurtosis(&[]), None);
1015        assert_eq!(kurtosis(&[1.0]), None);
1016        assert_eq!(kurtosis(&[1.0, 2.0]), None);
1017        assert_eq!(kurtosis(&[1.0, 2.0, 3.0]), None);
1018        // Constant → None
1019        assert_eq!(kurtosis(&[5.0, 5.0, 5.0, 5.0]), None);
1020        // NaN → None
1021        assert_eq!(kurtosis(&[1.0, f64::NAN, 3.0, 4.0]), None);
1022    }
1023
1024    #[test]
1025    fn test_kurtosis_heavy_tails() {
1026        // Heavy-tailed data (leptokurtic) → positive excess kurtosis
1027        let data = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0];
1028        let k = kurtosis(&data).unwrap();
1029        assert!(k > 0.0, "heavy-tailed data should be leptokurtic, got {k}");
1030    }
1031
1032    // --- covariance ---
1033
1034    #[test]
1035    fn test_covariance_perfect_positive() {
1036        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
1037        let y = [2.0, 4.0, 6.0, 8.0, 10.0];
1038        let cov = covariance(&x, &y).unwrap();
1039        assert!((cov - 5.0).abs() < 1e-14);
1040    }
1041
1042    #[test]
1043    fn test_covariance_perfect_negative() {
1044        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
1045        let y = [10.0, 8.0, 6.0, 4.0, 2.0];
1046        let cov = covariance(&x, &y).unwrap();
1047        assert!((cov - (-5.0)).abs() < 1e-14);
1048    }
1049
    #[test]
    fn test_covariance_zero() {
        // A constant series has no variation, so its covariance with any
        // other series is zero — every deviation term on the y side is 0.
        let x = [1.0, 2.0, 3.0, 4.0];
        let y = [5.0, 5.0, 5.0, 5.0]; // constant
        let cov = covariance(&x, &y).unwrap();
        assert!(cov.abs() < 1e-14);
    }
1058
1059    #[test]
1060    fn test_covariance_self_is_variance() {
1061        let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
1062        let cov_xx = covariance(&data, &data).unwrap();
1063        let var = variance(&data).unwrap();
1064        assert!((cov_xx - var).abs() < 1e-10);
1065    }
1066
1067    #[test]
1068    fn test_covariance_edge_cases() {
1069        assert_eq!(covariance(&[], &[]), None);
1070        assert_eq!(covariance(&[1.0], &[2.0]), None);
1071        assert_eq!(covariance(&[1.0, 2.0], &[1.0]), None); // different lengths
1072        assert_eq!(covariance(&[1.0, f64::NAN], &[1.0, 2.0]), None);
1073    }
1074
1075    // --- WelfordAccumulator: skewness & kurtosis ---
1076
1077    #[test]
1078    fn test_welford_skewness_matches_batch() {
1079        let data = [1.0, 2.0, 3.0, 4.0, 50.0];
1080        let mut acc = WelfordAccumulator::new();
1081        for &x in &data {
1082            acc.update(x);
1083        }
1084        let batch = skewness(&data).unwrap();
1085        let stream = acc.skewness().unwrap();
1086        assert!(
1087            (batch - stream).abs() < 1e-10,
1088            "batch={batch}, stream={stream}"
1089        );
1090    }
1091
1092    #[test]
1093    fn test_welford_kurtosis_matches_batch() {
1094        let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 100.0];
1095        let mut acc = WelfordAccumulator::new();
1096        for &x in &data {
1097            acc.update(x);
1098        }
1099        let batch = kurtosis(&data).unwrap();
1100        let stream = acc.kurtosis().unwrap();
1101        assert!(
1102            (batch - stream).abs() < 1e-8,
1103            "batch={batch}, stream={stream}"
1104        );
1105    }
1106
1107    #[test]
1108    fn test_welford_skewness_kurtosis_edge_cases() {
1109        let mut acc = WelfordAccumulator::new();
1110        assert_eq!(acc.skewness(), None);
1111        assert_eq!(acc.kurtosis(), None);
1112        acc.update(1.0);
1113        assert_eq!(acc.skewness(), None);
1114        assert_eq!(acc.kurtosis(), None);
1115        acc.update(2.0);
1116        assert_eq!(acc.skewness(), None);
1117        assert_eq!(acc.kurtosis(), None);
1118        acc.update(3.0);
1119        // skewness available at n=3
1120        assert!(acc.skewness().is_some());
1121        assert_eq!(acc.kurtosis(), None);
1122        acc.update(4.0);
1123        // kurtosis available at n=4
1124        assert!(acc.kurtosis().is_some());
1125    }
1126
1127    #[test]
1128    fn test_welford_merge_skewness_kurtosis() {
1129        let data_a = [1.0, 3.0, 5.0, 7.0, 9.0, 11.0];
1130        let data_b = [2.0, 50.0, 4.0, 6.0, 8.0, 100.0];
1131        let all: Vec<f64> = data_a.iter().chain(data_b.iter()).copied().collect();
1132
1133        let mut acc_a = WelfordAccumulator::new();
1134        for &x in &data_a {
1135            acc_a.update(x);
1136        }
1137        let mut acc_b = WelfordAccumulator::new();
1138        for &x in &data_b {
1139            acc_b.update(x);
1140        }
1141        acc_a.merge(&acc_b);
1142
1143        let expected_skew = skewness(&all).unwrap();
1144        let expected_kurt = kurtosis(&all).unwrap();
1145        let merged_skew = acc_a.skewness().unwrap();
1146        let merged_kurt = acc_a.kurtosis().unwrap();
1147
1148        assert!(
1149            (expected_skew - merged_skew).abs() < 1e-8,
1150            "skewness: expected={expected_skew}, merged={merged_skew}"
1151        );
1152        assert!(
1153            (expected_kurt - merged_kurt).abs() < 1e-6,
1154            "kurtosis: expected={expected_kurt}, merged={merged_kurt}"
1155        );
1156    }
1157
1158    // --- numerical stability ---
1159
1160    #[test]
1161    fn test_variance_large_offset() {
1162        // Data with large mean: [1e9 + 1, 1e9 + 2, ..., 1e9 + 5]
1163        // Naive algorithm would suffer catastrophic cancellation.
1164        let data: Vec<f64> = (1..=5).map(|i| 1e9 + i as f64).collect();
1165        let var = variance(&data).unwrap();
1166        // True variance of [1,2,3,4,5] = 2.5
1167        assert!(
1168            (var - 2.5).abs() < 1e-5,
1169            "Variance of offset data should be ~2.5, got {var}"
1170        );
1171    }
1172}
1173
// Property-based tests (proptest crate). Each property below is checked
// against 500 randomly generated inputs (see the `proptest_config` line).
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    /// Strategy for generating finite f64 vectors of reasonable size.
    ///
    /// NOTE(review): `NORMAL` presumably draws only "normal" floats
    /// (no NaN/inf/subnormals) — confirm against proptest docs. The
    /// filter independently guarantees finiteness and bounds the
    /// magnitude to 1e12 so the generated data stays well-conditioned.
    fn finite_vec(min_len: usize, max_len: usize) -> impl Strategy<Value = Vec<f64>> {
        proptest::collection::vec(
            prop::num::f64::NORMAL.prop_filter("finite", |x| x.is_finite() && x.abs() < 1e12),
            min_len..=max_len,
        )
    }

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(500))]

        // --- Variance is non-negative ---
        #[test]
        fn variance_non_negative(data in finite_vec(2, 100)) {
            let var = variance(&data).unwrap();
            prop_assert!(var >= 0.0, "variance must be >= 0, got {}", var);
        }

        // --- Variance of constant is zero ---
        #[test]
        fn variance_of_constant_is_zero(
            value in prop::num::f64::NORMAL.prop_filter("finite", |x| x.is_finite()),
            n in 2_usize..50,
        ) {
            let data = vec![value; n];
            let var = variance(&data).unwrap();
            prop_assert!(var.abs() < 1e-10, "variance of constant should be ~0, got {}", var);
        }

        // --- std_dev = sqrt(variance) ---
        #[test]
        fn std_dev_is_sqrt_of_variance(data in finite_vec(2, 100)) {
            let var = variance(&data).unwrap();
            let sd = std_dev(&data).unwrap();
            // Relative tolerance: sd² and var can both be very large,
            // so an absolute bound would spuriously fail.
            let diff = (sd * sd - var).abs();
            prop_assert!(diff < 1e-10 * var.max(1.0), "sd² should equal variance");
        }

        // --- Mean linearity: mean(a*x + b) = a*mean(x) + b ---
        #[test]
        fn mean_linearity(
            data in finite_vec(1, 100),
            a in -100.0_f64..100.0,
            b in -100.0_f64..100.0,
        ) {
            prop_assume!(a.is_finite() && b.is_finite());
            let m = mean(&data).unwrap();
            let transformed: Vec<f64> = data.iter().map(|&x| a * x + b).collect();
            // The transform can overflow to ±inf, in which case `mean`
            // returns None and the case is silently skipped.
            if let Some(mt) = mean(&transformed) {
                let expected = a * m + b;
                let tol = 1e-8 * expected.abs().max(1.0);
                prop_assert!(
                    (mt - expected).abs() < tol,
                    "mean(a*x+b)={} != a*mean(x)+b={}",
                    mt, expected
                );
            }
        }

        // --- quantile(0) = min, quantile(1) = max ---
        #[test]
        fn quantile_extremes_are_min_max(data in finite_vec(1, 100)) {
            let q0 = quantile(&data, 0.0).unwrap();
            let q1 = quantile(&data, 1.0).unwrap();
            let mn = min(&data).unwrap();
            let mx = max(&data).unwrap();
            prop_assert!((q0 - mn).abs() < 1e-15, "quantile(0) should be min");
            prop_assert!((q1 - mx).abs() < 1e-15, "quantile(1) should be max");
        }

        // --- Quantiles are monotonic ---
        #[test]
        fn quantiles_monotonic(
            data in finite_vec(2, 100),
            p1 in 0.0_f64..=1.0,
            p2 in 0.0_f64..=1.0,
        ) {
            // Order the two random probabilities before comparing.
            let (lo, hi) = if p1 <= p2 { (p1, p2) } else { (p2, p1) };
            let q_lo = quantile(&data, lo).unwrap();
            let q_hi = quantile(&data, hi).unwrap();
            prop_assert!(q_lo <= q_hi + 1e-15, "quantiles should be monotonic");
        }

        // --- median = quantile(0.5) ---
        #[test]
        fn median_equals_quantile_half(data in finite_vec(1, 100)) {
            let med = median(&data).unwrap();
            let q50 = quantile(&data, 0.5).unwrap();
            prop_assert!(
                (med - q50).abs() < 1e-14,
                "median={} != quantile(0.5)={}",
                med, q50
            );
        }

        // --- Welford merge produces same result as sequential ---
        #[test]
        fn welford_merge_equals_sequential(
            data_a in finite_vec(1, 50),
            data_b in finite_vec(1, 50),
        ) {
            // Reference: feed all values through one accumulator.
            let mut sequential = WelfordAccumulator::new();
            for &x in data_a.iter().chain(data_b.iter()) {
                sequential.update(x);
            }

            // Candidate: fill two accumulators independently, then merge.
            let mut acc_a = WelfordAccumulator::new();
            for &x in &data_a { acc_a.update(x); }
            let mut acc_b = WelfordAccumulator::new();
            for &x in &data_b { acc_b.update(x); }
            acc_a.merge(&acc_b);

            let seq_mean = sequential.mean().unwrap();
            let mrg_mean = acc_a.mean().unwrap();
            prop_assert!(
                (seq_mean - mrg_mean).abs() < 1e-10 * seq_mean.abs().max(1.0),
                "merged mean should match sequential"
            );

            // Sample variance only exists once there are >= 2 values.
            if sequential.count() >= 2 {
                let seq_var = sequential.sample_variance().unwrap();
                let mrg_var = acc_a.sample_variance().unwrap();
                prop_assert!(
                    (seq_var - mrg_var).abs() < 1e-8 * seq_var.max(1.0),
                    "merged variance should match sequential"
                );
            }
        }

        // --- Skewness of symmetric data is near zero ---
        #[test]
        fn skewness_of_symmetric_is_zero(
            half in proptest::collection::vec(-1e6_f64..1e6, 2..=50),
        ) {
            prop_assume!(half.iter().all(|x| x.is_finite()));
            // Build truly symmetric data around 0: [a, b, c, -c, -b, -a]
            let mut data: Vec<f64> = half.clone();
            data.extend(half.iter().map(|x| -x));
            // skewness may be None (e.g. zero variance); skip those cases.
            if let Some(s) = skewness(&data) {
                prop_assert!(
                    s.abs() < 1e-8,
                    "symmetric data should have ~0 skewness, got {}",
                    s
                );
            }
        }

        // --- Streaming skewness matches batch ---
        // Uses bounded range [-1e6, 1e6] to avoid ill-conditioned data
        // with 100+ order-of-magnitude dynamic range.
        #[test]
        fn welford_skewness_matches_batch(
            data in proptest::collection::vec(-1e6_f64..1e6, 3..=100)
        ) {
            let mut acc = WelfordAccumulator::new();
            for &x in &data { acc.update(x); }
            match (skewness(&data), acc.skewness()) {
                (Some(batch), Some(stream)) if batch.is_finite() && stream.is_finite() => {
                    let tol = 1e-8 * batch.abs().max(1.0);
                    prop_assert!(
                        (batch - stream).abs() < tol,
                        "batch={} stream={}", batch, stream
                    );
                }
                _ => {} // NaN/None cases: skip
            }
        }

        // --- Streaming kurtosis matches batch ---
        #[test]
        fn welford_kurtosis_matches_batch(
            data in proptest::collection::vec(-1e6_f64..1e6, 4..=100)
        ) {
            let mut acc = WelfordAccumulator::new();
            for &x in &data { acc.update(x); }
            match (kurtosis(&data), acc.kurtosis()) {
                (Some(batch), Some(stream)) if batch.is_finite() && stream.is_finite() => {
                    // Looser tolerance than skewness: fourth moments
                    // amplify rounding differences between the two paths.
                    let tol = 1e-6 * batch.abs().max(1.0);
                    prop_assert!(
                        (batch - stream).abs() < tol,
                        "batch={} stream={}", batch, stream
                    );
                }
                _ => {} // NaN/None cases: skip
            }
        }

        // --- Covariance of x with itself equals variance ---
        #[test]
        fn covariance_self_is_variance(data in finite_vec(2, 100)) {
            let cov = covariance(&data, &data).unwrap();
            let var = variance(&data).unwrap();
            let tol = 1e-10 * var.max(1.0);
            prop_assert!(
                (cov - var).abs() < tol,
                "Cov(x,x)={} != Var(x)={}", cov, var
            );
        }

        // --- Covariance is symmetric: Cov(x,y) = Cov(y,x) ---
        #[test]
        fn covariance_symmetric(
            x in finite_vec(2, 50),
            y in finite_vec(2, 50),
        ) {
            // Truncate both vectors to a common length first.
            let n = x.len().min(y.len());
            if n >= 2 {
                let x_slice = &x[..n];
                let y_slice = &y[..n];
                let cov_xy = covariance(x_slice, y_slice).unwrap();
                let cov_yx = covariance(y_slice, x_slice).unwrap();
                prop_assert!(
                    (cov_xy - cov_yx).abs() < 1e-10 * cov_xy.abs().max(1.0),
                    "Cov(x,y)={} != Cov(y,x)={}", cov_xy, cov_yx
                );
            }
        }
    }
}