// oxiphysics_core/statistics/functions.rs

//! Auto-generated module
//!
//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)

#![allow(clippy::needless_range_loop)]
use std::collections::VecDeque;

use super::types::{NormalDistribution, PcaResult, StatRng};
/// Returns the arithmetic mean of `data`.  Returns `0.0` for an empty slice.
pub fn mean(data: &[f64]) -> f64 {
    match data.len() {
        0 => 0.0,
        n => data.iter().sum::<f64>() / n as f64,
    }
}
/// Returns the unbiased sample variance of `data` (divides by n − 1).
///
/// Returns `0.0` for slices with fewer than 2 elements.
pub fn variance(data: &[f64]) -> f64 {
    let n = data.len();
    if n < 2 {
        return 0.0;
    }
    // Inlined mean (n >= 2 here, so the empty-slice guard is unnecessary).
    let m = data.iter().sum::<f64>() / n as f64;
    let ss: f64 = data.iter().map(|x| (x - m).powi(2)).sum();
    ss / (n as f64 - 1.0)
}
27/// Returns the unbiased sample standard deviation of `data`.
28///
29/// Returns `0.0` for slices with fewer than 2 elements.
30pub fn std_dev(data: &[f64]) -> f64 {
31    variance(data).sqrt()
32}
/// Returns the median of `data`, computed on a sorted copy.
///
/// Returns `0.0` for an empty slice.  For even-length slices the two
/// middle values are averaged.  NaNs compare as equal, matching the rest
/// of this module.
pub fn median(data: &[f64]) -> f64 {
    let mut v = data.to_vec();
    v.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let n = v.len();
    match n {
        0 => 0.0,
        _ if n % 2 == 1 => v[n / 2],
        _ => (v[n / 2 - 1] + v[n / 2]) / 2.0,
    }
}
/// Returns the `p`-th percentile of `data` using linear interpolation.
///
/// `p` should be in \[0, 100\].  Works on a sorted copy of `data`.
/// Returns `0.0` for an empty slice and the single value for length 1.
pub fn percentile(data: &[f64], p: f64) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let mut sorted = data.to_vec();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let n = sorted.len();
    if n == 1 {
        return sorted[0];
    }
    // Fractional rank into the sorted order, then interpolate between the
    // two bracketing order statistics.
    let rank = (p / 100.0) * (n as f64 - 1.0);
    let below = rank.floor() as usize;
    let above = (below + 1).min(n - 1);
    let frac = rank - below as f64;
    sorted[below] + frac * (sorted[above] - sorted[below])
}
/// Returns the sample covariance of `x` and `y` (divides by n − 1).
///
/// Panics if the slices have different lengths.
/// Returns `0.0` for fewer than 2 paired values.
pub fn covariance(x: &[f64], y: &[f64]) -> f64 {
    assert_eq!(
        x.len(),
        y.len(),
        "covariance: x and y must have the same length"
    );
    let n = x.len();
    if n < 2 {
        return 0.0;
    }
    // Inlined means (n >= 2 here, so no empty-slice guard is needed).
    let mx = x.iter().sum::<f64>() / n as f64;
    let my = y.iter().sum::<f64>() / n as f64;
    let mut acc = 0.0_f64;
    for (xi, yi) in x.iter().zip(y.iter()) {
        acc += (xi - mx) * (yi - my);
    }
    acc / (n as f64 - 1.0)
}
92/// Returns the Pearson correlation coefficient of `x` and `y`.
93///
94/// Returns `0.0` when either standard deviation is zero or there are fewer
95/// than 2 paired values.
96pub fn correlation(x: &[f64], y: &[f64]) -> f64 {
97    let sx = std_dev(x);
98    let sy = std_dev(y);
99    if sx == 0.0 || sy == 0.0 {
100        return 0.0;
101    }
102    covariance(x, y) / (sx * sy)
103}
104/// Returns the sample skewness of `data`.
105///
106/// Uses the adjusted Fisher–Pearson standardised moment.
107/// Returns `0.0` for fewer than 3 elements or zero standard deviation.
108pub fn skewness(data: &[f64]) -> f64 {
109    let n = data.len();
110    if n < 3 {
111        return 0.0;
112    }
113    let m = mean(data);
114    let s = std_dev(data);
115    if s == 0.0 {
116        return 0.0;
117    }
118    let nf = n as f64;
119    let third_moment = data.iter().map(|x| ((x - m) / s).powi(3)).sum::<f64>() / nf;
120    third_moment * (nf * (nf - 1.0)).sqrt() / (nf - 2.0)
121}
122/// Returns the excess kurtosis of `data` (kurtosis minus 3).
123///
124/// Returns `0.0` for fewer than 4 elements or zero variance.
125pub fn kurtosis(data: &[f64]) -> f64 {
126    let n = data.len();
127    if n < 4 {
128        return 0.0;
129    }
130    let m = mean(data);
131    let s = std_dev(data);
132    if s == 0.0 {
133        return 0.0;
134    }
135    let nf = n as f64;
136    let fourth = data.iter().map(|x| ((x - m) / s).powi(4)).sum::<f64>() / nf;
137    fourth - 3.0
138}
/// Bins `data` into `n_bins` equal-width bins.
///
/// Returns `(bin_low, bin_high, count)` triples; values equal to the
/// maximum fall into the last bin.  Returns an empty vector when `data`
/// is empty or `n_bins` is zero.  A degenerate range (max ≈ min) falls
/// back to a unit bin width so every value lands in bin 0.
pub fn histogram(data: &[f64], n_bins: usize) -> Vec<(f64, f64, usize)> {
    if data.is_empty() || n_bins == 0 {
        return Vec::new();
    }
    let lo = data.iter().cloned().fold(f64::INFINITY, f64::min);
    let hi = data.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
    let width = if (hi - lo).abs() < f64::EPSILON {
        1.0
    } else {
        (hi - lo) / n_bins as f64
    };
    let mut counts = vec![0usize; n_bins];
    for &v in data {
        // Clamp so the maximum value maps to the last bin, not one past it.
        let bin = (((v - lo) / width) as usize).min(n_bins - 1);
        counts[bin] += 1;
    }
    counts
        .iter()
        .enumerate()
        .map(|(i, &c)| {
            let left = lo + i as f64 * width;
            (left, left + width, c)
        })
        .collect()
}
168/// One-sample t-test against a hypothesised mean `mu0`.
169///
170/// Returns `(t_statistic, approximate_p_value)`.
171/// The p-value is a two-tailed approximation based on a large-sample normal
172/// approximation (valid when n is not too small).
173///
174/// Returns `(0.0, 1.0)` for fewer than 2 data points or zero variance.
175pub fn t_test_one_sample(data: &[f64], mu0: f64) -> (f64, f64) {
176    let n = data.len();
177    if n < 2 {
178        return (0.0, 1.0);
179    }
180    let m = mean(data);
181    let s = std_dev(data);
182    if s == 0.0 {
183        return (0.0, 1.0);
184    }
185    let t = (m - mu0) / (s / (n as f64).sqrt());
186    let p = 2.0 * (1.0 - NormalDistribution::new(0.0, 1.0).cdf(t.abs()));
187    (t, p)
188}
/// Pearson's χ² goodness-of-fit statistic: Σ (O_i − E_i)² / E_i.
///
/// Panics if the slices have different lengths.
/// Bins with expected count ≤ 0 are skipped.
pub fn chi_squared_test(observed: &[f64], expected: &[f64]) -> f64 {
    assert_eq!(
        observed.len(),
        expected.len(),
        "chi_squared_test: slices must have the same length"
    );
    let mut chi2 = 0.0_f64;
    for (&o, &e) in observed.iter().zip(expected.iter()) {
        if e > 0.0 {
            let diff = o - e;
            chi2 += diff * diff / e;
        }
    }
    chi2
}
/// Kolmogorov–Smirnov statistic D = sup |F_n(x) − F(x)|.
///
/// `cdf` should be the theoretical CDF.  The data are sorted internally.
/// Returns `0.0` for empty input.
pub fn ks_statistic(data: &[f64], cdf: impl Fn(f64) -> f64) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let mut xs = data.to_vec();
    xs.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let n = xs.len() as f64;
    let mut d_max = 0.0_f64;
    for (i, &x) in xs.iter().enumerate() {
        // Empirical CDF just after and just before the i-th order statistic.
        let ecdf_hi = (i as f64 + 1.0) / n;
        let ecdf_lo = i as f64 / n;
        let fx = cdf(x);
        let d = (ecdf_hi - fx).abs().max((fx - ecdf_lo).abs());
        if d > d_max {
            d_max = d;
        }
    }
    d_max
}
/// Returns the Pearson correlation coefficient (alias for [`correlation`]).
///
/// Kept as a thin delegating wrapper for API compatibility; see
/// [`correlation`] for edge-case behaviour (returns `0.0` on degenerate
/// input).
pub fn pearson_correlation(x: &[f64], y: &[f64]) -> f64 {
    correlation(x, y)
}
/// Returns a closure that computes a running mean over the last `window`
/// values.
///
/// Each call is O(1): a ring buffer holds the window and a running sum is
/// updated incrementally.  Returns `0.0` for any call when `window` is 0.
pub fn running_average(window: usize) -> impl FnMut(f64) -> f64 {
    let mut ring: VecDeque<f64> = VecDeque::with_capacity(window.max(1));
    let mut total = 0.0_f64;
    move |value: f64| -> f64 {
        if window == 0 {
            return 0.0;
        }
        // Evict the oldest sample once the window is full.
        if ring.len() == window {
            if let Some(evicted) = ring.pop_front() {
                total -= evicted;
            }
        }
        ring.push_back(value);
        total += value;
        total / ring.len() as f64
    }
}
257/// Block-averaging estimator.
258///
259/// Divides `data` into non-overlapping blocks of `block_size` values and
260/// returns `(grand_mean, standard_error_of_mean)`.
261/// Returns `(0.0, 0.0)` when data is empty or `block_size` is zero.
262pub fn block_average(data: &[f64], block_size: usize) -> (f64, f64) {
263    if data.is_empty() || block_size == 0 {
264        return (0.0, 0.0);
265    }
266    let block_means: Vec<f64> = data.chunks_exact(block_size).map(mean).collect();
267    let n_blocks = block_means.len();
268    if n_blocks < 2 {
269        return (mean(data), 0.0);
270    }
271    let grand_mean = mean(&block_means);
272    let sem = std_dev(&block_means) / (n_blocks as f64).sqrt();
273    (grand_mean, sem)
274}
/// Integrated autocorrelation time τ of `data`.
///
/// Uses τ = 1 + 2 Σ C(t)/C(0) with standard windowing (summation stops at
/// the first non-positive normalised lag).  Returns `1.0` when variance is
/// zero, `max_lag` is 0, or there are fewer than 2 samples.
pub fn autocorrelation_time(data: &[f64], max_lag: usize) -> f64 {
    let n = data.len();
    if n < 2 || max_lag == 0 {
        return 1.0;
    }
    // Inlined mean (n >= 2 here).
    let m = data.iter().sum::<f64>() / n as f64;
    // C(0): biased sample variance (divides by n, matching the lags below).
    let c0: f64 = data.iter().map(|x| (x - m).powi(2)).sum::<f64>() / n as f64;
    if c0 == 0.0 {
        return 1.0;
    }
    let mut tau = 1.0_f64;
    for lag in 1..=max_lag.min(n - 1) {
        let ct: f64 = data[..n - lag]
            .iter()
            .zip(data[lag..].iter())
            .map(|(a, b)| (a - m) * (b - m))
            .sum::<f64>()
            / n as f64;
        let rho = ct / c0;
        // Windowing: truncate the sum once correlations stop being positive.
        if rho <= 0.0 {
            break;
        }
        tau += 2.0 * rho;
    }
    tau
}
/// Most probable speed in the Maxwell–Boltzmann distribution:
/// v_p = √(2 k_B T / m).
pub fn maxwell_boltzmann_speed(mass: f64, temp: f64, kb: f64) -> f64 {
    f64::sqrt(2.0 * kb * temp / mass)
}
/// Boltzmann factor exp(−E / (k_B T)).
pub fn boltzmann_factor(energy: f64, temp: f64, kb: f64) -> f64 {
    let exponent = -energy / (kb * temp);
    exponent.exp()
}
/// Canonical partition function Z = Σ exp(−E_i / (k_B T)).
///
/// The Boltzmann factor is computed inline per energy level.
pub fn partition_function(energies: &[f64], temp: f64, kb: f64) -> f64 {
    energies.iter().map(|&e| (-e / (kb * temp)).exp()).sum()
}
/// Helmholtz free energy F = −k_B T ln(Z).
pub fn free_energy_from_partition(z: f64, temp: f64, kb: f64) -> f64 {
    let ln_z = z.ln();
    -kb * temp * ln_z
}
324/// Compute the Pearson correlation matrix for a data matrix.
325///
326/// `data[i]` is the i-th observation (a row), each of length `d`.
327/// Returns a `d × d` correlation matrix.
328pub fn correlation_matrix(data: &[Vec<f64>]) -> Vec<Vec<f64>> {
329    if data.is_empty() {
330        return Vec::new();
331    }
332    let d = data[0].len();
333    if d == 0 {
334        return Vec::new();
335    }
336    let col = |j: usize| -> Vec<f64> { data.iter().map(|row| row[j]).collect() };
337    let mut mat = vec![vec![0.0f64; d]; d];
338    for i in 0..d {
339        for j in i..d {
340            let ci = col(i);
341            let cj = col(j);
342            let r = if i == j { 1.0 } else { correlation(&ci, &cj) };
343            mat[i][j] = r;
344            mat[j][i] = r;
345        }
346    }
347    mat
348}
349/// Covariance matrix for a data matrix.
350///
351/// `data[i]` is the i-th observation.  Returns a `d × d` matrix.
352pub fn covariance_matrix(data: &[Vec<f64>]) -> Vec<Vec<f64>> {
353    if data.is_empty() {
354        return Vec::new();
355    }
356    let d = data[0].len();
357    if d == 0 {
358        return Vec::new();
359    }
360    let col = |j: usize| -> Vec<f64> { data.iter().map(|row| row[j]).collect() };
361    let mut mat = vec![vec![0.0f64; d]; d];
362    for i in 0..d {
363        let ci = col(i);
364        for j in i..d {
365            let cj = col(j);
366            let cov = covariance(&ci, &cj);
367            mat[i][j] = cov;
368            mat[j][i] = cov;
369        }
370    }
371    mat
372}
/// Compute PCA using power iteration on the covariance matrix.
///
/// Returns up to `n_components` principal components.
/// `data[i]` is the i-th observation of length `d`.
///
/// Algorithm: centre the data, build the `d × d` sample covariance matrix,
/// then repeatedly extract the dominant eigenvector by power iteration and
/// deflate (Hotelling deflation) to expose the next component.
///
/// Returns `None` for empty data, zero-width rows, or fewer than 2
/// observations (sample covariance would be undefined).
///
/// NOTE(review): rows are indexed as `row[i]` for i < data[0].len() when
/// building the covariance matrix, so ragged input (rows shorter than the
/// first) would panic — confirm callers guarantee rectangular data.
pub fn pca(data: &[Vec<f64>], n_components: usize) -> Option<PcaResult> {
    if data.is_empty() {
        return None;
    }
    let n = data.len();
    let d = data[0].len();
    if d == 0 || n < 2 {
        return None;
    }
    // Never extract more components than there are dimensions.
    let n_comp = n_components.min(d);
    // Column means of the data matrix.
    let mean_vec: Vec<f64> = (0..d)
        .map(|j| data.iter().map(|row| row[j]).sum::<f64>() / n as f64)
        .collect();
    // Centre every observation about the column means.
    let centred: Vec<Vec<f64>> = data
        .iter()
        .map(|row| row.iter().zip(&mean_vec).map(|(x, m)| x - m).collect())
        .collect();
    // Sample covariance matrix (divides by n − 1).
    let cov: Vec<Vec<f64>> = (0..d)
        .map(|i| {
            (0..d)
                .map(|j| centred.iter().map(|row| row[i] * row[j]).sum::<f64>() / (n as f64 - 1.0))
                .collect()
        })
        .collect();
    let mut components = Vec::with_capacity(n_comp);
    let mut variances = Vec::with_capacity(n_comp);
    // `deflated` starts as the covariance matrix and has each found
    // eigen-direction subtracted out after every extraction.
    let mut deflated = cov.clone();
    // Matrix-vector product restricted to the d × d working matrix.
    let mat_vec = |m: &Vec<Vec<f64>>, v: &Vec<f64>| -> Vec<f64> {
        (0..d)
            .map(|i| m[i].iter().zip(v.iter()).map(|(a, b)| a * b).sum::<f64>())
            .collect()
    };
    // Euclidean norm, used to renormalise the iterate each step.
    let norm_vec = |v: &Vec<f64>| -> f64 { v.iter().map(|x| x * x).sum::<f64>().sqrt() };
    for _ in 0..n_comp {
        // Fixed deterministic start vector e_0 (no randomness needed here;
        // a start vector orthogonal to the dominant eigenvector would stall,
        // but deflation plus 200 iterations makes that unlikely in practice).
        let mut q: Vec<f64> = (0..d).map(|i| if i == 0 { 1.0 } else { 0.0 }).collect();
        for _ in 0..200 {
            let aq = mat_vec(&deflated, &q);
            let nrm = norm_vec(&aq);
            // Matrix annihilated the iterate (all variance already
            // extracted) — keep the current q.
            if nrm < 1e-30 {
                break;
            }
            q = aq.iter().map(|x| x / nrm).collect();
        }
        let aq = mat_vec(&deflated, &q);
        // Rayleigh quotient qᵀAq (q is unit-norm) = eigenvalue estimate.
        let eigenvalue: f64 = q.iter().zip(aq.iter()).map(|(qi, aqj)| qi * aqj).sum();
        // Hotelling deflation: A ← A − λ q qᵀ.
        for i in 0..d {
            for j in 0..d {
                deflated[i][j] -= eigenvalue * q[i] * q[j];
            }
        }
        components.push(q);
        variances.push(eigenvalue);
    }
    Some(PcaResult {
        mean: mean_vec,
        components,
        explained_variance: variances,
    })
}
436/// Project a centred data vector onto the principal components.
437///
438/// Returns a vector of `n_components` scores.
439pub fn pca_transform(x: &[f64], result: &PcaResult) -> Vec<f64> {
440    let centred: Vec<f64> = x.iter().zip(&result.mean).map(|(xi, mi)| xi - mi).collect();
441    result
442        .components
443        .iter()
444        .map(|pc| pc.iter().zip(&centred).map(|(pci, ci)| pci * ci).sum())
445        .collect()
446}
/// Autocorrelation function (ACF).
///
/// Index 0 is always 1.0.  The lag range is capped at `n − 1`, so the
/// result has `min(max_lag, n − 1) + 1` entries for non-degenerate input.
/// Returns `vec![1.0]` for fewer than 2 samples, and `vec![1.0; max_lag + 1]`
/// when the series has (near-)zero variance.
pub fn acf(data: &[f64], max_lag: usize) -> Vec<f64> {
    let n = data.len();
    if n < 2 {
        return vec![1.0];
    }
    // Inlined mean (n >= 2 here).
    let m = data.iter().sum::<f64>() / n as f64;
    // C(0): biased variance, the normaliser for every lag.
    let c0: f64 = data.iter().map(|x| (x - m).powi(2)).sum::<f64>() / n as f64;
    if c0 < f64::EPSILON {
        return vec![1.0; max_lag + 1];
    }
    let mut out = vec![1.0_f64];
    for lag in 1..=max_lag.min(n - 1) {
        let ct: f64 = data[..n - lag]
            .iter()
            .zip(data[lag..].iter())
            .map(|(a, b)| (a - m) * (b - m))
            .sum::<f64>()
            / n as f64;
        out.push(ct / c0);
    }
    out
}
/// Partial autocorrelation function (PACF) using the Yule-Walker equations.
///
/// Returns a vector whose index 0 is always 1.0 and whose index 1 equals
/// `acf(data, 1)[1]`.
///
/// NOTE(review): this is only an approximation of the Durbin–Levinson
/// recursion — `prev_phi` holds the previously emitted φ_{j,j} values, not
/// the fully updated order-(k−1) coefficient vector, so entries beyond
/// lag 2 can differ from the textbook PACF.  Confirm whether callers rely
/// on exact values before changing.
pub fn pacf(data: &[f64], max_lag: usize) -> Vec<f64> {
    // Autocorrelations ρ_0..ρ_max_lag (ρ_0 = 1; may be shorter if the
    // series is short — see `acf`).
    let rho = acf(data, max_lag);
    let m = rho.len();
    if m < 2 {
        return vec![1.0];
    }
    let mut phi = vec![1.0_f64];
    for k in 1..m {
        let prev_order = phi.len() - 1;
        // First partial autocorrelation equals ρ_1 directly.
        if prev_order == 0 {
            phi.push(rho[k]);
            continue;
        }
        let prev_phi: Vec<f64> = phi[1..].to_vec();
        // Numerator: ρ_k minus the part explained by lower-order terms.
        let num: f64 = rho[k]
            - prev_phi
                .iter()
                .zip(1..=prev_order)
                .map(|(p, j)| p * rho[k - j])
                .sum::<f64>();
        // Denominator: residual-variance proxy built from the same terms.
        let den: f64 = 1.0
            - prev_phi
                .iter()
                .zip(1..=prev_order)
                .map(|(p, j)| p * rho[j])
                .sum::<f64>();
        // Guard against a vanishing denominator.
        let phi_kk = if den.abs() < 1e-30 { 0.0 } else { num / den };
        phi.push(phi_kk);
    }
    phi
}
511/// Compute a bootstrap confidence interval for a statistic.
512///
513/// - `data`: original sample.
514/// - `statistic`: function mapping a sample to a scalar.
515/// - `n_boot`: number of bootstrap resamples.
516/// - `alpha`: significance level (e.g. 0.05 for 95% CI).
517/// - `rng`: random number generator.
518///
519/// Returns `(lower, upper)` percentile bootstrap CI.
520pub fn bootstrap_ci(
521    data: &[f64],
522    statistic: impl Fn(&[f64]) -> f64,
523    n_boot: usize,
524    alpha: f64,
525    rng: &mut StatRng,
526) -> (f64, f64) {
527    let n = data.len();
528    if n == 0 {
529        return (0.0, 0.0);
530    }
531    let mut boot_stats: Vec<f64> = Vec::with_capacity(n_boot);
532    for _ in 0..n_boot {
533        let resample: Vec<f64> = (0..n)
534            .map(|_| {
535                let idx = (rng.next_f64() * n as f64) as usize;
536                data[idx.min(n - 1)]
537            })
538            .collect();
539        boot_stats.push(statistic(&resample));
540    }
541    boot_stats.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
542    let lo_idx = ((alpha / 2.0) * n_boot as f64) as usize;
543    let hi_idx = ((1.0 - alpha / 2.0) * n_boot as f64) as usize;
544    (
545        boot_stats[lo_idx.min(n_boot.saturating_sub(1))],
546        boot_stats[hi_idx.min(n_boot.saturating_sub(1))],
547    )
548}
549/// Bootstrap standard error of a statistic.
550pub fn bootstrap_se(
551    data: &[f64],
552    statistic: impl Fn(&[f64]) -> f64,
553    n_boot: usize,
554    rng: &mut StatRng,
555) -> f64 {
556    let n = data.len();
557    if n == 0 {
558        return 0.0;
559    }
560    let boot_stats: Vec<f64> = (0..n_boot)
561        .map(|_| {
562            let resample: Vec<f64> = (0..n)
563                .map(|_| {
564                    let idx = (rng.next_f64() * n as f64) as usize;
565                    data[idx.min(n - 1)]
566                })
567                .collect();
568            statistic(&resample)
569        })
570        .collect();
571    std_dev(&boot_stats)
572}
573/// Two-sample independent t-test.
574///
575/// Returns `(t_statistic, approximate_p_value)`.
576/// Uses Welch's t-test (unequal variances).
577/// Returns `(0.0, 1.0)` when either sample has fewer than 2 elements.
578pub fn t_test_two_sample(x: &[f64], y: &[f64]) -> (f64, f64) {
579    if x.len() < 2 || y.len() < 2 {
580        return (0.0, 1.0);
581    }
582    let mx = mean(x);
583    let my = mean(y);
584    let sx2 = variance(x);
585    let sy2 = variance(y);
586    let nx = x.len() as f64;
587    let ny = y.len() as f64;
588    let se = (sx2 / nx + sy2 / ny).sqrt();
589    if se < f64::EPSILON {
590        return (0.0, 1.0);
591    }
592    let t = (mx - my) / se;
593    let p = 2.0 * (1.0 - NormalDistribution::new(0.0, 1.0).cdf(t.abs()));
594    (t, p)
595}
/// Mann-Whitney U statistic (non-parametric two-sample test).
///
/// Returns the U statistic for sample `x` against sample `y`: the number
/// of pairs `(xi, yj)` with `xi > yj`, counting near-ties
/// (|xi − yj| < EPSILON) as 0.5.
/// Smaller U indicates x tends to have smaller values than y.
/// Returns `0.0` when either sample is empty.
pub fn mann_whitney_u(x: &[f64], y: &[f64]) -> f64 {
    if x.is_empty() || y.is_empty() {
        return 0.0;
    }
    // The original computed nx/ny as f64 and immediately discarded them
    // via `let _ = (nx, ny);` — dead code removed.
    x.iter()
        .map(|&xi| {
            y.iter()
                .map(|&yj| {
                    if xi > yj {
                        1.0
                    } else if (xi - yj).abs() < f64::EPSILON {
                        0.5
                    } else {
                        0.0
                    }
                })
                .sum::<f64>()
        })
        .sum()
}
/// Wilcoxon signed-rank test statistic for paired data.
///
/// Returns the test statistic W+ (sum of ranks of positive differences).
/// Zero differences are dropped; tied absolute differences receive the
/// average of the ranks they span (the standard convention — the original
/// assigned sequential ranks to ties, making W+ depend on input order).
///
/// Panics if the slices have different lengths.
pub fn wilcoxon_signed_rank(x: &[f64], y: &[f64]) -> f64 {
    assert_eq!(
        x.len(),
        y.len(),
        "wilcoxon_signed_rank: x and y must have same length"
    );
    // (|d|, sign(d)) for every non-zero paired difference.
    let mut pairs: Vec<(f64, f64)> = x
        .iter()
        .zip(y.iter())
        .map(|(xi, yi)| xi - yi)
        .filter(|d| d.abs() > f64::EPSILON)
        .map(|d| (d.abs(), d.signum()))
        .collect();
    if pairs.is_empty() {
        return 0.0;
    }
    pairs.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
    let n = pairs.len();
    let mut w_plus = 0.0_f64;
    let mut i = 0;
    while i < n {
        // Extend j over the run of tied absolute differences starting at i.
        let mut j = i;
        while j + 1 < n && (pairs[j + 1].0 - pairs[i].0).abs() < f64::EPSILON {
            j += 1;
        }
        // Average of the 1-based ranks i+1 ..= j+1.
        let avg_rank = (i + j) as f64 / 2.0 + 1.0;
        for k in i..=j {
            if pairs[k].1 > 0.0 {
                w_plus += avg_rank;
            }
        }
        i = j + 1;
    }
    w_plus
}
/// Shapiro-Wilk normality approximation (W statistic only, no p-value).
///
/// Returns a value close to 1.0 for normally distributed data.
/// This is a simplified approximation suitable for quick checks.
///
/// NOTE(review): the weights use a Blom-style plotting position fed through
/// an approximate probit, not the exact Shapiro–Wilk coefficients, and are
/// not normalised — the W produced here is not directly comparable to
/// published Shapiro–Wilk tables; confirm callers only need a relative
/// score.
pub fn shapiro_wilk_w(data: &[f64]) -> f64 {
    let n = data.len();
    // Too few points to assess — report "consistent with normal".
    if n < 3 {
        return 1.0;
    }
    let mut sorted = data.to_vec();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let m = mean(&sorted);
    // Total sum of squares about the mean (denominator of W).
    let s2: f64 = sorted.iter().map(|x| (x - m).powi(2)).sum::<f64>();
    // Constant data: no spread, report perfect W.
    if s2 < f64::EPSILON {
        return 1.0;
    }
    let half = n / 2;
    let mut b_sum = 0.0f64;
    for i in 0..half {
        // Pair the i-th smallest with the i-th largest order statistic.
        let rank = n - 1 - i;
        // Blom-type plotting position for the upper order statistic.
        let p = (rank as f64 + 1.0 - 0.375) / (n as f64 + 0.25);
        let a_i = normal_quantile_approx(p);
        b_sum += a_i * (sorted[rank] - sorted[i]);
    }
    (b_sum * b_sum) / s2
}
679/// Approximate normal quantile (probit) function for p in (0, 1).
680pub(super) fn normal_quantile_approx(p: f64) -> f64 {
681    if p <= 0.0 || p >= 1.0 {
682        return 0.0;
683    }
684    let pp = if p < 0.5 { p } else { 1.0 - p };
685    let t = (-2.0 * pp.ln()).sqrt();
686    let c0 = 2.515517_f64;
687    let c1 = 0.802853_f64;
688    let c2 = 0.010328_f64;
689    let d1 = 1.432788_f64;
690    let d2 = 0.189269_f64;
691    let d3 = 0.001308_f64;
692    let x = t - (c0 + c1 * t + c2 * t * t) / (1.0 + d1 * t + d2 * t * t + d3 * t * t * t);
693    if p < 0.5 { -x } else { x }
694}
695/// Approximation of erf(x) using Abramowitz & Stegun formula 7.1.26.
696pub(super) fn erf_approx(x: f64) -> f64 {
697    pub(super) const P: f64 = 0.3275911;
698    pub(super) const A1: f64 = 0.254829592;
699    pub(super) const A2: f64 = -0.284496736;
700    pub(super) const A3: f64 = 1.421413741;
701    pub(super) const A4: f64 = -1.453152027;
702    pub(super) const A5: f64 = 1.061405429;
703    let sign = if x < 0.0 { -1.0 } else { 1.0 };
704    let x = x.abs();
705    let t = 1.0 / (1.0 + P * x);
706    let poly = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5))));
707    sign * (1.0 - poly * (-x * x).exp())
708}
/// Natural log of the gamma function via Stirling's approximation.
///
/// Accurate for z > ~0.5; uses the recurrence for small z.
pub(super) fn log_gamma_stirling(z: f64) -> f64 {
    // Push small arguments upward with ln Γ(z) = ln Γ(z + 1) − ln z so the
    // asymptotic formula below is only ever evaluated for z ≥ 7.
    if z < 7.0 {
        return log_gamma_stirling(z + 1.0) - z.ln();
    }
    // ln Γ(z) = ln Γ((z−1) + 1): apply a Stirling-type series to z − 1.
    let z = z - 1.0;
    // 0.5·ln(2π/z) + z·ln((z + 1/(12z) − 1/(360z³)) / e);  TAU = 2π.
    // NOTE(review): resembles Nemes' approximation for Γ(z+1) — verify the
    // error bound before relying on more than ~8 significant digits.
    0.5 * (std::f64::consts::TAU / z).ln()
        + z * ((z + 1.0 / (12.0 * z) - 1.0 / (360.0 * z.powi(3))) / std::f64::consts::E).ln()
}
720/// Natural log of k! computed via log-gamma (Stirling) for large k,
721/// or a direct sum for small k.
722pub(super) fn log_factorial(k: u64) -> f64 {
723    if k == 0 || k == 1 {
724        return 0.0;
725    }
726    if k <= 20 {
727        (1..=k).map(|i| (i as f64).ln()).sum()
728    } else {
729        log_gamma_stirling(k as f64 + 1.0)
730    }
731}
732/// Returns the lower quartile (Q1), median (Q2), and upper quartile (Q3).
733///
734/// Uses linear interpolation (same method as `percentile`).
735/// Returns `(0.0, 0.0, 0.0)` for empty input.
736pub fn quartiles(data: &[f64]) -> (f64, f64, f64) {
737    if data.is_empty() {
738        return (0.0, 0.0, 0.0);
739    }
740    (
741        percentile(data, 25.0),
742        percentile(data, 50.0),
743        percentile(data, 75.0),
744    )
745}
746/// Returns the inter-quartile range (IQR = Q3 − Q1).
747///
748/// Returns `0.0` for empty input.
749pub fn iqr(data: &[f64]) -> f64 {
750    let (q1, _, q3) = quartiles(data);
751    q3 - q1
752}
753/// Spearman rank correlation coefficient between `x` and `y`.
754///
755/// Ranks ties using the average-rank convention.
756/// Returns `0.0` when fewer than 2 paired values or when either rank series
757/// has zero variance.
758pub fn spearman_correlation(x: &[f64], y: &[f64]) -> f64 {
759    assert_eq!(
760        x.len(),
761        y.len(),
762        "spearman_correlation: x and y must be same length"
763    );
764    let n = x.len();
765    if n < 2 {
766        return 0.0;
767    }
768    let rank = |data: &[f64]| -> Vec<f64> {
769        let mut indexed: Vec<(usize, f64)> = data.iter().cloned().enumerate().collect();
770        indexed.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
771        let mut ranks = vec![0.0_f64; data.len()];
772        let mut i = 0;
773        while i < n {
774            let mut j = i;
775            while j + 1 < n && (indexed[j + 1].1 - indexed[i].1).abs() < f64::EPSILON {
776                j += 1;
777            }
778            let avg_rank = (i + j) as f64 / 2.0 + 1.0;
779            for k in i..=j {
780                ranks[indexed[k].0] = avg_rank;
781            }
782            i = j + 1;
783        }
784        ranks
785    };
786    let rx = rank(x);
787    let ry = rank(y);
788    correlation(&rx, &ry)
789}
/// Simple ordinary least-squares linear regression.
///
/// Fits `y = slope * x + intercept` and returns
/// `(slope, intercept, r_squared)`.
/// Returns `(0.0, 0.0, 0.0)` for fewer than 2 points and
/// `(0.0, mean(y), 0.0)` when `x` has zero spread.  Panics on length
/// mismatch.
pub fn linear_regression(x: &[f64], y: &[f64]) -> (f64, f64, f64) {
    assert_eq!(
        x.len(),
        y.len(),
        "linear_regression: x and y must be same length"
    );
    let n = x.len();
    if n < 2 {
        return (0.0, 0.0, 0.0);
    }
    // Inlined means (n >= 2 here).
    let mx = x.iter().sum::<f64>() / n as f64;
    let my = y.iter().sum::<f64>() / n as f64;
    let sxx: f64 = x.iter().map(|xi| (xi - mx).powi(2)).sum();
    let sxy: f64 = x
        .iter()
        .zip(y.iter())
        .map(|(xi, yi)| (xi - mx) * (yi - my))
        .sum();
    // Degenerate x: best fit is the horizontal line through mean(y).
    if sxx.abs() < f64::EPSILON {
        return (0.0, my, 0.0);
    }
    let slope = sxy / sxx;
    let intercept = my - slope * mx;
    // Coefficient of determination R² = 1 − SS_res / SS_tot.
    let ss_res: f64 = x
        .iter()
        .zip(y.iter())
        .map(|(xi, yi)| (yi - (slope * xi + intercept)).powi(2))
        .sum();
    let ss_tot: f64 = y.iter().map(|yi| (yi - my).powi(2)).sum();
    let r_squared = if ss_tot < f64::EPSILON {
        // Constant y perfectly explained by the fitted constant line.
        1.0
    } else {
        1.0 - ss_res / ss_tot
    };
    (slope, intercept, r_squared)
}
/// One-sample Kolmogorov-Smirnov test statistic against a theoretical CDF.
///
/// `cdf` must be a monotone non-decreasing function mapping `f64 → f64` in
/// \[0, 1\].  Returns the KS statistic D (maximum absolute deviation).
///
/// This is a delegating alias for [`ks_statistic`], kept for naming
/// symmetry with [`ks_test_two_sample`].
pub fn ks_test_one_sample(data: &[f64], cdf: impl Fn(f64) -> f64) -> f64 {
    ks_statistic(data, cdf)
}
/// Two-sample Kolmogorov-Smirnov statistic between the empirical
/// distributions of `x` and `y`.
///
/// Returns D = max |F_x(t) − F_y(t)| over all observed values t.
/// Returns `0.0` when either sample is empty.
pub fn ks_test_two_sample(x: &[f64], y: &[f64]) -> f64 {
    if x.is_empty() || y.is_empty() {
        return 0.0;
    }
    let mut sx = x.to_vec();
    let mut sy = y.to_vec();
    sx.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    sy.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let (nx, ny) = (sx.len() as f64, sy.len() as f64);
    // Candidate thresholds: the union of both samples, deduplicated.
    let mut thresholds: Vec<f64> = sx.iter().chain(sy.iter()).cloned().collect();
    thresholds.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    thresholds.dedup_by(|a, b| (*a - *b).abs() < f64::EPSILON);
    thresholds
        .iter()
        .map(|&t| {
            // Empirical CDFs evaluated just after t (values <= t).
            let fx = sx.partition_point(|&v| v <= t) as f64 / nx;
            let fy = sy.partition_point(|&v| v <= t) as f64 / ny;
            (fx - fy).abs()
        })
        .fold(0.0_f64, f64::max)
}
/// Chi-squared goodness-of-fit test — returns the χ² statistic.
///
/// `observed` holds integer counts; `expected` holds float frequencies.
/// Cells with expected count ≤ 1e-10 are skipped to avoid division by
/// (near-)zero.  Panics on length mismatch.
///
/// See [`chi_squared_test`] for the all-float variant.
pub fn chi_squared_statistic(observed: &[u64], expected: &[f64]) -> f64 {
    assert_eq!(
        observed.len(),
        expected.len(),
        "chi_squared_statistic: length mismatch"
    );
    let mut chi2 = 0.0_f64;
    for (&o, &e) in observed.iter().zip(expected.iter()) {
        if e > 1e-10 {
            let diff = o as f64 - e;
            chi2 += diff * diff / e;
        }
    }
    chi2
}
/// Compute sample skewness via the adjusted Fisher-Pearson method.
///
/// Thin delegating alias for [`skewness`], retained as a standalone
/// function for convenience; see that function for edge-case behaviour.
pub fn sample_skewness(data: &[f64]) -> f64 {
    skewness(data)
}
/// Compute excess kurtosis (kurtosis − 3).
///
/// Thin delegating alias for [`kurtosis`], retained as a standalone
/// function for convenience; see that function for edge-case behaviour.
pub fn sample_kurtosis(data: &[f64]) -> f64 {
    kurtosis(data)
}
/// Pearson correlation coefficient.
///
/// Descriptive-name alias that delegates to [`correlation`] (as does
/// [`pearson_correlation`] — both wrappers call the same implementation;
/// the original doc incorrectly said `pearson_correlation` delegated to
/// this function).
pub fn pearson_r(x: &[f64], y: &[f64]) -> f64 {
    correlation(x, y)
}
/// Compute the p-value for a two-sided one-sample Student's t-test.
///
/// Delegates to [`t_test_one_sample`] and returns its
/// `(t_statistic, p_value)` pair unchanged.
///
/// NOTE(review): the original doc promised a regularised-incomplete-beta
/// p-value, but the body only delegates to the large-sample normal
/// approximation — either implement the fuller approximation or keep this
/// honest wording.
pub fn t_test_one_sample_full(data: &[f64], mu0: f64) -> (f64, f64) {
    t_test_one_sample(data, mu0)
}
/// Two-sample t-test returning `(t_statistic, p_value_two_sided)`.
///
/// Alias that delegates to [`t_test_two_sample`].
///
/// NOTE(review): the name promises Welch's unequal-variance test with
/// Welch–Satterthwaite degrees of freedom — confirm that `t_test_two_sample`
/// actually implements that variant rather than the pooled-variance test.
pub fn welch_t_test(x: &[f64], y: &[f64]) -> (f64, f64) {
    t_test_two_sample(x, y)
}
925/// Chi-squared goodness-of-fit test returning the statistic and
926/// degrees of freedom.
927///
928/// `observed` and `expected` are floating-point frequencies.
929/// Returns `(chi2, df)` where `df = observed.len() − 1`.
930pub fn chi_squared_gof(observed: &[f64], expected: &[f64]) -> (f64, usize) {
931    let chi2 = chi_squared_test(observed, expected);
932    let df = observed.len().saturating_sub(1);
933    (chi2, df)
934}
935/// Median Absolute Deviation (MAD): median of |x_i - median(x)|.
936///
937/// A robust estimator of scale.  For normally distributed data,
938/// MAD × 1.4826 ≈ standard deviation.
939#[allow(dead_code)]
940pub fn mad(data: &[f64]) -> f64 {
941    if data.is_empty() {
942        return 0.0;
943    }
944    let m = median(data);
945    let mut deviations: Vec<f64> = data.iter().map(|&x| (x - m).abs()).collect();
946    deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
947    let n = deviations.len();
948    if n % 2 == 1 {
949        deviations[n / 2]
950    } else {
951        (deviations[n / 2 - 1] + deviations[n / 2]) / 2.0
952    }
953}
954/// Huber M-estimator of location.
955///
956/// Iteratively reweighted least squares (IRLS) estimate that is robust to
957/// outliers.  Uses the Huber loss with threshold `k` (typically 1.345 for
958/// 95% efficiency under normality).
959///
960/// Returns the robust location estimate after at most `max_iter` iterations.
961#[allow(dead_code)]
962pub fn huber_m_estimator(data: &[f64], k: f64, max_iter: usize) -> f64 {
963    if data.is_empty() {
964        return 0.0;
965    }
966    let mut mu = median(data);
967    let sigma_scale = mad(data) * 1.4826;
968    let sigma = if sigma_scale < 1e-10 {
969        1.0
970    } else {
971        sigma_scale
972    };
973    for _ in 0..max_iter {
974        let weights: Vec<f64> = data
975            .iter()
976            .map(|&x| {
977                let r = (x - mu) / sigma;
978                if r.abs() <= k { 1.0 } else { k / r.abs() }
979            })
980            .collect();
981        let sum_w: f64 = weights.iter().sum();
982        if sum_w < 1e-30 {
983            break;
984        }
985        let new_mu: f64 = data
986            .iter()
987            .zip(weights.iter())
988            .map(|(&x, &w)| x * w)
989            .sum::<f64>()
990            / sum_w;
991        if (new_mu - mu).abs() < 1e-8 {
992            mu = new_mu;
993            break;
994        }
995        mu = new_mu;
996    }
997    mu
998}
999/// Tukey biweight (bisquare) M-estimator of location.
1000///
1001/// Uses the bisquare ρ function with tuning constant `c` (default 4.685
1002/// for 95% efficiency under normality).
1003#[allow(dead_code)]
1004pub fn tukey_biweight_estimator(data: &[f64], c: f64, max_iter: usize) -> f64 {
1005    if data.is_empty() {
1006        return 0.0;
1007    }
1008    let mut mu = median(data);
1009    let sigma_scale = mad(data) * 1.4826;
1010    let sigma = if sigma_scale < 1e-10 {
1011        1.0
1012    } else {
1013        sigma_scale
1014    };
1015    for _ in 0..max_iter {
1016        let weights: Vec<f64> = data
1017            .iter()
1018            .map(|&x| {
1019                let u = (x - mu) / (c * sigma);
1020                if u.abs() >= 1.0 {
1021                    0.0
1022                } else {
1023                    let t = 1.0 - u * u;
1024                    t * t
1025                }
1026            })
1027            .collect();
1028        let sum_w: f64 = weights.iter().sum();
1029        if sum_w < 1e-30 {
1030            break;
1031        }
1032        let new_mu = data
1033            .iter()
1034            .zip(weights.iter())
1035            .map(|(&x, &w)| x * w)
1036            .sum::<f64>()
1037            / sum_w;
1038        if (new_mu - mu).abs() < 1e-8 {
1039            mu = new_mu;
1040            break;
1041        }
1042        mu = new_mu;
1043    }
1044    mu
1045}
/// Compute the empirical CDF of `data`.
///
/// Returns a sorted `Vec<(x, F_n(x))>` where `F_n(x) = (rank of x) / n`,
/// with ranks running 1..=n.  Returns an empty vector for an empty slice.
#[allow(dead_code)]
pub fn empirical_cdf(data: &[f64]) -> Vec<(f64, f64)> {
    if data.is_empty() {
        return Vec::new();
    }
    let mut sorted = data.to_vec();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let n = sorted.len() as f64;
    let mut steps = Vec::with_capacity(sorted.len());
    for (rank, x) in sorted.into_iter().enumerate() {
        // 1-based rank divided by n gives the step height at x.
        steps.push((x, (rank + 1) as f64 / n));
    }
    steps
}
/// Evaluate the empirical CDF of `data` at a given point `x`.
///
/// Returns the fraction of data points ≤ x, or `0.0` for an empty slice.
#[allow(dead_code)]
pub fn ecdf_at(data: &[f64], x: f64) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let at_or_below = data.iter().filter(|&&v| v <= x).count() as f64;
    at_or_below / data.len() as f64
}
/// Anderson-Darling test statistic for testing goodness-of-fit.
///
/// Returns the A² statistic of `data` against a theoretical CDF `cdf`:
///
///   A² = −n − (1/n) · Σᵢ (2i − 1) · [ln F(x₍ᵢ₎) + ln(1 − F(x₍ₙ₊₁₋ᵢ₎))]
///
/// where x₍ᵢ₎ are the order statistics.  Larger values indicate greater
/// discrepancy from the theoretical distribution.  Returns `0.0` for
/// fewer than 2 samples.
#[allow(dead_code)]
pub fn anderson_darling_statistic(data: &[f64], cdf: impl Fn(f64) -> f64) -> f64 {
    let n = data.len();
    if n < 2 {
        return 0.0;
    }
    let mut sorted = data.to_vec();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let nf = n as f64;
    let s: f64 = sorted
        .iter()
        .enumerate()
        .map(|(i, &x)| {
            // Clamp into (0, 1) so both logarithms stay finite.
            let fi = cdf(x).clamp(1e-15, 1.0 - 1e-15);
            // Survival function 1 − F at the mirrored order statistic.
            // The previous version wrongly used F itself here, and fetched
            // the element via `.iter().rev().nth(i)` — an O(n) walk inside
            // this O(n) map, i.e. O(n²) overall.  Direct indexing is O(1).
            let complement = 1.0 - cdf(sorted[n - 1 - i]).clamp(1e-15, 1.0 - 1e-15);
            (2.0 * (i as f64 + 1.0) - 1.0) * (fi.ln() + complement.ln())
        })
        .sum();
    -nf - s / nf
}
/// Kruskal-Wallis H statistic (non-parametric one-way ANOVA).
///
/// Tests whether two or more groups have the same distribution.
/// Returns the H statistic (approximately chi-squared distributed with
/// `groups.len() - 1` degrees of freedom for large samples).
/// Returns `0.0` when fewer than two groups are supplied.
#[allow(dead_code)]
pub fn kruskal_wallis_h(groups: &[&[f64]]) -> f64 {
    let k = groups.len();
    if k < 2 {
        return 0.0;
    }
    // Pool every observation together with the index of its group.
    let mut pooled: Vec<(f64, usize)> = Vec::new();
    for (g, members) in groups.iter().enumerate() {
        for &value in members.iter() {
            pooled.push((value, g));
        }
    }
    pooled.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
    let n = pooled.len() as f64;
    let m = pooled.len();
    // Assign 1-based mid-ranks, averaging over runs of tied values
    // (a value is "tied" when it sits within EPSILON of the run's first).
    let mut ranks = vec![0.0_f64; m];
    let mut start = 0;
    while start < m {
        let mut end = start;
        while end + 1 < m && (pooled[end + 1].0 - pooled[start].0).abs() < f64::EPSILON {
            end += 1;
        }
        let shared_rank = (start + end) as f64 / 2.0 + 1.0;
        for slot in ranks[start..=end].iter_mut() {
            *slot = shared_rank;
        }
        start = end + 1;
    }
    // Per-group rank sums and sizes.
    let mut rank_sums = vec![0.0_f64; k];
    let mut sizes = vec![0usize; k];
    for (idx, &(_, g)) in pooled.iter().enumerate() {
        rank_sums[g] += ranks[idx];
        sizes[g] += 1;
    }
    let weighted: f64 = rank_sums
        .iter()
        .zip(sizes.iter())
        .filter(|&(_, &sz)| sz > 0)
        .map(|(&rs, &sz)| rs * rs / sz as f64)
        .sum();
    12.0 / (n * (n + 1.0)) * weighted - 3.0 * (n + 1.0)
}
// Unit tests for the statistics helpers in this module.
//
// Fix: `test_correlation_matrix_orthogonal` previously asserted
// `cm[i][i] - 1.0 < 1e-10` without `.abs()`, which is vacuously true for any
// diagonal entry ≤ 1 and so could never catch an undershooting correlation.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ExponentialDistribution;
    use crate::KernelDensityEstimate;
    use crate::KernelDensityEstimate2D;
    use crate::PoissonDistribution;
    use crate::SlidingWindowStats;
    use crate::WelfordOnline;
    #[test]
    fn test_mean_known() {
        let data = [2.0, 4.0, 6.0, 8.0];
        assert!((mean(&data) - 5.0).abs() < 1e-12);
    }
    #[test]
    fn test_variance_known() {
        let data = [1.0, 3.0, 5.0];
        assert!((variance(&data) - 4.0).abs() < 1e-12);
    }
    #[test]
    fn test_median_odd() {
        let data = [3.0, 1.0, 4.0, 1.0, 5.0];
        assert!((median(&data) - 3.0).abs() < 1e-12);
    }
    #[test]
    fn test_median_even() {
        let data = [1.0, 2.0, 3.0, 4.0];
        assert!((median(&data) - 2.5).abs() < 1e-12);
    }
    #[test]
    fn test_normal_pdf_at_mean() {
        let nd = NormalDistribution::new(0.0, 1.0);
        assert!(nd.pdf(0.0) > 0.0);
        assert!((nd.pdf(0.0) - 0.3989422802).abs() < 1e-8);
    }
    #[test]
    fn test_normal_cdf_at_mean() {
        let nd = NormalDistribution::new(0.0, 1.0);
        assert!((nd.cdf(0.0) - 0.5).abs() < 1e-6);
    }
    #[test]
    fn test_exponential_sample_mean() {
        let dist = ExponentialDistribution::new(2.0);
        let mut rng = StatRng::new(42);
        let samples: Vec<f64> = (0..10_000).map(|_| dist.sample(&mut rng)).collect();
        let m = mean(&samples);
        assert!(
            (m - 0.5).abs() < 0.05,
            "exponential mean {m} not close to 0.5"
        );
    }
    #[test]
    fn test_poisson_pmf_sums_to_one() {
        let dist = PoissonDistribution::new(3.0);
        let total: f64 = (0u64..50).map(|k| dist.pmf(k)).sum();
        assert!((total - 1.0).abs() < 1e-6);
    }
    #[test]
    fn test_kde_positive_at_data_point() {
        let data = vec![1.0, 2.0, 3.0];
        let kde = KernelDensityEstimate::new(data, 0.5);
        assert!(kde.evaluate(2.0) > 0.0);
    }
    #[test]
    fn test_histogram_counts_sum() {
        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
        let bins = histogram(&data, 10);
        let total: usize = bins.iter().map(|(_, _, c)| c).sum();
        assert_eq!(total, data.len());
    }
    #[test]
    fn test_correlation_perfect() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [2.0, 4.0, 6.0, 8.0, 10.0];
        assert!((correlation(&x, &y) - 1.0).abs() < 1e-12);
    }
    #[test]
    fn test_mean_empty() {
        assert_eq!(mean(&[]), 0.0);
    }
    #[test]
    fn test_variance_single() {
        assert_eq!(variance(&[42.0]), 0.0);
    }
    #[test]
    fn test_std_dev() {
        let data = [1.0, 3.0, 5.0];
        assert!((std_dev(&data) - 2.0).abs() < 1e-12);
    }
    #[test]
    fn test_covariance_perfect() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [2.0, 4.0, 6.0, 8.0, 10.0];
        assert!((covariance(&x, &y) - 5.0).abs() < 1e-12);
    }
    #[test]
    fn test_pearson_constant() {
        let x = [1.0, 1.0, 1.0];
        let y = [1.0, 2.0, 3.0];
        assert_eq!(pearson_correlation(&x, &y), 0.0);
    }
    #[test]
    fn test_histogram_empty() {
        assert!(histogram(&[], 5).is_empty());
    }
    #[test]
    fn test_histogram_zero_bins() {
        let data = [1.0, 2.0, 3.0];
        assert!(histogram(&data, 0).is_empty());
    }
    #[test]
    fn test_running_average() {
        let mut avg = running_average(3);
        assert!((avg(1.0) - 1.0).abs() < 1e-12);
        assert!((avg(2.0) - 1.5).abs() < 1e-12);
        assert!((avg(3.0) - 2.0).abs() < 1e-12);
        assert!((avg(4.0) - 3.0).abs() < 1e-12);
    }
    #[test]
    fn test_running_average_zero_window() {
        let mut avg = running_average(0);
        assert_eq!(avg(5.0), 0.0);
    }
    #[test]
    fn test_block_average_consistent_with_mean() {
        let data: Vec<f64> = (1..=20).map(|i| i as f64).collect();
        let (grand_mean, _) = block_average(&data, 4);
        assert!((grand_mean - mean(&data)).abs() < 1e-10);
    }
    #[test]
    fn test_block_average_empty() {
        assert_eq!(block_average(&[], 4), (0.0, 0.0));
    }
    #[test]
    fn test_autocorrelation_time_iid() {
        let data: Vec<f64> = (0..200)
            .map(|i| if i % 2 == 0 { 1.0 } else { -1.0 })
            .collect();
        let tau = autocorrelation_time(&data, 10);
        assert!((tau - 1.0).abs() < 1e-10);
    }
    #[test]
    fn test_boltzmann_factor_infinite_temp() {
        let result = boltzmann_factor(100.0, 1e300, 1.0);
        assert!((result - 1.0).abs() < 1e-6);
    }
    #[test]
    fn test_boltzmann_factor_zero_energy() {
        assert!((boltzmann_factor(0.0, 300.0, 1.38e-23) - 1.0).abs() < 1e-12);
    }
    #[test]
    fn test_partition_function() {
        let energies = [0.0, 1.0];
        let z = partition_function(&energies, 1.0, 1.0);
        let expected = 1.0 + (-1.0_f64).exp();
        assert!((z - expected).abs() < 1e-12);
    }
    #[test]
    fn test_free_energy_from_partition() {
        let z = 1.0_f64.exp();
        let f = free_energy_from_partition(z, 1.0, 1.0);
        assert!((f - (-1.0)).abs() < 1e-12);
    }
    #[test]
    fn test_maxwell_boltzmann_speed() {
        assert!((maxwell_boltzmann_speed(2.0, 1.0, 1.0) - 1.0).abs() < 1e-12);
    }
    #[test]
    fn test_correlation_matrix_identity_inputs() {
        let data: Vec<Vec<f64>> = (0..10).map(|i| vec![i as f64, i as f64]).collect();
        let cm = correlation_matrix(&data);
        assert_eq!(cm.len(), 2);
        assert!(
            (cm[0][1] - 1.0).abs() < 1e-8,
            "identical variables should correlate 1"
        );
    }
    #[test]
    fn test_correlation_matrix_orthogonal() {
        let data: Vec<Vec<f64>> = (0..5)
            .map(|i| vec![i as f64, if i % 2 == 0 { 1.0 } else { -1.0 }])
            .collect();
        let cm = correlation_matrix(&data);
        // Use .abs(): the raw difference is negative whenever the diagonal
        // undershoots 1.0, which made the previous assertion vacuously true.
        assert!((cm[0][0] - 1.0).abs() < 1e-10);
        assert!((cm[1][1] - 1.0).abs() < 1e-10);
    }
    #[test]
    fn test_covariance_matrix_diagonal() {
        let data: Vec<Vec<f64>> = vec![vec![1.0, 10.0], vec![2.0, 10.0], vec![3.0, 10.0]];
        let cov = covariance_matrix(&data);
        assert!(cov[0][0] > 0.0, "variance of first var should be positive");
        assert!(
            (cov[1][1]).abs() < 1e-10,
            "constant second var should have zero variance"
        );
        assert!(
            (cov[0][1]).abs() < 1e-10,
            "covariance with constant should be 0"
        );
    }
    #[test]
    fn test_pca_returns_components() {
        let data: Vec<Vec<f64>> = (0..20).map(|i| vec![i as f64, 2.0 * i as f64]).collect();
        let result = pca(&data, 2).expect("PCA should succeed");
        assert_eq!(result.components.len(), 2);
        assert_eq!(result.mean.len(), 2);
        assert_eq!(result.explained_variance.len(), 2);
        assert!(result.explained_variance[0] >= result.explained_variance[1] - 1e-6);
    }
    #[test]
    fn test_pca_transform_shape() {
        let data: Vec<Vec<f64>> = (0..10).map(|i| vec![i as f64, -i as f64, 0.0]).collect();
        let result = pca(&data, 2).unwrap();
        let scores = pca_transform(&[1.0, 2.0, 3.0], &result);
        assert_eq!(scores.len(), 2);
    }
    #[test]
    fn test_acf_lag0_is_one() {
        let data: Vec<f64> = (0..20).map(|i| i as f64).collect();
        let a = acf(&data, 5);
        assert!((a[0] - 1.0).abs() < 1e-12, "ACF lag 0 should be 1");
    }
    #[test]
    fn test_acf_iid_near_zero_high_lag() {
        let data: Vec<f64> = (0..100)
            .map(|i| if i % 2 == 0 { 1.0 } else { -1.0 })
            .collect();
        let a = acf(&data, 2);
        assert_eq!(a.len(), 3);
        assert!(
            a[1] < -0.9,
            "ACF lag 1 for alternating series should be near -1, got {}",
            a[1]
        );
    }
    #[test]
    fn test_acf_constant_returns_ones() {
        let data = vec![5.0; 10];
        let a = acf(&data, 3);
        for v in &a {
            assert!((*v - 1.0).abs() < 1e-10 || *v == 1.0);
        }
    }
    #[test]
    fn test_pacf_lag0_is_one() {
        let data: Vec<f64> = (0..20).map(|i| i as f64).collect();
        let p = pacf(&data, 3);
        assert!((p[0] - 1.0).abs() < 1e-12, "PACF lag 0 should be 1");
    }
    #[test]
    fn test_pacf_length() {
        let data: Vec<f64> = (0..30).map(|i| i as f64).collect();
        let p = pacf(&data, 5);
        assert_eq!(p.len(), 6, "PACF should have max_lag+1 entries");
    }
    #[test]
    fn test_bootstrap_ci_contains_true_mean() {
        let data: Vec<f64> = (0..50).map(|i| i as f64).collect();
        let true_mean = mean(&data);
        let mut rng = StatRng::new(42);
        let (lo, hi) = bootstrap_ci(&data, mean, 1000, 0.05, &mut rng);
        assert!(
            lo <= true_mean && true_mean <= hi,
            "bootstrap CI [{lo}, {hi}] should contain true mean {true_mean}"
        );
    }
    #[test]
    fn test_bootstrap_se_positive() {
        let data: Vec<f64> = (0..30).map(|i| i as f64).collect();
        let mut rng = StatRng::new(123);
        let se = bootstrap_se(&data, mean, 500, &mut rng);
        assert!(se > 0.0, "bootstrap SE should be positive");
    }
    #[test]
    fn test_bootstrap_ci_empty() {
        let mut rng = StatRng::new(1);
        let (lo, hi) = bootstrap_ci(&[], mean, 100, 0.05, &mut rng);
        assert_eq!((lo, hi), (0.0, 0.0));
    }
    #[test]
    fn test_kde2d_positive_at_data_point() {
        let data = vec![[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]];
        let kde = KernelDensityEstimate2D::new(data, 0.5, 0.5);
        assert!(
            kde.evaluate(1.0, 1.0) > 0.0,
            "KDE should be positive at data point"
        );
    }
    #[test]
    fn test_kde2d_optimal_bandwidths() {
        let data: Vec<[f64; 2]> = (0..20).map(|i| [i as f64, i as f64 * 2.0]).collect();
        let (bwx, bwy) = KernelDensityEstimate2D::optimal_bandwidths(&data);
        assert!(bwx > 0.0 && bwy > 0.0, "bandwidths should be positive");
    }
    #[test]
    fn test_kde2d_empty_returns_zero() {
        let kde = KernelDensityEstimate2D::new(vec![], 0.5, 0.5);
        assert_eq!(kde.evaluate(0.0, 0.0), 0.0);
    }
    #[test]
    fn test_t_test_two_sample_equal_means() {
        let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let (t, p) = t_test_two_sample(&x, &y);
        assert!((t).abs() < 1e-10, "t should be 0 for equal samples");
        assert!(p > 0.9, "p should be high for equal samples, got {p}");
    }
    #[test]
    fn test_t_test_two_sample_different_means() {
        let x: Vec<f64> = (0..20).map(|i| i as f64).collect();
        let y: Vec<f64> = (100..120).map(|i| i as f64).collect();
        let (t, p) = t_test_two_sample(&x, &y);
        assert!(
            t < -1.0,
            "t should be very negative for well-separated samples"
        );
        assert!(p < 0.01, "p should be very small, got {p}");
    }
    #[test]
    fn test_mann_whitney_u_symmetric() {
        let x = vec![1.0, 2.0, 3.0];
        let y = vec![4.0, 5.0, 6.0];
        let u_xy = mann_whitney_u(&x, &y);
        let u_yx = mann_whitney_u(&y, &x);
        assert!(
            (u_xy + u_yx - 9.0).abs() < 1e-10,
            "U_xy + U_yx should equal nx*ny=9"
        );
    }
    #[test]
    fn test_wilcoxon_signed_rank_positive_diffs() {
        let x = vec![2.0, 3.0, 4.0];
        let y = vec![1.0, 1.0, 1.0];
        let w = wilcoxon_signed_rank(&x, &y);
        assert!(
            (w - 6.0).abs() < 1e-10,
            "W+ = 6 for all positive diffs, got {w}"
        );
    }
    #[test]
    fn test_shapiro_wilk_w_normal_data() {
        let data = vec![-1.5, -1.2, -0.8, -0.4, -0.1, 0.1, 0.4, 0.8, 1.2, 1.5];
        let w = shapiro_wilk_w(&data);
        assert!(w > 0.0, "W should be positive for any data, got {w}");
        assert!(w.is_finite(), "W should be finite, got {w}");
    }
    #[test]
    fn test_shapiro_wilk_w_constant_data() {
        let data = vec![3.0; 5];
        let w = shapiro_wilk_w(&data);
        assert_eq!(w, 1.0);
    }
    #[test]
    fn test_quartiles_known() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
        let (q1, q2, q3) = quartiles(&data);
        assert!((q2 - 4.0).abs() < 1e-10, "median should be 4, got {q2}");
        assert!(q1 < q2, "Q1 < Q2");
        assert!(q2 < q3, "Q2 < Q3");
    }
    #[test]
    fn test_quartiles_empty() {
        assert_eq!(quartiles(&[]), (0.0, 0.0, 0.0));
    }
    #[test]
    fn test_iqr_known() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
        let v = iqr(&data);
        assert!(v > 0.0, "IQR should be positive");
    }
    #[test]
    fn test_iqr_constant() {
        let data = vec![5.0; 10];
        assert!((iqr(&data)).abs() < 1e-10);
    }
    #[test]
    fn test_spearman_perfect_positive() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [2.0, 4.0, 6.0, 8.0, 10.0];
        assert!((spearman_correlation(&x, &y) - 1.0).abs() < 1e-10);
    }
    #[test]
    fn test_spearman_perfect_negative() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [5.0, 4.0, 3.0, 2.0, 1.0];
        assert!((spearman_correlation(&x, &y) + 1.0).abs() < 1e-10);
    }
    #[test]
    fn test_spearman_with_ties() {
        let x = [1.0, 1.0, 2.0, 3.0];
        let y = [1.0, 2.0, 3.0, 4.0];
        let r = spearman_correlation(&x, &y);
        assert!(
            r > 0.0 && r <= 1.0,
            "Spearman with ties should be in (0,1], got {r}"
        );
    }
    #[test]
    fn test_linear_regression_perfect_fit() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [2.0, 4.0, 6.0, 8.0, 10.0];
        let (slope, intercept, r2) = linear_regression(&x, &y);
        assert!(
            (slope - 2.0).abs() < 1e-10,
            "slope should be 2, got {slope}"
        );
        assert!(
            intercept.abs() < 1e-10,
            "intercept should be 0, got {intercept}"
        );
        assert!((r2 - 1.0).abs() < 1e-10, "R2 should be 1, got {r2}");
    }
    #[test]
    fn test_linear_regression_horizontal() {
        let x = [1.0, 2.0, 3.0];
        let y = [5.0, 5.0, 5.0];
        let (slope, intercept, _r2) = linear_regression(&x, &y);
        assert!(slope.abs() < 1e-10, "slope should be 0 for constant y");
        assert!((intercept - 5.0).abs() < 1e-10, "intercept should be 5");
    }
    #[test]
    fn test_linear_regression_r2_nonnegative() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [1.0, 4.0, 9.0, 16.0, 25.0];
        let (_, _, r2) = linear_regression(&x, &y);
        assert!((0.0..=1.0).contains(&r2), "R2 should be in [0,1], got {r2}");
    }
    #[test]
    fn test_ks_one_sample_uniform() {
        let data: Vec<f64> = (0..=10).map(|i| i as f64 / 10.0).collect();
        let d = ks_test_one_sample(&data, |x| x.clamp(0.0, 1.0));
        assert!(
            d < 0.2,
            "KS stat for uniform sample vs uniform CDF should be small, got {d}"
        );
    }
    #[test]
    fn test_ks_two_sample_identical() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let d = ks_test_two_sample(&data, &data);
        assert!(d < 1e-10, "KS2 of identical samples should be 0, got {d}");
    }
    #[test]
    fn test_ks_two_sample_different() {
        let x = vec![0.0, 0.1, 0.2, 0.3, 0.4];
        let y = vec![0.6, 0.7, 0.8, 0.9, 1.0];
        let d = ks_test_two_sample(&x, &y);
        assert!(
            d > 0.5,
            "KS2 for well-separated samples should be large, got {d}"
        );
    }
    #[test]
    fn test_chi_squared_statistic_uniform() {
        let observed = [10u64; 5];
        let expected = [10.0_f64; 5];
        let chi2 = chi_squared_statistic(&observed, &expected);
        assert!(
            chi2.abs() < 1e-10,
            "chi2 should be 0 for perfect fit, got {chi2}"
        );
    }
    #[test]
    fn test_chi_squared_statistic_deviation() {
        let observed = [20u64, 5];
        let expected = [12.5_f64, 12.5];
        let chi2 = chi_squared_statistic(&observed, &expected);
        assert!(chi2 > 0.0, "chi2 should be positive for deviating counts");
    }
    #[test]
    fn test_chi_squared_gof_df() {
        let observed = [10.0, 20.0, 30.0];
        let expected = [20.0, 20.0, 20.0];
        let (_chi2, df) = chi_squared_gof(&observed, &expected);
        assert_eq!(df, 2, "df = n - 1 = 2");
    }
    #[test]
    fn test_welford_online_mean() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
        let mut w = WelfordOnline::new();
        for &x in &data {
            w.update(x);
        }
        assert!(
            (w.mean - 3.0).abs() < 1e-12,
            "mean should be 3, got {}",
            w.mean
        );
    }
    #[test]
    fn test_welford_online_variance() {
        let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let mut w = WelfordOnline::new();
        for &x in &data {
            w.update(x);
        }
        let expected_var = variance(&data);
        assert!(
            (w.variance() - expected_var).abs() < 1e-10,
            "Welford variance {:.6} should match batch variance {:.6}",
            w.variance(),
            expected_var
        );
    }
    #[test]
    fn test_welford_online_single_observation() {
        let mut w = WelfordOnline::new();
        w.update(42.0);
        assert!((w.mean - 42.0).abs() < 1e-12);
        assert!(
            w.variance().abs() < 1e-12,
            "variance of single obs should be 0"
        );
    }
    #[test]
    fn test_welford_online_population_variance() {
        let data = [1.0, 2.0, 3.0];
        let mut w = WelfordOnline::new();
        for &x in &data {
            w.update(x);
        }
        let pop_var = w.population_variance();
        assert!(
            (pop_var - 2.0 / 3.0).abs() < 1e-10,
            "pop var should be 2/3, got {pop_var}"
        );
    }
    #[test]
    fn test_welford_online_empty() {
        let w = WelfordOnline::new();
        assert_eq!(w.n, 0);
        assert_eq!(w.variance(), 0.0);
        assert_eq!(w.population_variance(), 0.0);
    }
    #[test]
    fn test_sliding_window_mean() {
        let mut sw = SlidingWindowStats::new(3);
        sw.push(1.0);
        sw.push(2.0);
        sw.push(3.0);
        assert!((sw.mean() - 2.0).abs() < 1e-12);
        sw.push(4.0);
        assert!((sw.mean() - 3.0).abs() < 1e-12);
    }
    #[test]
    fn test_sliding_window_variance() {
        let mut sw = SlidingWindowStats::new(4);
        for x in [2.0, 4.0, 6.0, 8.0] {
            sw.push(x);
        }
        let expected = variance(&[2.0, 4.0, 6.0, 8.0]);
        assert!((sw.variance() - expected).abs() < 1e-10);
    }
    #[test]
    fn test_sliding_window_empty() {
        let sw = SlidingWindowStats::new(5);
        assert!(sw.is_empty());
        assert_eq!(sw.mean(), 0.0);
    }
    #[test]
    fn test_pearson_r_alias() {
        let x = [1.0, 2.0, 3.0];
        let y = [3.0, 2.0, 1.0];
        assert!((pearson_r(&x, &y) + 1.0).abs() < 1e-10);
    }
    #[test]
    fn test_sample_skewness_symmetric() {
        let data = [-2.0, -1.0, 0.0, 1.0, 2.0];
        let s = sample_skewness(&data);
        assert!(
            s.abs() < 1e-10,
            "symmetric data should have ~0 skewness, got {s}"
        );
    }
    #[test]
    fn test_sample_kurtosis_normal_approx() {
        let data: Vec<f64> = (0..50).map(|i| i as f64 - 25.0).collect();
        let k = sample_kurtosis(&data);
        assert!(k.is_finite(), "kurtosis should be finite");
    }
    #[test]
    fn test_welch_t_test_same_distribution() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [1.0, 2.0, 3.0, 4.0, 5.0];
        let (t, _p) = welch_t_test(&x, &y);
        assert!(t.abs() < 1e-10, "t should be ~0 for identical samples");
    }
}