1#![allow(clippy::needless_range_loop)]
6use std::collections::VecDeque;
7
8use super::types::{NormalDistribution, PcaResult, StatRng};
9
/// Arithmetic mean of `data`; defined as 0.0 for an empty slice.
pub fn mean(data: &[f64]) -> f64 {
    match data.len() {
        0 => 0.0,
        n => data.iter().sum::<f64>() / n as f64,
    }
}
/// Unbiased sample variance (n − 1 denominator); 0.0 when fewer than two points.
pub fn variance(data: &[f64]) -> f64 {
    let n = data.len();
    if n < 2 {
        return 0.0;
    }
    let nf = n as f64;
    let avg = data.iter().sum::<f64>() / nf;
    let ss: f64 = data.iter().map(|x| (x - avg).powi(2)).sum();
    ss / (nf - 1.0)
}
/// Sample standard deviation: square root of the unbiased sample
/// [`variance`] (n − 1 denominator); 0.0 when fewer than two points.
pub fn std_dev(data: &[f64]) -> f64 {
    variance(data).sqrt()
}
/// Median of `data` (0.0 for empty input). Sorts a copy; incomparable
/// values (NaN) are treated as equal during the sort.
pub fn median(data: &[f64]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let mut v = data.to_vec();
    v.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let mid = v.len() / 2;
    if v.len() % 2 == 0 {
        (v[mid - 1] + v[mid]) / 2.0
    } else {
        v[mid]
    }
}
/// Percentile `p` (in 0..=100) of `data` using linear interpolation between
/// the two nearest order statistics; 0.0 for empty input.
pub fn percentile(data: &[f64], p: f64) -> f64 {
    let mut v = data.to_vec();
    v.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    match v.len() {
        0 => 0.0,
        1 => v[0],
        n => {
            let pos = (p / 100.0) * (n as f64 - 1.0);
            let below = pos.floor() as usize;
            let above = (below + 1).min(n - 1);
            let weight = pos - below as f64;
            v[below] + weight * (v[above] - v[below])
        }
    }
}
/// Unbiased sample covariance of `x` and `y` (n − 1 denominator);
/// 0.0 when fewer than two pairs.
///
/// # Panics
/// Panics when the slices differ in length.
pub fn covariance(x: &[f64], y: &[f64]) -> f64 {
    assert_eq!(
        x.len(),
        y.len(),
        "covariance: x and y must have the same length"
    );
    let n = x.len();
    if n < 2 {
        return 0.0;
    }
    let nf = n as f64;
    let mx = x.iter().sum::<f64>() / nf;
    let my = y.iter().sum::<f64>() / nf;
    let mut acc = 0.0_f64;
    for (xi, yi) in x.iter().zip(y) {
        acc += (xi - mx) * (yi - my);
    }
    acc / (nf - 1.0)
}
92pub fn correlation(x: &[f64], y: &[f64]) -> f64 {
97 let sx = std_dev(x);
98 let sy = std_dev(y);
99 if sx == 0.0 || sy == 0.0 {
100 return 0.0;
101 }
102 covariance(x, y) / (sx * sy)
103}
/// Bias-adjusted sample skewness: the mean standardized third moment
/// scaled by sqrt(n(n−1))/(n−2). Returns 0.0 for n < 3 or zero spread.
pub fn skewness(data: &[f64]) -> f64 {
    let n = data.len();
    if n < 3 {
        return 0.0;
    }
    let nf = n as f64;
    let m = data.iter().sum::<f64>() / nf;
    let s = (data.iter().map(|x| (x - m).powi(2)).sum::<f64>() / (nf - 1.0)).sqrt();
    if s == 0.0 {
        return 0.0;
    }
    let mut third = 0.0_f64;
    for x in data {
        third += ((x - m) / s).powi(3);
    }
    (third / nf) * (nf * (nf - 1.0)).sqrt() / (nf - 2.0)
}
/// Excess kurtosis: mean standardized fourth moment minus 3.0
/// (0.0 for a normal distribution). Returns 0.0 for n < 4 or zero spread.
pub fn kurtosis(data: &[f64]) -> f64 {
    let n = data.len();
    if n < 4 {
        return 0.0;
    }
    let nf = n as f64;
    let m = data.iter().sum::<f64>() / nf;
    let s = (data.iter().map(|x| (x - m).powi(2)).sum::<f64>() / (nf - 1.0)).sqrt();
    if s == 0.0 {
        return 0.0;
    }
    let mut fourth = 0.0_f64;
    for x in data {
        fourth += ((x - m) / s).powi(4);
    }
    fourth / nf - 3.0
}
/// Equal-width histogram of `data` into `n_bins` bins.
/// Each entry is (bin lower edge, bin upper edge, count); the maximum value
/// is folded into the last bin. Empty input or zero bins yields an empty vec.
pub fn histogram(data: &[f64], n_bins: usize) -> Vec<(f64, f64, usize)> {
    if n_bins == 0 || data.is_empty() {
        return Vec::new();
    }
    let mut lo = f64::INFINITY;
    let mut hi = f64::NEG_INFINITY;
    for &v in data {
        lo = lo.min(v);
        hi = hi.max(v);
    }
    // Degenerate (all-equal) data gets a unit-width single bucket.
    let width = if (hi - lo).abs() < f64::EPSILON {
        1.0
    } else {
        (hi - lo) / n_bins as f64
    };
    let mut counts = vec![0usize; n_bins];
    for &v in data {
        let bin = (((v - lo) / width) as usize).min(n_bins - 1);
        counts[bin] += 1;
    }
    counts
        .into_iter()
        .enumerate()
        .map(|(i, c)| {
            let left = lo + i as f64 * width;
            (left, left + width, c)
        })
        .collect()
}
168pub fn t_test_one_sample(data: &[f64], mu0: f64) -> (f64, f64) {
176 let n = data.len();
177 if n < 2 {
178 return (0.0, 1.0);
179 }
180 let m = mean(data);
181 let s = std_dev(data);
182 if s == 0.0 {
183 return (0.0, 1.0);
184 }
185 let t = (m - mu0) / (s / (n as f64).sqrt());
186 let p = 2.0 * (1.0 - NormalDistribution::new(0.0, 1.0).cdf(t.abs()));
187 (t, p)
188}
/// Chi-squared statistic Σ (O − E)² / E over categories with positive
/// expected counts (non-positive expectations are skipped).
///
/// # Panics
/// Panics when the slices differ in length.
pub fn chi_squared_test(observed: &[f64], expected: &[f64]) -> f64 {
    assert_eq!(
        observed.len(),
        expected.len(),
        "chi_squared_test: slices must have the same length"
    );
    let mut chi2 = 0.0_f64;
    for (&o, &e) in observed.iter().zip(expected) {
        if e > 0.0 {
            let d = o - e;
            chi2 += d * d / e;
        }
    }
    chi2
}
/// Kolmogorov–Smirnov statistic: the largest deviation between the
/// empirical CDF of `data` and the hypothesized `cdf`, checking both the
/// upper and lower step of the empirical CDF at each sorted point.
/// Returns 0.0 for empty input.
pub fn ks_statistic(data: &[f64], cdf: impl Fn(f64) -> f64) -> f64 {
    let mut v = data.to_vec();
    v.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let n = v.len() as f64;
    let mut d_max = 0.0_f64;
    for (i, &x) in v.iter().enumerate() {
        let fx = cdf(x);
        let above = ((i as f64 + 1.0) / n - fx).abs();
        let below = (fx - i as f64 / n).abs();
        d_max = d_max.max(above.max(below));
    }
    d_max
}
/// Alias for [`correlation`]: Pearson product-moment correlation coefficient.
pub fn pearson_correlation(x: &[f64], y: &[f64]) -> f64 {
    correlation(x, y)
}
/// Returns a stateful closure that yields the mean of the last `window`
/// values pushed into it (the mean of everything seen so far until the
/// window fills). A zero window always yields 0.0.
pub fn running_average(window: usize) -> impl FnMut(f64) -> f64 {
    let mut samples: VecDeque<f64> = VecDeque::with_capacity(window.max(1));
    let mut total = 0.0_f64;
    move |value: f64| -> f64 {
        if window == 0 {
            return 0.0;
        }
        // Evict the oldest sample once the window is full.
        if samples.len() == window {
            if let Some(evicted) = samples.pop_front() {
                total -= evicted;
            }
        }
        samples.push_back(value);
        total += value;
        total / samples.len() as f64
    }
}
257pub fn block_average(data: &[f64], block_size: usize) -> (f64, f64) {
263 if data.is_empty() || block_size == 0 {
264 return (0.0, 0.0);
265 }
266 let block_means: Vec<f64> = data.chunks_exact(block_size).map(mean).collect();
267 let n_blocks = block_means.len();
268 if n_blocks < 2 {
269 return (mean(data), 0.0);
270 }
271 let grand_mean = mean(&block_means);
272 let sem = std_dev(&block_means) / (n_blocks as f64).sqrt();
273 (grand_mean, sem)
274}
/// Integrated autocorrelation time: 1 + 2 Σ ρ(t), summing lags while the
/// autocorrelation stays positive (initial-positive-sequence truncation).
/// Returns 1.0 for degenerate input (n < 2, zero max lag, or zero variance).
pub fn autocorrelation_time(data: &[f64], max_lag: usize) -> f64 {
    let n = data.len();
    if n < 2 || max_lag == 0 {
        return 1.0;
    }
    let nf = n as f64;
    let m = data.iter().sum::<f64>() / nf;
    let c0 = data.iter().map(|x| (x - m).powi(2)).sum::<f64>() / nf;
    if c0 == 0.0 {
        return 1.0;
    }
    let mut tau = 1.0_f64;
    for lag in 1..=max_lag.min(n - 1) {
        let mut c = 0.0_f64;
        for (a, b) in data[..n - lag].iter().zip(&data[lag..]) {
            c += (a - m) * (b - m);
        }
        let rho = (c / nf) / c0;
        if rho <= 0.0 {
            break;
        }
        tau += 2.0 * rho;
    }
    tau
}
/// Characteristic Maxwell–Boltzmann speed sqrt(2 k_B T / m).
pub fn maxwell_boltzmann_speed(mass: f64, temp: f64, kb: f64) -> f64 {
    let v_squared = 2.0 * kb * temp / mass;
    v_squared.sqrt()
}
/// Boltzmann weight exp(−E / (k_B T)).
pub fn boltzmann_factor(energy: f64, temp: f64, kb: f64) -> f64 {
    let kt = kb * temp;
    (-energy / kt).exp()
}
/// Canonical partition function Z = Σ exp(−E_i / (k_B T)) over the level
/// energies (Boltzmann factor inlined; k_B T hoisted out of the sum).
pub fn partition_function(energies: &[f64], temp: f64, kb: f64) -> f64 {
    let kt = kb * temp;
    energies.iter().map(|&e| (-e / kt).exp()).sum()
}
/// Helmholtz free energy F = −k_B T ln Z from a partition function `z`.
pub fn free_energy_from_partition(z: f64, temp: f64, kb: f64) -> f64 {
    let ln_z = z.ln();
    -kb * temp * ln_z
}
324pub fn correlation_matrix(data: &[Vec<f64>]) -> Vec<Vec<f64>> {
329 if data.is_empty() {
330 return Vec::new();
331 }
332 let d = data[0].len();
333 if d == 0 {
334 return Vec::new();
335 }
336 let col = |j: usize| -> Vec<f64> { data.iter().map(|row| row[j]).collect() };
337 let mut mat = vec![vec![0.0f64; d]; d];
338 for i in 0..d {
339 for j in i..d {
340 let ci = col(i);
341 let cj = col(j);
342 let r = if i == j { 1.0 } else { correlation(&ci, &cj) };
343 mat[i][j] = r;
344 mat[j][i] = r;
345 }
346 }
347 mat
348}
349pub fn covariance_matrix(data: &[Vec<f64>]) -> Vec<Vec<f64>> {
353 if data.is_empty() {
354 return Vec::new();
355 }
356 let d = data[0].len();
357 if d == 0 {
358 return Vec::new();
359 }
360 let col = |j: usize| -> Vec<f64> { data.iter().map(|row| row[j]).collect() };
361 let mut mat = vec![vec![0.0f64; d]; d];
362 for i in 0..d {
363 let ci = col(i);
364 for j in i..d {
365 let cj = col(j);
366 let cov = covariance(&ci, &cj);
367 mat[i][j] = cov;
368 mat[j][i] = cov;
369 }
370 }
371 mat
372}
/// Principal component analysis of `data` (rows = observations, columns =
/// features) via power iteration with Hotelling deflation.
///
/// Returns up to `n_components` components (capped at the dimensionality),
/// or `None` when there are no rows, no columns, or fewer than two samples.
/// `components` are unit-length eigenvector estimates of the sample
/// covariance matrix and `explained_variance` their eigenvalues, extracted
/// largest-first by repeated deflation.
pub fn pca(data: &[Vec<f64>], n_components: usize) -> Option<PcaResult> {
    if data.is_empty() {
        return None;
    }
    let n = data.len();
    let d = data[0].len();
    if d == 0 || n < 2 {
        return None;
    }
    let n_comp = n_components.min(d);
    // Per-column means, used to centre the data before forming the covariance.
    let mean_vec: Vec<f64> = (0..d)
        .map(|j| data.iter().map(|row| row[j]).sum::<f64>() / n as f64)
        .collect();
    let centred: Vec<Vec<f64>> = data
        .iter()
        .map(|row| row.iter().zip(&mean_vec).map(|(x, m)| x - m).collect())
        .collect();
    // Sample covariance matrix (n - 1 denominator), d x d.
    let cov: Vec<Vec<f64>> = (0..d)
        .map(|i| {
            (0..d)
                .map(|j| centred.iter().map(|row| row[i] * row[j]).sum::<f64>() / (n as f64 - 1.0))
                .collect()
        })
        .collect();
    let mut components = Vec::with_capacity(n_comp);
    let mut variances = Vec::with_capacity(n_comp);
    let mut deflated = cov.clone();
    // Matrix-vector product and Euclidean norm helpers for the power iteration.
    let mat_vec = |m: &Vec<Vec<f64>>, v: &Vec<f64>| -> Vec<f64> {
        (0..d)
            .map(|i| m[i].iter().zip(v.iter()).map(|(a, b)| a * b).sum::<f64>())
            .collect()
    };
    let norm_vec = |v: &Vec<f64>| -> f64 { v.iter().map(|x| x * x).sum::<f64>().sqrt() };
    for _ in 0..n_comp {
        // Power iteration started from the first basis vector; 200 sweeps is a
        // fixed budget (the only early exit is a vanishing-norm bailout).
        let mut q: Vec<f64> = (0..d).map(|i| if i == 0 { 1.0 } else { 0.0 }).collect();
        for _ in 0..200 {
            let aq = mat_vec(&deflated, &q);
            let nrm = norm_vec(&aq);
            if nrm < 1e-30 {
                break;
            }
            q = aq.iter().map(|x| x / nrm).collect();
        }
        // Rayleigh quotient q^T A q estimates the eigenvalue (q is unit-length).
        let aq = mat_vec(&deflated, &q);
        let eigenvalue: f64 = q.iter().zip(aq.iter()).map(|(qi, aqj)| qi * aqj).sum();
        // Hotelling deflation: subtract the found eigenpair so the next
        // iteration converges to the next-largest component.
        for i in 0..d {
            for j in 0..d {
                deflated[i][j] -= eigenvalue * q[i] * q[j];
            }
        }
        components.push(q);
        variances.push(eigenvalue);
    }
    Some(PcaResult {
        mean: mean_vec,
        components,
        explained_variance: variances,
    })
}
436pub fn pca_transform(x: &[f64], result: &PcaResult) -> Vec<f64> {
440 let centred: Vec<f64> = x.iter().zip(&result.mean).map(|(xi, mi)| xi - mi).collect();
441 result
442 .components
443 .iter()
444 .map(|pc| pc.iter().zip(¢red).map(|(pci, ci)| pci * ci).sum())
445 .collect()
446}
/// Autocorrelation function ρ(lag) for lags 0..=max_lag (capped at n − 1),
/// with ρ(0) = 1. A (near-)constant series returns all ones; n < 2 returns
/// just [1.0].
pub fn acf(data: &[f64], max_lag: usize) -> Vec<f64> {
    let n = data.len();
    if n < 2 {
        return vec![1.0];
    }
    let nf = n as f64;
    let m = data.iter().sum::<f64>() / nf;
    let c0 = data.iter().map(|x| (x - m).powi(2)).sum::<f64>() / nf;
    if c0 < f64::EPSILON {
        return vec![1.0; max_lag + 1];
    }
    let mut out = vec![1.0_f64];
    for lag in 1..=max_lag.min(n - 1) {
        let mut c = 0.0_f64;
        for (a, b) in data[..n - lag].iter().zip(&data[lag..]) {
            c += (a - m) * (b - m);
        }
        out.push((c / nf) / c0);
    }
    out
}
/// Partial autocorrelation function up to `max_lag`, derived from [`acf`].
/// Index 0 is defined as 1.0; index 1 is ρ(1).
///
/// NOTE(review): for k ≥ 2 this plugs the previously stored phi_kk values
/// directly into the numerator/denominator sums rather than maintaining the
/// full order-(k−1) AR coefficient vector of the complete Durbin–Levinson
/// recursion — confirm this simplified recursion is intentional.
pub fn pacf(data: &[f64], max_lag: usize) -> Vec<f64> {
    let rho = acf(data, max_lag);
    let m = rho.len();
    if m < 2 {
        return vec![1.0];
    }
    let mut phi = vec![1.0_f64];
    for k in 1..m {
        let prev_order = phi.len() - 1;
        // First partial coefficient equals the lag-1 autocorrelation.
        if prev_order == 0 {
            phi.push(rho[k]);
            continue;
        }
        let prev_phi: Vec<f64> = phi[1..].to_vec();
        let num: f64 = rho[k]
            - prev_phi
                .iter()
                .zip(1..=prev_order)
                .map(|(p, j)| p * rho[k - j])
                .sum::<f64>();
        let den: f64 = 1.0
            - prev_phi
                .iter()
                .zip(1..=prev_order)
                .map(|(p, j)| p * rho[j])
                .sum::<f64>();
        // Guard against a vanishing denominator instead of dividing by ~0.
        let phi_kk = if den.abs() < 1e-30 { 0.0 } else { num / den };
        phi.push(phi_kk);
    }
    phi
}
511pub fn bootstrap_ci(
521 data: &[f64],
522 statistic: impl Fn(&[f64]) -> f64,
523 n_boot: usize,
524 alpha: f64,
525 rng: &mut StatRng,
526) -> (f64, f64) {
527 let n = data.len();
528 if n == 0 {
529 return (0.0, 0.0);
530 }
531 let mut boot_stats: Vec<f64> = Vec::with_capacity(n_boot);
532 for _ in 0..n_boot {
533 let resample: Vec<f64> = (0..n)
534 .map(|_| {
535 let idx = (rng.next_f64() * n as f64) as usize;
536 data[idx.min(n - 1)]
537 })
538 .collect();
539 boot_stats.push(statistic(&resample));
540 }
541 boot_stats.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
542 let lo_idx = ((alpha / 2.0) * n_boot as f64) as usize;
543 let hi_idx = ((1.0 - alpha / 2.0) * n_boot as f64) as usize;
544 (
545 boot_stats[lo_idx.min(n_boot.saturating_sub(1))],
546 boot_stats[hi_idx.min(n_boot.saturating_sub(1))],
547 )
548}
549pub fn bootstrap_se(
551 data: &[f64],
552 statistic: impl Fn(&[f64]) -> f64,
553 n_boot: usize,
554 rng: &mut StatRng,
555) -> f64 {
556 let n = data.len();
557 if n == 0 {
558 return 0.0;
559 }
560 let boot_stats: Vec<f64> = (0..n_boot)
561 .map(|_| {
562 let resample: Vec<f64> = (0..n)
563 .map(|_| {
564 let idx = (rng.next_f64() * n as f64) as usize;
565 data[idx.min(n - 1)]
566 })
567 .collect();
568 statistic(&resample)
569 })
570 .collect();
571 std_dev(&boot_stats)
572}
573pub fn t_test_two_sample(x: &[f64], y: &[f64]) -> (f64, f64) {
579 if x.len() < 2 || y.len() < 2 {
580 return (0.0, 1.0);
581 }
582 let mx = mean(x);
583 let my = mean(y);
584 let sx2 = variance(x);
585 let sy2 = variance(y);
586 let nx = x.len() as f64;
587 let ny = y.len() as f64;
588 let se = (sx2 / nx + sy2 / ny).sqrt();
589 if se < f64::EPSILON {
590 return (0.0, 1.0);
591 }
592 let t = (mx - my) / se;
593 let p = 2.0 * (1.0 - NormalDistribution::new(0.0, 1.0).cdf(t.abs()));
594 (t, p)
595}
/// Mann–Whitney U statistic for `x` versus `y`: over all (x_i, y_j) pairs,
/// counts 1.0 when x_i > y_j, 0.5 for a tie (within f64::EPSILON), else 0.0.
/// Returns 0.0 when either sample is empty.
pub fn mann_whitney_u(x: &[f64], y: &[f64]) -> f64 {
    if x.is_empty() || y.is_empty() {
        return 0.0;
    }
    // The previous version bound unused sample sizes (nx, ny) and silenced
    // them with `let _ = (nx, ny);` — dead code, removed.
    x.iter()
        .map(|&xi| {
            y.iter()
                .map(|&yj| {
                    if xi > yj {
                        1.0
                    } else if (xi - yj).abs() < f64::EPSILON {
                        0.5
                    } else {
                        0.0
                    }
                })
                .sum::<f64>()
        })
        .sum()
}
/// Wilcoxon signed-rank W+ statistic for paired samples: ranks the nonzero
/// |x_i − y_i| differences (ascending, 1-based positions; no tie averaging)
/// and sums the ranks of the positive differences. All-zero differences
/// yield 0.0.
///
/// # Panics
/// Panics when the slices differ in length.
pub fn wilcoxon_signed_rank(x: &[f64], y: &[f64]) -> f64 {
    assert_eq!(
        x.len(),
        y.len(),
        "wilcoxon_signed_rank: x and y must have same length"
    );
    let mut ranked: Vec<(f64, f64)> = x
        .iter()
        .zip(y)
        .map(|(xi, yi)| xi - yi)
        .filter(|d| d.abs() > f64::EPSILON)
        .map(|d| (d.abs(), d.signum()))
        .collect();
    if ranked.is_empty() {
        return 0.0;
    }
    ranked.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
    let mut w_plus = 0.0_f64;
    for (pos, (_, sign)) in ranked.iter().enumerate() {
        if *sign > 0.0 {
            w_plus += pos as f64 + 1.0;
        }
    }
    w_plus
}
/// Approximate Shapiro–Wilk W statistic for normality of `data`.
///
/// Returns 1.0 for degenerate input (n < 3 or zero spread). Weights are
/// [`normal_quantile_approx`] evaluated at (rank + 1 − 0.375)/(n + 0.25)
/// plotting positions (Blom-style constants), applied to symmetric extreme
/// pairs; W = b² / Σ (x_i − mean)².
/// NOTE(review): lightweight approximation — the weights are not normalized
/// as in Royston's exact algorithm; confirm intended accuracy.
pub fn shapiro_wilk_w(data: &[f64]) -> f64 {
    let n = data.len();
    if n < 3 {
        return 1.0;
    }
    let mut sorted = data.to_vec();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let m = mean(&sorted);
    // Total sum of squares about the mean (denominator of W).
    let s2: f64 = sorted.iter().map(|x| (x - m).powi(2)).sum::<f64>();
    if s2 < f64::EPSILON {
        return 1.0;
    }
    let half = n / 2;
    let mut b_sum = 0.0f64;
    for i in 0..half {
        // Pair the i-th smallest with the i-th largest order statistic.
        let rank = n - 1 - i;
        let p = (rank as f64 + 1.0 - 0.375) / (n as f64 + 0.25);
        let a_i = normal_quantile_approx(p);
        b_sum += a_i * (sorted[rank] - sorted[i]);
    }
    (b_sum * b_sum) / s2
}
679pub(super) fn normal_quantile_approx(p: f64) -> f64 {
681 if p <= 0.0 || p >= 1.0 {
682 return 0.0;
683 }
684 let pp = if p < 0.5 { p } else { 1.0 - p };
685 let t = (-2.0 * pp.ln()).sqrt();
686 let c0 = 2.515517_f64;
687 let c1 = 0.802853_f64;
688 let c2 = 0.010328_f64;
689 let d1 = 1.432788_f64;
690 let d2 = 0.189269_f64;
691 let d3 = 0.001308_f64;
692 let x = t - (c0 + c1 * t + c2 * t * t) / (1.0 + d1 * t + d2 * t * t + d3 * t * t * t);
693 if p < 0.5 { -x } else { x }
694}
695pub(super) fn erf_approx(x: f64) -> f64 {
697 pub(super) const P: f64 = 0.3275911;
698 pub(super) const A1: f64 = 0.254829592;
699 pub(super) const A2: f64 = -0.284496736;
700 pub(super) const A3: f64 = 1.421413741;
701 pub(super) const A4: f64 = -1.453152027;
702 pub(super) const A5: f64 = 1.061405429;
703 let sign = if x < 0.0 { -1.0 } else { 1.0 };
704 let x = x.abs();
705 let t = 1.0 / (1.0 + P * x);
706 let poly = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5))));
707 sign * (1.0 - poly * (-x * x).exp())
708}
709pub(super) fn log_gamma_stirling(z: f64) -> f64 {
713 if z < 7.0 {
714 return log_gamma_stirling(z + 1.0) - z.ln();
715 }
716 let z = z - 1.0;
717 0.5 * (std::f64::consts::TAU / z).ln()
718 + z * ((z + 1.0 / (12.0 * z) - 1.0 / (360.0 * z.powi(3))) / std::f64::consts::E).ln()
719}
720pub(super) fn log_factorial(k: u64) -> f64 {
723 if k == 0 || k == 1 {
724 return 0.0;
725 }
726 if k <= 20 {
727 (1..=k).map(|i| (i as f64).ln()).sum()
728 } else {
729 log_gamma_stirling(k as f64 + 1.0)
730 }
731}
732pub fn quartiles(data: &[f64]) -> (f64, f64, f64) {
737 if data.is_empty() {
738 return (0.0, 0.0, 0.0);
739 }
740 (
741 percentile(data, 25.0),
742 percentile(data, 50.0),
743 percentile(data, 75.0),
744 )
745}
/// Interquartile range Q3 − Q1 (zero for empty input, where [`quartiles`]
/// returns all zeros).
pub fn iqr(data: &[f64]) -> f64 {
    let (q1, _, q3) = quartiles(data);
    q3 - q1
}
/// Spearman rank correlation: the Pearson [`correlation`] of the
/// rank-transformed inputs. Runs of (near-)equal values receive the average
/// of the 1-based ranks they span. Returns 0.0 for n < 2.
///
/// # Panics
/// Panics when `x` and `y` differ in length.
pub fn spearman_correlation(x: &[f64], y: &[f64]) -> f64 {
    assert_eq!(
        x.len(),
        y.len(),
        "spearman_correlation: x and y must be same length"
    );
    let n = x.len();
    if n < 2 {
        return 0.0;
    }
    // Average-rank transform: sort (value, original index) pairs, then give
    // each tie run the mean of its 1-based sorted positions.
    let rank = |data: &[f64]| -> Vec<f64> {
        let mut indexed: Vec<(usize, f64)> = data.iter().cloned().enumerate().collect();
        indexed.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
        let mut ranks = vec![0.0_f64; data.len()];
        let mut i = 0;
        while i < n {
            // Extend j to the end of the tie run starting at i.
            let mut j = i;
            while j + 1 < n && (indexed[j + 1].1 - indexed[i].1).abs() < f64::EPSILON {
                j += 1;
            }
            // Mean of positions i..=j, shifted to 1-based ranks.
            let avg_rank = (i + j) as f64 / 2.0 + 1.0;
            for k in i..=j {
                ranks[indexed[k].0] = avg_rank;
            }
            i = j + 1;
        }
        ranks
    };
    let rx = rank(x);
    let ry = rank(y);
    correlation(&rx, &ry)
}
/// Ordinary least-squares fit y ≈ slope·x + intercept.
/// Returns (slope, intercept, R²); degenerate inputs give (0, 0, 0) for
/// n < 2 and (0, mean(y), 0) when x has no spread. R² is defined as 1.0
/// when the total sum of squares vanishes.
///
/// # Panics
/// Panics when the slices differ in length.
pub fn linear_regression(x: &[f64], y: &[f64]) -> (f64, f64, f64) {
    assert_eq!(
        x.len(),
        y.len(),
        "linear_regression: x and y must be same length"
    );
    let n = x.len();
    if n < 2 {
        return (0.0, 0.0, 0.0);
    }
    let nf = n as f64;
    let mx = x.iter().sum::<f64>() / nf;
    let my = y.iter().sum::<f64>() / nf;
    let mut sxx = 0.0_f64;
    let mut sxy = 0.0_f64;
    for (xi, yi) in x.iter().zip(y) {
        sxx += (xi - mx).powi(2);
        sxy += (xi - mx) * (yi - my);
    }
    if sxx.abs() < f64::EPSILON {
        return (0.0, my, 0.0);
    }
    let slope = sxy / sxx;
    let intercept = my - slope * mx;
    let mut ss_res = 0.0_f64;
    for (xi, yi) in x.iter().zip(y) {
        ss_res += (yi - (slope * xi + intercept)).powi(2);
    }
    let ss_tot: f64 = y.iter().map(|yi| (yi - my).powi(2)).sum();
    let r_squared = if ss_tot < f64::EPSILON {
        1.0
    } else {
        1.0 - ss_res / ss_tot
    };
    (slope, intercept, r_squared)
}
/// One-sample Kolmogorov–Smirnov test statistic; thin alias for
/// [`ks_statistic`].
pub fn ks_test_one_sample(data: &[f64], cdf: impl Fn(f64) -> f64) -> f64 {
    ks_statistic(data, cdf)
}
/// Two-sample Kolmogorov–Smirnov statistic: the maximum absolute difference
/// between the empirical CDFs of `x` and `y`, evaluated on the deduplicated
/// union of both samples. Returns 0.0 when either sample is empty.
pub fn ks_test_two_sample(x: &[f64], y: &[f64]) -> f64 {
    if x.is_empty() || y.is_empty() {
        return 0.0;
    }
    let sorted_copy = |s: &[f64]| -> Vec<f64> {
        let mut v = s.to_vec();
        v.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        v
    };
    let xs = sorted_copy(x);
    let ys = sorted_copy(y);
    // Evaluation grid: every distinct observed value from both samples.
    let mut grid: Vec<f64> = xs.iter().chain(ys.iter()).copied().collect();
    grid.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    grid.dedup_by(|a, b| (*a - *b).abs() < f64::EPSILON);
    let nx = xs.len() as f64;
    let ny = ys.len() as f64;
    grid.iter()
        .map(|&t| {
            let fx = xs.partition_point(|&v| v <= t) as f64 / nx;
            let fy = ys.partition_point(|&v| v <= t) as f64 / ny;
            (fx - fy).abs()
        })
        .fold(0.0_f64, f64::max)
}
/// Chi-squared statistic Σ (O − E)² / E for integer observed counts against
/// expected frequencies; categories with E ≤ 1e-10 are skipped.
///
/// # Panics
/// Panics when the slices differ in length.
pub fn chi_squared_statistic(observed: &[u64], expected: &[f64]) -> f64 {
    assert_eq!(
        observed.len(),
        expected.len(),
        "chi_squared_statistic: length mismatch"
    );
    let mut total = 0.0_f64;
    for (&o, &e) in observed.iter().zip(expected) {
        if e > 1e-10 {
            let d = o as f64 - e;
            total += d * d / e;
        }
    }
    total
}
/// Alias for [`skewness`].
pub fn sample_skewness(data: &[f64]) -> f64 {
    skewness(data)
}
/// Alias for [`kurtosis`] (excess kurtosis: fourth moment minus 3).
pub fn sample_kurtosis(data: &[f64]) -> f64 {
    kurtosis(data)
}
/// Alias for [`correlation`] (Pearson r).
pub fn pearson_r(x: &[f64], y: &[f64]) -> f64 {
    correlation(x, y)
}
/// Alias for [`t_test_one_sample`]; returns (t, two-sided p).
pub fn t_test_one_sample_full(data: &[f64], mu0: f64) -> (f64, f64) {
    t_test_one_sample(data, mu0)
}
/// Alias for [`t_test_two_sample`].
/// NOTE(review): forwards to the unpooled two-sample test with a
/// normal-approximation p-value; no Welch–Satterthwaite degrees of freedom
/// are computed — confirm the alias is intentional.
pub fn welch_t_test(x: &[f64], y: &[f64]) -> (f64, f64) {
    t_test_two_sample(x, y)
}
/// Chi-squared goodness-of-fit helper: returns the [`chi_squared_test`]
/// statistic together with degrees of freedom = number of categories − 1
/// (saturating at 0 for empty input).
pub fn chi_squared_gof(observed: &[f64], expected: &[f64]) -> (f64, usize) {
    let chi2 = chi_squared_test(observed, expected);
    let df = observed.len().saturating_sub(1);
    (chi2, df)
}
935#[allow(dead_code)]
940pub fn mad(data: &[f64]) -> f64 {
941 if data.is_empty() {
942 return 0.0;
943 }
944 let m = median(data);
945 let mut deviations: Vec<f64> = data.iter().map(|&x| (x - m).abs()).collect();
946 deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
947 let n = deviations.len();
948 if n % 2 == 1 {
949 deviations[n / 2]
950 } else {
951 (deviations[n / 2 - 1] + deviations[n / 2]) / 2.0
952 }
953}
/// Huber M-estimator of location via iteratively reweighted means:
/// residuals within `k` robust standard deviations get weight 1, larger
/// ones get k/|r|. Scale is fixed at MAD · 1.4826 with a floor of 1.0 when
/// the MAD is (near) zero; starts from the median and stops after
/// `max_iter` sweeps or when the update moves less than 1e-8.
#[allow(dead_code)]
pub fn huber_m_estimator(data: &[f64], k: f64, max_iter: usize) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let mut mu = median(data);
    // 1.4826 rescales the MAD to be consistent with sigma for normal data.
    let sigma_scale = mad(data) * 1.4826;
    let sigma = if sigma_scale < 1e-10 {
        1.0
    } else {
        sigma_scale
    };
    for _ in 0..max_iter {
        // Huber weights: 1 inside the threshold, k/|r| outside.
        let weights: Vec<f64> = data
            .iter()
            .map(|&x| {
                let r = (x - mu) / sigma;
                if r.abs() <= k { 1.0 } else { k / r.abs() }
            })
            .collect();
        let sum_w: f64 = weights.iter().sum();
        if sum_w < 1e-30 {
            break;
        }
        // Weighted-mean update of the location estimate.
        let new_mu: f64 = data
            .iter()
            .zip(weights.iter())
            .map(|(&x, &w)| x * w)
            .sum::<f64>()
            / sum_w;
        if (new_mu - mu).abs() < 1e-8 {
            mu = new_mu;
            break;
        }
        mu = new_mu;
    }
    mu
}
/// Tukey biweight (bisquare) M-estimator of location: observations farther
/// than `c` robust standard deviations from the current estimate get zero
/// weight; closer points get weight (1 − u²)². Scale is MAD · 1.4826 with
/// a floor of 1.0; starts from the median and stops after `max_iter`
/// sweeps or when the update moves less than 1e-8.
#[allow(dead_code)]
pub fn tukey_biweight_estimator(data: &[f64], c: f64, max_iter: usize) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let mut mu = median(data);
    // 1.4826 rescales the MAD to be consistent with sigma for normal data.
    let sigma_scale = mad(data) * 1.4826;
    let sigma = if sigma_scale < 1e-10 {
        1.0
    } else {
        sigma_scale
    };
    for _ in 0..max_iter {
        // Bisquare weights: zero beyond the cutoff, (1 - u^2)^2 inside.
        let weights: Vec<f64> = data
            .iter()
            .map(|&x| {
                let u = (x - mu) / (c * sigma);
                if u.abs() >= 1.0 {
                    0.0
                } else {
                    let t = 1.0 - u * u;
                    t * t
                }
            })
            .collect();
        let sum_w: f64 = weights.iter().sum();
        if sum_w < 1e-30 {
            break;
        }
        // Weighted-mean update of the location estimate.
        let new_mu = data
            .iter()
            .zip(weights.iter())
            .map(|(&x, &w)| x * w)
            .sum::<f64>()
            / sum_w;
        if (new_mu - mu).abs() < 1e-8 {
            mu = new_mu;
            break;
        }
        mu = new_mu;
    }
    mu
}
/// Empirical CDF as sorted (value, F(value)) pairs with
/// F(x_(i)) = (i + 1)/n; empty input yields an empty vec.
#[allow(dead_code)]
pub fn empirical_cdf(data: &[f64]) -> Vec<(f64, f64)> {
    if data.is_empty() {
        return Vec::new();
    }
    let mut points = data.to_vec();
    points.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let n = points.len() as f64;
    points
        .into_iter()
        .enumerate()
        .map(|(i, x)| (x, (i as f64 + 1.0) / n))
        .collect()
}
/// Empirical CDF evaluated at `x`: the fraction of samples ≤ `x`;
/// 0.0 for an empty slice.
#[allow(dead_code)]
pub fn ecdf_at(data: &[f64], x: f64) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let below = data.iter().filter(|&&v| v <= x).count() as f64;
    below / data.len() as f64
}
/// Anderson–Darling A² statistic of `data` against the hypothesized `cdf`:
///
///   A² = −n − (1/n) Σ (2i − 1) [ln F(x_(i)) + ln(1 − F(x_(n+1−i)))]
///
/// Fix: the previous implementation used ln F(x_(n+1−i)) instead of the
/// required complement ln(1 − F(x_(n+1−i))). CDF values are clamped away
/// from 0 and 1 so the logarithms stay finite. Returns 0.0 for n < 2.
#[allow(dead_code)]
pub fn anderson_darling_statistic(data: &[f64], cdf: impl Fn(f64) -> f64) -> f64 {
    let n = data.len();
    if n < 2 {
        return 0.0;
    }
    let mut sorted = data.to_vec();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let nf = n as f64;
    let s: f64 = sorted
        .iter()
        .enumerate()
        .map(|(i, &x)| {
            let fi = cdf(x).clamp(1e-15, 1.0 - 1e-15);
            // Survival function 1 - F at the mirrored order statistic.
            let sf_rev = (1.0 - cdf(sorted[n - 1 - i])).clamp(1e-15, 1.0 - 1e-15);
            (2.0 * (i as f64 + 1.0) - 1.0) * (fi.ln() + sf_rev.ln())
        })
        .sum();
    -nf - s / nf
}
/// Kruskal–Wallis H statistic across `groups`; tied values (within
/// f64::EPSILON) receive the average of the ranks they span.
/// Returns 0.0 when fewer than two groups are supplied.
#[allow(dead_code)]
pub fn kruskal_wallis_h(groups: &[&[f64]]) -> f64 {
    let k = groups.len();
    if k < 2 {
        return 0.0;
    }
    // Pool every observation, tagged with its group index, then sort.
    let mut pooled: Vec<(f64, usize)> = Vec::new();
    for (g, members) in groups.iter().enumerate() {
        pooled.extend(members.iter().map(|&v| (v, g)));
    }
    pooled.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
    let n = pooled.len() as f64;
    // Midranks: each run of (near-)equal values shares the mean of its
    // 1-based positions.
    let mut ranks = vec![0.0_f64; pooled.len()];
    let mut start = 0;
    while start < pooled.len() {
        let mut end = start;
        while end + 1 < pooled.len() && (pooled[end + 1].0 - pooled[start].0).abs() < f64::EPSILON {
            end += 1;
        }
        let midrank = (start + end) as f64 / 2.0 + 1.0;
        for r in ranks.iter_mut().take(end + 1).skip(start) {
            *r = midrank;
        }
        start = end + 1;
    }
    // Per-group rank sums and sizes.
    let mut rank_sums = vec![0.0_f64; k];
    let mut sizes = vec![0usize; k];
    for (&r, &(_, g)) in ranks.iter().zip(&pooled) {
        rank_sums[g] += r;
        sizes[g] += 1;
    }
    let mut h_numerator = 0.0_f64;
    for (rs, &sz) in rank_sums.iter().zip(&sizes) {
        if sz > 0 {
            h_numerator += rs * rs / sz as f64;
        }
    }
    12.0 / (n * (n + 1.0)) * h_numerator - 3.0 * (n + 1.0)
}
1148#[cfg(test)]
1149mod tests {
1150 use super::*;
1151 use crate::ExponentialDistribution;
1152 use crate::KernelDensityEstimate;
1153 use crate::KernelDensityEstimate2D;
1154 use crate::PoissonDistribution;
1155 use crate::SlidingWindowStats;
1156 use crate::WelfordOnline;
1157 #[test]
1158 fn test_mean_known() {
1159 let data = [2.0, 4.0, 6.0, 8.0];
1160 assert!((mean(&data) - 5.0).abs() < 1e-12);
1161 }
1162 #[test]
1163 fn test_variance_known() {
1164 let data = [1.0, 3.0, 5.0];
1165 assert!((variance(&data) - 4.0).abs() < 1e-12);
1166 }
1167 #[test]
1168 fn test_median_odd() {
1169 let data = [3.0, 1.0, 4.0, 1.0, 5.0];
1170 assert!((median(&data) - 3.0).abs() < 1e-12);
1171 }
1172 #[test]
1173 fn test_median_even() {
1174 let data = [1.0, 2.0, 3.0, 4.0];
1175 assert!((median(&data) - 2.5).abs() < 1e-12);
1176 }
1177 #[test]
1178 fn test_normal_pdf_at_mean() {
1179 let nd = NormalDistribution::new(0.0, 1.0);
1180 assert!(nd.pdf(0.0) > 0.0);
1181 assert!((nd.pdf(0.0) - 0.3989422802).abs() < 1e-8);
1182 }
1183 #[test]
1184 fn test_normal_cdf_at_mean() {
1185 let nd = NormalDistribution::new(0.0, 1.0);
1186 assert!((nd.cdf(0.0) - 0.5).abs() < 1e-6);
1187 }
1188 #[test]
1189 fn test_exponential_sample_mean() {
1190 let dist = ExponentialDistribution::new(2.0);
1191 let mut rng = StatRng::new(42);
1192 let samples: Vec<f64> = (0..10_000).map(|_| dist.sample(&mut rng)).collect();
1193 let m = mean(&samples);
1194 assert!(
1195 (m - 0.5).abs() < 0.05,
1196 "exponential mean {m} not close to 0.5"
1197 );
1198 }
1199 #[test]
1200 fn test_poisson_pmf_sums_to_one() {
1201 let dist = PoissonDistribution::new(3.0);
1202 let total: f64 = (0u64..50).map(|k| dist.pmf(k)).sum();
1203 assert!((total - 1.0).abs() < 1e-6);
1204 }
1205 #[test]
1206 fn test_kde_positive_at_data_point() {
1207 let data = vec![1.0, 2.0, 3.0];
1208 let kde = KernelDensityEstimate::new(data, 0.5);
1209 assert!(kde.evaluate(2.0) > 0.0);
1210 }
1211 #[test]
1212 fn test_histogram_counts_sum() {
1213 let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
1214 let bins = histogram(&data, 10);
1215 let total: usize = bins.iter().map(|(_, _, c)| c).sum();
1216 assert_eq!(total, data.len());
1217 }
1218 #[test]
1219 fn test_correlation_perfect() {
1220 let x = [1.0, 2.0, 3.0, 4.0, 5.0];
1221 let y = [2.0, 4.0, 6.0, 8.0, 10.0];
1222 assert!((correlation(&x, &y) - 1.0).abs() < 1e-12);
1223 }
1224 #[test]
1225 fn test_mean_empty() {
1226 assert_eq!(mean(&[]), 0.0);
1227 }
1228 #[test]
1229 fn test_variance_single() {
1230 assert_eq!(variance(&[42.0]), 0.0);
1231 }
1232 #[test]
1233 fn test_std_dev() {
1234 let data = [1.0, 3.0, 5.0];
1235 assert!((std_dev(&data) - 2.0).abs() < 1e-12);
1236 }
1237 #[test]
1238 fn test_covariance_perfect() {
1239 let x = [1.0, 2.0, 3.0, 4.0, 5.0];
1240 let y = [2.0, 4.0, 6.0, 8.0, 10.0];
1241 assert!((covariance(&x, &y) - 5.0).abs() < 1e-12);
1242 }
1243 #[test]
1244 fn test_pearson_constant() {
1245 let x = [1.0, 1.0, 1.0];
1246 let y = [1.0, 2.0, 3.0];
1247 assert_eq!(pearson_correlation(&x, &y), 0.0);
1248 }
1249 #[test]
1250 fn test_histogram_empty() {
1251 assert!(histogram(&[], 5).is_empty());
1252 }
1253 #[test]
1254 fn test_histogram_zero_bins() {
1255 let data = [1.0, 2.0, 3.0];
1256 assert!(histogram(&data, 0).is_empty());
1257 }
1258 #[test]
1259 fn test_running_average() {
1260 let mut avg = running_average(3);
1261 assert!((avg(1.0) - 1.0).abs() < 1e-12);
1262 assert!((avg(2.0) - 1.5).abs() < 1e-12);
1263 assert!((avg(3.0) - 2.0).abs() < 1e-12);
1264 assert!((avg(4.0) - 3.0).abs() < 1e-12);
1265 }
1266 #[test]
1267 fn test_running_average_zero_window() {
1268 let mut avg = running_average(0);
1269 assert_eq!(avg(5.0), 0.0);
1270 }
1271 #[test]
1272 fn test_block_average_consistent_with_mean() {
1273 let data: Vec<f64> = (1..=20).map(|i| i as f64).collect();
1274 let (grand_mean, _) = block_average(&data, 4);
1275 assert!((grand_mean - mean(&data)).abs() < 1e-10);
1276 }
1277 #[test]
1278 fn test_block_average_empty() {
1279 assert_eq!(block_average(&[], 4), (0.0, 0.0));
1280 }
1281 #[test]
1282 fn test_autocorrelation_time_iid() {
1283 let data: Vec<f64> = (0..200)
1284 .map(|i| if i % 2 == 0 { 1.0 } else { -1.0 })
1285 .collect();
1286 let tau = autocorrelation_time(&data, 10);
1287 assert!((tau - 1.0).abs() < 1e-10);
1288 }
1289 #[test]
1290 fn test_boltzmann_factor_infinite_temp() {
1291 let result = boltzmann_factor(100.0, 1e300, 1.0);
1292 assert!((result - 1.0).abs() < 1e-6);
1293 }
1294 #[test]
1295 fn test_boltzmann_factor_zero_energy() {
1296 assert!((boltzmann_factor(0.0, 300.0, 1.38e-23) - 1.0).abs() < 1e-12);
1297 }
1298 #[test]
1299 fn test_partition_function() {
1300 let energies = [0.0, 1.0];
1301 let z = partition_function(&energies, 1.0, 1.0);
1302 let expected = 1.0 + (-1.0_f64).exp();
1303 assert!((z - expected).abs() < 1e-12);
1304 }
1305 #[test]
1306 fn test_free_energy_from_partition() {
1307 let z = 1.0_f64.exp();
1308 let f = free_energy_from_partition(z, 1.0, 1.0);
1309 assert!((f - (-1.0)).abs() < 1e-12);
1310 }
1311 #[test]
1312 fn test_maxwell_boltzmann_speed() {
1313 assert!((maxwell_boltzmann_speed(2.0, 1.0, 1.0) - 1.0).abs() < 1e-12);
1314 }
1315 #[test]
1316 fn test_correlation_matrix_identity_inputs() {
1317 let data: Vec<Vec<f64>> = (0..10).map(|i| vec![i as f64, i as f64]).collect();
1318 let cm = correlation_matrix(&data);
1319 assert_eq!(cm.len(), 2);
1320 assert!(
1321 (cm[0][1] - 1.0).abs() < 1e-8,
1322 "identical variables should correlate 1"
1323 );
1324 }
1325 #[test]
1326 fn test_correlation_matrix_orthogonal() {
1327 let data: Vec<Vec<f64>> = (0..5)
1328 .map(|i| vec![i as f64, if i % 2 == 0 { 1.0 } else { -1.0 }])
1329 .collect();
1330 let cm = correlation_matrix(&data);
1331 assert!(cm[0][0] - 1.0 < 1e-10);
1332 assert!(cm[1][1] - 1.0 < 1e-10);
1333 }
1334 #[test]
1335 fn test_covariance_matrix_diagonal() {
1336 let data: Vec<Vec<f64>> = vec![vec![1.0, 10.0], vec![2.0, 10.0], vec![3.0, 10.0]];
1337 let cov = covariance_matrix(&data);
1338 assert!(cov[0][0] > 0.0, "variance of first var should be positive");
1339 assert!(
1340 (cov[1][1]).abs() < 1e-10,
1341 "constant second var should have zero variance"
1342 );
1343 assert!(
1344 (cov[0][1]).abs() < 1e-10,
1345 "covariance with constant should be 0"
1346 );
1347 }
1348 #[test]
1349 fn test_pca_returns_components() {
1350 let data: Vec<Vec<f64>> = (0..20).map(|i| vec![i as f64, 2.0 * i as f64]).collect();
1351 let result = pca(&data, 2).expect("PCA should succeed");
1352 assert_eq!(result.components.len(), 2);
1353 assert_eq!(result.mean.len(), 2);
1354 assert_eq!(result.explained_variance.len(), 2);
1355 assert!(result.explained_variance[0] >= result.explained_variance[1] - 1e-6);
1356 }
1357 #[test]
1358 fn test_pca_transform_shape() {
1359 let data: Vec<Vec<f64>> = (0..10).map(|i| vec![i as f64, -i as f64, 0.0]).collect();
1360 let result = pca(&data, 2).unwrap();
1361 let scores = pca_transform(&[1.0, 2.0, 3.0], &result);
1362 assert_eq!(scores.len(), 2);
1363 }
1364 #[test]
1365 fn test_acf_lag0_is_one() {
1366 let data: Vec<f64> = (0..20).map(|i| i as f64).collect();
1367 let a = acf(&data, 5);
1368 assert!((a[0] - 1.0).abs() < 1e-12, "ACF lag 0 should be 1");
1369 }
1370 #[test]
1371 fn test_acf_iid_near_zero_high_lag() {
1372 let data: Vec<f64> = (0..100)
1373 .map(|i| if i % 2 == 0 { 1.0 } else { -1.0 })
1374 .collect();
1375 let a = acf(&data, 2);
1376 assert_eq!(a.len(), 3);
1377 assert!(
1378 a[1] < -0.9,
1379 "ACF lag 1 for alternating series should be near -1, got {}",
1380 a[1]
1381 );
1382 }
1383 #[test]
1384 fn test_acf_constant_returns_ones() {
1385 let data = vec![5.0; 10];
1386 let a = acf(&data, 3);
1387 for v in &a {
1388 assert!((*v - 1.0).abs() < 1e-10 || *v == 1.0);
1389 }
1390 }
1391 #[test]
1392 fn test_pacf_lag0_is_one() {
1393 let data: Vec<f64> = (0..20).map(|i| i as f64).collect();
1394 let p = pacf(&data, 3);
1395 assert!((p[0] - 1.0).abs() < 1e-12, "PACF lag 0 should be 1");
1396 }
1397 #[test]
1398 fn test_pacf_length() {
1399 let data: Vec<f64> = (0..30).map(|i| i as f64).collect();
1400 let p = pacf(&data, 5);
1401 assert_eq!(p.len(), 6, "PACF should have max_lag+1 entries");
1402 }
1403 #[test]
1404 fn test_bootstrap_ci_contains_true_mean() {
1405 let data: Vec<f64> = (0..50).map(|i| i as f64).collect();
1406 let true_mean = mean(&data);
1407 let mut rng = StatRng::new(42);
1408 let (lo, hi) = bootstrap_ci(&data, mean, 1000, 0.05, &mut rng);
1409 assert!(
1410 lo <= true_mean && true_mean <= hi,
1411 "bootstrap CI [{lo}, {hi}] should contain true mean {true_mean}"
1412 );
1413 }
1414 #[test]
1415 fn test_bootstrap_se_positive() {
1416 let data: Vec<f64> = (0..30).map(|i| i as f64).collect();
1417 let mut rng = StatRng::new(123);
1418 let se = bootstrap_se(&data, mean, 500, &mut rng);
1419 assert!(se > 0.0, "bootstrap SE should be positive");
1420 }
1421 #[test]
1422 fn test_bootstrap_ci_empty() {
1423 let mut rng = StatRng::new(1);
1424 let (lo, hi) = bootstrap_ci(&[], mean, 100, 0.05, &mut rng);
1425 assert_eq!((lo, hi), (0.0, 0.0));
1426 }
1427 #[test]
1428 fn test_kde2d_positive_at_data_point() {
1429 let data = vec![[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]];
1430 let kde = KernelDensityEstimate2D::new(data, 0.5, 0.5);
1431 assert!(
1432 kde.evaluate(1.0, 1.0) > 0.0,
1433 "KDE should be positive at data point"
1434 );
1435 }
1436 #[test]
1437 fn test_kde2d_optimal_bandwidths() {
1438 let data: Vec<[f64; 2]> = (0..20).map(|i| [i as f64, i as f64 * 2.0]).collect();
1439 let (bwx, bwy) = KernelDensityEstimate2D::optimal_bandwidths(&data);
1440 assert!(bwx > 0.0 && bwy > 0.0, "bandwidths should be positive");
1441 }
1442 #[test]
1443 fn test_kde2d_empty_returns_zero() {
1444 let kde = KernelDensityEstimate2D::new(vec![], 0.5, 0.5);
1445 assert_eq!(kde.evaluate(0.0, 0.0), 0.0);
1446 }
1447 #[test]
1448 fn test_t_test_two_sample_equal_means() {
1449 let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];
1450 let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
1451 let (t, p) = t_test_two_sample(&x, &y);
1452 assert!((t).abs() < 1e-10, "t should be 0 for equal samples");
1453 assert!(p > 0.9, "p should be high for equal samples, got {p}");
1454 }
1455 #[test]
1456 fn test_t_test_two_sample_different_means() {
1457 let x: Vec<f64> = (0..20).map(|i| i as f64).collect();
1458 let y: Vec<f64> = (100..120).map(|i| i as f64).collect();
1459 let (t, p) = t_test_two_sample(&x, &y);
1460 assert!(
1461 t < -1.0,
1462 "t should be very negative for well-separated samples"
1463 );
1464 assert!(p < 0.01, "p should be very small, got {p}");
1465 }
1466 #[test]
1467 fn test_mann_whitney_u_symmetric() {
1468 let x = vec![1.0, 2.0, 3.0];
1469 let y = vec![4.0, 5.0, 6.0];
1470 let u_xy = mann_whitney_u(&x, &y);
1471 let u_yx = mann_whitney_u(&y, &x);
1472 assert!(
1473 (u_xy + u_yx - 9.0).abs() < 1e-10,
1474 "U_xy + U_yx should equal nx*ny=9"
1475 );
1476 }
1477 #[test]
1478 fn test_wilcoxon_signed_rank_positive_diffs() {
1479 let x = vec![2.0, 3.0, 4.0];
1480 let y = vec![1.0, 1.0, 1.0];
1481 let w = wilcoxon_signed_rank(&x, &y);
1482 assert!(
1483 (w - 6.0).abs() < 1e-10,
1484 "W+ = 6 for all positive diffs, got {w}"
1485 );
1486 }
1487 #[test]
1488 fn test_shapiro_wilk_w_normal_data() {
1489 let data = vec![-1.5, -1.2, -0.8, -0.4, -0.1, 0.1, 0.4, 0.8, 1.2, 1.5];
1490 let w = shapiro_wilk_w(&data);
1491 assert!(w > 0.0, "W should be positive for any data, got {w}");
1492 assert!(w.is_finite(), "W should be finite, got {w}");
1493 }
1494 #[test]
1495 fn test_shapiro_wilk_w_constant_data() {
1496 let data = vec![3.0; 5];
1497 let w = shapiro_wilk_w(&data);
1498 assert_eq!(w, 1.0);
1499 }
1500 #[test]
1501 fn test_quartiles_known() {
1502 let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
1503 let (q1, q2, q3) = quartiles(&data);
1504 assert!((q2 - 4.0).abs() < 1e-10, "median should be 4, got {q2}");
1505 assert!(q1 < q2, "Q1 < Q2");
1506 assert!(q2 < q3, "Q2 < Q3");
1507 }
1508 #[test]
1509 fn test_quartiles_empty() {
1510 assert_eq!(quartiles(&[]), (0.0, 0.0, 0.0));
1511 }
1512 #[test]
1513 fn test_iqr_known() {
1514 let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
1515 let v = iqr(&data);
1516 assert!(v > 0.0, "IQR should be positive");
1517 }
1518 #[test]
1519 fn test_iqr_constant() {
1520 let data = vec![5.0; 10];
1521 assert!((iqr(&data)).abs() < 1e-10);
1522 }
1523 #[test]
1524 fn test_spearman_perfect_positive() {
1525 let x = [1.0, 2.0, 3.0, 4.0, 5.0];
1526 let y = [2.0, 4.0, 6.0, 8.0, 10.0];
1527 assert!((spearman_correlation(&x, &y) - 1.0).abs() < 1e-10);
1528 }
1529 #[test]
1530 fn test_spearman_perfect_negative() {
1531 let x = [1.0, 2.0, 3.0, 4.0, 5.0];
1532 let y = [5.0, 4.0, 3.0, 2.0, 1.0];
1533 assert!((spearman_correlation(&x, &y) + 1.0).abs() < 1e-10);
1534 }
1535 #[test]
1536 fn test_spearman_with_ties() {
1537 let x = [1.0, 1.0, 2.0, 3.0];
1538 let y = [1.0, 2.0, 3.0, 4.0];
1539 let r = spearman_correlation(&x, &y);
1540 assert!(
1541 r > 0.0 && r <= 1.0,
1542 "Spearman with ties should be in (0,1], got {r}"
1543 );
1544 }
1545 #[test]
1546 fn test_linear_regression_perfect_fit() {
1547 let x = [1.0, 2.0, 3.0, 4.0, 5.0];
1548 let y = [2.0, 4.0, 6.0, 8.0, 10.0];
1549 let (slope, intercept, r2) = linear_regression(&x, &y);
1550 assert!(
1551 (slope - 2.0).abs() < 1e-10,
1552 "slope should be 2, got {slope}"
1553 );
1554 assert!(
1555 intercept.abs() < 1e-10,
1556 "intercept should be 0, got {intercept}"
1557 );
1558 assert!((r2 - 1.0).abs() < 1e-10, "R2 should be 1, got {r2}");
1559 }
1560 #[test]
1561 fn test_linear_regression_horizontal() {
1562 let x = [1.0, 2.0, 3.0];
1563 let y = [5.0, 5.0, 5.0];
1564 let (slope, intercept, _r2) = linear_regression(&x, &y);
1565 assert!(slope.abs() < 1e-10, "slope should be 0 for constant y");
1566 assert!((intercept - 5.0).abs() < 1e-10, "intercept should be 5");
1567 }
1568 #[test]
1569 fn test_linear_regression_r2_nonnegative() {
1570 let x = [1.0, 2.0, 3.0, 4.0, 5.0];
1571 let y = [1.0, 4.0, 9.0, 16.0, 25.0];
1572 let (_, _, r2) = linear_regression(&x, &y);
1573 assert!((0.0..=1.0).contains(&r2), "R2 should be in [0,1], got {r2}");
1574 }
1575 #[test]
1576 fn test_ks_one_sample_uniform() {
1577 let data: Vec<f64> = (0..=10).map(|i| i as f64 / 10.0).collect();
1578 let d = ks_test_one_sample(&data, |x| x.clamp(0.0, 1.0));
1579 assert!(
1580 d < 0.2,
1581 "KS stat for uniform sample vs uniform CDF should be small, got {d}"
1582 );
1583 }
1584 #[test]
1585 fn test_ks_two_sample_identical() {
1586 let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
1587 let d = ks_test_two_sample(&data, &data);
1588 assert!(d < 1e-10, "KS2 of identical samples should be 0, got {d}");
1589 }
1590 #[test]
1591 fn test_ks_two_sample_different() {
1592 let x = vec![0.0, 0.1, 0.2, 0.3, 0.4];
1593 let y = vec![0.6, 0.7, 0.8, 0.9, 1.0];
1594 let d = ks_test_two_sample(&x, &y);
1595 assert!(
1596 d > 0.5,
1597 "KS2 for well-separated samples should be large, got {d}"
1598 );
1599 }
1600 #[test]
1601 fn test_chi_squared_statistic_uniform() {
1602 let observed = [10u64; 5];
1603 let expected = [10.0_f64; 5];
1604 let chi2 = chi_squared_statistic(&observed, &expected);
1605 assert!(
1606 chi2.abs() < 1e-10,
1607 "chi2 should be 0 for perfect fit, got {chi2}"
1608 );
1609 }
1610 #[test]
1611 fn test_chi_squared_statistic_deviation() {
1612 let observed = [20u64, 5];
1613 let expected = [12.5_f64, 12.5];
1614 let chi2 = chi_squared_statistic(&observed, &expected);
1615 assert!(chi2 > 0.0, "chi2 should be positive for deviating counts");
1616 }
1617 #[test]
1618 fn test_chi_squared_gof_df() {
1619 let observed = [10.0, 20.0, 30.0];
1620 let expected = [20.0, 20.0, 20.0];
1621 let (_chi2, df) = chi_squared_gof(&observed, &expected);
1622 assert_eq!(df, 2, "df = n - 1 = 2");
1623 }
1624 #[test]
1625 fn test_welford_online_mean() {
1626 let data = [1.0, 2.0, 3.0, 4.0, 5.0];
1627 let mut w = WelfordOnline::new();
1628 for &x in &data {
1629 w.update(x);
1630 }
1631 assert!(
1632 (w.mean - 3.0).abs() < 1e-12,
1633 "mean should be 3, got {}",
1634 w.mean
1635 );
1636 }
1637 #[test]
1638 fn test_welford_online_variance() {
1639 let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
1640 let mut w = WelfordOnline::new();
1641 for &x in &data {
1642 w.update(x);
1643 }
1644 let expected_var = variance(&data);
1645 assert!(
1646 (w.variance() - expected_var).abs() < 1e-10,
1647 "Welford variance {:.6} should match batch variance {:.6}",
1648 w.variance(),
1649 expected_var
1650 );
1651 }
1652 #[test]
1653 fn test_welford_online_single_observation() {
1654 let mut w = WelfordOnline::new();
1655 w.update(42.0);
1656 assert!((w.mean - 42.0).abs() < 1e-12);
1657 assert!(
1658 w.variance().abs() < 1e-12,
1659 "variance of single obs should be 0"
1660 );
1661 }
1662 #[test]
1663 fn test_welford_online_population_variance() {
1664 let data = [1.0, 2.0, 3.0];
1665 let mut w = WelfordOnline::new();
1666 for &x in &data {
1667 w.update(x);
1668 }
1669 let pop_var = w.population_variance();
1670 assert!(
1671 (pop_var - 2.0 / 3.0).abs() < 1e-10,
1672 "pop var should be 2/3, got {pop_var}"
1673 );
1674 }
1675 #[test]
1676 fn test_welford_online_empty() {
1677 let w = WelfordOnline::new();
1678 assert_eq!(w.n, 0);
1679 assert_eq!(w.variance(), 0.0);
1680 assert_eq!(w.population_variance(), 0.0);
1681 }
1682 #[test]
1683 fn test_sliding_window_mean() {
1684 let mut sw = SlidingWindowStats::new(3);
1685 sw.push(1.0);
1686 sw.push(2.0);
1687 sw.push(3.0);
1688 assert!((sw.mean() - 2.0).abs() < 1e-12);
1689 sw.push(4.0);
1690 assert!((sw.mean() - 3.0).abs() < 1e-12);
1691 }
1692 #[test]
1693 fn test_sliding_window_variance() {
1694 let mut sw = SlidingWindowStats::new(4);
1695 for x in [2.0, 4.0, 6.0, 8.0] {
1696 sw.push(x);
1697 }
1698 let expected = variance(&[2.0, 4.0, 6.0, 8.0]);
1699 assert!((sw.variance() - expected).abs() < 1e-10);
1700 }
1701 #[test]
1702 fn test_sliding_window_empty() {
1703 let sw = SlidingWindowStats::new(5);
1704 assert!(sw.is_empty());
1705 assert_eq!(sw.mean(), 0.0);
1706 }
1707 #[test]
1708 fn test_pearson_r_alias() {
1709 let x = [1.0, 2.0, 3.0];
1710 let y = [3.0, 2.0, 1.0];
1711 assert!((pearson_r(&x, &y) + 1.0).abs() < 1e-10);
1712 }
1713 #[test]
1714 fn test_sample_skewness_symmetric() {
1715 let data = [-2.0, -1.0, 0.0, 1.0, 2.0];
1716 let s = sample_skewness(&data);
1717 assert!(
1718 s.abs() < 1e-10,
1719 "symmetric data should have ~0 skewness, got {s}"
1720 );
1721 }
1722 #[test]
1723 fn test_sample_kurtosis_normal_approx() {
1724 let data: Vec<f64> = (0..50).map(|i| i as f64 - 25.0).collect();
1725 let k = sample_kurtosis(&data);
1726 assert!(k.is_finite(), "kurtosis should be finite");
1727 }
1728 #[test]
1729 fn test_welch_t_test_same_distribution() {
1730 let x = [1.0, 2.0, 3.0, 4.0, 5.0];
1731 let y = [1.0, 2.0, 3.0, 4.0, 5.0];
1732 let (t, _p) = welch_t_test(&x, &y);
1733 assert!(t.abs() < 1e-10, "t should be ~0 for identical samples");
1734 }
1735}