1pub fn mean(data: &[f64]) -> Option<f64> {
35 if data.is_empty() {
36 return None;
37 }
38 if !data.iter().all(|x| x.is_finite()) {
39 return None;
40 }
41 Some(kahan_sum(data) / data.len() as f64)
42}
43
44pub fn variance(data: &[f64]) -> Option<f64> {
69 if data.len() < 2 {
70 return None;
71 }
72 if !data.iter().all(|x| x.is_finite()) {
73 return None;
74 }
75 let mut acc = WelfordAccumulator::new();
76 for &x in data {
77 acc.update(x);
78 }
79 acc.sample_variance()
80}
81
82pub fn population_variance(data: &[f64]) -> Option<f64> {
96 if data.is_empty() {
97 return None;
98 }
99 if !data.iter().all(|x| x.is_finite()) {
100 return None;
101 }
102 let mut acc = WelfordAccumulator::new();
103 for &x in data {
104 acc.update(x);
105 }
106 acc.population_variance()
107}
108
109pub fn std_dev(data: &[f64]) -> Option<f64> {
124 variance(data).map(f64::sqrt)
125}
126
127pub fn population_std_dev(data: &[f64]) -> Option<f64> {
134 population_variance(data).map(f64::sqrt)
135}
136
/// Smallest element of `data`.
///
/// Returns `None` on empty input or if any element is NaN
/// (infinities are permitted).
pub fn min(data: &[f64]) -> Option<f64> {
    if data.is_empty() {
        return None;
    }
    let mut best = f64::INFINITY;
    for &x in data {
        // A single NaN poisons the whole result.
        if x.is_nan() {
            return None;
        }
        best = best.min(x);
    }
    Some(best)
}
159
/// Largest element of `data`.
///
/// Returns `None` on empty input or if any element is NaN
/// (infinities are permitted).
pub fn max(data: &[f64]) -> Option<f64> {
    if data.is_empty() {
        return None;
    }
    let mut best = f64::NEG_INFINITY;
    for &x in data {
        // A single NaN poisons the whole result.
        if x.is_nan() {
            return None;
        }
        best = best.max(x);
    }
    Some(best)
}
182
/// Median of `data`: the middle value, or the average of the two
/// middle values for even-length input.
///
/// Returns `None` on empty input or any NaN.
pub fn median(data: &[f64]) -> Option<f64> {
    if data.is_empty() || data.iter().any(|x| x.is_nan()) {
        return None;
    }
    // Sort a private copy; NaN was rejected so partial_cmp cannot fail.
    let mut values = data.to_vec();
    values.sort_unstable_by(|a, b| a.partial_cmp(b).expect("NaN filtered above"));
    let len = values.len();
    let mid = len / 2;
    match len % 2 {
        0 => Some((values[mid - 1] + values[mid]) / 2.0),
        _ => Some(values[mid]),
    }
}
216
217pub fn quantile(data: &[f64], p: f64) -> Option<f64> {
247 if data.is_empty() || !(0.0..=1.0).contains(&p) {
248 return None;
249 }
250 if data.iter().any(|x| x.is_nan()) {
251 return None;
252 }
253 let mut sorted = data.to_vec();
254 sorted.sort_unstable_by(|a, b| a.partial_cmp(b).expect("NaN filtered above"));
255 quantile_sorted(&sorted, p)
256}
257
/// Quantile of already-sorted data at probability `p ∈ [0, 1]`, with
/// linear interpolation between closest ranks (the R-7 / NumPy
/// `linear` definition). The caller is responsible for sortedness.
///
/// Returns `None` if the slice is empty or `p` lies outside `[0, 1]`.
pub fn quantile_sorted(sorted_data: &[f64], p: f64) -> Option<f64> {
    if sorted_data.is_empty() || !(0.0..=1.0).contains(&p) {
        return None;
    }
    if sorted_data.len() == 1 {
        return Some(sorted_data[0]);
    }
    // Fractional rank h = (n-1)·p splits into a lower index and a weight.
    let rank = (sorted_data.len() - 1) as f64 * p;
    let lo = rank.floor() as usize;
    let frac = rank - rank.floor();
    match sorted_data.get(lo + 1) {
        // Interpolate between the two bracketing order statistics.
        Some(&hi_val) => Some((1.0 - frac) * sorted_data[lo] + frac * hi_val),
        // p == 1.0 lands exactly on the last element.
        None => sorted_data.last().copied(),
    }
}
285
286pub fn skewness(data: &[f64]) -> Option<f64> {
321 let n = data.len();
322 if n < 3 {
323 return None;
324 }
325 if !data.iter().all(|x| x.is_finite()) {
326 return None;
327 }
328 let nf = n as f64;
329 let m = kahan_sum(data) / nf;
330 let mut sum2 = 0.0;
331 let mut sum3 = 0.0;
332 for &x in data {
333 let d = x - m;
334 let d2 = d * d;
335 sum2 += d2;
336 sum3 += d2 * d;
337 }
338 let m2 = sum2 / nf;
339 if m2 == 0.0 {
340 return None;
341 }
342 let m3 = sum3 / nf;
343 let g1 = m3 / m2.powf(1.5);
344 let correction = (nf * (nf - 1.0)).sqrt() / (nf - 2.0);
345 Some(correction * g1)
346}
347
348pub fn kurtosis(data: &[f64]) -> Option<f64> {
382 let n = data.len();
383 if n < 4 {
384 return None;
385 }
386 if !data.iter().all(|x| x.is_finite()) {
387 return None;
388 }
389 let nf = n as f64;
390 let m = kahan_sum(data) / nf;
391 let mut sum2 = 0.0;
392 let mut sum4 = 0.0;
393 for &x in data {
394 let d = x - m;
395 let d2 = d * d;
396 sum2 += d2;
397 sum4 += d2 * d2;
398 }
399 let s2 = sum2 / (nf - 1.0);
401 if s2 == 0.0 {
402 return None;
403 }
404 let s4 = s2 * s2;
405 let sum_z4 = sum4 / s4;
407 let a = nf * (nf + 1.0) / ((nf - 1.0) * (nf - 2.0) * (nf - 3.0));
408 let b = 3.0 * (nf - 1.0) * (nf - 1.0) / ((nf - 2.0) * (nf - 3.0));
409 Some(a * sum_z4 - b)
410}
411
412pub fn covariance(x: &[f64], y: &[f64]) -> Option<f64> {
436 let n = x.len();
437 if n != y.len() || n < 2 {
438 return None;
439 }
440 if !x.iter().chain(y.iter()).all(|v| v.is_finite()) {
441 return None;
442 }
443 let nf = n as f64;
444 let mean_x = kahan_sum(x) / nf;
445 let mean_y = kahan_sum(y) / nf;
446 let mut sum = 0.0;
447 for i in 0..n {
448 sum += (x[i] - mean_x) * (y[i] - mean_y);
449 }
450 Some(sum / (nf - 1.0))
451}
452
/// Compensated (Neumaier-variant Kahan) summation.
///
/// Carries the low-order bits lost by each floating-point add in a
/// separate correction term, so mixes of large and small magnitudes
/// sum far more accurately than a naive fold.
pub fn kahan_sum(data: &[f64]) -> f64 {
    let (total, compensation) = data.iter().fold((0.0_f64, 0.0_f64), |(s, c), &x| {
        let t = s + x;
        // Whichever operand is larger in magnitude determines where
        // the rounding error was introduced (Neumaier's improvement).
        let c_next = if s.abs() >= x.abs() {
            c + ((s - t) + x)
        } else {
            c + ((x - t) + s)
        };
        (t, c_next)
    });
    // Fold the accumulated correction back in once, at the end.
    total + compensation
}
486
/// Streaming accumulator of the first four central moments using the
/// Welford/Pébay single-pass update, with O(1) merge of partial results.
#[derive(Debug, Clone)]
pub struct WelfordAccumulator {
    count: u64,    // number of values observed so far
    mean_acc: f64, // running mean of the observed values
    m2: f64,       // sum of squared deviations from the mean (M2)
    m3: f64,       // third central moment sum (M3)
    m4: f64,       // fourth central moment sum (M4)
}
527
impl WelfordAccumulator {
    /// Creates an empty accumulator (no values observed).
    pub fn new() -> Self {
        Self {
            count: 0,
            mean_acc: 0.0,
            m2: 0.0,
            m3: 0.0,
            m4: 0.0,
        }
    }

    /// Folds one value into the running moment sums.
    ///
    /// Single-pass update of the mean and M2..M4 (Pébay-style formulas).
    /// NaN/infinity inputs are not filtered here; callers that need
    /// finite-only behavior must validate beforehand.
    pub fn update(&mut self, value: f64) {
        let n1 = self.count;
        self.count += 1;

        // First observation: the mean is the value itself and all
        // higher moment sums remain zero.
        if n1 == 0 {
            self.mean_acc = value;
            return;
        }

        let n = self.count as f64;
        let delta = value - self.mean_acc;
        let delta_n = delta / n;
        let delta_n2 = delta_n * delta_n;
        let term1 = delta * delta_n * n1 as f64;

        // Update order matters: m4 uses the old m3 and m2, m3 uses the
        // old m2, and the mean is updated last.
        self.m4 += term1 * delta_n2 * (n * n - 3.0 * n + 3.0) + 6.0 * delta_n2 * self.m2
            - 4.0 * delta_n * self.m3;
        self.m3 += term1 * delta_n * (n - 2.0) - 3.0 * delta_n * self.m2;
        self.m2 += term1;
        self.mean_acc += delta_n;
    }

    /// Number of values observed so far.
    pub fn count(&self) -> u64 {
        self.count
    }

    /// Running mean, or `None` if no values have been observed.
    pub fn mean(&self) -> Option<f64> {
        if self.count == 0 {
            None
        } else {
            Some(self.mean_acc)
        }
    }

    /// Bessel-corrected sample variance (divides M2 by `n - 1`);
    /// `None` with fewer than two values.
    pub fn sample_variance(&self) -> Option<f64> {
        if self.count < 2 {
            None
        } else {
            Some(self.m2 / (self.count - 1) as f64)
        }
    }

    /// Population variance (divides M2 by `n`); `None` if empty.
    pub fn population_variance(&self) -> Option<f64> {
        if self.count == 0 {
            None
        } else {
            Some(self.m2 / self.count as f64)
        }
    }

    /// Square root of [`Self::sample_variance`].
    pub fn sample_std_dev(&self) -> Option<f64> {
        self.sample_variance().map(f64::sqrt)
    }

    /// Square root of [`Self::population_variance`].
    pub fn population_std_dev(&self) -> Option<f64> {
        self.population_variance().map(f64::sqrt)
    }

    /// Adjusted (bias-corrected) sample skewness.
    ///
    /// `None` with fewer than three values or zero variance.
    pub fn skewness(&self) -> Option<f64> {
        if self.count < 3 {
            return None;
        }
        let n = self.count as f64;
        if self.m2 == 0.0 {
            return None;
        }
        // Population g1 = m3/m2^1.5 with moments m_k = M_k/n reduces to
        // sqrt(n) * M3 / M2^1.5 in terms of the raw sums.
        let g1 = n.sqrt() * self.m3 / self.m2.powf(1.5);
        let correction = (n * (n - 1.0)).sqrt() / (n - 2.0);
        Some(correction * g1)
    }

    /// Bias-corrected sample excess kurtosis.
    ///
    /// `None` with fewer than four values or zero variance.
    pub fn kurtosis(&self) -> Option<f64> {
        if self.count < 4 {
            return None;
        }
        let n = self.count as f64;
        if self.m2 == 0.0 {
            return None;
        }
        // Population excess kurtosis from the raw sums, then the
        // standard small-sample correction.
        let g2 = n * self.m4 / (self.m2 * self.m2) - 3.0;
        let correction = (n - 1.0) / ((n - 2.0) * (n - 3.0));
        Some(correction * ((n + 1.0) * g2 + 6.0))
    }

    /// Merges `other` into `self`, as if every value fed to `other` had
    /// also been fed through [`Self::update`] here (pairwise combination
    /// formulas of Chan et al., extended to M3 and M4).
    pub fn merge(&mut self, other: &WelfordAccumulator) {
        if other.count == 0 {
            return;
        }
        if self.count == 0 {
            *self = other.clone();
            return;
        }
        let na = self.count as f64;
        let nb = other.count as f64;
        let total = self.count + other.count;
        let n = total as f64;
        let delta = other.mean_acc - self.mean_acc;
        let delta2 = delta * delta;
        let delta3 = delta2 * delta;
        let delta4 = delta2 * delta2;

        let new_mean = self.mean_acc + delta * (nb / n);

        let new_m2 = self.m2 + other.m2 + delta2 * na * nb / n;

        let new_m3 = self.m3
            + other.m3
            + delta3 * na * nb * (na - nb) / (n * n)
            + 3.0 * delta * (na * other.m2 - nb * self.m2) / n;

        let new_m4 = self.m4
            + other.m4
            + delta4 * na * nb * (na * na - na * nb + nb * nb) / (n * n * n)
            + 6.0 * delta2 * (na * na * other.m2 + nb * nb * self.m2) / (n * n)
            + 4.0 * delta * (na * other.m3 - nb * self.m3) / n;

        // Commit all fields together only after every formula has read
        // the pre-merge values it needs.
        self.count = total;
        self.mean_acc = new_mean;
        self.m2 = new_m2;
        self.m3 = new_m3;
        self.m4 = new_m4;
    }
}
704
impl Default for WelfordAccumulator {
    /// Equivalent to [`WelfordAccumulator::new`]: an empty accumulator.
    fn default() -> Self {
        Self::new()
    }
}
710
// Unit tests: one group per public function, plus streaming-vs-batch
// consistency checks for the Welford accumulator.
#[cfg(test)]
mod tests {
    use super::*;

    // --- mean ---

    #[test]
    fn test_mean_basic() {
        assert_eq!(mean(&[1.0, 2.0, 3.0, 4.0, 5.0]), Some(3.0));
    }

    #[test]
    fn test_mean_single() {
        assert_eq!(mean(&[42.0]), Some(42.0));
    }

    #[test]
    fn test_mean_empty() {
        assert_eq!(mean(&[]), None);
    }

    #[test]
    fn test_mean_nan() {
        assert_eq!(mean(&[1.0, f64::NAN, 3.0]), None);
    }

    #[test]
    fn test_mean_inf() {
        assert_eq!(mean(&[1.0, f64::INFINITY, 3.0]), None);
    }

    // --- variance ---

    #[test]
    fn test_variance_basic() {
        let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let var = variance(&v).unwrap();
        assert!((var - 4.571428571428571).abs() < 1e-10);
    }

    #[test]
    fn test_variance_constant() {
        let v = [5.0; 100];
        assert!((variance(&v).unwrap()).abs() < 1e-15);
    }

    #[test]
    fn test_variance_single() {
        assert_eq!(variance(&[1.0]), None);
    }

    #[test]
    fn test_variance_empty() {
        assert_eq!(variance(&[]), None);
    }

    #[test]
    fn test_population_variance() {
        let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let var = population_variance(&v).unwrap();
        assert!((var - 4.0).abs() < 1e-10);
    }

    // --- std_dev ---

    #[test]
    fn test_std_dev() {
        let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let sd = std_dev(&v).unwrap();
        let expected = 4.571428571428571_f64.sqrt();
        assert!((sd - expected).abs() < 1e-10);
    }

    // --- min / max ---

    #[test]
    fn test_min_max() {
        let v = [3.0, 1.0, 4.0, 1.0, 5.0, 9.0, 2.0, 6.0];
        assert_eq!(min(&v), Some(1.0));
        assert_eq!(max(&v), Some(9.0));
    }

    #[test]
    fn test_min_max_empty() {
        assert_eq!(min(&[]), None);
        assert_eq!(max(&[]), None);
    }

    #[test]
    fn test_min_max_nan() {
        assert_eq!(min(&[1.0, f64::NAN]), None);
        assert_eq!(max(&[1.0, f64::NAN]), None);
    }

    // --- median ---

    #[test]
    fn test_median_odd() {
        assert_eq!(median(&[3.0, 1.0, 2.0]), Some(2.0));
    }

    #[test]
    fn test_median_even() {
        assert_eq!(median(&[4.0, 1.0, 3.0, 2.0]), Some(2.5));
    }

    #[test]
    fn test_median_single() {
        assert_eq!(median(&[7.0]), Some(7.0));
    }

    #[test]
    fn test_median_empty() {
        assert_eq!(median(&[]), None);
    }

    // --- quantile ---

    #[test]
    fn test_quantile_extremes() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(quantile(&data, 0.0), Some(1.0));
        assert_eq!(quantile(&data, 1.0), Some(5.0));
    }

    #[test]
    fn test_quantile_median() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(quantile(&data, 0.5), Some(3.0));
    }

    #[test]
    fn test_quantile_interpolation() {
        let data = [1.0, 2.0, 3.0, 4.0];
        let q = quantile(&data, 0.25).unwrap();
        assert!((q - 1.75).abs() < 1e-15);
    }

    #[test]
    fn test_quantile_invalid_p() {
        assert_eq!(quantile(&[1.0, 2.0], -0.1), None);
        assert_eq!(quantile(&[1.0, 2.0], 1.1), None);
    }

    #[test]
    fn test_quantile_empty() {
        assert_eq!(quantile(&[], 0.5), None);
    }

    #[test]
    fn test_quantile_single() {
        assert_eq!(quantile(&[42.0], 0.0), Some(42.0));
        assert_eq!(quantile(&[42.0], 0.5), Some(42.0));
        assert_eq!(quantile(&[42.0], 1.0), Some(42.0));
    }

    // --- kahan_sum ---

    #[test]
    fn test_kahan_sum_basic() {
        let v = [1.0, 2.0, 3.0];
        assert!((kahan_sum(&v) - 6.0).abs() < 1e-15);
    }

    #[test]
    fn test_kahan_sum_precision() {
        // Naive summation would lose the 1.0 entirely here.
        let v = [1e16, 1.0, -1e16];
        let result = kahan_sum(&v);
        assert!(
            (result - 1.0).abs() < 1e-10,
            "Kahan sum should preserve the 1.0: got {result}"
        );
    }

    // --- WelfordAccumulator ---

    #[test]
    fn test_welford_empty() {
        let acc = WelfordAccumulator::new();
        assert_eq!(acc.count(), 0);
        assert_eq!(acc.mean(), None);
        assert_eq!(acc.sample_variance(), None);
    }

    #[test]
    fn test_welford_single() {
        let mut acc = WelfordAccumulator::new();
        acc.update(5.0);
        assert_eq!(acc.mean(), Some(5.0));
        assert_eq!(acc.sample_variance(), None);
        assert_eq!(acc.population_variance(), Some(0.0));
    }

    #[test]
    fn test_welford_matches_batch() {
        let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let mut acc = WelfordAccumulator::new();
        for &x in &data {
            acc.update(x);
        }
        let batch_mean = mean(&data).unwrap();
        let batch_var = variance(&data).unwrap();
        assert!((acc.mean().unwrap() - batch_mean).abs() < 1e-14);
        assert!((acc.sample_variance().unwrap() - batch_var).abs() < 1e-10);
    }

    #[test]
    fn test_welford_merge() {
        let data_a = [1.0, 2.0, 3.0, 4.0];
        let data_b = [5.0, 6.0, 7.0, 8.0];
        let data_all: Vec<f64> = data_a.iter().chain(data_b.iter()).copied().collect();

        let mut acc_a = WelfordAccumulator::new();
        for &x in &data_a {
            acc_a.update(x);
        }
        let mut acc_b = WelfordAccumulator::new();
        for &x in &data_b {
            acc_b.update(x);
        }
        acc_a.merge(&acc_b);

        let expected_mean = mean(&data_all).unwrap();
        let expected_var = variance(&data_all).unwrap();

        assert!((acc_a.mean().unwrap() - expected_mean).abs() < 1e-14);
        assert!((acc_a.sample_variance().unwrap() - expected_var).abs() < 1e-10);
    }

    // --- skewness ---

    #[test]
    fn test_skewness_symmetric() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
        assert!(skewness(&data).unwrap().abs() < 1e-14);
    }

    #[test]
    fn test_skewness_right_skewed() {
        let data = [1.0, 2.0, 3.0, 4.0, 50.0];
        let s = skewness(&data).unwrap();
        assert!(s > 0.0, "expected positive skewness, got {s}");
    }

    #[test]
    fn test_skewness_left_skewed() {
        let data = [-50.0, 1.0, 2.0, 3.0, 4.0];
        let s = skewness(&data).unwrap();
        assert!(s < 0.0, "expected negative skewness, got {s}");
    }

    #[test]
    fn test_skewness_edge_cases() {
        assert_eq!(skewness(&[]), None);
        assert_eq!(skewness(&[1.0]), None);
        assert_eq!(skewness(&[1.0, 2.0]), None);
        // Zero variance: skewness is undefined.
        assert_eq!(skewness(&[5.0, 5.0, 5.0]), None);
        assert_eq!(skewness(&[1.0, f64::NAN, 3.0]), None);
    }

    #[test]
    fn test_skewness_known_value() {
        let data = [1.0, 2.0, 3.0, 4.0, 8.0];
        let s = skewness(&data).unwrap();
        assert!(
            (s - 1.339).abs() < 0.01,
            "expected skewness ≈ 1.34, got {s}"
        );
    }

    // --- kurtosis ---

    #[test]
    fn test_kurtosis_uniform_negative() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let k = kurtosis(&data).unwrap();
        assert!(k < 0.0, "uniform data should be platykurtic, got {k}");
    }

    #[test]
    fn test_kurtosis_edge_cases() {
        assert_eq!(kurtosis(&[]), None);
        assert_eq!(kurtosis(&[1.0]), None);
        assert_eq!(kurtosis(&[1.0, 2.0]), None);
        assert_eq!(kurtosis(&[1.0, 2.0, 3.0]), None);
        // Zero variance: kurtosis is undefined.
        assert_eq!(kurtosis(&[5.0, 5.0, 5.0, 5.0]), None);
        assert_eq!(kurtosis(&[1.0, f64::NAN, 3.0, 4.0]), None);
    }

    #[test]
    fn test_kurtosis_heavy_tails() {
        let data = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0];
        let k = kurtosis(&data).unwrap();
        assert!(k > 0.0, "heavy-tailed data should be leptokurtic, got {k}");
    }

    // --- covariance ---

    #[test]
    fn test_covariance_perfect_positive() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [2.0, 4.0, 6.0, 8.0, 10.0];
        let cov = covariance(&x, &y).unwrap();
        assert!((cov - 5.0).abs() < 1e-14);
    }

    #[test]
    fn test_covariance_perfect_negative() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [10.0, 8.0, 6.0, 4.0, 2.0];
        let cov = covariance(&x, &y).unwrap();
        assert!((cov - (-5.0)).abs() < 1e-14);
    }

    #[test]
    fn test_covariance_zero() {
        let x = [1.0, 2.0, 3.0, 4.0];
        // Constant y means no co-variation at all.
        let y = [5.0, 5.0, 5.0, 5.0];
        let cov = covariance(&x, &y).unwrap();
        assert!(cov.abs() < 1e-14);
    }

    #[test]
    fn test_covariance_self_is_variance() {
        let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let cov_xx = covariance(&data, &data).unwrap();
        let var = variance(&data).unwrap();
        assert!((cov_xx - var).abs() < 1e-10);
    }

    #[test]
    fn test_covariance_edge_cases() {
        assert_eq!(covariance(&[], &[]), None);
        assert_eq!(covariance(&[1.0], &[2.0]), None);
        assert_eq!(covariance(&[1.0, 2.0], &[1.0]), None);
        assert_eq!(covariance(&[1.0, f64::NAN], &[1.0, 2.0]), None);
    }

    // --- streaming vs. batch higher moments ---

    #[test]
    fn test_welford_skewness_matches_batch() {
        let data = [1.0, 2.0, 3.0, 4.0, 50.0];
        let mut acc = WelfordAccumulator::new();
        for &x in &data {
            acc.update(x);
        }
        let batch = skewness(&data).unwrap();
        let stream = acc.skewness().unwrap();
        assert!(
            (batch - stream).abs() < 1e-10,
            "batch={batch}, stream={stream}"
        );
    }

    #[test]
    fn test_welford_kurtosis_matches_batch() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 100.0];
        let mut acc = WelfordAccumulator::new();
        for &x in &data {
            acc.update(x);
        }
        let batch = kurtosis(&data).unwrap();
        let stream = acc.kurtosis().unwrap();
        assert!(
            (batch - stream).abs() < 1e-8,
            "batch={batch}, stream={stream}"
        );
    }

    #[test]
    fn test_welford_skewness_kurtosis_edge_cases() {
        // Skewness needs 3 values, kurtosis needs 4.
        let mut acc = WelfordAccumulator::new();
        assert_eq!(acc.skewness(), None);
        assert_eq!(acc.kurtosis(), None);
        acc.update(1.0);
        assert_eq!(acc.skewness(), None);
        assert_eq!(acc.kurtosis(), None);
        acc.update(2.0);
        assert_eq!(acc.skewness(), None);
        assert_eq!(acc.kurtosis(), None);
        acc.update(3.0);
        assert!(acc.skewness().is_some());
        assert_eq!(acc.kurtosis(), None);
        acc.update(4.0);
        assert!(acc.kurtosis().is_some());
    }

    #[test]
    fn test_welford_merge_skewness_kurtosis() {
        let data_a = [1.0, 3.0, 5.0, 7.0, 9.0, 11.0];
        let data_b = [2.0, 50.0, 4.0, 6.0, 8.0, 100.0];
        let all: Vec<f64> = data_a.iter().chain(data_b.iter()).copied().collect();

        let mut acc_a = WelfordAccumulator::new();
        for &x in &data_a {
            acc_a.update(x);
        }
        let mut acc_b = WelfordAccumulator::new();
        for &x in &data_b {
            acc_b.update(x);
        }
        acc_a.merge(&acc_b);

        let expected_skew = skewness(&all).unwrap();
        let expected_kurt = kurtosis(&all).unwrap();
        let merged_skew = acc_a.skewness().unwrap();
        let merged_kurt = acc_a.kurtosis().unwrap();

        assert!(
            (expected_skew - merged_skew).abs() < 1e-8,
            "skewness: expected={expected_skew}, merged={merged_skew}"
        );
        assert!(
            (expected_kurt - merged_kurt).abs() < 1e-6,
            "kurtosis: expected={expected_kurt}, merged={merged_kurt}"
        );
    }

    // --- numerical stability ---

    #[test]
    fn test_variance_large_offset() {
        // Values near 1e9 defeat naive sum-of-squares variance.
        let data: Vec<f64> = (1..=5).map(|i| 1e9 + i as f64).collect();
        let var = variance(&data).unwrap();
        assert!(
            (var - 2.5).abs() < 1e-5,
            "Variance of offset data should be ~2.5, got {var}"
        );
    }
}
1173
// Property-based tests (proptest): algebraic invariants that should hold
// for arbitrary finite inputs, plus streaming-vs-batch equivalence.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    // Strategy: vectors of finite f64 values of bounded magnitude, so
    // intermediate sums cannot overflow to infinity.
    fn finite_vec(min_len: usize, max_len: usize) -> impl Strategy<Value = Vec<f64>> {
        proptest::collection::vec(
            prop::num::f64::NORMAL.prop_filter("finite", |x| x.is_finite() && x.abs() < 1e12),
            min_len..=max_len,
        )
    }

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(500))]

        // Variance is a sum of squares divided by a positive count.
        #[test]
        fn variance_non_negative(data in finite_vec(2, 100)) {
            let var = variance(&data).unwrap();
            prop_assert!(var >= 0.0, "variance must be >= 0, got {}", var);
        }

        #[test]
        fn variance_of_constant_is_zero(
            value in prop::num::f64::NORMAL.prop_filter("finite", |x| x.is_finite()),
            n in 2_usize..50,
        ) {
            let data = vec![value; n];
            let var = variance(&data).unwrap();
            prop_assert!(var.abs() < 1e-10, "variance of constant should be ~0, got {}", var);
        }

        #[test]
        fn std_dev_is_sqrt_of_variance(data in finite_vec(2, 100)) {
            let var = variance(&data).unwrap();
            let sd = std_dev(&data).unwrap();
            let diff = (sd * sd - var).abs();
            prop_assert!(diff < 1e-10 * var.max(1.0), "sd² should equal variance");
        }

        // mean(a*x + b) == a*mean(x) + b, up to rounding.
        #[test]
        fn mean_linearity(
            data in finite_vec(1, 100),
            a in -100.0_f64..100.0,
            b in -100.0_f64..100.0,
        ) {
            prop_assume!(a.is_finite() && b.is_finite());
            let m = mean(&data).unwrap();
            let transformed: Vec<f64> = data.iter().map(|&x| a * x + b).collect();
            if let Some(mt) = mean(&transformed) {
                let expected = a * m + b;
                let tol = 1e-8 * expected.abs().max(1.0);
                prop_assert!(
                    (mt - expected).abs() < tol,
                    "mean(a*x+b)={} != a*mean(x)+b={}",
                    mt, expected
                );
            }
        }

        #[test]
        fn quantile_extremes_are_min_max(data in finite_vec(1, 100)) {
            let q0 = quantile(&data, 0.0).unwrap();
            let q1 = quantile(&data, 1.0).unwrap();
            let mn = min(&data).unwrap();
            let mx = max(&data).unwrap();
            prop_assert!((q0 - mn).abs() < 1e-15, "quantile(0) should be min");
            prop_assert!((q1 - mx).abs() < 1e-15, "quantile(1) should be max");
        }

        #[test]
        fn quantiles_monotonic(
            data in finite_vec(2, 100),
            p1 in 0.0_f64..=1.0,
            p2 in 0.0_f64..=1.0,
        ) {
            let (lo, hi) = if p1 <= p2 { (p1, p2) } else { (p2, p1) };
            let q_lo = quantile(&data, lo).unwrap();
            let q_hi = quantile(&data, hi).unwrap();
            prop_assert!(q_lo <= q_hi + 1e-15, "quantiles should be monotonic");
        }

        #[test]
        fn median_equals_quantile_half(data in finite_vec(1, 100)) {
            let med = median(&data).unwrap();
            let q50 = quantile(&data, 0.5).unwrap();
            prop_assert!(
                (med - q50).abs() < 1e-14,
                "median={} != quantile(0.5)={}",
                med, q50
            );
        }

        // Merging two accumulators must agree with feeding all values
        // through a single accumulator.
        #[test]
        fn welford_merge_equals_sequential(
            data_a in finite_vec(1, 50),
            data_b in finite_vec(1, 50),
        ) {
            let mut sequential = WelfordAccumulator::new();
            for &x in data_a.iter().chain(data_b.iter()) {
                sequential.update(x);
            }

            let mut acc_a = WelfordAccumulator::new();
            for &x in &data_a { acc_a.update(x); }
            let mut acc_b = WelfordAccumulator::new();
            for &x in &data_b { acc_b.update(x); }
            acc_a.merge(&acc_b);

            let seq_mean = sequential.mean().unwrap();
            let mrg_mean = acc_a.mean().unwrap();
            prop_assert!(
                (seq_mean - mrg_mean).abs() < 1e-10 * seq_mean.abs().max(1.0),
                "merged mean should match sequential"
            );

            if sequential.count() >= 2 {
                let seq_var = sequential.sample_variance().unwrap();
                let mrg_var = acc_a.sample_variance().unwrap();
                prop_assert!(
                    (seq_var - mrg_var).abs() < 1e-8 * seq_var.max(1.0),
                    "merged variance should match sequential"
                );
            }
        }

        // A dataset mirrored about zero is symmetric by construction.
        #[test]
        fn skewness_of_symmetric_is_zero(
            half in proptest::collection::vec(-1e6_f64..1e6, 2..=50),
        ) {
            prop_assume!(half.iter().all(|x| x.is_finite()));
            let mut data: Vec<f64> = half.clone();
            data.extend(half.iter().map(|x| -x));
            if let Some(s) = skewness(&data) {
                prop_assert!(
                    s.abs() < 1e-8,
                    "symmetric data should have ~0 skewness, got {}",
                    s
                );
            }
        }

        #[test]
        fn welford_skewness_matches_batch(
            data in proptest::collection::vec(-1e6_f64..1e6, 3..=100)
        ) {
            let mut acc = WelfordAccumulator::new();
            for &x in &data { acc.update(x); }
            match (skewness(&data), acc.skewness()) {
                (Some(batch), Some(stream)) if batch.is_finite() && stream.is_finite() => {
                    let tol = 1e-8 * batch.abs().max(1.0);
                    prop_assert!(
                        (batch - stream).abs() < tol,
                        "batch={} stream={}", batch, stream
                    );
                }
                // Degenerate inputs (near-zero variance): nothing to compare.
                _ => {}
            }
        }

        #[test]
        fn welford_kurtosis_matches_batch(
            data in proptest::collection::vec(-1e6_f64..1e6, 4..=100)
        ) {
            let mut acc = WelfordAccumulator::new();
            for &x in &data { acc.update(x); }
            match (kurtosis(&data), acc.kurtosis()) {
                (Some(batch), Some(stream)) if batch.is_finite() && stream.is_finite() => {
                    let tol = 1e-6 * batch.abs().max(1.0);
                    prop_assert!(
                        (batch - stream).abs() < tol,
                        "batch={} stream={}", batch, stream
                    );
                }
                // Degenerate inputs (near-zero variance): nothing to compare.
                _ => {}
            }
        }

        #[test]
        fn covariance_self_is_variance(data in finite_vec(2, 100)) {
            let cov = covariance(&data, &data).unwrap();
            let var = variance(&data).unwrap();
            let tol = 1e-10 * var.max(1.0);
            prop_assert!(
                (cov - var).abs() < tol,
                "Cov(x,x)={} != Var(x)={}", cov, var
            );
        }

        #[test]
        fn covariance_symmetric(
            x in finite_vec(2, 50),
            y in finite_vec(2, 50),
        ) {
            let n = x.len().min(y.len());
            if n >= 2 {
                let x_slice = &x[..n];
                let y_slice = &y[..n];
                let cov_xy = covariance(x_slice, y_slice).unwrap();
                let cov_yx = covariance(y_slice, x_slice).unwrap();
                prop_assert!(
                    (cov_xy - cov_yx).abs() < 1e-10 * cov_xy.abs().max(1.0),
                    "Cov(x,y)={} != Cov(y,x)={}", cov_xy, cov_yx
                );
            }
        }
    }
}