1pub fn mean(data: &[f64]) -> Option<f64> {
35 if data.is_empty() {
36 return None;
37 }
38 if !data.iter().all(|x| x.is_finite()) {
39 return None;
40 }
41 Some(kahan_sum(data) / data.len() as f64)
42}
43
44pub fn variance(data: &[f64]) -> Option<f64> {
69 if data.len() < 2 {
70 return None;
71 }
72 if !data.iter().all(|x| x.is_finite()) {
73 return None;
74 }
75 let mut acc = WelfordAccumulator::new();
76 for &x in data {
77 acc.update(x);
78 }
79 acc.sample_variance()
80}
81
82pub fn population_variance(data: &[f64]) -> Option<f64> {
96 if data.is_empty() {
97 return None;
98 }
99 if !data.iter().all(|x| x.is_finite()) {
100 return None;
101 }
102 let mut acc = WelfordAccumulator::new();
103 for &x in data {
104 acc.update(x);
105 }
106 acc.population_variance()
107}
108
109pub fn std_dev(data: &[f64]) -> Option<f64> {
124 variance(data).map(f64::sqrt)
125}
126
127pub fn population_std_dev(data: &[f64]) -> Option<f64> {
134 population_variance(data).map(f64::sqrt)
135}
136
/// Smallest element of `data`.
///
/// Returns `None` for an empty slice or as soon as a NaN is seen;
/// infinite values are accepted and compare normally.
pub fn min(data: &[f64]) -> Option<f64> {
    if data.is_empty() {
        return None;
    }
    let mut lowest = f64::INFINITY;
    for &x in data {
        if x.is_nan() {
            return None;
        }
        lowest = lowest.min(x);
    }
    Some(lowest)
}
159
/// Largest element of `data`.
///
/// Returns `None` for an empty slice or as soon as a NaN is seen;
/// infinite values are accepted and compare normally.
pub fn max(data: &[f64]) -> Option<f64> {
    if data.is_empty() {
        return None;
    }
    let mut highest = f64::NEG_INFINITY;
    for &x in data {
        if x.is_nan() {
            return None;
        }
        highest = highest.max(x);
    }
    Some(highest)
}
182
/// Median of `data`: the middle element of the sorted values, or the
/// average of the two middle elements for an even-length slice.
///
/// Returns `None` for an empty slice or when any element is NaN.
/// Infinite values are allowed and sort to the extremes.
pub fn median(data: &[f64]) -> Option<f64> {
    if data.is_empty() || data.iter().any(|x| x.is_nan()) {
        return None;
    }
    let mut sorted = data.to_vec();
    // `total_cmp` is a total order on f64, so no `expect` panic path is
    // needed; on the NaN-free data guaranteed above it sorts identically
    // to `partial_cmp`.
    sorted.sort_unstable_by(f64::total_cmp);
    let n = sorted.len();
    let mid = n / 2;
    if n % 2 == 1 {
        Some(sorted[mid])
    } else {
        Some((sorted[mid - 1] + sorted[mid]) / 2.0)
    }
}
216
217pub fn quantile(data: &[f64], p: f64) -> Option<f64> {
247 if data.is_empty() || !(0.0..=1.0).contains(&p) {
248 return None;
249 }
250 if data.iter().any(|x| x.is_nan()) {
251 return None;
252 }
253 let mut sorted = data.to_vec();
254 sorted.sort_unstable_by(|a, b| a.partial_cmp(b).expect("NaN filtered above"));
255 quantile_sorted(&sorted, p)
256}
257
/// `p`-quantile of already-sorted `sorted_data` by linear interpolation
/// between the order statistics bracketing rank `(n - 1) * p`.
///
/// The caller must supply ascending, NaN-free data; that precondition is
/// not checked here. Returns `None` only for an empty slice or a `p`
/// outside `[0, 1]`.
pub fn quantile_sorted(sorted_data: &[f64], p: f64) -> Option<f64> {
    let n = sorted_data.len();
    if n == 0 || !(0.0..=1.0).contains(&p) {
        return None;
    }
    if n == 1 {
        return Some(sorted_data[0]);
    }

    // Fractional rank: `lo` is the lower bracketing index, `frac` the
    // interpolation weight toward the next element.
    let rank = (n - 1) as f64 * p;
    let lo = rank.floor() as usize;
    let frac = rank - rank.floor();

    let value = match sorted_data.get(lo + 1) {
        // Weighted average of the two bracketing order statistics.
        Some(&upper) => (1.0 - frac) * sorted_data[lo] + frac * upper,
        // Rank landed on (or rounded to) the last element.
        None => sorted_data[n - 1],
    };
    Some(value)
}
285
/// Compensated summation of `data` (Kahan–Neumaier variant).
///
/// A separate correction term accumulates the rounding error of each
/// addition, so small addends are not lost when the running total is
/// large. Non-finite inputs propagate through as in a plain sum.
pub fn kahan_sum(data: &[f64]) -> f64 {
    let mut total = 0.0_f64;
    let mut correction = 0.0_f64;
    for &value in data {
        let updated = total + value;
        // Recover the exact rounding error of `total + value`; which
        // operand the error is extracted from depends on which has the
        // larger magnitude (the Neumaier refinement of Kahan's scheme).
        correction += if total.abs() >= value.abs() {
            (total - updated) + value
        } else {
            (value - updated) + total
        };
        total = updated;
    }
    total + correction
}
319
/// Streaming mean/variance accumulator using Welford's online algorithm.
///
/// Tracks the observation count, the running mean, and the sum of squared
/// deviations from that mean (`m2`). Two accumulators built over disjoint
/// data can be combined with [`WelfordAccumulator::merge`].
///
/// Input values are folded in as-is; callers wanting NaN/infinity
/// rejection must filter before calling [`WelfordAccumulator::update`].
#[derive(Debug, Clone)]
pub struct WelfordAccumulator {
    // Number of values observed so far.
    count: u64,
    // Running mean of all observed values.
    mean_acc: f64,
    // Sum of squared deviations from the current mean.
    m2: f64,
}

impl WelfordAccumulator {
    /// Creates an empty accumulator (no observations yet).
    pub fn new() -> Self {
        Self {
            count: 0,
            mean_acc: 0.0,
            m2: 0.0,
        }
    }

    /// Folds one observation into the running statistics.
    pub fn update(&mut self, value: f64) {
        self.count += 1;
        // Deviation from the mean before and after this observation; their
        // product is the standard Welford increment for `m2`.
        let dev_before = value - self.mean_acc;
        self.mean_acc += dev_before / self.count as f64;
        let dev_after = value - self.mean_acc;
        self.m2 += dev_before * dev_after;
    }

    /// Number of observations folded in so far.
    pub fn count(&self) -> u64 {
        self.count
    }

    /// Running mean, or `None` before any observation.
    pub fn mean(&self) -> Option<f64> {
        (self.count > 0).then_some(self.mean_acc)
    }

    /// Bessel-corrected (divisor `n - 1`) variance; requires at least two
    /// observations.
    pub fn sample_variance(&self) -> Option<f64> {
        if self.count >= 2 {
            Some(self.m2 / (self.count - 1) as f64)
        } else {
            None
        }
    }

    /// Population (divisor `n`) variance; `None` before any observation.
    pub fn population_variance(&self) -> Option<f64> {
        if self.count >= 1 {
            Some(self.m2 / self.count as f64)
        } else {
            None
        }
    }

    /// Square root of [`Self::sample_variance`].
    pub fn sample_std_dev(&self) -> Option<f64> {
        let var = self.sample_variance()?;
        Some(var.sqrt())
    }

    /// Square root of [`Self::population_variance`].
    pub fn population_std_dev(&self) -> Option<f64> {
        let var = self.population_variance()?;
        Some(var.sqrt())
    }

    /// Combines `other` into `self`, as if every value folded into `other`
    /// had been folded into `self` instead (parallel Welford update).
    pub fn merge(&mut self, other: &WelfordAccumulator) {
        // Empty operands degenerate to a copy in either direction.
        if other.count == 0 {
            return;
        }
        if self.count == 0 {
            *self = other.clone();
            return;
        }
        let total = self.count + other.count;
        let shift = other.mean_acc - self.mean_acc;
        // Weight the mean shift by the other side's share of the total,
        // and add the between-groups term to the pooled `m2`.
        let combined_mean = self.mean_acc + shift * (other.count as f64 / total as f64);
        let combined_m2 = self.m2
            + other.m2
            + shift * shift * (self.count as f64 * other.count as f64 / total as f64);
        self.count = total;
        self.mean_acc = combined_mean;
        self.m2 = combined_m2;
    }
}

impl Default for WelfordAccumulator {
    /// Equivalent to [`WelfordAccumulator::new`].
    fn default() -> Self {
        Self::new()
    }
}
453
#[cfg(test)]
mod tests {
    use super::*;

    // --- mean ---

    #[test]
    fn test_mean_basic() {
        assert_eq!(mean(&[1.0, 2.0, 3.0, 4.0, 5.0]), Some(3.0));
    }

    #[test]
    fn test_mean_single() {
        assert_eq!(mean(&[42.0]), Some(42.0));
    }

    #[test]
    fn test_mean_empty() {
        assert_eq!(mean(&[]), None);
    }

    // Non-finite inputs (NaN or infinity) are rejected, not propagated.
    #[test]
    fn test_mean_nan() {
        assert_eq!(mean(&[1.0, f64::NAN, 3.0]), None);
    }

    #[test]
    fn test_mean_inf() {
        assert_eq!(mean(&[1.0, f64::INFINITY, 3.0]), None);
    }

    // --- variance (sample and population) ---

    #[test]
    fn test_variance_basic() {
        let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let var = variance(&v).unwrap();
        // Sample variance of this set is 32/7.
        assert!((var - 4.571428571428571).abs() < 1e-10);
    }

    #[test]
    fn test_variance_constant() {
        let v = [5.0; 100];
        assert!((variance(&v).unwrap()).abs() < 1e-15);
    }

    // Sample variance needs at least two observations.
    #[test]
    fn test_variance_single() {
        assert_eq!(variance(&[1.0]), None);
    }

    #[test]
    fn test_variance_empty() {
        assert_eq!(variance(&[]), None);
    }

    #[test]
    fn test_population_variance() {
        let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let var = population_variance(&v).unwrap();
        // Population variance (divisor n) of the same set is exactly 4.
        assert!((var - 4.0).abs() < 1e-10);
    }

    // --- standard deviation ---

    #[test]
    fn test_std_dev() {
        let v = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let sd = std_dev(&v).unwrap();
        let expected = 4.571428571428571_f64.sqrt();
        assert!((sd - expected).abs() < 1e-10);
    }

    // --- min / max ---

    #[test]
    fn test_min_max() {
        let v = [3.0, 1.0, 4.0, 1.0, 5.0, 9.0, 2.0, 6.0];
        assert_eq!(min(&v), Some(1.0));
        assert_eq!(max(&v), Some(9.0));
    }

    #[test]
    fn test_min_max_empty() {
        assert_eq!(min(&[]), None);
        assert_eq!(max(&[]), None);
    }

    // NaN anywhere in the slice poisons the result to None.
    #[test]
    fn test_min_max_nan() {
        assert_eq!(min(&[1.0, f64::NAN]), None);
        assert_eq!(max(&[1.0, f64::NAN]), None);
    }

    // --- median ---

    #[test]
    fn test_median_odd() {
        assert_eq!(median(&[3.0, 1.0, 2.0]), Some(2.0));
    }

    // Even length: mean of the two middle elements.
    #[test]
    fn test_median_even() {
        assert_eq!(median(&[4.0, 1.0, 3.0, 2.0]), Some(2.5));
    }

    #[test]
    fn test_median_single() {
        assert_eq!(median(&[7.0]), Some(7.0));
    }

    #[test]
    fn test_median_empty() {
        assert_eq!(median(&[]), None);
    }

    // --- quantile ---

    // p = 0 and p = 1 map to the minimum and maximum respectively.
    #[test]
    fn test_quantile_extremes() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(quantile(&data, 0.0), Some(1.0));
        assert_eq!(quantile(&data, 1.0), Some(5.0));
    }

    #[test]
    fn test_quantile_median() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(quantile(&data, 0.5), Some(3.0));
    }

    // Rank (n-1)*p = 0.75 interpolates between 1.0 and 2.0.
    #[test]
    fn test_quantile_interpolation() {
        let data = [1.0, 2.0, 3.0, 4.0];
        let q = quantile(&data, 0.25).unwrap();
        assert!((q - 1.75).abs() < 1e-15);
    }

    // p outside [0, 1] is an argument error, not a clamp.
    #[test]
    fn test_quantile_invalid_p() {
        assert_eq!(quantile(&[1.0, 2.0], -0.1), None);
        assert_eq!(quantile(&[1.0, 2.0], 1.1), None);
    }

    #[test]
    fn test_quantile_empty() {
        assert_eq!(quantile(&[], 0.5), None);
    }

    #[test]
    fn test_quantile_single() {
        assert_eq!(quantile(&[42.0], 0.0), Some(42.0));
        assert_eq!(quantile(&[42.0], 0.5), Some(42.0));
        assert_eq!(quantile(&[42.0], 1.0), Some(42.0));
    }

    // --- kahan_sum ---

    #[test]
    fn test_kahan_sum_basic() {
        let v = [1.0, 2.0, 3.0];
        assert!((kahan_sum(&v) - 6.0).abs() < 1e-15);
    }

    // Naive left-to-right summation loses the 1.0 here; the compensated
    // sum must recover it exactly.
    #[test]
    fn test_kahan_sum_precision() {
        let v = [1e16, 1.0, -1e16];
        let result = kahan_sum(&v);
        assert!(
            (result - 1.0).abs() < 1e-10,
            "Kahan sum should preserve the 1.0: got {result}"
        );
    }

    // --- WelfordAccumulator ---

    #[test]
    fn test_welford_empty() {
        let acc = WelfordAccumulator::new();
        assert_eq!(acc.count(), 0);
        assert_eq!(acc.mean(), None);
        assert_eq!(acc.sample_variance(), None);
    }

    // One observation: mean defined, sample variance not, population
    // variance zero.
    #[test]
    fn test_welford_single() {
        let mut acc = WelfordAccumulator::new();
        acc.update(5.0);
        assert_eq!(acc.mean(), Some(5.0));
        assert_eq!(acc.sample_variance(), None);
        assert_eq!(acc.population_variance(), Some(0.0));
    }

    // Streaming accumulation must agree with the batch functions.
    #[test]
    fn test_welford_matches_batch() {
        let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let mut acc = WelfordAccumulator::new();
        for &x in &data {
            acc.update(x);
        }
        let batch_mean = mean(&data).unwrap();
        let batch_var = variance(&data).unwrap();
        assert!((acc.mean().unwrap() - batch_mean).abs() < 1e-14);
        assert!((acc.sample_variance().unwrap() - batch_var).abs() < 1e-10);
    }

    // Merging two accumulators must agree with accumulating everything
    // into one.
    #[test]
    fn test_welford_merge() {
        let data_a = [1.0, 2.0, 3.0, 4.0];
        let data_b = [5.0, 6.0, 7.0, 8.0];
        let data_all: Vec<f64> = data_a.iter().chain(data_b.iter()).copied().collect();

        let mut acc_a = WelfordAccumulator::new();
        for &x in &data_a {
            acc_a.update(x);
        }
        let mut acc_b = WelfordAccumulator::new();
        for &x in &data_b {
            acc_b.update(x);
        }
        acc_a.merge(&acc_b);

        let expected_mean = mean(&data_all).unwrap();
        let expected_var = variance(&data_all).unwrap();

        assert!((acc_a.mean().unwrap() - expected_mean).abs() < 1e-14);
        assert!((acc_a.sample_variance().unwrap() - expected_var).abs() < 1e-10);
    }

    // Catastrophic-cancellation regression test: a naive sum-of-squares
    // formula loses all precision at offset 1e9; Welford should not.
    #[test]
    fn test_variance_large_offset() {
        let data: Vec<f64> = (1..=5).map(|i| 1e9 + i as f64).collect();
        let var = variance(&data).unwrap();
        assert!(
            (var - 2.5).abs() < 1e-5,
            "Variance of offset data should be ~2.5, got {var}"
        );
    }
}
705
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    // Strategy: vectors of `min_len..=max_len` normal (finite, non-zero
    // exponent) f64 values, bounded to |x| < 1e12 to keep intermediate
    // sums well away from overflow.
    fn finite_vec(min_len: usize, max_len: usize) -> impl Strategy<Value = Vec<f64>> {
        proptest::collection::vec(
            prop::num::f64::NORMAL.prop_filter("finite", |x| x.is_finite() && x.abs() < 1e12),
            min_len..=max_len,
        )
    }

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(500))]

        // Variance is a scaled sum of squares, so it can never go negative.
        #[test]
        fn variance_non_negative(data in finite_vec(2, 100)) {
            let var = variance(&data).unwrap();
            prop_assert!(var >= 0.0, "variance must be >= 0, got {}", var);
        }

        // A constant sequence has zero spread (up to rounding noise).
        #[test]
        fn variance_of_constant_is_zero(
            value in prop::num::f64::NORMAL.prop_filter("finite", |x| x.is_finite()),
            n in 2_usize..50,
        ) {
            let data = vec![value; n];
            let var = variance(&data).unwrap();
            prop_assert!(var.abs() < 1e-10, "variance of constant should be ~0, got {}", var);
        }

        // std_dev is defined as sqrt(variance); squaring must round-trip.
        #[test]
        fn std_dev_is_sqrt_of_variance(data in finite_vec(2, 100)) {
            let var = variance(&data).unwrap();
            let sd = std_dev(&data).unwrap();
            let diff = (sd * sd - var).abs();
            prop_assert!(diff < 1e-10 * var.max(1.0), "sd² should equal variance");
        }

        // The mean is linear: mean(a*x + b) == a*mean(x) + b, within a
        // tolerance scaled to the result's magnitude.
        #[test]
        fn mean_linearity(
            data in finite_vec(1, 100),
            a in -100.0_f64..100.0,
            b in -100.0_f64..100.0,
        ) {
            prop_assume!(a.is_finite() && b.is_finite());
            let m = mean(&data).unwrap();
            let transformed: Vec<f64> = data.iter().map(|&x| a * x + b).collect();
            // The transform can push values out of the finite-check range,
            // in which case `mean` returns None and the case is skipped.
            if let Some(mt) = mean(&transformed) {
                let expected = a * m + b;
                let tol = 1e-8 * expected.abs().max(1.0);
                prop_assert!(
                    (mt - expected).abs() < tol,
                    "mean(a*x+b)={} != a*mean(x)+b={}",
                    mt, expected
                );
            }
        }

        // quantile(0) and quantile(1) must coincide with min and max.
        #[test]
        fn quantile_extremes_are_min_max(data in finite_vec(1, 100)) {
            let q0 = quantile(&data, 0.0).unwrap();
            let q1 = quantile(&data, 1.0).unwrap();
            let mn = min(&data).unwrap();
            let mx = max(&data).unwrap();
            prop_assert!((q0 - mn).abs() < 1e-15, "quantile(0) should be min");
            prop_assert!((q1 - mx).abs() < 1e-15, "quantile(1) should be max");
        }

        // Quantiles are non-decreasing in p.
        #[test]
        fn quantiles_monotonic(
            data in finite_vec(2, 100),
            p1 in 0.0_f64..=1.0,
            p2 in 0.0_f64..=1.0,
        ) {
            let (lo, hi) = if p1 <= p2 { (p1, p2) } else { (p2, p1) };
            let q_lo = quantile(&data, lo).unwrap();
            let q_hi = quantile(&data, hi).unwrap();
            prop_assert!(q_lo <= q_hi + 1e-15, "quantiles should be monotonic");
        }

        // The median must agree with the 0.5-quantile for this
        // interpolation convention.
        #[test]
        fn median_equals_quantile_half(data in finite_vec(1, 100)) {
            let med = median(&data).unwrap();
            let q50 = quantile(&data, 0.5).unwrap();
            prop_assert!(
                (med - q50).abs() < 1e-14,
                "median={} != quantile(0.5)={}",
                med, q50
            );
        }

        // Merging two accumulators must match accumulating the
        // concatenated stream into a single accumulator.
        #[test]
        fn welford_merge_equals_sequential(
            data_a in finite_vec(1, 50),
            data_b in finite_vec(1, 50),
        ) {
            let mut sequential = WelfordAccumulator::new();
            for &x in data_a.iter().chain(data_b.iter()) {
                sequential.update(x);
            }

            let mut acc_a = WelfordAccumulator::new();
            for &x in &data_a { acc_a.update(x); }
            let mut acc_b = WelfordAccumulator::new();
            for &x in &data_b { acc_b.update(x); }
            acc_a.merge(&acc_b);

            let seq_mean = sequential.mean().unwrap();
            let mrg_mean = acc_a.mean().unwrap();
            prop_assert!(
                (seq_mean - mrg_mean).abs() < 1e-10 * seq_mean.abs().max(1.0),
                "merged mean should match sequential"
            );

            // Sample variance only exists once two values have been seen.
            if sequential.count() >= 2 {
                let seq_var = sequential.sample_variance().unwrap();
                let mrg_var = acc_a.sample_variance().unwrap();
                prop_assert!(
                    (seq_var - mrg_var).abs() < 1e-8 * seq_var.max(1.0),
                    "merged variance should match sequential"
                );
            }
        }
    }
}