1use num_traits::{Num, NumCast, ToPrimitive};
2use std::ops::Sub;
3
4pub fn mu<T>(data: &[T]) -> T
15where
16 T: Num + NumCast + Copy,
17{
18 if data.is_empty() {
19 return T::zero();
20 }
21
22 let sum = data.iter().copied().fold(T::zero(), |sum, x| sum + x);
23
24 let n = NumCast::from(data.len()).unwrap();
27
28 sum / n
29}
30
/// Returns the arithmetic mean of `data`.
///
/// This is an alias that forwards directly to [`mu`]; the result is whatever
/// `mu` returns for the same slice.
pub fn mean<T>(data: &[T]) -> T
where
    T: Num + NumCast + Copy,
{
    mu(data)
}
47
48pub fn median<T>(data: &[T]) -> T
63where
64 T: Num + NumCast + Copy,
65{
66 if data.is_empty() {
67 return T::zero();
68 }
69
70 let mid = data.len() / 2;
71 if data.len() % 2 == 0 {
72 (data[mid] + data[mid - 1]) / (T::one() + T::one())
73 } else {
74 data[mid]
75 }
76}
77
78pub fn median_unsorted<T>(data: &[T]) -> T
93where
94 T: Num + NumCast + Copy + PartialOrd,
95{
96 if data.is_empty() {
97 return T::zero();
98 }
99
100 let mut sorted = data.to_vec();
101 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
102
103 median(&sorted)
104}
105
/// Returns the smallest element of `data`, or `None` for an empty slice.
///
/// The scan starts from the first element and replaces the running minimum
/// only when a later element compares strictly less than it. Elements that do
/// not compare as less (including incomparable ones such as NaN) never
/// replace the current minimum.
pub fn min<T>(data: &[T]) -> Option<T>
where
    T: PartialOrd + Copy,
{
    let (&first, rest) = data.split_first()?;

    let mut lowest = first;
    for &candidate in rest {
        if candidate < lowest {
            lowest = candidate;
        }
    }

    Some(lowest)
}
127
/// Returns the largest element of `data`, or `None` for an empty slice.
///
/// The scan starts from the first element and replaces the running maximum
/// only when a later element compares strictly greater than it. Elements that
/// do not compare as greater (including incomparable ones such as NaN) never
/// replace the current maximum.
pub fn max<T>(data: &[T]) -> Option<T>
where
    T: PartialOrd + Copy,
{
    let (&first, rest) = data.split_first()?;

    let mut highest = first;
    for &candidate in rest {
        if candidate > highest {
            highest = candidate;
        }
    }

    Some(highest)
}
149
150pub fn range<T>(data: &[T]) -> Option<T>
163where
164 T: PartialOrd + Copy + Sub<Output = T>,
165{
166 if data.is_empty() {
167 return None;
168 }
169
170 let minimum = min(data).unwrap();
171 let maximum = max(data).unwrap();
172
173 Some(maximum - minimum)
174}
175
176pub fn quartiles<T>(data: &[T]) -> Option<(T, T, T)>
192where
193 T: Num + NumCast + Copy + PartialOrd,
194{
195 if data.is_empty() {
196 return None;
197 }
198
199 let mut sorted = data.to_vec();
200 if sorted
201 .iter()
202 .any(|x| x.to_f64().is_some_and(|x| x.is_nan()))
203 {
204 return None;
205 };
206 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
207
208 let n = sorted.len();
209 let get_quantile = |p: f64| -> Option<T> {
210 let idx = p * (n - 1) as f64;
211 let idx_floor = idx.floor();
212 let weight = idx - idx_floor;
213
214 let lower = sorted.get(idx_floor as usize)?.to_f64()?;
215 let upper = sorted.get(idx.ceil() as usize)?.to_f64()?;
216 let interpolated = lower + weight * (upper - lower);
217
218 NumCast::from(interpolated)
219 };
220
221 Some((
222 get_quantile(0.25)?,
223 get_quantile(0.50)?,
224 get_quantile(0.75)?,
225 ))
226}
227
228pub fn interquartile_range<T>(data: &[T]) -> Option<T>
243where
244 T: Num + NumCast + Copy + PartialOrd,
245{
246 if let Some((q1, _, q3)) = quartiles(data) {
247 Some(q3 - q1)
248 } else {
249 None
250 }
251}
252
253pub fn variance<T>(data: &[T]) -> f64
268where
269 T: Num + NumCast + Copy,
270{
271 if data.is_empty() || data.len() < 2 {
272 return 0.0;
273 }
274
275 let mu = mu(data).to_f64().unwrap_or_default();
276
277 let dev_sum: f64 = data
278 .iter()
279 .map(|x| {
280 let x = x.to_f64().unwrap_or_default();
281
282 (x - mu).powi(2)
283 })
284 .sum();
285
286 dev_sum / (data.len() as f64 - 1.0)
287}
288
/// Returns the standard deviation of `data`, computed as the square root of
/// [`variance`].
///
/// Because [`variance`] divides by `n - 1` and returns `0.0` for fewer than
/// two elements, this is the sample standard deviation and is likewise `0.0`
/// for empty and single-element input.
pub fn stdev<T>(data: &[T]) -> f64
where
    T: Num + NumCast + Copy,
{
    variance(data).sqrt()
}
309
310pub fn z_score<T, F>(datapoint: T, mu: F, sigma: F) -> Option<f64>
327where
328 T: Num + NumCast + Copy,
329 F: Into<f64> + Copy,
330{
331 let x_f64 = datapoint.to_f64()?;
332 Some((x_f64 - mu.into()) / sigma.into())
333}
334
335pub fn z_scores<T>(data: &[T]) -> Option<Vec<f64>>
364where
365 T: Num + NumCast + Copy,
366{
367 if data.is_empty() {
368 return None;
369 }
370
371 let mu = mu(data).to_f64()?;
372 let sigma = stdev(data).to_f64()?;
373
374 data.iter().map(|x| z_score(*x, mu, sigma)).collect()
375}
376
377pub fn normalized_entropy<T>(data: &[T], n_bins: u8) -> Option<f64>
397where
398 T: Num + NumCast + Copy + PartialOrd,
399{
400 if data.is_empty() {
401 return None;
402 }
403
404 let x_min = min(data).and_then(|x| x.to_f64()).unwrap();
407 let x_max = max(data).and_then(|x| x.to_f64()).unwrap();
408
409 let factor = (n_bins as f64 - 1e-11) / ((x_max - x_min) + 1e-60);
416 let mut bin_counts = vec![0u32; n_bins as usize];
417 data.iter().for_each(|x| {
418 let k = (factor * (x.to_f64().unwrap() - x_min)) as usize;
419 bin_counts[k] += 1;
420 });
421
422 let entropy_sum = bin_counts
423 .iter()
424 .copied()
425 .fold(0.0, |entropy_sum, bin_count| {
426 if bin_count == 0 {
427 entropy_sum
428 } else {
429 let bin_probability = bin_count as f64 / data.len() as f64;
430 entropy_sum - (bin_probability * bin_probability.ln())
431 }
432 });
433
434 let relative_entropy = entropy_sum / (n_bins as f64).ln();
435
436 Some(relative_entropy)
437}
438
// Unit tests for the statistics helpers defined in the parent module. The
// tests are grouped by the function they exercise, in this order: mu, median,
// median_unsorted, stdev, variance, min, max, range, quartiles,
// interquartile_range, z_score, z_scores and normalized_entropy.
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `a` and `b` differ by less than `eps`. On failure the
    /// message reports `b` as the expected value and `a` as the actual one.
    fn assert_close(a: f64, b: f64, eps: f64) {
        assert!(
            (a - b).abs() < eps,
            "Expected {:.6}, got {:.6}, diff = {:.6}",
            b,
            a,
            (a - b).abs()
        );
    }

    // ---- mu ----

    #[test]
    fn test_mu_basic() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(mu(&data), 3.0);
    }

    #[test]
    fn test_mu_single_element() {
        let data = vec![42];
        assert_eq!(mu(&data), 42);
    }

    // An empty slice yields zero rather than an error.
    #[test]
    fn test_mu_empty() {
        let data: Vec<f64> = vec![];
        let result = mu(&data);
        assert_eq!(result, 0.0);
    }

    #[test]
    fn test_mu_negative_numbers() {
        let data = vec![-1.0, -2.0, -3.0];
        assert_eq!(mu(&data), -2.0);
    }

    #[test]
    fn test_mu_mixed_numbers() {
        let data = vec![-2.0, 0.0, 2.0];
        assert_eq!(mu(&data), 0.0);
    }

    #[test]
    fn test_mu_bigger_sample_f64() {
        let data: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let result = mu(&data);
        let expected = 5.5;
        let epsilon = 1e-10;

        assert!(
            (result - expected).abs() < epsilon,
            "Expected {}, got {}",
            expected,
            result
        );
    }

    #[test]
    fn test_mu_bigger_sample_integers() {
        let data = vec![2, 4, 6, 8, 10, 12, 14, 16, 18, 20];
        let result = mu(&data);
        let expected = 11;
        assert_eq!(result, expected);
    }

    // ---- median (input already sorted) ----

    #[test]
    fn test_median_sorted_odd_length() {
        let data = [1, 2, 3];
        let result = median(&data);
        assert_eq!(result, 2);
    }

    // Integer input: the expected value is itself computed with integer
    // division, so the comparison is against 2, not 2.5.
    #[test]
    fn test_median_sorted_even_length() {
        let data = [1, 2, 3, 4];
        let result = median(&data);
        assert_eq!(result, (2 + 3) / 2);
    }

    #[test]
    fn test_median_single_element() {
        let data = [42];
        let result = median(&data);
        assert_eq!(result, 42);
    }

    #[test]
    fn test_median_empty_slice() {
        let data: [i32; 0] = [];
        let result = median(&data);
        assert_eq!(result, 0);
    }

    #[test]
    fn test_median_floats() {
        let data = [1.0, 2.0, 3.0, 4.0];
        let result = median(&data);
        let expected = (2.0 + 3.0) / 2.0;
        assert_close(result, expected, 1e-6);
    }

    // ---- median_unsorted ----

    #[test]
    fn test_median_unsorted_odd_length() {
        let data = [2, 4, 3, 5, 1];
        let result = median_unsorted(&data);
        assert_eq!(result, 3);
    }

    #[test]
    fn test_median_unsorted_even_length() {
        let data = [7, 1, 5, 3];
        let result = median_unsorted(&data);
        assert_eq!(result, (3 + 5) / 2);
    }

    #[test]
    fn test_median_unsorted_single_element() {
        let data = [42];
        let result = median_unsorted(&data);
        assert_eq!(result, 42);
    }

    #[test]
    fn test_median_unsorted_empty_slice() {
        let data: [i32; 0] = [];
        let result = median_unsorted(&data);
        assert_eq!(result, 0);
    }

    #[test]
    fn test_median_unsorted_floats() {
        let data = [2.5, 3.5, 1.5, 4.5];
        let result = median_unsorted(&data);
        let expected = (2.5 + 3.5) / 2.0;
        assert_close(result, expected, 1e-6);
    }

    #[test]
    fn test_median_unsorted_duplicates() {
        let data = [1, 2, 2, 2, 3];
        let result = median_unsorted(&data);
        assert_eq!(result, 2);
    }

    // ---- stdev ----

    #[test]
    fn test_stdev_basic_floats() {
        let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let expected = 2.138089935;
        let result = stdev(&data);
        assert_close(result, expected, 1e-6);
    }

    #[test]
    fn test_stdev_integers() {
        let data = [1, 2, 3, 4, 5];
        let expected = 1.58113883;
        let result = stdev(&data);
        assert_close(result, expected, 1e-6);
    }

    #[test]
    fn test_stdev_identical_values() {
        let data = [42.0, 42.0, 42.0];
        let result = stdev(&data);
        assert_eq!(result, 0.0);
    }

    // Fewer than two elements: the result is defined as 0.0.
    #[test]
    fn test_stdev_single_element() {
        let data = [99.0];
        let result = stdev(&data);
        assert_eq!(result, 0.0);
    }

    #[test]
    fn test_stdev_empty() {
        let data: [f64; 0] = [];
        let result = stdev(&data);
        assert_eq!(result, 0.0);
    }

    // ---- variance ----

    #[test]
    fn test_variance_basic_integers() {
        let data = [1, 2, 3, 4, 5];
        let var = variance(&data);
        assert_close(var, 2.5, 1e-6);
    }

    #[test]
    fn test_variance_basic_floats() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
        let var = variance(&data);
        assert_close(var, 2.5, 1e-6);
    }

    #[test]
    fn test_variance_single_element() {
        let data = [42];
        let var = variance(&data);
        assert_eq!(var, 0.0);
    }

    #[test]
    fn test_variance_empty() {
        let data: [f64; 0] = [];
        let var = variance(&data);
        assert_eq!(var, 0.0);
    }

    #[test]
    fn test_variance_duplicates() {
        let data = [3, 3, 3, 3];
        let var = variance(&data);
        assert_eq!(var, 0.0);
    }

    #[test]
    fn test_variance_negative_numbers() {
        let data = [-1, -2, -3, -4, -5];
        let var = variance(&data);
        assert_close(var, 2.5, 1e-6);
    }

    // ---- min ----

    #[test]
    fn test_min_with_integers() {
        let data = [4, 2, 7, 1, 9];
        assert_eq!(min(&data), Some(1));
    }

    #[test]
    fn test_min_with_floats() {
        let data = [3.5, 2.2, 5.1, 0.1, -4.7];
        assert_eq!(min(&data), Some(-4.7));
    }

    #[test]
    fn test_min_with_one_element() {
        let data = [42];
        assert_eq!(min(&data), Some(42));
    }

    #[test]
    fn test_min_with_empty_slice() {
        let data: [i32; 0] = [];
        assert_eq!(min(&data), None);
    }

    #[test]
    fn test_min_with_duplicates() {
        let data = [5, 5, 5, 5];
        assert_eq!(min(&data), Some(5));
    }

    #[test]
    fn test_min_with_negatives() {
        let data = [-10, -20, -5, -30];
        assert_eq!(min(&data), Some(-30));
    }

    // ---- max ----

    #[test]
    fn test_max_with_integers() {
        let data = [4, 2, 7, 1, 9];
        assert_eq!(max(&data), Some(9));
    }

    #[test]
    fn test_max_with_floats() {
        let data = [3.5, 2.2, 5.1, 0.1, -4.7];
        assert_eq!(max(&data), Some(5.1));
    }

    #[test]
    fn test_max_with_one_element() {
        let data = [42];
        assert_eq!(max(&data), Some(42));
    }

    #[test]
    fn test_max_with_empty_slice() {
        let data: [i32; 0] = [];
        assert_eq!(max(&data), None);
    }

    #[test]
    fn test_max_with_duplicates() {
        let data = [5, 5, 5, 5];
        assert_eq!(max(&data), Some(5));
    }

    #[test]
    fn test_max_with_negatives() {
        let data = [-10, -20, -5, -30];
        assert_eq!(max(&data), Some(-5));
    }

    // ---- range ----

    #[test]
    fn test_range_with_integers() {
        let data = [4, 2, 7, 1, 9];
        assert_eq!(range(&data), Some(8));
    }

    #[test]
    fn test_range_with_floats() {
        let data = [3.5, 2.2, 5.1, 0.1, -4.7];
        let result = range(&data).unwrap();
        assert_close(result, 9.8, 1e-10);
    }

    #[test]
    fn test_range_with_one_element() {
        let data = [42];
        assert_eq!(range(&data), Some(0));
    }

    #[test]
    fn test_range_with_empty_slice() {
        let data: [i32; 0] = [];
        assert_eq!(range(&data), None);
    }

    #[test]
    fn test_range_with_duplicates() {
        let data = [5, 5, 5, 5];
        assert_eq!(range(&data), Some(0));
    }

    #[test]
    fn test_range_with_negatives() {
        let data = [-10, -20, -5, -30];
        assert_eq!(range(&data), Some(25));
    }

    // ---- quartiles ----

    // The expected values are linearly interpolated between neighbouring
    // elements, e.g. q1 sits a quarter of the way from 2.0 to 3.0.
    #[test]
    fn test_quartiles_even_sized_data() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
        let (q1, q2, q3) = quartiles(&data).unwrap();

        assert_close(q1, 2.25, 1e-10);
        assert_close(q2, 3.5, 1e-10);
        assert_close(q3, 4.75, 1e-10);
    }

    #[test]
    fn test_quartiles_odd_sized_data() {
        let data = [10.0, 20.0, 30.0, 40.0, 50.0];
        let (q1, q2, q3) = quartiles(&data).unwrap();

        assert_eq!(q1, 20.0);
        assert_eq!(q2, 30.0);
        assert_eq!(q3, 40.0);
    }

    #[test]
    fn test_quartiles_empty_input() {
        let data: [f64; 0] = [];
        assert_eq!(quartiles(&data), None);
    }

    // NaN cannot be ordered, so the whole computation is refused.
    #[test]
    fn test_quartiles_nan_in_input() {
        let data = [1.0, 2.0, f64::NAN, 4.0];
        assert_eq!(quartiles(&data), None);
    }

    #[test]
    fn test_quartiles_all_same_values() {
        let data = [42.0, 42.0, 42.0, 42.0, 42.0];
        let (q1, q2, q3) = quartiles(&data).unwrap();
        assert_eq!(q1, 42.0);
        assert_eq!(q2, 42.0);
        assert_eq!(q3, 42.0);
    }

    #[test]
    fn test_quartiles_unsorted_input() {
        let data = [9.0, 3.0, 1.0, 10.0, 5.0, 6.0, 2.0, 4.0, 8.0, 7.0];
        let (q1, q2, q3) = quartiles(&data).unwrap();

        assert_close(q1, 3.25, 1e-10);
        assert_close(q2, 5.5, 1e-10);
        assert_close(q3, 7.75, 1e-10);
    }

    // ---- interquartile_range ----

    #[test]
    fn test_iqr_empty() {
        let data: [f64; 0] = [];
        assert_eq!(interquartile_range(&data), None);
    }

    #[test]
    fn test_iqr_single_element() {
        let data = [42.0];
        assert_eq!(interquartile_range(&data), Some(0.0));
    }

    #[test]
    fn test_iqr_even_length() {
        let data = [1.0, 2.0, 3.0, 4.0];
        let result = interquartile_range(&data);
        assert_eq!(result, Some(1.5));
    }

    #[test]
    fn test_iqr_odd_length() {
        let data = [
            7.0, 15.0, 36.0, 39.0, 40.0, 41.0, 42.0, 43.0, 47.0, 49.0, 50.0,
        ];
        let result = interquartile_range(&data);
        assert_eq!(result, Some(7.5));
    }

    #[test]
    fn test_iqr_unsorted() {
        let data = [5.0, 1.0, 3.0, 2.0, 4.0];
        let result = interquartile_range(&data);
        assert_eq!(result, Some(2.0));
    }

    #[test]
    fn test_iqr_integers() {
        let data = [10, 20, 30, 40, 50];
        let result = interquartile_range(&data);
        assert_eq!(result, Some(20));
    }

    // ---- z_score ----

    #[test]
    fn test_z_score_f64_inputs() {
        let z = z_score(10.0f64, 5.0f64, 2.0f64);
        assert_eq!(z, Some(2.5));
    }

    #[test]
    fn test_z_score_f32_inputs() {
        let z = z_score(10.0f32, 5.0f32, 2.0f32);
        assert_eq!(z, Some(2.5));
    }

    #[test]
    fn test_z_score_integer_datapoint() {
        let z = z_score(10i32, 5.0f64, 2.0f64);
        assert_eq!(z, Some(2.5));
    }

    // A zero sigma is not rejected: the division is carried out and the
    // result is infinite.
    #[test]
    fn test_z_score_zero_sigma() {
        let z = z_score(10.0f64, 5.0f64, 0.0f64);
        assert!(z.unwrap().is_infinite());
    }

    // ---- z_scores ----

    #[test]
    fn test_z_scores_precise() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];

        // Sample variance of the data is 2.5, mean is 3.0.
        let stdev = 2.5f64.sqrt();
        let expected = [
            (1.0 - 3.0) / stdev,
            (2.0 - 3.0) / stdev,
            (3.0 - 3.0) / stdev,
            (4.0 - 3.0) / stdev,
            (5.0 - 3.0) / stdev,
        ];

        let result = z_scores(&data).unwrap();

        for (actual, expected) in result.into_iter().zip(expected.into_iter()) {
            assert_close(actual, expected, 1e-10);
        }
    }

    #[test]
    fn test_z_scores_symmetric_centered() {
        let data = [-2.0, -1.0, 0.0, 1.0, 2.0];

        let stdev = 2.5f64.sqrt();
        let expected = [-2.0 / stdev, -1.0 / stdev, 0.0, 1.0 / stdev, 2.0 / stdev];

        let result = z_scores(&data).unwrap();

        for (actual, expected) in result.into_iter().zip(expected.into_iter()) {
            assert_close(actual, expected, 1e-10);
        }
    }

    // ---- normalized_entropy ----

    // All values fall into a single bin, so the entropy is zero.
    #[test]
    fn test_entropy_all_same() {
        let data = [1.0; 100];
        let entropy = normalized_entropy(&data, 10);
        assert_eq!(entropy, Some(0.0));
    }

    #[test]
    fn test_entropy_uniform_distribution() {
        let data: Vec<f64> = (0..100).map(|x| x as f64).collect();
        let entropy = normalized_entropy(&data, 10);
        assert!(entropy.unwrap() > 0.95);
    }

    #[test]
    fn test_entropy_random_distribution() {
        let data = [1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 1.0, 4.0, 3.0, 2.0];
        let entropy = normalized_entropy(&data, 5);
        assert!(entropy.unwrap() > 0.5 && entropy.unwrap() < 1.0);
    }

    #[test]
    fn test_entropy_single_value() {
        let data = [42.0];
        let entropy = normalized_entropy(&data, 5);
        assert_eq!(entropy, Some(0.0));
    }

    #[test]
    fn test_entropy_empty_slice() {
        let data: [f64; 0] = [];
        let entropy = normalized_entropy(&data, 5);
        assert_eq!(entropy, None);
    }

    // One value per bin: the normalized entropy reaches its maximum of 1.
    #[test]
    fn test_entropy_maximum_when_even_bins() {
        let data = [0.0, 1.0, 2.0, 3.0];
        let entropy = normalized_entropy(&data, 4);
        assert!((entropy.unwrap() - 1.0).abs() < 1e-6);
    }
}