1pub mod covariance;
28pub mod hypothesis;
29
30pub use covariance::{corr, corr_matrix, cov, cov_matrix};
31pub use hypothesis::{
32 chisquare, f_oneway, ttest_1samp, ttest_ind, ttest_rel, AnovaResult, ChiSquareResult,
33 TTestResult,
34};
35
36use trueno::Vector;
37
/// Borrowed view over a `Vector<f32>` on which descriptive statistics
/// (quantiles, percentiles, five-number summary, IQR and histograms) are
/// computed.
///
/// Only a reference is stored; methods that need ordered data sort or
/// partition a private copy of the values, never the caller's vector.
#[derive(Debug)]
pub struct DescriptiveStats<'a> {
    /// The data being summarised, borrowed for the lifetime `'a`.
    data: &'a Vector<f32>,
}
46
/// Five-number summary of a data set, as produced by
/// `DescriptiveStats::five_number_summary`.
///
/// The fields are the 0th, 25th, 50th, 75th and 100th percentiles of the
/// data, in that order.
#[derive(Debug, Clone, PartialEq)]
pub struct FiveNumberSummary {
    /// Smallest value (0th percentile).
    pub min: f32,
    /// First quartile (25th percentile).
    pub q1: f32,
    /// Median (50th percentile).
    pub median: f32,
    /// Third quartile (75th percentile).
    pub q3: f32,
    /// Largest value (100th percentile).
    pub max: f32,
}
58
/// Result of binning a data set.
///
/// The histogram constructors in this module always produce
/// `bins.len() == counts.len() + 1`.
#[derive(Debug, Clone, PartialEq)]
pub struct Histogram {
    /// Bin edges in increasing order; bin `i` spans `bins[i]..bins[i + 1]`.
    pub bins: Vec<f32>,
    /// Number of observations that fell into each bin.
    pub counts: Vec<usize>,
    /// Optional per-bin density. The constructors visible in this module
    /// always leave it as `None`.
    pub density: Option<Vec<f64>>,
}
69
/// Rule used by `DescriptiveStats::histogram_method` to choose histogram bins.
///
/// The enum is fieldless, so equality is total and it can be hashed; `Eq` and
/// `Hash` are derived so a method can be used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BinMethod {
    /// Bin width `2 * IQR * n^(-1/3)`; fails when the IQR is zero.
    FreedmanDiaconis,
    /// `ceil(log2(n)) + 1` equal-width bins.
    Sturges,
    /// Bin width `3.5 * stddev * n^(-1/3)`; fails when the standard
    /// deviation is zero.
    Scott,
    /// `ceil(sqrt(n))` equal-width bins.
    SquareRoot,
    /// Adaptive, variable-width bins whose edges come from a change-point
    /// search over the sorted data.
    Bayesian,
}
86
87impl<'a> DescriptiveStats<'a> {
88 #[must_use]
102 pub fn new(data: &'a Vector<f32>) -> Self {
103 Self { data }
104 }
105
106 pub fn quantile(&self, q: f64) -> Result<f32, String> {
140 if self.data.is_empty() {
142 return Err("Cannot compute quantile of empty vector".to_string());
143 }
144 if !(0.0..=1.0).contains(&q) {
145 return Err(format!("Quantile must be in [0, 1], got {q}"));
146 }
147
148 let n = self.data.len();
149
150 if n == 1 {
152 return Ok(self.data.as_slice()[0]);
153 }
154
155 let h = (n - 1) as f64 * q;
158 let h_floor = h.floor() as usize;
159 let h_ceil = h.ceil() as usize;
160
161 let mut working_copy = self.data.as_slice().to_vec();
164
165 if h_floor == h_ceil {
167 working_copy.select_nth_unstable_by(h_floor, |a, b| {
169 a.partial_cmp(b)
170 .expect("f32 values should be comparable (not NaN)")
171 });
172 return Ok(working_copy[h_floor]);
173 }
174
175 working_copy.select_nth_unstable_by(h_floor, |a, b| {
178 a.partial_cmp(b)
179 .expect("f32 values should be comparable (not NaN)")
180 });
181 let lower = working_copy[h_floor];
182
183 working_copy.select_nth_unstable_by(h_ceil, |a, b| {
187 a.partial_cmp(b)
188 .expect("f32 values should be comparable (not NaN)")
189 });
190 let upper = working_copy[h_ceil];
191
192 let fraction = h - h_floor as f64;
194 let result = lower + (fraction as f32) * (upper - lower);
195
196 Ok(result)
197 }
198
199 pub fn percentiles(&self, percentiles: &[f64]) -> Result<Vec<f32>, String> {
221 if self.data.is_empty() {
223 return Err("Cannot compute percentiles of empty vector".to_string());
224 }
225 for &p in percentiles {
226 if !(0.0..=100.0).contains(&p) {
227 return Err(format!("Percentile must be in [0, 100], got {p}"));
228 }
229 }
230
231 let mut sorted = self.data.as_slice().to_vec();
233 sorted.sort_by(|a, b| {
234 a.partial_cmp(b)
235 .expect("f32 values should be comparable (not NaN)")
236 });
237
238 let n = sorted.len();
239 let mut results = Vec::with_capacity(percentiles.len());
240
241 for &p in percentiles {
242 let q = p / 100.0;
243 let h = (n - 1) as f64 * q;
244 let h_floor = h.floor() as usize;
245 let h_ceil = h.ceil() as usize;
246
247 let value = if h_floor == h_ceil {
248 sorted[h_floor]
249 } else {
250 let fraction = h - h_floor as f64;
251 sorted[h_floor] + (fraction as f32) * (sorted[h_ceil] - sorted[h_floor])
252 };
253
254 results.push(value);
255 }
256
257 Ok(results)
258 }
259
260 pub fn five_number_summary(&self) -> Result<FiveNumberSummary, String> {
280 if self.data.is_empty() {
281 return Err("Cannot compute summary of empty vector".to_string());
282 }
283
284 let values = self.percentiles(&[0.0, 25.0, 50.0, 75.0, 100.0])?;
286
287 Ok(FiveNumberSummary {
288 min: values[0],
289 q1: values[1],
290 median: values[2],
291 q3: values[3],
292 max: values[4],
293 })
294 }
295
296 pub fn iqr(&self) -> Result<f32, String> {
311 let summary = self.five_number_summary()?;
312 Ok(summary.q3 - summary.q1)
313 }
314
315 pub fn histogram_auto(&self) -> Result<Histogram, String> {
331 self.histogram_method(BinMethod::FreedmanDiaconis)
332 }
333
334 fn bayesian_blocks_edges(&self) -> Result<Vec<f32>, String> {
342 if self.data.is_empty() {
343 return Err("Cannot compute Bayesian Blocks on empty data".to_string());
344 }
345
346 let n = self.data.len();
347
348 if n == 1 {
350 let val = self.data.as_slice()[0];
351 return Ok(vec![val - 0.5, val + 0.5]);
352 }
353
354 let mut sorted_data: Vec<f32> = self.data.as_slice().to_vec();
356 sorted_data.sort_by(|a, b| {
357 a.partial_cmp(b)
358 .expect("f32 values should be comparable (not NaN)")
359 });
360
361 if sorted_data[0] == sorted_data[n - 1] {
363 let val = sorted_data[0];
364 return Ok(vec![val - 0.5, val + 0.5]);
365 }
366
367 let ncp_prior = 0.5_f32; let mut best_fitness = vec![0.0_f32; n];
374 let mut last_change_point = vec![0_usize; n];
375
376 best_fitness[0] = 0.0;
378
379 for r in 1..n {
381 let mut max_fitness = f32::NEG_INFINITY;
383 let mut best_cp = 0;
384
385 for l in 0..=r {
386 let block_count = (r - l + 1) as f32;
388
389 let block_values: Vec<f32> = sorted_data[l..=r].to_vec();
392
393 let block_min = block_values[0];
395 let block_max = block_values[block_values.len() - 1];
396 let block_range = (block_max - block_min).max(1e-10);
397
398 let density_score = -block_range / block_count.sqrt();
401
402 let fitness = if l == 0 {
404 density_score - ncp_prior
405 } else {
406 best_fitness[l - 1] + density_score - ncp_prior
407 };
408
409 if fitness > max_fitness {
410 max_fitness = fitness;
411 best_cp = l;
412 }
413 }
414
415 best_fitness[r] = max_fitness;
416 last_change_point[r] = best_cp;
417 }
418
419 let mut change_points = Vec::new();
421 let mut current = n - 1;
422
423 while current > 0 {
424 let cp = last_change_point[current];
425 if cp > 0 {
426 change_points.push(cp);
427 }
428 if cp == 0 {
429 break;
430 }
431 current = cp - 1;
432 }
433
434 change_points.reverse();
435
436 let mut edges = Vec::new();
438
439 let data_min = sorted_data[0];
441 let data_max = sorted_data[n - 1];
442 let range = data_max - data_min;
443 let margin = range * 0.001; edges.push(data_min - margin);
445
446 for &cp in &change_points {
448 if cp > 0 && cp < n {
449 let edge = (sorted_data[cp - 1] + sorted_data[cp]) / 2.0;
450 edges.push(edge);
451 }
452 }
453
454 edges.push(data_max + margin);
456
457 edges.dedup();
459 edges.sort_by(|a, b| {
460 a.partial_cmp(b)
461 .expect("f32 values should be comparable (not NaN)")
462 });
463
464 let mut i = 1;
466 while i < edges.len() {
467 if edges[i] <= edges[i - 1] {
468 edges.remove(i);
469 } else {
470 i += 1;
471 }
472 }
473
474 if edges.len() < 2 {
476 return Ok(vec![data_min - margin, data_max + margin]);
477 }
478
479 Ok(edges)
480 }
481
482 pub fn histogram_method(&self, method: BinMethod) -> Result<Histogram, String> {
497 if self.data.is_empty() {
498 return Err("Cannot compute histogram of empty vector".to_string());
499 }
500
501 let n = self.data.len();
502 let n_bins = match method {
503 BinMethod::FreedmanDiaconis => {
504 let iqr = self.iqr()?;
506 if iqr == 0.0 {
507 return Err("IQR is zero, cannot use Freedman-Diaconis rule".to_string());
508 }
509 let bin_width = 2.0 * iqr * (n as f32).powf(-1.0 / 3.0);
510 let data_min = self.data.min().map_err(|e| e.to_string())?;
511 let data_max = self.data.max().map_err(|e| e.to_string())?;
512 let range = data_max - data_min;
513 let n_bins = (range / bin_width).ceil() as usize;
514 n_bins.max(1) }
516 BinMethod::Sturges => {
517 ((n as f64).log2().ceil() as usize + 1).max(1)
519 }
520 BinMethod::Scott => {
521 let std = self.data.stddev().map_err(|e| e.to_string())?;
523 if std == 0.0 {
524 return Err("Standard deviation is zero, cannot use Scott rule".to_string());
525 }
526 let bin_width = 3.5 * std * (n as f32).powf(-1.0 / 3.0);
527 let data_min = self.data.min().map_err(|e| e.to_string())?;
528 let data_max = self.data.max().map_err(|e| e.to_string())?;
529 let range = data_max - data_min;
530 let n_bins = (range / bin_width).ceil() as usize;
531 n_bins.max(1)
532 }
533 BinMethod::SquareRoot => {
534 ((n as f64).sqrt().ceil() as usize).max(1)
536 }
537 BinMethod::Bayesian => {
538 let edges = self.bayesian_blocks_edges()?;
540 return self.histogram_edges(&edges);
541 }
542 };
543
544 self.histogram(n_bins)
545 }
546
547 pub fn histogram(&self, n_bins: usize) -> Result<Histogram, String> {
564 if self.data.is_empty() {
565 return Err("Cannot compute histogram of empty vector".to_string());
566 }
567 if n_bins == 0 {
568 return Err("Number of bins must be at least 1".to_string());
569 }
570
571 let data_min = self.data.min().map_err(|e| e.to_string())?;
572 let data_max = self.data.max().map_err(|e| e.to_string())?;
573
574 if data_min == data_max {
576 return Ok(Histogram {
577 bins: vec![data_min, data_max],
578 counts: vec![self.data.len()],
579 density: None,
580 });
581 }
582
583 let range = data_max - data_min;
585 let bin_width = range / n_bins as f32;
586 let mut bins = Vec::with_capacity(n_bins + 1);
587 for i in 0..=n_bins {
588 bins.push(data_min + i as f32 * bin_width);
589 }
590
591 let mut counts = vec![0usize; n_bins];
593 for &value in self.data.as_slice() {
594 let mut bin_idx = ((value - data_min) / bin_width) as usize;
596 if bin_idx >= n_bins {
598 bin_idx = n_bins - 1;
599 }
600 counts[bin_idx] += 1;
601 }
602
603 Ok(Histogram {
604 bins,
605 counts,
606 density: None,
607 })
608 }
609
610 pub fn histogram_edges(&self, edges: &[f32]) -> Result<Histogram, String> {
627 if self.data.is_empty() {
628 return Err("Cannot compute histogram of empty vector".to_string());
629 }
630 if edges.len() < 2 {
631 return Err("Must have at least 2 bin edges".to_string());
632 }
633
634 for i in 1..edges.len() {
636 if edges[i] <= edges[i - 1] {
637 return Err("Bin edges must be strictly increasing".to_string());
638 }
639 }
640
641 let n_bins = edges.len() - 1;
642 let mut counts = vec![0usize; n_bins];
643
644 for &value in self.data.as_slice() {
645 if value < edges[0] || value > edges[n_bins] {
647 continue;
649 }
650
651 let mut bin_idx = None;
654 for i in 0..(n_bins - 1) {
655 if value >= edges[i] && value < edges[i + 1] {
656 bin_idx = Some(i);
657 break;
658 }
659 }
660
661 if bin_idx.is_none() && value >= edges[n_bins - 1] && value <= edges[n_bins] {
663 bin_idx = Some(n_bins - 1);
664 }
665
666 if let Some(idx) = bin_idx {
667 counts[idx] += 1;
668 }
669 }
670
671 Ok(Histogram {
672 bins: edges.to_vec(),
673 counts,
674 density: None,
675 })
676 }
677}
678
#[cfg(test)]
mod tests {
    use super::*;
    use trueno::Vector;

    // ---------------------------------------------------------------------
    // Quantiles
    // ---------------------------------------------------------------------

    /// An empty vector has no quantiles.
    #[test]
    fn test_quantile_empty() {
        let empty = Vector::from_slice(&[]);
        assert!(DescriptiveStats::new(&empty).quantile(0.5).is_err());
    }

    /// With one observation every quantile is that observation.
    #[test]
    fn test_quantile_single_element() {
        let data = Vector::from_slice(&[42.0]);
        let summary = DescriptiveStats::new(&data);
        for q in [0.0, 0.5, 1.0] {
            let got = summary
                .quantile(q)
                .expect("quantile should succeed for single element");
            assert_eq!(got, 42.0);
        }
    }

    /// Two observations: the ends are exact, the middle is interpolated.
    #[test]
    fn test_quantile_two_elements() {
        let data = Vector::from_slice(&[1.0, 2.0]);
        let summary = DescriptiveStats::new(&data);
        for (q, expected) in [(0.0, 1.0), (0.5, 1.5), (1.0, 2.0)] {
            let got = summary
                .quantile(q)
                .expect("quantile should succeed for two elements");
            assert_eq!(got, expected);
        }
    }

    /// Odd length: the median is the middle element.
    #[test]
    fn test_quantile_odd_length() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
        let median = DescriptiveStats::new(&data)
            .quantile(0.5)
            .expect("quantile should succeed for odd length data");
        assert_eq!(median, 3.0);
    }

    /// Even length: the median is the mean of the two middle elements.
    #[test]
    fn test_quantile_even_length() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0]);
        let median = DescriptiveStats::new(&data)
            .quantile(0.5)
            .expect("quantile should succeed for even length data");
        assert_eq!(median, 2.5);
    }

    /// `q = 0` and `q = 1` return the minimum and maximum.
    #[test]
    fn test_quantile_edge_cases() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
        let summary = DescriptiveStats::new(&data);

        let lowest = summary.quantile(0.0).expect("min quantile should succeed");
        assert_eq!(lowest, 1.0);

        let highest = summary.quantile(1.0).expect("max quantile should succeed");
        assert_eq!(highest, 5.0);
    }

    /// Quantile levels outside `[0, 1]` are rejected.
    #[test]
    fn test_quantile_invalid() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0]);
        let summary = DescriptiveStats::new(&data);
        for out_of_range in [-0.1, 1.1] {
            assert!(summary.quantile(out_of_range).is_err());
        }
    }

    // ---------------------------------------------------------------------
    // Percentiles, five-number summary, IQR
    // ---------------------------------------------------------------------

    #[test]
    fn test_percentiles() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
        let quartiles = DescriptiveStats::new(&data)
            .percentiles(&[25.0, 50.0, 75.0])
            .expect("percentiles should succeed for valid inputs");
        assert_eq!(quartiles, vec![2.0, 3.0, 4.0]);
    }

    #[test]
    fn test_five_number_summary() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
        let FiveNumberSummary {
            min,
            q1,
            median,
            q3,
            max,
        } = DescriptiveStats::new(&data)
            .five_number_summary()
            .expect("five-number summary should succeed for valid data");

        assert_eq!(min, 1.0);
        assert_eq!(q1, 2.0);
        assert_eq!(median, 3.0);
        assert_eq!(q3, 4.0);
        assert_eq!(max, 5.0);
    }

    #[test]
    fn test_iqr() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
        let spread = DescriptiveStats::new(&data)
            .iqr()
            .expect("IQR should succeed for valid data");
        assert_eq!(spread, 2.0);
    }

    // ---------------------------------------------------------------------
    // Fixed-width histograms
    // ---------------------------------------------------------------------

    #[test]
    fn test_histogram_empty() {
        let empty = Vector::from_slice(&[]);
        assert!(DescriptiveStats::new(&empty).histogram(3).is_err());
    }

    #[test]
    fn test_histogram_zero_bins() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0]);
        assert!(DescriptiveStats::new(&data).histogram(0).is_err());
    }

    /// Three bins over five points: four edges, every point counted once.
    #[test]
    fn test_histogram_fixed_bins() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
        let hist = DescriptiveStats::new(&data)
            .histogram(3)
            .expect("histogram should succeed for valid inputs");

        assert_eq!(hist.bins.len(), 4);
        assert_eq!(hist.counts.len(), 3);
        let total: usize = hist.counts.iter().sum();
        assert_eq!(total, 5);
    }

    /// Ten evenly spaced points in five bins: two per bin.
    #[test]
    fn test_histogram_uniform_distribution() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]);
        let hist = DescriptiveStats::new(&data)
            .histogram(5)
            .expect("histogram should succeed for uniform distribution");

        assert_eq!(hist.bins.len(), 6);
        assert_eq!(hist.counts.len(), 5);
        assert!(hist.counts.iter().all(|&count| count == 2));
    }

    /// Constant data collapses to a single bin holding everything.
    #[test]
    fn test_histogram_all_same_value() {
        let data = Vector::from_slice(&[5.0, 5.0, 5.0, 5.0]);
        let hist = DescriptiveStats::new(&data)
            .histogram(3)
            .expect("histogram should succeed for constant data");

        assert_eq!(hist.bins.len(), 2);
        assert_eq!(hist.counts.len(), 1);
        assert_eq!(hist.counts[0], 4);
    }

    // ---------------------------------------------------------------------
    // Rule-based bin selection
    // ---------------------------------------------------------------------

    /// Sturges with n = 8: ceil(log2(8)) + 1 = 4 bins.
    #[test]
    fn test_histogram_sturges() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        let hist = DescriptiveStats::new(&data)
            .histogram_method(BinMethod::Sturges)
            .expect("histogram with Sturges method should succeed");

        assert_eq!(hist.bins.len(), 5);
        assert_eq!(hist.counts.len(), 4);
        let total: usize = hist.counts.iter().sum();
        assert_eq!(total, 8);
    }

    /// Square-root rule with n = 9: ceil(sqrt(9)) = 3 bins.
    #[test]
    fn test_histogram_square_root() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
        let hist = DescriptiveStats::new(&data)
            .histogram_method(BinMethod::SquareRoot)
            .expect("histogram with SquareRoot method should succeed");

        assert_eq!(hist.bins.len(), 4);
        assert_eq!(hist.counts.len(), 3);
        let total: usize = hist.counts.iter().sum();
        assert_eq!(total, 9);
    }

    #[test]
    fn test_histogram_freedman_diaconis() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]);
        let hist = DescriptiveStats::new(&data)
            .histogram_method(BinMethod::FreedmanDiaconis)
            .expect("histogram with FreedmanDiaconis method should succeed");

        assert!(hist.bins.len() >= 2);
        assert_eq!(hist.bins.len(), hist.counts.len() + 1);
        let total: usize = hist.counts.iter().sum();
        assert_eq!(total, 10);
    }

    #[test]
    fn test_histogram_auto() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
        let hist = DescriptiveStats::new(&data)
            .histogram_auto()
            .expect("auto histogram should succeed");

        assert!(hist.bins.len() >= 2);
        assert_eq!(hist.bins.len(), hist.counts.len() + 1);
    }

    // ---------------------------------------------------------------------
    // Caller-supplied edges
    // ---------------------------------------------------------------------

    /// Edges [0, 2.5, 5, 10]: {1, 2} | {3, 4} | {5}.
    #[test]
    fn test_histogram_edges_custom() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
        let hist = DescriptiveStats::new(&data)
            .histogram_edges(&[0.0, 2.5, 5.0, 10.0])
            .expect("histogram with custom edges should succeed");

        assert_eq!(hist.bins.len(), 4);
        assert_eq!(hist.counts, vec![2, 2, 1]);
    }

    /// Too few edges, unsorted edges and repeated edges are all rejected.
    #[test]
    fn test_histogram_edges_invalid() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0]);
        let summary = DescriptiveStats::new(&data);

        let too_few = [1.0];
        assert!(summary.histogram_edges(&too_few).is_err());

        let unsorted = [5.0, 1.0, 10.0];
        assert!(summary.histogram_edges(&unsorted).is_err());

        let repeated = [1.0, 5.0, 5.0, 10.0];
        assert!(summary.histogram_edges(&repeated).is_err());
    }

    // ---------------------------------------------------------------------
    // Bayesian (adaptive) binning
    // ---------------------------------------------------------------------

    #[test]
    fn test_histogram_bayesian_basic() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]);
        let hist = DescriptiveStats::new(&data)
            .histogram_method(BinMethod::Bayesian)
            .expect("Bayesian histogram should succeed");

        assert!(hist.bins.len() >= 2);
        assert_eq!(hist.bins.len(), hist.counts.len() + 1);

        // Edges must be strictly increasing.
        assert!(hist.bins.windows(2).all(|pair| pair[1] > pair[0]));

        // No observation may be lost.
        assert_eq!(hist.counts.iter().sum::<usize>(), 10);
    }

    /// Evenly spaced data should not be split into many blocks.
    #[test]
    fn test_histogram_bayesian_uniform_data() {
        let data = Vector::from_slice(&[
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0,
        ]);
        let hist = DescriptiveStats::new(&data)
            .histogram_method(BinMethod::Bayesian)
            .expect("Bayesian histogram should succeed for uniform data");

        assert!(hist.bins.len() <= 10);
        assert_eq!(hist.bins.len(), hist.counts.len() + 1);
    }

    /// Two well-separated clusters should produce at least two bins.
    #[test]
    fn test_histogram_bayesian_change_point_detection() {
        let data = Vector::from_slice(&[
            1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 9.0, 9.1, 9.2, 9.3, 9.4, 9.5,
            9.6, 9.7, 9.8, 9.9, 10.0,
        ]);
        let hist = DescriptiveStats::new(&data)
            .histogram_method(BinMethod::Bayesian)
            .expect("Bayesian histogram should succeed for clustered data");

        assert!(hist.bins.len() >= 3);

        // The edges must enclose the whole data range.
        assert!(hist.bins[0] <= 1.0);
        let last_edge = *hist
            .bins
            .last()
            .expect("histogram should have at least one bin edge");
        assert!(last_edge >= 10.0);
    }

    #[test]
    fn test_histogram_bayesian_small_dataset() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0]);
        let hist = DescriptiveStats::new(&data)
            .histogram_method(BinMethod::Bayesian)
            .expect("Bayesian histogram should succeed for small dataset");

        assert!(hist.bins.len() >= 2);
        assert_eq!(hist.bins.len(), hist.counts.len() + 1);
        assert_eq!(hist.counts.iter().sum::<usize>(), 3);
    }

    /// The same input must give the same edges and counts every time.
    #[test]
    fn test_histogram_bayesian_reproducibility() {
        let data = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 11.0, 12.0]);
        let summary = DescriptiveStats::new(&data);

        let first = summary
            .histogram_method(BinMethod::Bayesian)
            .expect("first Bayesian histogram should succeed");
        let second = summary
            .histogram_method(BinMethod::Bayesian)
            .expect("second Bayesian histogram should succeed");

        assert_eq!(first.bins.len(), second.bins.len());
        assert!(first
            .bins
            .iter()
            .zip(second.bins.iter())
            .all(|(a, b)| (a - b).abs() < 1e-6));
        assert_eq!(first.counts, second.counts);
    }

    /// Constant data still yields a valid histogram holding every point.
    #[test]
    fn test_histogram_bayesian_single_value() {
        let data = Vector::from_slice(&[5.0, 5.0, 5.0, 5.0, 5.0]);
        let hist = DescriptiveStats::new(&data)
            .histogram_method(BinMethod::Bayesian)
            .expect("Bayesian histogram should succeed for constant data");

        assert!(hist.bins.len() >= 2);
        assert_eq!(hist.bins.len(), hist.counts.len() + 1);
        assert_eq!(hist.counts.iter().sum::<usize>(), 5);
    }

    /// Both the adaptive and a fixed-width rule handle unevenly dense data.
    #[test]
    fn test_histogram_bayesian_vs_fixed_width() {
        let data = Vector::from_slice(&[
            1.0, 1.5, 2.0, 2.5, 3.0, 10.0, 15.0, 20.0, 30.0, 30.5, 31.0, 31.5, 32.0,
        ]);
        let summary = DescriptiveStats::new(&data);

        let adaptive = summary
            .histogram_method(BinMethod::Bayesian)
            .expect("Bayesian histogram should succeed");
        let fixed = summary
            .histogram_method(BinMethod::Sturges)
            .expect("Sturges histogram should succeed");

        assert!(adaptive.bins.len() >= 2);
        assert!(fixed.bins.len() >= 2);
        assert_eq!(adaptive.bins.len(), adaptive.counts.len() + 1);
    }

    /// Fifty points, 0.0 to 4.9 in steps of 0.1.
    #[test]
    fn test_histogram_bayesian_large_dataset() {
        let raw: Vec<f32> = (0..50).map(|i| i as f32 / 10.0).collect();
        let data = Vector::from_slice(&raw);
        let hist = DescriptiveStats::new(&data)
            .histogram_method(BinMethod::Bayesian)
            .expect("Bayesian histogram should succeed for large dataset");

        assert!(hist.bins.len() >= 2);
        assert_eq!(hist.bins.len(), hist.counts.len() + 1);
        assert_eq!(hist.counts.iter().sum::<usize>(), 50);
    }
}