1use u_numflow::special;
21use u_numflow::stats;
22
23pub fn ecdf(data: &[f64]) -> Option<(Vec<f64>, Vec<f64>)> {
44 if data.is_empty() || data.iter().any(|v| !v.is_finite()) {
45 return None;
46 }
47
48 let n = data.len() as f64;
49 let mut sorted: Vec<f64> = data.to_vec();
50 sorted.sort_by(|a, b| a.partial_cmp(b).expect("finite values"));
51
52 let mut values = Vec::new();
53 let mut probs = Vec::new();
54
55 let mut i = 0;
56 while i < sorted.len() {
57 let val = sorted[i];
58 let mut j = i;
60 while j < sorted.len() && (sorted[j] - val).abs() < 1e-300 {
61 j += 1;
62 }
63 values.push(val);
64 probs.push(j as f64 / n);
65 i = j;
66 }
67
68 Some((values, probs))
69}
70
71#[derive(Debug, Clone, Copy)]
77pub enum BinMethod {
78 Sturges,
80 Scott,
82 FreedmanDiaconis,
84 Fixed(usize),
86}
87
88#[derive(Debug, Clone)]
90pub struct HistogramBins {
91 pub n_bins: usize,
93 pub bin_width: f64,
95 pub edges: Vec<f64>,
97 pub counts: Vec<usize>,
99}
100
101pub fn histogram_bins(data: &[f64], method: BinMethod) -> Option<HistogramBins> {
119 let n = data.len();
120 if n < 2 || data.iter().any(|v| !v.is_finite()) {
121 return None;
122 }
123
124 let min_val = data.iter().cloned().reduce(f64::min)?;
125 let max_val = data.iter().cloned().reduce(f64::max)?;
126 let range = max_val - min_val;
127
128 if range < 1e-300 {
129 return None; }
131
132 let nf = n as f64;
133
134 let n_bins = match method {
135 BinMethod::Sturges => {
136 let k = (nf.log2()).ceil() as usize + 1;
137 k.max(2)
138 }
139 BinMethod::Scott => {
140 let sd = stats::std_dev(data)?;
141 if sd < 1e-300 {
142 return None;
143 }
144 let h = 3.49 * sd * nf.powf(-1.0 / 3.0);
145 ((range / h).ceil() as usize).max(2)
146 }
147 BinMethod::FreedmanDiaconis => {
148 let q1 = stats::quantile(data, 0.25)?;
149 let q3 = stats::quantile(data, 0.75)?;
150 let iqr = q3 - q1;
151 if iqr < 1e-300 {
152 let k = (nf.log2()).ceil() as usize + 1;
154 k.max(2)
155 } else {
156 let h = 2.0 * iqr * nf.powf(-1.0 / 3.0);
157 ((range / h).ceil() as usize).max(2)
158 }
159 }
160 BinMethod::Fixed(0) => return None,
163 BinMethod::Fixed(k) => k,
164 };
165
166 let bin_width = range / n_bins as f64;
167
168 let mut edges = Vec::with_capacity(n_bins + 1);
170 for i in 0..=n_bins {
171 edges.push(min_val + i as f64 * bin_width);
172 }
173
174 let mut counts = vec![0_usize; n_bins];
176 for &x in data {
177 let bin = ((x - min_val) / bin_width).floor() as usize;
178 let bin = bin.min(n_bins - 1); counts[bin] += 1;
180 }
181
182 Some(HistogramBins {
183 n_bins,
184 bin_width,
185 edges,
186 counts,
187 })
188}
189
190pub fn qq_plot_normal(data: &[f64]) -> Option<(Vec<f64>, Vec<f64>)> {
217 let n = data.len();
218 if n < 3 || data.iter().any(|v| !v.is_finite()) {
219 return None;
220 }
221
222 let mut sample: Vec<f64> = data.to_vec();
223 sample.sort_by(|a, b| a.partial_cmp(b).expect("finite values"));
224
225 let nf = n as f64;
226 let theoretical: Vec<f64> = (0..n)
227 .map(|i| {
228 let p = (i as f64 + 0.5) / nf;
229 special::inverse_normal_cdf(p)
230 })
231 .collect();
232
233 Some((theoretical, sample))
234}
235
236pub fn ks_test_normal(data: &[f64]) -> Option<(f64, f64)> {
262 let n = data.len();
263 if n < 5 || data.iter().any(|v| !v.is_finite()) {
264 return None;
265 }
266
267 let mean = stats::mean(data)?;
268 let sd = stats::std_dev(data)?;
269 if sd < 1e-300 {
270 return None;
271 }
272
273 let mut sorted: Vec<f64> = data.to_vec();
274 sorted.sort_by(|a, b| a.partial_cmp(b).expect("finite values"));
275
276 let nf = n as f64;
277 let mut d_stat = 0.0_f64;
278
279 for (i, &x) in sorted.iter().enumerate() {
280 let z = (x - mean) / sd;
281 let cdf = special::standard_normal_cdf(z);
282 let ecdf_above = (i + 1) as f64 / nf;
283 let ecdf_below = i as f64 / nf;
284 d_stat = d_stat.max((ecdf_above - cdf).abs());
285 d_stat = d_stat.max((ecdf_below - cdf).abs());
286 }
287
288 let lambda = (nf.sqrt() + 0.12 + 0.11 / nf.sqrt()) * d_stat;
291 let mut p_value = 0.0;
292 for k in 1..=100 {
293 let kf = k as f64;
294 let sign = if k % 2 == 1 { 1.0 } else { -1.0 };
295 let term = sign * (-2.0 * kf * kf * lambda * lambda).exp();
296 p_value += term;
297 if term.abs() < 1e-15 {
298 break;
299 }
300 }
301 p_value = (2.0 * p_value).clamp(0.0, 1.0);
302
303 Some((d_stat, p_value))
304}
305
306#[derive(Debug, Clone, Copy)]
312pub enum BandwidthMethod {
313 Silverman,
319 Scott,
324 Manual(f64),
326}
327
328#[derive(Debug, Clone)]
330pub struct KdeResult {
331 pub x: Vec<f64>,
333 pub density: Vec<f64>,
335 pub bandwidth: f64,
337}
338
339pub fn kde(data: &[f64], method: BandwidthMethod, n_points: usize) -> Option<KdeResult> {
381 let n = data.len();
382 if n < 2 || n_points < 2 || data.iter().any(|v| !v.is_finite()) {
383 return None;
384 }
385
386 let bandwidth = kde_bandwidth(data, method)?;
387
388 let min_val = data.iter().cloned().reduce(f64::min)?;
390 let max_val = data.iter().cloned().reduce(f64::max)?;
391 let x_min = min_val - 3.0 * bandwidth;
392 let x_max = max_val + 3.0 * bandwidth;
393 let step = (x_max - x_min) / (n_points - 1) as f64;
394
395 let x: Vec<f64> = (0..n_points).map(|i| x_min + i as f64 * step).collect();
396
397 let inv_h = 1.0 / bandwidth;
399 let inv_nh = inv_h / n as f64;
400 let inv_sqrt_2pi = 1.0 / (2.0 * std::f64::consts::PI).sqrt();
401
402 let density: Vec<f64> = x
403 .iter()
404 .map(|&xi| {
405 let sum: f64 = data
406 .iter()
407 .map(|&xj| {
408 let u = (xi - xj) * inv_h;
409 inv_sqrt_2pi * (-0.5 * u * u).exp()
410 })
411 .sum();
412 sum * inv_nh
413 })
414 .collect();
415
416 Some(KdeResult {
417 x,
418 density,
419 bandwidth,
420 })
421}
422
423pub fn kde_evaluate(data: &[f64], bandwidth: f64, x: f64) -> Option<f64> {
432 let n = data.len();
433 if n == 0 || bandwidth <= 0.0 || !bandwidth.is_finite() || !x.is_finite() {
434 return None;
435 }
436 if data.iter().any(|v| !v.is_finite()) {
437 return None;
438 }
439
440 let inv_h = 1.0 / bandwidth;
441 let inv_nh = inv_h / n as f64;
442 let inv_sqrt_2pi = 1.0 / (2.0 * std::f64::consts::PI).sqrt();
443
444 let sum: f64 = data
445 .iter()
446 .map(|&xj| {
447 let u = (x - xj) * inv_h;
448 inv_sqrt_2pi * (-0.5 * u * u).exp()
449 })
450 .sum();
451
452 Some(sum * inv_nh)
453}
454
455pub fn kde_bandwidth(data: &[f64], method: BandwidthMethod) -> Option<f64> {
464 let n = data.len();
465 if n < 2 || data.iter().any(|v| !v.is_finite()) {
466 return None;
467 }
468
469 match method {
470 BandwidthMethod::Silverman => {
471 let sd = stats::std_dev(data)?;
472 if sd < 1e-300 {
473 return None;
474 }
475 let q1 = stats::quantile(data, 0.25)?;
476 let q3 = stats::quantile(data, 0.75)?;
477 let iqr = q3 - q1;
478 let spread = if iqr > 1e-300 { sd.min(iqr / 1.34) } else { sd };
479 Some(0.9 * spread * (n as f64).powf(-0.2))
480 }
481 BandwidthMethod::Scott => {
482 let sd = stats::std_dev(data)?;
483 if sd < 1e-300 {
484 return None;
485 }
486 Some(1.06 * sd * (n as f64).powf(-0.2))
487 }
488 BandwidthMethod::Manual(h) => {
489 if h <= 0.0 || !h.is_finite() {
490 None
491 } else {
492 Some(h)
493 }
494 }
495 }
496}
497
498#[derive(Debug, Clone)]
504pub struct FitResult {
505 pub distribution: String,
507 pub parameters: Vec<(String, f64)>,
509 pub log_likelihood: f64,
511 pub aic: f64,
513 pub bic: f64,
515 pub n_params: usize,
517}
518
519pub fn fit_normal(data: &[f64]) -> Option<FitResult> {
542 let n = data.len();
543 if n < 2 || data.iter().any(|v| !v.is_finite()) {
544 return None;
545 }
546
547 let nf = n as f64;
548 let mu = stats::mean(data)?;
549
550 let sum_sq: f64 = data.iter().map(|&x| (x - mu).powi(2)).sum();
552 let sigma_sq = sum_sq / nf;
553 if sigma_sq < 1e-300 {
554 return None;
555 }
556 let sigma = sigma_sq.sqrt();
557
558 let log_lik = -nf / 2.0 * (2.0 * std::f64::consts::PI).ln() - nf * sigma.ln() - nf / 2.0;
560
561 let k = 2;
562 let aic = -2.0 * log_lik + 2.0 * k as f64;
563 let bic = -2.0 * log_lik + k as f64 * nf.ln();
564
565 Some(FitResult {
566 distribution: "Normal".to_string(),
567 parameters: vec![("mu".to_string(), mu), ("sigma".to_string(), sigma)],
568 log_likelihood: log_lik,
569 aic,
570 bic,
571 n_params: k,
572 })
573}
574
575pub fn fit_exponential(data: &[f64]) -> Option<FitResult> {
596 let n = data.len();
597 if n < 2 || data.iter().any(|v| !v.is_finite()) {
598 return None;
599 }
600 if data.iter().any(|&v| v <= 0.0) {
602 return None;
603 }
604
605 let nf = n as f64;
606 let mean = stats::mean(data)?;
607 if mean < 1e-300 {
608 return None;
609 }
610
611 let lambda = 1.0 / mean;
612
613 let log_lik = nf * lambda.ln() - nf;
615
616 let k = 1;
617 let aic = -2.0 * log_lik + 2.0 * k as f64;
618 let bic = -2.0 * log_lik + k as f64 * nf.ln();
619
620 Some(FitResult {
621 distribution: "Exponential".to_string(),
622 parameters: vec![("lambda".to_string(), lambda)],
623 log_likelihood: log_lik,
624 aic,
625 bic,
626 n_params: k,
627 })
628}
629
630fn digamma(x: f64) -> f64 {
635 if x <= 0.0 {
636 return f64::NAN;
637 }
638
639 let mut val = 0.0;
641 let mut x = x;
642 while x < 8.0 {
643 val -= 1.0 / x;
644 x += 1.0;
645 }
646
647 let inv_x = 1.0 / x;
650 let inv_x2 = inv_x * inv_x;
651 val += x.ln()
652 - 0.5 * inv_x
653 - inv_x2 * (1.0 / 12.0 - inv_x2 * (1.0 / 120.0 - inv_x2 * (1.0 / 252.0)));
654
655 val
656}
657
658fn trigamma(x: f64) -> f64 {
662 if x <= 0.0 {
663 return f64::NAN;
664 }
665
666 let mut val = 0.0;
668 let mut x = x;
669 while x < 8.0 {
670 val += 1.0 / (x * x);
671 x += 1.0;
672 }
673
674 let inv_x = 1.0 / x;
677 let inv_x2 = inv_x * inv_x;
678 val += inv_x
679 + 0.5 * inv_x2
680 + inv_x2 * inv_x * (1.0 / 6.0 - inv_x2 * (1.0 / 30.0 - inv_x2 * (1.0 / 42.0)));
681
682 val
683}
684
685pub fn fit_gamma(data: &[f64]) -> Option<FitResult> {
714 let n = data.len();
715 if n < 2 || data.iter().any(|v| !v.is_finite()) {
716 return None;
717 }
718 if data.iter().any(|&v| v <= 0.0) {
720 return None;
721 }
722
723 let nf = n as f64;
724 let mean = stats::mean(data)?;
725 if mean < 1e-300 {
726 return None;
727 }
728
729 let var: f64 = data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / nf;
731 if var < 1e-300 {
732 return None;
733 }
734
735 let sum_log: f64 = data.iter().map(|&x| x.ln()).sum();
736 let mean_log = sum_log / nf;
737 let target = mean.ln() - mean_log; let mut alpha = mean * mean / var;
741 if alpha < 1e-10 {
742 alpha = 0.5;
743 }
744
745 for _ in 0..200 {
747 let f = alpha.ln() - digamma(alpha) - target;
748 let f_prime = 1.0 / alpha - trigamma(alpha);
749 if f_prime.abs() < 1e-30 {
750 break;
751 }
752
753 let delta = f / f_prime;
754 alpha -= delta;
755
756 if alpha <= 0.0 {
758 alpha = 1e-10;
759 }
760
761 if delta.abs() < 1e-10 {
762 break;
763 }
764 }
765
766 if !alpha.is_finite() || alpha <= 0.0 {
767 return None;
768 }
769
770 let beta = alpha / mean; let sum_x: f64 = data.iter().sum();
774 let log_lik = nf * alpha * beta.ln() - nf * special::ln_gamma(alpha) + (alpha - 1.0) * sum_log
775 - beta * sum_x;
776
777 let k = 2;
778 let aic = -2.0 * log_lik + 2.0 * k as f64;
779 let bic = -2.0 * log_lik + k as f64 * nf.ln();
780
781 Some(FitResult {
782 distribution: "Gamma".to_string(),
783 parameters: vec![("alpha".to_string(), alpha), ("beta".to_string(), beta)],
784 log_likelihood: log_lik,
785 aic,
786 bic,
787 n_params: k,
788 })
789}
790
791pub fn fit_lognormal(data: &[f64]) -> Option<FitResult> {
815 let n = data.len();
816 if n < 2 || data.iter().any(|v| !v.is_finite()) {
817 return None;
818 }
819 if data.iter().any(|&v| v <= 0.0) {
820 return None;
821 }
822
823 let nf = n as f64;
824 let log_data: Vec<f64> = data.iter().map(|&x| x.ln()).collect();
825
826 let mu = log_data.iter().sum::<f64>() / nf;
827 let sigma_sq = log_data.iter().map(|&y| (y - mu).powi(2)).sum::<f64>() / nf;
828 if sigma_sq < 1e-300 {
829 return None;
830 }
831 let sigma = sigma_sq.sqrt();
832
833 let sum_log: f64 = log_data.iter().sum();
835 let log_lik =
836 -nf / 2.0 * (2.0 * std::f64::consts::PI).ln() - nf * sigma.ln() - nf / 2.0 - sum_log;
837
838 let k = 2;
839 let aic = -2.0 * log_lik + 2.0 * k as f64;
840 let bic = -2.0 * log_lik + k as f64 * nf.ln();
841
842 Some(FitResult {
843 distribution: "LogNormal".to_string(),
844 parameters: vec![("mu".to_string(), mu), ("sigma".to_string(), sigma)],
845 log_likelihood: log_lik,
846 aic,
847 bic,
848 n_params: k,
849 })
850}
851
852pub fn fit_poisson(data: &[f64]) -> Option<FitResult> {
874 let n = data.len();
875 if n < 2 || data.iter().any(|v| !v.is_finite()) {
876 return None;
877 }
878 if data.iter().any(|&v| v < 0.0) {
879 return None;
880 }
881
882 let nf = n as f64;
883 let lambda = stats::mean(data)?;
884 if lambda < 1e-300 {
885 return None; }
887
888 let sum_log_fact: f64 = data.iter().map(|&x| special::ln_gamma(x + 1.0)).sum();
891 let log_lik = nf * lambda * lambda.ln() - nf * lambda - sum_log_fact;
892
893 let k = 1;
894 let aic = -2.0 * log_lik + 2.0 * k as f64;
895 let bic = -2.0 * log_lik + k as f64 * nf.ln();
896
897 Some(FitResult {
898 distribution: "Poisson".to_string(),
899 parameters: vec![("lambda".to_string(), lambda)],
900 log_likelihood: log_lik,
901 aic,
902 bic,
903 n_params: k,
904 })
905}
906
907pub fn fit_beta(data: &[f64]) -> Option<FitResult> {
937 let n = data.len();
938 if n < 2 || data.iter().any(|v| !v.is_finite()) {
939 return None;
940 }
941 if data.iter().any(|&v| v <= 0.0 || v >= 1.0) {
943 return None;
944 }
945
946 let nf = n as f64;
947 let mean = stats::mean(data)?;
948 let var = data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / nf;
949 if var < 1e-300 {
950 return None;
951 }
952
953 let sum_log_x: f64 = data.iter().map(|&x| x.ln()).sum();
954 let sum_log_1mx: f64 = data.iter().map(|&x| (1.0 - x).ln()).sum();
955 let mean_log_x = sum_log_x / nf;
956 let mean_log_1mx = sum_log_1mx / nf;
957
958 let factor = mean * (1.0 - mean) / var - 1.0;
960 if factor <= 0.0 {
961 return None;
962 }
963 let mut alpha = mean * factor;
964 let mut beta = (1.0 - mean) * factor;
965
966 if alpha < 0.01 {
967 alpha = 0.5;
968 }
969 if beta < 0.01 {
970 beta = 0.5;
971 }
972
973 for _ in 0..200 {
975 let psi_ab = digamma(alpha + beta);
976 let tri_ab = trigamma(alpha + beta);
977
978 let f1 = digamma(alpha) - psi_ab - mean_log_x;
979 let f2 = digamma(beta) - psi_ab - mean_log_1mx;
980
981 let j11 = trigamma(alpha) - tri_ab;
982 let j12 = -tri_ab;
983 let j21 = -tri_ab;
984 let j22 = trigamma(beta) - tri_ab;
985
986 let det = j11 * j22 - j12 * j21;
987 if det.abs() < 1e-30 {
988 break;
989 }
990
991 let da = (j22 * f1 - j12 * f2) / det;
992 let db = (j11 * f2 - j21 * f1) / det;
993
994 alpha -= da;
995 beta -= db;
996
997 if alpha <= 0.0 {
998 alpha = 1e-10;
999 }
1000 if beta <= 0.0 {
1001 beta = 1e-10;
1002 }
1003
1004 if da.abs() < 1e-10 && db.abs() < 1e-10 {
1005 break;
1006 }
1007 }
1008
1009 if !alpha.is_finite() || !beta.is_finite() || alpha <= 0.0 || beta <= 0.0 {
1010 return None;
1011 }
1012
1013 let log_lik = nf
1016 * (special::ln_gamma(alpha + beta) - special::ln_gamma(alpha) - special::ln_gamma(beta))
1017 + (alpha - 1.0) * sum_log_x
1018 + (beta - 1.0) * sum_log_1mx;
1019
1020 let k = 2;
1021 let aic = -2.0 * log_lik + 2.0 * k as f64;
1022 let bic = -2.0 * log_lik + k as f64 * nf.ln();
1023
1024 Some(FitResult {
1025 distribution: "Beta".to_string(),
1026 parameters: vec![("alpha".to_string(), alpha), ("beta".to_string(), beta)],
1027 log_likelihood: log_lik,
1028 aic,
1029 bic,
1030 n_params: k,
1031 })
1032}
1033
1034pub fn fit_best(data: &[f64]) -> Vec<FitResult> {
1054 let mut results = Vec::new();
1055
1056 if let Some(r) = fit_normal(data) {
1057 results.push(r);
1058 }
1059 if let Some(r) = fit_exponential(data) {
1060 results.push(r);
1061 }
1062 if let Some(r) = fit_gamma(data) {
1063 results.push(r);
1064 }
1065 if let Some(r) = fit_lognormal(data) {
1066 results.push(r);
1067 }
1068 if let Some(r) = fit_poisson(data) {
1069 results.push(r);
1070 }
1071
1072 results.sort_by(|a, b| {
1073 a.aic
1074 .partial_cmp(&b.aic)
1075 .unwrap_or(std::cmp::Ordering::Equal)
1076 });
1077 results
1078}
1079
1080#[cfg(test)]
1081mod tests {
1082 use super::*;
1083
1084 #[test]
1089 fn ecdf_basic() {
1090 let (vals, probs) = ecdf(&[3.0, 1.0, 2.0]).expect("should compute");
1091 assert_eq!(vals, vec![1.0, 2.0, 3.0]);
1092 assert!((probs[0] - 1.0 / 3.0).abs() < 1e-10);
1093 assert!((probs[1] - 2.0 / 3.0).abs() < 1e-10);
1094 assert!((probs[2] - 1.0).abs() < 1e-10);
1095 }
1096
1097 #[test]
1098 fn ecdf_with_ties() {
1099 let (vals, probs) = ecdf(&[1.0, 2.0, 2.0, 3.0]).expect("should compute");
1100 assert_eq!(vals, vec![1.0, 2.0, 3.0]);
1101 assert!((probs[0] - 0.25).abs() < 1e-10);
1102 assert!((probs[1] - 0.75).abs() < 1e-10); assert!((probs[2] - 1.0).abs() < 1e-10);
1104 }
1105
1106 #[test]
1107 fn ecdf_single() {
1108 let (vals, probs) = ecdf(&[5.0]).expect("should compute");
1109 assert_eq!(vals, vec![5.0]);
1110 assert!((probs[0] - 1.0).abs() < 1e-10);
1111 }
1112
1113 #[test]
1114 fn ecdf_empty() {
1115 assert!(ecdf(&[]).is_none());
1116 }
1117
1118 #[test]
1119 fn ecdf_nan() {
1120 assert!(ecdf(&[1.0, f64::NAN, 3.0]).is_none());
1121 }
1122
1123 #[test]
1128 fn hist_sturges_basic() {
1129 let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
1130 let r = histogram_bins(&data, BinMethod::Sturges).expect("should compute");
1131 assert!(r.n_bins >= 5);
1132 assert_eq!(r.edges.len(), r.n_bins + 1);
1133 assert_eq!(r.counts.len(), r.n_bins);
1134 let total: usize = r.counts.iter().sum();
1135 assert_eq!(total, 100);
1136 }
1137
1138 #[test]
1139 fn hist_scott() {
1140 let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
1141 let r = histogram_bins(&data, BinMethod::Scott).expect("should compute");
1142 assert!(r.n_bins >= 3);
1143 let total: usize = r.counts.iter().sum();
1144 assert_eq!(total, 100);
1145 }
1146
1147 #[test]
1148 fn hist_fd() {
1149 let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
1150 let r = histogram_bins(&data, BinMethod::FreedmanDiaconis).expect("should compute");
1151 assert!(r.n_bins >= 3);
1152 let total: usize = r.counts.iter().sum();
1153 assert_eq!(total, 100);
1154 }
1155
1156 #[test]
1157 fn hist_edge_cases() {
1158 assert!(histogram_bins(&[1.0], BinMethod::Sturges).is_none()); assert!(histogram_bins(&[5.0, 5.0, 5.0], BinMethod::Sturges).is_none());
1160 }
1162
1163 #[test]
1164 fn hist_fixed_exact_count() {
1165 let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
1166 let r = histogram_bins(&data, BinMethod::Fixed(5)).expect("should compute");
1167 assert_eq!(r.n_bins, 5);
1168 assert_eq!(r.edges.len(), 6);
1169 assert_eq!(r.counts.len(), 5);
1170 let total: usize = r.counts.iter().sum();
1171 assert_eq!(total, 100);
1172 }
1173
1174 #[test]
1175 fn hist_fixed_single_bin() {
1176 let data = [1.0, 2.0, 3.0];
1177 let r = histogram_bins(&data, BinMethod::Fixed(1)).expect("should compute");
1178 assert_eq!(r.n_bins, 1);
1179 assert_eq!(r.counts, vec![3]);
1180 }
1181
1182 #[test]
1183 fn hist_fixed_zero_is_invalid() {
1184 let data = [1.0, 2.0, 3.0];
1185 assert!(histogram_bins(&data, BinMethod::Fixed(0)).is_none());
1186 }
1187
1188 #[test]
1189 fn hist_fixed_shares_input_guards() {
1190 assert!(histogram_bins(&[1.0], BinMethod::Fixed(5)).is_none()); assert!(histogram_bins(&[5.0, 5.0, 5.0], BinMethod::Fixed(5)).is_none());
1192 }
1194
1195 #[test]
1200 fn qq_basic() {
1201 let data = [-1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5];
1202 let (theo, samp) = qq_plot_normal(&data).expect("should compute");
1203 assert_eq!(theo.len(), 7);
1204 assert_eq!(samp.len(), 7);
1205 assert!((samp[0] + 1.5).abs() < 1e-10);
1207 assert!((samp[6] - 1.5).abs() < 1e-10);
1208 assert!(theo[3].abs() < 0.1);
1210 }
1211
1212 #[test]
1213 fn qq_edge_cases() {
1214 assert!(qq_plot_normal(&[1.0, 2.0]).is_none()); }
1216
1217 #[test]
1222 fn ks_normal_data() {
1223 let data = [-2.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5];
1225 let (d, p) = ks_test_normal(&data).expect("should compute");
1226 assert!(d > 0.0 && d < 1.0, "D = {d}");
1227 assert!(p > 0.05, "p = {p}");
1228 }
1229
1230 #[test]
1231 fn ks_non_normal_data() {
1232 let data: Vec<f64> = (0..50).map(|i| i as f64 * 2.0).collect();
1234 let (d, _p) = ks_test_normal(&data).expect("should compute");
1235 assert!(d > 0.0);
1236 }
1237
1238 #[test]
1239 fn ks_edge_cases() {
1240 assert!(ks_test_normal(&[1.0, 2.0, 3.0, 4.0]).is_none()); assert!(ks_test_normal(&[5.0, 5.0, 5.0, 5.0, 5.0]).is_none()); }
1243
1244 #[test]
1249 fn kde_silverman_basic() {
1250 let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
1251 let r = kde(&data, BandwidthMethod::Silverman, 256).expect("should compute");
1252 assert_eq!(r.x.len(), 256);
1253 assert_eq!(r.density.len(), 256);
1254 assert!(r.bandwidth > 0.0);
1255 assert!(r.density.iter().all(|&d| d >= 0.0));
1257 let dx = r.x[1] - r.x[0];
1259 let integral: f64 = r.density.iter().sum::<f64>() * dx;
1260 assert!(
1261 (integral - 1.0).abs() < 0.05,
1262 "integral = {integral}, expected ≈ 1.0"
1263 );
1264 }
1265
1266 #[test]
1267 fn kde_scott_basic() {
1268 let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
1269 let r = kde(&data, BandwidthMethod::Scott, 256).expect("should compute");
1270 assert!(r.bandwidth > 0.0);
1271 let dx = r.x[1] - r.x[0];
1272 let integral: f64 = r.density.iter().sum::<f64>() * dx;
1273 assert!(
1274 (integral - 1.0).abs() < 0.05,
1275 "integral = {integral}, expected ≈ 1.0"
1276 );
1277 }
1278
1279 #[test]
1280 fn kde_manual_bandwidth() {
1281 let data = [1.0, 2.0, 3.0, 4.0, 5.0];
1282 let r = kde(&data, BandwidthMethod::Manual(0.5), 128).expect("should compute");
1283 assert!((r.bandwidth - 0.5).abs() < 1e-10);
1284 assert_eq!(r.x.len(), 128);
1285 }
1286
1287 #[test]
1288 fn kde_edge_cases() {
1289 assert!(kde(&[1.0], BandwidthMethod::Silverman, 256).is_none());
1291 assert!(kde(&[], BandwidthMethod::Silverman, 256).is_none());
1293 assert!(kde(&[1.0, f64::NAN], BandwidthMethod::Silverman, 256).is_none());
1295 assert!(kde(&[5.0, 5.0, 5.0], BandwidthMethod::Silverman, 256).is_none());
1297 assert!(kde(&[1.0, 2.0, 3.0], BandwidthMethod::Manual(1.0), 1).is_none());
1299 assert!(kde(&[1.0, 2.0, 3.0], BandwidthMethod::Manual(0.0), 256).is_none());
1301 assert!(kde(&[1.0, 2.0, 3.0], BandwidthMethod::Manual(-1.0), 256).is_none());
1302 }
1303
1304 #[test]
1305 fn kde_bimodal() {
1306 let mut data = Vec::new();
1308 for i in 0..50 {
1309 data.push(i as f64 * 0.1); }
1311 for i in 0..50 {
1312 data.push(10.0 + i as f64 * 0.1); }
1314 let r = kde(&data, BandwidthMethod::Silverman, 512).expect("should compute");
1315 let idx_cluster = r.x.iter().position(|&x| x >= 2.5).expect("grid point");
1318 let idx_valley = r.x.iter().position(|&x| x >= 7.5).expect("grid point");
1319 assert!(
1320 r.density[idx_cluster] > r.density[idx_valley],
1321 "cluster density ({}) should exceed valley density ({})",
1322 r.density[idx_cluster],
1323 r.density[idx_valley]
1324 );
1325 }
1326
1327 #[test]
1328 fn kde_evaluate_single() {
1329 let data = [0.0, 1.0, 2.0, 3.0, 4.0];
1330 let h = 1.0;
1331 let d = kde_evaluate(&data, h, 2.0).expect("should compute");
1332 assert!(d > 0.0);
1333 let d_edge = kde_evaluate(&data, h, -5.0).expect("should compute");
1335 assert!(d > d_edge, "center density ({d}) > edge density ({d_edge})");
1336 }
1337
1338 #[test]
1339 fn kde_evaluate_edge_cases() {
1340 assert!(kde_evaluate(&[], 1.0, 0.0).is_none());
1341 assert!(kde_evaluate(&[1.0], 0.0, 0.0).is_none());
1342 assert!(kde_evaluate(&[1.0], -1.0, 0.0).is_none());
1343 assert!(kde_evaluate(&[1.0], 1.0, f64::NAN).is_none());
1344 assert!(kde_evaluate(&[f64::NAN], 1.0, 0.0).is_none());
1345 }
1346
1347 #[test]
1348 fn kde_bandwidth_methods() {
1349 let data: Vec<f64> = (0..50).map(|i| i as f64).collect();
1350 let h_silv = kde_bandwidth(&data, BandwidthMethod::Silverman).expect("silverman");
1351 let h_scott = kde_bandwidth(&data, BandwidthMethod::Scott).expect("scott");
1352 let h_manual = kde_bandwidth(&data, BandwidthMethod::Manual(2.5)).expect("manual");
1353 assert!(h_silv > 0.0);
1354 assert!(h_scott > 0.0);
1355 assert!((h_manual - 2.5).abs() < 1e-10);
1356 assert!(h_scott > h_silv);
1359 }
1360
1361 #[test]
1362 fn kde_bandwidth_edge_cases() {
1363 assert!(kde_bandwidth(&[1.0], BandwidthMethod::Silverman).is_none());
1364 assert!(kde_bandwidth(&[5.0, 5.0], BandwidthMethod::Silverman).is_none());
1365 assert!(kde_bandwidth(&[1.0, 2.0], BandwidthMethod::Manual(0.0)).is_none());
1366 }
1367
1368 #[test]
1369 fn kde_grid_extends_beyond_data() {
1370 let data = [10.0, 20.0, 30.0];
1371 let r = kde(&data, BandwidthMethod::Manual(2.0), 64).expect("should compute");
1372 assert!(r.x[0] < 10.0 - 2.0, "grid starts at {}", r.x[0]);
1374 assert!(
1375 *r.x.last().expect("non-empty") > 30.0 + 2.0,
1376 "grid ends at {}",
1377 r.x.last().expect("non-empty")
1378 );
1379 }
1380
1381 #[test]
1386 fn fit_normal_basic() {
1387 let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
1388 let r = fit_normal(&data).expect("should compute");
1389 assert_eq!(r.distribution, "Normal");
1390 assert_eq!(r.n_params, 2);
1391 assert!((r.parameters[0].1 - 5.0).abs() < 1e-10, "μ̂ = 5.0");
1392 let expected_sigma = (32.0_f64 / 8.0).sqrt();
1395 assert!(
1396 (r.parameters[1].1 - expected_sigma).abs() < 1e-10,
1397 "σ̂ = {}, expected {}",
1398 r.parameters[1].1,
1399 expected_sigma
1400 );
1401 assert!(r.log_likelihood.is_finite());
1402 assert!(r.aic.is_finite());
1403 assert!(r.bic.is_finite());
1404 }
1405
1406 #[test]
1407 fn fit_normal_edge_cases() {
1408 assert!(fit_normal(&[1.0]).is_none()); assert!(fit_normal(&[]).is_none());
1410 assert!(fit_normal(&[1.0, f64::NAN]).is_none());
1411 assert!(fit_normal(&[5.0, 5.0, 5.0]).is_none()); }
1413
1414 #[test]
1415 fn fit_exponential_basic() {
1416 let data = [0.5, 1.2, 0.8, 2.1, 0.3, 1.5, 0.9, 1.8];
1417 let r = fit_exponential(&data).expect("should compute");
1418 assert_eq!(r.distribution, "Exponential");
1419 assert_eq!(r.n_params, 1);
1420 let mean: f64 = data.iter().sum::<f64>() / data.len() as f64;
1421 let expected_lambda = 1.0 / mean;
1422 assert!(
1423 (r.parameters[0].1 - expected_lambda).abs() < 1e-10,
1424 "λ̂ = {}, expected {}",
1425 r.parameters[0].1,
1426 expected_lambda
1427 );
1428 }
1429
1430 #[test]
1431 fn fit_exponential_edge_cases() {
1432 assert!(fit_exponential(&[1.0]).is_none()); assert!(fit_exponential(&[1.0, -1.0]).is_none()); assert!(fit_exponential(&[0.0, 1.0]).is_none()); assert!(fit_exponential(&[1.0, f64::NAN]).is_none());
1436 }
1437
1438 #[test]
1439 fn fit_gamma_basic() {
1440 let data = [
1442 1.5, 2.1, 1.8, 2.5, 1.9, 2.3, 2.0, 1.7, 2.4, 2.2, 1.6, 2.6, 1.4, 2.8, 2.1, 1.9, 2.3,
1443 2.0, 1.8, 2.5,
1444 ];
1445 let r = fit_gamma(&data).expect("should compute");
1446 assert_eq!(r.distribution, "Gamma");
1447 assert_eq!(r.n_params, 2);
1448 assert!(r.parameters[0].1 > 0.0, "α̂ > 0");
1449 assert!(r.parameters[1].1 > 0.0, "β̂ > 0");
1450 assert!(r.log_likelihood.is_finite());
1451 }
1452
1453 #[test]
1454 fn fit_gamma_edge_cases() {
1455 assert!(fit_gamma(&[1.0]).is_none()); assert!(fit_gamma(&[1.0, -1.0]).is_none()); assert!(fit_gamma(&[0.0, 1.0]).is_none()); assert!(fit_gamma(&[1.0, f64::NAN]).is_none());
1459 assert!(fit_gamma(&[5.0, 5.0, 5.0]).is_none()); }
1461
1462 #[test]
1463 fn fit_gamma_mean_rate_relationship() {
1464 let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
1465 let r = fit_gamma(&data).expect("should compute");
1466 let alpha = r.parameters[0].1;
1467 let beta = r.parameters[1].1;
1468 let mean: f64 = data.iter().sum::<f64>() / data.len() as f64;
1469 assert!(
1471 (alpha / beta - mean).abs() < 1e-6,
1472 "α/β = {}, mean = {}",
1473 alpha / beta,
1474 mean
1475 );
1476 }
1477
1478 #[test]
1479 fn fit_best_sorted_by_aic() {
1480 let data = [2.1, 3.5, 1.8, 4.2, 2.9, 3.1, 2.5, 3.8, 1.5, 2.7];
1481 let fits = fit_best(&data);
1482 assert!(!fits.is_empty());
1483 assert!(fits.len() >= 2, "got {} fits", fits.len());
1485 for w in fits.windows(2) {
1487 assert!(
1488 w[0].aic <= w[1].aic,
1489 "AIC not sorted: {} > {}",
1490 w[0].aic,
1491 w[1].aic
1492 );
1493 }
1494 }
1495
1496 #[test]
1497 fn fit_best_mixed_sign_data() {
1498 let data = [-2.0, -1.0, 0.5, 1.0, 2.0, 3.0, 1.5, -0.5];
1500 let fits = fit_best(&data);
1501 assert_eq!(fits.len(), 1, "only Normal should fit");
1502 assert_eq!(fits[0].distribution, "Normal");
1503 }
1504
1505 #[test]
1506 fn digamma_known_values() {
1507 assert!((digamma(1.0) + 0.5772156649).abs() < 1e-8);
1509 assert!((digamma(2.0) - 0.4227843351).abs() < 1e-8);
1511 assert!((digamma(0.5) + 1.9635100260).abs() < 1e-7);
1513 }
1514
1515 #[test]
1516 fn trigamma_known_values() {
1517 assert!((trigamma(1.0) - std::f64::consts::PI.powi(2) / 6.0).abs() < 1e-7);
1519 assert!((trigamma(2.0) - (std::f64::consts::PI.powi(2) / 6.0 - 1.0)).abs() < 1e-7);
1521 }
1522
1523 #[test]
1528 fn fit_lognormal_basic() {
1529 let data = [1.2, 2.5, 1.8, 3.1, 0.9, 2.0, 1.5, 4.0];
1530 let r = fit_lognormal(&data).expect("should compute");
1531 assert_eq!(r.distribution, "LogNormal");
1532 assert_eq!(r.n_params, 2);
1533 assert!(r.parameters[1].1 > 0.0); assert!(r.log_likelihood.is_finite());
1535 }
1536
1537 #[test]
1538 fn fit_lognormal_params_match_log_transform() {
1539 let data = [1.0, 2.0, 3.0, 4.0, 5.0];
1540 let r = fit_lognormal(&data).expect("should compute");
1541 let mu = r.parameters[0].1;
1542 let sigma = r.parameters[1].1;
1543 let log_data: Vec<f64> = data.iter().map(|&x| x.ln()).collect();
1545 let expected_mu: f64 = log_data.iter().sum::<f64>() / 5.0;
1546 assert!(
1547 (mu - expected_mu).abs() < 1e-10,
1548 "μ = {}, expected {}",
1549 mu,
1550 expected_mu
1551 );
1552 assert!(sigma > 0.0);
1553 }
1554
1555 #[test]
1556 fn fit_lognormal_edge_cases() {
1557 assert!(fit_lognormal(&[1.0]).is_none()); assert!(fit_lognormal(&[1.0, -1.0]).is_none()); assert!(fit_lognormal(&[0.0, 1.0]).is_none()); }
1561
1562 #[test]
1567 fn fit_poisson_basic() {
1568 let data = [2.0, 3.0, 1.0, 4.0, 2.0, 3.0, 1.0, 2.0];
1569 let r = fit_poisson(&data).expect("should compute");
1570 assert_eq!(r.distribution, "Poisson");
1571 assert_eq!(r.n_params, 1);
1572 let mean: f64 = data.iter().sum::<f64>() / data.len() as f64;
1573 assert!(
1574 (r.parameters[0].1 - mean).abs() < 1e-10,
1575 "λ̂ = {}, expected {}",
1576 r.parameters[0].1,
1577 mean
1578 );
1579 }
1580
1581 #[test]
1582 fn fit_poisson_edge_cases() {
1583 assert!(fit_poisson(&[1.0]).is_none()); assert!(fit_poisson(&[1.0, -1.0]).is_none()); assert!(fit_poisson(&[0.0, 0.0]).is_none()); }
1587
1588 #[test]
1593 fn fit_beta_basic() {
1594 let data = [0.2, 0.35, 0.5, 0.15, 0.45, 0.3, 0.6, 0.25, 0.4, 0.55];
1595 let r = fit_beta(&data).expect("should compute");
1596 assert_eq!(r.distribution, "Beta");
1597 assert_eq!(r.n_params, 2);
1598 assert!(r.parameters[0].1 > 0.0, "α̂ > 0");
1599 assert!(r.parameters[1].1 > 0.0, "β̂ > 0");
1600 assert!(r.log_likelihood.is_finite());
1601 }
1602
1603 #[test]
1604 fn fit_beta_symmetric() {
1605 let data = [0.3, 0.4, 0.5, 0.6, 0.7, 0.35, 0.45, 0.55, 0.65, 0.5];
1607 let r = fit_beta(&data).expect("should compute");
1608 let alpha = r.parameters[0].1;
1609 let beta = r.parameters[1].1;
1610 assert!(
1612 (alpha - beta).abs() / alpha < 0.3,
1613 "α = {alpha}, β = {beta}, should be roughly equal"
1614 );
1615 }
1616
1617 #[test]
1618 fn fit_beta_edge_cases() {
1619 assert!(fit_beta(&[0.5]).is_none()); assert!(fit_beta(&[0.5, 1.0]).is_none()); assert!(fit_beta(&[0.0, 0.5]).is_none()); assert!(fit_beta(&[0.5, 1.5]).is_none()); assert!(fit_beta(&[0.5, -0.5]).is_none()); }
1625
1626 #[test]
1631 fn fit_best_includes_lognormal_and_poisson() {
1632 let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
1633 let fits = fit_best(&data);
1634 let names: Vec<&str> = fits.iter().map(|f| f.distribution.as_str()).collect();
1635 assert!(names.contains(&"Normal"), "should include Normal");
1637 assert!(names.contains(&"Exponential"), "should include Exponential");
1638 assert!(names.contains(&"Gamma"), "should include Gamma");
1639 assert!(names.contains(&"LogNormal"), "should include LogNormal");
1640 assert!(names.contains(&"Poisson"), "should include Poisson");
1641 }
1642}
1643
1644#[cfg(test)]
1645mod proptests {
1646 use super::*;
1647 use proptest::prelude::*;
1648
1649 proptest! {
1650 #[test]
1651 fn ecdf_last_is_one(
1652 data in proptest::collection::vec(-1e3_f64..1e3, 1..=50)
1653 ) {
1654 if let Some((_, probs)) = ecdf(&data) {
1655 let last = *probs.last().expect("non-empty");
1656 prop_assert!((last - 1.0).abs() < 1e-10, "last prob = {last}");
1657 }
1658 }
1659
1660 #[test]
1661 fn ecdf_monotonic(
1662 data in proptest::collection::vec(-1e3_f64..1e3, 2..=50)
1663 ) {
1664 if let Some((_, probs)) = ecdf(&data) {
1665 for w in probs.windows(2) {
1666 prop_assert!(w[1] >= w[0] - 1e-15, "{} < {}", w[1], w[0]);
1667 }
1668 }
1669 }
1670
1671 #[test]
1672 fn hist_counts_sum_to_n(
1673 data in proptest::collection::vec(-1e3_f64..1e3, 5..=100)
1674 ) {
1675 if let Some(r) = histogram_bins(&data, BinMethod::Sturges) {
1676 let total: usize = r.counts.iter().sum();
1677 prop_assert_eq!(total, data.len());
1678 }
1679 }
1680
1681 #[test]
1682 fn kde_density_non_negative(
1683 data in proptest::collection::vec(-100.0_f64..100.0, 5..=50)
1684 ) {
1685 if let Some(r) = kde(&data, BandwidthMethod::Silverman, 128) {
1686 for &d in &r.density {
1687 prop_assert!(d >= 0.0, "negative density: {d}");
1688 }
1689 }
1690 }
1691
1692 #[test]
1693 fn kde_integral_approx_one(
1694 data in proptest::collection::vec(-100.0_f64..100.0, 10..=80)
1695 ) {
1696 if let Some(r) = kde(&data, BandwidthMethod::Silverman, 512) {
1697 let dx = r.x[1] - r.x[0];
1698 let integral: f64 = r.density.iter().sum::<f64>() * dx;
1699 prop_assert!(
1700 (integral - 1.0).abs() < 0.1,
1701 "integral = {integral}, expected ≈ 1.0"
1702 );
1703 }
1704 }
1705
1706 #[test]
1707 fn fit_normal_aic_finite(
1708 data in proptest::collection::vec(-100.0_f64..100.0, 5..=50)
1709 ) {
1710 if let Some(r) = fit_normal(&data) {
1711 prop_assert!(r.aic.is_finite(), "AIC = {}", r.aic);
1712 prop_assert!(r.bic.is_finite(), "BIC = {}", r.bic);
1713 prop_assert!(r.log_likelihood.is_finite(), "LL = {}", r.log_likelihood);
1714 }
1715 }
1716
1717 #[test]
1718 fn fit_gamma_alpha_positive(
1719 data in proptest::collection::vec(0.1_f64..100.0, 5..=50)
1720 ) {
1721 if let Some(r) = fit_gamma(&data) {
1722 let alpha = r.parameters[0].1;
1723 let beta = r.parameters[1].1;
1724 prop_assert!(alpha > 0.0, "α = {alpha}");
1725 prop_assert!(beta > 0.0, "β = {beta}");
1726 prop_assert!(r.aic.is_finite(), "AIC = {}", r.aic);
1727 }
1728 }
1729 }
1730}