use u_numflow::special;
use u_numflow::stats;

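/// Empirical cumulative distribution function.
///
/// Returns `(values, probs)`, where `values` holds the sorted distinct
/// observations and `probs[k]` is the fraction of the sample that is
/// `<= values[k]`. Returns `None` for empty or non-finite input.
///
/// A minimal sketch, assuming this module is exported as
/// `u_numflow::distfit` (a hypothetical path; adjust to the real module):
///
/// ```ignore
/// use u_numflow::distfit::ecdf; // path assumed, see note above
///
/// let (values, probs) = ecdf(&[3.0, 1.0, 2.0]).unwrap();
/// assert_eq!(values, vec![1.0, 2.0, 3.0]);
/// assert!((probs[2] - 1.0).abs() < 1e-10); // the last step reaches 1
/// ```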
pub fn ecdf(data: &[f64]) -> Option<(Vec<f64>, Vec<f64>)> {
    if data.is_empty() || data.iter().any(|v| !v.is_finite()) {
        return None;
    }

    let n = data.len() as f64;
    let mut sorted: Vec<f64> = data.to_vec();
    sorted.sort_by(|a, b| a.partial_cmp(b).expect("finite values"));

    let mut values = Vec::new();
    let mut probs = Vec::new();

    let mut i = 0;
    while i < sorted.len() {
        let val = sorted[i];
        // Skip over ties so each distinct value appears once, carrying the
        // cumulative probability of its last occurrence.
        let mut j = i;
        while j < sorted.len() && (sorted[j] - val).abs() < 1e-300 {
            j += 1;
        }
        values.push(val);
        probs.push(j as f64 / n);
        i = j;
    }

    Some((values, probs))
}

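/// Rule used by [`histogram_bins`] to choose the number of bins.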
#[derive(Debug, Clone, Copy)]
pub enum BinMethod {
    /// Sturges' rule: `ceil(log2(n)) + 1` bins.
    Sturges,
    /// Scott's rule: bin width `3.49 * sd * n^(-1/3)`.
    Scott,
    /// Freedman-Diaconis rule: bin width `2 * IQR * n^(-1/3)`,
    /// falling back to Sturges when the IQR is degenerate.
    FreedmanDiaconis,
}

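/// Histogram summary produced by [`histogram_bins`].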
#[derive(Debug, Clone)]
pub struct HistogramBins {
    /// Number of bins.
    pub n_bins: usize,
    /// Common width shared by every bin.
    pub bin_width: f64,
    /// Bin edges; `n_bins + 1` entries from the minimum to the maximum.
    pub edges: Vec<f64>,
    /// Observation count per bin; `n_bins` entries.
    pub counts: Vec<usize>,
}

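/// Builds equal-width histogram edges and counts for `data` under `method`.
///
/// Returns `None` for fewer than 2 points, non-finite values, or zero range.
///
/// A minimal sketch, assuming the same `u_numflow::distfit` path as above:
///
/// ```ignore
/// use u_numflow::distfit::{histogram_bins, BinMethod}; // path assumed
///
/// let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
/// let h = histogram_bins(&data, BinMethod::Sturges).unwrap();
/// assert_eq!(h.edges.len(), h.n_bins + 1);
/// assert_eq!(h.counts.iter().sum::<usize>(), data.len()); // nothing lost
/// ```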
pub fn histogram_bins(data: &[f64], method: BinMethod) -> Option<HistogramBins> {
    let n = data.len();
    if n < 2 || data.iter().any(|v| !v.is_finite()) {
        return None;
    }

    let min_val = data.iter().cloned().reduce(f64::min)?;
    let max_val = data.iter().cloned().reduce(f64::max)?;
    let range = max_val - min_val;

    if range < 1e-300 {
        return None;
    }

    let nf = n as f64;

    let n_bins = match method {
        BinMethod::Sturges => {
            let k = nf.log2().ceil() as usize + 1;
            k.max(2)
        }
        BinMethod::Scott => {
            let sd = stats::std_dev(data)?;
            if sd < 1e-300 {
                return None;
            }
            let h = 3.49 * sd * nf.powf(-1.0 / 3.0);
            (range / h).ceil() as usize
        }
        BinMethod::FreedmanDiaconis => {
            let q1 = stats::quantile(data, 0.25)?;
            let q3 = stats::quantile(data, 0.75)?;
            let iqr = q3 - q1;
            if iqr < 1e-300 {
                // Degenerate IQR: fall back to Sturges.
                let k = nf.log2().ceil() as usize + 1;
                k.max(2)
            } else {
                let h = 2.0 * iqr * nf.powf(-1.0 / 3.0);
                (range / h).ceil() as usize
            }
        }
    }
    .max(2);

    let bin_width = range / n_bins as f64;

    let mut edges = Vec::with_capacity(n_bins + 1);
    for i in 0..=n_bins {
        edges.push(min_val + i as f64 * bin_width);
    }

    let mut counts = vec![0_usize; n_bins];
    for &x in data {
        let bin = ((x - min_val) / bin_width).floor() as usize;
        // The maximum value would index one past the end; clamp it into
        // the last bin.
        let bin = bin.min(n_bins - 1);
        counts[bin] += 1;
    }

    Some(HistogramBins {
        n_bins,
        bin_width,
        edges,
        counts,
    })
}

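/// Normal quantile-quantile plot coordinates.
///
/// Returns `(theoretical, sample)`: standard-normal quantiles at the
/// plotting positions `(i + 0.5) / n`, paired with the sorted sample.
/// Requires at least 3 finite observations.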
pub fn qq_plot_normal(data: &[f64]) -> Option<(Vec<f64>, Vec<f64>)> {
    let n = data.len();
    if n < 3 || data.iter().any(|v| !v.is_finite()) {
        return None;
    }

    let mut sample: Vec<f64> = data.to_vec();
    sample.sort_by(|a, b| a.partial_cmp(b).expect("finite values"));

    // Standard-normal quantiles at the plotting positions (i + 0.5) / n.
    let nf = n as f64;
    let theoretical: Vec<f64> = (0..n)
        .map(|i| {
            let p = (i as f64 + 0.5) / nf;
            special::inverse_normal_cdf(p)
        })
        .collect();

    Some((theoretical, sample))
}

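/// One-sample Kolmogorov-Smirnov test against a normal distribution whose
/// mean and standard deviation are estimated from the data.
///
/// Returns `(d_statistic, p_value)`. The p-value comes from the asymptotic
/// Kolmogorov series and is approximate; note that estimating the
/// parameters from the same sample is not accounted for by the classical
/// KS distribution. Requires at least 5 finite observations.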
pub fn ks_test_normal(data: &[f64]) -> Option<(f64, f64)> {
    let n = data.len();
    if n < 5 || data.iter().any(|v| !v.is_finite()) {
        return None;
    }

    let mean = stats::mean(data)?;
    let sd = stats::std_dev(data)?;
    if sd < 1e-300 {
        return None;
    }

    let mut sorted: Vec<f64> = data.to_vec();
    sorted.sort_by(|a, b| a.partial_cmp(b).expect("finite values"));

    // D is the supremum distance between the empirical CDF and the fitted
    // normal CDF; the ECDF must be checked on both sides of each jump.
    let nf = n as f64;
    let mut d_stat = 0.0_f64;

    for (i, &x) in sorted.iter().enumerate() {
        let z = (x - mean) / sd;
        let cdf = special::standard_normal_cdf(z);
        let ecdf_above = (i + 1) as f64 / nf;
        let ecdf_below = i as f64 / nf;
        d_stat = d_stat.max((ecdf_above - cdf).abs());
        d_stat = d_stat.max((ecdf_below - cdf).abs());
    }

    // Asymptotic p-value from the alternating Kolmogorov series, with a
    // small-sample adjustment to the effective sqrt(n).
    let lambda = (nf.sqrt() + 0.12 + 0.11 / nf.sqrt()) * d_stat;
    let mut p_value = 0.0;
    for k in 1..=100 {
        let kf = k as f64;
        let sign = if k % 2 == 1 { 1.0 } else { -1.0 };
        let term = sign * (-2.0 * kf * kf * lambda * lambda).exp();
        p_value += term;
        if term.abs() < 1e-15 {
            break;
        }
    }
    p_value = (2.0 * p_value).clamp(0.0, 1.0);

    Some((d_stat, p_value))
}

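/// Bandwidth selection rule for [`kde`].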
#[derive(Debug, Clone, Copy)]
pub enum BandwidthMethod {
    /// Silverman's rule of thumb: `0.9 * min(sd, IQR / 1.34) * n^(-1/5)`.
    Silverman,
    /// Normal-reference rule: `1.06 * sd * n^(-1/5)`.
    Scott,
    /// A fixed, caller-supplied bandwidth (must be positive and finite).
    Manual(f64),
}

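/// Gaussian kernel density estimate evaluated on a regular grid.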
#[derive(Debug, Clone)]
pub struct KdeResult {
    /// Evaluation grid, evenly spaced.
    pub x: Vec<f64>,
    /// Estimated density at each grid point.
    pub density: Vec<f64>,
    /// Bandwidth actually used.
    pub bandwidth: f64,
}

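/// Gaussian kernel density estimate on a regular grid of `n_points`
/// points spanning the data range padded by three bandwidths.
///
/// Returns `None` for fewer than 2 data points or grid points, non-finite
/// values, or when the selected rule cannot produce a bandwidth.
///
/// A minimal sketch, assuming the same `u_numflow::distfit` path as above:
///
/// ```ignore
/// use u_numflow::distfit::{kde, BandwidthMethod}; // path assumed
///
/// let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
/// let r = kde(&data, BandwidthMethod::Silverman, 256).unwrap();
/// let dx = r.x[1] - r.x[0];
/// let integral: f64 = r.density.iter().sum::<f64>() * dx;
/// assert!((integral - 1.0).abs() < 0.05); // density integrates to ≈ 1
/// ```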
pub fn kde(data: &[f64], method: BandwidthMethod, n_points: usize) -> Option<KdeResult> {
    let n = data.len();
    if n < 2 || n_points < 2 || data.iter().any(|v| !v.is_finite()) {
        return None;
    }

    let bandwidth = kde_bandwidth(data, method)?;

    // Extend the grid three bandwidths past the data so the density has
    // room to decay toward zero at both ends.
    let min_val = data.iter().cloned().reduce(f64::min)?;
    let max_val = data.iter().cloned().reduce(f64::max)?;
    let x_min = min_val - 3.0 * bandwidth;
    let x_max = max_val + 3.0 * bandwidth;
    let step = (x_max - x_min) / (n_points - 1) as f64;

    let x: Vec<f64> = (0..n_points).map(|i| x_min + i as f64 * step).collect();

    let inv_h = 1.0 / bandwidth;
    let inv_nh = inv_h / n as f64;
    let inv_sqrt_2pi = 1.0 / (2.0 * std::f64::consts::PI).sqrt();

    // Gaussian kernel estimate: f(x) = (1 / nh) * sum_j phi((x - x_j) / h).
    let density: Vec<f64> = x
        .iter()
        .map(|&xi| {
            let sum: f64 = data
                .iter()
                .map(|&xj| {
                    let u = (xi - xj) * inv_h;
                    inv_sqrt_2pi * (-0.5 * u * u).exp()
                })
                .sum();
            sum * inv_nh
        })
        .collect();

    Some(KdeResult {
        x,
        density,
        bandwidth,
    })
}

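/// Evaluates a Gaussian KDE with the given bandwidth at a single point `x`.
///
/// Returns `None` for empty or non-finite data, a non-positive or
/// non-finite bandwidth, or a non-finite `x`.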
pub fn kde_evaluate(data: &[f64], bandwidth: f64, x: f64) -> Option<f64> {
    let n = data.len();
    if n == 0 || bandwidth <= 0.0 || !bandwidth.is_finite() || !x.is_finite() {
        return None;
    }
    if data.iter().any(|v| !v.is_finite()) {
        return None;
    }

    let inv_h = 1.0 / bandwidth;
    let inv_nh = inv_h / n as f64;
    let inv_sqrt_2pi = 1.0 / (2.0 * std::f64::consts::PI).sqrt();

    let sum: f64 = data
        .iter()
        .map(|&xj| {
            let u = (x - xj) * inv_h;
            inv_sqrt_2pi * (-0.5 * u * u).exp()
        })
        .sum();

    Some(sum * inv_nh)
}

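/// Computes the KDE bandwidth for `data` under the given rule.
///
/// Returns `None` for fewer than two points, non-finite values, or (for
/// the data-driven rules) zero spread.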
pub fn kde_bandwidth(data: &[f64], method: BandwidthMethod) -> Option<f64> {
    let n = data.len();
    if n < 2 || data.iter().any(|v| !v.is_finite()) {
        return None;
    }

    match method {
        BandwidthMethod::Silverman => {
            let sd = stats::std_dev(data)?;
            if sd < 1e-300 {
                return None;
            }
            let q1 = stats::quantile(data, 0.25)?;
            let q3 = stats::quantile(data, 0.75)?;
            let iqr = q3 - q1;
            // Use the smaller of sd and IQR / 1.34 for robustness; fall
            // back to sd when the IQR is degenerate.
            let spread = if iqr > 1e-300 { sd.min(iqr / 1.34) } else { sd };
            Some(0.9 * spread * (n as f64).powf(-0.2))
        }
        BandwidthMethod::Scott => {
            let sd = stats::std_dev(data)?;
            if sd < 1e-300 {
                return None;
            }
            Some(1.06 * sd * (n as f64).powf(-0.2))
        }
        BandwidthMethod::Manual(h) => {
            if h <= 0.0 || !h.is_finite() {
                None
            } else {
                Some(h)
            }
        }
    }
}

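/// Maximum-likelihood fit of a single distribution family.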
#[derive(Debug, Clone)]
pub struct FitResult {
    /// Name of the fitted family, e.g. "Normal".
    pub distribution: String,
    /// Estimated parameters as `(name, value)` pairs.
    pub parameters: Vec<(String, f64)>,
    /// Maximized log-likelihood.
    pub log_likelihood: f64,
    /// Akaike information criterion: `-2 * log_lik + 2 * k`.
    pub aic: f64,
    /// Bayesian information criterion: `-2 * log_lik + k * ln(n)`.
    pub bic: f64,
    /// Number of free parameters `k`.
    pub n_params: usize,
}

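/// Fits a normal distribution by maximum likelihood.
///
/// `mu` is the sample mean and `sigma` the biased (1/n) standard
/// deviation, which is the MLE. Requires at least 2 distinct finite values.
///
/// A minimal sketch, assuming the same `u_numflow::distfit` path as above:
///
/// ```ignore
/// use u_numflow::distfit::fit_normal; // path assumed
///
/// let r = fit_normal(&[2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]).unwrap();
/// assert_eq!(r.distribution, "Normal");
/// assert!((r.parameters[0].1 - 5.0).abs() < 1e-10); // mu-hat = sample mean
/// ```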
pub fn fit_normal(data: &[f64]) -> Option<FitResult> {
    let n = data.len();
    if n < 2 || data.iter().any(|v| !v.is_finite()) {
        return None;
    }

    let nf = n as f64;
    let mu = stats::mean(data)?;

    // MLE variance uses the 1/n (biased) divisor.
    let sum_sq: f64 = data.iter().map(|&x| (x - mu).powi(2)).sum();
    let sigma_sq = sum_sq / nf;
    if sigma_sq < 1e-300 {
        return None;
    }
    let sigma = sigma_sq.sqrt();

    // At the MLE the quadratic term collapses to -n/2.
    let log_lik = -nf / 2.0 * (2.0 * std::f64::consts::PI).ln() - nf * sigma.ln() - nf / 2.0;

    let k = 2;
    let aic = -2.0 * log_lik + 2.0 * k as f64;
    let bic = -2.0 * log_lik + k as f64 * nf.ln();

    Some(FitResult {
        distribution: "Normal".to_string(),
        parameters: vec![("mu".to_string(), mu), ("sigma".to_string(), sigma)],
        log_likelihood: log_lik,
        aic,
        bic,
        n_params: k,
    })
}

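/// Fits an exponential distribution by maximum likelihood
/// (`lambda = 1 / mean`). Requires at least 2 finite, strictly positive
/// observations.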
pub fn fit_exponential(data: &[f64]) -> Option<FitResult> {
    let n = data.len();
    if n < 2 || data.iter().any(|v| !v.is_finite()) {
        return None;
    }
    if data.iter().any(|&v| v <= 0.0) {
        return None;
    }

    let nf = n as f64;
    let mean = stats::mean(data)?;
    if mean < 1e-300 {
        return None;
    }

    let lambda = 1.0 / mean;

    // With lambda = 1 / mean, the log-likelihood simplifies to
    // n * ln(lambda) - n.
    let log_lik = nf * lambda.ln() - nf;

    let k = 1;
    let aic = -2.0 * log_lik + 2.0 * k as f64;
    let bic = -2.0 * log_lik + k as f64 * nf.ln();

    Some(FitResult {
        distribution: "Exponential".to_string(),
        parameters: vec![("lambda".to_string(), lambda)],
        log_likelihood: log_lik,
        aic,
        bic,
        n_params: k,
    })
}

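/// Digamma function ψ(x) for `x > 0` (NaN otherwise), used by the gamma
/// and beta fitters.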
fn digamma(x: f64) -> f64 {
    if x <= 0.0 {
        return f64::NAN;
    }

    // Use the recurrence psi(x) = psi(x + 1) - 1/x to shift x up to >= 8,
    // where the asymptotic series is accurate.
    let mut val = 0.0;
    let mut x = x;
    while x < 8.0 {
        val -= 1.0 / x;
        x += 1.0;
    }

    // Asymptotic expansion: ln(x) - 1/(2x) - 1/(12x^2) + 1/(120x^4) - ...
    let inv_x = 1.0 / x;
    let inv_x2 = inv_x * inv_x;
    val += x.ln()
        - 0.5 * inv_x
        - inv_x2 * (1.0 / 12.0 - inv_x2 * (1.0 / 120.0 - inv_x2 * (1.0 / 252.0)));

    val
}

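/// Trigamma function ψ'(x) for `x > 0` (NaN otherwise), used as the
/// derivative term in the Newton iterations below.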
fn trigamma(x: f64) -> f64 {
    if x <= 0.0 {
        return f64::NAN;
    }

    // Recurrence psi'(x) = psi'(x + 1) + 1/x^2, shifting x up to >= 8.
    let mut val = 0.0;
    let mut x = x;
    while x < 8.0 {
        val += 1.0 / (x * x);
        x += 1.0;
    }

    // Asymptotic expansion: 1/x + 1/(2x^2) + 1/(6x^3) - 1/(30x^5) + ...
    let inv_x = 1.0 / x;
    let inv_x2 = inv_x * inv_x;
    val += inv_x
        + 0.5 * inv_x2
        + inv_x2 * inv_x * (1.0 / 6.0 - inv_x2 * (1.0 / 30.0 - inv_x2 * (1.0 / 42.0)));

    val
}

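/// Fits a gamma distribution (shape `alpha`, rate `beta`) by maximum
/// likelihood.
///
/// The shape is found by Newton iteration on the profile likelihood,
/// starting from a method-of-moments estimate; the rate is then
/// `alpha / mean`. Requires at least 2 finite, strictly positive values.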
pub fn fit_gamma(data: &[f64]) -> Option<FitResult> {
    let n = data.len();
    if n < 2 || data.iter().any(|v| !v.is_finite()) {
        return None;
    }
    if data.iter().any(|&v| v <= 0.0) {
        return None;
    }

    let nf = n as f64;
    let mean = stats::mean(data)?;
    if mean < 1e-300 {
        return None;
    }

    let var: f64 = data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / nf;
    if var < 1e-300 {
        return None;
    }

    let sum_log: f64 = data.iter().map(|&x| x.ln()).sum();
    let mean_log = sum_log / nf;
    // The profile likelihood gives ln(alpha) - psi(alpha) = ln(mean) - mean_log.
    let target = mean.ln() - mean_log;

    // Start Newton's method from the method-of-moments estimate.
    let mut alpha = mean * mean / var;
    if alpha < 1e-10 {
        alpha = 0.5;
    }

    for _ in 0..200 {
        let f = alpha.ln() - digamma(alpha) - target;
        let f_prime = 1.0 / alpha - trigamma(alpha);
        if f_prime.abs() < 1e-30 {
            break;
        }

        let delta = f / f_prime;
        alpha -= delta;

        // Keep the iterate inside the valid domain.
        if alpha <= 0.0 {
            alpha = 1e-10;
        }

        if delta.abs() < 1e-10 {
            break;
        }
    }

    if !alpha.is_finite() || alpha <= 0.0 {
        return None;
    }

    // For fixed alpha, the MLE of the rate is alpha / mean.
    let beta = alpha / mean;
    let sum_x: f64 = data.iter().sum();
    let log_lik = nf * alpha * beta.ln() - nf * special::ln_gamma(alpha) + (alpha - 1.0) * sum_log
        - beta * sum_x;

    let k = 2;
    let aic = -2.0 * log_lik + 2.0 * k as f64;
    let bic = -2.0 * log_lik + k as f64 * nf.ln();

    Some(FitResult {
        distribution: "Gamma".to_string(),
        parameters: vec![("alpha".to_string(), alpha), ("beta".to_string(), beta)],
        log_likelihood: log_lik,
        aic,
        bic,
        n_params: k,
    })
}

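/// Fits a log-normal distribution by maximum likelihood, i.e. a normal
/// fit to `ln(x)`. Requires at least 2 finite, strictly positive values.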
pub fn fit_lognormal(data: &[f64]) -> Option<FitResult> {
    let n = data.len();
    if n < 2 || data.iter().any(|v| !v.is_finite()) {
        return None;
    }
    if data.iter().any(|&v| v <= 0.0) {
        return None;
    }

    // The log-normal MLE is the normal MLE applied to ln(x).
    let nf = n as f64;
    let log_data: Vec<f64> = data.iter().map(|&x| x.ln()).collect();

    let mu = log_data.iter().sum::<f64>() / nf;
    let sigma_sq = log_data.iter().map(|&y| (y - mu).powi(2)).sum::<f64>() / nf;
    if sigma_sq < 1e-300 {
        return None;
    }
    let sigma = sigma_sq.sqrt();

    // Normal log-likelihood on the log scale, minus the Jacobian sum(ln x).
    let sum_log: f64 = log_data.iter().sum();
    let log_lik =
        -nf / 2.0 * (2.0 * std::f64::consts::PI).ln() - nf * sigma.ln() - nf / 2.0 - sum_log;

    let k = 2;
    let aic = -2.0 * log_lik + 2.0 * k as f64;
    let bic = -2.0 * log_lik + k as f64 * nf.ln();

    Some(FitResult {
        distribution: "LogNormal".to_string(),
        parameters: vec![("mu".to_string(), mu), ("sigma".to_string(), sigma)],
        log_likelihood: log_lik,
        aic,
        bic,
        n_params: k,
    })
}

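/// Fits a Poisson distribution by maximum likelihood (`lambda` = sample
/// mean). Negative values are rejected; non-integer values are accepted
/// as-is, with `ln(x!)` generalized via `ln_gamma(x + 1)`.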
pub fn fit_poisson(data: &[f64]) -> Option<FitResult> {
    let n = data.len();
    if n < 2 || data.iter().any(|v| !v.is_finite()) {
        return None;
    }
    if data.iter().any(|&v| v < 0.0) {
        return None;
    }

    let nf = n as f64;
    let lambda = stats::mean(data)?;
    if lambda < 1e-300 {
        return None;
    }

    // Since lambda is the sample mean, sum(x_i) = n * lambda in the first
    // term; ln(x!) is computed as ln_gamma(x + 1).
    let sum_log_fact: f64 = data.iter().map(|&x| special::ln_gamma(x + 1.0)).sum();
    let log_lik = nf * lambda * lambda.ln() - nf * lambda - sum_log_fact;

    let k = 1;
    let aic = -2.0 * log_lik + 2.0 * k as f64;
    let bic = -2.0 * log_lik + k as f64 * nf.ln();

    Some(FitResult {
        distribution: "Poisson".to_string(),
        parameters: vec![("lambda".to_string(), lambda)],
        log_likelihood: log_lik,
        aic,
        bic,
        n_params: k,
    })
}

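/// Fits a beta distribution by maximum likelihood using a two-dimensional
/// Newton iteration started from method-of-moments estimates.
///
/// Requires at least 2 finite values strictly inside (0, 1).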
pub fn fit_beta(data: &[f64]) -> Option<FitResult> {
    let n = data.len();
    if n < 2 || data.iter().any(|v| !v.is_finite()) {
        return None;
    }
    if data.iter().any(|&v| v <= 0.0 || v >= 1.0) {
        return None;
    }

    let nf = n as f64;
    let mean = stats::mean(data)?;
    let var = data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / nf;
    if var < 1e-300 {
        return None;
    }

    let sum_log_x: f64 = data.iter().map(|&x| x.ln()).sum();
    let sum_log_1mx: f64 = data.iter().map(|&x| (1.0 - x).ln()).sum();
    let mean_log_x = sum_log_x / nf;
    let mean_log_1mx = sum_log_1mx / nf;

    // Method-of-moments starting values.
    let factor = mean * (1.0 - mean) / var - 1.0;
    if factor <= 0.0 {
        return None;
    }
    let mut alpha = mean * factor;
    let mut beta = (1.0 - mean) * factor;

    if alpha < 0.01 {
        alpha = 0.5;
    }
    if beta < 0.01 {
        beta = 0.5;
    }

    // Two-dimensional Newton iteration on the score equations, with the
    // analytic Jacobian built from trigamma terms.
    for _ in 0..200 {
        let psi_ab = digamma(alpha + beta);
        let tri_ab = trigamma(alpha + beta);

        let f1 = digamma(alpha) - psi_ab - mean_log_x;
        let f2 = digamma(beta) - psi_ab - mean_log_1mx;

        let j11 = trigamma(alpha) - tri_ab;
        let j12 = -tri_ab;
        let j21 = -tri_ab;
        let j22 = trigamma(beta) - tri_ab;

        let det = j11 * j22 - j12 * j21;
        if det.abs() < 1e-30 {
            break;
        }

        // Solve the 2x2 system J * [da, db] = [f1, f2] by Cramer's rule.
        let da = (j22 * f1 - j12 * f2) / det;
        let db = (j11 * f2 - j21 * f1) / det;

        alpha -= da;
        beta -= db;

        if alpha <= 0.0 {
            alpha = 1e-10;
        }
        if beta <= 0.0 {
            beta = 1e-10;
        }

        if da.abs() < 1e-10 && db.abs() < 1e-10 {
            break;
        }
    }

    if !alpha.is_finite() || !beta.is_finite() || alpha <= 0.0 || beta <= 0.0 {
        return None;
    }

    let log_lik = nf
        * (special::ln_gamma(alpha + beta) - special::ln_gamma(alpha) - special::ln_gamma(beta))
        + (alpha - 1.0) * sum_log_x
        + (beta - 1.0) * sum_log_1mx;

    let k = 2;
    let aic = -2.0 * log_lik + 2.0 * k as f64;
    let bic = -2.0 * log_lik + k as f64 * nf.ln();

    Some(FitResult {
        distribution: "Beta".to_string(),
        parameters: vec![("alpha".to_string(), alpha), ("beta".to_string(), beta)],
        log_likelihood: log_lik,
        aic,
        bic,
        n_params: k,
    })
}

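/// Fits every applicable candidate family (normal, exponential, gamma,
/// log-normal, Poisson) and returns the successful fits sorted by AIC,
/// best first. Families whose support excludes the data simply drop out.
///
/// A minimal sketch, assuming the same `u_numflow::distfit` path as above:
///
/// ```ignore
/// use u_numflow::distfit::fit_best; // path assumed
///
/// let fits = fit_best(&[2.1, 3.5, 1.8, 4.2, 2.9, 3.1, 2.5, 3.8, 1.5, 2.7]);
/// assert!(fits.windows(2).all(|w| w[0].aic <= w[1].aic)); // sorted by AIC
/// println!("best family: {}", fits[0].distribution);
/// ```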
pub fn fit_best(data: &[f64]) -> Vec<FitResult> {
    let mut results = Vec::new();

    // Each fitter rejects data outside its support, so inapplicable
    // families simply return None. Beta is not attempted here: it requires
    // data strictly inside (0, 1).
    if let Some(r) = fit_normal(data) {
        results.push(r);
    }
    if let Some(r) = fit_exponential(data) {
        results.push(r);
    }
    if let Some(r) = fit_gamma(data) {
        results.push(r);
    }
    if let Some(r) = fit_lognormal(data) {
        results.push(r);
    }
    if let Some(r) = fit_poisson(data) {
        results.push(r);
    }

    // Sort ascending by AIC so the best fit comes first.
    results.sort_by(|a, b| {
        a.aic
            .partial_cmp(&b.aic)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    results
}

#[cfg(test)]
mod tests {
    use super::*;

    // --- ecdf ---

    #[test]
    fn ecdf_basic() {
        let (vals, probs) = ecdf(&[3.0, 1.0, 2.0]).expect("should compute");
        assert_eq!(vals, vec![1.0, 2.0, 3.0]);
        assert!((probs[0] - 1.0 / 3.0).abs() < 1e-10);
        assert!((probs[1] - 2.0 / 3.0).abs() < 1e-10);
        assert!((probs[2] - 1.0).abs() < 1e-10);
    }

    #[test]
    fn ecdf_with_ties() {
        let (vals, probs) = ecdf(&[1.0, 2.0, 2.0, 3.0]).expect("should compute");
        assert_eq!(vals, vec![1.0, 2.0, 3.0]);
        assert!((probs[0] - 0.25).abs() < 1e-10);
        assert!((probs[1] - 0.75).abs() < 1e-10);
        assert!((probs[2] - 1.0).abs() < 1e-10);
    }

    #[test]
    fn ecdf_single() {
        let (vals, probs) = ecdf(&[5.0]).expect("should compute");
        assert_eq!(vals, vec![5.0]);
        assert!((probs[0] - 1.0).abs() < 1e-10);
    }

    #[test]
    fn ecdf_empty() {
        assert!(ecdf(&[]).is_none());
    }

    #[test]
    fn ecdf_nan() {
        assert!(ecdf(&[1.0, f64::NAN, 3.0]).is_none());
    }

    // --- histogram_bins ---

    #[test]
    fn hist_sturges_basic() {
        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
        let r = histogram_bins(&data, BinMethod::Sturges).expect("should compute");
        assert!(r.n_bins >= 5);
        assert_eq!(r.edges.len(), r.n_bins + 1);
        assert_eq!(r.counts.len(), r.n_bins);
        let total: usize = r.counts.iter().sum();
        assert_eq!(total, 100);
    }

    #[test]
    fn hist_scott() {
        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
        let r = histogram_bins(&data, BinMethod::Scott).expect("should compute");
        assert!(r.n_bins >= 3);
        let total: usize = r.counts.iter().sum();
        assert_eq!(total, 100);
    }

    #[test]
    fn hist_fd() {
        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
        let r = histogram_bins(&data, BinMethod::FreedmanDiaconis).expect("should compute");
        assert!(r.n_bins >= 3);
        let total: usize = r.counts.iter().sum();
        assert_eq!(total, 100);
    }

    #[test]
    fn hist_edge_cases() {
        // Too few points, then zero range.
        assert!(histogram_bins(&[1.0], BinMethod::Sturges).is_none());
        assert!(histogram_bins(&[5.0, 5.0, 5.0], BinMethod::Sturges).is_none());
    }

    // --- qq_plot_normal ---

    #[test]
    fn qq_basic() {
        let data = [-1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5];
        let (theo, samp) = qq_plot_normal(&data).expect("should compute");
        assert_eq!(theo.len(), 7);
        assert_eq!(samp.len(), 7);
        assert!((samp[0] + 1.5).abs() < 1e-10);
        assert!((samp[6] - 1.5).abs() < 1e-10);
        assert!(theo[3].abs() < 0.1);
    }

    #[test]
    fn qq_edge_cases() {
        assert!(qq_plot_normal(&[1.0, 2.0]).is_none());
    }

    // --- ks_test_normal ---

    #[test]
    fn ks_normal_data() {
        let data = [-2.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5];
        let (d, p) = ks_test_normal(&data).expect("should compute");
        assert!(d > 0.0 && d < 1.0, "D = {d}");
        assert!(p > 0.05, "p = {p}");
    }

    #[test]
    fn ks_non_normal_data() {
        let data: Vec<f64> = (0..50).map(|i| i as f64 * 2.0).collect();
        let (d, _p) = ks_test_normal(&data).expect("should compute");
        assert!(d > 0.0);
    }

    #[test]
    fn ks_edge_cases() {
        // Too few points, then zero standard deviation.
        assert!(ks_test_normal(&[1.0, 2.0, 3.0, 4.0]).is_none());
        assert!(ks_test_normal(&[5.0, 5.0, 5.0, 5.0, 5.0]).is_none());
    }

    // --- kde ---

    #[test]
    fn kde_silverman_basic() {
        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
        let r = kde(&data, BandwidthMethod::Silverman, 256).expect("should compute");
        assert_eq!(r.x.len(), 256);
        assert_eq!(r.density.len(), 256);
        assert!(r.bandwidth > 0.0);
        assert!(r.density.iter().all(|&d| d >= 0.0));
        let dx = r.x[1] - r.x[0];
        let integral: f64 = r.density.iter().sum::<f64>() * dx;
        assert!(
            (integral - 1.0).abs() < 0.05,
            "integral = {integral}, expected ≈ 1.0"
        );
    }

    #[test]
    fn kde_scott_basic() {
        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
        let r = kde(&data, BandwidthMethod::Scott, 256).expect("should compute");
        assert!(r.bandwidth > 0.0);
        let dx = r.x[1] - r.x[0];
        let integral: f64 = r.density.iter().sum::<f64>() * dx;
        assert!(
            (integral - 1.0).abs() < 0.05,
            "integral = {integral}, expected ≈ 1.0"
        );
    }

    #[test]
    fn kde_manual_bandwidth() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
        let r = kde(&data, BandwidthMethod::Manual(0.5), 128).expect("should compute");
        assert!((r.bandwidth - 0.5).abs() < 1e-10);
        assert_eq!(r.x.len(), 128);
    }

    #[test]
    fn kde_edge_cases() {
        assert!(kde(&[1.0], BandwidthMethod::Silverman, 256).is_none());
        assert!(kde(&[], BandwidthMethod::Silverman, 256).is_none());
        assert!(kde(&[1.0, f64::NAN], BandwidthMethod::Silverman, 256).is_none());
        assert!(kde(&[5.0, 5.0, 5.0], BandwidthMethod::Silverman, 256).is_none());
        assert!(kde(&[1.0, 2.0, 3.0], BandwidthMethod::Manual(1.0), 1).is_none());
        assert!(kde(&[1.0, 2.0, 3.0], BandwidthMethod::Manual(0.0), 256).is_none());
        assert!(kde(&[1.0, 2.0, 3.0], BandwidthMethod::Manual(-1.0), 256).is_none());
    }

    #[test]
    fn kde_bimodal() {
        // Two well-separated clusters of 50 points each.
        let mut data = Vec::new();
        for i in 0..50 {
            data.push(i as f64 * 0.1);
        }
        for i in 0..50 {
            data.push(10.0 + i as f64 * 0.1);
        }
        let r = kde(&data, BandwidthMethod::Silverman, 512).expect("should compute");
        let idx_cluster = r.x.iter().position(|&x| x >= 2.5).expect("grid point");
        let idx_valley = r.x.iter().position(|&x| x >= 7.5).expect("grid point");
        assert!(
            r.density[idx_cluster] > r.density[idx_valley],
            "cluster density ({}) should exceed valley density ({})",
            r.density[idx_cluster],
            r.density[idx_valley]
        );
    }

    #[test]
    fn kde_evaluate_single() {
        let data = [0.0, 1.0, 2.0, 3.0, 4.0];
        let h = 1.0;
        let d = kde_evaluate(&data, h, 2.0).expect("should compute");
        assert!(d > 0.0);
        let d_edge = kde_evaluate(&data, h, -5.0).expect("should compute");
        assert!(d > d_edge, "center density ({d}) > edge density ({d_edge})");
    }

    #[test]
    fn kde_evaluate_edge_cases() {
        assert!(kde_evaluate(&[], 1.0, 0.0).is_none());
        assert!(kde_evaluate(&[1.0], 0.0, 0.0).is_none());
        assert!(kde_evaluate(&[1.0], -1.0, 0.0).is_none());
        assert!(kde_evaluate(&[1.0], 1.0, f64::NAN).is_none());
        assert!(kde_evaluate(&[f64::NAN], 1.0, 0.0).is_none());
    }

    #[test]
    fn kde_bandwidth_methods() {
        let data: Vec<f64> = (0..50).map(|i| i as f64).collect();
        let h_silv = kde_bandwidth(&data, BandwidthMethod::Silverman).expect("silverman");
        let h_scott = kde_bandwidth(&data, BandwidthMethod::Scott).expect("scott");
        let h_manual = kde_bandwidth(&data, BandwidthMethod::Manual(2.5)).expect("manual");
        assert!(h_silv > 0.0);
        assert!(h_scott > 0.0);
        assert!((h_manual - 2.5).abs() < 1e-10);
        // Scott's factor (1.06 * sd) exceeds Silverman's
        // (0.9 * min(sd, IQR / 1.34)).
        assert!(h_scott > h_silv);
    }

    #[test]
    fn kde_bandwidth_edge_cases() {
        assert!(kde_bandwidth(&[1.0], BandwidthMethod::Silverman).is_none());
        assert!(kde_bandwidth(&[5.0, 5.0], BandwidthMethod::Silverman).is_none());
        assert!(kde_bandwidth(&[1.0, 2.0], BandwidthMethod::Manual(0.0)).is_none());
    }

    #[test]
    fn kde_grid_extends_beyond_data() {
        let data = [10.0, 20.0, 30.0];
        let r = kde(&data, BandwidthMethod::Manual(2.0), 64).expect("should compute");
        assert!(r.x[0] < 10.0 - 2.0, "grid starts at {}", r.x[0]);
        assert!(
            *r.x.last().expect("non-empty") > 30.0 + 2.0,
            "grid ends at {}",
            r.x.last().expect("non-empty")
        );
    }

    // --- distribution fitting ---

    #[test]
    fn fit_normal_basic() {
        let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let r = fit_normal(&data).expect("should compute");
        assert_eq!(r.distribution, "Normal");
        assert_eq!(r.n_params, 2);
        assert!((r.parameters[0].1 - 5.0).abs() < 1e-10, "μ̂ = 5.0");
        // MLE sigma uses the 1/n divisor: sqrt(32 / 8) = 2.
        let expected_sigma = (32.0_f64 / 8.0).sqrt();
        assert!(
            (r.parameters[1].1 - expected_sigma).abs() < 1e-10,
            "σ̂ = {}, expected {}",
            r.parameters[1].1,
            expected_sigma
        );
        assert!(r.log_likelihood.is_finite());
        assert!(r.aic.is_finite());
        assert!(r.bic.is_finite());
    }

    #[test]
    fn fit_normal_edge_cases() {
        assert!(fit_normal(&[1.0]).is_none());
        assert!(fit_normal(&[]).is_none());
        assert!(fit_normal(&[1.0, f64::NAN]).is_none());
        assert!(fit_normal(&[5.0, 5.0, 5.0]).is_none());
    }

    #[test]
    fn fit_exponential_basic() {
        let data = [0.5, 1.2, 0.8, 2.1, 0.3, 1.5, 0.9, 1.8];
        let r = fit_exponential(&data).expect("should compute");
        assert_eq!(r.distribution, "Exponential");
        assert_eq!(r.n_params, 1);
        let mean: f64 = data.iter().sum::<f64>() / data.len() as f64;
        let expected_lambda = 1.0 / mean;
        assert!(
            (r.parameters[0].1 - expected_lambda).abs() < 1e-10,
            "λ̂ = {}, expected {}",
            r.parameters[0].1,
            expected_lambda
        );
    }

    #[test]
    fn fit_exponential_edge_cases() {
        assert!(fit_exponential(&[1.0]).is_none());
        assert!(fit_exponential(&[1.0, -1.0]).is_none());
        assert!(fit_exponential(&[0.0, 1.0]).is_none());
        assert!(fit_exponential(&[1.0, f64::NAN]).is_none());
    }

    #[test]
    fn fit_gamma_basic() {
        let data = [
            1.5, 2.1, 1.8, 2.5, 1.9, 2.3, 2.0, 1.7, 2.4, 2.2, 1.6, 2.6, 1.4, 2.8, 2.1, 1.9, 2.3,
            2.0, 1.8, 2.5,
        ];
        let r = fit_gamma(&data).expect("should compute");
        assert_eq!(r.distribution, "Gamma");
        assert_eq!(r.n_params, 2);
        assert!(r.parameters[0].1 > 0.0, "α̂ > 0");
        assert!(r.parameters[1].1 > 0.0, "β̂ > 0");
        assert!(r.log_likelihood.is_finite());
    }

    #[test]
    fn fit_gamma_edge_cases() {
        assert!(fit_gamma(&[1.0]).is_none());
        assert!(fit_gamma(&[1.0, -1.0]).is_none());
        assert!(fit_gamma(&[0.0, 1.0]).is_none());
        assert!(fit_gamma(&[1.0, f64::NAN]).is_none());
        assert!(fit_gamma(&[5.0, 5.0, 5.0]).is_none());
    }

    #[test]
    fn fit_gamma_mean_rate_relationship() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let r = fit_gamma(&data).expect("should compute");
        let alpha = r.parameters[0].1;
        let beta = r.parameters[1].1;
        let mean: f64 = data.iter().sum::<f64>() / data.len() as f64;
        // The MLE ties the estimates together via alpha / beta = mean.
        assert!(
            (alpha / beta - mean).abs() < 1e-6,
            "α/β = {}, mean = {}",
            alpha / beta,
            mean
        );
    }

    #[test]
    fn fit_best_sorted_by_aic() {
        let data = [2.1, 3.5, 1.8, 4.2, 2.9, 3.1, 2.5, 3.8, 1.5, 2.7];
        let fits = fit_best(&data);
        assert!(!fits.is_empty());
        assert!(fits.len() >= 2, "got {} fits", fits.len());
        for w in fits.windows(2) {
            assert!(
                w[0].aic <= w[1].aic,
                "AIC not sorted: {} > {}",
                w[0].aic,
                w[1].aic
            );
        }
    }

    #[test]
    fn fit_best_mixed_sign_data() {
        let data = [-2.0, -1.0, 0.5, 1.0, 2.0, 3.0, 1.5, -0.5];
        let fits = fit_best(&data);
        assert_eq!(fits.len(), 1, "only Normal should fit");
        assert_eq!(fits[0].distribution, "Normal");
    }

    #[test]
    fn digamma_known_values() {
        // ψ(1) = -γ (Euler-Mascheroni constant).
        assert!((digamma(1.0) + 0.5772156649).abs() < 1e-8);
        assert!((digamma(2.0) - 0.4227843351).abs() < 1e-8);
        assert!((digamma(0.5) + 1.9635100260).abs() < 1e-7);
    }

    #[test]
    fn trigamma_known_values() {
        // ψ'(1) = π²/6.
        assert!((trigamma(1.0) - std::f64::consts::PI.powi(2) / 6.0).abs() < 1e-7);
        assert!((trigamma(2.0) - (std::f64::consts::PI.powi(2) / 6.0 - 1.0)).abs() < 1e-7);
    }

    // --- fit_lognormal ---

    #[test]
    fn fit_lognormal_basic() {
        let data = [1.2, 2.5, 1.8, 3.1, 0.9, 2.0, 1.5, 4.0];
        let r = fit_lognormal(&data).expect("should compute");
        assert_eq!(r.distribution, "LogNormal");
        assert_eq!(r.n_params, 2);
        assert!(r.parameters[1].1 > 0.0); // sigma > 0
        assert!(r.log_likelihood.is_finite());
    }

    #[test]
    fn fit_lognormal_params_match_log_transform() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
        let r = fit_lognormal(&data).expect("should compute");
        let mu = r.parameters[0].1;
        let sigma = r.parameters[1].1;
        let log_data: Vec<f64> = data.iter().map(|&x| x.ln()).collect();
        let expected_mu: f64 = log_data.iter().sum::<f64>() / 5.0;
        assert!(
            (mu - expected_mu).abs() < 1e-10,
            "μ = {}, expected {}",
            mu,
            expected_mu
        );
        assert!(sigma > 0.0);
    }

    #[test]
    fn fit_lognormal_edge_cases() {
        assert!(fit_lognormal(&[1.0]).is_none());
        assert!(fit_lognormal(&[1.0, -1.0]).is_none());
        assert!(fit_lognormal(&[0.0, 1.0]).is_none());
    }

    // --- fit_poisson ---

    #[test]
    fn fit_poisson_basic() {
        let data = [2.0, 3.0, 1.0, 4.0, 2.0, 3.0, 1.0, 2.0];
        let r = fit_poisson(&data).expect("should compute");
        assert_eq!(r.distribution, "Poisson");
        assert_eq!(r.n_params, 1);
        let mean: f64 = data.iter().sum::<f64>() / data.len() as f64;
        assert!(
            (r.parameters[0].1 - mean).abs() < 1e-10,
            "λ̂ = {}, expected {}",
            r.parameters[0].1,
            mean
        );
    }

    #[test]
    fn fit_poisson_edge_cases() {
        assert!(fit_poisson(&[1.0]).is_none());
        assert!(fit_poisson(&[1.0, -1.0]).is_none());
        assert!(fit_poisson(&[0.0, 0.0]).is_none());
    }

    // --- fit_beta ---

    #[test]
    fn fit_beta_basic() {
        let data = [0.2, 0.35, 0.5, 0.15, 0.45, 0.3, 0.6, 0.25, 0.4, 0.55];
        let r = fit_beta(&data).expect("should compute");
        assert_eq!(r.distribution, "Beta");
        assert_eq!(r.n_params, 2);
        assert!(r.parameters[0].1 > 0.0, "α̂ > 0");
        assert!(r.parameters[1].1 > 0.0, "β̂ > 0");
        assert!(r.log_likelihood.is_finite());
    }

    #[test]
    fn fit_beta_symmetric() {
        // Data roughly symmetric about 0.5, so alpha and beta should be close.
        let data = [0.3, 0.4, 0.5, 0.6, 0.7, 0.35, 0.45, 0.55, 0.65, 0.5];
        let r = fit_beta(&data).expect("should compute");
        let alpha = r.parameters[0].1;
        let beta = r.parameters[1].1;
        assert!(
            (alpha - beta).abs() / alpha < 0.3,
            "α = {alpha}, β = {beta}, should be roughly equal"
        );
    }

    #[test]
    fn fit_beta_edge_cases() {
        assert!(fit_beta(&[0.5]).is_none());
        assert!(fit_beta(&[0.5, 1.0]).is_none());
        assert!(fit_beta(&[0.0, 0.5]).is_none());
        assert!(fit_beta(&[0.5, 1.5]).is_none());
        assert!(fit_beta(&[0.5, -0.5]).is_none());
    }

    // --- fit_best ---

    #[test]
    fn fit_best_includes_lognormal_and_poisson() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let fits = fit_best(&data);
        let names: Vec<&str> = fits.iter().map(|f| f.distribution.as_str()).collect();
        assert!(names.contains(&"Normal"), "should include Normal");
        assert!(names.contains(&"Exponential"), "should include Exponential");
        assert!(names.contains(&"Gamma"), "should include Gamma");
        assert!(names.contains(&"LogNormal"), "should include LogNormal");
        assert!(names.contains(&"Poisson"), "should include Poisson");
    }
}

#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        #[test]
        fn ecdf_last_is_one(
            data in proptest::collection::vec(-1e3_f64..1e3, 1..=50)
        ) {
            if let Some((_, probs)) = ecdf(&data) {
                let last = *probs.last().expect("non-empty");
                prop_assert!((last - 1.0).abs() < 1e-10, "last prob = {last}");
            }
        }

        #[test]
        fn ecdf_monotonic(
            data in proptest::collection::vec(-1e3_f64..1e3, 2..=50)
        ) {
            if let Some((_, probs)) = ecdf(&data) {
                for w in probs.windows(2) {
                    prop_assert!(w[1] >= w[0] - 1e-15, "{} < {}", w[1], w[0]);
                }
            }
        }

        #[test]
        fn hist_counts_sum_to_n(
            data in proptest::collection::vec(-1e3_f64..1e3, 5..=100)
        ) {
            if let Some(r) = histogram_bins(&data, BinMethod::Sturges) {
                let total: usize = r.counts.iter().sum();
                prop_assert_eq!(total, data.len());
            }
        }

        #[test]
        fn kde_density_non_negative(
            data in proptest::collection::vec(-100.0_f64..100.0, 5..=50)
        ) {
            if let Some(r) = kde(&data, BandwidthMethod::Silverman, 128) {
                for &d in &r.density {
                    prop_assert!(d >= 0.0, "negative density: {d}");
                }
            }
        }

        #[test]
        fn kde_integral_approx_one(
            data in proptest::collection::vec(-100.0_f64..100.0, 10..=80)
        ) {
            if let Some(r) = kde(&data, BandwidthMethod::Silverman, 512) {
                let dx = r.x[1] - r.x[0];
                let integral: f64 = r.density.iter().sum::<f64>() * dx;
                prop_assert!(
                    (integral - 1.0).abs() < 0.1,
                    "integral = {integral}, expected ≈ 1.0"
                );
            }
        }

        #[test]
        fn fit_normal_aic_finite(
            data in proptest::collection::vec(-100.0_f64..100.0, 5..=50)
        ) {
            if let Some(r) = fit_normal(&data) {
                prop_assert!(r.aic.is_finite(), "AIC = {}", r.aic);
                prop_assert!(r.bic.is_finite(), "BIC = {}", r.bic);
                prop_assert!(r.log_likelihood.is_finite(), "LL = {}", r.log_likelihood);
            }
        }

        #[test]
        fn fit_gamma_alpha_positive(
            data in proptest::collection::vec(0.1_f64..100.0, 5..=50)
        ) {
            if let Some(r) = fit_gamma(&data) {
                let alpha = r.parameters[0].1;
                let beta = r.parameters[1].1;
                prop_assert!(alpha > 0.0, "α = {alpha}");
                prop_assert!(beta > 0.0, "β = {beta}");
                prop_assert!(r.aic.is_finite(), "AIC = {}", r.aic);
            }
        }
    }
}