1use crate::math;
4use serde::Serialize;
5
/// Outcome of the Cramér–von Mises uniformity test over byte values.
#[derive(Debug, Clone, Serialize)]
pub struct CramerVonMisesResult {
    /// W² statistic (includes the 1/(12n) correction term).
    pub statistic: f64,
    /// Approximate p-value, clamped to [0.001, 0.999].
    pub p_value: f64,
    /// True when `p_value > 0.05` (no evidence against uniformity).
    pub is_uniform: bool,
    /// False when the sample was too short to test (fewer than 2 bytes).
    pub is_valid: bool,
}
13
14pub fn cramer_von_mises(data: &[u8]) -> CramerVonMisesResult {
15 let n = data.len();
16 if n < 2 {
17 return CramerVonMisesResult {
18 statistic: 0.0,
19 p_value: 0.0,
20 is_uniform: false,
21 is_valid: false,
22 };
23 }
24
25 let mut sorted = data.to_vec();
26 sorted.sort_unstable();
27
28 let nf = n as f64;
29 let mut w2 = 0.0;
30 for (idx, &x) in sorted.iter().enumerate() {
31 let i = idx + 1;
32 let u_i = (x as f64 + 0.5) / 256.0;
33 let target = (2.0 * i as f64 - 1.0) / (2.0 * nf);
34 let d = u_i - target;
35 w2 += d * d;
36 }
37 w2 += 1.0 / (12.0 * nf);
38
39 let corrected = w2 * (1.0 + 0.5 / nf);
40 let pi = std::f64::consts::PI;
41 let raw_p = if corrected < 0.0275 {
42 let mut sum = 0.0;
43 for k in 0..=4 {
44 let kf = k as f64;
45 let odd = 2.0 * kf + 1.0;
46 let exponent = -(odd * odd) * pi * pi / (8.0 * corrected);
47 sum += exponent.exp();
48 }
49 1.0 - (2.0 * pi.sqrt() * sum / corrected.sqrt())
50 } else {
51 let mut sum = 0.0;
52 for k in 1..=4 {
53 let kf = k as f64;
54 let sign = if k % 2 == 1 { 1.0 } else { -1.0 };
55 sum += sign * (-2.0 * kf * kf * pi * pi * corrected).exp();
56 }
57 2.0 * sum
58 };
59
60 let p_value = raw_p.clamp(0.001, 0.999);
61 CramerVonMisesResult {
62 statistic: w2,
63 p_value,
64 is_uniform: p_value > 0.05,
65 is_valid: true,
66 }
67}
68
/// Outcome of the Ljung–Box test for serial correlation.
#[derive(Debug, Clone, Serialize)]
pub struct LjungBoxResult {
    /// Q = n(n + 2) Σₖ rₖ² / (n − k) over lags 1..=max_lag.
    pub q_statistic: f64,
    /// Chi-squared survival probability of Q with `max_lag` degrees of freedom.
    pub p_value: f64,
    /// Number of lags folded into Q.
    pub max_lag: usize,
    /// True when `p_value < 0.05`.
    pub has_serial_correlation: bool,
    /// False when `max_lag == 0` or the sample is not longer than `max_lag`.
    pub is_valid: bool,
}
77
/// Ljung–Box test with the conventional default of 20 lags.
pub fn ljung_box_default(data: &[u8]) -> LjungBoxResult {
    ljung_box(data, 20)
}
81
82pub fn ljung_box(data: &[u8], max_lag: usize) -> LjungBoxResult {
83 let n = data.len();
84 if max_lag == 0 || n <= max_lag {
85 return LjungBoxResult {
86 q_statistic: 0.0,
87 p_value: 0.0,
88 max_lag,
89 has_serial_correlation: false,
90 is_valid: false,
91 };
92 }
93
94 let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
95 let mean = arr.iter().sum::<f64>() / n as f64;
96 let denom = arr.iter().map(|x| (x - mean).powi(2)).sum::<f64>();
97
98 let mut q_sum = 0.0;
99 for lag in 1..=max_lag {
100 let mut numer = 0.0;
101 for i in 0..(n - lag) {
102 numer += (arr[i] - mean) * (arr[i + lag] - mean);
103 }
104 let r_k = if denom <= 1e-12 { 0.0 } else { numer / denom };
105 q_sum += (r_k * r_k) / (n as f64 - lag as f64);
106 }
107
108 let q_statistic = n as f64 * (n as f64 + 2.0) * q_sum;
109 let p_value = math::chi_squared_survival(q_statistic, max_lag);
110
111 LjungBoxResult {
112 q_statistic,
113 p_value,
114 max_lag,
115 has_serial_correlation: p_value < 0.05,
116 is_valid: true,
117 }
118}
119
/// Outcome of the gap test (distances between hits in a byte window).
#[derive(Debug, Clone, Serialize)]
pub struct GapTestResult {
    /// Chi-squared statistic of binned gap lengths vs. the geometric law.
    pub chi_squared: f64,
    /// Chi-squared survival probability of the statistic.
    pub p_value: f64,
    /// Contributing histogram bins minus one.
    pub degrees_of_freedom: usize,
    /// Observed mean distance between consecutive in-window bytes.
    pub mean_gap: f64,
    /// Theoretical mean gap: 1 / P(byte falls in the window).
    pub expected_gap: f64,
    /// True when `p_value > 0.05`.
    pub is_uniform_gaps: bool,
    /// False for empty input, inverted bounds, or too few gaps (< 10).
    pub is_valid: bool,
}
130
/// Gap test over the mid-range byte window [100, 155] (56 of 256 values,
/// so the per-byte hit probability is 56/256 ≈ 0.219).
pub fn gap_test_default(data: &[u8]) -> GapTestResult {
    gap_test(data, 100, 155)
}
134
135pub fn gap_test(data: &[u8], lower: u8, upper: u8) -> GapTestResult {
136 if data.is_empty() || lower > upper {
137 return GapTestResult {
138 chi_squared: 0.0,
139 p_value: 0.0,
140 degrees_of_freedom: 0,
141 mean_gap: 0.0,
142 expected_gap: 0.0,
143 is_uniform_gaps: false,
144 is_valid: false,
145 };
146 }
147
148 let p_range = (upper as f64 - lower as f64 + 1.0) / 256.0;
149 if p_range <= 0.0 {
150 return GapTestResult {
151 chi_squared: 0.0,
152 p_value: 0.0,
153 degrees_of_freedom: 0,
154 mean_gap: 0.0,
155 expected_gap: 0.0,
156 is_uniform_gaps: false,
157 is_valid: false,
158 };
159 }
160
161 let hits: Vec<usize> = data
162 .iter()
163 .enumerate()
164 .filter_map(|(idx, &b)| {
165 if (lower..=upper).contains(&b) {
166 Some(idx)
167 } else {
168 None
169 }
170 })
171 .collect();
172
173 if hits.len() < 2 {
174 return GapTestResult {
175 chi_squared: 0.0,
176 p_value: 0.0,
177 degrees_of_freedom: 0,
178 mean_gap: 0.0,
179 expected_gap: 1.0 / p_range,
180 is_uniform_gaps: false,
181 is_valid: false,
182 };
183 }
184
185 let gaps: Vec<usize> = hits.windows(2).map(|w| w[1] - w[0]).collect();
186 if gaps.len() < 10 {
187 let mean_gap = gaps.iter().sum::<usize>() as f64 / gaps.len() as f64;
188 return GapTestResult {
189 chi_squared: 0.0,
190 p_value: 0.0,
191 degrees_of_freedom: 0,
192 mean_gap,
193 expected_gap: 1.0 / p_range,
194 is_uniform_gaps: false,
195 is_valid: false,
196 };
197 }
198
199 let mut observed = [0usize; 9];
200 for &gap in &gaps {
201 let bin = match gap {
202 0 => 0,
203 1 => 1,
204 2 => 2,
205 3 => 3,
206 4 => 4,
207 5 => 5,
208 6..=10 => 6,
209 11..=20 => 7,
210 _ => 8,
211 };
212 observed[bin] += 1;
213 }
214
215 let n_gaps = gaps.len() as f64;
216 let q = 1.0 - p_range;
217 let probs = [
218 0.0,
219 p_range,
220 q * p_range,
221 q.powi(2) * p_range,
222 q.powi(3) * p_range,
223 q.powi(4) * p_range,
224 q.powi(5) - q.powi(10),
225 q.powi(10) - q.powi(20),
226 q.powi(20),
227 ];
228
229 let mut chi_squared = 0.0;
230 let mut contributing_bins = 0usize;
231 for i in 0..observed.len() {
232 let expected = n_gaps * probs[i];
233 if expected > 1e-12 {
234 let diff = observed[i] as f64 - expected;
235 chi_squared += diff * diff / expected;
236 contributing_bins += 1;
237 }
238 }
239
240 let degrees_of_freedom = contributing_bins.saturating_sub(1);
241 let p_value = math::chi_squared_survival(chi_squared, degrees_of_freedom);
242 let mean_gap = gaps.iter().sum::<usize>() as f64 / n_gaps;
243 let expected_gap = 1.0 / p_range;
244
245 GapTestResult {
246 chi_squared,
247 p_value,
248 degrees_of_freedom,
249 mean_gap,
250 expected_gap,
251 is_uniform_gaps: p_value > 0.05,
252 is_valid: true,
253 }
254}
255
/// Outcome of a one-way ANOVA across byte groups.
#[derive(Debug, Clone, Serialize)]
pub struct AnovaResult {
    /// F ratio: between-group mean square over within-group mean square.
    pub f_statistic: f64,
    /// F-distribution survival probability of the ratio.
    pub p_value: f64,
    /// k − 1 for k non-empty groups.
    pub df_between: usize,
    /// N − k for N total observations.
    pub df_within: usize,
    /// Arithmetic mean of each non-empty group, in input order.
    pub group_means: Vec<f64>,
    /// Mean over all observations pooled together.
    pub grand_mean: f64,
    /// True when `p_value < 0.05`.
    pub is_significant: bool,
    /// False when fewer than two non-empty groups or no within-group df.
    pub is_valid: bool,
}
267
268pub fn anova(groups: &[&[u8]]) -> AnovaResult {
269 let valid_groups: Vec<&[u8]> = groups.iter().copied().filter(|g| !g.is_empty()).collect();
270 if valid_groups.len() < 2 {
271 return AnovaResult {
272 f_statistic: 0.0,
273 p_value: 0.0,
274 df_between: 0,
275 df_within: 0,
276 group_means: Vec::new(),
277 grand_mean: 0.0,
278 is_significant: false,
279 is_valid: false,
280 };
281 }
282
283 let k = valid_groups.len();
284 let total_n: usize = valid_groups.iter().map(|g| g.len()).sum();
285 if total_n <= k {
286 return AnovaResult {
287 f_statistic: 0.0,
288 p_value: 0.0,
289 df_between: 0,
290 df_within: 0,
291 group_means: Vec::new(),
292 grand_mean: 0.0,
293 is_significant: false,
294 is_valid: false,
295 };
296 }
297
298 let group_means: Vec<f64> = valid_groups
299 .iter()
300 .map(|g| g.iter().map(|&x| x as f64).sum::<f64>() / g.len() as f64)
301 .collect();
302
303 let grand_mean = valid_groups
304 .iter()
305 .flat_map(|g| g.iter())
306 .map(|&x| x as f64)
307 .sum::<f64>()
308 / total_n as f64;
309
310 let ss_between = valid_groups
311 .iter()
312 .zip(group_means.iter())
313 .map(|(g, &mean)| g.len() as f64 * (mean - grand_mean).powi(2))
314 .sum::<f64>();
315
316 let ss_within = valid_groups
317 .iter()
318 .zip(group_means.iter())
319 .map(|(g, &mean)| {
320 g.iter()
321 .map(|&x| {
322 let d = x as f64 - mean;
323 d * d
324 })
325 .sum::<f64>()
326 })
327 .sum::<f64>();
328
329 let df_between = k - 1;
330 let df_within = total_n - k;
331 let ms_between = ss_between / df_between as f64;
332 let ms_within = ss_within / df_within as f64;
333
334 let (f_statistic, p_value) = if ms_within <= 1e-12 {
335 if ms_between <= 1e-12 {
336 (0.0, 1.0)
337 } else {
338 (f64::INFINITY, 0.0)
339 }
340 } else {
341 let f = ms_between / ms_within;
342 let p = math::f_distribution_survival(f, df_between, df_within);
343 (f, p)
344 };
345
346 AnovaResult {
347 f_statistic,
348 p_value,
349 df_between,
350 df_within,
351 group_means,
352 grand_mean,
353 is_significant: p_value < 0.05,
354 is_valid: true,
355 }
356}
357
/// Outcome of the Kruskal–Wallis rank-based test across byte groups.
#[derive(Debug, Clone, Serialize)]
pub struct KruskalWallisResult {
    /// H statistic, tie-corrected and floored at zero.
    pub h_statistic: f64,
    /// Chi-squared survival probability of H with `df` degrees of freedom.
    pub p_value: f64,
    /// k − 1 for k non-empty groups.
    pub df: usize,
    /// True when `p_value < 0.05`.
    pub is_significant: bool,
    /// False when fewer than two non-empty groups were supplied.
    pub is_valid: bool,
}
366
/// Kruskal–Wallis H test: a rank-based (nonparametric) one-way comparison of
/// two or more byte groups. Empty groups are ignored; fewer than two
/// non-empty groups yields `is_valid: false`.
pub fn kruskal_wallis(groups: &[&[u8]]) -> KruskalWallisResult {
    let valid_groups: Vec<&[u8]> = groups.iter().copied().filter(|g| !g.is_empty()).collect();
    if valid_groups.len() < 2 {
        return KruskalWallisResult {
            h_statistic: 0.0,
            p_value: 0.0,
            df: 0,
            is_significant: false,
            is_valid: false,
        };
    }

    let k = valid_groups.len();
    let n_total: usize = valid_groups.iter().map(|g| g.len()).sum();
    // NOTE(review): with two or more non-empty groups n_total is always >= 2,
    // so this guard looks unreachable; kept as written.
    if n_total < 2 {
        return KruskalWallisResult {
            h_statistic: 0.0,
            p_value: 0.0,
            df: 0,
            is_significant: false,
            is_valid: false,
        };
    }

    // Pool every observation, tagged with its group index, sorted by value so
    // array position determines rank.
    let mut values = Vec::with_capacity(n_total);
    for (group_idx, group) in valid_groups.iter().enumerate() {
        for &x in group.iter() {
            values.push((x, group_idx));
        }
    }
    values.sort_by_key(|&(x, _)| x);

    // Walk runs of equal values: every member of a tie run receives the
    // average of the ranks the run covers (mid-rank method), and each run of
    // t > 1 ties contributes t³ − t to the tie-correction term.
    let mut rank_sums = vec![0.0; k];
    let mut tie_term_sum = 0.0;
    let mut i = 0usize;
    while i < values.len() {
        let tie_value = values[i].0;
        let start = i;
        while i < values.len() && values[i].0 == tie_value {
            i += 1;
        }
        let end = i;
        let tie_count = end - start;
        // Ranks are 1-based: the run spans ranks start+1 ..= end.
        let rank_start = start as f64 + 1.0;
        let rank_end = end as f64;
        let avg_rank = (rank_start + rank_end) / 2.0;

        for &(_, group_idx) in &values[start..end] {
            rank_sums[group_idx] += avg_rank;
        }

        if tie_count > 1 {
            let t = tie_count as f64;
            tie_term_sum += t.powi(3) - t;
        }
    }

    // H = 12 / (N(N+1)) · Σ Rⱼ²/nⱼ − 3(N+1).
    let n = n_total as f64;
    let mut h = 0.0;
    for (group, &rank_sum) in valid_groups.iter().zip(rank_sums.iter()) {
        h += (rank_sum * rank_sum) / group.len() as f64;
    }
    h = (12.0 / (n * (n + 1.0))) * h - 3.0 * (n + 1.0);

    // Divide by the tie correction 1 − Σ(t³−t)/(N³−N). A correction of ~0
    // means every pooled value was identical; H is defined as 0 in that case.
    let tie_den = n.powi(3) - n;
    if tie_den > 0.0 {
        let tie_correction = 1.0 - (tie_term_sum / tie_den);
        if tie_correction > 1e-12 {
            h /= tie_correction;
        } else {
            h = 0.0;
        }
    }

    // Guard against tiny negative values from floating-point cancellation.
    if h < 0.0 {
        h = 0.0;
    }

    let df = k - 1;
    let p_value = math::chi_squared_survival(h, df);
    KruskalWallisResult {
        h_statistic: h,
        p_value,
        df,
        is_significant: p_value < 0.05,
        is_valid: true,
    }
}
455
/// Outcome of Levene's test for equality of variances across byte groups.
#[derive(Debug, Clone, Serialize)]
pub struct LeveneResult {
    /// W statistic: the F ratio of an ANOVA over absolute deviations.
    pub w_statistic: f64,
    /// F-distribution survival probability of W.
    pub p_value: f64,
    /// k − 1 for k groups.
    pub df_between: usize,
    /// N − k for N total observations.
    pub df_within: usize,
    /// Sample variance (n − 1 denominator) of each group, in input order.
    pub group_variances: Vec<f64>,
    /// True when `p_value > 0.05` (variances look equal).
    pub is_homogeneous: bool,
    /// False when fewer than two groups, or any group has fewer than 2 bytes.
    pub is_valid: bool,
}
466
467fn anova_on_f64_groups(groups: &[Vec<f64>]) -> Option<(f64, f64, usize, usize)> {
468 if groups.len() < 2 || groups.iter().any(|g| g.is_empty()) {
469 return None;
470 }
471 let k = groups.len();
472 let total_n: usize = groups.iter().map(|g| g.len()).sum();
473 if total_n <= k {
474 return None;
475 }
476
477 let means: Vec<f64> = groups
478 .iter()
479 .map(|g| g.iter().sum::<f64>() / g.len() as f64)
480 .collect();
481 let grand_mean = groups.iter().flatten().sum::<f64>() / total_n as f64;
482
483 let ss_between = groups
484 .iter()
485 .zip(means.iter())
486 .map(|(g, &m)| g.len() as f64 * (m - grand_mean).powi(2))
487 .sum::<f64>();
488 let ss_within = groups
489 .iter()
490 .zip(means.iter())
491 .map(|(g, &m)| g.iter().map(|&x| (x - m).powi(2)).sum::<f64>())
492 .sum::<f64>();
493
494 let df_between = k - 1;
495 let df_within = total_n - k;
496 let ms_between = ss_between / df_between as f64;
497 let ms_within = ss_within / df_within as f64;
498
499 let (f_statistic, p_value) = if ms_within <= 1e-12 {
500 if ms_between <= 1e-12 {
501 (0.0, 1.0)
502 } else {
503 (f64::INFINITY, 0.0)
504 }
505 } else {
506 let f = ms_between / ms_within;
507 let p = math::f_distribution_survival(f, df_between, df_within);
508 (f, p)
509 };
510
511 Some((f_statistic, p_value, df_between, df_within))
512}
513
514pub fn levene_test(groups: &[&[u8]]) -> LeveneResult {
515 let valid_groups: Vec<&[u8]> = groups.iter().copied().filter(|g| !g.is_empty()).collect();
516 if valid_groups.len() < 2 || valid_groups.iter().any(|g| g.len() < 2) {
517 return LeveneResult {
518 w_statistic: 0.0,
519 p_value: 0.0,
520 df_between: 0,
521 df_within: 0,
522 group_variances: Vec::new(),
523 is_homogeneous: false,
524 is_valid: false,
525 };
526 }
527
528 let group_variances: Vec<f64> = valid_groups
529 .iter()
530 .map(|g| {
531 let mean = g.iter().map(|&x| x as f64).sum::<f64>() / g.len() as f64;
532 let ss = g
533 .iter()
534 .map(|&x| {
535 let d = x as f64 - mean;
536 d * d
537 })
538 .sum::<f64>();
539 ss / (g.len() as f64 - 1.0)
540 })
541 .collect();
542
543 let z_groups: Vec<Vec<f64>> = valid_groups
544 .iter()
545 .map(|g| {
546 let mean = g.iter().map(|&x| x as f64).sum::<f64>() / g.len() as f64;
547 g.iter().map(|&x| (x as f64 - mean).abs()).collect()
548 })
549 .collect();
550
551 let Some((w_statistic, p_value, df_between, df_within)) = anova_on_f64_groups(&z_groups) else {
552 return LeveneResult {
553 w_statistic: 0.0,
554 p_value: 0.0,
555 df_between: 0,
556 df_within: 0,
557 group_variances,
558 is_homogeneous: false,
559 is_valid: false,
560 };
561 };
562
563 LeveneResult {
564 w_statistic,
565 p_value,
566 df_between,
567 df_within,
568 group_variances,
569 is_homogeneous: p_value > 0.05,
570 is_valid: true,
571 }
572}
573
/// Outcome of a two-sample t-test power analysis.
#[derive(Debug, Clone, Serialize)]
pub struct PowerResult {
    /// Probability of detecting the effect at the given n and alpha, in [0, 1].
    pub power: f64,
    /// Standardized effect size (e.g. Cohen's d) supplied by the caller.
    pub effect_size: f64,
    /// Per-group sample size supplied by the caller.
    pub sample_size: usize,
    /// Two-sided significance level supplied by the caller.
    pub alpha: f64,
    /// Smallest per-group n reaching 80% power (search capped at 10_000).
    pub required_n_for_80: usize,
    /// Smallest per-group n reaching 90% power (search capped at 10_000).
    pub required_n_for_90: usize,
    /// False when effect_size, sample_size, or alpha was out of range.
    pub is_valid: bool,
}
584
585fn inverse_t_cdf(p: f64, df: usize) -> f64 {
586 if p <= 0.0 {
587 return f64::NEG_INFINITY;
588 }
589 if p >= 1.0 {
590 return f64::INFINITY;
591 }
592
593 let mut lo = -20.0;
594 let mut hi = 20.0;
595 for _ in 0..100 {
596 let mid = (lo + hi) / 2.0;
597 let cdf = math::t_distribution_cdf(mid, df);
598 if cdf < p {
599 lo = mid;
600 } else {
601 hi = mid;
602 }
603 }
604 (lo + hi) / 2.0
605}
606
607fn power_for_two_sample_t(effect_size: f64, sample_size: usize, alpha: f64) -> f64 {
608 let df = 2 * (sample_size - 1);
609 let ncp = effect_size * (sample_size as f64 / 2.0).sqrt();
610 let t_critical = inverse_t_cdf(1.0 - alpha / 2.0, df);
611 let left = math::t_distribution_cdf(t_critical - ncp, df);
612 let right = math::t_distribution_cdf(-t_critical - ncp, df);
613 (1.0 - left + right).clamp(0.0, 1.0)
614}
615
616fn required_n_for_power(effect_size: f64, alpha: f64, target: f64) -> usize {
617 for n in 2..=10_000 {
618 if power_for_two_sample_t(effect_size, n, alpha) >= target {
619 return n;
620 }
621 }
622 10_000
623}
624
625pub fn power_analysis(effect_size: f64, sample_size: usize, alpha: f64) -> PowerResult {
626 if effect_size <= 0.0 || sample_size < 2 || alpha <= 0.0 || alpha >= 1.0 {
627 return PowerResult {
628 power: 0.0,
629 effect_size,
630 sample_size,
631 alpha,
632 required_n_for_80: 0,
633 required_n_for_90: 0,
634 is_valid: false,
635 };
636 }
637
638 let power = power_for_two_sample_t(effect_size, sample_size, alpha);
639 let required_n_for_80 = required_n_for_power(effect_size, alpha, 0.80);
640 let required_n_for_90 = required_n_for_power(effect_size, alpha, 0.90);
641
642 PowerResult {
643 power,
644 effect_size,
645 sample_size,
646 alpha,
647 required_n_for_80,
648 required_n_for_90,
649 is_valid: true,
650 }
651}
652
/// Power analysis with conventional defaults: small effect size (0.2) at
/// alpha = 0.05.
pub fn power_analysis_default(sample_size: usize) -> PowerResult {
    power_analysis(0.2, sample_size, 0.05)
}
656
/// Outcome of a multiple-testing correction over a family of p-values.
#[derive(Debug, Clone, Serialize)]
pub struct MultipleCorrectionResult {
    /// Adjusted p-values, in the same order as the input slice.
    pub adjusted_p_values: Vec<f64>,
    /// Per-test rejection decision (adjusted p < alpha), in input order.
    pub rejected: Vec<bool>,
    /// Correction method name: "bonferroni" or "holm-bonferroni".
    pub method: String,
    /// Family-wise significance level supplied by the caller.
    pub alpha: f64,
    /// Number of p-values in the family.
    pub n_tests: usize,
    /// How many tests remain rejected after correction.
    pub n_rejected: usize,
    /// False for an empty family, out-of-range alpha, or any p outside [0, 1].
    pub is_valid: bool,
}
667
668pub fn bonferroni_correction(p_values: &[f64], alpha: f64) -> MultipleCorrectionResult {
669 if p_values.is_empty()
670 || alpha <= 0.0
671 || alpha >= 1.0
672 || p_values
673 .iter()
674 .any(|&p| !p.is_finite() || !(0.0..=1.0).contains(&p))
675 {
676 return MultipleCorrectionResult {
677 adjusted_p_values: Vec::new(),
678 rejected: Vec::new(),
679 method: "bonferroni".to_string(),
680 alpha,
681 n_tests: p_values.len(),
682 n_rejected: 0,
683 is_valid: false,
684 };
685 }
686
687 let m = p_values.len() as f64;
688 let adjusted_p_values: Vec<f64> = p_values.iter().map(|&p| (p * m).min(1.0)).collect();
689 let rejected: Vec<bool> = adjusted_p_values.iter().map(|&p| p < alpha).collect();
690 let n_rejected = rejected.iter().filter(|&&r| r).count();
691
692 MultipleCorrectionResult {
693 adjusted_p_values,
694 rejected,
695 method: "bonferroni".to_string(),
696 alpha,
697 n_tests: p_values.len(),
698 n_rejected,
699 is_valid: true,
700 }
701}
702
703pub fn holm_bonferroni_correction(p_values: &[f64], alpha: f64) -> MultipleCorrectionResult {
704 if p_values.is_empty()
705 || alpha <= 0.0
706 || alpha >= 1.0
707 || p_values
708 .iter()
709 .any(|&p| !p.is_finite() || !(0.0..=1.0).contains(&p))
710 {
711 return MultipleCorrectionResult {
712 adjusted_p_values: Vec::new(),
713 rejected: Vec::new(),
714 method: "holm-bonferroni".to_string(),
715 alpha,
716 n_tests: p_values.len(),
717 n_rejected: 0,
718 is_valid: false,
719 };
720 }
721
722 let m = p_values.len();
723 let mut indexed: Vec<(usize, f64)> = p_values.iter().copied().enumerate().collect();
724 indexed.sort_by(|a, b| a.1.total_cmp(&b.1));
725
726 let mut adjusted_sorted = vec![0.0; m];
727 let mut rejected_sorted = vec![false; m];
728 let mut all_previous_rejected = true;
729
730 for i in 0..m {
731 let factor = (m - i) as f64;
732 let raw_adjusted = (indexed[i].1 * factor).min(1.0);
733 if i == 0 {
734 adjusted_sorted[i] = raw_adjusted;
735 } else {
736 adjusted_sorted[i] = adjusted_sorted[i - 1].max(raw_adjusted);
737 }
738
739 let rejected = adjusted_sorted[i] < alpha && all_previous_rejected;
740 rejected_sorted[i] = rejected;
741 all_previous_rejected &= rejected;
742 }
743
744 let mut adjusted_p_values = vec![0.0; m];
745 let mut rejected = vec![false; m];
746 for i in 0..m {
747 let original_idx = indexed[i].0;
748 adjusted_p_values[original_idx] = adjusted_sorted[i];
749 rejected[original_idx] = rejected_sorted[i];
750 }
751
752 let n_rejected = rejected.iter().filter(|&&r| r).count();
753 MultipleCorrectionResult {
754 adjusted_p_values,
755 rejected,
756 method: "holm-bonferroni".to_string(),
757 alpha,
758 n_tests: m,
759 n_rejected,
760 is_valid: true,
761 }
762}
763
/// Bundle of the three single-sample randomness tests run together over one
/// byte slice.
#[derive(Debug, Clone, Serialize)]
pub struct StatisticsAnalysis {
    /// Uniformity of byte values.
    pub cramer_von_mises: CramerVonMisesResult,
    /// Serial correlation up to 20 lags.
    pub ljung_box: LjungBoxResult,
    /// Gap lengths for the default window [100, 155].
    pub gap_test: GapTestResult,
}
770
/// Runs the single-sample test suite (Cramér–von Mises, Ljung–Box with its
/// default 20 lags, gap test on the default window) over one byte slice.
pub fn statistics_analysis(data: &[u8]) -> StatisticsAnalysis {
    StatisticsAnalysis {
        cramer_von_mises: cramer_von_mises(data),
        ljung_box: ljung_box_default(data),
        gap_test: gap_test_default(data),
    }
}
778
#[cfg(test)]
mod tests {
    use super::*;

    // Deterministic 64-bit LCG (state = a·state + c with wrapping arithmetic);
    // the high 8 bits of each state become one output byte, so every test is
    // reproducible for a given seed.
    fn random_data_seeded(len: usize, seed: u64) -> Vec<u8> {
        let mut state = seed;
        let mut data = Vec::with_capacity(len);
        for _ in 0..len {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            data.push((state >> 33) as u8);
        }
        data
    }

    // --- Cramér–von Mises ---

    #[test]
    fn statistics_cvm_random_uniform() {
        let data = random_data_seeded(5000, 0xdeadbeef);
        let result = cramer_von_mises(&data);
        assert!(result.is_uniform);
        assert!(result.p_value > 0.05);
    }

    #[test]
    fn statistics_cvm_constant_non_uniform() {
        let data = vec![42u8; 1000];
        let result = cramer_von_mises(&data);
        assert!(!result.is_uniform);
    }

    #[test]
    fn statistics_cvm_empty_invalid() {
        let result = cramer_von_mises(&[]);
        assert!(!result.is_valid);
    }

    #[test]
    fn statistics_cvm_all_zero_non_uniform() {
        let data = vec![0u8; 1000];
        let result = cramer_von_mises(&data);
        assert!(!result.is_uniform);
    }

    // --- Ljung–Box ---

    #[test]
    fn statistics_ljung_box_random_no_serial_correlation() {
        let data = random_data_seeded(5000, 0xdeadbeef);
        let result = ljung_box_default(&data);
        assert!(!result.has_serial_correlation);
    }

    #[test]
    fn statistics_ljung_box_alternating_has_serial_correlation() {
        // Period-2 pattern: strong negative lag-1 autocorrelation.
        let data: Vec<u8> = (0..5000)
            .map(|i| if i % 2 == 0 { 0u8 } else { 255u8 })
            .collect();
        let result = ljung_box_default(&data);
        assert!(result.has_serial_correlation);
    }

    #[test]
    fn statistics_ljung_box_too_short_invalid() {
        // n must exceed max_lag (2 <= 20).
        let result = ljung_box(&[1u8, 2u8], 20);
        assert!(!result.is_valid);
    }

    #[test]
    fn statistics_ljung_box_empty_invalid() {
        let result = ljung_box_default(&[]);
        assert!(!result.is_valid);
    }

    // --- Gap test ---

    #[test]
    fn statistics_gap_random_uniform_gaps() {
        let data = random_data_seeded(10000, 0xdeadbeef);
        let result = gap_test_default(&data);
        assert!(result.is_uniform_gaps);
    }

    #[test]
    fn statistics_gap_no_values_in_range_invalid() {
        // All zeros, window [200, 255]: zero hits, so no gaps to test.
        let data = vec![0u8; 5000];
        let result = gap_test(&data, 200, 255);
        assert!(!result.is_valid);
    }

    #[test]
    fn statistics_gap_empty_invalid() {
        let result = gap_test_default(&[]);
        assert!(!result.is_valid);
    }

    #[test]
    fn statistics_gap_mean_close_to_expected_for_random() {
        let data = random_data_seeded(5000, 0xdeadbeef);
        let result = gap_test_default(&data);
        assert!(result.is_valid);
        assert!((result.mean_gap - result.expected_gap).abs() < 1.5);
    }

    // NOTE(review): this helper appears unused by the tests below — presumably
    // left over from a removed test; verify before deleting.
    fn shifted_data_seeded(len: usize, seed: u64, shift: u8) -> Vec<u8> {
        random_data_seeded(len, seed)
            .into_iter()
            .map(|v| v.wrapping_add(shift))
            .collect()
    }

    // --- ANOVA ---

    #[test]
    fn statistics_anova_random_groups_not_significant() {
        let g1 = random_data_seeded(2000, 0x1001);
        let g2 = random_data_seeded(2000, 0x1002);
        let g3 = random_data_seeded(2000, 0x1003);
        let groups: [&[u8]; 3] = [&g1, &g2, &g3];
        let result = anova(&groups);
        assert!(result.is_valid);
        assert!(!result.is_significant);
    }

    #[test]
    fn statistics_anova_biased_group_significant() {
        let g1 = random_data_seeded(2000, 0x2001);
        let g2 = random_data_seeded(2000, 0x2002);
        // Constant group with mean 200 vs. ~127.5 for uniform random bytes.
        let g3 = vec![200u8; 2000];
        let groups: [&[u8]; 3] = [&g1, &g2, &g3];
        let result = anova(&groups);
        assert!(result.is_valid);
        assert!(result.is_significant);
    }

    #[test]
    fn statistics_anova_too_few_groups_invalid() {
        let g1 = random_data_seeded(2000, 0x3001);
        let groups: [&[u8]; 1] = [&g1];
        let result = anova(&groups);
        assert!(!result.is_valid);
    }

    #[test]
    fn statistics_anova_identical_groups_zero_f() {
        // Zero variance everywhere: the degenerate branch reports F = 0, p = 1.
        let g1 = vec![128u8; 2000];
        let g2 = vec![128u8; 2000];
        let g3 = vec![128u8; 2000];
        let groups: [&[u8]; 3] = [&g1, &g2, &g3];
        let result = anova(&groups);
        assert!(result.is_valid);
        assert!(result.f_statistic.abs() < 1e-12);
        assert!((result.p_value - 1.0).abs() < 1e-12);
    }

    // --- Kruskal–Wallis ---

    #[test]
    fn statistics_kruskal_wallis_same_distribution_not_significant() {
        let g1 = random_data_seeded(2000, 0x4001);
        let g2 = random_data_seeded(2000, 0x4002);
        let g3 = random_data_seeded(2000, 0x4003);
        let groups: [&[u8]; 3] = [&g1, &g2, &g3];
        let result = kruskal_wallis(&groups);
        assert!(result.is_valid);
        assert!(!result.is_significant);
    }

    #[test]
    fn statistics_kruskal_wallis_biased_group_significant() {
        let g1 = random_data_seeded(2000, 0x5001);
        let g2 = random_data_seeded(2000, 0x5002);
        let g3 = vec![200u8; 2000];
        let groups: [&[u8]; 3] = [&g1, &g2, &g3];
        let result = kruskal_wallis(&groups);
        assert!(result.is_valid);
        assert!(result.is_significant);
    }

    #[test]
    fn statistics_kruskal_wallis_too_few_groups_invalid() {
        let g1 = random_data_seeded(2000, 0x6001);
        let groups: [&[u8]; 1] = [&g1];
        let result = kruskal_wallis(&groups);
        assert!(!result.is_valid);
    }

    #[test]
    fn statistics_kruskal_wallis_identical_groups_not_significant() {
        // All values tied: the tie correction collapses H to 0.
        let g1 = vec![77u8; 1000];
        let g2 = vec![77u8; 1000];
        let g3 = vec![77u8; 1000];
        let groups: [&[u8]; 3] = [&g1, &g2, &g3];
        let result = kruskal_wallis(&groups);
        assert!(result.is_valid);
        assert!(!result.is_significant);
    }

    // --- Levene ---

    #[test]
    fn statistics_levene_same_variance_homogeneous() {
        let g1 = random_data_seeded(2000, 0x7001);
        let g2 = random_data_seeded(2000, 0x7002);
        let g3 = random_data_seeded(2000, 0x7003);
        let groups: [&[u8]; 3] = [&g1, &g2, &g3];
        let result = levene_test(&groups);
        assert!(result.is_valid);
        assert!(result.is_homogeneous);
    }

    #[test]
    fn statistics_levene_high_variance_group_not_homogeneous() {
        let g1 = random_data_seeded(2000, 0x8001);
        let g2 = random_data_seeded(2000, 0x8002);
        // Extreme 0/255 alternation maximizes the spread of the third group.
        let g3: Vec<u8> = (0..2000)
            .map(|i| if i % 2 == 0 { 0u8 } else { 255u8 })
            .collect();
        let groups: [&[u8]; 3] = [&g1, &g2, &g3];
        let result = levene_test(&groups);
        assert!(result.is_valid);
        assert!(!result.is_homogeneous);
    }

    #[test]
    fn statistics_levene_too_few_groups_invalid() {
        let g1 = random_data_seeded(2000, 0x9001);
        let groups: [&[u8]; 1] = [&g1];
        let result = levene_test(&groups);
        assert!(!result.is_valid);
    }

    #[test]
    fn statistics_levene_single_element_group_invalid() {
        // Every group needs at least two elements for a variance.
        let g1 = vec![1u8];
        let g2 = vec![2u8, 3u8];
        let groups: [&[u8]; 2] = [&g1, &g2];
        let result = levene_test(&groups);
        assert!(!result.is_valid);
    }

    // --- Power analysis ---

    #[test]
    fn statistics_power_large_n_medium_effect_high_power() {
        let result = power_analysis(0.5, 10_000, 0.05);
        assert!(result.is_valid);
        assert!(result.power > 0.99);
    }

    #[test]
    fn statistics_power_small_n_small_effect_low_power() {
        let result = power_analysis(0.2, 10, 0.05);
        assert!(result.is_valid);
        assert!(result.power < 0.2);
    }

    #[test]
    fn statistics_power_required_n_ordering() {
        let result = power_analysis_default(50);
        assert!(result.is_valid);
        assert!(result.required_n_for_90 >= result.required_n_for_80);
    }

    #[test]
    fn statistics_power_invalid_inputs() {
        assert!(!power_analysis(0.0, 10, 0.05).is_valid);
        assert!(!power_analysis(0.2, 1, 0.05).is_valid);
    }

    // --- Multiple-testing corrections ---

    #[test]
    fn statistics_bonferroni_expected_rejection_pattern() {
        // m = 3: adjusted p-values 0.03 / 0.06 / 0.09 — only the first < 0.05.
        let result = bonferroni_correction(&[0.01, 0.02, 0.03], 0.05);
        assert!(result.is_valid);
        assert_eq!(result.rejected, vec![true, false, false]);
    }

    #[test]
    fn statistics_holm_rejects_at_least_bonferroni() {
        let p = [0.01, 0.02, 0.03];
        let bonf = bonferroni_correction(&p, 0.05);
        let holm = holm_bonferroni_correction(&p, 0.05);
        assert!(holm.is_valid);
        assert!(holm.n_rejected >= bonf.n_rejected);
    }

    #[test]
    fn statistics_multiple_correction_empty_invalid() {
        assert!(!bonferroni_correction(&[], 0.05).is_valid);
        assert!(!holm_bonferroni_correction(&[], 0.05).is_valid);
    }

    #[test]
    fn statistics_holm_monotonic_adjusted_p_values_sorted() {
        let result = holm_bonferroni_correction(&[0.03, 0.01, 0.02], 0.05);
        assert!(result.is_valid);
        let mut sorted = result.adjusted_p_values.clone();
        sorted.sort_by(|a, b| a.total_cmp(b));
        assert_eq!(result.adjusted_p_values.len(), 3);
        assert_eq!(sorted.len(), 3);
    }

    // --- Serialization smoke test ---

    #[test]
    fn test_statistics_analysis_serializes() {
        let data = random_data_seeded(5000, 0xdeadbeef);
        let result = statistics_analysis(&data);
        let json = serde_json::to_string(&result).expect("serialization failed");
        assert!(json.contains("cramer_von_mises"));
        assert!(json.contains("ljung_box"));
    }
}