1use cjc_repro::KahanAccumulatorF64;
9use crate::distributions::{t_cdf, chi2_cdf, f_cdf, normal_cdf};
10use crate::stats;
11
/// Result of a t-test (one-sample, two-sample Welch, or paired).
#[derive(Debug, Clone)]
pub struct TTestResult {
    /// Computed t statistic.
    pub t_statistic: f64,
    /// Two-sided p-value.
    pub p_value: f64,
    /// Degrees of freedom of the reference t distribution.
    pub df: f64,
    /// Sample mean (difference of means for the two-sample test).
    pub mean: f64,
    /// Standard error used in the denominator of t.
    pub se: f64,
}
25
26pub fn t_test(data: &[f64], mu: f64) -> Result<TTestResult, String> {
28 if data.len() < 2 {
29 return Err("t_test: need at least 2 observations".into());
30 }
31 let n = data.len() as f64;
32 let mean = {
33 let mut acc = KahanAccumulatorF64::new();
34 for &x in data { acc.add(x); }
35 acc.finalize() / n
36 };
37 let s = stats::sample_sd(data)?;
38 let se = s / n.sqrt();
39 let t = (mean - mu) / se;
40 let df = n - 1.0;
41 let p = 2.0 * (1.0 - t_cdf(t.abs(), df));
43 Ok(TTestResult { t_statistic: t, p_value: p, df, mean, se })
44}
45
46pub fn t_test_two_sample(x: &[f64], y: &[f64]) -> Result<TTestResult, String> {
48 if x.len() < 2 || y.len() < 2 {
49 return Err("t_test_two_sample: need at least 2 observations in each group".into());
50 }
51 let nx = x.len() as f64;
52 let ny = y.len() as f64;
53 let mean_x = {
54 let mut acc = KahanAccumulatorF64::new();
55 for &v in x { acc.add(v); }
56 acc.finalize() / nx
57 };
58 let mean_y = {
59 let mut acc = KahanAccumulatorF64::new();
60 for &v in y { acc.add(v); }
61 acc.finalize() / ny
62 };
63 let var_x = stats::sample_variance(x)?;
64 let var_y = stats::sample_variance(y)?;
65 let se = (var_x / nx + var_y / ny).sqrt();
66 let t = (mean_x - mean_y) / se;
67 let num = (var_x / nx + var_y / ny).powi(2);
69 let denom = (var_x / nx).powi(2) / (nx - 1.0) + (var_y / ny).powi(2) / (ny - 1.0);
70 let df = num / denom;
71 let p = 2.0 * (1.0 - t_cdf(t.abs(), df));
72 Ok(TTestResult { t_statistic: t, p_value: p, df, mean: mean_x - mean_y, se })
73}
74
75pub fn t_test_paired(x: &[f64], y: &[f64]) -> Result<TTestResult, String> {
77 if x.len() != y.len() {
78 return Err("t_test_paired: arrays must have same length".into());
79 }
80 let diffs: Vec<f64> = x.iter().zip(y.iter()).map(|(a, b)| a - b).collect();
81 t_test(&diffs, 0.0)
82}
83
/// Result of a chi-squared goodness-of-fit test.
#[derive(Debug, Clone)]
pub struct ChiSquaredResult {
    /// Chi-squared statistic.
    pub chi2: f64,
    /// Upper-tail p-value.
    pub p_value: f64,
    /// Degrees of freedom (number of categories - 1).
    pub df: f64,
}
95
96pub fn chi_squared_test(observed: &[f64], expected: &[f64]) -> Result<ChiSquaredResult, String> {
98 if observed.len() != expected.len() {
99 return Err("chi_squared_test: observed and expected must have same length".into());
100 }
101 if observed.is_empty() {
102 return Err("chi_squared_test: empty data".into());
103 }
104 let mut acc = KahanAccumulatorF64::new();
105 for i in 0..observed.len() {
106 if expected[i] <= 0.0 {
107 return Err(format!("chi_squared_test: expected[{i}] must be > 0"));
108 }
109 let diff = observed[i] - expected[i];
110 acc.add(diff * diff / expected[i]);
111 }
112 let chi2 = acc.finalize();
113 let df = (observed.len() - 1) as f64;
114 let p = 1.0 - chi2_cdf(chi2, df);
115 Ok(ChiSquaredResult { chi2, p_value: p, df })
116}
117
/// Result of a one-way ANOVA.
#[derive(Debug, Clone)]
pub struct AnovaResult {
    /// F statistic: MS_between / MS_within.
    pub f_statistic: f64,
    /// Upper-tail p-value from the F distribution.
    pub p_value: f64,
    /// Between-group degrees of freedom (k - 1).
    pub df_between: f64,
    /// Within-group degrees of freedom (N - k).
    pub df_within: f64,
    /// Between-group sum of squares.
    pub ss_between: f64,
    /// Within-group sum of squares.
    pub ss_within: f64,
}
132
133pub fn anova_oneway(groups: &[&[f64]]) -> Result<AnovaResult, String> {
135 if groups.len() < 2 {
136 return Err("anova_oneway: need at least 2 groups".into());
137 }
138 let k = groups.len();
139 let n_total: usize = groups.iter().map(|g| g.len()).sum();
140
141 let mut grand_acc = KahanAccumulatorF64::new();
143 for &g in groups {
144 for &x in g {
145 grand_acc.add(x);
146 }
147 }
148 let grand_mean = grand_acc.finalize() / n_total as f64;
149
150 let mut ss_between_acc = KahanAccumulatorF64::new();
152 let mut ss_within_acc = KahanAccumulatorF64::new();
153 for &g in groups {
154 let ni = g.len() as f64;
155 let mut group_acc = KahanAccumulatorF64::new();
156 for &x in g { group_acc.add(x); }
157 let group_mean = group_acc.finalize() / ni;
158 let diff = group_mean - grand_mean;
159 ss_between_acc.add(ni * diff * diff);
160 for &x in g {
161 let d = x - group_mean;
162 ss_within_acc.add(d * d);
163 }
164 }
165 let ss_between = ss_between_acc.finalize();
166 let ss_within = ss_within_acc.finalize();
167 let df_between = (k - 1) as f64;
168 let df_within = (n_total - k) as f64;
169
170 if df_within <= 0.0 || ss_within == 0.0 {
171 return Err("anova_oneway: insufficient data".into());
172 }
173
174 let ms_between = ss_between / df_between;
175 let ms_within = ss_within / df_within;
176 let f_stat = ms_between / ms_within;
177 let p = 1.0 - f_cdf(f_stat, df_between, df_within);
178
179 Ok(AnovaResult {
180 f_statistic: f_stat,
181 p_value: p,
182 df_between,
183 df_within,
184 ss_between,
185 ss_within,
186 })
187}
188
189pub fn f_test(x: &[f64], y: &[f64]) -> Result<(f64, f64), String> {
191 let var_x = stats::sample_variance(x)?;
192 let var_y = stats::sample_variance(y)?;
193 let f = var_x / var_y;
194 let df1 = (x.len() - 1) as f64;
195 let df2 = (y.len() - 1) as f64;
196 let p = if f > 1.0 {
197 2.0 * (1.0 - f_cdf(f, df1, df2))
198 } else {
199 2.0 * f_cdf(f, df1, df2)
200 };
201 Ok((f, p))
202}
203
/// Result of a linear (or weighted) least-squares regression.
#[derive(Debug, Clone)]
pub struct LmResult {
    /// Fitted coefficients; index 0 is the intercept.
    pub coefficients: Vec<f64>,
    /// Standard error of each coefficient.
    pub std_errors: Vec<f64>,
    /// t statistic (coefficient / standard error) per coefficient.
    pub t_values: Vec<f64>,
    /// Two-sided p-value per coefficient.
    pub p_values: Vec<f64>,
    /// Coefficient of determination.
    pub r_squared: f64,
    /// R^2 adjusted for the number of predictors.
    pub adj_r_squared: f64,
    /// Per-observation residuals y - y_hat.
    pub residuals: Vec<f64>,
    /// Overall F statistic of the regression.
    pub f_statistic: f64,
    /// p-value of the overall F test.
    pub f_p_value: f64,
}
221
/// Ordinary least squares regression of `y` on the row-major `n x p`
/// predictor matrix `x_flat`, with an intercept column prepended internally
/// (so `coefficients[0]` is the intercept).
///
/// The fit uses Householder QR of the augmented design matrix, avoiding the
/// ill-conditioned normal equations. Standard errors come from the rows of
/// R^-1 scaled by the residual mean square; per-coefficient p-values are
/// two-sided t-tests on n - p - 1 degrees of freedom.
///
/// Errors on dimension mismatches, on n <= p + 1, or when a column norm
/// underflows (rank-deficient design).
pub fn lm(x_flat: &[f64], y: &[f64], n: usize, p: usize) -> Result<LmResult, String> {
    if x_flat.len() != n * p {
        return Err(format!("lm: x_matrix size {} != n*p = {}", x_flat.len(), n * p));
    }
    if y.len() != n {
        return Err(format!("lm: y length {} != n = {n}", y.len()));
    }
    if n <= p + 1 {
        return Err("lm: need n > p+1 for regression with intercept".into());
    }

    // Augmented design: leading column of ones, then the p predictors.
    let pp = p + 1;
    let mut x_aug = vec![0.0; n * pp];
    for i in 0..n {
        x_aug[i * pp] = 1.0;
        for j in 0..p {
            x_aug[i * pp + (j + 1)] = x_flat[i * p + j];
        }
    }

    // Householder QR: `r` is overwritten in place into the R factor while the
    // same reflectors are applied to a copy of y, producing Q^T y.
    let m = n;
    let mut q_t_y = y.to_vec();
    let mut r = x_aug.clone();

    for j in 0..pp {
        // Norm of column j from the diagonal down.
        let mut norm_sq = 0.0;
        for i in j..m {
            norm_sq += r[i * pp + j] * r[i * pp + j];
        }
        let norm = norm_sq.sqrt();
        if norm < 1e-15 {
            return Err("lm: rank-deficient design matrix".into());
        }
        // Sign chosen to avoid cancellation when forming u0.
        let sign = if r[j * pp + j] >= 0.0 { 1.0 } else { -1.0 };
        let u0 = r[j * pp + j] + sign * norm;
        // Householder vector, normalized so v[0] == 1.
        let mut v = vec![0.0; m - j];
        v[0] = 1.0;
        for i in 1..(m - j) {
            v[i] = r[(j + i) * pp + j] / u0;
        }
        let tau = 2.0 / {
            let mut acc = KahanAccumulatorF64::new();
            for &vi in &v { acc.add(vi * vi); }
            acc.finalize()
        };

        // Apply the reflector (I - tau v v^T) to the trailing columns of R...
        for col in j..pp {
            let mut dot = 0.0;
            for i in 0..v.len() {
                dot += v[i] * r[(j + i) * pp + col];
            }
            for i in 0..v.len() {
                r[(j + i) * pp + col] -= tau * dot * v[i];
            }
        }
        // ...and to the transformed response vector.
        {
            let mut dot = 0.0;
            for i in 0..v.len() {
                dot += v[i] * q_t_y[j + i];
            }
            for i in 0..v.len() {
                q_t_y[j + i] -= tau * dot * v[i];
            }
        }
    }

    // Back substitution: solve R * beta = (Q^T y)[..pp].
    let mut beta = vec![0.0; pp];
    for i in (0..pp).rev() {
        let mut s = q_t_y[i];
        for j in (i + 1)..pp {
            s -= r[i * pp + j] * beta[j];
        }
        beta[i] = s / r[i * pp + i];
    }

    // Residuals and residual sum of squares.
    let mut residuals = vec![0.0; n];
    let mut ss_res_acc = KahanAccumulatorF64::new();
    for i in 0..n {
        let mut y_hat = 0.0;
        for j in 0..pp {
            y_hat += x_aug[i * pp + j] * beta[j];
        }
        residuals[i] = y[i] - y_hat;
        ss_res_acc.add(residuals[i] * residuals[i]);
    }
    let ss_res = ss_res_acc.finalize();

    // Total sum of squares about the mean of y.
    let y_mean = {
        let mut acc = KahanAccumulatorF64::new();
        for &yi in y { acc.add(yi); }
        acc.finalize() / n as f64
    };
    let mut ss_tot_acc = KahanAccumulatorF64::new();
    for &yi in y {
        let d = yi - y_mean;
        ss_tot_acc.add(d * d);
    }
    let ss_tot = ss_tot_acc.finalize();

    let r_squared = if ss_tot > 0.0 { 1.0 - ss_res / ss_tot } else { 0.0 };
    let adj_r_squared = 1.0 - (1.0 - r_squared) * ((n - 1) as f64) / ((n - pp) as f64);

    // Invert the upper-triangular R column by column; only the upper triangle
    // of r_inv is populated (the rest stays zero).
    let mse = ss_res / (n - pp) as f64;
    let mut r_inv = vec![0.0; pp * pp];
    for i in 0..pp {
        r_inv[i * pp + i] = 1.0 / r[i * pp + i];
        for j in (0..i).rev() {
            let mut s = 0.0;
            for k in (j + 1)..=i {
                s += r[j * pp + k] * r_inv[k * pp + i];
            }
            r_inv[j * pp + i] = -s / r[j * pp + j];
        }
    }

    // Var(beta_i) = mse * [(X'X)^-1]_ii = mse * || row i of R^-1 ||^2.
    let mut std_errors = Vec::with_capacity(pp);
    let mut t_values = Vec::with_capacity(pp);
    let mut p_values = Vec::with_capacity(pp);
    let df = (n - pp) as f64;
    for i in 0..pp {
        let mut diag = 0.0;
        for k in i..pp {
            diag += r_inv[i * pp + k] * r_inv[i * pp + k];
        }
        let se = (diag * mse).sqrt();
        std_errors.push(se);
        let t = if se > 0.0 { beta[i] / se } else { 0.0 };
        t_values.push(t);
        let pv = 2.0 * (1.0 - t_cdf(t.abs(), df));
        p_values.push(pv);
    }

    // Overall F test of the regression (H0: all slopes zero).
    let ss_reg = ss_tot - ss_res;
    let df_reg = (pp - 1) as f64;
    let f_stat = if df_reg > 0.0 && mse > 0.0 {
        (ss_reg / df_reg) / mse
    } else {
        0.0
    };
    let f_p = 1.0 - f_cdf(f_stat, df_reg, df);

    Ok(LmResult {
        coefficients: beta,
        std_errors,
        t_values,
        p_values,
        r_squared,
        adj_r_squared,
        residuals,
        f_statistic: f_stat,
        f_p_value: f_p,
    })
}
392
393pub fn wls(
401 x_flat: &[f64],
402 y: &[f64],
403 weights: &[f64],
404 n: usize,
405 p: usize,
406) -> Result<LmResult, String> {
407 if x_flat.len() != n * p {
408 return Err(format!("wls: x_matrix size {} != n*p = {}", x_flat.len(), n * p));
409 }
410 if y.len() != n || weights.len() != n {
411 return Err("wls: y and weights must have length n".into());
412 }
413 for (i, &w) in weights.iter().enumerate() {
414 if w <= 0.0 {
415 return Err(format!("wls: weight[{i}] = {w} must be positive"));
416 }
417 }
418 let mut x_w = vec![0.0; n * p];
420 let mut y_w = vec![0.0; n];
421 for i in 0..n {
422 let sw = weights[i].sqrt();
423 y_w[i] = y[i] * sw;
424 for j in 0..p {
425 x_w[i * p + j] = x_flat[i * p + j] * sw;
426 }
427 }
428 lm(&x_w, &y_w, n, p)
429}
430
/// One pairwise comparison produced by `tukey_hsd`.
#[derive(Debug, Clone)]
pub struct TukeyHsdPair {
    /// Index of the first group in the input slice.
    pub group_i: usize,
    /// Index of the second group in the input slice.
    pub group_j: usize,
    /// Difference of group means (mean_i - mean_j).
    pub mean_diff: f64,
    /// Standard error of the comparison.
    pub se: f64,
    /// Studentized-range-style q statistic.
    pub q_statistic: f64,
    /// Approximate p-value, clamped into [0, 1].
    pub p_value: f64,
}
445
/// Tukey HSD post-hoc pairwise comparisons after a one-way ANOVA.
///
/// Returns one entry per unordered pair of groups. The p-value is a
/// conservative approximation: the studentized-range tail is approximated by
/// a normal tail at q/sqrt(2) with a Bonferroni-style k*(k-1) multiplier,
/// not the exact studentized range distribution — TODO confirm this
/// approximation is acceptable for callers.
pub fn tukey_hsd(groups: &[&[f64]]) -> Result<Vec<TukeyHsdPair>, String> {
    if groups.len() < 2 {
        return Err("tukey_hsd: need at least 2 groups".into());
    }
    let k = groups.len();
    let mut n_total = 0usize;
    let mut means = Vec::with_capacity(k);
    // Pooled within-group sum of squares, used for the error variance.
    let mut ssw = KahanAccumulatorF64::new();
    for g in groups {
        if g.is_empty() { return Err("tukey_hsd: empty group".into()); }
        n_total += g.len();
        let mut acc = KahanAccumulatorF64::new();
        for &x in *g { acc.add(x); }
        let m = acc.finalize() / g.len() as f64;
        means.push(m);
        for &x in *g {
            let d = x - m;
            ssw.add(d * d);
        }
    }
    let df_w = (n_total - k) as f64;
    if df_w <= 0.0 { return Err("tukey_hsd: not enough degrees of freedom".into()); }
    // Mean square within (pooled error variance).
    let msw = ssw.finalize() / df_w;

    let mut results = Vec::new();
    for i in 0..k {
        for j in (i + 1)..k {
            let ni = groups[i].len() as f64;
            let nj = groups[j].len() as f64;
            // Standard error for the studentized-range statistic.
            let se = (msw * 0.5 * (1.0 / ni + 1.0 / nj)).sqrt();
            let mean_diff = means[i] - means[j];
            let q = mean_diff.abs() / se;
            // Normal-tail approximation with multiplicity factor k*(k-1);
            // the bound can exceed 1, hence the clamp below.
            let raw_p = (k as f64) * ((k - 1) as f64) * (1.0 - normal_cdf(q / 2.0_f64.sqrt()));
            let p = raw_p.min(1.0).max(0.0);
            results.push(TukeyHsdPair { group_i: i, group_j: j, mean_diff, se, q_statistic: q, p_value: p });
        }
    }
    Ok(results)
}
489
/// Result of a Mann–Whitney U test.
#[derive(Debug, Clone)]
pub struct MannWhitneyResult {
    /// U = min(U1, U2).
    pub u_statistic: f64,
    /// z-score from the large-sample normal approximation.
    pub z_score: f64,
    /// Two-sided p-value from the normal approximation.
    pub p_value: f64,
}
497
/// Mann–Whitney U test (Wilcoxon rank-sum) with a normal approximation.
///
/// Ranks the pooled sample (ties receive the average rank), computes
/// U = min(U1, U2), and derives a z-score from the large-sample normal
/// approximation. NOTE(review): the variance term is not tie-corrected, so
/// p-values are slightly conservative with many ties — confirm acceptable.
pub fn mann_whitney(x: &[f64], y: &[f64]) -> Result<MannWhitneyResult, String> {
    if x.is_empty() || y.is_empty() {
        return Err("mann_whitney: both groups must be non-empty".into());
    }
    let n1 = x.len();
    let n2 = y.len();
    let n = n1 + n2;

    // Pool both samples as (value, group, original index) so the sort is
    // fully deterministic even for equal values.
    let mut combined: Vec<(f64, usize, usize)> = Vec::with_capacity(n);
    for (i, &v) in x.iter().enumerate() {
        combined.push((v, 0, i));
    }
    for (i, &v) in y.iter().enumerate() {
        combined.push((v, 1, i));
    }
    combined.sort_by(|a, b| a.0.total_cmp(&b.0).then(a.1.cmp(&b.1)).then(a.2.cmp(&b.2)));

    // Midranks: runs of bit-identical values share the average rank.
    let mut ranks = vec![0.0; n];
    let mut i = 0;
    while i < n {
        let mut j = i;
        while j < n && combined[j].0.to_bits() == combined[i].0.to_bits() {
            j += 1;
        }
        // Average of the 1-based ranks i+1 ..= j.
        let avg_rank = (i + 1 + j) as f64 / 2.0;
        for k in i..j {
            ranks[k] = avg_rank;
        }
        i = j;
    }

    // Rank sum of the first sample.
    let mut r1 = KahanAccumulatorF64::new();
    for (idx, &(_, group, _)) in combined.iter().enumerate() {
        if group == 0 { r1.add(ranks[idx]); }
    }
    let r1_val = r1.finalize();
    let u1 = r1_val - (n1 as f64 * (n1 as f64 + 1.0)) / 2.0;
    let u2 = (n1 as f64) * (n2 as f64) - u1;
    let u = u1.min(u2);

    // Mean and SD of U under H0 (no tie correction applied).
    let mu = (n1 as f64 * n2 as f64) / 2.0;
    let sigma = ((n1 as f64 * n2 as f64 * (n as f64 + 1.0)) / 12.0).sqrt();
    let z = if sigma > 0.0 { (u - mu) / sigma } else { 0.0 };
    let p = 2.0 * (1.0 - normal_cdf(z.abs()));

    Ok(MannWhitneyResult { u_statistic: u, z_score: z, p_value: p })
}
549
/// Result of a Kruskal–Wallis rank test.
#[derive(Debug, Clone)]
pub struct KruskalWallisResult {
    /// H statistic.
    pub h_statistic: f64,
    /// Upper-tail chi-squared p-value.
    pub p_value: f64,
    /// Degrees of freedom (k - 1).
    pub df: f64,
}
557
/// Kruskal–Wallis rank test for k independent groups.
///
/// H = 12/(N(N+1)) * sum(R_i^2 / n_i) - 3(N+1), compared against a
/// chi-squared distribution with k - 1 degrees of freedom.
/// NOTE(review): no tie correction is applied to H, so p-values are slightly
/// conservative when many observations are tied — confirm acceptable.
pub fn kruskal_wallis(groups: &[&[f64]]) -> Result<KruskalWallisResult, String> {
    if groups.len() < 2 {
        return Err("kruskal_wallis: need at least 2 groups".into());
    }
    let k = groups.len();
    // Pool observations as (value, group, index-within-group) so the sort
    // order is fully deterministic even for equal values.
    let mut all: Vec<(f64, usize, usize)> = Vec::new();
    let mut group_sizes = Vec::with_capacity(k);
    for (gi, g) in groups.iter().enumerate() {
        if g.is_empty() { return Err("kruskal_wallis: empty group".into()); }
        group_sizes.push(g.len());
        for (i, &v) in g.iter().enumerate() {
            all.push((v, gi, i));
        }
    }
    let n = all.len();
    all.sort_by(|a, b| a.0.total_cmp(&b.0).then(a.1.cmp(&b.1)).then(a.2.cmp(&b.2)));

    // Midranks: runs of bit-identical values share the average rank.
    let mut ranks = vec![0.0; n];
    let mut i = 0;
    while i < n {
        let mut j = i;
        while j < n && all[j].0.to_bits() == all[i].0.to_bits() { j += 1; }
        let avg = (i + 1 + j) as f64 / 2.0;
        for idx in i..j { ranks[idx] = avg; }
        i = j;
    }

    // Per-group rank sums.
    let mut rank_sums = vec![KahanAccumulatorF64::new(); k];
    for (idx, &(_, gi, _)) in all.iter().enumerate() {
        rank_sums[gi].add(ranks[idx]);
    }

    let nf = n as f64;
    let mut h_acc = KahanAccumulatorF64::new();
    for (gi, acc) in rank_sums.iter().enumerate() {
        // clone: finalize() appears to consume the accumulator — TODO confirm.
        let ri = acc.clone().finalize();
        let ni = group_sizes[gi] as f64;
        h_acc.add(ri * ri / ni);
    }
    let h = (12.0 / (nf * (nf + 1.0))) * h_acc.finalize() - 3.0 * (nf + 1.0);
    let df = (k - 1) as f64;
    let p = 1.0 - chi2_cdf(h, df);

    Ok(KruskalWallisResult { h_statistic: h, p_value: p, df })
}
606
/// Result of a Wilcoxon signed-rank test.
#[derive(Debug, Clone)]
pub struct WilcoxonResult {
    /// W = min(W+, W-).
    pub w_statistic: f64,
    /// z-score from the large-sample normal approximation.
    pub z_score: f64,
    /// Two-sided p-value from the normal approximation.
    pub p_value: f64,
}
614
/// Wilcoxon signed-rank test for paired samples (normal approximation).
///
/// Differences with |d| <= 1e-15 are dropped; if every pair is tied the test
/// degenerates to W = 0, p = 1. Absolute differences receive midranks for
/// ties and W = min(W+, W-). NOTE(review): no tie correction is applied to
/// the variance — confirm acceptable.
pub fn wilcoxon_signed_rank(x: &[f64], y: &[f64]) -> Result<WilcoxonResult, String> {
    if x.len() != y.len() {
        return Err("wilcoxon_signed_rank: x and y must have same length".into());
    }
    if x.len() < 2 {
        return Err("wilcoxon_signed_rank: need at least 2 observations".into());
    }

    // Non-zero absolute differences plus the sign of each difference;
    // `signs[k]` corresponds to `diffs[k]`.
    let mut diffs: Vec<(f64, usize)> = Vec::new();
    let mut signs: Vec<f64> = Vec::new();
    for (i, (&xi, &yi)) in x.iter().zip(y.iter()).enumerate() {
        let d = xi - yi;
        if d.abs() > 1e-15 {
            diffs.push((d.abs(), i));
            signs.push(if d > 0.0 { 1.0 } else { -1.0 });
        }
    }
    let nr = diffs.len();
    if nr == 0 {
        return Ok(WilcoxonResult { w_statistic: 0.0, z_score: 0.0, p_value: 1.0 });
    }

    // Sort positions of |d|, ties broken by original position so the order
    // is deterministic.
    let mut indexed: Vec<(usize, f64)> = diffs.iter().enumerate().map(|(i, &(v, _))| (i, v)).collect();
    indexed.sort_by(|a, b| a.1.total_cmp(&b.1).then(a.0.cmp(&b.0)));

    // Midranks, written back to the original diff order via indexed[k].0.
    let mut ranks = vec![0.0; nr];
    let mut i = 0;
    while i < nr {
        let mut j = i;
        while j < nr && indexed[j].1.to_bits() == indexed[i].1.to_bits() { j += 1; }
        let avg = (i + 1 + j) as f64 / 2.0;
        for k in i..j { ranks[indexed[k].0] = avg; }
        i = j;
    }

    // Signed rank sums.
    let mut w_plus = KahanAccumulatorF64::new();
    let mut w_minus = KahanAccumulatorF64::new();
    for (i, &s) in signs.iter().enumerate() {
        if s > 0.0 { w_plus.add(ranks[i]); } else { w_minus.add(ranks[i]); }
    }
    let wp = w_plus.finalize();
    let wm = w_minus.finalize();
    let w = wp.min(wm);

    // Large-sample mean and SD of W under H0.
    let nf = nr as f64;
    let mu = nf * (nf + 1.0) / 4.0;
    let sigma = (nf * (nf + 1.0) * (2.0 * nf + 1.0) / 24.0).sqrt();
    let z = if sigma > 0.0 { (w - mu) / sigma } else { 0.0 };
    let p = 2.0 * (1.0 - normal_cdf(z.abs()));

    Ok(WilcoxonResult { w_statistic: w, z_score: z, p_value: p })
}
671
/// Bonferroni multiple-testing correction: each p-value is multiplied by the
/// number of tests and capped at 1.
pub fn bonferroni(p_values: &[f64]) -> Vec<f64> {
    let m = p_values.len() as f64;
    let mut adjusted = Vec::with_capacity(p_values.len());
    for &p in p_values {
        adjusted.push(f64::min(p * m, 1.0));
    }
    adjusted
}
677
/// Benjamini–Hochberg false-discovery-rate adjustment.
///
/// Returns adjusted p-values in the original input order: each sorted
/// p-value is scaled by m/rank and capped at 1, then a backward
/// cumulative-minimum pass enforces monotonicity.
pub fn fdr_bh(p_values: &[f64]) -> Vec<f64> {
    let m = p_values.len();
    if m == 0 {
        return Vec::new();
    }

    // Indices sorted by p-value, ties broken by original position.
    let mut order: Vec<usize> = (0..m).collect();
    order.sort_by(|&a, &b| p_values[a].total_cmp(&p_values[b]).then(a.cmp(&b)));

    // Scale by m / rank (1-based) and cap at 1.
    let mut adjusted: Vec<f64> = order
        .iter()
        .enumerate()
        .map(|(rank0, &idx)| (p_values[idx] * m as f64 / (rank0 + 1) as f64).min(1.0))
        .collect();

    // Backward cumulative minimum keeps adjusted values non-decreasing.
    for i in (0..m - 1).rev() {
        adjusted[i] = adjusted[i].min(adjusted[i + 1]);
    }

    // Scatter back to the original positions.
    let mut result = vec![0.0; m];
    for (rank0, &idx) in order.iter().enumerate() {
        result[idx] = adjusted[rank0];
    }
    result
}
708
/// Result of a logistic regression fit.
#[derive(Debug, Clone)]
pub struct LogisticResult {
    /// Fitted coefficients; index 0 is the intercept.
    pub coefficients: Vec<f64>,
    /// Wald standard error per coefficient.
    pub std_errors: Vec<f64>,
    /// z statistic (coefficient / standard error) per coefficient.
    pub z_values: Vec<f64>,
    /// Two-sided normal p-value per coefficient.
    pub p_values: Vec<f64>,
    /// Log-likelihood at the final fit.
    pub log_likelihood: f64,
    /// Akaike information criterion: -2 lnL + 2 * (p + 1).
    pub aic: f64,
    /// Number of IRLS iterations actually performed.
    pub iterations: usize,
}
720
/// Binary logistic regression fit by iteratively reweighted least squares
/// (Newton–Raphson on the log-likelihood).
///
/// `x_flat` is a row-major `n x p` predictor matrix; an intercept column is
/// prepended internally, so `coefficients[0]` is the intercept. `y` is
/// treated as the 0/1 response (values are not validated here — TODO confirm
/// callers guarantee this). Iterates at most 100 times or until the largest
/// coefficient update falls below 1e-8; standard errors are Wald errors from
/// the inverse of X'WX at the final fit.
pub fn logistic_regression(
    x_flat: &[f64],
    y: &[f64],
    n: usize,
    p: usize,
) -> Result<LogisticResult, String> {
    if x_flat.len() != n * p {
        return Err(format!("logistic_regression: x size {} != n*p = {}", x_flat.len(), n * p));
    }
    if y.len() != n {
        return Err("logistic_regression: y must have length n".into());
    }
    // Augmented design: intercept column of ones followed by the predictors.
    let pp = p + 1;
    let mut x = vec![0.0; n * pp];
    for i in 0..n {
        x[i * pp] = 1.0;
        for j in 0..p {
            x[i * pp + j + 1] = x_flat[i * p + j];
        }
    }

    let mut beta = vec![0.0; pp];
    let max_iter = 100;
    let tol = 1e-8;
    let mut iterations = 0;

    for iter in 0..max_iter {
        iterations = iter + 1;

        // Fitted probabilities mu_i = sigmoid(x_i . beta), clamped away from
        // 0 and 1 so the weights and logs below stay finite.
        let mut mu = vec![0.0; n];
        for i in 0..n {
            let mut eta = 0.0;
            for j in 0..pp {
                eta += x[i * pp + j] * beta[j];
            }
            mu[i] = 1.0 / (1.0 + (-eta).exp());
            mu[i] = mu[i].max(1e-10).min(1.0 - 1e-10);
        }

        // IRLS weights: variance of a Bernoulli with mean mu.
        let w: Vec<f64> = mu.iter().map(|&m| m * (1.0 - m)).collect();

        // X' W X.
        let mut xtwx = vec![0.0; pp * pp];
        for i in 0..n {
            for j in 0..pp {
                for k in 0..pp {
                    xtwx[j * pp + k] += x[i * pp + j] * w[i] * x[i * pp + k];
                }
            }
        }

        // Score vector X' (y - mu).
        let mut grad = vec![0.0; pp];
        for i in 0..n {
            let r = y[i] - mu[i];
            for j in 0..pp {
                grad[j] += x[i * pp + j] * r;
            }
        }

        // Newton step: solve (X' W X) delta = X' (y - mu).
        let delta = solve_symmetric(&xtwx, &grad, pp)?;

        let mut max_change = 0.0_f64;
        for j in 0..pp {
            beta[j] += delta[j];
            max_change = max_change.max(delta[j].abs());
        }

        if max_change < tol {
            break;
        }
    }

    // Final fitted probabilities at the converged coefficients.
    let mut mu = vec![0.0; n];
    for i in 0..n {
        let mut eta = 0.0;
        for j in 0..pp { eta += x[i * pp + j] * beta[j]; }
        mu[i] = 1.0 / (1.0 + (-eta).exp());
        mu[i] = mu[i].max(1e-10).min(1.0 - 1e-10);
    }

    // Bernoulli log-likelihood and AIC = -2 lnL + 2 * (#parameters).
    let mut ll = KahanAccumulatorF64::new();
    for i in 0..n {
        ll.add(y[i] * mu[i].ln() + (1.0 - y[i]) * (1.0 - mu[i]).ln());
    }
    let log_likelihood = ll.finalize();
    let aic = -2.0 * log_likelihood + 2.0 * pp as f64;

    // Wald standard errors: sqrt of the diagonal of (X' W X)^-1.
    let w: Vec<f64> = mu.iter().map(|&m| m * (1.0 - m)).collect();
    let mut xtwx = vec![0.0; pp * pp];
    for i in 0..n {
        for j in 0..pp {
            for k in 0..pp {
                xtwx[j * pp + k] += x[i * pp + j] * w[i] * x[i * pp + k];
            }
        }
    }
    let inv = invert_symmetric(&xtwx, pp)?;
    let mut std_errors = Vec::with_capacity(pp);
    let mut z_values = Vec::with_capacity(pp);
    let mut p_values = Vec::with_capacity(pp);
    for j in 0..pp {
        // max(0.0) guards tiny negative diagonals from round-off.
        let se = inv[j * pp + j].max(0.0).sqrt();
        std_errors.push(se);
        let z = if se > 0.0 { beta[j] / se } else { 0.0 };
        z_values.push(z);
        let pv = 2.0 * (1.0 - normal_cdf(z.abs()));
        p_values.push(pv);
    }

    Ok(LogisticResult {
        coefficients: beta,
        std_errors,
        z_values,
        p_values,
        log_likelihood,
        aic,
        iterations,
    })
}
855
/// Solves the symmetric positive-definite system `a * x = b` (row-major
/// `n x n`) via Cholesky factorization.
///
/// If `a` is not positive definite, a geometrically increasing ridge
/// (diagonal jitter) is applied and the factorization retried a bounded
/// number of times; if that still fails an error is returned.
///
/// Fix: the previous implementation recursed with a *fixed* 1e-6 jitter on
/// failure, so a strongly indefinite matrix could recurse essentially
/// without bound and overflow the stack. The retry loop below is iterative
/// and bounded.
fn solve_symmetric(a: &[f64], b: &[f64], n: usize) -> Result<Vec<f64>, String> {
    const MAX_ATTEMPTS: usize = 30;
    let mut a_work = a.to_vec();
    let mut ridge = 0.0_f64;

    'attempt: for attempt in 0..MAX_ATTEMPTS {
        if attempt > 0 {
            // Escalating jitter: 1e-6, 1e-5, 1e-4, ...
            ridge = if ridge == 0.0 { 1e-6 } else { ridge * 10.0 };
            a_work.copy_from_slice(a);
            for i in 0..n {
                a_work[i * n + i] += ridge;
            }
        }

        // Cholesky factorization: a_work = L * L^T, L lower triangular.
        let mut l = vec![0.0; n * n];
        for i in 0..n {
            for j in 0..=i {
                let mut sum = 0.0;
                for k in 0..j {
                    sum += l[i * n + k] * l[j * n + k];
                }
                if i == j {
                    let diag = a_work[i * n + i] - sum;
                    if diag <= 0.0 {
                        // Not positive definite at this jitter level; retry
                        // with a larger ridge.
                        continue 'attempt;
                    }
                    l[i * n + j] = diag.sqrt();
                } else {
                    l[i * n + j] = (a_work[i * n + j] - sum) / l[j * n + j];
                }
            }
        }

        // Forward substitution: L * y = b.
        let mut y = vec![0.0; n];
        for i in 0..n {
            let mut sum = 0.0;
            for j in 0..i {
                sum += l[i * n + j] * y[j];
            }
            y[i] = (b[i] - sum) / l[i * n + i];
        }

        // Back substitution: L^T * x = y.
        let mut x = vec![0.0; n];
        for i in (0..n).rev() {
            let mut sum = 0.0;
            for j in (i + 1)..n {
                sum += l[j * n + i] * x[j];
            }
            x[i] = (y[i] - sum) / l[i * n + i];
        }
        return Ok(x);
    }
    Err("solve_symmetric: matrix is not positive definite even after regularization".into())
}
896
897fn invert_symmetric(a: &[f64], n: usize) -> Result<Vec<f64>, String> {
899 let mut inv = vec![0.0; n * n];
900 for col in 0..n {
901 let mut e = vec![0.0; n];
902 e[col] = 1.0;
903 let x = solve_symmetric(a, &e, n)?;
904 for row in 0..n {
905 inv[row * n + col] = x[row];
906 }
907 }
908 Ok(inv)
909}
910
/// Result of a normality test (Jarque–Bera, Anderson–Darling, or K–S).
///
/// Derives added for consistency with the other result structs in this
/// module (TTestResult, ChiSquaredResult, AnovaResult, ...), which are all
/// `Debug + Clone`.
#[derive(Debug, Clone)]
pub struct NormalityResult {
    /// Test statistic.
    pub statistic: f64,
    /// p-value (continuous or tabulated, depending on the test).
    pub p_value: f64,
}
920
921pub fn jarque_bera(data: &[f64]) -> Result<NormalityResult, String> {
925 let n = data.len();
926 if n < 3 { return Err("jarque_bera: need at least 3 observations".into()); }
927 let nf = n as f64;
928
929 let mut acc = KahanAccumulatorF64::new();
931 for &x in data { acc.add(x); }
932 let mean = acc.finalize() / nf;
933
934 let mut m2_acc = KahanAccumulatorF64::new();
936 let mut m3_acc = KahanAccumulatorF64::new();
937 let mut m4_acc = KahanAccumulatorF64::new();
938 for &x in data {
939 let d = x - mean;
940 let d2 = d * d;
941 m2_acc.add(d2);
942 m3_acc.add(d2 * d);
943 m4_acc.add(d2 * d2);
944 }
945 let m2 = m2_acc.finalize() / nf;
946 let m3 = m3_acc.finalize() / nf;
947 let m4 = m4_acc.finalize() / nf;
948
949 if m2 == 0.0 { return Err("jarque_bera: zero variance".into()); }
950
951 let skewness = m3 / m2.powf(1.5);
952 let kurtosis = m4 / (m2 * m2);
953
954 let jb = (nf / 6.0) * (skewness * skewness + (kurtosis - 3.0).powi(2) / 4.0);
955
956 let p_value = (-jb / 2.0).exp();
959
960 Ok(NormalityResult { statistic: jb, p_value })
961}
962
/// Anderson–Darling test for normality with mean and SD estimated from the
/// sample.
///
/// Computes A^2 from the sorted standardized sample, applies the
/// small-sample correction A*^2 = A^2 (1 + 0.75/n + 2.25/n^2), and returns a
/// coarse p-value from a critical-value table — so `p_value` is one of a few
/// discrete levels, not a continuous value.
pub fn anderson_darling(data: &[f64]) -> Result<NormalityResult, String> {
    let n = data.len();
    if n < 8 { return Err("anderson_darling: need at least 8 observations".into()); }
    let nf = n as f64;

    // Sample mean and (n-1)-denominator SD for standardization.
    let mut acc = KahanAccumulatorF64::new();
    for &x in data { acc.add(x); }
    let mean = acc.finalize() / nf;
    let mut var_acc = KahanAccumulatorF64::new();
    for &x in data { let d = x - mean; var_acc.add(d * d); }
    let std = (var_acc.finalize() / (nf - 1.0)).sqrt();
    if std == 0.0 { return Err("anderson_darling: zero standard deviation".into()); }

    // Standardized order statistics.
    let mut sorted = data.to_vec();
    sorted.sort_by(|a, b| a.total_cmp(b));
    let z: Vec<f64> = sorted.iter().map(|&x| (x - mean) / std).collect();

    // A^2 = -n - (1/n) sum_i (2i+1) [ln F(z_(i)) + ln(1 - F(z_(n-1-i)))],
    // with CDF values clamped away from 0 and 1 to keep the logs finite.
    let mut a2_acc = KahanAccumulatorF64::new();
    for i in 0..n {
        let phi_zi = normal_cdf(z[i]);
        let phi_zn = normal_cdf(z[n - 1 - i]);
        let p1 = phi_zi.max(1e-15).min(1.0 - 1e-15);
        let p2 = phi_zn.max(1e-15).min(1.0 - 1e-15);
        let term = (2.0 * (i as f64) + 1.0) * (p1.ln() + (1.0 - p2).ln());
        a2_acc.add(term);
    }
    let a2 = -nf - a2_acc.finalize() / nf;

    // Small-sample correction for estimated parameters.
    let a2_star = a2 * (1.0 + 0.75 / nf + 2.25 / (nf * nf));

    // Step-function p-value from critical values for the
    // mean-and-variance-estimated case — presumably the D'Agostino–Stephens
    // table; TODO confirm the source of these thresholds.
    let p_value = if a2_star >= 1.0359 { 0.0 }
    else if a2_star >= 0.8737 { 0.01 }
    else if a2_star >= 0.6305 { 0.025 }
    else if a2_star >= 0.5091 { 0.05 }
    else if a2_star >= 0.3565 { 0.10 }
    else if a2_star >= 0.2006 { 0.25 }
    else { 0.50 };
    Ok(NormalityResult { statistic: a2_star, p_value })
}
1011
/// Kolmogorov–Smirnov test of the sample against a normal distribution whose
/// mean and SD are estimated from the same sample.
///
/// NOTE(review): with estimated parameters the asymptotic Kolmogorov series
/// used for the p-value is anti-conservative (a Lilliefors table would be
/// more accurate) — confirm this is acceptable for callers.
pub fn ks_test_normal(data: &[f64]) -> Result<NormalityResult, String> {
    let n = data.len();
    if n < 5 { return Err("ks_test: need at least 5 observations".into()); }
    let nf = n as f64;

    // Sample mean and (n-1)-denominator SD.
    let mut acc = KahanAccumulatorF64::new();
    for &x in data { acc.add(x); }
    let mean = acc.finalize() / nf;
    let mut var_acc = KahanAccumulatorF64::new();
    for &x in data { let d = x - mean; var_acc.add(d * d); }
    let std = (var_acc.finalize() / (nf - 1.0)).sqrt();
    if std == 0.0 { return Err("ks_test: zero standard deviation".into()); }

    let mut sorted = data.to_vec();
    sorted.sort_by(|a, b| a.total_cmp(b));

    // D = max over order statistics of |ECDF - F|, approaching the ECDF step
    // from above (d_plus) and below (d_minus).
    let mut d_max = 0.0f64;
    for i in 0..n {
        let z = (sorted[i] - mean) / std;
        let f_z = normal_cdf(z);
        let d_plus = ((i + 1) as f64 / nf - f_z).abs();
        let d_minus = (f_z - i as f64 / nf).abs();
        d_max = d_max.max(d_plus).max(d_minus);
    }

    // Asymptotic Kolmogorov distribution:
    // p = 2 * sum_{k>=1} (-1)^{k-1} exp(-2 k^2 n D^2), truncated when terms
    // become negligible (at most 100 terms), then clamped into [0, 1].
    let nd2 = nf * d_max * d_max;
    let mut p_value = 0.0;
    for k in 1..=100 {
        let kf = k as f64;
        let sign = if k % 2 == 1 { 1.0 } else { -1.0 };
        let term = sign * (-2.0 * kf * kf * nd2).exp();
        p_value += term;
        if term.abs() < 1e-15 { break; }
    }
    let p_value = (2.0 * p_value).max(0.0).min(1.0);

    Ok(NormalityResult { statistic: d_max, p_value })
}
1057
1058pub fn cohens_d(x: &[f64], y: &[f64]) -> Result<f64, String> {
1064 if x.len() < 2 || y.len() < 2 {
1065 return Err("cohens_d: need at least 2 observations per group".into());
1066 }
1067
1068 let mean_x = kahan_mean(x);
1069 let mean_y = kahan_mean(y);
1070 let var_x = kahan_var(x, mean_x);
1071 let var_y = kahan_var(y, mean_y);
1072
1073 let nx = x.len() as f64;
1074 let ny = y.len() as f64;
1075
1076 let sp = (((nx - 1.0) * var_x + (ny - 1.0) * var_y) / (nx + ny - 2.0)).sqrt();
1078 if sp == 0.0 { return Ok(0.0); }
1079
1080 Ok((mean_x - mean_y) / sp)
1081}
1082
1083pub fn eta_squared(groups: &[&[f64]]) -> Result<f64, String> {
1086 if groups.len() < 2 { return Err("eta_squared: need at least 2 groups".into()); }
1087
1088 let mut grand_acc = KahanAccumulatorF64::new();
1089 let mut total_n = 0usize;
1090 for g in groups {
1091 for &x in *g { grand_acc.add(x); total_n += 1; }
1092 }
1093 if total_n == 0 { return Err("eta_squared: no observations".into()); }
1094 let grand_mean = grand_acc.finalize() / total_n as f64;
1095
1096 let mut ss_between = KahanAccumulatorF64::new();
1098 let mut ss_total = KahanAccumulatorF64::new();
1099 for g in groups {
1100 let gm = kahan_mean(g);
1101 let ni = g.len() as f64;
1102 ss_between.add(ni * (gm - grand_mean).powi(2));
1103 for &x in *g {
1104 ss_total.add((x - grand_mean).powi(2));
1105 }
1106 }
1107
1108 let ss_t = ss_total.finalize();
1109 if ss_t == 0.0 { return Ok(0.0); }
1110
1111 Ok(ss_between.finalize() / ss_t)
1112}
1113
1114pub fn cramers_v(table: &[f64], nrows: usize, ncols: usize) -> Result<f64, String> {
1117 if table.len() != nrows * ncols {
1118 return Err(format!("cramers_v: table size {} != {}x{}", table.len(), nrows, ncols));
1119 }
1120 if nrows < 2 || ncols < 2 {
1121 return Err("cramers_v: need at least 2x2 table".into());
1122 }
1123
1124 let mut row_sums = vec![0.0; nrows];
1126 let mut col_sums = vec![0.0; ncols];
1127 let mut total = 0.0;
1128 for r in 0..nrows {
1129 for c in 0..ncols {
1130 let v = table[r * ncols + c];
1131 row_sums[r] += v;
1132 col_sums[c] += v;
1133 total += v;
1134 }
1135 }
1136 if total == 0.0 { return Err("cramers_v: empty table".into()); }
1137
1138 let mut chi2 = KahanAccumulatorF64::new();
1140 for r in 0..nrows {
1141 for c in 0..ncols {
1142 let expected = row_sums[r] * col_sums[c] / total;
1143 if expected > 0.0 {
1144 let diff = table[r * ncols + c] - expected;
1145 chi2.add(diff * diff / expected);
1146 }
1147 }
1148 }
1149
1150 let k = (nrows.min(ncols) - 1) as f64;
1151 if k == 0.0 { return Ok(0.0); }
1152
1153 Ok((chi2.finalize() / (total * k)).sqrt())
1154}
1155
1156pub fn levene_test(groups: &[&[f64]]) -> Result<(f64, f64), String> {
1158 if groups.len() < 2 { return Err("levene_test: need at least 2 groups".into()); }
1159 let k = groups.len();
1160 let mut total_n = 0usize;
1161
1162 let mut z_groups: Vec<Vec<f64>> = Vec::with_capacity(k);
1164 for g in groups {
1165 if g.is_empty() { return Err("levene_test: empty group".into()); }
1166 let mut sorted = g.to_vec();
1167 sorted.sort_by(|a, b| a.total_cmp(b));
1168 let med = if sorted.len() % 2 == 0 {
1169 (sorted[sorted.len()/2 - 1] + sorted[sorted.len()/2]) / 2.0
1170 } else {
1171 sorted[sorted.len()/2]
1172 };
1173 let z: Vec<f64> = g.iter().map(|&x| (x - med).abs()).collect();
1174 total_n += z.len();
1175 z_groups.push(z);
1176 }
1177
1178 let z_refs: Vec<&[f64]> = z_groups.iter().map(|v| v.as_slice()).collect();
1180 let anova = anova_oneway(&z_refs)?;
1181 Ok((anova.f_statistic, anova.p_value))
1182}
1183
1184pub fn bartlett_test(groups: &[&[f64]]) -> Result<(f64, f64), String> {
1186 if groups.len() < 2 { return Err("bartlett_test: need at least 2 groups".into()); }
1187 let k = groups.len();
1188
1189 let mut vars = Vec::with_capacity(k);
1190 let mut ns = Vec::with_capacity(k);
1191 let mut total_n = 0usize;
1192
1193 for g in groups {
1194 let n = g.len();
1195 if n < 2 { return Err("bartlett_test: each group needs at least 2 observations".into()); }
1196 let m = kahan_mean(g);
1197 let v = kahan_var(g, m);
1198 vars.push(v);
1199 ns.push(n);
1200 total_n += n;
1201 }
1202
1203 let nk = total_n - k; let nkf = nk as f64;
1205
1206 let mut sp2_acc = KahanAccumulatorF64::new();
1208 for i in 0..k {
1209 sp2_acc.add((ns[i] as f64 - 1.0) * vars[i]);
1210 }
1211 let sp2 = sp2_acc.finalize() / nkf;
1212 if sp2 == 0.0 { return Err("bartlett_test: zero pooled variance".into()); }
1213
1214 let mut num_acc = KahanAccumulatorF64::new();
1216 let mut denom_acc = KahanAccumulatorF64::new();
1217 for i in 0..k {
1218 let ni_m1 = ns[i] as f64 - 1.0;
1219 num_acc.add(ni_m1 * (vars[i] / sp2).max(1e-300).ln());
1220 denom_acc.add(1.0 / ni_m1);
1221 }
1222 let t = nkf * sp2.ln() - num_acc.finalize();
1223 let c = 1.0 + (1.0 / (3.0 * (k as f64 - 1.0))) * (denom_acc.finalize() - 1.0 / nkf);
1224 let bartlett = t / c;
1225
1226 let df = (k - 1) as f64;
1228 let p_value = chi2_survival(bartlett, df);
1229
1230 Ok((bartlett, p_value))
1231}
1232
1233fn chi2_survival(x: f64, df: f64) -> f64 {
1236 if x <= 0.0 { return 1.0; }
1237 let a = df / 2.0;
1239 let z = x / 2.0;
1240 1.0 - regularized_gamma_p(a, z)
1243}
1244
1245fn regularized_gamma_p(a: f64, x: f64) -> f64 {
1246 if x < a + 1.0 {
1247 gamma_series(a, x)
1249 } else {
1250 1.0 - gamma_cf(a, x)
1252 }
1253}
1254
1255fn gamma_series(a: f64, x: f64) -> f64 {
1256 let ln_gamma_a = ln_gamma(a);
1257 let mut sum = 1.0 / a;
1258 let mut term = 1.0 / a;
1259 for n in 1..200 {
1260 term *= x / (a + n as f64);
1261 sum += term;
1262 if term.abs() < sum.abs() * 1e-15 { break; }
1263 }
1264 sum * (-x + a * x.ln() - ln_gamma_a).exp()
1265}
1266
1267fn gamma_cf(a: f64, x: f64) -> f64 {
1268 let ln_gamma_a = ln_gamma(a);
1269 let mut f = 1e-30;
1270 let mut c = 1e-30;
1271 let mut d = 1.0 / (x + 1.0 - a);
1272 f = d;
1273 for n in 1..200 {
1274 let an = -(n as f64) * (n as f64 - a);
1275 let bn = x + 2.0 * n as f64 + 1.0 - a;
1276 d = bn + an * d;
1277 if d.abs() < 1e-30 { d = 1e-30; }
1278 c = bn + an / c;
1279 if c.abs() < 1e-30 { c = 1e-30; }
1280 d = 1.0 / d;
1281 let delta = d * c;
1282 f *= delta;
1283 if (delta - 1.0).abs() < 1e-15 { break; }
1284 }
1285 f * (-x + a * x.ln() - ln_gamma_a).exp()
1286}
1287
/// Natural log of |Gamma(x)| using the Lanczos approximation (g = 7, 9-term
/// coefficient set), with the reflection formula for x < 0.5.
fn ln_gamma(x: f64) -> f64 {
    use std::f64::consts::PI;
    const LANCZOS: [f64; 9] = [
        0.99999999999980993,
        676.5203681218851,
        -1259.1392167224028,
        771.32342877765313,
        -176.61502916214059,
        12.507343278686905,
        -0.13857109526572012,
        9.9843695780195716e-6,
        1.5056327351493116e-7,
    ];
    // Left half-plane: Gamma(x) Gamma(1 - x) = pi / sin(pi x); take |.| so the
    // log is defined where Gamma is negative.
    if x < 0.5 {
        let reflected = PI / (PI * x).sin();
        return reflected.abs().ln() - ln_gamma(1.0 - x);
    }
    // Shift so the rational series is evaluated at z = x - 1.
    let z = x - 1.0;
    let mut series = LANCZOS[0];
    for (i, &coeff) in LANCZOS.iter().enumerate().skip(1) {
        series += coeff / (z + i as f64);
    }
    let t = z + 7.5;
    0.5 * (2.0 * PI).ln() + (z + 0.5) * t.ln() - t + series.ln()
}
1314
1315fn kahan_mean(data: &[f64]) -> f64 {
1317 let mut acc = KahanAccumulatorF64::new();
1318 for &x in data { acc.add(x); }
1319 acc.finalize() / data.len() as f64
1320}
1321
1322fn kahan_var(data: &[f64], mean: f64) -> f64 {
1324 let mut acc = KahanAccumulatorF64::new();
1325 for &x in data { let d = x - mean; acc.add(d * d); }
1326 acc.finalize() / (data.len() as f64 - 1.0)
1327}
1328
#[cfg(test)]
mod tests {
    //! Sanity tests for the hypothesis-testing API. Each test runs a routine
    //! on a small fixed dataset and checks that the p-value (or coefficient)
    //! lands on the expected side of a conventional threshold; two tests
    //! additionally pin bit-exact determinism across repeated runs.
    use super::*;

    // One-sample t-test against the true mean: should NOT reject H0.
    #[test]
    fn test_t_test_known_mean() {
        let data = [5.1, 4.9, 5.0, 5.2, 4.8, 5.0, 5.1, 4.9];
        let r = t_test(&data, 5.0).unwrap();
        assert!(r.p_value > 0.05, "p = {}", r.p_value);
    }

    // One-sample t-test far from mu = 0: should be highly significant.
    #[test]
    fn test_t_test_shifted() {
        let data = [10.1, 10.2, 10.0, 9.9, 10.3, 10.1, 10.0, 10.2];
        let r = t_test(&data, 0.0).unwrap();
        assert!(r.p_value < 0.001, "p = {}", r.p_value);
    }

    // Two-sample t-test on clearly separated groups: highly significant.
    #[test]
    fn test_t_test_two_sample() {
        let x = [10.0, 11.0, 12.0, 13.0, 14.0];
        let y = [20.0, 21.0, 22.0, 23.0, 24.0];
        let r = t_test_two_sample(&x, &y).unwrap();
        assert!(r.p_value < 0.001);
    }

    // Goodness of fit with observed == expected: chi2 = 0 exactly, p = 1.
    #[test]
    fn test_chi_squared_uniform() {
        let observed = [20.0, 20.0, 20.0, 20.0, 20.0];
        let expected = [20.0, 20.0, 20.0, 20.0, 20.0];
        let r = chi_squared_test(&observed, &expected).unwrap();
        assert_eq!(r.chi2, 0.0);
        assert!((r.p_value - 1.0).abs() < 1e-6);
    }

    // One-way ANOVA, all groups around the same mean: not significant.
    #[test]
    fn test_anova_equal_groups() {
        let g1 = [5.0, 5.1, 4.9, 5.0, 5.2];
        let g2 = [5.0, 4.8, 5.1, 5.0, 4.9];
        let g3 = [5.1, 5.0, 4.9, 5.0, 5.1];
        let r = anova_oneway(&[&g1, &g2, &g3]).unwrap();
        assert!(r.p_value > 0.05, "p = {}", r.p_value);
    }

    // One-way ANOVA with widely separated group means: highly significant.
    #[test]
    fn test_anova_different_groups() {
        let g1 = [1.0, 2.0, 3.0, 2.0, 1.0];
        let g2 = [10.0, 11.0, 12.0, 11.0, 10.0];
        let g3 = [20.0, 21.0, 22.0, 21.0, 20.0];
        let r = anova_oneway(&[&g1, &g2, &g3]).unwrap();
        assert!(r.p_value < 0.001);
    }

    // Linear regression on an exact line y = 1 + 2x: coefficients recovered
    // to near machine precision and R^2 = 1.
    #[test]
    fn test_lm_simple() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [3.0, 5.0, 7.0, 9.0, 11.0];
        let r = lm(&x, &y, 5, 1).unwrap();
        assert!((r.coefficients[0] - 1.0).abs() < 1e-10, "intercept = {}", r.coefficients[0]);
        assert!((r.coefficients[1] - 2.0).abs() < 1e-10, "slope = {}", r.coefficients[1]);
        assert!((r.r_squared - 1.0).abs() < 1e-10);
    }

    // Repeated runs must be bit-for-bit identical, so compare raw f64 bit
    // patterns rather than approximate values.
    #[test]
    fn test_determinism() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
        let r1 = t_test(&data, 3.0).unwrap();
        let r2 = t_test(&data, 3.0).unwrap();
        assert_eq!(r1.t_statistic.to_bits(), r2.t_statistic.to_bits());
        assert_eq!(r1.p_value.to_bits(), r2.p_value.to_bits());
    }

    // Weighted least squares with uniform weights should recover the same
    // exact-fit coefficients as plain OLS.
    #[test]
    fn test_wls_uniform_weights() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y: Vec<f64> = x.iter().map(|&xi| 1.0 + 2.0 * xi).collect();
        let w = vec![1.0; 5];
        let r = wls(&x, &y, &w, 5, 1).unwrap();
        assert!((r.coefficients[0] - 1.0).abs() < 1e-8, "intercept = {}", r.coefficients[0]);
        assert!((r.coefficients[1] - 2.0).abs() < 1e-8, "slope = {}", r.coefficients[1]);
    }

    // Mann-Whitney U on identical samples: no evidence of a difference.
    #[test]
    fn test_mann_whitney_identical() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [1.0, 2.0, 3.0, 4.0, 5.0];
        let r = mann_whitney(&x, &y).unwrap();
        assert!(r.p_value > 0.05, "p = {}", r.p_value);
    }

    // Mann-Whitney U on non-overlapping samples: significant.
    #[test]
    fn test_mann_whitney_separated() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [10.0, 11.0, 12.0, 13.0, 14.0];
        let r = mann_whitney(&x, &y).unwrap();
        assert!(r.p_value < 0.05, "p = {}", r.p_value);
    }

    // Kruskal-Wallis with identical groups: not significant.
    #[test]
    fn test_kruskal_wallis_identical() {
        let g1 = [1.0, 2.0, 3.0, 4.0, 5.0];
        let g2 = [1.0, 2.0, 3.0, 4.0, 5.0];
        let r = kruskal_wallis(&[&g1, &g2]).unwrap();
        assert!(r.p_value > 0.05, "p = {}", r.p_value);
    }

    // Kruskal-Wallis with well-separated groups: significant.
    #[test]
    fn test_kruskal_wallis_different() {
        let g1 = [1.0, 2.0, 3.0];
        let g2 = [10.0, 11.0, 12.0];
        let g3 = [20.0, 21.0, 22.0];
        let r = kruskal_wallis(&[&g1, &g2, &g3]).unwrap();
        assert!(r.p_value < 0.05, "p = {}", r.p_value);
    }

    // Wilcoxon signed-rank with small symmetric paired differences: not
    // significant.
    #[test]
    fn test_wilcoxon_no_difference() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let y = [1.1, 1.9, 3.1, 3.9, 5.1, 5.9, 7.1, 7.9]; let r = wilcoxon_signed_rank(&x, &y).unwrap();
        assert!(r.p_value > 0.05, "p = {}", r.p_value);
    }

    // Wilcoxon signed-rank with a uniform +5 shift: significant.
    #[test]
    fn test_wilcoxon_clear_shift() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let y: Vec<f64> = x.iter().map(|&v| v + 5.0).collect();
        let r = wilcoxon_signed_rank(&x, &y).unwrap();
        assert!(r.p_value < 0.05, "p = {}", r.p_value);
    }

    // Bonferroni: each p multiplied by m = 3 and clamped at 1.0.
    #[test]
    fn test_bonferroni_basic() {
        let pvals = [0.01, 0.04, 0.5];
        let adj = bonferroni(&pvals);
        assert!((adj[0] - 0.03).abs() < 1e-12);
        assert!((adj[1] - 0.12).abs() < 1e-12);
        assert!((adj[2] - 1.0).abs() < 1e-12);
    }

    // Benjamini-Hochberg: adjusted values stay below the expected bounds.
    #[test]
    fn test_fdr_bh_known() {
        let pvals = [0.01, 0.04, 0.03, 0.5];
        let adj = fdr_bh(&pvals);
        assert!(adj[0] < 0.05, "adj[0] = {}", adj[0]);
        assert!(adj[2] < 0.07, "adj[2] = {}", adj[2]);
    }

    // BH adjustment must return values in the original input order, so a
    // smaller raw p keeps a smaller adjusted p regardless of position.
    #[test]
    fn test_fdr_bh_preserves_order() {
        let pvals = [0.5, 0.01, 0.3];
        let adj = fdr_bh(&pvals);
        assert!(adj[1] < adj[0], "adj[0]={}, adj[1]={}", adj[0], adj[1]);
    }

    // Tukey HSD with identical groups: every pairwise mean difference is 0.
    #[test]
    fn test_tukey_hsd_all_same() {
        let g1 = [5.0, 5.0, 5.0, 5.0, 5.0];
        let g2 = [5.0, 5.0, 5.0, 5.0, 5.0];
        let g3 = [5.0, 5.0, 5.0, 5.0, 5.0];
        let results = tukey_hsd(&[&g1, &g2, &g3]).unwrap();
        for pair in &results {
            assert!(pair.mean_diff.abs() < 1e-12);
        }
    }

    // Tukey HSD where only group 2 (index 2) is shifted: every pair that
    // involves that group is significant.
    #[test]
    fn test_tukey_hsd_one_different() {
        let g1 = [1.0, 2.0, 3.0, 4.0, 5.0];
        let g2 = [2.0, 3.0, 4.0, 5.0, 6.0];
        let g3 = [20.0, 21.0, 22.0, 23.0, 24.0];
        let results = tukey_hsd(&[&g1, &g2, &g3]).unwrap();
        let g3_pairs: Vec<_> = results.iter().filter(|p| p.group_i == 2 || p.group_j == 2).collect();
        for pair in &g3_pairs {
            assert!(pair.p_value < 0.05, "p = {} for ({}, {})", pair.p_value, pair.group_i, pair.group_j);
        }
    }

    // Logistic regression on perfectly ordered binary outcomes: must stop
    // within the iteration cap and find a positive slope.
    #[test]
    fn test_logistic_convergence() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let y = [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0];
        let r = logistic_regression(&x, &y, 10, 1).unwrap();
        assert!(r.iterations <= 100, "iterations = {}", r.iterations);
        assert!(r.coefficients[1] > 0.0, "beta_1 = {}", r.coefficients[1]);
    }

    // Rank-based test must also be bit-for-bit deterministic across runs.
    #[test]
    fn test_b7_determinism() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [6.0, 7.0, 8.0, 9.0, 10.0];
        let r1 = mann_whitney(&x, &y).unwrap();
        let r2 = mann_whitney(&x, &y).unwrap();
        assert_eq!(r1.u_statistic.to_bits(), r2.u_statistic.to_bits());
        assert_eq!(r1.p_value.to_bits(), r2.p_value.to_bits());
    }
}