1use cjc_repro::KahanAccumulatorF64;
9use crate::distributions::{t_cdf, chi2_cdf, f_cdf, normal_cdf};
10use crate::stats;
11
/// Result of a one-sample, two-sample (Welch), or paired t-test.
#[derive(Debug, Clone)]
pub struct TTestResult {
    /// The t statistic.
    pub t_statistic: f64,
    /// Two-sided p-value.
    pub p_value: f64,
    /// Degrees of freedom used for the t distribution.
    pub df: f64,
    /// Sample mean (one-sample/paired) or difference of means (two-sample).
    pub mean: f64,
    /// Standard error of the mean (or of the mean difference).
    pub se: f64,
}
25
26pub fn t_test(data: &[f64], mu: f64) -> Result<TTestResult, String> {
28 if data.len() < 2 {
29 return Err("t_test: need at least 2 observations".into());
30 }
31 let n = data.len() as f64;
32 let mean = {
33 let mut acc = KahanAccumulatorF64::new();
34 for &x in data { acc.add(x); }
35 acc.finalize() / n
36 };
37 let s = stats::sample_sd(data)?;
38 let se = s / n.sqrt();
39 let t = (mean - mu) / se;
40 let df = n - 1.0;
41 let p = 2.0 * (1.0 - t_cdf(t.abs(), df));
43 Ok(TTestResult { t_statistic: t, p_value: p, df, mean, se })
44}
45
46pub fn t_test_two_sample(x: &[f64], y: &[f64]) -> Result<TTestResult, String> {
48 if x.len() < 2 || y.len() < 2 {
49 return Err("t_test_two_sample: need at least 2 observations in each group".into());
50 }
51 let nx = x.len() as f64;
52 let ny = y.len() as f64;
53 let mean_x = {
54 let mut acc = KahanAccumulatorF64::new();
55 for &v in x { acc.add(v); }
56 acc.finalize() / nx
57 };
58 let mean_y = {
59 let mut acc = KahanAccumulatorF64::new();
60 for &v in y { acc.add(v); }
61 acc.finalize() / ny
62 };
63 let var_x = stats::sample_variance(x)?;
64 let var_y = stats::sample_variance(y)?;
65 let se = (var_x / nx + var_y / ny).sqrt();
66 let t = (mean_x - mean_y) / se;
67 let num = (var_x / nx + var_y / ny).powi(2);
69 let denom = (var_x / nx).powi(2) / (nx - 1.0) + (var_y / ny).powi(2) / (ny - 1.0);
70 let df = num / denom;
71 let p = 2.0 * (1.0 - t_cdf(t.abs(), df));
72 Ok(TTestResult { t_statistic: t, p_value: p, df, mean: mean_x - mean_y, se })
73}
74
75pub fn t_test_paired(x: &[f64], y: &[f64]) -> Result<TTestResult, String> {
77 if x.len() != y.len() {
78 return Err("t_test_paired: arrays must have same length".into());
79 }
80 let diffs: Vec<f64> = x.iter().zip(y.iter()).map(|(a, b)| a - b).collect();
81 t_test(&diffs, 0.0)
82}
83
#[derive(Debug, Clone)]
/// Outcome of a chi-squared goodness-of-fit test.
pub struct ChiSquaredResult {
    /// Test statistic: sum over categories of (observed - expected)^2 / expected.
    pub chi2: f64,
    /// Upper-tail p-value under the chi-squared distribution.
    pub p_value: f64,
    /// Degrees of freedom (number of categories - 1).
    pub df: f64,
}
95
96pub fn chi_squared_test(observed: &[f64], expected: &[f64]) -> Result<ChiSquaredResult, String> {
98 if observed.len() != expected.len() {
99 return Err("chi_squared_test: observed and expected must have same length".into());
100 }
101 if observed.is_empty() {
102 return Err("chi_squared_test: empty data".into());
103 }
104 let mut acc = KahanAccumulatorF64::new();
105 for i in 0..observed.len() {
106 if expected[i] <= 0.0 {
107 return Err(format!("chi_squared_test: expected[{i}] must be > 0"));
108 }
109 let diff = observed[i] - expected[i];
110 acc.add(diff * diff / expected[i]);
111 }
112 let chi2 = acc.finalize();
113 let df = (observed.len() - 1) as f64;
114 let p = 1.0 - chi2_cdf(chi2, df);
115 Ok(ChiSquaredResult { chi2, p_value: p, df })
116}
117
#[derive(Debug, Clone)]
/// Result of a one-way ANOVA.
pub struct AnovaResult {
    /// F statistic: mean square between / mean square within.
    pub f_statistic: f64,
    /// Upper-tail p-value from the F distribution.
    pub p_value: f64,
    /// Between-group degrees of freedom (k - 1).
    pub df_between: f64,
    /// Within-group degrees of freedom (N - k).
    pub df_within: f64,
    /// Between-group sum of squares.
    pub ss_between: f64,
    /// Within-group sum of squares.
    pub ss_within: f64,
}
132
133pub fn anova_oneway(groups: &[&[f64]]) -> Result<AnovaResult, String> {
135 if groups.len() < 2 {
136 return Err("anova_oneway: need at least 2 groups".into());
137 }
138 let k = groups.len();
139 let n_total: usize = groups.iter().map(|g| g.len()).sum();
140
141 let mut grand_acc = KahanAccumulatorF64::new();
143 for &g in groups {
144 for &x in g {
145 grand_acc.add(x);
146 }
147 }
148 let grand_mean = grand_acc.finalize() / n_total as f64;
149
150 let mut ss_between_acc = KahanAccumulatorF64::new();
152 let mut ss_within_acc = KahanAccumulatorF64::new();
153 for &g in groups {
154 let ni = g.len() as f64;
155 let mut group_acc = KahanAccumulatorF64::new();
156 for &x in g { group_acc.add(x); }
157 let group_mean = group_acc.finalize() / ni;
158 let diff = group_mean - grand_mean;
159 ss_between_acc.add(ni * diff * diff);
160 for &x in g {
161 let d = x - group_mean;
162 ss_within_acc.add(d * d);
163 }
164 }
165 let ss_between = ss_between_acc.finalize();
166 let ss_within = ss_within_acc.finalize();
167 let df_between = (k - 1) as f64;
168 let df_within = (n_total - k) as f64;
169
170 if df_within <= 0.0 || ss_within == 0.0 {
171 return Err("anova_oneway: insufficient data".into());
172 }
173
174 let ms_between = ss_between / df_between;
175 let ms_within = ss_within / df_within;
176 let f_stat = ms_between / ms_within;
177 let p = 1.0 - f_cdf(f_stat, df_between, df_within);
178
179 Ok(AnovaResult {
180 f_statistic: f_stat,
181 p_value: p,
182 df_between,
183 df_within,
184 ss_between,
185 ss_within,
186 })
187}
188
189pub fn f_test(x: &[f64], y: &[f64]) -> Result<(f64, f64), String> {
191 let var_x = stats::sample_variance(x)?;
192 let var_y = stats::sample_variance(y)?;
193 let f = var_x / var_y;
194 let df1 = (x.len() - 1) as f64;
195 let df2 = (y.len() - 1) as f64;
196 let p = if f > 1.0 {
197 2.0 * (1.0 - f_cdf(f, df1, df2))
198 } else {
199 2.0 * f_cdf(f, df1, df2)
200 };
201 Ok((f, p))
202}
203
/// Result of an ordinary (or weighted) least-squares fit.
#[derive(Debug, Clone)]
pub struct LmResult {
    /// Fitted coefficients: intercept first, then one slope per predictor.
    pub coefficients: Vec<f64>,
    /// Standard error of each coefficient.
    pub std_errors: Vec<f64>,
    /// t statistic (coefficient / standard error) per coefficient.
    pub t_values: Vec<f64>,
    /// Two-sided p-value per coefficient.
    pub p_values: Vec<f64>,
    /// Coefficient of determination.
    pub r_squared: f64,
    /// R-squared adjusted for the number of predictors.
    pub adj_r_squared: f64,
    /// Per-observation residuals y - y_hat.
    pub residuals: Vec<f64>,
    /// Overall F statistic for the regression.
    pub f_statistic: f64,
    /// p-value of the overall F test.
    pub f_p_value: f64,
}
221
/// Ordinary least-squares linear regression with an intercept.
///
/// `x_flat` is a row-major n-by-p predictor matrix (no intercept column —
/// a column of ones is prepended internally); `y` is the response of
/// length n. The fit uses Householder QR on the augmented design, then
/// back-substitution, so no normal equations are formed.
///
/// # Errors
/// Fails on dimension mismatches, when `n <= p + 1`, or when the design
/// matrix is rank-deficient.
pub fn lm(x_flat: &[f64], y: &[f64], n: usize, p: usize) -> Result<LmResult, String> {
    if x_flat.len() != n * p {
        return Err(format!("lm: x_matrix size {} != n*p = {}", x_flat.len(), n * p));
    }
    if y.len() != n {
        return Err(format!("lm: y length {} != n = {n}", y.len()));
    }
    if n <= p + 1 {
        return Err("lm: need n > p+1 for regression with intercept".into());
    }

    // Augment the design with a leading column of ones (pp = p + 1 columns).
    let pp = p + 1; let mut x_aug = vec![0.0; n * pp];
    for i in 0..n {
        x_aug[i * pp] = 1.0; for j in 0..p {
            x_aug[i * pp + (j + 1)] = x_flat[i * p + j];
        }
    }

    // Householder QR: `r` is overwritten in place into the upper-triangular
    // factor, while the same reflections are applied to `q_t_y` = Q^T y.
    let m = n;
    let mut q_t_y = y.to_vec(); let mut r = x_aug.clone();

    for j in 0..pp {
        // Norm of the j-th column below (and including) the diagonal.
        let mut norm_sq = 0.0;
        for i in j..m {
            norm_sq += r[i * pp + j] * r[i * pp + j];
        }
        let norm = norm_sq.sqrt();
        if norm < 1e-15 {
            return Err("lm: rank-deficient design matrix".into());
        }
        // Choose the reflection sign that avoids cancellation in u0.
        let sign = if r[j * pp + j] >= 0.0 { 1.0 } else { -1.0 };
        let u0 = r[j * pp + j] + sign * norm;
        // Householder vector, normalized so v[0] == 1.
        let mut v = vec![0.0; m - j];
        v[0] = 1.0;
        for i in 1..(m - j) {
            v[i] = r[(j + i) * pp + j] / u0;
        }
        // tau = 2 / (v . v), accumulated with compensated summation.
        let tau = 2.0 / {
            let mut acc = KahanAccumulatorF64::new();
            for &vi in &v { acc.add(vi * vi); }
            acc.finalize()
        };

        // Apply the reflector H = I - tau v v^T to the remaining columns of R.
        for col in j..pp {
            let mut dot = 0.0;
            for i in 0..v.len() {
                dot += v[i] * r[(j + i) * pp + col];
            }
            for i in 0..v.len() {
                r[(j + i) * pp + col] -= tau * dot * v[i];
            }
        }
        // Apply the same reflector to Q^T y.
        {
            let mut dot = 0.0;
            for i in 0..v.len() {
                dot += v[i] * q_t_y[j + i];
            }
            for i in 0..v.len() {
                q_t_y[j + i] -= tau * dot * v[i];
            }
        }
    }

    // Back-substitution: solve R beta = (Q^T y)[0..pp].
    let mut beta = vec![0.0; pp];
    for i in (0..pp).rev() {
        let mut s = q_t_y[i];
        for j in (i + 1)..pp {
            s -= r[i * pp + j] * beta[j];
        }
        beta[i] = s / r[i * pp + i];
    }

    // Residuals and residual sum of squares.
    let mut residuals = vec![0.0; n];
    let mut ss_res_acc = KahanAccumulatorF64::new();
    for i in 0..n {
        let mut y_hat = 0.0;
        for j in 0..pp {
            y_hat += x_aug[i * pp + j] * beta[j];
        }
        residuals[i] = y[i] - y_hat;
        ss_res_acc.add(residuals[i] * residuals[i]);
    }
    let ss_res = ss_res_acc.finalize();

    // Total sum of squares around the mean of y.
    let y_mean = {
        let mut acc = KahanAccumulatorF64::new();
        for &yi in y { acc.add(yi); }
        acc.finalize() / n as f64
    };
    let mut ss_tot_acc = KahanAccumulatorF64::new();
    for &yi in y {
        let d = yi - y_mean;
        ss_tot_acc.add(d * d);
    }
    let ss_tot = ss_tot_acc.finalize();

    // Guard ss_tot == 0 (constant y) to avoid a NaN R^2.
    let r_squared = if ss_tot > 0.0 { 1.0 - ss_res / ss_tot } else { 0.0 };
    let adj_r_squared = 1.0 - (1.0 - r_squared) * ((n - 1) as f64) / ((n - pp) as f64);

    // Coefficient covariance: Cov(beta) = mse * (R^T R)^{-1} = mse * R^{-1} R^{-T}.
    // Invert the upper-triangular R column by column.
    let mse = ss_res / (n - pp) as f64;
    let mut r_inv = vec![0.0; pp * pp];
    for i in 0..pp {
        r_inv[i * pp + i] = 1.0 / r[i * pp + i];
        for j in (0..i).rev() {
            let mut s = 0.0;
            for k in (j + 1)..=i {
                s += r[j * pp + k] * r_inv[k * pp + i];
            }
            r_inv[j * pp + i] = -s / r[j * pp + j];
        }
    }
    // Per-coefficient standard errors, t statistics, and two-sided p-values.
    let mut std_errors = Vec::with_capacity(pp);
    let mut t_values = Vec::with_capacity(pp);
    let mut p_values = Vec::with_capacity(pp);
    let df = (n - pp) as f64;
    for i in 0..pp {
        // diag element i of R^{-1} R^{-T} = sum_k R_inv[i][k]^2.
        let mut diag = 0.0;
        for k in i..pp {
            diag += r_inv[i * pp + k] * r_inv[i * pp + k];
        }
        let se = (diag * mse).sqrt();
        std_errors.push(se);
        let t = if se > 0.0 { beta[i] / se } else { 0.0 };
        t_values.push(t);
        let pv = 2.0 * (1.0 - t_cdf(t.abs(), df));
        p_values.push(pv);
    }

    // Overall F test of the regression (all slopes zero).
    let ss_reg = ss_tot - ss_res;
    let df_reg = (pp - 1) as f64;
    let f_stat = if df_reg > 0.0 && mse > 0.0 {
        (ss_reg / df_reg) / mse
    } else {
        0.0
    };
    let f_p = 1.0 - f_cdf(f_stat, df_reg, df);

    Ok(LmResult {
        coefficients: beta,
        std_errors,
        t_values,
        p_values,
        r_squared,
        adj_r_squared,
        residuals,
        f_statistic: f_stat,
        f_p_value: f_p,
    })
}
392
/// Weighted least squares via the sqrt-weight transform: row i of the
/// design and response is scaled by sqrt(weights[i]) and the scaled
/// problem is delegated to `lm`.
///
/// NOTE(review): `lm` prepends an *unscaled* intercept column of ones
/// after this scaling, so the intercept term is effectively unweighted.
/// For non-uniform weights this deviates from textbook WLS (where the
/// intercept column would also be scaled by sqrt(w)) — confirm intended.
/// With uniform weights the result matches OLS exactly.
///
/// # Errors
/// Fails on dimension mismatches or any non-positive weight, plus any
/// error propagated from `lm`.
pub fn wls(
    x_flat: &[f64],
    y: &[f64],
    weights: &[f64],
    n: usize,
    p: usize,
) -> Result<LmResult, String> {
    if x_flat.len() != n * p {
        return Err(format!("wls: x_matrix size {} != n*p = {}", x_flat.len(), n * p));
    }
    if y.len() != n || weights.len() != n {
        return Err("wls: y and weights must have length n".into());
    }
    for (i, &w) in weights.iter().enumerate() {
        if w <= 0.0 {
            return Err(format!("wls: weight[{i}] = {w} must be positive"));
        }
    }
    // Scale each row of X and y by sqrt(w_i) so that OLS on the scaled
    // problem minimizes the weighted residual sum of squares.
    let mut x_w = vec![0.0; n * p];
    let mut y_w = vec![0.0; n];
    for i in 0..n {
        let sw = weights[i].sqrt();
        y_w[i] = y[i] * sw;
        for j in 0..p {
            x_w[i * p + j] = x_flat[i * p + j] * sw;
        }
    }
    lm(&x_w, &y_w, n, p)
}
430
#[derive(Debug, Clone)]
/// One pairwise comparison produced by `tukey_hsd`.
pub struct TukeyHsdPair {
    /// Index of the first group in the pair.
    pub group_i: usize,
    /// Index of the second group in the pair.
    pub group_j: usize,
    /// Difference of group means (mean_i - mean_j).
    pub mean_diff: f64,
    /// Standard error of the mean difference.
    pub se: f64,
    /// Studentized-range-style statistic |mean_diff| / se.
    pub q_statistic: f64,
    /// Multiplicity-adjusted p-value, clamped to [0, 1].
    pub p_value: f64,
}
445
446pub fn tukey_hsd(groups: &[&[f64]]) -> Result<Vec<TukeyHsdPair>, String> {
448 if groups.len() < 2 {
449 return Err("tukey_hsd: need at least 2 groups".into());
450 }
451 let k = groups.len();
452 let mut n_total = 0usize;
454 let mut means = Vec::with_capacity(k);
455 let mut ssw = KahanAccumulatorF64::new();
456 for g in groups {
457 if g.is_empty() { return Err("tukey_hsd: empty group".into()); }
458 n_total += g.len();
459 let mut acc = KahanAccumulatorF64::new();
460 for &x in *g { acc.add(x); }
461 let m = acc.finalize() / g.len() as f64;
462 means.push(m);
463 for &x in *g {
464 let d = x - m;
465 ssw.add(d * d);
466 }
467 }
468 let df_w = (n_total - k) as f64;
469 if df_w <= 0.0 { return Err("tukey_hsd: not enough degrees of freedom".into()); }
470 let msw = ssw.finalize() / df_w;
471
472 let mut results = Vec::new();
473 for i in 0..k {
474 for j in (i + 1)..k {
475 let ni = groups[i].len() as f64;
476 let nj = groups[j].len() as f64;
477 let se = (msw * 0.5 * (1.0 / ni + 1.0 / nj)).sqrt();
478 let mean_diff = means[i] - means[j];
479 let q = mean_diff.abs() / se;
480 let raw_p = (k as f64) * ((k - 1) as f64) * (1.0 - normal_cdf(q / 2.0_f64.sqrt()));
483 let p = raw_p.min(1.0).max(0.0);
484 results.push(TukeyHsdPair { group_i: i, group_j: j, mean_diff, se, q_statistic: q, p_value: p });
485 }
486 }
487 Ok(results)
488}
489
#[derive(Debug, Clone)]
/// Outcome of a Mann-Whitney U (rank-sum) test.
pub struct MannWhitneyResult {
    /// U statistic: the smaller of U1 and U2.
    pub u_statistic: f64,
    /// z score from the normal approximation.
    pub z_score: f64,
    /// Two-sided p-value from the normal approximation.
    pub p_value: f64,
}
497
498pub fn mann_whitney(x: &[f64], y: &[f64]) -> Result<MannWhitneyResult, String> {
500 if x.is_empty() || y.is_empty() {
501 return Err("mann_whitney: both groups must be non-empty".into());
502 }
503 let n1 = x.len();
504 let n2 = y.len();
505 let n = n1 + n2;
506
507 let mut combined: Vec<(f64, usize, usize)> = Vec::with_capacity(n);
509 for (i, &v) in x.iter().enumerate() {
510 combined.push((v, 0, i)); }
512 for (i, &v) in y.iter().enumerate() {
513 combined.push((v, 1, i)); }
515 combined.sort_by(|a, b| a.0.total_cmp(&b.0).then(a.1.cmp(&b.1)).then(a.2.cmp(&b.2)));
516
517 let mut ranks = vec![0.0; n];
519 let mut i = 0;
520 while i < n {
521 let mut j = i;
522 while j < n && combined[j].0.to_bits() == combined[i].0.to_bits() {
523 j += 1;
524 }
525 let avg_rank = (i + 1 + j) as f64 / 2.0;
526 for k in i..j {
527 ranks[k] = avg_rank;
528 }
529 i = j;
530 }
531
532 let mut r1 = KahanAccumulatorF64::new();
534 for (idx, &(_, group, _)) in combined.iter().enumerate() {
535 if group == 0 { r1.add(ranks[idx]); }
536 }
537 let r1_val = r1.finalize();
538 let u1 = r1_val - (n1 as f64 * (n1 as f64 + 1.0)) / 2.0;
539 let u2 = (n1 as f64) * (n2 as f64) - u1;
540 let u = u1.min(u2);
541
542 let mu = (n1 as f64 * n2 as f64) / 2.0;
543 let sigma = ((n1 as f64 * n2 as f64 * (n as f64 + 1.0)) / 12.0).sqrt();
544 let z = if sigma > 0.0 { (u - mu) / sigma } else { 0.0 };
545 let p = 2.0 * (1.0 - normal_cdf(z.abs()));
546
547 Ok(MannWhitneyResult { u_statistic: u, z_score: z, p_value: p })
548}
549
#[derive(Debug, Clone)]
/// Outcome of a Kruskal-Wallis H test.
pub struct KruskalWallisResult {
    /// H statistic.
    pub h_statistic: f64,
    /// Upper-tail p-value from the chi-squared approximation.
    pub p_value: f64,
    /// Degrees of freedom (number of groups - 1).
    pub df: f64,
}
557
558pub fn kruskal_wallis(groups: &[&[f64]]) -> Result<KruskalWallisResult, String> {
560 if groups.len() < 2 {
561 return Err("kruskal_wallis: need at least 2 groups".into());
562 }
563 let k = groups.len();
564 let mut all: Vec<(f64, usize, usize)> = Vec::new();
565 let mut group_sizes = Vec::with_capacity(k);
566 for (gi, g) in groups.iter().enumerate() {
567 if g.is_empty() { return Err("kruskal_wallis: empty group".into()); }
568 group_sizes.push(g.len());
569 for (i, &v) in g.iter().enumerate() {
570 all.push((v, gi, i));
571 }
572 }
573 let n = all.len();
574 all.sort_by(|a, b| a.0.total_cmp(&b.0).then(a.1.cmp(&b.1)).then(a.2.cmp(&b.2)));
575
576 let mut ranks = vec![0.0; n];
578 let mut i = 0;
579 while i < n {
580 let mut j = i;
581 while j < n && all[j].0.to_bits() == all[i].0.to_bits() { j += 1; }
582 let avg = (i + 1 + j) as f64 / 2.0;
583 for idx in i..j { ranks[idx] = avg; }
584 i = j;
585 }
586
587 let mut rank_sums = vec![KahanAccumulatorF64::new(); k];
589 for (idx, &(_, gi, _)) in all.iter().enumerate() {
590 rank_sums[gi].add(ranks[idx]);
591 }
592
593 let nf = n as f64;
594 let mut h_acc = KahanAccumulatorF64::new();
595 for (gi, acc) in rank_sums.iter().enumerate() {
596 let ri = acc.clone().finalize();
597 let ni = group_sizes[gi] as f64;
598 h_acc.add(ri * ri / ni);
599 }
600 let h = (12.0 / (nf * (nf + 1.0))) * h_acc.finalize() - 3.0 * (nf + 1.0);
601 let df = (k - 1) as f64;
602 let p = 1.0 - chi2_cdf(h, df);
603
604 Ok(KruskalWallisResult { h_statistic: h, p_value: p, df })
605}
606
#[derive(Debug, Clone)]
/// Outcome of a Wilcoxon signed-rank test.
pub struct WilcoxonResult {
    /// W statistic: the smaller of the positive- and negative-rank sums.
    pub w_statistic: f64,
    /// z score from the normal approximation.
    pub z_score: f64,
    /// Two-sided p-value from the normal approximation.
    pub p_value: f64,
}
614
615pub fn wilcoxon_signed_rank(x: &[f64], y: &[f64]) -> Result<WilcoxonResult, String> {
617 if x.len() != y.len() {
618 return Err("wilcoxon_signed_rank: x and y must have same length".into());
619 }
620 if x.len() < 2 {
621 return Err("wilcoxon_signed_rank: need at least 2 observations".into());
622 }
623
624 let mut diffs: Vec<(f64, usize)> = Vec::new(); let mut signs: Vec<f64> = Vec::new();
627 for (i, (&xi, &yi)) in x.iter().zip(y.iter()).enumerate() {
628 let d = xi - yi;
629 if d.abs() > 1e-15 {
630 diffs.push((d.abs(), i));
631 signs.push(if d > 0.0 { 1.0 } else { -1.0 });
632 }
633 }
634 let nr = diffs.len();
635 if nr == 0 {
636 return Ok(WilcoxonResult { w_statistic: 0.0, z_score: 0.0, p_value: 1.0 });
637 }
638
639 let mut indexed: Vec<(usize, f64)> = diffs.iter().enumerate().map(|(i, &(v, _))| (i, v)).collect();
641 indexed.sort_by(|a, b| a.1.total_cmp(&b.1).then(a.0.cmp(&b.0)));
642
643 let mut ranks = vec![0.0; nr];
645 let mut i = 0;
646 while i < nr {
647 let mut j = i;
648 while j < nr && indexed[j].1.to_bits() == indexed[i].1.to_bits() { j += 1; }
649 let avg = (i + 1 + j) as f64 / 2.0;
650 for k in i..j { ranks[indexed[k].0] = avg; }
651 i = j;
652 }
653
654 let mut w_plus = KahanAccumulatorF64::new();
655 let mut w_minus = KahanAccumulatorF64::new();
656 for (i, &s) in signs.iter().enumerate() {
657 if s > 0.0 { w_plus.add(ranks[i]); } else { w_minus.add(ranks[i]); }
658 }
659 let wp = w_plus.finalize();
660 let wm = w_minus.finalize();
661 let w = wp.min(wm);
662
663 let nf = nr as f64;
664 let mu = nf * (nf + 1.0) / 4.0;
665 let sigma = (nf * (nf + 1.0) * (2.0 * nf + 1.0) / 24.0).sqrt();
666 let z = if sigma > 0.0 { (w - mu) / sigma } else { 0.0 };
667 let p = 2.0 * (1.0 - normal_cdf(z.abs()));
668
669 Ok(WilcoxonResult { w_statistic: w, z_score: z, p_value: p })
670}
671
/// Bonferroni multiple-testing correction: each p-value is multiplied by
/// the number of tests and capped at 1.0. Returns values in input order.
pub fn bonferroni(p_values: &[f64]) -> Vec<f64> {
    let m = p_values.len() as f64;
    let mut adjusted = Vec::with_capacity(p_values.len());
    for &p in p_values {
        adjusted.push((p * m).min(1.0));
    }
    adjusted
}
677
/// Benjamini-Hochberg false-discovery-rate adjustment. Returns adjusted
/// p-values in the original input order.
pub fn fdr_bh(p_values: &[f64]) -> Vec<f64> {
    let m = p_values.len();
    if m == 0 { return vec![]; }

    // Order indices by p-value; ties broken by original index so the
    // procedure is fully deterministic.
    let mut order: Vec<(usize, f64)> = p_values.iter().copied().enumerate().collect();
    order.sort_by(|a, b| a.1.total_cmp(&b.1).then(a.0.cmp(&b.0)));

    // Raw BH values in sorted order: p * m / rank, capped at 1.
    let mut stepped: Vec<f64> = order
        .iter()
        .enumerate()
        .map(|(rank_0, &(_, p))| (p * m as f64 / (rank_0 + 1) as f64).min(1.0))
        .collect();

    // Enforce monotonicity from the largest p-value downwards (step-up).
    for i in (0..m - 1).rev() {
        if stepped[i] > stepped[i + 1] {
            stepped[i] = stepped[i + 1];
        }
    }

    // Scatter the adjusted values back to their original positions.
    let mut adjusted = vec![0.0; m];
    for (rank_0, &(original, _)) in order.iter().enumerate() {
        adjusted[original] = stepped[rank_0];
    }
    adjusted
}
708
#[derive(Debug, Clone)]
/// Result of a logistic regression fit.
pub struct LogisticResult {
    /// Coefficients: intercept first, then one slope per predictor.
    pub coefficients: Vec<f64>,
    /// Wald standard errors from the inverse of X' W X.
    pub std_errors: Vec<f64>,
    /// Wald z statistics (coefficient / standard error).
    pub z_values: Vec<f64>,
    /// Two-sided normal p-values for the z statistics.
    pub p_values: Vec<f64>,
    /// Log-likelihood at the fitted coefficients.
    pub log_likelihood: f64,
    /// Akaike information criterion: -2*LL + 2*(p+1).
    pub aic: f64,
    /// Number of IRLS iterations actually performed.
    pub iterations: usize,
}
720
/// Fits a logistic regression of a binary response `y` (values 0.0/1.0 —
/// TODO confirm: no validation is performed here) on a row-major n-by-p
/// predictor matrix `x_flat`, via iteratively reweighted least squares
/// (Newton-Raphson). An intercept column is prepended internally.
///
/// Iteration stops when the largest coefficient update falls below 1e-8
/// or after 100 iterations, whichever comes first.
///
/// # Errors
/// Fails on dimension mismatches, or if the weighted normal equations
/// cannot be solved / inverted.
pub fn logistic_regression(
    x_flat: &[f64],
    y: &[f64],
    n: usize,
    p: usize,
) -> Result<LogisticResult, String> {
    if x_flat.len() != n * p {
        return Err(format!("logistic_regression: x size {} != n*p = {}", x_flat.len(), n * p));
    }
    if y.len() != n {
        return Err("logistic_regression: y must have length n".into());
    }
    // Augment the design with a leading column of ones for the intercept.
    let pp = p + 1; let mut x = vec![0.0; n * pp];
    for i in 0..n {
        x[i * pp] = 1.0; for j in 0..p {
            x[i * pp + j + 1] = x_flat[i * p + j];
        }
    }

    let mut beta = vec![0.0; pp];
    let max_iter = 100;
    let tol = 1e-8;
    let mut iterations = 0;

    // IRLS main loop: each step solves (X' W X) delta = X' (y - mu).
    for iter in 0..max_iter {
        iterations = iter + 1;

        // Fitted probabilities mu = sigmoid(X beta), clamped away from 0/1
        // so the weights and log terms stay finite.
        let mut mu = vec![0.0; n];
        for i in 0..n {
            let mut eta = 0.0;
            for j in 0..pp {
                eta += x[i * pp + j] * beta[j];
            }
            mu[i] = 1.0 / (1.0 + (-eta).exp());
            mu[i] = mu[i].max(1e-10).min(1.0 - 1e-10);
        }

        // IRLS weights: Bernoulli variance mu * (1 - mu).
        let w: Vec<f64> = mu.iter().map(|&m| m * (1.0 - m)).collect();

        // Fisher information X' W X (pp x pp, row-major).
        let mut xtwx = vec![0.0; pp * pp];
        for i in 0..n {
            for j in 0..pp {
                for k in 0..pp {
                    xtwx[j * pp + k] += x[i * pp + j] * w[i] * x[i * pp + k];
                }
            }
        }

        // Score vector X' (y - mu).
        let mut grad = vec![0.0; pp];
        for i in 0..n {
            let r = y[i] - mu[i];
            for j in 0..pp {
                grad[j] += x[i * pp + j] * r;
            }
        }

        // Newton step.
        let delta = solve_symmetric(&xtwx, &grad, pp)?;

        let mut max_change = 0.0_f64;
        for j in 0..pp {
            beta[j] += delta[j];
            max_change = max_change.max(delta[j].abs());
        }

        // Converged: no coefficient moved more than `tol`.
        if max_change < tol {
            break;
        }
    }

    // Recompute fitted probabilities at the final coefficients.
    let mut mu = vec![0.0; n];
    for i in 0..n {
        let mut eta = 0.0;
        for j in 0..pp { eta += x[i * pp + j] * beta[j]; }
        mu[i] = 1.0 / (1.0 + (-eta).exp());
        mu[i] = mu[i].max(1e-10).min(1.0 - 1e-10);
    }

    // Bernoulli log-likelihood (Kahan-compensated) and AIC.
    let mut ll = KahanAccumulatorF64::new();
    for i in 0..n {
        ll.add(y[i] * mu[i].ln() + (1.0 - y[i]) * (1.0 - mu[i]).ln());
    }
    let log_likelihood = ll.finalize();
    let aic = -2.0 * log_likelihood + 2.0 * pp as f64;

    // Wald inference: Cov(beta) approximated by (X' W X)^{-1} at the
    // final weights.
    let w: Vec<f64> = mu.iter().map(|&m| m * (1.0 - m)).collect();
    let mut xtwx = vec![0.0; pp * pp];
    for i in 0..n {
        for j in 0..pp {
            for k in 0..pp {
                xtwx[j * pp + k] += x[i * pp + j] * w[i] * x[i * pp + k];
            }
        }
    }
    let inv = invert_symmetric(&xtwx, pp)?;
    let mut std_errors = Vec::with_capacity(pp);
    let mut z_values = Vec::with_capacity(pp);
    let mut p_values = Vec::with_capacity(pp);
    for j in 0..pp {
        // max(0.0) guards tiny negative diagonal entries from round-off
        // before taking the square root.
        let se = inv[j * pp + j].max(0.0).sqrt();
        std_errors.push(se);
        let z = if se > 0.0 { beta[j] / se } else { 0.0 };
        z_values.push(z);
        let pv = 2.0 * (1.0 - normal_cdf(z.abs()));
        p_values.push(pv);
    }

    Ok(LogisticResult {
        coefficients: beta,
        std_errors,
        z_values,
        p_values,
        log_likelihood,
        aic,
        iterations,
    })
}
855
/// Solves the symmetric positive-definite system `a * x = b` (`a` is n-by-n,
/// row-major) via Cholesky factorization with forward/back substitution.
///
/// If `a` is not positive definite, a ridge term is added to the diagonal
/// and the factorization is retried with escalating regularization. The
/// retry is a bounded loop: the previous version retried via recursion,
/// adding 1e-6 per level, which could recurse ~10^6 deep (stack overflow)
/// for strongly indefinite matrices.
///
/// # Errors
/// Returns `Err` if the matrix is still not positive definite after the
/// maximum number of regularization attempts.
fn solve_symmetric(a: &[f64], b: &[f64], n: usize) -> Result<Vec<f64>, String> {
    let mut a_work = a.to_vec();
    let mut ridge = 0.0_f64;
    let mut l = vec![0.0; n * n];

    'attempt: for _ in 0..60 {
        // Cholesky factorization: a_work = L * L^T, L lower triangular.
        for i in 0..n {
            for j in 0..=i {
                let mut sum = 0.0;
                for k in 0..j {
                    sum += l[i * n + k] * l[j * n + k];
                }
                if i == j {
                    let diag = a_work[i * n + i] - sum;
                    if diag <= 0.0 {
                        // Not positive definite: rebuild from the original
                        // matrix with a (geometrically) larger ridge and retry.
                        ridge = if ridge == 0.0 { 1e-6 } else { ridge * 10.0 };
                        for ii in 0..n {
                            a_work[ii * n + ii] = a[ii * n + ii] + ridge;
                        }
                        continue 'attempt;
                    }
                    l[i * n + j] = diag.sqrt();
                } else {
                    l[i * n + j] = (a_work[i * n + j] - sum) / l[j * n + j];
                }
            }
        }
        // Forward substitution: L y = b.
        let mut y = vec![0.0; n];
        for i in 0..n {
            let mut sum = 0.0;
            for j in 0..i { sum += l[i * n + j] * y[j]; }
            y[i] = (b[i] - sum) / l[i * n + i];
        }
        // Back substitution: L^T x = y.
        let mut x = vec![0.0; n];
        for i in (0..n).rev() {
            let mut sum = 0.0;
            for j in (i + 1)..n { sum += l[j * n + i] * x[j]; }
            x[i] = (y[i] - sum) / l[i * n + i];
        }
        return Ok(x);
    }
    Err("solve_symmetric: matrix not positive definite after regularization".into())
}
896
897fn invert_symmetric(a: &[f64], n: usize) -> Result<Vec<f64>, String> {
899 let mut inv = vec![0.0; n * n];
900 for col in 0..n {
901 let mut e = vec![0.0; n];
902 e[col] = 1.0;
903 let x = solve_symmetric(a, &e, n)?;
904 for row in 0..n {
905 inv[row * n + col] = x[row];
906 }
907 }
908 Ok(inv)
909}
910
#[cfg(test)]
mod tests {
    // Smoke tests: these mostly check statistic direction and p-value
    // magnitude against intuitive thresholds rather than exact reference
    // values, plus bitwise determinism of repeated runs.
    use super::*;

    #[test]
    fn test_t_test_known_mean() {
        // Data centered on the hypothesized mean: should not reject H0.
        let data = [5.1, 4.9, 5.0, 5.2, 4.8, 5.0, 5.1, 4.9];
        let r = t_test(&data, 5.0).unwrap();
        assert!(r.p_value > 0.05, "p = {}", r.p_value);
    }

    #[test]
    fn test_t_test_shifted() {
        // Mean far from the hypothesized value: strong rejection expected.
        let data = [10.1, 10.2, 10.0, 9.9, 10.3, 10.1, 10.0, 10.2];
        let r = t_test(&data, 0.0).unwrap();
        assert!(r.p_value < 0.001, "p = {}", r.p_value);
    }

    #[test]
    fn test_t_test_two_sample() {
        // Clearly separated groups: expect a highly significant difference.
        let x = [10.0, 11.0, 12.0, 13.0, 14.0];
        let y = [20.0, 21.0, 22.0, 23.0, 24.0];
        let r = t_test_two_sample(&x, &y).unwrap();
        assert!(r.p_value < 0.001);
    }

    #[test]
    fn test_chi_squared_uniform() {
        // Observed exactly equals expected: chi2 = 0, p ~ 1.
        let observed = [20.0, 20.0, 20.0, 20.0, 20.0];
        let expected = [20.0, 20.0, 20.0, 20.0, 20.0];
        let r = chi_squared_test(&observed, &expected).unwrap();
        assert_eq!(r.chi2, 0.0);
        assert!((r.p_value - 1.0).abs() < 1e-6);
    }

    #[test]
    fn test_anova_equal_groups() {
        // Groups drawn around the same mean: no significant F.
        let g1 = [5.0, 5.1, 4.9, 5.0, 5.2];
        let g2 = [5.0, 4.8, 5.1, 5.0, 4.9];
        let g3 = [5.1, 5.0, 4.9, 5.0, 5.1];
        let r = anova_oneway(&[&g1, &g2, &g3]).unwrap();
        assert!(r.p_value > 0.05, "p = {}", r.p_value);
    }

    #[test]
    fn test_anova_different_groups() {
        // Widely separated group means: expect a very small p-value.
        let g1 = [1.0, 2.0, 3.0, 2.0, 1.0];
        let g2 = [10.0, 11.0, 12.0, 11.0, 10.0];
        let g3 = [20.0, 21.0, 22.0, 21.0, 20.0];
        let r = anova_oneway(&[&g1, &g2, &g3]).unwrap();
        assert!(r.p_value < 0.001);
    }

    #[test]
    fn test_lm_simple() {
        // Exact line y = 1 + 2x: recover coefficients and R^2 = 1.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [3.0, 5.0, 7.0, 9.0, 11.0];
        let r = lm(&x, &y, 5, 1).unwrap();
        assert!((r.coefficients[0] - 1.0).abs() < 1e-10, "intercept = {}", r.coefficients[0]);
        assert!((r.coefficients[1] - 2.0).abs() < 1e-10, "slope = {}", r.coefficients[1]);
        assert!((r.r_squared - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_determinism() {
        // Repeated runs must be bit-for-bit identical.
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
        let r1 = t_test(&data, 3.0).unwrap();
        let r2 = t_test(&data, 3.0).unwrap();
        assert_eq!(r1.t_statistic.to_bits(), r2.t_statistic.to_bits());
        assert_eq!(r1.p_value.to_bits(), r2.p_value.to_bits());
    }

    #[test]
    fn test_wls_uniform_weights() {
        // With all weights equal to 1, WLS must match OLS.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y: Vec<f64> = x.iter().map(|&xi| 1.0 + 2.0 * xi).collect();
        let w = vec![1.0; 5];
        let r = wls(&x, &y, &w, 5, 1).unwrap();
        assert!((r.coefficients[0] - 1.0).abs() < 1e-8, "intercept = {}", r.coefficients[0]);
        assert!((r.coefficients[1] - 2.0).abs() < 1e-8, "slope = {}", r.coefficients[1]);
    }

    #[test]
    fn test_mann_whitney_identical() {
        // Identical samples: no evidence of a location shift.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [1.0, 2.0, 3.0, 4.0, 5.0];
        let r = mann_whitney(&x, &y).unwrap();
        assert!(r.p_value > 0.05, "p = {}", r.p_value);
    }

    #[test]
    fn test_mann_whitney_separated() {
        // Fully separated samples: significant shift expected.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [10.0, 11.0, 12.0, 13.0, 14.0];
        let r = mann_whitney(&x, &y).unwrap();
        assert!(r.p_value < 0.05, "p = {}", r.p_value);
    }

    #[test]
    fn test_kruskal_wallis_identical() {
        // Identical groups: H near zero, large p.
        let g1 = [1.0, 2.0, 3.0, 4.0, 5.0];
        let g2 = [1.0, 2.0, 3.0, 4.0, 5.0];
        let r = kruskal_wallis(&[&g1, &g2]).unwrap();
        assert!(r.p_value > 0.05, "p = {}", r.p_value);
    }

    #[test]
    fn test_kruskal_wallis_different() {
        // Non-overlapping groups: small p expected.
        let g1 = [1.0, 2.0, 3.0];
        let g2 = [10.0, 11.0, 12.0];
        let g3 = [20.0, 21.0, 22.0];
        let r = kruskal_wallis(&[&g1, &g2, &g3]).unwrap();
        assert!(r.p_value < 0.05, "p = {}", r.p_value);
    }

    #[test]
    fn test_wilcoxon_no_difference() {
        // Small symmetric perturbations around x: no systematic shift.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let y = [1.1, 1.9, 3.1, 3.9, 5.1, 5.9, 7.1, 7.9]; let r = wilcoxon_signed_rank(&x, &y).unwrap();
        assert!(r.p_value > 0.05, "p = {}", r.p_value);
    }

    #[test]
    fn test_wilcoxon_clear_shift() {
        // Constant shift of +5 on every pair: significant.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let y: Vec<f64> = x.iter().map(|&v| v + 5.0).collect();
        let r = wilcoxon_signed_rank(&x, &y).unwrap();
        assert!(r.p_value < 0.05, "p = {}", r.p_value);
    }

    #[test]
    fn test_bonferroni_basic() {
        // Each p multiplied by m = 3, capped at 1.
        let pvals = [0.01, 0.04, 0.5];
        let adj = bonferroni(&pvals);
        assert!((adj[0] - 0.03).abs() < 1e-12);
        assert!((adj[1] - 0.12).abs() < 1e-12);
        assert!((adj[2] - 1.0).abs() < 1e-12);
    }

    #[test]
    fn test_fdr_bh_known() {
        // Spot-check BH-adjusted values against loose bounds.
        let pvals = [0.01, 0.04, 0.03, 0.5];
        let adj = fdr_bh(&pvals);
        assert!(adj[0] < 0.05, "adj[0] = {}", adj[0]);
        assert!(adj[2] < 0.07, "adj[2] = {}", adj[2]);
    }

    #[test]
    fn test_fdr_bh_preserves_order() {
        // Adjusted values keep the relative ordering of raw p-values.
        let pvals = [0.5, 0.01, 0.3];
        let adj = fdr_bh(&pvals);
        assert!(adj[1] < adj[0], "adj[0]={}, adj[1]={}", adj[0], adj[1]);
    }

    #[test]
    fn test_tukey_hsd_all_same() {
        // All groups identical: every pairwise mean difference is ~0.
        let g1 = [5.0, 5.0, 5.0, 5.0, 5.0];
        let g2 = [5.0, 5.0, 5.0, 5.0, 5.0];
        let g3 = [5.0, 5.0, 5.0, 5.0, 5.0];
        let results = tukey_hsd(&[&g1, &g2, &g3]).unwrap();
        for pair in &results {
            assert!(pair.mean_diff.abs() < 1e-12);
        }
    }

    #[test]
    fn test_tukey_hsd_one_different() {
        // Only the pairs involving the far-off third group should be significant.
        let g1 = [1.0, 2.0, 3.0, 4.0, 5.0];
        let g2 = [2.0, 3.0, 4.0, 5.0, 6.0];
        let g3 = [20.0, 21.0, 22.0, 23.0, 24.0];
        let results = tukey_hsd(&[&g1, &g2, &g3]).unwrap();
        let g3_pairs: Vec<_> = results.iter().filter(|p| p.group_i == 2 || p.group_j == 2).collect();
        for pair in &g3_pairs {
            assert!(pair.p_value < 0.05, "p = {} for ({}, {})", pair.p_value, pair.group_i, pair.group_j);
        }
    }

    #[test]
    fn test_logistic_convergence() {
        // Monotone separation at x > 5: IRLS should converge with a
        // positive slope coefficient.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let y = [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0];
        let r = logistic_regression(&x, &y, 10, 1).unwrap();
        assert!(r.iterations <= 100, "iterations = {}", r.iterations);
        assert!(r.coefficients[1] > 0.0, "beta_1 = {}", r.coefficients[1]);
    }

    #[test]
    fn test_b7_determinism() {
        // Rank-based test must also be bit-for-bit deterministic.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [6.0, 7.0, 8.0, 9.0, 10.0];
        let r1 = mann_whitney(&x, &y).unwrap();
        let r2 = mann_whitney(&x, &y).unwrap();
        assert_eq!(r1.u_statistic.to_bits(), r2.u_statistic.to_bits());
        assert_eq!(r1.p_value.to_bits(), r2.p_value.to_bits());
    }
}
1129}