// cjc_runtime/hypothesis.rs
//! Statistical hypothesis tests — t-test (one-sample, two-sample, paired),
//! chi-squared goodness-of-fit, ANOVA, F-test.
//!
//! # Determinism Contract
//! All tests are deterministic — same input => identical results.
//! Uses Kahan summation for all reductions.

use cjc_repro::KahanAccumulatorF64;

use crate::distributions::{chi2_cdf, f_cdf, normal_cdf, t_cdf};
use crate::stats;
11
// ---------------------------------------------------------------------------
// T-test results
// ---------------------------------------------------------------------------

/// Result of a t-test.
#[derive(Debug, Clone)]
pub struct TTestResult {
    /// The t statistic.
    pub t_statistic: f64,
    pub p_value: f64,      // two-tailed
    pub df: f64,            // degrees of freedom
    /// Sample mean (one-sample/paired) or difference of means (two-sample).
    pub mean: f64,
    /// Standard error of the mean (or of the mean difference).
    pub se: f64,
}
25
26/// One-sample t-test: is the mean significantly different from mu?
27pub fn t_test(data: &[f64], mu: f64) -> Result<TTestResult, String> {
28    if data.len() < 2 {
29        return Err("t_test: need at least 2 observations".into());
30    }
31    let n = data.len() as f64;
32    let mean = {
33        let mut acc = KahanAccumulatorF64::new();
34        for &x in data { acc.add(x); }
35        acc.finalize() / n
36    };
37    let s = stats::sample_sd(data)?;
38    let se = s / n.sqrt();
39    let t = (mean - mu) / se;
40    let df = n - 1.0;
41    // Two-tailed p-value
42    let p = 2.0 * (1.0 - t_cdf(t.abs(), df));
43    Ok(TTestResult { t_statistic: t, p_value: p, df, mean, se })
44}
45
46/// Two-sample independent t-test (Welch's — unequal variance).
47pub fn t_test_two_sample(x: &[f64], y: &[f64]) -> Result<TTestResult, String> {
48    if x.len() < 2 || y.len() < 2 {
49        return Err("t_test_two_sample: need at least 2 observations in each group".into());
50    }
51    let nx = x.len() as f64;
52    let ny = y.len() as f64;
53    let mean_x = {
54        let mut acc = KahanAccumulatorF64::new();
55        for &v in x { acc.add(v); }
56        acc.finalize() / nx
57    };
58    let mean_y = {
59        let mut acc = KahanAccumulatorF64::new();
60        for &v in y { acc.add(v); }
61        acc.finalize() / ny
62    };
63    let var_x = stats::sample_variance(x)?;
64    let var_y = stats::sample_variance(y)?;
65    let se = (var_x / nx + var_y / ny).sqrt();
66    let t = (mean_x - mean_y) / se;
67    // Welch-Satterthwaite degrees of freedom
68    let num = (var_x / nx + var_y / ny).powi(2);
69    let denom = (var_x / nx).powi(2) / (nx - 1.0) + (var_y / ny).powi(2) / (ny - 1.0);
70    let df = num / denom;
71    let p = 2.0 * (1.0 - t_cdf(t.abs(), df));
72    Ok(TTestResult { t_statistic: t, p_value: p, df, mean: mean_x - mean_y, se })
73}
74
75/// Paired t-test.
76pub fn t_test_paired(x: &[f64], y: &[f64]) -> Result<TTestResult, String> {
77    if x.len() != y.len() {
78        return Err("t_test_paired: arrays must have same length".into());
79    }
80    let diffs: Vec<f64> = x.iter().zip(y.iter()).map(|(a, b)| a - b).collect();
81    t_test(&diffs, 0.0)
82}
83
// ---------------------------------------------------------------------------
// Chi-squared test
// ---------------------------------------------------------------------------

/// Result of a chi-squared test.
#[derive(Debug, Clone)]
pub struct ChiSquaredResult {
    /// The chi-squared statistic: sum over cells of (O - E)^2 / E.
    pub chi2: f64,
    /// Upper-tail probability under the chi-squared distribution.
    pub p_value: f64,
    /// Degrees of freedom (number of cells - 1).
    pub df: f64,
}
95
96/// Chi-squared goodness-of-fit test.
97pub fn chi_squared_test(observed: &[f64], expected: &[f64]) -> Result<ChiSquaredResult, String> {
98    if observed.len() != expected.len() {
99        return Err("chi_squared_test: observed and expected must have same length".into());
100    }
101    if observed.is_empty() {
102        return Err("chi_squared_test: empty data".into());
103    }
104    let mut acc = KahanAccumulatorF64::new();
105    for i in 0..observed.len() {
106        if expected[i] <= 0.0 {
107            return Err(format!("chi_squared_test: expected[{i}] must be > 0"));
108        }
109        let diff = observed[i] - expected[i];
110        acc.add(diff * diff / expected[i]);
111    }
112    let chi2 = acc.finalize();
113    let df = (observed.len() - 1) as f64;
114    let p = 1.0 - chi2_cdf(chi2, df);
115    Ok(ChiSquaredResult { chi2, p_value: p, df })
116}
117
// ---------------------------------------------------------------------------
// ANOVA (Sprint 6)
// ---------------------------------------------------------------------------

/// Result of ANOVA.
#[derive(Debug, Clone)]
pub struct AnovaResult {
    /// F = MS_between / MS_within.
    pub f_statistic: f64,
    /// Upper-tail probability from the F distribution.
    pub p_value: f64,
    /// Between-groups degrees of freedom (k - 1).
    pub df_between: f64,
    /// Within-groups degrees of freedom (N - k).
    pub df_within: f64,
    /// Between-groups sum of squares.
    pub ss_between: f64,
    /// Within-groups sum of squares.
    pub ss_within: f64,
}
132
133/// One-way ANOVA: compare means across groups.
134pub fn anova_oneway(groups: &[&[f64]]) -> Result<AnovaResult, String> {
135    if groups.len() < 2 {
136        return Err("anova_oneway: need at least 2 groups".into());
137    }
138    let k = groups.len();
139    let n_total: usize = groups.iter().map(|g| g.len()).sum();
140
141    // Grand mean
142    let mut grand_acc = KahanAccumulatorF64::new();
143    for &g in groups {
144        for &x in g {
145            grand_acc.add(x);
146        }
147    }
148    let grand_mean = grand_acc.finalize() / n_total as f64;
149
150    // SS between and SS within
151    let mut ss_between_acc = KahanAccumulatorF64::new();
152    let mut ss_within_acc = KahanAccumulatorF64::new();
153    for &g in groups {
154        let ni = g.len() as f64;
155        let mut group_acc = KahanAccumulatorF64::new();
156        for &x in g { group_acc.add(x); }
157        let group_mean = group_acc.finalize() / ni;
158        let diff = group_mean - grand_mean;
159        ss_between_acc.add(ni * diff * diff);
160        for &x in g {
161            let d = x - group_mean;
162            ss_within_acc.add(d * d);
163        }
164    }
165    let ss_between = ss_between_acc.finalize();
166    let ss_within = ss_within_acc.finalize();
167    let df_between = (k - 1) as f64;
168    let df_within = (n_total - k) as f64;
169
170    if df_within <= 0.0 || ss_within == 0.0 {
171        return Err("anova_oneway: insufficient data".into());
172    }
173
174    let ms_between = ss_between / df_between;
175    let ms_within = ss_within / df_within;
176    let f_stat = ms_between / ms_within;
177    let p = 1.0 - f_cdf(f_stat, df_between, df_within);
178
179    Ok(AnovaResult {
180        f_statistic: f_stat,
181        p_value: p,
182        df_between,
183        df_within,
184        ss_between,
185        ss_within,
186    })
187}
188
189/// F-test for equality of variances.
190pub fn f_test(x: &[f64], y: &[f64]) -> Result<(f64, f64), String> {
191    let var_x = stats::sample_variance(x)?;
192    let var_y = stats::sample_variance(y)?;
193    let f = var_x / var_y;
194    let df1 = (x.len() - 1) as f64;
195    let df2 = (y.len() - 1) as f64;
196    let p = if f > 1.0 {
197        2.0 * (1.0 - f_cdf(f, df1, df2))
198    } else {
199        2.0 * f_cdf(f, df1, df2)
200    };
201    Ok((f, p))
202}
203
// ---------------------------------------------------------------------------
// Linear Regression (Sprint 5)
// ---------------------------------------------------------------------------

/// Result of linear regression.
#[derive(Debug, Clone)]
pub struct LmResult {
    pub coefficients: Vec<f64>,   // [intercept, slope1, slope2, ...]
    /// Standard error of each coefficient (same order as `coefficients`).
    pub std_errors: Vec<f64>,
    /// t statistic per coefficient (beta / se).
    pub t_values: Vec<f64>,
    /// Two-tailed p-value per coefficient.
    pub p_values: Vec<f64>,
    /// Coefficient of determination.
    pub r_squared: f64,
    /// R^2 penalized for the number of fitted parameters.
    pub adj_r_squared: f64,
    /// Per-observation residuals y - y_hat.
    pub residuals: Vec<f64>,
    /// Overall-regression F statistic.
    pub f_statistic: f64,
    /// p-value of the overall F test.
    pub f_p_value: f64,
}
221
/// Ordinary least squares regression: y = Xb + e.
/// x_matrix: flattened row-major (n x p), y: (n).
/// Adds intercept column automatically.
/// Uses QR decomposition for numerical stability.
///
/// # Errors
/// - dimension mismatches between `x_flat`, `y`, `n`, `p`
/// - `n <= p + 1` (too few observations for intercept + predictors)
/// - rank-deficient design matrix (collinear columns)
pub fn lm(x_flat: &[f64], y: &[f64], n: usize, p: usize) -> Result<LmResult, String> {
    if x_flat.len() != n * p {
        return Err(format!("lm: x_matrix size {} != n*p = {}", x_flat.len(), n * p));
    }
    if y.len() != n {
        return Err(format!("lm: y length {} != n = {n}", y.len()));
    }
    if n <= p + 1 {
        return Err("lm: need n > p+1 for regression with intercept".into());
    }

    // Build design matrix with intercept: X_aug = [1 | X], shape (n, p+1)
    let pp = p + 1; // p+1 columns (intercept + predictors)
    let mut x_aug = vec![0.0; n * pp];
    for i in 0..n {
        x_aug[i * pp] = 1.0; // intercept
        for j in 0..p {
            x_aug[i * pp + (j + 1)] = x_flat[i * p + j];
        }
    }

    // QR decomposition of X_aug (m x pp) via Householder
    let m = n;
    let mut q_t_y = y.to_vec(); // will be overwritten with Q^T * y
    let mut r = x_aug.clone();

    // Householder QR in-place on r, accumulate Q^T * y
    for j in 0..pp {
        // Compute Householder vector for column j, rows j..m
        let mut norm_sq = 0.0;
        for i in j..m {
            norm_sq += r[i * pp + j] * r[i * pp + j];
        }
        let norm = norm_sq.sqrt();
        if norm < 1e-15 {
            // A (numerically) zero column at/below the diagonal means the
            // predictors are collinear.
            return Err("lm: rank-deficient design matrix".into());
        }
        // Sign chosen to avoid cancellation when forming u0.
        let sign = if r[j * pp + j] >= 0.0 { 1.0 } else { -1.0 };
        let u0 = r[j * pp + j] + sign * norm;
        // v = [1, r[j+1,j]/u0, ..., r[m-1,j]/u0]
        let mut v = vec![0.0; m - j];
        v[0] = 1.0;
        for i in 1..(m - j) {
            v[i] = r[(j + i) * pp + j] / u0;
        }
        // tau = 2 / (v . v), Kahan-summed for determinism.
        let tau = 2.0 / {
            let mut acc = KahanAccumulatorF64::new();
            for &vi in &v { acc.add(vi * vi); }
            acc.finalize()
        };

        // Apply reflection to r columns j..pp
        for col in j..pp {
            let mut dot = 0.0;
            for i in 0..v.len() {
                dot += v[i] * r[(j + i) * pp + col];
            }
            for i in 0..v.len() {
                r[(j + i) * pp + col] -= tau * dot * v[i];
            }
        }
        // Apply reflection to q_t_y
        {
            let mut dot = 0.0;
            for i in 0..v.len() {
                dot += v[i] * q_t_y[j + i];
            }
            for i in 0..v.len() {
                q_t_y[j + i] -= tau * dot * v[i];
            }
        }
    }

    // Back-substitute: R * beta = Q^T * y (upper-triangular R is in r[0..pp, 0..pp])
    let mut beta = vec![0.0; pp];
    for i in (0..pp).rev() {
        let mut s = q_t_y[i];
        for j in (i + 1)..pp {
            s -= r[i * pp + j] * beta[j];
        }
        beta[i] = s / r[i * pp + i];
    }

    // Residuals and residual sum of squares (Kahan-summed)
    let mut residuals = vec![0.0; n];
    let mut ss_res_acc = KahanAccumulatorF64::new();
    for i in 0..n {
        let mut y_hat = 0.0;
        for j in 0..pp {
            y_hat += x_aug[i * pp + j] * beta[j];
        }
        residuals[i] = y[i] - y_hat;
        ss_res_acc.add(residuals[i] * residuals[i]);
    }
    let ss_res = ss_res_acc.finalize();

    // SS total: variation of y around its mean
    let y_mean = {
        let mut acc = KahanAccumulatorF64::new();
        for &yi in y { acc.add(yi); }
        acc.finalize() / n as f64
    };
    let mut ss_tot_acc = KahanAccumulatorF64::new();
    for &yi in y {
        let d = yi - y_mean;
        ss_tot_acc.add(d * d);
    }
    let ss_tot = ss_tot_acc.finalize();

    // R^2 and adjusted R^2 (guard against constant y: ss_tot == 0)
    let r_squared = if ss_tot > 0.0 { 1.0 - ss_res / ss_tot } else { 0.0 };
    let adj_r_squared = 1.0 - (1.0 - r_squared) * ((n - 1) as f64) / ((n - pp) as f64);

    // Standard errors of coefficients
    let mse = ss_res / (n - pp) as f64;
    // Invert R'R for (X'X)^-1 diagonal — using R^-1
    // ((X'X)^-1 = (R'R)^-1 = R^-1 (R^-1)', so only R^-1 is needed.)
    let mut r_inv = vec![0.0; pp * pp];
    for i in 0..pp {
        r_inv[i * pp + i] = 1.0 / r[i * pp + i];
        for j in (0..i).rev() {
            let mut s = 0.0;
            for k in (j + 1)..=i {
                s += r[j * pp + k] * r_inv[k * pp + i];
            }
            r_inv[j * pp + i] = -s / r[j * pp + j];
        }
    }
    // diag((R^-1)(R^-1)^T) * mse
    let mut std_errors = Vec::with_capacity(pp);
    let mut t_values = Vec::with_capacity(pp);
    let mut p_values = Vec::with_capacity(pp);
    let df = (n - pp) as f64;
    for i in 0..pp {
        let mut diag = 0.0;
        for k in i..pp {
            diag += r_inv[i * pp + k] * r_inv[i * pp + k];
        }
        let se = (diag * mse).sqrt();
        std_errors.push(se);
        let t = if se > 0.0 { beta[i] / se } else { 0.0 };
        t_values.push(t);
        // Two-tailed p-value against t distribution with n - pp df.
        let pv = 2.0 * (1.0 - t_cdf(t.abs(), df));
        p_values.push(pv);
    }

    // F-statistic for overall regression significance
    let ss_reg = ss_tot - ss_res;
    let df_reg = (pp - 1) as f64;
    let f_stat = if df_reg > 0.0 && mse > 0.0 {
        (ss_reg / df_reg) / mse
    } else {
        0.0
    };
    let f_p = 1.0 - f_cdf(f_stat, df_reg, df);

    Ok(LmResult {
        coefficients: beta,
        std_errors,
        t_values,
        p_values,
        r_squared,
        adj_r_squared,
        residuals,
        f_statistic: f_stat,
        f_p_value: f_p,
    })
}
392
393// ---------------------------------------------------------------------------
394// Phase B5: Weighted Least Squares
395// ---------------------------------------------------------------------------
396
397/// Weighted least squares regression.
398/// Transforms to OLS: X_w = W^{1/2} * X, y_w = W^{1/2} * y.
399/// Then applies standard QR-based least squares via lm().
400pub fn wls(
401    x_flat: &[f64],
402    y: &[f64],
403    weights: &[f64],
404    n: usize,
405    p: usize,
406) -> Result<LmResult, String> {
407    if x_flat.len() != n * p {
408        return Err(format!("wls: x_matrix size {} != n*p = {}", x_flat.len(), n * p));
409    }
410    if y.len() != n || weights.len() != n {
411        return Err("wls: y and weights must have length n".into());
412    }
413    for (i, &w) in weights.iter().enumerate() {
414        if w <= 0.0 {
415            return Err(format!("wls: weight[{i}] = {w} must be positive"));
416        }
417    }
418    // Transform: multiply each row by sqrt(weight)
419    let mut x_w = vec![0.0; n * p];
420    let mut y_w = vec![0.0; n];
421    for i in 0..n {
422        let sw = weights[i].sqrt();
423        y_w[i] = y[i] * sw;
424        for j in 0..p {
425            x_w[i * p + j] = x_flat[i * p + j] * sw;
426        }
427    }
428    lm(&x_w, &y_w, n, p)
429}
430
// ---------------------------------------------------------------------------
// Phase B7: Non-parametric tests & multiple comparisons
// ---------------------------------------------------------------------------

/// Tukey HSD pairwise comparison result.
#[derive(Debug, Clone)]
pub struct TukeyHsdPair {
    /// Index of the first group in the comparison.
    pub group_i: usize,
    /// Index of the second group (always > group_i).
    pub group_j: usize,
    /// mean(group_i) - mean(group_j).
    pub mean_diff: f64,
    /// Standard error of the mean difference.
    pub se: f64,
    /// Studentized-range-style statistic |mean_diff| / se.
    pub q_statistic: f64,
    /// Approximate p-value, clamped to [0, 1].
    pub p_value: f64,
}
445
446/// Tukey HSD post-hoc test after one-way ANOVA.
447pub fn tukey_hsd(groups: &[&[f64]]) -> Result<Vec<TukeyHsdPair>, String> {
448    if groups.len() < 2 {
449        return Err("tukey_hsd: need at least 2 groups".into());
450    }
451    let k = groups.len();
452    // Compute MSW (mean square within)
453    let mut n_total = 0usize;
454    let mut means = Vec::with_capacity(k);
455    let mut ssw = KahanAccumulatorF64::new();
456    for g in groups {
457        if g.is_empty() { return Err("tukey_hsd: empty group".into()); }
458        n_total += g.len();
459        let mut acc = KahanAccumulatorF64::new();
460        for &x in *g { acc.add(x); }
461        let m = acc.finalize() / g.len() as f64;
462        means.push(m);
463        for &x in *g {
464            let d = x - m;
465            ssw.add(d * d);
466        }
467    }
468    let df_w = (n_total - k) as f64;
469    if df_w <= 0.0 { return Err("tukey_hsd: not enough degrees of freedom".into()); }
470    let msw = ssw.finalize() / df_w;
471
472    let mut results = Vec::new();
473    for i in 0..k {
474        for j in (i + 1)..k {
475            let ni = groups[i].len() as f64;
476            let nj = groups[j].len() as f64;
477            let se = (msw * 0.5 * (1.0 / ni + 1.0 / nj)).sqrt();
478            let mean_diff = means[i] - means[j];
479            let q = mean_diff.abs() / se;
480            // Approximate p-value using normal approximation for studentized range
481            // p ≈ k * (k-1) * (1 - Φ(q / sqrt(2)))
482            let raw_p = (k as f64) * ((k - 1) as f64) * (1.0 - normal_cdf(q / 2.0_f64.sqrt()));
483            let p = raw_p.min(1.0).max(0.0);
484            results.push(TukeyHsdPair { group_i: i, group_j: j, mean_diff, se, q_statistic: q, p_value: p });
485        }
486    }
487    Ok(results)
488}
489
/// Mann-Whitney U test result.
#[derive(Debug, Clone)]
pub struct MannWhitneyResult {
    /// U = min(U1, U2).
    pub u_statistic: f64,
    /// Normal-approximation z score of U.
    pub z_score: f64,
    /// Two-tailed p-value from the normal approximation.
    pub p_value: f64,
}
497
498/// Mann-Whitney U test (Wilcoxon rank-sum test).
499pub fn mann_whitney(x: &[f64], y: &[f64]) -> Result<MannWhitneyResult, String> {
500    if x.is_empty() || y.is_empty() {
501        return Err("mann_whitney: both groups must be non-empty".into());
502    }
503    let n1 = x.len();
504    let n2 = y.len();
505    let n = n1 + n2;
506
507    // Combine and rank with stable index tie-breaking
508    let mut combined: Vec<(f64, usize, usize)> = Vec::with_capacity(n);
509    for (i, &v) in x.iter().enumerate() {
510        combined.push((v, 0, i)); // group 0
511    }
512    for (i, &v) in y.iter().enumerate() {
513        combined.push((v, 1, i)); // group 1
514    }
515    combined.sort_by(|a, b| a.0.total_cmp(&b.0).then(a.1.cmp(&b.1)).then(a.2.cmp(&b.2)));
516
517    // Assign average ranks
518    let mut ranks = vec![0.0; n];
519    let mut i = 0;
520    while i < n {
521        let mut j = i;
522        while j < n && combined[j].0.to_bits() == combined[i].0.to_bits() {
523            j += 1;
524        }
525        let avg_rank = (i + 1 + j) as f64 / 2.0;
526        for k in i..j {
527            ranks[k] = avg_rank;
528        }
529        i = j;
530    }
531
532    // R1 = sum of ranks for group 0
533    let mut r1 = KahanAccumulatorF64::new();
534    for (idx, &(_, group, _)) in combined.iter().enumerate() {
535        if group == 0 { r1.add(ranks[idx]); }
536    }
537    let r1_val = r1.finalize();
538    let u1 = r1_val - (n1 as f64 * (n1 as f64 + 1.0)) / 2.0;
539    let u2 = (n1 as f64) * (n2 as f64) - u1;
540    let u = u1.min(u2);
541
542    let mu = (n1 as f64 * n2 as f64) / 2.0;
543    let sigma = ((n1 as f64 * n2 as f64 * (n as f64 + 1.0)) / 12.0).sqrt();
544    let z = if sigma > 0.0 { (u - mu) / sigma } else { 0.0 };
545    let p = 2.0 * (1.0 - normal_cdf(z.abs()));
546
547    Ok(MannWhitneyResult { u_statistic: u, z_score: z, p_value: p })
548}
549
/// Kruskal-Wallis H test result.
#[derive(Debug, Clone)]
pub struct KruskalWallisResult {
    /// The H statistic (compared against chi-squared under H0).
    pub h_statistic: f64,
    /// Upper-tail chi-squared p-value.
    pub p_value: f64,
    /// Degrees of freedom (number of groups - 1).
    pub df: f64,
}
557
558/// Kruskal-Wallis H test: non-parametric one-way ANOVA on ranks.
559pub fn kruskal_wallis(groups: &[&[f64]]) -> Result<KruskalWallisResult, String> {
560    if groups.len() < 2 {
561        return Err("kruskal_wallis: need at least 2 groups".into());
562    }
563    let k = groups.len();
564    let mut all: Vec<(f64, usize, usize)> = Vec::new();
565    let mut group_sizes = Vec::with_capacity(k);
566    for (gi, g) in groups.iter().enumerate() {
567        if g.is_empty() { return Err("kruskal_wallis: empty group".into()); }
568        group_sizes.push(g.len());
569        for (i, &v) in g.iter().enumerate() {
570            all.push((v, gi, i));
571        }
572    }
573    let n = all.len();
574    all.sort_by(|a, b| a.0.total_cmp(&b.0).then(a.1.cmp(&b.1)).then(a.2.cmp(&b.2)));
575
576    // Assign average ranks
577    let mut ranks = vec![0.0; n];
578    let mut i = 0;
579    while i < n {
580        let mut j = i;
581        while j < n && all[j].0.to_bits() == all[i].0.to_bits() { j += 1; }
582        let avg = (i + 1 + j) as f64 / 2.0;
583        for idx in i..j { ranks[idx] = avg; }
584        i = j;
585    }
586
587    // Sum of ranks per group
588    let mut rank_sums = vec![KahanAccumulatorF64::new(); k];
589    for (idx, &(_, gi, _)) in all.iter().enumerate() {
590        rank_sums[gi].add(ranks[idx]);
591    }
592
593    let nf = n as f64;
594    let mut h_acc = KahanAccumulatorF64::new();
595    for (gi, acc) in rank_sums.iter().enumerate() {
596        let ri = acc.clone().finalize();
597        let ni = group_sizes[gi] as f64;
598        h_acc.add(ri * ri / ni);
599    }
600    let h = (12.0 / (nf * (nf + 1.0))) * h_acc.finalize() - 3.0 * (nf + 1.0);
601    let df = (k - 1) as f64;
602    let p = 1.0 - chi2_cdf(h, df);
603
604    Ok(KruskalWallisResult { h_statistic: h, p_value: p, df })
605}
606
/// Wilcoxon signed-rank test result.
#[derive(Debug, Clone)]
pub struct WilcoxonResult {
    /// W = min(W+, W-), the smaller of the signed rank sums.
    pub w_statistic: f64,
    /// Normal-approximation z score of W.
    pub z_score: f64,
    /// Two-tailed p-value from the normal approximation.
    pub p_value: f64,
}
614
/// Wilcoxon signed-rank test for paired data.
///
/// Ranks the absolute pairwise differences (zeros discarded), sums the
/// ranks by sign, and compares W = min(W+, W-) against its normal
/// approximation.
pub fn wilcoxon_signed_rank(x: &[f64], y: &[f64]) -> Result<WilcoxonResult, String> {
    if x.len() != y.len() {
        return Err("wilcoxon_signed_rank: x and y must have same length".into());
    }
    if x.len() < 2 {
        return Err("wilcoxon_signed_rank: need at least 2 observations".into());
    }

    // Compute differences, remove zeros
    // (|d| <= 1e-15 is treated as a zero difference and dropped).
    let mut diffs: Vec<(f64, usize)> = Vec::new(); // (abs_diff, original_index)
    let mut signs: Vec<f64> = Vec::new();
    for (i, (&xi, &yi)) in x.iter().zip(y.iter()).enumerate() {
        let d = xi - yi;
        if d.abs() > 1e-15 {
            diffs.push((d.abs(), i));
            signs.push(if d > 0.0 { 1.0 } else { -1.0 });
        }
    }
    let nr = diffs.len();
    if nr == 0 {
        // All pairs identical: no evidence against H0.
        return Ok(WilcoxonResult { w_statistic: 0.0, z_score: 0.0, p_value: 1.0 });
    }

    // Sort by absolute difference (stable)
    let mut indexed: Vec<(usize, f64)> = diffs.iter().enumerate().map(|(i, &(v, _))| (i, v)).collect();
    indexed.sort_by(|a, b| a.1.total_cmp(&b.1).then(a.0.cmp(&b.0)));

    // Average ranks: tie runs (bitwise-equal |d|) share a midrank.
    // Note ranks[] is indexed by position in diffs/signs (via indexed[k].0),
    // not by sorted order.
    let mut ranks = vec![0.0; nr];
    let mut i = 0;
    while i < nr {
        let mut j = i;
        while j < nr && indexed[j].1.to_bits() == indexed[i].1.to_bits() { j += 1; }
        let avg = (i + 1 + j) as f64 / 2.0;
        for k in i..j { ranks[indexed[k].0] = avg; }
        i = j;
    }

    // Signed rank sums (Kahan-summed).
    let mut w_plus = KahanAccumulatorF64::new();
    let mut w_minus = KahanAccumulatorF64::new();
    for (i, &s) in signs.iter().enumerate() {
        if s > 0.0 { w_plus.add(ranks[i]); } else { w_minus.add(ranks[i]); }
    }
    let wp = w_plus.finalize();
    let wm = w_minus.finalize();
    let w = wp.min(wm);

    // Normal approximation for the null distribution of W.
    let nf = nr as f64;
    let mu = nf * (nf + 1.0) / 4.0;
    let sigma = (nf * (nf + 1.0) * (2.0 * nf + 1.0) / 24.0).sqrt();
    let z = if sigma > 0.0 { (w - mu) / sigma } else { 0.0 };
    let p = 2.0 * (1.0 - normal_cdf(z.abs()));

    Ok(WilcoxonResult { w_statistic: w, z_score: z, p_value: p })
}
671
/// Bonferroni correction: adjusted_p[i] = min(p[i] * m, 1.0).
pub fn bonferroni(p_values: &[f64]) -> Vec<f64> {
    let m = p_values.len() as f64;
    let mut adjusted = Vec::with_capacity(p_values.len());
    for &p in p_values {
        adjusted.push(f64::min(p * m, 1.0));
    }
    adjusted
}
677
/// Benjamini-Hochberg FDR correction.
pub fn fdr_bh(p_values: &[f64]) -> Vec<f64> {
    let m = p_values.len();
    if m == 0 {
        return Vec::new();
    }

    // Order by p-value; the original index breaks ties deterministically.
    let mut order: Vec<(usize, f64)> = p_values.iter().copied().enumerate().collect();
    order.sort_by(|a, b| a.1.total_cmp(&b.1).then(a.0.cmp(&b.0)));

    // Raw BH adjustment at each rank: p * m / rank, capped at 1.
    let mut adjusted: Vec<f64> = order
        .iter()
        .enumerate()
        .map(|(r0, &(_, p))| (p * m as f64 / (r0 + 1) as f64).min(1.0))
        .collect();

    // Step-up monotonicity: no adjusted value may exceed its successor.
    for i in (0..m - 1).rev() {
        if adjusted[i] > adjusted[i + 1] {
            adjusted[i] = adjusted[i + 1];
        }
    }

    // Scatter back to the original positions.
    let mut out = vec![0.0; m];
    for (r0, &(orig, _)) in order.iter().enumerate() {
        out[orig] = adjusted[r0];
    }
    out
}
708
/// Logistic regression result.
#[derive(Debug, Clone)]
pub struct LogisticResult {
    /// Fitted coefficients: [intercept, slope1, ...].
    pub coefficients: Vec<f64>,
    /// Standard error per coefficient, from (X^T W X)^{-1}.
    pub std_errors: Vec<f64>,
    /// Wald z statistic per coefficient (beta / se).
    pub z_values: Vec<f64>,
    /// Two-tailed normal p-value per coefficient.
    pub p_values: Vec<f64>,
    /// Log-likelihood at the fitted coefficients.
    pub log_likelihood: f64,
    /// Akaike information criterion: -2*logL + 2*(p+1).
    pub aic: f64,
    /// Number of IRLS iterations performed.
    pub iterations: usize,
}
720
/// Logistic regression via IRLS.
/// x_flat: row-major n x p matrix (NO intercept column — auto-added).
/// y: binary 0/1 response.
///
/// Iteratively reweighted least squares: each iteration solves the Newton
/// step (X^T W X) delta = X^T (y - mu) and stops once the largest
/// coefficient change falls below 1e-8, or after 100 iterations.
pub fn logistic_regression(
    x_flat: &[f64],
    y: &[f64],
    n: usize,
    p: usize,
) -> Result<LogisticResult, String> {
    if x_flat.len() != n * p {
        return Err(format!("logistic_regression: x size {} != n*p = {}", x_flat.len(), n * p));
    }
    if y.len() != n {
        return Err("logistic_regression: y must have length n".into());
    }
    let pp = p + 1; // with intercept

    // Build X with intercept column
    let mut x = vec![0.0; n * pp];
    for i in 0..n {
        x[i * pp] = 1.0; // intercept
        for j in 0..p {
            x[i * pp + j + 1] = x_flat[i * p + j];
        }
    }

    let mut beta = vec![0.0; pp]; // IRLS starts from the zero vector
    let max_iter = 100;
    let tol = 1e-8;
    let mut iterations = 0;

    for iter in 0..max_iter {
        iterations = iter + 1;

        // mu = sigmoid(X * beta)
        let mut mu = vec![0.0; n];
        for i in 0..n {
            let mut eta = 0.0;
            for j in 0..pp {
                eta += x[i * pp + j] * beta[j];
            }
            mu[i] = 1.0 / (1.0 + (-eta).exp());
            // Clamp for numerical stability
            mu[i] = mu[i].max(1e-10).min(1.0 - 1e-10);
        }

        // W = diag(mu * (1 - mu))
        let w: Vec<f64> = mu.iter().map(|&m| m * (1.0 - m)).collect();

        // X^T W X (pp x pp)
        let mut xtwx = vec![0.0; pp * pp];
        for i in 0..n {
            for j in 0..pp {
                for k in 0..pp {
                    xtwx[j * pp + k] += x[i * pp + j] * w[i] * x[i * pp + k];
                }
            }
        }

        // X^T (y - mu) — gradient of the log-likelihood
        let mut grad = vec![0.0; pp];
        for i in 0..n {
            let r = y[i] - mu[i];
            for j in 0..pp {
                grad[j] += x[i * pp + j] * r;
            }
        }

        // Solve xtwx * delta = grad via Cholesky
        // Since xtwx is positive definite (for well-conditioned data)
        let delta = solve_symmetric(&xtwx, &grad, pp)?;

        // Update beta; track the largest step for the convergence check.
        let mut max_change = 0.0_f64;
        for j in 0..pp {
            beta[j] += delta[j];
            max_change = max_change.max(delta[j].abs());
        }

        if max_change < tol {
            break;
        }
    }

    // Final mu for log-likelihood and standard errors
    let mut mu = vec![0.0; n];
    for i in 0..n {
        let mut eta = 0.0;
        for j in 0..pp { eta += x[i * pp + j] * beta[j]; }
        mu[i] = 1.0 / (1.0 + (-eta).exp());
        mu[i] = mu[i].max(1e-10).min(1.0 - 1e-10);
    }

    // Log-likelihood (Kahan-summed)
    let mut ll = KahanAccumulatorF64::new();
    for i in 0..n {
        ll.add(y[i] * mu[i].ln() + (1.0 - y[i]) * (1.0 - mu[i]).ln());
    }
    let log_likelihood = ll.finalize();
    // AIC = -2 log L + 2 * (number of fitted parameters)
    let aic = -2.0 * log_likelihood + 2.0 * pp as f64;

    // Standard errors from (X^T W X)^{-1}
    let w: Vec<f64> = mu.iter().map(|&m| m * (1.0 - m)).collect();
    let mut xtwx = vec![0.0; pp * pp];
    for i in 0..n {
        for j in 0..pp {
            for k in 0..pp {
                xtwx[j * pp + k] += x[i * pp + j] * w[i] * x[i * pp + k];
            }
        }
    }
    let inv = invert_symmetric(&xtwx, pp)?;
    let mut std_errors = Vec::with_capacity(pp);
    let mut z_values = Vec::with_capacity(pp);
    let mut p_values = Vec::with_capacity(pp);
    for j in 0..pp {
        // max(0.0) guards against tiny negative diagonals from round-off.
        let se = inv[j * pp + j].max(0.0).sqrt();
        std_errors.push(se);
        let z = if se > 0.0 { beta[j] / se } else { 0.0 };
        z_values.push(z);
        // Wald test: two-tailed normal p-value.
        let pv = 2.0 * (1.0 - normal_cdf(z.abs()));
        p_values.push(pv);
    }

    Ok(LogisticResult {
        coefficients: beta,
        std_errors,
        z_values,
        p_values,
        log_likelihood,
        aic,
        iterations,
    })
}
855
/// Solve A*x = b for symmetric positive definite A (Cholesky).
///
/// If the factorization hits a non-positive (or NaN) pivot, a small ridge
/// (1e-6) is added to the diagonal and the factorization is retried. The
/// number of attempts is bounded, so pathological input (NaN entries, very
/// negative diagonals) returns an error instead of recursing without bound
/// as the previous implementation did.
fn solve_symmetric(a: &[f64], b: &[f64], n: usize) -> Result<Vec<f64>, String> {
    const MAX_ATTEMPTS: usize = 32;

    let mut a_work = a.to_vec();
    let mut l = vec![0.0; n * n];

    // Cholesky decomposition: A = L * L^T, with bounded regularized retries.
    let mut factored = false;
    'attempts: for _ in 0..MAX_ATTEMPTS {
        for i in 0..n {
            for j in 0..=i {
                let mut sum = 0.0;
                for k in 0..j {
                    sum += l[i * n + k] * l[j * n + k];
                }
                if i == j {
                    let diag = a_work[i * n + i] - sum;
                    // `!(diag > 0.0)` also catches NaN, which `diag <= 0.0`
                    // would have let slip through into sqrt().
                    if !(diag > 0.0) {
                        // Not positive definite: add ridge and retry.
                        for ii in 0..n {
                            a_work[ii * n + ii] += 1e-6;
                        }
                        continue 'attempts;
                    }
                    l[i * n + j] = diag.sqrt();
                } else {
                    l[i * n + j] = (a_work[i * n + j] - sum) / l[j * n + j];
                }
            }
        }
        factored = true;
        break;
    }
    if !factored {
        return Err("solve_symmetric: matrix is not positive definite".into());
    }

    // Forward substitution: L * y = b
    let mut y = vec![0.0; n];
    for i in 0..n {
        let mut sum = 0.0;
        for j in 0..i {
            sum += l[i * n + j] * y[j];
        }
        y[i] = (b[i] - sum) / l[i * n + i];
    }
    // Backward substitution: L^T * x = y
    let mut x = vec![0.0; n];
    for i in (0..n).rev() {
        let mut sum = 0.0;
        for j in (i + 1)..n {
            sum += l[j * n + i] * x[j];
        }
        x[i] = (y[i] - sum) / l[i * n + i];
    }
    Ok(x)
}
896
897/// Invert symmetric positive definite matrix via Cholesky.
898fn invert_symmetric(a: &[f64], n: usize) -> Result<Vec<f64>, String> {
899    let mut inv = vec![0.0; n * n];
900    for col in 0..n {
901        let mut e = vec![0.0; n];
902        e[col] = 1.0;
903        let x = solve_symmetric(a, &e, n)?;
904        for row in 0..n {
905            inv[row * n + col] = x[row];
906        }
907    }
908    Ok(inv)
909}
910
// ---------------------------------------------------------------------------
// Normality Tests
// ---------------------------------------------------------------------------

/// Result of a normality test.
///
/// Derives `Debug`/`Clone` for consistency with every other result struct
/// in this module (`TTestResult`, `ChiSquaredResult`, `AnovaResult`, ...).
#[derive(Debug, Clone)]
pub struct NormalityResult {
    /// Test statistic (e.g. the Jarque-Bera JB value).
    pub statistic: f64,
    /// Probability of a statistic at least this extreme under H0 (normality).
    pub p_value: f64,
}
920
921/// Jarque-Bera normality test.
922/// Tests whether data has skewness and kurtosis matching a normal distribution.
923/// JB = (n/6) * (S² + (K-3)²/4)
924pub fn jarque_bera(data: &[f64]) -> Result<NormalityResult, String> {
925    let n = data.len();
926    if n < 3 { return Err("jarque_bera: need at least 3 observations".into()); }
927    let nf = n as f64;
928
929    // Mean
930    let mut acc = KahanAccumulatorF64::new();
931    for &x in data { acc.add(x); }
932    let mean = acc.finalize() / nf;
933
934    // Central moments via Kahan
935    let mut m2_acc = KahanAccumulatorF64::new();
936    let mut m3_acc = KahanAccumulatorF64::new();
937    let mut m4_acc = KahanAccumulatorF64::new();
938    for &x in data {
939        let d = x - mean;
940        let d2 = d * d;
941        m2_acc.add(d2);
942        m3_acc.add(d2 * d);
943        m4_acc.add(d2 * d2);
944    }
945    let m2 = m2_acc.finalize() / nf;
946    let m3 = m3_acc.finalize() / nf;
947    let m4 = m4_acc.finalize() / nf;
948
949    if m2 == 0.0 { return Err("jarque_bera: zero variance".into()); }
950
951    let skewness = m3 / m2.powf(1.5);
952    let kurtosis = m4 / (m2 * m2);
953
954    let jb = (nf / 6.0) * (skewness * skewness + (kurtosis - 3.0).powi(2) / 4.0);
955
956    // p-value from chi-squared distribution with 2 degrees of freedom
957    // P(X > jb) = exp(-jb/2) for chi2(2)
958    let p_value = (-jb / 2.0).exp();
959
960    Ok(NormalityResult { statistic: jb, p_value })
961}
962
963/// Anderson-Darling test for normality.
964/// Compares the empirical CDF to a normal CDF.
965pub fn anderson_darling(data: &[f64]) -> Result<NormalityResult, String> {
966    let n = data.len();
967    if n < 8 { return Err("anderson_darling: need at least 8 observations".into()); }
968    let nf = n as f64;
969
970    // Mean and std
971    let mut acc = KahanAccumulatorF64::new();
972    for &x in data { acc.add(x); }
973    let mean = acc.finalize() / nf;
974    let mut var_acc = KahanAccumulatorF64::new();
975    for &x in data { let d = x - mean; var_acc.add(d * d); }
976    let std = (var_acc.finalize() / (nf - 1.0)).sqrt();
977    if std == 0.0 { return Err("anderson_darling: zero standard deviation".into()); }
978
979    // Sort and standardize
980    let mut sorted = data.to_vec();
981    sorted.sort_by(|a, b| a.total_cmp(b));
982    let z: Vec<f64> = sorted.iter().map(|&x| (x - mean) / std).collect();
983
984    // A² statistic
985    let mut a2_acc = KahanAccumulatorF64::new();
986    for i in 0..n {
987        let phi_zi = normal_cdf(z[i]);
988        let phi_zn = normal_cdf(z[n - 1 - i]);
989        // Clamp to avoid log(0)
990        let p1 = phi_zi.max(1e-15).min(1.0 - 1e-15);
991        let p2 = phi_zn.max(1e-15).min(1.0 - 1e-15);
992        let term = (2.0 * (i as f64) + 1.0) * (p1.ln() + (1.0 - p2).ln());
993        a2_acc.add(term);
994    }
995    let a2 = -nf - a2_acc.finalize() / nf;
996
997    // Adjusted statistic
998    let a2_star = a2 * (1.0 + 0.75 / nf + 2.25 / (nf * nf));
999
1000    // Approximate p-value using D'Agostino & Stephens (1986) table
1001    let p_value = if a2_star >= 1.0359 { 0.0 }      // < 0.005
1002        else if a2_star >= 0.8737 { 0.01 }
1003        else if a2_star >= 0.6305 { 0.025 }
1004        else if a2_star >= 0.5091 { 0.05 }
1005        else if a2_star >= 0.3565 { 0.10 }
1006        else if a2_star >= 0.2006 { 0.25 }
1007        else { 0.50 };  // > 0.25
1008
1009    Ok(NormalityResult { statistic: a2_star, p_value })
1010}
1011
1012// normal_cdf is already imported from crate::distributions at the top of this file.
1013
1014/// Kolmogorov-Smirnov one-sample test for normality.
1015/// Compares empirical distribution to a standard normal.
1016pub fn ks_test_normal(data: &[f64]) -> Result<NormalityResult, String> {
1017    let n = data.len();
1018    if n < 5 { return Err("ks_test: need at least 5 observations".into()); }
1019    let nf = n as f64;
1020
1021    // Standardize
1022    let mut acc = KahanAccumulatorF64::new();
1023    for &x in data { acc.add(x); }
1024    let mean = acc.finalize() / nf;
1025    let mut var_acc = KahanAccumulatorF64::new();
1026    for &x in data { let d = x - mean; var_acc.add(d * d); }
1027    let std = (var_acc.finalize() / (nf - 1.0)).sqrt();
1028    if std == 0.0 { return Err("ks_test: zero standard deviation".into()); }
1029
1030    let mut sorted = data.to_vec();
1031    sorted.sort_by(|a, b| a.total_cmp(b));
1032
1033    let mut d_max = 0.0f64;
1034    for i in 0..n {
1035        let z = (sorted[i] - mean) / std;
1036        let f_z = normal_cdf(z);
1037        let d_plus = ((i + 1) as f64 / nf - f_z).abs();
1038        let d_minus = (f_z - i as f64 / nf).abs();
1039        d_max = d_max.max(d_plus).max(d_minus);
1040    }
1041
1042    // Approximate p-value using Kolmogorov distribution
1043    // P(D > d) ≈ 2 * sum_{k=1}^{inf} (-1)^{k+1} * exp(-2k²n*d²)
1044    let nd2 = nf * d_max * d_max;
1045    let mut p_value = 0.0;
1046    for k in 1..=100 {
1047        let kf = k as f64;
1048        let sign = if k % 2 == 1 { 1.0 } else { -1.0 };
1049        let term = sign * (-2.0 * kf * kf * nd2).exp();
1050        p_value += term;
1051        if term.abs() < 1e-15 { break; }
1052    }
1053    let p_value = (2.0 * p_value).max(0.0).min(1.0);
1054
1055    Ok(NormalityResult { statistic: d_max, p_value })
1056}
1057
1058// ---------------------------------------------------------------------------
1059// Effect Sizes
1060// ---------------------------------------------------------------------------
1061
1062/// Cohen's d: standardized difference between two group means.
1063pub fn cohens_d(x: &[f64], y: &[f64]) -> Result<f64, String> {
1064    if x.len() < 2 || y.len() < 2 {
1065        return Err("cohens_d: need at least 2 observations per group".into());
1066    }
1067
1068    let mean_x = kahan_mean(x);
1069    let mean_y = kahan_mean(y);
1070    let var_x = kahan_var(x, mean_x);
1071    let var_y = kahan_var(y, mean_y);
1072
1073    let nx = x.len() as f64;
1074    let ny = y.len() as f64;
1075
1076    // Pooled standard deviation
1077    let sp = (((nx - 1.0) * var_x + (ny - 1.0) * var_y) / (nx + ny - 2.0)).sqrt();
1078    if sp == 0.0 { return Ok(0.0); }
1079
1080    Ok((mean_x - mean_y) / sp)
1081}
1082
1083/// Eta-squared: proportion of variance explained by group membership.
1084/// Input: array of groups (each is a slice of f64).
1085pub fn eta_squared(groups: &[&[f64]]) -> Result<f64, String> {
1086    if groups.len() < 2 { return Err("eta_squared: need at least 2 groups".into()); }
1087
1088    let mut grand_acc = KahanAccumulatorF64::new();
1089    let mut total_n = 0usize;
1090    for g in groups {
1091        for &x in *g { grand_acc.add(x); total_n += 1; }
1092    }
1093    if total_n == 0 { return Err("eta_squared: no observations".into()); }
1094    let grand_mean = grand_acc.finalize() / total_n as f64;
1095
1096    // SS_between and SS_total
1097    let mut ss_between = KahanAccumulatorF64::new();
1098    let mut ss_total = KahanAccumulatorF64::new();
1099    for g in groups {
1100        let gm = kahan_mean(g);
1101        let ni = g.len() as f64;
1102        ss_between.add(ni * (gm - grand_mean).powi(2));
1103        for &x in *g {
1104            ss_total.add((x - grand_mean).powi(2));
1105        }
1106    }
1107
1108    let ss_t = ss_total.finalize();
1109    if ss_t == 0.0 { return Ok(0.0); }
1110
1111    Ok(ss_between.finalize() / ss_t)
1112}
1113
1114/// Cramér's V: association between two categorical variables.
1115/// Input: contingency table as flat row-major array with dimensions r x c.
1116pub fn cramers_v(table: &[f64], nrows: usize, ncols: usize) -> Result<f64, String> {
1117    if table.len() != nrows * ncols {
1118        return Err(format!("cramers_v: table size {} != {}x{}", table.len(), nrows, ncols));
1119    }
1120    if nrows < 2 || ncols < 2 {
1121        return Err("cramers_v: need at least 2x2 table".into());
1122    }
1123
1124    // Row sums, col sums, total
1125    let mut row_sums = vec![0.0; nrows];
1126    let mut col_sums = vec![0.0; ncols];
1127    let mut total = 0.0;
1128    for r in 0..nrows {
1129        for c in 0..ncols {
1130            let v = table[r * ncols + c];
1131            row_sums[r] += v;
1132            col_sums[c] += v;
1133            total += v;
1134        }
1135    }
1136    if total == 0.0 { return Err("cramers_v: empty table".into()); }
1137
1138    // Chi-squared statistic
1139    let mut chi2 = KahanAccumulatorF64::new();
1140    for r in 0..nrows {
1141        for c in 0..ncols {
1142            let expected = row_sums[r] * col_sums[c] / total;
1143            if expected > 0.0 {
1144                let diff = table[r * ncols + c] - expected;
1145                chi2.add(diff * diff / expected);
1146            }
1147        }
1148    }
1149
1150    let k = (nrows.min(ncols) - 1) as f64;
1151    if k == 0.0 { return Ok(0.0); }
1152
1153    Ok((chi2.finalize() / (total * k)).sqrt())
1154}
1155
1156/// Levene's test for equality of variances across groups.
1157pub fn levene_test(groups: &[&[f64]]) -> Result<(f64, f64), String> {
1158    if groups.len() < 2 { return Err("levene_test: need at least 2 groups".into()); }
1159    let k = groups.len();
1160    let mut total_n = 0usize;
1161
1162    // Compute |x_ij - median_i| for each group
1163    let mut z_groups: Vec<Vec<f64>> = Vec::with_capacity(k);
1164    for g in groups {
1165        if g.is_empty() { return Err("levene_test: empty group".into()); }
1166        let mut sorted = g.to_vec();
1167        sorted.sort_by(|a, b| a.total_cmp(b));
1168        let med = if sorted.len() % 2 == 0 {
1169            (sorted[sorted.len()/2 - 1] + sorted[sorted.len()/2]) / 2.0
1170        } else {
1171            sorted[sorted.len()/2]
1172        };
1173        let z: Vec<f64> = g.iter().map(|&x| (x - med).abs()).collect();
1174        total_n += z.len();
1175        z_groups.push(z);
1176    }
1177
1178    // Now do one-way ANOVA on the z values
1179    let z_refs: Vec<&[f64]> = z_groups.iter().map(|v| v.as_slice()).collect();
1180    let anova = anova_oneway(&z_refs)?;
1181    Ok((anova.f_statistic, anova.p_value))
1182}
1183
1184/// Bartlett's test for equality of variances.
1185pub fn bartlett_test(groups: &[&[f64]]) -> Result<(f64, f64), String> {
1186    if groups.len() < 2 { return Err("bartlett_test: need at least 2 groups".into()); }
1187    let k = groups.len();
1188
1189    let mut vars = Vec::with_capacity(k);
1190    let mut ns = Vec::with_capacity(k);
1191    let mut total_n = 0usize;
1192
1193    for g in groups {
1194        let n = g.len();
1195        if n < 2 { return Err("bartlett_test: each group needs at least 2 observations".into()); }
1196        let m = kahan_mean(g);
1197        let v = kahan_var(g, m);
1198        vars.push(v);
1199        ns.push(n);
1200        total_n += n;
1201    }
1202
1203    let nk = total_n - k; // total df
1204    let nkf = nk as f64;
1205
1206    // Pooled variance
1207    let mut sp2_acc = KahanAccumulatorF64::new();
1208    for i in 0..k {
1209        sp2_acc.add((ns[i] as f64 - 1.0) * vars[i]);
1210    }
1211    let sp2 = sp2_acc.finalize() / nkf;
1212    if sp2 == 0.0 { return Err("bartlett_test: zero pooled variance".into()); }
1213
1214    // Bartlett statistic
1215    let mut num_acc = KahanAccumulatorF64::new();
1216    let mut denom_acc = KahanAccumulatorF64::new();
1217    for i in 0..k {
1218        let ni_m1 = ns[i] as f64 - 1.0;
1219        num_acc.add(ni_m1 * (vars[i] / sp2).max(1e-300).ln());
1220        denom_acc.add(1.0 / ni_m1);
1221    }
1222    let t = nkf * sp2.ln() - num_acc.finalize();
1223    let c = 1.0 + (1.0 / (3.0 * (k as f64 - 1.0))) * (denom_acc.finalize() - 1.0 / nkf);
1224    let bartlett = t / c;
1225
1226    // p-value from chi2 with k-1 df (approximate using gamma)
1227    let df = (k - 1) as f64;
1228    let p_value = chi2_survival(bartlett, df);
1229
1230    Ok((bartlett, p_value))
1231}
1232
1233// Helper: chi-squared survival function P(X > x) for given df.
1234// Uses the regularized incomplete gamma function approximation.
1235fn chi2_survival(x: f64, df: f64) -> f64 {
1236    if x <= 0.0 { return 1.0; }
1237    // For integer df, use series expansion of lower incomplete gamma
1238    let a = df / 2.0;
1239    let z = x / 2.0;
1240    // P(X <= x) = regularized_gamma_p(a, z)
1241    // P(X > x) = 1 - P(X <= x)
1242    1.0 - regularized_gamma_p(a, z)
1243}
1244
// Regularized lower incomplete gamma function P(a, x).
// The series representation converges fastest for x < a + 1; otherwise the
// continued fraction for the upper tail Q(a, x) converges fastest and
// P = 1 - Q (standard split, cf. Numerical Recipes §6.2).
fn regularized_gamma_p(a: f64, x: f64) -> f64 {
    if x < a + 1.0 {
        // Series expansion
        gamma_series(a, x)
    } else {
        // Continued fraction
        1.0 - gamma_cf(a, x)
    }
}
1254
1255fn gamma_series(a: f64, x: f64) -> f64 {
1256    let ln_gamma_a = ln_gamma(a);
1257    let mut sum = 1.0 / a;
1258    let mut term = 1.0 / a;
1259    for n in 1..200 {
1260        term *= x / (a + n as f64);
1261        sum += term;
1262        if term.abs() < sum.abs() * 1e-15 { break; }
1263    }
1264    sum * (-x + a * x.ln() - ln_gamma_a).exp()
1265}
1266
1267fn gamma_cf(a: f64, x: f64) -> f64 {
1268    let ln_gamma_a = ln_gamma(a);
1269    let mut f = 1e-30;
1270    let mut c = 1e-30;
1271    let mut d = 1.0 / (x + 1.0 - a);
1272    f = d;
1273    for n in 1..200 {
1274        let an = -(n as f64) * (n as f64 - a);
1275        let bn = x + 2.0 * n as f64 + 1.0 - a;
1276        d = bn + an * d;
1277        if d.abs() < 1e-30 { d = 1e-30; }
1278        c = bn + an / c;
1279        if c.abs() < 1e-30 { c = 1e-30; }
1280        d = 1.0 / d;
1281        let delta = d * c;
1282        f *= delta;
1283        if (delta - 1.0).abs() < 1e-15 { break; }
1284    }
1285    f * (-x + a * x.ln() - ln_gamma_a).exp()
1286}
1287
/// Natural log of the Gamma function, ln|Γ(x)|, via the Lanczos
/// approximation (g = 7, 9 coefficients).
///
/// For x < 0.5 the reflection formula Γ(x)Γ(1-x) = π/sin(πx) is applied
/// (recursing once on 1 - x); the log of the absolute value is returned.
// Fix: the previous doc comment called this "Stirling's approximation",
// but the implementation is (and was) the Lanczos approximation.
fn ln_gamma(x: f64) -> f64 {
    // Lanczos coefficients for g = 7, n = 9.
    let coeffs = [
        0.99999999999980993,
        676.5203681218851,
        -1259.1392167224028,
        771.32342877765313,
        -176.61502916214059,
        12.507343278686905,
        -0.13857109526572012,
        9.9843695780195716e-6,
        1.5056327351493116e-7,
    ];
    if x < 0.5 {
        // Reflection formula for the left half of the real line.
        let s = std::f64::consts::PI / (std::f64::consts::PI * x).sin();
        return s.abs().ln() - ln_gamma(1.0 - x);
    }
    let x = x - 1.0;
    let mut ag = coeffs[0];
    for (i, &c) in coeffs.iter().enumerate().skip(1) {
        ag += c / (x + i as f64);
    }
    let t = x + 7.5; // x + g + 0.5
    0.5 * (2.0 * std::f64::consts::PI).ln() + (x + 0.5) * t.ln() - t + ag.ln()
}
1314
1315// Kahan-mean helper (used by effect size functions).
1316fn kahan_mean(data: &[f64]) -> f64 {
1317    let mut acc = KahanAccumulatorF64::new();
1318    for &x in data { acc.add(x); }
1319    acc.finalize() / data.len() as f64
1320}
1321
1322// Kahan-variance helper (given mean).
1323fn kahan_var(data: &[f64], mean: f64) -> f64 {
1324    let mut acc = KahanAccumulatorF64::new();
1325    for &x in data { let d = x - mean; acc.add(d * d); }
1326    acc.finalize() / (data.len() as f64 - 1.0)
1327}
1328
1329// ---------------------------------------------------------------------------
1330// Tests
1331// ---------------------------------------------------------------------------
1332
// Unit tests. These exercise both the functions visible above and siblings
// defined earlier in the file (t_test, anova_oneway, lm, wls, mann_whitney,
// kruskal_wallis, wilcoxon_signed_rank, bonferroni, fdr_bh, tukey_hsd,
// logistic_regression) brought in via `use super::*`.
#[cfg(test)]
mod tests {
    use super::*;

    // --- Parametric tests ---

    #[test]
    fn test_t_test_known_mean() {
        // Sample centered on the hypothesized mean: should not reject H0.
        let data = [5.1, 4.9, 5.0, 5.2, 4.8, 5.0, 5.1, 4.9];
        let r = t_test(&data, 5.0).unwrap();
        // mean is very close to 5.0, so p should be large (non-significant)
        assert!(r.p_value > 0.05, "p = {}", r.p_value);
    }

    #[test]
    fn test_t_test_shifted() {
        // data clearly different from 0
        let data = [10.1, 10.2, 10.0, 9.9, 10.3, 10.1, 10.0, 10.2];
        let r = t_test(&data, 0.0).unwrap();
        assert!(r.p_value < 0.001, "p = {}", r.p_value);
    }

    #[test]
    fn test_t_test_two_sample() {
        // Two well-separated samples: Welch's test should strongly reject.
        let x = [10.0, 11.0, 12.0, 13.0, 14.0];
        let y = [20.0, 21.0, 22.0, 23.0, 24.0];
        let r = t_test_two_sample(&x, &y).unwrap();
        assert!(r.p_value < 0.001);
    }

    #[test]
    fn test_chi_squared_uniform() {
        // Observed exactly equals expected: chi2 = 0 and p ≈ 1.
        let observed = [20.0, 20.0, 20.0, 20.0, 20.0];
        let expected = [20.0, 20.0, 20.0, 20.0, 20.0];
        let r = chi_squared_test(&observed, &expected).unwrap();
        assert_eq!(r.chi2, 0.0);
        assert!((r.p_value - 1.0).abs() < 1e-6);
    }

    #[test]
    fn test_anova_equal_groups() {
        let g1 = [5.0, 5.1, 4.9, 5.0, 5.2];
        let g2 = [5.0, 4.8, 5.1, 5.0, 4.9];
        let g3 = [5.1, 5.0, 4.9, 5.0, 5.1];
        let r = anova_oneway(&[&g1, &g2, &g3]).unwrap();
        // Groups with similar means → non-significant
        assert!(r.p_value > 0.05, "p = {}", r.p_value);
    }

    #[test]
    fn test_anova_different_groups() {
        // Widely separated group means → highly significant F.
        let g1 = [1.0, 2.0, 3.0, 2.0, 1.0];
        let g2 = [10.0, 11.0, 12.0, 11.0, 10.0];
        let g3 = [20.0, 21.0, 22.0, 21.0, 20.0];
        let r = anova_oneway(&[&g1, &g2, &g3]).unwrap();
        assert!(r.p_value < 0.001);
    }

    // --- Regression ---

    #[test]
    fn test_lm_simple() {
        // y = 2*x + 1
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [3.0, 5.0, 7.0, 9.0, 11.0];
        let r = lm(&x, &y, 5, 1).unwrap();
        // intercept ≈ 1, slope ≈ 2
        assert!((r.coefficients[0] - 1.0).abs() < 1e-10, "intercept = {}", r.coefficients[0]);
        assert!((r.coefficients[1] - 2.0).abs() < 1e-10, "slope = {}", r.coefficients[1]);
        assert!((r.r_squared - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_determinism() {
        // Bit-exact reproducibility per the module's determinism contract.
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
        let r1 = t_test(&data, 3.0).unwrap();
        let r2 = t_test(&data, 3.0).unwrap();
        assert_eq!(r1.t_statistic.to_bits(), r2.t_statistic.to_bits());
        assert_eq!(r1.p_value.to_bits(), r2.p_value.to_bits());
    }

    #[test]
    fn test_wls_uniform_weights() {
        // WLS with uniform weights should match OLS
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y: Vec<f64> = x.iter().map(|&xi| 1.0 + 2.0 * xi).collect();
        let w = vec![1.0; 5];
        let r = wls(&x, &y, &w, 5, 1).unwrap();
        assert!((r.coefficients[0] - 1.0).abs() < 1e-8, "intercept = {}", r.coefficients[0]);
        assert!((r.coefficients[1] - 2.0).abs() < 1e-8, "slope = {}", r.coefficients[1]);
    }

    // --- B7: Non-parametric tests ---

    #[test]
    fn test_mann_whitney_identical() {
        // Identical samples → no evidence of a location shift.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [1.0, 2.0, 3.0, 4.0, 5.0];
        let r = mann_whitney(&x, &y).unwrap();
        assert!(r.p_value > 0.05, "p = {}", r.p_value);
    }

    #[test]
    fn test_mann_whitney_separated() {
        // Completely non-overlapping samples → significant.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [10.0, 11.0, 12.0, 13.0, 14.0];
        let r = mann_whitney(&x, &y).unwrap();
        assert!(r.p_value < 0.05, "p = {}", r.p_value);
    }

    #[test]
    fn test_kruskal_wallis_identical() {
        let g1 = [1.0, 2.0, 3.0, 4.0, 5.0];
        let g2 = [1.0, 2.0, 3.0, 4.0, 5.0];
        let r = kruskal_wallis(&[&g1, &g2]).unwrap();
        assert!(r.p_value > 0.05, "p = {}", r.p_value);
    }

    #[test]
    fn test_kruskal_wallis_different() {
        let g1 = [1.0, 2.0, 3.0];
        let g2 = [10.0, 11.0, 12.0];
        let g3 = [20.0, 21.0, 22.0];
        let r = kruskal_wallis(&[&g1, &g2, &g3]).unwrap();
        assert!(r.p_value < 0.05, "p = {}", r.p_value);
    }

    #[test]
    fn test_wilcoxon_no_difference() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let y = [1.1, 1.9, 3.1, 3.9, 5.1, 5.9, 7.1, 7.9]; // very close
        let r = wilcoxon_signed_rank(&x, &y).unwrap();
        assert!(r.p_value > 0.05, "p = {}", r.p_value);
    }

    #[test]
    fn test_wilcoxon_clear_shift() {
        // Every pair differs by the same amount in the same direction.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let y: Vec<f64> = x.iter().map(|&v| v + 5.0).collect();
        let r = wilcoxon_signed_rank(&x, &y).unwrap();
        assert!(r.p_value < 0.05, "p = {}", r.p_value);
    }

    // --- Multiple-comparison corrections ---

    #[test]
    fn test_bonferroni_basic() {
        // Bonferroni multiplies each p by m (= 3), capped at 1.
        let pvals = [0.01, 0.04, 0.5];
        let adj = bonferroni(&pvals);
        assert!((adj[0] - 0.03).abs() < 1e-12);
        assert!((adj[1] - 0.12).abs() < 1e-12);
        assert!((adj[2] - 1.0).abs() < 1e-12);
    }

    #[test]
    fn test_fdr_bh_known() {
        let pvals = [0.01, 0.04, 0.03, 0.5];
        let adj = fdr_bh(&pvals);
        // Sorted: 0.01(idx0), 0.03(idx2), 0.04(idx1), 0.5(idx3)
        // Rank 1: 0.01*4/1 = 0.04
        // Rank 2: 0.03*4/2 = 0.06
        // Rank 3: 0.04*4/3 ≈ 0.0533
        // Rank 4: 0.5*4/4 = 0.5
        // Monotonicity: 0.04, 0.0533, 0.0533, 0.5
        assert!(adj[0] < 0.05, "adj[0] = {}", adj[0]);
        assert!(adj[2] < 0.07, "adj[2] = {}", adj[2]);
    }

    #[test]
    fn test_fdr_bh_preserves_order() {
        let pvals = [0.5, 0.01, 0.3];
        let adj = fdr_bh(&pvals);
        // Original p[0]=0.5 should still have the largest adjusted
        assert!(adj[1] < adj[0], "adj[0]={}, adj[1]={}", adj[0], adj[1]);
    }

    #[test]
    fn test_tukey_hsd_all_same() {
        // All group means identical → every pairwise difference ≈ 0.
        let g1 = [5.0, 5.0, 5.0, 5.0, 5.0];
        let g2 = [5.0, 5.0, 5.0, 5.0, 5.0];
        let g3 = [5.0, 5.0, 5.0, 5.0, 5.0];
        let results = tukey_hsd(&[&g1, &g2, &g3]).unwrap();
        for pair in &results {
            assert!(pair.mean_diff.abs() < 1e-12);
        }
    }

    #[test]
    fn test_tukey_hsd_one_different() {
        let g1 = [1.0, 2.0, 3.0, 4.0, 5.0];
        let g2 = [2.0, 3.0, 4.0, 5.0, 6.0];
        let g3 = [20.0, 21.0, 22.0, 23.0, 24.0];
        let results = tukey_hsd(&[&g1, &g2, &g3]).unwrap();
        // g3 comparisons should be significant
        let g3_pairs: Vec<_> = results.iter().filter(|p| p.group_i == 2 || p.group_j == 2).collect();
        for pair in &g3_pairs {
            assert!(pair.p_value < 0.05, "p = {} for ({}, {})", pair.p_value, pair.group_i, pair.group_j);
        }
    }

    #[test]
    fn test_logistic_convergence() {
        // Well-separated data should converge
        let x = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let y = [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0];
        let r = logistic_regression(&x, &y, 10, 1).unwrap();
        assert!(r.iterations <= 100, "iterations = {}", r.iterations);
        // Coefficient for x should be positive (higher x → more likely y=1)
        assert!(r.coefficients[1] > 0.0, "beta_1 = {}", r.coefficients[1]);
    }

    #[test]
    fn test_b7_determinism() {
        // Bit-exact reproducibility for the non-parametric path as well.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [6.0, 7.0, 8.0, 9.0, 10.0];
        let r1 = mann_whitney(&x, &y).unwrap();
        let r2 = mann_whitney(&x, &y).unwrap();
        assert_eq!(r1.u_statistic.to_bits(), r2.u_statistic.to_bits());
        assert_eq!(r1.p_value.to_bits(), r2.p_value.to_bits());
    }
}