Skip to main content

limma/
diffsplice.rs

1//! Differential exon usage. Port of limma's `diffSplice` and `topSplice`
2//! (`diffSplice.R`, `topSplice.R`).
3//!
4//! `diff_splice` takes an exon-level [`MArrayLM`] fit (one row per exon) plus a
5//! gene-id label per exon, and tests whether each exon's log-fold-change departs
6//! from its gene's average — i.e. whether exon usage changes between conditions.
7//! It returns exon-level moderated t-statistics and gene-level F / Simes /
8//! Bonferroni summaries. `top_splice` collates a single coefficient's results
9//! into a ranked table, mirroring `topSplice`.
10//!
11//! The annotation-frame plumbing of R's `diffSplice` (carrying through arbitrary
12//! `genes` columns and detecting which are gene-level) is out of scope for this
13//! numeric port: gene and exon identity are carried as plain string labels.
14
15use anyhow::{bail, Result};
16use ndarray::{Array1, Array2};
17
18use crate::ebayes::squeeze_var_legacy;
19use crate::fit::MArrayLM;
20use crate::special::{f_sf, t_sf};
21use crate::toptable::p_adjust_bh;
22
23/// Result of [`diff_splice`]. Exon-level fields have one row per *kept* exon
24/// (exons belonging to genes with at least two exons), in gene-sorted order;
25/// gene-level fields have one row per kept gene.
26#[derive(Clone, Debug)]
27pub struct DiffSplice {
28    pub coef_names: Vec<String>,
29
30    // --- Exon level (kept exons, gene-sorted) ---
31    /// Exon identifier per kept exon (defaults to the 1-based original row index).
32    pub exon_id: Vec<String>,
33    /// Gene identifier per kept exon.
34    pub exon_geneid: Vec<String>,
35    /// 0-based index into the kept-gene arrays for each kept exon.
36    pub exon_gene_index: Vec<usize>,
37    /// Leverage-adjusted exon log-fold-changes (relative to the gene average).
38    pub coefficients: Array2<f64>,
39    /// Exon moderated t-statistics.
40    pub t: Array2<f64>,
41    /// Two-sided exon p-values.
42    pub p_value: Array2<f64>,
43
44    // --- Gene level (kept genes) ---
45    pub gene_id: Vec<String>,
46    pub gene_nexons: Vec<usize>,
47    pub gene_df_prior: Array1<f64>,
48    pub gene_df_residual: Array1<f64>,
49    pub gene_df_total: Array1<f64>,
50    pub gene_s2: Array1<f64>,
51    pub gene_s2_post: Array1<f64>,
52    /// Gene-level F-statistics (one column per coefficient).
53    pub gene_f: Array2<f64>,
54    pub gene_f_p_value: Array2<f64>,
55    pub gene_simes_p_value: Array2<f64>,
56    pub gene_bonferroni_p_value: Array2<f64>,
57    /// 0-based first/last (inclusive) kept-exon row index for each kept gene.
58    pub gene_firstexon: Vec<usize>,
59    pub gene_lastexon: Vec<usize>,
60}
61
62/// `diffSplice` — test for differential exon usage from an exon-level fit.
63///
64/// * `fit` — an [`MArrayLM`] with one row per exon (from `lmFit` on exon data).
65/// * `geneid` — gene label for each exon (length = `fit` rows).
66/// * `exonid` — optional exon label per exon; defaults to 1-based row indices.
67/// * `robust` / `legacy` — forwarded to `squeezeVar` (R defaults: `FALSE`).
68pub fn diff_splice(
69    fit: &MArrayLM,
70    geneid: &[String],
71    exonid: Option<&[String]>,
72    robust: bool,
73    legacy: bool,
74) -> Result<DiffSplice> {
75    let n_exons = fit.coefficients.nrows();
76    let n_coef = fit.coefficients.ncols();
77    if geneid.len() != n_exons {
78        bail!(
79            "geneid has length {} but fit has {} exons",
80            geneid.len(),
81            n_exons
82        );
83    }
84    if let Some(eid) = exonid {
85        if eid.len() != n_exons {
86            bail!(
87                "exonid has length {} but fit has {} exons",
88                eid.len(),
89                n_exons
90            );
91        }
92    }
93
94    // Exon labels default to the 1-based original row index (R: ExonID=1:nrow).
95    let exon_label: Vec<String> = match exonid {
96        Some(eid) => eid.to_vec(),
97        None => (1..=n_exons).map(|i| i.to_string()).collect(),
98    };
99
100    // Sort exons by gene (then exon label), stably — matching R's stable radix
101    // `order(geneid)` / `order(geneid, exonid)`.
102    let mut order: Vec<usize> = (0..n_exons).collect();
103    match exonid {
104        Some(eid) => {
105            order.sort_by(|&a, &b| geneid[a].cmp(&geneid[b]).then_with(|| eid[a].cmp(&eid[b])))
106        }
107        None => order.sort_by(|&a, &b| geneid[a].cmp(&geneid[b])),
108    }
109
110    let gid: Vec<&str> = order.iter().map(|&i| geneid[i].as_str()).collect();
111    let elabel: Vec<String> = order.iter().map(|&i| exon_label[i].clone()).collect();
112
113    let mut exon_s2: Vec<f64> = order.iter().map(|&i| fit.sigma[i].powi(2)).collect();
114    let exon_dfres: Vec<f64> = order.iter().map(|&i| fit.df_residual[i]).collect();
115    // Zero residual variance where residual df vanishes (R: exon.s2[df<1e-6] <- 0).
116    if exon_dfres.iter().cloned().fold(f64::INFINITY, f64::min) < 1e-6 {
117        for i in 0..n_exons {
118            if exon_dfres[i] < 1e-6 {
119                exon_s2[i] = 0.0;
120            }
121        }
122    }
123
124    // Contiguous gene groups (sorted order => first-appearance == sorted order,
125    // matching rowsum(..., reorder=FALSE)).
126    let mut group_start: Vec<usize> = Vec::new();
127    let mut group_len: Vec<usize> = Vec::new();
128    let mut group_id: Vec<&str> = Vec::new();
129    let mut i = 0;
130    while i < n_exons {
131        let g = gid[i];
132        let start = i;
133        while i < n_exons && gid[i] == g {
134            i += 1;
135        }
136        group_id.push(g);
137        group_start.push(start);
138        group_len.push(i - start);
139    }
140    let n_genes_all = group_id.len();
141
142    // Gene-wise exon counts, residual df and pooled residual variances.
143    let gene_dfres_all: Vec<f64> = (0..n_genes_all)
144        .map(|gi| {
145            let (s, l) = (group_start[gi], group_len[gi]);
146            (s..s + l).map(|k| exon_dfres[k]).sum()
147        })
148        .collect();
149    let gene_s2_all: Vec<f64> = (0..n_genes_all)
150        .map(|gi| {
151            let (s, l) = (group_start[gi], group_len[gi]);
152            let num: f64 = (s..s + l).map(|k| exon_dfres[k] * exon_s2[k]).sum();
153            let den: f64 = (s..s + l).map(|k| exon_dfres[k]).sum();
154            num / den
155        })
156        .collect();
157
158    // Empirical-Bayes posterior gene variances (legacy=FALSE forces unequal-df).
159    let squeeze = squeeze_var_legacy(
160        &Array1::from(gene_s2_all.clone()),
161        &Array1::from(gene_dfres_all.clone()),
162        None,
163        robust,
164        Some(legacy),
165    )?;
166
167    // Keep genes with more than one exon.
168    let kept_idx: Vec<usize> = (0..n_genes_all).filter(|&gi| group_len[gi] > 1).collect();
169    let ngenes = kept_idx.len();
170    if ngenes == 0 {
171        bail!("no genes with more than one exon");
172    }
173
174    let gene_nexons: Vec<usize> = kept_idx.iter().map(|&gi| group_len[gi]).collect();
175    let gene_df_test: Vec<f64> = gene_nexons.iter().map(|&n| (n - 1) as f64).collect();
176    let gene_df_residual: Vec<f64> = kept_idx.iter().map(|&gi| gene_dfres_all[gi]).collect();
177    let gene_s2_kept: Vec<f64> = kept_idx.iter().map(|&gi| gene_s2_all[gi]).collect();
178    let gene_s2_post: Vec<f64> = kept_idx.iter().map(|&gi| squeeze.var_post[gi]).collect();
179    let df_prior_kept: Vec<f64> = if squeeze.df_prior.len() > 1 {
180        kept_idx.iter().map(|&gi| squeeze.df_prior[gi]).collect()
181    } else {
182        vec![squeeze.df_prior[0]; ngenes]
183    };
184    // df.total = df.residual + df.prior, capped at the total kept residual df.
185    let sum_dfres_kept: f64 = gene_df_residual.iter().sum();
186    let gene_df_total: Vec<f64> = (0..ngenes)
187        .map(|gi| (gene_df_residual[gi] + df_prior_kept[gi]).min(sum_dfres_kept))
188        .collect();
189
190    // Kept exons + their kept-gene index (contiguous per gene).
191    let mut kept_rows: Vec<usize> = Vec::new();
192    let mut g: Vec<usize> = Vec::new();
193    let mut gene_firstexon: Vec<usize> = Vec::with_capacity(ngenes);
194    let mut gene_lastexon: Vec<usize> = Vec::with_capacity(ngenes);
195    for (new_gi, &gi) in kept_idx.iter().enumerate() {
196        let (s, l) = (group_start[gi], group_len[gi]);
197        gene_firstexon.push(kept_rows.len());
198        for k in s..s + l {
199            kept_rows.push(k);
200            g.push(new_gi);
201        }
202        gene_lastexon.push(kept_rows.len() - 1);
203    }
204    let n_kept = kept_rows.len();
205
206    let mut ecoef = Array2::<f64>::zeros((n_kept, n_coef));
207    let mut esdu = Array2::<f64>::zeros((n_kept, n_coef));
208    for (r, &k) in kept_rows.iter().enumerate() {
209        // Gather the kept exon's row straight from the source matrices — no
210        // per-row temporary Vec.
211        let orig = order[k];
212        for c in 0..n_coef {
213            ecoef[[r, c]] = fit.coefficients[[orig, c]];
214            esdu[[r, c]] = fit.stdev_unscaled[[orig, c]];
215        }
216    }
217    let exon_geneid: Vec<String> = kept_rows.iter().map(|&k| gid[k].to_string()).collect();
218    let exon_id_kept: Vec<String> = kept_rows.iter().map(|&k| elabel[k].clone()).collect();
219
220    // Inverse-variance weights and per-gene weighted mean coefficient.
221    let mut u2 = Array2::<f64>::zeros((n_kept, n_coef));
222    let mut u2_rowsum = Array2::<f64>::zeros((ngenes, n_coef));
223    let mut cu2_rowsum = Array2::<f64>::zeros((ngenes, n_coef));
224    for r in 0..n_kept {
225        let gg = g[r];
226        for c in 0..n_coef {
227            let w = 1.0 / (esdu[[r, c]] * esdu[[r, c]]);
228            u2[[r, c]] = w;
229            u2_rowsum[[gg, c]] += w;
230            cu2_rowsum[[gg, c]] += ecoef[[r, c]] * w;
231        }
232    }
233
234    // Center exon coefficients on the gene mean; moderated t before leverage.
235    // Every cell of `exon_coef` is assigned in the loop below, so start from
236    // zeros rather than cloning `ecoef`.
237    let mut exon_coef = Array2::<f64>::zeros((n_kept, n_coef));
238    let mut exon_t = Array2::<f64>::zeros((n_kept, n_coef));
239    for r in 0..n_kept {
240        let gg = g[r];
241        let sp = gene_s2_post[gg].sqrt();
242        for c in 0..n_coef {
243            let centered = ecoef[[r, c]] - cu2_rowsum[[gg, c]] / u2_rowsum[[gg, c]];
244            exon_coef[[r, c]] = centered;
245            exon_t[[r, c]] = centered / esdu[[r, c]] / sp;
246        }
247    }
248    // Gene F = mean of squared exon t over the gene's test df.
249    let mut gene_f = Array2::<f64>::zeros((ngenes, n_coef));
250    for r in 0..n_kept {
251        let gg = g[r];
252        for c in 0..n_coef {
253            gene_f[[gg, c]] += exon_t[[r, c]] * exon_t[[r, c]];
254        }
255    }
256    for gg in 0..ngenes {
257        for c in 0..n_coef {
258            gene_f[[gg, c]] /= gene_df_test[gg];
259        }
260    }
261    // Leverage rescaling of exon coefficients / t, then p-values.
262    let mut exon_p = Array2::<f64>::zeros((n_kept, n_coef));
263    for r in 0..n_kept {
264        let gg = g[r];
265        let df = gene_df_total[gg];
266        for c in 0..n_coef {
267            let one_m_lev = 1.0 - u2[[r, c]] / u2_rowsum[[gg, c]];
268            exon_coef[[r, c]] /= one_m_lev;
269            exon_t[[r, c]] /= one_m_lev.sqrt();
270            exon_p[[r, c]] = 2.0 * t_sf(exon_t[[r, c]].abs(), df);
271        }
272    }
273    let mut gene_f_p = Array2::<f64>::zeros((ngenes, n_coef));
274    for gg in 0..ngenes {
275        for c in 0..n_coef {
276            gene_f_p[[gg, c]] = f_sf(gene_f[[gg, c]], gene_df_test[gg], gene_df_total[gg]);
277        }
278    }
279
280    // Simes and Bonferroni combination of exon p-values per gene per coef.
281    let mut gene_simes_p = Array2::<f64>::zeros((ngenes, n_coef));
282    let mut gene_bonf_p = Array2::<f64>::zeros((ngenes, n_coef));
283    for gg in 0..ngenes {
284        let m = gene_nexons[gg] as f64;
285        let (lo, hi) = (gene_firstexon[gg], gene_lastexon[gg]);
286        for c in 0..n_coef {
287            let mut ps: Vec<f64> = (lo..=hi).map(|r| exon_p[[r, c]]).collect();
288            ps.sort_by(|a, b| a.partial_cmp(b).unwrap());
289            let mut simes = f64::INFINITY;
290            for (k, &p) in ps.iter().enumerate() {
291                let adj = p * m / ((k + 1) as f64);
292                if adj < simes {
293                    simes = adj;
294                }
295            }
296            gene_simes_p[[gg, c]] = simes;
297            gene_bonf_p[[gg, c]] = (ps[0] * m).min(1.0);
298        }
299    }
300
301    Ok(DiffSplice {
302        coef_names: fit.coef_names.clone(),
303        exon_id: exon_id_kept,
304        exon_geneid,
305        exon_gene_index: g,
306        coefficients: exon_coef,
307        t: exon_t,
308        p_value: exon_p,
309        gene_id: kept_idx
310            .iter()
311            .map(|&gi| group_id[gi].to_string())
312            .collect(),
313        gene_nexons,
314        gene_df_prior: Array1::from(df_prior_kept),
315        gene_df_residual: Array1::from(gene_df_residual),
316        gene_df_total: Array1::from(gene_df_total),
317        gene_s2: Array1::from(gene_s2_kept),
318        gene_s2_post: Array1::from(gene_s2_post),
319        gene_f,
320        gene_f_p_value: gene_f_p,
321        gene_simes_p_value: gene_simes_p,
322        gene_bonferroni_p_value: gene_bonf_p,
323        gene_firstexon,
324        gene_lastexon,
325    })
326}
327
328/// Which statistic `top_splice` ranks on. Mirrors `topSplice`'s `test` argument.
329#[derive(Clone, Copy, Debug, PartialEq, Eq)]
330pub enum SpliceTest {
331    /// Gene-level Simes combination of exon p-values.
332    Simes,
333    /// Gene-level F-test across exons.
334    F,
335    /// Exon-level moderated t-test.
336    T,
337    /// Exon-level TREAT test relative to a log-fold-change threshold.
338    Treat,
339}
340
341/// `top_splice` row ordering. Mirrors `topSplice`'s `sort.by` argument.
342#[derive(Clone, Copy, Debug, PartialEq, Eq)]
343pub enum SpliceSort {
344    /// Ascending p-value.
345    P,
346    /// Original (diffSplice) order.
347    None,
348    /// Descending absolute logFC (exon t-test only).
349    LogFC,
350    /// Descending exon count, ties broken by ascending p-value (gene tests only).
351    NExons,
352}
353
354/// One row of a [`top_splice`] table.
355#[derive(Clone, Debug)]
356pub struct TopSpliceRow {
357    /// Exon id (t / treat tests) or gene id (F / simes tests).
358    pub id: String,
359    /// Gene id (equals `id` for gene-level tests).
360    pub geneid: String,
361    pub nexons: Option<usize>,
362    pub logfc: Option<f64>,
363    pub t: Option<f64>,
364    pub f: Option<f64>,
365    pub p_value: f64,
366    pub fdr: f64,
367}
368
369/// `topSplice` — collate one coefficient's `diffSplice` results into a ranked
370/// table. `coef` is a 0-based column index. `treat_lfc > 0` with
371/// [`SpliceTest::T`] switches to the TREAT variant (as R does).
372pub fn top_splice(
373    ds: &DiffSplice,
374    coef: usize,
375    test: SpliceTest,
376    number: usize,
377    fdr: f64,
378    sort_by: SpliceSort,
379    treat_lfc: f64,
380) -> Result<Vec<TopSpliceRow>> {
381    let n_coef = ds.gene_f.ncols();
382    if coef >= n_coef {
383        bail!("coef index {} out of range (have {})", coef, n_coef);
384    }
385    if matches!(sort_by, SpliceSort::LogFC) && !matches!(test, SpliceTest::T) {
386        bail!("sorting by logFC only available with the exon t-test");
387    }
388    if matches!(sort_by, SpliceSort::NExons) && matches!(test, SpliceTest::T) {
389        bail!("sorting by NExons only available with gene-level tests");
390    }
391    // R promotes test="t" with a positive treat.lfc to the TREAT variant.
392    let test = if matches!(test, SpliceTest::T) && treat_lfc > 0.0 {
393        SpliceTest::Treat
394    } else {
395        test
396    };
397
398    let mut rows: Vec<TopSpliceRow> = match test {
399        SpliceTest::T | SpliceTest::Treat => (0..ds.coefficients.nrows())
400            .map(|r| {
401                let logfc = ds.coefficients[[r, coef]];
402                let t = ds.t[[r, coef]];
403                let p = if matches!(test, SpliceTest::Treat) {
404                    diff_splice_treat(ds, r, coef, treat_lfc)
405                } else {
406                    ds.p_value[[r, coef]]
407                };
408                TopSpliceRow {
409                    id: ds.exon_id[r].clone(),
410                    geneid: ds.exon_geneid[r].clone(),
411                    nexons: None,
412                    logfc: Some(logfc),
413                    t: Some(t),
414                    f: None,
415                    p_value: p,
416                    fdr: f64::NAN,
417                }
418            })
419            .collect(),
420        SpliceTest::F => (0..ds.gene_id.len())
421            .map(|gg| TopSpliceRow {
422                id: ds.gene_id[gg].clone(),
423                geneid: ds.gene_id[gg].clone(),
424                nexons: Some(ds.gene_nexons[gg]),
425                logfc: None,
426                t: None,
427                f: Some(ds.gene_f[[gg, coef]]),
428                p_value: ds.gene_f_p_value[[gg, coef]],
429                fdr: f64::NAN,
430            })
431            .collect(),
432        SpliceTest::Simes => (0..ds.gene_id.len())
433            .map(|gg| TopSpliceRow {
434                id: ds.gene_id[gg].clone(),
435                geneid: ds.gene_id[gg].clone(),
436                nexons: Some(ds.gene_nexons[gg]),
437                logfc: None,
438                t: None,
439                f: None,
440                p_value: ds.gene_simes_p_value[[gg, coef]],
441                fdr: f64::NAN,
442            })
443            .collect(),
444    };
445
446    // BH FDR over the full set, then optional FDR filtering.
447    let pv: Vec<f64> = rows.iter().map(|r| r.p_value).collect();
448    let adj = p_adjust_bh(&pv);
449    for (r, a) in rows.iter_mut().zip(adj) {
450        r.fdr = a;
451    }
452    if fdr < 1.0 {
453        rows.retain(|r| r.fdr <= fdr);
454    }
455
456    // R: `number <- min(number, nrow)`; `if(number <= 1) return(out)` unsorted.
457    let number = number.min(rows.len());
458    if number <= 1 {
459        return Ok(rows);
460    }
461
462    // Stable sort by the requested key (matching R's stable `order`).
463    let mut idx: Vec<usize> = (0..rows.len()).collect();
464    match sort_by {
465        SpliceSort::P => {
466            idx.sort_by(|&a, &b| rows[a].p_value.partial_cmp(&rows[b].p_value).unwrap())
467        }
468        SpliceSort::LogFC => idx.sort_by(|&a, &b| {
469            rows[b]
470                .logfc
471                .unwrap()
472                .abs()
473                .partial_cmp(&rows[a].logfc.unwrap().abs())
474                .unwrap()
475        }),
476        SpliceSort::NExons => idx.sort_by(|&a, &b| {
477            rows[b]
478                .nexons
479                .unwrap()
480                .cmp(&rows[a].nexons.unwrap())
481                .then_with(|| rows[a].p_value.partial_cmp(&rows[b].p_value).unwrap())
482        }),
483        SpliceSort::None => {}
484    }
485    idx.truncate(number);
486    Ok(idx.into_iter().map(|i| rows[i].clone()).collect())
487}
488
489/// `.diffSpliceTreat`: exon-level TREAT p-value relative to `lfc`.
490fn diff_splice_treat(ds: &DiffSplice, r: usize, coef: usize, lfc: f64) -> f64 {
491    let lfc = lfc.abs();
492    let acoef = ds.coefficients[[r, coef]].abs();
493    let se = acoef / ds.t[[r, coef]].abs();
494    let df = ds.gene_df_total[ds.exon_gene_index[r]];
495    let p = t_sf((acoef - lfc) / se, df) + t_sf((acoef + lfc) / se, df);
496    if p.is_nan() {
497        1.0
498    } else {
499        p
500    }
501}
502
503#[cfg(test)]
504// Reference literals are copied verbatim from R's 17-significant-digit output
505// (scratch/diffsplice_ref.R); the redundant final digit is intentional.
506#[allow(clippy::excessive_precision, clippy::approx_constant)]
507mod tests {
508    use super::*;
509    use crate::fit::lmfit;
510
511    // Reference values from scratch/diffsplice_ref.R (limma 3.68.3, R 4.6.0).
512    fn fixture() -> DiffSplice {
513        #[rustfmt::skip]
514        let e = Array2::from_shape_vec((12, 6), vec![
515            5.1, 4.8, 5.3, 7.2, 7.0, 7.5,
516            3.2, 3.5, 3.0, 3.1, 3.4, 3.3,
517            6.0, 6.2, 5.8, 4.1, 4.0, 4.3,
518            8.0, 8.1, 7.9, 8.2, 8.0, 8.1,
519            2.0, 2.2, 1.9, 4.0, 4.1, 3.8,
520            4.4, 4.6, 4.2, 5.0, 5.2, 4.9,
521            7.1, 7.0, 7.3, 7.0, 6.9, 7.2,
522            3.3, 3.1, 3.5, 6.0, 6.2, 5.8,
523            5.5, 5.7, 5.3, 5.4, 5.6, 5.2,
524            6.6, 6.4, 6.8, 6.5, 6.7, 6.3,
525            4.0, 4.2, 3.8, 7.0, 7.1, 6.9,
526            9.0, 9.1, 8.9, 6.0, 5.9, 6.1,
527        ]).unwrap();
528        #[rustfmt::skip]
529        let design = Array2::from_shape_vec((6, 2), vec![
530            1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
531        ]).unwrap();
532        let geneid: Vec<String> = [
533            "G1", "G1", "G1", "G2", "G2", "G3", "G3", "G3", "G3", "G4", "G5", "G5",
534        ]
535        .iter()
536        .map(|s| s.to_string())
537        .collect();
538        let names: Vec<String> = (1..=12).map(|i| format!("ex{i}")).collect();
539        let coefs = vec!["Intercept".to_string(), "grpB".to_string()];
540        let fit = lmfit(&e, &design, names, coefs).unwrap();
541        diff_splice(&fit, &geneid, None, false, false).unwrap()
542    }
543
544    fn rclose(got: f64, want: f64) -> bool {
545        if want.is_nan() {
546            return got.is_nan();
547        }
548        (got - want).abs() <= 1e-6 * want.abs().max(1e-300) + 1e-12
549    }
550
551    fn assert_mat(got: &Array2<f64>, want: &[[f64; 2]]) {
552        assert_eq!(got.nrows(), want.len());
553        for (r, row) in want.iter().enumerate() {
554            for c in 0..2 {
555                assert!(
556                    rclose(got[[r, c]], row[c]),
557                    "mismatch at [{r},{c}]: got {} want {}",
558                    got[[r, c]],
559                    row[c]
560                );
561            }
562        }
563    }
564
565    #[test]
566    fn diff_splice_gene_level_matches_r() {
567        let ds = fixture();
568        assert_eq!(ds.gene_id, ["G1", "G2", "G3", "G5"]);
569        assert_eq!(ds.gene_nexons, [3, 2, 4, 2]);
570        assert_eq!(ds.gene_df_residual.to_vec(), [12.0, 8.0, 16.0, 8.0]);
571        assert_eq!(ds.gene_df_total.to_vec(), [44.0, 44.0, 44.0, 44.0]);
572        assert!(rclose(ds.gene_df_prior[0], 8134.8447803826621));
573
574        let s2 = [
575            0.046111111111111179,
576            0.016666666666666594,
577            0.033750000000000002,
578            0.01749999999999995,
579        ];
580        let s2p = [
581            0.032952104211060054,
582            0.032916712208519362,
583            0.032934297253120325,
584            0.032917530923212625,
585        ];
586        for i in 0..4 {
587            assert!(rclose(ds.gene_s2[i], s2[i]));
588            assert!(rclose(ds.gene_s2_post[i], s2p[i]));
589        }
590
591        assert_mat(
592            &ds.gene_f,
593            &[
594                [180.36278640252982, 185.3363349152192],
595                [1622.3268693537839, 76.582172525751389],
596                [243.61635141837323, 79.805356499122354],
597                [1139.2105953352645, 820.2316286413901],
598            ],
599        );
600        assert_mat(
601            &ds.gene_f_p_value,
602            &[
603                [6.2867278756513315e-22, 3.6850195711806162e-22],
604                [2.2936973401206709e-36, 3.4576638739475986e-11],
605                [2.0512282816641372e-27, 7.91897383774075e-18],
606                [4.3073010662264811e-33, 4.3519465964385832e-30],
607            ],
608        );
609        assert_mat(
610            &ds.gene_simes_p_value,
611            &[
612                [2.3179315147636661e-21, 1.8221109981110334e-20],
613                [2.2936973401206709e-36, 3.4576638739475986e-11],
614                [3.08052592995522e-25, 3.0664863233974733e-18],
615                [4.3073010662264811e-33, 4.3519465964385832e-30],
616            ],
617        );
618        assert_mat(
619            &ds.gene_bonferroni_p_value,
620            &[
621                [2.3179315147636661e-21, 1.8221109981110334e-20],
622                [4.5873946802413418e-36, 6.9153277478951973e-11],
623                [3.08052592995522e-25, 3.0664863233974733e-18],
624                [8.6146021324529622e-33, 8.7038931928770445e-30],
625            ],
626        );
627    }
628
629    #[test]
630    fn diff_splice_exon_level_matches_r() {
631        let ds = fixture();
632        assert_eq!(
633            ds.exon_geneid,
634            ["G1", "G1", "G1", "G2", "G2", "G3", "G3", "G3", "G3", "G5", "G5"]
635        );
636        assert_eq!(
637            ds.exon_id,
638            ["1", "2", "3", "4", "5", "6", "7", "8", "9", "11", "12"]
639        );
640        assert_mat(
641            &ds.coefficients,
642            &[
643                [0.44999999999999707, 3.0833333333333366],
644                [-2.2999999999999985, -0.1166666666666679],
645                [1.8500000000000014, -2.966666666666669],
646                [5.9666666666666686, -1.8333333333333348],
647                [-5.9666666666666694, 1.8333333333333348],
648                [-0.9111111111111091, -0.20000000000000048],
649                [2.7333333333333307, -1.1777777777777767],
650                [-2.3777777777777787, 2.5555555555555558],
651                [0.55555555555555591, -1.1777777777777783],
652                [-5.0000000000000036, 6.0000000000000018],
653                [5.0000000000000036, -6.0000000000000027],
654            ],
655        );
656        assert_mat(
657            &ds.t,
658            &[
659                [3.5057903027150417, 16.985522142464994],
660                [-17.918483769432541, -0.64269543241760041],
661                [14.412693466717498, -16.342826710047394],
662                [40.278118989766433, -8.7511240721264709],
663                [-40.27811898976644, 8.7511240721264709],
664                [-7.5307529659810823, -1.1689126440774047],
665                [22.592258897943278, -6.883596681789137],
666                [-19.653428472194584, 14.93610600765569],
667                [4.5919225402323809, -6.8835966817891467],
668                [-33.75219393365807, 28.639686252495675],
669                [33.75219393365807, -28.639686252495682],
670            ],
671        );
672        assert_mat(
673            &ds.p_value,
674            &[
675                [0.0010604632227702825, 6.0737033270367781e-21],
676                [7.7264383825455539e-22, 0.52375685669844319],
677                [2.8381673681066256e-18, 2.6414727327492922e-20],
678                [2.2936973401206709e-36, 3.4576638739475986e-11],
679                [2.2936973401206709e-36, 3.4576638739475986e-11],
680                [1.9210853211775688e-09, 0.2487328076651891],
681                [7.7013148248880499e-26, 1.6935989640586146e-08],
682                [2.0608265277598819e-23, 7.6662158084936832e-19],
683                [3.6630572076199251e-05, 1.6935989640585597e-08],
684                [4.3073010662264811e-33, 4.3519465964385832e-30],
685                [4.3073010662264811e-33, 4.3519465964385222e-30],
686            ],
687        );
688    }
689
690    #[test]
691    fn top_splice_f_matches_r() {
692        let ds = fixture();
693        let top = top_splice(&ds, 1, SpliceTest::F, 100, 1.0, SpliceSort::P, 0.0).unwrap();
694        let ids: Vec<&str> = top.iter().map(|r| r.id.as_str()).collect();
695        assert_eq!(ids, ["G5", "G1", "G3", "G2"]);
696        let f = [
697            820.2316286413901,
698            185.3363349152192,
699            79.805356499122354,
700            76.582172525751389,
701        ];
702        let p = [
703            4.3519465964385832e-30,
704            3.6850195711806162e-22,
705            7.91897383774075e-18,
706            3.4576638739475986e-11,
707        ];
708        let fdr = [
709            1.7407786385754333e-29,
710            7.3700391423612324e-22,
711            1.0558631783654333e-17,
712            3.4576638739475986e-11,
713        ];
714        for (i, row) in top.iter().enumerate() {
715            assert!(rclose(row.f.unwrap(), f[i]));
716            assert!(rclose(row.p_value, p[i]));
717            assert!(rclose(row.fdr, fdr[i]));
718        }
719    }
720
721    #[test]
722    fn top_splice_simes_matches_r() {
723        let ds = fixture();
724        let top = top_splice(&ds, 1, SpliceTest::Simes, 100, 1.0, SpliceSort::P, 0.0).unwrap();
725        let ids: Vec<&str> = top.iter().map(|r| r.id.as_str()).collect();
726        assert_eq!(ids, ["G5", "G1", "G3", "G2"]);
727        let p = [
728            4.3519465964385832e-30,
729            1.8221109981110334e-20,
730            3.0664863233974733e-18,
731            3.4576638739475986e-11,
732        ];
733        let fdr = [
734            1.7407786385754333e-29,
735            3.6442219962220669e-20,
736            4.0886484311966305e-18,
737            3.4576638739475986e-11,
738        ];
739        for (i, row) in top.iter().enumerate() {
740            assert!(rclose(row.p_value, p[i]));
741            assert!(rclose(row.fdr, fdr[i]));
742        }
743    }
744
745    #[test]
746    fn top_splice_t_matches_r() {
747        let ds = fixture();
748        let top = top_splice(&ds, 1, SpliceTest::T, 100, 1.0, SpliceSort::P, 0.0).unwrap();
749        let ids: Vec<&str> = top.iter().map(|r| r.id.as_str()).collect();
750        // Exons 11 and 12 (gene G5) tie to 15 significant digits (p ~= 4.35e-30);
751        // their relative order is decided by sub-ULP rounding in `pt`, so only
752        // assert they occupy the top two slots. The remaining order is exact.
753        let mut head: Vec<&str> = ids[..2].to_vec();
754        head.sort_unstable();
755        assert_eq!(head, ["11", "12"]);
756        assert_eq!(&ids[2..], &["1", "3", "8", "4", "5", "9", "7", "6", "2"]);
757        for row in &top[..2] {
758            assert!(rclose(row.p_value, 4.3519465964385832e-30));
759            assert!(rclose(row.fdr, 2.3935706280412207e-29));
760            assert!(rclose(row.logfc.unwrap().abs(), 6.0));
761        }
762        assert!(rclose(top[2].p_value, 6.0737033270367781e-21));
763        assert!(rclose(top[10].p_value, 0.52375685669844319));
764    }
765}