use std::collections::HashSet;
use anyhow::{bail, Result};
use ndarray::{Array1, Array2};
use statrs::distribution::{Beta, ContinuousCDF, Normal, StudentsT};
use crate::ebayes::{fit_fdist, squeeze_var, squeeze_var_post, tmixture_vector};
use crate::linalg::{eigh, matrix_rank, qr_econ, qr_full_q};
use crate::proptruenull::{prop_true_null, PropTrueNullMethod};
use crate::rng::RRng;
use crate::special::gauss_legendre_01;
use crate::zscore::{zscore_t, ZscoreTMethod};
fn rank_average(x: &[f64]) -> Vec<f64> {
let n = x.len();
let mut idx: Vec<usize> = (0..n).collect();
idx.sort_by(|&a, &b| x[a].partial_cmp(&x[b]).unwrap());
let mut ranks = vec![0.0; n];
let mut i = 0;
while i < n {
let mut j = i;
while j + 1 < n && x[idx[j + 1]] == x[idx[i]] {
j += 1;
}
let avg = ((i + 1 + j + 1) as f64) / 2.0;
for &k in &idx[i..=j] {
ranks[k] = avg;
}
i = j + 1;
}
ranks
}
fn rank_average_and_ties(x: &[f64]) -> (Vec<f64>, Vec<usize>) {
let n = x.len();
let mut idx: Vec<usize> = (0..n).collect();
idx.sort_by(|&a, &b| x[a].partial_cmp(&x[b]).unwrap());
let mut ranks = vec![0.0; n];
let mut sizes = Vec::new();
let mut i = 0;
while i < n {
let mut j = i;
while j + 1 < n && x[idx[j + 1]] == x[idx[i]] {
j += 1;
}
let avg = ((i + 1 + j + 1) as f64) / 2.0;
for &k in &idx[i..=j] {
ranks[k] = avg;
}
sizes.push(j - i + 1);
i = j + 1;
}
(ranks, sizes)
}
fn pt_lower(x: f64, df: f64) -> f64 {
if df.is_infinite() {
Normal::new(0.0, 1.0).unwrap().cdf(x)
} else {
StudentsT::new(0.0, 1.0, df).unwrap().cdf(x)
}
}
fn pt_upper(x: f64, df: f64) -> f64 {
pt_lower(-x, df)
}
pub fn rank_sum_test_with_correlation(
index: &[usize],
statistics: &[f64],
correlation: f64,
df: f64,
) -> (f64, f64) {
let (r, tie_sizes) = rank_average_and_ties(statistics);
rank_sum_core(index, statistics.len(), &r, &tie_sizes, correlation, df)
}
fn rank_sum_core(
index: &[usize],
n: usize,
r: &[f64],
tie_sizes: &[usize],
correlation: f64,
df: f64,
) -> (f64, f64) {
let n1 = index.len();
let n2 = n - n1;
let sum_r1: f64 = index.iter().map(|&i| r[i]).sum();
let n1f = n1 as f64;
let n2f = n2 as f64;
let nf = n as f64;
let u = n1f * n2f + n1f * (n1f + 1.0) / 2.0 - sum_r1;
let mu = n1f * n2f / 2.0;
let mut sigma2 = if correlation == 0.0 || n1 == 1 {
n1f * n2f * (nf + 1.0) / 12.0
} else {
let s = std::f64::consts::FRAC_PI_2 * n1f * n2f
+ 0.5_f64.asin() * n1f * n2f * (n2f - 1.0)
+ (correlation / 2.0).asin() * n1f * (n1f - 1.0) * n2f * (n2f - 1.0)
+ ((correlation + 1.0) / 2.0).asin() * n1f * (n1f - 1.0) * n2f;
s / 2.0 / std::f64::consts::PI
};
if tie_sizes.iter().any(|&c| c > 1) {
let adjustment: f64 = tie_sizes
.iter()
.map(|&c| {
let cf = c as f64;
cf * (cf + 1.0) * (cf - 1.0)
})
.sum::<f64>()
/ (nf * (nf + 1.0) * (nf - 1.0));
sigma2 *= 1.0 - adjustment;
}
let sd = sigma2.sqrt();
let zlowertail = (u + 0.5 - mu) / sd;
let zuppertail = (u - 0.5 - mu) / sd;
let less = pt_upper(zuppertail, df);
let greater = pt_lower(zlowertail, df);
(less, greater)
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Alternative {
Up,
Down,
Either,
Mixed,
}
pub fn gene_set_test(index: &[usize], statistics: &[f64], alternative: Alternative) -> f64 {
let mut stats = statistics.to_vec();
let mut alt = alternative;
match alt {
Alternative::Mixed => {
for s in stats.iter_mut() {
*s = s.abs();
}
}
Alternative::Down => {
for s in stats.iter_mut() {
*s = -*s;
}
alt = Alternative::Up;
}
_ => {}
}
let (less, greater) = rank_sum_test_with_correlation(index, &stats, 0.0, f64::INFINITY);
match alt {
Alternative::Up => greater,
Alternative::Either => 2.0 * less.min(greater),
Alternative::Mixed => greater,
Alternative::Down => less,
}
}
pub fn wilcox_gst(index: &[usize], statistics: &[f64]) -> f64 {
gene_set_test(index, statistics, Alternative::Mixed)
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Direction {
Up,
Down,
}
#[derive(Clone, Debug)]
pub struct CameraResult {
pub set: usize,
pub n_genes: usize,
pub direction: Direction,
pub p_value: f64,
pub fdr: f64,
}
fn mean(x: &[f64]) -> f64 {
x.iter().sum::<f64>() / x.len() as f64
}
fn sample_var(x: &[f64]) -> f64 {
let n = x.len();
let m = mean(x);
x.iter().map(|&v| (v - m) * (v - m)).sum::<f64>() / (n as f64 - 1.0)
}
pub fn camera_pr(
statistic: &[f64],
index: &[Vec<usize>],
inter_gene_cor: f64,
use_ranks: bool,
sort: bool,
) -> Vec<CameraResult> {
let g = statistic.len();
let gf = g as f64;
let mean_stat = mean(statistic);
let var_stat = sample_var(statistic);
let df = if use_ranks { f64::INFINITY } else { gf - 2.0 };
let ranks = use_ranks.then(|| rank_average_and_ties(statistic));
let mut rows: Vec<CameraResult> = Vec::with_capacity(index.len());
for (si, iset) in index.iter().enumerate() {
let m = iset.len();
let (down, up) = if let Some((r, tie_sizes)) = ranks.as_ref() {
rank_sum_core(iset, g, r, tie_sizes, inter_gene_cor, df)
} else {
let mf = m as f64;
let m2 = gf - mf;
let vif = 1.0 + (mf - 1.0) * inter_gene_cor;
let mean_in_set = iset.iter().map(|&i| statistic[i]).sum::<f64>() / mf;
let delta = gf / m2 * (mean_in_set - mean_stat);
let var_pooled = ((gf - 1.0) * var_stat - delta * delta * mf * m2 / gf) / (gf - 2.0);
let t = delta / (var_pooled * (vif / mf + 1.0 / m2)).sqrt();
(pt_lower(t, df), pt_upper(t, df))
};
let p_value = 2.0 * down.min(up);
let direction = if down < up {
Direction::Down
} else {
Direction::Up
};
rows.push(CameraResult {
set: si,
n_genes: m,
direction,
p_value,
fdr: f64::NAN,
});
}
if rows.len() > 1 {
let pvals: Vec<f64> = rows.iter().map(|r| r.p_value).collect();
let fdr = crate::toptable::p_adjust_bh(&pvals);
for (r, f) in rows.iter_mut().zip(fdr) {
r.fdr = f;
}
} else if let Some(r) = rows.first_mut() {
r.fdr = r.p_value;
}
if sort && rows.len() > 1 {
rows.sort_by(|a, b| a.p_value.partial_cmp(&b.p_value).unwrap());
}
rows
}
pub fn inter_gene_correlation(y: &Array2<f64>, design: &Array2<f64>) -> (f64, f64) {
let g = y.nrows();
let n = y.ncols();
let rank = matrix_rank(design);
let nres = n - rank;
let qfull = qr_full_q(design);
let effects = qfull.t().dot(&y.t());
let mut sigma = vec![0.0; g];
for (gi, s) in sigma.iter_mut().enumerate() {
let mut acc = 0.0;
for k in rank..n {
let e = effects[[k, gi]];
acc += e * e;
}
*s = (acc / nres as f64).sqrt();
}
let mut sumsq = 0.0;
for k in rank..n {
let mut ubar = 0.0;
for gi in 0..g {
ubar += effects[[k, gi]] / sigma[gi];
}
ubar /= g as f64;
sumsq += ubar * ubar;
}
let vif = g as f64 * sumsq / nres as f64;
let correlation = (vif - 1.0) / (g as f64 - 1.0);
(vif, correlation)
}
fn zscore_t_hill(x: f64, df: f64) -> f64 {
let a = df - 0.5;
let b = 48.0 * a * a;
let mut z = a * (x * x / df).ln_1p();
z = (((((-0.4 * z - 3.3) * z - 24.0) * z - 85.5) / (0.8 * z * z + 100.0 + b) + z + 3.0) / b
+ 1.0)
* z.sqrt();
z * x.signum()
}
fn move_coef_last(design: &Array2<f64>, coef: usize) -> Array2<f64> {
let p = design.ncols();
if coef == p - 1 {
return design.to_owned();
}
let n = design.nrows();
let mut order: Vec<usize> = (0..p).filter(|&c| c != coef).collect();
order.push(coef);
let mut out = Array2::<f64>::zeros((n, p));
for (newj, &oldj) in order.iter().enumerate() {
out.column_mut(newj).assign(&design.column(oldj));
}
out
}
#[derive(Clone, Debug)]
pub struct ContrastAsCoef {
pub design: Array2<f64>,
pub coef: Vec<usize>,
pub rank: usize,
}
pub fn contrast_as_coef(
design: &Array2<f64>,
contrast: &Array2<f64>,
first: bool,
) -> Result<ContrastAsCoef> {
let n = design.nrows();
let p = design.ncols();
if contrast.nrows() != p {
bail!(
"contrast_as_coef: contrast rows ({}) must match design cols ({})",
contrast.nrows(),
p
);
}
let nc = contrast.ncols();
let rank = matrix_rank(contrast);
if rank == 0 {
bail!("contrast_as_coef: contrast is all zero");
}
if rank != nc {
bail!(
"contrast_as_coef: only full-column-rank contrasts are supported (rank {} of {} columns)",
rank,
nc
);
}
let k = nc;
let qfull = qr_full_q(contrast); let (_, rmat) = qr_econ(contrast); let mut designt = qfull.t().dot(&design.t());
for col in 0..n {
for i in (0..k).rev() {
let mut s = designt[[i, col]];
for j in (i + 1)..k {
s -= rmat[[i, j]] * designt[[j, col]];
}
designt[[i, col]] = s / rmat[[i, i]];
}
}
let reformed = designt.t().to_owned();
if first {
Ok(ContrastAsCoef {
design: reformed,
coef: (0..k).collect(),
rank,
})
} else {
let mut out = Array2::<f64>::zeros((n, p));
for (newj, oldj) in (k..p).chain(0..k).enumerate() {
out.column_mut(newj).assign(&reformed.column(oldj));
}
Ok(ContrastAsCoef {
design: out,
coef: (p - k..p).collect(),
rank,
})
}
}
pub fn camera(
exprs: &Array2<f64>,
design: &Array2<f64>,
coef: usize,
index: &[Vec<usize>],
inter_gene_cor: f64,
use_ranks: bool,
sort: bool,
) -> Result<Vec<CameraResult>> {
let g = exprs.nrows();
let n = exprs.ncols();
let p = design.ncols();
assert!(g >= 3, "camera: need at least 3 genes");
let df_residual = n as f64 - p as f64;
assert!(df_residual >= 1.0, "camera: no residual df");
let design = move_coef_last(design, coef);
let qfull = qr_full_q(&design);
let (_, r) = qr_econ(&design);
let effects = qfull.t().dot(&exprs.t());
let sign = if r[[p - 1, p - 1]] < 0.0 { -1.0 } else { 1.0 };
let unscaledt: Vec<f64> = (0..g).map(|gi| effects[[p - 1, gi]] * sign).collect();
let mut sigma2 = Array1::<f64>::zeros(g);
for (gi, s) in sigma2.iter_mut().enumerate() {
let mut acc = 0.0;
for k in p..n {
let e = effects[[k, gi]];
acc += e * e;
}
*s = acc / df_residual;
}
let sv = squeeze_var(&sigma2, &Array1::from_elem(g, df_residual), None, false)?;
let mut stat = vec![0.0; g];
if use_ranks {
for gi in 0..g {
stat[gi] = unscaledt[gi] / sv.var_post[gi].sqrt();
}
} else {
let df_total = (df_residual + sv.df_prior[0]).min(g as f64 * df_residual);
for gi in 0..g {
let modt = unscaledt[gi] / sv.var_post[gi].sqrt();
stat[gi] = zscore_t_hill(modt, df_total);
}
}
let cor = inter_gene_cor.max(0.0);
Ok(camera_pr(&stat, index, cor, use_ranks, sort))
}
fn lm_effects(exprs: &Array2<f64>, design: &Array2<f64>, coef: usize) -> Array2<f64> {
let g = exprs.nrows();
let n = exprs.ncols();
let p = design.ncols();
let design = move_coef_last(design, coef);
let qfull = qr_full_q(&design);
let (_, r) = qr_econ(&design);
let full = qfull.t().dot(&exprs.t()); let signc = if r[[p - 1, p - 1]] < 0.0 { -1.0 } else { 1.0 };
let neff = n - p + 1;
let mut eff = Array2::<f64>::zeros((g, neff));
for gi in 0..g {
eff[[gi, 0]] = full[[p - 1, gi]] * signc;
for k in 1..neff {
eff[[gi, k]] = full[[p - 1 + k, gi]];
}
}
eff
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FrySort {
Directional,
Mixed,
NoSort,
}
#[derive(Clone, Debug)]
pub struct FryResult {
pub set: usize,
pub n_genes: usize,
pub direction: Direction,
pub p_value: f64,
pub fdr: f64,
pub p_value_mixed: f64,
pub fdr_mixed: f64,
}
fn fry_mixed_pvalue(eff: &Array2<f64>, iset: &[usize]) -> f64 {
let neff = eff.ncols();
let m = iset.len();
let mut a: Vec<f64> = if neff <= m {
let mut gram = Array2::<f64>::zeros((neff, neff));
for &gi in iset {
for i in 0..neff {
for j in 0..neff {
gram[[i, j]] += eff[[gi, i]] * eff[[gi, j]];
}
}
}
eigh(&gram).0.to_vec()
} else {
let mut gram = Array2::<f64>::zeros((m, m));
for (ai, &gi) in iset.iter().enumerate() {
for (bi, &gj) in iset.iter().enumerate() {
let mut s = 0.0;
for k in 0..neff {
s += eff[[gi, k]] * eff[[gj, k]];
}
gram[[ai, bi]] = s;
}
}
eigh(&gram).0.to_vec()
};
a.reverse();
let d1 = a.len();
let d1f = d1 as f64;
let d = d1f - 1.0;
let beta_mean = 1.0 / d1f;
let beta_var = d / d1f / d1f / (d1f / 2.0 + 1.0);
let a1 = a[0];
let ad1 = a[d1 - 1];
let span = a1 - ad1;
let sum_col1_sq: f64 = iset.iter().map(|&gi| eff[[gi, 0]] * eff[[gi, 0]]).sum();
let fobs = (sum_col1_sq - ad1) / span;
let suma: f64 = a.iter().sum();
let suma2: f64 = a.iter().map(|&v| v * v).sum();
let frb_mean = (suma * beta_mean - ad1) / span;
let quad = beta_var * suma2 - (beta_var / d) * (suma * suma - suma2);
let frb_var = quad / (span * span);
let alphaplusbeta = frb_mean * (1.0 - frb_mean) / frb_var - 1.0;
let alpha = alphaplusbeta * frb_mean;
let beta = alphaplusbeta - alpha;
let dist = Beta::new(alpha, beta).unwrap();
1.0 - dist.cdf(fobs)
}
pub fn fry(
exprs: &Array2<f64>,
design: &Array2<f64>,
coef: usize,
index: &[Vec<usize>],
sort: FrySort,
) -> Result<Vec<FryResult>> {
let mut eff = lm_effects(exprs, design, coef);
let g = eff.nrows();
let neff = eff.ncols();
let df_residual = (neff - 1) as f64;
let (nodes, weights) = gauss_legendre_01(128);
let normal = Normal::new(0.0, 1.0).unwrap();
let mut eu2max = 0.0;
for (&x, &w) in nodes.iter().zip(weights.iter()) {
let q = normal.inverse_cdf((x + 1.0) / 2.0);
eu2max += (df_residual + 1.0) * x.powf(df_residual) * (q * q) * w;
}
let mut s2_robust = Array1::<f64>::zeros(g);
let mut s2 = Array1::<f64>::zeros(g);
for gi in 0..g {
let mut sumsq = 0.0;
let mut maxsq = f64::NEG_INFINITY;
let mut sumsq_resid = 0.0;
for k in 0..neff {
let e2 = eff[[gi, k]] * eff[[gi, k]];
sumsq += e2;
if e2 > maxsq {
maxsq = e2;
}
if k >= 1 {
sumsq_resid += e2;
}
}
s2_robust[gi] = (sumsq - maxsq) / (df_residual + 1.0 - eu2max);
s2[gi] = sumsq_resid / df_residual;
}
let (scale, df2) = fit_fdist(&s2, &Array1::from_elem(g, df_residual));
let s2_robust = squeeze_var_post(
&s2_robust,
&Array1::from_elem(g, 0.92 * df_residual),
&Array1::from_elem(g, scale),
&Array1::from_elem(g, df2),
);
for gi in 0..g {
let s = s2_robust[gi].sqrt();
for k in 0..neff {
eff[[gi, k]] /= s;
}
}
let mut rows: Vec<FryResult> = Vec::with_capacity(index.len());
for (si, iset) in index.iter().enumerate() {
let m = iset.len();
let mut colmean = vec![0.0; neff];
for &gi in iset {
for (k, cm) in colmean.iter_mut().enumerate() {
*cm += eff[[gi, k]];
}
}
for cm in colmean.iter_mut() {
*cm /= m as f64;
}
let mean_resid_sq = colmean[1..].iter().map(|&v| v * v).sum::<f64>() / (neff - 1) as f64;
let t_stat = colmean[0] / mean_resid_sq.sqrt();
let direction = if t_stat < 0.0 {
Direction::Down
} else {
Direction::Up
};
let p_value = 2.0 * pt_lower(-t_stat.abs(), df_residual);
let p_value_mixed = if m > 1 {
fry_mixed_pvalue(&eff, iset)
} else {
p_value
};
rows.push(FryResult {
set: si,
n_genes: m,
direction,
p_value,
fdr: f64::NAN,
p_value_mixed,
fdr_mixed: f64::NAN,
});
}
if rows.len() > 1 {
let p: Vec<f64> = rows.iter().map(|r| r.p_value).collect();
let pm: Vec<f64> = rows.iter().map(|r| r.p_value_mixed).collect();
let fdr = crate::toptable::p_adjust_bh(&p);
let fdr_mixed = crate::toptable::p_adjust_bh(&pm);
for (r, (f, fm)) in rows.iter_mut().zip(fdr.into_iter().zip(fdr_mixed)) {
r.fdr = f;
r.fdr_mixed = fm;
}
} else if let Some(r) = rows.first_mut() {
r.fdr = r.p_value;
r.fdr_mixed = r.p_value_mixed;
}
match sort {
FrySort::Directional => rows.sort_by(|a, b| {
a.p_value
.partial_cmp(&b.p_value)
.unwrap()
.then(b.n_genes.cmp(&a.n_genes))
.then(a.p_value_mixed.partial_cmp(&b.p_value_mixed).unwrap())
}),
FrySort::Mixed => rows.sort_by(|a, b| {
a.p_value_mixed
.partial_cmp(&b.p_value_mixed)
.unwrap()
.then(b.n_genes.cmp(&a.n_genes))
.then(a.p_value.partial_cmp(&b.p_value).unwrap())
}),
FrySort::NoSort => {}
}
Ok(rows)
}
#[derive(Clone, Debug)]
pub struct Roast {
pub active_prop: [f64; 4],
pub p_value: [f64; 4],
pub n_genes_in_set: usize,
}
pub fn roast(
exprs: &Array2<f64>,
design: &Array2<f64>,
coef: usize,
index: &[usize],
nrot: usize,
rng: &mut RRng,
) -> Result<Roast> {
let (eff, var_prior, df_prior, var_post_all) = roast_prepare(exprs, design, coef)?;
let (set_eff, var_post) = subset_effects(&eff, &var_post_all, index);
Ok(roast_effects(
&set_eff, var_prior, df_prior, &var_post, nrot, rng,
))
}
fn roast_prepare(
exprs: &Array2<f64>,
design: &Array2<f64>,
coef: usize,
) -> Result<(Array2<f64>, f64, f64, Array1<f64>)> {
let eff = lm_effects(exprs, design, coef);
let g = eff.nrows();
let neff = eff.ncols();
let df_residual = (neff - 1) as f64;
let mut s2 = Array1::<f64>::zeros(g);
for gi in 0..g {
let mut acc = 0.0;
for k in 1..neff {
acc += eff[[gi, k]] * eff[[gi, k]];
}
s2[gi] = acc / df_residual;
}
let sv = squeeze_var(&s2, &Array1::from_elem(g, df_residual), None, false)?;
Ok((eff, sv.var_prior[0], sv.df_prior[0], sv.var_post))
}
fn subset_effects(
eff: &Array2<f64>,
var_post_all: &Array1<f64>,
index: &[usize],
) -> (Array2<f64>, Vec<f64>) {
let neff = eff.ncols();
let nset = index.len();
let mut set_eff = Array2::<f64>::zeros((nset, neff));
let mut var_post = vec![0.0; nset];
for (si, &gi) in index.iter().enumerate() {
for k in 0..neff {
set_eff[[si, k]] = eff[[gi, k]];
}
var_post[si] = var_post_all[gi];
}
(set_eff, var_post)
}
fn roast_effects(
effects: &Array2<f64>,
var_prior: f64,
df_prior: f64,
var_post: &[f64],
nrot: usize,
rng: &mut RRng,
) -> Roast {
let nset = effects.nrows();
let neff = effects.ncols();
let df_residual = (neff - 1) as f64;
let df_total = df_prior + df_residual;
let df_total_winsor = df_total.min(10000.0);
let prior_term = df_prior * var_prior;
let nset_f = nset as f64;
let sqrt2 = std::f64::consts::SQRT_2;
let mut sum_modt = 0.0;
let mut sum_abs_modt = 0.0;
let mut n_up = 0usize;
let mut n_down = 0usize;
for gi in 0..nset {
let modt = zscore_t(
effects[[gi, 0]] / var_post[gi].sqrt(),
df_total_winsor,
ZscoreTMethod::Bailey,
);
sum_modt += modt;
sum_abs_modt += modt.abs();
if modt > sqrt2 {
n_up += 1;
}
if modt < -sqrt2 {
n_down += 1;
}
}
let a1 = n_up as f64 / nset_f;
let a2 = n_down as f64 / nset_f;
let m = sum_modt / nset_f;
let statobs_down = -m;
let statobs_up = m;
let statobs_mixed = sum_abs_modt / nset_f;
let mut rowsq = vec![0.0; nset];
for gi in 0..nset {
let mut acc = 0.0;
for k in 0..neff {
acc += effects[[gi, k]] * effects[[gi, k]];
}
rowsq[gi] = acc;
}
let chunk = 1000usize;
let nchunk = nrot.div_ceil(chunk);
let nroti0 = nrot.div_ceil(nchunk);
let overshoot = nchunk * nroti0 - nrot;
let mut count = [0i64; 4];
for chunki in 0..nchunk {
let nroti = if chunki == nchunk - 1 {
nroti0 - overshoot
} else {
nroti0
};
let draws = rng.rnorm(nroti * neff);
let ctx = RotationCtx {
draws: &draws,
nroti,
neff,
nset,
effects,
rowsq: &rowsq,
prior_term,
df_residual,
df_total,
df_total_winsor,
nset_f,
statobs_down,
statobs_up,
statobs_mixed,
};
let part = ctx.count_rotations();
count[0] += part[0];
count[1] += part[1];
count[3] += part[2];
}
count[2] = count[0].min(count[1]);
let nrot_i = nrot as i64;
let denom = [2 * nrot_i + 1, 2 * nrot_i + 1, nrot_i + 1, nrot_i + 1];
let mut p_value = [0.0; 4];
for i in 0..4 {
p_value[i] = (count[i] as f64 + 1.0) / denom[i] as f64;
}
Roast {
active_prop: [a2, a1, a1.max(a2), a1 + a2],
p_value,
n_genes_in_set: nset,
}
}
struct RotationCtx<'a> {
draws: &'a [f64],
nroti: usize,
neff: usize,
nset: usize,
effects: &'a Array2<f64>,
rowsq: &'a [f64],
prior_term: f64,
df_residual: f64,
df_total: f64,
df_total_winsor: f64,
nset_f: f64,
statobs_down: f64,
statobs_up: f64,
statobs_mixed: f64,
}
impl RotationCtx<'_> {
#[inline]
fn count_one(&self, r: usize, zrow: &mut [f64]) -> [i64; 3] {
let mut znorm = 0.0;
for (k, z) in zrow.iter_mut().enumerate() {
let v = self.draws[k * self.nroti + r];
*z = v;
znorm += v * v;
}
let znorm = znorm.sqrt();
for z in zrow.iter_mut() {
*z /= znorm;
}
let mut sum_z = 0.0;
let mut sum_abs_z = 0.0;
for gi in 0..self.nset {
let mut t = 0.0;
for (k, &zv) in zrow.iter().enumerate() {
t += self.effects[[gi, k]] * zv;
}
let s2r0 = (self.rowsq[gi] - t * t) / self.df_residual;
let s2r = (self.prior_term + self.df_residual * s2r0) / self.df_total;
let z = zscore_t(t / s2r.sqrt(), self.df_total_winsor, ZscoreTMethod::Bailey);
sum_z += z;
sum_abs_z += z.abs();
}
let up_r = sum_z / self.nset_f;
let down_r = -up_r;
let mixed_r = sum_abs_z / self.nset_f;
[
(down_r > self.statobs_down) as i64 + (up_r > self.statobs_down) as i64,
(down_r > self.statobs_up) as i64 + (up_r > self.statobs_up) as i64,
(mixed_r > self.statobs_mixed) as i64,
]
}
#[cfg(feature = "parallel")]
fn count_rotations(&self) -> [i64; 3] {
use rayon::prelude::*;
(0..self.nroti)
.into_par_iter()
.fold(
|| ([0i64; 3], vec![0.0; self.neff]),
|(mut acc, mut zrow), r| {
let c = self.count_one(r, &mut zrow);
acc[0] += c[0];
acc[1] += c[1];
acc[2] += c[2];
(acc, zrow)
},
)
.map(|(acc, _)| acc)
.reduce(|| [0i64; 3], |a, b| [a[0] + b[0], a[1] + b[1], a[2] + b[2]])
}
#[cfg(not(feature = "parallel"))]
fn count_rotations(&self) -> [i64; 3] {
let mut acc = [0i64; 3];
let mut zrow = vec![0.0; self.neff];
for r in 0..self.nroti {
let c = self.count_one(r, &mut zrow);
acc[0] += c[0];
acc[1] += c[1];
acc[2] += c[2];
}
acc
}
}
#[derive(Clone, Debug)]
pub struct MroastRow {
pub set: usize,
pub n_genes: usize,
pub prop_down: f64,
pub prop_up: f64,
pub direction: Direction,
pub p_value: f64,
pub fdr: f64,
pub p_value_mixed: f64,
pub fdr_mixed: f64,
}
#[allow(clippy::too_many_arguments)]
pub fn mroast(
exprs: &Array2<f64>,
design: &Array2<f64>,
coef: usize,
index: &[Vec<usize>],
nrot: usize,
midp: bool,
sort: FrySort,
rng: &mut RRng,
) -> Result<Vec<MroastRow>> {
let (eff, var_prior, df_prior, var_post_all) = roast_prepare(exprs, design, coef)?;
let mut rows = Vec::with_capacity(index.len());
for (si, set) in index.iter().enumerate() {
let (set_eff, var_post) = subset_effects(&eff, &var_post_all, set);
let r = roast_effects(&set_eff, var_prior, df_prior, &var_post, nrot, rng);
let direction = if r.p_value[1] < r.p_value[0] {
Direction::Up
} else {
Direction::Down
};
rows.push(MroastRow {
set: si,
n_genes: r.n_genes_in_set,
prop_down: r.active_prop[0],
prop_up: r.active_prop[1],
direction,
p_value: r.p_value[2],
fdr: f64::NAN,
p_value_mixed: r.p_value[3],
fdr_mixed: f64::NAN,
});
}
let midp_adj = if midp { 0.5 / (nrot as f64 + 1.0) } else { 0.0 };
let two_sided: Vec<f64> = rows.iter().map(|r| r.p_value - midp_adj).collect();
let mixed: Vec<f64> = rows.iter().map(|r| r.p_value_mixed - midp_adj).collect();
let mut fdr = crate::toptable::p_adjust_bh(&two_sided);
let mut fdr_mixed = crate::toptable::p_adjust_bh(&mixed);
if midp {
for (i, r) in rows.iter().enumerate() {
fdr[i] = fdr[i].max(r.p_value);
fdr_mixed[i] = fdr_mixed[i].max(r.p_value_mixed);
}
}
for (r, (f, fm)) in rows.iter_mut().zip(fdr.into_iter().zip(fdr_mixed)) {
r.fdr = f;
r.fdr_mixed = fm;
}
match sort {
FrySort::Directional => rows.sort_by(|a, b| {
a.p_value
.partial_cmp(&b.p_value)
.unwrap()
.then(
b.prop_up
.max(b.prop_down)
.partial_cmp(&a.prop_up.max(a.prop_down))
.unwrap(),
)
.then(b.n_genes.cmp(&a.n_genes))
.then(a.p_value_mixed.partial_cmp(&b.p_value_mixed).unwrap())
}),
FrySort::Mixed => rows.sort_by(|a, b| {
a.p_value_mixed
.partial_cmp(&b.p_value_mixed)
.unwrap()
.then(
(b.prop_up + b.prop_down)
.partial_cmp(&(a.prop_up + a.prop_down))
.unwrap(),
)
.then(b.n_genes.cmp(&a.n_genes))
.then(a.p_value.partial_cmp(&b.p_value).unwrap())
}),
FrySort::NoSort => {}
}
Ok(rows)
}
#[derive(Clone, Debug)]
pub struct RomerRow {
pub set: usize,
pub n_genes: usize,
pub p_up: f64,
pub p_down: f64,
pub p_mixed: f64,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RomerAlternative {
Up,
Down,
Mixed,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RomerStatistic {
Mean,
FloorMean,
Mean50,
}
#[allow(clippy::too_many_arguments)]
pub fn romer(
exprs: &Array2<f64>,
design: &Array2<f64>,
coef: usize,
index: &[Vec<usize>],
set_statistic: RomerStatistic,
nrot: usize,
shrink_resid: bool,
rng: &mut RRng,
) -> Result<Vec<RomerRow>> {
let g = exprs.nrows();
let n = exprs.ncols();
let p = design.ncols();
let d = (n - p) as f64;
let p0 = p - 1;
let neff = n - p0;
let design = move_coef_last(design, coef);
let qfull = qr_full_q(&design);
let (_, rmat) = qr_econ(&design);
let full = qfull.t().dot(&exprs.t()); let signc = if rmat[[p - 1, p - 1]] < 0.0 {
-1.0
} else {
1.0
};
let mut s2 = Array1::<f64>::zeros(g);
for (gi, s) in s2.iter_mut().enumerate() {
let mut acc = 0.0;
for k in p..n {
let e = full[[k, gi]];
acc += e * e;
}
*s = acc / d;
}
let sv = squeeze_var(&s2, &Array1::from_elem(g, d), None, false)?;
let d0 = sv.df_prior[0];
let s02 = sv.var_prior[0];
let mut ymat = Array2::<f64>::zeros((g, neff));
let mut yy = vec![0.0; g];
let mut modt = vec![0.0; g];
for gi in 0..g {
let mut acc = 0.0;
for k in 0..neff {
let e = full[[p0 + k, gi]];
ymat[[gi, k]] = e;
acc += e * e;
}
yy[gi] = acc;
modt[gi] = signc * ymat[[gi, 0]] / sv.var_post[gi].sqrt();
}
if shrink_resid {
let pvals: Vec<f64> = modt
.iter()
.map(|&m| 2.0 * pt_upper(m.abs(), d0 + d))
.collect();
let proportion = 1.0 - prop_true_null(&pvals, PropTrueNullMethod::Lfdr, 20);
let stdev_unscaled = 1.0 / rmat[[p - 1, p - 1]].abs();
let var_unscaled = stdev_unscaled * stdev_unscaled;
let df_total = d + d0;
let var_prior_lim = (0.01 / s02, 16.0 / s02);
let su = vec![stdev_unscaled; g];
let dt = vec![df_total; g];
let mut var_prior = tmixture_vector(&modt, &su, &dt, proportion, var_prior_lim);
if var_prior.is_nan() {
var_prior = 1.0 / s02;
}
let r = (var_unscaled + var_prior) / var_unscaled;
let logodds = (proportion / (1.0 - proportion)).ln() - r.ln() / 2.0;
for gi in 0..g {
let t2 = modt[gi] * modt[gi];
let kernel = if d0 > 1e6 {
t2 * (1.0 - 1.0 / r) / 2.0
} else {
(1.0 + df_total) / 2.0 * ((t2 + df_total) / (t2 / r + df_total)).ln()
};
let lods = logodds + kernel;
let prob_de = lods.exp() / (1.0 + lods.exp());
ymat[[gi, 0]] *= (var_unscaled / (var_unscaled + var_prior * prob_de)).sqrt();
}
}
let gf = g as f64;
let obs = romer_set_stats(&modt, index, gf, set_statistic);
let down_low = matches!(set_statistic, RomerStatistic::Mean50);
let mut draws = Vec::with_capacity(nrot * neff);
for _ in 0..nrot {
draws.extend_from_slice(&rng.rnorm(neff));
}
let ctx = RomerRotCtx {
draws: &draws,
neff,
ymat: &ymat,
yy: &yy,
index,
obs: &obs,
gf,
set_statistic,
d,
d0,
s02,
signc,
g,
down_low,
};
let nset = index.len();
let count: Vec<[i64; 3]> = {
#[cfg(feature = "parallel")]
{
use rayon::prelude::*;
(0..nrot)
.into_par_iter()
.fold(
|| (vec![[0i64; 3]; nset], vec![0.0f64; neff], vec![0.0f64; g]),
|(mut acc, mut rvec, mut modtr), r| {
ctx.add_rotation(r, &mut rvec, &mut modtr, &mut acc);
(acc, rvec, modtr)
},
)
.map(|(acc, _, _)| acc)
.reduce(
|| vec![[0i64; 3]; nset],
|mut a, b| {
for (x, y) in a.iter_mut().zip(&b) {
x[0] += y[0];
x[1] += y[1];
x[2] += y[2];
}
a
},
)
}
#[cfg(not(feature = "parallel"))]
{
let mut acc = vec![[0i64; 3]; nset];
let mut rvec = vec![0.0f64; neff];
let mut modtr = vec![0.0f64; g];
for r in 0..nrot {
ctx.add_rotation(r, &mut rvec, &mut modtr, &mut acc);
}
acc
}
};
let denom = nrot as f64 + 1.0;
Ok(index
.iter()
.enumerate()
.map(|(si, set)| RomerRow {
set: si,
n_genes: set.len(),
p_up: (count[si][0] as f64 + 1.0) / denom,
p_down: (count[si][1] as f64 + 1.0) / denom,
p_mixed: (count[si][2] as f64 + 1.0) / denom,
})
.collect())
}
struct RomerRotCtx<'a> {
draws: &'a [f64],
neff: usize,
ymat: &'a Array2<f64>,
yy: &'a [f64],
index: &'a [Vec<usize>],
obs: &'a [[f64; 3]],
gf: f64,
set_statistic: RomerStatistic,
d: f64,
d0: f64,
s02: f64,
signc: f64,
g: usize,
down_low: bool,
}
impl RomerRotCtx<'_> {
fn add_rotation(&self, r: usize, rvec: &mut [f64], modtr: &mut [f64], acc: &mut [[i64; 3]]) {
let row = &self.draws[r * self.neff..(r + 1) * self.neff];
let mut nrm = 0.0;
for (k, &v) in row.iter().enumerate() {
rvec[k] = v;
nrm += v * v;
}
let nrm = nrm.sqrt();
for v in rvec.iter_mut() {
*v /= nrm;
}
for (gi, m) in modtr.iter_mut().enumerate().take(self.g) {
let mut br = 0.0;
for (k, &rv) in rvec.iter().enumerate().take(self.neff) {
br += rv * self.ymat[[gi, k]];
}
let s2r = (self.yy[gi] - br * br) / self.d;
let sdr_post = if self.d0.is_finite() {
((self.d0 * self.s02 + self.d * s2r) / (self.d0 + self.d)).sqrt()
} else {
self.s02.sqrt()
};
*m = self.signc * br / sdr_post;
}
let rot = romer_set_stats(modtr, self.index, self.gf, self.set_statistic);
for (c, (o, rr)) in acc.iter_mut().zip(self.obs.iter().zip(&rot)) {
if rr[0] >= o[0] {
c[0] += 1;
}
let down_hit = if self.down_low {
rr[1] <= o[1]
} else {
rr[1] >= o[1]
};
if down_hit {
c[1] += 1;
}
if rr[2] >= o[2] {
c[2] += 1;
}
}
}
}
fn set_mean_ranks(stat: &[f64], index: &[Vec<usize>], gf: f64) -> Vec<[f64; 3]> {
let r = rank_average(stat);
let abs: Vec<f64> = stat.iter().map(|v| v.abs()).collect();
let ra = rank_average(&abs);
index
.iter()
.map(|set| {
let sz = set.len() as f64;
let mut up = 0.0;
let mut dn = 0.0;
let mut mx = 0.0;
for &gi in set {
up += r[gi];
dn += gf - r[gi] + 1.0;
mx += ra[gi];
}
[up / sz, dn / sz, mx / sz]
})
.collect()
}
fn romer_set_stats(
stat: &[f64],
index: &[Vec<usize>],
gf: f64,
set_statistic: RomerStatistic,
) -> Vec<[f64; 3]> {
match set_statistic {
RomerStatistic::Mean => set_mean_ranks(stat, index, gf),
RomerStatistic::FloorMean => {
let up_r = rank_average(&stat.iter().map(|&v| v.max(0.0)).collect::<Vec<_>>());
let dn_r = rank_average(&stat.iter().map(|&v| (-v).max(0.0)).collect::<Vec<_>>());
let mx_r = rank_average(&stat.iter().map(|&v| v.abs().max(1.0)).collect::<Vec<_>>());
index
.iter()
.map(|set| {
let sz = set.len() as f64;
let mut up = 0.0;
let mut dn = 0.0;
let mut mx = 0.0;
for &gi in set {
up += up_r[gi];
dn += dn_r[gi];
mx += mx_r[gi];
}
[up / sz, dn / sz, mx / sz]
})
.collect()
}
RomerStatistic::Mean50 => {
let r = rank_average(stat);
let ra = rank_average(&stat.iter().map(|&v| v.abs()).collect::<Vec<_>>());
index
.iter()
.map(|set| {
let m = set.len().div_ceil(2); let r_set: Vec<f64> = set.iter().map(|&gi| r[gi]).collect();
let ra_set: Vec<f64> = set.iter().map(|&gi| ra[gi]).collect();
let (small, large) = mean_half(&r_set, m);
let (_, large_abs) = mean_half(&ra_set, m);
[large, small, large_abs]
})
.collect()
}
}
}
fn mean_half(x: &[f64], n: usize) -> (f64, f64) {
let l = x.len();
let mut a = x.to_vec();
a.sort_by(|p, q| p.partial_cmp(q).unwrap());
let small = a[..n].iter().sum::<f64>() / n as f64;
let large = if l % 2 == 0 {
a[n..].iter().sum::<f64>() / (l - n) as f64
} else {
a[(n - 1)..].iter().sum::<f64>() / (l - n + 1) as f64
};
(small, large)
}
pub fn top_romer(rows: &[RomerRow], n: usize, alternative: RomerAlternative) -> Vec<RomerRow> {
let mut idx: Vec<usize> = (0..rows.len()).collect();
let key = |r: &RomerRow| match alternative {
RomerAlternative::Up => r.p_up,
RomerAlternative::Down => r.p_down,
RomerAlternative::Mixed => r.p_mixed,
};
idx.sort_by(|&a, &b| {
let primary = key(&rows[a]).partial_cmp(&key(&rows[b])).unwrap();
let secondary = match alternative {
RomerAlternative::Mixed => rows[a]
.p_up
.min(rows[a].p_down)
.partial_cmp(&rows[b].p_up.min(rows[b].p_down))
.unwrap(),
_ => rows[a].p_mixed.partial_cmp(&rows[b].p_mixed).unwrap(),
};
primary
.then(secondary)
.then(rows[b].n_genes.cmp(&rows[a].n_genes))
});
idx.into_iter()
.take(n.min(rows.len()))
.map(|i| rows[i].clone())
.collect()
}
pub fn ids2indices(
gene_sets: &[Vec<String>],
identifiers: &[String],
remove_empty: bool,
) -> Vec<Vec<usize>> {
let mut out = Vec::with_capacity(gene_sets.len());
for set in gene_sets {
let want: HashSet<&str> = set.iter().map(|s| s.as_str()).collect();
let idx: Vec<usize> = identifiers
.iter()
.enumerate()
.filter_map(|(i, id)| want.contains(id.as_str()).then_some(i))
.collect();
if remove_empty && idx.is_empty() {
continue;
}
out.push(idx);
}
out
}
#[cfg(test)]
mod tests {
use super::*;
fn fixture() -> Vec<f64> {
vec![
2.1, -0.5, 1.8, 0.3, -1.2, 2.5, -0.1, 1.1, -2.2, 0.7, 1.8, -0.9, 0.4, -1.5, 2.0, -0.3,
1.3, -0.8, 0.6, -2.1,
]
}
fn up_set() -> Vec<usize> {
vec![0, 2, 5, 7, 14, 16]
}
#[test]
fn rank_average_handles_ties() {
let r = rank_average(&fixture());
assert_eq!(r[2], r[10]);
}
#[test]
fn gene_set_test_matches_r() {
let stats = fixture();
let idx = up_set();
let cases = [
(Alternative::Up, 0.000645718763498011),
(Alternative::Down, 0.999517239270778),
(Alternative::Either, 0.00129143752699602),
(Alternative::Mixed, 0.0143292516670446),
];
for (alt, want) in cases {
let got = gene_set_test(&idx, &stats, alt);
assert!(
(got - want).abs() < 1e-9,
"gene_set_test({alt:?}): got {got}, want {want}"
);
}
let w = wilcox_gst(&idx, &stats);
assert!((w - 0.0143292516670446).abs() < 1e-9);
}
#[test]
fn rank_sum_test_matches_r() {
let stats = fixture();
let idx = up_set();
let (less, greater) = rank_sum_test_with_correlation(&idx, &stats, 0.1, 10.0);
assert!((less - 0.991665460749303).abs() < 1e-9);
assert!((greater - 0.0094257162710415).abs() < 1e-9);
let (less, greater) = rank_sum_test_with_correlation(&idx, &stats, 0.0, f64::INFINITY);
assert!((less - 0.999517239270778).abs() < 1e-9);
assert!((greater - 0.000645718763498011).abs() < 1e-9);
let idx2 = [1, 4, 8, 13, 19];
let (less, greater) = rank_sum_test_with_correlation(&idx2, &stats, 0.25, 18.0);
assert!((less - 0.0152351621428473).abs() < 1e-9);
assert!((greater - 0.986720588745113).abs() < 1e-9);
}
fn three_sets() -> Vec<Vec<usize>> {
vec![
vec![0, 2, 5, 7, 14, 16], vec![1, 4, 8, 13, 19], vec![3, 6, 9, 12], ]
}
#[test]
fn camera_pr_parametric_matches_r() {
let stat = fixture();
let sets = three_sets();
let rows = camera_pr(&stat, &sets, 0.01, false, true);
let want = [
(
0usize,
6usize,
Direction::Up,
0.000305279883783743,
0.000489203611923099,
),
(
1,
5,
Direction::Down,
0.000326135741282066,
0.000489203611923099,
),
(2, 4, Direction::Up, 0.91114902618042, 0.91114902618042),
];
assert_eq!(rows.len(), want.len());
for (r, (set, ng, dir, p, fdr)) in rows.iter().zip(want) {
assert_eq!(r.set, set);
assert_eq!(r.n_genes, ng);
assert_eq!(r.direction, dir);
assert!(
(r.p_value - p).abs() < 1e-9,
"p: got {}, want {p}",
r.p_value
);
assert!((r.fdr - fdr).abs() < 1e-9, "fdr: got {}, want {fdr}", r.fdr);
}
}
#[test]
fn camera_pr_use_ranks_matches_r() {
let stat = fixture();
let sets = three_sets();
let rows = camera_pr(&stat, &sets, 0.01, true, true);
let want = [
(
0usize,
Direction::Up,
0.00153858324497317,
0.00385566266453651,
),
(1, Direction::Down, 0.00257044177635767, 0.00385566266453651),
(2, Direction::Up, 0.962711741316641, 0.962711741316641),
];
for (r, (set, dir, p, fdr)) in rows.iter().zip(want) {
assert_eq!(r.set, set);
assert_eq!(r.direction, dir);
assert!(
(r.p_value - p).abs() < 1e-9,
"p: got {}, want {p}",
r.p_value
);
assert!((r.fdr - fdr).abs() < 1e-9, "fdr: got {}, want {fdr}", r.fdr);
}
}
fn camera_exprs() -> Array2<f64> {
Array2::from_shape_vec(
(12, 6),
vec![
4.871, 4.629, 4.697, 5.807, 4.798, 5.195, 6.356, 6.349, 6.764, 4.125, 3.125, 4.752, 4.298, 4.659, 4.508, 5.936, 4.075, 7.367, 8.896, 9.420, 8.915, 9.165, 9.466, 8.598, 6.563, 6.610, 6.813, 6.123, 6.155, 7.309, 4.443, 4.283, 3.851, 5.435, 5.304, 5.784, 7.247, 7.184, 7.620, 6.533, 7.878, 6.820, 7.456, 7.644, 8.368, 9.096, 7.422, 10.245, 7.229, 6.945, 6.986, 8.178, 7.445, 10.159, 5.378, 5.177, 4.919, 7.692, 6.023, 7.432, 8.748, 9.133, 9.280, 9.431, 10.394, 11.954, 6.697, 7.010, 6.719, 4.293, 3.114, 5.796, ],
)
.unwrap()
}
fn camera_design() -> Array2<f64> {
Array2::from_shape_vec(
(6, 2),
vec![1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
)
.unwrap()
}
fn camera_sets() -> Vec<Vec<usize>> {
vec![
vec![0, 1, 2, 3], vec![4, 5, 6, 7, 8], vec![9, 10, 11], ]
}
#[test]
fn inter_gene_correlation_matches_r() {
let (vif, cor) = inter_gene_correlation(&camera_exprs(), &camera_design());
assert!((vif - 3.54050719052897).abs() < 1e-9, "vif: {vif}");
assert!((cor - 0.230955199138998).abs() < 1e-9, "cor: {cor}");
}
#[test]
fn camera_parametric_matches_r() {
let rows = camera(
&camera_exprs(),
&camera_design(),
1,
&camera_sets(),
0.01,
false,
true,
)
.unwrap();
let want = [
(
0usize,
4usize,
Direction::Down,
0.42121952380793,
0.753808705609041,
),
(1, 5, Direction::Up, 0.502539137072694, 0.753808705609041),
(2, 3, Direction::Up, 0.916115986180527, 0.916115986180527),
];
assert_eq!(rows.len(), want.len());
for (r, (set, ng, dir, p, fdr)) in rows.iter().zip(want) {
assert_eq!(r.set, set);
assert_eq!(r.n_genes, ng);
assert_eq!(r.direction, dir);
assert!(
(r.p_value - p).abs() < 1e-7,
"p: got {}, want {p}",
r.p_value
);
assert!((r.fdr - fdr).abs() < 1e-7, "fdr: got {}, want {fdr}", r.fdr);
}
}
#[test]
fn camera_use_ranks_matches_r() {
let rows = camera(
&camera_exprs(),
&camera_design(),
1,
&camera_sets(),
0.01,
true,
true,
)
.unwrap();
let want = [
(
0usize,
Direction::Down,
0.354526685759271,
0.693805951243274,
),
(2, Direction::Up, 0.462537300828849, 0.693805951243274),
(1, Direction::Up, 0.872315053291437, 0.872315053291437),
];
assert_eq!(rows.len(), want.len());
for (r, (set, dir, p, fdr)) in rows.iter().zip(want) {
assert_eq!(r.set, set);
assert_eq!(r.direction, dir);
assert!(
(r.p_value - p).abs() < 1e-7,
"p: got {}, want {p}",
r.p_value
);
assert!((r.fdr - fdr).abs() < 1e-7, "fdr: got {}, want {fdr}", r.fdr);
}
}
#[test]
fn fry_matches_r() {
let rows = fry(
&camera_exprs(),
&camera_design(),
1,
&camera_sets(),
FrySort::Directional,
)
.unwrap();
let want = [
(
1usize,
5usize,
Direction::Up,
0.124433966893834,
0.373301900681503,
0.0667070665318511,
0.0667070665318511,
),
(
0,
4,
Direction::Down,
0.44028222758847,
0.45071371571786,
0.00113516116620128,
0.00170274174930192,
),
(
2,
3,
Direction::Up,
0.45071371571786,
0.45071371571786,
0.000139022937183932,
0.000417068811551796,
),
];
assert_eq!(rows.len(), want.len());
for (r, (set, ng, dir, p, fdr, pm, fdrm)) in rows.iter().zip(want) {
assert_eq!(r.set, set);
assert_eq!(r.n_genes, ng);
assert_eq!(r.direction, dir);
assert!(
(r.p_value - p).abs() < 1e-6,
"p: got {}, want {p}",
r.p_value
);
assert!((r.fdr - fdr).abs() < 1e-6, "fdr: got {}, want {fdr}", r.fdr);
assert!(
(r.p_value_mixed - pm).abs() < 1e-6,
"pm: got {}, want {pm}",
r.p_value_mixed
);
assert!(
(r.fdr_mixed - fdrm).abs() < 1e-6,
"fdrm: got {}, want {fdrm}",
r.fdr_mixed
);
}
}
#[test]
fn ids2indices_maps_and_drops_empty() {
let ids: Vec<String> = ["a", "b", "c", "d", "e"]
.iter()
.map(|s| s.to_string())
.collect();
let sets = vec![
vec!["b".to_string(), "d".to_string()],
vec!["x".to_string()],
vec!["a".to_string(), "e".to_string(), "c".to_string()],
];
let with_empty = ids2indices(&sets, &ids, false);
assert_eq!(with_empty, vec![vec![1, 3], vec![], vec![0, 2, 4]]);
let without = ids2indices(&sets, &ids, true);
assert_eq!(without, vec![vec![1, 3], vec![0, 2, 4]]);
}
#[test]
#[allow(clippy::excessive_precision)]
fn roast_matches_r() {
let g = 50usize;
let n = 6usize;
let y_data = RRng::new(2024).rnorm(g * n);
let y = Array2::from_shape_vec((n, g), y_data)
.unwrap()
.t()
.to_owned();
let design = Array2::from_shape_vec(
(2, n),
vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0],
)
.unwrap()
.t()
.to_owned();
let check = |tag: &str,
seed: i32,
index: &[usize],
nrot: usize,
want_active: [f64; 4],
want_p: [f64; 4]| {
let mut rng = RRng::new(seed);
let out = roast(&y, &design, 1, index, nrot, &mut rng).unwrap();
assert_eq!(out.n_genes_in_set, index.len());
for i in 0..4 {
assert!(
(out.active_prop[i] - want_active[i]).abs() < 1e-12,
"{tag} active[{i}]: got {}, want {}",
out.active_prop[i],
want_active[i]
);
assert!(
(out.p_value[i] - want_p[i]).abs() < 1e-12,
"{tag} p[{i}]: got {}, want {}",
out.p_value[i],
want_p[i]
);
}
};
let idx_a: Vec<usize> = (0..10).collect();
check(
"A",
99,
&idx_a,
1999,
[0.1, 0.0, 0.1, 0.1],
[0.47211802950737686, 0.52813203300825207, 0.944, 0.344],
);
let idx_b: Vec<usize> = (10..35).collect();
check(
"B",
7,
&idx_b,
1999,
[0.0, 0.12, 0.12, 0.12],
[0.90547636909227303, 0.094773693423355843, 0.1895, 0.384],
);
check(
"C",
123,
&idx_a,
999,
[0.1, 0.0, 0.1, 0.1],
[0.47423711855927964, 0.52626313156578286, 0.948, 0.364],
);
}
#[test]
#[allow(clippy::excessive_precision)]
fn mroast_matches_r() {
let g = 50usize;
let n = 6usize;
let y = Array2::from_shape_vec((n, g), RRng::new(2024).rnorm(g * n))
.unwrap()
.t()
.to_owned();
let design = Array2::from_shape_vec(
(2, n),
vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0],
)
.unwrap()
.t()
.to_owned();
let index: Vec<Vec<usize>> = vec![
(0..10).collect(),
(10..35).collect(),
(4..15).chain(39..50).collect(),
(19..24).collect(),
];
let mut rng = RRng::new(314);
let tab = mroast(
&y,
&design,
1,
&index,
1999,
true,
FrySort::NoSort,
&mut rng,
)
.unwrap();
let want_ngenes = [10usize, 25, 22, 5];
let want_propdown = [0.1, 0.0, 0.090909090909090912, 0.0];
let want_propup = [0.0, 0.12, 0.090909090909090912, 0.4];
let want_dir = [
Direction::Down,
Direction::Up,
Direction::Down,
Direction::Up,
];
let want_p = [0.943, 0.1875, 0.6645, 0.124];
let want_fdr = [0.943, 0.3745, 0.8856666666666666, 0.3745];
let want_pm = [0.348, 0.403, 0.5915, 0.1265];
let want_fdrm = [0.537, 0.537, 0.5915, 0.505];
for i in 0..4 {
assert_eq!(tab[i].set, i, "row {i} set");
assert_eq!(tab[i].n_genes, want_ngenes[i], "row {i} ngenes");
assert_eq!(tab[i].direction, want_dir[i], "row {i} direction");
assert!(
(tab[i].prop_down - want_propdown[i]).abs() < 1e-12,
"row {i} propdown: got {}, want {}",
tab[i].prop_down,
want_propdown[i]
);
assert!(
(tab[i].prop_up - want_propup[i]).abs() < 1e-12,
"row {i} propup: got {}, want {}",
tab[i].prop_up,
want_propup[i]
);
assert!(
(tab[i].p_value - want_p[i]).abs() < 1e-12,
"row {i} pvalue: got {}, want {}",
tab[i].p_value,
want_p[i]
);
assert!(
(tab[i].fdr - want_fdr[i]).abs() < 1e-12,
"row {i} fdr: got {}, want {}",
tab[i].fdr,
want_fdr[i]
);
assert!(
(tab[i].p_value_mixed - want_pm[i]).abs() < 1e-12,
"row {i} pvalue_mixed: got {}, want {}",
tab[i].p_value_mixed,
want_pm[i]
);
assert!(
(tab[i].fdr_mixed - want_fdrm[i]).abs() < 1e-12,
"row {i} fdr_mixed: got {}, want {}",
tab[i].fdr_mixed,
want_fdrm[i]
);
}
let mut rng = RRng::new(314);
let td = mroast(
&y,
&design,
1,
&index,
1999,
true,
FrySort::Directional,
&mut rng,
)
.unwrap();
let order_d: Vec<usize> = td.iter().map(|r| r.set).collect();
assert_eq!(order_d, vec![3, 1, 2, 0], "directional order");
let mut rng = RRng::new(314);
let tm = mroast(&y, &design, 1, &index, 1999, true, FrySort::Mixed, &mut rng).unwrap();
let order_m: Vec<usize> = tm.iter().map(|r| r.set).collect();
assert_eq!(order_m, vec![3, 0, 1, 2], "mixed order");
}
#[test]
#[allow(clippy::excessive_precision)]
fn romer_matches_r() {
let g = 50usize;
let n = 6usize;
let y = Array2::from_shape_vec((n, g), RRng::new(2024).rnorm(g * n))
.unwrap()
.t()
.to_owned();
let design = Array2::from_shape_vec(
(2, n),
vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0],
)
.unwrap()
.t()
.to_owned();
let index: Vec<Vec<usize>> = vec![
(0..10).collect(),
(10..35).collect(),
(4..15).chain(39..50).collect(),
(19..24).collect(),
];
let mut rng = RRng::new(271);
let tab = romer(
&y,
&design,
1,
&index,
RomerStatistic::Mean,
999,
true,
&mut rng,
)
.unwrap();
let want_ngenes = [10usize, 25, 22, 5];
let want_up = [0.563, 0.476, 0.987, 0.492];
let want_down = [0.441, 0.527, 0.016, 0.526];
let want_mixed = [0.231, 0.341, 0.632, 0.139];
for i in 0..4 {
assert_eq!(tab[i].set, i, "row {i} set");
assert_eq!(tab[i].n_genes, want_ngenes[i], "row {i} ngenes");
assert!(
(tab[i].p_up - want_up[i]).abs() < 1e-12,
"row {i} up: got {}, want {}",
tab[i].p_up,
want_up[i]
);
assert!(
(tab[i].p_down - want_down[i]).abs() < 1e-12,
"row {i} down: got {}, want {}",
tab[i].p_down,
want_down[i]
);
assert!(
(tab[i].p_mixed - want_mixed[i]).abs() < 1e-12,
"row {i} mixed: got {}, want {}",
tab[i].p_mixed,
want_mixed[i]
);
}
let order = |a: RomerAlternative| -> Vec<usize> {
top_romer(&tab, 4, a).iter().map(|r| r.set).collect()
};
assert_eq!(order(RomerAlternative::Up), vec![1, 3, 0, 2], "top up");
assert_eq!(order(RomerAlternative::Down), vec![2, 0, 3, 1], "top down");
assert_eq!(
order(RomerAlternative::Mixed),
vec![3, 0, 1, 2],
"top mixed"
);
let check_stat =
|stat: RomerStatistic, up: [i64; 4], down: [i64; 4], mixed: [i64; 4], tag: &str| {
let mut rng = RRng::new(271);
let t = romer(&y, &design, 1, &index, stat, 999, true, &mut rng).unwrap();
let p = |c: i64| (c as f64 + 1.0) / 1000.0;
for i in 0..4 {
assert!((t[i].p_up - p(up[i])).abs() < 1e-12, "{tag} row {i} up");
assert!(
(t[i].p_down - p(down[i])).abs() < 1e-12,
"{tag} row {i} down"
);
assert!(
(t[i].p_mixed - p(mixed[i])).abs() < 1e-12,
"{tag} row {i} mixed"
);
}
};
check_stat(
RomerStatistic::FloorMean,
[426, 505, 925, 452],
[477, 367, 121, 169],
[900, 366, 666, 201],
"floormean",
);
check_stat(
RomerStatistic::Mean50,
[479, 351, 935, 329],
[361, 396, 109, 165],
[690, 246, 707, 169],
"mean50",
);
}
#[test]
#[allow(clippy::excessive_precision)]
fn contrast_as_coef_matches_r() {
let design = Array2::from_shape_vec(
(3, 6),
vec![
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, ],
)
.unwrap()
.t()
.to_owned();
let check = |out: &ContrastAsCoef, rank: usize, coef: &[usize], cm: &[f64]| {
assert_eq!(out.rank, rank, "rank");
assert_eq!(out.coef.as_slice(), coef, "coef");
let n = out.design.nrows();
let p = out.design.ncols();
let mut flat = Vec::with_capacity(n * p);
for j in 0..p {
for i in 0..n {
flat.push(out.design[[i, j]]);
}
}
assert_eq!(flat.len(), cm.len(), "design length");
for (idx, (&a, &b)) in flat.iter().zip(cm).enumerate() {
assert!((a - b).abs() < 1e-12, "design[{idx}]: {a} vs {b}");
}
};
let v1 = Array2::from_shape_vec((3, 1), vec![0.0, 1.0, -1.0]).unwrap();
check(
&contrast_as_coef(&design, &v1, true).unwrap(),
1,
&[0],
&[
0.0,
0.0,
0.49999999999999994,
0.49999999999999994,
-0.49999999999999994,
-0.49999999999999994,
-0.70710678118654746,
-0.70710678118654746,
-0.20710678118654746,
-0.20710678118654746,
-0.20710678118654754,
-0.20710678118654754,
0.70710678118654746,
0.70710678118654746,
1.2071067811865475,
1.2071067811865475,
1.2071067811865475,
1.2071067811865475,
],
);
check(
&contrast_as_coef(&design, &v1, false).unwrap(),
1,
&[2],
&[
-0.70710678118654746,
-0.70710678118654746,
-0.20710678118654746,
-0.20710678118654746,
-0.20710678118654754,
-0.20710678118654754,
0.70710678118654746,
0.70710678118654746,
1.2071067811865475,
1.2071067811865475,
1.2071067811865475,
1.2071067811865475,
0.0,
0.0,
0.49999999999999994,
0.49999999999999994,
-0.49999999999999994,
-0.49999999999999994,
],
);
let m2 = Array2::from_shape_vec((3, 2), vec![0.0, -2.0, 1.0, 1.0, -1.0, 1.0]).unwrap();
check(
&contrast_as_coef(&design, &m2, true).unwrap(),
2,
&[0, 1],
&[
0.0,
0.0,
0.49999999999999994,
0.49999999999999994,
-0.49999999999999994,
-0.49999999999999994,
-0.33333333333333337,
-0.33333333333333337,
-0.16666666666666663,
-0.16666666666666663,
-0.16666666666666669,
-0.16666666666666669,
0.57735026918962573,
0.57735026918962573,
1.1547005383792515,
1.1547005383792515,
1.1547005383792515,
1.1547005383792515,
],
);
check(
&contrast_as_coef(&design, &m2, false).unwrap(),
2,
&[1, 2],
&[
0.57735026918962573,
0.57735026918962573,
1.1547005383792515,
1.1547005383792515,
1.1547005383792515,
1.1547005383792515,
0.0,
0.0,
0.49999999999999994,
0.49999999999999994,
-0.49999999999999994,
-0.49999999999999994,
-0.33333333333333337,
-0.33333333333333337,
-0.16666666666666663,
-0.16666666666666663,
-0.16666666666666669,
-0.16666666666666669,
],
);
}
}