use crate::geneset::GeneSets;
use crate::kcdf;
use crate::matrix::ExprMatrix;
use crate::par;
use crate::prep;
use crate::rank;
use crate::result::EnrichmentResult;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Kcdf {
Gaussian,
Poisson,
}
#[derive(Clone, Debug)]
pub struct GsvaParams {
pub min_size: usize,
pub max_size: usize,
pub tau: f64,
pub max_diff: bool,
pub abs_ranking: bool,
pub kcdf: Kcdf,
}
impl Default for GsvaParams {
fn default() -> Self {
GsvaParams {
min_size: 1,
max_size: usize::MAX,
tau: 1.0,
max_diff: true,
abs_ranking: false,
kcdf: Kcdf::Gaussian,
}
}
}
pub fn gsva(expr: &ExprMatrix, gene_sets: &GeneSets, params: &GsvaParams) -> EnrichmentResult {
let filtered = prep::filter_constant_rows(expr);
let (names, indices) =
prep::map_and_filter_sets(&filtered, gene_sets, params.min_size, params.max_size);
let p = filtered.nrow();
let nsamp = filtered.ncol();
let kcdf_vals = match params.kcdf {
Kcdf::Gaussian => kcdf::gaussian_left_tail(&filtered),
Kcdf::Poisson => kcdf::poisson_left_tail(&filtered),
};
let half_p = p as f64 / 2.0;
let nsets = names.len();
let columns: Vec<Vec<f64>> = par::map_collect(nsamp, |j| {
let col: Vec<f64> = (0..p).map(|g| kcdf_vals[g * nsamp + j]).collect();
let ranks = rank::rank_last(&col);
let mut dos = vec![0usize; p];
let mut srs = vec![0.0f64; p];
for g in 0..p {
let r = ranks[g];
dos[g] = p - r + 1;
srs[g] = (half_p - r as f64).abs();
}
indices
.iter()
.map(|members| walk_score(members, &dos, &srs, p, params))
.collect()
});
let mut scores = vec![0.0f64; nsets * nsamp];
for (j, col) in columns.iter().enumerate() {
for (s, &v) in col.iter().enumerate() {
scores[s * nsamp + j] = v;
}
}
EnrichmentResult {
gene_sets: names,
samples: expr.col_names().to_vec(),
scores,
}
}
fn walk_score(members: &[usize], dos: &[usize], srs: &[f64], n: usize, params: &GsvaParams) -> f64 {
let mut step_in = vec![0.0f64; n];
let mut step_out = vec![1i64; n];
let unit_tau = params.tau == 1.0;
for &g in members {
let pos = dos[g] - 1; step_in[pos] = if unit_tau {
srs[g]
} else {
srs[g].powf(params.tau)
};
step_out[pos] = 0;
}
for i in 1..n {
step_in[i] += step_in[i - 1];
step_out[i] += step_out[i - 1];
}
let in_last = step_in[n - 1];
let out_last = step_out[n - 1];
if !(in_last > 0.0 && out_last > 0) {
return f64::NAN;
}
let out_last_f = out_last as f64;
let mut walk_pos = 0.0f64;
let mut walk_neg = 0.0f64;
for i in 0..n {
let w = step_in[i] / in_last - step_out[i] as f64 / out_last_f;
if w > walk_pos {
walk_pos = w;
}
if w < walk_neg {
walk_neg = w;
}
}
if params.max_diff {
if params.abs_ranking {
walk_pos - walk_neg
} else {
walk_pos + walk_neg
}
} else if walk_pos > walk_neg.abs() {
walk_pos
} else {
walk_neg
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::geneset::{GeneSet, GeneSets};
#[test]
fn small_gsva_shapes() {
let expr = ExprMatrix::new(
vec!["G1".into(), "G2".into(), "G3".into(), "G4".into()],
vec!["S1".into(), "S2".into(), "S3".into()],
vec![
1.0, 2.0, 3.0, 4.0, 6.0, 8.0, 3.0, 2.0, 1.0, 10.0, 5.0, 2.0, ],
);
let gs = GeneSets::new(vec![GeneSet::new(
"SET",
vec!["G1".into(), "G2".into(), "G3".into()],
)]);
let res = gsva(&expr, &gs, &GsvaParams::default());
assert_eq!(res.gene_sets, ["SET"]);
assert_eq!(res.samples, ["S1", "S2", "S3"]);
assert_eq!(res.scores.len(), 3);
assert!(res.scores.iter().all(|v| v.is_finite()));
}
}