use crate::matrix::ExprMatrix;
use crate::par;
use crate::prep;
use crate::rank;
use crate::result::EnrichmentResult;
#[derive(Clone, Debug)]
pub struct SsgseaParams {
pub min_size: usize,
pub max_size: usize,
pub alpha: f64,
pub normalize: bool,
}
impl Default for SsgseaParams {
fn default() -> Self {
SsgseaParams {
min_size: 1,
max_size: usize::MAX,
alpha: 0.25,
normalize: true,
}
}
}
pub fn ssgsea(
expr: &ExprMatrix,
gene_sets: &crate::geneset::GeneSets,
params: &SsgseaParams,
) -> EnrichmentResult {
let (names, indices) =
prep::map_and_filter_sets(expr, gene_sets, params.min_size, params.max_size);
let p = expr.nrow(); let nsamp = expr.ncol();
let pf = p as f64;
let total_weight = pf * (pf + 1.0) / 2.0;
let unit_alpha = params.alpha == 1.0;
let nsets = names.len();
let columns: Vec<Vec<f64>> = par::map_collect(nsamp, |j| {
let col: Vec<f64> = (0..p).map(|i| expr.get(i, j)).collect();
let ranks = rank::rank_average(&col);
let r_int: Vec<f64> = ranks.iter().map(|&r| r.trunc()).collect();
let ra: Vec<f64> = if unit_alpha {
r_int.clone()
} else {
r_int.iter().map(|&r| r.powf(params.alpha)).collect()
};
let order_desc = rank::order_decreasing(&r_int);
let mut pos = vec![0usize; p];
for (rank_pos, &g) in order_desc.iter().enumerate() {
pos[g] = rank_pos + 1;
}
indices
.iter()
.map(|idx| {
let k = idx.len();
let mut num_sum = 0.0f64; let mut num_comp = 0.0f64;
let mut den_sum = 0.0f64; let mut den_comp = 0.0f64;
let mut wsum = 0.0f64; for &g in idx {
let weight = (p - pos[g] + 1) as f64;
let ra_g = ra[g];
let term = ra_g * weight;
let t = num_sum + term;
if num_sum.abs() >= term.abs() {
num_comp += (num_sum - t) + term;
} else {
num_comp += (term - t) + num_sum;
}
num_sum = t;
let t = den_sum + ra_g;
if den_sum.abs() >= ra_g.abs() {
den_comp += (den_sum - t) + ra_g;
} else {
den_comp += (ra_g - t) + den_sum;
}
den_sum = t;
wsum += weight;
}
let step_in = (num_sum + num_comp) / (den_sum + den_comp);
let step_out = (total_weight - wsum) / ((p - k) as f64);
step_in - step_out
})
.collect()
});
let mut scores = vec![0.0f64; nsets * nsamp];
for (j, col) in columns.iter().enumerate() {
for (s, &v) in col.iter().enumerate() {
scores[s * nsamp + j] = v;
}
}
if params.normalize {
let mut mn = f64::INFINITY;
let mut mx = f64::NEG_INFINITY;
for &v in &scores {
if v < mn {
mn = v;
}
if v > mx {
mx = v;
}
}
let denom = mx - mn;
for v in &mut scores {
*v /= denom;
}
}
EnrichmentResult {
gene_sets: names,
samples: expr.col_names().to_vec(),
scores,
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::geneset::{GeneSet, GeneSets};
#[test]
fn small_ssgsea_shapes() {
let expr = ExprMatrix::new(
vec!["G1".into(), "G2".into(), "G3".into(), "G4".into()],
vec!["S1".into(), "S2".into(), "S3".into()],
vec![
1.0, 2.0, 3.0, 4.0, 6.0, 8.0, 3.0, 2.0, 1.0, 10.0, 5.0, 1.0, ],
);
let gs = GeneSets::new(vec![GeneSet::new("SET", vec!["G1".into(), "G2".into()])]);
let res = ssgsea(&expr, &gs, &SsgseaParams::default());
assert_eq!(res.gene_sets, ["SET"]);
assert_eq!(res.samples, ["S1", "S2", "S3"]);
assert_eq!(res.scores.len(), 3);
}
}