gsva-rust 0.1.0

Pure-Rust port of the GSVA family of gene-set enrichment methods (GSVA, ssGSEA, z-score, PLAGE), validated for numeric parity against the Bioconductor GSVA package.
Documentation
//! R-faithful ranking helpers.
//!
//! GSVA-family methods rank genes within each sample. These helpers reproduce
//! R's `order` (stable, ascending) and `rank` (average ties — R's default
//! `ties.method = "average"`). Inputs are assumed finite (expression values);
//! ordering uses [`f64::total_cmp`] so the result is deterministic regardless.

/// Indices that sort `x` ascending, ties broken by original position.
///
/// Equivalent to R's `order(x)` (minus the 1-based offset): a stable ascending
/// sort, so equal values keep their input order.
pub fn order_increasing(x: &[f64]) -> Vec<usize> {
    let mut idx: Vec<usize> = (0..x.len()).collect();
    idx.sort_by(|&a, &b| x[a].total_cmp(&x[b]).then(a.cmp(&b)));
    idx
}

/// Indices that sort `x` descending, ties broken by ascending original
/// position. Equivalent to R's `order(x, decreasing = TRUE)` (minus the 1-based
/// offset): R's default radix sort is stable, so tied values keep their input
/// order even when the sort is reversed (verified against R 4.6.0).
pub fn order_decreasing(x: &[f64]) -> Vec<usize> {
    let mut idx: Vec<usize> = (0..x.len()).collect();
    idx.sort_by(|&a, &b| x[b].total_cmp(&x[a]).then(a.cmp(&b)));
    idx
}

/// Integer ranks of `x`, 1-based, ascending, with ties broken by R's
/// `ties.method = "last"`: among equal values the *earlier*-occurring element
/// receives the *larger* rank (the reverse of `"first"`).
///
/// This is the tie rule GSVA's `compute.col.ranks` uses (`colRanks(Z,
/// ties.method = "last")`). Verified against R 4.6.0:
/// `rank(c(0.2, 0.9, 0.5, 0.9, 0.1), ties.method = "last") == c(2, 5, 3, 4, 1)`.
pub fn rank_last(x: &[f64]) -> Vec<usize> {
    // Sort ascending by value; for ties, the larger original index sorts first
    // so it receives the smaller rank, i.e. the earlier index gets the larger
    // rank — exactly R's "last" rule.
    let mut idx: Vec<usize> = (0..x.len()).collect();
    idx.sort_by(|&a, &b| x[a].total_cmp(&x[b]).then(b.cmp(&a)));
    let mut ranks = vec![0usize; x.len()];
    for (r, &i) in idx.iter().enumerate() {
        ranks[i] = r + 1;
    }
    ranks
}

/// Average ranks of `x`, 1-based, with tied values sharing the mean of the
/// ranks they span. Equivalent to R's `rank(x)` with `ties.method = "average"`.
pub fn rank_average(x: &[f64]) -> Vec<f64> {
    let n = x.len();
    let order = order_increasing(x);
    let mut ranks = vec![0.0f64; n];
    let mut i = 0;
    while i < n {
        let mut j = i;
        while j + 1 < n && x[order[j + 1]] == x[order[i]] {
            j += 1;
        }
        // 0-based positions i..=j map to 1-based ranks (i+1)..=(j+1).
        let avg = ((i + 1) + (j + 1)) as f64 / 2.0;
        for &pos in &order[i..=j] {
            ranks[pos] = avg;
        }
        i = j + 1;
    }
    ranks
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn order_is_stable_ascending() {
        // R: order(c(3, 1, 2, 1)) == c(2, 4, 3, 1)  (1-based; ties keep order)
        let o = order_increasing(&[3.0, 1.0, 2.0, 1.0]);
        assert_eq!(o, [1, 3, 2, 0]);
    }

    #[test]
    fn rank_average_ties() {
        // R: rank(c(3, 1, 2, 1)) == c(4.0, 1.5, 3.0, 1.5)
        let r = rank_average(&[3.0, 1.0, 2.0, 1.0]);
        assert_eq!(r, [4.0, 1.5, 3.0, 1.5]);
    }

    #[test]
    fn rank_no_ties() {
        // R: rank(c(10, 30, 20)) == c(1, 3, 2)
        let r = rank_average(&[10.0, 30.0, 20.0]);
        assert_eq!(r, [1.0, 3.0, 2.0]);
    }

    #[test]
    fn rank_last_ties() {
        // R: rank(c(0.2, 0.9, 0.5, 0.9, 0.1), ties.method = "last") == c(2,5,3,4,1)
        let r = rank_last(&[0.2, 0.9, 0.5, 0.9, 0.1]);
        assert_eq!(r, [2, 5, 3, 4, 1]);
        // R: rank(c(7, 7, 7), ties.method = "last") == c(3, 2, 1)
        let r = rank_last(&[7.0, 7.0, 7.0]);
        assert_eq!(r, [3, 2, 1]);
        // No ties: ordinary ascending integer ranks.
        let r = rank_last(&[10.0, 30.0, 20.0]);
        assert_eq!(r, [1, 3, 2]);
    }

    #[test]
    fn order_decreasing_stable_ties() {
        // R: order(c(5, 3, 5, 3), decreasing = TRUE) == c(1, 3, 2, 4)
        let o = order_decreasing(&[5.0, 3.0, 5.0, 3.0]);
        assert_eq!(o, [0, 2, 1, 3]);
        // R: order(c(2, 2, 2, 1, 3), decreasing = TRUE) == c(5, 1, 2, 3, 4)
        let o = order_decreasing(&[2.0, 2.0, 2.0, 1.0, 3.0]);
        assert_eq!(o, [4, 0, 1, 2, 3]);
    }
}