xcell-rust 0.1.0

//! Stage 3 of xCell: `spillOver`.
//!
//! Corrects for signal "spillover" between related cell types by solving, per
//! sample, a non-negative least-squares problem against the compensation
//! matrix `K`.

use gsva::EnrichmentResult;

use crate::data::SpillModel;

/// Mirrors xCell 1.1.0 `spillOver`. Scales the compensation matrix by `alpha`
/// and forces a unit diagonal (`K <- K * alpha; diag(K) <- 1`), then for each
/// sample solves `min_{x >= 0} || K x - transformed ||` over the cell types
/// shared between the transformed scores and `K`, clamping any residual
/// negatives to zero.
pub fn spill_over(
    transformed: &EnrichmentResult,
    spill: &SpillModel,
    alpha: f64,
) -> EnrichmentResult {
    let nsamp = transformed.samples.len();

    // rows <- rownames(transformed)[rownames(transformed) %in% rownames(K)]:
    // the cell types in `transformed` (in its order) that `K` also knows about.
    // For each we record its row in `transformed.scores` (the right-hand side)
    // and its row/column in `K` (the design matrix), so both stay aligned.
    let mut row_names = Vec::new();
    let mut t_rows = Vec::new();
    let mut k_rows = Vec::new();
    for (ti, ct) in transformed.gene_sets.iter().enumerate() {
        if let Some(ki) = spill.k.row_of(ct) {
            row_names.push(ct.clone());
            t_rows.push(ti);
            k_rows.push(ki);
        }
    }
    let n = row_names.len();

    // Build A = K[rows, rows] with off-diagonal entries scaled by alpha and the
    // diagonal forced to 1. R scales the whole matrix then overwrites the
    // diagonal (`K <- K * alpha; diag(K) <- 1`), so off-diagonals carry the
    // alpha factor and the diagonal is exactly 1 regardless of K's own diagonal.
    let mut a = vec![0.0f64; n * n];
    for (ri, &ki) in k_rows.iter().enumerate() {
        for (ci, &kj) in k_rows.iter().enumerate() {
            a[ri * n + ci] = if ri == ci {
                1.0
            } else {
                spill.k.get(ki, kj) * alpha
            };
        }
    }

    // Per-sample NNLS: the transformed scores for the `rows` cell types form the
    // right-hand side b; the solution is that sample's spillover-adjusted score.
    // Each sample is independent, so map over samples (in parallel under the
    // `parallel` feature) and reassemble in index order. A sample's NNLS is
    // deterministic and self-contained — it reads the shared design matrix `a`
    // read-only and owns its own `b` — so the result is bit-identical to a serial
    // sweep and to R, never an approximation.
    let per_sample: Vec<Vec<f64>> = crate::par::map_collect(nsamp, |j| {
        let mut b = vec![0.0f64; n];
        for (ri, &tr) in t_rows.iter().enumerate() {
            b[ri] = transformed.scores[tr * nsamp + j];
        }
        let x = nnls(&a, n, n, &b);
        // scores[scores < 0] <- 0. NNLS already guarantees x >= 0, but the R code
        // clamps unconditionally, so we do too for exact fidelity.
        x.iter()
            .map(|&xi| if xi < 0.0 { 0.0 } else { xi })
            .collect()
    });

    // Scatter each sample's solution into the column-major score block.
    let mut scores = vec![0.0f64; n * nsamp];
    for (j, x) in per_sample.iter().enumerate() {
        for (ri, &xi) in x.iter().enumerate() {
            scores[ri * nsamp + j] = xi;
        }
    }

    EnrichmentResult {
        gene_sets: row_names,
        samples: transformed.samples.clone(),
        scores,
    }
}

/// Lawson–Hanson non-negative least squares: solve `min_{x >= 0} ||A x - b||`
/// for the `m × n` row-major matrix `a` (row `i`, column `j` at `a[i*n + j]`)
/// and right-hand side `b` (length `m`); returns `x` (length `n`).
///
/// xCell calls `pracma::lsqlincon(C, d, lb = 0)`, whose only constraint is
/// `x >= 0` — i.e. exactly NNLS. `lsqlincon` reduces the least-squares problem
/// to the quadratic program `min ½ xᵀ(CᵀC)x − (Cᵀd)ᵀx` and hands it to
/// `quadprog`, so it works on the Gram matrix `CᵀC`; we do the same here.
/// Because xCell's `C = alpha*K` with unit diagonal is full column rank, `CᵀC`
/// is symmetric positive-definite and the objective is strictly convex: the
/// optimum is unique, so Lawson–Hanson and `quadprog` reach the same point (to
/// solver tolerance — this stage is the one place xCell-rust is not expected to
/// be bit-identical to R).
fn nnls(a: &[f64], m: usize, n: usize, b: &[f64]) -> Vec<f64> {
    // Normal-equation pieces: G = AᵀA (n×n) and c = Aᵀb (n). Working with these
    // keeps the passive-set solves small (n ≈ 64) and mirrors quadprog's CᵀC.
    let mut g = vec![0.0f64; n * n];
    for i in 0..n {
        for j in 0..n {
            let mut s = 0.0;
            for k in 0..m {
                s += a[k * n + i] * a[k * n + j];
            }
            g[i * n + j] = s;
        }
    }
    let mut c = vec![0.0f64; n];
    for i in 0..n {
        let mut s = 0.0;
        for k in 0..m {
            s += a[k * n + i] * b[k];
        }
        c[i] = s;
    }

    let mut x = vec![0.0f64; n];
    let mut passive = vec![false; n];
    // w = Aᵀb − AᵀA x = Aᵀ(b − A x): the negative gradient / KKT multipliers.
    let mut w = c.clone();

    let c_inf = c.iter().fold(0.0f64, |mx, &v| mx.max(v.abs()));
    let tol = 1e-12 * c_inf.max(1.0);
    let max_outer = 3 * n + 1;
    let mut outer = 0;

    loop {
        // Bring the active index with the most-positive gradient into the
        // passive set; stop when none exceeds the tolerance.
        let mut chosen = None;
        let mut best = tol;
        for j in 0..n {
            if !passive[j] && w[j] > best {
                best = w[j];
                chosen = Some(j);
            }
        }
        let Some(t) = chosen else { break };
        if outer >= max_outer {
            break;
        }
        outer += 1;
        passive[t] = true;

        // Inner loop: unconstrained least squares on the passive set, retreating
        // any coordinate that turns non-positive, until the passive solution is
        // strictly positive.
        loop {
            let idx: Vec<usize> = (0..n).filter(|&j| passive[j]).collect();
            let s_p = solve_spd_subset(&g, n, &idx, &c);

            let mut s = vec![0.0f64; n];
            let mut min_s = f64::INFINITY;
            for (k, &j) in idx.iter().enumerate() {
                s[j] = s_p[k];
                if s_p[k] < min_s {
                    min_s = s_p[k];
                }
            }
            if min_s > 0.0 {
                x = s;
                break;
            }

            // Step to the first passive coordinate that hits zero.
            let mut step = f64::INFINITY;
            for &j in &idx {
                if s[j] <= 0.0 {
                    let denom = x[j] - s[j];
                    if denom > 0.0 {
                        let ratio = x[j] / denom;
                        if ratio < step {
                            step = ratio;
                        }
                    }
                }
            }
            if !step.is_finite() {
                x = s;
                break;
            }
            for j in 0..n {
                x[j] += step * (s[j] - x[j]);
            }
            // Drop coordinates that hit zero from the passive set.
            for j in 0..n {
                if passive[j] && x[j] <= 0.0 {
                    passive[j] = false;
                    x[j] = 0.0;
                }
            }
        }

        // Recompute w = Aᵀb − AᵀA x for the next outer iteration.
        for i in 0..n {
            let mut s = 0.0;
            for j in 0..n {
                s += g[i * n + j] * x[j];
            }
            w[i] = c[i] - s;
        }
    }

    x
}

/// Solve the symmetric-positive-definite subsystem `G[idx, idx] z = rhs[idx]`
/// by Gaussian elimination with partial pivoting, returning `z` aligned to
/// `idx`. `g` is the full `n × n` Gram matrix in row-major order.
fn solve_spd_subset(g: &[f64], n: usize, idx: &[usize], rhs: &[f64]) -> Vec<f64> {
    let k = idx.len();
    let mut m = vec![0.0f64; k * k];
    let mut v = vec![0.0f64; k];
    for (r, &ir) in idx.iter().enumerate() {
        for (col, &ic) in idx.iter().enumerate() {
            m[r * k + col] = g[ir * n + ic];
        }
        v[r] = rhs[ir];
    }

    // Forward elimination with partial pivoting.
    for col in 0..k {
        let mut piv = col;
        let mut piv_val = m[col * k + col].abs();
        for r in (col + 1)..k {
            let val = m[r * k + col].abs();
            if val > piv_val {
                piv_val = val;
                piv = r;
            }
        }
        if piv != col {
            for col2 in 0..k {
                m.swap(col * k + col2, piv * k + col2);
            }
            v.swap(col, piv);
        }
        let d = m[col * k + col];
        if d == 0.0 {
            continue; // singular column; leave as-is (should not happen for SPD)
        }
        for r in (col + 1)..k {
            let f = m[r * k + col] / d;
            if f != 0.0 {
                for col2 in col..k {
                    m[r * k + col2] -= f * m[col * k + col2];
                }
                v[r] -= f * v[col];
            }
        }
    }

    // Back substitution.
    let mut z = vec![0.0f64; k];
    for r in (0..k).rev() {
        let mut s = v[r];
        for col in (r + 1)..k {
            s -= m[r * k + col] * z[col];
        }
        let d = m[r * k + r];
        z[r] = if d != 0.0 { s / d } else { 0.0 };
    }
    z
}