limma-rust 0.1.0

//! Small dense linear-algebra routines implemented in pure Rust so the crate
//! has no BLAS/LAPACK system dependency. Designed matrices in limma are
//! `n_samples x n_coef` with very few columns, so simple O(n p^2) algorithms
//! are more than fast enough.

use ndarray::{s, Array1, Array2};

const EPS: f64 = f64::EPSILON;

/// Economy-size Householder QR decomposition of an `n x p` matrix (n >= p).
///
/// Returns `(q, r)` with `q` of shape `n x p` (orthonormal columns) and `r` of
/// shape `p x p` (upper triangular), such that `a == q.dot(&r)`.
pub fn qr_econ(a: &Array2<f64>) -> (Array2<f64>, Array2<f64>) {
    let n = a.nrows();
    let p = a.ncols();
    assert!(n >= p, "qr_econ requires n >= p (got {}x{})", n, p);

    let mut r = a.clone();
    // Householder vectors stored column by column (length n - k for column k).
    let mut vs: Vec<Array1<f64>> = Vec::with_capacity(p);

    for k in 0..p {
        // Column k below the diagonal.
        let col = r.slice(s![k.., k]).to_owned();
        let norm = col.dot(&col).sqrt();
        if norm == 0.0 {
            vs.push(Array1::zeros(n - k));
            continue;
        }
        let alpha = if col[0] >= 0.0 { -norm } else { norm };
        let mut v = col.clone();
        v[0] -= alpha;
        let vnorm2 = v.dot(&v);
        if vnorm2 == 0.0 {
            vs.push(Array1::zeros(n - k));
            continue;
        }
        // Apply reflector H = I - 2 v v^T / (v^T v) to R[k.., k..].
        for j in k..p {
            let mut colj = r.slice(s![k.., j]).to_owned();
            let dot = v.dot(&colj);
            let factor = 2.0 * dot / vnorm2;
            for i in 0..(n - k) {
                colj[i] -= factor * v[i];
            }
            r.slice_mut(s![k.., j]).assign(&colj);
        }
        vs.push(v.clone());
        // store normalization implicitly via vnorm2; recompute when building Q.
    }

    // Build economy Q by applying the reflectors (in reverse) to the first p
    // columns of the identity.
    let mut q = Array2::<f64>::eye(n);
    let q = {
        let mut qfull = q.slice_mut(s![.., ..]).to_owned();
        // qfull is n x n identity; apply reflectors H_0 ... H_{p-1} so that
        // Q = H_0 H_1 ... H_{p-1}. Apply in reverse order to identity columns.
        for k in (0..p).rev() {
            let v = &vs[k];
            let vnorm2 = v.dot(v);
            if vnorm2 == 0.0 {
                continue;
            }
            for j in 0..n {
                let mut colj = qfull.slice(s![k.., j]).to_owned();
                let dot = v.dot(&colj);
                let factor = 2.0 * dot / vnorm2;
                for i in 0..(n - k) {
                    colj[i] -= factor * v[i];
                }
                qfull.slice_mut(s![k.., j]).assign(&colj);
            }
        }
        qfull.slice(s![.., ..p]).to_owned()
    };
    let _ = q.nrows();

    let r_top = r.slice(s![..p, ..p]).to_owned();
    // Zero the strictly-lower triangle for cleanliness.
    let mut r_clean = r_top;
    for i in 0..p {
        for j in 0..i {
            r_clean[[i, j]] = 0.0;
        }
    }
    (q, r_clean)
}

/// Full Householder QR: the complete `n x n` orthogonal factor `Q` of an
/// `n x p` matrix (`n >= p`), such that `Q^T A` is upper-triangular. The
/// trailing `n - p` columns of `Q` form an orthonormal basis for the orthogonal
/// complement of `col(A)` (equivalently the null space of `A^T`). Used to build
/// the natural-spline basis in [`crate::splines`].
pub fn qr_full_q(a: &Array2<f64>) -> Array2<f64> {
    let n = a.nrows();
    let p = a.ncols();
    assert!(n >= p, "qr_full_q requires n >= p (got {}x{})", n, p);

    let mut r = a.clone();
    let mut vs: Vec<Array1<f64>> = Vec::with_capacity(p);
    for k in 0..p {
        let col = r.slice(s![k.., k]).to_owned();
        let norm = col.dot(&col).sqrt();
        if norm == 0.0 {
            vs.push(Array1::zeros(n - k));
            continue;
        }
        let alpha = if col[0] >= 0.0 { -norm } else { norm };
        let mut v = col.clone();
        v[0] -= alpha;
        let vnorm2 = v.dot(&v);
        if vnorm2 == 0.0 {
            vs.push(Array1::zeros(n - k));
            continue;
        }
        for j in k..p {
            let mut colj = r.slice(s![k.., j]).to_owned();
            let dot = v.dot(&colj);
            let factor = 2.0 * dot / vnorm2;
            for i in 0..(n - k) {
                colj[i] -= factor * v[i];
            }
            r.slice_mut(s![k.., j]).assign(&colj);
        }
        vs.push(v);
    }

    // Q = H_0 H_1 ... H_{p-1} applied to the full identity (keep all n columns).
    let mut q = Array2::<f64>::eye(n);
    for k in (0..p).rev() {
        let v = &vs[k];
        let vnorm2 = v.dot(v);
        if vnorm2 == 0.0 {
            continue;
        }
        for j in 0..n {
            let mut colj = q.slice(s![k.., j]).to_owned();
            let dot = v.dot(&colj);
            let factor = 2.0 * dot / vnorm2;
            for i in 0..(n - k) {
                colj[i] -= factor * v[i];
            }
            q.slice_mut(s![k.., j]).assign(&colj);
        }
    }
    q
}

/// Solve `R x = b` for upper-triangular `R` (p x p) by back substitution.
pub fn solve_upper(r: &Array2<f64>, b: &Array1<f64>) -> Array1<f64> {
    let p = r.nrows();
    let mut x = Array1::<f64>::zeros(p);
    for i in (0..p).rev() {
        let mut sum = b[i];
        for j in (i + 1)..p {
            sum -= r[[i, j]] * x[j];
        }
        x[i] = sum / r[[i, i]];
    }
    x
}

/// Solve `Uᵀ x = b` for upper-triangular `U` (p x p) by forward substitution.
/// Equivalent to R's `backsolve(U, b, transpose = TRUE)` (the transpose `Uᵀ` is
/// lower-triangular, so the substitution runs top to bottom).
pub fn solve_upper_transpose(u: &Array2<f64>, b: &Array1<f64>) -> Array1<f64> {
    let p = u.nrows();
    let mut x = Array1::<f64>::zeros(p);
    for i in 0..p {
        let mut sum = b[i];
        for j in 0..i {
            sum -= u[[j, i]] * x[j];
        }
        x[i] = sum / u[[i, i]];
    }
    x
}

/// Invert an upper-triangular matrix `R` (p x p).
pub fn inv_upper(r: &Array2<f64>) -> Array2<f64> {
    let p = r.nrows();
    let mut inv = Array2::<f64>::zeros((p, p));
    for col in 0..p {
        let mut e = Array1::<f64>::zeros(p);
        e[col] = 1.0;
        let x = solve_upper(r, &e);
        for row in 0..p {
            inv[[row, col]] = x[row];
        }
    }
    inv
}

/// Given the `R` factor of `X = Q R`, return `(X^T X)^{-1} = R^{-1} R^{-T}`.
pub fn xtx_inv_from_r(r: &Array2<f64>) -> Array2<f64> {
    let rinv = inv_upper(r);
    rinv.dot(&rinv.t())
}

/// Upper-triangular Cholesky factor `U` such that `U^T U == a`
/// (matches R `chol(a)`, the upper-triangular factor).
pub fn cholesky_upper(a: &Array2<f64>) -> Array2<f64> {
    let n = a.nrows();
    let mut u = Array2::<f64>::zeros((n, n));
    for j in 0..n {
        let mut sum = a[[j, j]];
        for k in 0..j {
            sum -= u[[k, j]] * u[[k, j]];
        }
        let ujj = sum.max(0.0).sqrt();
        u[[j, j]] = ujj;
        for i in (j + 1)..n {
            let mut s = a[[j, i]];
            for k in 0..j {
                s -= u[[k, j]] * u[[k, i]];
            }
            u[[j, i]] = if ujj > 0.0 { s / ujj } else { 0.0 };
        }
    }
    u
}

/// Scale a covariance matrix into a correlation matrix.
/// Mirrors R's `cov2cor`.
pub fn cov2cor(cov: &Array2<f64>) -> Array2<f64> {
    let n = cov.nrows();
    let v: Array1<f64> = (0..n).map(|i| cov[[i, i]].sqrt()).collect();
    let mut cor = Array2::<f64>::zeros((n, n));
    for i in 0..n {
        for j in 0..n {
            if cov[[i, j]] == 0.0 {
                cor[[i, j]] = 0.0;
            } else {
                cor[[i, j]] = cov[[i, j]] / (v[i] * v[j]);
            }
        }
    }
    cor
}

/// Symmetric eigendecomposition via the cyclic Jacobi algorithm.
///
/// Returns `(eigenvalues, eigenvectors)` with eigenvalues in **ascending**
/// order and eigenvectors as columns.
pub fn eigh(a: &Array2<f64>) -> (Array1<f64>, Array2<f64>) {
    let n = a.nrows();
    let mut a = a.clone();
    let mut v = Array2::<f64>::eye(n);

    for _sweep in 0..100 {
        // Off-diagonal Frobenius norm.
        let mut off = 0.0;
        for p in 0..n {
            for q in (p + 1)..n {
                off += a[[p, q]] * a[[p, q]];
            }
        }
        if off.sqrt() <= 1e-300 || off < 1e-30 {
            break;
        }
        for p in 0..n {
            for q in (p + 1)..n {
                let apq = a[[p, q]];
                if apq.abs() <= 1e-300 {
                    continue;
                }
                let app = a[[p, p]];
                let aqq = a[[q, q]];
                let theta = (aqq - app) / (2.0 * apq);
                let t = theta.signum() / (theta.abs() + (theta * theta + 1.0).sqrt());
                let c = 1.0 / (t * t + 1.0).sqrt();
                let s = t * c;
                // Rotate rows/cols p,q.
                for k in 0..n {
                    let akp = a[[k, p]];
                    let akq = a[[k, q]];
                    a[[k, p]] = c * akp - s * akq;
                    a[[k, q]] = s * akp + c * akq;
                }
                for k in 0..n {
                    let apk = a[[p, k]];
                    let aqk = a[[q, k]];
                    a[[p, k]] = c * apk - s * aqk;
                    a[[q, k]] = s * apk + c * aqk;
                }
                for k in 0..n {
                    let vkp = v[[k, p]];
                    let vkq = v[[k, q]];
                    v[[k, p]] = c * vkp - s * vkq;
                    v[[k, q]] = s * vkp + c * vkq;
                }
            }
        }
    }

    let mut evals: Vec<(f64, usize)> = (0..n).map(|i| (a[[i, i]], i)).collect();
    evals.sort_by(|x, y| x.0.partial_cmp(&y.0).unwrap());

    let values: Array1<f64> = evals.iter().map(|&(val, _)| val).collect();
    let mut vectors = Array2::<f64>::zeros((n, n));
    for (new_col, &(_, old_col)) in evals.iter().enumerate() {
        for row in 0..n {
            vectors[[row, new_col]] = v[[row, old_col]];
        }
    }
    (values, vectors)
}

/// Top-`k` left singular vectors and singular values of an `n x p` matrix,
/// in **descending** singular-value order, computed from the eigendecomposition
/// of the `p x p` Gram matrix `aᵀa`. Intended for the tall-skinny limma case
/// (residual-effect matrices with few columns), where this is far cheaper than
/// an `n x n` decomposition.
///
/// Returns `(svals, u)` with `svals` of length `k` and `u` of shape `n x k`
/// (the left singular vectors as columns). Like any SVD, the singular vectors
/// are defined only up to sign; columns whose singular value is numerically
/// zero are returned as zero.
pub fn svd_left(a: &Array2<f64>, k: usize) -> (Array1<f64>, Array2<f64>) {
    let n = a.nrows();
    let p = a.ncols();
    let k = k.min(p).min(n);
    let gram = a.t().dot(a);
    let (evals, evecs) = eigh(&gram); // ascending eigenvalues
    let mut svals = Array1::<f64>::zeros(k);
    let mut u = Array2::<f64>::zeros((n, k));
    for j in 0..k {
        let col = p - 1 - j; // largest eigenvalues are last
        let sigma = evals[col].max(0.0).sqrt();
        svals[j] = sigma;
        if sigma > 0.0 {
            let av = a.dot(&evecs.column(col));
            for i in 0..n {
                u[[i, j]] = av[i] / sigma;
            }
        }
    }
    (svals, u)
}

/// Number of singular values of `a` greater than the tolerance
/// `smax * max(nrow, ncol) * EPS`, computed from the eigenvalues of `a^T a`.
pub fn matrix_rank(a: &Array2<f64>) -> usize {
    let ata = a.t().dot(a);
    let (evals, _) = eigh(&ata);
    let sv: Vec<f64> = evals.iter().map(|&e| e.max(0.0).sqrt()).collect();
    let smax = sv.iter().cloned().fold(0.0_f64, f64::max);
    let dim = a.nrows().max(a.ncols()) as f64;
    let tol = smax * dim * EPS;
    sv.iter().filter(|&&s| s > tol).count()
}

/// Assemble a block-diagonal matrix from `blocks`, placed top-left to
/// bottom-right with zeros elsewhere. Port of limma's `blockDiag`.
pub fn block_diag(blocks: &[Array2<f64>]) -> Array2<f64> {
    let total_rows: usize = blocks.iter().map(|b| b.nrows()).sum();
    let total_cols: usize = blocks.iter().map(|b| b.ncols()).sum();
    let mut z = Array2::<f64>::zeros((total_rows, total_cols));
    let (mut r, mut c) = (0usize, 0usize);
    for b in blocks {
        let (br, bc) = (b.nrows(), b.ncols());
        z.slice_mut(s![r..r + br, c..c + bc]).assign(b);
        r += br;
        c += bc;
    }
    z
}

#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::array;

    fn max_abs_diff(a: &Array2<f64>, b: &Array2<f64>) -> f64 {
        a.iter()
            .zip(b.iter())
            .map(|(x, y)| (x - y).abs())
            .fold(0.0, f64::max)
    }

    #[test]
    fn qr_reconstructs() {
        let a = array![[1.0, 2.0], [3.0, 4.0], [5.0, 7.0], [1.0, 0.0]];
        let (q, r) = qr_econ(&a);
        let recon = q.dot(&r);
        assert!(max_abs_diff(&a, &recon) < 1e-12);
        // Columns of Q orthonormal.
        let qtq = q.t().dot(&q);
        assert!(max_abs_diff(&qtq, &Array2::eye(2)) < 1e-12);
    }

    #[test]
    fn xtx_inverse_matches_normal_equations() {
        let a = array![[1.0, 0.0], [1.0, 1.0], [1.0, 2.0], [1.0, 3.0]];
        let (_q, r) = qr_econ(&a);
        let inv = xtx_inv_from_r(&r);
        let ata = a.t().dot(&a);
        let prod = ata.dot(&inv);
        assert!(max_abs_diff(&prod, &Array2::eye(2)) < 1e-10);
    }

    #[test]
    fn cholesky_upper_roundtrip() {
        let a = array![[4.0, 2.0, 0.6], [2.0, 5.0, 1.0], [0.6, 1.0, 3.0]];
        let u = cholesky_upper(&a);
        let recon = u.t().dot(&u);
        assert!(max_abs_diff(&a, &recon) < 1e-12);
    }

    #[test]
    fn eigh_ascending() {
        let a = array![[2.0, 1.0], [1.0, 2.0]];
        let (vals, vecs) = eigh(&a);
        assert!((vals[0] - 1.0).abs() < 1e-10);
        assert!((vals[1] - 3.0).abs() < 1e-10);
        // A v = lambda v
        let av = a.dot(&vecs.column(1).to_owned());
        let lv = &vecs.column(1).to_owned() * vals[1];
        assert!((av - lv).iter().map(|x| x.abs()).fold(0.0, f64::max) < 1e-10);
    }

    #[test]
    fn block_diag_matches_r() {
        // Reference: blockDiag(matrix(1:6,2,3), matrix(7:10,2,2)) in limma.
        let a = array![[1.0, 3.0, 5.0], [2.0, 4.0, 6.0]];
        let b = array![[7.0, 9.0], [8.0, 10.0]];
        let want = array![
            [1.0, 3.0, 5.0, 0.0, 0.0],
            [2.0, 4.0, 6.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 7.0, 9.0],
            [0.0, 0.0, 0.0, 8.0, 10.0]
        ];
        assert_eq!(block_diag(&[a, b]), want);
    }
}