rsomics-limma-array-weights 0.1.0

Estimate per-sample (array) quality weights by REML for a log-expression matrix + design — a clean-room Rust reimplementation of limma's arrayWeights
Documentation
//! Differential compat against limma arrayWeights (default method="reml", prior.n=10).
//!
//! - `golden` always runs: ours vs a committed R-captured weight vector.
//! - `live_r` runs only when an Rscript with limma is found; it regenerates the
//!   oracle and diffs against ours (loud-skip otherwise).
//!
//! Tolerance is 0.5% relative. limma's exact prior.n moderation coupling inside the
//! REML scoring iteration cannot be reproduced bit-exactly from the published method
//! alone (clean-room: no GPL source), leaving a small residual that grows for very
//! small designs; on realistic gene/array counts it stays well under this bound.

use std::collections::BTreeMap;
use std::path::PathBuf;
use std::process::Command;

const EPS: f64 = 5e-3; // relative

fn ours() -> PathBuf {
    PathBuf::from(env!("CARGO_BIN_EXE_rsomics-limma-array-weights"))
}

fn golden(name: &str) -> PathBuf {
    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .join("tests/golden")
        .join(name)
}

fn manifest(rel: &str) -> PathBuf {
    PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(rel)
}

fn parse(text: &str) -> BTreeMap<String, f64> {
    let mut rows = BTreeMap::new();
    for line in text.lines().skip(1) {
        if line.is_empty() {
            continue;
        }
        let mut f = line.split('\t');
        let sample = f.next().unwrap().to_string();
        let w: f64 = f.next().unwrap().trim().parse().unwrap();
        rows.insert(sample, w);
    }
    rows
}

fn assert_close(a: &BTreeMap<String, f64>, b: &BTreeMap<String, f64>, label: &str) {
    assert_eq!(a.len(), b.len(), "{label}: sample count mismatch");
    let mut max_rel = 0.0f64;
    for (sample, x) in a {
        let y = b
            .get(sample)
            .unwrap_or_else(|| panic!("{label}: missing sample {sample}"));
        let rel = (x - y).abs() / y.abs().max(1e-9);
        max_rel = max_rel.max(rel);
        assert!(rel < EPS, "{label}: {sample} ours={x} ref={y} rel={rel:e}");
    }
    eprintln!("{label}: max relative deviation = {max_rel:e}");
}

fn run_ours() -> String {
    let out = Command::new(ours())
        .arg(golden("expr.tsv"))
        .args(["--design", golden("design.tsv").to_str().unwrap()])
        .output()
        .unwrap();
    assert!(
        out.status.success(),
        "ours failed: {}",
        String::from_utf8_lossy(&out.stderr)
    );
    String::from_utf8(out.stdout).unwrap()
}

#[test]
fn golden_weights() {
    let ours_out = run_ours();
    let expected = std::fs::read_to_string(golden("weights.expected.tsv")).unwrap();
    assert_close(
        &parse(&ours_out),
        &parse(&expected),
        "arrayWeights (golden)",
    );
}

/// Locate an Rscript that has limma installed. Prefers the project's r-bioc conda
/// env (direct binary, no `conda run`), then falls back to PATH.
fn rscript() -> Option<String> {
    let conda = format!(
        "{}/miniconda3/envs/r-bioc/bin/Rscript",
        std::env::var("HOME").unwrap_or_default()
    );
    for cand in [conda.as_str(), "Rscript"] {
        let ok = Command::new(cand)
            .args([
                "-e",
                "if(!requireNamespace('limma',quietly=TRUE)) quit(status=1)",
            ])
            .output()
            .map(|o| o.status.success())
            .unwrap_or(false);
        if ok {
            return Some(cand.to_string());
        }
    }
    None
}

#[test]
fn live_r_weights() {
    let Some(rs) = rscript() else {
        eprintln!("SKIP live_r_weights: no Rscript with limma found");
        return;
    };
    let scratch = std::env::temp_dir();
    let r_out = scratch.join(format!("aw_r_{}.tsv", std::process::id()));
    let oracle = Command::new(&rs)
        .arg(manifest("tests/aw_oracle.R"))
        .arg(golden("expr.tsv"))
        .arg(golden("design.tsv"))
        .arg(&r_out)
        .output()
        .unwrap();
    assert!(
        oracle.status.success(),
        "oracle failed: {}",
        String::from_utf8_lossy(&oracle.stderr)
    );
    let ours_out = run_ours();
    let r_ref = std::fs::read_to_string(&r_out).unwrap();
    let _ = std::fs::remove_file(&r_out);
    assert_close(&parse(&ours_out), &parse(&r_ref), "arrayWeights (live R)");
}