limma-rust 0.1.0

//! Input/output for expression matrices, design and contrast matrices, and
//! result tables. The delimited-text *reader* (`read_matrix`) lives behind the
//! `cli` feature -- it is the only consumer of the `csv` crate; everything else
//! here (the design/contrast aligners and every table writer, including
//! `write_fit`) is std-only and always compiled.

use std::collections::HashMap;
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;

use anyhow::{bail, Context, Result};
use ndarray::Array2;

use crate::decidetests::{p_adjust, Adjust, DecideMethod, TestResults};
use crate::fit::MArrayLM;
use crate::toptable::{TopRow, TopRowF};

/// A labelled numeric matrix: `data` is `rows x cols`, with `row_names` and
/// `col_names` taken from the first column and header row respectively.
///
/// The reader in this module builds it so that `row_names.len()` equals the
/// number of rows of `data` and `col_names.len()` equals the number of
/// columns. All fields are public, so nothing enforces that invariant for
/// values assembled elsewhere.
pub struct LabeledMatrix {
    /// Numeric cell values (`NaN` marks a missing cell when produced by the
    /// delimited-text reader).
    pub data: Array2<f64>,
    /// One label per row of `data`.
    pub row_names: Vec<String>,
    /// One label per column of `data`.
    pub col_names: Vec<String>,
}

/// Load a labelled matrix from a delimited text file, inferring the field
/// delimiter from the file extension: `.tsv` / `.tab` mean tab-separated, and
/// anything else (`.csv`, `.txt`, no extension) means comma-separated.
///
/// The first row is a header whose first cell is an ignored corner label; in
/// every following row the first column is the row name. Empty / `NA` / `NaN`
/// cells become `f64::NAN`. Call [`read_matrix_with_delimiter`] directly to
/// force a specific delimiter.
#[cfg(feature = "cli")]
pub fn read_matrix(path: &Path) -> Result<LabeledMatrix> {
    let delimiter = delimiter_for_path(path);
    read_matrix_with_delimiter(path, delimiter)
}

/// Choose the field delimiter implied by `path`'s extension.
///
/// Returns a tab byte for `.tsv` / `.tab` (compared ASCII case-insensitively)
/// and a comma for everything else, including paths with no extension or an
/// extension that is not valid UTF-8.
#[cfg(feature = "cli")]
fn delimiter_for_path(path: &Path) -> u8 {
    let extension = path.extension().and_then(|ext| ext.to_str());
    match extension {
        Some(ext) if ext.eq_ignore_ascii_case("tsv") || ext.eq_ignore_ascii_case("tab") => b'\t',
        _ => b',',
    }
}

/// Load a labelled matrix from a delimited text file, splitting fields on the
/// given `delimiter` byte (e.g. `b','` or `b'\t'`).
///
/// The first row is a header whose first cell is an ignored corner label; the
/// remaining header cells become the column names. In every following row the
/// first field is the row name and the rest are numeric cells. Empty / `NA` /
/// `NaN` cells, unparsable cells, and cells that are not valid UTF-8 all
/// become `f64::NAN`.
///
/// # Errors
///
/// Fails when the file cannot be opened or read, when the header has no data
/// column, or when a row's field count differs from the header's.
#[cfg(feature = "cli")]
pub fn read_matrix_with_delimiter(path: &Path, delimiter: u8) -> Result<LabeledMatrix> {
    let mut builder = csv::ReaderBuilder::new();
    builder
        .has_headers(true)
        .flexible(false)
        .delimiter(delimiter);
    let mut reader = builder
        .from_path(path)
        .with_context(|| format!("opening {}", path.display()))?;

    let header = reader.headers()?.clone();
    if header.len() < 2 {
        bail!("{}: expected at least one data column", path.display());
    }
    let col_names: Vec<String> = header.iter().skip(1).map(String::from).collect();
    let ncol = col_names.len();
    let expected_fields = ncol + 1;

    let mut row_names: Vec<String> = Vec::new();
    let mut values: Vec<f64> = Vec::new();
    // A single ByteRecord is reused for every row, so the whole file is read
    // through one buffer. Each numeric cell is viewed as UTF-8 and handed to
    // `parse_cell`.
    let mut record = csv::ByteRecord::new();
    let mut rows_read = 0usize;
    loop {
        // 1-based index of the data row about to be read (header excluded).
        let row_no = rows_read + 1;
        let has_row = reader
            .read_byte_record(&mut record)
            .with_context(|| format!("{}: reading row {}", path.display(), row_no))?;
        if !has_row {
            break;
        }
        if record.len() != expected_fields {
            bail!(
                "{}: row {} has {} fields, expected {}",
                path.display(),
                row_no,
                record.len(),
                expected_fields
            );
        }
        row_names.push(String::from_utf8_lossy(&record[0]).into_owned());
        values.extend(
            record
                .iter()
                .skip(1)
                .map(|bytes| std::str::from_utf8(bytes).map_or(f64::NAN, parse_cell)),
        );
        rows_read += 1;
    }

    let nrow = row_names.len();
    let data = Array2::from_shape_vec((nrow, ncol), values)
        .with_context(|| format!("{}: assembling matrix", path.display()))?;
    Ok(LabeledMatrix {
        data,
        row_names,
        col_names,
    })
}

/// Parse one matrix cell into an `f64`.
///
/// Surrounding whitespace is ignored. A blank cell, `NA` / `NaN` in any ASCII
/// case, and any text that does not parse as a float all yield `f64::NAN`.
#[cfg(feature = "cli")]
fn parse_cell(s: &str) -> f64 {
    let trimmed = s.trim();
    let is_missing = trimmed.is_empty()
        || ["na", "nan"]
            .iter()
            .any(|marker| trimmed.eq_ignore_ascii_case(marker));
    if is_missing {
        f64::NAN
    } else {
        trimmed.parse().unwrap_or(f64::NAN)
    }
}

/// Line the rows of `design` up with `sample_order` (the expression-matrix
/// column names) and return the resulting `n_samples x n_coef` matrix.
///
/// When every sample name is also a design row name, rows are picked by name
/// (if a row name occurs more than once, the last occurrence is used).
/// Otherwise the design is taken as-is, which requires it to have exactly one
/// row per sample.
///
/// # Errors
///
/// Fails when the names do not all match and the row count differs from the
/// number of samples.
pub fn align_design(design: &LabeledMatrix, sample_order: &[String]) -> Result<Array2<f64>> {
    let mut row_of: HashMap<&str, usize> = HashMap::with_capacity(design.row_names.len());
    for (pos, name) in design.row_names.iter().enumerate() {
        row_of.insert(name.as_str(), pos);
    }

    // `Some(rows)` only if every sample resolves to a design row by name.
    let resolved: Option<Vec<usize>> = sample_order
        .iter()
        .map(|sample| row_of.get(sample.as_str()).copied())
        .collect();

    match resolved {
        Some(rows) => {
            let n_coef = design.col_names.len();
            let mut aligned = Array2::<f64>::zeros((rows.len(), n_coef));
            for (dst, &src) in rows.iter().enumerate() {
                for col in 0..n_coef {
                    aligned[[dst, col]] = design.data[[src, col]];
                }
            }
            Ok(aligned)
        }
        None => {
            // Positional fallback: only acceptable when the counts agree.
            if design.row_names.len() != sample_order.len() {
                bail!(
                    "design has {} rows but expression has {} samples, and sample names do not match",
                    design.row_names.len(),
                    sample_order.len()
                );
            }
            Ok(design.data.clone())
        }
    }
}

/// Line the rows of a contrast matrix up with `coef_order` (the design's
/// coefficient names) and return the `n_coef x n_contrasts` matrix.
///
/// Rows are picked by name when every coefficient name is a contrast row name
/// (the last occurrence wins for a repeated row name). Otherwise the matrix is
/// used positionally, which requires one contrast row per coefficient.
///
/// # Errors
///
/// Fails when the names do not all match and the row count differs from the
/// number of coefficients.
pub fn align_contrasts(contrasts: &LabeledMatrix, coef_order: &[String]) -> Result<Array2<f64>> {
    let mut row_of: HashMap<&str, usize> = HashMap::with_capacity(contrasts.row_names.len());
    for (pos, name) in contrasts.row_names.iter().enumerate() {
        row_of.insert(name.as_str(), pos);
    }

    let some_name_missing = coef_order
        .iter()
        .any(|coef| !row_of.contains_key(coef.as_str()));
    if some_name_missing {
        // Positional fallback: the row count must agree with the coefficients.
        if contrasts.row_names.len() != coef_order.len() {
            bail!("contrast rows do not match design coefficients by name or position");
        }
        return Ok(contrasts.data.clone());
    }

    let n_contrasts = contrasts.col_names.len();
    let mut aligned = Array2::<f64>::zeros((coef_order.len(), n_contrasts));
    for (dst, coef) in coef_order.iter().enumerate() {
        let src = row_of[coef.as_str()];
        for col in 0..n_contrasts {
            aligned[[dst, col]] = contrasts.data[[src, col]];
        }
    }
    Ok(aligned)
}

/// Format a table cell: `NaN` becomes `NA`, anything else is written in
/// scientific notation with ten digits after the decimal point.
fn fmt(v: f64) -> String {
    match v {
        x if x.is_nan() => String::from("NA"),
        x => format!("{x:.10e}"),
    }
}

/// High-precision formatter for the parity dump. `NaN` becomes `NA`; other
/// values are written in scientific notation with 17 digits after the decimal
/// point (18 significant digits). That is more than enough to round-trip an
/// f64 exactly, so a comparison against R is limited by the computation
/// rather than by CSV rounding.
fn fmt_hp(v: f64) -> String {
    match v {
        x if x.is_nan() => String::from("NA"),
        x => format!("{x:.17e}"),
    }
}

/// Quote `field` for `sep`-delimited output in the RFC 4180 manner.
///
/// A field that contains the `sep` byte, a double quote, a carriage return,
/// or a line feed is wrapped in double quotes with every embedded quote
/// doubled. Any other field is returned borrowed and unchanged. This keeps
/// ids and labels that contain the delimiter (e.g. metabolite names like
/// `1,2-Di(4Z,7Z,10Z)-PE`) intact for RFC-4180 readers such as
/// `pandas.read_csv`, R `read.csv`, or the `csv` crate. `sep` must be ASCII.
fn csv_quote(field: &str, sep: u8) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;

    let is_special = |byte: u8| byte == sep || matches!(byte, b'"' | b'\n' | b'\r');
    if field.bytes().any(is_special) {
        Cow::Owned(format!("\"{}\"", field.replace('"', "\"\"")))
    } else {
        Cow::Borrowed(field)
    }
}

/// Write a single-contrast top table as comma-separated text.
///
/// The header is `id,log2FoldChange,lfcSE,AveExpr,t,P.Value,adj.P.Val,B`,
/// followed by one line per entry of `rows` in the given order. Ids are
/// quoted when they contain a comma, quote, or line break; numeric cells go
/// through `fmt` (`NaN` is written as `NA`).
///
/// # Errors
///
/// Fails when the file cannot be created or when any write, including the
/// final flush, fails.
pub fn write_top_table(path: &Path, rows: &[TopRow]) -> Result<()> {
    let mut w = BufWriter::new(File::create(path)?);
    writeln!(w, "id,log2FoldChange,lfcSE,AveExpr,t,P.Value,adj.P.Val,B")?;
    for r in rows {
        writeln!(
            w,
            "{},{},{},{},{},{},{},{}",
            csv_quote(&r.id, b','),
            fmt(r.log2_fold_change),
            fmt(r.lfc_se),
            fmt(r.ave_expr),
            fmt(r.t),
            fmt(r.p_value),
            fmt(r.adj_p_value),
            fmt(r.b)
        )?;
    }
    // Dropping a BufWriter discards any flush error, so flush explicitly:
    // a failed final write must not be reported as success.
    w.flush()?;
    Ok(())
}

/// Write a multi-contrast (F-statistic) top table as comma-separated text.
///
/// The header is `id,AveExpr,F,P.Value,adj.P.Val`, followed by one line per
/// entry of `rows` in the given order. Ids are quoted when they contain a
/// comma, quote, or line break; numeric cells go through `fmt` (`NaN` is
/// written as `NA`).
///
/// # Errors
///
/// Fails when the file cannot be created or when any write, including the
/// final flush, fails.
pub fn write_top_table_f(path: &Path, rows: &[TopRowF]) -> Result<()> {
    let mut w = BufWriter::new(File::create(path)?);
    writeln!(w, "id,AveExpr,F,P.Value,adj.P.Val")?;
    for r in rows {
        writeln!(
            w,
            "{},{},{},{},{}",
            csv_quote(&r.id, b','),
            fmt(r.ave_expr),
            fmt(r.f),
            fmt(r.p_value),
            fmt(r.adj_p_value)
        )?;
    }
    // Dropping a BufWriter discards any flush error, so flush explicitly:
    // a failed final write must not be reported as success.
    w.flush()?;
    Ok(())
}

/// Write a `decideTests` outcome matrix (genes x contrasts) of -1/0/1, in
/// original gene order. Header is `id` followed by the contrast names.
///
/// Contrast names and gene ids are quoted when they contain a comma, quote,
/// or line break; matrix cells are written with their `Display` form.
///
/// # Errors
///
/// Fails when the file cannot be created or when any write, including the
/// final flush, fails.
pub fn write_test_results(path: &Path, res: &TestResults) -> Result<()> {
    let mut w = BufWriter::new(File::create(path)?);
    write!(w, "id")?;
    for name in &res.coef_names {
        write!(w, ",{}", csv_quote(name, b','))?;
    }
    writeln!(w)?;
    for g in 0..res.data.nrows() {
        write!(w, "{}", csv_quote(&res.gene_names[g], b','))?;
        for j in 0..res.data.ncols() {
            write!(w, ",{}", res.data[[g, j]])?;
        }
        writeln!(w)?;
    }
    // Dropping a BufWriter discards any flush error, so flush explicitly:
    // a failed final write must not be reported as success.
    w.flush()?;
    Ok(())
}

/// Dump the full per-gene fit statistics in original gene order, one CSV row
/// per gene per coefficient. Used for numerical parity checks against R limma.
///
/// Statistics the fit does not carry (`t`, `p_value`, `lods`, `s2_post`,
/// `df_total`, `f_stat`, `f_p_value` are all optional) are written as `NA`.
/// Per-gene quantities (`sigma`, `s2_post`, `df_total`, `F`, `F_p_value`) are
/// repeated on every coefficient row of that gene. Numeric cells go through
/// `fmt_hp`; gene and coefficient names are quoted when they contain a comma,
/// quote, or line break.
///
/// # Errors
///
/// Fails when the file cannot be created or when any write, including the
/// final flush, fails.
pub fn write_fit_dump(path: &Path, fit: &MArrayLM) -> Result<()> {
    let mut w = BufWriter::new(File::create(path)?);
    writeln!(
        w,
        "id,coef,coefficient,stdev_unscaled,t,p_value,lods,sigma,s2_post,df_total,F,F_p_value"
    )?;
    let t = fit.t.as_ref();
    let p = fit.p_value.as_ref();
    let lods = fit.lods.as_ref();
    let s2_post = fit.s2_post.as_ref();
    let df_total = fit.df_total.as_ref();
    let f = fit.f_stat.as_ref();
    let fp = fit.f_p_value.as_ref();
    for g in 0..fit.n_genes() {
        for j in 0..fit.n_coef() {
            writeln!(
                w,
                "{},{},{},{},{},{},{},{},{},{},{},{}",
                csv_quote(&fit.gene_names[g], b','),
                csv_quote(&fit.coef_names[j], b','),
                fmt_hp(fit.coefficients[[g, j]]),
                fmt_hp(fit.stdev_unscaled[[g, j]]),
                fmt_hp(t.map(|m| m[[g, j]]).unwrap_or(f64::NAN)),
                fmt_hp(p.map(|m| m[[g, j]]).unwrap_or(f64::NAN)),
                fmt_hp(lods.map(|m| m[[g, j]]).unwrap_or(f64::NAN)),
                fmt_hp(fit.sigma[g]),
                fmt_hp(s2_post.map(|m| m[g]).unwrap_or(f64::NAN)),
                fmt_hp(df_total.map(|m| m[g]).unwrap_or(f64::NAN)),
                fmt_hp(f.map(|m| m[g]).unwrap_or(f64::NAN)),
                fmt_hp(fp.map(|m| m[g]).unwrap_or(f64::NAN)),
            )?;
        }
    }
    // Dropping a BufWriter discards any flush error, so flush explicitly:
    // a failed final write must not be reported as success.
    w.flush()?;
    Ok(())
}

/// Render an `f64` the way R's `as.character` / `write.table` do.
///
/// Finite non-zero values use at most 15 significant digits with trailing
/// zeros dropped, in whichever of fixed or scientific notation is shorter
/// (R's `scipen = 0`; a tie goes to fixed). Zero is `"0"`, `NaN` is `"NA"`,
/// and the infinities are `"Inf"` / `"-Inf"`.
fn fmt_r(v: f64) -> String {
    if v.is_nan() {
        return "NA".to_string();
    }
    if v.is_infinite() {
        let text = if v > 0.0 { "Inf" } else { "-Inf" };
        return text.to_string();
    }
    if v == 0.0 {
        return "0".to_string();
    }
    let magnitude = v.abs();

    // The 15-significant-digit rendering is the reference. Pick the fewest
    // significant digits (1..=15) whose value re-renders to that reference,
    // mirroring R's minimal round-trip width.
    let canonical = format!("{:.14e}", magnitude);
    let shortest = (0..15usize)
        .map(|precision| format!("{:.*e}", precision, magnitude))
        .find(|candidate| format!("{:.14e}", candidate.parse::<f64>().unwrap()) == canonical)
        .unwrap_or_else(|| canonical.clone());

    let (mantissa, exponent) = shortest.split_once('e').unwrap();
    let exp: i32 = exponent.parse().unwrap();
    let digits = mantissa.replace('.', "");

    let fixed = fmt_fixed(&digits, exp);
    let sci = fmt_sci(&digits, exp);
    let body = if sci.len() < fixed.len() { sci } else { fixed };
    if v < 0.0 {
        format!("-{body}")
    } else {
        body
    }
}

/// Lay out significant `digits` in plain fixed-point notation.
///
/// `digits` carries no leading zeros and its first digit has place value
/// `10^exp`. Trailing zeros after the decimal point are trimmed, and the
/// point itself is dropped when nothing follows it.
fn fmt_fixed(digits: &str, exp: i32) -> String {
    if exp < 0 {
        // Pure fraction: pad with the zeros between the point and the digits.
        let leading_zeros = "0".repeat((-exp - 1) as usize);
        let padded = format!("{leading_zeros}{digits}");
        return format!("0.{}", padded.trim_end_matches('0'));
    }

    let int_len = exp + 1;
    let available = digits.len() as i32;
    if int_len >= available {
        // Every digit is in the integer part; fill the rest with zeros.
        let fill = "0".repeat((int_len - available) as usize);
        return format!("{digits}{fill}");
    }

    let (int_part, frac_part) = digits.split_at(int_len as usize);
    match frac_part.trim_end_matches('0') {
        "" => int_part.to_string(),
        frac => format!("{int_part}.{frac}"),
    }
}

/// Lay out significant `digits` in R-style scientific notation: the first
/// digit, a decimal point and the remaining digits (if any), then `e`, an
/// explicit sign, and the exponent padded to at least two digits
/// (`1.92170677729296e-05`).
fn fmt_sci(digits: &str, exp: i32) -> String {
    let mut out = String::with_capacity(digits.len() + 6);
    out.push_str(&digits[..1]);
    if digits.len() > 1 {
        out.push('.');
        out.push_str(&digits[1..]);
    }
    out.push('e');
    out.push(if exp < 0 { '-' } else { '+' });
    out.push_str(&format!("{:02}", exp.abs()));
    out
}

/// Round `v` to `decimals` digits after the decimal point (negative `decimals`
/// rounds to powers of ten), matching R's `round`.
///
/// Non-finite inputs (`NaN`, infinities) are returned unchanged.
///
/// For `decimals >= 0` the value is rounded by formatting it with that many
/// decimals and parsing the text back. For `decimals < 0` the value is scaled
/// by the power of ten, rounded to the nearest integer with ties going to the
/// even neighbour, and scaled back. R's `round` follows the same IEC 60559
/// rule, so `round_dec(25.0, -1)` is `20.0`, as `round(25, -1)` is in R.
fn round_dec(v: f64, decimals: i32) -> f64 {
    if !v.is_finite() {
        return v;
    }
    if decimals >= 0 {
        format!("{:.*}", decimals as usize, v).parse().unwrap_or(v)
    } else {
        let scale = 10f64.powi(-decimals);
        // `f64::round` would send ties away from zero (25 -> 30 at -1).
        (v / scale).round_ties_even() * scale
    }
}

/// Options for [`write_fit`], mirroring the arguments of limma's `write.fit`.
pub struct WriteFitOptions {
    /// Rounding budget; `None` writes full precision (limma default). With
    /// `Some(d)`, [`write_fit`] rounds each cell to a number of decimal places
    /// equal to `d` plus a per-column offset (e.g. `d - 1` for `AveExpr`,
    /// `d + 2` for p-values) before formatting it.
    pub digits: Option<i32>,
    /// P-value adjustment method applied to the coefficient p-values.
    /// `Adjust::None` omits the adjusted-p-value columns entirely.
    pub adjust: Adjust,
    /// Whether adjustment is done per coefficient (`Separate`) or pooled across
    /// the whole matrix (`Global`). Any value other than `Global` is treated
    /// as per-coefficient.
    pub method: DecideMethod,
    /// Adjustment method for the moderated-F p-values. `Adjust::None` omits
    /// the adjusted F p-value column.
    pub f_adjust: Adjust,
    /// Field separator (limma default `'\t'`).
    pub sep: char,
    /// Emit the gene names as a leading row-name column (with a blank corner
    /// cell in the header).
    pub row_names: bool,
}

impl Default for WriteFitOptions {
    fn default() -> Self {
        Self {
            digits: None,
            adjust: Adjust::None,
            method: DecideMethod::Separate,
            f_adjust: Adjust::None,
            sep: '\t',
            row_names: true,
        }
    }
}

/// Apply `method` to a `genes x coefs` p-value matrix and return the adjusted
/// matrix of the same shape.
///
/// With `DecideMethod::Global` all entries are pooled into one vector in
/// column-major order (column 0 first, as R's `as.vector(matrix)` does),
/// adjusted together, and scattered back. Every other `decide` value is
/// handled as write.fit's default "separate" mode: each column is adjusted
/// on its own.
fn adjust_pvalue_matrix(
    p: &ndarray::Array2<f64>,
    method: Adjust,
    decide: DecideMethod,
) -> ndarray::Array2<f64> {
    let (n_genes, n_coefs) = p.dim();
    let mut adjusted = ndarray::Array2::<f64>::zeros((n_genes, n_coefs));
    match decide {
        DecideMethod::Global => {
            let mut pooled = Vec::with_capacity(n_genes * n_coefs);
            for col in 0..n_coefs {
                for row in 0..n_genes {
                    pooled.push(p[[row, col]]);
                }
            }
            let pooled_adj = p_adjust(&pooled, method);
            for col in 0..n_coefs {
                for row in 0..n_genes {
                    // Column-major position of (row, col) in the pooled vector.
                    adjusted[[row, col]] = pooled_adj[col * n_genes + row];
                }
            }
        }
        _ => {
            for col in 0..n_coefs {
                let mut column = Vec::with_capacity(n_genes);
                for row in 0..n_genes {
                    column.push(p[[row, col]]);
                }
                let column_adj = p_adjust(&column, method);
                for row in 0..n_genes {
                    adjusted[[row, col]] = column_adj[row];
                }
            }
        }
    }
    adjusted
}

/// Write an eBayes-processed [`MArrayLM`] fit to a delimited file, port of
/// limma's `write.fit`. The fit must already carry moderated `t` and p-values
/// (run [`crate::ebayes`] or [`crate::treat`] first). Columns are emitted in
/// limma's order — `AveExpr`, then per-coefficient `Coef`, `t`, `P.value`,
/// optional `P.value.adj`, then `F`/`F.p.value` (when present) and optional
/// `F.p.value.adj`, then optional `Results` from a `decideTests` matrix. With a
/// single coefficient the per-coefficient columns carry no `.<name>` suffix,
/// matching R's `drop()`.
///
/// Gene names and column headers are written verbatim, without quoting.
/// `results`, when given, is indexed by the fit's gene index, so it must have
/// at least as many rows as the fit has genes.
///
/// # Errors
///
/// Fails when the fit has no moderated t-statistics or p-values, when the file
/// cannot be created, or when any write, including the final flush, fails.
pub fn write_fit(
    path: &Path,
    fit: &MArrayLM,
    results: Option<&TestResults>,
    opts: &WriteFitOptions,
) -> Result<()> {
    let t = fit
        .t
        .as_ref()
        .context("write_fit needs moderated t-statistics; run eBayes/treat first")?;
    let p = fit
        .p_value
        .as_ref()
        .context("write_fit needs p-values; run eBayes/treat first")?;
    let ng = fit.n_genes();
    let nc = fit.n_coef();

    let padj = match opts.adjust {
        Adjust::None => None,
        m => Some(adjust_pvalue_matrix(p, m, opts.method)),
    };
    let f = fit.f_stat.as_ref();
    let fp = fit.f_p_value.as_ref();
    let fpadj = match (opts.f_adjust, fp) {
        (Adjust::None, _) | (_, None) => None,
        (m, Some(fpv)) => {
            // Copy through the iterator instead of `as_slice().unwrap()`, which
            // would panic if the array were not contiguous in memory.
            let f_pvalues: Vec<f64> = fpv.iter().copied().collect();
            Some(p_adjust(&f_pvalues, m))
        }
    };

    // Per-column rounding offset relative to `digits`, as in write.fit.
    let cell = |v: f64, delta: i32| -> String {
        match opts.digits {
            Some(d) => fmt_r(round_dec(v, d + delta)),
            None => fmt_r(v),
        }
    };
    // Column header: bare `base` for a single coefficient, else `base.<coef>`.
    let cname = |base: &str, j: usize| -> String {
        if nc == 1 {
            base.to_string()
        } else {
            format!("{base}.{}", fit.coef_names[j])
        }
    };

    // Each entry is (header, one preformatted cell per gene).
    let mut columns: Vec<(String, Vec<String>)> = Vec::new();
    columns.push((
        "AveExpr".to_string(),
        (0..ng).map(|g| cell(fit.amean[g], -1)).collect(),
    ));
    for j in 0..nc {
        columns.push((
            cname("Coef", j),
            (0..ng).map(|g| cell(fit.coefficients[[g, j]], 0)).collect(),
        ));
    }
    for j in 0..nc {
        columns.push((
            cname("t", j),
            (0..ng).map(|g| cell(t[[g, j]], -1)).collect(),
        ));
    }
    for j in 0..nc {
        columns.push((
            cname("P.value", j),
            (0..ng).map(|g| cell(p[[g, j]], 2)).collect(),
        ));
    }
    if let Some(pa) = &padj {
        for j in 0..nc {
            columns.push((
                cname("P.value.adj", j),
                (0..ng).map(|g| cell(pa[[g, j]], 3)).collect(),
            ));
        }
    }
    if let Some(fv) = f {
        columns.push(("F".to_string(), (0..ng).map(|g| cell(fv[g], -1)).collect()));
    }
    if let Some(fpv) = fp {
        columns.push((
            "F.p.value".to_string(),
            (0..ng).map(|g| cell(fpv[g], 2)).collect(),
        ));
    }
    if let Some(fpa) = &fpadj {
        columns.push((
            "F.p.value.adj".to_string(),
            (0..ng).map(|g| cell(fpa[g], 3)).collect(),
        ));
    }
    if let Some(res) = results {
        let rc = res.data.ncols();
        for j in 0..rc {
            let nm = if rc == 1 {
                "Results".to_string()
            } else {
                format!("Results.{}", res.coef_names[j])
            };
            columns.push((nm, (0..ng).map(|g| res.data[[g, j]].to_string()).collect()));
        }
    }

    let sep = opts.sep.to_string();
    let mut w = BufWriter::new(File::create(path)?);

    // Header: with row names, R's col.names=NA leaves a blank corner cell.
    let header: Vec<&str> = columns.iter().map(|(h, _)| h.as_str()).collect();
    if opts.row_names {
        write!(w, "{sep}")?;
    }
    writeln!(w, "{}", header.join(&sep))?;

    for g in 0..ng {
        if opts.row_names {
            write!(w, "{}{sep}", fit.gene_names[g])?;
        }
        let row: Vec<&str> = columns.iter().map(|(_, c)| c[g].as_str()).collect();
        writeln!(w, "{}", row.join(&sep))?;
    }
    // Dropping a BufWriter discards any flush error, so flush explicitly:
    // a failed final write must not be reported as success.
    w.flush()?;
    Ok(())
}

// Unit tests for the table writers and their formatting helpers: `fmt_r`,
// `write_fit`, `csv_quote`, and `write_top_table`.
#[cfg(test)]
mod write_fit_tests {
    use super::*;
    use crate::ebayes::ebayes;
    use crate::fit::lmfit;
    use ndarray::Array2;

    // Reference strings from R `as.character(<double>)` (limma 3.68.3 / R 4.6.0),
    // exactly what write.table emits for the full-precision (no-digits) path.
    #[test]
    fn fmt_r_matches_as_character() {
        let cases: &[(f64, &str)] = &[
            (0.0, "0"),
            (0.502, "0.502"),
            (-1.069, "-1.069"),
            (6.29433333333333, "6.29433333333333"),
            (0.744216015086904, "0.744216015086904"),
            (0.000595501060937467, "0.000595501060937467"),
            (1.92170677729296e-05, "1.92170677729296e-05"),
            (7.00632124072959e-06, "7.00632124072959e-06"),
            (36.1843587501443, "36.1843587501443"),
            (0.0001, "1e-04"),
            (100000.0, "1e+05"),
            (123456.0, "123456"),
        ];
        for &(v, want) in cases {
            assert_eq!(fmt_r(v), want, "fmt_r({v})");
        }
        assert_eq!(fmt_r(f64::NAN), "NA");
        assert_eq!(fmt_r(f64::INFINITY), "Inf");
        assert_eq!(fmt_r(f64::NEG_INFINITY), "-Inf");
    }

    // Shared fixture: a 5-gene, 6-sample expression matrix fitted against a
    // two-column design (intercept plus a group indicator for the last three
    // samples) and then passed through `ebayes`.
    fn build_fit() -> MArrayLM {
        // set.seed(1); round(rnorm(30,6,2),3) laid out 5 genes x 6 samples.
        let e = Array2::from_shape_vec(
            (5, 6),
            vec![
                4.747, 4.359, 9.024, 5.91, 7.838, 5.888, 6.367, 6.975, 6.78, 5.968, 7.564, 5.688,
                4.329, 7.477, 4.758, 7.888, 6.149, 3.058, 9.191, 7.152, 1.571, 7.642, 2.021, 5.044,
                6.659, 5.389, 8.25, 7.188, 7.24, 6.836,
            ],
        )
        .unwrap();
        let design = Array2::from_shape_vec(
            (6, 2),
            vec![1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        )
        .unwrap();
        let genes = (1..=5).map(|i| format!("g{i}")).collect();
        let coefs = vec!["Intercept".to_string(), "grpB".to_string()];
        let mut fit = lmfit(&e, &design, genes, coefs).unwrap();
        ebayes(&mut fit, 0.01, (0.1, 4.0), false, false).unwrap();
        fit
    }

    // Build a temp-file path made unique by the tag, the process id, and the
    // current time in nanoseconds. The file itself is not created here.
    fn tmp(tag: &str) -> std::path::PathBuf {
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        std::env::temp_dir().join(format!(
            "limma_wf_{}_{}_{}.tsv",
            tag,
            std::process::id(),
            nanos
        ))
    }

    // Approximate equality with a tolerance of 1e-6 scaled by `1 + |b|`.
    fn close(a: f64, b: f64) -> bool {
        (a - b).abs() <= 1e-6 * (1.0 + b.abs())
    }

    // BH adjustment in the default per-coefficient mode: checks the header
    // text exactly and every numeric cell against the R reference values.
    #[test]
    fn write_fit_matches_r_separate() {
        let fit = build_fit();
        let path = tmp("sep");
        let opts = WriteFitOptions {
            adjust: Adjust::BH,
            ..Default::default()
        };
        write_fit(&path, &fit, None, &opts).unwrap();
        let txt = std::fs::read_to_string(&path).unwrap();
        let _ = std::fs::remove_file(&path);

        let mut lines = txt.lines();
        assert_eq!(
            lines.next().unwrap(),
            "\tAveExpr\tCoef.Intercept\tCoef.grpB\tt.Intercept\tt.grpB\tP.value.Intercept\t\
             P.value.grpB\tP.value.adj.Intercept\tP.value.adj.grpB\tF\tF.p.value"
        );

        // Each row of write.fit(adjust="BH", method="separate") from R.
        let expected: [[f64; 11]; 5] = [
            [
                6.29433333333333,
                6.04333333333333,
                0.502,
                5.77088235610168,
                0.338964636041287,
                0.000595501060937467,
                0.744216015086904,
                0.000992501768229112,
                0.911512695521572,
                36.1843587501443,
                0.000166248928492668,
            ],
            [
                6.557,
                6.70733333333334,
                -0.300666666666668,
                9.78282382638691,
                -0.310087762759466,
                1.92170677729296e-05,
                0.765193889323088,
                8.20450194802642e-05,
                0.911512695521572,
                91.5097315793615,
                7.00632124072959e-06,
            ],
            [
                5.60983333333333,
                5.52133333333334,
                0.176999999999999,
                5.07576159719018,
                0.115057654632127,
                0.00128191400310164,
                0.911512695521572,
                0.00160239250387704,
                0.911512695521572,
                26.6025021648719,
                0.000452243348899521,
            ],
            [
                5.43683333333333,
                5.97133333333334,
                -1.069,
                3.76176082911824,
                -0.476192523101982,
                0.00658339642469066,
                0.647914700333665,
                0.00658339642469066,
                0.911512695521572,
                11.844291449427,
                0.00515864337996187,
            ],
            [
                6.927,
                6.766,
                0.322000000000001,
                9.0399993371551,
                0.304212656857555,
                3.28180077921057e-05,
                0.769488416182624,
                8.20450194802642e-05,
                0.911512695521572,
                85.7033369243552,
                8.80865266401849e-06,
            ],
        ];
        let mut g = 0;
        for line in lines {
            // Field 0 is the gene name; fields 1..=11 are the numeric columns.
            let f: Vec<&str> = line.split('\t').collect();
            assert_eq!(f.len(), 12, "row {g} field count");
            assert_eq!(f[0], format!("g{}", g + 1));
            for (c, &want) in expected[g].iter().enumerate() {
                let got: f64 = f[c + 1].parse().unwrap();
                assert!(close(got, want), "cell [{g}][{c}] got {got} want {want}");
            }
            g += 1;
        }
        assert_eq!(g, 5);
    }

    // Covers the pooled ("global") adjustment values for gene 1 and the
    // header produced when no adjustment is requested.
    #[test]
    fn write_fit_global_and_none_headers() {
        let fit = build_fit();

        // method="global" pools the adjustment across all coefficients.
        let pg = tmp("glob");
        write_fit(
            &pg,
            &fit,
            None,
            &WriteFitOptions {
                adjust: Adjust::BH,
                method: DecideMethod::Global,
                ..Default::default()
            },
        )
        .unwrap();
        let gtxt = std::fs::read_to_string(&pg).unwrap();
        let _ = std::fs::remove_file(&pg);
        let g1: Vec<&str> = gtxt.lines().nth(1).unwrap().split('\t').collect();
        // P.value.adj.Intercept / .grpB for gene 1 under global BH.
        assert!(close(g1[8].parse().unwrap(), 0.00198500353645822));
        assert!(close(g1[9].parse().unwrap(), 0.854987129091805));

        // adjust="none" drops the two P.value.adj columns from the header.
        let pn = tmp("none");
        write_fit(&pn, &fit, None, &WriteFitOptions::default()).unwrap();
        let ntxt = std::fs::read_to_string(&pn).unwrap();
        let _ = std::fs::remove_file(&pn);
        assert_eq!(
            ntxt.lines().next().unwrap(),
            "\tAveExpr\tCoef.Intercept\tCoef.grpB\tt.Intercept\tt.grpB\t\
             P.value.Intercept\tP.value.grpB\tF\tF.p.value"
        );
    }

    // Quoting rules of `csv_quote` for plain, delimiter-bearing, quote-bearing,
    // and newline-bearing fields, under both comma and tab separators.
    #[test]
    fn csv_quote_rfc4180() {
        use std::borrow::Cow;
        // Plain ids are returned untouched (and without allocating).
        assert!(matches!(csv_quote("NAMPT", b','), Cow::Borrowed(_)));
        assert_eq!(&*csv_quote("NAMPT", b','), "NAMPT");
        // A comma in the field forces quoting of the whole field.
        assert_eq!(
            &*csv_quote("1,2-Di(4Z,7Z,10Z)-PE", b','),
            "\"1,2-Di(4Z,7Z,10Z)-PE\""
        );
        // Embedded double quotes are doubled, per RFC 4180.
        assert_eq!(&*csv_quote("a\"b", b','), "\"a\"\"b\"");
        // Newlines also trigger quoting.
        assert_eq!(&*csv_quote("a\nb", b','), "\"a\nb\"");
        // Quoting keys off the active separator: a comma is harmless under TAB,
        // but a TAB is not.
        assert!(matches!(csv_quote("a,b", b'\t'), Cow::Borrowed(_)));
        assert_eq!(&*csv_quote("a\tb", b'\t'), "\"a\tb\"");
    }

    // End-to-end check that `write_top_table` quotes ids containing commas or
    // double quotes and leaves plain ids bare.
    #[test]
    fn write_top_table_quotes_comma_ids() {
        let row = |id: &str| TopRow {
            id: id.to_string(),
            log2_fold_change: 1.23,
            lfc_se: 0.45,
            ave_expr: 8.1,
            t: 2.0,
            p_value: 0.01,
            adj_p_value: 0.02,
            b: 0.5,
        };
        let rows = vec![
            row("1,2-Di(4Z,7Z,10Z)-PE"),
            row("NAMPT"),
            row("weird\"name"),
        ];
        let path = tmp("quote");
        write_top_table(&path, &rows).unwrap();
        let txt = std::fs::read_to_string(&path).unwrap();
        let _ = std::fs::remove_file(&path);

        let mut lines = txt.lines();
        assert_eq!(
            lines.next().unwrap(),
            "id,log2FoldChange,lfcSE,AveExpr,t,P.Value,adj.P.Val,B"
        );
        // The comma-laden id is wrapped in quotes, so exactly 7 numeric fields
        // (8 columns) follow it -- the row no longer shatters under a CSV reader.
        let l1 = lines.next().unwrap();
        let q = "\"1,2-Di(4Z,7Z,10Z)-PE\"";
        assert!(l1.starts_with(q), "comma id not quoted: {l1}");
        assert_eq!(
            l1[q.len()..].matches(',').count(),
            7,
            "expected 7 trailing numeric fields: {l1}"
        );
        // A comma-free id stays bare.
        assert!(lines.next().unwrap().starts_with("NAMPT,"));
        // An embedded quote is doubled and the field wrapped.
        assert!(lines.next().unwrap().starts_with("\"weird\"\"name\","));
    }
}

// Unit tests for the delimited-text reader; compiled only for test builds
// with the `cli` feature enabled, like the reader itself.
#[cfg(all(test, feature = "cli"))]
mod read_matrix_tests {
    use super::*;

    // Extension-to-delimiter mapping, including case-insensitivity and the
    // comma fallback.
    #[test]
    fn delimiter_from_extension() {
        assert_eq!(delimiter_for_path(Path::new("x.tsv")), b'\t');
        assert_eq!(delimiter_for_path(Path::new("x.tab")), b'\t');
        // Case-insensitive on the extension.
        assert_eq!(delimiter_for_path(Path::new("X.TSV")), b'\t');
        assert_eq!(delimiter_for_path(Path::new("dir/y.Tab")), b'\t');
        // Everything else falls back to comma.
        assert_eq!(delimiter_for_path(Path::new("x.csv")), b',');
        assert_eq!(delimiter_for_path(Path::new("x.txt")), b',');
        assert_eq!(delimiter_for_path(Path::new("noext")), b',');
    }

    // Build a temp-file path with the given extension, made unique by the
    // tag, the process id, and the current time in nanoseconds. The file
    // itself is not created here.
    fn tmp(tag: &str, ext: &str) -> std::path::PathBuf {
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        std::env::temp_dir().join(format!(
            "limma_rm_{}_{}_{}.{}",
            tag,
            std::process::id(),
            nanos,
            ext
        ))
    }

    // Same 2x3 table, one delimiter each, with a blank cell that must read NaN.
    fn body(sep: char) -> String {
        format!(
            "gene{sep}s1{sep}s2{sep}s3\n\
             g1{sep}1.5{sep}{sep}3.5\n\
             g2{sep}4{sep}NA{sep}6\n"
        )
    }

    // Assert that `m` is the table produced by `body`: labels, shape, values,
    // and NaN in the blank and `NA` cells.
    fn check(m: &LabeledMatrix) {
        assert_eq!(m.col_names, ["s1", "s2", "s3"]);
        assert_eq!(m.row_names, ["g1", "g2"]);
        assert_eq!(m.data.shape(), [2, 3]);
        assert_eq!(m.data[[0, 0]], 1.5);
        assert!(m.data[[0, 1]].is_nan()); // blank cell
        assert_eq!(m.data[[0, 2]], 3.5);
        assert_eq!(m.data[[1, 0]], 4.0);
        assert!(m.data[[1, 1]].is_nan()); // NA cell
        assert_eq!(m.data[[1, 2]], 6.0);
    }

    // `read_matrix` picks the delimiter from the extension for both formats.
    #[test]
    fn auto_detects_tsv_and_csv() {
        // `.tsv` extension -> tab-delimited via read_matrix's auto-detect.
        let pt = tmp("auto", "tsv");
        std::fs::write(&pt, body('\t')).unwrap();
        let mt = read_matrix(&pt).unwrap();
        let _ = std::fs::remove_file(&pt);
        check(&mt);

        // `.csv` extension -> comma-delimited.
        let pc = tmp("auto", "csv");
        std::fs::write(&pc, body(',')).unwrap();
        let mc = read_matrix(&pc).unwrap();
        let _ = std::fs::remove_file(&pc);
        check(&mc);
    }

    // An explicit delimiter is honoured regardless of the file extension.
    #[test]
    fn explicit_delimiter_overrides_extension() {
        // A semicolon-delimited file named `.csv`: auto-detect would pick comma
        // and mis-parse, but the explicit override reads it correctly.
        let p = tmp("override", "csv");
        std::fs::write(&p, body(';')).unwrap();
        let m = read_matrix_with_delimiter(&p, b';').unwrap();
        let _ = std::fs::remove_file(&p);
        check(&m);
    }
}