rsomics-limma-array-weights 0.1.0

Estimate per-sample (array) quality weights by REML for a log-expression matrix + design — a clean-room Rust reimplementation of limma's arrayWeights
Documentation
use std::path::PathBuf;

use clap::Parser;
use rsomics_common::{CommonFlags, Result, RsomicsError, Tool, ToolMeta};
use rsomics_help::{Example, FlagSpec, HelpSpec, Origin, Section};

use rsomics_limma_array_weights::{Options, run, write_weights};

pub const META: ToolMeta = ToolMeta {
    name: env!("CARGO_PKG_NAME"),
    version: env!("CARGO_PKG_VERSION"),
};

#[derive(Parser, Debug)]
#[command(name = "rsomics-limma-array-weights", version, about, long_about = None, disable_help_flag = true)]
pub struct Cli {
    /// log-expression matrix TSV: header = sample ids, col 1 = gene ids.
    pub expr: PathBuf,
    /// Design matrix TSV: header = coefficient names, col 1 = sample ids.
    #[arg(long)]
    design: PathBuf,
    /// Prior genes: larger squeezes weights more strongly toward equality.
    #[arg(long, default_value_t = 10.0)]
    prior_n: f64,
    /// Max REML scoring iterations.
    #[arg(long, default_value_t = 50)]
    maxiter: usize,
    /// Convergence tolerance on the largest gamma step.
    #[arg(long, default_value_t = 1e-5)]
    tol: f64,
    /// Weights TSV destination; "-" is stdout.
    #[arg(short = 'o', long, default_value = "-")]
    output: String,
    #[command(flatten)]
    pub common: CommonFlags,
}

impl Tool for Cli {
    fn meta() -> ToolMeta {
        META
    }
    fn common(&self) -> &CommonFlags {
        &self.common
    }

    fn execute(self) -> Result<()> {
        let opts = Options {
            expr: &self.expr,
            design: &self.design,
            prior_n: self.prior_n,
            maxiter: self.maxiter,
            tol: self.tol,
        };
        let res = run(&opts)?;

        let mut out: Box<dyn std::io::Write> = if self.output == "-" {
            Box::new(std::io::stdout().lock())
        } else {
            Box::new(std::fs::File::create(&self.output).map_err(RsomicsError::Io)?)
        };
        write_weights(&res, &mut out)?;

        if !self.common.quiet {
            eprintln!(
                "{} arrays, {} genes, converged={} in {} iters",
                res.samples.len(),
                res.n_genes,
                res.converged,
                res.iters
            );
        }
        Ok(())
    }
}

pub static HELP: HelpSpec = HelpSpec {
    name: env!("CARGO_PKG_NAME"),
    version: env!("CARGO_PKG_VERSION"),
    tagline: "Estimate per-array quality weights by REML.",
    origin: Some(Origin {
        upstream: "limma arrayWeights",
        upstream_license: "GPL (>=2)",
        our_license: "MIT OR Apache-2.0",
        paper_doi: Some("10.1186/1471-2105-7-261"),
    }),
    usage_lines: &["<expr.tsv> --design <design.tsv> [--prior-n N] [-o weights.tsv]"],
    sections: &[Section {
        title: "OPTIONS",
        flags: &[
            FlagSpec {
                short: None,
                long: "design",
                aliases: &[],
                value: Some("<path>"),
                type_hint: Some("PathBuf"),
                required: true,
                default: None,
                description: "Design matrix TSV (header = coefficient names, col 1 = sample ids).",
                why_default: None,
            },
            FlagSpec {
                short: None,
                long: "prior-n",
                aliases: &[],
                value: Some("<N>"),
                type_hint: Some("f64"),
                required: false,
                default: Some("10"),
                description: "Prior genes; squeezes weights toward equality.",
                why_default: Some("limma's default."),
            },
            FlagSpec {
                short: None,
                long: "maxiter",
                aliases: &[],
                value: Some("<N>"),
                type_hint: Some("usize"),
                required: false,
                default: Some("50"),
                description: "Max REML scoring iterations.",
                why_default: Some("limma's default."),
            },
            FlagSpec {
                short: None,
                long: "tol",
                aliases: &[],
                value: Some("<x>"),
                type_hint: Some("f64"),
                required: false,
                default: Some("1e-5"),
                description: "Convergence tolerance on the largest gamma step.",
                why_default: Some("limma's default."),
            },
            FlagSpec {
                short: Some('o'),
                long: "output",
                aliases: &[],
                value: Some("<path>"),
                type_hint: Some("String"),
                required: false,
                default: Some("-"),
                description: "Weights TSV destination; \"-\" is stdout.",
                why_default: None,
            },
        ],
    }],
    examples: &[Example {
        description: "Per-array weights for a two-group design",
        command: "rsomics-limma-array-weights E.tsv --design design.tsv -o weights.tsv",
    }],
    json_result_schema_doc: None,
};

#[cfg(test)]
mod tests {
    use super::*;
    use clap::CommandFactory;

    #[test]
    fn cli_debug_assert() {
        Cli::command().debug_assert();
    }
}