rsomics-avelogcpm 0.1.0

Per-gene average log2-CPM of a count matrix via the edgeR aveLogCPM one-group negative-binomial fit
Documentation
use std::path::PathBuf;

use clap::Parser;
use rsomics_common::{CommonFlags, Result, RsomicsError, Tool, ToolMeta};
use rsomics_help::{Example, FlagSpec, HelpSpec, Section};

use rsomics_avelogcpm::{AveLogCpmOpts, ave_log_cpm};

/// Tool identity returned by `Tool::meta` for this binary.
///
/// Both fields are filled in at compile time from Cargo's package metadata
/// (`CARGO_PKG_NAME` / `CARGO_PKG_VERSION`), so they track `Cargo.toml`.
pub const META: ToolMeta = ToolMeta {
    name: env!("CARGO_PKG_NAME"),
    version: env!("CARGO_PKG_VERSION"),
};

// Command-line arguments for the tool, parsed by clap's derive API.
//
// Plain `//` comments are used on purpose: `///` doc comments on clap derive
// items are picked up as help/about text and would change the CLI output.
// clap's built-in help flag is switched off (`disable_help_flag = true`);
// presumably help is rendered from the `HELP` spec instead — confirm.
#[derive(Parser, Debug)]
#[command(name = "rsomics-avelogcpm", version, about, long_about = None, disable_help_flag = true)]
pub struct Cli {
    // Positional argument (no `#[arg]` attribute): path of the input count
    // matrix, handed to `ave_log_cpm` as its first argument.
    pub counts: PathBuf,
    // `-o` / `--output`: destination path; the default "-" is treated as
    // stdout by `execute`.
    #[arg(short = 'o', long, default_value = "-")]
    output: String,
    // `--prior-count`: forwarded as `AveLogCpmOpts::prior_count` (default 2.0).
    #[arg(long, default_value_t = 2.0)]
    prior_count: f64,
    // `--dispersion`: forwarded as `AveLogCpmOpts::dispersion` (default 0.05).
    #[arg(long, default_value_t = 0.05)]
    dispersion: f64,
    // `--norm-factors`: optional path passed through to `ave_log_cpm`;
    // when absent, `None` is passed.
    #[arg(long)]
    norm_factors: Option<PathBuf>,
    // Flags shared across tools, flattened into this parser; `execute` reads
    // `quiet` from here.
    #[command(flatten)]
    pub common: CommonFlags,
}

impl Tool for Cli {
    /// Returns the compile-time tool identity (`META`).
    fn meta() -> ToolMeta {
        META
    }

    /// Borrows the shared flags that are flattened into this CLI.
    fn common(&self) -> &CommonFlags {
        &self.common
    }

    /// Runs the tool: opens the output sink, forwards the parsed options to
    /// `ave_log_cpm`, and reports the returned gene count on stderr unless
    /// `--quiet` is set.
    ///
    /// An output of `"-"` selects stdout; any other value is created (or
    /// truncated) as a file.
    ///
    /// # Errors
    ///
    /// Returns `RsomicsError::Io` when the output file cannot be created or
    /// the final flush fails, and propagates any error from `ave_log_cpm`.
    fn execute(self) -> Result<()> {
        let mut out: Box<dyn std::io::Write> = if self.output == "-" {
            Box::new(std::io::stdout().lock())
        } else {
            let file = std::fs::File::create(&self.output).map_err(RsomicsError::Io)?;
            // A bare `File` is unbuffered, so every small write would be its
            // own syscall; buffer it here since the writer's access pattern
            // is not visible from this file.
            Box::new(std::io::BufWriter::new(file))
        };
        let opts = AveLogCpmOpts {
            prior_count: self.prior_count,
            dispersion: self.dispersion,
        };
        let n = ave_log_cpm(&self.counts, self.norm_factors.as_deref(), &opts, &mut out)?;
        // Flush explicitly: dropping a `BufWriter` discards flush errors, and
        // success must not be reported while output is still pending.
        std::io::Write::flush(&mut out).map_err(RsomicsError::Io)?;
        if !self.common.quiet {
            eprintln!("{n} genes scored");
        }
        Ok(())
    }
}

/// Static help specification for the tool: tagline, usage line, option
/// descriptions and example invocations.
///
/// Name and version come from Cargo package metadata at compile time, the
/// same source as `META`. The defaults listed here are display strings and
/// must be kept in sync by hand with the `default_value_t` attributes on `Cli`.
///
/// NOTE(review): `-o` / `--output` appears in `usage_lines` but has no
/// `FlagSpec` entry under OPTIONS — confirm whether that is intentional.
pub static HELP: HelpSpec = HelpSpec {
    name: env!("CARGO_PKG_NAME"),
    version: env!("CARGO_PKG_VERSION"),
    tagline: "Per-gene average log2-CPM of a count matrix (edgeR aveLogCPM).",
    origin: None,
    usage_lines: &[
        "<counts.tsv> [--prior-count N] [--dispersion D] [--norm-factors f.tsv] [-o out.tsv]",
    ],
    sections: &[Section {
        title: "OPTIONS",
        flags: &[
            // Mirrors `Cli::prior_count` (clap default 2.0).
            FlagSpec {
                short: None,
                long: "prior-count",
                aliases: &[],
                value: Some("<float>"),
                type_hint: Some("f64"),
                required: false,
                default: Some("2"),
                description: "Average prior count added before the fit, scaled per library size.",
                why_default: Some("edgeR's default prior.count."),
            },
            // Mirrors `Cli::dispersion` (clap default 0.05).
            FlagSpec {
                short: None,
                long: "dispersion",
                aliases: &[],
                value: Some("<float>"),
                type_hint: Some("f64"),
                required: false,
                default: Some("0.05"),
                description: "Negative-binomial dispersion used by the one-group fit.",
                why_default: Some("edgeR's default dispersion for aveLogCPM."),
            },
            // Mirrors `Cli::norm_factors`; optional, so no default is shown.
            FlagSpec {
                short: None,
                long: "norm-factors",
                aliases: &[],
                value: Some("<path>"),
                type_hint: Some("PathBuf"),
                required: false,
                default: None,
                description: "Per-sample normalization factors (TMM etc.); multiplied into library sizes.",
                why_default: None,
            },
        ],
    }],
    examples: &[
        Example {
            description: "Average log2-CPM per gene",
            command: "rsomics-avelogcpm counts.tsv -o avelogcpm.tsv",
        },
        Example {
            description: "With TMM-normalized library sizes",
            command: "rsomics-avelogcpm counts.tsv --norm-factors tmm.tsv -o avelogcpm.tsv",
        },
    ],
    json_result_schema_doc: None,
};

#[cfg(test)]
mod tests {
    use super::Cli;
    use clap::CommandFactory;

    /// Builds the clap command for `Cli` and runs clap's own consistency
    /// assertions over the derived argument definitions.
    #[test]
    fn cli_debug_assert() {
        let command = <Cli as CommandFactory>::command();
        command.debug_assert();
    }
}