//! rsomics-sc-scale 0.1.0
//!
//! Z-score scaling of a single-cell count matrix — matches scanpy's `pp.scale`
//! (`zero_center`, standard deviation with ddof=1, symmetric clipping).
//!
//! Documentation
use std::path::PathBuf;

use clap::Parser;
use rsomics_common::{CommonFlags, Result, Tool, ToolMeta};
use rsomics_help::{Example, FlagSpec, HelpSpec, Origin, Section};

use rsomics_sc_scale::{ScaleParams, open_output, parse_max_value, run};

/// Tool identity returned by [`Tool::meta`] for this binary.
///
/// Both fields are filled in at compile time from Cargo's package metadata
/// (`CARGO_PKG_NAME` / `CARGO_PKG_VERSION`), so they track `Cargo.toml`
/// without being repeated here.
pub const META: ToolMeta = ToolMeta {
    name: env!("CARGO_PKG_NAME"),
    version: env!("CARGO_PKG_VERSION"),
};

// Command-line arguments for `rsomics-sc-scale`.
//
// Plain `//` comments are used for these struct-level notes on purpose: clap's
// derive turns a `///` doc comment on the struct into `about` text, and `about`
// is already requested explicitly in the `#[command(...)]` attribute below.
//
// clap's built-in help flag is switched off (`disable_help_flag = true`).
// NOTE(review): presumably help output is rendered from `HELP` instead —
// confirm against `rsomics_help`.
#[derive(Parser, Debug)]
#[command(name = "rsomics-sc-scale", version, about, long_about = None, disable_help_flag = true)]
pub struct Cli {
    /// 10x MTX directory (matrix.mtx[.gz], genes×cells).
    pub input: PathBuf,

    /// Output dense MTX path (genes×cells array); '-' for stdout.
    #[arg(short = 'o', long, default_value = "-")]
    output: String,

    /// Clip scaled values to [-max-value, max-value]; absent = no clipping.
    // Kept as a raw string here; `execute` hands it to `parse_max_value`.
    #[arg(long = "max-value")]
    max_value: Option<String>,

    // Flags shared across tools, flattened into this command's argument list.
    #[command(flatten)]
    pub common: CommonFlags,
}

impl Tool for Cli {
    fn meta() -> ToolMeta {
        META
    }
    fn common(&self) -> &CommonFlags {
        &self.common
    }

    fn execute(self) -> Result<()> {
        self.common.install_rayon_pool()?;
        let params = ScaleParams {
            max_value: parse_max_value(self.max_value.as_deref())?,
        };
        let out = open_output(&self.output)?;
        let (genes, cells) = run(&self.input, &params, out)?;
        if !self.common.quiet {
            eprintln!("scaled {cells} cells × {genes} genes");
        }
        Ok(())
    }
}

/// Static help specification for this tool.
///
/// Describes the tool's name/version (from Cargo metadata), its upstream
/// origin, the usage line, the `--output` and `--max-value` flags, and two
/// example invocations. The flag entries mirror the corresponding `Cli`
/// arguments and must be kept in sync with them by hand.
pub static HELP: HelpSpec = HelpSpec {
    name: env!("CARGO_PKG_NAME"),
    version: env!("CARGO_PKG_VERSION"),
    tagline: "Per-gene z-score scaling of a single-cell count matrix.",
    // Attribution for the upstream implementation this tool is modelled on.
    origin: Some(Origin {
        upstream: "scanpy sc.pp.scale",
        upstream_license: "BSD-3-Clause",
        our_license: "MIT OR Apache-2.0",
        paper_doi: Some("10.1186/s13059-017-1382-0"),
    }),
    usage_lines: &["<10x-mtx-dir> [--max-value <float>] [-o out.mtx]"],
    sections: &[Section {
        title: "OPTIONS",
        flags: &[
            // Mirrors `Cli::output` (`-o` / `--output`, default "-").
            FlagSpec {
                short: Some('o'),
                long: "output",
                aliases: &[],
                value: Some("<path>"),
                type_hint: Some("String"),
                required: false,
                default: Some("-"),
                description: "Output dense MTX path (genes×cells array); '-' for stdout.",
                why_default: Some("Streams to stdout for pipeline composition."),
            },
            // Mirrors `Cli::max_value` (`--max-value`, optional, no default).
            FlagSpec {
                short: None,
                long: "max-value",
                aliases: &[],
                value: Some("<float>"),
                type_hint: Some("f64"),
                required: false,
                default: None,
                description: "Clip scaled values to [-max-value, max-value].",
                why_default: Some("Matches scanpy's max_value=None (no clipping)."),
            },
        ],
    }],
    examples: &[
        Example {
            description: "scanpy-default scaling (zero-center, no clip)",
            command: "rsomics-sc-scale filtered_feature_bc_matrix/ -o scaled.mtx",
        },
        Example {
            description: "scale and clip z-scores to ±10 (common scanpy idiom)",
            command: "rsomics-sc-scale mtx_dir/ --max-value 10 -o scaled.mtx",
        },
    ],
    // No JSON result schema is documented for this tool.
    json_result_schema_doc: None,
};

#[cfg(test)]
mod tests {
    use super::Cli;
    use clap::CommandFactory;

    /// Builds the clap command for [`Cli`] and runs clap's own consistency
    /// assertions over the derived argument definitions.
    #[test]
    fn cli_debug_assert() {
        let command = <Cli as CommandFactory>::command();
        command.debug_assert();
    }
}