rsomics-rda 0.1.0

Redundancy Analysis (RDA) of a response table against explanatory constraints — equivalent to scikit-bio's skbio.stats.ordination.rda (linear regression followed by SVD, yielding canonical and residual axes)
Documentation
use std::fs::File;
use std::io::{BufReader, BufWriter, Write};
use std::path::PathBuf;

use clap::Parser;
use rsomics_common::{CommonFlags, Result, RsomicsError, Tool, ToolMeta};
use rsomics_help::{Example, FlagSpec, HelpSpec, Origin, Section};

use rsomics_rda::{Matrix, run};

/// Name and version of this tool, both captured from the Cargo package
/// manifest at compile time.
pub const META: ToolMeta = ToolMeta {
    version: env!("CARGO_PKG_VERSION"),
    name: env!("CARGO_PKG_NAME"),
};

// Command-line arguments of the `rsomics-rda` binary, parsed by clap's derive
// macro.
//
// NOTE: the `///` comments on the fields below are picked up by clap as the
// per-argument help text, so they are user-facing strings rather than plain
// documentation. Maintainer notes are therefore written as `//` comments.
//
// `disable_help_flag = true` switches off clap's built-in help flag;
// presumably help output is provided through the `HELP` spec / `rsomics_help`
// instead — TODO confirm against the shared `Tool` runner.
#[derive(Parser, Debug)]
#[command(name = "rsomics-rda", version, about, long_about = None, disable_help_flag = true)]
pub struct Cli {
    // Positional argument (no `long`/`short`). Defaults to "-", which
    // `execute` interprets as "read the response table from stdin".
    /// Response table TSV (samples × species); reads stdin when "-" or omitted.
    #[arg(default_value = "-")]
    response: PathBuf,

    // No default is declared, so the flag must be supplied. `execute` always
    // opens this path as a file; "-" gets no special stdin treatment here.
    /// Constraint/explanatory table TSV (samples × variables).
    #[arg(long = "constraints", value_name = "PATH")]
    constraints: PathBuf,

    // Forwarded to `run` as-is. The type admits any `u8`; no range restriction
    // is declared on this argument.
    /// Scaling type: 1 (distance biplot) or 2 (correlation biplot).
    #[arg(long, default_value_t = 1)]
    scaling: u8,

    // Boolean switch forwarded to `run` unchanged.
    /// Scale response columns to unit standard deviation before centring.
    #[arg(long = "scale-y", default_value_t = false)]
    scale_y: bool,

    // Selects ',' instead of '\t' as the delimiter for BOTH input tables.
    /// Parse inputs as comma-separated instead of tab-separated.
    #[arg(long, default_value_t = false)]
    csv: bool,

    // Kept as a `String` and compared against "-" in `execute` to choose
    // between stdout and a freshly created file.
    /// Output path; writes stdout when "-".
    #[arg(short = 'o', long, default_value = "-")]
    output: String,

    // Flags shared across tools, flattened into this CLI; exposed through
    // `Tool::common`.
    #[command(flatten)]
    pub common: CommonFlags,
}

impl Tool for Cli {
    fn meta() -> ToolMeta {
        META
    }
    fn common(&self) -> &CommonFlags {
        &self.common
    }

    fn execute(self) -> Result<()> {
        self.common.install_rayon_pool()?;

        let delim = if self.csv { ',' } else { '\t' };

        let response_reader: Box<dyn std::io::BufRead> = if self.response.as_os_str() == "-" {
            Box::new(BufReader::new(std::io::stdin().lock()))
        } else {
            Box::new(BufReader::new(File::open(&self.response).map_err(|e| {
                RsomicsError::InvalidInput(format!("{}: {e}", self.response.display()))
            })?))
        };
        let response = Matrix::parse(response_reader, delim)?;

        let constraints_reader = BufReader::new(File::open(&self.constraints).map_err(|e| {
            RsomicsError::InvalidInput(format!("{}: {e}", self.constraints.display()))
        })?);
        let constraints = Matrix::parse(constraints_reader, delim)?;

        let mut out: Box<dyn Write> = if self.output == "-" {
            Box::new(BufWriter::new(std::io::stdout().lock()))
        } else {
            Box::new(BufWriter::new(
                File::create(&self.output).map_err(RsomicsError::Io)?,
            ))
        };
        run(
            &response,
            &constraints,
            &mut out,
            self.scaling,
            self.scale_y,
        )?;
        out.flush().map_err(RsomicsError::Io)
    }
}

/// Help metadata for this tool, expressed as an `rsomics_help::HelpSpec`.
///
/// The entries under `sections` describe the same named flags that are
/// declared on [`Cli`]; keep the two in sync when adding or changing a flag.
pub static HELP: HelpSpec = HelpSpec {
    // Name and version come from the Cargo manifest, same as `META`.
    name: env!("CARGO_PKG_NAME"),
    version: env!("CARGO_PKG_VERSION"),
    tagline: "Redundancy Analysis of a response table against explanatory constraints.",
    // Provenance: the upstream implementation this tool mirrors, plus the
    // licences and DOI recorded for it.
    origin: Some(Origin {
        upstream: "scikit-bio skbio.stats.ordination.rda",
        upstream_license: "BSD-3-Clause",
        our_license: "MIT OR Apache-2.0",
        paper_doi: Some("10.1016/B978-0-444-53868-0.50009-5"),
    }),
    // The positional response table is described only here; it has no
    // `FlagSpec` entry below.
    usage_lines: &["<response.tsv> --constraints env.tsv [--scaling 1|2] [--scale-y] [-o out.tsv]"],
    sections: &[Section {
        title: "OPTIONS",
        flags: &[
            // --constraints: the only flag marked as required.
            FlagSpec {
                short: None,
                long: "constraints",
                aliases: &[],
                value: Some("<path>"),
                type_hint: Some("path"),
                required: true,
                default: None,
                description: "Explanatory-variable table (samples × variables).",
                why_default: None,
            },
            // --scaling: default matches `default_value_t = 1` on `Cli`.
            FlagSpec {
                short: None,
                long: "scaling",
                aliases: &[],
                value: Some("<1|2>"),
                type_hint: Some("u8"),
                required: false,
                default: Some("1"),
                description: "Scaling type: 1 distance biplot, 2 correlation biplot.",
                why_default: None,
            },
            // --scale-y: boolean switch, so it takes no value.
            FlagSpec {
                short: None,
                long: "scale-y",
                aliases: &[],
                value: None,
                type_hint: None,
                required: false,
                default: Some("false"),
                description: "Scale response columns to unit standard deviation.",
                why_default: None,
            },
            // --csv: boolean switch, so it takes no value.
            FlagSpec {
                short: None,
                long: "csv",
                aliases: &[],
                value: None,
                type_hint: None,
                required: false,
                default: Some("false"),
                description: "Parse inputs as comma-separated.",
                why_default: None,
            },
            // -o / --output: the only flag with a short form.
            FlagSpec {
                short: Some('o'),
                long: "output",
                aliases: &[],
                value: Some("<path>"),
                type_hint: Some("String"),
                required: false,
                default: Some("-"),
                description: "Output path (- for stdout).",
                why_default: None,
            },
        ],
    }],
    examples: &[
        Example {
            description: "RDA of a species table constrained by environment",
            command: "rsomics-rda species.tsv --constraints env.tsv",
        },
        Example {
            description: "Correlation biplot (scaling 2) into a file",
            command: "rsomics-rda species.tsv --constraints env.tsv --scaling 2 -o rda.tsv",
        },
    ],
    // No JSON result schema is documented for this tool.
    json_result_schema_doc: None,
};

#[cfg(test)]
mod tests {
    use clap::CommandFactory;

    use super::Cli;

    /// Builds the clap command generated for [`Cli`] and runs clap's own
    /// consistency assertions over it.
    #[test]
    fn cli_debug_assert() {
        let command = <Cli as CommandFactory>::command();
        command.debug_assert();
    }
}