rsomics-gradient-trajectory 0.1.0

Gradient/trajectory ANOVA over ordination coordinates (QIIME-style microbiome trajectory analysis): computes per-group trajectory vectors and a closed-form one-way ANOVA (F statistic and p-value) using a selectable algorithm. A Rust reimplementation of scikit-bio's skbio.stats.gradient.
Documentation
use std::fs::File;
use std::io::{BufReader, BufWriter, Write};
use std::path::PathBuf;

use clap::{Parser, ValueEnum};
use rsomics_common::{CommonFlags, Result, RsomicsError, Tool, ToolMeta};
use rsomics_help::{Example, FlagSpec, HelpSpec, Origin, Section};

use rsomics_gradient_trajectory::{Algorithm, Params, run};

/// Tool identity returned by `Cli::meta`.
///
/// Both fields are filled in at compile time from the Cargo package
/// metadata (`CARGO_PKG_NAME` / `CARGO_PKG_VERSION`), so they track
/// `Cargo.toml` without manual edits.
pub const META: ToolMeta = ToolMeta {
    name: env!("CARGO_PKG_NAME"),
    version: env!("CARGO_PKG_VERSION"),
};

/// Command-line selector for the trajectory algorithm.
///
/// Parsed by clap through the `ValueEnum` derive and converted into the
/// library's `Algorithm` by the `From<Algo>` impl in this file. The help
/// spec in this file lists the command-line spellings as
/// `trajectory|average|first-difference|window-difference`.
#[derive(Copy, Clone, Debug, ValueEnum)]
pub enum Algo {
    // NOTE: variant notes are plain comments rather than doc comments so the
    // per-value help text clap derives for `ValueEnum` variants is unchanged.
    // Default choice for `--algorithm` (see `Cli::algorithm`).
    Trajectory,
    Average,
    FirstDifference,
    // Presumably the variant that consumes `--window-size` — confirm in the
    // library crate.
    WindowDifference,
}

/// Converts the CLI-level algorithm choice into the library's `Algorithm`.
///
/// The mapping is one-to-one: every `Algo` variant becomes the `Algorithm`
/// variant of the same name.
impl From<Algo> for Algorithm {
    fn from(choice: Algo) -> Self {
        // Exhaustive on purpose: adding an `Algo` variant must fail to compile
        // here until its library counterpart is wired up.
        match choice {
            Algo::Trajectory => Self::Trajectory,
            Algo::Average => Self::Average,
            Algo::FirstDifference => Self::FirstDifference,
            Algo::WindowDifference => Self::WindowDifference,
        }
    }
}

// Command-line arguments for the gradient/trajectory tool, parsed by clap.
//
// Kept as a plain comment rather than a doc comment: clap's derive reads
// struct-level doc comments as `about` text, and that text is configured
// explicitly in the `#[command(...)]` attribute below.
#[derive(Parser, Debug)]
#[command(name = "rsomics-gradient-trajectory", version, about, long_about = None, disable_help_flag = true)]
pub struct Cli {
    /// Ordination coordinates TSV/CSV (samples x PC axes); reads stdin when "-".
    #[arg(default_value = "-")]
    coords: PathBuf,

    /// Proportion-explained vector (one value per axis).
    #[arg(long)]
    prop: PathBuf,

    /// Sample metadata TSV/CSV (id + categorical/numeric columns).
    #[arg(long)]
    metadata: PathBuf,

    /// Trajectory algorithm (skbio RMS/avg/diff/wdiff).
    #[arg(long, value_enum, default_value_t = Algo::Trajectory)]
    algorithm: Algo,

    /// Metadata categories to build trajectories for (comma-separated; all if omitted).
    #[arg(long, value_delimiter = ',')]
    trajectory_categories: Vec<String>,

    /// Metadata category whose value orders samples within each group.
    #[arg(long)]
    sort_category: Option<String>,

    /// Number of PC axes to use.
    #[arg(long, default_value_t = 3)]
    axes: usize,

    /// Weight trajectories by spacing in the (numeric) sort category.
    #[arg(long, default_value_t = false)]
    weighted: bool,

    /// Window size for the window-difference algorithm.
    #[arg(long, default_value_t = 3)]
    window_size: usize,

    /// Parse inputs as comma-separated instead of tab-separated.
    #[arg(long, default_value_t = false)]
    csv: bool,

    /// Output path (- for stdout).
    #[arg(short = 'o', long, default_value = "-")]
    output: String,

    // Flags shared across tools, flattened into this parser and exposed to
    // the framework through `Tool::common`.
    #[command(flatten)]
    pub common: CommonFlags,
}

impl Tool for Cli {
    fn meta() -> ToolMeta {
        META
    }
    fn common(&self) -> &CommonFlags {
        &self.common
    }

    fn execute(self) -> Result<()> {
        let delim = if self.csv { ',' } else { '\t' };
        let coords_reader: Box<dyn std::io::BufRead> = if self.coords.as_os_str() == "-" {
            Box::new(BufReader::new(std::io::stdin().lock()))
        } else {
            Box::new(BufReader::new(File::open(&self.coords).map_err(|e| {
                RsomicsError::InvalidInput(format!("{}: {e}", self.coords.display()))
            })?))
        };
        let prop_reader =
            BufReader::new(File::open(&self.prop).map_err(|e| {
                RsomicsError::InvalidInput(format!("{}: {e}", self.prop.display()))
            })?);
        let meta_reader = BufReader::new(File::open(&self.metadata).map_err(|e| {
            RsomicsError::InvalidInput(format!("{}: {e}", self.metadata.display()))
        })?);
        let mut out: Box<dyn Write> = if self.output == "-" {
            Box::new(BufWriter::new(std::io::stdout().lock()))
        } else {
            Box::new(BufWriter::new(
                File::create(&self.output).map_err(RsomicsError::Io)?,
            ))
        };

        let params = Params {
            algorithm: self.algorithm.into(),
            trajectory_categories: &self.trajectory_categories,
            sort_category: self.sort_category.as_deref(),
            axes: self.axes,
            weighted: self.weighted,
            window_size: self.window_size,
        };
        run(
            coords_reader,
            prop_reader,
            meta_reader,
            &mut out,
            delim,
            &params,
        )?;
        out.flush().map_err(RsomicsError::Io)
    }
}

/// Static help specification for this tool.
///
/// clap's built-in help flag is disabled on `Cli`, so this spec is what
/// describes the tool's own options; every flag declared directly on `Cli`
/// should therefore have a matching `FlagSpec` here. The positional
/// coordinates argument appears only in the usage line, and the flags
/// contributed by `CommonFlags` are not listed in this spec.
pub static HELP: HelpSpec = HelpSpec {
    name: env!("CARGO_PKG_NAME"),
    version: env!("CARGO_PKG_VERSION"),
    tagline: "Gradient/trajectory ANOVA over ordination coordinates.",
    origin: Some(Origin {
        upstream: "scikit-bio skbio.stats.gradient",
        upstream_license: "BSD-3-Clause",
        our_license: "MIT OR Apache-2.0",
        paper_doi: Some("10.1186/2047-217X-2-16"),
    }),
    usage_lines: &[
        "[coords.tsv] --prop prop.tsv --metadata meta.tsv [--algorithm trajectory] \
         [--sort-category Time] [--trajectory-categories Group] [-o result.tsv]",
    ],
    sections: &[Section {
        title: "OPTIONS",
        flags: &[
            FlagSpec {
                short: None,
                long: "prop",
                aliases: &[],
                value: Some("<path>"),
                type_hint: None,
                required: true,
                default: None,
                description: "Proportion-explained vector (one value per axis).",
                why_default: None,
            },
            FlagSpec {
                short: None,
                long: "metadata",
                aliases: &[],
                value: Some("<path>"),
                type_hint: None,
                required: true,
                default: None,
                description: "Sample metadata TSV/CSV (id + columns).",
                why_default: None,
            },
            FlagSpec {
                short: None,
                long: "algorithm",
                aliases: &[],
                value: Some("<algo>"),
                type_hint: Some("trajectory|average|first-difference|window-difference"),
                required: false,
                default: Some("trajectory"),
                description: "Trajectory algorithm (skbio RMS/avg/diff/wdiff).",
                why_default: None,
            },
            FlagSpec {
                short: None,
                long: "trajectory-categories",
                aliases: &[],
                value: Some("<cols>"),
                type_hint: None,
                required: false,
                default: None,
                description: "Categories to analyse (comma-separated; all if omitted).",
                why_default: None,
            },
            FlagSpec {
                short: None,
                long: "sort-category",
                aliases: &[],
                value: Some("<col>"),
                type_hint: None,
                required: false,
                default: None,
                description: "Metadata column ordering samples within a group.",
                why_default: None,
            },
            FlagSpec {
                short: None,
                long: "axes",
                aliases: &[],
                value: Some("<n>"),
                type_hint: Some("usize"),
                required: false,
                default: Some("3"),
                description: "Number of PC axes to use.",
                why_default: None,
            },
            FlagSpec {
                short: None,
                long: "weighted",
                aliases: &[],
                value: None,
                type_hint: None,
                required: false,
                default: Some("false"),
                description: "Weight by spacing in the numeric sort category.",
                why_default: None,
            },
            FlagSpec {
                short: None,
                long: "window-size",
                aliases: &[],
                value: Some("<n>"),
                type_hint: Some("usize"),
                required: false,
                default: Some("3"),
                description: "Window size (window-difference algorithm).",
                why_default: None,
            },
            // Boolean switch, modelled on the `weighted` entry above: no value
            // placeholder, default "false".
            FlagSpec {
                short: None,
                long: "csv",
                aliases: &[],
                value: None,
                type_hint: None,
                required: false,
                default: Some("false"),
                description: "Parse inputs as comma-separated instead of tab-separated.",
                why_default: None,
            },
            FlagSpec {
                short: Some('o'),
                long: "output",
                aliases: &[],
                value: Some("<path>"),
                type_hint: Some("String"),
                required: false,
                default: Some("-"),
                description: "Output path (- for stdout).",
                why_default: None,
            },
        ],
    }],
    examples: &[Example {
        description: "RMS trajectory ANOVA over Group, sorted by Time",
        command: "rsomics-gradient-trajectory coords.tsv --prop prop.tsv --metadata meta.tsv \
                  --trajectory-categories Group --sort-category Time",
    }],
    json_result_schema_doc: None,
};

// Unit tests for the command-line definition.
#[cfg(test)]
mod tests {
    use super::*;
    use clap::CommandFactory;

    // Builds the clap `Command` generated for `Cli` and runs clap's
    // `debug_assert` self-check on it, so a misconfigured argument definition
    // fails this test rather than surfacing when the binary is run.
    #[test]
    fn cli_debug_assert() {
        Cli::command().debug_assert();
    }
}