rsomics-gradient-trajectory 0.1.0

Gradient/trajectory ANOVA over ordination coordinates (QIIME-style microbiome trajectory analysis): per-group trajectory vectors plus closed-form one-way ANOVA F/p, selectable algorithm — a Rust reimplementation of scikit-bio's skbio.stats.gradient.
Documentation
use std::io::{BufRead, Write};

use rsomics_common::{Result, RsomicsError};

mod anova;
mod fmt;
mod gradient;
mod io;
mod natsort;

pub use gradient::{Algorithm, GradientResult, Params, gradient_anova};
pub use io::{Coords, Metadata, parse_coords, parse_metadata, parse_prop};

use fmt::pyrepr;

/// Write the gradient ANOVA result as TSV. One block per category: a header line
/// with the ANOVA probability (or the skip message), then for each group a line
/// with its mean and the trajectory components.
///
/// # Errors
/// Propagates write errors.
pub fn write_tsv<W: Write>(res: &GradientResult, mut out: W) -> Result<()> {
    writeln!(out, "# algorithm\t{}", res.algorithm).map_err(RsomicsError::Io)?;
    writeln!(out, "# weighted\t{}", res.weighted).map_err(RsomicsError::Io)?;
    for cat in &res.categories {
        match cat.probability {
            Some(p) => writeln!(
                out,
                "category\t{}\tprobability\t{}",
                cat.category,
                pyrepr(p)
            ),
            None => writeln!(
                out,
                "category\t{}\tprobability\tNA\t{}",
                cat.category,
                cat.message.as_deref().unwrap_or("")
            ),
        }
        .map_err(RsomicsError::Io)?;
        if let Some(groups) = &cat.groups {
            for g in groups {
                let traj = g
                    .trajectory
                    .iter()
                    .map(|&x| pyrepr(x))
                    .collect::<Vec<_>>()
                    .join(",");
                writeln!(
                    out,
                    "group\t{}\tmean\t{}\ttrajectory\t{}",
                    g.name,
                    pyrepr(g.mean),
                    traj
                )
                .map_err(RsomicsError::Io)?;
            }
        }
    }
    Ok(())
}

/// # Errors
/// Propagates parse, computation, and write errors.
#[allow(clippy::too_many_arguments)]
pub fn run<W: Write>(
    coords_reader: impl BufRead,
    prop_reader: impl BufRead,
    meta_reader: impl BufRead,
    out: W,
    delim: char,
    params: &Params,
) -> Result<()> {
    let coords = parse_coords(coords_reader, delim)?;
    let prop = parse_prop(prop_reader)?;
    let meta = parse_metadata(meta_reader, delim)?;
    let trajectory_categories = if params.trajectory_categories.is_empty() {
        meta.columns.clone()
    } else {
        params.trajectory_categories.to_vec()
    };
    let effective = Params {
        trajectory_categories: &trajectory_categories,
        ..*params
    };
    let res = gradient_anova(&coords, &prop, &meta, &effective)?;
    write_tsv(&res, out)
}

#[cfg(test)]
mod tests {
    use super::*;

    // skbio docstring example: AverageGradientANOVA over Treatment, sort by Weight.
    const COORDS: &str = "#id\tPC1\tPC2\tPC3\tPC4\n\
        PC.354\t0.2761\t-0.0341\t0.0633\t0.1004\n\
        PC.355\t0.2364\t0.2186\t-0.0301\t-0.0225\n\
        PC.356\t0.2208\t0.0874\t-0.3519\t-0.0031\n\
        PC.607\t-0.1055\t-0.4140\t-0.15\t-0.116\n\
        PC.634\t-0.3716\t0.1154\t0.0721\t0.0898\n";
    const PROP: &str = "25.6216\n15.7715\n14.1215\n11.6913\n9.8304\n";
    const META: &str = "#SampleID\tTreatment\tWeight\n\
        PC.354\tControl\t60\n\
        PC.355\tControl\t55\n\
        PC.356\tControl\t50\n\
        PC.607\tFast\t65\n\
        PC.634\tFast\t68\n";

    fn parse_all() -> (Coords, Vec<f64>, Metadata) {
        (
            parse_coords(COORDS.as_bytes(), '\t').unwrap(),
            parse_prop(PROP.as_bytes()).unwrap(),
            parse_metadata(META.as_bytes(), '\t').unwrap(),
        )
    }

    #[test]
    fn average_matches_skbio_docstring() {
        let (c, p, m) = parse_all();
        let cats = vec!["Treatment".to_string()];
        let params = Params {
            algorithm: Algorithm::Average,
            trajectory_categories: &cats,
            sort_category: Some("Weight"),
            axes: 3,
            weighted: false,
            window_size: 0,
        };
        let res = gradient_anova(&c, &p, &m, &params).unwrap();
        assert_eq!(res.algorithm, "avg");
        let cat = &res.categories[0];
        assert_eq!(cat.category, "Treatment");
        let p = cat.probability.unwrap();
        assert!((p - 0.011_847_828_238_170_035).abs() < 1e-10, "p = {p}");
        let groups = cat.groups.as_ref().unwrap();
        let control = groups.iter().find(|g| g.name == "Control").unwrap();
        // skbio: Control trajectory [3.52199973, 2.29597001, 3.20309816], avg 3.00702263
        assert!((control.trajectory[0] - 3.521_999_733_386_39).abs() < 1e-9);
        assert!((control.trajectory[1] - 2.295_970_013_149_5).abs() < 1e-9);
        assert!((control.trajectory[2] - 3.203_098_155_333_92).abs() < 1e-9);
        assert!((control.mean - 3.007_022_633_956_61).abs() < 1e-9);
    }

    #[test]
    fn trajectory_small_group_skips_anova() {
        // Fast group has 2 samples -> trajectory length 1 -> ANOVA can't run.
        let (c, p, m) = parse_all();
        let cats = vec!["Treatment".to_string()];
        let params = Params {
            algorithm: Algorithm::Trajectory,
            trajectory_categories: &cats,
            sort_category: Some("Weight"),
            axes: 3,
            weighted: false,
            window_size: 0,
        };
        let res = gradient_anova(&c, &p, &m, &params).unwrap();
        let cat = &res.categories[0];
        assert!(cat.probability.is_none());
        assert!(
            cat.message
                .as_deref()
                .unwrap()
                .contains("more than 1 element")
        );
    }
}