//! rsomics-gradient-trajectory 0.1.0
//!
//! Gradient/trajectory ANOVA over ordination coordinates (QIIME-style microbiome trajectory analysis): per-group trajectory vectors plus a closed-form one-way ANOVA F statistic and p-value, with a selectable trajectory algorithm. A Rust reimplementation of scikit-bio's `skbio.stats.gradient`.
//!
//! Documentation
use std::collections::HashMap;
use std::io::BufRead;

use rsomics_common::{Result, RsomicsError};

/// Ordination coordinates: a matrix of per-sample PC axis values, keyed by
/// sample id.
///
/// The input is either a plain TSV/CSV (`#id` header then one row per sample)
/// or a scikit-bio `OrdinationResults` file, whose `Site` block carries the
/// same matrix and whose `Proportion explained` block carries the per-axis
/// proportion-explained vector. That vector is not stored in this struct; it
/// holds only the sample ids and the coordinate matrix.
#[derive(Debug, Clone, PartialEq)]
pub struct Coords {
    /// Sample ids, one per matrix row, in row order.
    pub ids: Vec<String>,
    /// Number of axes (columns) in every row.
    pub naxes: usize,
    /// Row-major `ids.len() x naxes`.
    pub data: Vec<f64>,
}

impl Coords {
    /// Returns the `naxes` coordinate values of the `i`-th sample.
    ///
    /// # Panics
    ///
    /// Panics if row `i` lies outside `data`, i.e. when
    /// `(i + 1) * naxes > data.len()`.
    pub fn row(&self, i: usize) -> &[f64] {
        let start = i * self.naxes;
        &self.data[start..start + self.naxes]
    }
}

/// Parses a delimited coordinate table into a [`Coords`] matrix.
///
/// Blank lines and lines starting with `#` (such as the `#id` header) are
/// skipped. On every other line the first `delim`-separated field is the
/// sample id and each remaining field is parsed as an `f64` axis value.
/// Trailing whitespace on a line is ignored, and each field is trimmed.
///
/// # Errors
///
/// * [`RsomicsError::Io`] if reading a line fails.
/// * [`RsomicsError::InvalidInput`] if a value is not numeric, if a row's
///   axis count differs from the first data row's, or if there are no data
///   rows at all.
pub fn parse_coords<R: BufRead>(reader: R, delim: char) -> Result<Coords> {
    let mut ids = Vec::new();
    let mut data = Vec::new();
    // `None` until the first data row fixes the axis count. A plain `0`
    // sentinel would let a value-less first row be silently overridden by a
    // later row, leaving `data` out of step with `ids`.
    let mut naxes: Option<usize> = None;
    for line in reader.lines() {
        let line = line.map_err(RsomicsError::Io)?;
        let t = line.trim_end();
        if t.is_empty() || t.starts_with('#') {
            continue;
        }
        let mut fields = t.split(delim);
        // `split` always yields at least one item, so the default is never used.
        let id = fields.next().unwrap_or_default().trim().to_string();
        let vals: Vec<f64> = fields
            .map(|f| {
                f.trim().parse::<f64>().map_err(|_| {
                    RsomicsError::InvalidInput(format!("sample '{id}': '{f}' is not numeric"))
                })
            })
            .collect::<Result<_>>()?;
        match naxes {
            None => naxes = Some(vals.len()),
            Some(expected) if vals.len() != expected => {
                return Err(RsomicsError::InvalidInput(format!(
                    "sample '{id}' has {} axes, expected {expected}",
                    vals.len()
                )));
            }
            Some(_) => {}
        }
        ids.push(id);
        data.extend(vals);
    }
    // `naxes` is still `None` exactly when no data row was seen.
    let naxes =
        naxes.ok_or_else(|| RsomicsError::InvalidInput("no coordinate rows".into()))?;
    Ok(Coords { ids, naxes, data })
}

/// Reads the proportion-explained vector: numeric tokens separated by commas,
/// tabs or spaces, spread over any number of lines. Empty tokens are ignored.
///
/// # Errors
///
/// * [`RsomicsError::Io`] if reading a line fails.
/// * [`RsomicsError::InvalidInput`] if a token is not numeric or if no value
///   was found at all.
pub fn parse_prop<R: BufRead>(reader: R) -> Result<Vec<f64>> {
    let mut values: Vec<f64> = Vec::new();
    for raw in reader.lines() {
        let raw = raw.map_err(RsomicsError::Io)?;
        let tokens = raw
            .split(|c: char| matches!(c, ',' | '\t' | ' '))
            .map(str::trim)
            .filter(|t| !t.is_empty());
        for t in tokens {
            match t.parse::<f64>() {
                Ok(v) => values.push(v),
                Err(_) => {
                    return Err(RsomicsError::InvalidInput(format!(
                        "proportion '{t}' is not numeric"
                    )));
                }
            }
        }
    }
    if values.is_empty() {
        Err(RsomicsError::InvalidInput(
            "empty proportion-explained vector".into(),
        ))
    } else {
        Ok(values)
    }
}

/// Sample metadata: a `#SampleID` header naming the columns, then one row per
/// sample. Values are strings; numeric coercion happens at use sites (sort,
/// weighting).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Metadata {
    /// Column (category) names, excluding the leading sample-id column.
    pub columns: Vec<String>,
    /// Per-sample values keyed by sample id; each vector is indexed like
    /// `columns`.
    pub rows: HashMap<String, Vec<String>>,
}

impl Metadata {
    /// Returns the position of the column called `name`.
    ///
    /// # Errors
    ///
    /// Returns [`RsomicsError::InvalidInput`] if no column has that name.
    pub fn col_index(&self, name: &str) -> Result<usize> {
        match self.columns.iter().position(|c| c == name) {
            Some(idx) => Ok(idx),
            None => Err(RsomicsError::InvalidInput(format!(
                "category '{name}' not in metadata"
            ))),
        }
    }

    /// Returns the value stored for sample `sid` in column `col`.
    ///
    /// # Panics
    ///
    /// Panics if `sid` is not a key of `rows` or if `col` is out of range for
    /// that sample's values.
    pub fn value<'a>(&'a self, sid: &str, col: usize) -> &'a str {
        let values = &self.rows[sid];
        &values[col]
    }
}

/// Parses a delimited sample-metadata table into a [`Metadata`].
///
/// The first non-blank line is the header: leading `#` characters are
/// stripped, the first field (the sample-id column name) is dropped, and the
/// remaining trimmed fields become the column names. After the header, blank
/// lines and lines starting with `#` are skipped; on every other line the
/// first field is the sample id and the rest are that sample's trimmed values.
/// If a sample id occurs more than once, the last row wins.
///
/// Empty cells at the end of a row are kept, so a row such as `s1\tControl\t`
/// supplies an empty last value. Surplus empty cells beyond the header's
/// column count (trailing delimiter padding) are ignored.
///
/// # Errors
///
/// * [`RsomicsError::Io`] if reading a line fails.
/// * [`RsomicsError::InvalidInput`] if the input has no header line or if a
///   row's value count does not match the number of columns.
pub fn parse_metadata<R: BufRead>(reader: R, delim: char) -> Result<Metadata> {
    let mut lines = reader.lines();
    let header = loop {
        match lines.next() {
            Some(l) => {
                let l = l.map_err(RsomicsError::Io)?;
                if !l.trim().is_empty() {
                    break l;
                }
            }
            None => return Err(RsomicsError::InvalidInput("empty metadata".into())),
        }
    };
    let columns: Vec<String> = header
        .trim_start_matches('#')
        .split(delim)
        .skip(1)
        .map(|s| s.trim().to_string())
        .collect();
    let mut rows = HashMap::new();
    for line in lines {
        let line = line.map_err(RsomicsError::Io)?;
        if line.trim_end().is_empty() || line.starts_with('#') {
            continue;
        }
        // Split the untrimmed line: trimming first would strip trailing
        // whitespace delimiters (e.g. tabs) and with them any empty cells at
        // the end of the row.
        let mut fields = line.split(delim);
        // `split` always yields at least one item, so the default is never used.
        let sid = fields.next().unwrap_or_default().trim().to_string();
        let mut vals: Vec<String> = fields.map(|f| f.trim().to_string()).collect();
        // Tolerate trailing delimiter padding: drop empty cells beyond the
        // expected column count, but never real values.
        while vals.len() > columns.len() && matches!(vals.last(), Some(v) if v.is_empty()) {
            vals.pop();
        }
        if vals.len() != columns.len() {
            return Err(RsomicsError::InvalidInput(format!(
                "metadata row '{sid}' has {} values, expected {}",
                vals.len(),
                columns.len()
            )));
        }
        rows.insert(sid, vals);
    }
    Ok(Metadata { columns, rows })
}