Skip to main content

rsomics_gradient_trajectory/
lib.rs

1use std::io::{BufRead, Write};
2
3use rsomics_common::{Result, RsomicsError};
4
5mod anova;
6mod fmt;
7mod gradient;
8mod io;
9mod natsort;
10
11pub use gradient::{Algorithm, GradientResult, Params, gradient_anova};
12pub use io::{Coords, Metadata, parse_coords, parse_metadata, parse_prop};
13
14use fmt::pyrepr;
15
16/// Write the gradient ANOVA result as TSV. One block per category: a header line
17/// with the ANOVA probability (or the skip message), then for each group a line
18/// with its mean and the trajectory components.
19///
20/// # Errors
21/// Propagates write errors.
22pub fn write_tsv<W: Write>(res: &GradientResult, mut out: W) -> Result<()> {
23    writeln!(out, "# algorithm\t{}", res.algorithm).map_err(RsomicsError::Io)?;
24    writeln!(out, "# weighted\t{}", res.weighted).map_err(RsomicsError::Io)?;
25    for cat in &res.categories {
26        match cat.probability {
27            Some(p) => writeln!(
28                out,
29                "category\t{}\tprobability\t{}",
30                cat.category,
31                pyrepr(p)
32            ),
33            None => writeln!(
34                out,
35                "category\t{}\tprobability\tNA\t{}",
36                cat.category,
37                cat.message.as_deref().unwrap_or("")
38            ),
39        }
40        .map_err(RsomicsError::Io)?;
41        if let Some(groups) = &cat.groups {
42            for g in groups {
43                let traj = g
44                    .trajectory
45                    .iter()
46                    .map(|&x| pyrepr(x))
47                    .collect::<Vec<_>>()
48                    .join(",");
49                writeln!(
50                    out,
51                    "group\t{}\tmean\t{}\ttrajectory\t{}",
52                    g.name,
53                    pyrepr(g.mean),
54                    traj
55                )
56                .map_err(RsomicsError::Io)?;
57            }
58        }
59    }
60    Ok(())
61}
62
63/// # Errors
64/// Propagates parse, computation, and write errors.
65#[allow(clippy::too_many_arguments)]
66pub fn run<W: Write>(
67    coords_reader: impl BufRead,
68    prop_reader: impl BufRead,
69    meta_reader: impl BufRead,
70    out: W,
71    delim: char,
72    params: &Params,
73) -> Result<()> {
74    let coords = parse_coords(coords_reader, delim)?;
75    let prop = parse_prop(prop_reader)?;
76    let meta = parse_metadata(meta_reader, delim)?;
77    let trajectory_categories = if params.trajectory_categories.is_empty() {
78        meta.columns.clone()
79    } else {
80        params.trajectory_categories.to_vec()
81    };
82    let effective = Params {
83        trajectory_categories: &trajectory_categories,
84        ..*params
85    };
86    let res = gradient_anova(&coords, &prop, &meta, &effective)?;
87    write_tsv(&res, out)
88}
89
90#[cfg(test)]
91mod tests {
92    use super::*;
93
94    // skbio docstring example: AverageGradientANOVA over Treatment, sort by Weight.
95    const COORDS: &str = "#id\tPC1\tPC2\tPC3\tPC4\n\
96        PC.354\t0.2761\t-0.0341\t0.0633\t0.1004\n\
97        PC.355\t0.2364\t0.2186\t-0.0301\t-0.0225\n\
98        PC.356\t0.2208\t0.0874\t-0.3519\t-0.0031\n\
99        PC.607\t-0.1055\t-0.4140\t-0.15\t-0.116\n\
100        PC.634\t-0.3716\t0.1154\t0.0721\t0.0898\n";
101    const PROP: &str = "25.6216\n15.7715\n14.1215\n11.6913\n9.8304\n";
102    const META: &str = "#SampleID\tTreatment\tWeight\n\
103        PC.354\tControl\t60\n\
104        PC.355\tControl\t55\n\
105        PC.356\tControl\t50\n\
106        PC.607\tFast\t65\n\
107        PC.634\tFast\t68\n";
108
109    fn parse_all() -> (Coords, Vec<f64>, Metadata) {
110        (
111            parse_coords(COORDS.as_bytes(), '\t').unwrap(),
112            parse_prop(PROP.as_bytes()).unwrap(),
113            parse_metadata(META.as_bytes(), '\t').unwrap(),
114        )
115    }
116
117    #[test]
118    fn average_matches_skbio_docstring() {
119        let (c, p, m) = parse_all();
120        let cats = vec!["Treatment".to_string()];
121        let params = Params {
122            algorithm: Algorithm::Average,
123            trajectory_categories: &cats,
124            sort_category: Some("Weight"),
125            axes: 3,
126            weighted: false,
127            window_size: 0,
128        };
129        let res = gradient_anova(&c, &p, &m, &params).unwrap();
130        assert_eq!(res.algorithm, "avg");
131        let cat = &res.categories[0];
132        assert_eq!(cat.category, "Treatment");
133        let p = cat.probability.unwrap();
134        assert!((p - 0.011_847_828_238_170_035).abs() < 1e-10, "p = {p}");
135        let groups = cat.groups.as_ref().unwrap();
136        let control = groups.iter().find(|g| g.name == "Control").unwrap();
137        // skbio: Control trajectory [3.52199973, 2.29597001, 3.20309816], avg 3.00702263
138        assert!((control.trajectory[0] - 3.521_999_733_386_39).abs() < 1e-9);
139        assert!((control.trajectory[1] - 2.295_970_013_149_5).abs() < 1e-9);
140        assert!((control.trajectory[2] - 3.203_098_155_333_92).abs() < 1e-9);
141        assert!((control.mean - 3.007_022_633_956_61).abs() < 1e-9);
142    }
143
144    #[test]
145    fn trajectory_small_group_skips_anova() {
146        // Fast group has 2 samples -> trajectory length 1 -> ANOVA can't run.
147        let (c, p, m) = parse_all();
148        let cats = vec!["Treatment".to_string()];
149        let params = Params {
150            algorithm: Algorithm::Trajectory,
151            trajectory_categories: &cats,
152            sort_category: Some("Weight"),
153            axes: 3,
154            weighted: false,
155            window_size: 0,
156        };
157        let res = gradient_anova(&c, &p, &m, &params).unwrap();
158        let cat = &res.categories[0];
159        assert!(cat.probability.is_none());
160        assert!(
161            cat.message
162                .as_deref()
163                .unwrap()
164                .contains("more than 1 element")
165        );
166    }
167}