Skip to main content

gapsmith_db/
pathway.rs

//! Pathway table loader — handles `meta_pwy.tbl`, `kegg_pwy.tbl`,
//! `seed_pwy.tbl`, `custom_pwy.tbl`.
//!
//! The four files are structurally identical apart from column count: some
//! have a trailing `spont` column, some don't. Columns are located by header
//! name (with `id` and `name` falling back to positions 0 and 1), so the
//! same loader handles all variants.
//!
//! Columns (max schema, 14):
//! `id, name, altname, hierarchy, taxrange, reaId, reaEc, keyRea, reaName,
//!  reaNr, ecNr, superpathway, status, spont`.
11
12use crate::common::{csv_err, io_err, DbError};
13use serde::{Deserialize, Serialize};
14use std::path::Path;
15
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
17#[serde(rename_all = "lowercase")]
18pub enum PwySource {
19    MetaCyc,
20    Kegg,
21    Seed,
22    Custom,
23}
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct PathwayRow {
27    pub id: String,
28    pub name: String,
29    #[serde(default, skip_serializing_if = "String::is_empty")]
30    pub altname: String,
31    #[serde(default, skip_serializing_if = "String::is_empty")]
32    pub hierarchy: String,
33    #[serde(default, skip_serializing_if = "String::is_empty")]
34    pub taxrange: String,
35    /// Comma-separated reaction ids (MetaCyc RXN ids, KEGG R-numbers, or SEED rxn ids).
36    #[serde(default, skip_serializing_if = "String::is_empty")]
37    pub rea_id: String,
38    #[serde(default, skip_serializing_if = "String::is_empty")]
39    pub rea_ec: String,
40    /// Comma-separated "key reaction" ids (presence ⇒ pathway considered present).
41    #[serde(default, skip_serializing_if = "String::is_empty")]
42    pub key_rea: String,
43    #[serde(default, skip_serializing_if = "String::is_empty")]
44    pub rea_name: String,
45    #[serde(default)]
46    pub rea_nr: u32,
47    #[serde(default)]
48    pub ec_nr: u32,
49    #[serde(default, skip_serializing_if = "String::is_empty")]
50    pub superpathway: String,
51    /// Free-form status string (usually `TRUE` / `FALSE` / empty).
52    #[serde(default, skip_serializing_if = "String::is_empty")]
53    pub status: String,
54    /// Comma-separated ids of reactions considered spontaneous. Empty when
55    /// the source file lacks the `spont` column.
56    #[serde(default, skip_serializing_if = "String::is_empty")]
57    pub spont: String,
58    pub source: PwySource,
59}
60
61impl PathwayRow {
62    pub fn rea_ids(&self) -> Vec<&str> {
63        self.rea_id.split(',').map(str::trim).filter(|s| !s.is_empty()).collect()
64    }
65    pub fn ec_list(&self) -> Vec<&str> {
66        self.rea_ec.split(',').map(str::trim).filter(|s| !s.is_empty()).collect()
67    }
68    pub fn key_rea_list(&self) -> Vec<&str> {
69        self.key_rea.split(',').map(str::trim).filter(|s| !s.is_empty()).collect()
70    }
71    pub fn spont_list(&self) -> Vec<&str> {
72        self.spont.split(',').map(str::trim).filter(|s| !s.is_empty()).collect()
73    }
74}
75
76#[derive(Debug, Default, Serialize, Deserialize)]
77pub struct PathwayTable {
78    pub source: Option<PwySource>,
79    pub rows: Vec<PathwayRow>,
80}
81
82impl PathwayTable {
83    pub fn load(path: impl AsRef<Path>, source: PwySource) -> Result<Self, DbError> {
84        let path = path.as_ref();
85        let f = std::fs::File::open(path).map_err(|e| io_err(path, e))?;
86        let mut rdr = csv::ReaderBuilder::new()
87            .delimiter(b'\t')
88            .has_headers(true)
89            .quoting(false)
90            .flexible(true)
91            .from_reader(f);
92        let headers = rdr.headers().map_err(|e| csv_err(path, e))?.clone();
93        let col = |name: &str| headers.iter().position(|h| h.trim() == name);
94        let c = Cols {
95            id: col("id").unwrap_or(0),
96            name: col("name").unwrap_or(1),
97            altname: col("altname"),
98            hierarchy: col("hierarchy"),
99            taxrange: col("taxrange"),
100            rea_id: col("reaId"),
101            rea_ec: col("reaEc"),
102            key_rea: col("keyRea"),
103            rea_name: col("reaName"),
104            rea_nr: col("reaNr"),
105            ec_nr: col("ecNr"),
106            superpathway: col("superpathway"),
107            status: col("status"),
108            spont: col("spont"),
109        };
110        let mut rows = Vec::new();
111        for rec in rdr.records() {
112            let rec = rec.map_err(|e| csv_err(path, e))?;
113            rows.push(PathwayRow {
114                id: rec.get(c.id).unwrap_or("").to_string(),
115                name: rec.get(c.name).unwrap_or("").to_string(),
116                altname: c.altname.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
117                hierarchy: c.hierarchy.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
118                taxrange: c.taxrange.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
119                rea_id: c.rea_id.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
120                rea_ec: c.rea_ec.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
121                key_rea: c.key_rea.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
122                rea_name: c.rea_name.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
123                rea_nr: c.rea_nr.and_then(|i| rec.get(i).and_then(|s| s.trim().parse().ok())).unwrap_or(0),
124                ec_nr: c.ec_nr.and_then(|i| rec.get(i).and_then(|s| s.trim().parse().ok())).unwrap_or(0),
125                superpathway: c.superpathway.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
126                status: c.status.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
127                spont: c.spont.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
128                source,
129            });
130        }
131        tracing::info!(path = %path.display(), rows = rows.len(), ?source, "loaded pathway table");
132        Ok(Self { source: Some(source), rows })
133    }
134
135    pub fn len(&self) -> usize {
136        self.rows.len()
137    }
138    pub fn is_empty(&self) -> bool {
139        self.rows.is_empty()
140    }
141}
142
/// Zero-based column positions resolved from a table's header row.
///
/// `id` and `name` always carry an index; every other column is `None` when
/// its header is not present in the file.
struct Cols {
    /// Position of the `id` column.
    id: usize,
    /// Position of the `name` column.
    name: usize,
    /// Position of the `altname` column, if present.
    altname: Option<usize>,
    /// Position of the `hierarchy` column, if present.
    hierarchy: Option<usize>,
    /// Position of the `taxrange` column, if present.
    taxrange: Option<usize>,
    /// Position of the `reaId` column, if present.
    rea_id: Option<usize>,
    /// Position of the `reaEc` column, if present.
    rea_ec: Option<usize>,
    /// Position of the `keyRea` column, if present.
    key_rea: Option<usize>,
    /// Position of the `reaName` column, if present.
    rea_name: Option<usize>,
    /// Position of the `reaNr` column, if present.
    rea_nr: Option<usize>,
    /// Position of the `ecNr` column, if present.
    ec_nr: Option<usize>,
    /// Position of the `superpathway` column, if present.
    superpathway: Option<usize>,
    /// Position of the `status` column, if present.
    status: Option<usize>,
    /// Position of the `spont` column, if present.
    spont: Option<usize>,
}
159
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Writes `header` and `row` as two lines to `file_name` inside a fresh
    /// temporary directory. The directory guard is returned so the file
    /// outlives the call.
    fn write_fixture(
        file_name: &str,
        header: &str,
        row: &str,
    ) -> (tempfile::TempDir, std::path::PathBuf) {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join(file_name);
        let mut out = std::fs::File::create(&path).unwrap();
        writeln!(out, "{}", header).unwrap();
        writeln!(out, "{}", row).unwrap();
        (dir, path)
    }

    /// Full 14-column schema, including the trailing `spont` column.
    #[test]
    fn parses_meta_pwy_schema() {
        let (_dir, path) = write_fixture(
            "m.tsv",
            "id\tname\taltname\thierarchy\ttaxrange\treaId\treaEc\tkeyRea\treaName\treaNr\tecNr\tsuperpathway\tstatus\tspont",
            "PWY-1\tExample\talt\th\ttax\trxn1,rxn2\t1.1.1.1\trxn1\tex\t2\t1\tFALSE\tTRUE\trxn2",
        );
        let table = PathwayTable::load(&path, PwySource::MetaCyc).unwrap();
        assert_eq!(table.rows.len(), 1);
        let row = &table.rows[0];
        assert_eq!(row.id, "PWY-1");
        assert_eq!(row.rea_ids(), vec!["rxn1", "rxn2"]);
        assert_eq!(row.key_rea_list(), vec!["rxn1"]);
        assert_eq!(row.spont_list(), vec!["rxn2"]);
        assert_eq!(row.rea_nr, 2);
    }

    /// 13-column schema: no `spont` header, so the field must stay empty.
    #[test]
    fn parses_kegg_pwy_without_spont() {
        let (_dir, path) = write_fixture(
            "k.tsv",
            "id\tname\taltname\thierarchy\ttaxrange\treaId\treaEc\tkeyRea\treaName\treaNr\tecNr\tsuperpathway\tstatus",
            "map00010\tGlycolysis\t\tkegg;Metabolism\t\tR01061\t1.2.1.12\t\t\t1\t1\tFALSE\tTRUE",
        );
        let table = PathwayTable::load(&path, PwySource::Kegg).unwrap();
        assert_eq!(table.rows.len(), 1);
        assert!(table.rows[0].spont.is_empty());
    }
}