1use crate::common::{csv_err, io_err, DbError};
13use serde::{Deserialize, Serialize};
14use std::path::Path;
15
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
17#[serde(rename_all = "lowercase")]
18pub enum PwySource {
19 MetaCyc,
20 Kegg,
21 Seed,
22 Custom,
23}
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct PathwayRow {
27 pub id: String,
28 pub name: String,
29 #[serde(default, skip_serializing_if = "String::is_empty")]
30 pub altname: String,
31 #[serde(default, skip_serializing_if = "String::is_empty")]
32 pub hierarchy: String,
33 #[serde(default, skip_serializing_if = "String::is_empty")]
34 pub taxrange: String,
35 #[serde(default, skip_serializing_if = "String::is_empty")]
37 pub rea_id: String,
38 #[serde(default, skip_serializing_if = "String::is_empty")]
39 pub rea_ec: String,
40 #[serde(default, skip_serializing_if = "String::is_empty")]
42 pub key_rea: String,
43 #[serde(default, skip_serializing_if = "String::is_empty")]
44 pub rea_name: String,
45 #[serde(default)]
46 pub rea_nr: u32,
47 #[serde(default)]
48 pub ec_nr: u32,
49 #[serde(default, skip_serializing_if = "String::is_empty")]
50 pub superpathway: String,
51 #[serde(default, skip_serializing_if = "String::is_empty")]
53 pub status: String,
54 #[serde(default, skip_serializing_if = "String::is_empty")]
57 pub spont: String,
58 pub source: PwySource,
59}
60
61impl PathwayRow {
62 pub fn rea_ids(&self) -> Vec<&str> {
63 self.rea_id.split(',').map(str::trim).filter(|s| !s.is_empty()).collect()
64 }
65 pub fn ec_list(&self) -> Vec<&str> {
66 self.rea_ec.split(',').map(str::trim).filter(|s| !s.is_empty()).collect()
67 }
68 pub fn key_rea_list(&self) -> Vec<&str> {
69 self.key_rea.split(',').map(str::trim).filter(|s| !s.is_empty()).collect()
70 }
71 pub fn spont_list(&self) -> Vec<&str> {
72 self.spont.split(',').map(str::trim).filter(|s| !s.is_empty()).collect()
73 }
74}
75
76#[derive(Debug, Default, Serialize, Deserialize)]
77pub struct PathwayTable {
78 pub source: Option<PwySource>,
79 pub rows: Vec<PathwayRow>,
80}
81
82impl PathwayTable {
83 pub fn load(path: impl AsRef<Path>, source: PwySource) -> Result<Self, DbError> {
84 let path = path.as_ref();
85 let f = std::fs::File::open(path).map_err(|e| io_err(path, e))?;
86 let mut rdr = csv::ReaderBuilder::new()
87 .delimiter(b'\t')
88 .has_headers(true)
89 .quoting(false)
90 .flexible(true)
91 .from_reader(f);
92 let headers = rdr.headers().map_err(|e| csv_err(path, e))?.clone();
93 let col = |name: &str| headers.iter().position(|h| h.trim() == name);
94 let c = Cols {
95 id: col("id").unwrap_or(0),
96 name: col("name").unwrap_or(1),
97 altname: col("altname"),
98 hierarchy: col("hierarchy"),
99 taxrange: col("taxrange"),
100 rea_id: col("reaId"),
101 rea_ec: col("reaEc"),
102 key_rea: col("keyRea"),
103 rea_name: col("reaName"),
104 rea_nr: col("reaNr"),
105 ec_nr: col("ecNr"),
106 superpathway: col("superpathway"),
107 status: col("status"),
108 spont: col("spont"),
109 };
110 let mut rows = Vec::new();
111 for rec in rdr.records() {
112 let rec = rec.map_err(|e| csv_err(path, e))?;
113 rows.push(PathwayRow {
114 id: rec.get(c.id).unwrap_or("").to_string(),
115 name: rec.get(c.name).unwrap_or("").to_string(),
116 altname: c.altname.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
117 hierarchy: c.hierarchy.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
118 taxrange: c.taxrange.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
119 rea_id: c.rea_id.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
120 rea_ec: c.rea_ec.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
121 key_rea: c.key_rea.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
122 rea_name: c.rea_name.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
123 rea_nr: c.rea_nr.and_then(|i| rec.get(i).and_then(|s| s.trim().parse().ok())).unwrap_or(0),
124 ec_nr: c.ec_nr.and_then(|i| rec.get(i).and_then(|s| s.trim().parse().ok())).unwrap_or(0),
125 superpathway: c.superpathway.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
126 status: c.status.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
127 spont: c.spont.and_then(|i| rec.get(i)).unwrap_or("").to_string(),
128 source,
129 });
130 }
131 tracing::info!(path = %path.display(), rows = rows.len(), ?source, "loaded pathway table");
132 Ok(Self { source: Some(source), rows })
133 }
134
135 pub fn len(&self) -> usize {
136 self.rows.len()
137 }
138 pub fn is_empty(&self) -> bool {
139 self.rows.is_empty()
140 }
141}
142
/// Column positions resolved from a table header.
///
/// `id` and `name` always hold a position; every other column is `None`
/// when the header does not contain it.
struct Cols {
    /// Position of the `id` column.
    id: usize,
    /// Position of the `name` column.
    name: usize,
    /// Position of the `altname` column, if present.
    altname: Option<usize>,
    /// Position of the `hierarchy` column, if present.
    hierarchy: Option<usize>,
    /// Position of the `taxrange` column, if present.
    taxrange: Option<usize>,
    /// Position of the `reaId` column, if present.
    rea_id: Option<usize>,
    /// Position of the `reaEc` column, if present.
    rea_ec: Option<usize>,
    /// Position of the `keyRea` column, if present.
    key_rea: Option<usize>,
    /// Position of the `reaName` column, if present.
    rea_name: Option<usize>,
    /// Position of the `reaNr` column, if present.
    rea_nr: Option<usize>,
    /// Position of the `ecNr` column, if present.
    ec_nr: Option<usize>,
    /// Position of the `superpathway` column, if present.
    superpathway: Option<usize>,
    /// Position of the `status` column, if present.
    status: Option<usize>,
    /// Position of the `spont` column, if present.
    spont: Option<usize>,
}
159
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Creates `path` and writes `header` and `row` as two newline-terminated lines.
    fn write_fixture(path: &std::path::Path, header: &str, row: &str) {
        let mut out = std::fs::File::create(path).unwrap();
        writeln!(out, "{}", header).unwrap();
        writeln!(out, "{}", row).unwrap();
    }

    /// A table with all fourteen columns is parsed and its list fields split.
    #[test]
    fn parses_meta_pwy_schema() {
        let dir = tempfile::tempdir().unwrap();
        let tsv = dir.path().join("m.tsv");
        write_fixture(
            &tsv,
            "id\tname\taltname\thierarchy\ttaxrange\treaId\treaEc\tkeyRea\treaName\treaNr\tecNr\tsuperpathway\tstatus\tspont",
            "PWY-1\tExample\talt\th\ttax\trxn1,rxn2\t1.1.1.1\trxn1\tex\t2\t1\tFALSE\tTRUE\trxn2",
        );

        let table = PathwayTable::load(&tsv, PwySource::MetaCyc).unwrap();
        assert_eq!(table.rows.len(), 1);

        let row = &table.rows[0];
        assert_eq!(row.id, "PWY-1");
        assert_eq!(row.rea_ids(), vec!["rxn1", "rxn2"]);
        assert_eq!(row.key_rea_list(), vec!["rxn1"]);
        assert_eq!(row.spont_list(), vec!["rxn2"]);
        assert_eq!(row.rea_nr, 2);
    }

    /// A table whose header has no `spont` column loads with an empty `spont`.
    #[test]
    fn parses_kegg_pwy_without_spont() {
        let dir = tempfile::tempdir().unwrap();
        let tsv = dir.path().join("k.tsv");
        write_fixture(
            &tsv,
            "id\tname\taltname\thierarchy\ttaxrange\treaId\treaEc\tkeyRea\treaName\treaNr\tecNr\tsuperpathway\tstatus",
            "map00010\tGlycolysis\t\tkegg;Metabolism\t\tR01061\t1.2.1.12\t\t\t1\t1\tFALSE\tTRUE",
        );

        let table = PathwayTable::load(&tsv, PwySource::Kegg).unwrap();
        assert_eq!(table.rows.len(), 1);
        assert!(table.rows[0].spont.is_empty());
    }
}