gapsmith_db/
stoich_parse.rs1use gapsmith_core::CpdId;
28
29#[derive(Clone, Debug, PartialEq)]
30pub struct StoichTerm {
31 pub coef: f64,
32 pub cpd: CpdId,
33 pub compartment: u8,
34 pub reserved: u8,
36 pub name: String,
39}
40
41#[derive(Debug, thiserror::Error)]
42pub enum StoichParseError {
43 #[error("term {index} (`{fragment}`): {msg}")]
44 BadTerm {
45 index: usize,
46 fragment: String,
47 msg: String,
48 },
49 #[error("empty stoichiometry string")]
50 Empty,
51}
52
53pub fn parse_stoichiometry(s: &str) -> Result<Vec<StoichTerm>, StoichParseError> {
55 let s = s.trim();
56 if s.is_empty() {
57 return Err(StoichParseError::Empty);
58 }
59 let mut out = Vec::new();
60 for (i, raw_term) in split_top_level(s, ';').iter().enumerate() {
61 let t = raw_term.trim();
62 if t.is_empty() {
63 continue;
64 }
65 out.push(parse_term(i, t)?);
66 }
67 if out.is_empty() {
68 return Err(StoichParseError::Empty);
69 }
70 Ok(out)
71}
72
73fn parse_term(index: usize, t: &str) -> Result<StoichTerm, StoichParseError> {
74 let parts = split_top_level(t, ':');
75 if parts.len() < 3 {
76 return Err(StoichParseError::BadTerm {
77 index,
78 fragment: t.to_string(),
79 msg: format!("expected at least 3 `:`-separated fields, got {}", parts.len()),
80 });
81 }
82
83 let coef: f64 = parts[0].trim().parse().map_err(|_| StoichParseError::BadTerm {
84 index,
85 fragment: t.to_string(),
86 msg: format!("coefficient `{}` is not a number", parts[0]),
87 })?;
88 let cpd = CpdId::new(parts[1].trim());
89 let compartment: u8 = parts[2].trim().parse().map_err(|_| StoichParseError::BadTerm {
90 index,
91 fragment: t.to_string(),
92 msg: format!("compartment `{}` is not u8", parts[2]),
93 })?;
94 let reserved: u8 = if parts.len() >= 4 {
95 parts[3].trim().parse().unwrap_or(0)
96 } else {
97 0
98 };
99 let name = if parts.len() >= 5 {
100 let joined = parts[4..].join(":");
102 strip_quotes(joined.trim())
103 } else {
104 String::new()
105 };
106 Ok(StoichTerm { coef, cpd, compartment, reserved, name })
107}
108
/// Splits `s` on `delim`, ignoring delimiters that sit inside double quotes.
///
/// A backslash makes the next character literal: an escaped quote does not
/// toggle quoting and an escaped delimiter does not split. Quotes and
/// backslashes are kept in the returned pieces. The result always contains
/// at least one element (an empty string for empty input).
fn split_top_level(s: &str, delim: char) -> Vec<String> {
    let mut pieces = Vec::new();
    // Byte offset where the piece currently being scanned begins.
    let mut start = 0;
    let mut quoted = false;
    let mut escaped = false;

    for (pos, ch) in s.char_indices() {
        if escaped {
            // Character after a backslash: taken verbatim.
            escaped = false;
        } else if ch == '\\' {
            escaped = true;
        } else if ch == '"' {
            quoted = !quoted;
        } else if ch == delim && !quoted {
            pieces.push(s[start..pos].to_string());
            start = pos + ch.len_utf8();
        }
    }

    pieces.push(s[start..].to_string());
    pieces
}
140
/// Returns `s` without its enclosing double quotes.
///
/// Exactly one leading and one trailing `"` are removed, and only when both
/// are present; any other input (including a lone `"`) is returned unchanged.
fn strip_quotes(s: &str) -> String {
    s.strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or(s)
        .to_string()
}
149
#[cfg(test)]
mod tests {
    use super::*;

    /// Four well-formed terms with quoted names.
    #[test]
    fn canonical_row() {
        let input = r#"-1:cpd00001:0:0:"H2O";-1:cpd00012:0:0:"PPi";2:cpd00009:0:0:"Phosphate";1:cpd00067:0:0:"H+""#;
        let parsed = parse_stoichiometry(input).unwrap();
        assert_eq!(parsed.len(), 4);

        let first = &parsed[0];
        assert_eq!(first.coef, -1.0);
        assert_eq!(first.cpd.as_str(), "cpd00001");
        assert_eq!(first.compartment, 0);
        assert_eq!(first.name, "H2O");

        assert_eq!(parsed[2].coef, 2.0);
        assert_eq!(parsed[3].name, "H+");
    }

    /// Five terms; the coefficients must survive parsing unchanged.
    #[test]
    fn five_term_real_row() {
        let input = r#"-1:cpd00001:0:0:"H2O";-3:cpd00067:0:0:"H+";-1:cpd00742:0:0:"Allophanate";2:cpd00011:0:0:"CO2";2:cpd00013:0:0:"NH3""#;
        let parsed = parse_stoichiometry(input).unwrap();
        assert_eq!(parsed.len(), 5);
        let total = parsed.iter().map(|t| t.coef).sum::<f64>();
        assert_eq!(total, -1.0 - 3.0 - 1.0 + 2.0 + 2.0);
    }

    /// `:` inside a quoted name must not be treated as a field separator.
    #[test]
    fn colon_inside_quoted_name_preserved() {
        let input = r#"-1:cpd00001:0:0:"X: Y: Z";1:cpd00002:0:0:"W""#;
        let parsed = parse_stoichiometry(input).unwrap();
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0].name, "X: Y: Z");
    }

    /// `;` inside a quoted name must not be treated as a term separator.
    #[test]
    fn semicolon_inside_quoted_name_preserved() {
        let input = r#"-1:cpd00001:0:0:"A; B";1:cpd00002:0:0:"C""#;
        let parsed = parse_stoichiometry(input).unwrap();
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0].name, "A; B");
        assert_eq!(parsed[1].cpd.as_str(), "cpd00002");
    }

    /// The name field is optional.
    #[test]
    fn missing_name_is_ok() {
        let parsed = parse_stoichiometry("-1:cpd00001:0:0").unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].name, "");
    }

    /// Non-integer coefficients are accepted.
    #[test]
    fn fractional_coef() {
        let parsed = parse_stoichiometry("-0.5:cpd00001:0:0:\"A\"").unwrap();
        assert_eq!(parsed[0].coef, -0.5);
    }

    /// Blank input and input made only of separators both report `Empty`.
    #[test]
    fn empty_is_error() {
        for input in ["", " ", ";;;"] {
            let err = parse_stoichiometry(input).unwrap_err();
            assert!(matches!(err, StoichParseError::Empty));
        }
    }

    /// The error carries the position of the malformed term.
    #[test]
    fn bad_term_reports_index() {
        let err = parse_stoichiometry("-1:cpd:0:0:X;bad").unwrap_err();
        assert!(matches!(err, StoichParseError::BadTerm { index: 1, .. }));
    }
}