use gapsmith_core::CpdId;
#[derive(Clone, Debug, PartialEq)]
pub struct StoichTerm {
pub coef: f64,
pub cpd: CpdId,
pub compartment: u8,
pub reserved: u8,
pub name: String,
}
#[derive(Debug, thiserror::Error)]
pub enum StoichParseError {
#[error("term {index} (`{fragment}`): {msg}")]
BadTerm {
index: usize,
fragment: String,
msg: String,
},
#[error("empty stoichiometry string")]
Empty,
}
pub fn parse_stoichiometry(s: &str) -> Result<Vec<StoichTerm>, StoichParseError> {
let s = s.trim();
if s.is_empty() {
return Err(StoichParseError::Empty);
}
let mut out = Vec::new();
for (i, raw_term) in split_top_level(s, ';').iter().enumerate() {
let t = raw_term.trim();
if t.is_empty() {
continue;
}
out.push(parse_term(i, t)?);
}
if out.is_empty() {
return Err(StoichParseError::Empty);
}
Ok(out)
}
fn parse_term(index: usize, t: &str) -> Result<StoichTerm, StoichParseError> {
let parts = split_top_level(t, ':');
if parts.len() < 3 {
return Err(StoichParseError::BadTerm {
index,
fragment: t.to_string(),
msg: format!("expected at least 3 `:`-separated fields, got {}", parts.len()),
});
}
let coef: f64 = parts[0].trim().parse().map_err(|_| StoichParseError::BadTerm {
index,
fragment: t.to_string(),
msg: format!("coefficient `{}` is not a number", parts[0]),
})?;
let cpd = CpdId::new(parts[1].trim());
let compartment: u8 = parts[2].trim().parse().map_err(|_| StoichParseError::BadTerm {
index,
fragment: t.to_string(),
msg: format!("compartment `{}` is not u8", parts[2]),
})?;
let reserved: u8 = if parts.len() >= 4 {
parts[3].trim().parse().unwrap_or(0)
} else {
0
};
let name = if parts.len() >= 5 {
let joined = parts[4..].join(":");
strip_quotes(joined.trim())
} else {
String::new()
};
Ok(StoichTerm { coef, cpd, compartment, reserved, name })
}
fn split_top_level(s: &str, delim: char) -> Vec<String> {
let mut out = Vec::new();
let mut buf = String::new();
let mut in_quotes = false;
let mut prev_backslash = false;
for c in s.chars() {
if prev_backslash {
buf.push(c);
prev_backslash = false;
continue;
}
match c {
'\\' => {
buf.push(c);
prev_backslash = true;
}
'"' => {
in_quotes = !in_quotes;
buf.push(c);
}
c if c == delim && !in_quotes => {
out.push(std::mem::take(&mut buf));
}
_ => buf.push(c),
}
}
out.push(buf);
out
}
fn strip_quotes(s: &str) -> String {
let bytes = s.as_bytes();
if bytes.len() >= 2 && bytes.first() == Some(&b'"') && bytes.last() == Some(&b'"') {
s[1..s.len() - 1].to_string()
} else {
s.to_string()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn canonical_row() {
let s = r#"-1:cpd00001:0:0:"H2O";-1:cpd00012:0:0:"PPi";2:cpd00009:0:0:"Phosphate";1:cpd00067:0:0:"H+""#;
let terms = parse_stoichiometry(s).unwrap();
assert_eq!(terms.len(), 4);
assert_eq!(terms[0].coef, -1.0);
assert_eq!(terms[0].cpd.as_str(), "cpd00001");
assert_eq!(terms[0].compartment, 0);
assert_eq!(terms[0].name, "H2O");
assert_eq!(terms[2].coef, 2.0);
assert_eq!(terms[3].name, "H+");
}
#[test]
fn five_term_real_row() {
let s = r#"-1:cpd00001:0:0:"H2O";-3:cpd00067:0:0:"H+";-1:cpd00742:0:0:"Allophanate";2:cpd00011:0:0:"CO2";2:cpd00013:0:0:"NH3""#;
let terms = parse_stoichiometry(s).unwrap();
assert_eq!(terms.len(), 5);
assert_eq!(terms.iter().map(|t| t.coef).sum::<f64>(), -1.0 - 3.0 - 1.0 + 2.0 + 2.0);
}
#[test]
fn colon_inside_quoted_name_preserved() {
let s = r#"-1:cpd00001:0:0:"X: Y: Z";1:cpd00002:0:0:"W""#;
let terms = parse_stoichiometry(s).unwrap();
assert_eq!(terms.len(), 2);
assert_eq!(terms[0].name, "X: Y: Z");
}
#[test]
fn semicolon_inside_quoted_name_preserved() {
let s = r#"-1:cpd00001:0:0:"A; B";1:cpd00002:0:0:"C""#;
let terms = parse_stoichiometry(s).unwrap();
assert_eq!(terms.len(), 2);
assert_eq!(terms[0].name, "A; B");
assert_eq!(terms[1].cpd.as_str(), "cpd00002");
}
#[test]
fn missing_name_is_ok() {
let terms = parse_stoichiometry("-1:cpd00001:0:0").unwrap();
assert_eq!(terms.len(), 1);
assert_eq!(terms[0].name, "");
}
#[test]
fn fractional_coef() {
let terms = parse_stoichiometry("-0.5:cpd00001:0:0:\"A\"").unwrap();
assert_eq!(terms[0].coef, -0.5);
}
#[test]
fn empty_is_error() {
assert!(matches!(parse_stoichiometry("").unwrap_err(), StoichParseError::Empty));
assert!(matches!(parse_stoichiometry(" ").unwrap_err(), StoichParseError::Empty));
assert!(matches!(parse_stoichiometry(";;;").unwrap_err(), StoichParseError::Empty));
}
#[test]
fn bad_term_reports_index() {
let err = parse_stoichiometry("-1:cpd:0:0:X;bad").unwrap_err();
assert!(matches!(err, StoichParseError::BadTerm { index: 1, .. }));
}
}