Skip to main content

gapsmith_sbml/
writer.rs

1//! Streaming SBML writer.
2//!
3//! Design: a single pass over the model, driving a `quick_xml::Writer`. We
4//! build no intermediate DOM — every element is emitted in order. That keeps
5//! memory flat even for very large genome-scale models (~20k reactions) and
6//! makes the code easy to follow top-to-bottom.
7
8use crate::namespaces::*;
9use crate::DEFAULT_BOUND;
10use gapsmith_core::{Gpr, Model};
11use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event};
12use quick_xml::writer::Writer;
13use std::collections::{BTreeMap, HashMap};
14use std::fs::File;
15use std::io::{BufWriter, Write};
16use std::path::Path;
17
18#[derive(Debug, thiserror::Error)]
19pub enum SbmlError {
20    #[error("i/o error on `{path}`: {source}")]
21    Io {
22        path: std::path::PathBuf,
23        #[source]
24        source: std::io::Error,
25    },
26    #[error("write error: {0}")]
27    Write(#[from] std::io::Error),
28    #[error("XML error: {0}")]
29    Xml(#[from] quick_xml::Error),
30    #[error("GPR parse error on reaction `{rxn}`: {source}")]
31    Gpr {
32        rxn: String,
33        #[source]
34        source: gapsmith_core::GprParseError,
35    },
36    #[error("model shape mismatch: {0}")]
37    ShapeMismatch(#[from] gapsmith_core::ModelError),
38}
39
40/// Options for [`write_sbml`]. All fields are optional; defaults produce a
41/// COBRApy-compatible document.
42#[derive(Debug, Clone)]
43pub struct WriteOptions {
44    /// Whether to indent / pretty-print. Default: true.
45    pub pretty: bool,
46    /// Objective identifier. Default: `"obj"`.
47    pub objective_id: String,
48    /// Sense of the objective. Default: `"maximize"`.
49    pub objective_sense: ObjectiveSense,
50}
51
52#[derive(Debug, Clone, Copy)]
53pub enum ObjectiveSense {
54    Maximize,
55    Minimize,
56}
57
58impl Default for WriteOptions {
59    fn default() -> Self {
60        Self {
61            pretty: true,
62            objective_id: "obj".into(),
63            objective_sense: ObjectiveSense::Maximize,
64        }
65    }
66}
67
68/// Entry point: write `model` as SBML to `path`.
69pub fn write_sbml(
70    model: &Model,
71    path: impl AsRef<Path>,
72    opts: &WriteOptions,
73) -> Result<(), SbmlError> {
74    model.check_shape()?;
75    let path = path.as_ref();
76    let f = File::create(path).map_err(|e| SbmlError::Io {
77        path: path.to_path_buf(),
78        source: e,
79    })?;
80    let mut sink = BufWriter::new(f);
81    write_to(model, &mut sink, opts)?;
82    sink.flush().map_err(|e| SbmlError::Io {
83        path: path.to_path_buf(),
84        source: e,
85    })?;
86    Ok(())
87}
88
89/// Write the SBML document to any [`Write`] sink. Used both by [`write_sbml`]
90/// (after opening the file) and by the test suite (to a memory buffer).
91pub fn write_to<W: Write>(
92    model: &Model,
93    sink: &mut W,
94    opts: &WriteOptions,
95) -> Result<(), SbmlError> {
96    let mut w: Writer<&mut W> = if opts.pretty {
97        Writer::new_with_indent(sink, b' ', 2)
98    } else {
99        Writer::new(sink)
100    };
101
102    // XML declaration.
103    w.write_event(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None)))?;
104
105    // <sbml ...>
106    let mut sbml = BytesStart::new("sbml");
107    sbml.push_attribute(("xmlns", CORE));
108    sbml.push_attribute((ATTR_XMLNS_FBC, FBC));
109    sbml.push_attribute((ATTR_XMLNS_GROUPS, GROUPS));
110    sbml.push_attribute(("level", "3"));
111    sbml.push_attribute(("version", "1"));
112    sbml.push_attribute((ATTR_FBC_REQUIRED, "false"));
113    sbml.push_attribute((ATTR_GROUPS_REQUIRED, "false"));
114    w.write_event(Event::Start(sbml))?;
115
116    write_model(&mut w, model, opts)?;
117
118    w.write_event(Event::End(BytesEnd::new("sbml")))?;
119    Ok(())
120}
121
122fn write_model<W: Write>(
123    w: &mut Writer<&mut W>,
124    model: &Model,
125    opts: &WriteOptions,
126) -> Result<(), SbmlError> {
127    let mut el = BytesStart::new("model");
128    // SBML SIds require `[A-Za-z_][A-Za-z0-9_]*` — sanitize to avoid
129    // COBRApy warnings when the model id contains `-`, `.`, etc. We
130    // keep the original as the `name` so the provenance survives.
131    let sanitized = sanitize_sid(model.annot.id.as_str());
132    el.push_attribute(("id", sanitized.as_str()));
133    if let Some(ref name) = model.annot.name {
134        el.push_attribute(("name", name.as_str()));
135    } else if sanitized != model.annot.id {
136        el.push_attribute(("name", model.annot.id.as_str()));
137    }
138    // Model-level default units. libSBML emits a warning for every compartment
139    // or species without declared units unless the enclosing model carries
140    // defaults — so we set them once here.
141    el.push_attribute(("substanceUnits", "substance"));
142    el.push_attribute(("timeUnits", "time"));
143    el.push_attribute(("volumeUnits", "volume"));
144    el.push_attribute(("extentUnits", "substance"));
145    el.push_attribute((ATTR_FBC_STRICT, "true"));
146    w.write_event(Event::Start(el))?;
147
148    write_notes(w, model)?;
149    write_unit_definitions(w)?;
150    write_compartments(w, model)?;
151    write_species(w, model)?;
152    let bounds = resolve_bound_parameters(model);
153    write_parameters(w, &bounds)?;
154    write_reactions(w, model, &bounds)?;
155    write_objectives(w, model, opts)?;
156    write_gene_products(w, model)?;
157    write_groups(w, model)?;
158
159    w.write_event(Event::End(BytesEnd::new("model")))?;
160    Ok(())
161}
162
163fn write_notes<W: Write>(w: &mut Writer<&mut W>, model: &Model) -> Result<(), SbmlError> {
164    if model.annot.gapsmith_version.is_none()
165        && model.annot.seqdb_version.is_none()
166        && model.annot.tax_domain.is_none()
167        && model.annot.gram.is_none()
168        && model.annot.notes.is_empty()
169    {
170        return Ok(());
171    }
172    w.write_event(Event::Start(BytesStart::new("notes")))?;
173
174    let mut body = BytesStart::new("body");
175    body.push_attribute(("xmlns", XHTML));
176    w.write_event(Event::Start(body))?;
177
178    if let Some(v) = &model.annot.gapsmith_version {
179        write_note(w, "gapsmith version", v)?;
180    }
181    if let Some(v) = &model.annot.seqdb_version {
182        write_note(w, "sequence DB version", v)?;
183    }
184    if let Some(v) = &model.annot.tax_domain {
185        write_note(w, "tax_domain", v)?;
186    }
187    if let Some(v) = &model.annot.gram {
188        write_note(w, "gram", v)?;
189    }
190    for n in &model.annot.notes {
191        w.write_event(Event::Start(BytesStart::new("p")))?;
192        w.write_event(Event::Text(BytesText::new(n)))?;
193        w.write_event(Event::End(BytesEnd::new("p")))?;
194    }
195
196    w.write_event(Event::End(BytesEnd::new("body")))?;
197    w.write_event(Event::End(BytesEnd::new("notes")))?;
198    Ok(())
199}
200
201fn write_note<W: Write>(
202    w: &mut Writer<&mut W>,
203    key: &str,
204    value: &str,
205) -> Result<(), SbmlError> {
206    w.write_event(Event::Start(BytesStart::new("p")))?;
207    w.write_event(Event::Text(BytesText::new(&format!("{key}: {value}"))))?;
208    w.write_event(Event::End(BytesEnd::new("p")))?;
209    Ok(())
210}
211
212fn write_unit_definitions<W: Write>(w: &mut Writer<&mut W>) -> Result<(), SbmlError> {
213    w.write_event(Event::Start(BytesStart::new("listOfUnitDefinitions")))?;
214
215    // Flux unit used on every reaction bound parameter.
216    emit_unit_def(w, "mmol_per_gDW_per_hr", &[
217        ("mole", 1, -3, 1.0),
218        ("gram", -1, 0, 1.0),
219        ("second", -1, 0, 3600.0),
220    ])?;
221
222    // Default substance unit referenced by `model/@substanceUnits` —
223    // millimoles (matches cobrar convention).
224    emit_unit_def(w, "substance", &[("mole", 1, -3, 1.0)])?;
225
226    // Default time unit — hours.
227    emit_unit_def(w, "time", &[("second", 1, 0, 3600.0)])?;
228
229    // Default volume unit — litres (placeholder; gapseq models don't use
230    // volumes meaningfully but libSBML wants a declaration).
231    emit_unit_def(w, "volume", &[("litre", 1, 0, 1.0)])?;
232
233    w.write_event(Event::End(BytesEnd::new("listOfUnitDefinitions")))?;
234    Ok(())
235}
236
237fn emit_unit_def<W: Write>(
238    w: &mut Writer<&mut W>,
239    id: &str,
240    units: &[(&str, i32, i32, f64)],
241) -> Result<(), SbmlError> {
242    let mut ud = BytesStart::new("unitDefinition");
243    ud.push_attribute(("id", id));
244    w.write_event(Event::Start(ud))?;
245    w.write_event(Event::Start(BytesStart::new("listOfUnits")))?;
246    for (kind, exp, scale, mult) in units {
247        write_unit(w, kind, *exp, *scale, *mult)?;
248    }
249    w.write_event(Event::End(BytesEnd::new("listOfUnits")))?;
250    w.write_event(Event::End(BytesEnd::new("unitDefinition")))?;
251    Ok(())
252}
253
254fn write_unit<W: Write>(
255    w: &mut Writer<&mut W>,
256    kind: &str,
257    exponent: i32,
258    scale: i32,
259    multiplier: f64,
260) -> Result<(), SbmlError> {
261    let mut el = BytesStart::new("unit");
262    el.push_attribute(("kind", kind));
263    let e = exponent.to_string();
264    let s = scale.to_string();
265    let m = fmt_f64(multiplier);
266    el.push_attribute(("exponent", e.as_str()));
267    el.push_attribute(("scale", s.as_str()));
268    el.push_attribute(("multiplier", m.as_str()));
269    w.write_event(Event::Empty(el))?;
270    Ok(())
271}
272
273fn write_compartments<W: Write>(
274    w: &mut Writer<&mut W>,
275    model: &Model,
276) -> Result<(), SbmlError> {
277    w.write_event(Event::Start(BytesStart::new("listOfCompartments")))?;
278    for c in &model.compartments {
279        let mut el = BytesStart::new("compartment");
280        el.push_attribute(("id", c.id.as_str()));
281        if !c.name.is_empty() {
282            el.push_attribute(("name", c.name.as_str()));
283        }
284        el.push_attribute(("constant", "true"));
285        el.push_attribute(("spatialDimensions", "3"));
286        el.push_attribute(("size", "1"));
287        el.push_attribute(("units", "volume"));
288        w.write_event(Event::Empty(el))?;
289    }
290    w.write_event(Event::End(BytesEnd::new("listOfCompartments")))?;
291    Ok(())
292}
293
294fn write_species<W: Write>(w: &mut Writer<&mut W>, model: &Model) -> Result<(), SbmlError> {
295    w.write_event(Event::Start(BytesStart::new("listOfSpecies")))?;
296    for m in &model.mets {
297        let comp_id = model
298            .compartments
299            .get(m.compartment.0 as usize)
300            .map(|c| c.id.as_str())
301            .unwrap_or("c0");
302        let sid = species_id(m.id.as_str(), comp_id);
303
304        let mut el = BytesStart::new("species");
305        el.push_attribute(("id", sid.as_str()));
306        el.push_attribute(("name", m.name.as_str()));
307        el.push_attribute(("compartment", comp_id));
308        el.push_attribute(("hasOnlySubstanceUnits", "true"));
309        el.push_attribute(("boundaryCondition", "false"));
310        el.push_attribute(("constant", "false"));
311        let charge = m.charge.to_string();
312        el.push_attribute((ATTR_FBC_CHARGE, charge.as_str()));
313        if let Some(f) = &m.formula {
314            if !f.is_empty() && f != "null" {
315                el.push_attribute((ATTR_FBC_FORMULA, f.as_str()));
316            }
317        }
318        w.write_event(Event::Empty(el))?;
319    }
320    w.write_event(Event::End(BytesEnd::new("listOfSpecies")))?;
321    Ok(())
322}
323
324/// Resolved flux-bound parameters for every reaction.
325///
326/// Each reaction has `(lb_param_id, ub_param_id)`. Shared defaults
327/// (`cobra_default_lb`, `cobra_default_ub`, `cobra_0_bound`) are reused; any
328/// remaining numeric bounds get their own `R_<id>_<lower|upper>_bound`
329/// parameter, emitted inside `<listOfParameters>`.
330struct BoundResolution {
331    per_rxn: HashMap<usize, (String, String)>,
332    /// Deterministic (id, value) pairs for custom parameters.
333    custom: Vec<(String, f64)>,
334}
335
336fn resolve_bound_parameters(model: &Model) -> BoundResolution {
337    let mut per_rxn = HashMap::new();
338    let mut customs: BTreeMap<String, f64> = BTreeMap::new();
339    for (i, r) in model.rxns.iter().enumerate() {
340        let lb_id = bound_param_id(r.lb, r.id.as_str(), "lower_bound");
341        let ub_id = bound_param_id(r.ub, r.id.as_str(), "upper_bound");
342        if is_custom(&lb_id) {
343            customs.insert(lb_id.clone(), r.lb);
344        }
345        if is_custom(&ub_id) {
346            customs.insert(ub_id.clone(), r.ub);
347        }
348        per_rxn.insert(i, (lb_id, ub_id));
349    }
350    BoundResolution { per_rxn, custom: customs.into_iter().collect() }
351}
352
353fn bound_param_id(v: f64, rxn_id: &str, suffix: &str) -> String {
354    if v == -DEFAULT_BOUND {
355        "cobra_default_lb".into()
356    } else if v == DEFAULT_BOUND {
357        "cobra_default_ub".into()
358    } else if v == 0.0 {
359        "cobra_0_bound".into()
360    } else {
361        format!("R_{rxn_id}_{suffix}")
362    }
363}
364
365fn is_custom(id: &str) -> bool {
366    !matches!(id, "cobra_default_lb" | "cobra_default_ub" | "cobra_0_bound")
367}
368
369fn write_parameters<W: Write>(
370    w: &mut Writer<&mut W>,
371    bounds: &BoundResolution,
372) -> Result<(), SbmlError> {
373    w.write_event(Event::Start(BytesStart::new("listOfParameters")))?;
374    write_shared_parameter(w, "cobra_default_lb", -DEFAULT_BOUND)?;
375    write_shared_parameter(w, "cobra_default_ub", DEFAULT_BOUND)?;
376    write_shared_parameter(w, "cobra_0_bound", 0.0)?;
377    for (id, v) in &bounds.custom {
378        write_shared_parameter(w, id, *v)?;
379    }
380    w.write_event(Event::End(BytesEnd::new("listOfParameters")))?;
381    Ok(())
382}
383
384fn write_shared_parameter<W: Write>(
385    w: &mut Writer<&mut W>,
386    id: &str,
387    value: f64,
388) -> Result<(), SbmlError> {
389    let mut el = BytesStart::new("parameter");
390    el.push_attribute(("id", id));
391    let v = fmt_f64(value);
392    el.push_attribute(("value", v.as_str()));
393    el.push_attribute(("constant", "true"));
394    el.push_attribute(("sboTerm", SBO_FLUX_BOUND));
395    el.push_attribute(("units", "mmol_per_gDW_per_hr"));
396    w.write_event(Event::Empty(el))?;
397    Ok(())
398}
399
400fn write_reactions<W: Write>(
401    w: &mut Writer<&mut W>,
402    model: &Model,
403    bounds: &BoundResolution,
404) -> Result<(), SbmlError> {
405    w.write_event(Event::Start(BytesStart::new("listOfReactions")))?;
406    for (i, r) in model.rxns.iter().enumerate() {
407        let (lb_id, ub_id) = &bounds.per_rxn[&i];
408        let mut el = BytesStart::new("reaction");
409        let rid = reaction_id(r.id.as_str());
410        el.push_attribute(("id", rid.as_str()));
411        if !r.name.is_empty() {
412            el.push_attribute(("name", r.name.as_str()));
413        }
414        let reversible = matches!(r.reversibility(), gapsmith_core::Reversibility::Reversible);
415        el.push_attribute(("reversible", if reversible { "true" } else { "false" }));
416        el.push_attribute(("fast", "false"));
417        el.push_attribute((ATTR_FBC_LOWER, lb_id.as_str()));
418        el.push_attribute((ATTR_FBC_UPPER, ub_id.as_str()));
419        w.write_event(Event::Start(el))?;
420
421        write_stoich_refs(w, model, i)?;
422        if let Some(raw) = &r.gpr_raw {
423            if !raw.trim().is_empty() {
424                let gpr: Gpr = raw.parse().map_err(|source| SbmlError::Gpr {
425                    rxn: r.id.as_str().to_string(),
426                    source,
427                })?;
428                write_gpr(w, &gpr)?;
429            }
430        }
431
432        w.write_event(Event::End(BytesEnd::new("reaction")))?;
433    }
434    w.write_event(Event::End(BytesEnd::new("listOfReactions")))?;
435    Ok(())
436}
437
438fn write_stoich_refs<W: Write>(
439    w: &mut Writer<&mut W>,
440    model: &Model,
441    rxn_idx: usize,
442) -> Result<(), SbmlError> {
443    let col = model.s.column(rxn_idx);
444    let (reactants, products): (Vec<_>, Vec<_>) = col.into_iter().partition(|(_, v)| *v < 0.0);
445
446    emit_refs(w, model, &reactants, "listOfReactants")?;
447    emit_refs(w, model, &products, "listOfProducts")?;
448    Ok(())
449}
450
451fn emit_refs<W: Write>(
452    w: &mut Writer<&mut W>,
453    model: &Model,
454    list: &[(usize, f64)],
455    tag: &str,
456) -> Result<(), SbmlError> {
457    if list.is_empty() {
458        return Ok(());
459    }
460    w.write_event(Event::Start(BytesStart::new(tag)))?;
461    for &(row, v) in list {
462        let met = &model.mets[row];
463        let comp_id = model
464            .compartments
465            .get(met.compartment.0 as usize)
466            .map(|c| c.id.as_str())
467            .unwrap_or("c0");
468        let sid = species_id(met.id.as_str(), comp_id);
469        let mut s = BytesStart::new("speciesReference");
470        s.push_attribute(("species", sid.as_str()));
471        let stoich = fmt_f64(v.abs());
472        s.push_attribute(("stoichiometry", stoich.as_str()));
473        s.push_attribute(("constant", "true"));
474        w.write_event(Event::Empty(s))?;
475    }
476    w.write_event(Event::End(BytesEnd::new(tag)))?;
477    Ok(())
478}
479
480fn write_gpr<W: Write>(w: &mut Writer<&mut W>, gpr: &Gpr) -> Result<(), SbmlError> {
481    w.write_event(Event::Start(BytesStart::new(TAG_FBC_GPA)))?;
482    write_gpr_node(w, gpr)?;
483    w.write_event(Event::End(BytesEnd::new(TAG_FBC_GPA)))?;
484    Ok(())
485}
486
487fn write_gpr_node<W: Write>(w: &mut Writer<&mut W>, gpr: &Gpr) -> Result<(), SbmlError> {
488    match gpr {
489        Gpr::Gene { id } => {
490            let mut el = BytesStart::new(TAG_FBC_GENEPRODUCTREF);
491            let gid = gene_id(id.as_str());
492            el.push_attribute((ATTR_FBC_GENEPRODUCT, gid.as_str()));
493            w.write_event(Event::Empty(el))?;
494        }
495        Gpr::And { operands } => {
496            w.write_event(Event::Start(BytesStart::new(TAG_FBC_AND)))?;
497            for op in operands {
498                write_gpr_node(w, op)?;
499            }
500            w.write_event(Event::End(BytesEnd::new(TAG_FBC_AND)))?;
501        }
502        Gpr::Or { operands } => {
503            w.write_event(Event::Start(BytesStart::new(TAG_FBC_OR)))?;
504            for op in operands {
505                write_gpr_node(w, op)?;
506            }
507            w.write_event(Event::End(BytesEnd::new(TAG_FBC_OR)))?;
508        }
509    }
510    Ok(())
511}
512
513fn write_objectives<W: Write>(
514    w: &mut Writer<&mut W>,
515    model: &Model,
516    opts: &WriteOptions,
517) -> Result<(), SbmlError> {
518    let mut el = BytesStart::new(TAG_FBC_OBJECTIVES);
519    el.push_attribute((ATTR_FBC_ACTIVE_OBJECTIVE, opts.objective_id.as_str()));
520    w.write_event(Event::Start(el))?;
521
522    let sense = match opts.objective_sense {
523        ObjectiveSense::Maximize => "maximize",
524        ObjectiveSense::Minimize => "minimize",
525    };
526    let mut obj = BytesStart::new(TAG_FBC_OBJECTIVE);
527    obj.push_attribute((ATTR_FBC_ID, opts.objective_id.as_str()));
528    obj.push_attribute((ATTR_FBC_TYPE, sense));
529    w.write_event(Event::Start(obj))?;
530
531    w.write_event(Event::Start(BytesStart::new(TAG_FBC_FLUX_OBJECTIVES)))?;
532    for r in &model.rxns {
533        if r.obj_coef != 0.0 {
534            let mut fo = BytesStart::new(TAG_FBC_FLUX_OBJECTIVE);
535            let rid = reaction_id(r.id.as_str());
536            fo.push_attribute((ATTR_FBC_REACTION, rid.as_str()));
537            let c = fmt_f64(r.obj_coef);
538            fo.push_attribute((ATTR_FBC_COEFFICIENT, c.as_str()));
539            w.write_event(Event::Empty(fo))?;
540        }
541    }
542    w.write_event(Event::End(BytesEnd::new(TAG_FBC_FLUX_OBJECTIVES)))?;
543
544    w.write_event(Event::End(BytesEnd::new(TAG_FBC_OBJECTIVE)))?;
545    w.write_event(Event::End(BytesEnd::new(TAG_FBC_OBJECTIVES)))?;
546    Ok(())
547}
548
549fn write_gene_products<W: Write>(
550    w: &mut Writer<&mut W>,
551    model: &Model,
552) -> Result<(), SbmlError> {
553    // Collect unique genes from reaction GPRs and from model.genes.
554    let mut seen: std::collections::BTreeSet<String> = Default::default();
555    for g in &model.genes {
556        seen.insert(g.as_str().to_string());
557    }
558    for r in &model.rxns {
559        if let Some(raw) = &r.gpr_raw {
560            if let Ok(gpr) = raw.parse::<Gpr>() {
561                let mut acc = Vec::new();
562                gpr.collect_genes(&mut acc);
563                for g in acc {
564                    seen.insert(g.as_str().to_string());
565                }
566            }
567        }
568    }
569    if seen.is_empty() {
570        return Ok(());
571    }
572    w.write_event(Event::Start(BytesStart::new(TAG_FBC_GENEPRODUCTS)))?;
573    for g in seen {
574        let mut el = BytesStart::new(TAG_FBC_GENEPRODUCT);
575        let gid = gene_id(&g);
576        el.push_attribute((ATTR_FBC_ID, gid.as_str()));
577        el.push_attribute((ATTR_FBC_LABEL, g.as_str()));
578        w.write_event(Event::Empty(el))?;
579    }
580    w.write_event(Event::End(BytesEnd::new(TAG_FBC_GENEPRODUCTS)))?;
581    Ok(())
582}
583
584fn write_groups<W: Write>(w: &mut Writer<&mut W>, model: &Model) -> Result<(), SbmlError> {
585    let mut by_subsys: BTreeMap<&str, Vec<usize>> = Default::default();
586    for (i, r) in model.rxns.iter().enumerate() {
587        if let Some(ss) = &r.subsystem {
588            if !ss.is_empty() {
589                by_subsys.entry(ss.as_str()).or_default().push(i);
590            }
591        }
592    }
593    if by_subsys.is_empty() {
594        return Ok(());
595    }
596    w.write_event(Event::Start(BytesStart::new(TAG_GROUPS_GROUPS)))?;
597    for (idx, (subsys, rxn_idxs)) in by_subsys.iter().enumerate() {
598        let gid = format!("g_{}", idx + 1);
599        let mut gel = BytesStart::new(TAG_GROUPS_GROUP);
600        gel.push_attribute((ATTR_GROUPS_ID, gid.as_str()));
601        gel.push_attribute((ATTR_GROUPS_NAME, *subsys));
602        gel.push_attribute((ATTR_GROUPS_KIND, "partonomy"));
603        gel.push_attribute(("sboTerm", "SBO:0000633"));
604        w.write_event(Event::Start(gel))?;
605
606        w.write_event(Event::Start(BytesStart::new(TAG_GROUPS_MEMBERS)))?;
607        for &ri in rxn_idxs {
608            let rid = reaction_id(model.rxns[ri].id.as_str());
609            let mut m = BytesStart::new(TAG_GROUPS_MEMBER);
610            m.push_attribute((ATTR_GROUPS_IDREF, rid.as_str()));
611            w.write_event(Event::Empty(m))?;
612        }
613        w.write_event(Event::End(BytesEnd::new(TAG_GROUPS_MEMBERS)))?;
614
615        w.write_event(Event::End(BytesEnd::new(TAG_GROUPS_GROUP)))?;
616    }
617    w.write_event(Event::End(BytesEnd::new(TAG_GROUPS_GROUPS)))?;
618    Ok(())
619}
620
621// -- Helpers --
622
623/// Canonicalize a metabolite id + compartment into a COBRApy-compatible
624/// `M_<cpd>_<comp>` species id. Idempotent in three ways:
625///
626/// - Keeps an already-`M_`-prefixed id verbatim.
627/// - Does not append `_<comp>` when the cpd id already ends with it
628///   (gapseq's draft stores metabolites as `cpd00001_c0`, so the
629///   compartment is baked into the id itself).
630/// - Trims trailing separator if the cpd ends with `_`.
631fn species_id(cpd: &str, comp: &str) -> String {
632    if cpd.starts_with("M_") {
633        return cpd.to_string();
634    }
635    let suffix = format!("_{comp}");
636    if cpd.ends_with(&suffix) {
637        format!("M_{cpd}")
638    } else {
639        format!("M_{cpd}{suffix}")
640    }
641}
642
643fn reaction_id(rxn: &str) -> String {
644    if rxn.starts_with("R_") {
645        rxn.to_string()
646    } else {
647        format!("R_{rxn}")
648    }
649}
650
651/// Sanitize a string into a valid SBML SId: `[A-Za-z_][A-Za-z0-9_]*`.
652/// Non-conforming characters (`-`, `.`, `:`, space, …) are replaced
653/// with `_`. Leading digits get an underscore prefix.
654fn sanitize_sid(s: &str) -> String {
655    if s.is_empty() {
656        return "_".to_string();
657    }
658    let mut out = String::with_capacity(s.len() + 1);
659    let mut chars = s.chars();
660    let first = chars.next().unwrap();
661    if first.is_ascii_alphabetic() || first == '_' {
662        out.push(first);
663    } else if first.is_ascii_digit() {
664        out.push('_');
665        out.push(first);
666    } else {
667        out.push('_');
668    }
669    for c in chars {
670        if c.is_ascii_alphanumeric() || c == '_' {
671            out.push(c);
672        } else {
673            out.push('_');
674        }
675    }
676    out
677}
678
679fn gene_id(gene: &str) -> String {
680    if gene.starts_with("G_") {
681        gene.to_string()
682    } else {
683        format!("G_{gene}")
684    }
685}
686
687/// Format an `f64` without a trailing `.0` if it's an integer value.
688/// Mirrors cobrar / COBRApy output style.
689fn fmt_f64(v: f64) -> String {
690    if v.fract() == 0.0 && v.is_finite() && v.abs() < 1e15 {
691        format!("{}", v as i64)
692    } else {
693        format!("{v}")
694    }
695}
696
697#[cfg(test)]
698mod tests {
699    use super::*;
700    use gapsmith_core::{CompartmentId, Metabolite, Reaction, StoichMatrix};
701
702    fn toy() -> Model {
703        let mut m = Model::new("toy_model");
704        m.annot.name = Some("Test model".into());
705        m.annot.gapsmith_version = Some("0.1.0".into());
706        m.mets.push(Metabolite::new("cpd00001", "H2O", CompartmentId::CYTOSOL));
707        m.mets.push(Metabolite::new("cpd00002", "ATP", CompartmentId::CYTOSOL));
708        m.mets.push({
709            let mut x = Metabolite::new("cpd00007", "O2", CompartmentId::EXTRACELLULAR);
710            x.formula = Some("O2".into());
711            x.charge = 0;
712            x
713        });
714        let mut r1 = Reaction::new("rxn00001", "ATPase", 0.0, 1000.0);
715        r1.obj_coef = 1.0;
716        r1.subsystem = Some("Central metabolism".into());
717        r1.gpr_raw = Some("(b0001 and b0002) or b0003".into());
718        m.rxns.push(r1);
719        let mut ex = Reaction::new("EX_cpd00007_e0", "O2 exchange", -1000.0, 1000.0);
720        ex.is_exchange = true;
721        m.rxns.push(ex);
722        m.s = StoichMatrix::from_triplets(
723            3,
724            2,
725            vec![(0, 0, 1.0), (1, 0, -1.0), (2, 1, -1.0)],
726        );
727        m
728    }
729
730    #[test]
731    fn toy_writes_valid_sbml() {
732        let m = toy();
733        let mut buf = Vec::new();
734        write_to(&m, &mut buf, &WriteOptions::default()).unwrap();
735        let s = String::from_utf8(buf).unwrap();
736        assert!(s.starts_with("<?xml"), "output: {s}");
737        assert!(s.contains("<sbml"));
738        assert!(s.contains("xmlns:fbc=\""));
739        assert!(s.contains("xmlns:groups=\""));
740        assert!(s.contains("<model id=\"toy_model\""));
741        assert!(s.contains("fbc:strict=\"true\""));
742        assert!(s.contains("<species id=\"M_cpd00001_c0\""));
743        assert!(s.contains("<reaction id=\"R_rxn00001\""));
744        assert!(s.contains("fbc:lowerFluxBound=\"cobra_0_bound\""));
745        assert!(s.contains("fbc:upperFluxBound=\"cobra_default_ub\""));
746        assert!(s.contains("<fbc:geneProductAssociation>"));
747        assert!(s.contains("<fbc:or>"));
748        assert!(s.contains("<fbc:and>"));
749        assert!(s.contains("G_b0001"));
750        assert!(s.contains("fbc:fluxObjective fbc:reaction=\"R_rxn00001\""));
751        assert!(s.contains("groups:group "));
752        assert!(s.contains("Central metabolism"));
753    }
754
755    #[test]
756    fn parse_back_with_quick_xml() {
757        use quick_xml::events::Event as E;
758        use quick_xml::reader::Reader;
759        let m = toy();
760        let mut buf = Vec::new();
761        write_to(&m, &mut buf, &WriteOptions::default()).unwrap();
762        let mut rdr = Reader::from_reader(std::io::Cursor::new(buf));
763        let mut stack: Vec<String> = Vec::new();
764        let mut species = 0usize;
765        let mut rxns = 0usize;
766        let mut compartments = 0usize;
767        let mut bufx = Vec::new();
768        loop {
769            match rdr.read_event_into(&mut bufx).unwrap() {
770                E::Start(e) => {
771                    let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
772                    stack.push(name);
773                }
774                E::Empty(e) => {
775                    let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
776                    if name == "species" {
777                        species += 1;
778                    } else if name == "compartment" {
779                        compartments += 1;
780                    }
781                }
782                E::End(_) => {
783                    if let Some(n) = stack.pop() {
784                        if n == "reaction" {
785                            rxns += 1;
786                        }
787                    }
788                }
789                E::Eof => break,
790                _ => {}
791            }
792            bufx.clear();
793        }
794        assert_eq!(species, 3);
795        assert_eq!(rxns, 2);
796        // Three compartments always emitted (c0, e0, p0).
797        assert_eq!(compartments, 3);
798    }
799}