Skip to main content

gapsmith_db/
lib.rs

//! Loaders for gapseq's reference-data tables in `dat/`.
//!
//! Every loader is self-contained: pass in a path (or use [`DataRoot`] for
//! the batch loader) and get back a plain Rust value. No implicit paths, no
//! globals.
//!
//! The parsers tolerate the minor variations seen in real gapseq `dat/`
//! files (some tables have 13 columns, others 14; MNXref files start with
//! blocks of `#` comments). Row validation errors carry line numbers so the
//! culprit can be found quickly.
10
11pub mod biomass;
12pub mod common;
13pub mod complex;
14pub mod exception;
15pub mod medium_rules;
16pub mod mnxref;
17pub mod pathway;
18pub mod seed;
19pub mod stoich_hash;
20pub mod stoich_parse;
21pub mod subex;
22pub mod tcdb;
23
24pub use biomass::{BiomassTemplate, BiomassComponent, BiomassGroup, BiomassError};
25pub use common::DbError;
26pub use complex::{ComplexSubunitEntry, ComplexSubunitTable};
27pub use exception::ExceptionRow;
28pub use medium_rules::MediumRule;
29pub use mnxref::{MnxrefSeed, MnxrefSeedOther};
30pub use pathway::{PathwayRow, PathwayTable, PwySource};
31pub use seed::{SeedCpdRow, SeedRxnRow, load_seed_metabolites, load_seed_reactions};
32pub use stoich_hash::rxn_stoich_hash;
33pub use stoich_parse::{parse_stoichiometry, StoichTerm, StoichParseError};
34pub use subex::SubexRow;
35pub use tcdb::TcdbSubstrateRow;
36
37use std::path::{Path, PathBuf};
38
39/// The full set of reference tables gapseq consults. Fields are populated
40/// lazily as the relevant loaders are called; [`DataRoot::load`] loads
41/// everything at once.
42#[derive(Default)]
43pub struct DataRoot {
44    pub root: PathBuf,
45    pub seed_rxns: Vec<SeedRxnRow>,
46    pub seed_cpds: Vec<SeedCpdRow>,
47    pub mnxref_seed: Vec<MnxrefSeed>,
48    pub mnxref_seed_other: Vec<MnxrefSeedOther>,
49    pub meta_pwy: PathwayTable,
50    pub kegg_pwy: PathwayTable,
51    pub seed_pwy: PathwayTable,
52    pub custom_pwy: PathwayTable,
53    pub medium_rules: Vec<MediumRule>,
54    pub complex_subunit: ComplexSubunitTable,
55    pub subex: Vec<SubexRow>,
56    pub tcdb_substrates: Vec<TcdbSubstrateRow>,
57    pub exception: Vec<ExceptionRow>,
58    pub biomass_gram_pos: Option<BiomassTemplate>,
59    pub biomass_gram_neg: Option<BiomassTemplate>,
60    pub biomass_archaea: Option<BiomassTemplate>,
61}
62
63impl DataRoot {
64    /// Load every reference table from a single `dat/` root. Individual
65    /// missing files (e.g. if the user only has the core bundle) are logged
66    /// at `warn!` and left empty.
67    pub fn load(root: impl AsRef<Path>) -> Result<Self, DbError> {
68        let root = root.as_ref().to_path_buf();
69        let mut out = DataRoot { root: root.clone(), ..Default::default() };
70
71        out.seed_rxns = load_seed_reactions(root.join("seed_reactions_corrected.tsv"))?;
72        out.seed_cpds = load_seed_metabolites(root.join("seed_metabolites_edited.tsv"))?;
73        out.mnxref_seed = mnxref::load_mnxref_seed(root.join("mnxref_seed.tsv"))?;
74        out.mnxref_seed_other = mnxref::load_mnxref_seed_other(root.join("mnxref_seed-other.tsv"))?;
75        out.meta_pwy = PathwayTable::load(root.join("meta_pwy.tbl"), PwySource::MetaCyc)?;
76        out.kegg_pwy = PathwayTable::load(root.join("kegg_pwy.tbl"), PwySource::Kegg)?;
77        out.seed_pwy = PathwayTable::load(root.join("seed_pwy.tbl"), PwySource::Seed)?;
78        out.custom_pwy = PathwayTable::load(root.join("custom_pwy.tbl"), PwySource::Custom)?;
79        out.medium_rules = medium_rules::load(root.join("medium_prediction_rules.tsv"))?;
80        out.complex_subunit = ComplexSubunitTable::load(root.join("complex_subunit_dict.tsv"))?;
81        out.subex = subex::load(root.join("subex.tbl"))?;
82        out.tcdb_substrates = tcdb::load_substrates(root.join("tcdb_substrates.tbl"))?;
83        out.exception = exception::load(root.join("exception.tbl"))?;
84
85        let bm_dir = root.join("biomass");
86        out.biomass_gram_pos = BiomassTemplate::load_opt(bm_dir.join("biomass_Gram_pos.json"))?;
87        out.biomass_gram_neg = BiomassTemplate::load_opt(bm_dir.join("biomass_Gram_neg.json"))?;
88        out.biomass_archaea = BiomassTemplate::load_opt(bm_dir.join("biomass_archaea.json"))?;
89
90        Ok(out)
91    }
92}