Skip to main content

gapsmith_db/
tcdb.rs

1//! `dat/tcdb_substrates.tbl` loader.
2//!
3//! Format: two columns (no header), TAB-separated.
4//!
5//! ```text
6//! 2.A.1.28.4<TAB>CHEBI:5651;ferroheme b
7//! 1.A.11.4.1<TAB>CHEBI:7435;ammonium|CHEBI:7434;ammonia|...
8//! ```
9//!
10//! The substrate column is a `|`-separated list of `CHEBI:<id>;<name>` pairs.
11
12use crate::common::{csv_err, io_err, DbError};
13use serde::{Deserialize, Serialize};
14use std::path::Path;
15
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct TcdbSubstrateRow {
18    pub tc_id: String,
19    pub substrates: Vec<TcdbSubstrate>,
20}
21
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct TcdbSubstrate {
24    pub chebi: String,
25    pub name: String,
26}
27
28pub fn load_substrates(path: impl AsRef<Path>) -> Result<Vec<TcdbSubstrateRow>, DbError> {
29    let path = path.as_ref();
30    let f = std::fs::File::open(path).map_err(|e| io_err(path, e))?;
31    let mut rdr = csv::ReaderBuilder::new()
32        .delimiter(b'\t')
33        .has_headers(false)
34        .quoting(false)
35        .flexible(true)
36        .from_reader(f);
37
38    let mut out = Vec::new();
39    for rec in rdr.records() {
40        let rec = rec.map_err(|e| csv_err(path, e))?;
41        if rec.len() < 2 {
42            continue;
43        }
44        let tc_id = rec.get(0).unwrap_or("").to_string();
45        let raw = rec.get(1).unwrap_or("");
46        let subs: Vec<TcdbSubstrate> = raw
47            .split('|')
48            .filter_map(|pair| {
49                let (chebi, name) = pair.split_once(';')?;
50                Some(TcdbSubstrate { chebi: chebi.to_string(), name: name.to_string() })
51            })
52            .collect();
53        out.push(TcdbSubstrateRow { tc_id, substrates: subs });
54    }
55    tracing::info!(path = %path.display(), rows = out.len(), "loaded tcdb substrates");
56    Ok(out)
57}