Skip to main content

gapsmith_db/
complex.rs

//! `dat/complex_subunit_dict.tsv` loader.
//!
//! Columns: `rxn, subunit_synonym, subunit`.
//!
//! Used during alignment post-processing to map subunit synonyms (case-insensitive,
//! substring search) to canonical subunit names, so that a complex can be
//! considered "present" when enough of its subunits have good blast hits
//! (`src/complex_detection.R:10–37`).
9
10use crate::common::{csv_err, io_err, DbError};
11use serde::{Deserialize, Serialize};
12use std::collections::HashMap;
13use std::path::Path;
14
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct ComplexSubunitEntry {
17    pub rxn: String,
18    pub subunit_synonym: String,
19    pub subunit: String,
20}
21
22#[derive(Debug, Default, Serialize, Deserialize)]
23pub struct ComplexSubunitTable {
24    pub rows: Vec<ComplexSubunitEntry>,
25    /// Inverse lookup: `rxn → [(synonym, canonical)]` built at load time.
26    pub by_rxn: HashMap<String, Vec<(String, String)>>,
27}
28
29impl ComplexSubunitTable {
30    pub fn load(path: impl AsRef<Path>) -> Result<Self, DbError> {
31        let path = path.as_ref();
32        let f = std::fs::File::open(path).map_err(|e| io_err(path, e))?;
33        let mut rdr = csv::ReaderBuilder::new()
34            .delimiter(b'\t')
35            .has_headers(true)
36            .quoting(false)
37            .flexible(true)
38            .from_reader(f);
39        let mut rows = Vec::new();
40        for rec in rdr.deserialize::<ComplexSubunitEntry>() {
41            rows.push(rec.map_err(|e| csv_err(path, e))?);
42        }
43        let mut by_rxn: HashMap<String, Vec<(String, String)>> = HashMap::new();
44        for r in &rows {
45            by_rxn
46                .entry(r.rxn.clone())
47                .or_default()
48                .push((r.subunit_synonym.clone(), r.subunit.clone()));
49        }
50        tracing::info!(path = %path.display(), rows = rows.len(), "loaded complex subunit dict");
51        Ok(Self { rows, by_rxn })
52    }
53
54    pub fn for_rxn(&self, rxn: &str) -> Option<&[(String, String)]> {
55        self.by_rxn.get(rxn).map(|v| v.as_slice())
56    }
57
58    pub fn len(&self) -> usize {
59        self.rows.len()
60    }
61    pub fn is_empty(&self) -> bool {
62        self.rows.is_empty()
63    }
64}