Skip to main content

gapsmith_db/
exception.rs

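//! Loader for `dat/exception.tbl` — enzymes with known false friends that
//! require stricter identity cutoffs (see `src/analyse_alignments.R:108–143`).
//!
//! The file is tab-separated with two columns, `enzyme/reaction` and `comment`.
//! Blank lines and lines starting with `#` are ignored; the first remaining
//! line is the column header and is skipped.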
5
6use crate::common::{io_err, DbError};
7use serde::{Deserialize, Serialize};
8use std::io::BufRead;
9use std::path::Path;
10
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct ExceptionRow {
13    pub id: String,
14    #[serde(default, skip_serializing_if = "String::is_empty")]
15    pub comment: String,
16}
17
18pub fn load(path: impl AsRef<Path>) -> Result<Vec<ExceptionRow>, DbError> {
19    let path = path.as_ref();
20    let f = std::fs::File::open(path).map_err(|e| io_err(path, e))?;
21    let rdr = std::io::BufReader::new(f);
22    let mut out = Vec::new();
23    let mut saw_header = false;
24    for line in rdr.lines() {
25        let line = line.map_err(|e| io_err(path, e))?;
26        let trimmed = line.trim_start();
27        if trimmed.starts_with('#') || trimmed.is_empty() {
28            continue;
29        }
30        if !saw_header {
31            saw_header = true;
32            // First non-comment line is the header; skip.
33            continue;
34        }
35        let mut cols = line.splitn(2, '\t');
36        let id = cols.next().unwrap_or("").trim().to_string();
37        let comment = cols.next().unwrap_or("").trim().to_string();
38        if !id.is_empty() {
39            out.push(ExceptionRow { id, comment });
40        }
41    }
42    tracing::info!(path = %path.display(), rows = out.len(), "loaded exception table");
43    Ok(out)
44}
45
#[cfg(test)]
mod tests {
    use super::*;

    /// A leading `#` comment and the header row must both be dropped, leaving
    /// only the two data rows.
    #[test]
    fn skips_comment_and_header() {
        // Fixture lines, written to disk newline-terminated.
        let fixture = [
            "# file with enzymes having false friends",
            "enzyme/reaction\tcomment",
            "7.1.1.9\tcytochrome-c oxidase",
            "1.11.1.6\tcatalase",
        ];
        let mut contents = fixture.join("\n");
        contents.push('\n');

        let dir = tempfile::tempdir().unwrap();
        let table = dir.path().join("e.tbl");
        std::fs::write(&table, contents).unwrap();

        let rows = load(&table).unwrap();
        assert_eq!(rows.len(), 2);
        assert_eq!(rows[0].id, "7.1.1.9");
        assert_eq!(rows[1].comment, "catalase");
    }
}