gapsmith_db/
medium_rules.rs1use crate::common::{csv_err, io_err, DbError};
9use serde::{Deserialize, Serialize};
10use std::path::Path;
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct MediumRule {
14 pub nutrient: String,
15 pub cpd_id: String,
16 pub rule: String,
17 pub max_flux: Option<f64>,
19 pub proton_balance: String,
20 #[serde(default, skip_serializing_if = "String::is_empty")]
21 pub comment: String,
22 #[serde(default, skip_serializing_if = "String::is_empty")]
23 pub category: String,
24}
25
26pub fn load(path: impl AsRef<Path>) -> Result<Vec<MediumRule>, DbError> {
27 let path = path.as_ref();
28 let f = std::fs::File::open(path).map_err(|e| io_err(path, e))?;
29 let mut rdr = csv::ReaderBuilder::new()
30 .delimiter(b'\t')
31 .has_headers(true)
32 .quoting(false)
33 .flexible(true)
34 .from_reader(f);
35
36 let headers_raw = rdr.byte_headers().map_err(|e| csv_err(path, e))?.clone();
40 let headers: Vec<String> = headers_raw
41 .iter()
42 .map(|h| String::from_utf8_lossy(h).trim().to_string())
43 .collect();
44 let col = |name: &str| -> Option<usize> {
45 headers.iter().position(|h| h == name)
46 };
47 let c_nut = col("Nutrient").ok_or_else(|| DbError::Parse {
48 path: path.to_path_buf(),
49 line: 1,
50 msg: "missing `Nutrient` column".into(),
51 })?;
52 let c_cpd = col("cpd.id").ok_or_else(|| DbError::Parse {
53 path: path.to_path_buf(),
54 line: 1,
55 msg: "missing `cpd.id` column".into(),
56 })?;
57 let c_rule = col("rule").ok_or_else(|| DbError::Parse {
58 path: path.to_path_buf(),
59 line: 1,
60 msg: "missing `rule` column".into(),
61 })?;
62 let c_flux = col("maxFlux").ok_or_else(|| DbError::Parse {
63 path: path.to_path_buf(),
64 line: 1,
65 msg: "missing `maxFlux` column".into(),
66 })?;
67 let c_proton = col("proton.balance").unwrap_or(usize::MAX);
68 let c_comment = col("Comment").unwrap_or(usize::MAX);
69 let c_cat = col("Category").unwrap_or(usize::MAX);
70
71 let mut out = Vec::new();
72 for rec in rdr.byte_records() {
73 let rec = rec.map_err(|e| csv_err(path, e))?;
74 let get = |c: usize| -> String {
75 if c == usize::MAX {
76 String::new()
77 } else {
78 rec.get(c)
79 .map(|b| String::from_utf8_lossy(b).trim().to_string())
80 .unwrap_or_default()
81 }
82 };
83 let nutrient = get(c_nut);
84 let cpd_id = get(c_cpd);
85 let rule = get(c_rule);
86 if nutrient.is_empty() && cpd_id.is_empty() && rule.is_empty() {
88 continue;
89 }
90 let flux_raw = get(c_flux);
91 let max_flux = match flux_raw.as_str() {
92 "" | "NA" | "na" | "N/A" => None,
93 other => Some(other.parse::<f64>().map_err(|_| DbError::Parse {
94 path: path.to_path_buf(),
95 line: rec.position().map(|p| p.line()).unwrap_or(0),
96 msg: format!("maxFlux `{other}` is not a number"),
97 })?),
98 };
99 out.push(MediumRule {
100 nutrient,
101 cpd_id,
102 rule,
103 max_flux,
104 proton_balance: get(c_proton),
105 comment: get(c_comment),
106 category: get(c_cat),
107 });
108 }
109 tracing::info!(path = %path.display(), rows = out.len(), "loaded medium rules");
110 Ok(out)
111}
112
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Round-trips a small TSV through `load`: a numeric flux, an `NA` flux,
    /// an all-empty row that must be dropped, and a row with an empty comment.
    #[test]
    fn parses_rules() {
        let dir = tempfile::tempdir().unwrap();
        let tsv_path = dir.path().join("r.tsv");

        // Header first, then the data rows; each entry is written as one line.
        let lines = [
            "Nutrient\tcpd.id\trule\tmaxFlux\tproton.balance\tComment\tCategory",
            "Water\tcpd00001\tTRUE\t100\tFALSE\tCore medium compound\tInorganics",
            "O2\tcpd00007\tpwy1\tNA\tFALSE\tno O2\tInorganics",
            "\t\t\t\t\t\t",
            "Glc\tcpd00027\trxn1\t5\tTRUE\t\tSaccharides",
        ];
        let mut file = std::fs::File::create(&tsv_path).unwrap();
        for line in lines.iter() {
            writeln!(file, "{}", line).unwrap();
        }

        let rules = load(&tsv_path).unwrap();

        // The all-empty row is skipped, leaving three rules.
        assert_eq!(rules.len(), 3);
        assert_eq!(rules[0].cpd_id, "cpd00001");
        assert_eq!(rules[0].max_flux, Some(100.0));
        assert_eq!(rules[1].max_flux, None);
        assert_eq!(rules[2].cpd_id, "cpd00027");
    }
}