1use md5;
31use std::path::{Path, PathBuf};
32
33#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
34pub enum SeqSrc {
35 Reviewed,
36 #[default]
37 PreferRev,
38 Both,
39 Unreviewed,
40}
41
42#[derive(Debug, Clone)]
43pub struct SeqfileOptions {
44 pub tax_root: PathBuf,
47 pub user_root: PathBuf,
52 pub seq_src: SeqSrc,
53}
54
55#[derive(Debug, Clone)]
56pub struct ResolvedSeq {
57 pub path: PathBuf,
59 pub label: String,
63}
64
65pub fn resolve_for_reaction(
76 opts: &SeqfileOptions,
77 rxn_id: &str,
78 ec: &str,
79 name: &str,
80) -> Vec<ResolvedSeq> {
81 let mut out: Vec<ResolvedSeq> = Vec::new();
82
83 for one_ec in ec.split('/').map(str::trim).filter(|s| !s.is_empty()) {
85 if let Some(r) = probe(&opts.user_root, "user", &format!("{one_ec}.fasta")) {
86 out.push(r);
87 }
88 }
89 let name_trim = name.trim();
91 if !name_trim.is_empty() && !looks_like_rxn_id(name_trim) {
92 let hash = md5_hex(name_trim);
93 if let Some(r) = probe(&opts.user_root, "user", &format!("{hash}.fasta")) {
94 out.push(r);
95 }
96 }
97 if !rxn_id.is_empty() {
99 if let Some(r) = probe(&opts.user_root, "user", &format!("{rxn_id}.fasta")) {
100 out.push(r);
101 }
102 }
103 if !out.is_empty() {
104 return out;
105 }
106
107 if let Some(r) = probe(&opts.tax_root, "rxn", &format!("{rxn_id}.fasta")) {
109 return vec![r];
110 }
111
112 let ec_trim = ec.trim();
114 if !ec_trim.is_empty() {
115 let mut ec_paths = Vec::new();
116 for one_ec in ec_trim.split('/').map(str::trim).filter(|s| !s.is_empty()) {
117 ec_paths.extend(resolve_by_stem(opts, one_ec));
118 }
119 if !ec_paths.is_empty() {
120 return ec_paths;
121 }
122 }
123
124 if !name_trim.is_empty() && !looks_like_rxn_id(name_trim) {
126 let hash = md5_hex(name_trim);
127 let hits = resolve_by_stem(opts, &hash);
128 if !hits.is_empty() {
129 return hits;
130 }
131 }
132
133 Vec::new()
134}
135
136pub fn looks_like_rxn_id(name: &str) -> bool {
141 if let Some(rest) = name.strip_prefix("rxn") {
142 return !rest.is_empty() && rest.bytes().all(|b| b.is_ascii_digit());
143 }
144 if let Some(rest) = name.strip_prefix("Rxn") {
145 return !rest.is_empty() && rest.bytes().all(|b| b.is_ascii_digit());
146 }
147 name.starts_with("RXN")
148}
149
150pub fn md5_hex(s: &str) -> String {
153 use md5::{Digest, Md5};
154 let mut h = Md5::new();
155 h.update(s.as_bytes());
156 let out = h.finalize();
157 let mut hex = String::with_capacity(32);
158 for b in out.iter() {
159 hex.push_str(&format!("{b:02x}"));
160 }
161 hex
162}
163
164fn resolve_by_stem(opts: &SeqfileOptions, ec: &str) -> Vec<ResolvedSeq> {
165 let f = format!("{ec}.fasta");
166 let rev = probe(&opts.tax_root, "rev", &f);
167 let unrev = probe(&opts.tax_root, "unrev", &f);
168 match opts.seq_src {
169 SeqSrc::Reviewed => rev.into_iter().collect(),
170 SeqSrc::PreferRev => rev.or(unrev).into_iter().collect(),
171 SeqSrc::Both => vec![rev, unrev].into_iter().flatten().collect(),
172 SeqSrc::Unreviewed => unrev.into_iter().collect(),
173 }
174}
175
176fn probe(tax_root: &Path, dir: &str, filename: &str) -> Option<ResolvedSeq> {
177 let p = tax_root.join(dir).join(filename);
178 if !p.is_file() {
179 return None;
180 }
181 let meta = std::fs::metadata(&p).ok()?;
182 if meta.len() == 0 {
183 return None;
184 }
185 Some(ResolvedSeq { path: p, label: format!("{dir}/{filename}") })
186}
187
188#[cfg(test)]
189mod tests {
190 use super::*;
191 use std::fs;
192
193 #[test]
194 fn prefers_rxn_dir() {
195 let d = tempfile::tempdir().unwrap();
196 let root = d.path();
197 for sub in ["rxn", "rev", "unrev", "user"] {
198 fs::create_dir_all(root.join(sub)).unwrap();
199 }
200 fs::write(root.join("rxn/RXN-1.fasta"), ">a\nA\n").unwrap();
201 fs::write(root.join("rev/1.1.1.1.fasta"), ">b\nB\n").unwrap();
202
203 let opts = SeqfileOptions {
204 tax_root: root.into(),
205 user_root: root.into(),
206 seq_src: SeqSrc::PreferRev,
207 };
208 let r = resolve_for_reaction(&opts, "RXN-1", "1.1.1.1", "");
209 assert_eq!(r.len(), 1);
210 assert_eq!(r[0].label, "rxn/RXN-1.fasta");
211 }
212
213 #[test]
214 fn falls_back_to_reviewed_ec() {
215 let d = tempfile::tempdir().unwrap();
216 let root = d.path();
217 fs::create_dir_all(root.join("rev")).unwrap();
218 fs::write(root.join("rev/1.1.1.1.fasta"), ">b\nB\n").unwrap();
219 let opts = SeqfileOptions {
220 tax_root: root.into(),
221 user_root: root.into(),
222 seq_src: SeqSrc::PreferRev,
223 };
224 let r = resolve_for_reaction(&opts, "RXN-missing", "1.1.1.1", "");
225 assert_eq!(r.len(), 1);
226 assert_eq!(r[0].label, "rev/1.1.1.1.fasta");
227 }
228
229 #[test]
230 fn both_mode_returns_rev_and_unrev() {
231 let d = tempfile::tempdir().unwrap();
232 let root = d.path();
233 for sub in ["rev", "unrev"] {
234 fs::create_dir_all(root.join(sub)).unwrap();
235 }
236 fs::write(root.join("rev/1.1.1.1.fasta"), ">r\nR\n").unwrap();
237 fs::write(root.join("unrev/1.1.1.1.fasta"), ">u\nU\n").unwrap();
238 let opts = SeqfileOptions {
239 tax_root: root.into(),
240 user_root: root.into(),
241 seq_src: SeqSrc::Both,
242 };
243 let r = resolve_for_reaction(&opts, "RXN-missing", "1.1.1.1", "");
244 assert_eq!(r.len(), 2);
245 }
246
247 #[test]
248 fn user_override_wins() {
249 let d = tempfile::tempdir().unwrap();
250 let root = d.path();
251 for sub in ["rxn", "rev", "user"] {
252 fs::create_dir_all(root.join(sub)).unwrap();
253 }
254 fs::write(root.join("rxn/RXN-1.fasta"), ">a\n").unwrap();
255 fs::write(root.join("rev/1.1.1.1.fasta"), ">b\n").unwrap();
256 fs::write(root.join("user/RXN-1.fasta"), ">c\nC\n").unwrap();
257 let opts = SeqfileOptions {
258 tax_root: root.into(),
259 user_root: root.into(),
260 seq_src: SeqSrc::PreferRev,
261 };
262 let r = resolve_for_reaction(&opts, "RXN-1", "1.1.1.1", "");
263 assert_eq!(r[0].label, "user/RXN-1.fasta");
264 }
265
266 #[test]
267 fn md5_hex_matches_gnu_md5sum() {
268 assert_eq!(md5_hex("Acetaldehyde dehydrogenase"), "1390704749ddc17f2b61599cf204ac4a");
270 assert_eq!(md5_hex(""), "d41d8cd98f00b204e9800998ecf8427e");
272 }
273
274 #[test]
275 fn looks_like_rxn_id_filter() {
276 assert!(looks_like_rxn_id("rxn00001"));
277 assert!(looks_like_rxn_id("Rxn00001"));
278 assert!(looks_like_rxn_id("RXN-8099"));
279 assert!(!looks_like_rxn_id("Acetaldehyde dehydrogenase"));
280 assert!(!looks_like_rxn_id("ATP synthase"));
281 assert!(!looks_like_rxn_id(""));
282 }
283
284 #[test]
285 fn reaname_md5_fallback() {
286 let d = tempfile::tempdir().unwrap();
287 let root = d.path();
288 fs::create_dir_all(root.join("rev")).unwrap();
289 let hash = md5_hex("Acetaldehyde dehydrogenase");
290 fs::write(root.join(format!("rev/{hash}.fasta")), ">x\nX\n").unwrap();
291 let opts = SeqfileOptions {
292 tax_root: root.into(),
293 user_root: root.into(),
294 seq_src: SeqSrc::PreferRev,
295 };
296 let r = resolve_for_reaction(&opts, "RXN-missing", "", "Acetaldehyde dehydrogenase");
297 assert_eq!(r.len(), 1);
298 assert_eq!(r[0].label, format!("rev/{hash}.fasta"));
299 }
300
301 #[test]
302 fn reaname_fallback_skips_rxn_id_names() {
303 let d = tempfile::tempdir().unwrap();
304 let root = d.path();
305 fs::create_dir_all(root.join("rev")).unwrap();
306 let hash = md5_hex("rxn00001");
307 fs::write(root.join(format!("rev/{hash}.fasta")), ">x\nX\n").unwrap();
308 let opts = SeqfileOptions {
309 tax_root: root.into(),
310 user_root: root.into(),
311 seq_src: SeqSrc::PreferRev,
312 };
313 let r = resolve_for_reaction(&opts, "", "", "rxn00001");
314 assert!(r.is_empty());
317 }
318
319 #[test]
320 fn empty_file_treated_as_missing() {
321 let d = tempfile::tempdir().unwrap();
322 let root = d.path();
323 fs::create_dir_all(root.join("rxn")).unwrap();
324 fs::write(root.join("rxn/RXN-1.fasta"), "").unwrap();
325 let opts = SeqfileOptions {
326 tax_root: root.into(),
327 user_root: root.into(),
328 seq_src: SeqSrc::PreferRev,
329 };
330 let r = resolve_for_reaction(&opts, "RXN-1", "", "");
331 assert!(r.is_empty());
332 }
333}