use md5;
use std::path::{Path, PathBuf};
/// Selects which sequence collections are consulted when a reference file
/// is looked up by stem (see `resolve_by_stem`).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum SeqSrc {
    /// Use only the file from the `rev` directory.
    Reviewed,
    /// Use the `rev` file when it is present, otherwise fall back to the
    /// `unrev` file. This is the default.
    #[default]
    PreferRev,
    /// Use the `rev` file and the `unrev` file, in that order, whichever exist.
    Both,
    /// Use only the file from the `unrev` directory.
    Unreviewed,
}
/// Locations and policy used when resolving reference sequence files.
#[derive(Debug, Clone)]
pub struct SeqfileOptions {
    /// Root directory whose `rxn`, `rev` and `unrev` subdirectories are probed.
    pub tax_root: PathBuf,
    /// Root directory whose `user` subdirectory is probed for override files.
    pub user_root: PathBuf,
    /// Which of the `rev` / `unrev` collections to use for stem-based lookups.
    pub seq_src: SeqSrc,
}
/// A reference sequence file that was found on disk.
#[derive(Debug, Clone)]
pub struct ResolvedSeq {
    /// Full path of the file: the probed root joined with the subdirectory
    /// and the file name.
    pub path: PathBuf,
    /// Short identifier of the form `"<subdirectory>/<file name>"`.
    pub label: String,
}
/// Resolves the reference FASTA files to use for one reaction.
///
/// The lookup runs through the tiers below and returns as soon as one of
/// them produces at least one file:
///
/// 1. User overrides in `<user_root>/user/`: `<EC>.fasta` for every EC number
///    in `ec`, then `<md5(name)>.fasta`, then `<rxn_id>.fasta`. All hits of
///    this tier are returned, in that order.
/// 2. `<tax_root>/rxn/<rxn_id>.fasta`.
/// 3. `<EC>.fasta` for every EC number in `ec`, looked up through
///    `resolve_by_stem` (which honours `opts.seq_src`).
/// 4. `<md5(name)>.fasta`, looked up through `resolve_by_stem`.
///
/// # Arguments
///
/// * `opts` - roots to search and the reviewed/unreviewed policy.
/// * `rxn_id` - reaction identifier; surrounding whitespace is ignored and an
///   empty identifier skips every `<rxn_id>.fasta` probe.
/// * `ec` - zero or more EC numbers separated by `/`; empty pieces are ignored.
/// * `name` - reaction name; it is hashed with `md5_hex` after trimming. Empty
///   names and names for which `looks_like_rxn_id` is true are never hashed.
///
/// # Returns
///
/// The files of the first tier that matched, or an empty vector when nothing
/// was found.
pub fn resolve_for_reaction(
    opts: &SeqfileOptions,
    rxn_id: &str,
    ec: &str,
    name: &str,
) -> Vec<ResolvedSeq> {
    let rxn_id = rxn_id.trim();
    let name = name.trim();

    // Both file stems are needed by two tiers each, so derive them once.
    // `None` means "this key must not be used for any probe".
    let rxn_file = (!rxn_id.is_empty()).then(|| format!("{rxn_id}.fasta"));
    let name_hash = (!name.is_empty() && !looks_like_rxn_id(name)).then(|| md5_hex(name));

    // Tier 1: user-supplied overrides; every hit is kept.
    let mut overrides: Vec<ResolvedSeq> = split_ec_numbers(ec)
        .filter_map(|one_ec| probe(&opts.user_root, "user", &format!("{one_ec}.fasta")))
        .collect();
    if let Some(hash) = &name_hash {
        overrides.extend(probe(&opts.user_root, "user", &format!("{hash}.fasta")));
    }
    if let Some(file) = &rxn_file {
        overrides.extend(probe(&opts.user_root, "user", file));
    }
    if !overrides.is_empty() {
        return overrides;
    }

    // Tier 2: reaction-specific file. Skipped for an empty id so that a stray
    // `rxn/.fasta` can never be picked up.
    if let Some(hit) = rxn_file
        .as_deref()
        .and_then(|file| probe(&opts.tax_root, "rxn", file))
    {
        return vec![hit];
    }

    // Tier 3: EC-number files.
    let by_ec: Vec<ResolvedSeq> = split_ec_numbers(ec)
        .flat_map(|one_ec| resolve_by_stem(opts, one_ec))
        .collect();
    if !by_ec.is_empty() {
        return by_ec;
    }

    // Tier 4: file named after the MD5 of the reaction name.
    name_hash
        .map(|hash| resolve_by_stem(opts, &hash))
        .unwrap_or_default()
}

/// Splits a `/`-separated EC string into trimmed, non-empty EC numbers.
fn split_ec_numbers<'a>(ec: &'a str) -> impl Iterator<Item = &'a str> {
    ec.split('/').map(str::trim).filter(|piece| !piece.is_empty())
}
/// Reports whether `name` has the shape of a reaction identifier rather than
/// a descriptive reaction name.
///
/// A name qualifies when it is `rxn` or `Rxn` followed by one or more ASCII
/// digits and nothing else, or when it starts with `RXN` (any suffix,
/// including none).
pub fn looks_like_rxn_id(name: &str) -> bool {
    let digits_only =
        |tail: &str| !tail.is_empty() && tail.bytes().all(|byte| byte.is_ascii_digit());

    match name
        .strip_prefix("rxn")
        .or_else(|| name.strip_prefix("Rxn"))
    {
        Some(tail) => digits_only(tail),
        None => name.starts_with("RXN"),
    }
}
/// Returns the MD5 digest of the bytes of `s` as a lowercase hexadecimal
/// string (two characters per digest byte).
pub fn md5_hex(s: &str) -> String {
    use md5::{Digest, Md5};

    let mut hasher = Md5::new();
    hasher.update(s.as_bytes());
    let digest = hasher.finalize();

    // Render every digest byte as exactly two lowercase hex characters.
    digest
        .iter()
        .map(|byte| format!("{byte:02x}"))
        .collect::<String>()
}
/// Looks up `<ec>.fasta` in the `rev` and/or `unrev` directories under
/// `opts.tax_root`, as selected by `opts.seq_src`.
///
/// Despite its name, `ec` is any file stem: callers pass EC numbers as well
/// as MD5 hashes of reaction names.
///
/// Returns zero, one or (only in [`SeqSrc::Both`] mode) two files; in `Both`
/// mode the `rev` file comes first.
fn resolve_by_stem(opts: &SeqfileOptions, ec: &str) -> Vec<ResolvedSeq> {
    let file = format!("{ec}.fasta");

    // Each probe touches the filesystem, so they are wrapped in closures and
    // only the ones the selected mode actually needs are executed.
    let reviewed = || probe(&opts.tax_root, "rev", &file);
    let unreviewed = || probe(&opts.tax_root, "unrev", &file);

    match opts.seq_src {
        SeqSrc::Reviewed => reviewed().into_iter().collect(),
        SeqSrc::PreferRev => reviewed().or_else(unreviewed).into_iter().collect(),
        SeqSrc::Both => reviewed().into_iter().chain(unreviewed()).collect(),
        SeqSrc::Unreviewed => unreviewed().into_iter().collect(),
    }
}
/// Checks whether `<tax_root>/<dir>/<filename>` is a usable sequence file.
///
/// `tax_root` is whichever root the caller wants searched; it is also called
/// with the user root.
///
/// Returns `Some` with the full path and the label `"<dir>/<filename>"` when
/// the path refers to a regular file with a non-zero length. Returns `None`
/// when the path does not exist, cannot be inspected, is not a regular file,
/// or is empty.
fn probe(tax_root: &Path, dir: &str, filename: &str) -> Option<ResolvedSeq> {
    let path = tax_root.join(dir).join(filename);

    // A single metadata lookup answers both "is it a regular file?" and
    // "is it non-empty?", avoiding a second stat and the gap between checks.
    let meta = std::fs::metadata(&path).ok()?;
    if !meta.is_file() || meta.len() == 0 {
        return None;
    }

    Some(ResolvedSeq {
        path,
        label: format!("{dir}/{filename}"),
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Creates a temporary root containing the given subdirectories and files.
    ///
    /// `files` holds `(path relative to the root, file content)` pairs. The
    /// returned guard must stay alive for as long as the files are needed.
    fn fixture(subdirs: &[&str], files: &[(&str, &str)]) -> tempfile::TempDir {
        let dir = tempfile::tempdir().unwrap();
        for sub in subdirs {
            fs::create_dir_all(dir.path().join(sub)).unwrap();
        }
        for (relative, content) in files {
            fs::write(dir.path().join(relative), content).unwrap();
        }
        dir
    }

    /// Builds options whose taxonomy root and user root are both `root`.
    fn options_at(root: &std::path::Path, seq_src: SeqSrc) -> SeqfileOptions {
        SeqfileOptions {
            tax_root: root.into(),
            user_root: root.into(),
            seq_src,
        }
    }

    #[test]
    fn prefers_rxn_dir() {
        let dir = fixture(
            &["rxn", "rev", "unrev", "user"],
            &[
                ("rxn/RXN-1.fasta", ">a\nA\n"),
                ("rev/1.1.1.1.fasta", ">b\nB\n"),
            ],
        );
        let opts = options_at(dir.path(), SeqSrc::PreferRev);
        let found = resolve_for_reaction(&opts, "RXN-1", "1.1.1.1", "");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].label, "rxn/RXN-1.fasta");
    }

    #[test]
    fn falls_back_to_reviewed_ec() {
        let dir = fixture(&["rev"], &[("rev/1.1.1.1.fasta", ">b\nB\n")]);
        let opts = options_at(dir.path(), SeqSrc::PreferRev);
        let found = resolve_for_reaction(&opts, "RXN-missing", "1.1.1.1", "");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].label, "rev/1.1.1.1.fasta");
    }

    #[test]
    fn both_mode_returns_rev_and_unrev() {
        let dir = fixture(
            &["rev", "unrev"],
            &[
                ("rev/1.1.1.1.fasta", ">r\nR\n"),
                ("unrev/1.1.1.1.fasta", ">u\nU\n"),
            ],
        );
        let opts = options_at(dir.path(), SeqSrc::Both);
        let found = resolve_for_reaction(&opts, "RXN-missing", "1.1.1.1", "");
        assert_eq!(found.len(), 2);
    }

    #[test]
    fn user_override_wins() {
        let dir = fixture(
            &["rxn", "rev", "user"],
            &[
                ("rxn/RXN-1.fasta", ">a\n"),
                ("rev/1.1.1.1.fasta", ">b\n"),
                ("user/RXN-1.fasta", ">c\nC\n"),
            ],
        );
        let opts = options_at(dir.path(), SeqSrc::PreferRev);
        let found = resolve_for_reaction(&opts, "RXN-1", "1.1.1.1", "");
        assert_eq!(found[0].label, "user/RXN-1.fasta");
    }

    #[test]
    fn md5_hex_matches_gnu_md5sum() {
        assert_eq!(
            md5_hex("Acetaldehyde dehydrogenase"),
            "1390704749ddc17f2b61599cf204ac4a"
        );
        assert_eq!(md5_hex(""), "d41d8cd98f00b204e9800998ecf8427e");
    }

    #[test]
    fn looks_like_rxn_id_filter() {
        for id in ["rxn00001", "Rxn00001", "RXN-8099"] {
            assert!(looks_like_rxn_id(id));
        }
        for other in ["Acetaldehyde dehydrogenase", "ATP synthase", ""] {
            assert!(!looks_like_rxn_id(other));
        }
    }

    #[test]
    fn reaname_md5_fallback() {
        let hash = md5_hex("Acetaldehyde dehydrogenase");
        let relative = format!("rev/{hash}.fasta");
        let dir = fixture(&["rev"], &[(relative.as_str(), ">x\nX\n")]);
        let opts = options_at(dir.path(), SeqSrc::PreferRev);
        let found =
            resolve_for_reaction(&opts, "RXN-missing", "", "Acetaldehyde dehydrogenase");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].label, format!("rev/{hash}.fasta"));
    }

    #[test]
    fn reaname_fallback_skips_rxn_id_names() {
        let hash = md5_hex("rxn00001");
        let relative = format!("rev/{hash}.fasta");
        let dir = fixture(&["rev"], &[(relative.as_str(), ">x\nX\n")]);
        let opts = options_at(dir.path(), SeqSrc::PreferRev);
        let found = resolve_for_reaction(&opts, "", "", "rxn00001");
        assert!(found.is_empty());
    }

    #[test]
    fn empty_file_treated_as_missing() {
        let dir = fixture(&["rxn"], &[("rxn/RXN-1.fasta", "")]);
        let opts = options_at(dir.path(), SeqSrc::PreferRev);
        let found = resolve_for_reaction(&opts, "RXN-1", "", "");
        assert!(found.is_empty());
    }
}