Skip to main content

bio_forge/io/
context.rs

//! Residue-name normalization utilities shared across all structure readers and writers.
//!
//! This module exposes [`IoContext`], a registry-backed helper that maps well over a hundred
//! PDB and force-field aliases to canonical residue codes and links those codes to
//! [`StandardResidue`] values. File parsers call it to ensure consistent downstream handling
//! of polymer types while exporters reuse it to apply user-provided aliases.
7
8use crate::model::types::StandardResidue;
9use std::collections::HashMap;
10
11/// Canonicalization state for residue names used during IO operations.
12///
13/// [`IoContext`] stores both the alias-to-canonical mapping as well as the
14/// canonical-to-[`StandardResidue`] lookup so that structure builders can translate raw
15/// residue labels into actionable metadata.
16#[derive(Debug, Clone)]
17pub struct IoContext {
18    /// Maps arbitrary residue labels to their canonical three-letter code.
19    alias_map: HashMap<String, String>,
20    /// Records which canonical names correspond to standard residues.
21    standard_map: HashMap<String, StandardResidue>,
22}
23
24impl IoContext {
25    /// Creates a context pre-populated with the built-in alias registry.
26    ///
27    /// The registry covers variants for protonation, modifications, and common force-field
28    /// naming conventions so that raw files parse into consistent structures.
29    ///
30    /// # Returns
31    ///
32    /// A context instance ready for use by IO helpers and downstream operations.
33    pub fn new_default() -> Self {
34        let mut alias_map = HashMap::new();
35        let mut standard_map = HashMap::new();
36
37        // Registers a canonical residue name and its `StandardResidue` annotation.
38        macro_rules! register_standard {
39            ($canonical:expr, $enum_val:expr) => {
40                alias_map.insert($canonical.to_string(), $canonical.to_string());
41                standard_map.insert($canonical.to_string(), $enum_val);
42            };
43        }
44
45        // Associates an alias with the canonical residue label without tagging it standard.
46        macro_rules! register_alias {
47            ($alias:expr, $canonical:expr) => {
48                alias_map.insert($alias.to_string(), $canonical.to_string());
49            };
50        }
51
52        register_standard!("ALA", StandardResidue::ALA);
53        register_standard!("ARG", StandardResidue::ARG);
54        register_standard!("ARN", StandardResidue::ARG);
55        register_standard!("ASN", StandardResidue::ASN);
56        register_standard!("ASP", StandardResidue::ASP);
57        register_standard!("ASH", StandardResidue::ASP);
58        register_standard!("CYS", StandardResidue::CYS);
59        register_standard!("CYM", StandardResidue::CYS);
60        register_standard!("CYX", StandardResidue::CYS);
61        register_standard!("GLN", StandardResidue::GLN);
62        register_standard!("GLU", StandardResidue::GLU);
63        register_standard!("GLH", StandardResidue::GLU);
64        register_standard!("GLY", StandardResidue::GLY);
65        register_standard!("HID", StandardResidue::HIS);
66        register_standard!("HIE", StandardResidue::HIS);
67        register_standard!("HIP", StandardResidue::HIS);
68        register_standard!("ILE", StandardResidue::ILE);
69        register_standard!("LEU", StandardResidue::LEU);
70        register_standard!("LYS", StandardResidue::LYS);
71        register_standard!("LYN", StandardResidue::LYS);
72        register_standard!("MET", StandardResidue::MET);
73        register_standard!("PHE", StandardResidue::PHE);
74        register_standard!("PRO", StandardResidue::PRO);
75        register_standard!("SER", StandardResidue::SER);
76        register_standard!("THR", StandardResidue::THR);
77        register_standard!("TRP", StandardResidue::TRP);
78        register_standard!("TYR", StandardResidue::TYR);
79        register_standard!("TYM", StandardResidue::TYR);
80        register_standard!("VAL", StandardResidue::VAL);
81
82        register_standard!("DA", StandardResidue::DA);
83        register_standard!("DC", StandardResidue::DC);
84        register_standard!("DG", StandardResidue::DG);
85        register_standard!("DT", StandardResidue::DT);
86        register_standard!("DI", StandardResidue::DI);
87
88        register_standard!("A", StandardResidue::A);
89        register_standard!("C", StandardResidue::C);
90        register_standard!("G", StandardResidue::G);
91        register_standard!("U", StandardResidue::U);
92        register_standard!("I", StandardResidue::I);
93
94        register_standard!("HOH", StandardResidue::HOH);
95
96        register_alias!("AIB", "ALA");
97        register_alias!("ALM", "ALA");
98        register_alias!("AYA", "ALA");
99        register_alias!("BNN", "ALA");
100        register_alias!("CHG", "ALA");
101        register_alias!("CSD", "ALA");
102        register_alias!("DAL", "ALA");
103        register_alias!("DHA", "ALA");
104        register_alias!("DNP", "ALA");
105        register_alias!("FLA", "ALA");
106        register_alias!("HAC", "ALA");
107        register_alias!("MAA", "ALA");
108        register_alias!("PRR", "ALA");
109        register_alias!("TIH", "ALA");
110        register_alias!("TPQ", "ALA");
111
112        register_alias!("ACL", "ARG");
113        register_alias!("AGM", "ARG");
114        register_alias!("ARM", "ARG");
115        register_alias!("DAR", "ARG");
116        register_alias!("HAR", "ARG");
117        register_alias!("HMR", "ARG");
118
119        register_alias!("AR0", "ARN");
120
121        register_alias!("MEN", "ASN");
122
123        register_alias!("2AS", "ASP");
124        register_alias!("ASA", "ASP");
125        register_alias!("ASB", "ASP");
126        register_alias!("ASK", "ASP");
127        register_alias!("ASL", "ASP");
128        register_alias!("ASQ", "ASP");
129        register_alias!("BHD", "ASP");
130        register_alias!("DAS", "ASP");
131        register_alias!("DSP", "ASP");
132        register_alias!("IAS", "ASP");
133
134        register_alias!("BCS", "CYS");
135        register_alias!("BUC", "CYS");
136        register_alias!("C5C", "CYS");
137        register_alias!("C6C", "CYS");
138        register_alias!("CAS", "CYS");
139        register_alias!("CCS", "CYS");
140        register_alias!("CEA", "CYS");
141        register_alias!("CME", "CYS");
142        register_alias!("CSO", "CYS");
143        register_alias!("CSP", "CYS");
144        register_alias!("CSS", "CYS");
145        register_alias!("CSW", "CYS");
146        register_alias!("CSX", "CYS");
147        register_alias!("CY1", "CYS");
148        register_alias!("CY3", "CYS");
149        register_alias!("CYG", "CYS");
150        register_alias!("CYQ", "CYS");
151        register_alias!("DCY", "CYS");
152        register_alias!("EFC", "CYS");
153        register_alias!("OCS", "CYS");
154        register_alias!("PEC", "CYS");
155        register_alias!("PR3", "CYS");
156        register_alias!("PYX", "CYS");
157        register_alias!("SCH", "CYS");
158        register_alias!("SCS", "CYS");
159        register_alias!("SCY", "CYS");
160        register_alias!("SHC", "CYS");
161        register_alias!("SMC", "CYS");
162        register_alias!("SOC", "CYS");
163
164        register_alias!("5HP", "GLU");
165        register_alias!("CGU", "GLU");
166        register_alias!("DGL", "GLU");
167        register_alias!("GGL", "GLU");
168        register_alias!("GMA", "GLU");
169        register_alias!("PCA", "GLU");
170
171        register_alias!("GLP", "GLH");
172
173        register_alias!("DGN", "GLN");
174
175        register_alias!("GL3", "GLY");
176        register_alias!("GLZ", "GLY");
177        register_alias!("GSC", "GLY");
178        register_alias!("MPQ", "GLY");
179        register_alias!("MSA", "GLY");
180        register_alias!("NMC", "GLY");
181        register_alias!("SAR", "GLY");
182
183        register_alias!("HSD", "HID");
184        register_alias!("HIS", "HID");
185        register_alias!("3AH", "HID");
186        register_alias!("DHI", "HID");
187        register_alias!("HIC", "HID");
188        register_alias!("MHS", "HID");
189        register_alias!("NEM", "HID");
190        register_alias!("NEP", "HID");
191
192        register_alias!("HSE", "HIE");
193
194        register_alias!("DIL", "ILE");
195        register_alias!("IIL", "ILE");
196
197        register_alias!("BUG", "LEU");
198        register_alias!("CLE", "LEU");
199        register_alias!("DLE", "LEU");
200        register_alias!("MK8", "LEU");
201        register_alias!("MLE", "LEU");
202        register_alias!("NLE", "LEU");
203        register_alias!("NLN", "LEU");
204        register_alias!("NLP", "LEU");
205
206        register_alias!("5OW", "LYS");
207        register_alias!("ALY", "LYS");
208        register_alias!("DLY", "LYS");
209        register_alias!("KCX", "LYS");
210        register_alias!("LLP", "LYS");
211        register_alias!("LLY", "LYS");
212        register_alias!("LYM", "LYS");
213        register_alias!("LYZ", "LYS");
214        register_alias!("SHR", "LYS");
215        register_alias!("TRG", "LYS");
216
217        register_alias!("CXM", "MET");
218        register_alias!("FME", "MET");
219        register_alias!("MSE", "MET");
220        register_alias!("OMT", "MET");
221
222        register_alias!("DAH", "PHE");
223        register_alias!("DPN", "PHE");
224        register_alias!("HPQ", "PHE");
225        register_alias!("PHI", "PHE");
226        register_alias!("PHL", "PHE");
227
228        register_alias!("DPR", "PRO");
229        register_alias!("HYP", "PRO");
230
231        register_alias!("DSN", "SER");
232        register_alias!("MIS", "SER");
233        register_alias!("OAS", "SER");
234        register_alias!("SAC", "SER");
235        register_alias!("SEL", "SER");
236        register_alias!("SEP", "SER");
237        register_alias!("SET", "SER");
238        register_alias!("SVA", "SER");
239
240        register_alias!("ALO", "THR");
241        register_alias!("BMT", "THR");
242        register_alias!("DTH", "THR");
243        register_alias!("TPO", "THR");
244
245        register_alias!("DTR", "TRP");
246        register_alias!("HTR", "TRP");
247        register_alias!("LTR", "TRP");
248        register_alias!("TPL", "TRP");
249        register_alias!("TRO", "TRP");
250
251        register_alias!("DTY", "TYR");
252        register_alias!("IYR", "TYR");
253        register_alias!("PAQ", "TYR");
254        register_alias!("PTR", "TYR");
255        register_alias!("STY", "TYR");
256        register_alias!("TYB", "TYR");
257        register_alias!("TYI", "TYR");
258        register_alias!("TYQ", "TYR");
259        register_alias!("TYS", "TYR");
260        register_alias!("TYY", "TYR");
261
262        register_alias!("APP", "ASH");
263
264        register_alias!("DIV", "VAL");
265        register_alias!("DVA", "VAL");
266        register_alias!("MVA", "VAL");
267
268        register_alias!("WAT", "HOH");
269        register_alias!("SOL", "HOH");
270        register_alias!("TIP", "HOH");
271        register_alias!("TIP3", "HOH");
272        register_alias!("TP3", "HOH");
273        register_alias!("SPC", "HOH");
274
275        let self_register_list = [
276            "ALA", "ARN", "ARG", "ASH", "ASN", "ASP", "CYM", "CYS", "CYX", "GLH", "GLN", "GLU",
277            "GLY", "HID", "HIE", "HIP", "ILE", "LEU", "LYN", "LYS", "MET", "PHE", "PRO", "SER",
278            "THR", "TRP", "TYM", "TYR", "VAL", "A", "C", "DA", "DC", "DG", "DI", "DT", "G", "I",
279            "U", "HOH",
280        ];
281
282        for name in self_register_list {
283            alias_map.insert(name.to_string(), name.to_string());
284        }
285
286        Self {
287            alias_map,
288            standard_map,
289        }
290    }
291
292    /// Resolves an input residue name to its canonical representative.
293    ///
294    /// When the name is unknown the original string is returned unchanged, preserving
295    /// user-provided labels for heterogens.
296    ///
297    /// # Arguments
298    ///
299    /// * `name` - Raw residue identifier from an input file.
300    ///
301    /// # Returns
302    ///
303    /// A borrowed string slice representing the canonical code, or the original input when
304    /// no alias mapping exists.
305    pub fn resolve_name<'a>(&'a self, name: &'a str) -> &'a str {
306        self.alias_map.get(name).map(|s| s.as_str()).unwrap_or(name)
307    }
308
309    /// Maps a canonical residue name to its [`StandardResidue`] entry when possible.
310    ///
311    /// Aliases must be resolved first; the lookup only matches canonical keys that were
312    /// registered via [`IoContext::new_default`].
313    ///
314    /// # Arguments
315    ///
316    /// * `name` - Canonical residue code returned by [`resolve_name`](Self::resolve_name).
317    ///
318    /// # Returns
319    ///
320    /// `Some(StandardResidue)` when the name matches a known polymer residue, otherwise
321    /// `None` for heterogens and custom ligands.
322    pub fn map_to_standard(&self, name: &str) -> Option<StandardResidue> {
323        self.standard_map.get(name).copied()
324    }
325
326    /// Adds or overrides an alias pointing to a canonical residue name.
327    ///
328    /// This is useful when callers want to supply additional naming conventions without
329    /// rebuilding the default table.
330    ///
331    /// # Arguments
332    ///
333    /// * `alias` - The alternative label to match within input files.
334    /// * `canonical` - The canonical residue code the alias should resolve to.
335    pub fn add_alias(&mut self, alias: impl Into<String>, canonical: impl Into<String>) {
336        self.alias_map.insert(alias.into(), canonical.into());
337    }
338
339    /// Classifies a residue by returning the canonical name plus optional standard metadata.
340    ///
341    /// This combines [`resolve_name`](Self::resolve_name) and
342    /// [`map_to_standard`](Self::map_to_standard) to provide a single lookup step for IO
343    /// pipelines.
344    ///
345    /// # Arguments
346    ///
347    /// * `raw_name` - The residue label read directly from an input structure file.
348    ///
349    /// # Returns
350    ///
351    /// A tuple containing the canonical residue name as an owned `String` and the optional
352    /// [`StandardResidue`] classification.
353    pub fn classify_residue(&self, raw_name: &str) -> (String, Option<StandardResidue>) {
354        let canonical = self.resolve_name(raw_name);
355        let standard = self.map_to_standard(canonical);
356        (canonical.to_string(), standard)
357    }
358}
359
360impl Default for IoContext {
361    /// Constructs the default IO context.
362    ///
363    /// # Returns
364    ///
365    /// A direct alias of [`IoContext::new_default`].
366    fn default() -> Self {
367        Self::new_default()
368    }
369}
370
#[cfg(test)]
mod tests {
    //! Unit tests for [`IoContext`]: construction, name resolution, standard-residue
    //! lookup, user-supplied aliases, and combined classification.

    use super::*;
    use crate::model::types::StandardResidue;

    #[test]
    fn io_context_new_default_creates_context_with_mappings() {
        let context = IoContext::new_default();

        assert!(context.alias_map.contains_key("ALA"));
        assert!(context.standard_map.contains_key("ALA"));
        assert!(context.alias_map.contains_key("HOH"));
        assert!(context.standard_map.contains_key("HOH"));

        assert!(context.alias_map.contains_key("AIB"));
        assert!(context.alias_map.contains_key("WAT"));
    }

    /// Guards the registry invariant that lets `classify_residue` tag every built-in
    /// alias: each alias target must itself be a registered canonical name.
    #[test]
    fn io_context_default_aliases_target_standard_residues() {
        let context = IoContext::new_default();

        for (alias, canonical) in &context.alias_map {
            assert!(
                context.standard_map.contains_key(canonical),
                "alias {} points at unregistered canonical name {}",
                alias,
                canonical
            );
        }
    }

    /// Every canonical name must resolve to itself so normalization is idempotent.
    #[test]
    fn io_context_default_canonical_names_are_self_registered() {
        let context = IoContext::new_default();

        for canonical in context.standard_map.keys() {
            assert_eq!(context.resolve_name(canonical), canonical.as_str());
        }
    }

    #[test]
    fn io_context_default_creates_same_as_new_default() {
        let context1 = IoContext::new_default();
        let context2 = IoContext::default();

        assert_eq!(context1.alias_map.len(), context2.alias_map.len());
        assert_eq!(context1.standard_map.len(), context2.standard_map.len());

        assert_eq!(context1.alias_map.get("ALA"), context2.alias_map.get("ALA"));
        assert_eq!(
            context1.standard_map.get("ALA"),
            context2.standard_map.get("ALA")
        );
    }

    #[test]
    fn io_context_clone_creates_identical_copy() {
        let context = IoContext::new_default();
        let cloned = context.clone();

        assert_eq!(context.alias_map, cloned.alias_map);
        assert_eq!(context.standard_map, cloned.standard_map);
    }

    #[test]
    fn io_context_debug_formats_correctly() {
        let context = IoContext::new_default();
        let debug_str = format!("{:?}", context);

        assert!(debug_str.contains("IoContext"));
        assert!(debug_str.contains("alias_map"));
        assert!(debug_str.contains("standard_map"));
    }

    #[test]
    fn resolve_name_returns_canonical_for_standard_residue() {
        let context = IoContext::new_default();

        assert_eq!(context.resolve_name("ALA"), "ALA");
        assert_eq!(context.resolve_name("GLY"), "GLY");
        assert_eq!(context.resolve_name("HOH"), "HOH");
    }

    #[test]
    fn resolve_name_returns_canonical_for_alias() {
        let context = IoContext::new_default();

        assert_eq!(context.resolve_name("AIB"), "ALA");
        assert_eq!(context.resolve_name("WAT"), "HOH");
        assert_eq!(context.resolve_name("SOL"), "HOH");
        assert_eq!(context.resolve_name("DAL"), "ALA");
    }

    #[test]
    fn resolve_name_returns_original_for_unknown_name() {
        let context = IoContext::new_default();

        assert_eq!(context.resolve_name("UNKNOWN"), "UNKNOWN");
        assert_eq!(context.resolve_name("XYZ123"), "XYZ123");
    }

    #[test]
    fn map_to_standard_returns_correct_enum_for_standard_residues() {
        let context = IoContext::new_default();

        assert_eq!(context.map_to_standard("ALA"), Some(StandardResidue::ALA));
        assert_eq!(context.map_to_standard("GLY"), Some(StandardResidue::GLY));
        assert_eq!(context.map_to_standard("ARG"), Some(StandardResidue::ARG));
        assert_eq!(context.map_to_standard("HOH"), Some(StandardResidue::HOH));
        assert_eq!(context.map_to_standard("DA"), Some(StandardResidue::DA));
        assert_eq!(context.map_to_standard("A"), Some(StandardResidue::A));
    }

    /// Protonation variants such as `ARN` are canonical names in their own right, so they
    /// map to the parent residue's classification rather than to `None`.
    #[test]
    fn map_to_standard_recognizes_canonical_variants() {
        let context = IoContext::new_default();

        assert_eq!(context.map_to_standard("ARN"), Some(StandardResidue::ARG));
    }

    #[test]
    fn map_to_standard_returns_none_for_aliases() {
        let context = IoContext::new_default();

        assert_eq!(context.map_to_standard("AIB"), None);
        assert_eq!(context.map_to_standard("WAT"), None);
    }

    #[test]
    fn map_to_standard_returns_none_for_unknown_names() {
        let context = IoContext::new_default();

        assert_eq!(context.map_to_standard("UNKNOWN"), None);
        assert_eq!(context.map_to_standard("XYZ"), None);
    }

    #[test]
    fn add_alias_adds_new_alias_mapping() {
        let mut context = IoContext::new_default();

        context.add_alias("TEST_ALIAS", "ALA");

        assert_eq!(context.resolve_name("TEST_ALIAS"), "ALA");
        assert_eq!(context.map_to_standard("TEST_ALIAS"), None);
    }

    #[test]
    fn add_alias_overwrites_existing_alias() {
        let mut context = IoContext::new_default();

        assert_eq!(context.resolve_name("AIB"), "ALA");

        context.add_alias("AIB", "GLY");

        assert_eq!(context.resolve_name("AIB"), "GLY");
    }

    #[test]
    fn add_alias_with_string_types() {
        let mut context = IoContext::new_default();

        context.add_alias("STR_ALIAS", "GLY");
        assert_eq!(context.resolve_name("STR_ALIAS"), "GLY");

        context.add_alias(String::from("OWNED_ALIAS"), String::from("ALA"));
        assert_eq!(context.resolve_name("OWNED_ALIAS"), "ALA");
    }

    /// `add_alias` only touches the alias table; the standard table must stay unchanged.
    #[test]
    fn add_alias_does_not_modify_standard_map() {
        let mut context = IoContext::new_default();
        let before = context.standard_map.clone();

        context.add_alias("TEST_ALIAS", "NOT_A_RESIDUE");

        assert_eq!(context.standard_map, before);
        assert_eq!(
            context.classify_residue("TEST_ALIAS"),
            ("NOT_A_RESIDUE".to_string(), None)
        );
    }

    #[test]
    fn context_handles_case_sensitivity() {
        let context = IoContext::new_default();

        assert_eq!(context.resolve_name("ala"), "ala");
        assert_eq!(context.resolve_name("ALA"), "ALA");
        assert_eq!(context.map_to_standard("ala"), None);
        assert_eq!(context.map_to_standard("ALA"), Some(StandardResidue::ALA));
    }

    #[test]
    fn classify_residue_returns_canonical_and_standard() {
        let context = IoContext::new_default();

        let (name, standard) = context.classify_residue("WAT");
        assert_eq!(name, "HOH");
        assert_eq!(standard, Some(StandardResidue::HOH));

        let (name, standard) = context.classify_residue("ALA");
        assert_eq!(name, "ALA");
        assert_eq!(standard, Some(StandardResidue::ALA));
    }

    #[test]
    fn classify_residue_handles_unknowns() {
        let context = IoContext::new_default();

        let (name, standard) = context.classify_residue("LIG");
        assert_eq!(name, "LIG");
        assert!(standard.is_none());
    }
}