bio_forge/io/
context.rs

//! Residue-name normalization utilities shared across all structure readers and writers.
//!
//! This module exposes [`IoContext`], a registry-backed helper that maps a built-in table of
//! PDB residue aliases to canonical residue codes and links those codes to
//! [`StandardResidue`] values. File parsers call it to ensure consistent downstream handling
//! of polymer types, while exporters reuse it to apply user-provided aliases.
7
8use crate::model::types::StandardResidue;
9use std::collections::HashMap;
10
11/// Canonicalization state for residue names used during IO operations.
12///
13/// [`IoContext`] stores both the alias-to-canonical mapping as well as the
14/// canonical-to-[`StandardResidue`] lookup so that structure builders can translate raw
15/// residue labels into actionable metadata.
16#[derive(Debug, Clone)]
17pub struct IoContext {
18    /// Maps arbitrary residue labels to their canonical three-letter code.
19    alias_map: HashMap<String, String>,
20    /// Records which canonical names correspond to standard residues.
21    standard_map: HashMap<String, StandardResidue>,
22}
23
24impl IoContext {
25    /// Creates a context pre-populated with the built-in alias registry.
26    ///
27    /// The registry covers variants for protonation, modifications, and common force-field
28    /// naming conventions so that raw files parse into consistent structures.
29    ///
30    /// # Returns
31    ///
32    /// A context instance ready for use by IO helpers and downstream operations.
33    pub fn new_default() -> Self {
34        let mut alias_map = HashMap::new();
35        let mut standard_map = HashMap::new();
36
37        // Registers a canonical residue name and its `StandardResidue` annotation.
38        macro_rules! register_standard {
39            ($canonical:expr, $enum_val:expr) => {
40                alias_map.insert($canonical.to_string(), $canonical.to_string());
41                standard_map.insert($canonical.to_string(), $enum_val);
42            };
43        }
44
45        // Associates an alias with the canonical residue label without tagging it standard.
46        macro_rules! register_alias {
47            ($alias:expr, $canonical:expr) => {
48                alias_map.insert($alias.to_string(), $canonical.to_string());
49            };
50        }
51
52        register_standard!("ALA", StandardResidue::ALA);
53        register_standard!("ARG", StandardResidue::ARG);
54        register_standard!("ARN", StandardResidue::ARG);
55        register_standard!("ASN", StandardResidue::ASN);
56        register_standard!("ASP", StandardResidue::ASP);
57        register_standard!("ASH", StandardResidue::ASP);
58        register_standard!("CYS", StandardResidue::CYS);
59        register_standard!("CYM", StandardResidue::CYS);
60        register_standard!("CYX", StandardResidue::CYS);
61        register_standard!("GLN", StandardResidue::GLN);
62        register_standard!("GLU", StandardResidue::GLU);
63        register_standard!("GLH", StandardResidue::GLU);
64        register_standard!("GLY", StandardResidue::GLY);
65        register_standard!("HID", StandardResidue::HIS);
66        register_standard!("HIE", StandardResidue::HIS);
67        register_standard!("HIP", StandardResidue::HIS);
68        register_standard!("ILE", StandardResidue::ILE);
69        register_standard!("LEU", StandardResidue::LEU);
70        register_standard!("LYS", StandardResidue::LYS);
71        register_standard!("LYN", StandardResidue::LYS);
72        register_standard!("MET", StandardResidue::MET);
73        register_standard!("PHE", StandardResidue::PHE);
74        register_standard!("PRO", StandardResidue::PRO);
75        register_standard!("SER", StandardResidue::SER);
76        register_standard!("THR", StandardResidue::THR);
77        register_standard!("TRP", StandardResidue::TRP);
78        register_standard!("TYR", StandardResidue::TYR);
79        register_standard!("TYM", StandardResidue::TYR);
80        register_standard!("VAL", StandardResidue::VAL);
81
82        register_standard!("DA", StandardResidue::DA);
83        register_standard!("DC", StandardResidue::DC);
84        register_standard!("DG", StandardResidue::DG);
85        register_standard!("DT", StandardResidue::DT);
86        register_standard!("DI", StandardResidue::DI);
87
88        register_standard!("A", StandardResidue::A);
89        register_standard!("C", StandardResidue::C);
90        register_standard!("G", StandardResidue::G);
91        register_standard!("U", StandardResidue::U);
92        register_standard!("I", StandardResidue::I);
93
94        register_standard!("HOH", StandardResidue::HOH);
95
96        register_alias!("AIB", "ALA");
97        register_alias!("ALM", "ALA");
98        register_alias!("AYA", "ALA");
99        register_alias!("BNN", "ALA");
100        register_alias!("CHG", "ALA");
101        register_alias!("CSD", "ALA");
102        register_alias!("DAL", "ALA");
103        register_alias!("DHA", "ALA");
104        register_alias!("DNP", "ALA");
105        register_alias!("FLA", "ALA");
106        register_alias!("HAC", "ALA");
107        register_alias!("MAA", "ALA");
108        register_alias!("PRR", "ALA");
109        register_alias!("TIH", "ALA");
110        register_alias!("TPQ", "ALA");
111
112        register_alias!("ACL", "ARG");
113        register_alias!("AGM", "ARG");
114        register_alias!("ARM", "ARG");
115        register_alias!("DAR", "ARG");
116        register_alias!("HAR", "ARG");
117        register_alias!("HMR", "ARG");
118
119        register_alias!("AR0", "ARN");
120
121        register_alias!("MEN", "ASN");
122
123        register_alias!("2AS", "ASP");
124        register_alias!("ASA", "ASP");
125        register_alias!("ASB", "ASP");
126        register_alias!("ASK", "ASP");
127        register_alias!("ASL", "ASP");
128        register_alias!("ASQ", "ASP");
129        register_alias!("BHD", "ASP");
130        register_alias!("DAS", "ASP");
131        register_alias!("DSP", "ASP");
132        register_alias!("IAS", "ASP");
133
134        register_alias!("BCS", "CYS");
135        register_alias!("BUC", "CYS");
136        register_alias!("C5C", "CYS");
137        register_alias!("C6C", "CYS");
138        register_alias!("CAS", "CYS");
139        register_alias!("CCS", "CYS");
140        register_alias!("CEA", "CYS");
141        register_alias!("CME", "CYS");
142        register_alias!("CSO", "CYS");
143        register_alias!("CSP", "CYS");
144        register_alias!("CSS", "CYS");
145        register_alias!("CSW", "CYS");
146        register_alias!("CSX", "CYS");
147        register_alias!("CY1", "CYS");
148        register_alias!("CY3", "CYS");
149        register_alias!("CYG", "CYS");
150        register_alias!("CYQ", "CYS");
151        register_alias!("DCY", "CYS");
152        register_alias!("EFC", "CYS");
153        register_alias!("OCS", "CYS");
154        register_alias!("PEC", "CYS");
155        register_alias!("PR3", "CYS");
156        register_alias!("PYX", "CYS");
157        register_alias!("SCH", "CYS");
158        register_alias!("SCS", "CYS");
159        register_alias!("SCY", "CYS");
160        register_alias!("SHC", "CYS");
161        register_alias!("SMC", "CYS");
162        register_alias!("SOC", "CYS");
163
164        register_alias!("5HP", "GLU");
165        register_alias!("CGU", "GLU");
166        register_alias!("DGL", "GLU");
167        register_alias!("GGL", "GLU");
168        register_alias!("GMA", "GLU");
169        register_alias!("PCA", "GLU");
170
171        register_alias!("GLP", "GLH");
172
173        register_alias!("DGN", "GLN");
174
175        register_alias!("GL3", "GLY");
176        register_alias!("GLZ", "GLY");
177        register_alias!("GSC", "GLY");
178        register_alias!("MPQ", "GLY");
179        register_alias!("MSA", "GLY");
180        register_alias!("NMC", "GLY");
181        register_alias!("SAR", "GLY");
182
183        register_alias!("HIS", "HID");
184        register_alias!("3AH", "HID");
185        register_alias!("DHI", "HID");
186        register_alias!("HIC", "HID");
187        register_alias!("MHS", "HID");
188        register_alias!("NEM", "HID");
189        register_alias!("NEP", "HID");
190
191        register_alias!("DIL", "ILE");
192        register_alias!("IIL", "ILE");
193
194        register_alias!("BUG", "LEU");
195        register_alias!("CLE", "LEU");
196        register_alias!("DLE", "LEU");
197        register_alias!("MK8", "LEU");
198        register_alias!("MLE", "LEU");
199        register_alias!("NLE", "LEU");
200        register_alias!("NLN", "LEU");
201        register_alias!("NLP", "LEU");
202
203        register_alias!("5OW", "LYS");
204        register_alias!("ALY", "LYS");
205        register_alias!("DLY", "LYS");
206        register_alias!("KCX", "LYS");
207        register_alias!("LLP", "LYS");
208        register_alias!("LLY", "LYS");
209        register_alias!("LYM", "LYS");
210        register_alias!("LYZ", "LYS");
211        register_alias!("SHR", "LYS");
212        register_alias!("TRG", "LYS");
213
214        register_alias!("CXM", "MET");
215        register_alias!("FME", "MET");
216        register_alias!("MSE", "MET");
217        register_alias!("OMT", "MET");
218
219        register_alias!("DAH", "PHE");
220        register_alias!("DPN", "PHE");
221        register_alias!("HPQ", "PHE");
222        register_alias!("PHI", "PHE");
223        register_alias!("PHL", "PHE");
224
225        register_alias!("DPR", "PRO");
226        register_alias!("HYP", "PRO");
227
228        register_alias!("DSN", "SER");
229        register_alias!("MIS", "SER");
230        register_alias!("OAS", "SER");
231        register_alias!("SAC", "SER");
232        register_alias!("SEL", "SER");
233        register_alias!("SEP", "SER");
234        register_alias!("SET", "SER");
235        register_alias!("SVA", "SER");
236
237        register_alias!("ALO", "THR");
238        register_alias!("BMT", "THR");
239        register_alias!("DTH", "THR");
240        register_alias!("TPO", "THR");
241
242        register_alias!("DTR", "TRP");
243        register_alias!("HTR", "TRP");
244        register_alias!("LTR", "TRP");
245        register_alias!("TPL", "TRP");
246        register_alias!("TRO", "TRP");
247
248        register_alias!("DTY", "TYR");
249        register_alias!("IYR", "TYR");
250        register_alias!("PAQ", "TYR");
251        register_alias!("PTR", "TYR");
252        register_alias!("STY", "TYR");
253        register_alias!("TYB", "TYR");
254        register_alias!("TYI", "TYR");
255        register_alias!("TYQ", "TYR");
256        register_alias!("TYS", "TYR");
257        register_alias!("TYY", "TYR");
258
259        register_alias!("APP", "ASH");
260
261        register_alias!("DIV", "VAL");
262        register_alias!("DVA", "VAL");
263        register_alias!("MVA", "VAL");
264
265        register_alias!("WAT", "HOH");
266        register_alias!("SOL", "HOH");
267        register_alias!("TIP", "HOH");
268        register_alias!("TIP3", "HOH");
269        register_alias!("TP3", "HOH");
270        register_alias!("SPC", "HOH");
271
272        let self_register_list = [
273            "ALA", "ARN", "ARG", "ASH", "ASN", "ASP", "CYM", "CYS", "CYX", "GLH", "GLN", "GLU",
274            "GLY", "HID", "HIE", "HIP", "ILE", "LEU", "LYN", "LYS", "MET", "PHE", "PRO", "SER",
275            "THR", "TRP", "TYM", "TYR", "VAL", "A", "C", "DA", "DC", "DG", "DI", "DT", "G", "I",
276            "U", "HOH",
277        ];
278
279        for name in self_register_list {
280            alias_map.insert(name.to_string(), name.to_string());
281        }
282
283        Self {
284            alias_map,
285            standard_map,
286        }
287    }
288
289    /// Resolves an input residue name to its canonical representative.
290    ///
291    /// When the name is unknown the original string is returned unchanged, preserving
292    /// user-provided labels for heterogens.
293    ///
294    /// # Arguments
295    ///
296    /// * `name` - Raw residue identifier from an input file.
297    ///
298    /// # Returns
299    ///
300    /// A borrowed string slice representing the canonical code, or the original input when
301    /// no alias mapping exists.
302    pub fn resolve_name<'a>(&'a self, name: &'a str) -> &'a str {
303        self.alias_map.get(name).map(|s| s.as_str()).unwrap_or(name)
304    }
305
306    /// Maps a canonical residue name to its [`StandardResidue`] entry when possible.
307    ///
308    /// Aliases must be resolved first; the lookup only matches canonical keys that were
309    /// registered via [`IoContext::new_default`].
310    ///
311    /// # Arguments
312    ///
313    /// * `name` - Canonical residue code returned by [`resolve_name`](Self::resolve_name).
314    ///
315    /// # Returns
316    ///
317    /// `Some(StandardResidue)` when the name matches a known polymer residue, otherwise
318    /// `None` for heterogens and custom ligands.
319    pub fn map_to_standard(&self, name: &str) -> Option<StandardResidue> {
320        self.standard_map.get(name).copied()
321    }
322
323    /// Adds or overrides an alias pointing to a canonical residue name.
324    ///
325    /// This is useful when callers want to supply additional naming conventions without
326    /// rebuilding the default table.
327    ///
328    /// # Arguments
329    ///
330    /// * `alias` - The alternative label to match within input files.
331    /// * `canonical` - The canonical residue code the alias should resolve to.
332    pub fn add_alias(&mut self, alias: impl Into<String>, canonical: impl Into<String>) {
333        self.alias_map.insert(alias.into(), canonical.into());
334    }
335
336    /// Classifies a residue by returning the canonical name plus optional standard metadata.
337    ///
338    /// This combines [`resolve_name`](Self::resolve_name) and
339    /// [`map_to_standard`](Self::map_to_standard) to provide a single lookup step for IO
340    /// pipelines.
341    ///
342    /// # Arguments
343    ///
344    /// * `raw_name` - The residue label read directly from an input structure file.
345    ///
346    /// # Returns
347    ///
348    /// A tuple containing the canonical residue name as an owned `String` and the optional
349    /// [`StandardResidue`] classification.
350    pub fn classify_residue(&self, raw_name: &str) -> (String, Option<StandardResidue>) {
351        let canonical = self.resolve_name(raw_name);
352        let standard = self.map_to_standard(canonical);
353        (canonical.to_string(), standard)
354    }
355}
356
357impl Default for IoContext {
358    /// Constructs the default IO context.
359    ///
360    /// # Returns
361    ///
362    /// A direct alias of [`IoContext::new_default`].
363    fn default() -> Self {
364        Self::new_default()
365    }
366}
367
#[cfg(test)]
mod tests {
    //! Unit tests for [`IoContext`]: registry construction, alias resolution,
    //! standard-residue lookup, alias overrides, and combined classification.

    use super::*;
    use crate::model::types::StandardResidue;

    #[test]
    fn io_context_new_default_creates_context_with_mappings() {
        let context = IoContext::new_default();

        // Canonical names appear in both tables.
        assert!(context.alias_map.contains_key("ALA"));
        assert!(context.standard_map.contains_key("ALA"));
        assert!(context.alias_map.contains_key("HOH"));
        assert!(context.standard_map.contains_key("HOH"));

        // Aliases appear in the alias table.
        assert!(context.alias_map.contains_key("AIB"));
        assert!(context.alias_map.contains_key("WAT"));
    }

    #[test]
    fn io_context_new_default_self_registers_every_standard_name() {
        let context = IoContext::new_default();

        // Every standard code must resolve to itself, otherwise `classify_residue`
        // could not round-trip canonical input.
        for name in context.standard_map.keys() {
            assert_eq!(context.alias_map.get(name), Some(name));
        }
    }

    #[test]
    fn io_context_default_creates_same_as_new_default() {
        let context1 = IoContext::new_default();
        let context2 = IoContext::default();

        assert_eq!(context1.alias_map.len(), context2.alias_map.len());
        assert_eq!(context1.standard_map.len(), context2.standard_map.len());

        assert_eq!(context1.alias_map.get("ALA"), context2.alias_map.get("ALA"));
        assert_eq!(
            context1.standard_map.get("ALA"),
            context2.standard_map.get("ALA")
        );
    }

    #[test]
    fn io_context_clone_creates_identical_copy() {
        let context = IoContext::new_default();
        let cloned = context.clone();

        assert_eq!(context.alias_map, cloned.alias_map);
        assert_eq!(context.standard_map, cloned.standard_map);
    }

    #[test]
    fn io_context_debug_formats_correctly() {
        let context = IoContext::new_default();
        let debug_str = format!("{:?}", context);

        assert!(debug_str.contains("IoContext"));
        assert!(debug_str.contains("alias_map"));
        assert!(debug_str.contains("standard_map"));
    }

    #[test]
    fn resolve_name_returns_canonical_for_standard_residue() {
        let context = IoContext::new_default();

        assert_eq!(context.resolve_name("ALA"), "ALA");
        assert_eq!(context.resolve_name("GLY"), "GLY");
        assert_eq!(context.resolve_name("HOH"), "HOH");
    }

    #[test]
    fn resolve_name_returns_canonical_for_alias() {
        let context = IoContext::new_default();

        assert_eq!(context.resolve_name("AIB"), "ALA");
        assert_eq!(context.resolve_name("WAT"), "HOH");
        assert_eq!(context.resolve_name("SOL"), "HOH");
        assert_eq!(context.resolve_name("DAL"), "ALA");
    }

    #[test]
    fn resolve_name_maps_his_to_hid() {
        let context = IoContext::new_default();

        // Plain `HIS` is registered as an alias; `HID` is the canonical code.
        assert_eq!(context.resolve_name("HIS"), "HID");
    }

    #[test]
    fn resolve_name_returns_original_for_unknown_name() {
        let context = IoContext::new_default();

        assert_eq!(context.resolve_name("UNKNOWN"), "UNKNOWN");
        assert_eq!(context.resolve_name("XYZ123"), "XYZ123");
    }

    #[test]
    fn resolve_name_follows_a_single_hop_only() {
        let mut context = IoContext::new_default();

        // `AIB` is itself an alias of `ALA`, but resolution does not chain.
        context.add_alias("CHAINED", "AIB");

        assert_eq!(context.resolve_name("CHAINED"), "AIB");
    }

    #[test]
    fn map_to_standard_returns_correct_enum_for_standard_residues() {
        let context = IoContext::new_default();

        assert_eq!(context.map_to_standard("ALA"), Some(StandardResidue::ALA));
        assert_eq!(context.map_to_standard("GLY"), Some(StandardResidue::GLY));
        assert_eq!(context.map_to_standard("ARG"), Some(StandardResidue::ARG));
        assert_eq!(context.map_to_standard("HOH"), Some(StandardResidue::HOH));
        assert_eq!(context.map_to_standard("DA"), Some(StandardResidue::DA));
        assert_eq!(context.map_to_standard("A"), Some(StandardResidue::A));
    }

    #[test]
    fn map_to_standard_shares_enum_across_protonation_variants() {
        let context = IoContext::new_default();

        // Variant codes are canonical in their own right but share the parent enum value.
        assert_eq!(context.map_to_standard("ARN"), Some(StandardResidue::ARG));
        assert_eq!(context.map_to_standard("ASH"), Some(StandardResidue::ASP));
        assert_eq!(context.map_to_standard("HID"), Some(StandardResidue::HIS));
        assert_eq!(context.map_to_standard("HIE"), Some(StandardResidue::HIS));
        assert_eq!(context.map_to_standard("HIP"), Some(StandardResidue::HIS));
    }

    #[test]
    fn map_to_standard_returns_none_for_aliases() {
        let context = IoContext::new_default();

        assert_eq!(context.map_to_standard("AIB"), None);
        assert_eq!(context.map_to_standard("WAT"), None);
        assert_eq!(context.map_to_standard("HIS"), None);
    }

    #[test]
    fn map_to_standard_returns_none_for_unknown_names() {
        let context = IoContext::new_default();

        assert_eq!(context.map_to_standard("UNKNOWN"), None);
        assert_eq!(context.map_to_standard("XYZ"), None);
    }

    #[test]
    fn add_alias_adds_new_alias_mapping() {
        let mut context = IoContext::new_default();

        context.add_alias("TEST_ALIAS", "ALA");

        assert_eq!(context.resolve_name("TEST_ALIAS"), "ALA");
        assert_eq!(context.map_to_standard("TEST_ALIAS"), None);
    }

    #[test]
    fn add_alias_overwrites_existing_alias() {
        let mut context = IoContext::new_default();

        assert_eq!(context.resolve_name("AIB"), "ALA");

        context.add_alias("AIB", "GLY");

        assert_eq!(context.resolve_name("AIB"), "GLY");
    }

    #[test]
    fn add_alias_with_string_types() {
        let mut context = IoContext::new_default();

        context.add_alias("STR_ALIAS", "GLY");
        assert_eq!(context.resolve_name("STR_ALIAS"), "GLY");

        context.add_alias(String::from("OWNED_ALIAS"), String::from("ALA"));
        assert_eq!(context.resolve_name("OWNED_ALIAS"), "ALA");
    }

    #[test]
    fn context_handles_case_sensitivity() {
        let context = IoContext::new_default();

        assert_eq!(context.resolve_name("ala"), "ala");
        assert_eq!(context.resolve_name("ALA"), "ALA");
        assert_eq!(context.map_to_standard("ala"), None);
        assert_eq!(context.map_to_standard("ALA"), Some(StandardResidue::ALA));
    }

    #[test]
    fn classify_residue_returns_canonical_and_standard() {
        let context = IoContext::new_default();

        let (name, standard) = context.classify_residue("WAT");
        assert_eq!(name, "HOH");
        assert_eq!(standard, Some(StandardResidue::HOH));

        let (name, standard) = context.classify_residue("ALA");
        assert_eq!(name, "ALA");
        assert_eq!(standard, Some(StandardResidue::ALA));
    }

    #[test]
    fn classify_residue_resolves_aliases_before_classifying() {
        let context = IoContext::new_default();

        let (name, standard) = context.classify_residue("HIS");
        assert_eq!(name, "HID");
        assert_eq!(standard, Some(StandardResidue::HIS));

        let (name, standard) = context.classify_residue("MSE");
        assert_eq!(name, "MET");
        assert_eq!(standard, Some(StandardResidue::MET));
    }

    #[test]
    fn classify_residue_handles_unknowns() {
        let context = IoContext::new_default();

        let (name, standard) = context.classify_residue("LIG");
        assert_eq!(name, "LIG");
        assert!(standard.is_none());
    }
}