dreid_forge/model/
metadata.rs

1//! Biological metadata for biomolecular systems.
2//!
3//! This module provides data structures for storing PDB/mmCIF-style
4//! annotations that describe the biological context of atoms within
5//! protein, nucleic acid, and other biomolecular structures.
6//!
7//! # Overview
8//!
9//! - [`StandardResidue`] — Enumeration of standard amino acids, nucleotides, and water
10//! - [`ResidueCategory`] — Classification as standard, hetero, or ion
11//! - [`ResiduePosition`] — Terminal position annotations (N/C-terminal, 5'/3'-end)
12//! - [`AtomResidueInfo`] — Per-atom biological context (residue name, chain, etc.)
13//! - [`BioMetadata`] — Collection of atom-level biological annotations
14//!
15//! # Builder Pattern
16//!
17//! Use [`AtomResidueInfo::builder`] to construct instances with optional fields:
18//!
19//! ```
20//! use dreid_forge::{AtomResidueInfo, StandardResidue, ResidueCategory, ResiduePosition};
21//!
22//! let info = AtomResidueInfo::builder("CA", "ALA", 42, "A")
23//!     .standard_name(Some(StandardResidue::ALA))
24//!     .category(ResidueCategory::Standard)
25//!     .position(ResiduePosition::NTerminal)
26//!     .build();
27//!
28//! assert_eq!(info.atom_name, "CA");
29//! assert_eq!(info.chain_id, "A");
30//! ```
31
/// Residue identities recognized from standard PDB/mmCIF nomenclature.
///
/// The variants fall into three groups: the 20 canonical amino acids,
/// the common RNA and DNA nucleotides, and water.
///
/// # Amino Acids
///
/// One variant per three-letter code:
/// ALA, ARG, ASN, ASP, CYS, GLN, GLU, GLY, HIS, ILE,
/// LEU, LYS, MET, PHE, PRO, SER, THR, TRP, TYR, VAL.
///
/// # Nucleotides
///
/// - RNA: A, C, G, U, I (inosine)
/// - DNA: DA, DC, DG, DT, DI
///
/// # Solvent
///
/// - HOH: water molecule
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum StandardResidue {
    // ---- Amino acids ----
    /// Alanine (`ALA`).
    ALA,
    /// Arginine (`ARG`).
    ARG,
    /// Asparagine (`ASN`).
    ASN,
    /// Aspartic acid (`ASP`).
    ASP,
    /// Cysteine (`CYS`).
    CYS,
    /// Glutamine (`GLN`).
    GLN,
    /// Glutamic acid (`GLU`).
    GLU,
    /// Glycine (`GLY`).
    GLY,
    /// Histidine (`HIS`).
    HIS,
    /// Isoleucine (`ILE`).
    ILE,
    /// Leucine (`LEU`).
    LEU,
    /// Lysine (`LYS`).
    LYS,
    /// Methionine (`MET`).
    MET,
    /// Phenylalanine (`PHE`).
    PHE,
    /// Proline (`PRO`).
    PRO,
    /// Serine (`SER`).
    SER,
    /// Threonine (`THR`).
    THR,
    /// Tryptophan (`TRP`).
    TRP,
    /// Tyrosine (`TYR`).
    TYR,
    /// Valine (`VAL`).
    VAL,

    // ---- RNA nucleotides ----
    /// Adenosine, RNA (`A`).
    A,
    /// Cytidine, RNA (`C`).
    C,
    /// Guanosine, RNA (`G`).
    G,
    /// Uridine, RNA (`U`).
    U,
    /// Inosine, RNA (`I`).
    I,

    // ---- DNA nucleotides ----
    /// Deoxyadenosine, DNA (`DA`).
    DA,
    /// Deoxycytidine, DNA (`DC`).
    DC,
    /// Deoxyguanosine, DNA (`DG`).
    DG,
    /// Deoxythymidine, DNA (`DT`).
    DT,
    /// Deoxyinosine, DNA (`DI`).
    DI,

    // ---- Solvent ----
    /// Water molecule (`HOH`).
    HOH,
}
116
/// Broad chemical class that a residue belongs to.
///
/// Separates standard biomolecular building blocks from non-standard
/// entities such as ligands and ions.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ResidueCategory {
    /// A standard amino acid or nucleotide.
    Standard,
    /// A hetero group: ligand, modified residue, and similar.
    Hetero,
    /// An ion, either monoatomic or polyatomic.
    Ion,
}
130
/// Where a residue sits along its polymer chain.
///
/// Records whether the residue occupies a chain terminus; terminal
/// residues differ in protonation state and hydrogen bonding.
///
/// Note that the `None` variant belongs to this enum and is unrelated to
/// [`Option::None`]; refer to it as `ResiduePosition::None`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ResiduePosition {
    /// No terminal annotation, or the concept does not apply.
    None,
    /// A residue in the interior of the chain.
    Internal,
    /// The N-terminal residue of a protein chain.
    NTerminal,
    /// The C-terminal residue of a protein chain.
    CTerminal,
    /// The 5' end of a nucleic acid strand.
    FivePrime,
    /// The 3' end of a nucleic acid strand.
    ThreePrime,
}
150
151/// Biological annotation for a single atom.
152///
153/// Stores PDB/mmCIF-style metadata that describes an atom's
154/// context within a biomolecular structure, including residue
155/// identity, chain assignment, and terminal position.
156///
157/// # Fields
158///
159/// * `atom_name` — PDB atom name (e.g., "CA", "N", "O")
160/// * `residue_name` — Three-letter residue code (e.g., "ALA", "HOH")
161/// * `residue_id` — Residue sequence number
162/// * `chain_id` — Chain identifier
163/// * `insertion_code` — PDB insertion code (`None` if not present)
164/// * `standard_name` — Parsed [`StandardResidue`] if recognized
165/// * `category` — Residue classification
166/// * `position` — Terminal position within chain
167///
168/// # Construction
169///
170/// Use [`AtomResidueInfo::builder`] for convenient construction
171/// with default values for optional fields.
172#[derive(Debug, Clone, PartialEq, Eq)]
173pub struct AtomResidueInfo {
174    /// PDB-style atom name (e.g., "CA", "CB", "N").
175    pub atom_name: String,
176    /// Three-letter residue code (e.g., "ALA", "LIG").
177    pub residue_name: String,
178    /// Residue sequence number.
179    pub residue_id: i32,
180    /// Chain identifier.
181    pub chain_id: String,
182    /// PDB insertion code for residue numbering conflicts (`None` if absent).
183    pub insertion_code: Option<char>,
184    /// Parsed standard residue type, if recognized.
185    pub standard_name: Option<StandardResidue>,
186    /// Classification of the residue (standard, hetero, ion).
187    pub category: ResidueCategory,
188    /// Terminal position within the polymer chain.
189    pub position: ResiduePosition,
190}
191
192impl AtomResidueInfo {
193    /// Creates a builder for constructing an [`AtomResidueInfo`].
194    ///
195    /// # Arguments
196    ///
197    /// * `atom_name` — PDB-style atom name
198    /// * `residue_name` — Three-letter residue code
199    /// * `residue_id` — Residue sequence number
200    /// * `chain_id` — Chain identifier
201    ///
202    /// # Returns
203    ///
204    /// An [`AtomResidueBuilder`] with default values for optional fields.
205    ///
206    /// # Examples
207    ///
208    /// ```
209    /// use dreid_forge::{AtomResidueInfo, ResidueCategory, ResiduePosition};
210    ///
211    /// let info = AtomResidueInfo::builder("CA", "GLY", 1, "A")
212    ///     .category(ResidueCategory::Standard)
213    ///     .position(ResiduePosition::Internal)
214    ///     .build();
215    ///
216    /// assert_eq!(info.residue_name, "GLY");
217    /// ```
218    pub fn builder(
219        atom_name: impl Into<String>,
220        residue_name: impl Into<String>,
221        residue_id: i32,
222        chain_id: impl Into<String>,
223    ) -> AtomResidueBuilder {
224        AtomResidueBuilder {
225            atom_name: atom_name.into(),
226            residue_name: residue_name.into(),
227            residue_id,
228            chain_id: chain_id.into(),
229            insertion_code: None,
230            standard_name: None,
231            category: ResidueCategory::Standard,
232            position: ResiduePosition::None,
233        }
234    }
235}
236
237/// Builder for constructing [`AtomResidueInfo`] instances.
238///
239/// Provides a fluent API for setting optional fields with sensible defaults.
240#[derive(Debug, Clone)]
241pub struct AtomResidueBuilder {
242    atom_name: String,
243    residue_name: String,
244    residue_id: i32,
245    chain_id: String,
246    insertion_code: Option<char>,
247    standard_name: Option<StandardResidue>,
248    category: ResidueCategory,
249    position: ResiduePosition,
250}
251
252impl AtomResidueBuilder {
253    /// Sets the PDB insertion code.
254    ///
255    /// # Arguments
256    ///
257    /// * `code` — Optional insertion code character
258    pub fn insertion_code_opt(mut self, code: Option<char>) -> Self {
259        self.insertion_code = code;
260        self
261    }
262
263    /// Sets the standard residue type.
264    ///
265    /// # Arguments
266    ///
267    /// * `name` — Optional [`StandardResidue`] variant
268    pub fn standard_name(mut self, name: Option<StandardResidue>) -> Self {
269        self.standard_name = name;
270        self
271    }
272
273    /// Sets the residue category.
274    ///
275    /// # Arguments
276    ///
277    /// * `category` — [`ResidueCategory`] classification
278    pub fn category(mut self, category: ResidueCategory) -> Self {
279        self.category = category;
280        self
281    }
282
283    /// Sets the terminal position.
284    ///
285    /// # Arguments
286    ///
287    /// * `position` — [`ResiduePosition`] within the chain
288    pub fn position(mut self, position: ResiduePosition) -> Self {
289        self.position = position;
290        self
291    }
292
293    /// Builds the final [`AtomResidueInfo`] instance.
294    ///
295    /// # Returns
296    ///
297    /// A fully constructed [`AtomResidueInfo`].
298    pub fn build(self) -> AtomResidueInfo {
299        AtomResidueInfo {
300            atom_name: self.atom_name,
301            residue_name: self.residue_name,
302            residue_id: self.residue_id,
303            chain_id: self.chain_id,
304            insertion_code: self.insertion_code,
305            standard_name: self.standard_name,
306            category: self.category,
307            position: self.position,
308        }
309    }
310}
311
312/// Collection of biological metadata for all atoms in a system.
313///
314/// Stores per-atom biological annotations in a vector that parallels
315/// the atom indices in a [`System`](crate::System). Each entry provides
316/// PDB/mmCIF-style information about the atom's residue context.
317///
318/// # Examples
319///
320/// ```
321/// use dreid_forge::{AtomResidueInfo, BioMetadata, ResidueCategory, ResiduePosition};
322///
323/// let mut metadata = BioMetadata::with_capacity(100);
324///
325/// let info = AtomResidueInfo::builder("N", "ALA", 1, "A")
326///     .category(ResidueCategory::Standard)
327///     .position(ResiduePosition::NTerminal)
328///     .build();
329///
330/// metadata.atom_info.push(info);
331/// assert_eq!(metadata.atom_info.len(), 1);
332///
333/// // Set target pH for charge assignment
334/// metadata.target_ph = Some(7.0);
335/// ```
336#[derive(Debug, Clone, Default)]
337pub struct BioMetadata {
338    /// Per-atom biological annotations, indexed by atom order.
339    pub atom_info: Vec<AtomResidueInfo>,
340    /// Target pH for protonation state determination.
341    pub target_ph: Option<f64>,
342}
343
344impl PartialEq for BioMetadata {
345    fn eq(&self, other: &Self) -> bool {
346        self.atom_info == other.atom_info && self.target_ph == other.target_ph
347    }
348}
349
350impl Eq for BioMetadata {}
351
352impl BioMetadata {
353    /// Creates an empty [`BioMetadata`] container.
354    pub fn new() -> Self {
355        Self::default()
356    }
357
358    /// Creates a [`BioMetadata`] with pre-allocated capacity.
359    ///
360    /// # Arguments
361    ///
362    /// * `capacity` — Number of atoms to pre-allocate space for
363    pub fn with_capacity(capacity: usize) -> Self {
364        Self {
365            atom_info: Vec::with_capacity(capacity),
366            target_ph: None,
367        }
368    }
369
370    /// Returns the target pH, defaulting to physiological pH (7.4) if not set.
371    pub fn effective_ph(&self) -> f64 {
372        self.target_ph.unwrap_or(7.4)
373    }
374}
375
#[cfg(test)]
mod tests {
    use super::*;

    /// Every builder setter must show up in the built value.
    #[test]
    fn atom_residue_info_new_and_all_fields() {
        let built = AtomResidueInfo::builder("CA", "ALA", 42, "A")
            .insertion_code_opt(Some('x'))
            .standard_name(Some(StandardResidue::ALA))
            .category(ResidueCategory::Standard)
            .position(ResiduePosition::Internal)
            .build();

        // Comparing against a literal checks all eight fields at once.
        let expected = AtomResidueInfo {
            atom_name: String::from("CA"),
            residue_name: String::from("ALA"),
            residue_id: 42,
            chain_id: String::from("A"),
            insertion_code: Some('x'),
            standard_name: Some(StandardResidue::ALA),
            category: ResidueCategory::Standard,
            position: ResiduePosition::Internal,
        };
        assert_eq!(built, expected);
    }

    /// The insertion code defaults to `None`, and a clone equals its source.
    #[test]
    fn atom_residue_info_defaults_and_clone() {
        let original = AtomResidueInfo::builder("N", "GLY", 1, "B")
            .category(ResidueCategory::Hetero)
            .position(ResiduePosition::None)
            .build();

        assert_eq!(original.insertion_code, None);
        assert_eq!(original.clone(), original);
    }

    /// `builder` accepts both owned `String`s and `&str` for its text inputs.
    #[test]
    fn atom_residue_info_accepts_into_inputs() {
        let owned_atom_name = String::from("O1");
        let borrowed_residue_name = "LIG";

        let AtomResidueInfo {
            atom_name,
            residue_name,
            residue_id,
            chain_id,
            insertion_code,
            standard_name,
            category,
            position,
        } = AtomResidueInfo::builder(owned_atom_name, borrowed_residue_name, 7, "Z")
            .insertion_code_opt(Some('1'))
            .category(ResidueCategory::Hetero)
            .position(ResiduePosition::CTerminal)
            .build();

        assert_eq!(atom_name, "O1");
        assert_eq!(residue_name, "LIG");
        assert_eq!(residue_id, 7);
        assert_eq!(chain_id, "Z");
        assert_eq!(insertion_code, Some('1'));
        assert_eq!(standard_name, None);
        assert_eq!(category, ResidueCategory::Hetero);
        assert_eq!(position, ResiduePosition::CTerminal);
    }

    /// `with_capacity` reserves space, and pushed entries keep their order.
    #[test]
    fn bio_metadata_new_and_capacity() {
        let mut metadata = BioMetadata::with_capacity(4);
        assert!(metadata.atom_info.capacity() >= 4);

        // Both atoms belong to the same internal ALA residue of chain A.
        let ala_atom = |atom_name: &str| {
            AtomResidueInfo::builder(atom_name, "ALA", 1, "A")
                .standard_name(Some(StandardResidue::ALA))
                .category(ResidueCategory::Standard)
                .position(ResiduePosition::Internal)
                .build()
        };
        let alpha = ala_atom("CA");
        let beta = ala_atom("CB");

        metadata.atom_info.push(alpha.clone());
        metadata.atom_info.push(beta.clone());

        assert_eq!(metadata.atom_info.len(), 2);
        assert_eq!(metadata.atom_info[0], alpha);
        assert_eq!(metadata.atom_info[1], beta);
    }

    /// `effective_ph` falls back to 7.4 and otherwise returns the set value.
    #[test]
    fn bio_metadata_target_ph() {
        let close_to = |actual: f64, wanted: f64| (actual - wanted).abs() < f64::EPSILON;

        let mut metadata = BioMetadata::new();
        assert!(metadata.target_ph.is_none());
        assert!(close_to(metadata.effective_ph(), 7.4));

        metadata.target_ph = Some(6.5);
        assert!(close_to(metadata.effective_ph(), 6.5));
    }

    /// Containers are equal only when atoms and target pH both match.
    #[test]
    fn bio_metadata_equality() {
        let atom = AtomResidueInfo::builder("N", "ALA", 1, "A")
            .category(ResidueCategory::Standard)
            .build();
        let at_ph = |ph: f64| BioMetadata {
            atom_info: vec![atom.clone()],
            target_ph: Some(ph),
        };

        let reference = at_ph(7.0);
        let same = at_ph(7.0);
        let shifted = at_ph(7.5);

        assert_eq!(reference, same);
        assert_ne!(reference, shifted);
    }

    /// Derived `Debug` output names the fields and nests the atom entries.
    #[test]
    fn debug_contains_expected_fields() {
        let atom = AtomResidueInfo::builder("C1", "LIG", -1, "Z")
            .insertion_code_opt(Some('A'))
            .category(ResidueCategory::Hetero)
            .position(ResiduePosition::NTerminal)
            .build();
        let metadata = BioMetadata {
            atom_info: vec![atom.clone()],
            target_ph: None,
        };

        let atom_text = format!("{:?}", atom);
        let metadata_text = format!("{:?}", metadata);

        let required = [
            "atom_name",
            "residue_name",
            "residue_id",
            "chain_id",
            "insertion_code",
        ];
        for field in required.iter() {
            assert!(atom_text.contains(*field));
        }

        // For these, either the field name or its type name is accepted.
        let either = [
            ("standard_name", "StandardResidue"),
            ("category", "ResidueCategory"),
            ("position", "ResiduePosition"),
        ];
        for pair in either.iter() {
            assert!(atom_text.contains(pair.0) || atom_text.contains(pair.1));
        }

        assert!(metadata_text.contains("AtomResidueInfo"));
        assert!(metadata_text.contains("LIG"));
    }
}