dreid_forge/model/metadata.rs
1//! Biological metadata for biomolecular systems.
2//!
3//! This module provides data structures for storing PDB/mmCIF-style
4//! annotations that describe the biological context of atoms within
5//! protein, nucleic acid, and other biomolecular structures.
6//!
7//! # Overview
8//!
9//! - [`StandardResidue`] — Enumeration of standard amino acids, nucleotides, and water
10//! - [`ResidueCategory`] — Classification as standard, hetero, or ion
11//! - [`ResiduePosition`] — Terminal position annotations (N/C-terminal, 5'/3'-end)
12//! - [`AtomResidueInfo`] — Per-atom biological context (residue name, chain, etc.)
13//! - [`BioMetadata`] — Collection of atom-level biological annotations
14//!
15//! # Builder Pattern
16//!
17//! Use [`AtomResidueInfo::builder`] to construct instances with optional fields:
18//!
19//! ```
20//! use dreid_forge::{AtomResidueInfo, StandardResidue, ResidueCategory, ResiduePosition};
21//!
22//! let info = AtomResidueInfo::builder("CA", "ALA", 42, "A")
23//! .standard_name(Some(StandardResidue::ALA))
24//! .category(ResidueCategory::Standard)
25//! .position(ResiduePosition::NTerminal)
26//! .build();
27//!
28//! assert_eq!(info.atom_name, "CA");
29//! assert_eq!(info.chain_id, "A");
30//! ```
31
/// Residue names recognized as standard in PDB/mmCIF nomenclature.
///
/// The variants are declared in three groups: the 20 canonical amino
/// acids, the common RNA and DNA nucleotides, and water.
///
/// # Amino Acids
///
/// Identified by their three-letter codes:
/// ALA, ARG, ASN, ASP, CYS, GLN, GLU, GLY, HIS, ILE,
/// LEU, LYS, MET, PHE, PRO, SER, THR, TRP, TYR, VAL.
///
/// # Nucleotides
///
/// - RNA: A, C, G, U, and I (inosine)
/// - DNA: DA, DC, DG, DT, and DI
///
/// # Solvent
///
/// - HOH: a water molecule
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum StandardResidue {
    // --- Amino acids -----------------------------------------------------
    /// Alanine (amino acid).
    ALA,
    /// Arginine (amino acid).
    ARG,
    /// Asparagine (amino acid).
    ASN,
    /// Aspartic acid (amino acid).
    ASP,
    /// Cysteine (amino acid).
    CYS,
    /// Glutamine (amino acid).
    GLN,
    /// Glutamic acid (amino acid).
    GLU,
    /// Glycine (amino acid).
    GLY,
    /// Histidine (amino acid).
    HIS,
    /// Isoleucine (amino acid).
    ILE,
    /// Leucine (amino acid).
    LEU,
    /// Lysine (amino acid).
    LYS,
    /// Methionine (amino acid).
    MET,
    /// Phenylalanine (amino acid).
    PHE,
    /// Proline (amino acid).
    PRO,
    /// Serine (amino acid).
    SER,
    /// Threonine (amino acid).
    THR,
    /// Tryptophan (amino acid).
    TRP,
    /// Tyrosine (amino acid).
    TYR,
    /// Valine (amino acid).
    VAL,

    // --- RNA nucleotides -------------------------------------------------
    /// Adenosine, the RNA form.
    A,
    /// Cytidine, the RNA form.
    C,
    /// Guanosine, the RNA form.
    G,
    /// Uridine, the RNA form.
    U,
    /// Inosine, the RNA form.
    I,

    // --- DNA nucleotides -------------------------------------------------
    /// Deoxyadenosine, the DNA form.
    DA,
    /// Deoxycytidine, the DNA form.
    DC,
    /// Deoxyguanosine, the DNA form.
    DG,
    /// Deoxythymidine, the DNA form.
    DT,
    /// Deoxyinosine, the DNA form.
    DI,

    // --- Solvent ---------------------------------------------------------
    /// A water molecule.
    HOH,
}
116
/// Chemical classification assigned to a residue.
///
/// Separates standard biomolecular building blocks from
/// non-standard entities such as ligands and ions.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ResidueCategory {
    /// A standard amino acid or nucleotide.
    Standard,
    /// A hetero atom group, such as a ligand or a modified residue.
    Hetero,
    /// An ion, either monoatomic or polyatomic.
    Ion,
}
130
/// Where a residue sits within its polymer chain.
///
/// Terminal residues are flagged explicitly because being at a chain
/// end affects protonation states and hydrogen bonding.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ResiduePosition {
    /// No terminal annotation, or the concept does not apply.
    None,
    /// A residue in the interior of the chain.
    Internal,
    /// The N-terminal residue of a protein chain.
    NTerminal,
    /// The C-terminal residue of a protein chain.
    CTerminal,
    /// The 5' end of a nucleic acid strand.
    FivePrime,
    /// The 3' end of a nucleic acid strand.
    ThreePrime,
}
150
151/// Biological annotation for a single atom.
152///
153/// Stores PDB/mmCIF-style metadata that describes an atom's
154/// context within a biomolecular structure, including residue
155/// identity, chain assignment, and terminal position.
156///
157/// # Fields
158///
159/// * `atom_name` — PDB atom name (e.g., "CA", "N", "O")
160/// * `residue_name` — Three-letter residue code (e.g., "ALA", "HOH")
161/// * `residue_id` — Residue sequence number
162/// * `chain_id` — Chain identifier
163/// * `insertion_code` — PDB insertion code (`None` if not present)
164/// * `standard_name` — Parsed [`StandardResidue`] if recognized
165/// * `category` — Residue classification
166/// * `position` — Terminal position within chain
167///
168/// # Construction
169///
170/// Use [`AtomResidueInfo::builder`] for convenient construction
171/// with default values for optional fields.
172#[derive(Debug, Clone, PartialEq, Eq)]
173pub struct AtomResidueInfo {
174 /// PDB-style atom name (e.g., "CA", "CB", "N").
175 pub atom_name: String,
176 /// Three-letter residue code (e.g., "ALA", "LIG").
177 pub residue_name: String,
178 /// Residue sequence number.
179 pub residue_id: i32,
180 /// Chain identifier.
181 pub chain_id: String,
182 /// PDB insertion code for residue numbering conflicts (`None` if absent).
183 pub insertion_code: Option<char>,
184 /// Parsed standard residue type, if recognized.
185 pub standard_name: Option<StandardResidue>,
186 /// Classification of the residue (standard, hetero, ion).
187 pub category: ResidueCategory,
188 /// Terminal position within the polymer chain.
189 pub position: ResiduePosition,
190}
191
192impl AtomResidueInfo {
193 /// Creates a builder for constructing an [`AtomResidueInfo`].
194 ///
195 /// # Arguments
196 ///
197 /// * `atom_name` — PDB-style atom name
198 /// * `residue_name` — Three-letter residue code
199 /// * `residue_id` — Residue sequence number
200 /// * `chain_id` — Chain identifier
201 ///
202 /// # Returns
203 ///
204 /// An [`AtomResidueBuilder`] with default values for optional fields.
205 ///
206 /// # Examples
207 ///
208 /// ```
209 /// use dreid_forge::{AtomResidueInfo, ResidueCategory, ResiduePosition};
210 ///
211 /// let info = AtomResidueInfo::builder("CA", "GLY", 1, "A")
212 /// .category(ResidueCategory::Standard)
213 /// .position(ResiduePosition::Internal)
214 /// .build();
215 ///
216 /// assert_eq!(info.residue_name, "GLY");
217 /// ```
218 pub fn builder(
219 atom_name: impl Into<String>,
220 residue_name: impl Into<String>,
221 residue_id: i32,
222 chain_id: impl Into<String>,
223 ) -> AtomResidueBuilder {
224 AtomResidueBuilder {
225 atom_name: atom_name.into(),
226 residue_name: residue_name.into(),
227 residue_id,
228 chain_id: chain_id.into(),
229 insertion_code: None,
230 standard_name: None,
231 category: ResidueCategory::Standard,
232 position: ResiduePosition::None,
233 }
234 }
235}
236
237/// Builder for constructing [`AtomResidueInfo`] instances.
238///
239/// Provides a fluent API for setting optional fields with sensible defaults.
240#[derive(Debug, Clone)]
241pub struct AtomResidueBuilder {
242 atom_name: String,
243 residue_name: String,
244 residue_id: i32,
245 chain_id: String,
246 insertion_code: Option<char>,
247 standard_name: Option<StandardResidue>,
248 category: ResidueCategory,
249 position: ResiduePosition,
250}
251
252impl AtomResidueBuilder {
253 /// Sets the PDB insertion code.
254 ///
255 /// # Arguments
256 ///
257 /// * `code` — Optional insertion code character
258 pub fn insertion_code_opt(mut self, code: Option<char>) -> Self {
259 self.insertion_code = code;
260 self
261 }
262
263 /// Sets the standard residue type.
264 ///
265 /// # Arguments
266 ///
267 /// * `name` — Optional [`StandardResidue`] variant
268 pub fn standard_name(mut self, name: Option<StandardResidue>) -> Self {
269 self.standard_name = name;
270 self
271 }
272
273 /// Sets the residue category.
274 ///
275 /// # Arguments
276 ///
277 /// * `category` — [`ResidueCategory`] classification
278 pub fn category(mut self, category: ResidueCategory) -> Self {
279 self.category = category;
280 self
281 }
282
283 /// Sets the terminal position.
284 ///
285 /// # Arguments
286 ///
287 /// * `position` — [`ResiduePosition`] within the chain
288 pub fn position(mut self, position: ResiduePosition) -> Self {
289 self.position = position;
290 self
291 }
292
293 /// Builds the final [`AtomResidueInfo`] instance.
294 ///
295 /// # Returns
296 ///
297 /// A fully constructed [`AtomResidueInfo`].
298 pub fn build(self) -> AtomResidueInfo {
299 AtomResidueInfo {
300 atom_name: self.atom_name,
301 residue_name: self.residue_name,
302 residue_id: self.residue_id,
303 chain_id: self.chain_id,
304 insertion_code: self.insertion_code,
305 standard_name: self.standard_name,
306 category: self.category,
307 position: self.position,
308 }
309 }
310}
311
312/// Collection of biological metadata for all atoms in a system.
313///
314/// Stores per-atom biological annotations in a vector that parallels
315/// the atom indices in a [`System`](crate::System). Each entry provides
316/// PDB/mmCIF-style information about the atom's residue context.
317///
318/// # Examples
319///
320/// ```
321/// use dreid_forge::{AtomResidueInfo, BioMetadata, ResidueCategory, ResiduePosition};
322///
323/// let mut metadata = BioMetadata::with_capacity(100);
324///
325/// let info = AtomResidueInfo::builder("N", "ALA", 1, "A")
326/// .category(ResidueCategory::Standard)
327/// .position(ResiduePosition::NTerminal)
328/// .build();
329///
330/// metadata.atom_info.push(info);
331/// assert_eq!(metadata.atom_info.len(), 1);
332///
333/// // Set target pH for charge assignment
334/// metadata.target_ph = Some(7.0);
335/// ```
336#[derive(Debug, Clone, Default)]
337pub struct BioMetadata {
338 /// Per-atom biological annotations, indexed by atom order.
339 pub atom_info: Vec<AtomResidueInfo>,
340 /// Target pH for protonation state determination.
341 pub target_ph: Option<f64>,
342}
343
344impl PartialEq for BioMetadata {
345 fn eq(&self, other: &Self) -> bool {
346 self.atom_info == other.atom_info && self.target_ph == other.target_ph
347 }
348}
349
350impl Eq for BioMetadata {}
351
352impl BioMetadata {
353 /// Creates an empty [`BioMetadata`] container.
354 pub fn new() -> Self {
355 Self::default()
356 }
357
358 /// Creates a [`BioMetadata`] with pre-allocated capacity.
359 ///
360 /// # Arguments
361 ///
362 /// * `capacity` — Number of atoms to pre-allocate space for
363 pub fn with_capacity(capacity: usize) -> Self {
364 Self {
365 atom_info: Vec::with_capacity(capacity),
366 target_ph: None,
367 }
368 }
369
370 /// Returns the target pH, defaulting to physiological pH (7.4) if not set.
371 pub fn effective_ph(&self) -> f64 {
372 self.target_ph.unwrap_or(7.4)
373 }
374}
375
#[cfg(test)]
mod tests {
    use super::*;

    /// Every value passed to the builder must appear in the built struct.
    #[test]
    fn atom_residue_info_new_and_all_fields() {
        let built = AtomResidueInfo::builder("CA", "ALA", 42, "A")
            .insertion_code_opt(Some('x'))
            .standard_name(Some(StandardResidue::ALA))
            .category(ResidueCategory::Standard)
            .position(ResiduePosition::Internal)
            .build();

        let expected = AtomResidueInfo {
            atom_name: String::from("CA"),
            residue_name: String::from("ALA"),
            residue_id: 42,
            chain_id: String::from("A"),
            insertion_code: Some('x'),
            standard_name: Some(StandardResidue::ALA),
            category: ResidueCategory::Standard,
            position: ResiduePosition::Internal,
        };
        assert_eq!(built, expected);
    }

    /// An unset insertion code stays `None`, and a clone compares equal.
    #[test]
    fn atom_residue_info_defaults_and_clone() {
        let original = AtomResidueInfo::builder("N", "GLY", 1, "B")
            .category(ResidueCategory::Hetero)
            .position(ResiduePosition::None)
            .build();
        assert!(original.insertion_code.is_none());

        let duplicate = original.clone();
        assert_eq!(original, duplicate);
    }

    /// The builder takes both owned `String`s and `&str` for its names.
    #[test]
    fn atom_residue_info_accepts_into_inputs() {
        let owned_atom_name = String::from("O1");
        let borrowed_residue_name = "LIG";

        let built = AtomResidueInfo::builder(owned_atom_name, borrowed_residue_name, 7, "Z")
            .insertion_code_opt(Some('1'))
            .category(ResidueCategory::Hetero)
            .position(ResiduePosition::CTerminal)
            .build();

        let expected = AtomResidueInfo {
            atom_name: String::from("O1"),
            residue_name: String::from("LIG"),
            residue_id: 7,
            chain_id: String::from("Z"),
            insertion_code: Some('1'),
            standard_name: None,
            category: ResidueCategory::Hetero,
            position: ResiduePosition::CTerminal,
        };
        assert_eq!(built, expected);
    }

    /// `with_capacity` reserves room, and pushed entries keep their order.
    #[test]
    fn bio_metadata_new_and_capacity() {
        let mut metadata = BioMetadata::with_capacity(4);
        assert!(metadata.atom_info.capacity() >= 4);

        // Two atoms of the same residue that differ only in atom name.
        let alanine_atom = |atom_name: &str| {
            AtomResidueInfo::builder(atom_name, "ALA", 1, "A")
                .standard_name(Some(StandardResidue::ALA))
                .category(ResidueCategory::Standard)
                .position(ResiduePosition::Internal)
                .build()
        };
        let alpha_carbon = alanine_atom("CA");
        let beta_carbon = alanine_atom("CB");

        metadata.atom_info.push(alpha_carbon.clone());
        metadata.atom_info.push(beta_carbon.clone());

        assert_eq!(metadata.atom_info.len(), 2);
        assert_eq!(metadata.atom_info[0], alpha_carbon);
        assert_eq!(metadata.atom_info[1], beta_carbon);
    }

    /// `effective_ph` falls back to 7.4 and otherwise echoes the target.
    #[test]
    fn bio_metadata_target_ph() {
        let close_to = |actual: f64, wanted: f64| (actual - wanted).abs() < f64::EPSILON;

        let mut metadata = BioMetadata::new();
        assert!(metadata.target_ph.is_none());
        assert!(close_to(metadata.effective_ph(), 7.4));

        metadata.target_ph = Some(6.5);
        assert!(close_to(metadata.effective_ph(), 6.5));
    }

    /// Equality takes both the atom list and the target pH into account.
    #[test]
    fn bio_metadata_equality() {
        let atom = AtomResidueInfo::builder("N", "ALA", 1, "A")
            .category(ResidueCategory::Standard)
            .build();
        let with_ph = |ph: f64| BioMetadata {
            atom_info: vec![atom.clone()],
            target_ph: Some(ph),
        };

        let first = with_ph(7.0);
        let same_as_first = with_ph(7.0);
        let different_ph = with_ph(7.5);

        assert_eq!(first, same_as_first);
        assert_ne!(first, different_ph);
    }

    /// `Debug` output names the fields and shows nested annotation data.
    #[test]
    fn debug_contains_expected_fields() {
        let atom = AtomResidueInfo::builder("C1", "LIG", -1, "Z")
            .insertion_code_opt(Some('A'))
            .category(ResidueCategory::Hetero)
            .position(ResiduePosition::NTerminal)
            .build();
        let metadata = BioMetadata {
            atom_info: vec![atom.clone()],
            target_ph: None,
        };

        let atom_debug = format!("{:?}", atom);
        let metadata_debug = format!("{:?}", metadata);

        // Fields that must appear under their own name.
        let required_fields = [
            "atom_name",
            "residue_name",
            "residue_id",
            "chain_id",
            "insertion_code",
        ];
        for field in required_fields.iter() {
            assert!(atom_debug.contains(*field));
        }

        // Fields that may appear under either the field or the type name.
        let field_or_type = [
            ("standard_name", "StandardResidue"),
            ("category", "ResidueCategory"),
            ("position", "ResiduePosition"),
        ];
        for (field, type_name) in field_or_type.iter() {
            assert!(atom_debug.contains(*field) || atom_debug.contains(*type_name));
        }

        assert!(metadata_debug.contains("AtomResidueInfo"));
        assert!(metadata_debug.contains("LIG"));
    }
}
508}