Skip to main content

cyanea_struct/
types.rs

//! Core types for macromolecular 3D structure representation.
2
3use cyanea_core::{Annotated, ContentAddressable, Summarizable};
4use sha2::{Digest, Sha256};
5
6use alloc::format;
7use alloc::string::String;
8use alloc::vec::Vec;
9
/// A point (or displacement vector) in 3D Cartesian space.
///
/// Every operation takes its operands by reference and returns a fresh value;
/// nothing is mutated in place.
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Point3D {
    /// X component.
    pub x: f64,
    /// Y component.
    pub y: f64,
    /// Z component.
    pub z: f64,
}

impl Point3D {
    /// Builds a point from its three components.
    pub fn new(x: f64, y: f64, z: f64) -> Self {
        Point3D { x, y, z }
    }

    /// The origin, `(0, 0, 0)`.
    pub fn zero() -> Self {
        Self::new(0.0, 0.0, 0.0)
    }

    /// Euclidean distance between `self` and `other`.
    pub fn distance_to(&self, other: &Point3D) -> f64 {
        // Length of the difference vector.
        self.sub(other).norm()
    }

    /// Dot (scalar) product of `self` and `other`.
    pub fn dot(&self, other: &Point3D) -> f64 {
        let (xx, yy, zz) = (self.x * other.x, self.y * other.y, self.z * other.z);
        xx + yy + zz
    }

    /// Cross (vector) product `self × other`.
    pub fn cross(&self, other: &Point3D) -> Point3D {
        let cx = self.y * other.z - self.z * other.y;
        let cy = self.z * other.x - self.x * other.z;
        let cz = self.x * other.y - self.y * other.x;
        Point3D::new(cx, cy, cz)
    }

    /// Length of the vector from the origin to this point.
    pub fn norm(&self) -> f64 {
        self.dot(self).sqrt()
    }

    /// Unit vector pointing the same way as `self`.
    ///
    /// When the magnitude is below `1e-15` the direction is considered
    /// undefined and the zero vector is returned instead.
    pub fn normalize(&self) -> Point3D {
        let magnitude = self.norm();
        // Written as `<` on purpose: a NaN magnitude fails the comparison and
        // falls through to the division, propagating NaN to the caller.
        if magnitude < 1e-15 {
            return Point3D::zero();
        }
        Point3D::new(self.x / magnitude, self.y / magnitude, self.z / magnitude)
    }

    /// Component-wise sum `self + other`.
    pub fn add(&self, other: &Point3D) -> Point3D {
        Point3D::new(self.x + other.x, self.y + other.y, self.z + other.z)
    }

    /// Component-wise difference `self - other`.
    pub fn sub(&self, other: &Point3D) -> Point3D {
        Point3D::new(self.x - other.x, self.y - other.y, self.z - other.z)
    }

    /// Multiplies every component by the scalar `s`.
    pub fn scale(&self, s: f64) -> Point3D {
        Point3D::new(self.x * s, self.y * s, self.z * s)
    }
}
102
103/// A single atom in a macromolecular structure.
104#[derive(Debug, Clone)]
105#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
106pub struct Atom {
107    /// Atom serial number.
108    pub serial: u32,
109    /// Atom name (e.g. "CA", "N", "CB").
110    pub name: String,
111    /// Alternate location indicator.
112    pub alt_loc: Option<char>,
113    /// 3D coordinates in Angstroms.
114    pub coords: Point3D,
115    /// Occupancy factor.
116    pub occupancy: f64,
117    /// Temperature factor (B-factor).
118    pub temp_factor: f64,
119    /// Element symbol.
120    pub element: Option<String>,
121    /// Formal charge.
122    pub charge: Option<i8>,
123    /// Whether this is a HETATM record.
124    pub is_hetatm: bool,
125}
126
127impl Atom {
128    /// Whether this atom is a backbone atom (N, CA, C, O).
129    pub fn is_backbone(&self) -> bool {
130        let trimmed = self.name.trim();
131        matches!(trimmed, "N" | "CA" | "C" | "O")
132    }
133
134    /// Whether this is an alpha carbon.
135    pub fn is_alpha_carbon(&self) -> bool {
136        self.name.trim() == "CA"
137    }
138}
139
140/// A residue (amino acid or nucleotide) in a chain.
141#[derive(Debug, Clone)]
142#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
143pub struct Residue {
144    /// Three-letter residue name (e.g. "ALA", "GLY").
145    pub name: String,
146    /// Sequence number from the PDB file.
147    pub seq_num: i32,
148    /// Insertion code.
149    pub i_code: Option<char>,
150    /// Atoms belonging to this residue.
151    pub atoms: Vec<Atom>,
152}
153
154impl Residue {
155    /// Get an atom by name, returning the first match.
156    pub fn get_atom(&self, name: &str) -> Option<&Atom> {
157        self.atoms.iter().find(|a| a.name.trim() == name)
158    }
159
160    /// Get the alpha carbon atom.
161    pub fn get_alpha_carbon(&self) -> Option<&Atom> {
162        self.get_atom("CA")
163    }
164
165    /// Return all backbone atoms (N, CA, C, O).
166    pub fn backbone_atoms(&self) -> Vec<&Atom> {
167        self.atoms.iter().filter(|a| a.is_backbone()).collect()
168    }
169
170    /// Geometric center of mass (unweighted) of all atoms.
171    pub fn center_of_mass(&self) -> Point3D {
172        if self.atoms.is_empty() {
173            return Point3D::zero();
174        }
175        let mut sum = Point3D::zero();
176        for atom in &self.atoms {
177            sum = sum.add(&atom.coords);
178        }
179        sum.scale(1.0 / self.atoms.len() as f64)
180    }
181}
182
183impl Annotated for Residue {
184    fn name(&self) -> &str {
185        &self.name
186    }
187}
188
189/// A polypeptide or polynucleotide chain.
190#[derive(Debug, Clone)]
191#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
192pub struct Chain {
193    /// Single-character chain identifier.
194    pub id: char,
195    /// Residues in this chain, in sequence order.
196    pub residues: Vec<Residue>,
197    /// String form of chain ID for trait impl.
198    chain_id_str: String,
199}
200
201impl Chain {
202    /// Create a new chain.
203    pub fn new(id: char, residues: Vec<Residue>) -> Self {
204        Self {
205            id,
206            residues,
207            chain_id_str: format!("Chain {}", id),
208        }
209    }
210
211    /// Number of residues.
212    pub fn residue_count(&self) -> usize {
213        self.residues.len()
214    }
215
216    /// Total number of atoms across all residues.
217    pub fn atom_count(&self) -> usize {
218        self.residues.iter().map(|r| r.atoms.len()).sum()
219    }
220
221    /// Collect all alpha carbon atoms in this chain.
222    pub fn alpha_carbons(&self) -> Vec<&Atom> {
223        self.residues
224            .iter()
225            .filter_map(|r| r.get_alpha_carbon())
226            .collect()
227    }
228}
229
230impl Annotated for Chain {
231    fn name(&self) -> &str {
232        &self.chain_id_str
233    }
234}
235
236/// A complete macromolecular structure (one or more chains).
237#[derive(Debug, Clone)]
238#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
239pub struct Structure {
240    /// PDB identifier or user-supplied name.
241    pub id: String,
242    /// Chains in this structure.
243    pub chains: Vec<Chain>,
244}
245
246impl Structure {
247    /// Number of chains.
248    pub fn chain_count(&self) -> usize {
249        self.chains.len()
250    }
251
252    /// Total residues across all chains.
253    pub fn residue_count(&self) -> usize {
254        self.chains.iter().map(|c| c.residue_count()).sum()
255    }
256
257    /// Total atoms across all chains.
258    pub fn atom_count(&self) -> usize {
259        self.chains.iter().map(|c| c.atom_count()).sum()
260    }
261
262    /// Get a chain by its single-character ID.
263    pub fn get_chain(&self, id: char) -> Option<&Chain> {
264        self.chains.iter().find(|c| c.id == id)
265    }
266
267    /// Collect all atoms across all chains.
268    pub fn all_atoms(&self) -> Vec<&Atom> {
269        self.chains
270            .iter()
271            .flat_map(|c| c.residues.iter().flat_map(|r| r.atoms.iter()))
272            .collect()
273    }
274
275    /// Collect all alpha carbon atoms across all chains.
276    pub fn alpha_carbons(&self) -> Vec<&Atom> {
277        self.chains.iter().flat_map(|c| c.alpha_carbons()).collect()
278    }
279
280    /// Geometric center of mass of all atoms.
281    pub fn center_of_mass(&self) -> Point3D {
282        let atoms = self.all_atoms();
283        if atoms.is_empty() {
284            return Point3D::zero();
285        }
286        let mut sum = Point3D::zero();
287        for atom in &atoms {
288            sum = sum.add(&atom.coords);
289        }
290        sum.scale(1.0 / atoms.len() as f64)
291    }
292}
293
294impl Annotated for Structure {
295    fn name(&self) -> &str {
296        &self.id
297    }
298}
299
300impl Summarizable for Structure {
301    fn summary(&self) -> String {
302        format!(
303            "Structure {} — {} chain(s), {} residue(s), {} atom(s)",
304            self.id,
305            self.chain_count(),
306            self.residue_count(),
307            self.atom_count(),
308        )
309    }
310}
311
312impl ContentAddressable for Structure {
313    fn content_hash(&self) -> String {
314        let mut hasher = Sha256::new();
315        hasher.update(self.id.as_bytes());
316        for chain in &self.chains {
317            hasher.update(&[chain.id as u8]);
318            for residue in &chain.residues {
319                hasher.update(residue.name.as_bytes());
320                hasher.update(&residue.seq_num.to_le_bytes());
321                for atom in &residue.atoms {
322                    hasher.update(atom.name.as_bytes());
323                    hasher.update(&atom.coords.x.to_le_bytes());
324                    hasher.update(&atom.coords.y.to_le_bytes());
325                    hasher.update(&atom.coords.z.to_le_bytes());
326                }
327            }
328        }
329        hex::encode(hasher.finalize())
330    }
331}
332
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;

    /// Tolerance used for floating-point comparisons in this module.
    const EPS: f64 = 1e-10;

    /// Builds a plain (non-HETATM) atom with the given name and position;
    /// every other field gets a fixed neutral value.
    fn make_atom(name: &str, x: f64, y: f64, z: f64) -> Atom {
        let coords = Point3D::new(x, y, z);
        Atom {
            serial: 1,
            name: name.into(),
            alt_loc: None,
            coords,
            occupancy: 1.0,
            temp_factor: 0.0,
            element: None,
            charge: None,
            is_hetatm: false,
        }
    }

    /// Wraps `atoms` in a residue numbered 1 with no insertion code.
    fn make_residue(name: &str, atoms: Vec<Atom>) -> Residue {
        Residue {
            name: name.into(),
            seq_num: 1,
            i_code: None,
            atoms,
        }
    }

    #[test]
    fn point3d_arithmetic() {
        let a = Point3D::new(1.0, 2.0, 3.0);
        let b = Point3D::new(4.0, 5.0, 6.0);

        assert_eq!(a.add(&b), Point3D::new(5.0, 7.0, 9.0));
        assert_eq!(a.sub(&b), Point3D::new(-3.0, -3.0, -3.0));

        let dot_error = (a.dot(&b) - 32.0).abs();
        assert!(dot_error < EPS);

        let scaled = a.scale(2.0);
        assert!((scaled.x - 2.0).abs() < EPS);

        let expected_distance = (27.0_f64).sqrt();
        assert!((a.distance_to(&b) - expected_distance).abs() < EPS);
    }

    #[test]
    fn point3d_cross_product() {
        // x̂ × ŷ must give ẑ.
        let x_axis = Point3D::new(1.0, 0.0, 0.0);
        let y_axis = Point3D::new(0.0, 1.0, 0.0);
        let product = x_axis.cross(&y_axis);

        assert!(product.x.abs() < EPS);
        assert!(product.y.abs() < EPS);
        assert!((product.z - 1.0).abs() < EPS);
    }

    #[test]
    fn atom_backbone_detection() {
        let ca = make_atom("CA", 0.0, 0.0, 0.0);
        assert!(ca.is_backbone());
        assert!(ca.is_alpha_carbon());

        let cb = make_atom("CB", 0.0, 0.0, 0.0);
        assert!(!cb.is_backbone());
        assert!(!cb.is_alpha_carbon());

        let n = make_atom("N", 0.0, 0.0, 0.0);
        assert!(n.is_backbone());
    }

    #[test]
    fn residue_get_alpha_carbon() {
        let atoms = vec![
            make_atom("N", 0.0, 0.0, 0.0),
            make_atom("CA", 1.0, 0.0, 0.0),
            make_atom("C", 2.0, 0.0, 0.0),
        ];
        let residue = make_residue("ALA", atoms);

        assert!(residue.get_alpha_carbon().is_some());
        assert_eq!(residue.backbone_atoms().len(), 3);
    }

    #[test]
    fn structure_summary_and_hash() {
        let glycine = make_residue("GLY", vec![make_atom("CA", 1.0, 2.0, 3.0)]);
        let structure = Structure {
            id: "1ABC".into(),
            chains: vec![Chain::new('A', vec![glycine])],
        };

        let summary = structure.summary();
        for expected in ["1ABC", "1 chain", "1 residue", "1 atom"].iter() {
            assert!(summary.contains(*expected));
        }

        let first = structure.content_hash();
        // A SHA-256 digest is 32 bytes, i.e. 64 hex characters.
        assert_eq!(first.len(), 64);
        // Hashing the same structure twice must give the same digest.
        let second = structure.content_hash();
        assert_eq!(first, second);
    }
}