Skip to main content

chematic_core/
molecule.rs

1//! Molecule graph: atoms, bonds, and adjacency list.
2
3use crate::atom::Atom;
4use crate::bond::{BondEntry, BondOrder};
5
6/// Newtype index for an atom in a Molecule.
7#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
8pub struct AtomIdx(pub u32);
9
10/// Newtype index for a bond in a Molecule.
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
12pub struct BondIdx(pub u32);
13
14/// Error types for molecule construction.
15#[derive(Debug, Clone, PartialEq, Eq)]
16pub enum MolError {
17    /// Atom index out of range.
18    InvalidAtomIdx(AtomIdx),
19    /// Duplicate bond between the same pair of atoms.
20    DuplicateBond(AtomIdx, AtomIdx),
21}
22
23impl core::fmt::Display for MolError {
24    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
25        match self {
26            Self::InvalidAtomIdx(idx) => write!(f, "invalid atom index: {}", idx.0),
27            Self::DuplicateBond(a, b) => write!(f, "duplicate bond between atoms {} and {}", a.0, b.0),
28        }
29    }
30}
31
32/// An immutable molecular graph built via [`MoleculeBuilder`].
33///
34/// Representation: atom list + bond list + per-atom adjacency list.
35/// No external graph library is used; all graph traversal is domain-aware.
36pub struct Molecule {
37    atoms: Vec<Atom>,
38    bonds: Vec<BondEntry>,
39    /// adjacency[atom_idx] = list of (neighbor_atom_idx, bond_idx)
40    adjacency: Vec<Vec<(AtomIdx, BondIdx)>>,
41}
42
43impl Molecule {
44    /// Number of heavy atoms (does not count implicit H).
45    pub fn atom_count(&self) -> usize {
46        self.atoms.len()
47    }
48
49    /// Number of bonds (edges).
50    pub fn bond_count(&self) -> usize {
51        self.bonds.len()
52    }
53
54    /// Borrow atom by index.
55    ///
56    /// # Panics
57    /// Panics if `idx` is out of range (should not happen with indices from this molecule).
58    pub fn atom(&self, idx: AtomIdx) -> &Atom {
59        &self.atoms[idx.0 as usize]
60    }
61
62    /// Borrow bond by index.
63    pub fn bond(&self, idx: BondIdx) -> &BondEntry {
64        &self.bonds[idx.0 as usize]
65    }
66
67    /// Iterate over all atoms as `(AtomIdx, &Atom)`.
68    pub fn atoms(&self) -> impl Iterator<Item = (AtomIdx, &Atom)> {
69        self.atoms
70            .iter()
71            .enumerate()
72            .map(|(i, a)| (AtomIdx(i as u32), a))
73    }
74
75    /// Iterate over all bonds as `(BondIdx, &BondEntry)`.
76    pub fn bonds(&self) -> impl Iterator<Item = (BondIdx, &BondEntry)> {
77        self.bonds
78            .iter()
79            .enumerate()
80            .map(|(i, b)| (BondIdx(i as u32), b))
81    }
82
83    /// Iterate over neighbors of `idx` as `(neighbor_atom_idx, bond_idx)`.
84    pub fn neighbors(&self, idx: AtomIdx) -> impl Iterator<Item = (AtomIdx, BondIdx)> + '_ {
85        self.adjacency[idx.0 as usize].iter().copied()
86    }
87
88    /// Degree (number of connected bonds) of atom `idx`.
89    pub fn degree(&self, idx: AtomIdx) -> usize {
90        self.adjacency[idx.0 as usize].len()
91    }
92
93    /// Return the bond between `a` and `b`, or `None` if not connected.
94    pub fn bond_between(&self, a: AtomIdx, b: AtomIdx) -> Option<(BondIdx, &BondEntry)> {
95        for &(nb, bidx) in &self.adjacency[a.0 as usize] {
96            if nb == b {
97                return Some((bidx, &self.bonds[bidx.0 as usize]));
98            }
99        }
100        None
101    }
102
103    /// Molecular formula as a Hill-order string (C first, H second, then alphabetical).
104    pub fn formula(&self) -> String {
105        use std::collections::BTreeMap;
106        let mut counts: BTreeMap<&str, u32> = BTreeMap::new();
107
108        for (_, atom) in self.atoms() {
109            *counts.entry(atom.element.symbol()).or_insert(0) += 1;
110        }
111
112        let mut result = String::new();
113
114        // Hill order: C first
115        if let Some(&c) = counts.get("C") {
116            result.push('C');
117            if c > 1 { result.push_str(&c.to_string()); }
118            counts.remove("C");
119        }
120        // H second
121        if let Some(&h) = counts.get("H") {
122            result.push('H');
123            if h > 1 { result.push_str(&h.to_string()); }
124            counts.remove("H");
125        }
126        // rest alphabetically
127        for (sym, count) in &counts {
128            result.push_str(sym);
129            if *count > 1 { result.push_str(&count.to_string()); }
130        }
131
132        result
133    }
134}
135
136/// Builder for constructing a [`Molecule`] incrementally.
137///
138/// Usage: add atoms, add bonds, then call `build()`.
139#[derive(Default)]
140pub struct MoleculeBuilder {
141    atoms: Vec<Atom>,
142    bonds: Vec<BondEntry>,
143    adjacency: Vec<Vec<(AtomIdx, BondIdx)>>,
144}
145
146impl MoleculeBuilder {
147    pub fn new() -> Self {
148        Self::default()
149    }
150
151    /// Read-only reference to an atom already added to the builder.
152    ///
153    /// Used by the SMILES parser to infer implicit bond types without
154    /// consuming the builder (e.g. aromatic-aromatic → Aromatic bond).
155    ///
156    /// # Panics
157    /// Panics if `idx` is out of range.
158    pub fn atom_at(&self, idx: AtomIdx) -> &Atom {
159        &self.atoms[idx.0 as usize]
160    }
161
162    /// Number of atoms added so far.
163    pub fn atom_count(&self) -> usize {
164        self.atoms.len()
165    }
166
167    /// Iterate over already-added neighbors of `idx` as `(bond_idx, neighbor_atom_idx)`.
168    /// Used by kekulization tests to check whether a bond already exists in the builder.
169    pub fn atom_neighbors(&self, idx: AtomIdx) -> impl Iterator<Item = (BondIdx, AtomIdx)> + '_ {
170        self.adjacency[idx.0 as usize].iter().map(|&(nb, bidx)| (bidx, nb))
171    }
172
173    /// Add an atom and return its index.
174    pub fn add_atom(&mut self, atom: Atom) -> AtomIdx {
175        let idx = AtomIdx(self.atoms.len() as u32);
176        self.atoms.push(atom);
177        self.adjacency.push(Vec::new());
178        idx
179    }
180
181    /// Add a bond between two existing atoms.
182    ///
183    /// Returns an error if either atom index is invalid or if the bond already exists.
184    pub fn add_bond(&mut self, a: AtomIdx, b: AtomIdx, order: BondOrder) -> Result<BondIdx, MolError> {
185        let n = self.atoms.len() as u32;
186        if a.0 >= n { return Err(MolError::InvalidAtomIdx(a)); }
187        if b.0 >= n { return Err(MolError::InvalidAtomIdx(b)); }
188
189        // Check for duplicate
190        for &(nb, _) in &self.adjacency[a.0 as usize] {
191            if nb == b {
192                return Err(MolError::DuplicateBond(a, b));
193            }
194        }
195
196        let bidx = BondIdx(self.bonds.len() as u32);
197        self.bonds.push(BondEntry { atom1: a, atom2: b, order });
198        self.adjacency[a.0 as usize].push((b, bidx));
199        self.adjacency[b.0 as usize].push((a, bidx));
200        Ok(bidx)
201    }
202
203    /// Consume the builder and return an immutable [`Molecule`].
204    pub fn build(self) -> Molecule {
205        Molecule {
206            atoms: self.atoms,
207            bonds: self.bonds,
208            adjacency: self.adjacency,
209        }
210    }
211}
212
213#[cfg(test)]
214mod tests {
215    use super::*;
216    use crate::atom::Atom;
217    use crate::element::Element;
218
219    fn ethane() -> Molecule {
220        let mut b = MoleculeBuilder::new();
221        let c1 = b.add_atom(Atom::new(Element::C));
222        let c2 = b.add_atom(Atom::new(Element::C));
223        b.add_bond(c1, c2, BondOrder::Single).unwrap();
224        b.build()
225    }
226
227    #[test]
228    fn test_basic_molecule() {
229        let mol = ethane();
230        assert_eq!(mol.atom_count(), 2);
231        assert_eq!(mol.bond_count(), 1);
232    }
233
234    #[test]
235    fn test_adjacency() {
236        let mol = ethane();
237        let neighbors: Vec<_> = mol.neighbors(AtomIdx(0)).collect();
238        assert_eq!(neighbors.len(), 1);
239        assert_eq!(neighbors[0].0, AtomIdx(1));
240    }
241
242    #[test]
243    fn test_bond_between() {
244        let mol = ethane();
245        assert!(mol.bond_between(AtomIdx(0), AtomIdx(1)).is_some());
246        assert!(mol.bond_between(AtomIdx(1), AtomIdx(0)).is_some());
247    }
248
249    #[test]
250    fn test_duplicate_bond_error() {
251        let mut b = MoleculeBuilder::new();
252        let c1 = b.add_atom(Atom::new(Element::C));
253        let c2 = b.add_atom(Atom::new(Element::C));
254        b.add_bond(c1, c2, BondOrder::Single).unwrap();
255        let err = b.add_bond(c1, c2, BondOrder::Double);
256        assert!(matches!(err, Err(MolError::DuplicateBond(_, _))));
257    }
258
259    #[test]
260    fn test_formula() {
261        let mut b = MoleculeBuilder::new();
262        let c = b.add_atom(Atom::new(Element::C));
263        let n = b.add_atom(Atom::new(Element::N));
264        b.add_bond(c, n, BondOrder::Single).unwrap();
265        let mol = b.build();
266        assert_eq!(mol.formula(), "CN");
267    }
268}