Skip to main content

chematic_core/
molecule.rs

1//! Molecule graph: atoms, bonds, and adjacency list.
2
3use crate::atom::Atom;
4use crate::bond::{BondEntry, BondOrder};
5use crate::element::Element;
6
7/// Newtype index for an atom in a Molecule.
8#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
9pub struct AtomIdx(pub u32);
10
11/// Newtype index for a bond in a Molecule.
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
13pub struct BondIdx(pub u32);
14
15/// Error types for molecule construction.
16#[derive(Debug, Clone, PartialEq, Eq)]
17pub enum MolError {
18    /// Atom index out of range.
19    InvalidAtomIdx(AtomIdx),
20    /// Duplicate bond between the same pair of atoms.
21    DuplicateBond(AtomIdx, AtomIdx),
22}
23
24impl core::fmt::Display for MolError {
25    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
26        match self {
27            Self::InvalidAtomIdx(idx) => write!(f, "invalid atom index: {}", idx.0),
28            Self::DuplicateBond(a, b) => write!(f, "duplicate bond between atoms {} and {}", a.0, b.0),
29        }
30    }
31}
32
33/// An immutable molecular graph built via [`MoleculeBuilder`].
34///
35/// Representation: atom list + bond list + per-atom adjacency list.
36/// No external graph library is used; all graph traversal is domain-aware.
37pub struct Molecule {
38    atoms: Vec<Atom>,
39    bonds: Vec<BondEntry>,
40    /// adjacency[atom_idx] = list of (neighbor_atom_idx, bond_idx)
41    adjacency: Vec<Vec<(AtomIdx, BondIdx)>>,
42}
43
44impl Molecule {
45    /// Number of heavy atoms (does not count implicit H).
46    pub fn atom_count(&self) -> usize {
47        self.atoms.len()
48    }
49
50    /// Number of bonds (edges).
51    pub fn bond_count(&self) -> usize {
52        self.bonds.len()
53    }
54
55    /// Borrow atom by index.
56    ///
57    /// # Panics
58    /// Panics if `idx` is out of range (should not happen with indices from this molecule).
59    pub fn atom(&self, idx: AtomIdx) -> &Atom {
60        &self.atoms[idx.0 as usize]
61    }
62
63    /// Borrow bond by index.
64    pub fn bond(&self, idx: BondIdx) -> &BondEntry {
65        &self.bonds[idx.0 as usize]
66    }
67
68    /// Iterate over all atoms as `(AtomIdx, &Atom)`.
69    pub fn atoms(&self) -> impl Iterator<Item = (AtomIdx, &Atom)> {
70        self.atoms
71            .iter()
72            .enumerate()
73            .map(|(i, a)| (AtomIdx(i as u32), a))
74    }
75
76    /// Iterate over all bonds as `(BondIdx, &BondEntry)`.
77    pub fn bonds(&self) -> impl Iterator<Item = (BondIdx, &BondEntry)> {
78        self.bonds
79            .iter()
80            .enumerate()
81            .map(|(i, b)| (BondIdx(i as u32), b))
82    }
83
84    /// Iterate over neighbors of `idx` as `(neighbor_atom_idx, bond_idx)`.
85    pub fn neighbors(&self, idx: AtomIdx) -> impl Iterator<Item = (AtomIdx, BondIdx)> + '_ {
86        self.adjacency[idx.0 as usize].iter().copied()
87    }
88
89    /// Degree (number of connected bonds) of atom `idx`.
90    pub fn degree(&self, idx: AtomIdx) -> usize {
91        self.adjacency[idx.0 as usize].len()
92    }
93
94    /// Return the bond between `a` and `b`, or `None` if not connected.
95    pub fn bond_between(&self, a: AtomIdx, b: AtomIdx) -> Option<(BondIdx, &BondEntry)> {
96        self.adjacency[a.0 as usize]
97            .iter()
98            .find(|&&(nb, _)| nb == b)
99            .map(|&(_, bidx)| (bidx, &self.bonds[bidx.0 as usize]))
100    }
101
102    /// Molecular formula as a Hill-order string (C first, H second, then alphabetical).
103    pub fn formula(&self) -> String {
104        use std::collections::BTreeMap;
105        let mut counts: BTreeMap<&str, u32> = BTreeMap::new();
106        for (_, atom) in self.atoms() {
107            *counts.entry(atom.element.symbol()).or_insert(0) += 1;
108        }
109
110        let mut result = String::new();
111        let push_count = |sym: &str, n: u32, out: &mut String| {
112            out.push_str(sym);
113            if n > 1 {
114                out.push_str(&n.to_string());
115            }
116        };
117
118        // Hill order: C, then H, then the remaining symbols alphabetically.
119        if let Some(c) = counts.remove("C") {
120            push_count("C", c, &mut result);
121        }
122        if let Some(h) = counts.remove("H") {
123            push_count("H", h, &mut result);
124        }
125        for (sym, count) in &counts {
126            push_count(sym, *count, &mut result);
127        }
128        result
129    }
130}
131
132// ---------------------------------------------------------------------------
133// Immutable update methods (functional-style editing)
134// ---------------------------------------------------------------------------
135
136impl Molecule {
137    /// Return a new `Molecule` with one extra atom appended, along with the
138    /// index that the new atom will have in the returned molecule.
139    pub fn with_atom_added(&self, atom: Atom) -> (Molecule, AtomIdx) {
140        let mut builder = MoleculeBuilder::new();
141        for (_, a) in self.atoms() {
142            builder.add_atom(a.clone());
143        }
144        for (_, b) in self.bonds() {
145            let _ = builder.add_bond(b.atom1, b.atom2, b.order);
146        }
147        let new_idx = builder.add_atom(atom);
148        (builder.build(), new_idx)
149    }
150
151    /// Return a new `Molecule` with one extra bond added, along with the index
152    /// of the newly added bond in the returned molecule.
153    ///
154    /// Returns `Err` if `a == b` or the bond already exists (same semantics as
155    /// [`MoleculeBuilder::add_bond`]).
156    pub fn with_bond_added(
157        &self,
158        a: AtomIdx,
159        b: AtomIdx,
160        order: BondOrder,
161    ) -> Result<(Molecule, BondIdx), MolError> {
162        let mut builder = MoleculeBuilder::new();
163        for (_, atom) in self.atoms() {
164            builder.add_atom(atom.clone());
165        }
166        for (_, bond) in self.bonds() {
167            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
168        }
169        let bond_idx = builder.add_bond(a, b, order)?;
170        Ok((builder.build(), bond_idx))
171    }
172
173    /// Return a new `Molecule` with the formal charge of atom `idx` changed.
174    pub fn with_atom_charge(&self, idx: AtomIdx, charge: i8) -> Molecule {
175        let mut builder = MoleculeBuilder::new();
176        for (aidx, atom) in self.atoms() {
177            let mut a = atom.clone();
178            if aidx == idx { a.charge = charge; }
179            builder.add_atom(a);
180        }
181        for (_, bond) in self.bonds() {
182            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
183        }
184        builder.build()
185    }
186
187    /// Return a new `Molecule` with the element of atom `idx` changed.
188    ///
189    /// Chirality and hydrogen count are reset to `None` when the element
190    /// changes, since those properties are element-specific.
191    pub fn with_atom_element(&self, idx: AtomIdx, el: Element) -> Molecule {
192        let mut builder = MoleculeBuilder::new();
193        for (aidx, atom) in self.atoms() {
194            let mut a = atom.clone();
195            if aidx == idx {
196                a.element = el;
197                // Reset element-specific fields so valence stays consistent.
198                a.chirality = crate::atom::Chirality::None;
199                a.hydrogen_count = None;
200                a.aromatic = false;
201            }
202            builder.add_atom(a);
203        }
204        for (_, bond) in self.bonds() {
205            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
206        }
207        builder.build()
208    }
209
210    /// Return a new `Molecule` with atom `idx` and all bonds involving it
211    /// removed.  Atom indices of survivors shift down past the removed slot.
212    ///
213    /// The returned tuple also includes a mapping from **old** `AtomIdx` to
214    /// **new** `AtomIdx` (indices that fall below `idx` are unchanged; indices
215    /// above `idx` decrease by 1).
216    pub fn with_atom_removed(&self, idx: AtomIdx) -> (Molecule, Vec<Option<AtomIdx>>) {
217        let n = self.atom_count();
218        let removed = idx.0 as usize;
219
220        // Build old→new index table.
221        let mut remap: Vec<Option<AtomIdx>> = vec![None; n];
222        let mut new_pos = 0u32;
223        for old in 0..n {
224            if old == removed {
225                continue;
226            }
227            remap[old] = Some(AtomIdx(new_pos));
228            new_pos += 1;
229        }
230
231        let mut builder = MoleculeBuilder::new();
232        for (aidx, atom) in self.atoms() {
233            if aidx == idx { continue; }
234            builder.add_atom(atom.clone());
235        }
236        for (_, bond) in self.bonds() {
237            if bond.atom1 == idx || bond.atom2 == idx { continue; }
238            if let (Some(a1), Some(a2)) = (remap[bond.atom1.0 as usize], remap[bond.atom2.0 as usize]) {
239                let _ = builder.add_bond(a1, a2, bond.order);
240            }
241        }
242        (builder.build(), remap)
243    }
244
245    /// Return a new `Molecule` with bond `idx` removed.
246    ///
247    /// Atom indices are unchanged.  Bond indices of survivors shift down.
248    pub fn with_bond_removed(&self, idx: BondIdx) -> Molecule {
249        let mut builder = MoleculeBuilder::new();
250        for (_, atom) in self.atoms() {
251            builder.add_atom(atom.clone());
252        }
253        for (bidx, bond) in self.bonds() {
254            if bidx == idx { continue; }
255            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
256        }
257        builder.build()
258    }
259}
260
261/// Builder for constructing a [`Molecule`] incrementally.
262///
263/// Usage: add atoms, add bonds, then call `build()`.
264#[derive(Default)]
265pub struct MoleculeBuilder {
266    atoms: Vec<Atom>,
267    bonds: Vec<BondEntry>,
268    adjacency: Vec<Vec<(AtomIdx, BondIdx)>>,
269}
270
271impl MoleculeBuilder {
272    pub fn new() -> Self {
273        Self::default()
274    }
275
276    /// Read-only reference to an atom already added to the builder.
277    ///
278    /// Used by the SMILES parser to infer implicit bond types without
279    /// consuming the builder (e.g. aromatic-aromatic → Aromatic bond).
280    ///
281    /// # Panics
282    /// Panics if `idx` is out of range.
283    pub fn atom_at(&self, idx: AtomIdx) -> &Atom {
284        &self.atoms[idx.0 as usize]
285    }
286
287    /// Number of atoms added so far.
288    pub fn atom_count(&self) -> usize {
289        self.atoms.len()
290    }
291
292    /// Iterate over already-added neighbors of `idx` as `(bond_idx, neighbor_atom_idx)`.
293    /// Used by kekulization tests to check whether a bond already exists in the builder.
294    pub fn atom_neighbors(&self, idx: AtomIdx) -> impl Iterator<Item = (BondIdx, AtomIdx)> + '_ {
295        self.adjacency[idx.0 as usize].iter().map(|&(nb, bidx)| (bidx, nb))
296    }
297
298    /// Add an atom and return its index.
299    pub fn add_atom(&mut self, atom: Atom) -> AtomIdx {
300        let idx = AtomIdx(self.atoms.len() as u32);
301        self.atoms.push(atom);
302        self.adjacency.push(Vec::new());
303        idx
304    }
305
306    /// Add a bond between two existing atoms.
307    ///
308    /// Returns an error if either atom index is invalid or if the bond already exists.
309    pub fn add_bond(&mut self, a: AtomIdx, b: AtomIdx, order: BondOrder) -> Result<BondIdx, MolError> {
310        let n = self.atoms.len() as u32;
311        if a.0 >= n { return Err(MolError::InvalidAtomIdx(a)); }
312        if b.0 >= n { return Err(MolError::InvalidAtomIdx(b)); }
313
314        // Check for duplicate
315        for &(nb, _) in &self.adjacency[a.0 as usize] {
316            if nb == b {
317                return Err(MolError::DuplicateBond(a, b));
318            }
319        }
320
321        let bidx = BondIdx(self.bonds.len() as u32);
322        self.bonds.push(BondEntry { atom1: a, atom2: b, order });
323        self.adjacency[a.0 as usize].push((b, bidx));
324        self.adjacency[b.0 as usize].push((a, bidx));
325        Ok(bidx)
326    }
327
328    /// Consume the builder and return an immutable [`Molecule`].
329    pub fn build(self) -> Molecule {
330        Molecule {
331            atoms: self.atoms,
332            bonds: self.bonds,
333            adjacency: self.adjacency,
334        }
335    }
336}
337
338#[cfg(test)]
339mod tests {
340    use super::*;
341    use crate::atom::Atom;
342    use crate::element::Element;
343
344    fn ethane() -> Molecule {
345        let mut b = MoleculeBuilder::new();
346        let c1 = b.add_atom(Atom::new(Element::C));
347        let c2 = b.add_atom(Atom::new(Element::C));
348        b.add_bond(c1, c2, BondOrder::Single).unwrap();
349        b.build()
350    }
351
352    #[test]
353    fn test_basic_molecule() {
354        let mol = ethane();
355        assert_eq!(mol.atom_count(), 2);
356        assert_eq!(mol.bond_count(), 1);
357    }
358
359    #[test]
360    fn test_adjacency() {
361        let mol = ethane();
362        let neighbors: Vec<_> = mol.neighbors(AtomIdx(0)).collect();
363        assert_eq!(neighbors.len(), 1);
364        assert_eq!(neighbors[0].0, AtomIdx(1));
365    }
366
367    #[test]
368    fn test_bond_between() {
369        let mol = ethane();
370        assert!(mol.bond_between(AtomIdx(0), AtomIdx(1)).is_some());
371        assert!(mol.bond_between(AtomIdx(1), AtomIdx(0)).is_some());
372    }
373
374    #[test]
375    fn test_duplicate_bond_error() {
376        let mut b = MoleculeBuilder::new();
377        let c1 = b.add_atom(Atom::new(Element::C));
378        let c2 = b.add_atom(Atom::new(Element::C));
379        b.add_bond(c1, c2, BondOrder::Single).unwrap();
380        let err = b.add_bond(c1, c2, BondOrder::Double);
381        assert!(matches!(err, Err(MolError::DuplicateBond(_, _))));
382    }
383
384    #[test]
385    fn test_formula() {
386        let mut b = MoleculeBuilder::new();
387        let c = b.add_atom(Atom::new(Element::C));
388        let n = b.add_atom(Atom::new(Element::N));
389        b.add_bond(c, n, BondOrder::Single).unwrap();
390        let mol = b.build();
391        assert_eq!(mol.formula(), "CN");
392    }
393}