Skip to main content

chematic_core/
molecule.rs

1//! Molecule graph: atoms, bonds, and adjacency list.
2
3use crate::atom::Atom;
4use crate::bond::{BondEntry, BondOrder};
5use crate::element::Element;
6use crate::stereo_group::StereoGroup;
7
8/// Newtype index for an atom in a Molecule.
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
10pub struct AtomIdx(pub u32);
11
12/// Newtype index for a bond in a Molecule.
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
14pub struct BondIdx(pub u32);
15
16/// Error types for molecule construction.
17#[derive(Debug, Clone, PartialEq, Eq)]
18pub enum MolError {
19    /// Atom index out of range.
20    InvalidAtomIdx(AtomIdx),
21    /// Duplicate bond between the same pair of atoms.
22    DuplicateBond(AtomIdx, AtomIdx),
23}
24
25impl core::fmt::Display for MolError {
26    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
27        match self {
28            Self::InvalidAtomIdx(idx) => write!(f, "invalid atom index: {}", idx.0),
29            Self::DuplicateBond(a, b) => {
30                write!(f, "duplicate bond between atoms {} and {}", a.0, b.0)
31            }
32        }
33    }
34}
35
36impl std::error::Error for MolError {}
37
38/// An immutable molecular graph built via [`MoleculeBuilder`].
39///
40/// Representation: atom list + bond list + per-atom adjacency list.
41/// No external graph library is used; all graph traversal is domain-aware.
42pub struct Molecule {
43    atoms: Vec<Atom>,
44    bonds: Vec<BondEntry>,
45    /// adjacency[atom_idx] = list of (neighbor_atom_idx, bond_idx)
46    adjacency: Vec<Vec<(AtomIdx, BondIdx)>>,
47    /// Enhanced stereo groups (ChemDraw V3000 Absolute / Or / And).
48    stereo_groups: Vec<StereoGroup>,
49}
50
51impl Molecule {
52    /// Number of heavy atoms (does not count implicit H).
53    pub fn atom_count(&self) -> usize {
54        self.atoms.len()
55    }
56
57    /// Number of bonds (edges).
58    pub fn bond_count(&self) -> usize {
59        self.bonds.len()
60    }
61
62    /// Borrow atom by index.
63    ///
64    /// # Panics
65    /// Panics if `idx` is out of range (should not happen with indices from this molecule).
66    pub fn atom(&self, idx: AtomIdx) -> &Atom {
67        let i = idx.0 as usize;
68        if i >= self.atoms.len() {
69            panic!("atom index {} out of range (molecule has {} atoms)", idx.0, self.atoms.len());
70        }
71        &self.atoms[i]
72    }
73
74    /// Borrow bond by index.
75    pub fn bond(&self, idx: BondIdx) -> &BondEntry {
76        let i = idx.0 as usize;
77        if i >= self.bonds.len() {
78            panic!("bond index {} out of range (molecule has {} bonds)", idx.0, self.bonds.len());
79        }
80        &self.bonds[i]
81    }
82
83    /// Iterate over all atoms as `(AtomIdx, &Atom)`.
84    pub fn atoms(&self) -> impl Iterator<Item = (AtomIdx, &Atom)> {
85        self.atoms
86            .iter()
87            .enumerate()
88            .map(|(i, a)| (AtomIdx(i as u32), a))
89    }
90
91    /// Iterate over all bonds as `(BondIdx, &BondEntry)`.
92    pub fn bonds(&self) -> impl Iterator<Item = (BondIdx, &BondEntry)> {
93        self.bonds
94            .iter()
95            .enumerate()
96            .map(|(i, b)| (BondIdx(i as u32), b))
97    }
98
99    /// Iterate over neighbors of `idx` as `(neighbor_atom_idx, bond_idx)`.
100    pub fn neighbors(&self, idx: AtomIdx) -> impl Iterator<Item = (AtomIdx, BondIdx)> + '_ {
101        let i = idx.0 as usize;
102        if i >= self.adjacency.len() {
103            panic!("atom index {} out of range (molecule has {} atoms)", idx.0, self.adjacency.len());
104        }
105        self.adjacency[i].iter().copied()
106    }
107
108    /// Degree (number of connected bonds) of atom `idx`.
109    pub fn degree(&self, idx: AtomIdx) -> usize {
110        let i = idx.0 as usize;
111        if i >= self.adjacency.len() {
112            panic!("atom index {} out of range (molecule has {} atoms)", idx.0, self.adjacency.len());
113        }
114        self.adjacency[i].len()
115    }
116
117    /// Return the bond between `a` and `b`, or `None` if not connected or indices are out of bounds.
118    pub fn bond_between(&self, a: AtomIdx, b: AtomIdx) -> Option<(BondIdx, &BondEntry)> {
119        let a_idx = a.0 as usize;
120        let b_idx = b.0 as usize;
121        if a_idx >= self.adjacency.len() || b_idx >= self.atoms.len() {
122            return None;
123        }
124        self.adjacency[a_idx]
125            .iter()
126            .find(|&&(nb, _)| nb == b)
127            .and_then(|&(_, bidx)| {
128                let bond_idx = bidx.0 as usize;
129                if bond_idx < self.bonds.len() {
130                    Some((bidx, &self.bonds[bond_idx]))
131                } else {
132                    None
133                }
134            })
135    }
136
137    /// Molecular formula as a Hill-order string (C first, H second, then alphabetical).
138    pub fn formula(&self) -> String {
139        use std::collections::BTreeMap;
140        let mut counts: BTreeMap<&str, u32> = BTreeMap::new();
141        for (_, atom) in self.atoms() {
142            *counts.entry(atom.element.symbol()).or_insert(0) += 1;
143        }
144
145        let mut result = String::new();
146        let push_count = |sym: &str, n: u32, out: &mut String| {
147            out.push_str(sym);
148            if n > 1 {
149                out.push_str(&n.to_string());
150            }
151        };
152
153        // Hill order: C, then H, then the remaining symbols alphabetically.
154        if let Some(c) = counts.remove("C") {
155            push_count("C", c, &mut result);
156        }
157        if let Some(h) = counts.remove("H") {
158            push_count("H", h, &mut result);
159        }
160        for (sym, count) in &counts {
161            push_count(sym, *count, &mut result);
162        }
163        result
164    }
165}
166
167// ---------------------------------------------------------------------------
168// Immutable update methods (functional-style editing)
169// ---------------------------------------------------------------------------
170
171impl Molecule {
172    /// Return a new `Molecule` with one extra atom appended, along with the
173    /// index that the new atom will have in the returned molecule.
174    pub fn with_atom_added(&self, atom: Atom) -> (Molecule, AtomIdx) {
175        let mut builder = MoleculeBuilder::new();
176        for (_, a) in self.atoms() {
177            builder.add_atom(a.clone());
178        }
179        for (_, b) in self.bonds() {
180            let _ = builder.add_bond(b.atom1, b.atom2, b.order);
181        }
182        let new_idx = builder.add_atom(atom);
183        (builder.build(), new_idx)
184    }
185
186    /// Return a new `Molecule` with one extra bond added, along with the index
187    /// of the newly added bond in the returned molecule.
188    ///
189    /// Returns `Err` if `a == b` or the bond already exists (same semantics as
190    /// [`MoleculeBuilder::add_bond`]).
191    pub fn with_bond_added(
192        &self,
193        a: AtomIdx,
194        b: AtomIdx,
195        order: BondOrder,
196    ) -> Result<(Molecule, BondIdx), MolError> {
197        let mut builder = MoleculeBuilder::new();
198        for (_, atom) in self.atoms() {
199            builder.add_atom(atom.clone());
200        }
201        for (_, bond) in self.bonds() {
202            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
203        }
204        let bond_idx = builder.add_bond(a, b, order)?;
205        Ok((builder.build(), bond_idx))
206    }
207
208    /// Return a new `Molecule` with the formal charge of atom `idx` changed.
209    pub fn with_atom_charge(&self, idx: AtomIdx, charge: i8) -> Molecule {
210        let mut builder = MoleculeBuilder::new();
211        for (aidx, atom) in self.atoms() {
212            let mut a = atom.clone();
213            if aidx == idx {
214                a.charge = charge;
215            }
216            builder.add_atom(a);
217        }
218        for (_, bond) in self.bonds() {
219            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
220        }
221        builder.build()
222    }
223
224    /// Return a new `Molecule` with the element of atom `idx` changed.
225    ///
226    /// Chirality and hydrogen count are reset to `None` when the element
227    /// changes, since those properties are element-specific.
228    pub fn with_atom_element(&self, idx: AtomIdx, el: Element) -> Molecule {
229        let mut builder = MoleculeBuilder::new();
230        for (aidx, atom) in self.atoms() {
231            let mut a = atom.clone();
232            if aidx == idx {
233                a.element = el;
234                // Reset element-specific fields so valence stays consistent.
235                a.chirality = crate::atom::Chirality::None;
236                a.hydrogen_count = None;
237                a.aromatic = false;
238            }
239            builder.add_atom(a);
240        }
241        for (_, bond) in self.bonds() {
242            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
243        }
244        builder.build()
245    }
246
247    /// Return a new `Molecule` with atom `idx` and all bonds involving it
248    /// removed.  Atom indices of survivors shift down past the removed slot.
249    ///
250    /// The returned tuple also includes a mapping from **old** `AtomIdx` to
251    /// **new** `AtomIdx` (indices that fall below `idx` are unchanged; indices
252    /// above `idx` decrease by 1).
253    pub fn with_atom_removed(&self, idx: AtomIdx) -> (Molecule, Vec<Option<AtomIdx>>) {
254        let n = self.atom_count();
255        let removed = idx.0 as usize;
256
257        // Build old→new index table.
258        let mut remap: Vec<Option<AtomIdx>> = vec![None; n];
259        let mut new_pos = 0u32;
260        for (old, slot) in remap.iter_mut().enumerate() {
261            if old == removed {
262                continue;
263            }
264            *slot = Some(AtomIdx(new_pos));
265            new_pos += 1;
266        }
267
268        let mut builder = MoleculeBuilder::new();
269        for (aidx, atom) in self.atoms() {
270            if aidx == idx {
271                continue;
272            }
273            builder.add_atom(atom.clone());
274        }
275        for (_, bond) in self.bonds() {
276            if bond.atom1 == idx || bond.atom2 == idx {
277                continue;
278            }
279            if let (Some(a1), Some(a2)) =
280                (remap[bond.atom1.0 as usize], remap[bond.atom2.0 as usize])
281            {
282                let _ = builder.add_bond(a1, a2, bond.order);
283            }
284        }
285        (builder.build(), remap)
286    }
287
288    /// Implicit hydrogen count for atom `idx` based on valence rules.
289    ///
290    /// Delegates to [`crate::valence::implicit_hcount`].
291    pub fn implicit_hydrogen_count(&self, idx: AtomIdx) -> u8 {
292        crate::valence::implicit_hcount(self, idx)
293    }
294
295    /// Hill-order molecular formula including implicit hydrogens.
296    ///
297    /// Unlike [`Self::formula`] (which counts only explicit heavy atoms),
298    /// this method adds the implicit H count for every atom so the result
299    /// reflects the true molecular composition (e.g. methane → "CH4").
300    pub fn total_formula(&self) -> String {
301        use std::collections::BTreeMap;
302        let mut counts: BTreeMap<&str, u32> = BTreeMap::new();
303        let mut implicit_h: u32 = 0;
304        for (aidx, atom) in self.atoms() {
305            *counts.entry(atom.element.symbol()).or_insert(0) += 1;
306            implicit_h += crate::valence::implicit_hcount(self, aidx) as u32;
307        }
308        *counts.entry("H").or_insert(0) += implicit_h;
309
310        let mut result = String::new();
311        let push_count = |sym: &str, n: u32, out: &mut String| {
312            out.push_str(sym);
313            if n > 1 {
314                out.push_str(&n.to_string());
315            }
316        };
317
318        if let Some(c) = counts.remove("C") {
319            push_count("C", c, &mut result);
320        }
321        if let Some(h) = counts.remove("H")
322            && h > 0
323        {
324            push_count("H", h, &mut result);
325        }
326        for (sym, count) in &counts {
327            push_count(sym, *count, &mut result);
328        }
329        result
330    }
331
332    /// Hill-order molecular formula with isotope labels.
333    ///
334    /// Like [`Self::formula`] but prefixes each element symbol with its
335    /// isotope number when `atom.isotope` is `Some(n)`.
336    /// Example: a molecule with one `¹³C` and one `O` → `"¹³CO"`.
337    pub fn formula_with_isotopes(&self) -> String {
338        use std::collections::BTreeMap;
339        // Collect (isotope_prefix + symbol) counts, heavy atoms only.
340        let mut counts: BTreeMap<String, u32> = BTreeMap::new();
341        let mut has_carbon = false;
342        let mut has_explicit_h = false;
343        for (_, atom) in self.atoms() {
344            let sym = atom.element.symbol();
345            let key = match atom.isotope {
346                Some(n) => format!("{n}{sym}"),
347                None => sym.to_string(),
348            };
349            if sym == "C" && atom.isotope.is_none() {
350                has_carbon = true;
351            }
352            if sym == "H" {
353                has_explicit_h = true;
354            }
355            *counts.entry(key).or_insert(0) += 1;
356        }
357
358        let push_count = |key: &str, n: u32, out: &mut String| {
359            out.push_str(key);
360            if n > 1 {
361                out.push_str(&n.to_string());
362            }
363        };
364
365        let mut result = String::new();
366        // Hill order: C first (if unlabelled C present), then H, then rest alphabetically.
367        if has_carbon && let Some(c) = counts.remove("C") {
368            push_count("C", c, &mut result);
369        }
370        if has_explicit_h && let Some(h) = counts.remove("H") {
371            push_count("H", h, &mut result);
372        }
373        for (key, count) in &counts {
374            push_count(key, *count, &mut result);
375        }
376        result
377    }
378
379    /// Return a new `Molecule` with atom `idx`'s aromatic flag changed.
380    pub fn with_atom_aromatic(&self, idx: AtomIdx, aromatic: bool) -> Molecule {
381        let mut builder = MoleculeBuilder::new();
382        for (aidx, atom) in self.atoms() {
383            let mut a = atom.clone();
384            if aidx == idx {
385                a.aromatic = aromatic;
386            }
387            builder.add_atom(a);
388        }
389        for (_, bond) in self.bonds() {
390            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
391        }
392        builder.build()
393    }
394
395    /// Return a new `Molecule` with bond `idx`'s order changed.
396    pub fn with_bond_order(&self, idx: BondIdx, order: BondOrder) -> Molecule {
397        let mut builder = MoleculeBuilder::new();
398        for (_, atom) in self.atoms() {
399            builder.add_atom(atom.clone());
400        }
401        for (bidx, bond) in self.bonds() {
402            let o = if bidx == idx { order } else { bond.order };
403            let _ = builder.add_bond(bond.atom1, bond.atom2, o);
404        }
405        builder.build()
406    }
407
408    /// Return a new `Molecule` with bond `idx` removed.
409    ///
410    /// Atom indices are unchanged.  Bond indices of survivors shift down.
411    pub fn with_bond_removed(&self, idx: BondIdx) -> Molecule {
412        let mut builder = MoleculeBuilder::new();
413        for (_, atom) in self.atoms() {
414            builder.add_atom(atom.clone());
415        }
416        for (bidx, bond) in self.bonds() {
417            if bidx == idx {
418                continue;
419            }
420            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
421        }
422        builder.build()
423    }
424}
425
426// ---------------------------------------------------------------------------
427// In-place mutation methods
428// ---------------------------------------------------------------------------
429
430impl Molecule {
431    /// Append a new atom and return its index.
432    pub fn add_atom(&mut self, atom: Atom) -> AtomIdx {
433        let idx = AtomIdx(self.atoms.len() as u32);
434        self.atoms.push(atom);
435        self.adjacency.push(vec![]);
436        idx
437    }
438
439    /// Remove atom `idx` and all bonds involving it.
440    ///
441    /// Returns a remapping table: `remap[old_idx]` gives the new `AtomIdx`
442    /// for surviving atoms, or `None` for the removed atom.  Atom indices
443    /// of atoms after the removed slot shift down by 1.
444    pub fn remove_atom(&mut self, idx: AtomIdx) -> Vec<Option<AtomIdx>> {
445        let n = self.atoms.len();
446        let removed = idx.0 as usize;
447
448        let mut remap: Vec<Option<AtomIdx>> = vec![None; n];
449        let mut new_pos = 0u32;
450        for (old, slot) in remap.iter_mut().enumerate() {
451            if old == removed {
452                continue;
453            }
454            *slot = Some(AtomIdx(new_pos));
455            new_pos += 1;
456        }
457
458        self.atoms.remove(removed);
459
460        // Keep only bonds not involving the removed atom; remap endpoints.
461        let mut new_bonds: Vec<BondEntry> = Vec::new();
462        for bond in &self.bonds {
463            if bond.atom1 == idx || bond.atom2 == idx {
464                continue;
465            }
466            if let (Some(a1), Some(a2)) =
467                (remap[bond.atom1.0 as usize], remap[bond.atom2.0 as usize])
468            {
469                new_bonds.push(BondEntry {
470                    atom1: a1,
471                    atom2: a2,
472                    order: bond.order,
473                });
474            }
475        }
476        self.bonds = new_bonds;
477
478        // Rebuild adjacency from scratch.
479        let new_n = self.atoms.len();
480        self.adjacency = vec![vec![]; new_n];
481        for (bidx, bond) in self.bonds.iter().enumerate() {
482            let bi = BondIdx(bidx as u32);
483            self.adjacency[bond.atom1.0 as usize].push((bond.atom2, bi));
484            self.adjacency[bond.atom2.0 as usize].push((bond.atom1, bi));
485        }
486
487        remap
488    }
489
490    /// Add a bond between `a` and `b` with the given `order`.
491    ///
492    /// Returns `Err` if `a == b` or the bond already exists.
493    pub fn add_bond(
494        &mut self,
495        a: AtomIdx,
496        b: AtomIdx,
497        order: BondOrder,
498    ) -> Result<BondIdx, MolError> {
499        let n = self.atoms.len() as u32;
500        if a.0 >= n {
501            return Err(MolError::InvalidAtomIdx(a));
502        }
503        if b.0 >= n {
504            return Err(MolError::InvalidAtomIdx(b));
505        }
506        if self.adjacency[a.0 as usize].iter().any(|&(nb, _)| nb == b) {
507            return Err(MolError::DuplicateBond(a, b));
508        }
509        let bidx = BondIdx(self.bonds.len() as u32);
510        self.bonds.push(BondEntry {
511            atom1: a,
512            atom2: b,
513            order,
514        });
515        self.adjacency[a.0 as usize].push((b, bidx));
516        self.adjacency[b.0 as usize].push((a, bidx));
517        Ok(bidx)
518    }
519
520    /// Remove bond `idx`.  Atom indices are unchanged; bond indices of
521    /// surviving bonds shift down past the removed slot.
522    pub fn remove_bond(&mut self, idx: BondIdx) {
523        let removed = idx.0 as usize;
524        if removed >= self.bonds.len() {
525            return;
526        }
527        self.bonds.remove(removed);
528        // Rebuild adjacency with renumbered bond indices.
529        let n = self.atoms.len();
530        self.adjacency = vec![vec![]; n];
531        for (bidx, bond) in self.bonds.iter().enumerate() {
532            let bi = BondIdx(bidx as u32);
533            self.adjacency[bond.atom1.0 as usize].push((bond.atom2, bi));
534            self.adjacency[bond.atom2.0 as usize].push((bond.atom1, bi));
535        }
536    }
537
538    /// Set the formal charge of atom `idx` in-place.
539    pub fn set_charge(&mut self, idx: AtomIdx, charge: i8) {
540        self.atoms[idx.0 as usize].charge = charge;
541    }
542
543    /// Set the element of atom `idx` in-place.
544    ///
545    /// Chirality and hydrogen count are reset (element-specific properties).
546    pub fn set_element(&mut self, idx: AtomIdx, el: Element) {
547        let a = &mut self.atoms[idx.0 as usize];
548        a.element = el;
549        a.chirality = crate::atom::Chirality::None;
550        a.hydrogen_count = None;
551        a.aromatic = false;
552    }
553
554    /// Set the CIP stereo code of atom `idx` in-place.
555    pub fn set_cip_code(&mut self, idx: AtomIdx, code: Option<crate::atom::CipCode>) {
556        self.atoms[idx.0 as usize].cip_code = code;
557    }
558
559    /// Return the enhanced stereo groups attached to this molecule.
560    pub fn stereo_groups(&self) -> &[StereoGroup] {
561        &self.stereo_groups
562    }
563
564    /// Replace the stereo group list in-place.
565    pub fn set_stereo_groups(&mut self, groups: Vec<StereoGroup>) {
566        self.stereo_groups = groups;
567    }
568
569    /// Add a single stereo group in-place.
570    pub fn add_stereo_group(&mut self, group: StereoGroup) {
571        self.stereo_groups.push(group);
572    }
573}
574
575// ---------------------------------------------------------------------------
576// Connectivity utilities
577// ---------------------------------------------------------------------------
578
579impl Molecule {
580    /// Return `true` if the molecule has exactly one connected component
581    /// (i.e. every atom can be reached from every other atom).
582    pub fn is_connected(&self) -> bool {
583        let n = self.atoms.len();
584        if n == 0 {
585            return true;
586        }
587        let mut visited = vec![false; n];
588        let mut stack = vec![AtomIdx(0)];
589        visited[0] = true;
590        let mut count = 1;
591        while let Some(cur) = stack.pop() {
592            for (nb, _) in self.neighbors(cur) {
593                if !visited[nb.0 as usize] {
594                    visited[nb.0 as usize] = true;
595                    count += 1;
596                    stack.push(nb);
597                }
598            }
599        }
600        count == n
601    }
602
603    /// Split the molecule into its connected components.
604    ///
605    /// Returns a `Vec` of sub-molecules, one per component.  Atoms are
606    /// renumbered within each sub-molecule starting at index 0.
607    pub fn fragments(&self) -> Vec<Molecule> {
608        let n = self.atoms.len();
609        if n == 0 {
610            return vec![];
611        }
612
613        let mut component: Vec<usize> = vec![usize::MAX; n];
614        let mut comp_id = 0;
615
616        for start in 0..n {
617            if component[start] != usize::MAX {
618                continue;
619            }
620            let mut stack = vec![start];
621            component[start] = comp_id;
622            while let Some(cur) = stack.pop() {
623                for (nb, _) in self.neighbors(AtomIdx(cur as u32)) {
624                    let ni = nb.0 as usize;
625                    if component[ni] == usize::MAX {
626                        component[ni] = comp_id;
627                        stack.push(ni);
628                    }
629                }
630            }
631            comp_id += 1;
632        }
633
634        (0..comp_id)
635            .map(|cid| {
636                let mut builder = MoleculeBuilder::new();
637                let mut old_to_new: std::collections::HashMap<AtomIdx, AtomIdx> =
638                    std::collections::HashMap::new();
639                for (aidx, atom) in self.atoms() {
640                    if component[aidx.0 as usize] == cid {
641                        let new_idx = builder.add_atom(atom.clone());
642                        old_to_new.insert(aidx, new_idx);
643                    }
644                }
645                for (_, bond) in self.bonds() {
646                    if let (Some(&a1), Some(&a2)) =
647                        (old_to_new.get(&bond.atom1), old_to_new.get(&bond.atom2))
648                    {
649                        let _ = builder.add_bond(a1, a2, bond.order);
650                    }
651                }
652                builder.build()
653            })
654            .collect()
655    }
656}
657
658/// Builder for constructing a [`Molecule`] incrementally.
659///
660/// Usage: add atoms, add bonds, then call `build()`.
661#[derive(Default)]
662pub struct MoleculeBuilder {
663    atoms: Vec<Atom>,
664    bonds: Vec<BondEntry>,
665    adjacency: Vec<Vec<(AtomIdx, BondIdx)>>,
666    stereo_groups: Vec<StereoGroup>,
667}
668
669impl MoleculeBuilder {
670    pub fn new() -> Self {
671        Self::default()
672    }
673
674    /// Create a builder pre-populated with all atoms and bonds from `mol`.
675    ///
676    /// Use this to make incremental edits to an existing molecule instead of
677    /// reconstructing it from scratch.
678    pub fn from_molecule(mol: &Molecule) -> Self {
679        let mut b = Self::new();
680        for (_, atom) in mol.atoms() {
681            b.add_atom(atom.clone());
682        }
683        for (_, bond) in mol.bonds() {
684            let _ = b.add_bond(bond.atom1, bond.atom2, bond.order);
685        }
686        b.stereo_groups = mol.stereo_groups.clone();
687        b
688    }
689
690    /// Append a stereo group to this builder.
691    pub fn add_stereo_group(&mut self, group: StereoGroup) {
692        self.stereo_groups.push(group);
693    }
694
695    /// Read-only reference to an atom already added to the builder.
696    ///
697    /// Used by the SMILES parser to infer implicit bond types without
698    /// consuming the builder (e.g. aromatic-aromatic → Aromatic bond).
699    ///
700    /// # Panics
701    /// Panics if `idx` is out of range.
702    pub fn atom_at(&self, idx: AtomIdx) -> &Atom {
703        &self.atoms[idx.0 as usize]
704    }
705
706    /// Number of atoms added so far.
707    pub fn atom_count(&self) -> usize {
708        self.atoms.len()
709    }
710
711    /// Iterate over already-added neighbors of `idx` as `(bond_idx, neighbor_atom_idx)`.
712    /// Used by kekulization tests to check whether a bond already exists in the builder.
713    pub fn atom_neighbors(&self, idx: AtomIdx) -> impl Iterator<Item = (BondIdx, AtomIdx)> + '_ {
714        self.adjacency[idx.0 as usize]
715            .iter()
716            .map(|&(nb, bidx)| (bidx, nb))
717    }
718
719    /// Add an atom and return its index.
720    pub fn add_atom(&mut self, atom: Atom) -> AtomIdx {
721        let idx = AtomIdx(self.atoms.len() as u32);
722        self.atoms.push(atom);
723        self.adjacency.push(Vec::new());
724        idx
725    }
726
727    /// Add a bond between two existing atoms.
728    ///
729    /// Returns an error if either atom index is invalid or if the bond already exists.
730    pub fn add_bond(
731        &mut self,
732        a: AtomIdx,
733        b: AtomIdx,
734        order: BondOrder,
735    ) -> Result<BondIdx, MolError> {
736        let n = self.atoms.len() as u32;
737        if a.0 >= n {
738            return Err(MolError::InvalidAtomIdx(a));
739        }
740        if b.0 >= n {
741            return Err(MolError::InvalidAtomIdx(b));
742        }
743
744        // Check for duplicate
745        for &(nb, _) in &self.adjacency[a.0 as usize] {
746            if nb == b {
747                return Err(MolError::DuplicateBond(a, b));
748            }
749        }
750
751        let bidx = BondIdx(self.bonds.len() as u32);
752        self.bonds.push(BondEntry {
753            atom1: a,
754            atom2: b,
755            order,
756        });
757        self.adjacency[a.0 as usize].push((b, bidx));
758        self.adjacency[b.0 as usize].push((a, bidx));
759        Ok(bidx)
760    }
761
762    /// Consume the builder and return an immutable [`Molecule`].
763    pub fn build(self) -> Molecule {
764        Molecule {
765            atoms: self.atoms,
766            bonds: self.bonds,
767            adjacency: self.adjacency,
768            stereo_groups: self.stereo_groups,
769        }
770    }
771}
772
773#[cfg(test)]
774mod tests {
775    use super::*;
776    use crate::atom::Atom;
777    use crate::element::Element;
778
779    fn ethane() -> Molecule {
780        let mut b = MoleculeBuilder::new();
781        let c1 = b.add_atom(Atom::new(Element::C));
782        let c2 = b.add_atom(Atom::new(Element::C));
783        b.add_bond(c1, c2, BondOrder::Single).unwrap();
784        b.build()
785    }
786
787    #[test]
788    fn test_basic_molecule() {
789        let mol = ethane();
790        assert_eq!(mol.atom_count(), 2);
791        assert_eq!(mol.bond_count(), 1);
792    }
793
794    #[test]
795    fn test_adjacency() {
796        let mol = ethane();
797        let neighbors: Vec<_> = mol.neighbors(AtomIdx(0)).collect();
798        assert_eq!(neighbors.len(), 1);
799        assert_eq!(neighbors[0].0, AtomIdx(1));
800    }
801
802    #[test]
803    fn test_bond_between() {
804        let mol = ethane();
805        assert!(mol.bond_between(AtomIdx(0), AtomIdx(1)).is_some());
806        assert!(mol.bond_between(AtomIdx(1), AtomIdx(0)).is_some());
807    }
808
809    #[test]
810    fn test_duplicate_bond_error() {
811        let mut b = MoleculeBuilder::new();
812        let c1 = b.add_atom(Atom::new(Element::C));
813        let c2 = b.add_atom(Atom::new(Element::C));
814        b.add_bond(c1, c2, BondOrder::Single).unwrap();
815        let err = b.add_bond(c1, c2, BondOrder::Double);
816        assert!(matches!(err, Err(MolError::DuplicateBond(_, _))));
817    }
818
819    #[test]
820    fn test_formula() {
821        let mut b = MoleculeBuilder::new();
822        let c = b.add_atom(Atom::new(Element::C));
823        let n = b.add_atom(Atom::new(Element::N));
824        b.add_bond(c, n, BondOrder::Single).unwrap();
825        let mol = b.build();
826        assert_eq!(mol.formula(), "CN");
827    }
828
829    #[test]
830    fn test_implicit_hydrogen_count() {
831        // Isolated C atom (sp3, 4 bonds available): 4 implicit H
832        let mut b = MoleculeBuilder::new();
833        b.add_atom(Atom::organic(Element::C));
834        let mol = b.build();
835        assert_eq!(mol.implicit_hydrogen_count(AtomIdx(0)), 4);
836    }
837
838    #[test]
839    fn test_total_formula_methane() {
840        // Organic C atom with 0 explicit bonds → 4 implicit H → CH4
841        let mut b = MoleculeBuilder::new();
842        b.add_atom(Atom::organic(Element::C));
843        let mol = b.build();
844        assert_eq!(mol.total_formula(), "CH4");
845    }
846
847    #[test]
848    fn test_total_formula_no_hydrogen() {
849        // NaCl — neither Na nor Cl is in the organic subset, no implicit H
850        let mut b = MoleculeBuilder::new();
851        let na = b.add_atom(Atom::new(Element::NA));
852        let cl = b.add_atom(Atom::new(Element::CL));
853        b.add_bond(na, cl, BondOrder::Single).unwrap();
854        let mol = b.build();
855        assert_eq!(mol.total_formula(), "ClNa");
856    }
857
858    #[test]
859    fn test_with_atom_aromatic() {
860        let mol = ethane();
861        let updated = mol.with_atom_aromatic(AtomIdx(0), true);
862        assert!(updated.atom(AtomIdx(0)).aromatic);
863        assert!(!updated.atom(AtomIdx(1)).aromatic);
864    }
865
866    #[test]
867    fn test_with_bond_order() {
868        let mol = ethane();
869        let updated = mol.with_bond_order(BondIdx(0), BondOrder::Double);
870        assert_eq!(updated.bond(BondIdx(0)).order, BondOrder::Double);
871    }
872
873    // --- mutable API ---
874
875    #[test]
876    fn test_add_remove_atom() {
877        let mut mol = ethane();
878        let n_idx = mol.add_atom(Atom::new(Element::N));
879        assert_eq!(mol.atom_count(), 3);
880        assert_eq!(mol.atom(n_idx).element.atomic_number(), 7);
881
882        let remap = mol.remove_atom(n_idx);
883        assert_eq!(mol.atom_count(), 2);
884        assert!(remap[n_idx.0 as usize].is_none());
885    }
886
887    #[test]
888    fn test_add_remove_bond() {
889        let mut mol = ethane();
890        let n_idx = mol.add_atom(Atom::new(Element::N));
891        let bidx = mol.add_bond(AtomIdx(0), n_idx, BondOrder::Single).unwrap();
892        assert_eq!(mol.bond_count(), 2);
893        mol.remove_bond(bidx);
894        assert_eq!(mol.bond_count(), 1);
895    }
896
897    #[test]
898    fn test_set_charge_element() {
899        let mut mol = ethane();
900        mol.set_charge(AtomIdx(0), 1);
901        assert_eq!(mol.atom(AtomIdx(0)).charge, 1);
902        mol.set_element(AtomIdx(0), Element::N);
903        assert_eq!(mol.atom(AtomIdx(0)).element.atomic_number(), 7);
904    }
905
906    #[test]
907    fn test_is_connected() {
908        let mol = ethane();
909        assert!(mol.is_connected());
910
911        // Two separate atoms — disconnected
912        let mut b = MoleculeBuilder::new();
913        b.add_atom(Atom::new(Element::C));
914        b.add_atom(Atom::new(Element::N));
915        let disconnected = b.build();
916        assert!(!disconnected.is_connected());
917    }
918
919    #[test]
920    fn test_fragments() {
921        // "CC.N" — two components
922        let mut b = MoleculeBuilder::new();
923        let c1 = b.add_atom(Atom::organic(Element::C));
924        let c2 = b.add_atom(Atom::organic(Element::C));
925        b.add_bond(c1, c2, BondOrder::Single).unwrap();
926        b.add_atom(Atom::new(Element::N)); // disconnected N
927        let mol = b.build();
928        let frags = mol.fragments();
929        assert_eq!(frags.len(), 2);
930        let sizes: std::collections::HashSet<usize> =
931            frags.iter().map(|f| f.atom_count()).collect();
932        assert!(sizes.contains(&2));
933        assert!(sizes.contains(&1));
934    }
935
936    #[test]
937    fn test_builder_from_molecule() {
938        let mol = ethane();
939        let mut b = MoleculeBuilder::from_molecule(&mol);
940        b.add_atom(Atom::new(Element::O));
941        let mol2 = b.build();
942        assert_eq!(mol2.atom_count(), 3);
943        assert_eq!(mol2.bond_count(), 1); // original bond preserved
944    }
945}