Skip to main content

chematic_core/
molecule.rs

1//! Molecule graph: atoms, bonds, and adjacency list.
2
3use crate::atom::Atom;
4use crate::bond::{BondEntry, BondOrder};
5use crate::element::Element;
6use crate::stereo_group::StereoGroup;
7
8/// Newtype index for an atom in a Molecule.
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
10pub struct AtomIdx(pub u32);
11
12/// Newtype index for a bond in a Molecule.
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
14pub struct BondIdx(pub u32);
15
16/// Error types for molecule construction.
17#[derive(Debug, Clone, PartialEq, Eq)]
18pub enum MolError {
19    /// Atom index out of range.
20    InvalidAtomIdx(AtomIdx),
21    /// Duplicate bond between the same pair of atoms.
22    DuplicateBond(AtomIdx, AtomIdx),
23}
24
25impl core::fmt::Display for MolError {
26    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
27        match self {
28            Self::InvalidAtomIdx(idx) => write!(f, "invalid atom index: {}", idx.0),
29            Self::DuplicateBond(a, b) => {
30                write!(f, "duplicate bond between atoms {} and {}", a.0, b.0)
31            }
32        }
33    }
34}
35
36impl std::error::Error for MolError {}
37
38/// An immutable molecular graph built via [`MoleculeBuilder`].
39///
40/// Representation: atom list + bond list + per-atom adjacency list.
41/// No external graph library is used; all graph traversal is domain-aware.
42/// Sentinel used in `stereo_neighbor_order` to represent the implicit H in a bracket atom.
43pub const STEREO_H_SENTINEL: u32 = u32::MAX;
44
45pub struct Molecule {
46    atoms: Vec<Atom>,
47    bonds: Vec<BondEntry>,
48    /// adjacency[atom_idx] = list of (neighbor_atom_idx, bond_idx)
49    adjacency: Vec<Vec<(AtomIdx, BondIdx)>>,
50    /// Enhanced stereo groups (ChemDraw V3000 Absolute / Or / And).
51    stereo_groups: Vec<StereoGroup>,
52    /// SMILES-text-order neighbor sequence for chiral atoms.
53    ///
54    /// Keyed by atom index.  Each value lists the atom indices of neighbors in
55    /// the order they appeared in the SMILES string (including ring-closure
56    /// partners), with [`STEREO_H_SENTINEL`] (`u32::MAX`) standing in for the
57    /// implicit bracket H.  Populated by the SMILES parser; absent for atoms
58    /// not parsed from SMILES or without recorded stereo.
59    stereo_neighbor_order: std::collections::HashMap<u32, Vec<u32>>,
60}
61
62impl Molecule {
63    /// Number of heavy atoms (does not count implicit H).
64    pub fn atom_count(&self) -> usize {
65        self.atoms.len()
66    }
67
68    /// Number of bonds (edges).
69    pub fn bond_count(&self) -> usize {
70        self.bonds.len()
71    }
72
73    /// Borrow atom by index.
74    ///
75    /// # Panics
76    /// Panics if `idx` is out of range (should not happen with indices from this molecule).
77    pub fn atom(&self, idx: AtomIdx) -> &Atom {
78        let i = idx.0 as usize;
79        if i >= self.atoms.len() {
80            panic!("atom index {} out of range (molecule has {} atoms)", idx.0, self.atoms.len());
81        }
82        &self.atoms[i]
83    }
84
85    /// Borrow bond by index.
86    pub fn bond(&self, idx: BondIdx) -> &BondEntry {
87        let i = idx.0 as usize;
88        if i >= self.bonds.len() {
89            panic!("bond index {} out of range (molecule has {} bonds)", idx.0, self.bonds.len());
90        }
91        &self.bonds[i]
92    }
93
94    /// Iterate over all atoms as `(AtomIdx, &Atom)`.
95    pub fn atoms(&self) -> impl Iterator<Item = (AtomIdx, &Atom)> {
96        self.atoms
97            .iter()
98            .enumerate()
99            .map(|(i, a)| (AtomIdx(i as u32), a))
100    }
101
102    /// Iterate over all bonds as `(BondIdx, &BondEntry)`.
103    pub fn bonds(&self) -> impl Iterator<Item = (BondIdx, &BondEntry)> {
104        self.bonds
105            .iter()
106            .enumerate()
107            .map(|(i, b)| (BondIdx(i as u32), b))
108    }
109
110    /// Iterate over neighbors of `idx` as `(neighbor_atom_idx, bond_idx)`.
111    pub fn neighbors(&self, idx: AtomIdx) -> impl Iterator<Item = (AtomIdx, BondIdx)> + '_ {
112        let i = idx.0 as usize;
113        if i >= self.adjacency.len() {
114            panic!("atom index {} out of range (molecule has {} atoms)", idx.0, self.adjacency.len());
115        }
116        self.adjacency[i].iter().copied()
117    }
118
119    /// Degree (number of connected bonds) of atom `idx`.
120    pub fn degree(&self, idx: AtomIdx) -> usize {
121        let i = idx.0 as usize;
122        if i >= self.adjacency.len() {
123            panic!("atom index {} out of range (molecule has {} atoms)", idx.0, self.adjacency.len());
124        }
125        self.adjacency[i].len()
126    }
127
128    /// Return the bond between `a` and `b`, or `None` if not connected or indices are out of bounds.
129    pub fn bond_between(&self, a: AtomIdx, b: AtomIdx) -> Option<(BondIdx, &BondEntry)> {
130        let a_idx = a.0 as usize;
131        let b_idx = b.0 as usize;
132        if a_idx >= self.adjacency.len() || b_idx >= self.atoms.len() {
133            return None;
134        }
135        self.adjacency[a_idx]
136            .iter()
137            .find(|&&(nb, _)| nb == b)
138            .and_then(|&(_, bidx)| {
139                let bond_idx = bidx.0 as usize;
140                if bond_idx < self.bonds.len() {
141                    Some((bidx, &self.bonds[bond_idx]))
142                } else {
143                    None
144                }
145            })
146    }
147
148    /// Molecular formula as a Hill-order string (C first, H second, then alphabetical).
149    pub fn formula(&self) -> String {
150        use std::collections::BTreeMap;
151        let mut counts: BTreeMap<&str, u32> = BTreeMap::new();
152        for (_, atom) in self.atoms() {
153            *counts.entry(atom.element.symbol()).or_insert(0) += 1;
154        }
155
156        let mut result = String::new();
157        let push_count = |sym: &str, n: u32, out: &mut String| {
158            out.push_str(sym);
159            if n > 1 {
160                out.push_str(&n.to_string());
161            }
162        };
163
164        // Hill order: C, then H, then the remaining symbols alphabetically.
165        if let Some(c) = counts.remove("C") {
166            push_count("C", c, &mut result);
167        }
168        if let Some(h) = counts.remove("H") {
169            push_count("H", h, &mut result);
170        }
171        for (sym, count) in &counts {
172            push_count(sym, *count, &mut result);
173        }
174        result
175    }
176}
177
178// ---------------------------------------------------------------------------
179// Immutable update methods (functional-style editing)
180// ---------------------------------------------------------------------------
181
182impl Molecule {
183    /// Return a new `Molecule` with one extra atom appended, along with the
184    /// index that the new atom will have in the returned molecule.
185    pub fn with_atom_added(&self, atom: Atom) -> (Molecule, AtomIdx) {
186        let mut builder = MoleculeBuilder::new();
187        for (_, a) in self.atoms() {
188            builder.add_atom(a.clone());
189        }
190        for (_, b) in self.bonds() {
191            let _ = builder.add_bond(b.atom1, b.atom2, b.order);
192        }
193        builder.copy_stereo_from(self);
194        let new_idx = builder.add_atom(atom);
195        (builder.build(), new_idx)
196    }
197
198    /// Return a new `Molecule` with one extra bond added, along with the index
199    /// of the newly added bond in the returned molecule.
200    ///
201    /// Returns `Err` if `a == b` or the bond already exists (same semantics as
202    /// [`MoleculeBuilder::add_bond`]).
203    pub fn with_bond_added(
204        &self,
205        a: AtomIdx,
206        b: AtomIdx,
207        order: BondOrder,
208    ) -> Result<(Molecule, BondIdx), MolError> {
209        let mut builder = MoleculeBuilder::new();
210        for (_, atom) in self.atoms() {
211            builder.add_atom(atom.clone());
212        }
213        for (_, bond) in self.bonds() {
214            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
215        }
216        builder.copy_stereo_from(self);
217        let bond_idx = builder.add_bond(a, b, order)?;
218        Ok((builder.build(), bond_idx))
219    }
220
221    /// Return a new `Molecule` with the formal charge of atom `idx` changed.
222    pub fn with_atom_charge(&self, idx: AtomIdx, charge: i8) -> Molecule {
223        let mut builder = MoleculeBuilder::new();
224        for (aidx, atom) in self.atoms() {
225            let mut a = atom.clone();
226            if aidx == idx {
227                a.charge = charge;
228            }
229            builder.add_atom(a);
230        }
231        for (_, bond) in self.bonds() {
232            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
233        }
234        builder.copy_stereo_from(self);
235        builder.build()
236    }
237
238    /// Return a new `Molecule` with the element of atom `idx` changed.
239    ///
240    /// Chirality and hydrogen count are reset to `None` when the element
241    /// changes, since those properties are element-specific.
242    pub fn with_atom_element(&self, idx: AtomIdx, el: Element) -> Molecule {
243        let mut builder = MoleculeBuilder::new();
244        for (aidx, atom) in self.atoms() {
245            let mut a = atom.clone();
246            if aidx == idx {
247                a.element = el;
248                // Reset element-specific fields so valence stays consistent.
249                a.chirality = crate::atom::Chirality::None;
250                a.hydrogen_count = None;
251                a.aromatic = false;
252            }
253            builder.add_atom(a);
254        }
255        for (_, bond) in self.bonds() {
256            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
257        }
258        builder.copy_stereo_from(self);
259        // Chirality was cleared for the changed atom; remove its stereo order too.
260        builder.clear_stereo_neighbor_order(idx);
261        builder.build()
262    }
263
264    /// Return a new `Molecule` with atom `idx` and all bonds involving it
265    /// removed.  Atom indices of survivors shift down past the removed slot.
266    ///
267    /// The returned tuple also includes a mapping from **old** `AtomIdx` to
268    /// **new** `AtomIdx` (indices that fall below `idx` are unchanged; indices
269    /// above `idx` decrease by 1).
270    pub fn with_atom_removed(&self, idx: AtomIdx) -> (Molecule, Vec<Option<AtomIdx>>) {
271        let n = self.atom_count();
272        let removed = idx.0 as usize;
273
274        // Build old→new index table.
275        let mut remap: Vec<Option<AtomIdx>> = vec![None; n];
276        let mut new_pos = 0u32;
277        for (old, slot) in remap.iter_mut().enumerate() {
278            if old == removed {
279                continue;
280            }
281            *slot = Some(AtomIdx(new_pos));
282            new_pos += 1;
283        }
284
285        let mut builder = MoleculeBuilder::new();
286        for (aidx, atom) in self.atoms() {
287            if aidx == idx {
288                continue;
289            }
290            builder.add_atom(atom.clone());
291        }
292        for (_, bond) in self.bonds() {
293            if bond.atom1 == idx || bond.atom2 == idx {
294                continue;
295            }
296            if let (Some(a1), Some(a2)) =
297                (remap[bond.atom1.0 as usize], remap[bond.atom2.0 as usize])
298            {
299                let _ = builder.add_bond(a1, a2, bond.order);
300            }
301        }
302        // Remap stereo neighbor order: drop removed atom's entry, remap neighbor indices.
303        for (old_key, order) in &self.stereo_neighbor_order {
304            let old_atom = *old_key as usize;
305            if old_atom == removed {
306                continue; // removed atom's stereo is gone
307            }
308            if let Some(Some(new_key)) = remap.get(old_atom) {
309                let new_order: Vec<u32> = order
310                    .iter()
311                    .filter_map(|&v| {
312                        if v == STEREO_H_SENTINEL {
313                            Some(STEREO_H_SENTINEL)
314                        } else if v as usize == removed {
315                            None // neighbor was the removed atom — stereo is now invalid
316                        } else {
317                            remap.get(v as usize).and_then(|r| r.map(|a| a.0))
318                        }
319                    })
320                    .collect();
321                builder.set_stereo_neighbor_order(*new_key, new_order);
322            }
323        }
324        (builder.build(), remap)
325    }
326
327    /// Implicit hydrogen count for atom `idx` based on valence rules.
328    ///
329    /// Delegates to [`crate::valence::implicit_hcount`].
330    pub fn implicit_hydrogen_count(&self, idx: AtomIdx) -> u8 {
331        crate::valence::implicit_hcount(self, idx)
332    }
333
334    /// Hill-order molecular formula including implicit hydrogens.
335    ///
336    /// Unlike [`Self::formula`] (which counts only explicit heavy atoms),
337    /// this method adds the implicit H count for every atom so the result
338    /// reflects the true molecular composition (e.g. methane → "CH4").
339    pub fn total_formula(&self) -> String {
340        use std::collections::BTreeMap;
341        let mut counts: BTreeMap<&str, u32> = BTreeMap::new();
342        let mut implicit_h: u32 = 0;
343        for (aidx, atom) in self.atoms() {
344            *counts.entry(atom.element.symbol()).or_insert(0) += 1;
345            implicit_h += crate::valence::implicit_hcount(self, aidx) as u32;
346        }
347        *counts.entry("H").or_insert(0) += implicit_h;
348
349        let mut result = String::new();
350        let push_count = |sym: &str, n: u32, out: &mut String| {
351            out.push_str(sym);
352            if n > 1 {
353                out.push_str(&n.to_string());
354            }
355        };
356
357        if let Some(c) = counts.remove("C") {
358            push_count("C", c, &mut result);
359        }
360        if let Some(h) = counts.remove("H")
361            && h > 0
362        {
363            push_count("H", h, &mut result);
364        }
365        for (sym, count) in &counts {
366            push_count(sym, *count, &mut result);
367        }
368        result
369    }
370
371    /// Hill-order molecular formula with isotope labels.
372    ///
373    /// Like [`Self::formula`] but prefixes each element symbol with its
374    /// isotope number when `atom.isotope` is `Some(n)`.
375    /// Example: a molecule with one `¹³C` and one `O` → `"¹³CO"`.
376    pub fn formula_with_isotopes(&self) -> String {
377        use std::collections::BTreeMap;
378        // Collect (isotope_prefix + symbol) counts, heavy atoms only.
379        let mut counts: BTreeMap<String, u32> = BTreeMap::new();
380        let mut has_carbon = false;
381        let mut has_explicit_h = false;
382        for (_, atom) in self.atoms() {
383            let sym = atom.element.symbol();
384            let key = match atom.isotope {
385                Some(n) => format!("{n}{sym}"),
386                None => sym.to_string(),
387            };
388            if sym == "C" && atom.isotope.is_none() {
389                has_carbon = true;
390            }
391            if sym == "H" {
392                has_explicit_h = true;
393            }
394            *counts.entry(key).or_insert(0) += 1;
395        }
396
397        let push_count = |key: &str, n: u32, out: &mut String| {
398            out.push_str(key);
399            if n > 1 {
400                out.push_str(&n.to_string());
401            }
402        };
403
404        let mut result = String::new();
405        // Hill order: C first (if unlabelled C present), then H, then rest alphabetically.
406        if has_carbon && let Some(c) = counts.remove("C") {
407            push_count("C", c, &mut result);
408        }
409        if has_explicit_h && let Some(h) = counts.remove("H") {
410            push_count("H", h, &mut result);
411        }
412        for (key, count) in &counts {
413            push_count(key, *count, &mut result);
414        }
415        result
416    }
417
418    /// Return a new `Molecule` with atom `idx`'s aromatic flag changed.
419    pub fn with_atom_aromatic(&self, idx: AtomIdx, aromatic: bool) -> Molecule {
420        let mut builder = MoleculeBuilder::new();
421        for (aidx, atom) in self.atoms() {
422            let mut a = atom.clone();
423            if aidx == idx {
424                a.aromatic = aromatic;
425            }
426            builder.add_atom(a);
427        }
428        for (_, bond) in self.bonds() {
429            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
430        }
431        builder.copy_stereo_from(self);
432        builder.build()
433    }
434
435    /// Return a new `Molecule` with bond `idx`'s order changed.
436    pub fn with_bond_order(&self, idx: BondIdx, order: BondOrder) -> Molecule {
437        let mut builder = MoleculeBuilder::new();
438        for (_, atom) in self.atoms() {
439            builder.add_atom(atom.clone());
440        }
441        for (bidx, bond) in self.bonds() {
442            let o = if bidx == idx { order } else { bond.order };
443            let _ = builder.add_bond(bond.atom1, bond.atom2, o);
444        }
445        builder.copy_stereo_from(self);
446        builder.build()
447    }
448
449    /// Return a new `Molecule` with bond `idx` removed.
450    ///
451    /// Atom indices are unchanged.  Bond indices of survivors shift down.
452    pub fn with_bond_removed(&self, idx: BondIdx) -> Molecule {
453        let mut builder = MoleculeBuilder::new();
454        for (_, atom) in self.atoms() {
455            builder.add_atom(atom.clone());
456        }
457        for (bidx, bond) in self.bonds() {
458            if bidx == idx {
459                continue;
460            }
461            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
462        }
463        builder.copy_stereo_from(self);
464        builder.build()
465    }
466}
467
468// ---------------------------------------------------------------------------
469// In-place mutation methods
470// ---------------------------------------------------------------------------
471
472impl Molecule {
473    /// Append a new atom and return its index.
474    pub fn add_atom(&mut self, atom: Atom) -> AtomIdx {
475        let idx = AtomIdx(self.atoms.len() as u32);
476        self.atoms.push(atom);
477        self.adjacency.push(vec![]);
478        idx
479    }
480
481    /// Remove atom `idx` and all bonds involving it.
482    ///
483    /// Returns a remapping table: `remap[old_idx]` gives the new `AtomIdx`
484    /// for surviving atoms, or `None` for the removed atom.  Atom indices
485    /// of atoms after the removed slot shift down by 1.
486    pub fn remove_atom(&mut self, idx: AtomIdx) -> Vec<Option<AtomIdx>> {
487        let n = self.atoms.len();
488        let removed = idx.0 as usize;
489
490        let mut remap: Vec<Option<AtomIdx>> = vec![None; n];
491        let mut new_pos = 0u32;
492        for (old, slot) in remap.iter_mut().enumerate() {
493            if old == removed {
494                continue;
495            }
496            *slot = Some(AtomIdx(new_pos));
497            new_pos += 1;
498        }
499
500        self.atoms.remove(removed);
501
502        // Keep only bonds not involving the removed atom; remap endpoints.
503        let mut new_bonds: Vec<BondEntry> = Vec::new();
504        for bond in &self.bonds {
505            if bond.atom1 == idx || bond.atom2 == idx {
506                continue;
507            }
508            if let (Some(a1), Some(a2)) =
509                (remap[bond.atom1.0 as usize], remap[bond.atom2.0 as usize])
510            {
511                new_bonds.push(BondEntry {
512                    atom1: a1,
513                    atom2: a2,
514                    order: bond.order,
515                });
516            }
517        }
518        self.bonds = new_bonds;
519
520        // Rebuild adjacency from scratch.
521        let new_n = self.atoms.len();
522        self.adjacency = vec![vec![]; new_n];
523        for (bidx, bond) in self.bonds.iter().enumerate() {
524            let bi = BondIdx(bidx as u32);
525            self.adjacency[bond.atom1.0 as usize].push((bond.atom2, bi));
526            self.adjacency[bond.atom2.0 as usize].push((bond.atom1, bi));
527        }
528
529        // Remap stereo neighbor order in-place.
530        let old_stereo = std::mem::take(&mut self.stereo_neighbor_order);
531        for (old_key, order) in old_stereo {
532            let old_atom = old_key as usize;
533            if old_atom == removed {
534                continue;
535            }
536            if let Some(Some(new_key)) = remap.get(old_atom) {
537                let new_order: Vec<u32> = order
538                    .iter()
539                    .filter_map(|&v| {
540                        if v == STEREO_H_SENTINEL {
541                            Some(STEREO_H_SENTINEL)
542                        } else if v as usize == removed {
543                            None
544                        } else {
545                            remap.get(v as usize).and_then(|r| r.map(|a| a.0))
546                        }
547                    })
548                    .collect();
549                self.stereo_neighbor_order.insert(new_key.0, new_order);
550            }
551        }
552
553        remap
554    }
555
556    /// Add a bond between `a` and `b` with the given `order`.
557    ///
558    /// Returns `Err` if `a == b` or the bond already exists.
559    pub fn add_bond(
560        &mut self,
561        a: AtomIdx,
562        b: AtomIdx,
563        order: BondOrder,
564    ) -> Result<BondIdx, MolError> {
565        let n = self.atoms.len() as u32;
566        if a.0 >= n {
567            return Err(MolError::InvalidAtomIdx(a));
568        }
569        if b.0 >= n {
570            return Err(MolError::InvalidAtomIdx(b));
571        }
572        if self.adjacency[a.0 as usize].iter().any(|&(nb, _)| nb == b) {
573            return Err(MolError::DuplicateBond(a, b));
574        }
575        let bidx = BondIdx(self.bonds.len() as u32);
576        self.bonds.push(BondEntry {
577            atom1: a,
578            atom2: b,
579            order,
580        });
581        self.adjacency[a.0 as usize].push((b, bidx));
582        self.adjacency[b.0 as usize].push((a, bidx));
583        Ok(bidx)
584    }
585
586    /// Remove bond `idx`.  Atom indices are unchanged; bond indices of
587    /// surviving bonds shift down past the removed slot.
588    pub fn remove_bond(&mut self, idx: BondIdx) {
589        let removed = idx.0 as usize;
590        if removed >= self.bonds.len() {
591            return;
592        }
593        self.bonds.remove(removed);
594        // Rebuild adjacency with renumbered bond indices.
595        let n = self.atoms.len();
596        self.adjacency = vec![vec![]; n];
597        for (bidx, bond) in self.bonds.iter().enumerate() {
598            let bi = BondIdx(bidx as u32);
599            self.adjacency[bond.atom1.0 as usize].push((bond.atom2, bi));
600            self.adjacency[bond.atom2.0 as usize].push((bond.atom1, bi));
601        }
602    }
603
604    /// Set the formal charge of atom `idx` in-place.
605    pub fn set_charge(&mut self, idx: AtomIdx, charge: i8) {
606        self.atoms[idx.0 as usize].charge = charge;
607    }
608
609    /// Set the element of atom `idx` in-place.
610    ///
611    /// Chirality and hydrogen count are reset (element-specific properties).
612    pub fn set_element(&mut self, idx: AtomIdx, el: Element) {
613        let a = &mut self.atoms[idx.0 as usize];
614        a.element = el;
615        a.chirality = crate::atom::Chirality::None;
616        a.hydrogen_count = None;
617        a.aromatic = false;
618    }
619
620    /// Set the CIP stereo code of atom `idx` in-place.
621    pub fn set_cip_code(&mut self, idx: AtomIdx, code: Option<crate::atom::CipCode>) {
622        self.atoms[idx.0 as usize].cip_code = code;
623    }
624
625    /// Return the enhanced stereo groups attached to this molecule.
626    pub fn stereo_groups(&self) -> &[StereoGroup] {
627        &self.stereo_groups
628    }
629
630    /// Replace the stereo group list in-place.
631    pub fn set_stereo_groups(&mut self, groups: Vec<StereoGroup>) {
632        self.stereo_groups = groups;
633    }
634
635    /// Add a single stereo group in-place.
636    pub fn add_stereo_group(&mut self, group: StereoGroup) {
637        self.stereo_groups.push(group);
638    }
639
640    /// SMILES-text-order neighbor sequence for a chiral atom.
641    ///
642    /// Returns `None` for atoms not parsed from SMILES or without stereo.
643    /// The slice contains neighbor atom indices in SMILES text order;
644    /// [`STEREO_H_SENTINEL`] (`u32::MAX`) marks the implicit bracket-H slot.
645    pub fn stereo_neighbor_order(&self, idx: AtomIdx) -> Option<&[u32]> {
646        self.stereo_neighbor_order
647            .get(&idx.0)
648            .map(|v| v.as_slice())
649    }
650
651    /// Set the SMILES stereo neighbor order for atom `idx`.
652    pub fn set_stereo_neighbor_order(&mut self, idx: AtomIdx, order: Vec<u32>) {
653        self.stereo_neighbor_order.insert(idx.0, order);
654    }
655}
656
657// ---------------------------------------------------------------------------
658// Connectivity utilities
659// ---------------------------------------------------------------------------
660
661impl Molecule {
662    /// Return `true` if the molecule has exactly one connected component
663    /// (i.e. every atom can be reached from every other atom).
664    pub fn is_connected(&self) -> bool {
665        let n = self.atoms.len();
666        if n == 0 {
667            return true;
668        }
669        let mut visited = vec![false; n];
670        let mut stack = vec![AtomIdx(0)];
671        visited[0] = true;
672        let mut count = 1;
673        while let Some(cur) = stack.pop() {
674            for (nb, _) in self.neighbors(cur) {
675                if !visited[nb.0 as usize] {
676                    visited[nb.0 as usize] = true;
677                    count += 1;
678                    stack.push(nb);
679                }
680            }
681        }
682        count == n
683    }
684
685    /// Split the molecule into its connected components.
686    ///
687    /// Returns a `Vec` of sub-molecules, one per component.  Atoms are
688    /// renumbered within each sub-molecule starting at index 0.
689    pub fn fragments(&self) -> Vec<Molecule> {
690        let n = self.atoms.len();
691        if n == 0 {
692            return vec![];
693        }
694
695        let mut component: Vec<usize> = vec![usize::MAX; n];
696        let mut comp_id = 0;
697
698        for start in 0..n {
699            if component[start] != usize::MAX {
700                continue;
701            }
702            let mut stack = vec![start];
703            component[start] = comp_id;
704            while let Some(cur) = stack.pop() {
705                for (nb, _) in self.neighbors(AtomIdx(cur as u32)) {
706                    let ni = nb.0 as usize;
707                    if component[ni] == usize::MAX {
708                        component[ni] = comp_id;
709                        stack.push(ni);
710                    }
711                }
712            }
713            comp_id += 1;
714        }
715
716        (0..comp_id)
717            .map(|cid| {
718                let mut builder = MoleculeBuilder::new();
719                let mut old_to_new: std::collections::HashMap<AtomIdx, AtomIdx> =
720                    std::collections::HashMap::new();
721                for (aidx, atom) in self.atoms() {
722                    if component[aidx.0 as usize] == cid {
723                        let new_idx = builder.add_atom(atom.clone());
724                        old_to_new.insert(aidx, new_idx);
725                    }
726                }
727                for (_, bond) in self.bonds() {
728                    if let (Some(&a1), Some(&a2)) =
729                        (old_to_new.get(&bond.atom1), old_to_new.get(&bond.atom2))
730                    {
731                        let _ = builder.add_bond(a1, a2, bond.order);
732                    }
733                }
734                builder.build()
735            })
736            .collect()
737    }
738}
739
740/// Builder for constructing a [`Molecule`] incrementally.
741///
742/// Usage: add atoms, add bonds, then call `build()`.
743#[derive(Default)]
744pub struct MoleculeBuilder {
745    atoms: Vec<Atom>,
746    bonds: Vec<BondEntry>,
747    adjacency: Vec<Vec<(AtomIdx, BondIdx)>>,
748    stereo_groups: Vec<StereoGroup>,
749    stereo_neighbor_order: std::collections::HashMap<u32, Vec<u32>>,
750}
751
752impl MoleculeBuilder {
753    pub fn new() -> Self {
754        Self::default()
755    }
756
757    /// Create a builder pre-populated with all atoms and bonds from `mol`.
758    ///
759    /// Use this to make incremental edits to an existing molecule instead of
760    /// reconstructing it from scratch.
761    pub fn from_molecule(mol: &Molecule) -> Self {
762        let mut b = Self::new();
763        for (_, atom) in mol.atoms() {
764            b.add_atom(atom.clone());
765        }
766        for (_, bond) in mol.bonds() {
767            let _ = b.add_bond(bond.atom1, bond.atom2, bond.order);
768        }
769        b.stereo_groups = mol.stereo_groups.clone();
770        b.stereo_neighbor_order = mol.stereo_neighbor_order.clone();
771        b
772    }
773
774    /// Set the SMILES stereo neighbor order for atom `idx`.
775    pub fn set_stereo_neighbor_order(&mut self, idx: AtomIdx, order: Vec<u32>) {
776        self.stereo_neighbor_order.insert(idx.0, order);
777    }
778
779    /// Remove the stereo neighbor order entry for atom `idx`.
780    pub fn clear_stereo_neighbor_order(&mut self, idx: AtomIdx) {
781        self.stereo_neighbor_order.remove(&idx.0);
782    }
783
784    /// Append a stereo group to this builder.
785    pub fn add_stereo_group(&mut self, group: StereoGroup) {
786        self.stereo_groups.push(group);
787    }
788
789    /// Copy all stereo neighbor order entries from `mol` into this builder.
790    pub fn copy_stereo_from(&mut self, mol: &Molecule) {
791        self.stereo_neighbor_order = mol.stereo_neighbor_order.clone();
792    }
793
794    /// Read-only reference to an atom already added to the builder.
795    ///
796    /// Used by the SMILES parser to infer implicit bond types without
797    /// consuming the builder (e.g. aromatic-aromatic → Aromatic bond).
798    ///
799    /// # Panics
800    /// Panics if `idx` is out of range.
801    pub fn atom_at(&self, idx: AtomIdx) -> &Atom {
802        &self.atoms[idx.0 as usize]
803    }
804
805    /// Number of atoms added so far.
806    pub fn atom_count(&self) -> usize {
807        self.atoms.len()
808    }
809
810    /// Iterate over already-added neighbors of `idx` as `(bond_idx, neighbor_atom_idx)`.
811    /// Used by kekulization tests to check whether a bond already exists in the builder.
812    pub fn atom_neighbors(&self, idx: AtomIdx) -> impl Iterator<Item = (BondIdx, AtomIdx)> + '_ {
813        self.adjacency[idx.0 as usize]
814            .iter()
815            .map(|&(nb, bidx)| (bidx, nb))
816    }
817
818    /// Add an atom and return its index.
819    pub fn add_atom(&mut self, atom: Atom) -> AtomIdx {
820        let idx = AtomIdx(self.atoms.len() as u32);
821        self.atoms.push(atom);
822        self.adjacency.push(Vec::new());
823        idx
824    }
825
826    /// Add a bond between two existing atoms.
827    ///
828    /// Returns an error if either atom index is invalid or if the bond already exists.
829    pub fn add_bond(
830        &mut self,
831        a: AtomIdx,
832        b: AtomIdx,
833        order: BondOrder,
834    ) -> Result<BondIdx, MolError> {
835        let n = self.atoms.len() as u32;
836        if a.0 >= n {
837            return Err(MolError::InvalidAtomIdx(a));
838        }
839        if b.0 >= n {
840            return Err(MolError::InvalidAtomIdx(b));
841        }
842
843        // Check for duplicate
844        for &(nb, _) in &self.adjacency[a.0 as usize] {
845            if nb == b {
846                return Err(MolError::DuplicateBond(a, b));
847            }
848        }
849
850        let bidx = BondIdx(self.bonds.len() as u32);
851        self.bonds.push(BondEntry {
852            atom1: a,
853            atom2: b,
854            order,
855        });
856        self.adjacency[a.0 as usize].push((b, bidx));
857        self.adjacency[b.0 as usize].push((a, bidx));
858        Ok(bidx)
859    }
860
861    /// Consume the builder and return an immutable [`Molecule`].
862    pub fn build(self) -> Molecule {
863        Molecule {
864            atoms: self.atoms,
865            bonds: self.bonds,
866            adjacency: self.adjacency,
867            stereo_groups: self.stereo_groups,
868            stereo_neighbor_order: self.stereo_neighbor_order,
869        }
870    }
871}
872
873#[cfg(test)]
874mod tests {
875    use super::*;
876    use crate::atom::Atom;
877    use crate::element::Element;
878
879    fn ethane() -> Molecule {
880        let mut b = MoleculeBuilder::new();
881        let c1 = b.add_atom(Atom::new(Element::C));
882        let c2 = b.add_atom(Atom::new(Element::C));
883        b.add_bond(c1, c2, BondOrder::Single).unwrap();
884        b.build()
885    }
886
887    #[test]
888    fn test_basic_molecule() {
889        let mol = ethane();
890        assert_eq!(mol.atom_count(), 2);
891        assert_eq!(mol.bond_count(), 1);
892    }
893
894    #[test]
895    fn test_adjacency() {
896        let mol = ethane();
897        let neighbors: Vec<_> = mol.neighbors(AtomIdx(0)).collect();
898        assert_eq!(neighbors.len(), 1);
899        assert_eq!(neighbors[0].0, AtomIdx(1));
900    }
901
902    #[test]
903    fn test_bond_between() {
904        let mol = ethane();
905        assert!(mol.bond_between(AtomIdx(0), AtomIdx(1)).is_some());
906        assert!(mol.bond_between(AtomIdx(1), AtomIdx(0)).is_some());
907    }
908
909    #[test]
910    fn test_duplicate_bond_error() {
911        let mut b = MoleculeBuilder::new();
912        let c1 = b.add_atom(Atom::new(Element::C));
913        let c2 = b.add_atom(Atom::new(Element::C));
914        b.add_bond(c1, c2, BondOrder::Single).unwrap();
915        let err = b.add_bond(c1, c2, BondOrder::Double);
916        assert!(matches!(err, Err(MolError::DuplicateBond(_, _))));
917    }
918
919    #[test]
920    fn test_formula() {
921        let mut b = MoleculeBuilder::new();
922        let c = b.add_atom(Atom::new(Element::C));
923        let n = b.add_atom(Atom::new(Element::N));
924        b.add_bond(c, n, BondOrder::Single).unwrap();
925        let mol = b.build();
926        assert_eq!(mol.formula(), "CN");
927    }
928
929    #[test]
930    fn test_implicit_hydrogen_count() {
931        // Isolated C atom (sp3, 4 bonds available): 4 implicit H
932        let mut b = MoleculeBuilder::new();
933        b.add_atom(Atom::organic(Element::C));
934        let mol = b.build();
935        assert_eq!(mol.implicit_hydrogen_count(AtomIdx(0)), 4);
936    }
937
938    #[test]
939    fn test_total_formula_methane() {
940        // Organic C atom with 0 explicit bonds → 4 implicit H → CH4
941        let mut b = MoleculeBuilder::new();
942        b.add_atom(Atom::organic(Element::C));
943        let mol = b.build();
944        assert_eq!(mol.total_formula(), "CH4");
945    }
946
947    #[test]
948    fn test_total_formula_no_hydrogen() {
949        // NaCl — neither Na nor Cl is in the organic subset, no implicit H
950        let mut b = MoleculeBuilder::new();
951        let na = b.add_atom(Atom::new(Element::NA));
952        let cl = b.add_atom(Atom::new(Element::CL));
953        b.add_bond(na, cl, BondOrder::Single).unwrap();
954        let mol = b.build();
955        assert_eq!(mol.total_formula(), "ClNa");
956    }
957
958    #[test]
959    fn test_with_atom_aromatic() {
960        let mol = ethane();
961        let updated = mol.with_atom_aromatic(AtomIdx(0), true);
962        assert!(updated.atom(AtomIdx(0)).aromatic);
963        assert!(!updated.atom(AtomIdx(1)).aromatic);
964    }
965
966    #[test]
967    fn test_with_bond_order() {
968        let mol = ethane();
969        let updated = mol.with_bond_order(BondIdx(0), BondOrder::Double);
970        assert_eq!(updated.bond(BondIdx(0)).order, BondOrder::Double);
971    }
972
973    // --- mutable API ---
974
975    #[test]
976    fn test_add_remove_atom() {
977        let mut mol = ethane();
978        let n_idx = mol.add_atom(Atom::new(Element::N));
979        assert_eq!(mol.atom_count(), 3);
980        assert_eq!(mol.atom(n_idx).element.atomic_number(), 7);
981
982        let remap = mol.remove_atom(n_idx);
983        assert_eq!(mol.atom_count(), 2);
984        assert!(remap[n_idx.0 as usize].is_none());
985    }
986
987    #[test]
988    fn test_add_remove_bond() {
989        let mut mol = ethane();
990        let n_idx = mol.add_atom(Atom::new(Element::N));
991        let bidx = mol.add_bond(AtomIdx(0), n_idx, BondOrder::Single).unwrap();
992        assert_eq!(mol.bond_count(), 2);
993        mol.remove_bond(bidx);
994        assert_eq!(mol.bond_count(), 1);
995    }
996
997    #[test]
998    fn test_set_charge_element() {
999        let mut mol = ethane();
1000        mol.set_charge(AtomIdx(0), 1);
1001        assert_eq!(mol.atom(AtomIdx(0)).charge, 1);
1002        mol.set_element(AtomIdx(0), Element::N);
1003        assert_eq!(mol.atom(AtomIdx(0)).element.atomic_number(), 7);
1004    }
1005
1006    #[test]
1007    fn test_is_connected() {
1008        let mol = ethane();
1009        assert!(mol.is_connected());
1010
1011        // Two separate atoms — disconnected
1012        let mut b = MoleculeBuilder::new();
1013        b.add_atom(Atom::new(Element::C));
1014        b.add_atom(Atom::new(Element::N));
1015        let disconnected = b.build();
1016        assert!(!disconnected.is_connected());
1017    }
1018
1019    #[test]
1020    fn test_fragments() {
1021        // "CC.N" — two components
1022        let mut b = MoleculeBuilder::new();
1023        let c1 = b.add_atom(Atom::organic(Element::C));
1024        let c2 = b.add_atom(Atom::organic(Element::C));
1025        b.add_bond(c1, c2, BondOrder::Single).unwrap();
1026        b.add_atom(Atom::new(Element::N)); // disconnected N
1027        let mol = b.build();
1028        let frags = mol.fragments();
1029        assert_eq!(frags.len(), 2);
1030        let sizes: std::collections::HashSet<usize> =
1031            frags.iter().map(|f| f.atom_count()).collect();
1032        assert!(sizes.contains(&2));
1033        assert!(sizes.contains(&1));
1034    }
1035
1036    #[test]
1037    fn test_builder_from_molecule() {
1038        let mol = ethane();
1039        let mut b = MoleculeBuilder::from_molecule(&mol);
1040        b.add_atom(Atom::new(Element::O));
1041        let mol2 = b.build();
1042        assert_eq!(mol2.atom_count(), 3);
1043        assert_eq!(mol2.bond_count(), 1); // original bond preserved
1044    }
1045}