Skip to main content

chematic_core/
molecule.rs

1//! Molecule graph: atoms, bonds, and adjacency list.
2
3use crate::atom::Atom;
4use crate::bond::{BondEntry, BondOrder};
5use crate::element::Element;
6use crate::stereo_group::StereoGroup;
7
8/// Newtype index for an atom in a Molecule.
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
10pub struct AtomIdx(pub u32);
11
12/// Newtype index for a bond in a Molecule.
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
14pub struct BondIdx(pub u32);
15
16/// Error types for molecule construction.
17#[derive(Debug, Clone, PartialEq, Eq)]
18pub enum MolError {
19    /// Atom index out of range.
20    InvalidAtomIdx(AtomIdx),
21    /// Duplicate bond between the same pair of atoms.
22    DuplicateBond(AtomIdx, AtomIdx),
23}
24
25impl core::fmt::Display for MolError {
26    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
27        match self {
28            Self::InvalidAtomIdx(idx) => write!(f, "invalid atom index: {}", idx.0),
29            Self::DuplicateBond(a, b) => {
30                write!(f, "duplicate bond between atoms {} and {}", a.0, b.0)
31            }
32        }
33    }
34}
35
36impl std::error::Error for MolError {}
37
38/// An immutable molecular graph built via [`MoleculeBuilder`].
39///
40/// Representation: atom list + bond list + per-atom adjacency list.
41/// No external graph library is used; all graph traversal is domain-aware.
42/// Sentinel used in `stereo_neighbor_order` to represent the implicit H in a bracket atom.
43pub const STEREO_H_SENTINEL: u32 = u32::MAX;
44
45pub struct Molecule {
46    atoms: Vec<Atom>,
47    bonds: Vec<BondEntry>,
48    /// adjacency[atom_idx] = list of (neighbor_atom_idx, bond_idx)
49    adjacency: Vec<Vec<(AtomIdx, BondIdx)>>,
50    /// Enhanced stereo groups (ChemDraw V3000 Absolute / Or / And).
51    stereo_groups: Vec<StereoGroup>,
52    /// SMILES-text-order neighbor sequence for chiral atoms.
53    ///
54    /// Keyed by atom index.  Each value lists the atom indices of neighbors in
55    /// the order they appeared in the SMILES string (including ring-closure
56    /// partners), with [`STEREO_H_SENTINEL`] (`u32::MAX`) standing in for the
57    /// implicit bracket H.  Populated by the SMILES parser; absent for atoms
58    /// not parsed from SMILES or without recorded stereo.
59    stereo_neighbor_order: std::collections::HashMap<u32, Vec<u32>>,
60}
61
62impl Molecule {
63    /// Number of heavy atoms (does not count implicit H).
64    pub fn atom_count(&self) -> usize {
65        self.atoms.len()
66    }
67
68    /// Number of bonds (edges).
69    pub fn bond_count(&self) -> usize {
70        self.bonds.len()
71    }
72
73    /// Borrow atom by index.
74    ///
75    /// # Panics
76    /// Panics if `idx` is out of range (should not happen with indices from this molecule).
77    ///
78    /// For a non-panicking variant, use [`Self::atom_opt`].
79    pub fn atom(&self, idx: AtomIdx) -> &Atom {
80        let i = idx.0 as usize;
81        if i >= self.atoms.len() {
82            panic!("atom index {} out of range (molecule has {} atoms)", idx.0, self.atoms.len());
83        }
84        &self.atoms[i]
85    }
86
87    /// Borrow atom by index, returning `None` if out of range.
88    pub fn atom_opt(&self, idx: AtomIdx) -> Option<&Atom> {
89        let i = idx.0 as usize;
90        if i < self.atoms.len() {
91            Some(&self.atoms[i])
92        } else {
93            None
94        }
95    }
96
97    /// Borrow bond by index.
98    ///
99    /// # Panics
100    /// Panics if `idx` is out of range (should not happen with indices from this molecule).
101    ///
102    /// For a non-panicking variant, use [`Self::bond_opt`].
103    pub fn bond(&self, idx: BondIdx) -> &BondEntry {
104        let i = idx.0 as usize;
105        if i >= self.bonds.len() {
106            panic!("bond index {} out of range (molecule has {} bonds)", idx.0, self.bonds.len());
107        }
108        &self.bonds[i]
109    }
110
111    /// Borrow bond by index, returning `None` if out of range.
112    pub fn bond_opt(&self, idx: BondIdx) -> Option<&BondEntry> {
113        let i = idx.0 as usize;
114        if i < self.bonds.len() {
115            Some(&self.bonds[i])
116        } else {
117            None
118        }
119    }
120
121    /// Iterate over all atoms as `(AtomIdx, &Atom)`.
122    pub fn atoms(&self) -> impl Iterator<Item = (AtomIdx, &Atom)> {
123        self.atoms
124            .iter()
125            .enumerate()
126            .map(|(i, a)| (AtomIdx(i as u32), a))
127    }
128
129    /// Iterate over all bonds as `(BondIdx, &BondEntry)`.
130    pub fn bonds(&self) -> impl Iterator<Item = (BondIdx, &BondEntry)> {
131        self.bonds
132            .iter()
133            .enumerate()
134            .map(|(i, b)| (BondIdx(i as u32), b))
135    }
136
137    /// Iterate over neighbors of `idx` as `(neighbor_atom_idx, bond_idx)`.
138    ///
139    /// # Panics
140    /// Panics if `idx` is out of range (should not happen with indices from this molecule).
141    ///
142    /// For a non-panicking variant, use [`Self::neighbors_opt`].
143    pub fn neighbors(&self, idx: AtomIdx) -> impl Iterator<Item = (AtomIdx, BondIdx)> + '_ {
144        let i = idx.0 as usize;
145        if i >= self.adjacency.len() {
146            panic!("atom index {} out of range (molecule has {} atoms)", idx.0, self.adjacency.len());
147        }
148        self.adjacency[i].iter().copied()
149    }
150
151    /// Iterate over neighbors of `idx` as `(neighbor_atom_idx, bond_idx)`, returning `None` if out of range.
152    pub fn neighbors_opt(&self, idx: AtomIdx) -> Option<Vec<(AtomIdx, BondIdx)>> {
153        let i = idx.0 as usize;
154        if i < self.adjacency.len() {
155            Some(self.adjacency[i].to_vec())
156        } else {
157            None
158        }
159    }
160
161    /// Degree (number of connected bonds) of atom `idx`.
162    ///
163    /// # Panics
164    /// Panics if `idx` is out of range (should not happen with indices from this molecule).
165    ///
166    /// For a non-panicking variant, use [`Self::degree_opt`].
167    pub fn degree(&self, idx: AtomIdx) -> usize {
168        let i = idx.0 as usize;
169        if i >= self.adjacency.len() {
170            panic!("atom index {} out of range (molecule has {} atoms)", idx.0, self.adjacency.len());
171        }
172        self.adjacency[i].len()
173    }
174
175    /// Degree (number of connected bonds) of atom `idx`, returning `None` if out of range.
176    pub fn degree_opt(&self, idx: AtomIdx) -> Option<usize> {
177        let i = idx.0 as usize;
178        if i < self.adjacency.len() {
179            Some(self.adjacency[i].len())
180        } else {
181            None
182        }
183    }
184
185    /// Return the bond between `a` and `b`, or `None` if not connected or indices are out of bounds.
186    pub fn bond_between(&self, a: AtomIdx, b: AtomIdx) -> Option<(BondIdx, &BondEntry)> {
187        let a_idx = a.0 as usize;
188        let b_idx = b.0 as usize;
189        if a_idx >= self.adjacency.len() || b_idx >= self.atoms.len() {
190            return None;
191        }
192        self.adjacency[a_idx]
193            .iter()
194            .find(|&&(nb, _)| nb == b)
195            .and_then(|&(_, bidx)| {
196                let bond_idx = bidx.0 as usize;
197                if bond_idx < self.bonds.len() {
198                    Some((bidx, &self.bonds[bond_idx]))
199                } else {
200                    None
201                }
202            })
203    }
204
205    /// Molecular formula as a Hill-order string (C first, H second, then alphabetical).
206    pub fn formula(&self) -> String {
207        use std::collections::BTreeMap;
208        let mut counts: BTreeMap<&str, u32> = BTreeMap::new();
209        for (_, atom) in self.atoms() {
210            *counts.entry(atom.element.symbol()).or_insert(0) += 1;
211        }
212        Self::format_hill_order_formula(&counts)
213    }
214}
215
216// ---------------------------------------------------------------------------
217// Immutable update methods (functional-style editing)
218// ---------------------------------------------------------------------------
219
220impl Molecule {
221    /// Format element counts in Hill order: C, H, then alphabetically.
222    fn format_hill_order_formula(counts: &std::collections::BTreeMap<&str, u32>) -> String {
223        let mut counts = counts.clone();
224        let mut result = String::new();
225        let push_count = |sym: &str, n: u32, out: &mut String| {
226            out.push_str(sym);
227            if n > 1 {
228                out.push_str(&n.to_string());
229            }
230        };
231        if let Some(c) = counts.remove("C") {
232            push_count("C", c, &mut result);
233        }
234        if let Some(h) = counts.remove("H")
235            && h > 0
236        {
237            push_count("H", h, &mut result);
238        }
239        for (sym, count) in &counts {
240            push_count(sym, *count, &mut result);
241        }
242        result
243    }
244
245    /// Return a new `Molecule` with one extra atom appended, along with the
246    /// index that the new atom will have in the returned molecule.
247    pub fn with_atom_added(&self, atom: Atom) -> (Molecule, AtomIdx) {
248        let mut builder = MoleculeBuilder::from_molecule(self);
249        let new_idx = builder.add_atom(atom);
250        (builder.build(), new_idx)
251    }
252
253    /// Return a new `Molecule` with one extra bond added, along with the index
254    /// of the newly added bond in the returned molecule.
255    ///
256    /// Returns `Err` if `a == b` or the bond already exists (same semantics as
257    /// [`MoleculeBuilder::add_bond`]).
258    pub fn with_bond_added(
259        &self,
260        a: AtomIdx,
261        b: AtomIdx,
262        order: BondOrder,
263    ) -> Result<(Molecule, BondIdx), MolError> {
264        let mut builder = MoleculeBuilder::from_molecule(self);
265        let bond_idx = builder.add_bond(a, b, order)?;
266        Ok((builder.build(), bond_idx))
267    }
268
269    /// Return a new `Molecule` with the formal charge of atom `idx` changed.
270    pub fn with_atom_charge(&self, idx: AtomIdx, charge: i8) -> Molecule {
271        let mut builder = MoleculeBuilder::new();
272        for (aidx, atom) in self.atoms() {
273            let mut a = atom.clone();
274            if aidx == idx {
275                a.charge = charge;
276            }
277            builder.add_atom(a);
278        }
279        for (_, bond) in self.bonds() {
280            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
281        }
282        builder.copy_stereo_from(self);
283        builder.build()
284    }
285
286    /// Return a new `Molecule` with the element of atom `idx` changed.
287    ///
288    /// Chirality and hydrogen count are reset to `None` when the element
289    /// changes, since those properties are element-specific.
290    pub fn with_atom_element(&self, idx: AtomIdx, el: Element) -> Molecule {
291        let mut builder = MoleculeBuilder::new();
292        for (aidx, atom) in self.atoms() {
293            let mut a = atom.clone();
294            if aidx == idx {
295                a.element = el;
296                // Reset element-specific fields so valence stays consistent.
297                a.chirality = crate::atom::Chirality::None;
298                a.hydrogen_count = None;
299                a.aromatic = false;
300            }
301            builder.add_atom(a);
302        }
303        for (_, bond) in self.bonds() {
304            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
305        }
306        builder.copy_stereo_from(self);
307        // Chirality was cleared for the changed atom; remove its stereo order too.
308        builder.clear_stereo_neighbor_order(idx);
309        builder.build()
310    }
311
312    /// Return a new `Molecule` with atom `idx` and all bonds involving it
313    /// removed.  Atom indices of survivors shift down past the removed slot.
314    ///
315    /// The returned tuple also includes a mapping from **old** `AtomIdx` to
316    /// **new** `AtomIdx` (indices that fall below `idx` are unchanged; indices
317    /// above `idx` decrease by 1).
318    pub fn with_atom_removed(&self, idx: AtomIdx) -> (Molecule, Vec<Option<AtomIdx>>) {
319        let n = self.atom_count();
320        let removed = idx.0 as usize;
321
322        // Build old→new index table.
323        let mut remap: Vec<Option<AtomIdx>> = vec![None; n];
324        let mut new_pos = 0u32;
325        for (old, slot) in remap.iter_mut().enumerate() {
326            if old == removed {
327                continue;
328            }
329            *slot = Some(AtomIdx(new_pos));
330            new_pos += 1;
331        }
332
333        let mut builder = MoleculeBuilder::new();
334        for (aidx, atom) in self.atoms() {
335            if aidx == idx {
336                continue;
337            }
338            builder.add_atom(atom.clone());
339        }
340        for (_, bond) in self.bonds() {
341            if bond.atom1 == idx || bond.atom2 == idx {
342                continue;
343            }
344            if let (Some(a1), Some(a2)) =
345                (remap[bond.atom1.0 as usize], remap[bond.atom2.0 as usize])
346            {
347                let _ = builder.add_bond(a1, a2, bond.order);
348            }
349        }
350        // Remap stereo neighbor order: drop removed atom's entry, remap neighbor indices.
351        for (old_key, order) in &self.stereo_neighbor_order {
352            let old_atom = *old_key as usize;
353            if old_atom == removed {
354                continue; // removed atom's stereo is gone
355            }
356            if let Some(Some(new_key)) = remap.get(old_atom) {
357                let new_order: Vec<u32> = order
358                    .iter()
359                    .filter_map(|&v| {
360                        if v == STEREO_H_SENTINEL {
361                            Some(STEREO_H_SENTINEL)
362                        } else if v as usize == removed {
363                            None // neighbor was the removed atom — stereo is now invalid
364                        } else {
365                            remap.get(v as usize).and_then(|r| r.map(|a| a.0))
366                        }
367                    })
368                    .collect();
369                builder.set_stereo_neighbor_order(*new_key, new_order);
370            }
371        }
372        (builder.build(), remap)
373    }
374
375    /// Implicit hydrogen count for atom `idx` based on valence rules.
376    ///
377    /// Delegates to [`crate::valence::implicit_hcount`].
378    pub fn implicit_hydrogen_count(&self, idx: AtomIdx) -> u8 {
379        crate::valence::implicit_hcount(self, idx)
380    }
381
382    /// Hill-order molecular formula including implicit hydrogens.
383    ///
384    /// Unlike [`Self::formula`] (which counts only explicit heavy atoms),
385    /// this method adds the implicit H count for every atom so the result
386    /// reflects the true molecular composition (e.g. methane → "CH4").
387    pub fn total_formula(&self) -> String {
388        use std::collections::BTreeMap;
389        let mut counts: BTreeMap<&str, u32> = BTreeMap::new();
390        let mut implicit_h: u32 = 0;
391        for (aidx, atom) in self.atoms() {
392            *counts.entry(atom.element.symbol()).or_insert(0) += 1;
393            implicit_h += crate::valence::implicit_hcount(self, aidx) as u32;
394        }
395        *counts.entry("H").or_insert(0) += implicit_h;
396        Self::format_hill_order_formula(&counts)
397    }
398
399    /// Hill-order molecular formula with isotope labels.
400    ///
401    /// Like [`Self::formula`] but prefixes each element symbol with its
402    /// isotope number when `atom.isotope` is `Some(n)`.
403    /// Example: a molecule with one `¹³C` and one `O` → `"¹³CO"`.
404    pub fn formula_with_isotopes(&self) -> String {
405        use std::collections::BTreeMap;
406        // Collect (isotope_prefix + symbol) counts, heavy atoms only.
407        let mut counts: BTreeMap<String, u32> = BTreeMap::new();
408        let mut has_carbon = false;
409        let mut has_explicit_h = false;
410        for (_, atom) in self.atoms() {
411            let sym = atom.element.symbol();
412            let key = match atom.isotope {
413                Some(n) => format!("{n}{sym}"),
414                None => sym.to_string(),
415            };
416            if sym == "C" && atom.isotope.is_none() {
417                has_carbon = true;
418            }
419            if sym == "H" {
420                has_explicit_h = true;
421            }
422            *counts.entry(key).or_insert(0) += 1;
423        }
424
425        let push_count = |key: &str, n: u32, out: &mut String| {
426            out.push_str(key);
427            if n > 1 {
428                out.push_str(&n.to_string());
429            }
430        };
431
432        let mut result = String::new();
433        // Hill order: C first (if unlabelled C present), then H, then rest alphabetically.
434        if has_carbon && let Some(c) = counts.remove("C") {
435            push_count("C", c, &mut result);
436        }
437        if has_explicit_h && let Some(h) = counts.remove("H") {
438            push_count("H", h, &mut result);
439        }
440        for (key, count) in &counts {
441            push_count(key, *count, &mut result);
442        }
443        result
444    }
445
446    /// Return a new `Molecule` with atom `idx`'s aromatic flag changed.
447    pub fn with_atom_aromatic(&self, idx: AtomIdx, aromatic: bool) -> Molecule {
448        let mut builder = MoleculeBuilder::new();
449        for (aidx, atom) in self.atoms() {
450            let mut a = atom.clone();
451            if aidx == idx {
452                a.aromatic = aromatic;
453            }
454            builder.add_atom(a);
455        }
456        for (_, bond) in self.bonds() {
457            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
458        }
459        builder.copy_stereo_from(self);
460        builder.build()
461    }
462
463    /// Return a new `Molecule` with bond `idx`'s order changed.
464    pub fn with_bond_order(&self, idx: BondIdx, order: BondOrder) -> Molecule {
465        let mut builder = MoleculeBuilder::new();
466        for (_, atom) in self.atoms() {
467            builder.add_atom(atom.clone());
468        }
469        for (bidx, bond) in self.bonds() {
470            let o = if bidx == idx { order } else { bond.order };
471            let _ = builder.add_bond(bond.atom1, bond.atom2, o);
472        }
473        builder.copy_stereo_from(self);
474        builder.build()
475    }
476
477    /// Return a new `Molecule` with bond `idx` removed.
478    ///
479    /// Atom indices are unchanged.  Bond indices of survivors shift down.
480    pub fn with_bond_removed(&self, idx: BondIdx) -> Molecule {
481        let mut builder = MoleculeBuilder::new();
482        for (_, atom) in self.atoms() {
483            builder.add_atom(atom.clone());
484        }
485        for (bidx, bond) in self.bonds() {
486            if bidx == idx {
487                continue;
488            }
489            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
490        }
491        builder.copy_stereo_from(self);
492        builder.build()
493    }
494}
495
496// ---------------------------------------------------------------------------
497// In-place mutation methods
498// ---------------------------------------------------------------------------
499
500impl Molecule {
501    /// Append a new atom and return its index.
502    pub fn add_atom(&mut self, atom: Atom) -> AtomIdx {
503        let idx = AtomIdx(self.atoms.len() as u32);
504        self.atoms.push(atom);
505        self.adjacency.push(vec![]);
506        idx
507    }
508
509    /// Remove atom `idx` and all bonds involving it.
510    ///
511    /// Returns a remapping table: `remap[old_idx]` gives the new `AtomIdx`
512    /// for surviving atoms, or `None` for the removed atom.  Atom indices
513    /// of atoms after the removed slot shift down by 1.
514    pub fn remove_atom(&mut self, idx: AtomIdx) -> Vec<Option<AtomIdx>> {
515        let n = self.atoms.len();
516        let removed = idx.0 as usize;
517
518        let mut remap: Vec<Option<AtomIdx>> = vec![None; n];
519        let mut new_pos = 0u32;
520        for (old, slot) in remap.iter_mut().enumerate() {
521            if old == removed {
522                continue;
523            }
524            *slot = Some(AtomIdx(new_pos));
525            new_pos += 1;
526        }
527
528        self.atoms.remove(removed);
529
530        // Keep only bonds not involving the removed atom; remap endpoints.
531        let mut new_bonds: Vec<BondEntry> = Vec::new();
532        for bond in &self.bonds {
533            if bond.atom1 == idx || bond.atom2 == idx {
534                continue;
535            }
536            if let (Some(a1), Some(a2)) =
537                (remap[bond.atom1.0 as usize], remap[bond.atom2.0 as usize])
538            {
539                new_bonds.push(BondEntry {
540                    atom1: a1,
541                    atom2: a2,
542                    order: bond.order,
543                });
544            }
545        }
546        self.bonds = new_bonds;
547
548        // Rebuild adjacency from scratch.
549        let new_n = self.atoms.len();
550        self.adjacency = vec![vec![]; new_n];
551        for (bidx, bond) in self.bonds.iter().enumerate() {
552            let bi = BondIdx(bidx as u32);
553            self.adjacency[bond.atom1.0 as usize].push((bond.atom2, bi));
554            self.adjacency[bond.atom2.0 as usize].push((bond.atom1, bi));
555        }
556
557        // Remap stereo neighbor order in-place.
558        let old_stereo = std::mem::take(&mut self.stereo_neighbor_order);
559        for (old_key, order) in old_stereo {
560            let old_atom = old_key as usize;
561            if old_atom == removed {
562                continue;
563            }
564            if let Some(Some(new_key)) = remap.get(old_atom) {
565                let new_order: Vec<u32> = order
566                    .iter()
567                    .filter_map(|&v| {
568                        if v == STEREO_H_SENTINEL {
569                            Some(STEREO_H_SENTINEL)
570                        } else if v as usize == removed {
571                            None
572                        } else {
573                            remap.get(v as usize).and_then(|r| r.map(|a| a.0))
574                        }
575                    })
576                    .collect();
577                self.stereo_neighbor_order.insert(new_key.0, new_order);
578            }
579        }
580
581        remap
582    }
583
584    /// Add a bond between `a` and `b` with the given `order`.
585    ///
586    /// Returns `Err` if `a == b` or the bond already exists.
587    pub fn add_bond(
588        &mut self,
589        a: AtomIdx,
590        b: AtomIdx,
591        order: BondOrder,
592    ) -> Result<BondIdx, MolError> {
593        let n = self.atoms.len() as u32;
594        if a.0 >= n {
595            return Err(MolError::InvalidAtomIdx(a));
596        }
597        if b.0 >= n {
598            return Err(MolError::InvalidAtomIdx(b));
599        }
600        if self.adjacency[a.0 as usize].iter().any(|&(nb, _)| nb == b) {
601            return Err(MolError::DuplicateBond(a, b));
602        }
603        let bidx = BondIdx(self.bonds.len() as u32);
604        self.bonds.push(BondEntry {
605            atom1: a,
606            atom2: b,
607            order,
608        });
609        self.adjacency[a.0 as usize].push((b, bidx));
610        self.adjacency[b.0 as usize].push((a, bidx));
611        Ok(bidx)
612    }
613
614    /// Remove bond `idx`.  Atom indices are unchanged; bond indices of
615    /// surviving bonds shift down past the removed slot.
616    pub fn remove_bond(&mut self, idx: BondIdx) {
617        let removed = idx.0 as usize;
618        if removed >= self.bonds.len() {
619            return;
620        }
621        self.bonds.remove(removed);
622        // Rebuild adjacency with renumbered bond indices.
623        let n = self.atoms.len();
624        self.adjacency = vec![vec![]; n];
625        for (bidx, bond) in self.bonds.iter().enumerate() {
626            let bi = BondIdx(bidx as u32);
627            self.adjacency[bond.atom1.0 as usize].push((bond.atom2, bi));
628            self.adjacency[bond.atom2.0 as usize].push((bond.atom1, bi));
629        }
630    }
631
632    /// Set the formal charge of atom `idx` in-place.
633    pub fn set_charge(&mut self, idx: AtomIdx, charge: i8) {
634        self.atoms[idx.0 as usize].charge = charge;
635    }
636
637    /// Set the element of atom `idx` in-place.
638    ///
639    /// Chirality and hydrogen count are reset (element-specific properties).
640    pub fn set_element(&mut self, idx: AtomIdx, el: Element) {
641        let a = &mut self.atoms[idx.0 as usize];
642        a.element = el;
643        a.chirality = crate::atom::Chirality::None;
644        a.hydrogen_count = None;
645        a.aromatic = false;
646    }
647
648    /// Set the CIP stereo code of atom `idx` in-place.
649    pub fn set_cip_code(&mut self, idx: AtomIdx, code: Option<crate::atom::CipCode>) {
650        self.atoms[idx.0 as usize].cip_code = code;
651    }
652
653    /// Return the enhanced stereo groups attached to this molecule.
654    pub fn stereo_groups(&self) -> &[StereoGroup] {
655        &self.stereo_groups
656    }
657
658    /// Replace the stereo group list in-place.
659    pub fn set_stereo_groups(&mut self, groups: Vec<StereoGroup>) {
660        self.stereo_groups = groups;
661    }
662
663    /// Add a single stereo group in-place.
664    pub fn add_stereo_group(&mut self, group: StereoGroup) {
665        self.stereo_groups.push(group);
666    }
667
668    /// SMILES-text-order neighbor sequence for a chiral atom.
669    ///
670    /// Returns `None` for atoms not parsed from SMILES or without stereo.
671    /// The slice contains neighbor atom indices in SMILES text order;
672    /// [`STEREO_H_SENTINEL`] (`u32::MAX`) marks the implicit bracket-H slot.
673    pub fn stereo_neighbor_order(&self, idx: AtomIdx) -> Option<&[u32]> {
674        self.stereo_neighbor_order
675            .get(&idx.0)
676            .map(|v| v.as_slice())
677    }
678
679    /// Set the SMILES stereo neighbor order for atom `idx`.
680    pub fn set_stereo_neighbor_order(&mut self, idx: AtomIdx, order: Vec<u32>) {
681        self.stereo_neighbor_order.insert(idx.0, order);
682    }
683}
684
685// ---------------------------------------------------------------------------
686// Connectivity utilities
687// ---------------------------------------------------------------------------
688
689impl Molecule {
690    /// Return `true` if the molecule has exactly one connected component
691    /// (i.e. every atom can be reached from every other atom).
692    pub fn is_connected(&self) -> bool {
693        let n = self.atoms.len();
694        if n == 0 {
695            return true;
696        }
697        let mut visited = vec![false; n];
698        let mut stack = vec![AtomIdx(0)];
699        visited[0] = true;
700        let mut count = 1;
701        while let Some(cur) = stack.pop() {
702            for (nb, _) in self.neighbors(cur) {
703                if !visited[nb.0 as usize] {
704                    visited[nb.0 as usize] = true;
705                    count += 1;
706                    stack.push(nb);
707                }
708            }
709        }
710        count == n
711    }
712
713    /// Split the molecule into its connected components.
714    ///
715    /// Returns a `Vec` of sub-molecules, one per component.  Atoms are
716    /// renumbered within each sub-molecule starting at index 0.
717    pub fn fragments(&self) -> Vec<Molecule> {
718        let n = self.atoms.len();
719        if n == 0 {
720            return vec![];
721        }
722
723        let mut component: Vec<usize> = vec![usize::MAX; n];
724        let mut comp_id = 0;
725
726        for start in 0..n {
727            if component[start] != usize::MAX {
728                continue;
729            }
730            let mut stack = vec![start];
731            component[start] = comp_id;
732            while let Some(cur) = stack.pop() {
733                for (nb, _) in self.neighbors(AtomIdx(cur as u32)) {
734                    let ni = nb.0 as usize;
735                    if component[ni] == usize::MAX {
736                        component[ni] = comp_id;
737                        stack.push(ni);
738                    }
739                }
740            }
741            comp_id += 1;
742        }
743
744        (0..comp_id)
745            .map(|cid| {
746                let mut builder = MoleculeBuilder::new();
747                let mut old_to_new: std::collections::HashMap<AtomIdx, AtomIdx> =
748                    std::collections::HashMap::new();
749                for (aidx, atom) in self.atoms() {
750                    if component[aidx.0 as usize] == cid {
751                        let new_idx = builder.add_atom(atom.clone());
752                        old_to_new.insert(aidx, new_idx);
753                    }
754                }
755                for (_, bond) in self.bonds() {
756                    if let (Some(&a1), Some(&a2)) =
757                        (old_to_new.get(&bond.atom1), old_to_new.get(&bond.atom2))
758                    {
759                        let _ = builder.add_bond(a1, a2, bond.order);
760                    }
761                }
762                builder.build()
763            })
764            .collect()
765    }
766}
767
768/// Builder for constructing a [`Molecule`] incrementally.
769///
770/// Usage: add atoms, add bonds, then call `build()`.
771#[derive(Default)]
772pub struct MoleculeBuilder {
773    atoms: Vec<Atom>,
774    bonds: Vec<BondEntry>,
775    adjacency: Vec<Vec<(AtomIdx, BondIdx)>>,
776    stereo_groups: Vec<StereoGroup>,
777    stereo_neighbor_order: std::collections::HashMap<u32, Vec<u32>>,
778}
779
780impl MoleculeBuilder {
781    pub fn new() -> Self {
782        Self::default()
783    }
784
785    /// Create a builder pre-populated with all atoms and bonds from `mol`.
786    ///
787    /// Use this to make incremental edits to an existing molecule instead of
788    /// reconstructing it from scratch.
789    pub fn from_molecule(mol: &Molecule) -> Self {
790        let mut b = Self::new();
791        for (_, atom) in mol.atoms() {
792            b.add_atom(atom.clone());
793        }
794        for (_, bond) in mol.bonds() {
795            let _ = b.add_bond(bond.atom1, bond.atom2, bond.order);
796        }
797        b.stereo_groups = mol.stereo_groups.clone();
798        b.stereo_neighbor_order = mol.stereo_neighbor_order.clone();
799        b
800    }
801
802    /// Set the SMILES stereo neighbor order for atom `idx`.
803    pub fn set_stereo_neighbor_order(&mut self, idx: AtomIdx, order: Vec<u32>) {
804        self.stereo_neighbor_order.insert(idx.0, order);
805    }
806
807    /// Remove the stereo neighbor order entry for atom `idx`.
808    pub fn clear_stereo_neighbor_order(&mut self, idx: AtomIdx) {
809        self.stereo_neighbor_order.remove(&idx.0);
810    }
811
812    /// Append a stereo group to this builder.
813    pub fn add_stereo_group(&mut self, group: StereoGroup) {
814        self.stereo_groups.push(group);
815    }
816
817    /// Copy all stereo neighbor order entries from `mol` into this builder.
818    pub fn copy_stereo_from(&mut self, mol: &Molecule) {
819        self.stereo_neighbor_order = mol.stereo_neighbor_order.clone();
820    }
821
822    /// Read-only reference to an atom already added to the builder.
823    ///
824    /// Used by the SMILES parser to infer implicit bond types without
825    /// consuming the builder (e.g. aromatic-aromatic → Aromatic bond).
826    ///
827    /// # Panics
828    /// Panics if `idx` is out of range.
829    pub fn atom_at(&self, idx: AtomIdx) -> &Atom {
830        &self.atoms[idx.0 as usize]
831    }
832
833    /// Number of atoms added so far.
834    pub fn atom_count(&self) -> usize {
835        self.atoms.len()
836    }
837
838    /// Iterate over already-added neighbors of `idx` as `(bond_idx, neighbor_atom_idx)`.
839    /// Used by kekulization tests to check whether a bond already exists in the builder.
840    pub fn atom_neighbors(&self, idx: AtomIdx) -> impl Iterator<Item = (BondIdx, AtomIdx)> + '_ {
841        self.adjacency[idx.0 as usize]
842            .iter()
843            .map(|&(nb, bidx)| (bidx, nb))
844    }
845
846    /// Add an atom and return its index.
847    pub fn add_atom(&mut self, atom: Atom) -> AtomIdx {
848        let idx = AtomIdx(self.atoms.len() as u32);
849        self.atoms.push(atom);
850        self.adjacency.push(Vec::new());
851        idx
852    }
853
854    /// Add a bond between two existing atoms.
855    ///
856    /// Returns an error if either atom index is invalid or if the bond already exists.
857    pub fn add_bond(
858        &mut self,
859        a: AtomIdx,
860        b: AtomIdx,
861        order: BondOrder,
862    ) -> Result<BondIdx, MolError> {
863        let n = self.atoms.len() as u32;
864        if a.0 >= n {
865            return Err(MolError::InvalidAtomIdx(a));
866        }
867        if b.0 >= n {
868            return Err(MolError::InvalidAtomIdx(b));
869        }
870
871        // Check for duplicate
872        for &(nb, _) in &self.adjacency[a.0 as usize] {
873            if nb == b {
874                return Err(MolError::DuplicateBond(a, b));
875            }
876        }
877
878        let bidx = BondIdx(self.bonds.len() as u32);
879        self.bonds.push(BondEntry {
880            atom1: a,
881            atom2: b,
882            order,
883        });
884        self.adjacency[a.0 as usize].push((b, bidx));
885        self.adjacency[b.0 as usize].push((a, bidx));
886        Ok(bidx)
887    }
888
889    /// Consume the builder and return an immutable [`Molecule`].
890    pub fn build(self) -> Molecule {
891        Molecule {
892            atoms: self.atoms,
893            bonds: self.bonds,
894            adjacency: self.adjacency,
895            stereo_groups: self.stereo_groups,
896            stereo_neighbor_order: self.stereo_neighbor_order,
897        }
898    }
899}
900
901#[cfg(test)]
902mod tests {
903    use super::*;
904    use crate::atom::Atom;
905    use crate::element::Element;
906
907    fn ethane() -> Molecule {
908        let mut b = MoleculeBuilder::new();
909        let c1 = b.add_atom(Atom::new(Element::C));
910        let c2 = b.add_atom(Atom::new(Element::C));
911        b.add_bond(c1, c2, BondOrder::Single).unwrap();
912        b.build()
913    }
914
915    #[test]
916    fn test_basic_molecule() {
917        let mol = ethane();
918        assert_eq!(mol.atom_count(), 2);
919        assert_eq!(mol.bond_count(), 1);
920    }
921
922    #[test]
923    fn test_adjacency() {
924        let mol = ethane();
925        let neighbors: Vec<_> = mol.neighbors(AtomIdx(0)).collect();
926        assert_eq!(neighbors.len(), 1);
927        assert_eq!(neighbors[0].0, AtomIdx(1));
928    }
929
930    #[test]
931    fn test_bond_between() {
932        let mol = ethane();
933        assert!(mol.bond_between(AtomIdx(0), AtomIdx(1)).is_some());
934        assert!(mol.bond_between(AtomIdx(1), AtomIdx(0)).is_some());
935    }
936
937    #[test]
938    fn test_duplicate_bond_error() {
939        let mut b = MoleculeBuilder::new();
940        let c1 = b.add_atom(Atom::new(Element::C));
941        let c2 = b.add_atom(Atom::new(Element::C));
942        b.add_bond(c1, c2, BondOrder::Single).unwrap();
943        let err = b.add_bond(c1, c2, BondOrder::Double);
944        assert!(matches!(err, Err(MolError::DuplicateBond(_, _))));
945    }
946
947    #[test]
948    fn test_formula() {
949        let mut b = MoleculeBuilder::new();
950        let c = b.add_atom(Atom::new(Element::C));
951        let n = b.add_atom(Atom::new(Element::N));
952        b.add_bond(c, n, BondOrder::Single).unwrap();
953        let mol = b.build();
954        assert_eq!(mol.formula(), "CN");
955    }
956
957    #[test]
958    fn test_implicit_hydrogen_count() {
959        // Isolated C atom (sp3, 4 bonds available): 4 implicit H
960        let mut b = MoleculeBuilder::new();
961        b.add_atom(Atom::organic(Element::C));
962        let mol = b.build();
963        assert_eq!(mol.implicit_hydrogen_count(AtomIdx(0)), 4);
964    }
965
966    #[test]
967    fn test_total_formula_methane() {
968        // Organic C atom with 0 explicit bonds → 4 implicit H → CH4
969        let mut b = MoleculeBuilder::new();
970        b.add_atom(Atom::organic(Element::C));
971        let mol = b.build();
972        assert_eq!(mol.total_formula(), "CH4");
973    }
974
975    #[test]
976    fn test_total_formula_no_hydrogen() {
977        // NaCl — neither Na nor Cl is in the organic subset, no implicit H
978        let mut b = MoleculeBuilder::new();
979        let na = b.add_atom(Atom::new(Element::NA));
980        let cl = b.add_atom(Atom::new(Element::CL));
981        b.add_bond(na, cl, BondOrder::Single).unwrap();
982        let mol = b.build();
983        assert_eq!(mol.total_formula(), "ClNa");
984    }
985
986    #[test]
987    fn test_with_atom_aromatic() {
988        let mol = ethane();
989        let updated = mol.with_atom_aromatic(AtomIdx(0), true);
990        assert!(updated.atom(AtomIdx(0)).aromatic);
991        assert!(!updated.atom(AtomIdx(1)).aromatic);
992    }
993
994    #[test]
995    fn test_with_bond_order() {
996        let mol = ethane();
997        let updated = mol.with_bond_order(BondIdx(0), BondOrder::Double);
998        assert_eq!(updated.bond(BondIdx(0)).order, BondOrder::Double);
999    }
1000
1001    // --- mutable API ---
1002
1003    #[test]
1004    fn test_add_remove_atom() {
1005        let mut mol = ethane();
1006        let n_idx = mol.add_atom(Atom::new(Element::N));
1007        assert_eq!(mol.atom_count(), 3);
1008        assert_eq!(mol.atom(n_idx).element.atomic_number(), 7);
1009
1010        let remap = mol.remove_atom(n_idx);
1011        assert_eq!(mol.atom_count(), 2);
1012        assert!(remap[n_idx.0 as usize].is_none());
1013    }
1014
1015    #[test]
1016    fn test_add_remove_bond() {
1017        let mut mol = ethane();
1018        let n_idx = mol.add_atom(Atom::new(Element::N));
1019        let bidx = mol.add_bond(AtomIdx(0), n_idx, BondOrder::Single).unwrap();
1020        assert_eq!(mol.bond_count(), 2);
1021        mol.remove_bond(bidx);
1022        assert_eq!(mol.bond_count(), 1);
1023    }
1024
1025    #[test]
1026    fn test_set_charge_element() {
1027        let mut mol = ethane();
1028        mol.set_charge(AtomIdx(0), 1);
1029        assert_eq!(mol.atom(AtomIdx(0)).charge, 1);
1030        mol.set_element(AtomIdx(0), Element::N);
1031        assert_eq!(mol.atom(AtomIdx(0)).element.atomic_number(), 7);
1032    }
1033
1034    #[test]
1035    fn test_is_connected() {
1036        let mol = ethane();
1037        assert!(mol.is_connected());
1038
1039        // Two separate atoms — disconnected
1040        let mut b = MoleculeBuilder::new();
1041        b.add_atom(Atom::new(Element::C));
1042        b.add_atom(Atom::new(Element::N));
1043        let disconnected = b.build();
1044        assert!(!disconnected.is_connected());
1045    }
1046
1047    #[test]
1048    fn test_fragments() {
1049        // "CC.N" — two components
1050        let mut b = MoleculeBuilder::new();
1051        let c1 = b.add_atom(Atom::organic(Element::C));
1052        let c2 = b.add_atom(Atom::organic(Element::C));
1053        b.add_bond(c1, c2, BondOrder::Single).unwrap();
1054        b.add_atom(Atom::new(Element::N)); // disconnected N
1055        let mol = b.build();
1056        let frags = mol.fragments();
1057        assert_eq!(frags.len(), 2);
1058        let sizes: std::collections::HashSet<usize> =
1059            frags.iter().map(|f| f.atom_count()).collect();
1060        assert!(sizes.contains(&2));
1061        assert!(sizes.contains(&1));
1062    }
1063
1064    #[test]
1065    fn test_builder_from_molecule() {
1066        let mol = ethane();
1067        let mut b = MoleculeBuilder::from_molecule(&mol);
1068        b.add_atom(Atom::new(Element::O));
1069        let mol2 = b.build();
1070        assert_eq!(mol2.atom_count(), 3);
1071        assert_eq!(mol2.bond_count(), 1); // original bond preserved
1072    }
1073
1074    // --- safe Option-returning variants ---
1075
1076    #[test]
1077    fn test_atom_opt_valid() {
1078        let mol = ethane();
1079        assert!(mol.atom_opt(AtomIdx(0)).is_some());
1080        assert!(mol.atom_opt(AtomIdx(1)).is_some());
1081        let atom = mol.atom_opt(AtomIdx(0)).unwrap();
1082        assert_eq!(atom.element.atomic_number(), 6);
1083    }
1084
1085    #[test]
1086    fn test_atom_opt_invalid() {
1087        let mol = ethane();
1088        assert!(mol.atom_opt(AtomIdx(2)).is_none());
1089        assert!(mol.atom_opt(AtomIdx(1000)).is_none());
1090    }
1091
1092    #[test]
1093    fn test_bond_opt_valid() {
1094        let mol = ethane();
1095        assert!(mol.bond_opt(BondIdx(0)).is_some());
1096        let bond = mol.bond_opt(BondIdx(0)).unwrap();
1097        assert_eq!(bond.order, BondOrder::Single);
1098    }
1099
1100    #[test]
1101    fn test_bond_opt_invalid() {
1102        let mol = ethane();
1103        assert!(mol.bond_opt(BondIdx(1)).is_none());
1104        assert!(mol.bond_opt(BondIdx(1000)).is_none());
1105    }
1106
1107    #[test]
1108    fn test_neighbors_opt_valid() {
1109        let mol = ethane();
1110        let neighbors = mol.neighbors_opt(AtomIdx(0)).unwrap();
1111        assert_eq!(neighbors.len(), 1);
1112        assert_eq!(neighbors[0].0, AtomIdx(1));
1113    }
1114
1115    #[test]
1116    fn test_neighbors_opt_isolated_atom() {
1117        let mut b = MoleculeBuilder::new();
1118        b.add_atom(Atom::new(Element::C));
1119        b.add_atom(Atom::new(Element::N));
1120        let mol = b.build();
1121        let neighbors = mol.neighbors_opt(AtomIdx(0)).unwrap();
1122        assert_eq!(neighbors.len(), 0);
1123    }
1124
1125    #[test]
1126    fn test_neighbors_opt_invalid() {
1127        let mol = ethane();
1128        assert!(mol.neighbors_opt(AtomIdx(2)).is_none());
1129        assert!(mol.neighbors_opt(AtomIdx(1000)).is_none());
1130    }
1131
1132    #[test]
1133    fn test_degree_opt_valid() {
1134        let mol = ethane();
1135        assert_eq!(mol.degree_opt(AtomIdx(0)), Some(1));
1136        assert_eq!(mol.degree_opt(AtomIdx(1)), Some(1));
1137    }
1138
1139    #[test]
1140    fn test_degree_opt_isolated_atom() {
1141        let mut b = MoleculeBuilder::new();
1142        b.add_atom(Atom::new(Element::C));
1143        b.add_atom(Atom::new(Element::N));
1144        let mol = b.build();
1145        assert_eq!(mol.degree_opt(AtomIdx(0)), Some(0));
1146        assert_eq!(mol.degree_opt(AtomIdx(1)), Some(0));
1147    }
1148
1149    #[test]
1150    fn test_degree_opt_invalid() {
1151        let mol = ethane();
1152        assert!(mol.degree_opt(AtomIdx(2)).is_none());
1153        assert!(mol.degree_opt(AtomIdx(1000)).is_none());
1154    }
1155
1156    #[test]
1157    fn test_degree_opt_multiple_bonds() {
1158        // Create a central atom with 3 neighbors
1159        let mut b = MoleculeBuilder::new();
1160        let center = b.add_atom(Atom::new(Element::C));
1161        let n1 = b.add_atom(Atom::new(Element::C));
1162        let n2 = b.add_atom(Atom::new(Element::N));
1163        let n3 = b.add_atom(Atom::new(Element::O));
1164        b.add_bond(center, n1, BondOrder::Single).unwrap();
1165        b.add_bond(center, n2, BondOrder::Double).unwrap();
1166        b.add_bond(center, n3, BondOrder::Single).unwrap();
1167        let mol = b.build();
1168        assert_eq!(mol.degree_opt(center), Some(3));
1169        assert_eq!(mol.degree_opt(n1), Some(1));
1170        assert_eq!(mol.degree_opt(n2), Some(1));
1171        assert_eq!(mol.degree_opt(n3), Some(1));
1172    }
1173}