Skip to main content

chematic_core/
molecule.rs

1//! Molecule graph: atoms, bonds, and adjacency list.
2
3use crate::atom::Atom;
4use crate::bond::{BondEntry, BondOrder};
5use crate::element::Element;
6use crate::stereo_group::StereoGroup;
7
8/// Newtype index for an atom in a Molecule.
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
10pub struct AtomIdx(pub u32);
11
12/// Newtype index for a bond in a Molecule.
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
14pub struct BondIdx(pub u32);
15
16/// Error types for molecule construction.
17#[derive(Debug, Clone, PartialEq, Eq)]
18pub enum MolError {
19    /// Atom index out of range.
20    InvalidAtomIdx(AtomIdx),
21    /// Duplicate bond between the same pair of atoms.
22    DuplicateBond(AtomIdx, AtomIdx),
23}
24
25impl core::fmt::Display for MolError {
26    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
27        match self {
28            Self::InvalidAtomIdx(idx) => write!(f, "invalid atom index: {}", idx.0),
29            Self::DuplicateBond(a, b) => {
30                write!(f, "duplicate bond between atoms {} and {}", a.0, b.0)
31            }
32        }
33    }
34}
35
36impl std::error::Error for MolError {}
37
38/// An immutable molecular graph built via [`MoleculeBuilder`].
39///
40/// Representation: atom list + bond list + per-atom adjacency list.
41/// No external graph library is used; all graph traversal is domain-aware.
42/// Sentinel used in `stereo_neighbor_order` to represent the implicit H in a bracket atom.
43pub const STEREO_H_SENTINEL: u32 = u32::MAX;
44
45pub struct Molecule {
46    atoms: Vec<Atom>,
47    bonds: Vec<BondEntry>,
48    /// adjacency[atom_idx] = list of (neighbor_atom_idx, bond_idx)
49    adjacency: Vec<Vec<(AtomIdx, BondIdx)>>,
50    /// Enhanced stereo groups (ChemDraw V3000 Absolute / Or / And).
51    stereo_groups: Vec<StereoGroup>,
52    /// SMILES-text-order neighbor sequence for chiral atoms.
53    ///
54    /// Keyed by atom index.  Each value lists the atom indices of neighbors in
55    /// the order they appeared in the SMILES string (including ring-closure
56    /// partners), with [`STEREO_H_SENTINEL`] (`u32::MAX`) standing in for the
57    /// implicit bracket H.  Populated by the SMILES parser; absent for atoms
58    /// not parsed from SMILES or without recorded stereo.
59    stereo_neighbor_order: std::collections::HashMap<u32, Vec<u32>>,
60}
61
62impl Molecule {
63    /// Number of heavy atoms (does not count implicit H).
64    pub fn atom_count(&self) -> usize {
65        self.atoms.len()
66    }
67
68    /// Number of bonds (edges).
69    pub fn bond_count(&self) -> usize {
70        self.bonds.len()
71    }
72
73    /// Borrow atom by index.
74    ///
75    /// # Panics
76    /// Panics if `idx` is out of range (should not happen with indices from this molecule).
77    ///
78    /// For a non-panicking variant, use [`Self::atom_opt`].
79    pub fn atom(&self, idx: AtomIdx) -> &Atom {
80        let i = idx.0 as usize;
81        if i >= self.atoms.len() {
82            panic!(
83                "atom index {} out of range (molecule has {} atoms)",
84                idx.0,
85                self.atoms.len()
86            );
87        }
88        &self.atoms[i]
89    }
90
91    /// Borrow atom by index, returning `None` if out of range.
92    pub fn atom_opt(&self, idx: AtomIdx) -> Option<&Atom> {
93        let i = idx.0 as usize;
94        if i < self.atoms.len() {
95            Some(&self.atoms[i])
96        } else {
97            None
98        }
99    }
100
101    /// Borrow bond by index.
102    ///
103    /// # Panics
104    /// Panics if `idx` is out of range (should not happen with indices from this molecule).
105    ///
106    /// For a non-panicking variant, use [`Self::bond_opt`].
107    pub fn bond(&self, idx: BondIdx) -> &BondEntry {
108        let i = idx.0 as usize;
109        if i >= self.bonds.len() {
110            panic!(
111                "bond index {} out of range (molecule has {} bonds)",
112                idx.0,
113                self.bonds.len()
114            );
115        }
116        &self.bonds[i]
117    }
118
119    /// Borrow bond by index, returning `None` if out of range.
120    pub fn bond_opt(&self, idx: BondIdx) -> Option<&BondEntry> {
121        let i = idx.0 as usize;
122        if i < self.bonds.len() {
123            Some(&self.bonds[i])
124        } else {
125            None
126        }
127    }
128
129    /// Iterate over all atoms as `(AtomIdx, &Atom)`.
130    pub fn atoms(&self) -> impl Iterator<Item = (AtomIdx, &Atom)> {
131        self.atoms
132            .iter()
133            .enumerate()
134            .map(|(i, a)| (AtomIdx(i as u32), a))
135    }
136
137    /// Iterate over all bonds as `(BondIdx, &BondEntry)`.
138    pub fn bonds(&self) -> impl Iterator<Item = (BondIdx, &BondEntry)> {
139        self.bonds
140            .iter()
141            .enumerate()
142            .map(|(i, b)| (BondIdx(i as u32), b))
143    }
144
145    /// Iterate over neighbors of `idx` as `(neighbor_atom_idx, bond_idx)`.
146    ///
147    /// # Panics
148    /// Panics if `idx` is out of range (should not happen with indices from this molecule).
149    ///
150    /// For a non-panicking variant, use [`Self::neighbors_opt`].
151    pub fn neighbors(&self, idx: AtomIdx) -> impl Iterator<Item = (AtomIdx, BondIdx)> + '_ {
152        let i = idx.0 as usize;
153        if i >= self.adjacency.len() {
154            panic!(
155                "atom index {} out of range (molecule has {} atoms)",
156                idx.0,
157                self.adjacency.len()
158            );
159        }
160        self.adjacency[i].iter().copied()
161    }
162
163    /// Iterate over neighbors of `idx` as `(neighbor_atom_idx, bond_idx)`, returning `None` if out of range.
164    pub fn neighbors_opt(&self, idx: AtomIdx) -> Option<Vec<(AtomIdx, BondIdx)>> {
165        let i = idx.0 as usize;
166        if i < self.adjacency.len() {
167            Some(self.adjacency[i].to_vec())
168        } else {
169            None
170        }
171    }
172
173    /// Degree (number of connected bonds) of atom `idx`.
174    ///
175    /// # Panics
176    /// Panics if `idx` is out of range (should not happen with indices from this molecule).
177    ///
178    /// For a non-panicking variant, use [`Self::degree_opt`].
179    pub fn degree(&self, idx: AtomIdx) -> usize {
180        let i = idx.0 as usize;
181        if i >= self.adjacency.len() {
182            panic!(
183                "atom index {} out of range (molecule has {} atoms)",
184                idx.0,
185                self.adjacency.len()
186            );
187        }
188        self.adjacency[i].len()
189    }
190
191    /// Degree (number of connected bonds) of atom `idx`, returning `None` if out of range.
192    pub fn degree_opt(&self, idx: AtomIdx) -> Option<usize> {
193        let i = idx.0 as usize;
194        if i < self.adjacency.len() {
195            Some(self.adjacency[i].len())
196        } else {
197            None
198        }
199    }
200
201    /// Return the bond between `a` and `b`, or `None` if not connected or indices are out of bounds.
202    pub fn bond_between(&self, a: AtomIdx, b: AtomIdx) -> Option<(BondIdx, &BondEntry)> {
203        let a_idx = a.0 as usize;
204        let b_idx = b.0 as usize;
205        if a_idx >= self.adjacency.len() || b_idx >= self.atoms.len() {
206            return None;
207        }
208        self.adjacency[a_idx]
209            .iter()
210            .find(|&&(nb, _)| nb == b)
211            .and_then(|&(_, bidx)| {
212                let bond_idx = bidx.0 as usize;
213                if bond_idx < self.bonds.len() {
214                    Some((bidx, &self.bonds[bond_idx]))
215                } else {
216                    None
217                }
218            })
219    }
220
221    /// Molecular formula as a Hill-order string (C first, H second, then alphabetical).
222    pub fn formula(&self) -> String {
223        use std::collections::BTreeMap;
224        let mut counts: BTreeMap<&str, u32> = BTreeMap::new();
225        for (_, atom) in self.atoms() {
226            *counts.entry(atom.element.symbol()).or_insert(0) += 1;
227        }
228        let mut result = Self::format_hill_order_formula(&counts);
229        let total_charge: i32 = self.atoms().map(|(_, a)| a.charge as i32).sum();
230        match total_charge {
231            0 => {}
232            1 => result.push('+'),
233            -1 => result.push('-'),
234            n if n > 0 => result.push_str(&format!("+{n}")),
235            n => result.push_str(&n.to_string()),
236        }
237        result
238    }
239}
240
241// ---------------------------------------------------------------------------
242// Immutable update methods (functional-style editing)
243// ---------------------------------------------------------------------------
244
245impl Molecule {
246    /// Format element counts in Hill order: C, H, then alphabetically.
247    fn format_hill_order_formula(counts: &std::collections::BTreeMap<&str, u32>) -> String {
248        let mut counts = counts.clone();
249        let mut result = String::new();
250        let push_count = |sym: &str, n: u32, out: &mut String| {
251            out.push_str(sym);
252            if n > 1 {
253                out.push_str(&n.to_string());
254            }
255        };
256        if let Some(c) = counts.remove("C") {
257            push_count("C", c, &mut result);
258        }
259        if let Some(h) = counts.remove("H")
260            && h > 0
261        {
262            push_count("H", h, &mut result);
263        }
264        for (sym, count) in &counts {
265            push_count(sym, *count, &mut result);
266        }
267        result
268    }
269
270    /// Return a new `Molecule` with one extra atom appended, along with the
271    /// index that the new atom will have in the returned molecule.
272    pub fn with_atom_added(&self, atom: Atom) -> (Molecule, AtomIdx) {
273        let mut builder = MoleculeBuilder::from_molecule(self);
274        let new_idx = builder.add_atom(atom);
275        (builder.build(), new_idx)
276    }
277
278    /// Return a new `Molecule` with one extra bond added, along with the index
279    /// of the newly added bond in the returned molecule.
280    ///
281    /// Returns `Err` if `a == b` or the bond already exists (same semantics as
282    /// [`MoleculeBuilder::add_bond`]).
283    pub fn with_bond_added(
284        &self,
285        a: AtomIdx,
286        b: AtomIdx,
287        order: BondOrder,
288    ) -> Result<(Molecule, BondIdx), MolError> {
289        let mut builder = MoleculeBuilder::from_molecule(self);
290        let bond_idx = builder.add_bond(a, b, order)?;
291        Ok((builder.build(), bond_idx))
292    }
293
294    /// Return a new `Molecule` with the formal charge of atom `idx` changed.
295    pub fn with_atom_charge(&self, idx: AtomIdx, charge: i8) -> Molecule {
296        let mut builder = MoleculeBuilder::new();
297        for (aidx, atom) in self.atoms() {
298            let mut a = atom.clone();
299            if aidx == idx {
300                a.charge = charge;
301            }
302            builder.add_atom(a);
303        }
304        for (_, bond) in self.bonds() {
305            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
306        }
307        builder.copy_stereo_from(self);
308        builder.build()
309    }
310
311    /// Return a new `Molecule` with the element of atom `idx` changed.
312    ///
313    /// Chirality and hydrogen count are reset to `None` when the element
314    /// changes, since those properties are element-specific.
315    pub fn with_atom_element(&self, idx: AtomIdx, el: Element) -> Molecule {
316        let mut builder = MoleculeBuilder::new();
317        for (aidx, atom) in self.atoms() {
318            let mut a = atom.clone();
319            if aidx == idx {
320                a.element = el;
321                // Reset element-specific fields so valence stays consistent.
322                a.chirality = crate::atom::Chirality::None;
323                a.hydrogen_count = None;
324                a.aromatic = false;
325            }
326            builder.add_atom(a);
327        }
328        for (_, bond) in self.bonds() {
329            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
330        }
331        builder.copy_stereo_from(self);
332        // Chirality was cleared for the changed atom; remove its stereo order too.
333        builder.clear_stereo_neighbor_order(idx);
334        builder.build()
335    }
336
337    /// Return a new `Molecule` with atom `idx` and all bonds involving it
338    /// removed.  Atom indices of survivors shift down past the removed slot.
339    ///
340    /// The returned tuple also includes a mapping from **old** `AtomIdx` to
341    /// **new** `AtomIdx` (indices that fall below `idx` are unchanged; indices
342    /// above `idx` decrease by 1).
343    pub fn with_atom_removed(&self, idx: AtomIdx) -> (Molecule, Vec<Option<AtomIdx>>) {
344        let n = self.atom_count();
345        let removed = idx.0 as usize;
346
347        // Build old→new index table.
348        let mut remap: Vec<Option<AtomIdx>> = vec![None; n];
349        let mut new_pos = 0u32;
350        for (old, slot) in remap.iter_mut().enumerate() {
351            if old == removed {
352                continue;
353            }
354            *slot = Some(AtomIdx(new_pos));
355            new_pos += 1;
356        }
357
358        let mut builder = MoleculeBuilder::new();
359        for (aidx, atom) in self.atoms() {
360            if aidx == idx {
361                continue;
362            }
363            builder.add_atom(atom.clone());
364        }
365        for (_, bond) in self.bonds() {
366            if bond.atom1 == idx || bond.atom2 == idx {
367                continue;
368            }
369            if let (Some(a1), Some(a2)) =
370                (remap[bond.atom1.0 as usize], remap[bond.atom2.0 as usize])
371            {
372                let _ = builder.add_bond(a1, a2, bond.order);
373            }
374        }
375        // Remap stereo neighbor order: drop removed atom's entry, remap neighbor indices.
376        for (old_key, order) in &self.stereo_neighbor_order {
377            let old_atom = *old_key as usize;
378            if old_atom == removed {
379                continue; // removed atom's stereo is gone
380            }
381            if let Some(Some(new_key)) = remap.get(old_atom) {
382                let new_order: Vec<u32> = order
383                    .iter()
384                    .filter_map(|&v| {
385                        if v == STEREO_H_SENTINEL {
386                            Some(STEREO_H_SENTINEL)
387                        } else if v as usize == removed {
388                            None // neighbor was the removed atom — stereo is now invalid
389                        } else {
390                            remap.get(v as usize).and_then(|r| r.map(|a| a.0))
391                        }
392                    })
393                    .collect();
394                builder.set_stereo_neighbor_order(*new_key, new_order);
395            }
396        }
397        (builder.build(), remap)
398    }
399
400    /// Implicit hydrogen count for atom `idx` based on valence rules.
401    ///
402    /// Delegates to [`crate::valence::implicit_hcount`].
403    pub fn implicit_hydrogen_count(&self, idx: AtomIdx) -> u8 {
404        crate::valence::implicit_hcount(self, idx)
405    }
406
407    /// Hill-order molecular formula including implicit hydrogens.
408    ///
409    /// Unlike [`Self::formula`] (which counts only explicit heavy atoms),
410    /// this method adds the implicit H count for every atom so the result
411    /// reflects the true molecular composition (e.g. methane → "CH4").
412    pub fn total_formula(&self) -> String {
413        use std::collections::BTreeMap;
414        let mut counts: BTreeMap<&str, u32> = BTreeMap::new();
415        let mut implicit_h: u32 = 0;
416        for (aidx, atom) in self.atoms() {
417            *counts.entry(atom.element.symbol()).or_insert(0) += 1;
418            implicit_h += crate::valence::implicit_hcount(self, aidx) as u32;
419        }
420        *counts.entry("H").or_insert(0) += implicit_h;
421        Self::format_hill_order_formula(&counts)
422    }
423
424    /// Hill-order molecular formula with isotope labels.
425    ///
426    /// Like [`Self::formula`] but prefixes each element symbol with its
427    /// isotope number when `atom.isotope` is `Some(n)`.
428    /// Example: a molecule with one `¹³C` and one `O` → `"¹³CO"`.
429    pub fn formula_with_isotopes(&self) -> String {
430        use std::collections::BTreeMap;
431        // Collect (isotope_prefix + symbol) counts, heavy atoms only.
432        let mut counts: BTreeMap<String, u32> = BTreeMap::new();
433        let mut has_carbon = false;
434        let mut has_explicit_h = false;
435        for (_, atom) in self.atoms() {
436            let sym = atom.element.symbol();
437            let key = match atom.isotope {
438                Some(n) => format!("{n}{sym}"),
439                None => sym.to_string(),
440            };
441            if sym == "C" && atom.isotope.is_none() {
442                has_carbon = true;
443            }
444            if sym == "H" {
445                has_explicit_h = true;
446            }
447            *counts.entry(key).or_insert(0) += 1;
448        }
449
450        let push_count = |key: &str, n: u32, out: &mut String| {
451            out.push_str(key);
452            if n > 1 {
453                out.push_str(&n.to_string());
454            }
455        };
456
457        let mut result = String::new();
458        // Hill order: C first (if unlabelled C present), then H, then rest alphabetically.
459        if has_carbon && let Some(c) = counts.remove("C") {
460            push_count("C", c, &mut result);
461        }
462        if has_explicit_h && let Some(h) = counts.remove("H") {
463            push_count("H", h, &mut result);
464        }
465        for (key, count) in &counts {
466            push_count(key, *count, &mut result);
467        }
468        result
469    }
470
471    /// Return a new `Molecule` with atom `idx`'s aromatic flag changed.
472    pub fn with_atom_aromatic(&self, idx: AtomIdx, aromatic: bool) -> Molecule {
473        let mut builder = MoleculeBuilder::new();
474        for (aidx, atom) in self.atoms() {
475            let mut a = atom.clone();
476            if aidx == idx {
477                a.aromatic = aromatic;
478            }
479            builder.add_atom(a);
480        }
481        for (_, bond) in self.bonds() {
482            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
483        }
484        builder.copy_stereo_from(self);
485        builder.build()
486    }
487
488    /// Return a new `Molecule` with bond `idx`'s order changed.
489    pub fn with_bond_order(&self, idx: BondIdx, order: BondOrder) -> Molecule {
490        let mut builder = MoleculeBuilder::new();
491        for (_, atom) in self.atoms() {
492            builder.add_atom(atom.clone());
493        }
494        for (bidx, bond) in self.bonds() {
495            let o = if bidx == idx { order } else { bond.order };
496            let _ = builder.add_bond(bond.atom1, bond.atom2, o);
497        }
498        builder.copy_stereo_from(self);
499        builder.build()
500    }
501
502    /// Return a new `Molecule` with bond `idx` removed.
503    ///
504    /// Atom indices are unchanged.  Bond indices of survivors shift down.
505    pub fn with_bond_removed(&self, idx: BondIdx) -> Molecule {
506        let mut builder = MoleculeBuilder::new();
507        for (_, atom) in self.atoms() {
508            builder.add_atom(atom.clone());
509        }
510        for (bidx, bond) in self.bonds() {
511            if bidx == idx {
512                continue;
513            }
514            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
515        }
516        builder.copy_stereo_from(self);
517        builder.build()
518    }
519}
520
521// ---------------------------------------------------------------------------
522// In-place mutation methods
523// ---------------------------------------------------------------------------
524
525impl Molecule {
526    /// Append a new atom and return its index.
527    pub fn add_atom(&mut self, atom: Atom) -> AtomIdx {
528        let idx = AtomIdx(self.atoms.len() as u32);
529        self.atoms.push(atom);
530        self.adjacency.push(vec![]);
531        idx
532    }
533
534    /// Remove atom `idx` and all bonds involving it.
535    ///
536    /// Returns a remapping table: `remap[old_idx]` gives the new `AtomIdx`
537    /// for surviving atoms, or `None` for the removed atom.  Atom indices
538    /// of atoms after the removed slot shift down by 1.
539    pub fn remove_atom(&mut self, idx: AtomIdx) -> Vec<Option<AtomIdx>> {
540        let n = self.atoms.len();
541        let removed = idx.0 as usize;
542
543        let mut remap: Vec<Option<AtomIdx>> = vec![None; n];
544        let mut new_pos = 0u32;
545        for (old, slot) in remap.iter_mut().enumerate() {
546            if old == removed {
547                continue;
548            }
549            *slot = Some(AtomIdx(new_pos));
550            new_pos += 1;
551        }
552
553        self.atoms.remove(removed);
554
555        // Keep only bonds not involving the removed atom; remap endpoints.
556        let mut new_bonds: Vec<BondEntry> = Vec::new();
557        for bond in &self.bonds {
558            if bond.atom1 == idx || bond.atom2 == idx {
559                continue;
560            }
561            if let (Some(a1), Some(a2)) =
562                (remap[bond.atom1.0 as usize], remap[bond.atom2.0 as usize])
563            {
564                new_bonds.push(BondEntry {
565                    atom1: a1,
566                    atom2: a2,
567                    order: bond.order,
568                });
569            }
570        }
571        self.bonds = new_bonds;
572
573        // Rebuild adjacency from scratch.
574        let new_n = self.atoms.len();
575        self.adjacency = vec![vec![]; new_n];
576        for (bidx, bond) in self.bonds.iter().enumerate() {
577            let bi = BondIdx(bidx as u32);
578            self.adjacency[bond.atom1.0 as usize].push((bond.atom2, bi));
579            self.adjacency[bond.atom2.0 as usize].push((bond.atom1, bi));
580        }
581
582        // Remap stereo neighbor order in-place.
583        let old_stereo = std::mem::take(&mut self.stereo_neighbor_order);
584        for (old_key, order) in old_stereo {
585            let old_atom = old_key as usize;
586            if old_atom == removed {
587                continue;
588            }
589            if let Some(Some(new_key)) = remap.get(old_atom) {
590                let new_order: Vec<u32> = order
591                    .iter()
592                    .filter_map(|&v| {
593                        if v == STEREO_H_SENTINEL {
594                            Some(STEREO_H_SENTINEL)
595                        } else if v as usize == removed {
596                            None
597                        } else {
598                            remap.get(v as usize).and_then(|r| r.map(|a| a.0))
599                        }
600                    })
601                    .collect();
602                self.stereo_neighbor_order.insert(new_key.0, new_order);
603            }
604        }
605
606        remap
607    }
608
609    /// Add a bond between `a` and `b` with the given `order`.
610    ///
611    /// Returns `Err` if `a == b` or the bond already exists.
612    pub fn add_bond(
613        &mut self,
614        a: AtomIdx,
615        b: AtomIdx,
616        order: BondOrder,
617    ) -> Result<BondIdx, MolError> {
618        let n = self.atoms.len() as u32;
619        if a.0 >= n {
620            return Err(MolError::InvalidAtomIdx(a));
621        }
622        if b.0 >= n {
623            return Err(MolError::InvalidAtomIdx(b));
624        }
625        if self.adjacency[a.0 as usize].iter().any(|&(nb, _)| nb == b) {
626            return Err(MolError::DuplicateBond(a, b));
627        }
628        let bidx = BondIdx(self.bonds.len() as u32);
629        self.bonds.push(BondEntry {
630            atom1: a,
631            atom2: b,
632            order,
633        });
634        self.adjacency[a.0 as usize].push((b, bidx));
635        self.adjacency[b.0 as usize].push((a, bidx));
636        Ok(bidx)
637    }
638
639    /// Remove bond `idx`.  Atom indices are unchanged; bond indices of
640    /// surviving bonds shift down past the removed slot.
641    pub fn remove_bond(&mut self, idx: BondIdx) {
642        let removed = idx.0 as usize;
643        if removed >= self.bonds.len() {
644            return;
645        }
646        self.bonds.remove(removed);
647        // Rebuild adjacency with renumbered bond indices.
648        let n = self.atoms.len();
649        self.adjacency = vec![vec![]; n];
650        for (bidx, bond) in self.bonds.iter().enumerate() {
651            let bi = BondIdx(bidx as u32);
652            self.adjacency[bond.atom1.0 as usize].push((bond.atom2, bi));
653            self.adjacency[bond.atom2.0 as usize].push((bond.atom1, bi));
654        }
655    }
656
657    /// Set the formal charge of atom `idx` in-place.
658    pub fn set_charge(&mut self, idx: AtomIdx, charge: i8) {
659        self.atoms[idx.0 as usize].charge = charge;
660    }
661
662    /// Set the element of atom `idx` in-place.
663    ///
664    /// Chirality and hydrogen count are reset (element-specific properties).
665    pub fn set_element(&mut self, idx: AtomIdx, el: Element) {
666        let a = &mut self.atoms[idx.0 as usize];
667        a.element = el;
668        a.chirality = crate::atom::Chirality::None;
669        a.hydrogen_count = None;
670        a.aromatic = false;
671    }
672
673    /// Set the CIP stereo code of atom `idx` in-place.
674    pub fn set_cip_code(&mut self, idx: AtomIdx, code: Option<crate::atom::CipCode>) {
675        self.atoms[idx.0 as usize].cip_code = code;
676    }
677
678    /// Return the enhanced stereo groups attached to this molecule.
679    pub fn stereo_groups(&self) -> &[StereoGroup] {
680        &self.stereo_groups
681    }
682
683    /// Replace the stereo group list in-place.
684    pub fn set_stereo_groups(&mut self, groups: Vec<StereoGroup>) {
685        self.stereo_groups = groups;
686    }
687
688    /// Add a single stereo group in-place.
689    pub fn add_stereo_group(&mut self, group: StereoGroup) {
690        self.stereo_groups.push(group);
691    }
692
693    /// SMILES-text-order neighbor sequence for a chiral atom.
694    ///
695    /// Returns `None` for atoms not parsed from SMILES or without stereo.
696    /// The slice contains neighbor atom indices in SMILES text order;
697    /// [`STEREO_H_SENTINEL`] (`u32::MAX`) marks the implicit bracket-H slot.
698    pub fn stereo_neighbor_order(&self, idx: AtomIdx) -> Option<&[u32]> {
699        self.stereo_neighbor_order.get(&idx.0).map(|v| v.as_slice())
700    }
701
702    /// Set the SMILES stereo neighbor order for atom `idx`.
703    pub fn set_stereo_neighbor_order(&mut self, idx: AtomIdx, order: Vec<u32>) {
704        self.stereo_neighbor_order.insert(idx.0, order);
705    }
706}
707
708// ---------------------------------------------------------------------------
709// Connectivity utilities
710// ---------------------------------------------------------------------------
711
712impl Molecule {
713    /// Return `true` if the molecule has exactly one connected component
714    /// (i.e. every atom can be reached from every other atom).
715    pub fn is_connected(&self) -> bool {
716        let n = self.atoms.len();
717        if n == 0 {
718            return true;
719        }
720        let mut visited = vec![false; n];
721        let mut stack = vec![AtomIdx(0)];
722        visited[0] = true;
723        let mut count = 1;
724        while let Some(cur) = stack.pop() {
725            for (nb, _) in self.neighbors(cur) {
726                if !visited[nb.0 as usize] {
727                    visited[nb.0 as usize] = true;
728                    count += 1;
729                    stack.push(nb);
730                }
731            }
732        }
733        count == n
734    }
735
736    /// Split the molecule into its connected components.
737    ///
738    /// Returns a `Vec` of sub-molecules, one per component.  Atoms are
739    /// renumbered within each sub-molecule starting at index 0.
740    pub fn fragments(&self) -> Vec<Molecule> {
741        let n = self.atoms.len();
742        if n == 0 {
743            return vec![];
744        }
745
746        let mut component: Vec<usize> = vec![usize::MAX; n];
747        let mut comp_id = 0;
748
749        for start in 0..n {
750            if component[start] != usize::MAX {
751                continue;
752            }
753            let mut stack = vec![start];
754            component[start] = comp_id;
755            while let Some(cur) = stack.pop() {
756                for (nb, _) in self.neighbors(AtomIdx(cur as u32)) {
757                    let ni = nb.0 as usize;
758                    if component[ni] == usize::MAX {
759                        component[ni] = comp_id;
760                        stack.push(ni);
761                    }
762                }
763            }
764            comp_id += 1;
765        }
766
767        (0..comp_id)
768            .map(|cid| {
769                let mut builder = MoleculeBuilder::new();
770                let mut old_to_new: std::collections::HashMap<AtomIdx, AtomIdx> =
771                    std::collections::HashMap::new();
772                for (aidx, atom) in self.atoms() {
773                    if component[aidx.0 as usize] == cid {
774                        let new_idx = builder.add_atom(atom.clone());
775                        old_to_new.insert(aidx, new_idx);
776                    }
777                }
778                for (_, bond) in self.bonds() {
779                    if let (Some(&a1), Some(&a2)) =
780                        (old_to_new.get(&bond.atom1), old_to_new.get(&bond.atom2))
781                    {
782                        let _ = builder.add_bond(a1, a2, bond.order);
783                    }
784                }
785                builder.build()
786            })
787            .collect()
788    }
789}
790
791/// Builder for constructing a [`Molecule`] incrementally.
792///
793/// Usage: add atoms, add bonds, then call `build()`.
794#[derive(Default)]
795pub struct MoleculeBuilder {
796    atoms: Vec<Atom>,
797    bonds: Vec<BondEntry>,
798    adjacency: Vec<Vec<(AtomIdx, BondIdx)>>,
799    stereo_groups: Vec<StereoGroup>,
800    stereo_neighbor_order: std::collections::HashMap<u32, Vec<u32>>,
801}
802
803impl MoleculeBuilder {
804    pub fn new() -> Self {
805        Self::default()
806    }
807
808    /// Create a builder pre-populated with all atoms and bonds from `mol`.
809    ///
810    /// Use this to make incremental edits to an existing molecule instead of
811    /// reconstructing it from scratch.
812    pub fn from_molecule(mol: &Molecule) -> Self {
813        let mut b = Self::new();
814        for (_, atom) in mol.atoms() {
815            b.add_atom(atom.clone());
816        }
817        for (_, bond) in mol.bonds() {
818            let _ = b.add_bond(bond.atom1, bond.atom2, bond.order);
819        }
820        b.stereo_groups = mol.stereo_groups.clone();
821        b.stereo_neighbor_order = mol.stereo_neighbor_order.clone();
822        b
823    }
824
825    /// Set the SMILES stereo neighbor order for atom `idx`.
826    pub fn set_stereo_neighbor_order(&mut self, idx: AtomIdx, order: Vec<u32>) {
827        self.stereo_neighbor_order.insert(idx.0, order);
828    }
829
830    /// Remove the stereo neighbor order entry for atom `idx`.
831    pub fn clear_stereo_neighbor_order(&mut self, idx: AtomIdx) {
832        self.stereo_neighbor_order.remove(&idx.0);
833    }
834
835    /// Append a stereo group to this builder.
836    pub fn add_stereo_group(&mut self, group: StereoGroup) {
837        self.stereo_groups.push(group);
838    }
839
840    /// Copy all stereo neighbor order entries from `mol` into this builder.
841    pub fn copy_stereo_from(&mut self, mol: &Molecule) {
842        self.stereo_neighbor_order = mol.stereo_neighbor_order.clone();
843    }
844
845    /// Read-only reference to an atom already added to the builder.
846    ///
847    /// Used by the SMILES parser to infer implicit bond types without
848    /// consuming the builder (e.g. aromatic-aromatic → Aromatic bond).
849    ///
850    /// # Panics
851    /// Panics if `idx` is out of range.
852    pub fn atom_at(&self, idx: AtomIdx) -> &Atom {
853        &self.atoms[idx.0 as usize]
854    }
855
856    /// Number of atoms added so far.
857    pub fn atom_count(&self) -> usize {
858        self.atoms.len()
859    }
860
861    /// Iterate over already-added neighbors of `idx` as `(bond_idx, neighbor_atom_idx)`.
862    /// Used by kekulization tests to check whether a bond already exists in the builder.
863    pub fn atom_neighbors(&self, idx: AtomIdx) -> impl Iterator<Item = (BondIdx, AtomIdx)> + '_ {
864        self.adjacency[idx.0 as usize]
865            .iter()
866            .map(|&(nb, bidx)| (bidx, nb))
867    }
868
869    /// Add an atom and return its index.
870    pub fn add_atom(&mut self, atom: Atom) -> AtomIdx {
871        let idx = AtomIdx(self.atoms.len() as u32);
872        self.atoms.push(atom);
873        self.adjacency.push(Vec::new());
874        idx
875    }
876
877    /// Add a bond between two existing atoms.
878    ///
879    /// Returns an error if either atom index is invalid or if the bond already exists.
880    pub fn add_bond(
881        &mut self,
882        a: AtomIdx,
883        b: AtomIdx,
884        order: BondOrder,
885    ) -> Result<BondIdx, MolError> {
886        let n = self.atoms.len() as u32;
887        if a.0 >= n {
888            return Err(MolError::InvalidAtomIdx(a));
889        }
890        if b.0 >= n {
891            return Err(MolError::InvalidAtomIdx(b));
892        }
893
894        // Check for duplicate
895        for &(nb, _) in &self.adjacency[a.0 as usize] {
896            if nb == b {
897                return Err(MolError::DuplicateBond(a, b));
898            }
899        }
900
901        let bidx = BondIdx(self.bonds.len() as u32);
902        self.bonds.push(BondEntry {
903            atom1: a,
904            atom2: b,
905            order,
906        });
907        self.adjacency[a.0 as usize].push((b, bidx));
908        self.adjacency[b.0 as usize].push((a, bidx));
909        Ok(bidx)
910    }
911
912    /// Consume the builder and return an immutable [`Molecule`].
913    pub fn build(self) -> Molecule {
914        Molecule {
915            atoms: self.atoms,
916            bonds: self.bonds,
917            adjacency: self.adjacency,
918            stereo_groups: self.stereo_groups,
919            stereo_neighbor_order: self.stereo_neighbor_order,
920        }
921    }
922}
923
924#[cfg(test)]
925mod tests {
926    use super::*;
927    use crate::atom::Atom;
928    use crate::element::Element;
929
930    fn ethane() -> Molecule {
931        let mut b = MoleculeBuilder::new();
932        let c1 = b.add_atom(Atom::new(Element::C));
933        let c2 = b.add_atom(Atom::new(Element::C));
934        b.add_bond(c1, c2, BondOrder::Single).unwrap();
935        b.build()
936    }
937
938    #[test]
939    fn test_basic_molecule() {
940        let mol = ethane();
941        assert_eq!(mol.atom_count(), 2);
942        assert_eq!(mol.bond_count(), 1);
943    }
944
945    #[test]
946    fn test_adjacency() {
947        let mol = ethane();
948        let neighbors: Vec<_> = mol.neighbors(AtomIdx(0)).collect();
949        assert_eq!(neighbors.len(), 1);
950        assert_eq!(neighbors[0].0, AtomIdx(1));
951    }
952
953    #[test]
954    fn test_bond_between() {
955        let mol = ethane();
956        assert!(mol.bond_between(AtomIdx(0), AtomIdx(1)).is_some());
957        assert!(mol.bond_between(AtomIdx(1), AtomIdx(0)).is_some());
958    }
959
960    #[test]
961    fn test_duplicate_bond_error() {
962        let mut b = MoleculeBuilder::new();
963        let c1 = b.add_atom(Atom::new(Element::C));
964        let c2 = b.add_atom(Atom::new(Element::C));
965        b.add_bond(c1, c2, BondOrder::Single).unwrap();
966        let err = b.add_bond(c1, c2, BondOrder::Double);
967        assert!(matches!(err, Err(MolError::DuplicateBond(_, _))));
968    }
969
970    #[test]
971    fn test_formula() {
972        let mut b = MoleculeBuilder::new();
973        let c = b.add_atom(Atom::new(Element::C));
974        let n = b.add_atom(Atom::new(Element::N));
975        b.add_bond(c, n, BondOrder::Single).unwrap();
976        let mol = b.build();
977        assert_eq!(mol.formula(), "CN");
978    }
979
980    #[test]
981    fn test_implicit_hydrogen_count() {
982        // Isolated C atom (sp3, 4 bonds available): 4 implicit H
983        let mut b = MoleculeBuilder::new();
984        b.add_atom(Atom::organic(Element::C));
985        let mol = b.build();
986        assert_eq!(mol.implicit_hydrogen_count(AtomIdx(0)), 4);
987    }
988
989    #[test]
990    fn test_total_formula_methane() {
991        // Organic C atom with 0 explicit bonds → 4 implicit H → CH4
992        let mut b = MoleculeBuilder::new();
993        b.add_atom(Atom::organic(Element::C));
994        let mol = b.build();
995        assert_eq!(mol.total_formula(), "CH4");
996    }
997
998    #[test]
999    fn test_total_formula_no_hydrogen() {
1000        // NaCl — neither Na nor Cl is in the organic subset, no implicit H
1001        let mut b = MoleculeBuilder::new();
1002        let na = b.add_atom(Atom::new(Element::NA));
1003        let cl = b.add_atom(Atom::new(Element::CL));
1004        b.add_bond(na, cl, BondOrder::Single).unwrap();
1005        let mol = b.build();
1006        assert_eq!(mol.total_formula(), "ClNa");
1007    }
1008
1009    #[test]
1010    fn test_with_atom_aromatic() {
1011        let mol = ethane();
1012        let updated = mol.with_atom_aromatic(AtomIdx(0), true);
1013        assert!(updated.atom(AtomIdx(0)).aromatic);
1014        assert!(!updated.atom(AtomIdx(1)).aromatic);
1015    }
1016
1017    #[test]
1018    fn test_with_bond_order() {
1019        let mol = ethane();
1020        let updated = mol.with_bond_order(BondIdx(0), BondOrder::Double);
1021        assert_eq!(updated.bond(BondIdx(0)).order, BondOrder::Double);
1022    }
1023
1024    // --- mutable API ---
1025
1026    #[test]
1027    fn test_add_remove_atom() {
1028        let mut mol = ethane();
1029        let n_idx = mol.add_atom(Atom::new(Element::N));
1030        assert_eq!(mol.atom_count(), 3);
1031        assert_eq!(mol.atom(n_idx).element.atomic_number(), 7);
1032
1033        let remap = mol.remove_atom(n_idx);
1034        assert_eq!(mol.atom_count(), 2);
1035        assert!(remap[n_idx.0 as usize].is_none());
1036    }
1037
1038    #[test]
1039    fn test_add_remove_bond() {
1040        let mut mol = ethane();
1041        let n_idx = mol.add_atom(Atom::new(Element::N));
1042        let bidx = mol.add_bond(AtomIdx(0), n_idx, BondOrder::Single).unwrap();
1043        assert_eq!(mol.bond_count(), 2);
1044        mol.remove_bond(bidx);
1045        assert_eq!(mol.bond_count(), 1);
1046    }
1047
1048    #[test]
1049    fn test_set_charge_element() {
1050        let mut mol = ethane();
1051        mol.set_charge(AtomIdx(0), 1);
1052        assert_eq!(mol.atom(AtomIdx(0)).charge, 1);
1053        mol.set_element(AtomIdx(0), Element::N);
1054        assert_eq!(mol.atom(AtomIdx(0)).element.atomic_number(), 7);
1055    }
1056
1057    #[test]
1058    fn test_is_connected() {
1059        let mol = ethane();
1060        assert!(mol.is_connected());
1061
1062        // Two separate atoms — disconnected
1063        let mut b = MoleculeBuilder::new();
1064        b.add_atom(Atom::new(Element::C));
1065        b.add_atom(Atom::new(Element::N));
1066        let disconnected = b.build();
1067        assert!(!disconnected.is_connected());
1068    }
1069
1070    #[test]
1071    fn test_fragments() {
1072        // "CC.N" — two components
1073        let mut b = MoleculeBuilder::new();
1074        let c1 = b.add_atom(Atom::organic(Element::C));
1075        let c2 = b.add_atom(Atom::organic(Element::C));
1076        b.add_bond(c1, c2, BondOrder::Single).unwrap();
1077        b.add_atom(Atom::new(Element::N)); // disconnected N
1078        let mol = b.build();
1079        let frags = mol.fragments();
1080        assert_eq!(frags.len(), 2);
1081        let sizes: std::collections::HashSet<usize> =
1082            frags.iter().map(|f| f.atom_count()).collect();
1083        assert!(sizes.contains(&2));
1084        assert!(sizes.contains(&1));
1085    }
1086
1087    #[test]
1088    fn test_builder_from_molecule() {
1089        let mol = ethane();
1090        let mut b = MoleculeBuilder::from_molecule(&mol);
1091        b.add_atom(Atom::new(Element::O));
1092        let mol2 = b.build();
1093        assert_eq!(mol2.atom_count(), 3);
1094        assert_eq!(mol2.bond_count(), 1); // original bond preserved
1095    }
1096
1097    // --- safe Option-returning variants ---
1098
1099    #[test]
1100    fn test_atom_opt_valid() {
1101        let mol = ethane();
1102        assert!(mol.atom_opt(AtomIdx(0)).is_some());
1103        assert!(mol.atom_opt(AtomIdx(1)).is_some());
1104        let atom = mol.atom_opt(AtomIdx(0)).unwrap();
1105        assert_eq!(atom.element.atomic_number(), 6);
1106    }
1107
1108    #[test]
1109    fn test_atom_opt_invalid() {
1110        let mol = ethane();
1111        assert!(mol.atom_opt(AtomIdx(2)).is_none());
1112        assert!(mol.atom_opt(AtomIdx(1000)).is_none());
1113    }
1114
1115    #[test]
1116    fn test_bond_opt_valid() {
1117        let mol = ethane();
1118        assert!(mol.bond_opt(BondIdx(0)).is_some());
1119        let bond = mol.bond_opt(BondIdx(0)).unwrap();
1120        assert_eq!(bond.order, BondOrder::Single);
1121    }
1122
1123    #[test]
1124    fn test_bond_opt_invalid() {
1125        let mol = ethane();
1126        assert!(mol.bond_opt(BondIdx(1)).is_none());
1127        assert!(mol.bond_opt(BondIdx(1000)).is_none());
1128    }
1129
1130    #[test]
1131    fn test_neighbors_opt_valid() {
1132        let mol = ethane();
1133        let neighbors = mol.neighbors_opt(AtomIdx(0)).unwrap();
1134        assert_eq!(neighbors.len(), 1);
1135        assert_eq!(neighbors[0].0, AtomIdx(1));
1136    }
1137
1138    #[test]
1139    fn test_neighbors_opt_isolated_atom() {
1140        let mut b = MoleculeBuilder::new();
1141        b.add_atom(Atom::new(Element::C));
1142        b.add_atom(Atom::new(Element::N));
1143        let mol = b.build();
1144        let neighbors = mol.neighbors_opt(AtomIdx(0)).unwrap();
1145        assert_eq!(neighbors.len(), 0);
1146    }
1147
1148    #[test]
1149    fn test_neighbors_opt_invalid() {
1150        let mol = ethane();
1151        assert!(mol.neighbors_opt(AtomIdx(2)).is_none());
1152        assert!(mol.neighbors_opt(AtomIdx(1000)).is_none());
1153    }
1154
1155    #[test]
1156    fn test_degree_opt_valid() {
1157        let mol = ethane();
1158        assert_eq!(mol.degree_opt(AtomIdx(0)), Some(1));
1159        assert_eq!(mol.degree_opt(AtomIdx(1)), Some(1));
1160    }
1161
1162    #[test]
1163    fn test_degree_opt_isolated_atom() {
1164        let mut b = MoleculeBuilder::new();
1165        b.add_atom(Atom::new(Element::C));
1166        b.add_atom(Atom::new(Element::N));
1167        let mol = b.build();
1168        assert_eq!(mol.degree_opt(AtomIdx(0)), Some(0));
1169        assert_eq!(mol.degree_opt(AtomIdx(1)), Some(0));
1170    }
1171
1172    #[test]
1173    fn test_degree_opt_invalid() {
1174        let mol = ethane();
1175        assert!(mol.degree_opt(AtomIdx(2)).is_none());
1176        assert!(mol.degree_opt(AtomIdx(1000)).is_none());
1177    }
1178
1179    #[test]
1180    fn test_degree_opt_multiple_bonds() {
1181        // Create a central atom with 3 neighbors
1182        let mut b = MoleculeBuilder::new();
1183        let center = b.add_atom(Atom::new(Element::C));
1184        let n1 = b.add_atom(Atom::new(Element::C));
1185        let n2 = b.add_atom(Atom::new(Element::N));
1186        let n3 = b.add_atom(Atom::new(Element::O));
1187        b.add_bond(center, n1, BondOrder::Single).unwrap();
1188        b.add_bond(center, n2, BondOrder::Double).unwrap();
1189        b.add_bond(center, n3, BondOrder::Single).unwrap();
1190        let mol = b.build();
1191        assert_eq!(mol.degree_opt(center), Some(3));
1192        assert_eq!(mol.degree_opt(n1), Some(1));
1193        assert_eq!(mol.degree_opt(n2), Some(1));
1194        assert_eq!(mol.degree_opt(n3), Some(1));
1195    }
1196}