Skip to main content

chematic_core/
molecule.rs

1//! Molecule graph: atoms, bonds, and adjacency list.
2
3use crate::atom::Atom;
4use crate::bond::{BondEntry, BondOrder};
5use crate::element::Element;
6use crate::stereo_group::StereoGroup;
7
8/// Newtype index for an atom in a Molecule.
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
10pub struct AtomIdx(pub u32);
11
12/// Newtype index for a bond in a Molecule.
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
14pub struct BondIdx(pub u32);
15
16/// Error types for molecule construction.
17#[derive(Debug, Clone, PartialEq, Eq)]
18pub enum MolError {
19    /// Atom index out of range.
20    InvalidAtomIdx(AtomIdx),
21    /// Duplicate bond between the same pair of atoms.
22    DuplicateBond(AtomIdx, AtomIdx),
23}
24
25impl core::fmt::Display for MolError {
26    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
27        match self {
28            Self::InvalidAtomIdx(idx) => write!(f, "invalid atom index: {}", idx.0),
29            Self::DuplicateBond(a, b) => {
30                write!(f, "duplicate bond between atoms {} and {}", a.0, b.0)
31            }
32        }
33    }
34}
35
36impl std::error::Error for MolError {}
37
38/// An immutable molecular graph built via [`MoleculeBuilder`].
39///
40/// Representation: atom list + bond list + per-atom adjacency list.
41/// No external graph library is used; all graph traversal is domain-aware.
42/// Sentinel used in `stereo_neighbor_order` to represent the implicit H in a bracket atom.
43pub const STEREO_H_SENTINEL: u32 = u32::MAX;
44
45pub struct Molecule {
46    atoms: Vec<Atom>,
47    bonds: Vec<BondEntry>,
48    /// adjacency[atom_idx] = list of (neighbor_atom_idx, bond_idx)
49    adjacency: Vec<Vec<(AtomIdx, BondIdx)>>,
50    /// Enhanced stereo groups (ChemDraw V3000 Absolute / Or / And).
51    stereo_groups: Vec<StereoGroup>,
52    /// SMILES-text-order neighbor sequence for chiral atoms.
53    ///
54    /// Keyed by atom index.  Each value lists the atom indices of neighbors in
55    /// the order they appeared in the SMILES string (including ring-closure
56    /// partners), with [`STEREO_H_SENTINEL`] (`u32::MAX`) standing in for the
57    /// implicit bracket H.  Populated by the SMILES parser; absent for atoms
58    /// not parsed from SMILES or without recorded stereo.
59    stereo_neighbor_order: std::collections::HashMap<u32, Vec<u32>>,
60}
61
62impl Molecule {
63    /// Number of heavy atoms (does not count implicit H).
64    pub fn atom_count(&self) -> usize {
65        self.atoms.len()
66    }
67
68    /// Number of bonds (edges).
69    pub fn bond_count(&self) -> usize {
70        self.bonds.len()
71    }
72
73    /// Borrow atom by index.
74    ///
75    /// # Panics
76    /// Panics if `idx` is out of range (should not happen with indices from this molecule).
77    ///
78    /// For a non-panicking variant, use [`Self::atom_opt`].
79    pub fn atom(&self, idx: AtomIdx) -> &Atom {
80        let i = idx.0 as usize;
81        if i >= self.atoms.len() {
82            panic!("atom index {} out of range (molecule has {} atoms)", idx.0, self.atoms.len());
83        }
84        &self.atoms[i]
85    }
86
87    /// Borrow atom by index, returning `None` if out of range.
88    pub fn atom_opt(&self, idx: AtomIdx) -> Option<&Atom> {
89        let i = idx.0 as usize;
90        if i < self.atoms.len() {
91            Some(&self.atoms[i])
92        } else {
93            None
94        }
95    }
96
97    /// Borrow bond by index.
98    ///
99    /// # Panics
100    /// Panics if `idx` is out of range (should not happen with indices from this molecule).
101    ///
102    /// For a non-panicking variant, use [`Self::bond_opt`].
103    pub fn bond(&self, idx: BondIdx) -> &BondEntry {
104        let i = idx.0 as usize;
105        if i >= self.bonds.len() {
106            panic!("bond index {} out of range (molecule has {} bonds)", idx.0, self.bonds.len());
107        }
108        &self.bonds[i]
109    }
110
111    /// Borrow bond by index, returning `None` if out of range.
112    pub fn bond_opt(&self, idx: BondIdx) -> Option<&BondEntry> {
113        let i = idx.0 as usize;
114        if i < self.bonds.len() {
115            Some(&self.bonds[i])
116        } else {
117            None
118        }
119    }
120
121    /// Iterate over all atoms as `(AtomIdx, &Atom)`.
122    pub fn atoms(&self) -> impl Iterator<Item = (AtomIdx, &Atom)> {
123        self.atoms
124            .iter()
125            .enumerate()
126            .map(|(i, a)| (AtomIdx(i as u32), a))
127    }
128
129    /// Iterate over all bonds as `(BondIdx, &BondEntry)`.
130    pub fn bonds(&self) -> impl Iterator<Item = (BondIdx, &BondEntry)> {
131        self.bonds
132            .iter()
133            .enumerate()
134            .map(|(i, b)| (BondIdx(i as u32), b))
135    }
136
137    /// Iterate over neighbors of `idx` as `(neighbor_atom_idx, bond_idx)`.
138    ///
139    /// # Panics
140    /// Panics if `idx` is out of range (should not happen with indices from this molecule).
141    ///
142    /// For a non-panicking variant, use [`Self::neighbors_opt`].
143    pub fn neighbors(&self, idx: AtomIdx) -> impl Iterator<Item = (AtomIdx, BondIdx)> + '_ {
144        let i = idx.0 as usize;
145        if i >= self.adjacency.len() {
146            panic!("atom index {} out of range (molecule has {} atoms)", idx.0, self.adjacency.len());
147        }
148        self.adjacency[i].iter().copied()
149    }
150
151    /// Iterate over neighbors of `idx` as `(neighbor_atom_idx, bond_idx)`, returning `None` if out of range.
152    pub fn neighbors_opt(&self, idx: AtomIdx) -> Option<Vec<(AtomIdx, BondIdx)>> {
153        let i = idx.0 as usize;
154        if i < self.adjacency.len() {
155            Some(self.adjacency[i].to_vec())
156        } else {
157            None
158        }
159    }
160
161    /// Degree (number of connected bonds) of atom `idx`.
162    ///
163    /// # Panics
164    /// Panics if `idx` is out of range (should not happen with indices from this molecule).
165    ///
166    /// For a non-panicking variant, use [`Self::degree_opt`].
167    pub fn degree(&self, idx: AtomIdx) -> usize {
168        let i = idx.0 as usize;
169        if i >= self.adjacency.len() {
170            panic!("atom index {} out of range (molecule has {} atoms)", idx.0, self.adjacency.len());
171        }
172        self.adjacency[i].len()
173    }
174
175    /// Degree (number of connected bonds) of atom `idx`, returning `None` if out of range.
176    pub fn degree_opt(&self, idx: AtomIdx) -> Option<usize> {
177        let i = idx.0 as usize;
178        if i < self.adjacency.len() {
179            Some(self.adjacency[i].len())
180        } else {
181            None
182        }
183    }
184
185    /// Return the bond between `a` and `b`, or `None` if not connected or indices are out of bounds.
186    pub fn bond_between(&self, a: AtomIdx, b: AtomIdx) -> Option<(BondIdx, &BondEntry)> {
187        let a_idx = a.0 as usize;
188        let b_idx = b.0 as usize;
189        if a_idx >= self.adjacency.len() || b_idx >= self.atoms.len() {
190            return None;
191        }
192        self.adjacency[a_idx]
193            .iter()
194            .find(|&&(nb, _)| nb == b)
195            .and_then(|&(_, bidx)| {
196                let bond_idx = bidx.0 as usize;
197                if bond_idx < self.bonds.len() {
198                    Some((bidx, &self.bonds[bond_idx]))
199                } else {
200                    None
201                }
202            })
203    }
204
205    /// Molecular formula as a Hill-order string (C first, H second, then alphabetical).
206    pub fn formula(&self) -> String {
207        use std::collections::BTreeMap;
208        let mut counts: BTreeMap<&str, u32> = BTreeMap::new();
209        for (_, atom) in self.atoms() {
210            *counts.entry(atom.element.symbol()).or_insert(0) += 1;
211        }
212        let mut result = Self::format_hill_order_formula(&counts);
213        let total_charge: i32 = self.atoms().map(|(_, a)| a.charge as i32).sum();
214        match total_charge {
215            0 => {}
216            1 => result.push('+'),
217            -1 => result.push('-'),
218            n if n > 0 => result.push_str(&format!("+{n}")),
219            n => result.push_str(&n.to_string()),
220        }
221        result
222    }
223}
224
225// ---------------------------------------------------------------------------
226// Immutable update methods (functional-style editing)
227// ---------------------------------------------------------------------------
228
229impl Molecule {
230    /// Format element counts in Hill order: C, H, then alphabetically.
231    fn format_hill_order_formula(counts: &std::collections::BTreeMap<&str, u32>) -> String {
232        let mut counts = counts.clone();
233        let mut result = String::new();
234        let push_count = |sym: &str, n: u32, out: &mut String| {
235            out.push_str(sym);
236            if n > 1 {
237                out.push_str(&n.to_string());
238            }
239        };
240        if let Some(c) = counts.remove("C") {
241            push_count("C", c, &mut result);
242        }
243        if let Some(h) = counts.remove("H")
244            && h > 0
245        {
246            push_count("H", h, &mut result);
247        }
248        for (sym, count) in &counts {
249            push_count(sym, *count, &mut result);
250        }
251        result
252    }
253
254    /// Return a new `Molecule` with one extra atom appended, along with the
255    /// index that the new atom will have in the returned molecule.
256    pub fn with_atom_added(&self, atom: Atom) -> (Molecule, AtomIdx) {
257        let mut builder = MoleculeBuilder::from_molecule(self);
258        let new_idx = builder.add_atom(atom);
259        (builder.build(), new_idx)
260    }
261
262    /// Return a new `Molecule` with one extra bond added, along with the index
263    /// of the newly added bond in the returned molecule.
264    ///
265    /// Returns `Err` if `a == b` or the bond already exists (same semantics as
266    /// [`MoleculeBuilder::add_bond`]).
267    pub fn with_bond_added(
268        &self,
269        a: AtomIdx,
270        b: AtomIdx,
271        order: BondOrder,
272    ) -> Result<(Molecule, BondIdx), MolError> {
273        let mut builder = MoleculeBuilder::from_molecule(self);
274        let bond_idx = builder.add_bond(a, b, order)?;
275        Ok((builder.build(), bond_idx))
276    }
277
278    /// Return a new `Molecule` with the formal charge of atom `idx` changed.
279    pub fn with_atom_charge(&self, idx: AtomIdx, charge: i8) -> Molecule {
280        let mut builder = MoleculeBuilder::new();
281        for (aidx, atom) in self.atoms() {
282            let mut a = atom.clone();
283            if aidx == idx {
284                a.charge = charge;
285            }
286            builder.add_atom(a);
287        }
288        for (_, bond) in self.bonds() {
289            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
290        }
291        builder.copy_stereo_from(self);
292        builder.build()
293    }
294
295    /// Return a new `Molecule` with the element of atom `idx` changed.
296    ///
297    /// Chirality and hydrogen count are reset to `None` when the element
298    /// changes, since those properties are element-specific.
299    pub fn with_atom_element(&self, idx: AtomIdx, el: Element) -> Molecule {
300        let mut builder = MoleculeBuilder::new();
301        for (aidx, atom) in self.atoms() {
302            let mut a = atom.clone();
303            if aidx == idx {
304                a.element = el;
305                // Reset element-specific fields so valence stays consistent.
306                a.chirality = crate::atom::Chirality::None;
307                a.hydrogen_count = None;
308                a.aromatic = false;
309            }
310            builder.add_atom(a);
311        }
312        for (_, bond) in self.bonds() {
313            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
314        }
315        builder.copy_stereo_from(self);
316        // Chirality was cleared for the changed atom; remove its stereo order too.
317        builder.clear_stereo_neighbor_order(idx);
318        builder.build()
319    }
320
321    /// Return a new `Molecule` with atom `idx` and all bonds involving it
322    /// removed.  Atom indices of survivors shift down past the removed slot.
323    ///
324    /// The returned tuple also includes a mapping from **old** `AtomIdx` to
325    /// **new** `AtomIdx` (indices that fall below `idx` are unchanged; indices
326    /// above `idx` decrease by 1).
327    pub fn with_atom_removed(&self, idx: AtomIdx) -> (Molecule, Vec<Option<AtomIdx>>) {
328        let n = self.atom_count();
329        let removed = idx.0 as usize;
330
331        // Build old→new index table.
332        let mut remap: Vec<Option<AtomIdx>> = vec![None; n];
333        let mut new_pos = 0u32;
334        for (old, slot) in remap.iter_mut().enumerate() {
335            if old == removed {
336                continue;
337            }
338            *slot = Some(AtomIdx(new_pos));
339            new_pos += 1;
340        }
341
342        let mut builder = MoleculeBuilder::new();
343        for (aidx, atom) in self.atoms() {
344            if aidx == idx {
345                continue;
346            }
347            builder.add_atom(atom.clone());
348        }
349        for (_, bond) in self.bonds() {
350            if bond.atom1 == idx || bond.atom2 == idx {
351                continue;
352            }
353            if let (Some(a1), Some(a2)) =
354                (remap[bond.atom1.0 as usize], remap[bond.atom2.0 as usize])
355            {
356                let _ = builder.add_bond(a1, a2, bond.order);
357            }
358        }
359        // Remap stereo neighbor order: drop removed atom's entry, remap neighbor indices.
360        for (old_key, order) in &self.stereo_neighbor_order {
361            let old_atom = *old_key as usize;
362            if old_atom == removed {
363                continue; // removed atom's stereo is gone
364            }
365            if let Some(Some(new_key)) = remap.get(old_atom) {
366                let new_order: Vec<u32> = order
367                    .iter()
368                    .filter_map(|&v| {
369                        if v == STEREO_H_SENTINEL {
370                            Some(STEREO_H_SENTINEL)
371                        } else if v as usize == removed {
372                            None // neighbor was the removed atom — stereo is now invalid
373                        } else {
374                            remap.get(v as usize).and_then(|r| r.map(|a| a.0))
375                        }
376                    })
377                    .collect();
378                builder.set_stereo_neighbor_order(*new_key, new_order);
379            }
380        }
381        (builder.build(), remap)
382    }
383
384    /// Implicit hydrogen count for atom `idx` based on valence rules.
385    ///
386    /// Delegates to [`crate::valence::implicit_hcount`].
387    pub fn implicit_hydrogen_count(&self, idx: AtomIdx) -> u8 {
388        crate::valence::implicit_hcount(self, idx)
389    }
390
391    /// Hill-order molecular formula including implicit hydrogens.
392    ///
393    /// Unlike [`Self::formula`] (which counts only explicit heavy atoms),
394    /// this method adds the implicit H count for every atom so the result
395    /// reflects the true molecular composition (e.g. methane → "CH4").
396    pub fn total_formula(&self) -> String {
397        use std::collections::BTreeMap;
398        let mut counts: BTreeMap<&str, u32> = BTreeMap::new();
399        let mut implicit_h: u32 = 0;
400        for (aidx, atom) in self.atoms() {
401            *counts.entry(atom.element.symbol()).or_insert(0) += 1;
402            implicit_h += crate::valence::implicit_hcount(self, aidx) as u32;
403        }
404        *counts.entry("H").or_insert(0) += implicit_h;
405        Self::format_hill_order_formula(&counts)
406    }
407
408    /// Hill-order molecular formula with isotope labels.
409    ///
410    /// Like [`Self::formula`] but prefixes each element symbol with its
411    /// isotope number when `atom.isotope` is `Some(n)`.
412    /// Example: a molecule with one `¹³C` and one `O` → `"¹³CO"`.
413    pub fn formula_with_isotopes(&self) -> String {
414        use std::collections::BTreeMap;
415        // Collect (isotope_prefix + symbol) counts, heavy atoms only.
416        let mut counts: BTreeMap<String, u32> = BTreeMap::new();
417        let mut has_carbon = false;
418        let mut has_explicit_h = false;
419        for (_, atom) in self.atoms() {
420            let sym = atom.element.symbol();
421            let key = match atom.isotope {
422                Some(n) => format!("{n}{sym}"),
423                None => sym.to_string(),
424            };
425            if sym == "C" && atom.isotope.is_none() {
426                has_carbon = true;
427            }
428            if sym == "H" {
429                has_explicit_h = true;
430            }
431            *counts.entry(key).or_insert(0) += 1;
432        }
433
434        let push_count = |key: &str, n: u32, out: &mut String| {
435            out.push_str(key);
436            if n > 1 {
437                out.push_str(&n.to_string());
438            }
439        };
440
441        let mut result = String::new();
442        // Hill order: C first (if unlabelled C present), then H, then rest alphabetically.
443        if has_carbon && let Some(c) = counts.remove("C") {
444            push_count("C", c, &mut result);
445        }
446        if has_explicit_h && let Some(h) = counts.remove("H") {
447            push_count("H", h, &mut result);
448        }
449        for (key, count) in &counts {
450            push_count(key, *count, &mut result);
451        }
452        result
453    }
454
455    /// Return a new `Molecule` with atom `idx`'s aromatic flag changed.
456    pub fn with_atom_aromatic(&self, idx: AtomIdx, aromatic: bool) -> Molecule {
457        let mut builder = MoleculeBuilder::new();
458        for (aidx, atom) in self.atoms() {
459            let mut a = atom.clone();
460            if aidx == idx {
461                a.aromatic = aromatic;
462            }
463            builder.add_atom(a);
464        }
465        for (_, bond) in self.bonds() {
466            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
467        }
468        builder.copy_stereo_from(self);
469        builder.build()
470    }
471
472    /// Return a new `Molecule` with bond `idx`'s order changed.
473    pub fn with_bond_order(&self, idx: BondIdx, order: BondOrder) -> Molecule {
474        let mut builder = MoleculeBuilder::new();
475        for (_, atom) in self.atoms() {
476            builder.add_atom(atom.clone());
477        }
478        for (bidx, bond) in self.bonds() {
479            let o = if bidx == idx { order } else { bond.order };
480            let _ = builder.add_bond(bond.atom1, bond.atom2, o);
481        }
482        builder.copy_stereo_from(self);
483        builder.build()
484    }
485
486    /// Return a new `Molecule` with bond `idx` removed.
487    ///
488    /// Atom indices are unchanged.  Bond indices of survivors shift down.
489    pub fn with_bond_removed(&self, idx: BondIdx) -> Molecule {
490        let mut builder = MoleculeBuilder::new();
491        for (_, atom) in self.atoms() {
492            builder.add_atom(atom.clone());
493        }
494        for (bidx, bond) in self.bonds() {
495            if bidx == idx {
496                continue;
497            }
498            let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
499        }
500        builder.copy_stereo_from(self);
501        builder.build()
502    }
503}
504
505// ---------------------------------------------------------------------------
506// In-place mutation methods
507// ---------------------------------------------------------------------------
508
509impl Molecule {
510    /// Append a new atom and return its index.
511    pub fn add_atom(&mut self, atom: Atom) -> AtomIdx {
512        let idx = AtomIdx(self.atoms.len() as u32);
513        self.atoms.push(atom);
514        self.adjacency.push(vec![]);
515        idx
516    }
517
518    /// Remove atom `idx` and all bonds involving it.
519    ///
520    /// Returns a remapping table: `remap[old_idx]` gives the new `AtomIdx`
521    /// for surviving atoms, or `None` for the removed atom.  Atom indices
522    /// of atoms after the removed slot shift down by 1.
523    pub fn remove_atom(&mut self, idx: AtomIdx) -> Vec<Option<AtomIdx>> {
524        let n = self.atoms.len();
525        let removed = idx.0 as usize;
526
527        let mut remap: Vec<Option<AtomIdx>> = vec![None; n];
528        let mut new_pos = 0u32;
529        for (old, slot) in remap.iter_mut().enumerate() {
530            if old == removed {
531                continue;
532            }
533            *slot = Some(AtomIdx(new_pos));
534            new_pos += 1;
535        }
536
537        self.atoms.remove(removed);
538
539        // Keep only bonds not involving the removed atom; remap endpoints.
540        let mut new_bonds: Vec<BondEntry> = Vec::new();
541        for bond in &self.bonds {
542            if bond.atom1 == idx || bond.atom2 == idx {
543                continue;
544            }
545            if let (Some(a1), Some(a2)) =
546                (remap[bond.atom1.0 as usize], remap[bond.atom2.0 as usize])
547            {
548                new_bonds.push(BondEntry {
549                    atom1: a1,
550                    atom2: a2,
551                    order: bond.order,
552                });
553            }
554        }
555        self.bonds = new_bonds;
556
557        // Rebuild adjacency from scratch.
558        let new_n = self.atoms.len();
559        self.adjacency = vec![vec![]; new_n];
560        for (bidx, bond) in self.bonds.iter().enumerate() {
561            let bi = BondIdx(bidx as u32);
562            self.adjacency[bond.atom1.0 as usize].push((bond.atom2, bi));
563            self.adjacency[bond.atom2.0 as usize].push((bond.atom1, bi));
564        }
565
566        // Remap stereo neighbor order in-place.
567        let old_stereo = std::mem::take(&mut self.stereo_neighbor_order);
568        for (old_key, order) in old_stereo {
569            let old_atom = old_key as usize;
570            if old_atom == removed {
571                continue;
572            }
573            if let Some(Some(new_key)) = remap.get(old_atom) {
574                let new_order: Vec<u32> = order
575                    .iter()
576                    .filter_map(|&v| {
577                        if v == STEREO_H_SENTINEL {
578                            Some(STEREO_H_SENTINEL)
579                        } else if v as usize == removed {
580                            None
581                        } else {
582                            remap.get(v as usize).and_then(|r| r.map(|a| a.0))
583                        }
584                    })
585                    .collect();
586                self.stereo_neighbor_order.insert(new_key.0, new_order);
587            }
588        }
589
590        remap
591    }
592
593    /// Add a bond between `a` and `b` with the given `order`.
594    ///
595    /// Returns `Err` if `a == b` or the bond already exists.
596    pub fn add_bond(
597        &mut self,
598        a: AtomIdx,
599        b: AtomIdx,
600        order: BondOrder,
601    ) -> Result<BondIdx, MolError> {
602        let n = self.atoms.len() as u32;
603        if a.0 >= n {
604            return Err(MolError::InvalidAtomIdx(a));
605        }
606        if b.0 >= n {
607            return Err(MolError::InvalidAtomIdx(b));
608        }
609        if self.adjacency[a.0 as usize].iter().any(|&(nb, _)| nb == b) {
610            return Err(MolError::DuplicateBond(a, b));
611        }
612        let bidx = BondIdx(self.bonds.len() as u32);
613        self.bonds.push(BondEntry {
614            atom1: a,
615            atom2: b,
616            order,
617        });
618        self.adjacency[a.0 as usize].push((b, bidx));
619        self.adjacency[b.0 as usize].push((a, bidx));
620        Ok(bidx)
621    }
622
623    /// Remove bond `idx`.  Atom indices are unchanged; bond indices of
624    /// surviving bonds shift down past the removed slot.
625    pub fn remove_bond(&mut self, idx: BondIdx) {
626        let removed = idx.0 as usize;
627        if removed >= self.bonds.len() {
628            return;
629        }
630        self.bonds.remove(removed);
631        // Rebuild adjacency with renumbered bond indices.
632        let n = self.atoms.len();
633        self.adjacency = vec![vec![]; n];
634        for (bidx, bond) in self.bonds.iter().enumerate() {
635            let bi = BondIdx(bidx as u32);
636            self.adjacency[bond.atom1.0 as usize].push((bond.atom2, bi));
637            self.adjacency[bond.atom2.0 as usize].push((bond.atom1, bi));
638        }
639    }
640
641    /// Set the formal charge of atom `idx` in-place.
642    pub fn set_charge(&mut self, idx: AtomIdx, charge: i8) {
643        self.atoms[idx.0 as usize].charge = charge;
644    }
645
646    /// Set the element of atom `idx` in-place.
647    ///
648    /// Chirality and hydrogen count are reset (element-specific properties).
649    pub fn set_element(&mut self, idx: AtomIdx, el: Element) {
650        let a = &mut self.atoms[idx.0 as usize];
651        a.element = el;
652        a.chirality = crate::atom::Chirality::None;
653        a.hydrogen_count = None;
654        a.aromatic = false;
655    }
656
657    /// Set the CIP stereo code of atom `idx` in-place.
658    pub fn set_cip_code(&mut self, idx: AtomIdx, code: Option<crate::atom::CipCode>) {
659        self.atoms[idx.0 as usize].cip_code = code;
660    }
661
662    /// Return the enhanced stereo groups attached to this molecule.
663    pub fn stereo_groups(&self) -> &[StereoGroup] {
664        &self.stereo_groups
665    }
666
667    /// Replace the stereo group list in-place.
668    pub fn set_stereo_groups(&mut self, groups: Vec<StereoGroup>) {
669        self.stereo_groups = groups;
670    }
671
672    /// Add a single stereo group in-place.
673    pub fn add_stereo_group(&mut self, group: StereoGroup) {
674        self.stereo_groups.push(group);
675    }
676
677    /// SMILES-text-order neighbor sequence for a chiral atom.
678    ///
679    /// Returns `None` for atoms not parsed from SMILES or without stereo.
680    /// The slice contains neighbor atom indices in SMILES text order;
681    /// [`STEREO_H_SENTINEL`] (`u32::MAX`) marks the implicit bracket-H slot.
682    pub fn stereo_neighbor_order(&self, idx: AtomIdx) -> Option<&[u32]> {
683        self.stereo_neighbor_order
684            .get(&idx.0)
685            .map(|v| v.as_slice())
686    }
687
688    /// Set the SMILES stereo neighbor order for atom `idx`.
689    pub fn set_stereo_neighbor_order(&mut self, idx: AtomIdx, order: Vec<u32>) {
690        self.stereo_neighbor_order.insert(idx.0, order);
691    }
692}
693
694// ---------------------------------------------------------------------------
695// Connectivity utilities
696// ---------------------------------------------------------------------------
697
698impl Molecule {
699    /// Return `true` if the molecule has exactly one connected component
700    /// (i.e. every atom can be reached from every other atom).
701    pub fn is_connected(&self) -> bool {
702        let n = self.atoms.len();
703        if n == 0 {
704            return true;
705        }
706        let mut visited = vec![false; n];
707        let mut stack = vec![AtomIdx(0)];
708        visited[0] = true;
709        let mut count = 1;
710        while let Some(cur) = stack.pop() {
711            for (nb, _) in self.neighbors(cur) {
712                if !visited[nb.0 as usize] {
713                    visited[nb.0 as usize] = true;
714                    count += 1;
715                    stack.push(nb);
716                }
717            }
718        }
719        count == n
720    }
721
722    /// Split the molecule into its connected components.
723    ///
724    /// Returns a `Vec` of sub-molecules, one per component.  Atoms are
725    /// renumbered within each sub-molecule starting at index 0.
726    pub fn fragments(&self) -> Vec<Molecule> {
727        let n = self.atoms.len();
728        if n == 0 {
729            return vec![];
730        }
731
732        let mut component: Vec<usize> = vec![usize::MAX; n];
733        let mut comp_id = 0;
734
735        for start in 0..n {
736            if component[start] != usize::MAX {
737                continue;
738            }
739            let mut stack = vec![start];
740            component[start] = comp_id;
741            while let Some(cur) = stack.pop() {
742                for (nb, _) in self.neighbors(AtomIdx(cur as u32)) {
743                    let ni = nb.0 as usize;
744                    if component[ni] == usize::MAX {
745                        component[ni] = comp_id;
746                        stack.push(ni);
747                    }
748                }
749            }
750            comp_id += 1;
751        }
752
753        (0..comp_id)
754            .map(|cid| {
755                let mut builder = MoleculeBuilder::new();
756                let mut old_to_new: std::collections::HashMap<AtomIdx, AtomIdx> =
757                    std::collections::HashMap::new();
758                for (aidx, atom) in self.atoms() {
759                    if component[aidx.0 as usize] == cid {
760                        let new_idx = builder.add_atom(atom.clone());
761                        old_to_new.insert(aidx, new_idx);
762                    }
763                }
764                for (_, bond) in self.bonds() {
765                    if let (Some(&a1), Some(&a2)) =
766                        (old_to_new.get(&bond.atom1), old_to_new.get(&bond.atom2))
767                    {
768                        let _ = builder.add_bond(a1, a2, bond.order);
769                    }
770                }
771                builder.build()
772            })
773            .collect()
774    }
775}
776
777/// Builder for constructing a [`Molecule`] incrementally.
778///
779/// Usage: add atoms, add bonds, then call `build()`.
780#[derive(Default)]
781pub struct MoleculeBuilder {
782    atoms: Vec<Atom>,
783    bonds: Vec<BondEntry>,
784    adjacency: Vec<Vec<(AtomIdx, BondIdx)>>,
785    stereo_groups: Vec<StereoGroup>,
786    stereo_neighbor_order: std::collections::HashMap<u32, Vec<u32>>,
787}
788
789impl MoleculeBuilder {
790    pub fn new() -> Self {
791        Self::default()
792    }
793
794    /// Create a builder pre-populated with all atoms and bonds from `mol`.
795    ///
796    /// Use this to make incremental edits to an existing molecule instead of
797    /// reconstructing it from scratch.
798    pub fn from_molecule(mol: &Molecule) -> Self {
799        let mut b = Self::new();
800        for (_, atom) in mol.atoms() {
801            b.add_atom(atom.clone());
802        }
803        for (_, bond) in mol.bonds() {
804            let _ = b.add_bond(bond.atom1, bond.atom2, bond.order);
805        }
806        b.stereo_groups = mol.stereo_groups.clone();
807        b.stereo_neighbor_order = mol.stereo_neighbor_order.clone();
808        b
809    }
810
811    /// Set the SMILES stereo neighbor order for atom `idx`.
812    pub fn set_stereo_neighbor_order(&mut self, idx: AtomIdx, order: Vec<u32>) {
813        self.stereo_neighbor_order.insert(idx.0, order);
814    }
815
816    /// Remove the stereo neighbor order entry for atom `idx`.
817    pub fn clear_stereo_neighbor_order(&mut self, idx: AtomIdx) {
818        self.stereo_neighbor_order.remove(&idx.0);
819    }
820
821    /// Append a stereo group to this builder.
822    pub fn add_stereo_group(&mut self, group: StereoGroup) {
823        self.stereo_groups.push(group);
824    }
825
826    /// Copy all stereo neighbor order entries from `mol` into this builder.
827    pub fn copy_stereo_from(&mut self, mol: &Molecule) {
828        self.stereo_neighbor_order = mol.stereo_neighbor_order.clone();
829    }
830
831    /// Read-only reference to an atom already added to the builder.
832    ///
833    /// Used by the SMILES parser to infer implicit bond types without
834    /// consuming the builder (e.g. aromatic-aromatic → Aromatic bond).
835    ///
836    /// # Panics
837    /// Panics if `idx` is out of range.
838    pub fn atom_at(&self, idx: AtomIdx) -> &Atom {
839        &self.atoms[idx.0 as usize]
840    }
841
842    /// Number of atoms added so far.
843    pub fn atom_count(&self) -> usize {
844        self.atoms.len()
845    }
846
847    /// Iterate over already-added neighbors of `idx` as `(bond_idx, neighbor_atom_idx)`.
848    /// Used by kekulization tests to check whether a bond already exists in the builder.
849    pub fn atom_neighbors(&self, idx: AtomIdx) -> impl Iterator<Item = (BondIdx, AtomIdx)> + '_ {
850        self.adjacency[idx.0 as usize]
851            .iter()
852            .map(|&(nb, bidx)| (bidx, nb))
853    }
854
855    /// Add an atom and return its index.
856    pub fn add_atom(&mut self, atom: Atom) -> AtomIdx {
857        let idx = AtomIdx(self.atoms.len() as u32);
858        self.atoms.push(atom);
859        self.adjacency.push(Vec::new());
860        idx
861    }
862
863    /// Add a bond between two existing atoms.
864    ///
865    /// Returns an error if either atom index is invalid or if the bond already exists.
866    pub fn add_bond(
867        &mut self,
868        a: AtomIdx,
869        b: AtomIdx,
870        order: BondOrder,
871    ) -> Result<BondIdx, MolError> {
872        let n = self.atoms.len() as u32;
873        if a.0 >= n {
874            return Err(MolError::InvalidAtomIdx(a));
875        }
876        if b.0 >= n {
877            return Err(MolError::InvalidAtomIdx(b));
878        }
879
880        // Check for duplicate
881        for &(nb, _) in &self.adjacency[a.0 as usize] {
882            if nb == b {
883                return Err(MolError::DuplicateBond(a, b));
884            }
885        }
886
887        let bidx = BondIdx(self.bonds.len() as u32);
888        self.bonds.push(BondEntry {
889            atom1: a,
890            atom2: b,
891            order,
892        });
893        self.adjacency[a.0 as usize].push((b, bidx));
894        self.adjacency[b.0 as usize].push((a, bidx));
895        Ok(bidx)
896    }
897
898    /// Consume the builder and return an immutable [`Molecule`].
899    pub fn build(self) -> Molecule {
900        Molecule {
901            atoms: self.atoms,
902            bonds: self.bonds,
903            adjacency: self.adjacency,
904            stereo_groups: self.stereo_groups,
905            stereo_neighbor_order: self.stereo_neighbor_order,
906        }
907    }
908}
909
910#[cfg(test)]
911mod tests {
912    use super::*;
913    use crate::atom::Atom;
914    use crate::element::Element;
915
916    fn ethane() -> Molecule {
917        let mut b = MoleculeBuilder::new();
918        let c1 = b.add_atom(Atom::new(Element::C));
919        let c2 = b.add_atom(Atom::new(Element::C));
920        b.add_bond(c1, c2, BondOrder::Single).unwrap();
921        b.build()
922    }
923
924    #[test]
925    fn test_basic_molecule() {
926        let mol = ethane();
927        assert_eq!(mol.atom_count(), 2);
928        assert_eq!(mol.bond_count(), 1);
929    }
930
931    #[test]
932    fn test_adjacency() {
933        let mol = ethane();
934        let neighbors: Vec<_> = mol.neighbors(AtomIdx(0)).collect();
935        assert_eq!(neighbors.len(), 1);
936        assert_eq!(neighbors[0].0, AtomIdx(1));
937    }
938
939    #[test]
940    fn test_bond_between() {
941        let mol = ethane();
942        assert!(mol.bond_between(AtomIdx(0), AtomIdx(1)).is_some());
943        assert!(mol.bond_between(AtomIdx(1), AtomIdx(0)).is_some());
944    }
945
946    #[test]
947    fn test_duplicate_bond_error() {
948        let mut b = MoleculeBuilder::new();
949        let c1 = b.add_atom(Atom::new(Element::C));
950        let c2 = b.add_atom(Atom::new(Element::C));
951        b.add_bond(c1, c2, BondOrder::Single).unwrap();
952        let err = b.add_bond(c1, c2, BondOrder::Double);
953        assert!(matches!(err, Err(MolError::DuplicateBond(_, _))));
954    }
955
956    #[test]
957    fn test_formula() {
958        let mut b = MoleculeBuilder::new();
959        let c = b.add_atom(Atom::new(Element::C));
960        let n = b.add_atom(Atom::new(Element::N));
961        b.add_bond(c, n, BondOrder::Single).unwrap();
962        let mol = b.build();
963        assert_eq!(mol.formula(), "CN");
964    }
965
966    #[test]
967    fn test_implicit_hydrogen_count() {
968        // Isolated C atom (sp3, 4 bonds available): 4 implicit H
969        let mut b = MoleculeBuilder::new();
970        b.add_atom(Atom::organic(Element::C));
971        let mol = b.build();
972        assert_eq!(mol.implicit_hydrogen_count(AtomIdx(0)), 4);
973    }
974
975    #[test]
976    fn test_total_formula_methane() {
977        // Organic C atom with 0 explicit bonds → 4 implicit H → CH4
978        let mut b = MoleculeBuilder::new();
979        b.add_atom(Atom::organic(Element::C));
980        let mol = b.build();
981        assert_eq!(mol.total_formula(), "CH4");
982    }
983
984    #[test]
985    fn test_total_formula_no_hydrogen() {
986        // NaCl — neither Na nor Cl is in the organic subset, no implicit H
987        let mut b = MoleculeBuilder::new();
988        let na = b.add_atom(Atom::new(Element::NA));
989        let cl = b.add_atom(Atom::new(Element::CL));
990        b.add_bond(na, cl, BondOrder::Single).unwrap();
991        let mol = b.build();
992        assert_eq!(mol.total_formula(), "ClNa");
993    }
994
995    #[test]
996    fn test_with_atom_aromatic() {
997        let mol = ethane();
998        let updated = mol.with_atom_aromatic(AtomIdx(0), true);
999        assert!(updated.atom(AtomIdx(0)).aromatic);
1000        assert!(!updated.atom(AtomIdx(1)).aromatic);
1001    }
1002
1003    #[test]
1004    fn test_with_bond_order() {
1005        let mol = ethane();
1006        let updated = mol.with_bond_order(BondIdx(0), BondOrder::Double);
1007        assert_eq!(updated.bond(BondIdx(0)).order, BondOrder::Double);
1008    }
1009
1010    // --- mutable API ---
1011
1012    #[test]
1013    fn test_add_remove_atom() {
1014        let mut mol = ethane();
1015        let n_idx = mol.add_atom(Atom::new(Element::N));
1016        assert_eq!(mol.atom_count(), 3);
1017        assert_eq!(mol.atom(n_idx).element.atomic_number(), 7);
1018
1019        let remap = mol.remove_atom(n_idx);
1020        assert_eq!(mol.atom_count(), 2);
1021        assert!(remap[n_idx.0 as usize].is_none());
1022    }
1023
1024    #[test]
1025    fn test_add_remove_bond() {
1026        let mut mol = ethane();
1027        let n_idx = mol.add_atom(Atom::new(Element::N));
1028        let bidx = mol.add_bond(AtomIdx(0), n_idx, BondOrder::Single).unwrap();
1029        assert_eq!(mol.bond_count(), 2);
1030        mol.remove_bond(bidx);
1031        assert_eq!(mol.bond_count(), 1);
1032    }
1033
1034    #[test]
1035    fn test_set_charge_element() {
1036        let mut mol = ethane();
1037        mol.set_charge(AtomIdx(0), 1);
1038        assert_eq!(mol.atom(AtomIdx(0)).charge, 1);
1039        mol.set_element(AtomIdx(0), Element::N);
1040        assert_eq!(mol.atom(AtomIdx(0)).element.atomic_number(), 7);
1041    }
1042
1043    #[test]
1044    fn test_is_connected() {
1045        let mol = ethane();
1046        assert!(mol.is_connected());
1047
1048        // Two separate atoms — disconnected
1049        let mut b = MoleculeBuilder::new();
1050        b.add_atom(Atom::new(Element::C));
1051        b.add_atom(Atom::new(Element::N));
1052        let disconnected = b.build();
1053        assert!(!disconnected.is_connected());
1054    }
1055
1056    #[test]
1057    fn test_fragments() {
1058        // "CC.N" — two components
1059        let mut b = MoleculeBuilder::new();
1060        let c1 = b.add_atom(Atom::organic(Element::C));
1061        let c2 = b.add_atom(Atom::organic(Element::C));
1062        b.add_bond(c1, c2, BondOrder::Single).unwrap();
1063        b.add_atom(Atom::new(Element::N)); // disconnected N
1064        let mol = b.build();
1065        let frags = mol.fragments();
1066        assert_eq!(frags.len(), 2);
1067        let sizes: std::collections::HashSet<usize> =
1068            frags.iter().map(|f| f.atom_count()).collect();
1069        assert!(sizes.contains(&2));
1070        assert!(sizes.contains(&1));
1071    }
1072
1073    #[test]
1074    fn test_builder_from_molecule() {
1075        let mol = ethane();
1076        let mut b = MoleculeBuilder::from_molecule(&mol);
1077        b.add_atom(Atom::new(Element::O));
1078        let mol2 = b.build();
1079        assert_eq!(mol2.atom_count(), 3);
1080        assert_eq!(mol2.bond_count(), 1); // original bond preserved
1081    }
1082
1083    // --- safe Option-returning variants ---
1084
1085    #[test]
1086    fn test_atom_opt_valid() {
1087        let mol = ethane();
1088        assert!(mol.atom_opt(AtomIdx(0)).is_some());
1089        assert!(mol.atom_opt(AtomIdx(1)).is_some());
1090        let atom = mol.atom_opt(AtomIdx(0)).unwrap();
1091        assert_eq!(atom.element.atomic_number(), 6);
1092    }
1093
1094    #[test]
1095    fn test_atom_opt_invalid() {
1096        let mol = ethane();
1097        assert!(mol.atom_opt(AtomIdx(2)).is_none());
1098        assert!(mol.atom_opt(AtomIdx(1000)).is_none());
1099    }
1100
1101    #[test]
1102    fn test_bond_opt_valid() {
1103        let mol = ethane();
1104        assert!(mol.bond_opt(BondIdx(0)).is_some());
1105        let bond = mol.bond_opt(BondIdx(0)).unwrap();
1106        assert_eq!(bond.order, BondOrder::Single);
1107    }
1108
1109    #[test]
1110    fn test_bond_opt_invalid() {
1111        let mol = ethane();
1112        assert!(mol.bond_opt(BondIdx(1)).is_none());
1113        assert!(mol.bond_opt(BondIdx(1000)).is_none());
1114    }
1115
1116    #[test]
1117    fn test_neighbors_opt_valid() {
1118        let mol = ethane();
1119        let neighbors = mol.neighbors_opt(AtomIdx(0)).unwrap();
1120        assert_eq!(neighbors.len(), 1);
1121        assert_eq!(neighbors[0].0, AtomIdx(1));
1122    }
1123
1124    #[test]
1125    fn test_neighbors_opt_isolated_atom() {
1126        let mut b = MoleculeBuilder::new();
1127        b.add_atom(Atom::new(Element::C));
1128        b.add_atom(Atom::new(Element::N));
1129        let mol = b.build();
1130        let neighbors = mol.neighbors_opt(AtomIdx(0)).unwrap();
1131        assert_eq!(neighbors.len(), 0);
1132    }
1133
1134    #[test]
1135    fn test_neighbors_opt_invalid() {
1136        let mol = ethane();
1137        assert!(mol.neighbors_opt(AtomIdx(2)).is_none());
1138        assert!(mol.neighbors_opt(AtomIdx(1000)).is_none());
1139    }
1140
1141    #[test]
1142    fn test_degree_opt_valid() {
1143        let mol = ethane();
1144        assert_eq!(mol.degree_opt(AtomIdx(0)), Some(1));
1145        assert_eq!(mol.degree_opt(AtomIdx(1)), Some(1));
1146    }
1147
1148    #[test]
1149    fn test_degree_opt_isolated_atom() {
1150        let mut b = MoleculeBuilder::new();
1151        b.add_atom(Atom::new(Element::C));
1152        b.add_atom(Atom::new(Element::N));
1153        let mol = b.build();
1154        assert_eq!(mol.degree_opt(AtomIdx(0)), Some(0));
1155        assert_eq!(mol.degree_opt(AtomIdx(1)), Some(0));
1156    }
1157
1158    #[test]
1159    fn test_degree_opt_invalid() {
1160        let mol = ethane();
1161        assert!(mol.degree_opt(AtomIdx(2)).is_none());
1162        assert!(mol.degree_opt(AtomIdx(1000)).is_none());
1163    }
1164
1165    #[test]
1166    fn test_degree_opt_multiple_bonds() {
1167        // Create a central atom with 3 neighbors
1168        let mut b = MoleculeBuilder::new();
1169        let center = b.add_atom(Atom::new(Element::C));
1170        let n1 = b.add_atom(Atom::new(Element::C));
1171        let n2 = b.add_atom(Atom::new(Element::N));
1172        let n3 = b.add_atom(Atom::new(Element::O));
1173        b.add_bond(center, n1, BondOrder::Single).unwrap();
1174        b.add_bond(center, n2, BondOrder::Double).unwrap();
1175        b.add_bond(center, n3, BondOrder::Single).unwrap();
1176        let mol = b.build();
1177        assert_eq!(mol.degree_opt(center), Some(3));
1178        assert_eq!(mol.degree_opt(n1), Some(1));
1179        assert_eq!(mol.degree_opt(n2), Some(1));
1180        assert_eq!(mol.degree_opt(n3), Some(1));
1181    }
1182}