Skip to main content

chematic_core/
kekulization.rs

1//! Kekulization: assign alternating single/double bonds to aromatic systems.
2//!
3//! Algorithm:
4//! 1. Collect all aromatic atoms and aromatic bonds.
5//! 2. Build an adjacency structure restricted to aromatic bonds.
6//! 3. Find a maximum matching on the aromatic subgraph using
7//!    augmenting paths (DFS-based, O(V*E)).
8//! 4. Matched edges → Double bond; unmatched edges → Single bond.
9//! 5. Return the updated bond order map, or an error if no valid
10//!    Kekulé form exists (i.e. some atom that *must* be doubled is unmatched).
11//!
12//! Scope: handles all standard aromatic systems (benzene, naphthalene,
13//! pyridine, pyrrole, furan, thiophene, indole, imidazole, …).
14//! Edmond's blossom algorithm for exotic odd-member cycles is a future todo.
15
16use std::collections::{HashMap, HashSet};
17
18use crate::bond::BondOrder;
19use crate::molecule::{AtomIdx, BondIdx, Molecule};
20
21/// Error returned when no valid Kekulé form can be found.
22#[derive(Debug, Clone, PartialEq, Eq)]
23pub struct KekuleError {
24    pub detail: String,
25}
26
27impl core::fmt::Display for KekuleError {
28    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
29        write!(f, "kekulization failed: {}", self.detail)
30    }
31}
32
33impl std::error::Error for KekuleError {}
34
35/// Result of kekulization: a map from BondIdx to the new BondOrder.
36///
37/// Bonds NOT in this map are unchanged (non-aromatic bonds).
38pub type KekuleResult = HashMap<BondIdx, BondOrder>;
39
40/// Kekulize a molecule that contains aromatic bonds.
41///
42/// Returns a map of aromatic bond indices to their new (Single or Double) orders.
43/// All non-aromatic bonds are unchanged and not included in the result.
44///
45/// If the molecule has no aromatic bonds the result is empty (success, no-op).
46pub fn kekulize(mol: &Molecule) -> Result<KekuleResult, KekuleError> {
47    // Collect aromatic bonds and the atoms they touch.
48    let mut aromatic_bonds: Vec<BondIdx> = Vec::new();
49    let mut aromatic_atoms: HashSet<AtomIdx> = HashSet::new();
50
51    for (bidx, bond) in mol.bonds() {
52        if bond.order == BondOrder::Aromatic {
53            aromatic_bonds.push(bidx);
54            aromatic_atoms.insert(bond.atom1);
55            aromatic_atoms.insert(bond.atom2);
56        }
57    }
58
59    if aromatic_bonds.is_empty() {
60        return Ok(HashMap::new());
61    }
62
63    // Determine which aromatic atoms *must* be in a double bond.
64    // An aromatic atom must be double-bonded if it has no lone pair to donate.
65    // Heuristic: if the atom has no explicit/implicit H and is carbon or nitrogen-imine,
66    // it must be matched.
67    // For simplicity: atoms that must_match = those with no spare lone pair =
68    //   carbon (C), nitrogen-imine (N in pyridine — has no H when in 6-membered ring).
69    // Atoms that can be unmatched (lone-pair donors): O, S, N with H (pyrrole N).
70    //
71    // Practical rule used here: an atom can be *unmatched* iff it has an explicit
72    // H count > 0 OR it is O or S (lone-pair donors).
73    // All others must appear in the matching.
74    let must_match: HashSet<AtomIdx> = aromatic_atoms
75        .iter()
76        .copied()
77        .filter(|&idx| atom_must_be_matched(mol, idx))
78        .collect();
79
80    // Build adjacency list restricted to aromatic bonds BETWEEN must-match atoms.
81    //
82    // Lone-pair donors (O, S, [nH]) contribute their pi electrons via the lone pair,
83    // NOT via a double bond.  Including them in the matching adjacency causes the
84    // augmenting-path algorithm to assign double bonds to e.g. [nH]=C in pyrrole or
85    // indole, which is chemically wrong and produces incorrect implicit-H counts.
86    //
87    // Only bonds where BOTH endpoints are in must_match are valid double-bond
88    // candidates.  Lone-pair donors remain in aromatic_atoms (so their aromatic bonds
89    // become Single in the result) but are excluded from the matching graph.
90    let mut adj: HashMap<AtomIdx, Vec<(AtomIdx, BondIdx)>> = HashMap::new();
91    for &bidx in &aromatic_bonds {
92        let bond = mol.bond(bidx);
93        if must_match.contains(&bond.atom1) && must_match.contains(&bond.atom2) {
94            adj.entry(bond.atom1).or_default().push((bond.atom2, bidx));
95            adj.entry(bond.atom2).or_default().push((bond.atom1, bidx));
96        }
97    }
98
99    // Run maximum matching via augmenting paths.
100    let mut matching: HashMap<AtomIdx, AtomIdx> = HashMap::new(); // atom -> matched_partner
101
102    // Process must-match atoms in a deterministic order (by index) for reproducibility.
103    // Non-must-match atoms (lone-pair donors) are skipped — they never initiate
104    // augmenting paths and are never placed in the matching.
105    let mut sorted_atoms: Vec<AtomIdx> = must_match.iter().copied().collect();
106    sorted_atoms.sort();
107
108    for &start in &sorted_atoms {
109        if matching.contains_key(&start) {
110            continue; // already matched
111        }
112        // Try to find an augmenting path from `start`.
113        let mut visited: HashSet<AtomIdx> = HashSet::new();
114        augment(start, &adj, &mut matching, &mut visited);
115    }
116
117    // Verify that all must_match atoms are matched.
118    for &idx in &must_match {
119        if !matching.contains_key(&idx) {
120            return Err(KekuleError {
121                detail: format!(
122                    "atom {} ({}) cannot be assigned a double bond",
123                    idx.0,
124                    mol.atom(idx).element.symbol()
125                ),
126            });
127        }
128    }
129
130    // Build the result map: matched aromatic bonds → Double, rest → Single.
131    let mut double_bonds: HashSet<BondIdx> = HashSet::new();
132    for (&atom, &partner) in &matching {
133        if atom >= partner {
134            continue; // each pair shows up twice in `matching`; visit one orientation
135        }
136        if let Some((bidx, _)) = mol.bond_between(atom, partner) {
137            if mol.bond(bidx).order == BondOrder::Aromatic {
138                double_bonds.insert(bidx);
139            }
140        }
141    }
142
143    let result: KekuleResult = aromatic_bonds
144        .iter()
145        .map(|&bidx| {
146            let order = if double_bonds.contains(&bidx) {
147                BondOrder::Double
148            } else {
149                BondOrder::Single
150            };
151            (bidx, order)
152        })
153        .collect();
154    Ok(result)
155}
156
157/// Apply a Kekulé result to a molecule, returning a new Molecule with updated bond orders.
158///
159/// Aromatic flags on atoms are *not* cleared (the molecule retains the aromaticity
160/// annotation; only bond orders change).
161pub fn apply_kekule(mol: &Molecule, kekule: &KekuleResult) -> Molecule {
162    use crate::molecule::MoleculeBuilder;
163
164    let mut builder = MoleculeBuilder::new();
165
166    // Re-add all atoms.
167    for (_, atom) in mol.atoms() {
168        builder.add_atom(atom.clone());
169    }
170
171    // Re-add bonds, substituting updated orders for aromatic bonds.
172    for (bidx, bond) in mol.bonds() {
173        let order = kekule.get(&bidx).copied().unwrap_or(bond.order);
174        builder.add_bond(bond.atom1, bond.atom2, order)
175            .expect("duplicate bond during apply_kekule");
176    }
177
178    builder.build()
179}
180
181/// Attempt to find an augmenting path starting from `v` and update `matching`.
182/// Returns true if an augmenting path was found.
183fn augment(
184    v: AtomIdx,
185    adj: &HashMap<AtomIdx, Vec<(AtomIdx, BondIdx)>>,
186    matching: &mut HashMap<AtomIdx, AtomIdx>,
187    visited: &mut HashSet<AtomIdx>,
188) -> bool {
189    let Some(neighbors) = adj.get(&v) else { return false };
190    for &(u, _) in neighbors {
191        if !visited.insert(u) {
192            continue;
193        }
194        // u is unmatched, or we can re-route from u's current partner.
195        let can_augment = match matching.get(&u).copied() {
196            None => true,
197            Some(partner) => augment(partner, adj, matching, visited),
198        };
199        if can_augment {
200            matching.insert(v, u);
201            matching.insert(u, v);
202            return true;
203        }
204    }
205    false
206}
207
208/// Determine whether an aromatic atom *must* appear in the matching
209/// (i.e. requires a double bond for a valid Kekulé form).
210///
211/// An atom can be unmatched if it contributes a lone pair to the aromatic system:
212///   - O (furan-type oxygen)
213///   - S (thiophene-type sulfur)
214///   - N with an H (pyrrole-type nitrogen: [nH])
215///   - Se, As aromatic analogs
216///   - Any aromatic atom that already has an exocyclic double bond (e.g. the
217///     carbonyl carbon in coumarin/warfarin `c=O` fused into an aromatic ring).
218///     Such an atom's pi contribution comes from conjugation with the exocyclic
219///     bond; no additional ring double bond is needed or possible.
220///
221/// Everything else (C, N without H like pyridine) must be matched.
222fn atom_must_be_matched(mol: &Molecule, idx: AtomIdx) -> bool {
223    let atom = mol.atom(idx);
224    match atom.element.atomic_number() {
225        // O, S, Se always donate a lone pair → don't need a double bond.
226        8 | 16 | 34 => false,
227        // Boron aromatic (rare) — can donate lone pair.
228        5 => false,
229        // N with explicit H ([nH]) is a lone-pair donor; bare aromatic N (pyridine) must match.
230        7 => !matches!(atom.hydrogen_count, Some(h) if h > 0),
231        // Carbon and everything else: must be in the matching.
232        _ => true,
233    }
234}
235
236#[cfg(test)]
237mod tests {
238    use super::*;
239    use crate::atom::Atom;
240    use crate::element::Element;
241    use crate::molecule::MoleculeBuilder;
242
243    /// Build benzene as a fully aromatic SMILES-style molecule.
244    fn benzene() -> Molecule {
245        let mut b = MoleculeBuilder::new();
246        let atoms: Vec<_> = (0..6).map(|_| b.add_atom(Atom::aromatic(Element::C))).collect();
247        for i in 0..6 {
248            b.add_bond(atoms[i], atoms[(i + 1) % 6], BondOrder::Aromatic).unwrap();
249        }
250        b.build()
251    }
252
253    /// Build pyridine (5 aromatic C + 1 aromatic N, ring).
254    fn pyridine() -> Molecule {
255        let mut b = MoleculeBuilder::new();
256        let c1 = b.add_atom(Atom::aromatic(Element::C));
257        let c2 = b.add_atom(Atom::aromatic(Element::C));
258        let c3 = b.add_atom(Atom::aromatic(Element::C));
259        let n  = b.add_atom(Atom::aromatic(Element::N));
260        let c4 = b.add_atom(Atom::aromatic(Element::C));
261        let c5 = b.add_atom(Atom::aromatic(Element::C));
262        let atoms = [c1, c2, c3, n, c4, c5];
263        for i in 0..6 {
264            b.add_bond(atoms[i], atoms[(i + 1) % 6], BondOrder::Aromatic).unwrap();
265        }
266        b.build()
267    }
268
269    /// Build furan (4 aromatic C + 1 aromatic O, ring).
270    fn furan() -> Molecule {
271        let mut b = MoleculeBuilder::new();
272        let o  = b.add_atom(Atom::aromatic(Element::O));
273        let c1 = b.add_atom(Atom::aromatic(Element::C));
274        let c2 = b.add_atom(Atom::aromatic(Element::C));
275        let c3 = b.add_atom(Atom::aromatic(Element::C));
276        let c4 = b.add_atom(Atom::aromatic(Element::C));
277        let atoms = [o, c1, c2, c3, c4];
278        for i in 0..5 {
279            b.add_bond(atoms[i], atoms[(i + 1) % 5], BondOrder::Aromatic).unwrap();
280        }
281        b.build()
282    }
283
284    /// Build pyrrole ([nH] + 4 aromatic C, ring).
285    fn pyrrole() -> Molecule {
286        let mut b = MoleculeBuilder::new();
287        // N with 1 H — bracket-style (hydrogen_count = Some(1))
288        let mut n_atom = Atom::aromatic(Element::N);
289        n_atom.hydrogen_count = Some(1);
290        let n  = b.add_atom(n_atom);
291        let c1 = b.add_atom(Atom::aromatic(Element::C));
292        let c2 = b.add_atom(Atom::aromatic(Element::C));
293        let c3 = b.add_atom(Atom::aromatic(Element::C));
294        let c4 = b.add_atom(Atom::aromatic(Element::C));
295        let atoms = [n, c1, c2, c3, c4];
296        for i in 0..5 {
297            b.add_bond(atoms[i], atoms[(i + 1) % 5], BondOrder::Aromatic).unwrap();
298        }
299        b.build()
300    }
301
302    #[test]
303    fn test_kekulize_benzene() {
304        let mol = benzene();
305        let result = kekulize(&mol).expect("benzene kekulization failed");
306        assert_eq!(result.len(), 6); // all 6 aromatic bonds assigned
307
308        let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
309        let singles = result.values().filter(|&&o| o == BondOrder::Single).count();
310        assert_eq!(doubles, 3, "benzene must have 3 double bonds");
311        assert_eq!(singles, 3, "benzene must have 3 single bonds");
312    }
313
314    #[test]
315    fn test_kekulize_pyridine() {
316        let mol = pyridine();
317        let result = kekulize(&mol).expect("pyridine kekulization failed");
318        let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
319        assert_eq!(doubles, 3, "pyridine must have 3 double bonds");
320    }
321
322    #[test]
323    fn test_kekulize_furan() {
324        let mol = furan();
325        let result = kekulize(&mol).expect("furan kekulization failed");
326        let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
327        assert_eq!(doubles, 2, "furan must have 2 double bonds");
328    }
329
330    #[test]
331    fn test_kekulize_pyrrole() {
332        let mol = pyrrole();
333        let result = kekulize(&mol).expect("pyrrole kekulization failed");
334        let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
335        assert_eq!(doubles, 2, "pyrrole must have 2 double bonds");
336    }
337
338    #[test]
339    fn test_kekulize_naphthalene() {
340        // 10 aromatic C, 11 aromatic bonds (fused bicyclic)
341        let mut b = MoleculeBuilder::new();
342        let atoms: Vec<_> = (0..10).map(|_| b.add_atom(Atom::aromatic(Element::C))).collect();
343        // Ring 1: 0-1-2-3-4-9
344        let ring1 = [0,1,2,3,4,9];
345        for i in 0..ring1.len() {
346            b.add_bond(atoms[ring1[i]], atoms[ring1[(i+1)%ring1.len()]], BondOrder::Aromatic).unwrap();
347        }
348        // Ring 2: 4-5-6-7-8-9 (shares bond 4-9)
349        let ring2 = [4,5,6,7,8,9];
350        for i in 0..ring2.len() {
351            let a = atoms[ring2[i]];
352            let bb = atoms[ring2[(i+1)%ring2.len()]];
353            // Skip already-added bond (4-9)
354            if mol_has_no_bond_yet(&b, a, bb) {
355                b.add_bond(a, bb, BondOrder::Aromatic).unwrap();
356            }
357        }
358        let mol = b.build();
359        let result = kekulize(&mol).expect("naphthalene kekulization failed");
360        let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
361        assert_eq!(doubles, 5, "naphthalene must have 5 double bonds");
362    }
363
364    #[test]
365    fn test_apply_kekule() {
366        let mol = benzene();
367        let kekule = kekulize(&mol).unwrap();
368        let kekule_mol = apply_kekule(&mol, &kekule);
369
370        // After applying, no aromatic bonds should remain.
371        for (_, bond) in kekule_mol.bonds() {
372            assert_ne!(bond.order, BondOrder::Aromatic,
373                "apply_kekule should remove all aromatic bonds");
374        }
375    }
376
377    #[test]
378    fn test_no_aromatic_bonds_noop() {
379        // A molecule with no aromatic bonds should return empty result.
380        let mut b = MoleculeBuilder::new();
381        let c1 = b.add_atom(Atom::new(Element::C));
382        let c2 = b.add_atom(Atom::new(Element::C));
383        b.add_bond(c1, c2, BondOrder::Single).unwrap();
384        let mol = b.build();
385        let result = kekulize(&mol).unwrap();
386        assert!(result.is_empty());
387    }
388
389    // Helper: check that the builder does not yet have a bond between a and b.
390    fn mol_has_no_bond_yet(b: &MoleculeBuilder, a: AtomIdx, bb: AtomIdx) -> bool {
391        for (_, partner) in b.atom_neighbors(a) {
392            if partner == bb { return false; }
393        }
394        true
395    }
396}