Skip to main content

chematic_perception/
aromaticity.rs

1//! Hückel aromaticity perception.
2//!
3//! Works on a **kekulized** molecule (no `Aromatic` bond orders).
4//! Call `kekulize` + `apply_kekule` from `chematic-core` before calling
5//! `assign_aromaticity` if the input contains aromatic bonds.
6//!
7//! Algorithm:
8//! 1. Find all SSSR rings via `find_sssr`.
9//! 2. For each ring, check whether it could participate in a planar pi system.
10//! 3. Count pi electrons contributed by each ring atom.
11//! 4. If the total pi electron count satisfies 4n+2 (n >= 0), mark the ring as aromatic.
12//! 5. Record all aromatic atoms and bonds in an `AromaticityModel`.
13
14use std::collections::HashSet;
15
16use chematic_core::{AtomIdx, BondIdx, BondOrder, Molecule};
17
18use crate::sssr::find_sssr;
19
20// ---------------------------------------------------------------------------
21// Public types
22// ---------------------------------------------------------------------------
23
24/// Aromaticity assignment for a molecule.
25///
26/// Records which atoms and bonds belong to aromatic rings according to
27/// the Hückel 4n+2 rule applied to SSSR rings.
28#[derive(Debug, Clone)]
29pub struct AromaticityModel {
30    aromatic_atoms: HashSet<AtomIdx>,
31    aromatic_bonds: HashSet<BondIdx>,
32}
33
34impl AromaticityModel {
35    /// Whether atom `idx` is part of an aromatic ring.
36    pub fn is_atom_aromatic(&self, idx: AtomIdx) -> bool {
37        self.aromatic_atoms.contains(&idx)
38    }
39
40    /// Whether bond `idx` is part of an aromatic ring.
41    pub fn is_bond_aromatic(&self, idx: BondIdx) -> bool {
42        self.aromatic_bonds.contains(&idx)
43    }
44
45    /// Total number of atoms flagged as aromatic.
46    pub fn aromatic_atom_count(&self) -> usize {
47        self.aromatic_atoms.len()
48    }
49}
50
51// ---------------------------------------------------------------------------
52// Main entry point
53// ---------------------------------------------------------------------------
54
55/// Assign aromaticity to a kekulized molecule using the Hückel 4n+2 rule.
56///
57/// The molecule must use `Single` / `Double` bond orders (no `Aromatic` bonds).
58/// For input parsed from aromatic SMILES call `chematic_core::kekulize` then
59/// `chematic_core::apply_kekule` before passing the molecule here.
60pub fn assign_aromaticity(mol: &Molecule) -> AromaticityModel {
61    let ring_set = find_sssr(mol);
62
63    let mut aromatic_atoms: HashSet<AtomIdx> = HashSet::new();
64    let mut aromatic_bonds: HashSet<BondIdx> = HashSet::new();
65
66    for ring in ring_set.rings() {
67        if let Some(pi_electrons) = ring_pi_electrons(mol, ring) {
68            // Hückel: 4n+2 for n >= 0  →  (pi - 2) divisible by 4.
69            if pi_electrons >= 2 && (pi_electrons - 2) % 4 == 0 {
70                // Mark all atoms in this ring as aromatic.
71                for &atom in ring {
72                    aromatic_atoms.insert(atom);
73                }
74                // Mark all bonds in this ring as aromatic.
75                for i in 0..ring.len() {
76                    let a = ring[i];
77                    let b = ring[(i + 1) % ring.len()];
78                    if let Some((bidx, _)) = mol.bond_between(a, b) {
79                        aromatic_bonds.insert(bidx);
80                    }
81                }
82            }
83        }
84    }
85
86    AromaticityModel { aromatic_atoms, aromatic_bonds }
87}
88
89// ---------------------------------------------------------------------------
90// Per-ring pi electron count
91// ---------------------------------------------------------------------------
92
93/// Try to count pi electrons for a ring.
94///
95/// Returns `None` if any atom in the ring is not sp2-compatible (i.e. the ring
96/// cannot be aromatic regardless of electron count).
97/// Returns `Some(count)` otherwise.
98fn ring_pi_electrons(mol: &Molecule, ring: &[AtomIdx]) -> Option<u32> {
99    let ring_atom_set: HashSet<AtomIdx> = ring.iter().copied().collect();
100    let mut total_pi: u32 = 0;
101
102    for &atom_idx in ring {
103        let atom = mol.atom(atom_idx);
104        let an = atom.element.atomic_number();
105
106        // Count heavy-atom bonds to ring neighbors (ring degree).
107        let ring_degree = mol
108            .neighbors(atom_idx)
109            .filter(|(nb, _)| ring_atom_set.contains(nb))
110            .count();
111
112        // Determine whether the atom has a double bond to a ring neighbor.
113        let has_double_in_ring = mol
114            .neighbors(atom_idx)
115            .filter(|(nb, _)| ring_atom_set.contains(nb))
116            .any(|(_, bidx)| mol.bond(bidx).order == BondOrder::Double);
117
118        // Determine whether the atom has any double bond (inside or outside the ring).
119        let has_double_any = mol
120            .neighbors(atom_idx)
121            .any(|(_, bidx)| mol.bond(bidx).order == BondOrder::Double);
122
123        let pi = match an {
124            // Carbon: must have a double bond somewhere to be sp2.
125            6 => {
126                if !has_double_any {
127                    // sp3 carbon — ring cannot be aromatic.
128                    return None;
129                }
130                1 // One pi electron from the C=C double bond (shared between C atoms).
131            }
132            // Nitrogen (atomic number 7)
133            7 => {
134                // Determine H count: use explicit hydrogen_count if set (bracket atom),
135                // otherwise derive from valence.
136                if hydrogen_count(mol, atom_idx) > 0 {
137                    // Pyrrole-type N with H: contributes a lone pair → 2 pi electrons.
138                    2
139                } else if has_double_in_ring || has_double_any {
140                    // Pyridine-type N (or N with exocyclic C=N): contributes 1.
141                    1
142                } else {
143                    // N in ring with no double bond and no H — cannot determine pi system.
144                    return None;
145                }
146            }
147            // Oxygen / sulfur: lone-pair donor, contributes 2 (must be 2-connected in ring).
148            8 | 16 => {
149                if ring_degree != 2 {
150                    return None;
151                }
152                2
153            }
154            // Unsupported element for Hückel aromaticity.
155            _ => return None,
156        };
157
158        total_pi += pi;
159    }
160
161    Some(total_pi)
162}
163
164// ---------------------------------------------------------------------------
165// Hydrogen count helper
166// ---------------------------------------------------------------------------
167
168/// Return the total hydrogen count for `atom_idx`.
169///
170/// - Bracket atoms: use the stored `hydrogen_count`.
171/// - Organic-subset atoms: derive from the standard valence table.
172fn hydrogen_count(mol: &Molecule, atom_idx: AtomIdx) -> u8 {
173    let atom = mol.atom(atom_idx);
174
175    // Bracket atoms store the count directly.
176    if let Some(h) = atom.hydrogen_count {
177        return h;
178    }
179
180    // Organic-subset atoms: compute from valence.
181    let normal_valences = atom.element.normal_valences();
182    if normal_valences.is_empty() {
183        return 0;
184    }
185
186    let bond_sum: i32 = mol
187        .neighbors(atom_idx)
188        .map(|(_, bidx)| mol.bond(bidx).order.order_int() as i32)
189        .sum();
190
191    let charge = atom.charge as i32;
192
193    for &v in normal_valences {
194        let target = v as i32 + charge;
195        if target >= bond_sum {
196            return (target - bond_sum) as u8;
197        }
198    }
199
200    0
201}
202
203// ---------------------------------------------------------------------------
204// Tests
205// ---------------------------------------------------------------------------
206
207#[cfg(test)]
208mod tests {
209    use super::*;
210    use chematic_core::{Atom, BondOrder, Element, MoleculeBuilder};
211
212    // Build a kekulized benzene ring (alternating single/double bonds).
213    fn benzene_kekule() -> chematic_core::Molecule {
214        let mut b = MoleculeBuilder::new();
215        let atoms: Vec<_> = (0..6).map(|_| b.add_atom(Atom::new(Element::C))).collect();
216        for i in 0..6 {
217            let order = if i % 2 == 0 { BondOrder::Double } else { BondOrder::Single };
218            b.add_bond(atoms[i], atoms[(i + 1) % 6], order).unwrap();
219        }
220        b.build()
221    }
222
223    // Build cyclohexane (6 single bonds — no pi system).
224    fn cyclohexane() -> chematic_core::Molecule {
225        let mut b = MoleculeBuilder::new();
226        let atoms: Vec<_> = (0..6).map(|_| b.add_atom(Atom::new(Element::C))).collect();
227        for i in 0..6 {
228            b.add_bond(atoms[i], atoms[(i + 1) % 6], BondOrder::Single).unwrap();
229        }
230        b.build()
231    }
232
233    // Build kekulized pyridine: 5 C + 1 N (pyridine-type, no H).
234    // Atoms: N(0)-C(1)=C(2)-C(3)=C(4)-C(5)=N(0) ring
235    fn pyridine_kekule() -> chematic_core::Molecule {
236        let mut b = MoleculeBuilder::new();
237        let n = b.add_atom(Atom::new(Element::N));
238        let atoms_c: Vec<_> = (0..5).map(|_| b.add_atom(Atom::new(Element::C))).collect();
239        let ring = [n, atoms_c[0], atoms_c[1], atoms_c[2], atoms_c[3], atoms_c[4]];
240        // N=C-C=C-C=N  alternating, starting with double
241        for i in 0..6 {
242            let order = if i % 2 == 0 { BondOrder::Double } else { BondOrder::Single };
243            b.add_bond(ring[i], ring[(i + 1) % 6], order).unwrap();
244        }
245        b.build()
246    }
247
248    // Build kekulized furan: O + 4 C, 5-membered ring.
249    // O-C=C-C=C-O  (O contributes lone pair, 2 double bonds in ring)
250    fn furan_kekule() -> chematic_core::Molecule {
251        let mut b = MoleculeBuilder::new();
252        let o = b.add_atom(Atom::new(Element::O));
253        let c1 = b.add_atom(Atom::new(Element::C));
254        let c2 = b.add_atom(Atom::new(Element::C));
255        let c3 = b.add_atom(Atom::new(Element::C));
256        let c4 = b.add_atom(Atom::new(Element::C));
257        let ring = [o, c1, c2, c3, c4];
258        // O-C=C-C=C ring; O-C single, C=C double, C-C single, C=C double, C-O single (back)
259        b.add_bond(ring[0], ring[1], BondOrder::Single).unwrap();
260        b.add_bond(ring[1], ring[2], BondOrder::Double).unwrap();
261        b.add_bond(ring[2], ring[3], BondOrder::Single).unwrap();
262        b.add_bond(ring[3], ring[4], BondOrder::Double).unwrap();
263        b.add_bond(ring[4], ring[0], BondOrder::Single).unwrap();
264        b.build()
265    }
266
267    // Build kekulized pyrrole: [NH] + 4 C, 5-membered ring.
268    // N has an explicit H (hydrogen_count = Some(1)).
269    fn pyrrole_kekule() -> chematic_core::Molecule {
270        let mut b = MoleculeBuilder::new();
271        let mut n_atom = Atom::new(Element::N);
272        n_atom.hydrogen_count = Some(1);
273        let n = b.add_atom(n_atom);
274        let c1 = b.add_atom(Atom::new(Element::C));
275        let c2 = b.add_atom(Atom::new(Element::C));
276        let c3 = b.add_atom(Atom::new(Element::C));
277        let c4 = b.add_atom(Atom::new(Element::C));
278        let ring = [n, c1, c2, c3, c4];
279        b.add_bond(ring[0], ring[1], BondOrder::Single).unwrap();
280        b.add_bond(ring[1], ring[2], BondOrder::Double).unwrap();
281        b.add_bond(ring[2], ring[3], BondOrder::Single).unwrap();
282        b.add_bond(ring[3], ring[4], BondOrder::Double).unwrap();
283        b.add_bond(ring[4], ring[0], BondOrder::Single).unwrap();
284        b.build()
285    }
286
287    // Build kekulized naphthalene: 10 C, 11 bonds, 2 fused 6-membered rings.
288    fn naphthalene_kekule() -> chematic_core::Molecule {
289        let mut b = MoleculeBuilder::new();
290        let atoms: Vec<_> = (0..10).map(|_| b.add_atom(Atom::new(Element::C))).collect();
291        // Ring 1: 0-1-2-3-4-9
292        // Ring 2: 4-5-6-7-8-9 sharing bond 4-9
293        // Kekulé pattern: 0=1-2=3-4=9-0, then 4-5=6-7=8-9 (4-9 already single)
294        // Bond orders for a valid kekulé:
295        //   0-1: double, 1-2: single, 2-3: double, 3-4: single, 4-9: double, 9-0: single
296        //   4-5: single, 5-6: double, 6-7: single, 7-8: double, 8-9: single
297        let ring1 = [0usize, 1, 2, 3, 4, 9];
298        let orders1 = [
299            BondOrder::Double,
300            BondOrder::Single,
301            BondOrder::Double,
302            BondOrder::Single,
303            BondOrder::Double,
304            BondOrder::Single,
305        ];
306        for i in 0..6 {
307            b.add_bond(atoms[ring1[i]], atoms[ring1[(i + 1) % 6]], orders1[i]).unwrap();
308        }
309        // Ring 2 extra bonds (4-9 already added):
310        let ring2_extra = [(4, 5), (5, 6), (6, 7), (7, 8), (8, 9)];
311        let orders2 = [
312            BondOrder::Single,
313            BondOrder::Double,
314            BondOrder::Single,
315            BondOrder::Double,
316            BondOrder::Single,
317        ];
318        for (i, &(a, bb)) in ring2_extra.iter().enumerate() {
319            b.add_bond(atoms[a], atoms[bb], orders2[i]).unwrap();
320        }
321        b.build()
322    }
323
324    #[test]
325    fn test_benzene_is_aromatic() {
326        let mol = benzene_kekule();
327        let model = assign_aromaticity(&mol);
328        assert_eq!(model.aromatic_atom_count(), 6, "all 6 benzene atoms are aromatic");
329        for i in 0..6u32 {
330            assert!(model.is_atom_aromatic(AtomIdx(i)), "atom {} should be aromatic", i);
331        }
332    }
333
334    #[test]
335    fn test_cyclohexane_not_aromatic() {
336        let mol = cyclohexane();
337        let model = assign_aromaticity(&mol);
338        assert_eq!(model.aromatic_atom_count(), 0, "cyclohexane has no aromatic atoms");
339    }
340
341    #[test]
342    fn test_pyridine_is_aromatic() {
343        let mol = pyridine_kekule();
344        let model = assign_aromaticity(&mol);
345        assert_eq!(model.aromatic_atom_count(), 6, "all 6 pyridine atoms are aromatic");
346    }
347
348    #[test]
349    fn test_furan_is_aromatic() {
350        let mol = furan_kekule();
351        let model = assign_aromaticity(&mol);
352        assert_eq!(model.aromatic_atom_count(), 5, "all 5 furan atoms are aromatic");
353    }
354
355    #[test]
356    fn test_pyrrole_is_aromatic() {
357        let mol = pyrrole_kekule();
358        let model = assign_aromaticity(&mol);
359        assert_eq!(model.aromatic_atom_count(), 5, "all 5 pyrrole atoms are aromatic");
360    }
361
362    #[test]
363    fn test_naphthalene_both_rings_aromatic() {
364        let mol = naphthalene_kekule();
365        let model = assign_aromaticity(&mol);
366        // Both 6-membered rings are aromatic; all 10 atoms should be flagged.
367        assert_eq!(
368            model.aromatic_atom_count(),
369            10,
370            "all 10 naphthalene atoms are aromatic"
371        );
372    }
373
374    #[test]
375    fn test_bond_aromaticity_benzene() {
376        let mol = benzene_kekule();
377        let model = assign_aromaticity(&mol);
378        // All 6 ring bonds should be aromatic.
379        let mut count = 0;
380        for (bidx, _) in mol.bonds() {
381            if model.is_bond_aromatic(bidx) {
382                count += 1;
383            }
384        }
385        assert_eq!(count, 6, "benzene has 6 aromatic bonds");
386    }
387}