Skip to main content

chematic_perception/
aromaticity.rs

1//! Hückel aromaticity perception with antiaromaticity detection.
2//!
3//! Works on kekulized molecules (no `Aromatic` bond orders) **or** on molecules
4//! that retain `Aromatic` bond orders from the SMILES parser (pre-kekulization).
5//! Call `kekulize` + `apply_kekule` from `chematic-core` before calling
6//! `assign_aromaticity` if you need the explicit double-bond form.
7//!
8//! Algorithm:
9//! 1. Find all SSSR rings via `find_sssr`.
10//! 2. **Pass 1**: evaluate each ring independently using Hückel electron counting.
11//!    Aromatic (`BondOrder::Aromatic`) bonds are treated equivalently to double bonds
12//!    so that pre-kekulization input is handled correctly.
13//!    A special "bridgehead N" rule covers fused-ring N atoms whose entire valence
14//!    is satisfied by single σ-bonds (like indolizine's junction nitrogen).
15//! 3. **Pass 2**: iterative propagation. Rings that were `NonAromatic` or
16//!    indeterminate in Pass 1 are re-evaluated using the already-aromatic atom set
17//!    as context: confirmed-aromatic atoms contribute 1π unconditionally, allowing
18//!    fused rings to be recognised bottom-up (e.g. the 6-ring of indolizine).
19//! 4. Classify rings by electron count:
20//!    - 4n+2 electrons (n >= 0): aromatic (favorable)
21//!    - 4n electrons (n > 0): antiaromatic (unfavorable, strongly disfavored)
22//!    - Other: non-aromatic
23//! 5. Record all aromatic atoms, bonds, and antiaromatic rings in an `AromaticityModel`.
24
25// ---------------------------------------------------------------------------
26// Algorithm selector
27// ---------------------------------------------------------------------------
28
29/// Algorithm used to classify ring aromaticity.
30///
31/// Passed to [`assign_aromaticity_ex`] and [`apply_aromaticity_ex`].
32#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
33pub enum AromaticityAlgorithm {
34    /// Strict Hückel 4n+2 rule (default). Supports C, N, O, S.
35    #[default]
36    Huckel,
37    /// RDKit-compatible extension. Adds Se (34) and Te (52) as chalcogen lone-pair
38    /// donors (2π), matching the RDKit DEFAULT aromaticity model for common
39    /// organic and chalcogen heteroaromatics.
40    ///
41    /// P-containing aromatic rings are NOT supported in this mode (separate sprint).
42    /// Keto-lactam aromaticity is NOT included (TautomerMode, separate sprint).
43    RdkitLike,
44}
45
46use rustc_hash::{FxHashMap, FxHashSet};
47
48use chematic_core::{AtomIdx, BondIdx, BondOrder, Molecule, implicit_hcount};
49
50use crate::sssr::find_sssr;
51
52// ---------------------------------------------------------------------------
53// Public types
54// ---------------------------------------------------------------------------
55
56/// Ring aromaticity classification.
57#[derive(Debug, Clone, Copy, PartialEq, Eq)]
58pub enum RingAromaticity {
59    /// 4n+2 electrons: aromatic (favorable)
60    Aromatic,
61    /// 4n electrons (n > 0): antiaromatic (unfavorable)
62    Antiaromatic,
63    /// Any other electron count: non-aromatic
64    NonAromatic,
65}
66
67/// Aromaticity assignment for a molecule.
68///
69/// Records which atoms and bonds belong to aromatic rings according to
70/// the Hückel 4n+2 rule applied to SSSR rings (with fused-ring propagation).
71/// Also tracks antiaromatic rings (4n electrons) for chemical accuracy.
72#[derive(Debug, Clone)]
73pub struct AromaticityModel {
74    aromatic_atoms: FxHashSet<AtomIdx>,
75    aromatic_bonds: FxHashSet<BondIdx>,
76    antiaromatic_rings: Vec<Vec<AtomIdx>>,
77    ring_classifications: Vec<(Vec<AtomIdx>, RingAromaticity, u32)>,
78}
79
80impl AromaticityModel {
81    /// Whether atom `idx` is part of an aromatic ring.
82    pub fn is_atom_aromatic(&self, idx: AtomIdx) -> bool {
83        self.aromatic_atoms.contains(&idx)
84    }
85
86    /// Whether bond `idx` is part of an aromatic ring.
87    pub fn is_bond_aromatic(&self, idx: BondIdx) -> bool {
88        self.aromatic_bonds.contains(&idx)
89    }
90
91    /// Total number of atoms flagged as aromatic.
92    pub fn aromatic_atom_count(&self) -> usize {
93        self.aromatic_atoms.len()
94    }
95
96    /// Get all rings and their classification with electron counts.
97    ///
98    /// Each entry is `(ring_atoms, classification, π_electron_count)`.
99    /// Rings that could not be evaluated (sp3 atoms, unsupported elements) are omitted.
100    pub fn ring_classifications(&self) -> &[(Vec<AtomIdx>, RingAromaticity, u32)] {
101        &self.ring_classifications
102    }
103
104    /// Get all antiaromatic rings (4n electrons, n > 0).
105    pub fn antiaromatic_rings(&self) -> &[Vec<AtomIdx>] {
106        &self.antiaromatic_rings
107    }
108
109    /// Check if any atom belongs to an antiaromatic ring.
110    pub fn has_antiaromaticity(&self) -> bool {
111        !self.antiaromatic_rings.is_empty()
112    }
113}
114
115// ---------------------------------------------------------------------------
116// Main entry points
117// ---------------------------------------------------------------------------
118
119/// Classify a ring by its pi electron count using Hückel and antiaromaticity rules.
120#[allow(clippy::manual_is_multiple_of)]
121fn classify_ring_aromaticity(pi_electrons: u32) -> (RingAromaticity, u32) {
122    if pi_electrons >= 2 && (pi_electrons - 2) % 4 == 0 {
123        (RingAromaticity::Aromatic, pi_electrons)
124    } else if pi_electrons > 0 && pi_electrons % 4 == 0 {
125        (RingAromaticity::Antiaromatic, pi_electrons)
126    } else {
127        (RingAromaticity::NonAromatic, pi_electrons)
128    }
129}
130
131/// Mark all atoms and bonds in `ring` as aromatic in the provided sets.
132fn mark_ring_aromatic(
133    mol: &Molecule,
134    ring: &[AtomIdx],
135    aromatic_atoms: &mut FxHashSet<AtomIdx>,
136    aromatic_bonds: &mut FxHashSet<BondIdx>,
137) {
138    for &atom in ring {
139        aromatic_atoms.insert(atom);
140    }
141    for i in 0..ring.len() {
142        let a = ring[i];
143        let b = ring[(i + 1) % ring.len()];
144        if let Some((bidx, _)) = mol.bond_between(a, b) {
145            aromatic_bonds.insert(bidx);
146        }
147    }
148}
149
150/// Assign aromaticity to a molecule using the Hückel 4n+2 rule with fused-ring
151/// propagation (Pass 2) and antiaromaticity detection (4n electrons).
152///
153/// The molecule may be kekulized (`Single`/`Double` bonds) **or** may retain
154/// `BondOrder::Aromatic` bonds from the SMILES parser.  In the latter case,
155/// aromatic bonds are treated as equivalent to double bonds for electron
156/// counting, allowing correct detection without an explicit kekulization step.
157///
158/// For kekulized input from aromatic SMILES, call `chematic_core::kekulize`
159/// then `chematic_core::apply_kekule` first.
160///
161/// Uses [`AromaticityAlgorithm::Huckel`] (default). See [`assign_aromaticity_ex`]
162/// for the RdkitLike variant.
163pub fn assign_aromaticity(mol: &Molecule) -> AromaticityModel {
164    assign_aromaticity_ex(mol, AromaticityAlgorithm::Huckel)
165}
166
167/// Assign aromaticity using the specified algorithm.
168///
169/// The default ([`assign_aromaticity`]) uses [`AromaticityAlgorithm::Huckel`].
170/// Pass [`AromaticityAlgorithm::RdkitLike`] to additionally recognise Se/Te
171/// as lone-pair donors in aromatic rings.
172pub fn assign_aromaticity_ex(mol: &Molecule, algo: AromaticityAlgorithm) -> AromaticityModel {
173    let ring_set = find_sssr(mol);
174    let sssr_rings = ring_set.rings();
175
176    // Augment SSSR rings with smaller XOR sub-rings (GF(2) differences between pairs).
177    // This corrects the case where the SSSR algorithm stores a large fundamental cycle
178    // instead of its smaller GF(2)-reduced equivalent (e.g. the 5-ring of indolizine).
179    let rings: Vec<Vec<AtomIdx>> = augmented_ring_set(mol, sssr_rings);
180
181    let mut aromatic_atoms: FxHashSet<AtomIdx> = FxHashSet::default();
182    let mut aromatic_bonds: FxHashSet<BondIdx> = FxHashSet::default();
183    let mut antiaromatic_rings: Vec<Vec<AtomIdx>> = Vec::new();
184
185    // Per-ring classification: None means "not yet evaluated / indeterminate".
186    let mut classifications: Vec<Option<(RingAromaticity, u32)>> = vec![None; rings.len()];
187
188    // Indices of rings that are candidates for Pass 2 re-evaluation
189    // (returned None or NonAromatic in Pass 1).
190    let mut pass2_candidates: Vec<usize> = Vec::new();
191
192    // ----- Pass 1: independent Hückel per ring -----
193    let empty_context = FxHashSet::default();
194    for (ring_idx, ring) in rings.iter().enumerate() {
195        match ring_pi_electrons(mol, ring, &empty_context, algo) {
196            Some(pi) => {
197                let (cls, count) = classify_ring_aromaticity(pi);
198                classifications[ring_idx] = Some((cls, count));
199                match cls {
200                    RingAromaticity::Aromatic => {
201                        mark_ring_aromatic(mol, ring, &mut aromatic_atoms, &mut aromatic_bonds);
202                    }
203                    RingAromaticity::Antiaromatic => {
204                        antiaromatic_rings.push(ring.to_vec());
205                        // Antiaromatic is definitive — do not retry in Pass 2.
206                    }
207                    RingAromaticity::NonAromatic => {
208                        pass2_candidates.push(ring_idx);
209                    }
210                }
211            }
212            None => {
213                // Indeterminate (sp3 atoms, unsupported elements, etc.).
214                pass2_candidates.push(ring_idx);
215            }
216        }
217    }
218
219    // ----- Pass 2: propagate through fused ring systems -----
220    // Re-evaluate rings adjacent to already-aromatic rings.  Repeat until
221    // convergence (no newly aromatic ring found in the last iteration).
222    loop {
223        let mut any_new = false;
224        let mut still_pending: Vec<usize> = Vec::new();
225
226        for ring_idx in pass2_candidates {
227            let ring = &rings[ring_idx];
228            // Only rings that share an atom with an already-aromatic ring qualify.
229            if !ring.iter().any(|a| aromatic_atoms.contains(a)) {
230                still_pending.push(ring_idx);
231                continue;
232            }
233            match ring_pi_electrons(mol, ring, &aromatic_atoms, algo) {
234                Some(pi) => {
235                    let (cls, count) = classify_ring_aromaticity(pi);
236                    classifications[ring_idx] = Some((cls, count));
237                    if matches!(cls, RingAromaticity::Aromatic) {
238                        mark_ring_aromatic(mol, ring, &mut aromatic_atoms, &mut aromatic_bonds);
239                        any_new = true;
240                    }
241                    // NonAromatic even in Pass 2 context: do not retry further.
242                }
243                None => {
244                    still_pending.push(ring_idx);
245                }
246            }
247        }
248
249        pass2_candidates = still_pending;
250        if !any_new {
251            break;
252        }
253    }
254
255    // Build the public ring_classifications list (SSSR rings only, omitting augmented/indeterminate).
256    let ring_classifications: Vec<(Vec<AtomIdx>, RingAromaticity, u32)> = rings
257        .iter()
258        .take(sssr_rings.len()) // only expose SSSR rings in the public API
259        .enumerate()
260        .filter_map(|(i, ring)| classifications[i].map(|(cls, count)| (ring.to_vec(), cls, count)))
261        .collect();
262
263    AromaticityModel {
264        aromatic_atoms,
265        aromatic_bonds,
266        antiaromatic_rings,
267        ring_classifications,
268    }
269}
270
271/// Apply aromaticity perception to a molecule.
272///
273/// Returns a new [`Molecule`] where atoms in Hückel-aromatic rings have
274/// `atom.aromatic = true` and their bonds carry [`BondOrder::Aromatic`].
275/// Non-aromatic atoms and bonds are unchanged.
276///
277/// The input may be kekulized (no `Aromatic` bond orders) or may retain
278/// aromatic bond orders from the SMILES parser.
279///
280/// Uses [`AromaticityAlgorithm::Huckel`] (default). See [`apply_aromaticity_ex`]
281/// for the RdkitLike variant.
282pub fn apply_aromaticity(mol: &Molecule) -> Molecule {
283    apply_aromaticity_ex(mol, AromaticityAlgorithm::Huckel)
284}
285
286/// Apply aromaticity using the specified algorithm.
287///
288/// Returns a new [`Molecule`] with aromatic flags set according to `algo`.
289pub fn apply_aromaticity_ex(mol: &Molecule, algo: AromaticityAlgorithm) -> Molecule {
290    use chematic_core::{BondOrder, MoleculeBuilder};
291
292    let model = assign_aromaticity_ex(mol, algo);
293    let mut builder = MoleculeBuilder::new();
294
295    for (idx, atom) in mol.atoms() {
296        let mut a = atom.clone();
297        if model.is_atom_aromatic(idx) {
298            a.aromatic = true;
299        }
300        builder.add_atom(a);
301    }
302    for (bidx, bond) in mol.bonds() {
303        let order = if model.is_bond_aromatic(bidx) {
304            BondOrder::Aromatic
305        } else {
306            bond.order
307        };
308        let _ = builder.add_bond(bond.atom1, bond.atom2, order);
309    }
310    builder.build()
311}
312
313// ---------------------------------------------------------------------------
314// Ring augmentation (XOR sub-rings)
315// ---------------------------------------------------------------------------
316
317/// Return the sorted set of bond indices that form `ring`.
318fn ring_bond_set(mol: &Molecule, ring: &[AtomIdx]) -> Vec<BondIdx> {
319    let n = ring.len();
320    let mut bonds: Vec<BondIdx> = (0..n)
321        .filter_map(|i| {
322            let a = ring[i];
323            let b = ring[(i + 1) % n];
324            mol.bond_between(a, b).map(|(bidx, _)| bidx)
325        })
326        .collect();
327    bonds.sort();
328    bonds
329}
330
331/// Sorted symmetric difference of two sorted slices.
332fn bond_sym_diff(a: &[BondIdx], b: &[BondIdx]) -> Vec<BondIdx> {
333    let mut result: Vec<BondIdx> = Vec::new();
334    let mut i = 0;
335    let mut j = 0;
336    while i < a.len() && j < b.len() {
337        match a[i].cmp(&b[j]) {
338            std::cmp::Ordering::Less => {
339                result.push(a[i]);
340                i += 1;
341            }
342            std::cmp::Ordering::Greater => {
343                result.push(b[j]);
344                j += 1;
345            }
346            std::cmp::Ordering::Equal => {
347                i += 1;
348                j += 1;
349            }
350        }
351    }
352    result.extend_from_slice(&a[i..]);
353    result.extend_from_slice(&b[j..]);
354    result
355}
356
357/// Reconstruct an ordered atom sequence from a set of bond indices forming a simple cycle.
358/// Returns `None` if the bonds do not form a valid simple cycle.
359fn ring_atoms_from_bond_set(mol: &Molecule, bonds: &[BondIdx]) -> Option<Vec<AtomIdx>> {
360    if bonds.is_empty() {
361        return None;
362    }
363    let mut adj: FxHashMap<AtomIdx, [Option<AtomIdx>; 2]> = FxHashMap::default();
364    for &bidx in bonds {
365        let bond = mol.bond(bidx);
366        for (a, b) in [(bond.atom1, bond.atom2), (bond.atom2, bond.atom1)] {
367            let e = adj.entry(a).or_insert([None; 2]);
368            if e[0].is_none() {
369                e[0] = Some(b);
370            } else if e[1].is_none() {
371                e[1] = Some(b);
372            } else {
373                return None; // degree > 2 — not a simple ring
374            }
375        }
376    }
377    // All atoms must have exactly 2 neighbours.
378    if adj.values().any(|e| e[1].is_none()) {
379        return None;
380    }
381    let start = *adj.keys().next()?;
382    let mut path = vec![start];
383    let mut prev = start;
384    let mut current = adj[&start][0]?;
385    while current != start {
386        path.push(current);
387        let [n0, n1] = adj[&current];
388        let next = if n0 == Some(prev) { n1? } else { n0? };
389        prev = current;
390        current = next;
391    }
392    if path.len() != bonds.len() {
393        return None;
394    }
395    Some(path)
396}
397
398/// Augment the SSSR ring list with smaller XOR sub-rings found by pairwise GF(2)
399/// differences between SSSR rings that share atoms.
400///
401/// The standard SSSR algorithm sometimes stores a large fundamental cycle rather
402/// than its smaller GF(2)-reduced equivalent (e.g. the 5-ring of indolizine is
403/// the XOR of the 6-ring and the 9-ring the algorithm reports).
404/// This augmentation adds such missing smaller rings so that aromaticity
405/// perception works on the correct smallest rings without modifying the SSSR.
406///
407/// The returned `Vec` starts with all SSSR rings in their original order; any
408/// additional sub-rings derived by GF(2) pairwise XOR follow.  The function
409/// only adds a ring if it is strictly smaller than *both* parents, ensuring
410/// that envelope rings (e.g. the 10-membered perimeter of naphthalene) are
411/// never introduced.
412pub fn augmented_ring_set(mol: &Molecule, sssr_rings: &[Vec<AtomIdx>]) -> Vec<Vec<AtomIdx>> {
413    let mut rings: Vec<Vec<AtomIdx>> = sssr_rings.to_vec();
414
415    // Track which atom-sets we already have (as sorted atom lists).
416    let mut known: FxHashSet<Vec<AtomIdx>> = sssr_rings
417        .iter()
418        .map(|r| {
419            let mut s = r.clone();
420            s.sort();
421            s
422        })
423        .collect();
424
425    // Iterative pairwise XOR until convergence.
426    //
427    // A single pass only finds rings that are the XOR of two SSSR rings.
428    // Iterating also finds rings that require XOR of 3+ SSSR rings
429    // (e.g. the inner hexagon of coronene, or sub-rings in multi-step
430    // fused PAHs where the SSSR chose large perimeter cycles).
431    // Termination is guaranteed because each new ring is strictly smaller
432    // than both of its parents, so ring size can only decrease.
433    loop {
434        let mut changed = false;
435        let n = rings.len();
436        let bond_sets: Vec<Vec<BondIdx>> = rings.iter().map(|r| ring_bond_set(mol, r)).collect();
437
438        for i in 0..n {
439            for j in (i + 1)..n {
440                // Only consider pairs that share atoms (fused rings).
441                let shares_atom = rings[i].iter().any(|a| rings[j].contains(a));
442                if !shares_atom {
443                    continue;
444                }
445                let xor_bonds = bond_sym_diff(&bond_sets[i], &bond_sets[j]);
446                if xor_bonds.is_empty() {
447                    continue;
448                }
449                // Only interesting if the XOR ring is not larger than the larger
450                // parent.  Using max() recovers cases where SSSR chose a large
451                // cycle (e.g. 10-ring macro vs 6-ring benzene twin).
452                // Using `>` (not `>=`) also allows same-size XOR rings, which
453                // handles bridged bicyclics (e.g. tropane or dioxolane spirocycles)
454                // where both parent rings are 6-membered and the missing bridge
455                // ring is also 6-membered.  Termination is still guaranteed:
456                // the `known` set prevents duplicates, and a finite molecule has
457                // finitely many valid cycles.
458                if xor_bonds.len() > rings[i].len().max(rings[j].len()) {
459                    continue;
460                }
461                if let Some(new_ring) = ring_atoms_from_bond_set(mol, &xor_bonds) {
462                    let mut key = new_ring.clone();
463                    key.sort();
464                    if known.insert(key) {
465                        rings.push(new_ring);
466                        changed = true;
467                    }
468                }
469            }
470        }
471
472        // 3-ring XOR: catches small rings that require XOR of 3 SSSR rings
473        // when no intermediate 2-ring XOR produces a valid smaller ring.
474        for i in 0..n {
475            for j in (i + 1)..n {
476                let shares_ij = rings[i].iter().any(|a| rings[j].contains(a));
477                if !shares_ij {
478                    continue;
479                }
480                let xor_ij = bond_sym_diff(&bond_sets[i], &bond_sets[j]);
481                if xor_ij.is_empty() {
482                    continue;
483                }
484                for k in (j + 1)..n {
485                    let shares_k = rings[k]
486                        .iter()
487                        .any(|a| rings[i].contains(a) || rings[j].contains(a));
488                    if !shares_k {
489                        continue;
490                    }
491                    let xor_ijk = bond_sym_diff(&xor_ij, &bond_sets[k]);
492                    let max_size = rings[i].len().max(rings[j].len()).max(rings[k].len());
493                    if xor_ijk.is_empty() || xor_ijk.len() > max_size {
494                        continue;
495                    }
496                    if let Some(new_ring) = ring_atoms_from_bond_set(mol, &xor_ijk) {
497                        let mut key = new_ring.clone();
498                        key.sort();
499                        if known.insert(key) {
500                            rings.push(new_ring);
501                            changed = true;
502                        }
503                    }
504                }
505            }
506        }
507
508        if !changed {
509            break;
510        }
511    }
512
513    rings
514}
515
516/// Shared inner: SSSR → augmented_ring_set → strip_envelope_rings, no aromaticity filter.
517fn all_ring_list_inner(mol: &Molecule) -> Vec<Vec<AtomIdx>> {
518    let sssr = crate::sssr::find_sssr(mol);
519    let aug = augmented_ring_set(mol, sssr.rings());
520    if aug.len() <= 1 {
521        return aug;
522    }
523    let bond_sets: Vec<Vec<BondIdx>> = aug.iter().map(|r| ring_bond_set(mol, r)).collect();
524    let mut is_envelope = vec![false; aug.len()];
525    strip_envelope_rings(&aug, &bond_sets, &mut is_envelope);
526    aug.into_iter()
527        .zip(is_envelope)
528        .filter(|(_, e)| !e)
529        .map(|(r, _)| r)
530        .collect()
531}
532
533/// Return all rings after augmented-ring-set expansion and envelope stripping.
534///
535/// Same pipeline as [`aromatic_ring_list`] but with no aromaticity filter — useful
536/// for aliphatic/saturated ring counting and bridgehead detection where SSSR
537/// envelope rings cause over-counting.
538pub fn all_ring_list(mol: &Molecule) -> Vec<Vec<AtomIdx>> {
539    all_ring_list_inner(mol)
540}
541
542/// True when all ring bonds between ring atoms are `BondOrder::Aromatic`.
543///
544/// Rings written with aromatic-SMILES notation but containing an explicit single
545/// bond (`c-n`, `nc-2`, etc.) are NOT truly aromatic.  RDKit canonicalises such
546/// SMILES with lowercase atoms and a `-` bond, which the parser stores as
547/// `BondOrder::Single` between two aromatic-flagged atoms.  Returning `false`
548/// here lets callers exclude them from the aromatic ring count.
549pub fn ring_bonds_all_aromatic(mol: &Molecule, ring: &[AtomIdx]) -> bool {
550    let n = ring.len();
551    (0..n).all(|i| {
552        let a = ring[i];
553        let b = ring[(i + 1) % n];
554        mol.bond_between(a, b)
555            .map(|(bidx, _)| mol.bond(bidx).order == BondOrder::Aromatic)
556            .unwrap_or(true)
557    })
558}
559
560/// Return the de-duplicated list of aromatic rings after augmented-ring-set expansion
561/// and envelope stripping.  Useful for filtering (e.g. counting only aromatic heterocycles).
562pub fn aromatic_ring_list(mol: &Molecule) -> Vec<Vec<AtomIdx>> {
563    let mol_with_arom;
564    let mol = if mol.atoms().any(|(_, a)| a.aromatic) {
565        mol
566    } else {
567        mol_with_arom = apply_aromaticity(mol);
568        &mol_with_arom
569    };
570    all_ring_list_inner(mol)
571        .into_iter()
572        .filter(|ring| {
573            ring.iter().all(|&idx| mol.atom(idx).aromatic) && ring_bonds_all_aromatic(mol, ring)
574        })
575        .collect()
576}
577
578/// Mark which rings in `aromatic` are GF(2) sums (bond-XOR) of 2–4 smaller rings.
579fn strip_envelope_rings(
580    aromatic: &[Vec<AtomIdx>],
581    bond_sets: &[Vec<BondIdx>],
582    is_envelope: &mut [bool],
583) {
584    let n = aromatic.len();
585    for i in 0..n {
586        let si = aromatic[i].len();
587        'jk: for j in 0..n {
588            if j == i || aromatic[j].len() >= si {
589                continue;
590            }
591            for k in (j + 1)..n {
592                if k == i || aromatic[k].len() >= si {
593                    continue;
594                }
595                if bond_sym_diff(&bond_sets[j], &bond_sets[k]) == bond_sets[i] {
596                    is_envelope[i] = true;
597                    break 'jk;
598                }
599            }
600        }
601        if !is_envelope[i] {
602            'jkl: for j in 0..n {
603                if j == i || aromatic[j].len() >= si {
604                    continue;
605                }
606                for k in (j + 1)..n {
607                    if k == i || aromatic[k].len() >= si {
608                        continue;
609                    }
610                    let xor_jk = bond_sym_diff(&bond_sets[j], &bond_sets[k]);
611                    for l in (k + 1)..n {
612                        if l == i || aromatic[l].len() >= si {
613                            continue;
614                        }
615                        if bond_sym_diff(&xor_jk, &bond_sets[l]) == bond_sets[i] {
616                            is_envelope[i] = true;
617                            break 'jkl;
618                        }
619                    }
620                }
621            }
622        }
623        if !is_envelope[i] {
624            'jklm: for j in 0..n {
625                if j == i || aromatic[j].len() >= si {
626                    continue;
627                }
628                for k in (j + 1)..n {
629                    if k == i || aromatic[k].len() >= si {
630                        continue;
631                    }
632                    let xor_jk = bond_sym_diff(&bond_sets[j], &bond_sets[k]);
633                    for l in (k + 1)..n {
634                        if l == i || aromatic[l].len() >= si {
635                            continue;
636                        }
637                        let xor_jkl = bond_sym_diff(&xor_jk, &bond_sets[l]);
638                        for m in (l + 1)..n {
639                            if m == i || aromatic[m].len() >= si {
640                                continue;
641                            }
642                            if bond_sym_diff(&xor_jkl, &bond_sets[m]) == bond_sets[i] {
643                                is_envelope[i] = true;
644                                break 'jklm;
645                            }
646                        }
647                    }
648                }
649            }
650        }
651    }
652}
653
654pub fn count_aromatic_rings(mol: &Molecule) -> usize {
655    // For Kekulé-form input (uppercase atoms, no aromatic flags yet), run Hückel
656    // perception first so ring detection works correctly (RDKit #9271).
657    let mol_with_arom;
658    let mol = if mol.atoms().any(|(_, a)| a.aromatic) {
659        mol // aromatic SMILES — flags already set during parsing
660    } else {
661        mol_with_arom = apply_aromaticity(mol);
662        &mol_with_arom
663    };
664
665    let sssr = crate::sssr::find_sssr(mol);
666    let aug = augmented_ring_set(mol, sssr.rings());
667
668    // Keep only rings where every atom carries the aromatic flag.
669    let aromatic: Vec<Vec<AtomIdx>> = aug
670        .into_iter()
671        .filter(|ring| ring.iter().all(|&idx| mol.atom(idx).aromatic))
672        .collect();
673
674    if aromatic.len() <= 1 {
675        return aromatic.len();
676    }
677
678    // Build sorted bond-index sets for each aromatic ring.
679    let bond_sets: Vec<Vec<BondIdx>> = aromatic.iter().map(|r| ring_bond_set(mol, r)).collect();
680
681    // Mark rings that are the GF(2) sum (bond-XOR) of 2, 3, or 4 strictly
682    // smaller aromatic rings.  Such rings are "envelope" cycles introduced
683    // when the SSSR chose a large fundamental cycle instead of its smaller
684    // GF(2) components.
685    // 2-ring XOR: handles linear/angular fused systems (naphthalene, indolizine…).
686    // 3-ring XOR: handles compact PAHs like pyrene.
687    // 4-ring XOR: handles coronene-class PAHs where the outer perimeter is the
688    //   GF(2) sum of four inner hexagons.
689    let n = aromatic.len();
690    let mut is_envelope = vec![false; n];
691    strip_envelope_rings(&aromatic, &bond_sets, &mut is_envelope);
692    is_envelope.iter().filter(|&&e| !e).count()
693}
694
695// ---------------------------------------------------------------------------
696// Per-ring pi electron count
697// ---------------------------------------------------------------------------
698
699/// Count pi electrons for a ring atom, returning `None` if the atom is
700/// incompatible with aromaticity (e.g. sp3 carbon).
701///
702/// `aromatic_context`: atoms already confirmed aromatic (from Pass 1 or a
703/// previous Pass 2 iteration).  Such atoms contribute 1π unconditionally,
704/// without requiring an explicit double bond.
705///
706/// Rules:
707/// - **C**: `has_double_any` (Double or Aromatic bond anywhere) → 1π, else None.
708///   If already in `aromatic_context` → 1π (confirmed sp2).
709/// - **N**:
710///   1. Has H → 2π (pyrrole-type lone pair).
711///   2. Has an explicit `Double` bond → 1π (pyridine-type).
712///   3. Bridgehead N: total_degree == 3 AND ring_degree < total_degree AND no
713///      explicit double bond → 2π (lone pair in p orbital, like indolizine N).
714///   4. Has in-ring `Aromatic` bond → 1π (pyridine-like aromatic N).
715///   5. Already in `aromatic_context` → 1π.
716///   6. Otherwise → None.
717/// - **O/S**: ring_degree must be 2; contributes 2π (lone pair).
718/// - **Se (34) / Te (52)**: analogous to S; only in [`AromaticityAlgorithm::RdkitLike`] mode.
719/// - **Other elements**: None (unsupported).
720fn ring_pi_electrons(
721    mol: &Molecule,
722    ring: &[AtomIdx],
723    aromatic_context: &FxHashSet<AtomIdx>,
724    algo: AromaticityAlgorithm,
725) -> Option<u32> {
726    let ring_atom_set: FxHashSet<AtomIdx> = ring.iter().copied().collect();
727    let mut total_pi: u32 = 0;
728
729    for &atom_idx in ring {
730        // Atoms already confirmed aromatic in an adjacent ring contribute 1π.
731        if aromatic_context.contains(&atom_idx) {
732            total_pi += 1;
733            continue;
734        }
735
736        let atom = mol.atom(atom_idx);
737        let an = atom.element.atomic_number();
738
739        let ring_degree = mol
740            .neighbors(atom_idx)
741            .filter(|(nb, _)| ring_atom_set.contains(nb))
742            .count();
743
744        let total_degree = mol.degree(atom_idx);
745
746        // Explicit Double bond anywhere (not counting Aromatic).
747        let has_explicit_double = mol
748            .neighbors(atom_idx)
749            .any(|(_, bidx)| mol.bond(bidx).order == BondOrder::Double);
750
751        // Double OR Aromatic bond anywhere (for C sp2 check).
752        let has_double_any = has_explicit_double
753            || mol
754                .neighbors(atom_idx)
755                .any(|(_, bidx)| mol.bond(bidx).order == BondOrder::Aromatic);
756
757        // Aromatic bond within the ring (for pyridine-like N in aromatic SMILES).
758        let has_aromatic_in_ring = mol
759            .neighbors(atom_idx)
760            .filter(|(nb, _)| ring_atom_set.contains(nb))
761            .any(|(_, bidx)| mol.bond(bidx).order == BondOrder::Aromatic);
762
763        let pi = match an {
764            // Carbon: must be sp2 (has a double or aromatic bond somewhere).
765            6 => {
766                if !has_double_any {
767                    return None; // sp3 carbon — ring cannot be aromatic
768                }
769                1
770            }
771
772            // Nitrogen
773            7 => {
774                if implicit_hcount(mol, atom_idx) > 0 {
775                    // Pyrrole-type N with H: lone pair → 2π.
776                    2
777                } else if has_explicit_double {
778                    // Pyridine-type N with explicit double bond → 1π.
779                    1
780                } else if total_degree == 3 && ring_degree < total_degree {
781                    // Bridgehead N (e.g. indolizine): no H, no explicit double bond,
782                    // and all three σ-bonds exactly fill the N valence (3).
783                    // The lone pair occupies the p orbital → 2π (pyrrole-analogue).
784                    //
785                    // Guard: the exocyclic bond must lead to an sp2/aromatic neighbour
786                    // (another fused ring atom). This prevents imide N (phthalimide)
787                    // from triggering here — phthalimide N's exocyclic bond goes to an
788                    // alkyl chain with no double/aromatic bonds.
789                    let has_sp2_exocyclic = mol
790                        .neighbors(atom_idx)
791                        .filter(|(nb, _)| !ring_atom_set.contains(nb))
792                        .any(|(nb, _)| {
793                            mol.neighbors(nb).any(|(_, b2)| {
794                                matches!(
795                                    mol.bond(b2).order,
796                                    BondOrder::Double | BondOrder::Aromatic
797                                )
798                            })
799                        });
800                    if has_sp2_exocyclic {
801                        2
802                    } else {
803                        return None;
804                    }
805                } else if has_aromatic_in_ring {
806                    // N in an aromatic ring (pre-kekulization input) without an
807                    // explicit double bond and not a bridgehead → pyridine-like → 1π.
808                    1
809                } else {
810                    // Cannot determine pi contribution.
811                    return None;
812                }
813            }
814
815            // Oxygen / sulfur: lone-pair donor, must be 2-connected in the ring.
816            8 | 16 => {
817                if ring_degree != 2 {
818                    return None;
819                }
820                // Sulfoxide/sulfone: exocyclic S=O ties up the lone pair; cannot donate 2π
821                if an == 16
822                    && mol.neighbors(atom_idx).any(|(nb, bidx)| {
823                        !ring_atom_set.contains(&nb) && mol.bond(bidx).order == BondOrder::Double
824                    })
825                {
826                    return None;
827                }
828                2
829            }
830
831            // Se (34) / Te (52): chalcogen lone-pair donors (2π), analogous to S.
832            // Only recognised in RdkitLike mode.
833            34 | 52 => {
834                if algo != AromaticityAlgorithm::RdkitLike {
835                    return None;
836                }
837                if ring_degree != 2 {
838                    return None;
839                }
840                // Exocyclic Se=O / Te=O ties up the lone pair.
841                if mol.neighbors(atom_idx).any(|(nb, bidx)| {
842                    !ring_atom_set.contains(&nb) && mol.bond(bidx).order == BondOrder::Double
843                }) {
844                    return None;
845                }
846                2
847            }
848
849            // Unsupported element.
850            _ => return None,
851        };
852
853        total_pi += pi;
854    }
855
856    Some(total_pi)
857}
858
859// ---------------------------------------------------------------------------
860// Tests
861// ---------------------------------------------------------------------------
862
863#[cfg(test)]
864mod tests {
865    use super::*;
866    use chematic_core::{Atom, BondOrder, Element, MoleculeBuilder};
867
868    // =========================================================================
869    // Molecule builder helpers (kekulized, manually constructed)
870    // =========================================================================
871
872    fn benzene_kekule() -> chematic_core::Molecule {
873        let mut b = MoleculeBuilder::new();
874        let atoms: Vec<_> = (0..6).map(|_| b.add_atom(Atom::new(Element::C))).collect();
875        for i in 0..6 {
876            let order = if i % 2 == 0 {
877                BondOrder::Double
878            } else {
879                BondOrder::Single
880            };
881            b.add_bond(atoms[i], atoms[(i + 1) % 6], order).unwrap();
882        }
883        b.build()
884    }
885
886    fn cyclohexane() -> chematic_core::Molecule {
887        let mut b = MoleculeBuilder::new();
888        let atoms: Vec<_> = (0..6).map(|_| b.add_atom(Atom::new(Element::C))).collect();
889        for i in 0..6 {
890            b.add_bond(atoms[i], atoms[(i + 1) % 6], BondOrder::Single)
891                .unwrap();
892        }
893        b.build()
894    }
895
896    fn pyridine_kekule() -> chematic_core::Molecule {
897        let mut b = MoleculeBuilder::new();
898        let n = b.add_atom(Atom::new(Element::N));
899        let atoms_c: Vec<_> = (0..5).map(|_| b.add_atom(Atom::new(Element::C))).collect();
900        let ring = [
901            n, atoms_c[0], atoms_c[1], atoms_c[2], atoms_c[3], atoms_c[4],
902        ];
903        for i in 0..6 {
904            let order = if i % 2 == 0 {
905                BondOrder::Double
906            } else {
907                BondOrder::Single
908            };
909            b.add_bond(ring[i], ring[(i + 1) % 6], order).unwrap();
910        }
911        b.build()
912    }
913
914    fn furan_kekule() -> chematic_core::Molecule {
915        let mut b = MoleculeBuilder::new();
916        let o = b.add_atom(Atom::new(Element::O));
917        let c1 = b.add_atom(Atom::new(Element::C));
918        let c2 = b.add_atom(Atom::new(Element::C));
919        let c3 = b.add_atom(Atom::new(Element::C));
920        let c4 = b.add_atom(Atom::new(Element::C));
921        let ring = [o, c1, c2, c3, c4];
922        b.add_bond(ring[0], ring[1], BondOrder::Single).unwrap();
923        b.add_bond(ring[1], ring[2], BondOrder::Double).unwrap();
924        b.add_bond(ring[2], ring[3], BondOrder::Single).unwrap();
925        b.add_bond(ring[3], ring[4], BondOrder::Double).unwrap();
926        b.add_bond(ring[4], ring[0], BondOrder::Single).unwrap();
927        b.build()
928    }
929
930    fn pyrrole_kekule() -> chematic_core::Molecule {
931        let mut b = MoleculeBuilder::new();
932        let mut n_atom = Atom::new(Element::N);
933        n_atom.hydrogen_count = Some(1);
934        let n = b.add_atom(n_atom);
935        let c1 = b.add_atom(Atom::new(Element::C));
936        let c2 = b.add_atom(Atom::new(Element::C));
937        let c3 = b.add_atom(Atom::new(Element::C));
938        let c4 = b.add_atom(Atom::new(Element::C));
939        let ring = [n, c1, c2, c3, c4];
940        b.add_bond(ring[0], ring[1], BondOrder::Single).unwrap();
941        b.add_bond(ring[1], ring[2], BondOrder::Double).unwrap();
942        b.add_bond(ring[2], ring[3], BondOrder::Single).unwrap();
943        b.add_bond(ring[3], ring[4], BondOrder::Double).unwrap();
944        b.add_bond(ring[4], ring[0], BondOrder::Single).unwrap();
945        b.build()
946    }
947
948    fn naphthalene_kekule() -> chematic_core::Molecule {
949        let mut b = MoleculeBuilder::new();
950        let atoms: Vec<_> = (0..10).map(|_| b.add_atom(Atom::new(Element::C))).collect();
951        let ring1 = [0usize, 1, 2, 3, 4, 9];
952        let orders1 = [
953            BondOrder::Double,
954            BondOrder::Single,
955            BondOrder::Double,
956            BondOrder::Single,
957            BondOrder::Double,
958            BondOrder::Single,
959        ];
960        for i in 0..6 {
961            b.add_bond(atoms[ring1[i]], atoms[ring1[(i + 1) % 6]], orders1[i])
962                .unwrap();
963        }
964        let ring2_extra = [(4, 5), (5, 6), (6, 7), (7, 8), (8, 9)];
965        let orders2 = [
966            BondOrder::Single,
967            BondOrder::Double,
968            BondOrder::Single,
969            BondOrder::Double,
970            BondOrder::Single,
971        ];
972        for (i, &(a, bb)) in ring2_extra.iter().enumerate() {
973            b.add_bond(atoms[a], atoms[bb], orders2[i]).unwrap();
974        }
975        b.build()
976    }
977
978    fn cyclobutadiene_kekule() -> chematic_core::Molecule {
979        let mut b = MoleculeBuilder::new();
980        let atoms: Vec<_> = (0..4).map(|_| b.add_atom(Atom::new(Element::C))).collect();
981        for i in 0..4 {
982            let order = if i % 2 == 0 {
983                BondOrder::Double
984            } else {
985                BondOrder::Single
986            };
987            b.add_bond(atoms[i], atoms[(i + 1) % 4], order).unwrap();
988        }
989        b.build()
990    }
991
992    fn cyclooctatetraene_kekule() -> chematic_core::Molecule {
993        let mut b = MoleculeBuilder::new();
994        let atoms: Vec<_> = (0..8).map(|_| b.add_atom(Atom::new(Element::C))).collect();
995        for i in 0..8 {
996            let order = if i % 2 == 0 {
997                BondOrder::Double
998            } else {
999                BondOrder::Single
1000            };
1001            b.add_bond(atoms[i], atoms[(i + 1) % 8], order).unwrap();
1002        }
1003        b.build()
1004    }
1005
1006    /// Helper: parse an aromatic SMILES and return the molecule with aromatic bonds
1007    /// (no kekulization).  Use for compounds where kekulization is unsupported.
1008    #[cfg(test)]
1009    fn mol_aromatic(smiles: &str) -> chematic_core::Molecule {
1010        chematic_smiles::parse(smiles).expect("valid SMILES")
1011    }
1012
1013    /// Helper: parse SMILES and kekulize.  Panics if kekulization fails.
1014    #[cfg(test)]
1015    fn mol_kekulized(smiles: &str) -> chematic_core::Molecule {
1016        let mol = chematic_smiles::parse(smiles).expect("valid SMILES");
1017        let k = chematic_core::kekulize(&mol).expect("kekulizable");
1018        chematic_core::apply_kekule(&mol, &k)
1019    }
1020
1021    // =========================================================================
1022    // Regression: kekulized single-ring aromatics (Pass 1 only, no context)
1023    // =========================================================================
1024
1025    #[test]
1026    fn test_benzene_is_aromatic() {
1027        let mol = benzene_kekule();
1028        let model = assign_aromaticity(&mol);
1029        assert_eq!(
1030            model.aromatic_atom_count(),
1031            6,
1032            "all 6 benzene atoms aromatic"
1033        );
1034        for i in 0..6u32 {
1035            assert!(model.is_atom_aromatic(AtomIdx(i)));
1036        }
1037    }
1038
1039    #[test]
1040    fn test_cyclohexane_not_aromatic() {
1041        let mol = cyclohexane();
1042        let model = assign_aromaticity(&mol);
1043        assert_eq!(model.aromatic_atom_count(), 0, "cyclohexane not aromatic");
1044    }
1045
1046    #[test]
1047    fn test_pyridine_is_aromatic() {
1048        let mol = pyridine_kekule();
1049        let model = assign_aromaticity(&mol);
1050        assert_eq!(model.aromatic_atom_count(), 6);
1051    }
1052
1053    #[test]
1054    fn test_furan_is_aromatic() {
1055        let mol = furan_kekule();
1056        let model = assign_aromaticity(&mol);
1057        assert_eq!(model.aromatic_atom_count(), 5);
1058    }
1059
1060    #[test]
1061    fn test_pyrrole_is_aromatic() {
1062        let mol = pyrrole_kekule();
1063        let model = assign_aromaticity(&mol);
1064        assert_eq!(model.aromatic_atom_count(), 5);
1065    }
1066
1067    #[test]
1068    fn test_naphthalene_both_rings_aromatic() {
1069        let mol = naphthalene_kekule();
1070        let model = assign_aromaticity(&mol);
1071        assert_eq!(
1072            model.aromatic_atom_count(),
1073            10,
1074            "all 10 naphthalene atoms aromatic"
1075        );
1076    }
1077
1078    #[test]
1079    fn test_bond_aromaticity_benzene() {
1080        let mol = benzene_kekule();
1081        let model = assign_aromaticity(&mol);
1082        let count = mol
1083            .bonds()
1084            .filter(|(b, _)| model.is_bond_aromatic(*b))
1085            .count();
1086        assert_eq!(count, 6);
1087    }
1088
1089    #[test]
1090    fn test_apply_aromaticity_benzene() {
1091        let mol = benzene_kekule();
1092        let aromatic = apply_aromaticity(&mol);
1093        for (_, atom) in aromatic.atoms() {
1094            assert!(atom.aromatic, "every benzene carbon should be aromatic");
1095        }
1096        let aromatic_bond_count = aromatic
1097            .bonds()
1098            .filter(|(_, b)| b.order == BondOrder::Aromatic)
1099            .count();
1100        assert_eq!(aromatic_bond_count, 6);
1101    }
1102
1103    #[test]
1104    fn test_apply_aromaticity_cyclohexane_unchanged() {
1105        let mol = cyclohexane();
1106        let result = apply_aromaticity(&mol);
1107        for (_, atom) in result.atoms() {
1108            assert!(!atom.aromatic);
1109        }
1110        for (_, bond) in result.bonds() {
1111            assert_ne!(bond.order, BondOrder::Aromatic);
1112        }
1113    }
1114
1115    // =========================================================================
1116    // Antiaromaticity
1117    // =========================================================================
1118
1119    #[test]
1120    fn test_cyclobutadiene_antiaromatic() {
1121        let mol = cyclobutadiene_kekule();
1122        let model = assign_aromaticity(&mol);
1123        assert_eq!(
1124            model.aromatic_atom_count(),
1125            0,
1126            "cyclobutadiene not aromatic"
1127        );
1128        assert!(model.has_antiaromaticity(), "cyclobutadiene antiaromatic");
1129        assert_eq!(model.antiaromatic_rings().len(), 1);
1130        let classifications = model.ring_classifications();
1131        assert_eq!(classifications.len(), 1);
1132        assert_eq!(classifications[0].1, RingAromaticity::Antiaromatic);
1133        assert_eq!(classifications[0].2, 4);
1134    }
1135
1136    #[test]
1137    fn test_cyclooctatetraene_antiaromatic() {
1138        let mol = cyclooctatetraene_kekule();
1139        let model = assign_aromaticity(&mol);
1140        assert_eq!(model.aromatic_atom_count(), 0, "COT not aromatic");
1141        assert!(model.has_antiaromaticity(), "COT antiaromatic");
1142        assert_eq!(model.antiaromatic_rings().len(), 1);
1143        let cls = &model.ring_classifications()[0];
1144        assert_eq!(cls.1, RingAromaticity::Antiaromatic);
1145        assert_eq!(cls.2, 8);
1146    }
1147
1148    // =========================================================================
1149    // Ring classifications
1150    // =========================================================================
1151
1152    #[test]
1153    fn test_ring_classifications_benzene() {
1154        let mol = benzene_kekule();
1155        let model = assign_aromaticity(&mol);
1156        let classifications = model.ring_classifications();
1157        assert_eq!(classifications.len(), 1);
1158        assert_eq!(classifications[0].1, RingAromaticity::Aromatic);
1159        assert_eq!(classifications[0].2, 6);
1160    }
1161
1162    #[test]
1163    fn test_ring_classifications_naphthalene() {
1164        let mol = naphthalene_kekule();
1165        let model = assign_aromaticity(&mol);
1166        let classifications = model.ring_classifications();
1167        assert_eq!(classifications.len(), 2, "naphthalene has two rings");
1168        for (_, classification, count) in classifications {
1169            assert_eq!(*classification, RingAromaticity::Aromatic);
1170            assert_eq!(*count, 6);
1171        }
1172    }
1173
1174    #[test]
1175    fn test_non_aromatic_cyclohexane() {
1176        let mol = cyclohexane();
1177        let model = assign_aromaticity(&mol);
1178        for (_, classification, _) in model.ring_classifications() {
1179            assert_ne!(*classification, RingAromaticity::Aromatic);
1180            assert_ne!(*classification, RingAromaticity::Antiaromatic);
1181        }
1182    }
1183
1184    // =========================================================================
1185    // Electron distribution
1186    // =========================================================================
1187
1188    #[test]
1189    fn test_thiophene_aromatic() {
1190        let mut b = MoleculeBuilder::new();
1191        let s = b.add_atom(Atom::new(Element::S));
1192        let c1 = b.add_atom(Atom::new(Element::C));
1193        let c2 = b.add_atom(Atom::new(Element::C));
1194        let c3 = b.add_atom(Atom::new(Element::C));
1195        let c4 = b.add_atom(Atom::new(Element::C));
1196        let ring = [s, c1, c2, c3, c4];
1197        b.add_bond(ring[0], ring[1], BondOrder::Single).unwrap();
1198        b.add_bond(ring[1], ring[2], BondOrder::Double).unwrap();
1199        b.add_bond(ring[2], ring[3], BondOrder::Single).unwrap();
1200        b.add_bond(ring[3], ring[4], BondOrder::Double).unwrap();
1201        b.add_bond(ring[4], ring[0], BondOrder::Single).unwrap();
1202        let mol = b.build();
1203        let model = assign_aromaticity(&mol);
1204        assert_eq!(model.aromatic_atom_count(), 5);
1205        assert_eq!(model.ring_classifications()[0].2, 6);
1206    }
1207
1208    #[test]
1209    fn test_electron_distribution_tracking() {
1210        let mol = benzene_kekule();
1211        let model = assign_aromaticity(&mol);
1212        assert_eq!(model.ring_classifications()[0].2, 6, "benzene: 6 × 1π = 6");
1213
1214        let mol = pyrrole_kekule();
1215        let model = assign_aromaticity(&mol);
1216        assert_eq!(
1217            model.ring_classifications()[0].2,
1218            6,
1219            "pyrrole: N(2π) + 4C(1π) = 6"
1220        );
1221
1222        let mol = furan_kekule();
1223        let model = assign_aromaticity(&mol);
1224        assert_eq!(
1225            model.ring_classifications()[0].2,
1226            6,
1227            "furan: O(2π) + 4C(1π) = 6"
1228        );
1229    }
1230
1231    // =========================================================================
1232    // Aromatic-SMILES input (BondOrder::Aromatic, no kekulization)
1233    // Verifies that assign_aromaticity works on pre-kekulization molecules.
1234    // =========================================================================
1235
1236    #[test]
1237    fn test_benzene_aromatic_smiles() {
1238        // c1ccccc1 — parsed with BondOrder::Aromatic bonds
1239        let mol = mol_aromatic("c1ccccc1");
1240        let model = assign_aromaticity(&mol);
1241        assert_eq!(
1242            model.aromatic_atom_count(),
1243            6,
1244            "benzene from aromatic SMILES"
1245        );
1246    }
1247
1248    #[test]
1249    fn test_naphthalene_aromatic_smiles() {
1250        let mol = mol_aromatic("c1ccc2ccccc2c1");
1251        let model = assign_aromaticity(&mol);
1252        assert_eq!(
1253            model.aromatic_atom_count(),
1254            10,
1255            "naphthalene from aromatic SMILES"
1256        );
1257    }
1258
1259    #[test]
1260    fn test_pyridine_aromatic_smiles() {
1261        let mol = mol_aromatic("c1ccncc1");
1262        let model = assign_aromaticity(&mol);
1263        assert_eq!(
1264            model.aromatic_atom_count(),
1265            6,
1266            "pyridine from aromatic SMILES"
1267        );
1268    }
1269
1270    #[test]
1271    fn test_furan_aromatic_smiles() {
1272        let mol = mol_aromatic("c1ccoc1");
1273        let model = assign_aromaticity(&mol);
1274        assert_eq!(model.aromatic_atom_count(), 5, "furan from aromatic SMILES");
1275    }
1276
1277    #[test]
1278    fn test_pyrrole_aromatic_smiles() {
1279        // [nH] bracket atom: hydrogen_count = Some(1)
1280        let mol = mol_aromatic("c1cc[nH]c1");
1281        let model = assign_aromaticity(&mol);
1282        assert_eq!(
1283            model.aromatic_atom_count(),
1284            5,
1285            "pyrrole from aromatic SMILES"
1286        );
1287    }
1288
1289    #[test]
1290    fn test_thiophene_aromatic_smiles() {
1291        let mol = mol_aromatic("c1ccsc1");
1292        let model = assign_aromaticity(&mol);
1293        assert_eq!(
1294            model.aromatic_atom_count(),
1295            5,
1296            "thiophene from aromatic SMILES"
1297        );
1298    }
1299
1300    // =========================================================================
1301    // Fused-ring kekulized systems (Pass 2 propagation)
1302    // =========================================================================
1303
1304    #[test]
1305    fn test_indole_aromatic() {
1306        // c1ccc2[nH]ccc2c1 — indole (9 atoms, 5-ring + 6-ring fused)
1307        let mol = mol_kekulized("c1ccc2[nH]ccc2c1");
1308        let model = assign_aromaticity(&mol);
1309        assert_eq!(
1310            model.aromatic_atom_count(),
1311            9,
1312            "all 9 indole atoms aromatic"
1313        );
1314    }
1315
1316    #[test]
1317    fn test_benzimidazole_aromatic() {
1318        // Two N atoms in fused 5+6 ring system
1319        let mol = mol_kekulized("c1ccc2[nH]cnc2c1");
1320        let model = assign_aromaticity(&mol);
1321        assert_eq!(model.aromatic_atom_count(), 9, "all 9 benzimidazole atoms");
1322    }
1323
1324    #[test]
1325    fn test_quinoline_aromatic() {
1326        let mol = mol_kekulized("c1ccc2ncccc2c1");
1327        let model = assign_aromaticity(&mol);
1328        assert_eq!(model.aromatic_atom_count(), 10, "all 10 quinoline atoms");
1329    }
1330
1331    #[test]
1332    fn test_acridine_aromatic() {
1333        // 3 fused 6-membered rings, central N: 13 atoms
1334        let mol = mol_kekulized("c1ccc2nc3ccccc3cc2c1");
1335        let model = assign_aromaticity(&mol);
1336        // acridine is C13H9N → 14 heavy atoms (13 C + 1 N), all aromatic
1337        assert_eq!(model.aromatic_atom_count(), 14, "all 14 acridine atoms");
1338    }
1339
1340    // =========================================================================
1341    // Fused-ring aromatic-SMILES input (BondOrder::Aromatic, kekulize fails)
1342    // =========================================================================
1343
1344    #[test]
1345    fn test_indolizine_aromatic() {
1346        // c1ccn2cccc2c1 — indolizine: bridgehead N, kekulization unsupported.
1347        // The SSSR finds a 6-ring and a 9-ring; the 5-ring is recovered via
1348        // augmentation (XOR of 6- and 9-ring).
1349        // Pass 1: 5-ring (augmented) detected via bridgehead-N rule → 6π.
1350        // Pass 2: 6-ring detected using N already aromatic from 5-ring → 6π.
1351        // The 9-ring (SSSR artifact) is NonAromatic (9π ≠ 4n+2), but all
1352        // 9 atoms are correctly flagged aromatic via the 5- and 6-ring.
1353        let mol = mol_aromatic("c1ccn2cccc2c1");
1354        let model = assign_aromaticity(&mol);
1355        assert_eq!(
1356            model.aromatic_atom_count(),
1357            9,
1358            "all 9 indolizine atoms aromatic"
1359        );
1360        // At least the 6-ring should be classified as Aromatic in the SSSR set.
1361        let has_aromatic_ring = model
1362            .ring_classifications()
1363            .iter()
1364            .any(|(_, cls, _)| *cls == RingAromaticity::Aromatic);
1365        assert!(has_aromatic_ring, "at least one SSSR ring aromatic");
1366    }
1367
1368    #[test]
1369    fn test_purine_aromatic() {
1370        // c1cnc2[nH]cnc2n1 — purine: 9 atoms, kekulizable
1371        let mol = mol_kekulized("c1cnc2[nH]cnc2n1");
1372        let model = assign_aromaticity(&mol);
1373        assert_eq!(
1374            model.aromatic_atom_count(),
1375            9,
1376            "all 9 purine atoms aromatic"
1377        );
1378    }
1379
1380    #[test]
1381    fn test_purine_aromatic_from_aromatic_smiles() {
1382        let mol = mol_aromatic("c1cnc2[nH]cnc2n1");
1383        let model = assign_aromaticity(&mol);
1384        assert_eq!(
1385            model.aromatic_atom_count(),
1386            9,
1387            "purine from aromatic SMILES"
1388        );
1389    }
1390
1391    #[test]
1392    fn test_2_pyridinone_aromatic() {
1393        // O=c1ccncc1 — 2-pyridinone (aromatic SMILES, N without H, exo C=O).
1394        // Kekulization fails; tested on the aromatic-bond form directly.
1395        // The exo C=O gives the C atom has_double_any=true → 1π.
1396        // N has Aromatic bonds in ring → 1π (pyridine-like).
1397        // Total: 6 × 1π = 6π → aromatic.
1398        let mol = mol_aromatic("O=c1ccncc1");
1399        let model = assign_aromaticity(&mol);
1400        assert_eq!(
1401            model.aromatic_atom_count(),
1402            6,
1403            "all 6 ring atoms of 2-pyridinone aromatic"
1404        );
1405    }
1406
1407    #[test]
1408    fn test_quinolone_aromatic() {
1409        // O=c1ccc2ncccc2c1 — quinolone: fused 6+6 with exo C=O, kekulize fails
1410        let mol = mol_aromatic("O=c1ccc2ncccc2c1");
1411        let model = assign_aromaticity(&mol);
1412        assert_eq!(
1413            model.aromatic_atom_count(),
1414            10,
1415            "all 10 quinolone ring atoms aromatic"
1416        );
1417        assert_eq!(
1418            model.ring_classifications().len(),
1419            2,
1420            "two rings classified"
1421        );
1422    }
1423
1424    #[test]
1425    fn test_indole_aromatic_smiles() {
1426        let mol = mol_aromatic("c1ccc2[nH]ccc2c1");
1427        let model = assign_aromaticity(&mol);
1428        assert_eq!(
1429            model.aromatic_atom_count(),
1430            9,
1431            "indole from aromatic SMILES"
1432        );
1433    }
1434
1435    // =========================================================================
1436    // Bridgehead N rule: specifically test that the rule fires correctly
1437    // =========================================================================
1438
1439    #[test]
1440    fn test_bridgehead_n_contributes_lone_pair() {
1441        // Indolizine: the bridgehead N (degree 3, no H, no explicit double bond)
1442        // must be detected as a 2π contributor for the 5-membered ring.
1443        // We verify by checking the 5-ring classification (if accessible).
1444        let mol = mol_aromatic("c1ccn2cccc2c1");
1445        let model = assign_aromaticity(&mol);
1446        // All 9 atoms aromatic: both rings must be aromatic.
1447        assert_eq!(model.aromatic_atom_count(), 9);
1448        // The bridgehead N itself must be in the aromatic set.
1449        // In the SMILES c1ccn2cccc2c1, n is atom index 3.
1450        assert!(
1451            model.is_atom_aromatic(AtomIdx(3)),
1452            "bridgehead N must be aromatic"
1453        );
1454    }
1455
1456    #[test]
1457    fn test_non_bridgehead_n_no_false_positive() {
1458        // Pyrimidine: two N atoms in a 6-membered ring, no bridgehead.
1459        // Both N have ring_degree == total_degree == 2.
1460        // Should be detected as aromatic via has_aromatic_in_ring (Aromatic bonds).
1461        let mol = mol_aromatic("c1ccncn1");
1462        let model = assign_aromaticity(&mol);
1463        assert_eq!(model.aromatic_atom_count(), 6, "pyrimidine is aromatic");
1464    }
1465
1466    #[test]
1467    fn test_imidazole_aromatic() {
1468        // c1cn[nH]c1 / c1c[nH]cn1 — imidazole: one pyridine-type N, one pyrrole-type N
1469        let mol = mol_aromatic("c1cn[nH]c1");
1470        let model = assign_aromaticity(&mol);
1471        assert_eq!(model.aromatic_atom_count(), 5, "imidazole is aromatic");
1472    }
1473
1474    // =========================================================================
1475    // Pass 2 specifically: rings that need fused-ring context
1476    // =========================================================================
1477
1478    #[test]
1479    fn test_pass2_needed_for_indolizine_6ring() {
1480        // The augmented 5-ring (XOR of SSSR 6-ring and 9-ring) is detected aromatic in Pass 1.
1481        // The SSSR 6-ring is then detected aromatic in Pass 2 (N already aromatic → 1π).
1482        // The SSSR 9-ring (9π) remains NonAromatic per Hückel.
1483        // Key assertion: all 9 atoms are aromatic (correct overall perception).
1484        let mol = mol_aromatic("c1ccn2cccc2c1");
1485        let model = assign_aromaticity(&mol);
1486        assert_eq!(
1487            model.aromatic_atom_count(),
1488            9,
1489            "all 9 indolizine atoms aromatic"
1490        );
1491        // The bridgehead N must be aromatic.
1492        assert!(
1493            model.is_atom_aromatic(AtomIdx(3)),
1494            "bridgehead N is aromatic"
1495        );
1496        // The 6-ring (SSSR ring, improved by Pass 2) should be classified Aromatic.
1497        let aromatic_count = model
1498            .ring_classifications()
1499            .iter()
1500            .filter(|(_, cls, _)| *cls == RingAromaticity::Aromatic)
1501            .count();
1502        assert!(aromatic_count >= 1, "at least one SSSR ring is aromatic");
1503    }
1504
1505    #[test]
1506    fn test_no_pass2_needed_for_naphthalene() {
1507        // Naphthalene: both rings pass independently in Pass 1.
1508        // Verifies Pass 2 doesn't break things that already work.
1509        let mol = naphthalene_kekule();
1510        let model = assign_aromaticity(&mol);
1511        assert_eq!(model.aromatic_atom_count(), 10);
1512        let classes = model.ring_classifications();
1513        assert_eq!(classes.len(), 2);
1514        for (_, cls, _) in classes {
1515            assert_eq!(*cls, RingAromaticity::Aromatic);
1516        }
1517    }
1518
1519    #[test]
1520    fn test_anthracene_aromatic() {
1521        // c1ccc2cc3ccccc3cc2c1 — anthracene: 3 linearly fused 6-rings, 14 atoms
1522        let mol = mol_kekulized("c1ccc2cc3ccccc3cc2c1");
1523        let model = assign_aromaticity(&mol);
1524        assert_eq!(model.aromatic_atom_count(), 14, "all 14 anthracene atoms");
1525    }
1526
1527    // =========================================================================
1528    // Regression: aromatic-bond path must not perturb kekulized correctness
1529    // =========================================================================
1530
1531    #[test]
1532    fn test_kekulized_path_unaffected_by_aromatic_bond_changes() {
1533        // Kekulized benzene: bonds are Double/Single, not Aromatic.
1534        // The new Aromatic-bond branches must stay dormant.
1535        let mol = benzene_kekule();
1536        // Verify no aromatic bonds in input.
1537        for (_, bond) in mol.bonds() {
1538            assert_ne!(bond.order, BondOrder::Aromatic, "input must be kekulized");
1539        }
1540        let model = assign_aromaticity(&mol);
1541        assert_eq!(model.aromatic_atom_count(), 6);
1542        // All 6 bonds in benzene ring should be aromatic.
1543        let aromatic_bonds = mol
1544            .bonds()
1545            .filter(|(b, _)| model.is_bond_aromatic(*b))
1546            .count();
1547        assert_eq!(aromatic_bonds, 6);
1548    }
1549
1550    #[test]
1551    fn test_keto_pyridinone_not_huckel_aromatic() {
1552        // O=C1NC=CC=C1 — 2-pyridinone keto form with N-H.
1553        // π count: C(=O)(1π) + N-H(2π) + 3×C(1π each) + C6(1π) = 7π → not 4n+2.
1554        // This is a known scope boundary: keto pyridinone has partial aromatic
1555        // character by resonance but is NOT Hückel 4n+2 aromatic.
1556        // RDKit classifies it aromatic using an extended model; our strict Hückel
1557        // implementation correctly returns 0 aromatic atoms.
1558        let mol = mol_kekulized("O=C1NC=CC=C1");
1559        let model = assign_aromaticity(&mol);
1560        assert_eq!(
1561            model.aromatic_atom_count(),
1562            0,
1563            "keto pyridinone is not Hückel aromatic (7π ≠ 4n+2)"
1564        );
1565    }
1566
1567    // ── RDKit #9271: charged / zwitterionic aromatic systems ─────────────────
1568
1569    #[test]
1570    fn test_fluorescein_dianion_aromatic() {
1571        // Fluorescein dianion: RDKit #9271 incorrectly marked xanthene bonds as
1572        // single instead of aromatic. Verify chematic parses and identifies
1573        // aromatic atoms correctly (two benzene rings + xanthene O-bridge ring).
1574        // Kekulé-form SMILES: all atoms uppercase.
1575        let smi = "C1=CC=C(C(=C1)C2=C3C=CC(=O)C=C3OC4=C2C=CC(=C4)[O-])C(=O)[O-]";
1576        let mol = chematic_smiles::parse(smi).expect("fluorescein dianion should parse");
1577        // The molecule should parse without panic. Verify aromatic ring count:
1578        // fluorescein has 3 aromatic rings (2 benzene + xanthene core).
1579        let arc = count_aromatic_rings(&mol);
1580        assert!(
1581            arc >= 2,
1582            "fluorescein dianion: expected ≥2 aromatic rings, got {arc} \
1583             (RDKit #9271: charged aromatics may be misclassified)"
1584        );
1585    }
1586
1587    #[test]
1588    fn test_rhodamine_zwitterion_parses() {
1589        // Rhodamine-type zwitterion with N+ and bridging O (RDKit #9271).
1590        // Must parse cleanly and produce a valid aromatic ring count.
1591        let smi = "CCN(CC)c1ccc2c(-c3ccccc3C(=O)O)c3ccc(=[N+](CC)CC)cc-3oc2c1";
1592        let mol = chematic_smiles::parse(smi).expect("rhodamine zwitterion should parse");
1593        let arc = count_aromatic_rings(&mol);
1594        assert!(arc >= 3, "rhodamine: expected ≥3 aromatic rings, got {arc}");
1595    }
1596
1597    #[test]
1598    fn test_cyclopentadienyl_not_aromatic_kekulized() {
1599        // C1=CC=CC1 — cyclopentadiene (4 C with doubles + 1 sp3 CH2): not aromatic.
1600        let mut b = MoleculeBuilder::new();
1601        let c0 = b.add_atom(Atom::new(Element::C)); // sp3
1602        let c1 = b.add_atom(Atom::new(Element::C));
1603        let c2 = b.add_atom(Atom::new(Element::C));
1604        let c3 = b.add_atom(Atom::new(Element::C));
1605        let c4 = b.add_atom(Atom::new(Element::C));
1606        b.add_bond(c0, c1, BondOrder::Single).unwrap();
1607        b.add_bond(c1, c2, BondOrder::Double).unwrap();
1608        b.add_bond(c2, c3, BondOrder::Single).unwrap();
1609        b.add_bond(c3, c4, BondOrder::Double).unwrap();
1610        b.add_bond(c4, c0, BondOrder::Single).unwrap();
1611        let mol = b.build();
1612        let model = assign_aromaticity(&mol);
1613        assert_eq!(
1614            model.aromatic_atom_count(),
1615            0,
1616            "cyclopentadiene not aromatic"
1617        );
1618    }
1619
1620    // =========================================================================
1621    // RdkitLike mode: Se/Te chalcogen heteroaromatics
1622    // =========================================================================
1623
1624    #[test]
1625    fn test_selenophene_huckel_not_aromatic() {
1626        // c1cc[se]c1 — in strict Hückel mode, Se is unsupported → 0 aromatic atoms
1627        // (assign_aromaticity_ex re-derives from scratch, ignoring parser's aromatic flags)
1628        let mol = mol_aromatic("c1cc[se]c1");
1629        let m = assign_aromaticity(&mol); // default Hückel
1630        assert_eq!(
1631            m.aromatic_atom_count(),
1632            0,
1633            "selenophene: Se not aromatic in Hückel mode"
1634        );
1635    }
1636
1637    #[test]
1638    fn test_selenophene_rdkit_aromatic() {
1639        // c1cc[se]c1 — in RdkitLike mode, Se donates 2π → 6π total → aromatic
1640        let mol = mol_aromatic("c1cc[se]c1");
1641        let m = assign_aromaticity_ex(&mol, AromaticityAlgorithm::RdkitLike);
1642        assert_eq!(
1643            m.aromatic_atom_count(),
1644            5,
1645            "selenophene: all 5 atoms aromatic in RdkitLike"
1646        );
1647    }
1648
1649    #[test]
1650    fn test_tellurophene_rdkit_aromatic() {
1651        // c1cc[te]c1 — Te analogous to Se (2π donor)
1652        let mol = mol_aromatic("c1cc[te]c1");
1653        let m = assign_aromaticity_ex(&mol, AromaticityAlgorithm::RdkitLike);
1654        assert_eq!(
1655            m.aromatic_atom_count(),
1656            5,
1657            "tellurophene: all 5 atoms aromatic in RdkitLike"
1658        );
1659    }
1660
1661    #[test]
1662    fn test_benzoselenophene_rdkit() {
1663        // Fused benzene + selenophene
1664        let mol = mol_aromatic("c1ccc2[se]ccc2c1");
1665        let m = assign_aromaticity_ex(&mol, AromaticityAlgorithm::RdkitLike);
1666        assert_eq!(
1667            m.aromatic_atom_count(),
1668            9,
1669            "benzoselenophene: 9 atoms aromatic"
1670        );
1671    }
1672
1673    #[test]
1674    fn test_rdkit_mode_does_not_break_benzene() {
1675        // Benzene must give same result in both modes
1676        let mol = mol_aromatic("c1ccccc1");
1677        let m_h = assign_aromaticity(&mol);
1678        let m_r = assign_aromaticity_ex(&mol, AromaticityAlgorithm::RdkitLike);
1679        assert_eq!(m_h.aromatic_atom_count(), m_r.aromatic_atom_count());
1680    }
1681
1682    #[test]
1683    fn test_rdkit_mode_does_not_break_thiophene() {
1684        let mol = mol_aromatic("c1ccsc1");
1685        let m_h = assign_aromaticity(&mol);
1686        let m_r = assign_aromaticity_ex(&mol, AromaticityAlgorithm::RdkitLike);
1687        assert_eq!(
1688            m_h.aromatic_atom_count(),
1689            m_r.aromatic_atom_count(),
1690            "thiophene same in both modes"
1691        );
1692    }
1693}