Skip to main content

cosmolkit_core/chemistry/
kekulize.rs

1// RDKit marker convention defined in dev/source_reproduction_protocol.md.
2
3use std::collections::{BTreeMap, BTreeSet, VecDeque};
4
5use crate::{
6    AtomId, BondDirection, BondId, BondOrder, Molecule, RingFindingError, RingInfo,
7    ValenceAssignment, ValenceError, ValenceModel, canon_rank::FragmentRankScope,
8    canon_rank::rank_fragment_atoms_for_kekulize, molecule::TopologyBlock,
9    read_parts::MoleculeReadParts,
10};
11
12#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
13pub enum KekulizeError {
14    #[error("can't kekulize mol. unkekulized atoms: {atoms:?}")]
15    UnkekulizedAtoms { atoms: Vec<AtomId> },
16    #[error("non-ring atom {atom} marked aromatic")]
17    NonRingAromaticAtom { atom: AtomId },
18    #[error("incomplete RDKit kekulize port for {branch}: {reason}")]
19    ProtocolDebt {
20        branch: &'static str,
21        reason: &'static str,
22    },
23    #[error("kekulization changed valence on atom {atom}: {before}!={after}")]
24    ValenceChanged {
25        atom: AtomId,
26        before: i32,
27        after: i32,
28    },
29    #[error("kekulize fragment bitset size mismatch: atoms={atoms}, bonds={bonds}")]
30    FragmentBitsetSizeMismatch { atoms: usize, bonds: usize },
31    #[error("canonical rank {kind} symbol size mismatch: expected={expected}, actual={actual}")]
32    CanonicalRankSymbolSizeMismatch {
33        kind: &'static str,
34        expected: usize,
35        actual: usize,
36    },
37    #[error(transparent)]
38    RingFinding(#[from] RingFindingError),
39    #[error(transparent)]
40    Valence(#[from] ValenceError),
41}
42
43#[derive(Debug, Clone, PartialEq, Eq)]
44pub(crate) struct KekulizeAssignment {
45    bond_orders: Vec<Option<BondOrder>>,
46    bond_aromatic_flags: Vec<Option<bool>>,
47    bond_directions: Vec<Option<BondDirection>>,
48    atom_aromatic_flags: Vec<Option<bool>>,
49    atom_explicit_hydrogens: Vec<Option<u8>>,
50    atom_no_implicit: Vec<Option<bool>>,
51}
52
53struct KekulizeCandidateState {
54    all_atoms: Vec<AtomId>,
55    candidate_atoms: BTreeSet<AtomId>,
56    aromatic_edges: Vec<(BondId, AtomId, AtomId)>,
57    questions: Vec<AtomId>,
58    done: Vec<AtomId>,
59}
60
61#[derive(Debug, Clone, PartialEq, Eq)]
62struct KekulizeRing {
63    atoms: Vec<AtomId>,
64    bonds: Vec<BondId>,
65    source_ring: usize,
66}
67
68impl KekulizeAssignment {
69    fn empty(num_atoms: usize, num_bonds: usize) -> Self {
70        Self::with_sizes(num_atoms, num_bonds)
71    }
72
73    fn with_sizes(num_atoms: usize, num_bonds: usize) -> Self {
74        Self {
75            bond_orders: vec![None; num_bonds],
76            bond_aromatic_flags: vec![None; num_bonds],
77            bond_directions: vec![None; num_bonds],
78            atom_aromatic_flags: vec![None; num_atoms],
79            atom_explicit_hydrogens: vec![None; num_atoms],
80            atom_no_implicit: vec![None; num_atoms],
81        }
82    }
83
84    pub(crate) fn bond_order(&self, bond: BondId) -> Option<BondOrder> {
85        self.bond_orders[bond.index()]
86    }
87
88    pub(crate) fn bond_aromatic_flag(&self, bond: BondId) -> Option<bool> {
89        self.bond_aromatic_flags[bond.index()]
90    }
91
92    pub(crate) fn bond_direction(&self, bond: BondId) -> Option<BondDirection> {
93        self.bond_directions[bond.index()]
94    }
95
96    pub(crate) fn atom_aromatic_flag(&self, atom: AtomId) -> Option<bool> {
97        self.atom_aromatic_flags[atom.index()]
98    }
99
100    pub(crate) fn atom_explicit_hydrogens(&self, atom: AtomId) -> Option<u8> {
101        self.atom_explicit_hydrogens[atom.index()]
102    }
103
104    pub(crate) fn atom_no_implicit(&self, atom: AtomId) -> Option<bool> {
105        self.atom_no_implicit[atom.index()]
106    }
107
108    #[cfg(test)]
109    fn is_empty(&self) -> bool {
110        self.bond_orders.iter().all(Option::is_none)
111            && self.bond_aromatic_flags.iter().all(Option::is_none)
112            && self.bond_directions.iter().all(Option::is_none)
113            && self.atom_aromatic_flags.iter().all(Option::is_none)
114            && self.atom_explicit_hydrogens.iter().all(Option::is_none)
115            && self.atom_no_implicit.iter().all(Option::is_none)
116    }
117}
118
119pub(crate) fn apply_kekulize_assignment(
120    topology: &mut TopologyBlock,
121    assignment: &KekulizeAssignment,
122) -> bool {
123    let mut changed = false;
124    for bond in &mut topology.bonds {
125        if let Some(next) = assignment.bond_order(bond.id()) {
126            if bond.order() != next {
127                bond.set_order(next);
128                changed = true;
129            }
130        }
131        if let Some(next) = assignment.bond_aromatic_flag(bond.id()) {
132            if bond.is_aromatic() != next {
133                bond.set_aromatic(next);
134                changed = true;
135            }
136        }
137        if let Some(next) = assignment.bond_direction(bond.id()) {
138            if bond.direction() != next {
139                bond.set_direction(next);
140                changed = true;
141            }
142        }
143    }
144    for atom in &mut topology.atoms {
145        if let Some(next) = assignment.atom_aromatic_flag(atom.id()) {
146            if atom.is_aromatic() != next {
147                atom.set_aromatic(next);
148                changed = true;
149            }
150        }
151        if let Some(next) = assignment.atom_explicit_hydrogens(atom.id()) {
152            if atom.explicit_hydrogens() != next {
153                atom.set_explicit_hydrogens(next);
154                changed = true;
155            }
156        }
157        if let Some(next) = assignment.atom_no_implicit(atom.id()) {
158            if atom.no_implicit() != next {
159                atom.set_no_implicit(next);
160                changed = true;
161            }
162        }
163    }
164    changed
165}
166
167fn topology_is_aromatic_atom(topology: &TopologyBlock, atom: AtomId) -> bool {
168    let atom_data = &topology.atoms[atom.index()];
169    if atom_data.is_aromatic() {
170        return true;
171    }
172    topology.bonds.iter().any(|bond| {
173        (bond.begin() == atom || bond.end() == atom)
174            && (bond.is_aromatic() || bond.order() == BondOrder::Aromatic)
175    })
176}
177
178fn kekulize_is_aromatic_bond(bond: &crate::Bond) -> bool {
179    bond.is_aromatic() || bond.order() == BondOrder::Aromatic
180}
181
182pub(crate) fn kekulize_assignment(
183    molecule: &Molecule,
184    rings: Option<&RingInfo>,
185    clear_aromatic_flags: bool,
186    canonical: bool,
187    max_backtracks: usize,
188) -> Result<KekulizeAssignment, KekulizeError> {
189    kekulize_assignment_from_parts_with_valence(
190        molecule.topology_block(),
191        molecule.stereo_groups(),
192        rings,
193        molecule.derived_cache().valence.as_ref(),
194        clear_aromatic_flags,
195        canonical,
196        max_backtracks,
197    )
198}
199
200pub(crate) fn kekulize_assignment_from_parts(
201    topology: &TopologyBlock,
202    stereo_groups: &[crate::stereo::StereoGroup],
203    rings: Option<&RingInfo>,
204    clear_aromatic_flags: bool,
205    canonical: bool,
206    max_backtracks: usize,
207) -> Result<KekulizeAssignment, KekulizeError> {
208    kekulize_assignment_from_parts_with_valence(
209        topology,
210        stereo_groups,
211        rings,
212        None,
213        clear_aromatic_flags,
214        canonical,
215        max_backtracks,
216    )
217}
218
219fn kekulize_assignment_from_parts_with_valence(
220    topology: &TopologyBlock,
221    stereo_groups: &[crate::stereo::StereoGroup],
222    rings: Option<&RingInfo>,
223    cached_valence: Option<&ValenceAssignment>,
224    clear_aromatic_flags: bool,
225    canonical: bool,
226    max_backtracks: usize,
227) -> Result<KekulizeAssignment, KekulizeError> {
228    // BEGIN RDKIT CPP FUNCTION MolOps::Kekulize
229    // RDKit✔️✔️: void Kekulize(RWMol &mol, bool markAtomsBonds, bool canonical,
230    // RDKit✔️✔️:               unsigned int maxBackTracks) {
231    // RDKit✔️✔️:   boost::dynamic_bitset<> atomsToUse(mol.getNumAtoms());
232    // RDKit✔️✔️:   atomsToUse.set();
233    // RDKit✔️✔️:   boost::dynamic_bitset<> bondsToUse(mol.getNumBonds());
234    // RDKit✔️✔️:   bondsToUse.set();
235    // RDKit✔️✔️:   details::KekulizeFragment(mol, atomsToUse, bondsToUse, markAtomsBonds,
236    // RDKit✔️✔️:                             canonical, maxBackTracks);
237    // RDKit✔️✔️: }
238    // END RDKIT CPP FUNCTION MolOps::Kekulize
239    let owned_rings;
240    let rings = if let Some(rings) = rings {
241        rings
242    } else {
243        // RDKit✔️✔️:     VECT_INT_VECT allringsSSSR;
244        // RDKit✔️✔️:     if (!mol.getRingInfo()->isInitialized()) {
245        // RDKit✔️✔️:       MolOps::findSSSR(mol, allringsSSSR);
246        // RDKit✔️✔️:     }
247        // RDKit✔️✔️:     const VECT_INT_VECT &allrings =
248        // RDKit✔️✔️:         allringsSSSR.empty() ? mol.getRingInfo()->atomRings() : allringsSSSR;
249        //
250        // The value-style port has no mutable RingInfo on-molecule, so when the
251        // caller does not provide rings we must materialize the same SSSR-level
252        // snapshot RDKit would compute here, not SymmSSSR.
253        owned_rings = crate::rings::find_sssr_from_parts(
254            topology.atoms.len(),
255            &topology.bonds,
256            &topology.adjacency,
257        )?;
258        &owned_rings
259    };
260    let atoms_to_use = vec![true; topology.atoms.len()];
261    let bonds_to_use = vec![true; topology.bonds.len()];
262    kekulize_fragment_assignment(
263        topology,
264        stereo_groups,
265        rings,
266        cached_valence,
267        &atoms_to_use,
268        bonds_to_use,
269        clear_aromatic_flags,
270        canonical,
271        max_backtracks,
272    )
273}
274
275pub(crate) fn kekulize_assignment_from_read_parts(
276    read: MoleculeReadParts<'_>,
277    rings: Option<&RingInfo>,
278    clear_aromatic_flags: bool,
279    canonical: bool,
280    max_backtracks: usize,
281) -> Result<KekulizeAssignment, KekulizeError> {
282    kekulize_assignment_from_parts(
283        read.topology(),
284        read.stereo_groups(),
285        rings,
286        clear_aromatic_flags,
287        canonical,
288        max_backtracks,
289    )
290}
291
292pub(crate) fn kekulize_fragment_assignment(
293    topology: &TopologyBlock,
294    stereo_groups: &[crate::stereo::StereoGroup],
295    rings: &RingInfo,
296    cached_valence: Option<&ValenceAssignment>,
297    atoms_to_use: &[bool],
298    mut bonds_to_use: Vec<bool>,
299    clear_aromatic_flags: bool,
300    canonical: bool,
301    max_backtracks: usize,
302) -> Result<KekulizeAssignment, KekulizeError> {
303    let trace_kekulize = std::env::var_os("COSMOLKIT_TRACE_KEKULIZE").is_some();
304    // BEGIN RDKIT CPP FUNCTION details::KekulizeFragment
305    // RDKit✔️✔️: void KekulizeFragment(RWMol &mol, const boost::dynamic_bitset<> &atomsToUse,
306    // RDKit✔️✔️:                       boost::dynamic_bitset<> bondsToUse, bool markAtomsBonds,
307    // RDKit✔️✔️:                       bool canonical, unsigned int maxBackTracks) {
308    // RDKit✔️✔️:   PRECONDITION(atomsToUse.size() == mol.getNumAtoms(),
309    // RDKit✔️✔️:                "atomsToUse is wrong size");
310    // RDKit✔️✔️:   PRECONDITION(bondsToUse.size() == mol.getNumBonds(),
311    // RDKit✔️✔️:                "bondsToUse is wrong size");
312    // RDKit✔️✔️:   if (atomsToUse.none()) {
313    // RDKit✔️✔️:     return;
314    // RDKit✔️✔️:   }
315    if atoms_to_use.len() != topology.atoms.len() || bonds_to_use.len() != topology.bonds.len() {
316        return Err(KekulizeError::FragmentBitsetSizeMismatch {
317            atoms: atoms_to_use.len(),
318            bonds: bonds_to_use.len(),
319        });
320    }
321    let mut assignment = KekulizeAssignment::with_sizes(topology.atoms.len(), topology.bonds.len());
322    if !atoms_to_use.iter().any(|selected| *selected) {
323        return Ok(assignment);
324    }
325
326    // RDKit✔️✔️:   bool foundAromatic = false;
327    // RDKit✔️✔️:   for (const auto bond : mol.bonds()) {
328    // RDKit✔️✔️:     if (bondsToUse[bond->getIdx()]) {
329    // RDKit✔️✔️:       if (QueryOps::hasBondTypeQuery(*bond)) {
330    // RDKit✔️✔️:         bondsToUse[bond->getIdx()] = 0;
331    // RDKit✔️✔️:       } else if (bond->getIsAromatic()) {
332    // RDKit✔️✔️:         foundAromatic = true;
333    // RDKit✔️✔️:       }
334    // RDKit✔️✔️:     }
335    // RDKit✔️✔️:   }
336    let mut found_aromatic = false;
337    for bond in &topology.bonds {
338        if bonds_to_use[bond.id().index()] {
339            if bond
340                .query()
341                .is_some_and(crate::valence::has_bond_type_query)
342            {
343                bonds_to_use[bond.id().index()] = false;
344            } else if kekulize_is_aromatic_bond(bond) {
345                found_aromatic = true;
346            }
347        }
348    }
349
350    // RDKit✔️✔️:   for (auto atom : mol.atoms()) {
351    // RDKit✔️✔️:     atom->calcImplicitValence(false);
352    // RDKit✔️✔️:     valences[atom->getIdx()] = atom->getTotalValence();
353    // RDKit✔️✔️:     if (isAromaticAtom(*atom)) {
354    // RDKit✔️✔️:       foundAromatic = true;
355    // RDKit✔️✔️:     }
356    // RDKit✔️✔️:   }
357    let pre_kek_valence = if let Some(valence) = cached_valence {
358        valence.clone()
359    } else {
360        crate::valence::assign_valence_with_options_from_parts(
361            &topology.atoms,
362            &topology.bonds,
363            &topology.adjacency,
364            ValenceModel::RdkitLike,
365            false,
366        )?
367    };
368    if topology
369        .atoms
370        .iter()
371        .any(|atom| topology_is_aromatic_atom(topology, atom.id()))
372    {
373        found_aromatic = true;
374    }
375    if !found_aromatic {
376        return Ok(assignment);
377    }
378
379    // RDKit✔️✔️:   UINT_VECT atomRanks(mol.getNumAtoms());
380    // RDKit✔️✔️:   if (canonical) {
381    // RDKit✔️✔️:     Canon::rankFragmentAtoms(mol, atomRanks, atomsToUse, bondsToUse);
382    // RDKit✔️✔️:   } else {
383    // RDKit✔️✔️:     std::iota(atomRanks.begin(), atomRanks.end(), 0u);
384    // RDKit✔️✔️:   }
385    let atom_ranks = if canonical {
386        rank_fragment_atoms_for_kekulize(
387            topology,
388            stereo_groups,
389            FragmentRankScope::new(atoms_to_use, &bonds_to_use),
390        )?
391    } else {
392        (0..topology.atoms.len()).collect::<Vec<_>>()
393    };
394    if trace_kekulize {
395        eprintln!("kekulize-fragment atomRanks={atom_ranks:?}");
396    }
397
398    // RDKit✔️✔️:   if (bondsToUse.any()) {
399    // RDKit✔️✔️:     if (!mol.getRingInfo()->isInitialized()) {
400    // RDKit✔️✔️:       MolOps::findSSSR(mol, allringsSSSR);
401    // RDKit✔️✔️:     }
402    // RDKit✔️✔️:   if (bondsToUse.any()) {
403    // RDKit✔️✔️:     boost::dynamic_bitset<> wedgedAtoms(numAtoms);
404    // RDKit✔️✔️:     for (const auto bond : mol.bonds()) {
405    // RDKit✔️✔️:       if (bondsToUse[bond->getIdx()] &&
406    // RDKit✔️✔️:           (bond->getBondDir() == Bond::BEGINWEDGE ||
407    // RDKit✔️✔️:            bond->getBondDir() == Bond::BEGINDASH)) {
408    // RDKit✔️✔️:         wedgedAtoms.set(bond->getBeginAtomIdx());
409    // RDKit✔️✔️:       }
410    // RDKit✔️✔️:     }
411    let mut wedged_atoms = BTreeSet::new();
412    for bond in &topology.bonds {
413        if bonds_to_use[bond.id().index()]
414            && matches!(
415                bond.direction(),
416                BondDirection::BeginWedge | BondDirection::BeginDash
417            )
418        {
419            wedged_atoms.insert(bond.begin());
420        }
421    }
422
423    // RDKit✔️✔️:     const VECT_INT_VECT &allrings =
424    // RDKit✔️✔️:         allringsSSSR.empty() ? mol.getRingInfo()->atomRings() : allringsSSSR;
425    // RDKit✔️✔️:     std::deque<INT_VECT> tmpRings;
426    // RDKit✔️✔️:     auto containsNonDummy = [&atomsToUse, &dummyAts](const INT_VECT &ring) {
427    // RDKit✔️✔️:       bool ringOk = false;
428    // RDKit✔️✔️:       for (auto ai : ring) {
429    // RDKit✔️✔️:         if (!atomsToUse[ai]) {
430    // RDKit✔️✔️:           return false;
431    // RDKit✔️✔️:         }
432    // RDKit✔️✔️:         if (!dummyAts[ai]) {
433    // RDKit✔️✔️:           ringOk = true;
434    // RDKit✔️✔️:         }
435    // RDKit✔️✔️:       }
436    // RDKit✔️✔️:       return ringOk;
437    // RDKit✔️✔️:     };
438    // RDKit✔️✔️:     for (const auto &ring : allrings) {
439    // RDKit✔️✔️:       if (containsNonDummy(ring)) {
440    // RDKit✔️✔️:         unsigned int startPos = 0;
441    // RDKit✔️✔️:         bool hasWedge = false;
442    // RDKit✔️✔️:         for (auto ri = 0u; ri < ring.size(); ++ri) {
443    // RDKit✔️✔️:           if (wedgedAtoms[ring[ri]]) {
444    // RDKit✔️✔️:             startPos = ri;
445    // RDKit✔️✔️:             hasWedge = true;
446    // RDKit✔️✔️:             break;
447    // RDKit✔️✔️:           }
448    // RDKit✔️✔️:         }
449    // RDKit✔️✔️:         INT_VECT nring(ring.size());
450    // RDKit✔️✔️:         for (auto ri = 0u; ri < ring.size(); ++ri) {
451    // RDKit✔️✔️:           nring[ri] = ring.at((ri + startPos) % ring.size());
452    // RDKit✔️✔️:         }
453    // RDKit✔️✔️:         if (!hasWedge) {
454    // RDKit✔️✔️:           tmpRings.push_back(nring);
455    // RDKit✔️✔️:         } else {
456    // RDKit✔️✔️:           tmpRings.push_front(nring);
457    // RDKit✔️✔️:         }
458    // RDKit✔️✔️:       }
459    // RDKit✔️✔️:     }
460    // RDKit✔️✔️:     VECT_INT_VECT arings;
461    // RDKit✔️✔️:     arings.reserve(allrings.size());
462    // RDKit✔️✔️:     arings.insert(arings.end(), tmpRings.begin(), tmpRings.end());
463    // RDKit✔️✔️:     VECT_INT_VECT allbrings;
464    // RDKit✔️✔️:     RingUtils::convertToBonds(arings, allbrings, mol);
465    // RDKit✔️✔️:     VECT_INT_VECT brings;
466    // RDKit✔️✔️:     brings.reserve(allbrings.size());
467    // RDKit✔️✔️:     auto copyBondRingsWithinFragment = [&bondsToUse](const INT_VECT &ring) {
468    // RDKit✔️✔️:       return std::all_of(ring.begin(), ring.end(), [&bondsToUse](const int bi) {
469    // RDKit✔️✔️:         return bondsToUse[bi];
470    // RDKit✔️✔️:       });
471    // RDKit✔️✔️:     };
472    // RDKit✔️✔️:     VECT_INT_VECT aringsRemaining;
473    // RDKit✔️✔️:     aringsRemaining.reserve(arings.size());
474    // RDKit✔️✔️:     for (unsigned i = 0; i < allbrings.size(); ++i) {
475    // RDKit✔️✔️:       if (copyBondRingsWithinFragment(allbrings[i])) {
476    // RDKit✔️✔️:         brings.push_back(allbrings[i]);
477    // RDKit✔️✔️:         aringsRemaining.push_back(arings[i]);
478    // RDKit✔️✔️:       }
479    // RDKit✔️✔️:     }
480    // RDKit✔️✔️:     arings = std::move(aringsRemaining);
481    let kekulize_rings =
482        ordered_kekulize_rings(topology, rings, atoms_to_use, &bonds_to_use, &wedged_atoms);
483    let ring_systems = fused_ring_systems(&kekulize_rings);
484    if trace_kekulize {
485        eprintln!(
486            "kekulize-rings arings={:?}",
487            kekulize_rings
488                .iter()
489                .map(|ring| ring
490                    .atoms
491                    .iter()
492                    .map(|atom| atom.index())
493                    .collect::<Vec<_>>())
494                .collect::<Vec<_>>()
495        );
496        eprintln!(
497            "kekulize-rings brings={:?}",
498            kekulize_rings
499                .iter()
500                .map(|ring| ring
501                    .bonds
502                    .iter()
503                    .map(|bond| bond.index())
504                    .collect::<Vec<_>>())
505                .collect::<Vec<_>>()
506        );
507        eprintln!("kekulize-rings fused={ring_systems:?}");
508    }
509    for system in ring_systems {
510        let ring_bond_set = system
511            .iter()
512            .flat_map(|&ring_idx| kekulize_rings[ring_idx].bonds.iter().copied())
513            .collect::<BTreeSet<_>>();
514        let ring_atom_set = system
515            .iter()
516            .flat_map(|&ring_idx| kekulize_rings[ring_idx].atoms.iter().copied())
517            .collect::<BTreeSet<_>>();
518
519        if ring_bond_set.iter().any(|bond| {
520            let bond = &topology.bonds[bond.index()];
521            kekulize_is_aromatic_bond(bond)
522        }) || ring_atom_set
523            .iter()
524            .any(|atom| topology_is_aromatic_atom(topology, *atom))
525        {
526            kekulize_fused_assignment(
527                topology,
528                rings,
529                &kekulize_rings,
530                &system,
531                &pre_kek_valence,
532                &atom_ranks,
533                max_backtracks,
534                &mut assignment,
535            )?;
536        }
537    }
538
539    // RDKit✔️✔️:   if (markAtomsBonds) {
540    // RDKit✔️✔️:     for (auto bond : mol.bonds()) {
541    // RDKit✔️✔️:       if (bondsToUse[bond->getIdx()]) {
542    // RDKit✔️✔️:         bond->setIsAromatic(false);
543    // RDKit✔️✔️:       }
544    // RDKit✔️✔️:     }
545    // RDKit✔️✔️:     for (auto atom : mol.atoms()) {
546    // RDKit✔️✔️:       if (atomsToUse[atom->getIdx()] && atom->getIsAromatic()) {
547    if clear_aromatic_flags {
548        let full_molecule_scope = atoms_to_use.iter().all(|selected| *selected)
549            && bonds_to_use.iter().all(|selected| *selected);
550        for bond in &topology.bonds {
551            if bonds_to_use[bond.id().index()] && bond.is_aromatic() {
552                assignment.bond_aromatic_flags[bond.id().index()] = Some(false);
553            }
554        }
555        for atom in &topology.atoms {
556            if atoms_to_use[atom.id().index()] && atom.is_aromatic() {
557                if full_molecule_scope && rings.num_atom_rings(atom.id()) == 0 {
558                    return Err(KekulizeError::NonRingAromaticAtom { atom: atom.id() });
559                }
560                assignment.atom_aromatic_flags[atom.id().index()] = Some(false);
561                // RDKit✔️✔️:         if ((atom->getAtomicNum() == 7 || atom->getAtomicNum() == 15) &&
562                // RDKit✔️✔️:             atom->getFormalCharge() == 0 && atom->getNumExplicitHs() == 1) {
563                // RDKit✔️✔️:           atom->setNoImplicit(false);
564                // RDKit✔️✔️:           atom->setNumExplicitHs(0);
565                if matches!(atom.atomic_number(), 7 | 15)
566                    && atom.formal_charge() == 0
567                    && atom.explicit_hydrogens() == 1
568                {
569                    assignment.atom_no_implicit[atom.id().index()] = Some(false);
570                    assignment.atom_explicit_hydrogens[atom.id().index()] = Some(0);
571                }
572            }
573        }
574    }
575    let mut topology_after = topology.clone();
576    apply_kekulize_assignment(&mut topology_after, &assignment);
577    let mut post_kek_valence = pre_kek_valence.clone();
578    for atom in &topology_after.atoms {
579        let idx = atom.id().index();
580        if assignment.atom_explicit_hydrogens[idx].is_some()
581            || assignment.atom_no_implicit[idx].is_some()
582        {
583            let (explicit_valence, implicit_hydrogens) =
584                crate::valence::assign_valence_state_for_atom_from_parts(
585                    &topology_after.atoms,
586                    &topology_after.bonds,
587                    &topology_after.adjacency,
588                    atom.id(),
589                    false,
590                )?;
591            post_kek_valence.explicit_valence[idx] = explicit_valence;
592            post_kek_valence.implicit_hydrogens[idx] = implicit_hydrogens;
593        }
594    }
595    for atom in &topology.atoms {
596        let idx = atom.id().index();
597        let before =
598            pre_kek_valence.explicit_valence[idx] + pre_kek_valence.implicit_hydrogens[idx];
599        let after =
600            post_kek_valence.explicit_valence[idx] + post_kek_valence.implicit_hydrogens[idx];
601        if before != after {
602            return Err(KekulizeError::ValenceChanged {
603                atom: atom.id(),
604                before,
605                after,
606            });
607        }
608    }
609    // RDKit✔️✔️: }
610    // END RDKIT CPP FUNCTION details::KekulizeFragment
611    Ok(assignment)
612}
613
614fn kekulize_fused_assignment(
615    topology: &TopologyBlock,
616    rings: &RingInfo,
617    kekulize_rings: &[KekulizeRing],
618    fused_ring_indices: &[usize],
619    valence: &ValenceAssignment,
620    atom_ranks: &[usize],
621    max_backtracks: usize,
622    assignment: &mut KekulizeAssignment,
623) -> Result<(), KekulizeError> {
624    // BEGIN RDKIT CPP FUNCTION kekulizeFused
625    // RDKit✔️✔️: void kekulizeFused(RWMol &mol, const VECT_INT_VECT &arings,
626    // RDKit✔️✔️:                    const UINT_VECT &atomRanks, unsigned int maxBackTracks) {
627    // RDKit✔️✔️:   INT_VECT allAtms;
628    // RDKit✔️✔️:   Union(arings, allAtms);
629    // RDKit✔️✔️:   INT_VECT done;
630    // RDKit✔️✔️:   INT_VECT questions;
631    // RDKit✔️✔️:   boost::dynamic_bitset<> dBndCands(nats);
632    // RDKit✔️✔️:   boost::dynamic_bitset<> dBndAdds(nbnds);
633    // RDKit✔️✔️:   markDbondCands(mol, allAtms, dBndCands, questions, done);
634    let state = mark_double_bond_candidates(
635        topology,
636        rings,
637        kekulize_rings,
638        fused_ring_indices,
639        valence,
640        assignment,
641    )?;
642
643    // RDKit✔️✔️:   auto kekulized =
644    // RDKit✔️✔️:       kekulizeWorker(mol, allAtms, dBndCands, dBndAdds, done, atomRanks,
645    // RDKit✔️✔️:                      maxBackTracks);
646    let matched_bonds = if let Some(matched_bonds) = kekulize_worker_matching(
647        topology,
648        &state,
649        &state.done,
650        atom_ranks,
651        max_backtracks,
652        &BTreeSet::new(),
653    ) {
654        matched_bonds
655    } else if let Some(matched_bonds) = permute_dummies_and_match(
656        topology,
657        &state,
658        &state.questions,
659        atom_ranks,
660        max_backtracks,
661    ) {
662        matched_bonds
663    } else {
664        return Err(KekulizeError::UnkekulizedAtoms {
665            atoms: state.candidate_atoms.iter().copied().collect(),
666        });
667    };
668
669    for (bond, _, _) in state.aromatic_edges {
670        assignment.bond_orders[bond.index()] = Some(if matched_bonds.contains(&bond) {
671            BondOrder::Double
672        } else {
673            BondOrder::Single
674        });
675        if matched_bonds.contains(&bond)
676            && topology.bonds[bond.index()].direction() != BondDirection::None
677        {
678            assignment.bond_directions[bond.index()] = Some(BondDirection::None);
679        }
680    }
681    // RDKit✔️✔️:   if (!kekulized) {
682    // RDKit✔️✔️:     throw KekulizeException(msg, problemAtoms);
683    // RDKit✔️✔️:   }
684    // RDKit✔️✔️: }
685    // END RDKIT CPP FUNCTION kekulizeFused
686    Ok(())
687}
688
689fn mark_double_bond_candidates(
690    topology: &TopologyBlock,
691    rings: &RingInfo,
692    kekulize_rings: &[KekulizeRing],
693    fused_ring_indices: &[usize],
694    valence: &ValenceAssignment,
695    assignment: &mut KekulizeAssignment,
696) -> Result<KekulizeCandidateState, KekulizeError> {
697    // BEGIN RDKIT CPP FUNCTION markDbondCands
698    // RDKit✔️✔️: void markDbondCands(RWMol &mol, const INT_VECT &allAtms,
699    // RDKit✔️✔️:                     boost::dynamic_bitset<> &dBndCands, INT_VECT &questions,
700    // RDKit✔️✔️:                     INT_VECT &done) {
701    // RDKit✔️✔️:   bool hasAromaticOrDummyAtom =
702    // RDKit✔️✔️:       std::any_of(allAtms.begin(), allAtms.end(), [&mol](int allAtm) {
703    // RDKit✔️✔️:         return (!mol.getAtomWithIdx(allAtm)->getAtomicNum() ||
704    // RDKit✔️✔️:                 isAromaticAtom(*mol.getAtomWithIdx(allAtm)));
705    // RDKit✔️✔️:       });
706    let mut all_atoms = Vec::new();
707    let mut seen_atoms = BTreeSet::new();
708    for &ring_idx in fused_ring_indices {
709        for &atom_id in &kekulize_rings[ring_idx].atoms {
710            if seen_atoms.insert(atom_id) {
711                all_atoms.push(atom_id);
712            }
713        }
714    }
715    let all_atom_set = all_atoms.iter().copied().collect::<BTreeSet<_>>();
716    if !all_atoms.iter().any(|atom| {
717        topology.atoms[atom.index()].atomic_number() == 0
718            || topology_is_aromatic_atom(topology, *atom)
719    }) {
720        return Ok(KekulizeCandidateState {
721            all_atoms,
722            candidate_atoms: BTreeSet::new(),
723            aromatic_edges: Vec::new(),
724            questions: Vec::new(),
725            done: Vec::new(),
726        });
727    }
728    let fused_source_ring_set = fused_ring_indices
729        .iter()
730        .map(|&ring_idx| kekulize_rings[ring_idx].source_ring)
731        .collect::<BTreeSet<_>>();
732    let mut is_ring_not_cand = BTreeSet::new();
733    for &ring_idx in fused_ring_indices {
734        let mut ring_is_candidate = false;
735        let source_ring = kekulize_rings[ring_idx].source_ring;
736        for atom in &rings.atom_rings()[source_ring] {
737            if topology_is_aromatic_atom(topology, *atom) && rings.num_atom_rings(*atom) == 1 {
738                ring_is_candidate = true;
739                break;
740            }
741        }
742        if !ring_is_candidate {
743            is_ring_not_cand.insert(source_ring);
744        }
745    }
746    let mut candidate_atoms = BTreeSet::<AtomId>::new();
747    let mut aromatic_edges = Vec::<(BondId, AtomId, AtomId)>::new();
748    let mut seen_make_single = BTreeSet::<BondId>::new();
749    let mut questions = Vec::<AtomId>::new();
750    let mut done = Vec::<AtomId>::new();
751    // RDKit✔️✔️:   std::vector<Bond *> makeSingle;
752    for &atom_id in &all_atoms {
753        let atom = &topology.atoms[atom_id.index()];
754        if atom.atomic_number() != 0 && !topology_is_aromatic_atom(topology, atom_id) {
755            done.push(atom_id);
756            continue;
757        }
758        let mut sbo = 0i32;
759        let mut n_to_ignore = 0usize;
760        let mut non_ar_non_dummy_nbr = 0usize;
761        for bond in topology
762            .bonds
763            .iter()
764            .filter(|bond| bond.begin() == atom_id || bond.end() == atom_id)
765        {
766            let other = if bond.begin() == atom_id {
767                bond.end()
768            } else {
769                bond.begin()
770            };
771            let other_atom = &topology.atoms[other.index()];
772            if all_atom_set.contains(&other)
773                && other_atom.atomic_number() != 0
774                && !topology_is_aromatic_atom(topology, other)
775            {
776                non_ar_non_dummy_nbr += 1;
777            }
778            // RDKit✔️✔️:       if (bond->getIsAromatic() && (bond->getBondType() == Bond::SINGLE ||
779            // RDKit✔️✔️:                                     bond->getBondType() == Bond::DOUBLE ||
780            // RDKit✔️✔️:                                     bond->getBondType() == Bond::AROMATIC)) {
781            // RDKit✔️✔️:         ++sbo;
782            // RDKit✔️✔️:         // mark this bond to be marked single later
783            // RDKit✔️✔️:         // we don't want to do right now because it can screw-up the
784            // RDKit✔️✔️:         // valence calculation to determine the number of hydrogens below
785            // RDKit✔️✔️:         makeSingle.push_back(bond);
786            // RDKit✔️✔️:       } else {
787            if kekulize_is_aromatic_bond(bond)
788                && matches!(
789                    bond.order(),
790                    BondOrder::Single | BondOrder::Double | BondOrder::Aromatic
791                )
792            {
793                sbo += 1;
794                if seen_make_single.insert(bond.id()) {
795                    assignment.bond_orders[bond.id().index()] = Some(BondOrder::Single);
796                    aromatic_edges.push((bond.id(), bond.begin(), bond.end()));
797                }
798            } else {
799                let bond_contrib =
800                    crate::valence::bond_valence_contrib(bond, atom_id)?.round() as i32;
801                sbo += bond_contrib;
802                if bond_contrib == 0 {
803                    n_to_ignore += 1;
804                }
805            }
806        }
807        let num_atom_rings = rings.num_atom_rings(atom_id);
808        let num_non_cand_rings = rings
809            .atom_members(atom_id)
810            .iter()
811            .filter(|ring_idx| {
812                fused_source_ring_set.contains(ring_idx) && is_ring_not_cand.contains(ring_idx)
813            })
814            .count();
815        if atom.atomic_number() == 0
816            && non_ar_non_dummy_nbr < num_atom_rings
817            && num_non_cand_rings < num_atom_rings
818        {
819            candidate_atoms.insert(atom_id);
820            questions.push(atom_id);
821            continue;
822        }
823        if atom.atomic_number() == 0 {
824            continue;
825        }
826        sbo += i32::from(atom.explicit_hydrogens()) + valence.implicit_hydrogens[atom_id.index()];
827        let mut dv = rdkit_kekulize_default_valence(atom.atomic_number())?;
828        let mut chrg = atom.formal_charge();
829        if rdkit_is_early_atom(atom.atomic_number()) {
830            chrg = -chrg;
831        }
832        if atom.atomic_number() == 6 && chrg > 0 {
833            chrg = -chrg;
834        }
835        dv += i32::from(chrg);
836        let tbo =
837            valence.explicit_valence[atom_id.index()] + valence.implicit_hydrogens[atom_id.index()];
838        let n_radicals = i32::from(atom.radical_electrons());
839        let total_degree = i32::try_from(
840            topology
841                .bonds
842                .iter()
843                .filter(|bond| bond.begin() == atom_id || bond.end() == atom_id)
844                .count(),
845        )
846        .unwrap_or(i32::MAX)
847            + valence.implicit_hydrogens[atom_id.index()]
848            - i32::try_from(n_to_ignore).unwrap_or(i32::MAX);
849        let valence_list = crate::rdkit_valence_list(atom.atomic_number())?.ok_or(
850            ValenceError::UnsupportedBranch {
851                reason: "kekulize valence list atomic number out of range",
852            },
853        )?;
854        let mut vi = 1usize;
855        while tbo > dv && vi < valence_list.len() && valence_list[vi] > 0 {
856            dv = valence_list[vi] + i32::from(chrg);
857            vi += 1;
858        }
859        if tbo == 5
860            && sbo == 4
861            && dv == 3
862            && total_degree == 3
863            && n_radicals == 0
864            && chrg == 0
865            && atom.explicit_hydrogens() == 0
866            && valence.implicit_hydrogens[atom_id.index()] == 0
867            && matches!(atom.atomic_number(), 7 | 15 | 33)
868        {
869            dv = 5;
870        }
871        if total_degree + n_radicals >= dv {
872            continue;
873        }
874        if dv == (sbo + 1 + n_radicals)
875            || (n_radicals == 0 && atom.no_implicit() && dv == (sbo + 2))
876        {
877            candidate_atoms.insert(atom_id);
878        }
879    }
880    // RDKit✔️✔️:   for (auto &bi : makeSingle) {
881    // RDKit✔️✔️:     bi->setBondType(Bond::SINGLE);
882    // RDKit✔️✔️:   }
883    // RDKit✔️✔️: }
884    // END RDKIT CPP FUNCTION markDbondCands
885    Ok(KekulizeCandidateState {
886        all_atoms,
887        candidate_atoms,
888        aromatic_edges,
889        questions,
890        done,
891    })
892}
893
894fn kekulize_worker_matching(
895    topology: &TopologyBlock,
896    state: &KekulizeCandidateState,
897    initial_done: &[AtomId],
898    atom_ranks: &[usize],
899    max_backtracks: usize,
900    switched_off: &BTreeSet<AtomId>,
901) -> Option<BTreeSet<BondId>> {
902    let trace_kekulize = std::env::var_os("COSMOLKIT_TRACE_KEKULIZE").is_some();
903    // BEGIN RDKIT CPP FUNCTION kekulizeWorker
904    // RDKit✔️✔️: bool kekulizeWorker(RWMol &mol, const INT_VECT &allAtms,
905    // RDKit✔️✔️:                     boost::dynamic_bitset<> dBndCands,
906    // RDKit✔️✔️:                     boost::dynamic_bitset<> dBndAdds, INT_VECT done,
907    // RDKit✔️✔️:                     const UINT_VECT &atomRanks, unsigned int maxBackTracks) {
908    // RDKit✔️✔️:   INT_DEQUE astack;
909    // RDKit✔️✔️:   INT_INT_DEQ_MAP options;
910    // RDKit✔️✔️:   int lastOpt = -1;
911    // RDKit✔️✔️:   boost::dynamic_bitset<> localBondsAdded(mol.getNumBonds());
912    // RDKit✔️✔️:   boost::dynamic_bitset<> inAllAtms(mol.getNumAtoms());
913    // RDKit✔️✔️:   for (int allAtm : allAtms) {
914    // RDKit✔️✔️:     inAllAtms.set(allAtm);
915    // RDKit✔️✔️:   }
916    // RDKit✔️✔️:   auto lessByRank = [&atomRanks](int a, int b) {
917    // RDKit✔️✔️:     const auto ra = atomRanks.at(static_cast<unsigned int>(a));
918    // RDKit✔️✔️:     const auto rb = atomRanks.at(static_cast<unsigned int>(b));
919    // RDKit✔️✔️:     return (ra < rb) || (ra == rb && a < b);
920    // RDKit✔️✔️:   };
921    // RDKit✔️✔️:   boost::dynamic_bitset<> wedgeEndAtoms(mol.getNumAtoms());
922    // RDKit✔️✔️:   for (const auto bond : mol.bonds()) {
923    // RDKit✔️✔️:     if (bond->getBondDir() == Bond::BondDir::BEGINWEDGE ||
924    // RDKit✔️✔️:         bond->getBondDir() == Bond::BondDir::BEGINDASH) {
925    // RDKit✔️✔️:       const auto endIdx = bond->getEndAtomIdx();
926    // RDKit✔️✔️:       if (inAllAtms.test(endIdx)) {
927    // RDKit✔️✔️:         wedgeEndAtoms.set(endIdx);
928    // RDKit✔️✔️:       }
929    // RDKit✔️✔️:     }
930    // RDKit✔️✔️:   }
931    // RDKit✔️✔️:   INT_VECT sortedAtms(allAtms);
932    // RDKit✔️✔️:   std::sort(sortedAtms.begin(), sortedAtms.end(),
933    // RDKit✔️✔️:             [&wedgeEndAtoms, &lessByRank](int a, int b) {
934    // RDKit✔️✔️:               const bool wa = wedgeEndAtoms.test(a);
935    // RDKit✔️✔️:               const bool wb = wedgeEndAtoms.test(b);
936    // RDKit✔️✔️:               if (wa != wb) {
937    // RDKit✔️✔️:                 return wa;
938    // RDKit✔️✔️:               }
939    // RDKit✔️✔️:               return lessByRank(a, b);
940    // RDKit✔️✔️:             });
941    // RDKit✔️✔️:   int curr = -1;
942    // RDKit✔️✔️:   INT_DEQUE btmoves;
943    // RDKit✔️✔️:   unsigned int numBT = 0;
944    // RDKit✔️✔️:   while ((done.size() < sortedAtms.size()) || !astack.empty()) {
945    // RDKit✔️✔️:     if (astack.size() > 0) {
946    // RDKit✔️✔️:       curr = astack.front();
947    // RDKit✔️✔️:       astack.pop_front();
948    // RDKit✔️✔️:     } else {
949    // RDKit✔️✔️:       curr = -1;
950    // RDKit✔️✔️:       for (int allAtm : sortedAtms) {
951    // RDKit✔️✔️:         if (std::find(done.begin(), done.end(), allAtm) == done.end()) {
952    // RDKit✔️✔️:           curr = allAtm;
953    // RDKit✔️✔️:           break;
954    // RDKit✔️✔️:         }
955    // RDKit✔️✔️:       }
956    // RDKit✔️✔️:     }
957    // RDKit✔️✔️:     CHECK_INVARIANT(curr >= 0, "starting point not found");
958    // RDKit✔️✔️:     done.push_back(curr);
959    // RDKit✔️✔️:     INT_DEQUE opts;
960    // RDKit✔️✔️:     bool cCand = false;
961    // RDKit✔️✔️:     if (dBndCands[curr]) {
962    // RDKit✔️✔️:       cCand = true;
963    // RDKit✔️✔️:     }
964    // RDKit✔️✔️:     int ncnd;
965    // RDKit✔️✔️:     if (options.find(curr) != options.end()) {
966    // RDKit✔️✔️:       opts = options[curr];
967    // RDKit✔️✔️:       CHECK_INVARIANT(opts.size() > 0, "");
968    // RDKit✔️✔️:     } else {
969    // RDKit✔️✔️:       INT_DEQUE lstack;
970    // RDKit✔️✔️:       std::vector<int> optsV;
971    // RDKit✔️✔️:       std::vector<int> wedgedOptsV;
972    // RDKit✔️✔️:       std::vector<int> nbrs;
973    // RDKit✔️✔️:       for (auto nbrAtom : mol.atomNeighbors(mol.getAtomWithIdx(curr))) {
974    // RDKit✔️✔️:         const auto nbrIdx = static_cast<int>(nbrAtom->getIdx());
975    // RDKit✔️✔️:         if (!inAllAtms.test(nbrIdx)) {
976    // RDKit✔️✔️:           continue;
977    // RDKit✔️✔️:         }
978    // RDKit✔️✔️:         if (std::find(done.begin(), done.end(), nbrIdx) != done.end()) {
979    // RDKit✔️✔️:           continue;
980    // RDKit✔️✔️:         }
981    // RDKit✔️✔️:         nbrs.push_back(nbrIdx);
982    // RDKit✔️✔️:       }
983    // RDKit✔️✔️:       std::sort(nbrs.begin(), nbrs.end(), lessByRank);
984    // RDKit✔️✔️:       for (int nbrIdx : nbrs) {
985    // RDKit✔️✔️:         auto nbrBond = mol.getBondBetweenAtoms(curr, nbrIdx);
986    // RDKit✔️✔️:         if (std::find(astack.begin(), astack.end(), nbrIdx) == astack.end()) {
987    // RDKit✔️✔️:           lstack.push_back(nbrIdx);
988    // RDKit✔️✔️:         }
989    // RDKit✔️✔️:         if (cCand && dBndCands[nbrIdx] &&
990    // RDKit✔️✔️:             (nbrBond->getIsAromatic() ||
991    // RDKit✔️✔️:              mol.getAtomWithIdx(curr)->getAtomicNum() == 0 ||
992    // RDKit✔️✔️:              mol.getAtomWithIdx(nbrIdx)->getAtomicNum() == 0)) {
993    // RDKit✔️✔️:           if (nbrBond->getBondDir() == Bond::BondDir::BEGINWEDGE ||
994    // RDKit✔️✔️:               nbrBond->getBondDir() == Bond::BondDir::BEGINDASH) {
995    // RDKit✔️✔️:             wedgedOptsV.push_back(nbrIdx);
996    // RDKit✔️✔️:           } else {
997    // RDKit✔️✔️:             optsV.push_back(nbrIdx);
998    // RDKit✔️✔️:           }
999    // RDKit✔️✔️:         }
1000    // RDKit✔️✔️:       }
1001    // RDKit✔️✔️:       for (int v : optsV) {
1002    // RDKit✔️✔️:         opts.push_back(v);
1003    // RDKit✔️✔️:       }
1004    // RDKit✔️✔️:       for (int v : wedgedOptsV) {
1005    // RDKit✔️✔️:         opts.push_back(v);
1006    // RDKit✔️✔️:       }
1007    // RDKit✔️✔️:       astack.insert(astack.end(), lstack.begin(), lstack.end());
1008    // RDKit✔️✔️:     }
1009    // RDKit✔️✔️:     if (cCand) {
1010    // RDKit✔️✔️:       if (!opts.empty()) {
1011    // RDKit✔️✔️:         ncnd = opts.front();
1012    // RDKit✔️✔️:         opts.pop_front();
1013    // RDKit✔️✔️:         auto bnd = mol.getBondBetweenAtoms(curr, ncnd);
1014    // RDKit✔️✔️:         bnd->setBondType(Bond::DOUBLE);
1015    // RDKit✔️✔️:         if (bnd->getBondDir() != Bond::BondDir::NONE) {
1016    // RDKit✔️✔️:           bnd->setBondDir(Bond::BondDir::NONE);
1017    // RDKit✔️✔️:         }
1018    // RDKit✔️✔️:         dBndCands[curr] = 0;
1019    // RDKit✔️✔️:         dBndCands[ncnd] = 0;
1020    // RDKit✔️✔️:         dBndAdds[bnd->getIdx()] = 1;
1021    // RDKit✔️✔️:         localBondsAdded[bnd->getIdx()] = 1;
1022    // RDKit✔️✔️:         if (options.find(curr) != options.end()) {
1023    // RDKit✔️✔️:           if (opts.size() == 0) {
1024    // RDKit✔️✔️:             options.erase(curr);
1025    // RDKit✔️✔️:             btmoves.pop_back();
1026    // RDKit✔️✔️:             if (btmoves.size() > 0) {
1027    // RDKit✔️✔️:               lastOpt = btmoves.back();
1028    // RDKit✔️✔️:             } else {
1029    // RDKit✔️✔️:               lastOpt = -1;
1030    // RDKit✔️✔️:             }
1031    // RDKit✔️✔️:           } else {
1032    // RDKit✔️✔️:             options[curr] = opts;
1033    // RDKit✔️✔️:           }
1034    // RDKit✔️✔️:         } else {
1035    // RDKit✔️✔️:           if (opts.size() > 0) {
1036    // RDKit✔️✔️:             lastOpt = curr;
1037    // RDKit✔️✔️:             btmoves.push_back(lastOpt);
1038    // RDKit✔️✔️:             options[curr] = opts;
1039    // RDKit✔️✔️:           }
1040    // RDKit✔️✔️:         }
1041    // RDKit✔️✔️:       } else if (mol.getAtomWithIdx(curr)->getAtomicNum()) {
1042    // RDKit✔️✔️:         if ((lastOpt >= 0) && (numBT < maxBackTracks)) {
1043    // RDKit✔️✔️:           backTrack(mol, options, lastOpt, done, astack, dBndCands, dBndAdds);
1044    // RDKit✔️✔️:           ++numBT;
1045    // RDKit✔️✔️:         } else {
1046    // RDKit✔️✔️:           for (unsigned int bidx = 0; bidx < mol.getNumBonds(); ++bidx) {
1047    // RDKit✔️✔️:             if (localBondsAdded[bidx]) {
1048    // RDKit✔️✔️:               mol.getBondWithIdx(bidx)->setBondType(Bond::SINGLE);
1049    // RDKit✔️✔️:             }
1050    // RDKit✔️✔️:           }
1051    // RDKit✔️✔️:           return false;
1052    // RDKit✔️✔️:         }
1053    // RDKit✔️✔️:       }
1054    // RDKit✔️✔️:     }
1055    // RDKit✔️✔️:   }
1056    // RDKit✔️✔️:   return true;
1057    // RDKit✔️✔️: }
1058    // END RDKIT CPP FUNCTION kekulizeWorker
1059    let all_atom_set = state.all_atoms.iter().copied().collect::<BTreeSet<_>>();
1060    let mut aromatic_edge_ids = vec![false; topology.bonds.len()];
1061    for (bond, _, _) in &state.aromatic_edges {
1062        aromatic_edge_ids[bond.index()] = true;
1063    }
1064    let mut adjacency = vec![Vec::<(AtomId, BondId)>::new(); topology.atoms.len()];
1065    for bond in &topology.bonds {
1066        let begin = bond.begin();
1067        let end = bond.end();
1068        if all_atom_set.contains(&begin) && all_atom_set.contains(&end) {
1069            adjacency[begin.index()].push((end, bond.id()));
1070            adjacency[end.index()].push((begin, bond.id()));
1071        }
1072    }
1073    for neighbors in &mut adjacency {
1074        neighbors.sort_by_key(|(atom, _)| (atom_ranks[atom.index()], atom.index()));
1075    }
1076
1077    let sorted_atoms = worker_sorted_atoms(topology, &state.all_atoms, &all_atom_set, atom_ranks);
1078    if trace_kekulize {
1079        eprintln!(
1080            "kekulize-worker sortedAtms={:?}",
1081            sorted_atoms
1082                .iter()
1083                .map(|atom| atom.index())
1084                .collect::<Vec<_>>()
1085        );
1086        eprintln!(
1087            "kekulize-worker allAtms={:?} done={:?} candidates={:?} questions={:?}",
1088            state
1089                .all_atoms
1090                .iter()
1091                .map(|atom| atom.index())
1092                .collect::<Vec<_>>(),
1093            initial_done
1094                .iter()
1095                .map(|atom| atom.index())
1096                .collect::<Vec<_>>(),
1097            state
1098                .candidate_atoms
1099                .iter()
1100                .map(|atom| atom.index())
1101                .collect::<Vec<_>>(),
1102            state
1103                .questions
1104                .iter()
1105                .map(|atom| atom.index())
1106                .collect::<Vec<_>>()
1107        );
1108    }
1109
1110    let mut d_bnd_cands = vec![false; topology.atoms.len()];
1111    for atom in &state.candidate_atoms {
1112        if !switched_off.contains(atom) {
1113            d_bnd_cands[atom.index()] = true;
1114        }
1115    }
1116    let mut d_bnd_adds = vec![false; topology.bonds.len()];
1117    let mut local_bonds_added = vec![false; topology.bonds.len()];
1118    let mut done = initial_done.to_vec();
1119    let mut done_flags = vec![false; topology.atoms.len()];
1120    for atom in &done {
1121        done_flags[atom.index()] = true;
1122    }
1123    let mut astack = VecDeque::<AtomId>::new();
1124    let mut astack_flags = vec![false; topology.atoms.len()];
1125    let mut options = BTreeMap::<AtomId, VecDeque<(AtomId, BondId)>>::new();
1126    let mut last_opt = None::<AtomId>;
1127    let mut btmoves = Vec::<AtomId>::new();
1128    let mut matched = BTreeSet::<BondId>::new();
1129    let mut num_bt = 0usize;
1130
1131    while done.len() < sorted_atoms.len() || !astack.is_empty() {
1132        let curr = if let Some(curr) = astack.pop_front() {
1133            astack_flags[curr.index()] = false;
1134            curr
1135        } else {
1136            sorted_atoms
1137                .iter()
1138                .copied()
1139                .find(|atom| !done_flags[atom.index()])
1140                .expect("starting point not found")
1141        };
1142        done.push(curr);
1143        done_flags[curr.index()] = true;
1144        let c_cand = d_bnd_cands[curr.index()];
1145        let mut opts = if let Some(saved) = options.get(&curr) {
1146            saved.clone()
1147        } else {
1148            let mut lstack = VecDeque::new();
1149            let mut opts_v = Vec::<(AtomId, BondId)>::new();
1150            let mut wedged_opts_v = Vec::<(AtomId, BondId)>::new();
1151            for &(nbr_idx, nbr_bond) in &adjacency[curr.index()] {
1152                if done_flags[nbr_idx.index()] {
1153                    continue;
1154                }
1155                if !astack_flags[nbr_idx.index()] {
1156                    lstack.push_back(nbr_idx);
1157                    astack_flags[nbr_idx.index()] = true;
1158                }
1159                let bond = &topology.bonds[nbr_bond.index()];
1160                if c_cand
1161                    && d_bnd_cands[nbr_idx.index()]
1162                    && (aromatic_edge_ids[nbr_bond.index()]
1163                        || kekulize_is_aromatic_bond(bond)
1164                        || topology.atoms[curr.index()].atomic_number() == 0
1165                        || topology.atoms[nbr_idx.index()].atomic_number() == 0)
1166                {
1167                    if matches!(
1168                        bond.direction(),
1169                        BondDirection::BeginWedge | BondDirection::BeginDash
1170                    ) {
1171                        wedged_opts_v.push((nbr_idx, nbr_bond));
1172                    } else {
1173                        opts_v.push((nbr_idx, nbr_bond));
1174                    }
1175                }
1176            }
1177            let mut computed = VecDeque::new();
1178            computed.extend(opts_v);
1179            computed.extend(wedged_opts_v);
1180            astack.extend(lstack);
1181            computed
1182        };
1183        if trace_kekulize {
1184            eprintln!(
1185                "kekulize-worker step curr={} cCand={} opts={:?} astack={:?} done={:?}",
1186                curr.index(),
1187                c_cand,
1188                opts.iter()
1189                    .map(|(atom, bond)| (atom.index(), bond.index()))
1190                    .collect::<Vec<_>>(),
1191                astack.iter().map(|atom| atom.index()).collect::<Vec<_>>(),
1192                done.iter().map(|atom| atom.index()).collect::<Vec<_>>()
1193            );
1194        }
1195        if c_cand {
1196            if let Some((ncnd, bond_id)) = opts.pop_front() {
1197                d_bnd_cands[curr.index()] = false;
1198                d_bnd_cands[ncnd.index()] = false;
1199                d_bnd_adds[bond_id.index()] = true;
1200                local_bonds_added[bond_id.index()] = true;
1201                matched.insert(bond_id);
1202                if trace_kekulize {
1203                    eprintln!(
1204                        "kekulize-worker add curr={} ncnd={} bond={}",
1205                        curr.index(),
1206                        ncnd.index(),
1207                        bond_id.index()
1208                    );
1209                }
1210                if options.contains_key(&curr) {
1211                    if opts.is_empty() {
1212                        options.remove(&curr);
1213                        let _ = btmoves.pop();
1214                        last_opt = btmoves.last().copied();
1215                    } else {
1216                        options.insert(curr, opts);
1217                    }
1218                } else if !opts.is_empty() {
1219                    last_opt = Some(curr);
1220                    btmoves.push(curr);
1221                    options.insert(curr, opts);
1222                }
1223            } else if topology.atoms[curr.index()].atomic_number() != 0 {
1224                if let Some(last_opt_atom) = last_opt {
1225                    if num_bt < max_backtracks {
1226                        if trace_kekulize {
1227                            eprintln!(
1228                                "kekulize-worker backtrack curr={} lastOpt={} numBT={}",
1229                                curr.index(),
1230                                last_opt_atom.index(),
1231                                num_bt
1232                            );
1233                        }
1234                        back_track(
1235                            topology,
1236                            &mut options,
1237                            last_opt_atom,
1238                            &mut done,
1239                            &mut done_flags,
1240                            &mut astack,
1241                            &mut astack_flags,
1242                            &mut d_bnd_cands,
1243                            &mut d_bnd_adds,
1244                            &mut matched,
1245                        );
1246                        num_bt += 1;
1247                    } else {
1248                        for (bond_idx, was_added) in local_bonds_added.iter().enumerate() {
1249                            if *was_added {
1250                                matched.remove(&BondId::new(bond_idx));
1251                            }
1252                        }
1253                        return None;
1254                    }
1255                } else {
1256                    for (bond_idx, was_added) in local_bonds_added.iter().enumerate() {
1257                        if *was_added {
1258                            matched.remove(&BondId::new(bond_idx));
1259                        }
1260                    }
1261                    return None;
1262                }
1263            }
1264        }
1265    }
1266    Some(matched)
1267}
1268
1269fn permute_dummies_and_match(
1270    topology: &TopologyBlock,
1271    state: &KekulizeCandidateState,
1272    questions: &[AtomId],
1273    atom_ranks: &[usize],
1274    max_backtracks: usize,
1275) -> Option<BTreeSet<BondId>> {
1276    // BEGIN RDKIT CPP FUNCTION QuestionEnumerator
1277    // RDKit✔️✔️: class QuestionEnumerator {
1278    // RDKit✔️✔️:  public:
1279    // RDKit✔️✔️:   QuestionEnumerator(INT_VECT questions)
1280    // RDKit✔️✔️:       : d_questions(std::move(questions)), d_pos(1) {};
1281    // RDKit✔️✔️:   INT_VECT next() {
1282    // RDKit✔️✔️:     INT_VECT res;
1283    // RDKit✔️✔️:     if (d_pos >= (0x1u << d_questions.size())) {
1284    // RDKit✔️✔️:       return res;
1285    // RDKit✔️✔️:     }
1286    // RDKit✔️✔️:     for (unsigned int i = 0; i < d_questions.size(); ++i) {
1287    // RDKit✔️✔️:       if (d_pos & (0x1u << i)) {
1288    // RDKit✔️✔️:         res.push_back(d_questions[i]);
1289    // RDKit✔️✔️:       }
1290    // RDKit✔️✔️:     }
1291    // RDKit✔️✔️:     ++d_pos;
1292    // RDKit✔️✔️:     return res;
1293    // RDKit✔️✔️:   };
1294    // RDKit✔️✔️: };
1295    // END RDKIT CPP FUNCTION QuestionEnumerator
1296    // BEGIN RDKIT CPP FUNCTION permuteDummiesAndKekulize
1297    // RDKit✔️✔️: bool permuteDummiesAndKekulize(RWMol &mol, const INT_VECT &allAtms,
1298    // RDKit✔️✔️:                                boost::dynamic_bitset<> dBndCands,
1299    // RDKit✔️✔️:                                INT_VECT &questions,
1300    // RDKit✔️✔️:                                const UINT_VECT &atomRanks,
1301    // RDKit✔️✔️:                                unsigned int maxBackTracks) {
1302    // RDKit✔️✔️:   boost::dynamic_bitset<> atomsInPlay(mol.getNumAtoms());
1303    // RDKit✔️✔️:   for (int allAtm : allAtms) {
1304    // RDKit✔️✔️:     atomsInPlay[allAtm] = 1;
1305    // RDKit✔️✔️:   }
1306    // RDKit✔️✔️:   bool kekulized = false;
1307    // RDKit✔️✔️:   QuestionEnumerator qEnum(questions);
1308    // RDKit✔️✔️:   while (!kekulized && questions.size()) {
1309    // RDKit✔️✔️:     boost::dynamic_bitset<> dBndAdds(mol.getNumBonds());
1310    // RDKit✔️✔️:     INT_VECT done;
1311    // RDKit✔️✔️:     // reset the state: all aromatic bonds are remarked to single:
1312    // RDKit✔️✔️:     for (const auto bond : mol.bonds()) {
1313    // RDKit✔️✔️:       if (bond->getIsAromatic() && bond->getBondType() != Bond::SINGLE &&
1314    // RDKit✔️✔️:           atomsInPlay[bond->getBeginAtomIdx()] &&
1315    // RDKit✔️✔️:           atomsInPlay[bond->getEndAtomIdx()]) {
1316    // RDKit✔️✔️:         bond->setBondType(Bond::SINGLE);
1317    // RDKit✔️✔️:       }
1318    // RDKit✔️✔️:     }
1319    // RDKit✔️✔️:     const auto &switchOff = qEnum.next();
1320    // RDKit✔️✔️:     if (!switchOff.size()) {
1321    // RDKit✔️✔️:       break;
1322    // RDKit✔️✔️:     }
1323    // RDKit✔️✔️:     auto tCands = dBndCands;
1324    // RDKit✔️✔️:     for (int it : switchOff) {
1325    // RDKit✔️✔️:       tCands[it] = 0;
1326    // RDKit✔️✔️:     }
1327    // RDKit✔️✔️:     kekulized =
1328    // RDKit✔️✔️:         kekulizeWorker(mol, allAtms, tCands, dBndAdds, done, atomRanks,
1329    // RDKit✔️✔️:                        maxBackTracks);
1330    // RDKit✔️✔️:   }
1331    // RDKit✔️✔️:   return kekulized;
1332    // RDKit✔️✔️: }
1333    // END RDKIT CPP FUNCTION permuteDummiesAndKekulize
1334    if questions.is_empty() {
1335        return None;
1336    }
1337    for switched_off in question_switch_masks(questions) {
1338        if let Some(matched) = kekulize_worker_matching(
1339            topology,
1340            state,
1341            &[],
1342            atom_ranks,
1343            max_backtracks,
1344            &switched_off,
1345        ) {
1346            return Some(matched);
1347        }
1348    }
1349    None
1350}
1351
1352fn back_track(
1353    topology: &TopologyBlock,
1354    _options: &mut BTreeMap<AtomId, VecDeque<(AtomId, BondId)>>,
1355    last_opt: AtomId,
1356    done: &mut Vec<AtomId>,
1357    done_flags: &mut [bool],
1358    aqueue: &mut VecDeque<AtomId>,
1359    astack_flags: &mut [bool],
1360    d_bnd_cands: &mut [bool],
1361    d_bnd_adds: &mut [bool],
1362    matched: &mut BTreeSet<BondId>,
1363) {
1364    // BEGIN RDKIT CPP FUNCTION backTrack
1365    // RDKit✔️✔️: void backTrack(RWMol &mol, INT_INT_DEQ_MAP &, int lastOpt, INT_VECT &done,
1366    // RDKit✔️✔️:                INT_DEQUE &aqueue, boost::dynamic_bitset<> &dBndCands,
1367    // RDKit✔️✔️:                boost::dynamic_bitset<> &dBndAdds) {
1368    // RDKit✔️✔️:   auto ei = std::find(done.begin(), done.end(), lastOpt);
1369    // RDKit✔️✔️:   INT_VECT tdone;
1370    // RDKit✔️✔️:   tdone.insert(tdone.end(), done.begin(), ei);
1371    let split = done
1372        .iter()
1373        .position(|atom| *atom == last_opt)
1374        .expect("lastOpt must exist in done");
1375    let tdone = done[..split].to_vec();
1376    // RDKit✔️✔️:   INT_VECT_CRI eri = std::find(done.rbegin(), done.rend(), lastOpt);
1377    // RDKit✔️✔️:   ++eri;
1378    // RDKit✔️✔️:   for (INT_VECT_CRI ri = done.rbegin(); ri != eri; ++ri) {
1379    // RDKit✔️✔️:     aqueue.push_front(*ri);
1380    // RDKit✔️✔️:   }
1381    for atom in done[split..].iter().rev() {
1382        aqueue.push_front(*atom);
1383    }
1384    // RDKit✔️✔️:   unsigned int nbnds = mol.getNumBonds();
1385    // RDKit✔️✔️:   Bond *bnd;
1386    // RDKit✔️✔️:   unsigned int nbnds = mol.getNumBonds();
1387    // RDKit✔️✔️:   for (unsigned int bi = 0; bi < nbnds; ++bi) {
1388    // RDKit✔️✔️:     if (dBndAdds[bi]) {
1389    // RDKit✔️✔️:       bnd = mol.getBondWithIdx(bi);
1390    // RDKit✔️✔️:       int aid1 = bnd->getBeginAtomIdx();
1391    // RDKit✔️✔️:       int aid2 = bnd->getEndAtomIdx();
1392    // RDKit✔️✔️:       // if one of these atoms has been dealt with before lastOpt
1393    // RDKit✔️✔️:       // we don't have to change the double bond addition
1394    // RDKit✔️✔️:       if ((std::find(tdone.begin(), tdone.end(), aid1) == tdone.end()) &&
1395    // RDKit✔️✔️:           (std::find(tdone.begin(), tdone.end(), aid2) == tdone.end())) {
1396    // RDKit✔️✔️:         // otherwise strip the double bond and set it back to single
1397    // RDKit✔️✔️:         // and add the atoms to candidate for double bonds
1398    // RDKit✔️✔️:         dBndAdds[bi] = 0;
1399    // RDKit✔️✔️:         bnd->setBondType(Bond::SINGLE);
1400    // RDKit✔️✔️:         dBndCands[aid1] = 1;
1401    // RDKit✔️✔️:         dBndCands[aid2] = 1;
1402    // RDKit✔️✔️:       }
1403    // RDKit✔️✔️:     }
1404    // RDKit✔️✔️:   }
1405    // RDKit✔️✔️:   done = tdone;
1406    for atom in done.iter().skip(split) {
1407        done_flags[atom.index()] = false;
1408    }
1409    for atom in done[split..].iter().rev() {
1410        astack_flags[atom.index()] = true;
1411    }
1412    for bond in &topology.bonds {
1413        let bi = bond.id().index();
1414        if d_bnd_adds[bi] {
1415            let aid1 = bond.begin();
1416            let aid2 = bond.end();
1417            if !tdone.contains(&aid1) && !tdone.contains(&aid2) {
1418                d_bnd_adds[bi] = false;
1419                matched.remove(&bond.id());
1420                d_bnd_cands[aid1.index()] = true;
1421                d_bnd_cands[aid2.index()] = true;
1422            }
1423        }
1424    }
1425    *done = tdone;
1426    // RDKit✔️✔️: }
1427    // END RDKIT CPP FUNCTION backTrack
1428}
1429
1430fn worker_sorted_atoms(
1431    topology: &TopologyBlock,
1432    all_atoms: &[AtomId],
1433    all_atom_set: &BTreeSet<AtomId>,
1434    atom_ranks: &[usize],
1435) -> Vec<AtomId> {
1436    let mut wedge_end_atoms = BTreeSet::new();
1437    for bond in &topology.bonds {
1438        if matches!(
1439            bond.direction(),
1440            BondDirection::BeginWedge | BondDirection::BeginDash
1441        ) && all_atom_set.contains(&bond.end())
1442        {
1443            wedge_end_atoms.insert(bond.end());
1444        }
1445    }
1446
1447    let mut sorted_atoms = all_atoms.to_vec();
1448    sorted_atoms.sort_by(|left, right| {
1449        match (
1450            wedge_end_atoms.contains(left),
1451            wedge_end_atoms.contains(right),
1452        ) {
1453            (true, false) => std::cmp::Ordering::Less,
1454            (false, true) => std::cmp::Ordering::Greater,
1455            _ => (atom_ranks[left.index()], left.index())
1456                .cmp(&(atom_ranks[right.index()], right.index())),
1457        }
1458    });
1459    sorted_atoms
1460}
1461
1462fn question_switch_masks(questions: &[AtomId]) -> Vec<BTreeSet<AtomId>> {
1463    let question_count = questions.len();
1464    (1usize..(1usize << question_count))
1465        .map(|mask| {
1466            questions
1467                .iter()
1468                .enumerate()
1469                .filter_map(|(idx, atom)| ((mask & (1usize << idx)) != 0).then_some(*atom))
1470                .collect()
1471        })
1472        .collect()
1473}
1474
1475fn rdkit_kekulize_default_valence(atomic_number: u8) -> Result<i32, ValenceError> {
1476    crate::valence::rdkit_default_valence(atomic_number)
1477}
1478
1479// Ported RDKit entrypoint kept private until the public operation surface has a
1480// deliberate `KekulizeIfPossible` API. Tests exercise it directly.
1481#[allow(dead_code)]
1482fn kekulize_if_possible_assignment(
1483    molecule: &Molecule,
1484    clear_aromatic_flags: bool,
1485    canonical: bool,
1486    max_backtracks: usize,
1487) -> Result<Option<KekulizeAssignment>, KekulizeError> {
1488    // BEGIN RDKIT CPP FUNCTION MolOps::KekulizeIfPossible
1489    // RDKit✔️✔️: bool KekulizeIfPossible(RWMol &mol, bool markAtomsBonds, bool canonical,
1490    // RDKit✔️✔️:                         unsigned int maxBackTracks) {
1491    // RDKit✔️✔️:   boost::dynamic_bitset<> aromaticBonds(mol.getNumBonds());
1492    // RDKit✔️✔️:   for (const auto bond : mol.bonds()) {
1493    // RDKit✔️✔️:     if (bond->getIsAromatic()) {
1494    // RDKit✔️✔️:       aromaticBonds.set(bond->getIdx());
1495    // RDKit✔️✔️:     }
1496    // RDKit✔️✔️:   }
1497    // RDKit✔️✔️:   boost::dynamic_bitset<> aromaticAtoms(mol.getNumAtoms());
1498    // RDKit✔️✔️:   for (const auto atom : mol.atoms()) {
1499    // RDKit✔️✔️:     if (isAromaticAtom(*atom)) {
1500    // RDKit✔️✔️:       aromaticAtoms.set(atom->getIdx());
1501    // RDKit✔️✔️:     }
1502    // RDKit✔️✔️:   }
1503    // RDKit✔️✔️:   bool res = true;
1504    // RDKit✔️✔️:   try {
1505    // RDKit✔️✔️:     Kekulize(mol, markAtomsBonds, canonical, maxBackTracks);
1506    // RDKit✔️✔️:   } catch (const MolSanitizeException &) {
1507    // RDKit✔️✔️:     res = false;
1508    // RDKit✔️✔️:     for (unsigned int i = 0; i < mol.getNumBonds(); ++i) {
1509    // RDKit✔️✔️:       if (aromaticBonds[i]) {
1510    // RDKit✔️✔️:         auto bond = mol.getBondWithIdx(i);
1511    // RDKit✔️✔️:         bond->setIsAromatic(true);
1512    // RDKit✔️✔️:         bond->setBondType(Bond::BondType::AROMATIC);
1513    // RDKit✔️✔️:       }
1514    // RDKit✔️✔️:     }
1515    // RDKit✔️✔️:     for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
1516    // RDKit✔️✔️:       if (aromaticAtoms[i]) {
1517    // RDKit✔️✔️:         mol.getAtomWithIdx(i)->setIsAromatic(true);
1518    // RDKit✔️✔️:       }
1519    // RDKit✔️✔️:     }
1520    // RDKit✔️✔️:   }
1521    // RDKit✔️✔️:   return res;
1522    // RDKit✔️✔️: }
1523    // END RDKIT CPP FUNCTION MolOps::KekulizeIfPossible
1524    match kekulize_assignment(
1525        molecule,
1526        None,
1527        clear_aromatic_flags,
1528        canonical,
1529        max_backtracks,
1530    ) {
1531        Ok(assignment) => Ok(Some(assignment)),
1532        Err(error) if kekulize_if_possible_is_recoverable_failure(&error) => Ok(None),
1533        Err(error) => Err(error),
1534    }
1535}
1536
1537// Value-style internal analogue of RDKit's mutating KekulizeIfPossible():
1538// on failure the returned molecule preserves the original aromatic state.
1539#[allow(dead_code)]
1540fn kekulize_if_possible_value(
1541    molecule: &Molecule,
1542    clear_aromatic_flags: bool,
1543    canonical: bool,
1544    max_backtracks: usize,
1545) -> Result<(Molecule, bool), KekulizeError> {
1546    match kekulize_if_possible_assignment(
1547        molecule,
1548        clear_aromatic_flags,
1549        canonical,
1550        max_backtracks,
1551    )? {
1552        Some(assignment) => {
1553            let mut kekulized = molecule.clone();
1554            apply_kekulize_assignment(kekulized.topology_block_mut(), &assignment);
1555            Ok((kekulized, true))
1556        }
1557        None => Ok((molecule.clone(), false)),
1558    }
1559}
1560
1561fn kekulize_if_possible_is_recoverable_failure(error: &KekulizeError) -> bool {
1562    matches!(
1563        error,
1564        KekulizeError::UnkekulizedAtoms { .. }
1565            | KekulizeError::NonRingAromaticAtom { .. }
1566            | KekulizeError::ValenceChanged { .. }
1567            | KekulizeError::Valence(_)
1568            | KekulizeError::RingFinding(_)
1569    )
1570}
1571
1572fn rdkit_is_early_atom(atomic_number: u8) -> bool {
1573    // BEGIN RDKIT CPP FUNCTION isEarlyAtom
1574    // RDKit✔️✔️: bool isEarlyAtom(int atomicNum) {
1575    // RDKit✔️✔️:   static const bool table[119] = {
1576    // RDKit✔️✔️:       false,  // #0 *
1577    // RDKit✔️✔️:       false,  // #1 H
1578    // RDKit✔️✔️:       false,  // #2 He
1579    // RDKit✔️✔️:       true,   // #3 Li
1580    // RDKit✔️✔️:       true,   // #4 Be
1581    // RDKit✔️✔️:       true,   // #5 B
1582    // RDKit✔️✔️:       false,  // #6 C
1583    // RDKit✔️✔️:       false,  // #7 N
1584    // RDKit✔️✔️:       false,  // #8 O
1585    // RDKit✔️✔️:       false,  // #9 F
1586    // RDKit✔️✔️:       false,  // #10 Ne
1587    // RDKit✔️✔️:       true,   // #11 Na
1588    // RDKit✔️✔️:       true,   // #12 Mg
1589    // RDKit✔️✔️:       true,   // #13 Al
1590    // RDKit✔️✔️:       false,  // #14 Si
1591    // RDKit✔️✔️:       false,  // #15 P
1592    // RDKit✔️✔️:       false,  // #16 S
1593    // RDKit✔️✔️:       false,  // #17 Cl
1594    // RDKit✔️✔️:       false,  // #18 Ar
1595    // RDKit✔️✔️:       true,   // #19 K
1596    // RDKit✔️✔️:       true,   // #20 Ca
1597    // RDKit✔️✔️:       true,   // #21 Sc
1598    // RDKit✔️✔️:       true,   // #22 Ti
1599    // RDKit✔️✔️:       false,  // #23 V
1600    // RDKit✔️✔️:       false,  // #24 Cr
1601    // RDKit✔️✔️:       false,  // #25 Mn
1602    // RDKit✔️✔️:       false,  // #26 Fe
1603    // RDKit✔️✔️:       false,  // #27 Co
1604    // RDKit✔️✔️:       false,  // #28 Ni
1605    // RDKit✔️✔️:       false,  // #29 Cu
1606    // RDKit✔️✔️:       true,   // #30 Zn
1607    // RDKit✔️✔️:       true,   // #31 Ga
1608    // RDKit✔️✔️:       true,   // #32 Ge  see github #2606
1609    // RDKit✔️✔️:       false,  // #33 As
1610    // RDKit✔️✔️:       false,  // #34 Se
1611    // RDKit✔️✔️:       false,  // #35 Br
1612    // RDKit✔️✔️:       false,  // #36 Kr
1613    // RDKit✔️✔️:       true,   // #37 Rb
1614    // RDKit✔️✔️:       true,   // #38 Sr
1615    // RDKit✔️✔️:       true,   // #39 Y
1616    // RDKit✔️✔️:       true,   // #40 Zr
1617    // RDKit✔️✔️:       true,   // #41 Nb
1618    // RDKit✔️✔️:       false,  // #42 Mo
1619    // RDKit✔️✔️:       false,  // #43 Tc
1620    // RDKit✔️✔️:       false,  // #44 Ru
1621    // RDKit✔️✔️:       false,  // #45 Rh
1622    // RDKit✔️✔️:       false,  // #46 Pd
1623    // RDKit✔️✔️:       false,  // #47 Ag
1624    // RDKit✔️✔️:       true,   // #48 Cd
1625    // RDKit✔️✔️:       true,   // #49 In
1626    // RDKit✔️✔️:       true,   // #50 Sn  see github #2606
1627    // RDKit✔️✔️:       true,   // #51 Sb  see github #2775
1628    // RDKit✔️✔️:       false,  // #52 Te
1629    // RDKit✔️✔️:       false,  // #53 I
1630    // RDKit✔️✔️:       false,  // #54 Xe
1631    // RDKit✔️✔️:       true,   // #55 Cs
1632    // RDKit✔️✔️:       true,   // #56 Ba
1633    // RDKit✔️✔️:       true,   // #57 La
1634    // RDKit✔️✔️:       true,   // #58 Ce
1635    // RDKit✔️✔️:       true,   // #59 Pr
1636    // RDKit✔️✔️:       true,   // #60 Nd
1637    // RDKit✔️✔️:       true,   // #61 Pm
1638    // RDKit✔️✔️:       false,  // #62 Sm
1639    // RDKit✔️✔️:       false,  // #63 Eu
1640    // RDKit✔️✔️:       false,  // #64 Gd
1641    // RDKit✔️✔️:       false,  // #65 Tb
1642    // RDKit✔️✔️:       false,  // #66 Dy
1643    // RDKit✔️✔️:       false,  // #67 Ho
1644    // RDKit✔️✔️:       false,  // #68 Er
1645    // RDKit✔️✔️:       false,  // #69 Tm
1646    // RDKit✔️✔️:       false,  // #70 Yb
1647    // RDKit✔️✔️:       false,  // #71 Lu
1648    // RDKit✔️✔️:       true,   // #72 Hf
1649    // RDKit✔️✔️:       true,   // #73 Ta
1650    // RDKit✔️✔️:       false,  // #74 W
1651    // RDKit✔️✔️:       false,  // #75 Re
1652    // RDKit✔️✔️:       false,  // #76 Os
1653    // RDKit✔️✔️:       false,  // #77 Ir
1654    // RDKit✔️✔️:       false,  // #78 Pt
1655    // RDKit✔️✔️:       false,  // #79 Au
1656    // RDKit✔️✔️:       true,   // #80 Hg
1657    // RDKit✔️✔️:       true,   // #81 Tl
1658    // RDKit✔️✔️:       true,   // #82 Pb  see github #2606
1659    // RDKit✔️✔️:       true,   // #83 Bi  see github #2775
1660    // RDKit✔️✔️:       false,  // #84 Po
1661    // RDKit✔️✔️:       false,  // #85 At
1662    // RDKit✔️✔️:       false,  // #86 Rn
1663    // RDKit✔️✔️:       true,   // #87 Fr
1664    // RDKit✔️✔️:       true,   // #88 Ra
1665    // RDKit✔️✔️:       true,   // #89 Ac
1666    // RDKit✔️✔️:       true,   // #90 Th
1667    // RDKit✔️✔️:       true,   // #91 Pa
1668    // RDKit✔️✔️:       true,   // #92 U
1669    // RDKit✔️✔️:       true,   // #93 Np
1670    // RDKit✔️✔️:       false,  // #94 Pu
1671    // RDKit✔️✔️:       false,  // #95 Am
1672    // RDKit✔️✔️:       false,  // #96 Cm
1673    // RDKit✔️✔️:       false,  // #97 Bk
1674    // RDKit✔️✔️:       false,  // #98 Cf
1675    // RDKit✔️✔️:       false,  // #99 Es
1676    // RDKit✔️✔️:       false,  // #100 Fm
1677    // RDKit✔️✔️:       false,  // #101 Md
1678    // RDKit✔️✔️:       false,  // #102 No
1679    // RDKit✔️✔️:       false,  // #103 Lr
1680    // RDKit✔️✔️:       true,   // #104 Rf
1681    // RDKit✔️✔️:       true,   // #105 Db
1682    // RDKit✔️✔️:       true,   // #106 Sg
1683    // RDKit✔️✔️:       true,   // #107 Bh
1684    // RDKit✔️✔️:       true,   // #108 Hs
1685    // RDKit✔️✔️:       true,   // #109 Mt
1686    // RDKit✔️✔️:       true,   // #110 Ds
1687    // RDKit✔️✔️:       true,   // #111 Rg
1688    // RDKit✔️✔️:       true,   // #112 Cn
1689    // RDKit✔️✔️:       true,   // #113 Nh
1690    // RDKit✔️✔️:       true,   // #114 Fl
1691    // RDKit✔️✔️:       true,   // #115 Mc
1692    // RDKit✔️✔️:       true,   // #116 Lv
1693    // RDKit✔️✔️:       true,   // #117 Ts
1694    // RDKit✔️✔️:       true,   // #118 Og
1695    // RDKit✔️✔️:   };
1696    // RDKit✔️✔️:   return ((unsigned int)atomicNum < 119) && table[atomicNum];
1697    // RDKit✔️✔️: }
1698    // END RDKIT CPP FUNCTION isEarlyAtom
1699    matches!(
1700        atomic_number,
1701        3 | 4
1702            | 5
1703            | 11
1704            | 12
1705            | 13
1706            | 19
1707            | 20
1708            | 21
1709            | 22
1710            | 30
1711            | 31
1712            | 32
1713            | 37
1714            | 38
1715            | 39
1716            | 40
1717            | 41
1718            | 48
1719            | 49
1720            | 50
1721            | 51
1722            | 55
1723            | 56
1724            | 57
1725            | 58
1726            | 59
1727            | 60
1728            | 61
1729            | 72
1730            | 73
1731            | 80
1732            | 81
1733            | 82
1734            | 83
1735            | 87
1736            | 88
1737            | 89
1738            | 90
1739            | 91
1740            | 92
1741            | 93
1742            | 104
1743            | 105
1744            | 106
1745            | 107
1746            | 108
1747            | 109
1748            | 110
1749            | 111
1750            | 112
1751            | 113
1752            | 114
1753            | 115
1754            | 116
1755            | 117
1756            | 118
1757    )
1758}
1759
1760fn ordered_kekulize_rings(
1761    topology: &TopologyBlock,
1762    rings: &RingInfo,
1763    atoms_to_use: &[bool],
1764    bonds_to_use: &[bool],
1765    wedged_atoms: &BTreeSet<AtomId>,
1766) -> Vec<KekulizeRing> {
1767    let mut front = Vec::new();
1768    let mut back = Vec::new();
1769    for ring_idx in 0..rings.num_rings() {
1770        let atoms = &rings.atom_rings()[ring_idx];
1771        let bonds = &rings.bond_rings()[ring_idx];
1772        let contains_non_dummy = atoms
1773            .iter()
1774            .all(|atom| atom.index() < topology.atoms.len() && atoms_to_use[atom.index()])
1775            && atoms
1776                .iter()
1777                .any(|atom| topology.atoms[atom.index()].atomic_number() != 0);
1778        if !contains_non_dummy {
1779            continue;
1780        }
1781        if !bonds.iter().all(|bond| bonds_to_use[bond.index()]) {
1782            continue;
1783        }
1784        let start_pos = atoms
1785            .iter()
1786            .position(|atom| wedged_atoms.contains(atom))
1787            .unwrap_or(0);
1788        let rotated_atoms = (0..atoms.len())
1789            .map(|offset| atoms[(offset + start_pos) % atoms.len()])
1790            .collect::<Vec<_>>();
1791        let entry = KekulizeRing {
1792            atoms: rotated_atoms,
1793            bonds: bonds.clone(),
1794            source_ring: ring_idx,
1795        };
1796        if start_pos == 0 && !atoms.iter().any(|atom| wedged_atoms.contains(atom)) {
1797            back.push(entry);
1798        } else {
1799            front.push(entry);
1800        }
1801    }
1802    front.extend(back);
1803    front
1804}
1805
1806fn fused_ring_systems(rings: &[KekulizeRing]) -> Vec<Vec<usize>> {
1807    // BEGIN RDKIT CPP FUNCTION RingUtils::makeRingNeighborMap
1808    // RDKit✔️✔️: void makeRingNeighborMap(const VECT_INT_VECT &brings,
1809    // RDKit✔️✔️:                          INT_INT_VECT_MAP &neighMap, unsigned int maxSize,
1810    // RDKit✔️✔️:                          unsigned int maxOverlapSize) {
1811    // RDKit✔️✔️:   auto nrings = rdcast<int>(brings.size());
1812    // RDKit✔️✔️:   for (i = 0; i < nrings; ++i) {
1813    // RDKit✔️✔️:     neighMap[i];
1814    // RDKit✔️✔️:     ring1 = brings[i];
1815    // RDKit✔️✔️:     for (j = i + 1; j < nrings; ++j) {
1816    // RDKit✔️✔️:       INT_VECT inter;
1817    // RDKit✔️✔️:       Intersect(ring1, brings[j], inter);
1818    // RDKit✔️✔️:       if (inter.size() > 0 &&
1819    // RDKit✔️✔️:           (!maxOverlapSize || inter.size() <= maxOverlapSize)) {
1820    // RDKit✔️✔️:         neighMap[i].push_back(j);
1821    // RDKit✔️✔️:         neighMap[j].push_back(i);
1822    // RDKit✔️✔️:       }
1823    // RDKit✔️✔️:     }
1824    // RDKit✔️✔️:   }
1825    // RDKit✔️✔️: }
1826    // END RDKIT CPP FUNCTION RingUtils::makeRingNeighborMap
1827    let mut neighbor_map = vec![Vec::<usize>::new(); rings.len()];
1828    for left in 0..rings.len() {
1829        for right in (left + 1)..rings.len() {
1830            if rings_share_bond(rings, left, right) {
1831                neighbor_map[left].push(right);
1832                neighbor_map[right].push(left);
1833            }
1834        }
1835    }
1836
1837    // BEGIN RDKIT CPP FUNCTION RingUtils::pickFusedRings
1838    // RDKit✔️✔️: void pickFusedRings(int curr, const INT_INT_VECT_MAP &neighMap, INT_VECT &res,
1839    // RDKit✔️✔️:                     boost::dynamic_bitset<> &done, int depth) {
1840    // RDKit✔️✔️:   auto pos = neighMap.find(curr);
1841    // RDKit✔️✔️:   done[curr] = 1;
1842    // RDKit✔️✔️:   res.push_back(curr);
1843    // RDKit✔️✔️:   const auto &neighs = pos->second;
1844    // RDKit✔️✔️:   for (int neigh : neighs) {
1845    // RDKit✔️✔️:     if (!done[neigh]) {
1846    // RDKit✔️✔️:       pickFusedRings(neigh, neighMap, res, done, depth + 1);
1847    // RDKit✔️✔️:     }
1848    // RDKit✔️✔️:   }
1849    // RDKit✔️✔️: }
1850    // END RDKIT CPP FUNCTION RingUtils::pickFusedRings
1851    let mut systems = Vec::<Vec<usize>>::new();
1852    let mut done = vec![false; rings.len()];
1853    let mut curr = 0usize;
1854    while curr < rings.len() {
1855        let mut fused = Vec::new();
1856        pick_fused_rings_rdkit_order(curr, &neighbor_map, &mut fused, &mut done);
1857        systems.push(fused);
1858
1859        let Some(next) = done.iter().position(|is_done| !*is_done) else {
1860            break;
1861        };
1862        curr = next;
1863    }
1864    systems
1865}
1866
1867fn pick_fused_rings_rdkit_order(
1868    curr: usize,
1869    neighbor_map: &[Vec<usize>],
1870    result: &mut Vec<usize>,
1871    done: &mut [bool],
1872) {
1873    done[curr] = true;
1874    result.push(curr);
1875    for &neighbor in &neighbor_map[curr] {
1876        if !done[neighbor] {
1877            pick_fused_rings_rdkit_order(neighbor, neighbor_map, result, done);
1878        }
1879    }
1880}
1881
1882fn fused_ring_systems_stack_order_for_tests(rings: &[KekulizeRing]) -> Vec<Vec<usize>> {
1883    let mut systems = Vec::<Vec<usize>>::new();
1884    let mut seen = vec![false; rings.len()];
1885    for start in 0..rings.len() {
1886        if seen[start] {
1887            continue;
1888        }
1889        let mut stack = vec![start];
1890        let mut system = Vec::new();
1891        seen[start] = true;
1892        while let Some(ring) = stack.pop() {
1893            system.push(ring);
1894            for next in 0..rings.len() {
1895                if !seen[next] && rings_share_bond(rings, ring, next) {
1896                    seen[next] = true;
1897                    stack.push(next);
1898                }
1899            }
1900        }
1901        systems.push(system);
1902    }
1903    systems
1904}
1905
1906fn rings_share_bond(rings: &[KekulizeRing], left: usize, right: usize) -> bool {
1907    rings[left]
1908        .bonds
1909        .iter()
1910        .any(|bond| rings[right].bonds.contains(bond))
1911}
1912
1913#[cfg(test)]
1914mod tests {
1915    use super::*;
1916    use crate::{
1917        AtomSpec, BondOrder, BondQueryPredicate, BondSpec, Element, MoleculeBuilder, QueryNode,
1918        symmetrize_sssr,
1919    };
1920
1921    fn kekulize_fragment_assignment(
1922        molecule: &Molecule,
1923        rings: &RingInfo,
1924        atoms_to_use: &[bool],
1925        bonds_to_use: Vec<bool>,
1926        clear_aromatic_flags: bool,
1927        canonical: bool,
1928        max_backtracks: usize,
1929    ) -> Result<KekulizeAssignment, KekulizeError> {
1930        super::kekulize_fragment_assignment(
1931            molecule.topology_block(),
1932            molecule.stereo_groups(),
1933            rings,
1934            None,
1935            atoms_to_use,
1936            bonds_to_use,
1937            clear_aromatic_flags,
1938            canonical,
1939            max_backtracks,
1940        )
1941    }
1942
1943    fn ordered_kekulize_rings(
1944        molecule: &Molecule,
1945        rings: &RingInfo,
1946        atoms_to_use: &[bool],
1947        bonds_to_use: &[bool],
1948        wedged_atoms: &BTreeSet<AtomId>,
1949    ) -> Vec<KekulizeRing> {
1950        super::ordered_kekulize_rings(
1951            molecule.topology_block(),
1952            rings,
1953            atoms_to_use,
1954            bonds_to_use,
1955            wedged_atoms,
1956        )
1957    }
1958
1959    fn mark_double_bond_candidates(
1960        molecule: &Molecule,
1961        rings: &RingInfo,
1962        kekulize_rings: &[KekulizeRing],
1963        fused_ring_indices: &[usize],
1964        assignment: &mut KekulizeAssignment,
1965    ) -> Result<KekulizeCandidateState, KekulizeError> {
1966        let valence = crate::valence::assign_valence(&molecule, crate::ValenceModel::RdkitLike)?;
1967        super::mark_double_bond_candidates(
1968            molecule.topology_block(),
1969            rings,
1970            kekulize_rings,
1971            fused_ring_indices,
1972            &valence,
1973            assignment,
1974        )
1975    }
1976
1977    fn kekulize_fused_assignment(
1978        molecule: &Molecule,
1979        rings: &RingInfo,
1980        kekulize_rings: &[KekulizeRing],
1981        fused_ring_indices: &[usize],
1982        atom_ranks: &[usize],
1983        max_backtracks: usize,
1984        assignment: &mut KekulizeAssignment,
1985    ) -> Result<(), KekulizeError> {
1986        let valence = crate::valence::assign_valence(&molecule, crate::ValenceModel::RdkitLike)?;
1987        super::kekulize_fused_assignment(
1988            molecule.topology_block(),
1989            rings,
1990            kekulize_rings,
1991            fused_ring_indices,
1992            &valence,
1993            atom_ranks,
1994            max_backtracks,
1995            assignment,
1996        )
1997    }
1998
1999    fn kekulize_worker_matching(
2000        molecule: &Molecule,
2001        state: &KekulizeCandidateState,
2002        initial_done: &[AtomId],
2003        atom_ranks: &[usize],
2004        max_backtracks: usize,
2005        switched_off: &BTreeSet<AtomId>,
2006    ) -> Option<BTreeSet<BondId>> {
2007        super::kekulize_worker_matching(
2008            molecule.topology_block(),
2009            state,
2010            initial_done,
2011            atom_ranks,
2012            max_backtracks,
2013            switched_off,
2014        )
2015    }
2016
2017    fn permute_dummies_and_match(
2018        molecule: &Molecule,
2019        state: &KekulizeCandidateState,
2020        questions: &[AtomId],
2021        atom_ranks: &[usize],
2022        max_backtracks: usize,
2023    ) -> Option<BTreeSet<BondId>> {
2024        super::permute_dummies_and_match(
2025            molecule.topology_block(),
2026            state,
2027            questions,
2028            atom_ranks,
2029            max_backtracks,
2030        )
2031    }
2032
2033    #[allow(clippy::too_many_arguments)]
2034    fn back_track(
2035        molecule: &Molecule,
2036        options: &mut BTreeMap<AtomId, VecDeque<(AtomId, BondId)>>,
2037        last_opt: AtomId,
2038        done: &mut Vec<AtomId>,
2039        done_flags: &mut [bool],
2040        aqueue: &mut VecDeque<AtomId>,
2041        astack_flags: &mut [bool],
2042        d_bnd_cands: &mut [bool],
2043        d_bnd_adds: &mut [bool],
2044        matched: &mut BTreeSet<BondId>,
2045    ) {
2046        super::back_track(
2047            molecule.topology_block(),
2048            options,
2049            last_opt,
2050            done,
2051            done_flags,
2052            aqueue,
2053            astack_flags,
2054            d_bnd_cands,
2055            d_bnd_adds,
2056            matched,
2057        );
2058    }
2059
2060    fn worker_sorted_atoms(
2061        molecule: &Molecule,
2062        all_atoms: &[AtomId],
2063        all_atom_set: &BTreeSet<AtomId>,
2064        atom_ranks: &[usize],
2065    ) -> Vec<AtomId> {
2066        super::worker_sorted_atoms(
2067            molecule.topology_block(),
2068            all_atoms,
2069            all_atom_set,
2070            atom_ranks,
2071        )
2072    }
2073
2074    fn build_fused_aromatic_naphthalene(atom_is_aromatic: bool) -> Molecule {
2075        let mut builder = MoleculeBuilder::new();
2076        let atoms = (0..10)
2077            .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(atom_is_aromatic)))
2078            .collect::<Vec<_>>();
2079        for &(begin, end) in &[
2080            (0, 1),
2081            (1, 2),
2082            (2, 3),
2083            (3, 4),
2084            (4, 5),
2085            (5, 0),
2086            (4, 6),
2087            (6, 7),
2088            (7, 8),
2089            (8, 9),
2090            (9, 5),
2091        ] {
2092            builder
2093                .add_bond(
2094                    BondSpec::new(atoms[begin], atoms[end], BondOrder::Aromatic)
2095                        .with_aromatic(true),
2096                )
2097                .unwrap();
2098        }
2099        builder.build().unwrap()
2100    }
2101
2102    fn build_fused_dummy_aromatic_naphthalene() -> Molecule {
2103        let mut builder = MoleculeBuilder::new();
2104        let mut atoms = (0..10)
2105            .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
2106            .collect::<Vec<_>>();
2107        atoms[0] = builder.add_atom(AtomSpec::new(Element::DUMMY));
2108        for &(begin, end) in &[
2109            (0, 1),
2110            (1, 2),
2111            (2, 3),
2112            (3, 4),
2113            (4, 5),
2114            (5, 0),
2115            (4, 6),
2116            (6, 7),
2117            (7, 8),
2118            (8, 9),
2119            (9, 5),
2120        ] {
2121            builder
2122                .add_bond(
2123                    BondSpec::new(atoms[begin], atoms[end], BondOrder::Aromatic)
2124                        .with_aromatic(true),
2125                )
2126                .unwrap();
2127        }
2128        builder.build().unwrap()
2129    }
2130
2131    fn fused_ring_inputs(molecule: &Molecule) -> (RingInfo, Vec<KekulizeRing>, Vec<Vec<usize>>) {
2132        let rings = symmetrize_sssr(molecule).unwrap();
2133        let kekulize_rings = ordered_kekulize_rings(
2134            molecule,
2135            &rings,
2136            &vec![true; molecule.num_atoms()],
2137            &vec![true; molecule.num_bonds()],
2138            &std::collections::BTreeSet::new(),
2139        );
2140        let ring_systems = fused_ring_systems(&kekulize_rings);
2141        (rings, kekulize_rings, ring_systems)
2142    }
2143
2144    #[test]
2145    fn kekulize_assignment_returns_empty_for_empty_molecule() {
2146        let molecule = Molecule::new();
2147
2148        let assignment = kekulize_assignment(&molecule, None, true, false, 100).unwrap();
2149
2150        assert!(assignment.is_empty());
2151    }
2152
2153    #[test]
2154    fn kekulize_assignment_kekulizes_benzene_like_aromatic_cycle() {
2155        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2156
2157        let assignment = kekulize_assignment(&molecule, None, true, false, 100).unwrap();
2158        let double_bonds = molecule
2159            .bonds()
2160            .iter()
2161            .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2162            .count();
2163        let single_bonds = molecule
2164            .bonds()
2165            .iter()
2166            .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Single))
2167            .count();
2168
2169        assert_eq!(double_bonds, 3);
2170        assert_eq!(single_bonds, 3);
2171        assert!(
2172            molecule
2173                .bonds()
2174                .iter()
2175                .all(|bond| assignment.bond_aromatic_flag(bond.id()) == Some(false))
2176        );
2177        assert!(
2178            molecule
2179                .atoms()
2180                .iter()
2181                .all(|atom| assignment.atom_aromatic_flag(atom.id()) == Some(false))
2182        );
2183    }
2184
2185    #[test]
2186    fn kekulize_assignment_preserves_aromatic_flags_when_clear_is_disabled_like_rdkit() {
2187        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2188
2189        let assignment = kekulize_assignment(&molecule, None, false, false, 100).unwrap();
2190
2191        let double_bonds = molecule
2192            .bonds()
2193            .iter()
2194            .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2195            .count();
2196        let single_bonds = molecule
2197            .bonds()
2198            .iter()
2199            .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Single))
2200            .count();
2201
2202        assert_eq!(double_bonds, 3);
2203        assert_eq!(single_bonds, 3);
2204        assert!(
2205            molecule
2206                .bonds()
2207                .iter()
2208                .all(|bond| assignment.bond_aromatic_flag(bond.id()).is_none())
2209        );
2210        assert!(
2211            molecule
2212                .atoms()
2213                .iter()
2214                .all(|atom| assignment.atom_aromatic_flag(atom.id()).is_none())
2215        );
2216    }
2217
2218    #[test]
2219    fn kekulize_assignment_noncanonical_keeps_atom_index_seed_for_benzene_like_rdkit_core_api() {
2220        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2221
2222        let assignment = kekulize_assignment(&molecule, None, false, false, 100).unwrap();
2223        let bond_orders = molecule
2224            .bonds()
2225            .iter()
2226            .map(|bond| assignment.bond_order(bond.id()).unwrap())
2227            .collect::<Vec<_>>();
2228
2229        assert_eq!(
2230            bond_orders,
2231            vec![
2232                BondOrder::Double,
2233                BondOrder::Single,
2234                BondOrder::Double,
2235                BondOrder::Single,
2236                BondOrder::Double,
2237                BondOrder::Single
2238            ]
2239        );
2240    }
2241
2242    #[test]
2243    fn kekulize_assignment_fragment_returns_empty_when_atoms_to_use_is_empty_like_rdkit() {
2244        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2245        let rings = symmetrize_sssr(&molecule).unwrap();
2246
2247        let assignment = kekulize_fragment_assignment(
2248            &molecule,
2249            &rings,
2250            &[false; 6],
2251            vec![true; molecule.num_bonds()],
2252            true,
2253            false,
2254            100,
2255        )
2256        .unwrap();
2257
2258        assert!(assignment.is_empty());
2259    }
2260
2261    #[test]
2262    fn kekulize_assignment_fragment_allows_partial_scope_non_ring_aromatic_atom_like_rdkit() {
2263        let mut builder = MoleculeBuilder::new();
2264        let a0 = builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true));
2265        let _a1 = builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true));
2266        let molecule = builder.build().unwrap();
2267        let rings = symmetrize_sssr(&molecule).unwrap();
2268
2269        let assignment = kekulize_fragment_assignment(
2270            &molecule,
2271            &rings,
2272            &[true, false],
2273            vec![],
2274            true,
2275            false,
2276            100,
2277        )
2278        .unwrap();
2279
2280        assert_eq!(assignment.atom_aromatic_flag(a0), Some(false));
2281    }
2282
2283    #[test]
2284    fn kekulize_fragment_rejects_bitset_size_mismatch() {
2285        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2286        let rings = symmetrize_sssr(&molecule).unwrap();
2287
2288        let error = kekulize_fragment_assignment(
2289            &molecule,
2290            &rings,
2291            &[true; 5],
2292            vec![true; molecule.num_bonds()],
2293            true,
2294            false,
2295            100,
2296        )
2297        .unwrap_err();
2298
2299        assert!(matches!(
2300            error,
2301            KekulizeError::FragmentBitsetSizeMismatch { atoms: 5, bonds: 6 }
2302        ));
2303    }
2304
2305    #[test]
2306    fn kekulize_fragment_rejects_bond_mask_size_mismatch_like_rdkit() {
2307        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2308        let rings = symmetrize_sssr(&molecule).unwrap();
2309
2310        let error = kekulize_fragment_assignment(
2311            &molecule,
2312            &rings,
2313            &[true; 6],
2314            vec![true; 5],
2315            true,
2316            false,
2317            100,
2318        )
2319        .unwrap_err();
2320
2321        assert!(matches!(
2322            error,
2323            KekulizeError::FragmentBitsetSizeMismatch { atoms: 6, bonds: 5 }
2324        ));
2325    }
2326
2327    #[test]
2328    fn kekulize_fragment_filters_out_rings_outside_selected_atom_mask_like_rdkit() {
2329        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2330        let rings = symmetrize_sssr(&molecule).unwrap();
2331        let mut atoms_to_use = vec![true; molecule.num_atoms()];
2332        atoms_to_use[0] = false;
2333
2334        let assignment = kekulize_fragment_assignment(
2335            &molecule,
2336            &rings,
2337            &atoms_to_use,
2338            vec![true; molecule.num_bonds()],
2339            false,
2340            false,
2341            100,
2342        )
2343        .unwrap();
2344
2345        assert!(assignment.bond_orders.iter().all(Option::is_none));
2346    }
2347
2348    #[test]
2349    fn kekulize_fragment_clears_pyrrolic_explicit_hydrogen_state_like_rdkit() {
2350        let molecule = Molecule::from_smiles_with_sanitize("c1cc[nH]c1", false).unwrap();
2351        let rings = symmetrize_sssr(&molecule).unwrap();
2352        let pyrrolic = molecule
2353            .atoms()
2354            .iter()
2355            .find(|atom| atom.atomic_number() == 7)
2356            .map(|atom| atom.id())
2357            .unwrap();
2358
2359        let assignment = kekulize_fragment_assignment(
2360            &molecule,
2361            &rings,
2362            &vec![true; molecule.num_atoms()],
2363            vec![true; molecule.num_bonds()],
2364            true,
2365            false,
2366            100,
2367        )
2368        .unwrap();
2369
2370        assert_eq!(assignment.atom_aromatic_flag(pyrrolic), Some(false));
2371        assert_eq!(assignment.atom_no_implicit(pyrrolic), Some(false));
2372        assert_eq!(assignment.atom_explicit_hydrogens(pyrrolic), Some(0));
2373    }
2374
2375    #[test]
2376    fn kekulize_assignment_runs_canonical_fragment_ranking_for_plain_aromatic_cycle() {
2377        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2378
2379        let assignment = kekulize_assignment(&molecule, None, true, true, 100).unwrap();
2380
2381        let double_bonds = molecule
2382            .bonds()
2383            .iter()
2384            .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2385            .count();
2386        let single_bonds = molecule
2387            .bonds()
2388            .iter()
2389            .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Single))
2390            .count();
2391        assert_eq!(double_bonds, 3);
2392        assert_eq!(single_bonds, 3);
2393    }
2394
2395    #[test]
2396    fn kekulize_if_possible_returns_assignment_for_kekulizable_molecule() {
2397        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2398
2399        let assignment = kekulize_if_possible_assignment(&molecule, true, false, 100)
2400            .unwrap()
2401            .unwrap();
2402
2403        let double_bonds = molecule
2404            .bonds()
2405            .iter()
2406            .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2407            .count();
2408        assert_eq!(double_bonds, 3);
2409    }
2410
2411    #[test]
2412    fn kekulize_if_possible_preserves_aromatic_flags_when_clear_is_disabled() {
2413        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2414
2415        let assignment = kekulize_if_possible_assignment(&molecule, false, false, 100)
2416            .unwrap()
2417            .unwrap();
2418
2419        assert!(
2420            molecule
2421                .bonds()
2422                .iter()
2423                .all(|bond| assignment.bond_aromatic_flag(bond.id()).is_none())
2424        );
2425        assert!(
2426            molecule
2427                .atoms()
2428                .iter()
2429                .all(|atom| assignment.atom_aromatic_flag(atom.id()).is_none())
2430        );
2431    }
2432
2433    #[test]
2434    fn kekulize_if_possible_runs_canonical_fragment_ranking_for_plain_aromatic_cycle() {
2435        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2436
2437        let assignment = kekulize_if_possible_assignment(&molecule, true, true, 100)
2438            .unwrap()
2439            .unwrap();
2440
2441        let double_bonds = molecule
2442            .bonds()
2443            .iter()
2444            .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2445            .count();
2446        assert_eq!(double_bonds, 3);
2447    }
2448
2449    #[test]
2450    fn kekulize_if_possible_assignment_returns_none_for_recoverable_failure() {
2451        let mut builder = MoleculeBuilder::new();
2452        let atoms: Vec<_> = (0..5)
2453            .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
2454            .collect();
2455        for i in 0..5 {
2456            builder
2457                .add_bond(
2458                    BondSpec::new(atoms[i], atoms[(i + 1) % 5], BondOrder::Aromatic)
2459                        .with_aromatic(true),
2460                )
2461                .unwrap();
2462        }
2463        let molecule = builder.build().unwrap();
2464
2465        let assignment = kekulize_if_possible_assignment(&molecule, true, false, 100).unwrap();
2466
2467        assert!(assignment.is_none());
2468    }
2469
2470    #[test]
2471    fn kekulize_if_possible_value_returns_kekulized_clone_on_success() {
2472        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2473
2474        let (kekulized, changed) = kekulize_if_possible_value(&molecule, true, false, 100).unwrap();
2475
2476        assert!(changed);
2477        assert!(
2478            kekulized
2479                .bonds()
2480                .iter()
2481                .any(|bond| bond.order() == BondOrder::Double)
2482        );
2483        assert!(kekulized.bonds().iter().all(|bond| !bond.is_aromatic()));
2484        assert!(
2485            molecule
2486                .bonds()
2487                .iter()
2488                .all(|bond| bond.order() == BondOrder::Aromatic && bond.is_aromatic())
2489        );
2490    }
2491
2492    #[test]
2493    fn kekulize_if_possible_value_returns_original_clone_when_kekulization_fails() {
2494        let mut builder = MoleculeBuilder::new();
2495        let atoms = (0..5)
2496            .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
2497            .collect::<Vec<_>>();
2498        for idx in 0..5 {
2499            builder
2500                .add_bond(
2501                    BondSpec::new(atoms[idx], atoms[(idx + 1) % 5], BondOrder::Aromatic)
2502                        .with_aromatic(true),
2503                )
2504                .unwrap();
2505        }
2506        let molecule = builder.build().unwrap();
2507
2508        let (restored, changed) = kekulize_if_possible_value(&molecule, true, false, 100).unwrap();
2509
2510        assert!(!changed);
2511        assert_eq!(restored.bonds().len(), molecule.bonds().len());
2512        assert!(restored.bonds().iter().zip(molecule.bonds().iter()).all(
2513            |(restored_bond, original_bond)| restored_bond.order() == original_bond.order()
2514                && restored_bond.is_aromatic() == original_bond.is_aromatic()
2515        ));
2516        assert!(
2517            restored
2518                .atoms()
2519                .iter()
2520                .zip(molecule.atoms().iter())
2521                .all(|(restored_atom, original_atom)| restored_atom.is_aromatic()
2522                    == original_atom.is_aromatic())
2523        );
2524    }
2525
2526    #[test]
2527    fn ordered_kekulize_rings_prioritizes_wedged_rings_like_rdkit() {
2528        let mut builder = MoleculeBuilder::new();
2529        let ring_one = (0..3)
2530            .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
2531            .collect::<Vec<_>>();
2532        let ring_two = (0..3)
2533            .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
2534            .collect::<Vec<_>>();
2535        for idx in 0..3 {
2536            builder
2537                .add_bond(
2538                    BondSpec::new(ring_one[idx], ring_one[(idx + 1) % 3], BondOrder::Aromatic)
2539                        .with_aromatic(true),
2540                )
2541                .unwrap();
2542        }
2543        for idx in 0..3 {
2544            let mut bond =
2545                BondSpec::new(ring_two[idx], ring_two[(idx + 1) % 3], BondOrder::Aromatic)
2546                    .with_aromatic(true);
2547            if idx == 0 {
2548                bond = bond.with_direction(crate::BondDirection::BeginWedge);
2549            }
2550            builder.add_bond(bond).unwrap();
2551        }
2552        let molecule = builder.build().unwrap();
2553        let rings = symmetrize_sssr(&molecule).unwrap();
2554        let wedged_atoms = std::iter::once(ring_two[0]).collect::<std::collections::BTreeSet<_>>();
2555
2556        let ordered = ordered_kekulize_rings(
2557            &molecule,
2558            &rings,
2559            &vec![true; molecule.num_atoms()],
2560            &vec![true; molecule.num_bonds()],
2561            &wedged_atoms,
2562        );
2563
2564        assert_eq!(ordered.len(), 2);
2565        assert_eq!(ordered[0].source_ring, 1);
2566    }
2567
2568    #[test]
2569    fn ordered_kekulize_rings_rotates_wedged_ring_start_atom_like_rdkit() {
2570        let mut builder = MoleculeBuilder::new();
2571        let ring = (0..4)
2572            .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
2573            .collect::<Vec<_>>();
2574        for idx in 0..4 {
2575            let mut bond = BondSpec::new(ring[idx], ring[(idx + 1) % 4], BondOrder::Aromatic)
2576                .with_aromatic(true);
2577            if idx == 2 {
2578                bond = bond.with_direction(crate::BondDirection::BeginDash);
2579            }
2580            builder.add_bond(bond).unwrap();
2581        }
2582        let molecule = builder.build().unwrap();
2583        let rings = symmetrize_sssr(&molecule).unwrap();
2584        let wedged_atoms = std::iter::once(ring[2]).collect::<std::collections::BTreeSet<_>>();
2585
2586        let ordered = ordered_kekulize_rings(
2587            &molecule,
2588            &rings,
2589            &vec![true; molecule.num_atoms()],
2590            &vec![true; molecule.num_bonds()],
2591            &wedged_atoms,
2592        );
2593
2594        assert_eq!(ordered.len(), 1);
2595        assert_eq!(ordered[0].atoms[0], ring[2]);
2596    }
2597
2598    #[test]
2599    fn canonical_kekulize_handles_stereo_ranking_branch() {
2600        let mut builder = MoleculeBuilder::new();
2601        let atoms = (0..6)
2602            .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
2603            .collect::<Vec<_>>();
2604        for idx in 0..6 {
2605            builder
2606                .add_bond(
2607                    BondSpec::new(atoms[idx], atoms[(idx + 1) % 6], BondOrder::Aromatic)
2608                        .with_aromatic(true),
2609                )
2610                .unwrap();
2611        }
2612        let mut molecule = builder.build().unwrap();
2613        molecule.topology_block_mut().atoms[0].set_chiral_tag(crate::ChiralTag::TetrahedralCw);
2614
2615        let assignment = kekulize_assignment(&molecule, None, true, true, 100).unwrap();
2616
2617        let double_bonds = molecule
2618            .bonds()
2619            .iter()
2620            .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2621            .count();
2622        assert_eq!(double_bonds, 3);
2623    }
2624
2625    #[test]
2626    fn kekulize_assignment_handles_dummy_atom_permutation_branch() {
2627        let molecule = Molecule::from_smiles_with_sanitize("*1cccc1", false).unwrap();
2628
2629        let assignment = kekulize_assignment(&molecule, None, true, false, 100).unwrap();
2630
2631        let double_bonds = molecule
2632            .bonds()
2633            .iter()
2634            .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2635            .count();
2636        let single_bonds = molecule
2637            .bonds()
2638            .iter()
2639            .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Single))
2640            .count();
2641
2642        assert_eq!(double_bonds, 2, "{:?}", assignment.bond_orders);
2643        assert_eq!(single_bonds, 1, "{:?}", assignment.bond_orders);
2644    }
2645
2646    #[test]
2647    fn kekulize_assignment_detects_aromaticity_from_bond_state_without_atom_flags() {
2648        let mut builder = MoleculeBuilder::new();
2649        let atoms = (0..6)
2650            .map(|_| builder.add_atom(AtomSpec::new(Element::C)))
2651            .collect::<Vec<_>>();
2652        for idx in 0..6 {
2653            builder
2654                .add_bond(
2655                    BondSpec::new(atoms[idx], atoms[(idx + 1) % 6], BondOrder::Aromatic)
2656                        .with_aromatic(true),
2657                )
2658                .unwrap();
2659        }
2660        let molecule = builder.build().unwrap();
2661
2662        let assignment = kekulize_assignment(&molecule, None, true, false, 100).unwrap();
2663
2664        let double_bonds = molecule
2665            .bonds()
2666            .iter()
2667            .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2668            .count();
2669        assert_eq!(double_bonds, 3);
2670    }
2671
2672    #[test]
2673    fn kekulize_assignment_detects_fused_aromaticity_from_bond_state_without_atom_flags() {
2674        let molecule = build_fused_aromatic_naphthalene(false);
2675
2676        let assignment = kekulize_assignment(&molecule, None, true, false, 100).unwrap();
2677
2678        let double_bonds = molecule
2679            .bonds()
2680            .iter()
2681            .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2682            .count();
2683        assert_eq!(double_bonds, 5);
2684    }
2685
2686    #[test]
2687    fn kekulize_if_possible_mark_double_bond_candidates_tracks_dummy_atoms_in_fused_systems_like_rdkit()
2688     {
2689        let molecule = build_fused_dummy_aromatic_naphthalene();
2690        let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
2691        let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
2692
2693        let state = mark_double_bond_candidates(
2694            &molecule,
2695            &rings,
2696            &kekulize_rings,
2697            &ring_systems[0],
2698            &mut assignment,
2699        )
2700        .unwrap();
2701
2702        assert!(state.questions.contains(&AtomId::new(10)));
2703    }
2704
2705    #[test]
2706    fn kekulize_if_possible_mark_double_bond_candidates_marks_non_aromatic_atoms_done_inside_fused_systems()
2707     {
2708        let mut molecule = build_fused_aromatic_naphthalene(true);
2709        molecule.topology_block_mut().atoms[0].set_aromatic(false);
2710        for bond_idx in [0usize, 5usize] {
2711            molecule.topology_block_mut().bonds[bond_idx].set_aromatic(false);
2712            molecule.topology_block_mut().bonds[bond_idx].set_order(BondOrder::Single);
2713        }
2714        let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
2715        let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
2716
2717        let state = mark_double_bond_candidates(
2718            &molecule,
2719            &rings,
2720            &kekulize_rings,
2721            &ring_systems[0],
2722            &mut assignment,
2723        )
2724        .unwrap();
2725
2726        assert!(state.done.contains(&AtomId::new(0)));
2727        assert!(!state.candidate_atoms.contains(&AtomId::new(0)));
2728    }
2729
2730    #[test]
2731    fn kekulize_fused_returns_empty_candidate_state_without_aromatic_or_dummy_atoms() {
2732        let mut molecule = build_fused_aromatic_naphthalene(true);
2733        for atom in molecule.topology_block_mut().atoms.iter_mut() {
2734            atom.set_aromatic(false);
2735        }
2736        for bond in molecule.topology_block_mut().bonds.iter_mut() {
2737            bond.set_aromatic(false);
2738            bond.set_order(BondOrder::Single);
2739        }
2740        let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
2741        let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
2742
2743        let state = mark_double_bond_candidates(
2744            &molecule,
2745            &rings,
2746            &kekulize_rings,
2747            &ring_systems[0],
2748            &mut assignment,
2749        )
2750        .unwrap();
2751
2752        assert!(state.candidate_atoms.is_empty());
2753        assert!(state.aromatic_edges.is_empty());
2754        assert!(state.questions.is_empty());
2755        assert!(state.done.is_empty());
2756    }
2757
2758    #[test]
2759    fn kekulize_assignment_reports_problem_atoms_for_unmatchable_fused_system() {
2760        let mut builder = MoleculeBuilder::new();
2761        let atoms = (0..5)
2762            .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
2763            .collect::<Vec<_>>();
2764        for idx in 0..5 {
2765            builder
2766                .add_bond(
2767                    BondSpec::new(atoms[idx], atoms[(idx + 1) % 5], BondOrder::Aromatic)
2768                        .with_aromatic(true),
2769                )
2770                .unwrap();
2771        }
2772        let molecule = builder.build().unwrap();
2773        let rings = symmetrize_sssr(&molecule).unwrap();
2774        let kekulize_rings = ordered_kekulize_rings(
2775            &molecule,
2776            &rings,
2777            &vec![true; molecule.num_atoms()],
2778            &vec![true; molecule.num_bonds()],
2779            &std::collections::BTreeSet::new(),
2780        );
2781        let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
2782
2783        let error = kekulize_fused_assignment(
2784            &molecule,
2785            &rings,
2786            &kekulize_rings,
2787            &[0],
2788            &(0..molecule.num_atoms()).collect::<Vec<_>>(),
2789            100,
2790            &mut assignment,
2791        )
2792        .unwrap_err();
2793        assert!(matches!(
2794            error,
2795            KekulizeError::UnkekulizedAtoms { atoms }
2796            if atoms == vec![
2797                AtomId::new(0),
2798                AtomId::new(1),
2799                AtomId::new(2),
2800                AtomId::new(3),
2801                AtomId::new(4),
2802            ]
2803        ));
2804    }
2805
2806    #[test]
2807    fn kekulize_fused_assignment_sets_bond_orders_and_clears_directions_like_rdkit() {
2808        let mut molecule = build_fused_aromatic_naphthalene(true);
2809        for bond in molecule.topology_block_mut().bonds.iter_mut() {
2810            bond.set_direction(BondDirection::BeginWedge);
2811        }
2812        let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
2813        let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
2814
2815        kekulize_fused_assignment(
2816            &molecule,
2817            &rings,
2818            &kekulize_rings,
2819            &ring_systems[0],
2820            &(0..molecule.num_atoms()).collect::<Vec<_>>(),
2821            100,
2822            &mut assignment,
2823        )
2824        .unwrap();
2825
2826        let assigned_orders = molecule
2827            .bonds()
2828            .iter()
2829            .filter_map(|bond| assignment.bond_order(bond.id()))
2830            .collect::<Vec<_>>();
2831        let num_doubles = assigned_orders
2832            .iter()
2833            .filter(|order| **order == BondOrder::Double)
2834            .count();
2835        let num_singles = assigned_orders
2836            .iter()
2837            .filter(|order| **order == BondOrder::Single)
2838            .count();
2839
2840        assert_eq!(num_doubles, 5);
2841        assert_eq!(num_singles, 6);
2842        assert!(molecule.bonds().iter().any(|bond| {
2843            assignment.bond_order(bond.id()) == Some(BondOrder::Double)
2844                && assignment.bond_direction(bond.id()) == Some(BondDirection::None)
2845        }));
2846    }
2847
2848    #[test]
2849    fn back_track_rolls_back_local_bonds_and_keeps_saved_options_like_rdkit() {
2850        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2851        let mut options = std::collections::BTreeMap::from([
2852            (
2853                AtomId::new(1),
2854                std::collections::VecDeque::from([(AtomId::new(3), BondId::new(2))]),
2855            ),
2856            (
2857                AtomId::new(2),
2858                std::collections::VecDeque::from([(AtomId::new(4), BondId::new(3))]),
2859            ),
2860        ]);
2861        let mut done = vec![AtomId::new(0), AtomId::new(1), AtomId::new(2)];
2862        let mut done_flags = vec![false; molecule.num_atoms()];
2863        for atom in &done {
2864            done_flags[atom.index()] = true;
2865        }
2866        let mut aqueue = std::collections::VecDeque::new();
2867        let mut astack_flags = vec![false; molecule.num_atoms()];
2868        let mut d_bnd_cands = vec![false; molecule.num_atoms()];
2869        let mut d_bnd_adds = vec![false; molecule.num_bonds()];
2870        d_bnd_adds[0] = true;
2871        d_bnd_adds[1] = true;
2872        let mut matched = [BondId::new(0), BondId::new(1)]
2873            .into_iter()
2874            .collect::<std::collections::BTreeSet<_>>();
2875
2876        back_track(
2877            &molecule,
2878            &mut options,
2879            AtomId::new(1),
2880            &mut done,
2881            &mut done_flags,
2882            &mut aqueue,
2883            &mut astack_flags,
2884            &mut d_bnd_cands,
2885            &mut d_bnd_adds,
2886            &mut matched,
2887        );
2888
2889        assert_eq!(done, vec![AtomId::new(0)]);
2890        assert!(done_flags[0]);
2891        assert!(!done_flags[1]);
2892        assert!(!done_flags[2]);
2893        assert_eq!(
2894            aqueue.into_iter().collect::<Vec<_>>(),
2895            vec![AtomId::new(1), AtomId::new(2)]
2896        );
2897        assert!(astack_flags[1]);
2898        assert!(astack_flags[2]);
2899        assert!(d_bnd_adds[0]);
2900        assert!(!d_bnd_adds[1]);
2901        assert!(matched.contains(&BondId::new(0)));
2902        assert!(!matched.contains(&BondId::new(1)));
2903        assert!(d_bnd_cands[1]);
2904        assert!(d_bnd_cands[2]);
2905        assert_eq!(
2906            options.get(&AtomId::new(1)).cloned(),
2907            Some(std::collections::VecDeque::from([(
2908                AtomId::new(3),
2909                BondId::new(2),
2910            )]))
2911        );
2912        assert_eq!(
2913            options.get(&AtomId::new(2)).cloned(),
2914            Some(std::collections::VecDeque::from([(
2915                AtomId::new(4),
2916                BondId::new(3),
2917            )]))
2918        );
2919    }
2920
2921    #[test]
2922    fn kekulize_if_possible_worker_back_track_keeps_saved_options_for_replay() {
2923        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2924        let expected = std::collections::VecDeque::from([(AtomId::new(3), BondId::new(2))]);
2925        let mut options = std::collections::BTreeMap::from([
2926            (AtomId::new(1), expected.clone()),
2927            (
2928                AtomId::new(2),
2929                std::collections::VecDeque::from([(AtomId::new(4), BondId::new(3))]),
2930            ),
2931        ]);
2932        let mut done = vec![AtomId::new(0), AtomId::new(1), AtomId::new(2)];
2933        let mut done_flags = vec![false; molecule.num_atoms()];
2934        for atom in &done {
2935            done_flags[atom.index()] = true;
2936        }
2937        let mut aqueue = std::collections::VecDeque::new();
2938        let mut astack_flags = vec![false; molecule.num_atoms()];
2939        let mut d_bnd_cands = vec![false; molecule.num_atoms()];
2940        let mut d_bnd_adds = vec![false; molecule.num_bonds()];
2941        d_bnd_adds[0] = true;
2942        let mut matched = std::collections::BTreeSet::from([BondId::new(0)]);
2943
2944        back_track(
2945            &molecule,
2946            &mut options,
2947            AtomId::new(1),
2948            &mut done,
2949            &mut done_flags,
2950            &mut aqueue,
2951            &mut astack_flags,
2952            &mut d_bnd_cands,
2953            &mut d_bnd_adds,
2954            &mut matched,
2955        );
2956
2957        assert_eq!(options.get(&AtomId::new(1)), Some(&expected));
2958        assert_eq!(matched, std::collections::BTreeSet::from([BondId::new(0)]));
2959    }
2960
2961    #[test]
2962    fn kekulize_if_possible_permutes_dummy_candidates_after_initial_worker_failure() {
2963        let molecule = Molecule::from_smiles_with_sanitize("*1cccc1", false).unwrap();
2964        let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
2965        let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
2966        let state = mark_double_bond_candidates(
2967            &molecule,
2968            &rings,
2969            &kekulize_rings,
2970            &ring_systems[0],
2971            &mut assignment,
2972        )
2973        .unwrap();
2974        let atom_ranks = (0..molecule.num_atoms()).collect::<Vec<_>>();
2975
2976        let initial = kekulize_worker_matching(
2977            &molecule,
2978            &state,
2979            &state.done,
2980            &atom_ranks,
2981            100,
2982            &std::collections::BTreeSet::new(),
2983        );
2984        let permuted =
2985            permute_dummies_and_match(&molecule, &state, &state.questions, &atom_ranks, 100);
2986
2987        assert!(initial.is_none());
2988        assert_eq!(
2989            permuted.as_ref().map(|matched| matched.len()),
2990            Some(2),
2991            "{permuted:?}"
2992        );
2993    }
2994
2995    #[test]
2996    fn permute_dummies_and_match_can_recover_without_backtracking_via_mask_switch() {
2997        let molecule = Molecule::from_smiles_with_sanitize("*1cccc1", false).unwrap();
2998        let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
2999        let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3000        let state = mark_double_bond_candidates(
3001            &molecule,
3002            &rings,
3003            &kekulize_rings,
3004            &ring_systems[0],
3005            &mut assignment,
3006        )
3007        .unwrap();
3008        let atom_ranks = (0..molecule.num_atoms()).collect::<Vec<_>>();
3009
3010        let permuted =
3011            permute_dummies_and_match(&molecule, &state, &state.questions, &atom_ranks, 0);
3012
3013        assert_eq!(permuted.as_ref().map(|matched| matched.len()), Some(2));
3014    }
3015
3016    #[test]
3017    fn permute_dummies_and_match_returns_none_after_exhausting_all_switch_masks() {
3018        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
3019        let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3020        let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3021        let state = mark_double_bond_candidates(
3022            &molecule,
3023            &rings,
3024            &kekulize_rings,
3025            &ring_systems[0],
3026            &mut assignment,
3027        )
3028        .unwrap();
3029        let atom_ranks = (0..molecule.num_atoms()).collect::<Vec<_>>();
3030
3031        let permuted =
3032            permute_dummies_and_match(&molecule, &state, &[AtomId::new(0)], &atom_ranks, 100);
3033
3034        assert!(permuted.is_none());
3035    }
3036
3037    #[test]
3038    fn kekulize_worker_matching_succeeds_for_plain_aromatic_cycle() {
3039        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
3040        let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3041        let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3042        let state = mark_double_bond_candidates(
3043            &molecule,
3044            &rings,
3045            &kekulize_rings,
3046            &ring_systems[0],
3047            &mut assignment,
3048        )
3049        .unwrap();
3050        let atom_ranks = (0..molecule.num_atoms()).collect::<Vec<_>>();
3051
3052        let matched = kekulize_worker_matching(
3053            &molecule,
3054            &state,
3055            &state.done,
3056            &atom_ranks,
3057            100,
3058            &std::collections::BTreeSet::new(),
3059        );
3060
3061        assert_eq!(matched.as_ref().map(|set| set.len()), Some(3));
3062    }
3063
3064    #[test]
3065    fn kekulize_worker_matching_returns_none_when_backtracks_are_exhausted() {
3066        let molecule = Molecule::from_smiles_with_sanitize("*1cccc1", false).unwrap();
3067        let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3068        let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3069        let state = mark_double_bond_candidates(
3070            &molecule,
3071            &rings,
3072            &kekulize_rings,
3073            &ring_systems[0],
3074            &mut assignment,
3075        )
3076        .unwrap();
3077        let atom_ranks = (0..molecule.num_atoms()).collect::<Vec<_>>();
3078
3079        let matched = kekulize_worker_matching(
3080            &molecule,
3081            &state,
3082            &state.done,
3083            &atom_ranks,
3084            0,
3085            &std::collections::BTreeSet::new(),
3086        );
3087
3088        assert!(matched.is_none());
3089    }
3090
3091    #[test]
3092    fn kekulize_worker_matching_returns_empty_match_when_candidates_are_switched_off() {
3093        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
3094        let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3095        let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3096        let state = mark_double_bond_candidates(
3097            &molecule,
3098            &rings,
3099            &kekulize_rings,
3100            &ring_systems[0],
3101            &mut assignment,
3102        )
3103        .unwrap();
3104        let atom_ranks = (0..molecule.num_atoms()).collect::<Vec<_>>();
3105        let switched_off = state.candidate_atoms.clone();
3106
3107        let matched = kekulize_worker_matching(
3108            &molecule,
3109            &state,
3110            &state.done,
3111            &atom_ranks,
3112            100,
3113            &switched_off,
3114        );
3115
3116        assert_eq!(matched, Some(std::collections::BTreeSet::new()));
3117    }
3118
3119    #[test]
3120    fn permute_dummies_and_match_returns_none_without_questions() {
3121        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
3122        let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3123        let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3124        let state = mark_double_bond_candidates(
3125            &molecule,
3126            &rings,
3127            &kekulize_rings,
3128            &ring_systems[0],
3129            &mut assignment,
3130        )
3131        .unwrap();
3132        let atom_ranks = (0..molecule.num_atoms()).collect::<Vec<_>>();
3133
3134        let matched = permute_dummies_and_match(&molecule, &state, &[], &atom_ranks, 100);
3135
3136        assert!(matched.is_none());
3137    }
3138
3139    #[test]
3140    fn kekulize_if_possible_does_not_treat_port_shape_errors_as_recoverable() {
3141        assert!(!kekulize_if_possible_is_recoverable_failure(
3142            &KekulizeError::FragmentBitsetSizeMismatch { atoms: 1, bonds: 2 }
3143        ));
3144        assert!(!kekulize_if_possible_is_recoverable_failure(
3145            &KekulizeError::CanonicalRankSymbolSizeMismatch {
3146                kind: "atom",
3147                expected: 1,
3148                actual: 0,
3149            }
3150        ));
3151    }
3152
3153    #[test]
3154    fn question_switch_masks_follow_rdkit_bit_order() {
3155        let masks = question_switch_masks(&[AtomId::new(1), AtomId::new(3), AtomId::new(5)]);
3156
3157        assert_eq!(
3158            masks,
3159            vec![
3160                [AtomId::new(1)].into_iter().collect(),
3161                [AtomId::new(3)].into_iter().collect(),
3162                [AtomId::new(1), AtomId::new(3)].into_iter().collect(),
3163                [AtomId::new(5)].into_iter().collect(),
3164                [AtomId::new(1), AtomId::new(5)].into_iter().collect(),
3165                [AtomId::new(3), AtomId::new(5)].into_iter().collect(),
3166                [AtomId::new(1), AtomId::new(3), AtomId::new(5)]
3167                    .into_iter()
3168                    .collect(),
3169            ]
3170        );
3171    }
3172
3173    #[test]
3174    fn question_switch_masks_returns_empty_for_no_questions() {
3175        let masks = question_switch_masks(&[]);
3176
3177        assert!(masks.is_empty());
3178    }
3179
3180    #[test]
3181    fn worker_sorted_atoms_prioritizes_wedge_end_atoms_before_rank() {
3182        let mut builder = MoleculeBuilder::new();
3183        let atoms = (0..3)
3184            .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
3185            .collect::<Vec<_>>();
3186        builder
3187            .add_bond(
3188                BondSpec::new(atoms[0], atoms[1], BondOrder::Aromatic)
3189                    .with_aromatic(true)
3190                    .with_direction(crate::BondDirection::BeginWedge),
3191            )
3192            .unwrap();
3193        builder
3194            .add_bond(BondSpec::new(atoms[1], atoms[2], BondOrder::Aromatic).with_aromatic(true))
3195            .unwrap();
3196        let molecule = builder.build().unwrap();
3197        let all_atoms = vec![atoms[0], atoms[1], atoms[2]];
3198        let all_atom_set = all_atoms
3199            .iter()
3200            .copied()
3201            .collect::<std::collections::BTreeSet<_>>();
3202
3203        let sorted = worker_sorted_atoms(&molecule, &all_atoms, &all_atom_set, &[2, 10, 0]);
3204
3205        assert_eq!(sorted, vec![atoms[1], atoms[2], atoms[0]]);
3206    }
3207
3208    #[test]
3209    fn worker_sorted_atoms_uses_rank_order_when_no_wedge_bias_exists() {
3210        let mut builder = MoleculeBuilder::new();
3211        let atoms = (0..3)
3212            .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
3213            .collect::<Vec<_>>();
3214        builder
3215            .add_bond(BondSpec::new(atoms[0], atoms[1], BondOrder::Aromatic).with_aromatic(true))
3216            .unwrap();
3217        builder
3218            .add_bond(BondSpec::new(atoms[1], atoms[2], BondOrder::Aromatic).with_aromatic(true))
3219            .unwrap();
3220        let molecule = builder.build().unwrap();
3221        let all_atoms = vec![atoms[0], atoms[1], atoms[2]];
3222        let all_atom_set = all_atoms
3223            .iter()
3224            .copied()
3225            .collect::<std::collections::BTreeSet<_>>();
3226
3227        let sorted = worker_sorted_atoms(&molecule, &all_atoms, &all_atom_set, &[2, 0, 1]);
3228
3229        assert_eq!(sorted, vec![atoms[1], atoms[2], atoms[0]]);
3230    }
3231
3232    #[test]
3233    fn mark_double_bond_candidates_detects_dummy_and_aromatic_fused_state() {
3234        let molecule = build_fused_dummy_aromatic_naphthalene();
3235        let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3236        let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3237
3238        let state = mark_double_bond_candidates(
3239            &molecule,
3240            &rings,
3241            &kekulize_rings,
3242            &ring_systems[0],
3243            &mut assignment,
3244        )
3245        .unwrap();
3246
3247        assert!(!state.aromatic_edges.is_empty());
3248        assert!(!state.candidate_atoms.is_empty());
3249        assert!(!state.questions.is_empty());
3250        assert!(
3251            state
3252                .candidate_atoms
3253                .contains(&AtomId::new(molecule.atoms().len() - 1))
3254        );
3255    }
3256
3257    #[test]
3258    fn mark_double_bond_candidates_excludes_non_aromatic_ring_bond_from_aromatic_edges() {
3259        let mut molecule = build_fused_dummy_aromatic_naphthalene();
3260        molecule.topology_block_mut().bonds[0].set_aromatic(false);
3261        molecule.topology_block_mut().bonds[0].set_order(BondOrder::Single);
3262        let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3263        let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3264
3265        let state = mark_double_bond_candidates(
3266            &molecule,
3267            &rings,
3268            &kekulize_rings,
3269            &ring_systems[0],
3270            &mut assignment,
3271        )
3272        .unwrap();
3273
3274        assert!(
3275            !state
3276                .aromatic_edges
3277                .iter()
3278                .any(|(bond, _, _)| *bond == BondId::new(0))
3279        );
3280        assert_eq!(assignment.bond_order(BondId::new(0)), None);
3281    }
3282
3283    #[test]
3284    fn kekulize_assignment_excludes_bond_type_query_from_bonds_to_use() {
3285        let mut builder = MoleculeBuilder::new();
3286        let a0 = builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true));
3287        let a1 = builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true));
3288        let a2 = builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true));
3289        builder
3290            .add_bond(
3291                BondSpec::new(a0, a1, BondOrder::Aromatic)
3292                    .with_aromatic(true)
3293                    .with_query(QueryNode::predicate(BondQueryPredicate::OrderIn(vec![
3294                        BondOrder::Single,
3295                        BondOrder::Aromatic,
3296                    ]))),
3297            )
3298            .unwrap();
3299        builder
3300            .add_bond(BondSpec::new(a1, a2, BondOrder::Aromatic).with_aromatic(true))
3301            .unwrap();
3302        builder
3303            .add_bond(BondSpec::new(a2, a0, BondOrder::Aromatic).with_aromatic(true))
3304            .unwrap();
3305        let molecule = builder.build().unwrap();
3306
3307        let assignment = kekulize_assignment(&molecule, None, true, false, 100).unwrap();
3308
3309        assert_eq!(assignment.bond_order(BondId::new(0)), None);
3310        assert_eq!(assignment.bond_aromatic_flag(BondId::new(0)), None);
3311        assert_eq!(assignment.bond_aromatic_flag(BondId::new(1)), Some(false));
3312        assert_eq!(assignment.bond_aromatic_flag(BondId::new(2)), Some(false));
3313    }
3314
3315    #[test]
3316    fn mark_double_bond_candidates_keeps_exocyclic_aryl_carbon_kekulizable_like_rdkit() {
3317        let molecule =
3318            Molecule::from_smiles("O=C1N(/N=C(/C)C1=NN/C2=C/C(OC)=CC=C2)C=3C=CC=CC=3").unwrap();
3319        let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3320        let phenyl_system = ring_systems
3321            .iter()
3322            .find(|system| {
3323                system.iter().any(|&ring_idx| {
3324                    kekulize_rings[ring_idx]
3325                        .atoms
3326                        .iter()
3327                        .any(|atom| atom.index() == 17)
3328                })
3329            })
3330            .unwrap();
3331        let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3332
3333        let state = mark_double_bond_candidates(
3334            &molecule,
3335            &rings,
3336            &kekulize_rings,
3337            phenyl_system,
3338            &mut assignment,
3339        )
3340        .unwrap();
3341
3342        assert!(state.candidate_atoms.contains(&AtomId::new(17)));
3343        assert!(kekulize_assignment(&molecule, None, true, true, 100).is_ok());
3344    }
3345
3346    #[test]
3347    fn mark_double_bond_candidates_excludes_aromatic_nh_in_row_134_like_rdkit() {
3348        let molecule = Molecule::from_smiles(
3349            "CC1=C(/C=C2\\C(C)=C3/C(C)=C(/C=C4\\C(C)=C5/C(CCC(=O)O)=C(C(C)=C5N4)C=C6\\C(CCC(=O)O)=C(C)C(=C6N2)C=C1)N3)C=C(\\C=C)C",
3350        )
3351        .unwrap();
3352        let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3353        let porphyrin_system = ring_systems
3354            .iter()
3355            .find(|system| {
3356                system.iter().any(|&ring_idx| {
3357                    kekulize_rings[ring_idx]
3358                        .atoms
3359                        .iter()
3360                        .any(|atom| matches!(atom.index(), 26 | 39 | 42))
3361                })
3362            })
3363            .unwrap();
3364        let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3365
3366        let state = mark_double_bond_candidates(
3367            &molecule,
3368            &rings,
3369            &kekulize_rings,
3370            porphyrin_system,
3371            &mut assignment,
3372        )
3373        .unwrap();
3374
3375        assert!(!state.candidate_atoms.contains(&AtomId::new(26)));
3376        assert!(!state.candidate_atoms.contains(&AtomId::new(39)));
3377        assert!(!state.candidate_atoms.contains(&AtomId::new(42)));
3378    }
3379
3380    #[test]
3381    fn kekulize_assignment_handles_row_134_porhyrin_like_rdkit() {
3382        let molecule = Molecule::from_smiles(
3383            "CC1=C(/C=C2\\C(C)=C3/C(C)=C(/C=C4\\C(C)=C5/C(CCC(=O)O)=C(C(C)=C5N4)C=C6\\C(CCC(=O)O)=C(C)C(=C6N2)C=C1)N3)C=C(\\C=C)C",
3384        )
3385        .unwrap();
3386
3387        assert!(kekulize_assignment(&molecule, None, true, true, 100).is_ok());
3388        assert!(kekulize_assignment(&molecule, None, true, false, 100).is_ok());
3389    }
3390
3391    fn kekulize_assignment_handles_pyrrolic_five_membered_ring_like_rdkit() {
3392        let molecule = Molecule::from_smiles("[nH]1cccc1").unwrap();
3393
3394        assert_eq!(molecule.atoms()[0].explicit_hydrogens(), 1);
3395        assert!(kekulize_assignment(&molecule, None, true, true, 100).is_ok());
3396        assert!(kekulize_assignment(&molecule, None, true, false, 100).is_ok());
3397    }
3398
3399    #[test]
3400    fn kekulize_assignment_keeps_non_bond_type_query_in_bonds_to_use() {
3401        let mut builder = MoleculeBuilder::new();
3402        let atoms = (0..6)
3403            .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
3404            .collect::<Vec<_>>();
3405        builder
3406            .add_bond(
3407                BondSpec::new(atoms[0], atoms[1], BondOrder::Aromatic)
3408                    .with_aromatic(true)
3409                    .with_query(QueryNode::predicate(BondQueryPredicate::IsInRing(true))),
3410            )
3411            .unwrap();
3412        for idx in 1..6 {
3413            builder
3414                .add_bond(
3415                    BondSpec::new(atoms[idx], atoms[(idx + 1) % 6], BondOrder::Aromatic)
3416                        .with_aromatic(true),
3417                )
3418                .unwrap();
3419        }
3420        let molecule = builder.build().unwrap();
3421
3422        let assignment = kekulize_assignment(&molecule, None, true, false, 100).unwrap();
3423
3424        assert_eq!(assignment.bond_aromatic_flag(BondId::new(0)), Some(false));
3425    }
3426
3427    #[test]
3428    fn kekulize_assignment_rejects_non_ring_aromatic_atom_like_rdkit() {
3429        let mut builder = MoleculeBuilder::new();
3430        builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true));
3431        let molecule = builder.build().unwrap();
3432
3433        let error = kekulize_assignment(&molecule, None, true, false, 100).unwrap_err();
3434        assert!(
3435            matches!(error, KekulizeError::NonRingAromaticAtom { atom } if atom == AtomId::new(0))
3436        );
3437    }
3438
3439    #[test]
3440    fn kekulize_assignment_preserves_total_valence_like_rdkit() {
3441        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
3442        let before =
3443            crate::assign_valence_with_options(&molecule, crate::ValenceModel::RdkitLike, false)
3444                .unwrap();
3445        let assignment = kekulize_assignment(&molecule, None, true, false, 100).unwrap();
3446        let mut kekulized = molecule.clone();
3447        apply_kekulize_assignment(kekulized.topology_block_mut(), &assignment);
3448        let after =
3449            crate::assign_valence_with_options(&kekulized, crate::ValenceModel::RdkitLike, false)
3450                .unwrap();
3451
3452        for atom in molecule.atoms() {
3453            let idx = atom.id().index();
3454            assert_eq!(
3455                before.explicit_valence[idx] + before.implicit_hydrogens[idx],
3456                after.explicit_valence[idx] + after.implicit_hydrogens[idx]
3457            );
3458        }
3459    }
3460
3461    #[test]
3462    fn kekulize_assignment_reuses_provided_ring_info_like_rdkit_entrypoint() {
3463        let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
3464        let rings = symmetrize_sssr(&molecule).unwrap();
3465
3466        let assignment = kekulize_assignment(&molecule, Some(&rings), true, false, 100).unwrap();
3467
3468        let double_bonds = molecule
3469            .bonds()
3470            .iter()
3471            .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
3472            .count();
3473        assert_eq!(double_bonds, 3);
3474    }
3475
3476    #[test]
3477    fn kekulize_source_markers_do_not_hide_elided_rdkit_lines() {
3478        let source = include_str!("kekulize.rs");
3479
3480        for line in source.lines().filter(|line| line.contains("RDKit")) {
3481            assert!(
3482                !line.contains("..."),
3483                "RDKit source marker must not contain elided source: {line}"
3484            );
3485        }
3486    }
3487}