1use std::collections::{BTreeMap, BTreeSet, VecDeque};
4
5use crate::{
6 AtomId, BondDirection, BondId, BondOrder, Molecule, RingFindingError, RingInfo,
7 ValenceAssignment, ValenceError, ValenceModel, canon_rank::FragmentRankScope,
8 canon_rank::rank_fragment_atoms_for_kekulize, molecule::TopologyBlock,
9 read_parts::MoleculeReadParts,
10};
11
12#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
13pub enum KekulizeError {
14 #[error("can't kekulize mol. unkekulized atoms: {atoms:?}")]
15 UnkekulizedAtoms { atoms: Vec<AtomId> },
16 #[error("non-ring atom {atom} marked aromatic")]
17 NonRingAromaticAtom { atom: AtomId },
18 #[error("incomplete RDKit kekulize port for {branch}: {reason}")]
19 ProtocolDebt {
20 branch: &'static str,
21 reason: &'static str,
22 },
23 #[error("kekulization changed valence on atom {atom}: {before}!={after}")]
24 ValenceChanged {
25 atom: AtomId,
26 before: i32,
27 after: i32,
28 },
29 #[error("kekulize fragment bitset size mismatch: atoms={atoms}, bonds={bonds}")]
30 FragmentBitsetSizeMismatch { atoms: usize, bonds: usize },
31 #[error("canonical rank {kind} symbol size mismatch: expected={expected}, actual={actual}")]
32 CanonicalRankSymbolSizeMismatch {
33 kind: &'static str,
34 expected: usize,
35 actual: usize,
36 },
37 #[error(transparent)]
38 RingFinding(#[from] RingFindingError),
39 #[error(transparent)]
40 Valence(#[from] ValenceError),
41}
42
43#[derive(Debug, Clone, PartialEq, Eq)]
44pub(crate) struct KekulizeAssignment {
45 bond_orders: Vec<Option<BondOrder>>,
46 bond_aromatic_flags: Vec<Option<bool>>,
47 bond_directions: Vec<Option<BondDirection>>,
48 atom_aromatic_flags: Vec<Option<bool>>,
49 atom_explicit_hydrogens: Vec<Option<u8>>,
50 atom_no_implicit: Vec<Option<bool>>,
51}
52
53struct KekulizeCandidateState {
54 all_atoms: Vec<AtomId>,
55 candidate_atoms: BTreeSet<AtomId>,
56 aromatic_edges: Vec<(BondId, AtomId, AtomId)>,
57 questions: Vec<AtomId>,
58 done: Vec<AtomId>,
59}
60
61#[derive(Debug, Clone, PartialEq, Eq)]
62struct KekulizeRing {
63 atoms: Vec<AtomId>,
64 bonds: Vec<BondId>,
65 source_ring: usize,
66}
67
68impl KekulizeAssignment {
69 fn empty(num_atoms: usize, num_bonds: usize) -> Self {
70 Self::with_sizes(num_atoms, num_bonds)
71 }
72
73 fn with_sizes(num_atoms: usize, num_bonds: usize) -> Self {
74 Self {
75 bond_orders: vec![None; num_bonds],
76 bond_aromatic_flags: vec![None; num_bonds],
77 bond_directions: vec![None; num_bonds],
78 atom_aromatic_flags: vec![None; num_atoms],
79 atom_explicit_hydrogens: vec![None; num_atoms],
80 atom_no_implicit: vec![None; num_atoms],
81 }
82 }
83
84 pub(crate) fn bond_order(&self, bond: BondId) -> Option<BondOrder> {
85 self.bond_orders[bond.index()]
86 }
87
88 pub(crate) fn bond_aromatic_flag(&self, bond: BondId) -> Option<bool> {
89 self.bond_aromatic_flags[bond.index()]
90 }
91
92 pub(crate) fn bond_direction(&self, bond: BondId) -> Option<BondDirection> {
93 self.bond_directions[bond.index()]
94 }
95
96 pub(crate) fn atom_aromatic_flag(&self, atom: AtomId) -> Option<bool> {
97 self.atom_aromatic_flags[atom.index()]
98 }
99
100 pub(crate) fn atom_explicit_hydrogens(&self, atom: AtomId) -> Option<u8> {
101 self.atom_explicit_hydrogens[atom.index()]
102 }
103
104 pub(crate) fn atom_no_implicit(&self, atom: AtomId) -> Option<bool> {
105 self.atom_no_implicit[atom.index()]
106 }
107
108 #[cfg(test)]
109 fn is_empty(&self) -> bool {
110 self.bond_orders.iter().all(Option::is_none)
111 && self.bond_aromatic_flags.iter().all(Option::is_none)
112 && self.bond_directions.iter().all(Option::is_none)
113 && self.atom_aromatic_flags.iter().all(Option::is_none)
114 && self.atom_explicit_hydrogens.iter().all(Option::is_none)
115 && self.atom_no_implicit.iter().all(Option::is_none)
116 }
117}
118
119pub(crate) fn apply_kekulize_assignment(
120 topology: &mut TopologyBlock,
121 assignment: &KekulizeAssignment,
122) -> bool {
123 let mut changed = false;
124 for bond in &mut topology.bonds {
125 if let Some(next) = assignment.bond_order(bond.id()) {
126 if bond.order() != next {
127 bond.set_order(next);
128 changed = true;
129 }
130 }
131 if let Some(next) = assignment.bond_aromatic_flag(bond.id()) {
132 if bond.is_aromatic() != next {
133 bond.set_aromatic(next);
134 changed = true;
135 }
136 }
137 if let Some(next) = assignment.bond_direction(bond.id()) {
138 if bond.direction() != next {
139 bond.set_direction(next);
140 changed = true;
141 }
142 }
143 }
144 for atom in &mut topology.atoms {
145 if let Some(next) = assignment.atom_aromatic_flag(atom.id()) {
146 if atom.is_aromatic() != next {
147 atom.set_aromatic(next);
148 changed = true;
149 }
150 }
151 if let Some(next) = assignment.atom_explicit_hydrogens(atom.id()) {
152 if atom.explicit_hydrogens() != next {
153 atom.set_explicit_hydrogens(next);
154 changed = true;
155 }
156 }
157 if let Some(next) = assignment.atom_no_implicit(atom.id()) {
158 if atom.no_implicit() != next {
159 atom.set_no_implicit(next);
160 changed = true;
161 }
162 }
163 }
164 changed
165}
166
167fn topology_is_aromatic_atom(topology: &TopologyBlock, atom: AtomId) -> bool {
168 let atom_data = &topology.atoms[atom.index()];
169 if atom_data.is_aromatic() {
170 return true;
171 }
172 topology.bonds.iter().any(|bond| {
173 (bond.begin() == atom || bond.end() == atom)
174 && (bond.is_aromatic() || bond.order() == BondOrder::Aromatic)
175 })
176}
177
178fn kekulize_is_aromatic_bond(bond: &crate::Bond) -> bool {
179 bond.is_aromatic() || bond.order() == BondOrder::Aromatic
180}
181
182pub(crate) fn kekulize_assignment(
183 molecule: &Molecule,
184 rings: Option<&RingInfo>,
185 clear_aromatic_flags: bool,
186 canonical: bool,
187 max_backtracks: usize,
188) -> Result<KekulizeAssignment, KekulizeError> {
189 kekulize_assignment_from_parts_with_valence(
190 molecule.topology_block(),
191 molecule.stereo_groups(),
192 rings,
193 molecule.derived_cache().valence.as_ref(),
194 clear_aromatic_flags,
195 canonical,
196 max_backtracks,
197 )
198}
199
200pub(crate) fn kekulize_assignment_from_parts(
201 topology: &TopologyBlock,
202 stereo_groups: &[crate::stereo::StereoGroup],
203 rings: Option<&RingInfo>,
204 clear_aromatic_flags: bool,
205 canonical: bool,
206 max_backtracks: usize,
207) -> Result<KekulizeAssignment, KekulizeError> {
208 kekulize_assignment_from_parts_with_valence(
209 topology,
210 stereo_groups,
211 rings,
212 None,
213 clear_aromatic_flags,
214 canonical,
215 max_backtracks,
216 )
217}
218
219fn kekulize_assignment_from_parts_with_valence(
220 topology: &TopologyBlock,
221 stereo_groups: &[crate::stereo::StereoGroup],
222 rings: Option<&RingInfo>,
223 cached_valence: Option<&ValenceAssignment>,
224 clear_aromatic_flags: bool,
225 canonical: bool,
226 max_backtracks: usize,
227) -> Result<KekulizeAssignment, KekulizeError> {
228 let owned_rings;
240 let rings = if let Some(rings) = rings {
241 rings
242 } else {
243 owned_rings = crate::rings::find_sssr_from_parts(
254 topology.atoms.len(),
255 &topology.bonds,
256 &topology.adjacency,
257 )?;
258 &owned_rings
259 };
260 let atoms_to_use = vec![true; topology.atoms.len()];
261 let bonds_to_use = vec![true; topology.bonds.len()];
262 kekulize_fragment_assignment(
263 topology,
264 stereo_groups,
265 rings,
266 cached_valence,
267 &atoms_to_use,
268 bonds_to_use,
269 clear_aromatic_flags,
270 canonical,
271 max_backtracks,
272 )
273}
274
275pub(crate) fn kekulize_assignment_from_read_parts(
276 read: MoleculeReadParts<'_>,
277 rings: Option<&RingInfo>,
278 clear_aromatic_flags: bool,
279 canonical: bool,
280 max_backtracks: usize,
281) -> Result<KekulizeAssignment, KekulizeError> {
282 kekulize_assignment_from_parts(
283 read.topology(),
284 read.stereo_groups(),
285 rings,
286 clear_aromatic_flags,
287 canonical,
288 max_backtracks,
289 )
290}
291
292pub(crate) fn kekulize_fragment_assignment(
293 topology: &TopologyBlock,
294 stereo_groups: &[crate::stereo::StereoGroup],
295 rings: &RingInfo,
296 cached_valence: Option<&ValenceAssignment>,
297 atoms_to_use: &[bool],
298 mut bonds_to_use: Vec<bool>,
299 clear_aromatic_flags: bool,
300 canonical: bool,
301 max_backtracks: usize,
302) -> Result<KekulizeAssignment, KekulizeError> {
303 let trace_kekulize = std::env::var_os("COSMOLKIT_TRACE_KEKULIZE").is_some();
304 if atoms_to_use.len() != topology.atoms.len() || bonds_to_use.len() != topology.bonds.len() {
316 return Err(KekulizeError::FragmentBitsetSizeMismatch {
317 atoms: atoms_to_use.len(),
318 bonds: bonds_to_use.len(),
319 });
320 }
321 let mut assignment = KekulizeAssignment::with_sizes(topology.atoms.len(), topology.bonds.len());
322 if !atoms_to_use.iter().any(|selected| *selected) {
323 return Ok(assignment);
324 }
325
326 let mut found_aromatic = false;
337 for bond in &topology.bonds {
338 if bonds_to_use[bond.id().index()] {
339 if bond
340 .query()
341 .is_some_and(crate::valence::has_bond_type_query)
342 {
343 bonds_to_use[bond.id().index()] = false;
344 } else if kekulize_is_aromatic_bond(bond) {
345 found_aromatic = true;
346 }
347 }
348 }
349
350 let pre_kek_valence = if let Some(valence) = cached_valence {
358 valence.clone()
359 } else {
360 crate::valence::assign_valence_with_options_from_parts(
361 &topology.atoms,
362 &topology.bonds,
363 &topology.adjacency,
364 ValenceModel::RdkitLike,
365 false,
366 )?
367 };
368 if topology
369 .atoms
370 .iter()
371 .any(|atom| topology_is_aromatic_atom(topology, atom.id()))
372 {
373 found_aromatic = true;
374 }
375 if !found_aromatic {
376 return Ok(assignment);
377 }
378
379 let atom_ranks = if canonical {
386 rank_fragment_atoms_for_kekulize(
387 topology,
388 stereo_groups,
389 FragmentRankScope::new(atoms_to_use, &bonds_to_use),
390 )?
391 } else {
392 (0..topology.atoms.len()).collect::<Vec<_>>()
393 };
394 if trace_kekulize {
395 eprintln!("kekulize-fragment atomRanks={atom_ranks:?}");
396 }
397
398 let mut wedged_atoms = BTreeSet::new();
412 for bond in &topology.bonds {
413 if bonds_to_use[bond.id().index()]
414 && matches!(
415 bond.direction(),
416 BondDirection::BeginWedge | BondDirection::BeginDash
417 )
418 {
419 wedged_atoms.insert(bond.begin());
420 }
421 }
422
423 let kekulize_rings =
482 ordered_kekulize_rings(topology, rings, atoms_to_use, &bonds_to_use, &wedged_atoms);
483 let ring_systems = fused_ring_systems(&kekulize_rings);
484 if trace_kekulize {
485 eprintln!(
486 "kekulize-rings arings={:?}",
487 kekulize_rings
488 .iter()
489 .map(|ring| ring
490 .atoms
491 .iter()
492 .map(|atom| atom.index())
493 .collect::<Vec<_>>())
494 .collect::<Vec<_>>()
495 );
496 eprintln!(
497 "kekulize-rings brings={:?}",
498 kekulize_rings
499 .iter()
500 .map(|ring| ring
501 .bonds
502 .iter()
503 .map(|bond| bond.index())
504 .collect::<Vec<_>>())
505 .collect::<Vec<_>>()
506 );
507 eprintln!("kekulize-rings fused={ring_systems:?}");
508 }
509 for system in ring_systems {
510 let ring_bond_set = system
511 .iter()
512 .flat_map(|&ring_idx| kekulize_rings[ring_idx].bonds.iter().copied())
513 .collect::<BTreeSet<_>>();
514 let ring_atom_set = system
515 .iter()
516 .flat_map(|&ring_idx| kekulize_rings[ring_idx].atoms.iter().copied())
517 .collect::<BTreeSet<_>>();
518
519 if ring_bond_set.iter().any(|bond| {
520 let bond = &topology.bonds[bond.index()];
521 kekulize_is_aromatic_bond(bond)
522 }) || ring_atom_set
523 .iter()
524 .any(|atom| topology_is_aromatic_atom(topology, *atom))
525 {
526 kekulize_fused_assignment(
527 topology,
528 rings,
529 &kekulize_rings,
530 &system,
531 &pre_kek_valence,
532 &atom_ranks,
533 max_backtracks,
534 &mut assignment,
535 )?;
536 }
537 }
538
539 if clear_aromatic_flags {
548 let full_molecule_scope = atoms_to_use.iter().all(|selected| *selected)
549 && bonds_to_use.iter().all(|selected| *selected);
550 for bond in &topology.bonds {
551 if bonds_to_use[bond.id().index()] && bond.is_aromatic() {
552 assignment.bond_aromatic_flags[bond.id().index()] = Some(false);
553 }
554 }
555 for atom in &topology.atoms {
556 if atoms_to_use[atom.id().index()] && atom.is_aromatic() {
557 if full_molecule_scope && rings.num_atom_rings(atom.id()) == 0 {
558 return Err(KekulizeError::NonRingAromaticAtom { atom: atom.id() });
559 }
560 assignment.atom_aromatic_flags[atom.id().index()] = Some(false);
561 if matches!(atom.atomic_number(), 7 | 15)
566 && atom.formal_charge() == 0
567 && atom.explicit_hydrogens() == 1
568 {
569 assignment.atom_no_implicit[atom.id().index()] = Some(false);
570 assignment.atom_explicit_hydrogens[atom.id().index()] = Some(0);
571 }
572 }
573 }
574 }
575 let mut topology_after = topology.clone();
576 apply_kekulize_assignment(&mut topology_after, &assignment);
577 let mut post_kek_valence = pre_kek_valence.clone();
578 for atom in &topology_after.atoms {
579 let idx = atom.id().index();
580 if assignment.atom_explicit_hydrogens[idx].is_some()
581 || assignment.atom_no_implicit[idx].is_some()
582 {
583 let (explicit_valence, implicit_hydrogens) =
584 crate::valence::assign_valence_state_for_atom_from_parts(
585 &topology_after.atoms,
586 &topology_after.bonds,
587 &topology_after.adjacency,
588 atom.id(),
589 false,
590 )?;
591 post_kek_valence.explicit_valence[idx] = explicit_valence;
592 post_kek_valence.implicit_hydrogens[idx] = implicit_hydrogens;
593 }
594 }
595 for atom in &topology.atoms {
596 let idx = atom.id().index();
597 let before =
598 pre_kek_valence.explicit_valence[idx] + pre_kek_valence.implicit_hydrogens[idx];
599 let after =
600 post_kek_valence.explicit_valence[idx] + post_kek_valence.implicit_hydrogens[idx];
601 if before != after {
602 return Err(KekulizeError::ValenceChanged {
603 atom: atom.id(),
604 before,
605 after,
606 });
607 }
608 }
609 Ok(assignment)
612}
613
614fn kekulize_fused_assignment(
615 topology: &TopologyBlock,
616 rings: &RingInfo,
617 kekulize_rings: &[KekulizeRing],
618 fused_ring_indices: &[usize],
619 valence: &ValenceAssignment,
620 atom_ranks: &[usize],
621 max_backtracks: usize,
622 assignment: &mut KekulizeAssignment,
623) -> Result<(), KekulizeError> {
624 let state = mark_double_bond_candidates(
635 topology,
636 rings,
637 kekulize_rings,
638 fused_ring_indices,
639 valence,
640 assignment,
641 )?;
642
643 let matched_bonds = if let Some(matched_bonds) = kekulize_worker_matching(
647 topology,
648 &state,
649 &state.done,
650 atom_ranks,
651 max_backtracks,
652 &BTreeSet::new(),
653 ) {
654 matched_bonds
655 } else if let Some(matched_bonds) = permute_dummies_and_match(
656 topology,
657 &state,
658 &state.questions,
659 atom_ranks,
660 max_backtracks,
661 ) {
662 matched_bonds
663 } else {
664 return Err(KekulizeError::UnkekulizedAtoms {
665 atoms: state.candidate_atoms.iter().copied().collect(),
666 });
667 };
668
669 for (bond, _, _) in state.aromatic_edges {
670 assignment.bond_orders[bond.index()] = Some(if matched_bonds.contains(&bond) {
671 BondOrder::Double
672 } else {
673 BondOrder::Single
674 });
675 if matched_bonds.contains(&bond)
676 && topology.bonds[bond.index()].direction() != BondDirection::None
677 {
678 assignment.bond_directions[bond.index()] = Some(BondDirection::None);
679 }
680 }
681 Ok(())
687}
688
689fn mark_double_bond_candidates(
690 topology: &TopologyBlock,
691 rings: &RingInfo,
692 kekulize_rings: &[KekulizeRing],
693 fused_ring_indices: &[usize],
694 valence: &ValenceAssignment,
695 assignment: &mut KekulizeAssignment,
696) -> Result<KekulizeCandidateState, KekulizeError> {
697 let mut all_atoms = Vec::new();
707 let mut seen_atoms = BTreeSet::new();
708 for &ring_idx in fused_ring_indices {
709 for &atom_id in &kekulize_rings[ring_idx].atoms {
710 if seen_atoms.insert(atom_id) {
711 all_atoms.push(atom_id);
712 }
713 }
714 }
715 let all_atom_set = all_atoms.iter().copied().collect::<BTreeSet<_>>();
716 if !all_atoms.iter().any(|atom| {
717 topology.atoms[atom.index()].atomic_number() == 0
718 || topology_is_aromatic_atom(topology, *atom)
719 }) {
720 return Ok(KekulizeCandidateState {
721 all_atoms,
722 candidate_atoms: BTreeSet::new(),
723 aromatic_edges: Vec::new(),
724 questions: Vec::new(),
725 done: Vec::new(),
726 });
727 }
728 let fused_source_ring_set = fused_ring_indices
729 .iter()
730 .map(|&ring_idx| kekulize_rings[ring_idx].source_ring)
731 .collect::<BTreeSet<_>>();
732 let mut is_ring_not_cand = BTreeSet::new();
733 for &ring_idx in fused_ring_indices {
734 let mut ring_is_candidate = false;
735 let source_ring = kekulize_rings[ring_idx].source_ring;
736 for atom in &rings.atom_rings()[source_ring] {
737 if topology_is_aromatic_atom(topology, *atom) && rings.num_atom_rings(*atom) == 1 {
738 ring_is_candidate = true;
739 break;
740 }
741 }
742 if !ring_is_candidate {
743 is_ring_not_cand.insert(source_ring);
744 }
745 }
746 let mut candidate_atoms = BTreeSet::<AtomId>::new();
747 let mut aromatic_edges = Vec::<(BondId, AtomId, AtomId)>::new();
748 let mut seen_make_single = BTreeSet::<BondId>::new();
749 let mut questions = Vec::<AtomId>::new();
750 let mut done = Vec::<AtomId>::new();
751 for &atom_id in &all_atoms {
753 let atom = &topology.atoms[atom_id.index()];
754 if atom.atomic_number() != 0 && !topology_is_aromatic_atom(topology, atom_id) {
755 done.push(atom_id);
756 continue;
757 }
758 let mut sbo = 0i32;
759 let mut n_to_ignore = 0usize;
760 let mut non_ar_non_dummy_nbr = 0usize;
761 for bond in topology
762 .bonds
763 .iter()
764 .filter(|bond| bond.begin() == atom_id || bond.end() == atom_id)
765 {
766 let other = if bond.begin() == atom_id {
767 bond.end()
768 } else {
769 bond.begin()
770 };
771 let other_atom = &topology.atoms[other.index()];
772 if all_atom_set.contains(&other)
773 && other_atom.atomic_number() != 0
774 && !topology_is_aromatic_atom(topology, other)
775 {
776 non_ar_non_dummy_nbr += 1;
777 }
778 if kekulize_is_aromatic_bond(bond)
788 && matches!(
789 bond.order(),
790 BondOrder::Single | BondOrder::Double | BondOrder::Aromatic
791 )
792 {
793 sbo += 1;
794 if seen_make_single.insert(bond.id()) {
795 assignment.bond_orders[bond.id().index()] = Some(BondOrder::Single);
796 aromatic_edges.push((bond.id(), bond.begin(), bond.end()));
797 }
798 } else {
799 let bond_contrib =
800 crate::valence::bond_valence_contrib(bond, atom_id)?.round() as i32;
801 sbo += bond_contrib;
802 if bond_contrib == 0 {
803 n_to_ignore += 1;
804 }
805 }
806 }
807 let num_atom_rings = rings.num_atom_rings(atom_id);
808 let num_non_cand_rings = rings
809 .atom_members(atom_id)
810 .iter()
811 .filter(|ring_idx| {
812 fused_source_ring_set.contains(ring_idx) && is_ring_not_cand.contains(ring_idx)
813 })
814 .count();
815 if atom.atomic_number() == 0
816 && non_ar_non_dummy_nbr < num_atom_rings
817 && num_non_cand_rings < num_atom_rings
818 {
819 candidate_atoms.insert(atom_id);
820 questions.push(atom_id);
821 continue;
822 }
823 if atom.atomic_number() == 0 {
824 continue;
825 }
826 sbo += i32::from(atom.explicit_hydrogens()) + valence.implicit_hydrogens[atom_id.index()];
827 let mut dv = rdkit_kekulize_default_valence(atom.atomic_number())?;
828 let mut chrg = atom.formal_charge();
829 if rdkit_is_early_atom(atom.atomic_number()) {
830 chrg = -chrg;
831 }
832 if atom.atomic_number() == 6 && chrg > 0 {
833 chrg = -chrg;
834 }
835 dv += i32::from(chrg);
836 let tbo =
837 valence.explicit_valence[atom_id.index()] + valence.implicit_hydrogens[atom_id.index()];
838 let n_radicals = i32::from(atom.radical_electrons());
839 let total_degree = i32::try_from(
840 topology
841 .bonds
842 .iter()
843 .filter(|bond| bond.begin() == atom_id || bond.end() == atom_id)
844 .count(),
845 )
846 .unwrap_or(i32::MAX)
847 + valence.implicit_hydrogens[atom_id.index()]
848 - i32::try_from(n_to_ignore).unwrap_or(i32::MAX);
849 let valence_list = crate::rdkit_valence_list(atom.atomic_number())?.ok_or(
850 ValenceError::UnsupportedBranch {
851 reason: "kekulize valence list atomic number out of range",
852 },
853 )?;
854 let mut vi = 1usize;
855 while tbo > dv && vi < valence_list.len() && valence_list[vi] > 0 {
856 dv = valence_list[vi] + i32::from(chrg);
857 vi += 1;
858 }
859 if tbo == 5
860 && sbo == 4
861 && dv == 3
862 && total_degree == 3
863 && n_radicals == 0
864 && chrg == 0
865 && atom.explicit_hydrogens() == 0
866 && valence.implicit_hydrogens[atom_id.index()] == 0
867 && matches!(atom.atomic_number(), 7 | 15 | 33)
868 {
869 dv = 5;
870 }
871 if total_degree + n_radicals >= dv {
872 continue;
873 }
874 if dv == (sbo + 1 + n_radicals)
875 || (n_radicals == 0 && atom.no_implicit() && dv == (sbo + 2))
876 {
877 candidate_atoms.insert(atom_id);
878 }
879 }
880 Ok(KekulizeCandidateState {
886 all_atoms,
887 candidate_atoms,
888 aromatic_edges,
889 questions,
890 done,
891 })
892}
893
894fn kekulize_worker_matching(
895 topology: &TopologyBlock,
896 state: &KekulizeCandidateState,
897 initial_done: &[AtomId],
898 atom_ranks: &[usize],
899 max_backtracks: usize,
900 switched_off: &BTreeSet<AtomId>,
901) -> Option<BTreeSet<BondId>> {
902 let trace_kekulize = std::env::var_os("COSMOLKIT_TRACE_KEKULIZE").is_some();
903 let all_atom_set = state.all_atoms.iter().copied().collect::<BTreeSet<_>>();
1060 let mut aromatic_edge_ids = vec![false; topology.bonds.len()];
1061 for (bond, _, _) in &state.aromatic_edges {
1062 aromatic_edge_ids[bond.index()] = true;
1063 }
1064 let mut adjacency = vec![Vec::<(AtomId, BondId)>::new(); topology.atoms.len()];
1065 for bond in &topology.bonds {
1066 let begin = bond.begin();
1067 let end = bond.end();
1068 if all_atom_set.contains(&begin) && all_atom_set.contains(&end) {
1069 adjacency[begin.index()].push((end, bond.id()));
1070 adjacency[end.index()].push((begin, bond.id()));
1071 }
1072 }
1073 for neighbors in &mut adjacency {
1074 neighbors.sort_by_key(|(atom, _)| (atom_ranks[atom.index()], atom.index()));
1075 }
1076
1077 let sorted_atoms = worker_sorted_atoms(topology, &state.all_atoms, &all_atom_set, atom_ranks);
1078 if trace_kekulize {
1079 eprintln!(
1080 "kekulize-worker sortedAtms={:?}",
1081 sorted_atoms
1082 .iter()
1083 .map(|atom| atom.index())
1084 .collect::<Vec<_>>()
1085 );
1086 eprintln!(
1087 "kekulize-worker allAtms={:?} done={:?} candidates={:?} questions={:?}",
1088 state
1089 .all_atoms
1090 .iter()
1091 .map(|atom| atom.index())
1092 .collect::<Vec<_>>(),
1093 initial_done
1094 .iter()
1095 .map(|atom| atom.index())
1096 .collect::<Vec<_>>(),
1097 state
1098 .candidate_atoms
1099 .iter()
1100 .map(|atom| atom.index())
1101 .collect::<Vec<_>>(),
1102 state
1103 .questions
1104 .iter()
1105 .map(|atom| atom.index())
1106 .collect::<Vec<_>>()
1107 );
1108 }
1109
1110 let mut d_bnd_cands = vec![false; topology.atoms.len()];
1111 for atom in &state.candidate_atoms {
1112 if !switched_off.contains(atom) {
1113 d_bnd_cands[atom.index()] = true;
1114 }
1115 }
1116 let mut d_bnd_adds = vec![false; topology.bonds.len()];
1117 let mut local_bonds_added = vec![false; topology.bonds.len()];
1118 let mut done = initial_done.to_vec();
1119 let mut done_flags = vec![false; topology.atoms.len()];
1120 for atom in &done {
1121 done_flags[atom.index()] = true;
1122 }
1123 let mut astack = VecDeque::<AtomId>::new();
1124 let mut astack_flags = vec![false; topology.atoms.len()];
1125 let mut options = BTreeMap::<AtomId, VecDeque<(AtomId, BondId)>>::new();
1126 let mut last_opt = None::<AtomId>;
1127 let mut btmoves = Vec::<AtomId>::new();
1128 let mut matched = BTreeSet::<BondId>::new();
1129 let mut num_bt = 0usize;
1130
1131 while done.len() < sorted_atoms.len() || !astack.is_empty() {
1132 let curr = if let Some(curr) = astack.pop_front() {
1133 astack_flags[curr.index()] = false;
1134 curr
1135 } else {
1136 sorted_atoms
1137 .iter()
1138 .copied()
1139 .find(|atom| !done_flags[atom.index()])
1140 .expect("starting point not found")
1141 };
1142 done.push(curr);
1143 done_flags[curr.index()] = true;
1144 let c_cand = d_bnd_cands[curr.index()];
1145 let mut opts = if let Some(saved) = options.get(&curr) {
1146 saved.clone()
1147 } else {
1148 let mut lstack = VecDeque::new();
1149 let mut opts_v = Vec::<(AtomId, BondId)>::new();
1150 let mut wedged_opts_v = Vec::<(AtomId, BondId)>::new();
1151 for &(nbr_idx, nbr_bond) in &adjacency[curr.index()] {
1152 if done_flags[nbr_idx.index()] {
1153 continue;
1154 }
1155 if !astack_flags[nbr_idx.index()] {
1156 lstack.push_back(nbr_idx);
1157 astack_flags[nbr_idx.index()] = true;
1158 }
1159 let bond = &topology.bonds[nbr_bond.index()];
1160 if c_cand
1161 && d_bnd_cands[nbr_idx.index()]
1162 && (aromatic_edge_ids[nbr_bond.index()]
1163 || kekulize_is_aromatic_bond(bond)
1164 || topology.atoms[curr.index()].atomic_number() == 0
1165 || topology.atoms[nbr_idx.index()].atomic_number() == 0)
1166 {
1167 if matches!(
1168 bond.direction(),
1169 BondDirection::BeginWedge | BondDirection::BeginDash
1170 ) {
1171 wedged_opts_v.push((nbr_idx, nbr_bond));
1172 } else {
1173 opts_v.push((nbr_idx, nbr_bond));
1174 }
1175 }
1176 }
1177 let mut computed = VecDeque::new();
1178 computed.extend(opts_v);
1179 computed.extend(wedged_opts_v);
1180 astack.extend(lstack);
1181 computed
1182 };
1183 if trace_kekulize {
1184 eprintln!(
1185 "kekulize-worker step curr={} cCand={} opts={:?} astack={:?} done={:?}",
1186 curr.index(),
1187 c_cand,
1188 opts.iter()
1189 .map(|(atom, bond)| (atom.index(), bond.index()))
1190 .collect::<Vec<_>>(),
1191 astack.iter().map(|atom| atom.index()).collect::<Vec<_>>(),
1192 done.iter().map(|atom| atom.index()).collect::<Vec<_>>()
1193 );
1194 }
1195 if c_cand {
1196 if let Some((ncnd, bond_id)) = opts.pop_front() {
1197 d_bnd_cands[curr.index()] = false;
1198 d_bnd_cands[ncnd.index()] = false;
1199 d_bnd_adds[bond_id.index()] = true;
1200 local_bonds_added[bond_id.index()] = true;
1201 matched.insert(bond_id);
1202 if trace_kekulize {
1203 eprintln!(
1204 "kekulize-worker add curr={} ncnd={} bond={}",
1205 curr.index(),
1206 ncnd.index(),
1207 bond_id.index()
1208 );
1209 }
1210 if options.contains_key(&curr) {
1211 if opts.is_empty() {
1212 options.remove(&curr);
1213 let _ = btmoves.pop();
1214 last_opt = btmoves.last().copied();
1215 } else {
1216 options.insert(curr, opts);
1217 }
1218 } else if !opts.is_empty() {
1219 last_opt = Some(curr);
1220 btmoves.push(curr);
1221 options.insert(curr, opts);
1222 }
1223 } else if topology.atoms[curr.index()].atomic_number() != 0 {
1224 if let Some(last_opt_atom) = last_opt {
1225 if num_bt < max_backtracks {
1226 if trace_kekulize {
1227 eprintln!(
1228 "kekulize-worker backtrack curr={} lastOpt={} numBT={}",
1229 curr.index(),
1230 last_opt_atom.index(),
1231 num_bt
1232 );
1233 }
1234 back_track(
1235 topology,
1236 &mut options,
1237 last_opt_atom,
1238 &mut done,
1239 &mut done_flags,
1240 &mut astack,
1241 &mut astack_flags,
1242 &mut d_bnd_cands,
1243 &mut d_bnd_adds,
1244 &mut matched,
1245 );
1246 num_bt += 1;
1247 } else {
1248 for (bond_idx, was_added) in local_bonds_added.iter().enumerate() {
1249 if *was_added {
1250 matched.remove(&BondId::new(bond_idx));
1251 }
1252 }
1253 return None;
1254 }
1255 } else {
1256 for (bond_idx, was_added) in local_bonds_added.iter().enumerate() {
1257 if *was_added {
1258 matched.remove(&BondId::new(bond_idx));
1259 }
1260 }
1261 return None;
1262 }
1263 }
1264 }
1265 }
1266 Some(matched)
1267}
1268
1269fn permute_dummies_and_match(
1270 topology: &TopologyBlock,
1271 state: &KekulizeCandidateState,
1272 questions: &[AtomId],
1273 atom_ranks: &[usize],
1274 max_backtracks: usize,
1275) -> Option<BTreeSet<BondId>> {
1276 if questions.is_empty() {
1335 return None;
1336 }
1337 for switched_off in question_switch_masks(questions) {
1338 if let Some(matched) = kekulize_worker_matching(
1339 topology,
1340 state,
1341 &[],
1342 atom_ranks,
1343 max_backtracks,
1344 &switched_off,
1345 ) {
1346 return Some(matched);
1347 }
1348 }
1349 None
1350}
1351
1352fn back_track(
1353 topology: &TopologyBlock,
1354 _options: &mut BTreeMap<AtomId, VecDeque<(AtomId, BondId)>>,
1355 last_opt: AtomId,
1356 done: &mut Vec<AtomId>,
1357 done_flags: &mut [bool],
1358 aqueue: &mut VecDeque<AtomId>,
1359 astack_flags: &mut [bool],
1360 d_bnd_cands: &mut [bool],
1361 d_bnd_adds: &mut [bool],
1362 matched: &mut BTreeSet<BondId>,
1363) {
1364 let split = done
1372 .iter()
1373 .position(|atom| *atom == last_opt)
1374 .expect("lastOpt must exist in done");
1375 let tdone = done[..split].to_vec();
1376 for atom in done[split..].iter().rev() {
1382 aqueue.push_front(*atom);
1383 }
1384 for atom in done.iter().skip(split) {
1407 done_flags[atom.index()] = false;
1408 }
1409 for atom in done[split..].iter().rev() {
1410 astack_flags[atom.index()] = true;
1411 }
1412 for bond in &topology.bonds {
1413 let bi = bond.id().index();
1414 if d_bnd_adds[bi] {
1415 let aid1 = bond.begin();
1416 let aid2 = bond.end();
1417 if !tdone.contains(&aid1) && !tdone.contains(&aid2) {
1418 d_bnd_adds[bi] = false;
1419 matched.remove(&bond.id());
1420 d_bnd_cands[aid1.index()] = true;
1421 d_bnd_cands[aid2.index()] = true;
1422 }
1423 }
1424 }
1425 *done = tdone;
1426 }
1429
1430fn worker_sorted_atoms(
1431 topology: &TopologyBlock,
1432 all_atoms: &[AtomId],
1433 all_atom_set: &BTreeSet<AtomId>,
1434 atom_ranks: &[usize],
1435) -> Vec<AtomId> {
1436 let mut wedge_end_atoms = BTreeSet::new();
1437 for bond in &topology.bonds {
1438 if matches!(
1439 bond.direction(),
1440 BondDirection::BeginWedge | BondDirection::BeginDash
1441 ) && all_atom_set.contains(&bond.end())
1442 {
1443 wedge_end_atoms.insert(bond.end());
1444 }
1445 }
1446
1447 let mut sorted_atoms = all_atoms.to_vec();
1448 sorted_atoms.sort_by(|left, right| {
1449 match (
1450 wedge_end_atoms.contains(left),
1451 wedge_end_atoms.contains(right),
1452 ) {
1453 (true, false) => std::cmp::Ordering::Less,
1454 (false, true) => std::cmp::Ordering::Greater,
1455 _ => (atom_ranks[left.index()], left.index())
1456 .cmp(&(atom_ranks[right.index()], right.index())),
1457 }
1458 });
1459 sorted_atoms
1460}
1461
1462fn question_switch_masks(questions: &[AtomId]) -> Vec<BTreeSet<AtomId>> {
1463 let question_count = questions.len();
1464 (1usize..(1usize << question_count))
1465 .map(|mask| {
1466 questions
1467 .iter()
1468 .enumerate()
1469 .filter_map(|(idx, atom)| ((mask & (1usize << idx)) != 0).then_some(*atom))
1470 .collect()
1471 })
1472 .collect()
1473}
1474
1475fn rdkit_kekulize_default_valence(atomic_number: u8) -> Result<i32, ValenceError> {
1476 crate::valence::rdkit_default_valence(atomic_number)
1477}
1478
1479#[allow(dead_code)]
1482fn kekulize_if_possible_assignment(
1483 molecule: &Molecule,
1484 clear_aromatic_flags: bool,
1485 canonical: bool,
1486 max_backtracks: usize,
1487) -> Result<Option<KekulizeAssignment>, KekulizeError> {
1488 match kekulize_assignment(
1525 molecule,
1526 None,
1527 clear_aromatic_flags,
1528 canonical,
1529 max_backtracks,
1530 ) {
1531 Ok(assignment) => Ok(Some(assignment)),
1532 Err(error) if kekulize_if_possible_is_recoverable_failure(&error) => Ok(None),
1533 Err(error) => Err(error),
1534 }
1535}
1536
1537#[allow(dead_code)]
1540fn kekulize_if_possible_value(
1541 molecule: &Molecule,
1542 clear_aromatic_flags: bool,
1543 canonical: bool,
1544 max_backtracks: usize,
1545) -> Result<(Molecule, bool), KekulizeError> {
1546 match kekulize_if_possible_assignment(
1547 molecule,
1548 clear_aromatic_flags,
1549 canonical,
1550 max_backtracks,
1551 )? {
1552 Some(assignment) => {
1553 let mut kekulized = molecule.clone();
1554 apply_kekulize_assignment(kekulized.topology_block_mut(), &assignment);
1555 Ok((kekulized, true))
1556 }
1557 None => Ok((molecule.clone(), false)),
1558 }
1559}
1560
1561fn kekulize_if_possible_is_recoverable_failure(error: &KekulizeError) -> bool {
1562 matches!(
1563 error,
1564 KekulizeError::UnkekulizedAtoms { .. }
1565 | KekulizeError::NonRingAromaticAtom { .. }
1566 | KekulizeError::ValenceChanged { .. }
1567 | KekulizeError::Valence(_)
1568 | KekulizeError::RingFinding(_)
1569 )
1570}
1571
1572fn rdkit_is_early_atom(atomic_number: u8) -> bool {
1573 matches!(
1700 atomic_number,
1701 3 | 4
1702 | 5
1703 | 11
1704 | 12
1705 | 13
1706 | 19
1707 | 20
1708 | 21
1709 | 22
1710 | 30
1711 | 31
1712 | 32
1713 | 37
1714 | 38
1715 | 39
1716 | 40
1717 | 41
1718 | 48
1719 | 49
1720 | 50
1721 | 51
1722 | 55
1723 | 56
1724 | 57
1725 | 58
1726 | 59
1727 | 60
1728 | 61
1729 | 72
1730 | 73
1731 | 80
1732 | 81
1733 | 82
1734 | 83
1735 | 87
1736 | 88
1737 | 89
1738 | 90
1739 | 91
1740 | 92
1741 | 93
1742 | 104
1743 | 105
1744 | 106
1745 | 107
1746 | 108
1747 | 109
1748 | 110
1749 | 111
1750 | 112
1751 | 113
1752 | 114
1753 | 115
1754 | 116
1755 | 117
1756 | 118
1757 )
1758}
1759
1760fn ordered_kekulize_rings(
1761 topology: &TopologyBlock,
1762 rings: &RingInfo,
1763 atoms_to_use: &[bool],
1764 bonds_to_use: &[bool],
1765 wedged_atoms: &BTreeSet<AtomId>,
1766) -> Vec<KekulizeRing> {
1767 let mut front = Vec::new();
1768 let mut back = Vec::new();
1769 for ring_idx in 0..rings.num_rings() {
1770 let atoms = &rings.atom_rings()[ring_idx];
1771 let bonds = &rings.bond_rings()[ring_idx];
1772 let contains_non_dummy = atoms
1773 .iter()
1774 .all(|atom| atom.index() < topology.atoms.len() && atoms_to_use[atom.index()])
1775 && atoms
1776 .iter()
1777 .any(|atom| topology.atoms[atom.index()].atomic_number() != 0);
1778 if !contains_non_dummy {
1779 continue;
1780 }
1781 if !bonds.iter().all(|bond| bonds_to_use[bond.index()]) {
1782 continue;
1783 }
1784 let start_pos = atoms
1785 .iter()
1786 .position(|atom| wedged_atoms.contains(atom))
1787 .unwrap_or(0);
1788 let rotated_atoms = (0..atoms.len())
1789 .map(|offset| atoms[(offset + start_pos) % atoms.len()])
1790 .collect::<Vec<_>>();
1791 let entry = KekulizeRing {
1792 atoms: rotated_atoms,
1793 bonds: bonds.clone(),
1794 source_ring: ring_idx,
1795 };
1796 if start_pos == 0 && !atoms.iter().any(|atom| wedged_atoms.contains(atom)) {
1797 back.push(entry);
1798 } else {
1799 front.push(entry);
1800 }
1801 }
1802 front.extend(back);
1803 front
1804}
1805
1806fn fused_ring_systems(rings: &[KekulizeRing]) -> Vec<Vec<usize>> {
1807 let mut neighbor_map = vec![Vec::<usize>::new(); rings.len()];
1828 for left in 0..rings.len() {
1829 for right in (left + 1)..rings.len() {
1830 if rings_share_bond(rings, left, right) {
1831 neighbor_map[left].push(right);
1832 neighbor_map[right].push(left);
1833 }
1834 }
1835 }
1836
1837 let mut systems = Vec::<Vec<usize>>::new();
1852 let mut done = vec![false; rings.len()];
1853 let mut curr = 0usize;
1854 while curr < rings.len() {
1855 let mut fused = Vec::new();
1856 pick_fused_rings_rdkit_order(curr, &neighbor_map, &mut fused, &mut done);
1857 systems.push(fused);
1858
1859 let Some(next) = done.iter().position(|is_done| !*is_done) else {
1860 break;
1861 };
1862 curr = next;
1863 }
1864 systems
1865}
1866
1867fn pick_fused_rings_rdkit_order(
1868 curr: usize,
1869 neighbor_map: &[Vec<usize>],
1870 result: &mut Vec<usize>,
1871 done: &mut [bool],
1872) {
1873 done[curr] = true;
1874 result.push(curr);
1875 for &neighbor in &neighbor_map[curr] {
1876 if !done[neighbor] {
1877 pick_fused_rings_rdkit_order(neighbor, neighbor_map, result, done);
1878 }
1879 }
1880}
1881
1882fn fused_ring_systems_stack_order_for_tests(rings: &[KekulizeRing]) -> Vec<Vec<usize>> {
1883 let mut systems = Vec::<Vec<usize>>::new();
1884 let mut seen = vec![false; rings.len()];
1885 for start in 0..rings.len() {
1886 if seen[start] {
1887 continue;
1888 }
1889 let mut stack = vec![start];
1890 let mut system = Vec::new();
1891 seen[start] = true;
1892 while let Some(ring) = stack.pop() {
1893 system.push(ring);
1894 for next in 0..rings.len() {
1895 if !seen[next] && rings_share_bond(rings, ring, next) {
1896 seen[next] = true;
1897 stack.push(next);
1898 }
1899 }
1900 }
1901 systems.push(system);
1902 }
1903 systems
1904}
1905
1906fn rings_share_bond(rings: &[KekulizeRing], left: usize, right: usize) -> bool {
1907 rings[left]
1908 .bonds
1909 .iter()
1910 .any(|bond| rings[right].bonds.contains(bond))
1911}
1912
1913#[cfg(test)]
1914mod tests {
1915 use super::*;
1916 use crate::{
1917 AtomSpec, BondOrder, BondQueryPredicate, BondSpec, Element, MoleculeBuilder, QueryNode,
1918 symmetrize_sssr,
1919 };
1920
1921 fn kekulize_fragment_assignment(
1922 molecule: &Molecule,
1923 rings: &RingInfo,
1924 atoms_to_use: &[bool],
1925 bonds_to_use: Vec<bool>,
1926 clear_aromatic_flags: bool,
1927 canonical: bool,
1928 max_backtracks: usize,
1929 ) -> Result<KekulizeAssignment, KekulizeError> {
1930 super::kekulize_fragment_assignment(
1931 molecule.topology_block(),
1932 molecule.stereo_groups(),
1933 rings,
1934 None,
1935 atoms_to_use,
1936 bonds_to_use,
1937 clear_aromatic_flags,
1938 canonical,
1939 max_backtracks,
1940 )
1941 }
1942
1943 fn ordered_kekulize_rings(
1944 molecule: &Molecule,
1945 rings: &RingInfo,
1946 atoms_to_use: &[bool],
1947 bonds_to_use: &[bool],
1948 wedged_atoms: &BTreeSet<AtomId>,
1949 ) -> Vec<KekulizeRing> {
1950 super::ordered_kekulize_rings(
1951 molecule.topology_block(),
1952 rings,
1953 atoms_to_use,
1954 bonds_to_use,
1955 wedged_atoms,
1956 )
1957 }
1958
1959 fn mark_double_bond_candidates(
1960 molecule: &Molecule,
1961 rings: &RingInfo,
1962 kekulize_rings: &[KekulizeRing],
1963 fused_ring_indices: &[usize],
1964 assignment: &mut KekulizeAssignment,
1965 ) -> Result<KekulizeCandidateState, KekulizeError> {
1966 let valence = crate::valence::assign_valence(&molecule, crate::ValenceModel::RdkitLike)?;
1967 super::mark_double_bond_candidates(
1968 molecule.topology_block(),
1969 rings,
1970 kekulize_rings,
1971 fused_ring_indices,
1972 &valence,
1973 assignment,
1974 )
1975 }
1976
1977 fn kekulize_fused_assignment(
1978 molecule: &Molecule,
1979 rings: &RingInfo,
1980 kekulize_rings: &[KekulizeRing],
1981 fused_ring_indices: &[usize],
1982 atom_ranks: &[usize],
1983 max_backtracks: usize,
1984 assignment: &mut KekulizeAssignment,
1985 ) -> Result<(), KekulizeError> {
1986 let valence = crate::valence::assign_valence(&molecule, crate::ValenceModel::RdkitLike)?;
1987 super::kekulize_fused_assignment(
1988 molecule.topology_block(),
1989 rings,
1990 kekulize_rings,
1991 fused_ring_indices,
1992 &valence,
1993 atom_ranks,
1994 max_backtracks,
1995 assignment,
1996 )
1997 }
1998
1999 fn kekulize_worker_matching(
2000 molecule: &Molecule,
2001 state: &KekulizeCandidateState,
2002 initial_done: &[AtomId],
2003 atom_ranks: &[usize],
2004 max_backtracks: usize,
2005 switched_off: &BTreeSet<AtomId>,
2006 ) -> Option<BTreeSet<BondId>> {
2007 super::kekulize_worker_matching(
2008 molecule.topology_block(),
2009 state,
2010 initial_done,
2011 atom_ranks,
2012 max_backtracks,
2013 switched_off,
2014 )
2015 }
2016
2017 fn permute_dummies_and_match(
2018 molecule: &Molecule,
2019 state: &KekulizeCandidateState,
2020 questions: &[AtomId],
2021 atom_ranks: &[usize],
2022 max_backtracks: usize,
2023 ) -> Option<BTreeSet<BondId>> {
2024 super::permute_dummies_and_match(
2025 molecule.topology_block(),
2026 state,
2027 questions,
2028 atom_ranks,
2029 max_backtracks,
2030 )
2031 }
2032
2033 #[allow(clippy::too_many_arguments)]
2034 fn back_track(
2035 molecule: &Molecule,
2036 options: &mut BTreeMap<AtomId, VecDeque<(AtomId, BondId)>>,
2037 last_opt: AtomId,
2038 done: &mut Vec<AtomId>,
2039 done_flags: &mut [bool],
2040 aqueue: &mut VecDeque<AtomId>,
2041 astack_flags: &mut [bool],
2042 d_bnd_cands: &mut [bool],
2043 d_bnd_adds: &mut [bool],
2044 matched: &mut BTreeSet<BondId>,
2045 ) {
2046 super::back_track(
2047 molecule.topology_block(),
2048 options,
2049 last_opt,
2050 done,
2051 done_flags,
2052 aqueue,
2053 astack_flags,
2054 d_bnd_cands,
2055 d_bnd_adds,
2056 matched,
2057 );
2058 }
2059
2060 fn worker_sorted_atoms(
2061 molecule: &Molecule,
2062 all_atoms: &[AtomId],
2063 all_atom_set: &BTreeSet<AtomId>,
2064 atom_ranks: &[usize],
2065 ) -> Vec<AtomId> {
2066 super::worker_sorted_atoms(
2067 molecule.topology_block(),
2068 all_atoms,
2069 all_atom_set,
2070 atom_ranks,
2071 )
2072 }
2073
2074 fn build_fused_aromatic_naphthalene(atom_is_aromatic: bool) -> Molecule {
2075 let mut builder = MoleculeBuilder::new();
2076 let atoms = (0..10)
2077 .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(atom_is_aromatic)))
2078 .collect::<Vec<_>>();
2079 for &(begin, end) in &[
2080 (0, 1),
2081 (1, 2),
2082 (2, 3),
2083 (3, 4),
2084 (4, 5),
2085 (5, 0),
2086 (4, 6),
2087 (6, 7),
2088 (7, 8),
2089 (8, 9),
2090 (9, 5),
2091 ] {
2092 builder
2093 .add_bond(
2094 BondSpec::new(atoms[begin], atoms[end], BondOrder::Aromatic)
2095 .with_aromatic(true),
2096 )
2097 .unwrap();
2098 }
2099 builder.build().unwrap()
2100 }
2101
2102 fn build_fused_dummy_aromatic_naphthalene() -> Molecule {
2103 let mut builder = MoleculeBuilder::new();
2104 let mut atoms = (0..10)
2105 .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
2106 .collect::<Vec<_>>();
2107 atoms[0] = builder.add_atom(AtomSpec::new(Element::DUMMY));
2108 for &(begin, end) in &[
2109 (0, 1),
2110 (1, 2),
2111 (2, 3),
2112 (3, 4),
2113 (4, 5),
2114 (5, 0),
2115 (4, 6),
2116 (6, 7),
2117 (7, 8),
2118 (8, 9),
2119 (9, 5),
2120 ] {
2121 builder
2122 .add_bond(
2123 BondSpec::new(atoms[begin], atoms[end], BondOrder::Aromatic)
2124 .with_aromatic(true),
2125 )
2126 .unwrap();
2127 }
2128 builder.build().unwrap()
2129 }
2130
2131 fn fused_ring_inputs(molecule: &Molecule) -> (RingInfo, Vec<KekulizeRing>, Vec<Vec<usize>>) {
2132 let rings = symmetrize_sssr(molecule).unwrap();
2133 let kekulize_rings = ordered_kekulize_rings(
2134 molecule,
2135 &rings,
2136 &vec![true; molecule.num_atoms()],
2137 &vec![true; molecule.num_bonds()],
2138 &std::collections::BTreeSet::new(),
2139 );
2140 let ring_systems = fused_ring_systems(&kekulize_rings);
2141 (rings, kekulize_rings, ring_systems)
2142 }
2143
2144 #[test]
2145 fn kekulize_assignment_returns_empty_for_empty_molecule() {
2146 let molecule = Molecule::new();
2147
2148 let assignment = kekulize_assignment(&molecule, None, true, false, 100).unwrap();
2149
2150 assert!(assignment.is_empty());
2151 }
2152
2153 #[test]
2154 fn kekulize_assignment_kekulizes_benzene_like_aromatic_cycle() {
2155 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2156
2157 let assignment = kekulize_assignment(&molecule, None, true, false, 100).unwrap();
2158 let double_bonds = molecule
2159 .bonds()
2160 .iter()
2161 .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2162 .count();
2163 let single_bonds = molecule
2164 .bonds()
2165 .iter()
2166 .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Single))
2167 .count();
2168
2169 assert_eq!(double_bonds, 3);
2170 assert_eq!(single_bonds, 3);
2171 assert!(
2172 molecule
2173 .bonds()
2174 .iter()
2175 .all(|bond| assignment.bond_aromatic_flag(bond.id()) == Some(false))
2176 );
2177 assert!(
2178 molecule
2179 .atoms()
2180 .iter()
2181 .all(|atom| assignment.atom_aromatic_flag(atom.id()) == Some(false))
2182 );
2183 }
2184
2185 #[test]
2186 fn kekulize_assignment_preserves_aromatic_flags_when_clear_is_disabled_like_rdkit() {
2187 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2188
2189 let assignment = kekulize_assignment(&molecule, None, false, false, 100).unwrap();
2190
2191 let double_bonds = molecule
2192 .bonds()
2193 .iter()
2194 .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2195 .count();
2196 let single_bonds = molecule
2197 .bonds()
2198 .iter()
2199 .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Single))
2200 .count();
2201
2202 assert_eq!(double_bonds, 3);
2203 assert_eq!(single_bonds, 3);
2204 assert!(
2205 molecule
2206 .bonds()
2207 .iter()
2208 .all(|bond| assignment.bond_aromatic_flag(bond.id()).is_none())
2209 );
2210 assert!(
2211 molecule
2212 .atoms()
2213 .iter()
2214 .all(|atom| assignment.atom_aromatic_flag(atom.id()).is_none())
2215 );
2216 }
2217
2218 #[test]
2219 fn kekulize_assignment_noncanonical_keeps_atom_index_seed_for_benzene_like_rdkit_core_api() {
2220 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2221
2222 let assignment = kekulize_assignment(&molecule, None, false, false, 100).unwrap();
2223 let bond_orders = molecule
2224 .bonds()
2225 .iter()
2226 .map(|bond| assignment.bond_order(bond.id()).unwrap())
2227 .collect::<Vec<_>>();
2228
2229 assert_eq!(
2230 bond_orders,
2231 vec![
2232 BondOrder::Double,
2233 BondOrder::Single,
2234 BondOrder::Double,
2235 BondOrder::Single,
2236 BondOrder::Double,
2237 BondOrder::Single
2238 ]
2239 );
2240 }
2241
2242 #[test]
2243 fn kekulize_assignment_fragment_returns_empty_when_atoms_to_use_is_empty_like_rdkit() {
2244 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2245 let rings = symmetrize_sssr(&molecule).unwrap();
2246
2247 let assignment = kekulize_fragment_assignment(
2248 &molecule,
2249 &rings,
2250 &[false; 6],
2251 vec![true; molecule.num_bonds()],
2252 true,
2253 false,
2254 100,
2255 )
2256 .unwrap();
2257
2258 assert!(assignment.is_empty());
2259 }
2260
2261 #[test]
2262 fn kekulize_assignment_fragment_allows_partial_scope_non_ring_aromatic_atom_like_rdkit() {
2263 let mut builder = MoleculeBuilder::new();
2264 let a0 = builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true));
2265 let _a1 = builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true));
2266 let molecule = builder.build().unwrap();
2267 let rings = symmetrize_sssr(&molecule).unwrap();
2268
2269 let assignment = kekulize_fragment_assignment(
2270 &molecule,
2271 &rings,
2272 &[true, false],
2273 vec![],
2274 true,
2275 false,
2276 100,
2277 )
2278 .unwrap();
2279
2280 assert_eq!(assignment.atom_aromatic_flag(a0), Some(false));
2281 }
2282
2283 #[test]
2284 fn kekulize_fragment_rejects_bitset_size_mismatch() {
2285 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2286 let rings = symmetrize_sssr(&molecule).unwrap();
2287
2288 let error = kekulize_fragment_assignment(
2289 &molecule,
2290 &rings,
2291 &[true; 5],
2292 vec![true; molecule.num_bonds()],
2293 true,
2294 false,
2295 100,
2296 )
2297 .unwrap_err();
2298
2299 assert!(matches!(
2300 error,
2301 KekulizeError::FragmentBitsetSizeMismatch { atoms: 5, bonds: 6 }
2302 ));
2303 }
2304
2305 #[test]
2306 fn kekulize_fragment_rejects_bond_mask_size_mismatch_like_rdkit() {
2307 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2308 let rings = symmetrize_sssr(&molecule).unwrap();
2309
2310 let error = kekulize_fragment_assignment(
2311 &molecule,
2312 &rings,
2313 &[true; 6],
2314 vec![true; 5],
2315 true,
2316 false,
2317 100,
2318 )
2319 .unwrap_err();
2320
2321 assert!(matches!(
2322 error,
2323 KekulizeError::FragmentBitsetSizeMismatch { atoms: 6, bonds: 5 }
2324 ));
2325 }
2326
2327 #[test]
2328 fn kekulize_fragment_filters_out_rings_outside_selected_atom_mask_like_rdkit() {
2329 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2330 let rings = symmetrize_sssr(&molecule).unwrap();
2331 let mut atoms_to_use = vec![true; molecule.num_atoms()];
2332 atoms_to_use[0] = false;
2333
2334 let assignment = kekulize_fragment_assignment(
2335 &molecule,
2336 &rings,
2337 &atoms_to_use,
2338 vec![true; molecule.num_bonds()],
2339 false,
2340 false,
2341 100,
2342 )
2343 .unwrap();
2344
2345 assert!(assignment.bond_orders.iter().all(Option::is_none));
2346 }
2347
2348 #[test]
2349 fn kekulize_fragment_clears_pyrrolic_explicit_hydrogen_state_like_rdkit() {
2350 let molecule = Molecule::from_smiles_with_sanitize("c1cc[nH]c1", false).unwrap();
2351 let rings = symmetrize_sssr(&molecule).unwrap();
2352 let pyrrolic = molecule
2353 .atoms()
2354 .iter()
2355 .find(|atom| atom.atomic_number() == 7)
2356 .map(|atom| atom.id())
2357 .unwrap();
2358
2359 let assignment = kekulize_fragment_assignment(
2360 &molecule,
2361 &rings,
2362 &vec![true; molecule.num_atoms()],
2363 vec![true; molecule.num_bonds()],
2364 true,
2365 false,
2366 100,
2367 )
2368 .unwrap();
2369
2370 assert_eq!(assignment.atom_aromatic_flag(pyrrolic), Some(false));
2371 assert_eq!(assignment.atom_no_implicit(pyrrolic), Some(false));
2372 assert_eq!(assignment.atom_explicit_hydrogens(pyrrolic), Some(0));
2373 }
2374
2375 #[test]
2376 fn kekulize_assignment_runs_canonical_fragment_ranking_for_plain_aromatic_cycle() {
2377 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2378
2379 let assignment = kekulize_assignment(&molecule, None, true, true, 100).unwrap();
2380
2381 let double_bonds = molecule
2382 .bonds()
2383 .iter()
2384 .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2385 .count();
2386 let single_bonds = molecule
2387 .bonds()
2388 .iter()
2389 .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Single))
2390 .count();
2391 assert_eq!(double_bonds, 3);
2392 assert_eq!(single_bonds, 3);
2393 }
2394
2395 #[test]
2396 fn kekulize_if_possible_returns_assignment_for_kekulizable_molecule() {
2397 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2398
2399 let assignment = kekulize_if_possible_assignment(&molecule, true, false, 100)
2400 .unwrap()
2401 .unwrap();
2402
2403 let double_bonds = molecule
2404 .bonds()
2405 .iter()
2406 .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2407 .count();
2408 assert_eq!(double_bonds, 3);
2409 }
2410
2411 #[test]
2412 fn kekulize_if_possible_preserves_aromatic_flags_when_clear_is_disabled() {
2413 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2414
2415 let assignment = kekulize_if_possible_assignment(&molecule, false, false, 100)
2416 .unwrap()
2417 .unwrap();
2418
2419 assert!(
2420 molecule
2421 .bonds()
2422 .iter()
2423 .all(|bond| assignment.bond_aromatic_flag(bond.id()).is_none())
2424 );
2425 assert!(
2426 molecule
2427 .atoms()
2428 .iter()
2429 .all(|atom| assignment.atom_aromatic_flag(atom.id()).is_none())
2430 );
2431 }
2432
2433 #[test]
2434 fn kekulize_if_possible_runs_canonical_fragment_ranking_for_plain_aromatic_cycle() {
2435 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2436
2437 let assignment = kekulize_if_possible_assignment(&molecule, true, true, 100)
2438 .unwrap()
2439 .unwrap();
2440
2441 let double_bonds = molecule
2442 .bonds()
2443 .iter()
2444 .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2445 .count();
2446 assert_eq!(double_bonds, 3);
2447 }
2448
2449 #[test]
2450 fn kekulize_if_possible_assignment_returns_none_for_recoverable_failure() {
2451 let mut builder = MoleculeBuilder::new();
2452 let atoms: Vec<_> = (0..5)
2453 .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
2454 .collect();
2455 for i in 0..5 {
2456 builder
2457 .add_bond(
2458 BondSpec::new(atoms[i], atoms[(i + 1) % 5], BondOrder::Aromatic)
2459 .with_aromatic(true),
2460 )
2461 .unwrap();
2462 }
2463 let molecule = builder.build().unwrap();
2464
2465 let assignment = kekulize_if_possible_assignment(&molecule, true, false, 100).unwrap();
2466
2467 assert!(assignment.is_none());
2468 }
2469
2470 #[test]
2471 fn kekulize_if_possible_value_returns_kekulized_clone_on_success() {
2472 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2473
2474 let (kekulized, changed) = kekulize_if_possible_value(&molecule, true, false, 100).unwrap();
2475
2476 assert!(changed);
2477 assert!(
2478 kekulized
2479 .bonds()
2480 .iter()
2481 .any(|bond| bond.order() == BondOrder::Double)
2482 );
2483 assert!(kekulized.bonds().iter().all(|bond| !bond.is_aromatic()));
2484 assert!(
2485 molecule
2486 .bonds()
2487 .iter()
2488 .all(|bond| bond.order() == BondOrder::Aromatic && bond.is_aromatic())
2489 );
2490 }
2491
2492 #[test]
2493 fn kekulize_if_possible_value_returns_original_clone_when_kekulization_fails() {
2494 let mut builder = MoleculeBuilder::new();
2495 let atoms = (0..5)
2496 .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
2497 .collect::<Vec<_>>();
2498 for idx in 0..5 {
2499 builder
2500 .add_bond(
2501 BondSpec::new(atoms[idx], atoms[(idx + 1) % 5], BondOrder::Aromatic)
2502 .with_aromatic(true),
2503 )
2504 .unwrap();
2505 }
2506 let molecule = builder.build().unwrap();
2507
2508 let (restored, changed) = kekulize_if_possible_value(&molecule, true, false, 100).unwrap();
2509
2510 assert!(!changed);
2511 assert_eq!(restored.bonds().len(), molecule.bonds().len());
2512 assert!(restored.bonds().iter().zip(molecule.bonds().iter()).all(
2513 |(restored_bond, original_bond)| restored_bond.order() == original_bond.order()
2514 && restored_bond.is_aromatic() == original_bond.is_aromatic()
2515 ));
2516 assert!(
2517 restored
2518 .atoms()
2519 .iter()
2520 .zip(molecule.atoms().iter())
2521 .all(|(restored_atom, original_atom)| restored_atom.is_aromatic()
2522 == original_atom.is_aromatic())
2523 );
2524 }
2525
2526 #[test]
2527 fn ordered_kekulize_rings_prioritizes_wedged_rings_like_rdkit() {
2528 let mut builder = MoleculeBuilder::new();
2529 let ring_one = (0..3)
2530 .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
2531 .collect::<Vec<_>>();
2532 let ring_two = (0..3)
2533 .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
2534 .collect::<Vec<_>>();
2535 for idx in 0..3 {
2536 builder
2537 .add_bond(
2538 BondSpec::new(ring_one[idx], ring_one[(idx + 1) % 3], BondOrder::Aromatic)
2539 .with_aromatic(true),
2540 )
2541 .unwrap();
2542 }
2543 for idx in 0..3 {
2544 let mut bond =
2545 BondSpec::new(ring_two[idx], ring_two[(idx + 1) % 3], BondOrder::Aromatic)
2546 .with_aromatic(true);
2547 if idx == 0 {
2548 bond = bond.with_direction(crate::BondDirection::BeginWedge);
2549 }
2550 builder.add_bond(bond).unwrap();
2551 }
2552 let molecule = builder.build().unwrap();
2553 let rings = symmetrize_sssr(&molecule).unwrap();
2554 let wedged_atoms = std::iter::once(ring_two[0]).collect::<std::collections::BTreeSet<_>>();
2555
2556 let ordered = ordered_kekulize_rings(
2557 &molecule,
2558 &rings,
2559 &vec![true; molecule.num_atoms()],
2560 &vec![true; molecule.num_bonds()],
2561 &wedged_atoms,
2562 );
2563
2564 assert_eq!(ordered.len(), 2);
2565 assert_eq!(ordered[0].source_ring, 1);
2566 }
2567
2568 #[test]
2569 fn ordered_kekulize_rings_rotates_wedged_ring_start_atom_like_rdkit() {
2570 let mut builder = MoleculeBuilder::new();
2571 let ring = (0..4)
2572 .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
2573 .collect::<Vec<_>>();
2574 for idx in 0..4 {
2575 let mut bond = BondSpec::new(ring[idx], ring[(idx + 1) % 4], BondOrder::Aromatic)
2576 .with_aromatic(true);
2577 if idx == 2 {
2578 bond = bond.with_direction(crate::BondDirection::BeginDash);
2579 }
2580 builder.add_bond(bond).unwrap();
2581 }
2582 let molecule = builder.build().unwrap();
2583 let rings = symmetrize_sssr(&molecule).unwrap();
2584 let wedged_atoms = std::iter::once(ring[2]).collect::<std::collections::BTreeSet<_>>();
2585
2586 let ordered = ordered_kekulize_rings(
2587 &molecule,
2588 &rings,
2589 &vec![true; molecule.num_atoms()],
2590 &vec![true; molecule.num_bonds()],
2591 &wedged_atoms,
2592 );
2593
2594 assert_eq!(ordered.len(), 1);
2595 assert_eq!(ordered[0].atoms[0], ring[2]);
2596 }
2597
2598 #[test]
2599 fn canonical_kekulize_handles_stereo_ranking_branch() {
2600 let mut builder = MoleculeBuilder::new();
2601 let atoms = (0..6)
2602 .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
2603 .collect::<Vec<_>>();
2604 for idx in 0..6 {
2605 builder
2606 .add_bond(
2607 BondSpec::new(atoms[idx], atoms[(idx + 1) % 6], BondOrder::Aromatic)
2608 .with_aromatic(true),
2609 )
2610 .unwrap();
2611 }
2612 let mut molecule = builder.build().unwrap();
2613 molecule.topology_block_mut().atoms[0].set_chiral_tag(crate::ChiralTag::TetrahedralCw);
2614
2615 let assignment = kekulize_assignment(&molecule, None, true, true, 100).unwrap();
2616
2617 let double_bonds = molecule
2618 .bonds()
2619 .iter()
2620 .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2621 .count();
2622 assert_eq!(double_bonds, 3);
2623 }
2624
2625 #[test]
2626 fn kekulize_assignment_handles_dummy_atom_permutation_branch() {
2627 let molecule = Molecule::from_smiles_with_sanitize("*1cccc1", false).unwrap();
2628
2629 let assignment = kekulize_assignment(&molecule, None, true, false, 100).unwrap();
2630
2631 let double_bonds = molecule
2632 .bonds()
2633 .iter()
2634 .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2635 .count();
2636 let single_bonds = molecule
2637 .bonds()
2638 .iter()
2639 .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Single))
2640 .count();
2641
2642 assert_eq!(double_bonds, 2, "{:?}", assignment.bond_orders);
2643 assert_eq!(single_bonds, 1, "{:?}", assignment.bond_orders);
2644 }
2645
2646 #[test]
2647 fn kekulize_assignment_detects_aromaticity_from_bond_state_without_atom_flags() {
2648 let mut builder = MoleculeBuilder::new();
2649 let atoms = (0..6)
2650 .map(|_| builder.add_atom(AtomSpec::new(Element::C)))
2651 .collect::<Vec<_>>();
2652 for idx in 0..6 {
2653 builder
2654 .add_bond(
2655 BondSpec::new(atoms[idx], atoms[(idx + 1) % 6], BondOrder::Aromatic)
2656 .with_aromatic(true),
2657 )
2658 .unwrap();
2659 }
2660 let molecule = builder.build().unwrap();
2661
2662 let assignment = kekulize_assignment(&molecule, None, true, false, 100).unwrap();
2663
2664 let double_bonds = molecule
2665 .bonds()
2666 .iter()
2667 .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2668 .count();
2669 assert_eq!(double_bonds, 3);
2670 }
2671
2672 #[test]
2673 fn kekulize_assignment_detects_fused_aromaticity_from_bond_state_without_atom_flags() {
2674 let molecule = build_fused_aromatic_naphthalene(false);
2675
2676 let assignment = kekulize_assignment(&molecule, None, true, false, 100).unwrap();
2677
2678 let double_bonds = molecule
2679 .bonds()
2680 .iter()
2681 .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
2682 .count();
2683 assert_eq!(double_bonds, 5);
2684 }
2685
2686 #[test]
2687 fn kekulize_if_possible_mark_double_bond_candidates_tracks_dummy_atoms_in_fused_systems_like_rdkit()
2688 {
2689 let molecule = build_fused_dummy_aromatic_naphthalene();
2690 let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
2691 let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
2692
2693 let state = mark_double_bond_candidates(
2694 &molecule,
2695 &rings,
2696 &kekulize_rings,
2697 &ring_systems[0],
2698 &mut assignment,
2699 )
2700 .unwrap();
2701
2702 assert!(state.questions.contains(&AtomId::new(10)));
2703 }
2704
2705 #[test]
2706 fn kekulize_if_possible_mark_double_bond_candidates_marks_non_aromatic_atoms_done_inside_fused_systems()
2707 {
2708 let mut molecule = build_fused_aromatic_naphthalene(true);
2709 molecule.topology_block_mut().atoms[0].set_aromatic(false);
2710 for bond_idx in [0usize, 5usize] {
2711 molecule.topology_block_mut().bonds[bond_idx].set_aromatic(false);
2712 molecule.topology_block_mut().bonds[bond_idx].set_order(BondOrder::Single);
2713 }
2714 let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
2715 let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
2716
2717 let state = mark_double_bond_candidates(
2718 &molecule,
2719 &rings,
2720 &kekulize_rings,
2721 &ring_systems[0],
2722 &mut assignment,
2723 )
2724 .unwrap();
2725
2726 assert!(state.done.contains(&AtomId::new(0)));
2727 assert!(!state.candidate_atoms.contains(&AtomId::new(0)));
2728 }
2729
2730 #[test]
2731 fn kekulize_fused_returns_empty_candidate_state_without_aromatic_or_dummy_atoms() {
2732 let mut molecule = build_fused_aromatic_naphthalene(true);
2733 for atom in molecule.topology_block_mut().atoms.iter_mut() {
2734 atom.set_aromatic(false);
2735 }
2736 for bond in molecule.topology_block_mut().bonds.iter_mut() {
2737 bond.set_aromatic(false);
2738 bond.set_order(BondOrder::Single);
2739 }
2740 let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
2741 let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
2742
2743 let state = mark_double_bond_candidates(
2744 &molecule,
2745 &rings,
2746 &kekulize_rings,
2747 &ring_systems[0],
2748 &mut assignment,
2749 )
2750 .unwrap();
2751
2752 assert!(state.candidate_atoms.is_empty());
2753 assert!(state.aromatic_edges.is_empty());
2754 assert!(state.questions.is_empty());
2755 assert!(state.done.is_empty());
2756 }
2757
2758 #[test]
2759 fn kekulize_assignment_reports_problem_atoms_for_unmatchable_fused_system() {
2760 let mut builder = MoleculeBuilder::new();
2761 let atoms = (0..5)
2762 .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
2763 .collect::<Vec<_>>();
2764 for idx in 0..5 {
2765 builder
2766 .add_bond(
2767 BondSpec::new(atoms[idx], atoms[(idx + 1) % 5], BondOrder::Aromatic)
2768 .with_aromatic(true),
2769 )
2770 .unwrap();
2771 }
2772 let molecule = builder.build().unwrap();
2773 let rings = symmetrize_sssr(&molecule).unwrap();
2774 let kekulize_rings = ordered_kekulize_rings(
2775 &molecule,
2776 &rings,
2777 &vec![true; molecule.num_atoms()],
2778 &vec![true; molecule.num_bonds()],
2779 &std::collections::BTreeSet::new(),
2780 );
2781 let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
2782
2783 let error = kekulize_fused_assignment(
2784 &molecule,
2785 &rings,
2786 &kekulize_rings,
2787 &[0],
2788 &(0..molecule.num_atoms()).collect::<Vec<_>>(),
2789 100,
2790 &mut assignment,
2791 )
2792 .unwrap_err();
2793 assert!(matches!(
2794 error,
2795 KekulizeError::UnkekulizedAtoms { atoms }
2796 if atoms == vec![
2797 AtomId::new(0),
2798 AtomId::new(1),
2799 AtomId::new(2),
2800 AtomId::new(3),
2801 AtomId::new(4),
2802 ]
2803 ));
2804 }
2805
2806 #[test]
2807 fn kekulize_fused_assignment_sets_bond_orders_and_clears_directions_like_rdkit() {
2808 let mut molecule = build_fused_aromatic_naphthalene(true);
2809 for bond in molecule.topology_block_mut().bonds.iter_mut() {
2810 bond.set_direction(BondDirection::BeginWedge);
2811 }
2812 let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
2813 let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
2814
2815 kekulize_fused_assignment(
2816 &molecule,
2817 &rings,
2818 &kekulize_rings,
2819 &ring_systems[0],
2820 &(0..molecule.num_atoms()).collect::<Vec<_>>(),
2821 100,
2822 &mut assignment,
2823 )
2824 .unwrap();
2825
2826 let assigned_orders = molecule
2827 .bonds()
2828 .iter()
2829 .filter_map(|bond| assignment.bond_order(bond.id()))
2830 .collect::<Vec<_>>();
2831 let num_doubles = assigned_orders
2832 .iter()
2833 .filter(|order| **order == BondOrder::Double)
2834 .count();
2835 let num_singles = assigned_orders
2836 .iter()
2837 .filter(|order| **order == BondOrder::Single)
2838 .count();
2839
2840 assert_eq!(num_doubles, 5);
2841 assert_eq!(num_singles, 6);
2842 assert!(molecule.bonds().iter().any(|bond| {
2843 assignment.bond_order(bond.id()) == Some(BondOrder::Double)
2844 && assignment.bond_direction(bond.id()) == Some(BondDirection::None)
2845 }));
2846 }
2847
2848 #[test]
2849 fn back_track_rolls_back_local_bonds_and_keeps_saved_options_like_rdkit() {
2850 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2851 let mut options = std::collections::BTreeMap::from([
2852 (
2853 AtomId::new(1),
2854 std::collections::VecDeque::from([(AtomId::new(3), BondId::new(2))]),
2855 ),
2856 (
2857 AtomId::new(2),
2858 std::collections::VecDeque::from([(AtomId::new(4), BondId::new(3))]),
2859 ),
2860 ]);
2861 let mut done = vec![AtomId::new(0), AtomId::new(1), AtomId::new(2)];
2862 let mut done_flags = vec![false; molecule.num_atoms()];
2863 for atom in &done {
2864 done_flags[atom.index()] = true;
2865 }
2866 let mut aqueue = std::collections::VecDeque::new();
2867 let mut astack_flags = vec![false; molecule.num_atoms()];
2868 let mut d_bnd_cands = vec![false; molecule.num_atoms()];
2869 let mut d_bnd_adds = vec![false; molecule.num_bonds()];
2870 d_bnd_adds[0] = true;
2871 d_bnd_adds[1] = true;
2872 let mut matched = [BondId::new(0), BondId::new(1)]
2873 .into_iter()
2874 .collect::<std::collections::BTreeSet<_>>();
2875
2876 back_track(
2877 &molecule,
2878 &mut options,
2879 AtomId::new(1),
2880 &mut done,
2881 &mut done_flags,
2882 &mut aqueue,
2883 &mut astack_flags,
2884 &mut d_bnd_cands,
2885 &mut d_bnd_adds,
2886 &mut matched,
2887 );
2888
2889 assert_eq!(done, vec![AtomId::new(0)]);
2890 assert!(done_flags[0]);
2891 assert!(!done_flags[1]);
2892 assert!(!done_flags[2]);
2893 assert_eq!(
2894 aqueue.into_iter().collect::<Vec<_>>(),
2895 vec![AtomId::new(1), AtomId::new(2)]
2896 );
2897 assert!(astack_flags[1]);
2898 assert!(astack_flags[2]);
2899 assert!(d_bnd_adds[0]);
2900 assert!(!d_bnd_adds[1]);
2901 assert!(matched.contains(&BondId::new(0)));
2902 assert!(!matched.contains(&BondId::new(1)));
2903 assert!(d_bnd_cands[1]);
2904 assert!(d_bnd_cands[2]);
2905 assert_eq!(
2906 options.get(&AtomId::new(1)).cloned(),
2907 Some(std::collections::VecDeque::from([(
2908 AtomId::new(3),
2909 BondId::new(2),
2910 )]))
2911 );
2912 assert_eq!(
2913 options.get(&AtomId::new(2)).cloned(),
2914 Some(std::collections::VecDeque::from([(
2915 AtomId::new(4),
2916 BondId::new(3),
2917 )]))
2918 );
2919 }
2920
2921 #[test]
2922 fn kekulize_if_possible_worker_back_track_keeps_saved_options_for_replay() {
2923 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
2924 let expected = std::collections::VecDeque::from([(AtomId::new(3), BondId::new(2))]);
2925 let mut options = std::collections::BTreeMap::from([
2926 (AtomId::new(1), expected.clone()),
2927 (
2928 AtomId::new(2),
2929 std::collections::VecDeque::from([(AtomId::new(4), BondId::new(3))]),
2930 ),
2931 ]);
2932 let mut done = vec![AtomId::new(0), AtomId::new(1), AtomId::new(2)];
2933 let mut done_flags = vec![false; molecule.num_atoms()];
2934 for atom in &done {
2935 done_flags[atom.index()] = true;
2936 }
2937 let mut aqueue = std::collections::VecDeque::new();
2938 let mut astack_flags = vec![false; molecule.num_atoms()];
2939 let mut d_bnd_cands = vec![false; molecule.num_atoms()];
2940 let mut d_bnd_adds = vec![false; molecule.num_bonds()];
2941 d_bnd_adds[0] = true;
2942 let mut matched = std::collections::BTreeSet::from([BondId::new(0)]);
2943
2944 back_track(
2945 &molecule,
2946 &mut options,
2947 AtomId::new(1),
2948 &mut done,
2949 &mut done_flags,
2950 &mut aqueue,
2951 &mut astack_flags,
2952 &mut d_bnd_cands,
2953 &mut d_bnd_adds,
2954 &mut matched,
2955 );
2956
2957 assert_eq!(options.get(&AtomId::new(1)), Some(&expected));
2958 assert_eq!(matched, std::collections::BTreeSet::from([BondId::new(0)]));
2959 }
2960
2961 #[test]
2962 fn kekulize_if_possible_permutes_dummy_candidates_after_initial_worker_failure() {
2963 let molecule = Molecule::from_smiles_with_sanitize("*1cccc1", false).unwrap();
2964 let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
2965 let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
2966 let state = mark_double_bond_candidates(
2967 &molecule,
2968 &rings,
2969 &kekulize_rings,
2970 &ring_systems[0],
2971 &mut assignment,
2972 )
2973 .unwrap();
2974 let atom_ranks = (0..molecule.num_atoms()).collect::<Vec<_>>();
2975
2976 let initial = kekulize_worker_matching(
2977 &molecule,
2978 &state,
2979 &state.done,
2980 &atom_ranks,
2981 100,
2982 &std::collections::BTreeSet::new(),
2983 );
2984 let permuted =
2985 permute_dummies_and_match(&molecule, &state, &state.questions, &atom_ranks, 100);
2986
2987 assert!(initial.is_none());
2988 assert_eq!(
2989 permuted.as_ref().map(|matched| matched.len()),
2990 Some(2),
2991 "{permuted:?}"
2992 );
2993 }
2994
2995 #[test]
2996 fn permute_dummies_and_match_can_recover_without_backtracking_via_mask_switch() {
2997 let molecule = Molecule::from_smiles_with_sanitize("*1cccc1", false).unwrap();
2998 let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
2999 let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3000 let state = mark_double_bond_candidates(
3001 &molecule,
3002 &rings,
3003 &kekulize_rings,
3004 &ring_systems[0],
3005 &mut assignment,
3006 )
3007 .unwrap();
3008 let atom_ranks = (0..molecule.num_atoms()).collect::<Vec<_>>();
3009
3010 let permuted =
3011 permute_dummies_and_match(&molecule, &state, &state.questions, &atom_ranks, 0);
3012
3013 assert_eq!(permuted.as_ref().map(|matched| matched.len()), Some(2));
3014 }
3015
3016 #[test]
3017 fn permute_dummies_and_match_returns_none_after_exhausting_all_switch_masks() {
3018 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
3019 let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3020 let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3021 let state = mark_double_bond_candidates(
3022 &molecule,
3023 &rings,
3024 &kekulize_rings,
3025 &ring_systems[0],
3026 &mut assignment,
3027 )
3028 .unwrap();
3029 let atom_ranks = (0..molecule.num_atoms()).collect::<Vec<_>>();
3030
3031 let permuted =
3032 permute_dummies_and_match(&molecule, &state, &[AtomId::new(0)], &atom_ranks, 100);
3033
3034 assert!(permuted.is_none());
3035 }
3036
3037 #[test]
3038 fn kekulize_worker_matching_succeeds_for_plain_aromatic_cycle() {
3039 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
3040 let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3041 let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3042 let state = mark_double_bond_candidates(
3043 &molecule,
3044 &rings,
3045 &kekulize_rings,
3046 &ring_systems[0],
3047 &mut assignment,
3048 )
3049 .unwrap();
3050 let atom_ranks = (0..molecule.num_atoms()).collect::<Vec<_>>();
3051
3052 let matched = kekulize_worker_matching(
3053 &molecule,
3054 &state,
3055 &state.done,
3056 &atom_ranks,
3057 100,
3058 &std::collections::BTreeSet::new(),
3059 );
3060
3061 assert_eq!(matched.as_ref().map(|set| set.len()), Some(3));
3062 }
3063
3064 #[test]
3065 fn kekulize_worker_matching_returns_none_when_backtracks_are_exhausted() {
3066 let molecule = Molecule::from_smiles_with_sanitize("*1cccc1", false).unwrap();
3067 let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3068 let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3069 let state = mark_double_bond_candidates(
3070 &molecule,
3071 &rings,
3072 &kekulize_rings,
3073 &ring_systems[0],
3074 &mut assignment,
3075 )
3076 .unwrap();
3077 let atom_ranks = (0..molecule.num_atoms()).collect::<Vec<_>>();
3078
3079 let matched = kekulize_worker_matching(
3080 &molecule,
3081 &state,
3082 &state.done,
3083 &atom_ranks,
3084 0,
3085 &std::collections::BTreeSet::new(),
3086 );
3087
3088 assert!(matched.is_none());
3089 }
3090
3091 #[test]
3092 fn kekulize_worker_matching_returns_empty_match_when_candidates_are_switched_off() {
3093 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
3094 let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3095 let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3096 let state = mark_double_bond_candidates(
3097 &molecule,
3098 &rings,
3099 &kekulize_rings,
3100 &ring_systems[0],
3101 &mut assignment,
3102 )
3103 .unwrap();
3104 let atom_ranks = (0..molecule.num_atoms()).collect::<Vec<_>>();
3105 let switched_off = state.candidate_atoms.clone();
3106
3107 let matched = kekulize_worker_matching(
3108 &molecule,
3109 &state,
3110 &state.done,
3111 &atom_ranks,
3112 100,
3113 &switched_off,
3114 );
3115
3116 assert_eq!(matched, Some(std::collections::BTreeSet::new()));
3117 }
3118
3119 #[test]
3120 fn permute_dummies_and_match_returns_none_without_questions() {
3121 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
3122 let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3123 let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3124 let state = mark_double_bond_candidates(
3125 &molecule,
3126 &rings,
3127 &kekulize_rings,
3128 &ring_systems[0],
3129 &mut assignment,
3130 )
3131 .unwrap();
3132 let atom_ranks = (0..molecule.num_atoms()).collect::<Vec<_>>();
3133
3134 let matched = permute_dummies_and_match(&molecule, &state, &[], &atom_ranks, 100);
3135
3136 assert!(matched.is_none());
3137 }
3138
3139 #[test]
3140 fn kekulize_if_possible_does_not_treat_port_shape_errors_as_recoverable() {
3141 assert!(!kekulize_if_possible_is_recoverable_failure(
3142 &KekulizeError::FragmentBitsetSizeMismatch { atoms: 1, bonds: 2 }
3143 ));
3144 assert!(!kekulize_if_possible_is_recoverable_failure(
3145 &KekulizeError::CanonicalRankSymbolSizeMismatch {
3146 kind: "atom",
3147 expected: 1,
3148 actual: 0,
3149 }
3150 ));
3151 }
3152
3153 #[test]
3154 fn question_switch_masks_follow_rdkit_bit_order() {
3155 let masks = question_switch_masks(&[AtomId::new(1), AtomId::new(3), AtomId::new(5)]);
3156
3157 assert_eq!(
3158 masks,
3159 vec![
3160 [AtomId::new(1)].into_iter().collect(),
3161 [AtomId::new(3)].into_iter().collect(),
3162 [AtomId::new(1), AtomId::new(3)].into_iter().collect(),
3163 [AtomId::new(5)].into_iter().collect(),
3164 [AtomId::new(1), AtomId::new(5)].into_iter().collect(),
3165 [AtomId::new(3), AtomId::new(5)].into_iter().collect(),
3166 [AtomId::new(1), AtomId::new(3), AtomId::new(5)]
3167 .into_iter()
3168 .collect(),
3169 ]
3170 );
3171 }
3172
3173 #[test]
3174 fn question_switch_masks_returns_empty_for_no_questions() {
3175 let masks = question_switch_masks(&[]);
3176
3177 assert!(masks.is_empty());
3178 }
3179
3180 #[test]
3181 fn worker_sorted_atoms_prioritizes_wedge_end_atoms_before_rank() {
3182 let mut builder = MoleculeBuilder::new();
3183 let atoms = (0..3)
3184 .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
3185 .collect::<Vec<_>>();
3186 builder
3187 .add_bond(
3188 BondSpec::new(atoms[0], atoms[1], BondOrder::Aromatic)
3189 .with_aromatic(true)
3190 .with_direction(crate::BondDirection::BeginWedge),
3191 )
3192 .unwrap();
3193 builder
3194 .add_bond(BondSpec::new(atoms[1], atoms[2], BondOrder::Aromatic).with_aromatic(true))
3195 .unwrap();
3196 let molecule = builder.build().unwrap();
3197 let all_atoms = vec![atoms[0], atoms[1], atoms[2]];
3198 let all_atom_set = all_atoms
3199 .iter()
3200 .copied()
3201 .collect::<std::collections::BTreeSet<_>>();
3202
3203 let sorted = worker_sorted_atoms(&molecule, &all_atoms, &all_atom_set, &[2, 10, 0]);
3204
3205 assert_eq!(sorted, vec![atoms[1], atoms[2], atoms[0]]);
3206 }
3207
3208 #[test]
3209 fn worker_sorted_atoms_uses_rank_order_when_no_wedge_bias_exists() {
3210 let mut builder = MoleculeBuilder::new();
3211 let atoms = (0..3)
3212 .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
3213 .collect::<Vec<_>>();
3214 builder
3215 .add_bond(BondSpec::new(atoms[0], atoms[1], BondOrder::Aromatic).with_aromatic(true))
3216 .unwrap();
3217 builder
3218 .add_bond(BondSpec::new(atoms[1], atoms[2], BondOrder::Aromatic).with_aromatic(true))
3219 .unwrap();
3220 let molecule = builder.build().unwrap();
3221 let all_atoms = vec![atoms[0], atoms[1], atoms[2]];
3222 let all_atom_set = all_atoms
3223 .iter()
3224 .copied()
3225 .collect::<std::collections::BTreeSet<_>>();
3226
3227 let sorted = worker_sorted_atoms(&molecule, &all_atoms, &all_atom_set, &[2, 0, 1]);
3228
3229 assert_eq!(sorted, vec![atoms[1], atoms[2], atoms[0]]);
3230 }
3231
3232 #[test]
3233 fn mark_double_bond_candidates_detects_dummy_and_aromatic_fused_state() {
3234 let molecule = build_fused_dummy_aromatic_naphthalene();
3235 let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3236 let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3237
3238 let state = mark_double_bond_candidates(
3239 &molecule,
3240 &rings,
3241 &kekulize_rings,
3242 &ring_systems[0],
3243 &mut assignment,
3244 )
3245 .unwrap();
3246
3247 assert!(!state.aromatic_edges.is_empty());
3248 assert!(!state.candidate_atoms.is_empty());
3249 assert!(!state.questions.is_empty());
3250 assert!(
3251 state
3252 .candidate_atoms
3253 .contains(&AtomId::new(molecule.atoms().len() - 1))
3254 );
3255 }
3256
3257 #[test]
3258 fn mark_double_bond_candidates_excludes_non_aromatic_ring_bond_from_aromatic_edges() {
3259 let mut molecule = build_fused_dummy_aromatic_naphthalene();
3260 molecule.topology_block_mut().bonds[0].set_aromatic(false);
3261 molecule.topology_block_mut().bonds[0].set_order(BondOrder::Single);
3262 let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3263 let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3264
3265 let state = mark_double_bond_candidates(
3266 &molecule,
3267 &rings,
3268 &kekulize_rings,
3269 &ring_systems[0],
3270 &mut assignment,
3271 )
3272 .unwrap();
3273
3274 assert!(
3275 !state
3276 .aromatic_edges
3277 .iter()
3278 .any(|(bond, _, _)| *bond == BondId::new(0))
3279 );
3280 assert_eq!(assignment.bond_order(BondId::new(0)), None);
3281 }
3282
3283 #[test]
3284 fn kekulize_assignment_excludes_bond_type_query_from_bonds_to_use() {
3285 let mut builder = MoleculeBuilder::new();
3286 let a0 = builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true));
3287 let a1 = builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true));
3288 let a2 = builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true));
3289 builder
3290 .add_bond(
3291 BondSpec::new(a0, a1, BondOrder::Aromatic)
3292 .with_aromatic(true)
3293 .with_query(QueryNode::predicate(BondQueryPredicate::OrderIn(vec![
3294 BondOrder::Single,
3295 BondOrder::Aromatic,
3296 ]))),
3297 )
3298 .unwrap();
3299 builder
3300 .add_bond(BondSpec::new(a1, a2, BondOrder::Aromatic).with_aromatic(true))
3301 .unwrap();
3302 builder
3303 .add_bond(BondSpec::new(a2, a0, BondOrder::Aromatic).with_aromatic(true))
3304 .unwrap();
3305 let molecule = builder.build().unwrap();
3306
3307 let assignment = kekulize_assignment(&molecule, None, true, false, 100).unwrap();
3308
3309 assert_eq!(assignment.bond_order(BondId::new(0)), None);
3310 assert_eq!(assignment.bond_aromatic_flag(BondId::new(0)), None);
3311 assert_eq!(assignment.bond_aromatic_flag(BondId::new(1)), Some(false));
3312 assert_eq!(assignment.bond_aromatic_flag(BondId::new(2)), Some(false));
3313 }
3314
3315 #[test]
3316 fn mark_double_bond_candidates_keeps_exocyclic_aryl_carbon_kekulizable_like_rdkit() {
3317 let molecule =
3318 Molecule::from_smiles("O=C1N(/N=C(/C)C1=NN/C2=C/C(OC)=CC=C2)C=3C=CC=CC=3").unwrap();
3319 let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3320 let phenyl_system = ring_systems
3321 .iter()
3322 .find(|system| {
3323 system.iter().any(|&ring_idx| {
3324 kekulize_rings[ring_idx]
3325 .atoms
3326 .iter()
3327 .any(|atom| atom.index() == 17)
3328 })
3329 })
3330 .unwrap();
3331 let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3332
3333 let state = mark_double_bond_candidates(
3334 &molecule,
3335 &rings,
3336 &kekulize_rings,
3337 phenyl_system,
3338 &mut assignment,
3339 )
3340 .unwrap();
3341
3342 assert!(state.candidate_atoms.contains(&AtomId::new(17)));
3343 assert!(kekulize_assignment(&molecule, None, true, true, 100).is_ok());
3344 }
3345
3346 #[test]
3347 fn mark_double_bond_candidates_excludes_aromatic_nh_in_row_134_like_rdkit() {
3348 let molecule = Molecule::from_smiles(
3349 "CC1=C(/C=C2\\C(C)=C3/C(C)=C(/C=C4\\C(C)=C5/C(CCC(=O)O)=C(C(C)=C5N4)C=C6\\C(CCC(=O)O)=C(C)C(=C6N2)C=C1)N3)C=C(\\C=C)C",
3350 )
3351 .unwrap();
3352 let (rings, kekulize_rings, ring_systems) = fused_ring_inputs(&molecule);
3353 let porphyrin_system = ring_systems
3354 .iter()
3355 .find(|system| {
3356 system.iter().any(|&ring_idx| {
3357 kekulize_rings[ring_idx]
3358 .atoms
3359 .iter()
3360 .any(|atom| matches!(atom.index(), 26 | 39 | 42))
3361 })
3362 })
3363 .unwrap();
3364 let mut assignment = KekulizeAssignment::empty(molecule.num_atoms(), molecule.num_bonds());
3365
3366 let state = mark_double_bond_candidates(
3367 &molecule,
3368 &rings,
3369 &kekulize_rings,
3370 porphyrin_system,
3371 &mut assignment,
3372 )
3373 .unwrap();
3374
3375 assert!(!state.candidate_atoms.contains(&AtomId::new(26)));
3376 assert!(!state.candidate_atoms.contains(&AtomId::new(39)));
3377 assert!(!state.candidate_atoms.contains(&AtomId::new(42)));
3378 }
3379
3380 #[test]
3381 fn kekulize_assignment_handles_row_134_porhyrin_like_rdkit() {
3382 let molecule = Molecule::from_smiles(
3383 "CC1=C(/C=C2\\C(C)=C3/C(C)=C(/C=C4\\C(C)=C5/C(CCC(=O)O)=C(C(C)=C5N4)C=C6\\C(CCC(=O)O)=C(C)C(=C6N2)C=C1)N3)C=C(\\C=C)C",
3384 )
3385 .unwrap();
3386
3387 assert!(kekulize_assignment(&molecule, None, true, true, 100).is_ok());
3388 assert!(kekulize_assignment(&molecule, None, true, false, 100).is_ok());
3389 }
3390
3391 fn kekulize_assignment_handles_pyrrolic_five_membered_ring_like_rdkit() {
3392 let molecule = Molecule::from_smiles("[nH]1cccc1").unwrap();
3393
3394 assert_eq!(molecule.atoms()[0].explicit_hydrogens(), 1);
3395 assert!(kekulize_assignment(&molecule, None, true, true, 100).is_ok());
3396 assert!(kekulize_assignment(&molecule, None, true, false, 100).is_ok());
3397 }
3398
3399 #[test]
3400 fn kekulize_assignment_keeps_non_bond_type_query_in_bonds_to_use() {
3401 let mut builder = MoleculeBuilder::new();
3402 let atoms = (0..6)
3403 .map(|_| builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true)))
3404 .collect::<Vec<_>>();
3405 builder
3406 .add_bond(
3407 BondSpec::new(atoms[0], atoms[1], BondOrder::Aromatic)
3408 .with_aromatic(true)
3409 .with_query(QueryNode::predicate(BondQueryPredicate::IsInRing(true))),
3410 )
3411 .unwrap();
3412 for idx in 1..6 {
3413 builder
3414 .add_bond(
3415 BondSpec::new(atoms[idx], atoms[(idx + 1) % 6], BondOrder::Aromatic)
3416 .with_aromatic(true),
3417 )
3418 .unwrap();
3419 }
3420 let molecule = builder.build().unwrap();
3421
3422 let assignment = kekulize_assignment(&molecule, None, true, false, 100).unwrap();
3423
3424 assert_eq!(assignment.bond_aromatic_flag(BondId::new(0)), Some(false));
3425 }
3426
3427 #[test]
3428 fn kekulize_assignment_rejects_non_ring_aromatic_atom_like_rdkit() {
3429 let mut builder = MoleculeBuilder::new();
3430 builder.add_atom(AtomSpec::new(Element::C).with_aromatic(true));
3431 let molecule = builder.build().unwrap();
3432
3433 let error = kekulize_assignment(&molecule, None, true, false, 100).unwrap_err();
3434 assert!(
3435 matches!(error, KekulizeError::NonRingAromaticAtom { atom } if atom == AtomId::new(0))
3436 );
3437 }
3438
3439 #[test]
3440 fn kekulize_assignment_preserves_total_valence_like_rdkit() {
3441 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
3442 let before =
3443 crate::assign_valence_with_options(&molecule, crate::ValenceModel::RdkitLike, false)
3444 .unwrap();
3445 let assignment = kekulize_assignment(&molecule, None, true, false, 100).unwrap();
3446 let mut kekulized = molecule.clone();
3447 apply_kekulize_assignment(kekulized.topology_block_mut(), &assignment);
3448 let after =
3449 crate::assign_valence_with_options(&kekulized, crate::ValenceModel::RdkitLike, false)
3450 .unwrap();
3451
3452 for atom in molecule.atoms() {
3453 let idx = atom.id().index();
3454 assert_eq!(
3455 before.explicit_valence[idx] + before.implicit_hydrogens[idx],
3456 after.explicit_valence[idx] + after.implicit_hydrogens[idx]
3457 );
3458 }
3459 }
3460
3461 #[test]
3462 fn kekulize_assignment_reuses_provided_ring_info_like_rdkit_entrypoint() {
3463 let molecule = Molecule::from_smiles_with_sanitize("c1ccccc1", false).unwrap();
3464 let rings = symmetrize_sssr(&molecule).unwrap();
3465
3466 let assignment = kekulize_assignment(&molecule, Some(&rings), true, false, 100).unwrap();
3467
3468 let double_bonds = molecule
3469 .bonds()
3470 .iter()
3471 .filter(|bond| assignment.bond_order(bond.id()) == Some(BondOrder::Double))
3472 .count();
3473 assert_eq!(double_bonds, 3);
3474 }
3475
3476 #[test]
3477 fn kekulize_source_markers_do_not_hide_elided_rdkit_lines() {
3478 let source = include_str!("kekulize.rs");
3479
3480 for line in source.lines().filter(|line| line.contains("RDKit")) {
3481 assert!(
3482 !line.contains("..."),
3483 "RDKit source marker must not contain elided source: {line}"
3484 );
3485 }
3486 }
3487}