use std::collections::{HashMap, HashSet};
use crate::bond::BondOrder;
use crate::molecule::{AtomIdx, BondIdx, Molecule};
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct KekuleError {
pub detail: String,
}
impl core::fmt::Display for KekuleError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
write!(f, "kekulization failed: {}", self.detail)
}
}
impl std::error::Error for KekuleError {}
pub type KekuleResult = HashMap<BondIdx, BondOrder>;
pub fn kekulize(mol: &Molecule) -> Result<KekuleResult, KekuleError> {
let mut aromatic_bonds: Vec<BondIdx> = Vec::new();
let mut aromatic_atoms: HashSet<AtomIdx> = HashSet::new();
for (bidx, bond) in mol.bonds() {
if bond.order == BondOrder::Aromatic {
aromatic_bonds.push(bidx);
aromatic_atoms.insert(bond.atom1);
aromatic_atoms.insert(bond.atom2);
}
}
if aromatic_bonds.is_empty() {
return Ok(HashMap::new());
}
let must_match: HashSet<AtomIdx> = aromatic_atoms
.iter()
.copied()
.filter(|&idx| atom_must_be_matched(mol, idx))
.collect();
let mut adj: HashMap<AtomIdx, Vec<(AtomIdx, BondIdx)>> = HashMap::new();
for &bidx in &aromatic_bonds {
let bond = mol.bond(bidx);
if must_match.contains(&bond.atom1) && must_match.contains(&bond.atom2) {
adj.entry(bond.atom1).or_default().push((bond.atom2, bidx));
adj.entry(bond.atom2).or_default().push((bond.atom1, bidx));
}
}
let mut matching: HashMap<AtomIdx, AtomIdx> = HashMap::new();
let mut sorted_atoms: Vec<AtomIdx> = must_match.iter().copied().collect();
sorted_atoms.sort();
run_matching_pass(&sorted_atoms, &adj, &mut matching);
if must_match.iter().any(|&idx| !matching.contains_key(&idx)) {
matching.clear();
let mut rev = sorted_atoms.clone();
rev.reverse();
run_matching_pass(&rev, &adj, &mut matching);
}
for &idx in &must_match {
if !matching.contains_key(&idx) {
return Err(KekuleError {
detail: format!(
"atom {} ({}) cannot be assigned a double bond",
idx.0,
mol.atom(idx).element.symbol()
),
});
}
}
let mut double_bonds: HashSet<BondIdx> = HashSet::new();
for (&atom, &partner) in &matching {
if atom >= partner {
continue; }
if let Some((bidx, _)) = mol.bond_between(atom, partner)
&& mol.bond(bidx).order == BondOrder::Aromatic
{
double_bonds.insert(bidx);
}
}
let result: KekuleResult = aromatic_bonds
.iter()
.map(|&bidx| {
let order = if double_bonds.contains(&bidx) {
BondOrder::Double
} else {
BondOrder::Single
};
(bidx, order)
})
.collect();
Ok(result)
}
pub fn apply_kekule(mol: &Molecule, kekule: &KekuleResult) -> Molecule {
use crate::molecule::MoleculeBuilder;
let mut builder = MoleculeBuilder::new();
for (_, atom) in mol.atoms() {
builder.add_atom(atom.clone());
}
for (bidx, bond) in mol.bonds() {
let order = kekule.get(&bidx).copied().unwrap_or(bond.order);
builder
.add_bond(bond.atom1, bond.atom2, order)
.expect("duplicate bond during apply_kekule");
}
builder.build()
}
fn augment(
start: AtomIdx,
adj: &HashMap<AtomIdx, Vec<(AtomIdx, BondIdx)>>,
matching: &mut HashMap<AtomIdx, AtomIdx>,
visited: &mut HashSet<AtomIdx>,
) -> bool {
let mut parent: HashMap<AtomIdx, AtomIdx> = HashMap::new();
let mut queue: std::collections::VecDeque<AtomIdx> = std::collections::VecDeque::new();
queue.push_back(start);
'bfs: while let Some(v) = queue.pop_front() {
let Some(neighbors) = adj.get(&v) else {
continue;
};
for &(u, _) in neighbors {
if !visited.insert(u) {
continue;
}
parent.insert(u, v);
match matching.get(&u).copied() {
None => {
let mut cur = u;
loop {
let prev = parent[&cur];
let prev_old_match = matching.get(&prev).copied();
matching.insert(prev, cur);
matching.insert(cur, prev);
match prev_old_match {
None | Some(_) if prev == start => break,
Some(m) => cur = m,
None => break,
}
}
break 'bfs;
}
Some(partner) => {
if visited.insert(partner) {
parent.insert(partner, u);
queue.push_back(partner);
}
}
}
}
}
matching.contains_key(&start)
}
fn run_matching_pass(
atoms: &[AtomIdx],
adj: &HashMap<AtomIdx, Vec<(AtomIdx, BondIdx)>>,
matching: &mut HashMap<AtomIdx, AtomIdx>,
) {
for &start in atoms {
if matching.contains_key(&start) {
continue;
}
let mut visited: HashSet<AtomIdx> = HashSet::new();
visited.insert(start);
augment(start, adj, matching, &mut visited);
}
}
fn atom_must_be_matched(mol: &Molecule, idx: AtomIdx) -> bool {
let atom = mol.atom(idx);
match atom.element.atomic_number() {
8 | 16 | 34 => false,
5 => false,
7 => !matches!(atom.hydrogen_count, Some(h) if h > 0),
_ => true,
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::atom::Atom;
use crate::element::Element;
use crate::molecule::MoleculeBuilder;
fn benzene() -> Molecule {
let mut b = MoleculeBuilder::new();
let atoms: Vec<_> = (0..6)
.map(|_| b.add_atom(Atom::aromatic(Element::C)))
.collect();
for i in 0..6 {
b.add_bond(atoms[i], atoms[(i + 1) % 6], BondOrder::Aromatic)
.unwrap();
}
b.build()
}
fn pyridine() -> Molecule {
let mut b = MoleculeBuilder::new();
let c1 = b.add_atom(Atom::aromatic(Element::C));
let c2 = b.add_atom(Atom::aromatic(Element::C));
let c3 = b.add_atom(Atom::aromatic(Element::C));
let n = b.add_atom(Atom::aromatic(Element::N));
let c4 = b.add_atom(Atom::aromatic(Element::C));
let c5 = b.add_atom(Atom::aromatic(Element::C));
let atoms = [c1, c2, c3, n, c4, c5];
for i in 0..6 {
b.add_bond(atoms[i], atoms[(i + 1) % 6], BondOrder::Aromatic)
.unwrap();
}
b.build()
}
fn furan() -> Molecule {
let mut b = MoleculeBuilder::new();
let o = b.add_atom(Atom::aromatic(Element::O));
let c1 = b.add_atom(Atom::aromatic(Element::C));
let c2 = b.add_atom(Atom::aromatic(Element::C));
let c3 = b.add_atom(Atom::aromatic(Element::C));
let c4 = b.add_atom(Atom::aromatic(Element::C));
let atoms = [o, c1, c2, c3, c4];
for i in 0..5 {
b.add_bond(atoms[i], atoms[(i + 1) % 5], BondOrder::Aromatic)
.unwrap();
}
b.build()
}
fn pyrrole() -> Molecule {
let mut b = MoleculeBuilder::new();
let mut n_atom = Atom::aromatic(Element::N);
n_atom.hydrogen_count = Some(1);
let n = b.add_atom(n_atom);
let c1 = b.add_atom(Atom::aromatic(Element::C));
let c2 = b.add_atom(Atom::aromatic(Element::C));
let c3 = b.add_atom(Atom::aromatic(Element::C));
let c4 = b.add_atom(Atom::aromatic(Element::C));
let atoms = [n, c1, c2, c3, c4];
for i in 0..5 {
b.add_bond(atoms[i], atoms[(i + 1) % 5], BondOrder::Aromatic)
.unwrap();
}
b.build()
}
#[test]
fn test_kekulize_benzene() {
let mol = benzene();
let result = kekulize(&mol).expect("benzene kekulization failed");
assert_eq!(result.len(), 6);
let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
let singles = result.values().filter(|&&o| o == BondOrder::Single).count();
assert_eq!(doubles, 3, "benzene must have 3 double bonds");
assert_eq!(singles, 3, "benzene must have 3 single bonds");
}
#[test]
fn test_kekulize_pyridine() {
let mol = pyridine();
let result = kekulize(&mol).expect("pyridine kekulization failed");
let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
assert_eq!(doubles, 3, "pyridine must have 3 double bonds");
}
#[test]
fn test_kekulize_furan() {
let mol = furan();
let result = kekulize(&mol).expect("furan kekulization failed");
let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
assert_eq!(doubles, 2, "furan must have 2 double bonds");
}
#[test]
fn test_kekulize_pyrrole() {
let mol = pyrrole();
let result = kekulize(&mol).expect("pyrrole kekulization failed");
let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
assert_eq!(doubles, 2, "pyrrole must have 2 double bonds");
}
#[test]
fn test_kekulize_naphthalene() {
let mut b = MoleculeBuilder::new();
let atoms: Vec<_> = (0..10)
.map(|_| b.add_atom(Atom::aromatic(Element::C)))
.collect();
let ring1 = [0, 1, 2, 3, 4, 9];
for i in 0..ring1.len() {
b.add_bond(
atoms[ring1[i]],
atoms[ring1[(i + 1) % ring1.len()]],
BondOrder::Aromatic,
)
.unwrap();
}
let ring2 = [4, 5, 6, 7, 8, 9];
for i in 0..ring2.len() {
let a = atoms[ring2[i]];
let bb = atoms[ring2[(i + 1) % ring2.len()]];
if mol_has_no_bond_yet(&b, a, bb) {
b.add_bond(a, bb, BondOrder::Aromatic).unwrap();
}
}
let mol = b.build();
let result = kekulize(&mol).expect("naphthalene kekulization failed");
let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
assert_eq!(doubles, 5, "naphthalene must have 5 double bonds");
}
#[test]
fn test_apply_kekule() {
let mol = benzene();
let kekule = kekulize(&mol).unwrap();
let kekule_mol = apply_kekule(&mol, &kekule);
for (_, bond) in kekule_mol.bonds() {
assert_ne!(
bond.order,
BondOrder::Aromatic,
"apply_kekule should remove all aromatic bonds"
);
}
}
#[test]
fn test_no_aromatic_bonds_noop() {
let mut b = MoleculeBuilder::new();
let c1 = b.add_atom(Atom::new(Element::C));
let c2 = b.add_atom(Atom::new(Element::C));
b.add_bond(c1, c2, BondOrder::Single).unwrap();
let mol = b.build();
let result = kekulize(&mol).unwrap();
assert!(result.is_empty());
}
fn mol_has_no_bond_yet(b: &MoleculeBuilder, a: AtomIdx, bb: AtomIdx) -> bool {
for (_, partner) in b.atom_neighbors(a) {
if partner == bb {
return false;
}
}
true
}
#[test]
fn test_kekulize_azulene() {
let mut b = MoleculeBuilder::new();
let a: Vec<_> = (0..10)
.map(|_| b.add_atom(Atom::aromatic(Element::C)))
.collect();
for i in 0..5 {
b.add_bond(a[i], a[(i + 1) % 5], BondOrder::Aromatic).unwrap();
}
for (x, y) in [(4usize, 5usize), (5, 6), (6, 7), (7, 8), (8, 9), (9, 0)] {
if mol_has_no_bond_yet(&b, a[x], a[y]) {
b.add_bond(a[x], a[y], BondOrder::Aromatic).unwrap();
}
}
let mol = b.build();
let result = kekulize(&mol).expect("azulene kekulization failed");
let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
assert_eq!(doubles, 5, "azulene needs 5 double bonds");
}
#[test]
fn test_kekulize_acenaphthylene() {
let mut b = MoleculeBuilder::new();
let a: Vec<_> = (0..12)
.map(|_| b.add_atom(Atom::aromatic(Element::C)))
.collect();
for (x, y) in [(0,1),(1,2),(2,3),(3,4),(4,9),(9,0)] {
b.add_bond(a[x], a[y], BondOrder::Aromatic).unwrap();
}
for (x, y) in [(4,5),(5,6),(6,7),(7,8),(8,9)] {
b.add_bond(a[x], a[y], BondOrder::Aromatic).unwrap();
}
for (x, y) in [(0,11),(11,10),(10,1)] {
b.add_bond(a[x], a[y], BondOrder::Aromatic).unwrap();
}
let mol = b.build();
let result = kekulize(&mol).expect("acenaphthylene kekulization failed");
let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
assert_eq!(doubles, 6, "acenaphthylene needs 6 double bonds");
}
fn biphenylene() -> Molecule {
let mut b = MoleculeBuilder::new();
let a: Vec<_> = (0..12)
.map(|_| b.add_atom(Atom::aromatic(Element::C)))
.collect();
for i in 0..6 {
b.add_bond(a[i], a[(i + 1) % 6], BondOrder::Aromatic).unwrap();
}
for i in 0..6 {
b.add_bond(a[6 + i], a[6 + (i + 1) % 6], BondOrder::Aromatic).unwrap();
}
b.add_bond(a[5], a[6], BondOrder::Aromatic).unwrap();
b.add_bond(a[0], a[11], BondOrder::Aromatic).unwrap();
b.build()
}
fn anthracene() -> Molecule {
let mut b = MoleculeBuilder::new();
let a: Vec<_> = (0..14)
.map(|_| b.add_atom(Atom::aromatic(Element::C)))
.collect();
for i in 0..6 {
b.add_bond(a[i], a[(i + 1) % 6], BondOrder::Aromatic).unwrap();
}
for (x, y) in [(4,9),(9,8),(8,7),(7,6),(6,5)] {
b.add_bond(a[x], a[y], BondOrder::Aromatic).unwrap();
}
for (x, y) in [(7,13),(13,12),(12,11),(11,10),(10,6)] {
b.add_bond(a[x], a[y], BondOrder::Aromatic).unwrap();
}
b.build()
}
#[test]
fn test_kekulize_biphenylene() {
let mol = biphenylene();
let result = kekulize(&mol).expect("biphenylene kekulization should succeed");
let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
assert_eq!(doubles, 6, "biphenylene needs 6 double bonds");
}
#[test]
fn test_kekulize_anthracene() {
let mol = anthracene();
let result = kekulize(&mol).expect("anthracene kekulization should succeed");
let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
assert_eq!(doubles, 7, "anthracene needs 7 double bonds");
}
#[test]
fn test_kekulize_biphenylene_double_bond_count() {
let mol = biphenylene();
let result = kekulize(&mol).expect("biphenylene kekulization");
let singles = result.values().filter(|&&o| o == BondOrder::Single).count();
let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
assert_eq!(singles + doubles, 14, "biphenylene has 14 aromatic bonds");
}
#[test]
fn test_kekulize_large_fused_6rings() {
let mol = {
let mut b = MoleculeBuilder::new();
let a: Vec<_> = (0..16)
.map(|_| b.add_atom(Atom::aromatic(Element::C)))
.collect();
for i in 0..6 { b.add_bond(a[i], a[(i+1)%6], BondOrder::Aromatic).unwrap(); }
for (x,y) in [(4,9),(9,8),(8,7),(7,6),(6,5)] {
b.add_bond(a[x], a[y], BondOrder::Aromatic).unwrap();
}
for (x,y) in [(2,11),(11,10),(10,13),(13,12),(12,1)] {
b.add_bond(a[x], a[y], BondOrder::Aromatic).unwrap();
}
for (x,y) in [(7,15),(15,14),(14,11)] {
b.add_bond(a[x], a[y], BondOrder::Aromatic).unwrap();
}
b.build()
};
let result = kekulize(&mol).expect("4-ring PAH kekulization should succeed");
let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
assert!(doubles >= 6, "4-ring PAH needs at least 6 double bonds, got {doubles}");
}
#[test]
fn test_kekulize_deterministic() {
let mol1 = biphenylene();
let mol2 = biphenylene();
let r1 = kekulize(&mol1).expect("pass1");
let r2 = kekulize(&mol2).expect("pass2");
assert_eq!(
r1.values().filter(|&&o| o == BondOrder::Double).count(),
r2.values().filter(|&&o| o == BondOrder::Double).count(),
"kekulization must be deterministic"
);
}
#[test]
fn test_kekulize_fluoranthene() {
let mut b = MoleculeBuilder::new();
let a: Vec<_> = (0..16)
.map(|_| b.add_atom(Atom::aromatic(Element::C)))
.collect();
for i in 0..6 { b.add_bond(a[i], a[(i+1)%6], BondOrder::Aromatic).unwrap(); }
for (x,y) in [(5,6),(6,7),(7,8),(8,9),(9,0)] {
if mol_has_no_bond_yet(&b, a[x], a[y]) {
b.add_bond(a[x], a[y], BondOrder::Aromatic).unwrap();
}
}
for (x,y) in [(2,10),(10,11),(11,12),(12,13),(13,1)] {
if mol_has_no_bond_yet(&b, a[x], a[y]) {
b.add_bond(a[x], a[y], BondOrder::Aromatic).unwrap();
}
}
for (x,y) in [(8,14),(14,15),(15,13),(13,9)] {
if mol_has_no_bond_yet(&b, a[x], a[y]) {
b.add_bond(a[x], a[y], BondOrder::Aromatic).unwrap();
}
}
let mol = b.build();
let result = kekulize(&mol).expect("fluoranthene-like kekulization failed");
let doubles = result.values().filter(|&&o| o == BondOrder::Double).count();
assert_eq!(doubles, 8, "fluoranthene-like structure needs 8 double bonds");
}
}