use std::collections::{HashMap, VecDeque};
use chematic_core::{AtomIdx, BondIdx, Molecule};
#[derive(Debug, Clone)]
pub struct RingSet(Vec<Vec<AtomIdx>>);
impl RingSet {
pub fn rings(&self) -> &[Vec<AtomIdx>] {
&self.0
}
pub fn ring_count(&self) -> usize {
self.0.len()
}
pub fn contains_atom(&self, atom: AtomIdx) -> bool {
self.0.iter().any(|ring| ring.contains(&atom))
}
pub fn atoms_in_ring_count(&self, atom: AtomIdx) -> usize {
self.0.iter().filter(|ring| ring.contains(&atom)).count()
}
}
pub fn find_sssr(mol: &Molecule) -> RingSet {
let v = mol.atom_count();
let e = mol.bond_count();
if v == 0 || e == 0 {
return RingSet(Vec::new());
}
let (components, parent, bfs_depth) = bfs_spanning_forest(mol);
let c = components;
let r = (e as isize) - (v as isize) + (c as isize);
if r <= 0 {
return RingSet(Vec::new());
}
let r = r as usize;
let mut candidate_cycles: Vec<(Vec<BondIdx>, Vec<AtomIdx>)> = Vec::new();
for (bidx, bond) in mol.bonds() {
let u = bond.atom1;
let v_atom = bond.atom2;
let u_parent = parent[u.0 as usize];
let v_parent = parent[v_atom.0 as usize];
let is_tree_edge = (u_parent == Some(v_atom)) || (v_parent == Some(u));
if !is_tree_edge {
if let Some((bond_set, atom_seq)) = fundamental_cycle(mol, u, v_atom, bidx, &parent, &bfs_depth) {
candidate_cycles.push((bond_set, atom_seq));
}
}
}
candidate_cycles.sort_by_key(|(bonds, _)| bonds.len());
let mut basis: HashMap<BondIdx, Vec<BondIdx>> = HashMap::new();
let mut selected_atoms: Vec<Vec<AtomIdx>> = Vec::new();
for (bond_set, atom_seq) in candidate_cycles {
let reduced = gf2_reduce(&bond_set, &basis);
if !reduced.is_empty() {
let pivot = *reduced.iter().min().unwrap();
basis.insert(pivot, reduced);
selected_atoms.push(atom_seq);
if selected_atoms.len() == r {
break;
}
}
}
selected_atoms.sort_by_key(|ring| ring.len());
RingSet(selected_atoms)
}
fn bfs_spanning_forest(mol: &Molecule) -> (usize, Vec<Option<AtomIdx>>, Vec<u32>) {
let n = mol.atom_count();
let mut visited = vec![false; n];
let mut parent: Vec<Option<AtomIdx>> = vec![None; n];
let mut depth = vec![0u32; n];
let mut components = 0;
let mut queue: VecDeque<AtomIdx> = VecDeque::new();
for start in 0..n {
if visited[start] {
continue;
}
components += 1;
let start_idx = AtomIdx(start as u32);
visited[start] = true;
queue.push_back(start_idx);
while let Some(current) = queue.pop_front() {
for (neighbor, _bidx) in mol.neighbors(current) {
let ni = neighbor.0 as usize;
if !visited[ni] {
visited[ni] = true;
parent[ni] = Some(current);
depth[ni] = depth[current.0 as usize] + 1;
queue.push_back(neighbor);
}
}
}
}
(components, parent, depth)
}
fn fundamental_cycle(
mol: &Molecule,
u: AtomIdx,
v: AtomIdx,
bidx: BondIdx,
parent: &[Option<AtomIdx>],
depth: &[u32],
) -> Option<(Vec<BondIdx>, Vec<AtomIdx>)> {
let (path_u, path_v) = paths_to_lca(u, v, parent, depth);
if path_u.is_empty() || path_v.is_empty() {
return None;
}
let mut ring_atoms: Vec<AtomIdx> = path_u.clone();
for &a in path_v.iter().rev().skip(1) {
ring_atoms.push(a);
}
let mut bond_set: Vec<BondIdx> = Vec::new();
for i in 0..path_u.len().saturating_sub(1) {
if let Some((b, _)) = mol.bond_between(path_u[i], path_u[i + 1]) {
bond_set.push(b);
} else {
return None;
}
}
for i in 0..path_v.len().saturating_sub(1) {
if let Some((b, _)) = mol.bond_between(path_v[i], path_v[i + 1]) {
bond_set.push(b);
} else {
return None;
}
}
bond_set.push(bidx);
bond_set.sort();
bond_set.dedup();
Some((bond_set, ring_atoms))
}
fn paths_to_lca(
u: AtomIdx,
v: AtomIdx,
parent: &[Option<AtomIdx>],
depth: &[u32],
) -> (Vec<AtomIdx>, Vec<AtomIdx>) {
let ancestors_u = ancestors(u, parent);
let ancestors_v = ancestors(v, parent);
let set_u: HashMap<AtomIdx, usize> = ancestors_u
.iter()
.enumerate()
.map(|(i, &a)| (a, i))
.collect();
let mut lca = None;
let mut idx_in_v = 0;
for (i, &a) in ancestors_v.iter().enumerate() {
if set_u.contains_key(&a) {
lca = Some(a);
idx_in_v = i;
break;
}
}
let lca = match lca {
Some(a) => a,
None => {
return (Vec::new(), Vec::new());
}
};
let idx_in_u = *set_u.get(&lca).unwrap();
let path_u = ancestors_u[..=idx_in_u].to_vec();
let path_v = ancestors_v[..=idx_in_v].to_vec();
let _ = depth;
(path_u, path_v)
}
fn ancestors(start: AtomIdx, parent: &[Option<AtomIdx>]) -> Vec<AtomIdx> {
let mut chain = Vec::new();
let mut current = start;
loop {
chain.push(current);
match parent[current.0 as usize] {
Some(p) => current = p,
None => break,
}
}
chain
}
fn gf2_reduce(cycle: &[BondIdx], basis: &HashMap<BondIdx, Vec<BondIdx>>) -> Vec<BondIdx> {
let mut current: Vec<BondIdx> = cycle.to_vec();
loop {
if current.is_empty() {
return current;
}
let pivot = *current.iter().min().unwrap();
match basis.get(&pivot) {
None => return current, Some(basis_row) => {
current = sym_diff(¤t, basis_row);
}
}
}
}
fn sym_diff(a: &[BondIdx], b: &[BondIdx]) -> Vec<BondIdx> {
let mut result = Vec::new();
let mut i = 0;
let mut j = 0;
while i < a.len() && j < b.len() {
match a[i].cmp(&b[j]) {
std::cmp::Ordering::Less => {
result.push(a[i]);
i += 1;
}
std::cmp::Ordering::Greater => {
result.push(b[j]);
j += 1;
}
std::cmp::Ordering::Equal => {
i += 1;
j += 1;
}
}
}
result.extend_from_slice(&a[i..]);
result.extend_from_slice(&b[j..]);
result
}
#[cfg(test)]
mod tests {
use super::*;
use chematic_core::{Atom, BondOrder, Element, MoleculeBuilder};
fn cyclohexane() -> chematic_core::Molecule {
let mut b = MoleculeBuilder::new();
let atoms: Vec<_> = (0..6).map(|_| b.add_atom(Atom::new(Element::C))).collect();
for i in 0..6 {
b.add_bond(atoms[i], atoms[(i + 1) % 6], BondOrder::Single).unwrap();
}
b.build()
}
fn benzene() -> chematic_core::Molecule {
let mut b = MoleculeBuilder::new();
let atoms: Vec<_> = (0..6).map(|_| b.add_atom(Atom::new(Element::C))).collect();
for i in 0..6 {
b.add_bond(atoms[i], atoms[(i + 1) % 6], BondOrder::Single).unwrap();
}
b.build()
}
fn naphthalene() -> chematic_core::Molecule {
let mut b = MoleculeBuilder::new();
let atoms: Vec<_> = (0..10).map(|_| b.add_atom(Atom::new(Element::C))).collect();
let ring1 = [0usize, 1, 2, 3, 4, 9];
for i in 0..6 {
b.add_bond(atoms[ring1[i]], atoms[ring1[(i + 1) % 6]], BondOrder::Single).unwrap();
}
b.add_bond(atoms[4], atoms[5], BondOrder::Single).unwrap();
b.add_bond(atoms[5], atoms[6], BondOrder::Single).unwrap();
b.add_bond(atoms[6], atoms[7], BondOrder::Single).unwrap();
b.add_bond(atoms[7], atoms[8], BondOrder::Single).unwrap();
b.add_bond(atoms[8], atoms[9], BondOrder::Single).unwrap();
b.build()
}
fn norbornane() -> chematic_core::Molecule {
let mut b = MoleculeBuilder::new();
let atoms: Vec<_> = (0..7).map(|_| b.add_atom(Atom::new(Element::C))).collect();
b.add_bond(atoms[0], atoms[1], BondOrder::Single).unwrap();
b.add_bond(atoms[1], atoms[2], BondOrder::Single).unwrap();
b.add_bond(atoms[2], atoms[3], BondOrder::Single).unwrap();
b.add_bond(atoms[0], atoms[4], BondOrder::Single).unwrap();
b.add_bond(atoms[4], atoms[5], BondOrder::Single).unwrap();
b.add_bond(atoms[5], atoms[3], BondOrder::Single).unwrap();
b.add_bond(atoms[0], atoms[6], BondOrder::Single).unwrap();
b.add_bond(atoms[6], atoms[3], BondOrder::Single).unwrap();
b.build()
}
#[test]
fn test_cyclohexane_sssr() {
let mol = cyclohexane();
let rings = find_sssr(&mol);
assert_eq!(rings.ring_count(), 1, "cyclohexane has exactly 1 ring");
assert_eq!(rings.rings()[0].len(), 6, "cyclohexane ring has 6 atoms");
}
#[test]
fn test_benzene_sssr() {
let mol = benzene();
let rings = find_sssr(&mol);
assert_eq!(rings.ring_count(), 1, "benzene has exactly 1 ring");
assert_eq!(rings.rings()[0].len(), 6, "benzene ring has 6 atoms");
}
#[test]
fn test_naphthalene_sssr() {
let mol = naphthalene();
let rings = find_sssr(&mol);
assert_eq!(rings.ring_count(), 2, "naphthalene SSSR has 2 rings");
for ring in rings.rings() {
assert_eq!(ring.len(), 6, "each naphthalene SSSR ring has 6 atoms");
}
}
#[test]
fn test_norbornane_sssr() {
let mol = norbornane();
let rings = find_sssr(&mol);
assert_eq!(rings.ring_count(), 2, "norbornane SSSR has 2 rings");
for ring in rings.rings() {
assert_eq!(ring.len(), 5, "each norbornane SSSR ring has 5 atoms");
}
}
#[test]
fn test_acyclic_molecule() {
let mut b = MoleculeBuilder::new();
let c1 = b.add_atom(Atom::new(Element::C));
let c2 = b.add_atom(Atom::new(Element::C));
b.add_bond(c1, c2, BondOrder::Single).unwrap();
let mol = b.build();
let rings = find_sssr(&mol);
assert_eq!(rings.ring_count(), 0);
}
#[test]
fn test_contains_atom() {
let mol = cyclohexane();
let rings = find_sssr(&mol);
for i in 0..6u32 {
assert!(rings.contains_atom(AtomIdx(i)), "atom {} should be in a ring", i);
}
}
#[test]
fn test_atoms_in_ring_count_benzene() {
let mol = benzene();
let rings = find_sssr(&mol);
for i in 0..6u32 {
assert_eq!(
rings.atoms_in_ring_count(AtomIdx(i)),
1,
"each benzene atom is in exactly 1 ring"
);
}
}
}