1use std::collections::{HashMap, VecDeque};
15
16use chematic_core::{AtomIdx, BondIdx, Molecule};
17
18#[derive(Debug, Clone)]
27pub struct RingSet(Vec<Vec<AtomIdx>>);
28
29impl RingSet {
30 pub fn rings(&self) -> &[Vec<AtomIdx>] {
32 &self.0
33 }
34
35 pub fn ring_count(&self) -> usize {
37 self.0.len()
38 }
39
40 pub fn contains_atom(&self, atom: AtomIdx) -> bool {
42 self.0.iter().any(|ring| ring.contains(&atom))
43 }
44
45 pub fn atoms_in_ring_count(&self, atom: AtomIdx) -> usize {
47 self.0.iter().filter(|ring| ring.contains(&atom)).count()
48 }
49}
50
51pub fn find_sssr(mol: &Molecule) -> RingSet {
60 let v = mol.atom_count();
61 let e = mol.bond_count();
62
63 if v == 0 || e == 0 {
64 return RingSet(Vec::new());
65 }
66
67 let (components, parent) = bfs_spanning_forest(mol);
69 let r = (e as isize) - (v as isize) + (components as isize);
70
71 if r <= 0 {
72 return RingSet(Vec::new());
73 }
74 let r = r as usize;
75
76 let mut candidate_cycles: Vec<(Vec<BondIdx>, Vec<AtomIdx>)> = Vec::new();
78
79 for (bidx, bond) in mol.bonds() {
80 let u = bond.atom1;
81 let v_atom = bond.atom2;
82
83 let u_parent = parent[u.0 as usize];
86 let v_parent = parent[v_atom.0 as usize];
87
88 let is_tree_edge = (u_parent == Some(v_atom)) || (v_parent == Some(u));
89
90 if !is_tree_edge {
91 if let Some((bond_set, atom_seq)) = fundamental_cycle(mol, u, v_atom, bidx, &parent) {
93 candidate_cycles.push((bond_set, atom_seq));
94 }
95 }
96 }
97
98 candidate_cycles.sort_by_key(|(bonds, _)| bonds.len());
100
101 let mut basis: HashMap<BondIdx, Vec<BondIdx>> = HashMap::new();
104 let mut selected_atoms: Vec<Vec<AtomIdx>> = Vec::new();
105
106 for (bond_set, atom_seq) in candidate_cycles {
107 let reduced = gf2_reduce(&bond_set, &basis);
109
110 if !reduced.is_empty() {
111 let pivot = *reduced.iter().min().unwrap();
113 basis.insert(pivot, reduced);
114 selected_atoms.push(atom_seq);
115
116 if selected_atoms.len() == r {
117 break;
118 }
119 }
120 }
121
122 selected_atoms.sort_by_key(|ring| ring.len());
124 RingSet(selected_atoms)
125}
126
127fn bfs_spanning_forest(mol: &Molecule) -> (usize, Vec<Option<AtomIdx>>) {
138 let n = mol.atom_count();
139 let mut visited = vec![false; n];
140 let mut parent: Vec<Option<AtomIdx>> = vec![None; n];
141 let mut components = 0;
142 let mut queue: VecDeque<AtomIdx> = VecDeque::new();
143
144 for start in 0..n {
145 if visited[start] {
146 continue;
147 }
148 components += 1;
149 let start_idx = AtomIdx(start as u32);
150 visited[start] = true;
151 queue.push_back(start_idx);
152
153 while let Some(current) = queue.pop_front() {
154 for (neighbor, _bidx) in mol.neighbors(current) {
155 let ni = neighbor.0 as usize;
156 if !visited[ni] {
157 visited[ni] = true;
158 parent[ni] = Some(current);
159 queue.push_back(neighbor);
160 }
161 }
162 }
163 }
164
165 (components, parent)
166}
167
168fn fundamental_cycle(
178 mol: &Molecule,
179 u: AtomIdx,
180 v: AtomIdx,
181 bidx: BondIdx,
182 parent: &[Option<AtomIdx>],
183) -> Option<(Vec<BondIdx>, Vec<AtomIdx>)> {
184 let (path_u, path_v) = paths_to_lca(u, v, parent);
188
189 if path_u.is_empty() || path_v.is_empty() {
190 return None;
191 }
192
193 let mut ring_atoms: Vec<AtomIdx> = path_u.clone();
196 for &a in path_v.iter().rev().skip(1) {
198 ring_atoms.push(a);
199 }
200
201 let mut bond_set: Vec<BondIdx> = Vec::new();
203
204 for i in 0..path_u.len().saturating_sub(1) {
206 if let Some((b, _)) = mol.bond_between(path_u[i], path_u[i + 1]) {
207 bond_set.push(b);
208 } else {
209 return None;
210 }
211 }
212 for i in 0..path_v.len().saturating_sub(1) {
214 if let Some((b, _)) = mol.bond_between(path_v[i], path_v[i + 1]) {
215 bond_set.push(b);
216 } else {
217 return None;
218 }
219 }
220 bond_set.push(bidx);
222
223 bond_set.sort();
226 bond_set.dedup();
227
228 Some((bond_set, ring_atoms))
229}
230
231fn paths_to_lca(
236 u: AtomIdx,
237 v: AtomIdx,
238 parent: &[Option<AtomIdx>],
239) -> (Vec<AtomIdx>, Vec<AtomIdx>) {
240 let ancestors_u = ancestors(u, parent);
242 let ancestors_v = ancestors(v, parent);
243
244 let set_u: HashMap<AtomIdx, usize> = ancestors_u
245 .iter()
246 .enumerate()
247 .map(|(i, &a)| (a, i))
248 .collect();
249
250 let Some((idx_in_v, lca)) = ancestors_v
252 .iter()
253 .enumerate()
254 .find_map(|(i, a)| set_u.contains_key(a).then_some((i, *a)))
255 else {
256 return (Vec::new(), Vec::new());
259 };
260
261 let idx_in_u = set_u[&lca];
262 let path_u = ancestors_u[..=idx_in_u].to_vec();
263 let path_v = ancestors_v[..=idx_in_v].to_vec();
264 (path_u, path_v)
265}
266
267fn ancestors(start: AtomIdx, parent: &[Option<AtomIdx>]) -> Vec<AtomIdx> {
270 let mut chain = Vec::new();
271 let mut current = start;
272 loop {
273 chain.push(current);
274 match parent[current.0 as usize] {
275 Some(p) => current = p,
276 None => break,
277 }
278 }
279 chain
280}
281
282fn gf2_reduce(cycle: &[BondIdx], basis: &HashMap<BondIdx, Vec<BondIdx>>) -> Vec<BondIdx> {
293 let mut current: Vec<BondIdx> = cycle.to_vec();
294 while let Some(&pivot) = current.iter().min() {
295 match basis.get(&pivot) {
296 None => return current, Some(basis_row) => current = sym_diff(¤t, basis_row),
299 }
300 }
301 current
302}
303
304fn sym_diff(a: &[BondIdx], b: &[BondIdx]) -> Vec<BondIdx> {
306 let mut result = Vec::new();
307 let mut i = 0;
308 let mut j = 0;
309 while i < a.len() && j < b.len() {
310 match a[i].cmp(&b[j]) {
311 std::cmp::Ordering::Less => {
312 result.push(a[i]);
313 i += 1;
314 }
315 std::cmp::Ordering::Greater => {
316 result.push(b[j]);
317 j += 1;
318 }
319 std::cmp::Ordering::Equal => {
320 i += 1;
322 j += 1;
323 }
324 }
325 }
326 result.extend_from_slice(&a[i..]);
327 result.extend_from_slice(&b[j..]);
328 result
329}
330
331#[cfg(test)]
336mod tests {
337 use super::*;
338 use chematic_core::{Atom, BondOrder, Element, MoleculeBuilder};
339
340 fn cyclohexane() -> chematic_core::Molecule {
342 let mut b = MoleculeBuilder::new();
343 let atoms: Vec<_> = (0..6).map(|_| b.add_atom(Atom::new(Element::C))).collect();
344 for i in 0..6 {
345 b.add_bond(atoms[i], atoms[(i + 1) % 6], BondOrder::Single).unwrap();
346 }
347 b.build()
348 }
349
350 fn benzene() -> chematic_core::Molecule {
352 let mut b = MoleculeBuilder::new();
353 let atoms: Vec<_> = (0..6).map(|_| b.add_atom(Atom::new(Element::C))).collect();
354 for i in 0..6 {
355 b.add_bond(atoms[i], atoms[(i + 1) % 6], BondOrder::Single).unwrap();
356 }
357 b.build()
358 }
359
360 fn naphthalene() -> chematic_core::Molecule {
365 let mut b = MoleculeBuilder::new();
366 let atoms: Vec<_> = (0..10).map(|_| b.add_atom(Atom::new(Element::C))).collect();
367 let ring1 = [0usize, 1, 2, 3, 4, 9];
369 for i in 0..6 {
370 b.add_bond(atoms[ring1[i]], atoms[ring1[(i + 1) % 6]], BondOrder::Single).unwrap();
371 }
372 b.add_bond(atoms[4], atoms[5], BondOrder::Single).unwrap();
375 b.add_bond(atoms[5], atoms[6], BondOrder::Single).unwrap();
376 b.add_bond(atoms[6], atoms[7], BondOrder::Single).unwrap();
377 b.add_bond(atoms[7], atoms[8], BondOrder::Single).unwrap();
378 b.add_bond(atoms[8], atoms[9], BondOrder::Single).unwrap();
379 b.build()
380 }
381
382 fn norbornane() -> chematic_core::Molecule {
389 let mut b = MoleculeBuilder::new();
390 let atoms: Vec<_> = (0..7).map(|_| b.add_atom(Atom::new(Element::C))).collect();
391 b.add_bond(atoms[0], atoms[1], BondOrder::Single).unwrap();
393 b.add_bond(atoms[1], atoms[2], BondOrder::Single).unwrap();
394 b.add_bond(atoms[2], atoms[3], BondOrder::Single).unwrap();
395 b.add_bond(atoms[0], atoms[4], BondOrder::Single).unwrap();
397 b.add_bond(atoms[4], atoms[5], BondOrder::Single).unwrap();
398 b.add_bond(atoms[5], atoms[3], BondOrder::Single).unwrap();
399 b.add_bond(atoms[0], atoms[6], BondOrder::Single).unwrap();
401 b.add_bond(atoms[6], atoms[3], BondOrder::Single).unwrap();
402 b.build()
403 }
404
405 #[test]
406 fn test_cyclohexane_sssr() {
407 let mol = cyclohexane();
408 let rings = find_sssr(&mol);
409 assert_eq!(rings.ring_count(), 1, "cyclohexane has exactly 1 ring");
410 assert_eq!(rings.rings()[0].len(), 6, "cyclohexane ring has 6 atoms");
411 }
412
413 #[test]
414 fn test_benzene_sssr() {
415 let mol = benzene();
416 let rings = find_sssr(&mol);
417 assert_eq!(rings.ring_count(), 1, "benzene has exactly 1 ring");
418 assert_eq!(rings.rings()[0].len(), 6, "benzene ring has 6 atoms");
419 }
420
421 #[test]
422 fn test_naphthalene_sssr() {
423 let mol = naphthalene();
424 let rings = find_sssr(&mol);
425 assert_eq!(rings.ring_count(), 2, "naphthalene SSSR has 2 rings");
428 for ring in rings.rings() {
429 assert_eq!(ring.len(), 6, "each naphthalene SSSR ring has 6 atoms");
430 }
431 }
432
433 #[test]
434 fn test_norbornane_sssr() {
435 let mol = norbornane();
436 let rings = find_sssr(&mol);
437 assert_eq!(rings.ring_count(), 2, "norbornane SSSR has 2 rings");
439 for ring in rings.rings() {
441 assert_eq!(ring.len(), 5, "each norbornane SSSR ring has 5 atoms");
442 }
443 }
444
445 #[test]
446 fn test_acyclic_molecule() {
447 let mut b = MoleculeBuilder::new();
449 let c1 = b.add_atom(Atom::new(Element::C));
450 let c2 = b.add_atom(Atom::new(Element::C));
451 b.add_bond(c1, c2, BondOrder::Single).unwrap();
452 let mol = b.build();
453 let rings = find_sssr(&mol);
454 assert_eq!(rings.ring_count(), 0);
455 }
456
457 #[test]
458 fn test_contains_atom() {
459 let mol = cyclohexane();
460 let rings = find_sssr(&mol);
461 for i in 0..6u32 {
462 assert!(rings.contains_atom(AtomIdx(i)), "atom {} should be in a ring", i);
463 }
464 }
465
466 #[test]
467 fn test_atoms_in_ring_count_benzene() {
468 let mol = benzene();
469 let rings = find_sssr(&mol);
470 for i in 0..6u32 {
471 assert_eq!(
472 rings.atoms_in_ring_count(AtomIdx(i)),
473 1,
474 "each benzene atom is in exactly 1 ring"
475 );
476 }
477 }
478}