Skip to main content

chematic_perception/
sssr.rs

1//! Smallest Set of Smallest Rings (SSSR) via the Balducci-Pearlman algorithm.
2//!
3//! Algorithm overview:
4//! 1. Compute the cycle rank r = E - V + C (Euler characteristic),
5//!    where C is the number of connected components.
6//! 2. Build a BFS spanning forest over all connected components.
7//! 3. Each non-tree (back) edge gives one fundamental cycle:
8//!    the unique path between its endpoints in the spanning tree, plus the edge itself.
9//! 4. Represent each cycle as a set of bond indices (for GF(2) XOR independence testing).
10//! 5. Use Gaussian elimination over GF(2) to greedily build an independent basis of r cycles,
11//!    preferring shorter cycles.
12//! 6. Convert the chosen bond-sets back to ordered atom sequences for the public API.
13
14use std::collections::{HashMap, VecDeque};
15
16use chematic_core::{AtomIdx, BondIdx, Molecule};
17
18// ---------------------------------------------------------------------------
19// Public types
20// ---------------------------------------------------------------------------
21
22/// The Smallest Set of Smallest Rings for a molecule.
23///
24/// Each ring is stored as a sequence of `AtomIdx` values listed in ring order.
25/// The first atom is not repeated at the end.
26#[derive(Debug, Clone)]
27pub struct RingSet(Vec<Vec<AtomIdx>>);
28
29impl RingSet {
30    /// All rings as slices of atom indices.
31    pub fn rings(&self) -> &[Vec<AtomIdx>] {
32        &self.0
33    }
34
35    /// Number of rings in the SSSR.
36    pub fn ring_count(&self) -> usize {
37        self.0.len()
38    }
39
40    /// Whether atom `atom` is a member of at least one ring.
41    pub fn contains_atom(&self, atom: AtomIdx) -> bool {
42        self.0.iter().any(|ring| ring.contains(&atom))
43    }
44
45    /// Number of rings that atom `atom` belongs to.
46    pub fn atoms_in_ring_count(&self, atom: AtomIdx) -> usize {
47        self.0.iter().filter(|ring| ring.contains(&atom)).count()
48    }
49}
50
51// ---------------------------------------------------------------------------
52// Main entry point
53// ---------------------------------------------------------------------------
54
55/// Compute the Smallest Set of Smallest Rings for `mol`.
56///
57/// Returns a [`RingSet`] whose ring count equals the cycle rank r = E - V + C.
58/// For acyclic molecules (r = 0) the returned set is empty.
59pub fn find_sssr(mol: &Molecule) -> RingSet {
60    let v = mol.atom_count();
61    let e = mol.bond_count();
62
63    if v == 0 || e == 0 {
64        return RingSet(Vec::new());
65    }
66
67    // Count connected components and build the BFS spanning forest.
68    let (components, parent, bfs_depth) = bfs_spanning_forest(mol);
69    let c = components;
70    let r = (e as isize) - (v as isize) + (c as isize);
71
72    if r <= 0 {
73        return RingSet(Vec::new());
74    }
75    let r = r as usize;
76
77    // Collect all fundamental cycles from back edges as bond-index sets.
78    let mut candidate_cycles: Vec<(Vec<BondIdx>, Vec<AtomIdx>)> = Vec::new();
79
80    for (bidx, bond) in mol.bonds() {
81        let u = bond.atom1;
82        let v_atom = bond.atom2;
83
84        // A bond is a back edge if neither endpoint is the parent of the other
85        // in the spanning forest.
86        let u_parent = parent[u.0 as usize];
87        let v_parent = parent[v_atom.0 as usize];
88
89        let is_tree_edge = (u_parent == Some(v_atom)) || (v_parent == Some(u));
90
91        if !is_tree_edge {
92            // This is a back edge — reconstruct the fundamental cycle.
93            if let Some((bond_set, atom_seq)) = fundamental_cycle(mol, u, v_atom, bidx, &parent, &bfs_depth) {
94                candidate_cycles.push((bond_set, atom_seq));
95            }
96        }
97    }
98
99    // Sort by cycle length (shorter first) for greedy shortest-cycle preference.
100    candidate_cycles.sort_by_key(|(bonds, _)| bonds.len());
101
102    // Gaussian elimination over GF(2) to select r linearly independent cycles.
103    // The basis maps a pivot BondIdx to the full bond-set of that basis row.
104    let mut basis: HashMap<BondIdx, Vec<BondIdx>> = HashMap::new();
105    let mut selected_atoms: Vec<Vec<AtomIdx>> = Vec::new();
106
107    for (bond_set, atom_seq) in candidate_cycles {
108        // Reduce this cycle against the current basis.
109        let reduced = gf2_reduce(&bond_set, &basis);
110
111        if !reduced.is_empty() {
112            // This cycle is independent — add it to the basis.
113            let pivot = *reduced.iter().min().unwrap();
114            basis.insert(pivot, reduced);
115            selected_atoms.push(atom_seq);
116
117            if selected_atoms.len() == r {
118                break;
119            }
120        }
121    }
122
123    // Sort output rings by length for output consistency.
124    selected_atoms.sort_by_key(|ring| ring.len());
125    RingSet(selected_atoms)
126}
127
128// ---------------------------------------------------------------------------
129// BFS spanning forest
130// ---------------------------------------------------------------------------
131
132/// Build a BFS spanning forest over the entire molecule.
133///
134/// Returns:
135/// - `components`: number of connected components.
136/// - `parent`: for each atom, the atom from which it was first discovered
137///   (None for BFS roots).
138/// - `depth`: BFS depth of each atom from the root of its component.
139fn bfs_spanning_forest(mol: &Molecule) -> (usize, Vec<Option<AtomIdx>>, Vec<u32>) {
140    let n = mol.atom_count();
141    let mut visited = vec![false; n];
142    let mut parent: Vec<Option<AtomIdx>> = vec![None; n];
143    let mut depth = vec![0u32; n];
144    let mut components = 0;
145    let mut queue: VecDeque<AtomIdx> = VecDeque::new();
146
147    for start in 0..n {
148        if visited[start] {
149            continue;
150        }
151        components += 1;
152        let start_idx = AtomIdx(start as u32);
153        visited[start] = true;
154        queue.push_back(start_idx);
155
156        while let Some(current) = queue.pop_front() {
157            for (neighbor, _bidx) in mol.neighbors(current) {
158                let ni = neighbor.0 as usize;
159                if !visited[ni] {
160                    visited[ni] = true;
161                    parent[ni] = Some(current);
162                    depth[ni] = depth[current.0 as usize] + 1;
163                    queue.push_back(neighbor);
164                }
165            }
166        }
167    }
168
169    (components, parent, depth)
170}
171
172// ---------------------------------------------------------------------------
173// Fundamental cycle reconstruction
174// ---------------------------------------------------------------------------
175
176/// Reconstruct the fundamental cycle introduced by the back edge (u, v, bond bidx).
177///
178/// Returns a pair of:
179/// - Sorted `Vec<BondIdx>` representing the cycle as a set of bonds (for GF(2) operations).
180/// - Ordered `Vec<AtomIdx>` representing the ring path (for the public API).
181fn fundamental_cycle(
182    mol: &Molecule,
183    u: AtomIdx,
184    v: AtomIdx,
185    bidx: BondIdx,
186    parent: &[Option<AtomIdx>],
187    depth: &[u32],
188) -> Option<(Vec<BondIdx>, Vec<AtomIdx>)> {
189    // Walk both endpoints up to the LCA (lowest common ancestor) in the spanning tree.
190    // path_u: atoms from u up to (and including) LCA
191    // path_v: atoms from v up to (and including) LCA
192    let (path_u, path_v) = paths_to_lca(u, v, parent, depth);
193
194    if path_u.is_empty() || path_v.is_empty() {
195        return None;
196    }
197
198    // Build the ordered atom ring: path_u (from u to LCA) ++ reverse(path_v[0..end-1])
199    // i.e. u ... LCA ... v and then the back edge closes to u
200    let mut ring_atoms: Vec<AtomIdx> = path_u.clone();
201    // Add path_v in reverse order (excluding the LCA which is already in ring_atoms)
202    for &a in path_v.iter().rev().skip(1) {
203        ring_atoms.push(a);
204    }
205
206    // Collect the bond indices that form this ring.
207    let mut bond_set: Vec<BondIdx> = Vec::new();
208
209    // Bonds along path_u (tree edges)
210    for i in 0..path_u.len().saturating_sub(1) {
211        if let Some((b, _)) = mol.bond_between(path_u[i], path_u[i + 1]) {
212            bond_set.push(b);
213        } else {
214            return None;
215        }
216    }
217    // Bonds along path_v (tree edges, reversed — same bonds)
218    for i in 0..path_v.len().saturating_sub(1) {
219        if let Some((b, _)) = mol.bond_between(path_v[i], path_v[i + 1]) {
220            bond_set.push(b);
221        } else {
222            return None;
223        }
224    }
225    // The back edge itself
226    bond_set.push(bidx);
227
228    // Sort and deduplicate (each tree edge can appear in both path_u and path_v
229    // only if they share the same edge — that cannot happen since paths diverge at LCA).
230    bond_set.sort();
231    bond_set.dedup();
232
233    Some((bond_set, ring_atoms))
234}
235
236/// Compute the paths from `u` and `v` to their lowest common ancestor (LCA)
237/// in the spanning tree defined by `parent`.
238///
239/// Returns `(path_u, path_v)` where each path includes the endpoint and the LCA.
240fn paths_to_lca(
241    u: AtomIdx,
242    v: AtomIdx,
243    parent: &[Option<AtomIdx>],
244    depth: &[u32],
245) -> (Vec<AtomIdx>, Vec<AtomIdx>) {
246    // Collect ancestors of u and v by walking up the parent pointers.
247    let ancestors_u = ancestors(u, parent);
248    let ancestors_v = ancestors(v, parent);
249
250    let set_u: HashMap<AtomIdx, usize> = ancestors_u
251        .iter()
252        .enumerate()
253        .map(|(i, &a)| (a, i))
254        .collect();
255
256    // Find LCA: first ancestor of v (in order from v to root) that is also in set_u.
257    let mut lca = None;
258    let mut idx_in_v = 0;
259    for (i, &a) in ancestors_v.iter().enumerate() {
260        if set_u.contains_key(&a) {
261            lca = Some(a);
262            idx_in_v = i;
263            break;
264        }
265    }
266
267    let lca = match lca {
268        Some(a) => a,
269        None => {
270            // u and v are in different components — not a valid back edge
271            // (should not happen if called correctly).
272            return (Vec::new(), Vec::new());
273        }
274    };
275
276    let idx_in_u = *set_u.get(&lca).unwrap();
277
278    let path_u = ancestors_u[..=idx_in_u].to_vec();
279    let path_v = ancestors_v[..=idx_in_v].to_vec();
280
281    // Suppress the depth parameter warning — we keep it for potential future use.
282    let _ = depth;
283
284    (path_u, path_v)
285}
286
287/// Walk parent pointers from `start` to the root, returning the full ancestor chain
288/// including `start` itself.
289fn ancestors(start: AtomIdx, parent: &[Option<AtomIdx>]) -> Vec<AtomIdx> {
290    let mut chain = Vec::new();
291    let mut current = start;
292    loop {
293        chain.push(current);
294        match parent[current.0 as usize] {
295            Some(p) => current = p,
296            None => break,
297        }
298    }
299    chain
300}
301
302// ---------------------------------------------------------------------------
303// GF(2) Gaussian elimination
304// ---------------------------------------------------------------------------
305
306/// Reduce `cycle` over GF(2) against the current `basis`.
307///
308/// Each basis entry maps a pivot bond (the minimum BondIdx in that row)
309/// to the full row (sorted Vec<BondIdx>).
310///
311/// Returns the reduced cycle (empty if dependent on existing basis).
312fn gf2_reduce(cycle: &[BondIdx], basis: &HashMap<BondIdx, Vec<BondIdx>>) -> Vec<BondIdx> {
313    let mut current: Vec<BondIdx> = cycle.to_vec();
314
315    loop {
316        if current.is_empty() {
317            return current;
318        }
319        let pivot = *current.iter().min().unwrap();
320        match basis.get(&pivot) {
321            None => return current, // independent
322            Some(basis_row) => {
323                // XOR: symmetric difference of the two sorted sets.
324                current = sym_diff(&current, basis_row);
325            }
326        }
327    }
328}
329
330/// Symmetric difference of two sorted slices (GF(2) addition / XOR for sets).
331fn sym_diff(a: &[BondIdx], b: &[BondIdx]) -> Vec<BondIdx> {
332    let mut result = Vec::new();
333    let mut i = 0;
334    let mut j = 0;
335    while i < a.len() && j < b.len() {
336        match a[i].cmp(&b[j]) {
337            std::cmp::Ordering::Less => {
338                result.push(a[i]);
339                i += 1;
340            }
341            std::cmp::Ordering::Greater => {
342                result.push(b[j]);
343                j += 1;
344            }
345            std::cmp::Ordering::Equal => {
346                // Both contain this element — XOR removes it.
347                i += 1;
348                j += 1;
349            }
350        }
351    }
352    result.extend_from_slice(&a[i..]);
353    result.extend_from_slice(&b[j..]);
354    result
355}
356
357// ---------------------------------------------------------------------------
358// Tests
359// ---------------------------------------------------------------------------
360
361#[cfg(test)]
362mod tests {
363    use super::*;
364    use chematic_core::{Atom, BondOrder, Element, MoleculeBuilder};
365
366    // Build a cyclohexane molecule (6 carbons, 6 single bonds).
367    fn cyclohexane() -> chematic_core::Molecule {
368        let mut b = MoleculeBuilder::new();
369        let atoms: Vec<_> = (0..6).map(|_| b.add_atom(Atom::new(Element::C))).collect();
370        for i in 0..6 {
371            b.add_bond(atoms[i], atoms[(i + 1) % 6], BondOrder::Single).unwrap();
372        }
373        b.build()
374    }
375
376    // Build a benzene molecule (6 aromatic carbons, 6 single bonds for topology).
377    fn benzene() -> chematic_core::Molecule {
378        let mut b = MoleculeBuilder::new();
379        let atoms: Vec<_> = (0..6).map(|_| b.add_atom(Atom::new(Element::C))).collect();
380        for i in 0..6 {
381            b.add_bond(atoms[i], atoms[(i + 1) % 6], BondOrder::Single).unwrap();
382        }
383        b.build()
384    }
385
386    // Build naphthalene: 10 atoms, 11 bonds (two fused 6-membered rings).
387    // Atom numbering:
388    //   0-1-2-3-4-5-0  (ring 1 perimeter, 6 atoms)
389    //   4-6-7-8-9-5    (ring 2, sharing bond 4-5)
390    fn naphthalene() -> chematic_core::Molecule {
391        let mut b = MoleculeBuilder::new();
392        let atoms: Vec<_> = (0..10).map(|_| b.add_atom(Atom::new(Element::C))).collect();
393        // Ring 1: 0-1-2-3-4-9-0
394        let ring1 = [0usize, 1, 2, 3, 4, 9];
395        for i in 0..6 {
396            b.add_bond(atoms[ring1[i]], atoms[ring1[(i + 1) % 6]], BondOrder::Single).unwrap();
397        }
398        // Ring 2: 4-5-6-7-8-9 (shares bond 4-9)
399        // Bonds to add: 4-5, 5-6, 6-7, 7-8, 8-9
400        b.add_bond(atoms[4], atoms[5], BondOrder::Single).unwrap();
401        b.add_bond(atoms[5], atoms[6], BondOrder::Single).unwrap();
402        b.add_bond(atoms[6], atoms[7], BondOrder::Single).unwrap();
403        b.add_bond(atoms[7], atoms[8], BondOrder::Single).unwrap();
404        b.add_bond(atoms[8], atoms[9], BondOrder::Single).unwrap();
405        b.build()
406    }
407
408    // Build norbornane (bicyclo[2.2.1]heptane): 7 carbons, 8 bonds, 2 rings.
409    // Numbering:
410    //   bridgehead atoms: 0, 3
411    //   bridge 1: 0-1-2-3
412    //   bridge 2: 0-4-5-3
413    //   bridge 3: 0-6-3  (one-carbon bridge)
414    fn norbornane() -> chematic_core::Molecule {
415        let mut b = MoleculeBuilder::new();
416        let atoms: Vec<_> = (0..7).map(|_| b.add_atom(Atom::new(Element::C))).collect();
417        // Bridge 1: 0-1-2-3
418        b.add_bond(atoms[0], atoms[1], BondOrder::Single).unwrap();
419        b.add_bond(atoms[1], atoms[2], BondOrder::Single).unwrap();
420        b.add_bond(atoms[2], atoms[3], BondOrder::Single).unwrap();
421        // Bridge 2: 0-4-5-3
422        b.add_bond(atoms[0], atoms[4], BondOrder::Single).unwrap();
423        b.add_bond(atoms[4], atoms[5], BondOrder::Single).unwrap();
424        b.add_bond(atoms[5], atoms[3], BondOrder::Single).unwrap();
425        // Bridge 3: 0-6-3
426        b.add_bond(atoms[0], atoms[6], BondOrder::Single).unwrap();
427        b.add_bond(atoms[6], atoms[3], BondOrder::Single).unwrap();
428        b.build()
429    }
430
431    #[test]
432    fn test_cyclohexane_sssr() {
433        let mol = cyclohexane();
434        let rings = find_sssr(&mol);
435        assert_eq!(rings.ring_count(), 1, "cyclohexane has exactly 1 ring");
436        assert_eq!(rings.rings()[0].len(), 6, "cyclohexane ring has 6 atoms");
437    }
438
439    #[test]
440    fn test_benzene_sssr() {
441        let mol = benzene();
442        let rings = find_sssr(&mol);
443        assert_eq!(rings.ring_count(), 1, "benzene has exactly 1 ring");
444        assert_eq!(rings.rings()[0].len(), 6, "benzene ring has 6 atoms");
445    }
446
447    #[test]
448    fn test_naphthalene_sssr() {
449        let mol = naphthalene();
450        let rings = find_sssr(&mol);
451        // Cycle rank: 11 bonds - 10 atoms + 1 component = 2
452        // SSSR should have 2 rings, both 6-membered.
453        assert_eq!(rings.ring_count(), 2, "naphthalene SSSR has 2 rings");
454        for ring in rings.rings() {
455            assert_eq!(ring.len(), 6, "each naphthalene SSSR ring has 6 atoms");
456        }
457    }
458
459    #[test]
460    fn test_norbornane_sssr() {
461        let mol = norbornane();
462        let rings = find_sssr(&mol);
463        // Cycle rank: 8 bonds - 7 atoms + 1 component = 2
464        assert_eq!(rings.ring_count(), 2, "norbornane SSSR has 2 rings");
465        // The two smallest rings are both 5-membered.
466        for ring in rings.rings() {
467            assert_eq!(ring.len(), 5, "each norbornane SSSR ring has 5 atoms");
468        }
469    }
470
471    #[test]
472    fn test_acyclic_molecule() {
473        // Ethane: no rings.
474        let mut b = MoleculeBuilder::new();
475        let c1 = b.add_atom(Atom::new(Element::C));
476        let c2 = b.add_atom(Atom::new(Element::C));
477        b.add_bond(c1, c2, BondOrder::Single).unwrap();
478        let mol = b.build();
479        let rings = find_sssr(&mol);
480        assert_eq!(rings.ring_count(), 0);
481    }
482
483    #[test]
484    fn test_contains_atom() {
485        let mol = cyclohexane();
486        let rings = find_sssr(&mol);
487        for i in 0..6u32 {
488            assert!(rings.contains_atom(AtomIdx(i)), "atom {} should be in a ring", i);
489        }
490    }
491
492    #[test]
493    fn test_atoms_in_ring_count_benzene() {
494        let mol = benzene();
495        let rings = find_sssr(&mol);
496        for i in 0..6u32 {
497            assert_eq!(
498                rings.atoms_in_ring_count(AtomIdx(i)),
499                1,
500                "each benzene atom is in exactly 1 ring"
501            );
502        }
503    }
504}