Skip to main content

chematic_perception/
sssr.rs

1//! Smallest Set of Smallest Rings (SSSR) via the Balducci-Pearlman algorithm.
2//!
3//! Algorithm overview:
4//! 1. Compute the cycle rank r = E - V + C (Euler characteristic),
5//!    where C is the number of connected components.
6//! 2. Build a BFS spanning forest over all connected components.
7//! 3. Each non-tree (back) edge gives one fundamental cycle:
8//!    the unique path between its endpoints in the spanning tree, plus the edge itself.
9//! 4. Represent each cycle as a set of bond indices (for GF(2) XOR independence testing).
10//! 5. Use Gaussian elimination over GF(2) to greedily build an independent basis of r cycles,
11//!    preferring shorter cycles.
12//! 6. Convert the chosen bond-sets back to ordered atom sequences for the public API.
13
14use std::collections::{HashMap, VecDeque};
15
16use chematic_core::{AtomIdx, BondIdx, Molecule};
17
18// ---------------------------------------------------------------------------
19// Public types
20// ---------------------------------------------------------------------------
21
22/// The Smallest Set of Smallest Rings for a molecule.
23///
24/// Each ring is stored as a sequence of `AtomIdx` values listed in ring order.
25/// The first atom is not repeated at the end.
26#[derive(Debug, Clone)]
27pub struct RingSet(Vec<Vec<AtomIdx>>);
28
29impl RingSet {
30    /// All rings as slices of atom indices.
31    pub fn rings(&self) -> &[Vec<AtomIdx>] {
32        &self.0
33    }
34
35    /// Number of rings in the SSSR.
36    pub fn ring_count(&self) -> usize {
37        self.0.len()
38    }
39
40    /// Whether atom `atom` is a member of at least one ring.
41    pub fn contains_atom(&self, atom: AtomIdx) -> bool {
42        self.0.iter().any(|ring| ring.contains(&atom))
43    }
44
45    /// Number of rings that atom `atom` belongs to.
46    pub fn atoms_in_ring_count(&self, atom: AtomIdx) -> usize {
47        self.0.iter().filter(|ring| ring.contains(&atom)).count()
48    }
49}
50
51// ---------------------------------------------------------------------------
52// Main entry point
53// ---------------------------------------------------------------------------
54
55/// Compute the Smallest Set of Smallest Rings for `mol`.
56///
57/// Returns a [`RingSet`] whose ring count equals the cycle rank r = E - V + C.
58/// For acyclic molecules (r = 0) the returned set is empty.
59pub fn find_sssr(mol: &Molecule) -> RingSet {
60    let v = mol.atom_count();
61    let e = mol.bond_count();
62
63    if v == 0 || e == 0 {
64        return RingSet(Vec::new());
65    }
66
67    // Count connected components and build the BFS spanning forest.
68    let (components, parent) = bfs_spanning_forest(mol);
69    let r = (e as isize) - (v as isize) + (components as isize);
70
71    if r <= 0 {
72        return RingSet(Vec::new());
73    }
74    let r = r as usize;
75
76    // Collect all fundamental cycles from back edges as bond-index sets.
77    let mut candidate_cycles: Vec<(Vec<BondIdx>, Vec<AtomIdx>)> = Vec::new();
78
79    for (bidx, bond) in mol.bonds() {
80        let u = bond.atom1;
81        let v_atom = bond.atom2;
82
83        // A bond is a back edge if neither endpoint is the parent of the other
84        // in the spanning forest.
85        let u_parent = parent[u.0 as usize];
86        let v_parent = parent[v_atom.0 as usize];
87
88        let is_tree_edge = (u_parent == Some(v_atom)) || (v_parent == Some(u));
89
90        if !is_tree_edge {
91            // This is a back edge — reconstruct the fundamental cycle.
92            if let Some((bond_set, atom_seq)) = fundamental_cycle(mol, u, v_atom, bidx, &parent) {
93                candidate_cycles.push((bond_set, atom_seq));
94            }
95        }
96    }
97
98    // Sort by cycle length (shorter first) for greedy shortest-cycle preference.
99    candidate_cycles.sort_by_key(|(bonds, _)| bonds.len());
100
101    // Gaussian elimination over GF(2) to select r linearly independent cycles.
102    // The basis maps a pivot BondIdx to the full bond-set of that basis row.
103    let mut basis: HashMap<BondIdx, Vec<BondIdx>> = HashMap::new();
104    let mut selected_atoms: Vec<Vec<AtomIdx>> = Vec::new();
105
106    for (bond_set, atom_seq) in candidate_cycles {
107        // Reduce this cycle against the current basis.
108        let reduced = gf2_reduce(&bond_set, &basis);
109
110        if !reduced.is_empty() {
111            // This cycle is independent — add it to the basis.
112            let pivot = *reduced.iter().min().unwrap();
113            basis.insert(pivot, reduced);
114            selected_atoms.push(atom_seq);
115
116            if selected_atoms.len() == r {
117                break;
118            }
119        }
120    }
121
122    // Sort output rings by length for output consistency.
123    selected_atoms.sort_by_key(|ring| ring.len());
124    RingSet(selected_atoms)
125}
126
127// ---------------------------------------------------------------------------
128// BFS spanning forest
129// ---------------------------------------------------------------------------
130
131/// Build a BFS spanning forest over the entire molecule.
132///
133/// Returns:
134/// - `components`: number of connected components.
135/// - `parent`: for each atom, the atom from which it was first discovered
136///   (None for BFS roots).
137fn bfs_spanning_forest(mol: &Molecule) -> (usize, Vec<Option<AtomIdx>>) {
138    let n = mol.atom_count();
139    let mut visited = vec![false; n];
140    let mut parent: Vec<Option<AtomIdx>> = vec![None; n];
141    let mut components = 0;
142    let mut queue: VecDeque<AtomIdx> = VecDeque::new();
143
144    for start in 0..n {
145        if visited[start] {
146            continue;
147        }
148        components += 1;
149        let start_idx = AtomIdx(start as u32);
150        visited[start] = true;
151        queue.push_back(start_idx);
152
153        while let Some(current) = queue.pop_front() {
154            for (neighbor, _bidx) in mol.neighbors(current) {
155                let ni = neighbor.0 as usize;
156                if !visited[ni] {
157                    visited[ni] = true;
158                    parent[ni] = Some(current);
159                    queue.push_back(neighbor);
160                }
161            }
162        }
163    }
164
165    (components, parent)
166}
167
168// ---------------------------------------------------------------------------
169// Fundamental cycle reconstruction
170// ---------------------------------------------------------------------------
171
172/// Reconstruct the fundamental cycle introduced by the back edge (u, v, bond bidx).
173///
174/// Returns a pair of:
175/// - Sorted `Vec<BondIdx>` representing the cycle as a set of bonds (for GF(2) operations).
176/// - Ordered `Vec<AtomIdx>` representing the ring path (for the public API).
177fn fundamental_cycle(
178    mol: &Molecule,
179    u: AtomIdx,
180    v: AtomIdx,
181    bidx: BondIdx,
182    parent: &[Option<AtomIdx>],
183) -> Option<(Vec<BondIdx>, Vec<AtomIdx>)> {
184    // Walk both endpoints up to the LCA (lowest common ancestor) in the spanning tree.
185    // path_u: atoms from u up to (and including) LCA
186    // path_v: atoms from v up to (and including) LCA
187    let (path_u, path_v) = paths_to_lca(u, v, parent);
188
189    if path_u.is_empty() || path_v.is_empty() {
190        return None;
191    }
192
193    // Build the ordered atom ring: path_u (from u to LCA) ++ reverse(path_v[0..end-1])
194    // i.e. u ... LCA ... v and then the back edge closes to u
195    let mut ring_atoms: Vec<AtomIdx> = path_u.clone();
196    // Add path_v in reverse order (excluding the LCA which is already in ring_atoms)
197    for &a in path_v.iter().rev().skip(1) {
198        ring_atoms.push(a);
199    }
200
201    // Collect the bond indices that form this ring.
202    let mut bond_set: Vec<BondIdx> = Vec::new();
203
204    // Bonds along path_u (tree edges)
205    for i in 0..path_u.len().saturating_sub(1) {
206        if let Some((b, _)) = mol.bond_between(path_u[i], path_u[i + 1]) {
207            bond_set.push(b);
208        } else {
209            return None;
210        }
211    }
212    // Bonds along path_v (tree edges, reversed — same bonds)
213    for i in 0..path_v.len().saturating_sub(1) {
214        if let Some((b, _)) = mol.bond_between(path_v[i], path_v[i + 1]) {
215            bond_set.push(b);
216        } else {
217            return None;
218        }
219    }
220    // The back edge itself
221    bond_set.push(bidx);
222
223    // Sort and deduplicate (each tree edge can appear in both path_u and path_v
224    // only if they share the same edge — that cannot happen since paths diverge at LCA).
225    bond_set.sort();
226    bond_set.dedup();
227
228    Some((bond_set, ring_atoms))
229}
230
231/// Compute the paths from `u` and `v` to their lowest common ancestor (LCA)
232/// in the spanning tree defined by `parent`.
233///
234/// Returns `(path_u, path_v)` where each path includes the endpoint and the LCA.
235fn paths_to_lca(
236    u: AtomIdx,
237    v: AtomIdx,
238    parent: &[Option<AtomIdx>],
239) -> (Vec<AtomIdx>, Vec<AtomIdx>) {
240    // Collect ancestors of u and v by walking up the parent pointers.
241    let ancestors_u = ancestors(u, parent);
242    let ancestors_v = ancestors(v, parent);
243
244    let set_u: HashMap<AtomIdx, usize> = ancestors_u
245        .iter()
246        .enumerate()
247        .map(|(i, &a)| (a, i))
248        .collect();
249
250    // Find LCA: first ancestor of v (in order from v to root) that is also in set_u.
251    let Some((idx_in_v, lca)) = ancestors_v
252        .iter()
253        .enumerate()
254        .find_map(|(i, a)| set_u.contains_key(a).then_some((i, *a)))
255    else {
256        // u and v are in different components — not a valid back edge
257        // (should not happen if called correctly).
258        return (Vec::new(), Vec::new());
259    };
260
261    let idx_in_u = set_u[&lca];
262    let path_u = ancestors_u[..=idx_in_u].to_vec();
263    let path_v = ancestors_v[..=idx_in_v].to_vec();
264    (path_u, path_v)
265}
266
267/// Walk parent pointers from `start` to the root, returning the full ancestor chain
268/// including `start` itself.
269fn ancestors(start: AtomIdx, parent: &[Option<AtomIdx>]) -> Vec<AtomIdx> {
270    let mut chain = Vec::new();
271    let mut current = start;
272    loop {
273        chain.push(current);
274        match parent[current.0 as usize] {
275            Some(p) => current = p,
276            None => break,
277        }
278    }
279    chain
280}
281
282// ---------------------------------------------------------------------------
283// GF(2) Gaussian elimination
284// ---------------------------------------------------------------------------
285
286/// Reduce `cycle` over GF(2) against the current `basis`.
287///
288/// Each basis entry maps a pivot bond (the minimum BondIdx in that row)
289/// to the full row (sorted Vec<BondIdx>).
290///
291/// Returns the reduced cycle (empty if dependent on existing basis).
292fn gf2_reduce(cycle: &[BondIdx], basis: &HashMap<BondIdx, Vec<BondIdx>>) -> Vec<BondIdx> {
293    let mut current: Vec<BondIdx> = cycle.to_vec();
294    while let Some(&pivot) = current.iter().min() {
295        match basis.get(&pivot) {
296            None => return current, // independent
297            // XOR: symmetric difference of the two sorted sets.
298            Some(basis_row) => current = sym_diff(&current, basis_row),
299        }
300    }
301    current
302}
303
304/// Symmetric difference of two sorted slices (GF(2) addition / XOR for sets).
305fn sym_diff(a: &[BondIdx], b: &[BondIdx]) -> Vec<BondIdx> {
306    let mut result = Vec::new();
307    let mut i = 0;
308    let mut j = 0;
309    while i < a.len() && j < b.len() {
310        match a[i].cmp(&b[j]) {
311            std::cmp::Ordering::Less => {
312                result.push(a[i]);
313                i += 1;
314            }
315            std::cmp::Ordering::Greater => {
316                result.push(b[j]);
317                j += 1;
318            }
319            std::cmp::Ordering::Equal => {
320                // Both contain this element — XOR removes it.
321                i += 1;
322                j += 1;
323            }
324        }
325    }
326    result.extend_from_slice(&a[i..]);
327    result.extend_from_slice(&b[j..]);
328    result
329}
330
331// ---------------------------------------------------------------------------
332// Tests
333// ---------------------------------------------------------------------------
334
335#[cfg(test)]
336mod tests {
337    use super::*;
338    use chematic_core::{Atom, BondOrder, Element, MoleculeBuilder};
339
340    // Build a cyclohexane molecule (6 carbons, 6 single bonds).
341    fn cyclohexane() -> chematic_core::Molecule {
342        let mut b = MoleculeBuilder::new();
343        let atoms: Vec<_> = (0..6).map(|_| b.add_atom(Atom::new(Element::C))).collect();
344        for i in 0..6 {
345            b.add_bond(atoms[i], atoms[(i + 1) % 6], BondOrder::Single).unwrap();
346        }
347        b.build()
348    }
349
350    // Build a benzene molecule (6 aromatic carbons, 6 single bonds for topology).
351    fn benzene() -> chematic_core::Molecule {
352        let mut b = MoleculeBuilder::new();
353        let atoms: Vec<_> = (0..6).map(|_| b.add_atom(Atom::new(Element::C))).collect();
354        for i in 0..6 {
355            b.add_bond(atoms[i], atoms[(i + 1) % 6], BondOrder::Single).unwrap();
356        }
357        b.build()
358    }
359
360    // Build naphthalene: 10 atoms, 11 bonds (two fused 6-membered rings).
361    // Atom numbering:
362    //   0-1-2-3-4-5-0  (ring 1 perimeter, 6 atoms)
363    //   4-6-7-8-9-5    (ring 2, sharing bond 4-5)
364    fn naphthalene() -> chematic_core::Molecule {
365        let mut b = MoleculeBuilder::new();
366        let atoms: Vec<_> = (0..10).map(|_| b.add_atom(Atom::new(Element::C))).collect();
367        // Ring 1: 0-1-2-3-4-9-0
368        let ring1 = [0usize, 1, 2, 3, 4, 9];
369        for i in 0..6 {
370            b.add_bond(atoms[ring1[i]], atoms[ring1[(i + 1) % 6]], BondOrder::Single).unwrap();
371        }
372        // Ring 2: 4-5-6-7-8-9 (shares bond 4-9)
373        // Bonds to add: 4-5, 5-6, 6-7, 7-8, 8-9
374        b.add_bond(atoms[4], atoms[5], BondOrder::Single).unwrap();
375        b.add_bond(atoms[5], atoms[6], BondOrder::Single).unwrap();
376        b.add_bond(atoms[6], atoms[7], BondOrder::Single).unwrap();
377        b.add_bond(atoms[7], atoms[8], BondOrder::Single).unwrap();
378        b.add_bond(atoms[8], atoms[9], BondOrder::Single).unwrap();
379        b.build()
380    }
381
382    // Build norbornane (bicyclo[2.2.1]heptane): 7 carbons, 8 bonds, 2 rings.
383    // Numbering:
384    //   bridgehead atoms: 0, 3
385    //   bridge 1: 0-1-2-3
386    //   bridge 2: 0-4-5-3
387    //   bridge 3: 0-6-3  (one-carbon bridge)
388    fn norbornane() -> chematic_core::Molecule {
389        let mut b = MoleculeBuilder::new();
390        let atoms: Vec<_> = (0..7).map(|_| b.add_atom(Atom::new(Element::C))).collect();
391        // Bridge 1: 0-1-2-3
392        b.add_bond(atoms[0], atoms[1], BondOrder::Single).unwrap();
393        b.add_bond(atoms[1], atoms[2], BondOrder::Single).unwrap();
394        b.add_bond(atoms[2], atoms[3], BondOrder::Single).unwrap();
395        // Bridge 2: 0-4-5-3
396        b.add_bond(atoms[0], atoms[4], BondOrder::Single).unwrap();
397        b.add_bond(atoms[4], atoms[5], BondOrder::Single).unwrap();
398        b.add_bond(atoms[5], atoms[3], BondOrder::Single).unwrap();
399        // Bridge 3: 0-6-3
400        b.add_bond(atoms[0], atoms[6], BondOrder::Single).unwrap();
401        b.add_bond(atoms[6], atoms[3], BondOrder::Single).unwrap();
402        b.build()
403    }
404
405    #[test]
406    fn test_cyclohexane_sssr() {
407        let mol = cyclohexane();
408        let rings = find_sssr(&mol);
409        assert_eq!(rings.ring_count(), 1, "cyclohexane has exactly 1 ring");
410        assert_eq!(rings.rings()[0].len(), 6, "cyclohexane ring has 6 atoms");
411    }
412
413    #[test]
414    fn test_benzene_sssr() {
415        let mol = benzene();
416        let rings = find_sssr(&mol);
417        assert_eq!(rings.ring_count(), 1, "benzene has exactly 1 ring");
418        assert_eq!(rings.rings()[0].len(), 6, "benzene ring has 6 atoms");
419    }
420
421    #[test]
422    fn test_naphthalene_sssr() {
423        let mol = naphthalene();
424        let rings = find_sssr(&mol);
425        // Cycle rank: 11 bonds - 10 atoms + 1 component = 2
426        // SSSR should have 2 rings, both 6-membered.
427        assert_eq!(rings.ring_count(), 2, "naphthalene SSSR has 2 rings");
428        for ring in rings.rings() {
429            assert_eq!(ring.len(), 6, "each naphthalene SSSR ring has 6 atoms");
430        }
431    }
432
433    #[test]
434    fn test_norbornane_sssr() {
435        let mol = norbornane();
436        let rings = find_sssr(&mol);
437        // Cycle rank: 8 bonds - 7 atoms + 1 component = 2
438        assert_eq!(rings.ring_count(), 2, "norbornane SSSR has 2 rings");
439        // The two smallest rings are both 5-membered.
440        for ring in rings.rings() {
441            assert_eq!(ring.len(), 5, "each norbornane SSSR ring has 5 atoms");
442        }
443    }
444
445    #[test]
446    fn test_acyclic_molecule() {
447        // Ethane: no rings.
448        let mut b = MoleculeBuilder::new();
449        let c1 = b.add_atom(Atom::new(Element::C));
450        let c2 = b.add_atom(Atom::new(Element::C));
451        b.add_bond(c1, c2, BondOrder::Single).unwrap();
452        let mol = b.build();
453        let rings = find_sssr(&mol);
454        assert_eq!(rings.ring_count(), 0);
455    }
456
457    #[test]
458    fn test_contains_atom() {
459        let mol = cyclohexane();
460        let rings = find_sssr(&mol);
461        for i in 0..6u32 {
462            assert!(rings.contains_atom(AtomIdx(i)), "atom {} should be in a ring", i);
463        }
464    }
465
466    #[test]
467    fn test_atoms_in_ring_count_benzene() {
468        let mol = benzene();
469        let rings = find_sssr(&mol);
470        for i in 0..6u32 {
471            assert_eq!(
472                rings.atoms_in_ring_count(AtomIdx(i)),
473                1,
474                "each benzene atom is in exactly 1 ring"
475            );
476        }
477    }
478}