Skip to main content

sci_form/rings/
sssr.rs

1//! Smallest Set of Smallest Rings (SSSR) using Horton's algorithm.
2//!
3//! Identifies the fundamental cycle basis of the molecular graph:
4//! 1. Compute shortest paths between all atom pairs (Floyd-Warshall)
5//! 2. For each edge (u,v), find the shortest cycle containing it
6//! 3. Reduce to a linearly independent set of minimum total weight
7
8use crate::graph::Molecule;
9use petgraph::graph::NodeIndex;
10use petgraph::visit::EdgeRef;
11use serde::{Deserialize, Serialize};
12use std::collections::{BTreeMap, BTreeSet, VecDeque};
13
14/// Information about a single ring in the SSSR.
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct RingInfo {
17    /// Atom indices forming the ring (in order around the cycle).
18    pub atoms: Vec<usize>,
19    /// Ring size.
20    pub size: usize,
21    /// Whether the ring is aromatic (Hückel 4n+2).
22    pub is_aromatic: bool,
23}
24
25/// Result of SSSR computation.
26#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct SssrResult {
28    /// The rings in the SSSR.
29    pub rings: Vec<RingInfo>,
30    /// Per-atom ring membership count.
31    pub atom_ring_count: Vec<usize>,
32    /// Per-atom ring sizes (all ring sizes the atom belongs to).
33    pub atom_ring_sizes: Vec<Vec<usize>>,
34    /// Ring-size histogram: index = ring size, value = count.
35    pub ring_size_histogram: Vec<usize>,
36}
37
38/// Compute the Smallest Set of Smallest Rings (SSSR) for a molecule.
39///
40/// Uses BFS-based cycle detection: for each edge (u,v), remove it and check
41/// if u and v are still connected. If so, the shortest alternative path + the
42/// edge forms a ring candidate. We collect all unique minimal rings.
43pub fn compute_sssr(mol: &Molecule) -> SssrResult {
44    let n = mol.graph.node_count();
45    let m = mol.graph.edge_count();
46
47    if n == 0 || m == 0 {
48        return SssrResult {
49            rings: vec![],
50            atom_ring_count: vec![0; n],
51            atom_ring_sizes: vec![vec![]; n],
52            ring_size_histogram: vec![],
53        };
54    }
55
56    // Build adjacency list
57    let mut adj: Vec<BTreeSet<usize>> = vec![BTreeSet::new(); n];
58    let mut edges: Vec<(usize, usize)> = Vec::with_capacity(m);
59    for edge in mol.graph.edge_references() {
60        let u = edge.source().index();
61        let v = edge.target().index();
62        adj[u].insert(v);
63        adj[v].insert(u);
64        if u < v {
65            edges.push((u, v));
66        } else {
67            edges.push((v, u));
68        }
69    }
70    edges.sort();
71    edges.dedup();
72
73    // Expected size of a cycle basis: m - n + c, where c is the number of
74    // connected components in the graph.
75    let n_expected = m.saturating_sub(n) + connected_components(&adj);
76
77    let mut ring_candidates: Vec<Vec<usize>> = Vec::new();
78
79    // For each edge, find the shortest cycle containing it
80    for &(u, v) in &edges {
81        // BFS from u to v without using the direct edge u-v
82        if let Some(path) = bfs_shortest_path_excluding_edge(&adj, u, v, n) {
83            // path is u → ... → v (not including the direct edge)
84            // The ring is path (which already includes u and v)
85            let ring = path;
86            if ring.len() >= 3 {
87                ring_candidates.push(ring);
88            }
89        }
90    }
91
92    // Deduplicate rings: normalize each ring to a canonical form
93    let mut unique_rings: Vec<Vec<usize>> = Vec::new();
94    let mut seen: BTreeSet<Vec<usize>> = BTreeSet::new();
95
96    for ring in &ring_candidates {
97        let canonical = canonicalize_ring(ring);
98        if seen.insert(canonical.clone()) {
99            unique_rings.push(ring.clone());
100        }
101    }
102
103    // Sort by ring size (prefer smallest rings)
104    unique_rings.sort_by_key(|r| r.len());
105
106    // Take only linearly independent rings (up to n_expected) using a GF(2)
107    // cycle-basis elimination on the ring-edge incidence vectors.
108    let mut selected_rings: Vec<Vec<usize>> = Vec::new();
109    let edge_to_bit: BTreeMap<(usize, usize), usize> = edges
110        .iter()
111        .enumerate()
112        .map(|(idx, &edge)| (edge, idx))
113        .collect();
114    let n_words = edges.len().div_ceil(64);
115    let mut basis_rows: Vec<(usize, Vec<u64>)> = Vec::new();
116
117    for ring in &unique_rings {
118        if selected_rings.len() >= n_expected {
119            break;
120        }
121        let ring_bits = ring_bitset(ring, &edge_to_bit, n_words);
122        if insert_basis_row(&mut basis_rows, ring_bits) {
123            selected_rings.push(ring.clone());
124        }
125    }
126
127    // Determine aromaticity for each ring
128    let rings: Vec<RingInfo> = selected_rings
129        .iter()
130        .map(|ring| {
131            let is_aromatic = check_ring_aromaticity(mol, ring);
132            RingInfo {
133                size: ring.len(),
134                atoms: ring.clone(),
135                is_aromatic,
136            }
137        })
138        .collect();
139
140    // Compute per-atom ring membership
141    let mut atom_ring_count = vec![0usize; n];
142    let mut atom_ring_sizes: Vec<Vec<usize>> = vec![vec![]; n];
143    for ring in &rings {
144        for &atom in &ring.atoms {
145            atom_ring_count[atom] += 1;
146            atom_ring_sizes[atom].push(ring.size);
147        }
148    }
149
150    // Ring size histogram
151    let max_size = rings.iter().map(|r| r.size).max().unwrap_or(0);
152    let mut ring_size_histogram = vec![0usize; max_size + 1];
153    for ring in &rings {
154        ring_size_histogram[ring.size] += 1;
155    }
156
157    SssrResult {
158        rings,
159        atom_ring_count,
160        atom_ring_sizes,
161        ring_size_histogram,
162    }
163}
164
165/// BFS shortest path from `start` to `end` without using the direct edge between them.
166fn bfs_shortest_path_excluding_edge(
167    adj: &[BTreeSet<usize>],
168    start: usize,
169    end: usize,
170    n: usize,
171) -> Option<Vec<usize>> {
172    let mut visited = vec![false; n];
173    let mut parent = vec![usize::MAX; n];
174    let mut queue = VecDeque::new();
175
176    visited[start] = true;
177    queue.push_back(start);
178
179    while let Some(current) = queue.pop_front() {
180        for &next in &adj[current] {
181            // Skip the direct edge start-end
182            if current == start && next == end {
183                continue;
184            }
185            if current == end && next == start {
186                continue;
187            }
188
189            if !visited[next] {
190                visited[next] = true;
191                parent[next] = current;
192                if next == end {
193                    // Reconstruct path
194                    let mut path = vec![end];
195                    let mut curr = end;
196                    while curr != start {
197                        curr = parent[curr];
198                        path.push(curr);
199                    }
200                    path.reverse();
201                    return Some(path);
202                }
203                queue.push_back(next);
204            }
205        }
206    }
207
208    None
209}
210
211fn connected_components(adj: &[BTreeSet<usize>]) -> usize {
212    let mut visited = vec![false; adj.len()];
213    let mut components = 0;
214
215    for start in 0..adj.len() {
216        if visited[start] {
217            continue;
218        }
219        components += 1;
220        let mut queue = VecDeque::from([start]);
221        visited[start] = true;
222
223        while let Some(node) = queue.pop_front() {
224            for &next in &adj[node] {
225                if !visited[next] {
226                    visited[next] = true;
227                    queue.push_back(next);
228                }
229            }
230        }
231    }
232
233    components.max(1)
234}
235
236fn ring_bitset(
237    ring: &[usize],
238    edge_to_bit: &BTreeMap<(usize, usize), usize>,
239    n_words: usize,
240) -> Vec<u64> {
241    let mut bits = vec![0u64; n_words];
242    for edge in ring_to_edges(ring) {
243        if let Some(&bit_index) = edge_to_bit.get(&edge) {
244            bits[bit_index / 64] |= 1u64 << (bit_index % 64);
245        }
246    }
247    bits
248}
249
250fn insert_basis_row(basis_rows: &mut Vec<(usize, Vec<u64>)>, mut row: Vec<u64>) -> bool {
251    for (_, basis_row) in basis_rows.iter() {
252        if let Some(pivot) = highest_set_bit(basis_row) {
253            if ((row[pivot / 64] >> (pivot % 64)) & 1) == 1 {
254                xor_bitsets(&mut row, basis_row);
255            }
256        }
257    }
258
259    let Some(pivot) = highest_set_bit(&row) else {
260        return false;
261    };
262
263    for (_, basis_row) in basis_rows.iter_mut() {
264        if ((basis_row[pivot / 64] >> (pivot % 64)) & 1) == 1 {
265            xor_bitsets(basis_row, &row);
266        }
267    }
268
269    basis_rows.push((pivot, row));
270    basis_rows.sort_by(|a, b| b.0.cmp(&a.0));
271    true
272}
273
274fn xor_bitsets(target: &mut [u64], other: &[u64]) {
275    for (lhs, rhs) in target.iter_mut().zip(other.iter()) {
276        *lhs ^= *rhs;
277    }
278}
279
280fn highest_set_bit(bits: &[u64]) -> Option<usize> {
281    for (word_index, &word) in bits.iter().enumerate().rev() {
282        if word != 0 {
283            let bit = 63usize - word.leading_zeros() as usize;
284            return Some(word_index * 64 + bit);
285        }
286    }
287    None
288}
289
290/// Canonicalize a ring by choosing the smallest rotation starting from the minimum index.
291fn canonicalize_ring(ring: &[usize]) -> Vec<usize> {
292    if ring.is_empty() {
293        return vec![];
294    }
295
296    let n = ring.len();
297    // Find position of minimum element
298    let min_pos = ring
299        .iter()
300        .enumerate()
301        .min_by_key(|(_, &v)| v)
302        .map(|(i, _)| i)
303        .unwrap();
304
305    // Try forward rotation
306    let forward: Vec<usize> = (0..n).map(|i| ring[(min_pos + i) % n]).collect();
307    // Try reverse rotation
308    let reverse: Vec<usize> = (0..n).map(|i| ring[(min_pos + n - i) % n]).collect();
309
310    // Return lexicographically smaller
311    if forward <= reverse {
312        forward
313    } else {
314        reverse
315    }
316}
317
318/// Convert a ring (list of atom indices) to a sorted set of edges.
319fn ring_to_edges(ring: &[usize]) -> Vec<(usize, usize)> {
320    let n = ring.len();
321    let mut edges = Vec::with_capacity(n);
322    for i in 0..n {
323        let u = ring[i];
324        let v = ring[(i + 1) % n];
325        if u < v {
326            edges.push((u, v));
327        } else {
328            edges.push((v, u));
329        }
330    }
331    edges.sort();
332    edges
333}
334
335/// Check if a ring is aromatic using Hückel's rule (4n+2 π electrons).
336fn check_ring_aromaticity(mol: &Molecule, ring_atoms: &[usize]) -> bool {
337    use crate::graph::BondOrder;
338
339    // All ring atoms must be sp2 or have aromatic bonds
340    let all_sp2_or_aromatic = ring_atoms.iter().all(|&idx| {
341        let node = NodeIndex::new(idx);
342        let elem = mol.graph[node].element;
343        // Must be C, N, O, S (common aromatic atoms)
344        if !matches!(elem, 6 | 7 | 8 | 16) {
345            return false;
346        }
347        // Check if atom has aromatic bonds or is sp2
348        let has_aromatic = mol
349            .graph
350            .edges(node)
351            .any(|e| matches!(e.weight().order, BondOrder::Aromatic));
352        let is_sp2 = matches!(
353            mol.graph[node].hybridization,
354            crate::graph::Hybridization::SP2
355        );
356        has_aromatic || is_sp2
357    });
358
359    if !all_sp2_or_aromatic {
360        return false;
361    }
362
363    // Count π electrons using Hückel's rule
364    let mut pi_electrons = 0;
365    for &idx in ring_atoms {
366        let node = NodeIndex::new(idx);
367        let elem = mol.graph[node].element;
368        match elem {
369            6 => pi_electrons += 1, // C contributes 1 π electron
370            7 => {
371                // N: 1 if in pyridine (=N-), 2 if in pyrrole (NH)
372                let h_count = mol
373                    .graph
374                    .neighbors(node)
375                    .filter(|n| mol.graph[*n].element == 1)
376                    .count();
377                if h_count > 0 {
378                    pi_electrons += 2; // pyrrole-type N
379                } else {
380                    pi_electrons += 1; // pyridine-type N (or N in ring with lone pair)
381                }
382            }
383            8 => pi_electrons += 2,  // O contributes 2 (furan-type)
384            16 => pi_electrons += 2, // S contributes 2 (thiophene-type)
385            _ => pi_electrons += 1,
386        }
387    }
388
389    // Hückel's rule: 4n+2 for n = 0,1,2,...
390    // Common: 2 (n=0), 6 (n=1, benzene), 10 (n=2), 14 (n=3), 18 (n=4)
391    if pi_electrons < 2 {
392        return false;
393    }
394    (pi_electrons - 2) % 4 == 0
395}
396
397#[cfg(test)]
398mod tests {
399    use super::*;
400
401    #[test]
402    fn test_benzene_sssr() {
403        let mol = Molecule::from_smiles("c1ccccc1").unwrap();
404        let result = compute_sssr(&mol);
405
406        // Benzene: 1 ring of size 6
407        assert_eq!(result.rings.len(), 1, "Benzene should have 1 ring in SSSR");
408        assert_eq!(result.rings[0].size, 6, "Ring should be size 6");
409        assert!(result.rings[0].is_aromatic, "Ring should be aromatic");
410    }
411
412    #[test]
413    fn test_naphthalene_sssr() {
414        let mol = Molecule::from_smiles("c1ccc2ccccc2c1").unwrap();
415        let result = compute_sssr(&mol);
416
417        // Naphthalene: 2 rings of size 6
418        assert_eq!(
419            result.rings.len(),
420            2,
421            "Naphthalene SSSR should have 2 rings, got {}",
422            result.rings.len()
423        );
424        for ring in &result.rings {
425            assert_eq!(ring.size, 6, "All naphthalene rings should be size 6");
426            assert!(ring.is_aromatic, "All naphthalene rings should be aromatic");
427        }
428    }
429
430    #[test]
431    fn test_cyclohexane_sssr() {
432        let mol = Molecule::from_smiles("C1CCCCC1").unwrap();
433        let result = compute_sssr(&mol);
434
435        assert_eq!(result.rings.len(), 1, "Cyclohexane should have 1 ring");
436        assert_eq!(result.rings[0].size, 6);
437        assert!(!result.rings[0].is_aromatic, "Cyclohexane is not aromatic");
438    }
439
440    #[test]
441    fn test_ethane_no_rings() {
442        let mol = Molecule::from_smiles("CC").unwrap();
443        let result = compute_sssr(&mol);
444
445        assert_eq!(result.rings.len(), 0, "Ethane should have no rings");
446    }
447
448    #[test]
449    fn test_ring_canonicalization() {
450        let ring1 = vec![3, 0, 1, 2];
451        let ring2 = vec![1, 2, 3, 0];
452        assert_eq!(canonicalize_ring(&ring1), canonicalize_ring(&ring2));
453    }
454
455    #[test]
456    fn test_atom_ring_membership() {
457        let mol = Molecule::from_smiles("c1ccccc1").unwrap();
458        let result = compute_sssr(&mol);
459
460        // All aromatic carbons should be in 1 ring
461        for &idx in &result.rings[0].atoms {
462            assert!(
463                result.atom_ring_count[idx] >= 1,
464                "Atom {} should be in at least 1 ring",
465                idx
466            );
467        }
468    }
469
470    #[test]
471    fn test_connected_components_helper() {
472        let adj = vec![
473            BTreeSet::from([1]),
474            BTreeSet::from([0, 2]),
475            BTreeSet::from([1]),
476            BTreeSet::from([4]),
477            BTreeSet::from([3]),
478        ];
479
480        assert_eq!(connected_components(&adj), 2);
481    }
482}