Skip to main content

chematic_perception/
cip_priority.rs

1//! v0.1.93: Full multi-sphere CIP (Cahn–Ingold–Prelog) priority comparison.
2//!
3//! Implements hierarchical digraph CIP rules with BFS sphere expansion,
4//! phantom atoms for double bonds, and atomic mass tiebreaking.
5//! This replaces the simplified 1-sphere CIP in stereo2d.rs / stereo3d.rs.
6
7use chematic_core::{AtomIdx, BondOrder, Molecule};
8use rustc_hash::{FxHashMap, FxHashSet};
9use std::cmp::Ordering;
10use std::collections::VecDeque;
11
12/// A single "sphere layer" in a CIP branch expansion.
13/// Each tuple: (atomic_number, isotope, atomic_mass) sorted descending by CIP priority.
14type SphereLayer = Vec<(u8, Option<u16>, f64)>;
15
16/// Get the key `(atomic_num, isotope, atomic_mass)` for an atom, used in CIP comparisons.
17fn atom_key(mol: &Molecule, idx: AtomIdx) -> (u8, Option<u16>, f64) {
18    let a = mol.atom(idx);
19    (
20        a.element.atomic_number(),
21        a.isotope,
22        a.element.atomic_mass(),
23    )
24}
25
26/// Compare two `(atomic_num, isotope, atomic_mass)` keys by CIP priority.
27///
28/// CIP rule hierarchy:
29/// 1. Higher atomic number wins.
30/// 2. For explicit isotopes: `Some(mass)` > `None` (heavier beats unspecified).
31/// 3. Tiebreaker: higher atomic mass wins (rule 4).
32fn cmp_key(a: (u8, Option<u16>, f64), b: (u8, Option<u16>, f64)) -> Ordering {
33    // Rule 1: atomic number (primary)
34    match a.0.cmp(&b.0) {
35        Ordering::Equal => {}
36        other => return other,
37    }
38    // Rule 2: isotope label (if present)
39    match (a.1, b.1) {
40        (Some(x), Some(y)) => match x.cmp(&y) {
41            Ordering::Equal => {}
42            other => return other,
43        },
44        (Some(_), None) => return Ordering::Greater,
45        (None, Some(_)) => return Ordering::Less,
46        (None, None) => {}
47    }
48    // Rule 4 tiebreaker: atomic mass (descending for higher priority)
49    b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal)
50}
51
52/// BFS state for one node during sphere expansion.
53struct ExpandState {
54    node: AtomIdx,
55    parent: AtomIdx,
56    depth: usize,
57    visited: FxHashSet<AtomIdx>,
58}
59
60/// BFS-based CIP sphere expansion for the branch starting at `start`,
61/// not going back through `center`.
62///
63/// Phantom atom rules:
64/// 1. **Double-bond phantom**: when expanding node B (reached via double bond from A),
65///    add a phantom entry for A at the same depth level as B's children.
66/// 2. **Ring revisit phantom**: if an already-visited atom is encountered,
67///    add a phantom for it but don't expand further.
68fn cip_branch_spheres(mol: &Molecule, center: AtomIdx, start: AtomIdx) -> Vec<SphereLayer> {
69    let mut layers: FxHashMap<usize, Vec<(u8, Option<u16>, f64)>> = FxHashMap::default();
70    let max_depth = 8usize;
71
72    // The start atom itself is at depth 1.
73    let start_key = atom_key(mol, start);
74    layers.entry(1).or_default().push(start_key);
75
76    let mut expand_queue: VecDeque<ExpandState> = VecDeque::new();
77    {
78        let mut v = FxHashSet::default();
79        v.insert(center);
80        v.insert(start);
81        expand_queue.push_back(ExpandState {
82            node: start,
83            parent: center,
84            depth: 1,
85            visited: v,
86        });
87    }
88
89    while let Some(state) = expand_queue.pop_front() {
90        if state.depth >= max_depth {
91            continue;
92        }
93        let child_depth = state.depth + 1;
94
95        // Phantom of parent: add if the bond used to reach this node was double.
96        if let Some((_, bond_to_parent)) = mol.bond_between(state.node, state.parent)
97            && bond_to_parent.order == BondOrder::Double
98        {
99            let phantom_key = atom_key(mol, state.parent);
100            layers.entry(child_depth).or_default().push(phantom_key);
101        }
102
103        for (nb, _) in mol.neighbors(state.node) {
104            if nb == state.parent || nb == center {
105                continue;
106            }
107            let child_key = atom_key(mol, nb);
108            let layer = layers.entry(child_depth).or_default();
109
110            if state.visited.contains(&nb) {
111                // Ring revisit: phantom only, no expansion.
112                layer.push(child_key);
113            } else {
114                layer.push(child_key);
115                let mut child_visited = state.visited.clone();
116                child_visited.insert(nb);
117                expand_queue.push_back(ExpandState {
118                    node: nb,
119                    parent: state.node,
120                    depth: child_depth,
121                    visited: child_visited,
122                });
123            }
124        }
125    }
126
127    // Sort each layer descending and return as a Vec ordered by depth.
128    let max_layer = layers.keys().copied().max().unwrap_or(0);
129    let mut result = Vec::new();
130    for d in 1..=max_layer {
131        let mut layer = layers.remove(&d).unwrap_or_default();
132        layer.sort_by(|a, b| cmp_key(*b, *a)); // descending
133        result.push(layer);
134    }
135    result
136}
137
138/// Compare two branches from `center` starting at `a` and `b`.
139///
140/// Returns `Ordering::Greater` if branch `a` has higher CIP priority than `b`.
141pub fn compare_branches(mol: &Molecule, center: AtomIdx, a: AtomIdx, b: AtomIdx) -> Ordering {
142    // Depth-0 comparison: the substituent atoms themselves.
143    let a_key = atom_key(mol, a);
144    let b_key = atom_key(mol, b);
145    match cmp_key(a_key, b_key) {
146        Ordering::Equal => {}
147        other => return other,
148    }
149
150    // Sphere-by-sphere comparison.
151    let a_spheres = cip_branch_spheres(mol, center, a);
152    let b_spheres = cip_branch_spheres(mol, center, b);
153
154    let max_depth = a_spheres.len().max(b_spheres.len());
155    for d in 0..max_depth {
156        let a_layer = a_spheres.get(d).map(|v| v.as_slice()).unwrap_or(&[]);
157        let b_layer = b_spheres.get(d).map(|v| v.as_slice()).unwrap_or(&[]);
158
159        let min_len = a_layer.len().min(b_layer.len());
160        for i in 0..min_len {
161            match cmp_key(a_layer[i], b_layer[i]) {
162                Ordering::Equal => {}
163                other => return other,
164            }
165        }
166        match a_layer.len().cmp(&b_layer.len()) {
167            Ordering::Equal => {}
168            other => return other,
169        }
170    }
171
172    Ordering::Equal
173}
174
175#[cfg(test)]
176mod tests {
177    use super::*;
178    use chematic_smiles::parse;
179
180    #[test]
181    fn test_multi_sphere_simple_distinction() {
182        // C(C)(N): carbon with methyl (C) and amine (N) branches
183        // Atomic numbers differ: C=6, N=7 → immediately different priority
184        let mol = parse("C(C)N").unwrap();
185        let center = AtomIdx(0);
186        let methyl = AtomIdx(1); // C neighbor
187        let amine = AtomIdx(2); // N neighbor
188
189        let order = compare_branches(&mol, center, methyl, amine);
190        // N (atomic number 7) has higher priority than C (atomic number 6)
191        assert!(
192            order == Ordering::Less,
193            "C should have lower priority than N"
194        );
195    }
196
197    #[test]
198    fn test_compare_branches_symmetrical() {
199        // C(C)(C)(C)C: central C with 4 identical methyl branches
200        // All branches are C atoms bonded to same structure (1 parent only)
201        let mol = parse("C(C)(C)(C)C").unwrap();
202        let center = AtomIdx(0);
203        let branch1 = AtomIdx(1);
204        let branch2 = AtomIdx(2);
205
206        let order_a = compare_branches(&mol, center, branch1, branch2);
207        // Both branches are identical methyls, should compare equal
208        assert!(
209            order_a == Ordering::Equal,
210            "identical methyl branches should compare equal"
211        );
212    }
213
214    #[test]
215    fn test_compare_branches_isotope() {
216        // C with C and 13C neighbors should distinguish
217        let mol = parse("C(C)(C)(C)C").unwrap();
218        let center = AtomIdx(0);
219        let c_normal = AtomIdx(1);
220        let c_normal2 = AtomIdx(2);
221
222        let order = compare_branches(&mol, center, c_normal, c_normal2);
223        // Two identical normal carbons should compare equal
224        assert_eq!(
225            order,
226            Ordering::Equal,
227            "identical normal carbons should be equal"
228        );
229    }
230}