Skip to main content

sci_form/
stereo.rs

1//! Stereochemistry perception: CIP priority, R/S, and E/Z assignment.
2//!
3//! Implements Cahn–Ingold–Prelog priority rules for chirality perception
4//! using depth-first graph traversal with phantom atom expansion.
5
6use crate::graph::{BondOrder, Molecule};
7use petgraph::graph::NodeIndex;
8use serde::{Deserialize, Serialize};
9
10/// CIP priority tree node for depth-first traversal.
11#[derive(Debug, Clone)]
12struct CipNode {
13    atomic_number: u8,
14    mass: f64,
15    children: Vec<CipNode>,
16}
17
18/// Result of R/S assignment at a single stereocenter.
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct Stereocenter {
21    /// Atom index of the chiral center.
22    pub atom_index: usize,
23    /// Atomic number of the center.
24    pub element: u8,
25    /// Indices of the four substituents (CIP priority order: 1 > 2 > 3 > 4).
26    pub substituent_indices: Vec<usize>,
27    /// CIP priorities (1 = highest).
28    pub priorities: Vec<usize>,
29    /// R, S, or None if undetermined.
30    pub configuration: Option<String>,
31}
32
33/// Result of E/Z assignment at a double bond.
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct DoubleBondStereo {
36    /// Index of atom on one end of the double bond.
37    pub atom1: usize,
38    /// Index of atom on the other end.
39    pub atom2: usize,
40    /// E, Z, or None if undetermined.
41    pub configuration: Option<String>,
42    /// Highest-priority substituent on atom1 side.
43    pub high_priority_sub1: Option<usize>,
44    /// Highest-priority substituent on atom2 side.
45    pub high_priority_sub2: Option<usize>,
46}
47
48/// Complete stereochemistry analysis result.
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct StereoAnalysis {
51    /// Detected stereocenters with R/S assignments.
52    pub stereocenters: Vec<Stereocenter>,
53    /// Detected double bonds with E/Z assignments.
54    pub double_bonds: Vec<DoubleBondStereo>,
55    /// Total number of chiral centers found.
56    pub n_stereocenters: usize,
57    /// Total number of E/Z-assignable double bonds.
58    pub n_double_bonds: usize,
59}
60
61/// Approximate atomic mass for CIP tie-breaking.
62fn atomic_mass(z: u8) -> f64 {
63    match z {
64        1 => 1.008,
65        5 => 10.81,
66        6 => 12.011,
67        7 => 14.007,
68        8 => 15.999,
69        9 => 18.998,
70        14 => 28.085,
71        15 => 30.974,
72        16 => 32.06,
73        17 => 35.45,
74        35 => 79.904,
75        53 => 126.904,
76        _ => z as f64,
77    }
78}
79
80/// Build a CIP priority tree rooted at `start` coming from `from`, up to depth `max_depth`.
81fn build_cip_tree(mol: &Molecule, start: usize, from: usize, max_depth: usize) -> CipNode {
82    let start_idx = NodeIndex::new(start);
83    let atom = &mol.graph[start_idx];
84
85    let mut node = CipNode {
86        atomic_number: atom.element,
87        mass: atomic_mass(atom.element),
88        children: Vec::new(),
89    };
90
91    if max_depth == 0 {
92        return node;
93    }
94
95    // Collect neighbors excluding `from`
96    for neighbor in mol.graph.neighbors(start_idx) {
97        let ni = neighbor.index();
98        if ni == from {
99            continue;
100        }
101
102        // Find the bond order between start and neighbor
103        let edge = mol.graph.find_edge(start_idx, neighbor);
104        let bond_order = edge
105            .map(|e| &mol.graph[e].order)
106            .unwrap_or(&BondOrder::Single);
107
108        // For double/triple bonds, add phantom atoms (duplicates)
109        let n_phantoms = match bond_order {
110            BondOrder::Double | BondOrder::Aromatic => 1,
111            BondOrder::Triple => 2,
112            _ => 0,
113        };
114
115        // Real child
116        node.children
117            .push(build_cip_tree(mol, ni, start, max_depth - 1));
118
119        // Phantom children (copies of the neighbor with no further children)
120        let neighbor_atom = &mol.graph[neighbor];
121        for _ in 0..n_phantoms {
122            node.children.push(CipNode {
123                atomic_number: neighbor_atom.element,
124                mass: atomic_mass(neighbor_atom.element),
125                children: Vec::new(),
126            });
127        }
128
129        // Add phantom of start to neighbor too (symmetry of double bond expansion)
130        // This is handled by the child's own tree construction.
131    }
132
133    // Sort children by CIP priority (higher atomic number = higher priority)
134    node.children.sort_by(|a, b| cip_compare(b, a));
135
136    node
137}
138
139/// Compare two CIP nodes (higher = "greater").
140/// Returns Ordering for descending sort (highest priority first).
141fn cip_compare(a: &CipNode, b: &CipNode) -> std::cmp::Ordering {
142    // Rule 1: Higher atomic number wins
143    match a.atomic_number.cmp(&b.atomic_number) {
144        std::cmp::Ordering::Equal => {}
145        other => return other,
146    }
147
148    // Rule 2: Higher mass wins (isotope rule)
149    match a.mass.partial_cmp(&b.mass) {
150        Some(std::cmp::Ordering::Equal) | None => {}
151        Some(other) => return other,
152    }
153
154    // Rule 3: Compare children layer by layer
155    let max_children = a.children.len().max(b.children.len());
156    for i in 0..max_children {
157        match (a.children.get(i), b.children.get(i)) {
158            (Some(ca), Some(cb)) => match cip_compare(ca, cb) {
159                std::cmp::Ordering::Equal => continue,
160                other => return other,
161            },
162            (Some(_), None) => return std::cmp::Ordering::Greater,
163            (None, Some(_)) => return std::cmp::Ordering::Less,
164            (None, None) => break,
165        }
166    }
167
168    std::cmp::Ordering::Equal
169}
170
171/// Detect stereocenters: sp3 atoms with 4 different substituents.
172fn find_stereocenters(mol: &Molecule) -> Vec<usize> {
173    let mut centers = Vec::new();
174
175    for node in mol.graph.node_indices() {
176        let atom = &mol.graph[node];
177        // Skip H and uncommon centers
178        if atom.element == 1 {
179            continue;
180        }
181
182        let neighbors: Vec<usize> = mol.graph.neighbors(node).map(|n| n.index()).collect();
183
184        // Need exactly 4 substituents for sp3 chiral center
185        if neighbors.len() != 4 {
186            continue;
187        }
188
189        // Build CIP trees for each substituent
190        let trees: Vec<CipNode> = neighbors
191            .iter()
192            .map(|&n| build_cip_tree(mol, n, node.index(), 6))
193            .collect();
194
195        // Check all pairs are distinct
196        let mut all_different = true;
197        'outer: for i in 0..4 {
198            for j in (i + 1)..4 {
199                if cip_compare(&trees[i], &trees[j]) == std::cmp::Ordering::Equal {
200                    all_different = false;
201                    break 'outer;
202                }
203            }
204        }
205
206        if all_different {
207            centers.push(node.index());
208        }
209    }
210
211    centers
212}
213
214/// Assign CIP priorities to substituents around a center atom.
215/// Returns (sorted_neighbor_indices, priority_ranks) where rank 1 = highest.
216fn assign_cip_priorities(
217    mol: &Molecule,
218    center: usize,
219    neighbors: &[usize],
220) -> (Vec<usize>, Vec<usize>) {
221    let mut indexed_trees: Vec<(usize, CipNode)> = neighbors
222        .iter()
223        .map(|&n| (n, build_cip_tree(mol, n, center, 6)))
224        .collect();
225
226    // Sort by descending CIP priority
227    indexed_trees.sort_by(|a, b| cip_compare(&b.1, &a.1));
228
229    let sorted_indices: Vec<usize> = indexed_trees.iter().map(|(idx, _)| *idx).collect();
230    let priorities: Vec<usize> = (1..=sorted_indices.len()).collect();
231
232    (sorted_indices, priorities)
233}
234
235/// Determine R/S configuration from 3D coordinates.
236///
237/// Uses the signed volume of the tetrahedron formed by substituents 1-2-3
238/// viewed from substituent 4 (lowest priority).
239/// Positive volume with 1→2→3 going clockwise = R.
240fn assign_rs(positions: &[[f64; 3]], center: usize, sorted_subs: &[usize]) -> Option<String> {
241    if sorted_subs.len() != 4 || positions.is_empty() {
242        return None;
243    }
244    if center >= positions.len() || sorted_subs.iter().any(|&s| s >= positions.len()) {
245        return None;
246    }
247
248    // Vector from lowest-priority (4th) substituent to center
249    let p4 = positions[sorted_subs[3]];
250    let p1 = positions[sorted_subs[0]];
251    let p2 = positions[sorted_subs[1]];
252    let p3 = positions[sorted_subs[2]];
253
254    // Vectors from p4 to p1, p2, p3
255    let v1 = [p1[0] - p4[0], p1[1] - p4[1], p1[2] - p4[2]];
256    let v2 = [p2[0] - p4[0], p2[1] - p4[1], p2[2] - p4[2]];
257    let v3 = [p3[0] - p4[0], p3[1] - p4[1], p3[2] - p4[2]];
258
259    // Signed volume = v1 · (v2 × v3)
260    let cross = [
261        v2[1] * v3[2] - v2[2] * v3[1],
262        v2[2] * v3[0] - v2[0] * v3[2],
263        v2[0] * v3[1] - v2[1] * v3[0],
264    ];
265    let signed_volume = v1[0] * cross[0] + v1[1] * cross[1] + v1[2] * cross[2];
266
267    if signed_volume.abs() < 1e-6 {
268        return None; // Planar, can't determine
269    }
270
271    Some(if signed_volume > 0.0 {
272        "R".to_string()
273    } else {
274        "S".to_string()
275    })
276}
277
278/// Find double bonds that could have E/Z isomerism.
279fn find_ez_double_bonds(mol: &Molecule) -> Vec<(usize, usize)> {
280    let mut bonds = Vec::new();
281
282    for edge in mol.graph.edge_indices() {
283        let bond = &mol.graph[edge];
284        if bond.order != BondOrder::Double {
285            continue;
286        }
287
288        let (a, b) = mol.graph.edge_endpoints(edge).unwrap();
289        let a_idx = a.index();
290        let b_idx = b.index();
291
292        // Each end needs at least 2 different substituents (including the other end)
293        let a_neighbors: Vec<usize> = mol
294            .graph
295            .neighbors(a)
296            .map(|n| n.index())
297            .filter(|&n| n != b_idx)
298            .collect();
299        let b_neighbors: Vec<usize> = mol
300            .graph
301            .neighbors(b)
302            .map(|n| n.index())
303            .filter(|&n| n != a_idx)
304            .collect();
305
306        // Need at least 1 substituent on each side (besides the double bond partner)
307        if a_neighbors.is_empty() || b_neighbors.is_empty() {
308            continue;
309        }
310
311        // If only 1 substituent on a side, we can still assign E/Z
312        // If 2 substituents, they must be different
313        if a_neighbors.len() >= 2 {
314            let trees_a: Vec<CipNode> = a_neighbors
315                .iter()
316                .map(|&n| build_cip_tree(mol, n, a_idx, 6))
317                .collect();
318            if cip_compare(&trees_a[0], &trees_a[1]) == std::cmp::Ordering::Equal {
319                continue; // Identical substituents on atom a
320            }
321        }
322        if b_neighbors.len() >= 2 {
323            let trees_b: Vec<CipNode> = b_neighbors
324                .iter()
325                .map(|&n| build_cip_tree(mol, n, b_idx, 6))
326                .collect();
327            if cip_compare(&trees_b[0], &trees_b[1]) == std::cmp::Ordering::Equal {
328                continue;
329            }
330        }
331
332        bonds.push((a_idx, b_idx));
333    }
334
335    bonds
336}
337
338/// Assign E/Z from 3D coordinates using the dihedral angle of highest-priority
339/// substituents across the double bond.
340fn assign_ez(
341    mol: &Molecule,
342    positions: &[[f64; 3]],
343    atom1: usize,
344    atom2: usize,
345) -> DoubleBondStereo {
346    let a_idx = NodeIndex::new(atom1);
347    let b_idx = NodeIndex::new(atom2);
348
349    let mut a_subs: Vec<usize> = mol
350        .graph
351        .neighbors(a_idx)
352        .map(|n| n.index())
353        .filter(|&n| n != atom2)
354        .collect();
355    let mut b_subs: Vec<usize> = mol
356        .graph
357        .neighbors(b_idx)
358        .map(|n| n.index())
359        .filter(|&n| n != atom1)
360        .collect();
361
362    // Sort by CIP priority (highest first)
363    a_subs.sort_by(|&x, &y| {
364        let tx = build_cip_tree(mol, x, atom1, 6);
365        let ty = build_cip_tree(mol, y, atom1, 6);
366        cip_compare(&ty, &tx)
367    });
368    b_subs.sort_by(|&x, &y| {
369        let tx = build_cip_tree(mol, x, atom2, 6);
370        let ty = build_cip_tree(mol, y, atom2, 6);
371        cip_compare(&ty, &tx)
372    });
373
374    let high_a = a_subs.first().copied();
375    let high_b = b_subs.first().copied();
376
377    let configuration = match (high_a, high_b) {
378        (Some(ha), Some(hb)) if !positions.is_empty() => {
379            if atom1 >= positions.len()
380                || atom2 >= positions.len()
381                || ha >= positions.len()
382                || hb >= positions.len()
383            {
384                None
385            } else {
386                // Compute dihedral angle ha-atom1-atom2-hb
387                let dihedral = compute_dihedral(
388                    &positions[ha],
389                    &positions[atom1],
390                    &positions[atom2],
391                    &positions[hb],
392                );
393                if dihedral.abs() < std::f64::consts::FRAC_PI_2 {
394                    Some("Z".to_string()) // same side
395                } else {
396                    Some("E".to_string()) // opposite side
397                }
398            }
399        }
400        _ => None,
401    };
402
403    DoubleBondStereo {
404        atom1,
405        atom2,
406        configuration,
407        high_priority_sub1: high_a,
408        high_priority_sub2: high_b,
409    }
410}
411
412/// Compute dihedral angle (in radians) for four points a-b-c-d.
413fn compute_dihedral(a: &[f64; 3], b: &[f64; 3], c: &[f64; 3], d: &[f64; 3]) -> f64 {
414    let b1 = [b[0] - a[0], b[1] - a[1], b[2] - a[2]];
415    let b2 = [c[0] - b[0], c[1] - b[1], c[2] - b[2]];
416    let b3 = [d[0] - c[0], d[1] - c[1], d[2] - c[2]];
417
418    let cross_b1_b2 = cross(&b1, &b2);
419    let cross_b2_b3 = cross(&b2, &b3);
420
421    let n1_dot_n2 = dot(&cross_b1_b2, &cross_b2_b3);
422    let b2_norm = dot(&b2, &b2).sqrt();
423
424    let x = n1_dot_n2;
425    let y = dot(&cross(&cross_b1_b2, &cross_b2_b3), &b2) / b2_norm.max(1e-12);
426
427    y.atan2(x)
428}
429
430fn cross(a: &[f64; 3], b: &[f64; 3]) -> [f64; 3] {
431    [
432        a[1] * b[2] - a[2] * b[1],
433        a[2] * b[0] - a[0] * b[2],
434        a[0] * b[1] - a[1] * b[0],
435    ]
436}
437
438fn dot(a: &[f64; 3], b: &[f64; 3]) -> f64 {
439    a[0] * b[0] + a[1] * b[1] + a[2] * b[2]
440}
441
442/// Perform complete stereochemistry analysis on a molecule.
443///
444/// Detects chiral centers (R/S) and E/Z double bonds from SMILES topology
445/// and optional 3D coordinates.
446pub fn analyze_stereo(mol: &Molecule, positions: &[[f64; 3]]) -> StereoAnalysis {
447    // Find and assign stereocenters
448    let center_indices = find_stereocenters(mol);
449    let stereocenters: Vec<Stereocenter> = center_indices
450        .iter()
451        .map(|&center| {
452            let center_idx = NodeIndex::new(center);
453            let neighbors: Vec<usize> =
454                mol.graph.neighbors(center_idx).map(|n| n.index()).collect();
455            let (sorted_subs, priorities) = assign_cip_priorities(mol, center, &neighbors);
456            let configuration = if !positions.is_empty() {
457                assign_rs(positions, center, &sorted_subs)
458            } else {
459                None
460            };
461
462            Stereocenter {
463                atom_index: center,
464                element: mol.graph[center_idx].element,
465                substituent_indices: sorted_subs,
466                priorities,
467                configuration,
468            }
469        })
470        .collect();
471
472    // Find and assign E/Z double bonds
473    let ez_bonds = find_ez_double_bonds(mol);
474    let double_bonds: Vec<DoubleBondStereo> = ez_bonds
475        .iter()
476        .map(|&(a, b)| assign_ez(mol, positions, a, b))
477        .collect();
478
479    let n_stereocenters = stereocenters.len();
480    let n_double_bonds = double_bonds.len();
481
482    StereoAnalysis {
483        stereocenters,
484        double_bonds,
485        n_stereocenters,
486        n_double_bonds,
487    }
488}
489
490#[cfg(test)]
491mod tests {
492    use super::*;
493
494    #[test]
495    fn test_no_stereocenters_ethane() {
496        let mol = Molecule::from_smiles("CC").unwrap();
497        let result = analyze_stereo(&mol, &[]);
498        assert_eq!(result.n_stereocenters, 0);
499    }
500
501    #[test]
502    fn test_stereocenter_detection_chiral() {
503        // 2-bromobutane: C(Br)(CC)C — central carbon has 4 different substituents
504        let mol = Molecule::from_smiles("CC(Br)CC").unwrap();
505        let result = analyze_stereo(&mol, &[]);
506        assert!(
507            result.n_stereocenters >= 1,
508            "2-bromobutane should have at least 1 stereocenter, got {}",
509            result.n_stereocenters
510        );
511    }
512
513    #[test]
514    fn test_cip_priority_ordering() {
515        // Br > Cl > F > O > N > C > H (atomic number)
516        let mol = Molecule::from_smiles("C(F)(Cl)Br").unwrap();
517        let result = analyze_stereo(&mol, &[]);
518        if let Some(sc) = result.stereocenters.first() {
519            // The Br neighbor should have priority 1, Cl 2, F 3
520            let elements: Vec<u8> = sc
521                .substituent_indices
522                .iter()
523                .map(|&i| mol.graph[NodeIndex::new(i)].element)
524                .collect();
525            // First should be Br(35), then Cl(17), then F(9), then H(1)
526            assert!(
527                elements[0] >= elements[1],
528                "CIP order wrong: first {} should be >= second {}",
529                elements[0],
530                elements[1]
531            );
532        }
533    }
534
535    #[test]
536    fn test_ez_detection_2_butene() {
537        // 2-butene: CC=CC has a double bond with different substituents
538        let mol = Molecule::from_smiles("CC=CC").unwrap();
539        let result = analyze_stereo(&mol, &[]);
540        assert!(
541            result.n_double_bonds >= 1,
542            "2-butene should have E/Z-assignable double bond, got {}",
543            result.n_double_bonds
544        );
545    }
546
547    #[test]
548    fn test_no_ez_ethylene() {
549        // Ethylene C=C — same substituents on each side (H, H)
550        let mol = Molecule::from_smiles("C=C").unwrap();
551        let result = analyze_stereo(&mol, &[]);
552        assert_eq!(
553            result.n_double_bonds, 0,
554            "Ethylene should have no E/Z double bonds"
555        );
556    }
557
558    #[test]
559    fn test_benzene_no_stereo() {
560        let mol = Molecule::from_smiles("c1ccccc1").unwrap();
561        let result = analyze_stereo(&mol, &[]);
562        assert_eq!(result.n_stereocenters, 0);
563    }
564}