Skip to main content

chematic_perception/
stereo2d.rs

1//! v0.1.93: Stereochemistry assignment from 2D coordinates with full CIP prioritization.
2//!
3//! [`assign_stereo_from_2d`] infers R/S tetrahedral stereo descriptors by
4//! combining the 2D layout positions of substituents with the wedge-bond
5//! notation (`BondOrder::Up` / `BondOrder::Down`) already encoded in the
6//! molecule.
7//!
8//! **Convention:**
9//! - `BondOrder::Up` (solid wedge): the *far* atom (`bond.atom2`) is in
10//!   front of the plane (toward the viewer, z > 0).
11//! - `BondOrder::Down` (dashed wedge): the far atom is behind the plane (z < 0).
12//! - All other bonds: both endpoints are coplanar (z = 0).
13//!
14//! Priority is determined using full multi-sphere BFS CIP rules (v0.1.93+),
15//! superseding the simplified 1-sphere approach from v0.1.92.
16
17use std::cmp::Ordering;
18
19use crate::cip_priority;
20use chematic_core::{AtomIdx, BondIdx, BondOrder, CipCode, Molecule};
21
22// ---------------------------------------------------------------------------
23// Public types
24// ---------------------------------------------------------------------------
25
26/// Result of a 2D-based stereochemistry assignment.
27#[derive(Debug, Default, Clone)]
28pub struct StereoAssignment2D {
29    pub assignments: Vec<(AtomIdx, CipCode)>,
30}
31
32impl StereoAssignment2D {
33    /// Look up the CIP code for atom `idx`.
34    pub fn get(&self, idx: AtomIdx) -> Option<CipCode> {
35        self.assignments
36            .iter()
37            .find(|(i, _)| *i == idx)
38            .map(|(_, c)| *c)
39    }
40}
41
42// ---------------------------------------------------------------------------
43// Public entry points
44// ---------------------------------------------------------------------------
45
46/// Assign R/S stereo to atoms that carry wedge bonds, given the 2D layout.
47///
48/// `coords[i]` is the `(x, y)` position of atom `i` in any consistent unit
49/// (screen pixels, Å, etc.).  The z-component is derived from wedge bonds.
50///
51/// Returns a [`StereoAssignment2D`] containing one entry per successfully
52/// assigned chiral center.  Atoms where the chirality is ambiguous (tied
53/// priorities or too few heavy-atom neighbors) are omitted.
54pub fn assign_stereo_from_2d(mol: &Molecule, coords: &[(f64, f64)]) -> StereoAssignment2D {
55    let mut assignments = Vec::new();
56    for (idx, _) in mol.atoms() {
57        if let Some(code) = assign_rs(mol, coords, idx) {
58            assignments.push((idx, code));
59        }
60    }
61    StereoAssignment2D { assignments }
62}
63
64/// Assign E/Z stereo to double bonds using 2D atom coordinates.
65///
66/// Unlike the CIP-based approach (which reads `BondOrder::Up/Down` hints near
67/// sp2 carbons), this function derives E/Z purely from the 2D positions of
68/// atoms: it projects each substituent onto the perpendicular of the
69/// double-bond axis and compares which side each highest-priority substituent
70/// falls on.
71///
72/// Sets `cip_code` (E or Z) on atom1 of each resolved double bond in `mol`.
73/// Double bonds that are terminal (no substituent at one end) or have tied
74/// substituent priorities are skipped.
75pub fn assign_ez_from_2d(mol: &mut Molecule, coords: &[(f64, f64)]) {
76    let bond_indices: Vec<BondIdx> = mol
77        .bonds()
78        .filter(|(_, b)| b.order == BondOrder::Double)
79        .map(|(bidx, _)| bidx)
80        .collect();
81    for bidx in bond_indices {
82        if let Some((atom_idx, code)) = ez_from_coords(mol, bidx, coords) {
83            mol.set_cip_code(atom_idx, Some(code));
84        }
85    }
86}
87
88/// Return the E/Z descriptor for the double bond at `bond_idx` from 2D coordinates.
89///
90/// Returns `None` when the bond is not a double bond, is a terminal alkene,
91/// has an ambiguous geometry (substituent on the bond axis), or has tied
92/// CIP priorities.
93pub fn cip_ez_descriptor(
94    mol: &Molecule,
95    bond_idx: BondIdx,
96    coords: &[(f64, f64)],
97) -> Option<CipCode> {
98    ez_from_coords(mol, bond_idx, coords).map(|(_, code)| code)
99}
100
101/// Like [`assign_stereo_from_2d`] but writes `cip_code` directly onto each
102/// chiral atom in `mol`.
103pub fn apply_stereo_from_2d(mol: &mut Molecule, coords: &[(f64, f64)]) {
104    let result = assign_stereo_from_2d(mol, coords);
105    for (idx, code) in result.assignments {
106        // atom_mut requires access — we use set_cip_code helper if it exists,
107        // or rewrite the atom via add/remove.
108        // Since Molecule.atoms is private, use the new mutable set_* API:
109        // (molecule.rs exposes atoms[idx].cip_code via a dedicated setter)
110        mol.set_cip_code(idx, Some(code));
111    }
112}
113
114// ---------------------------------------------------------------------------
115// E/Z core logic
116// ---------------------------------------------------------------------------
117
118/// Compute E/Z for a single double bond from 2D coordinates.
119///
120/// Returns `Some((atom1_of_bond, E | Z))` or `None` if unresolvable.
121fn ez_from_coords(
122    mol: &Molecule,
123    bond_idx: BondIdx,
124    coords: &[(f64, f64)],
125) -> Option<(AtomIdx, CipCode)> {
126    let bond = mol.bond(bond_idx);
127    if bond.order != BondOrder::Double {
128        return None;
129    }
130
131    let a1 = bond.atom1;
132    let a2 = bond.atom2;
133
134    let (x1, y1) = coords.get(a1.0 as usize).copied()?;
135    let (x2, y2) = coords.get(a2.0 as usize).copied()?;
136
137    // Non-double-bond substituents at each alkene end.
138    let subs_a1: Vec<AtomIdx> = mol
139        .neighbors(a1)
140        .filter(|&(nb, bidx)| nb != a2 && mol.bond(bidx).order != BondOrder::Double)
141        .map(|(nb, _)| nb)
142        .collect();
143
144    let subs_a2: Vec<AtomIdx> = mol
145        .neighbors(a2)
146        .filter(|&(nb, bidx)| nb != a1 && mol.bond(bidx).order != BondOrder::Double)
147        .map(|(nb, _)| nb)
148        .collect();
149
150    if subs_a1.is_empty() || subs_a2.is_empty() {
151        return None; // terminal alkene — no geometric isomerism
152    }
153
154    // Highest-priority substituent (simplified 1-sphere CIP) at each end.
155    let ha1 = highest_ez_priority(mol, a1, &subs_a1)?;
156    let ha2 = highest_ez_priority(mol, a2, &subs_a2)?;
157
158    let (hx1, hy1) = coords.get(ha1.0 as usize).copied()?;
159    let (hx2, hy2) = coords.get(ha2.0 as usize).copied()?;
160
161    // Double-bond direction vector (a1 → a2).
162    let vx = x2 - x1;
163    let vy = y2 - y1;
164
165    // Signed 2D cross product: cross(v, u) = vx*uy - vy*ux.
166    // The sign tells which side of the bond axis each substituent lies on.
167    let side_a1 = cross2d(vx, vy, hx1 - x1, hy1 - y1);
168    let side_a2 = cross2d(vx, vy, hx2 - x1, hy2 - y1);
169
170    if side_a1.abs() < 1e-6 || side_a2.abs() < 1e-6 {
171        return None; // substituent falls on the bond axis — geometry ambiguous
172    }
173
174    // Same side → Z (zusammen); opposite sides → E (entgegen).
175    let code = if side_a1.signum() == side_a2.signum() {
176        CipCode::Z
177    } else {
178        CipCode::E
179    };
180    Some((a1, code))
181}
182
183/// Return the highest-priority substituent from `subs` at `center`.
184///
185/// Returns `None` when the top two substituents have equal priority (tied →
186/// E/Z is unspecified) or `subs` is empty.
187fn highest_ez_priority(mol: &Molecule, center: AtomIdx, subs: &[AtomIdx]) -> Option<AtomIdx> {
188    if subs.is_empty() {
189        return None;
190    }
191    if subs.len() == 1 {
192        return Some(subs[0]);
193    }
194    let mut sorted = subs.to_vec();
195    sorted.sort_by(|&a, &b| cip_priority::compare_branches(mol, center, a, b).reverse());
196    if cip_priority::compare_branches(mol, center, sorted[0], sorted[1]) == Ordering::Equal {
197        return None; // tied top priorities → E/Z not determinable
198    }
199    Some(sorted[0])
200}
201
202/// 2D cross product scalar: vx*uy - vy*ux.
203fn cross2d(vx: f64, vy: f64, ux: f64, uy: f64) -> f64 {
204    vx * uy - vy * ux
205}
206
207// ---------------------------------------------------------------------------
208// Core R/S logic
209// ---------------------------------------------------------------------------
210
211/// 3D point (x, y, z).
212#[derive(Clone, Copy)]
213struct P3 {
214    x: f64,
215    y: f64,
216    z: f64,
217}
218
219fn assign_rs(mol: &Molecule, coords: &[(f64, f64)], center: AtomIdx) -> Option<CipCode> {
220    let center_pos = coords.get(center.0 as usize)?;
221
222    // Collect heavy-atom neighbors with their 3D positions.
223    let nbs: Vec<AtomIdx> = mol.neighbors(center).map(|(nb, _)| nb).collect();
224    if nbs.len() != 4 {
225        return None; // need exactly 4 heavy-atom neighbors for tetrahedral
226    }
227    let subs: [AtomIdx; 4] = [nbs[0], nbs[1], nbs[2], nbs[3]];
228
229    // Build 3D positions: z from wedge bonds.
230    let z_for: [f64; 4] = [
231        wedge_z(mol, center, subs[0]),
232        wedge_z(mol, center, subs[1]),
233        wedge_z(mol, center, subs[2]),
234        wedge_z(mol, center, subs[3]),
235    ];
236
237    let pts: [P3; 4] = [0, 1, 2, 3].map(|i| {
238        let (x, y) = coords
239            .get(subs[i].0 as usize)
240            .copied()
241            .unwrap_or(*center_pos);
242        P3 { x, y, z: z_for[i] }
243    });
244
245    let ranks = rank4(mol, center, &subs)?;
246
247    // Sort substituents highest-priority first.
248    let mut order: [usize; 4] = [0, 1, 2, 3];
249    order.sort_by(|&i, &j| ranks[j].cmp(&ranks[i]));
250
251    let s: [P3; 4] = order.map(|i| pts[i]);
252    let v = signed_volume(s[0], s[1], s[2], s[3]);
253
254    if v.abs() < 1e-6 {
255        return None; // coplanar / degenerate
256    }
257
258    Some(if v > 0.0 { CipCode::S } else { CipCode::R })
259}
260
261/// Determine z-offset for `neighbor` as seen from `center`:
262/// Up (wedge toward viewer) → +1.0
263/// Down (dash away from viewer) → -1.0
264/// Anything else → 0.0
265fn wedge_z(mol: &Molecule, center: AtomIdx, neighbor: AtomIdx) -> f64 {
266    // Check bond center→neighbor
267    if let Some((_, bond)) = mol.bond_between(center, neighbor) {
268        match bond.order {
269            BondOrder::Up => {
270                // If bond.atom1 == center, neighbor is in front (+z).
271                // If bond.atom1 == neighbor (bond drawn away), neighbor is behind (−z).
272                // Standard convention: wedge tip at atom1, base at atom2 → atom2 is in front.
273                if bond.atom1 == center {
274                    return 1.0;
275                } else {
276                    return -1.0;
277                }
278            }
279            BondOrder::Down => {
280                if bond.atom1 == center {
281                    return -1.0;
282                } else {
283                    return 1.0;
284                }
285            }
286            _ => {}
287        }
288    }
289    0.0
290}
291
292// Note: 1-sphere CIP helpers removed in v0.1.93; now using cip_priority module
293
294fn rank4(mol: &Molecule, center: AtomIdx, subs: &[AtomIdx; 4]) -> Option<[u8; 4]> {
295    let mut order: [usize; 4] = [0, 1, 2, 3];
296    order.sort_by(|&i, &j| cip_priority::compare_branches(mol, center, subs[i], subs[j]).reverse());
297    for k in 0..3 {
298        if cip_priority::compare_branches(mol, center, subs[order[k]], subs[order[k + 1]])
299            == Ordering::Equal
300        {
301            return None;
302        }
303    }
304    let mut ranks = [0u8; 4];
305    for (rank_from_top, &idx) in order.iter().enumerate() {
306        ranks[idx] = (4 - rank_from_top) as u8;
307    }
308    Some(ranks)
309}
310
311// ---------------------------------------------------------------------------
312// Signed volume
313// ---------------------------------------------------------------------------
314
315fn signed_volume(p1: P3, p2: P3, p3: P3, p4: P3) -> f64 {
316    let (ax, ay, az) = (p1.x - p4.x, p1.y - p4.y, p1.z - p4.z);
317    let (bx, by, bz) = (p2.x - p4.x, p2.y - p4.y, p2.z - p4.z);
318    let (cx, cy, cz) = (p3.x - p4.x, p3.y - p4.y, p3.z - p4.z);
319    ax * (by * cz - bz * cy) - ay * (bx * cz - bz * cx) + az * (bx * cy - by * cx)
320}
321
322// ---------------------------------------------------------------------------
323// Tests
324// ---------------------------------------------------------------------------
325
326#[cfg(test)]
327mod tests {
328    use super::*;
329    use chematic_smiles::parse;
330
331    // -----------------------------------------------------------------------
332    // E/Z tests
333    // -----------------------------------------------------------------------
334
335    #[test]
336    fn test_ez_terminal_alkene_no_assignment() {
337        // CH2=CH2 — terminal, no E/Z possible
338        let mol = parse("C=C").unwrap();
339        let coords = vec![(0.0, 0.0), (1.5, 0.0)];
340        // No E/Z should be assigned for a terminal alkene
341        let bond_idx = mol
342            .bonds()
343            .find(|(_, b)| b.order == BondOrder::Double)
344            .map(|(i, _)| i);
345        assert!(bond_idx.is_some());
346        let result = cip_ez_descriptor(&mol, bond_idx.unwrap(), &coords);
347        assert!(result.is_none(), "terminal alkene should have no E/Z");
348    }
349
350    #[test]
351    fn test_ez_but2ene_z() {
352        // (Z)-but-2-ene: CH3\C=C/CH3
353        // Atoms: C0(Me)-C1=C2-C3(Me)
354        // Layout: C1 and C2 horizontal; C0 above-left, C3 above-right → same side → Z
355        //
356        //   C0          C3
357        //     \        /
358        //      C1 = C2
359        //
360        let mol = parse("CC=CC").unwrap();
361        // Double bond is between atoms 1 and 2 (0-indexed SMILES order).
362        let bond_idx = mol
363            .bonds()
364            .find(|(_, b)| b.order == BondOrder::Double)
365            .map(|(i, _)| i)
366            .unwrap();
367        let coords = vec![
368            (-0.866, 0.5), // C0 (methyl at a1 end, above)
369            (0.0, 0.0),    // C1 (left alkene carbon)
370            (1.5, 0.0),    // C2 (right alkene carbon)
371            (2.366, 0.5),  // C3 (methyl at a2 end, above — same side as C0)
372        ];
373        let result = cip_ez_descriptor(&mol, bond_idx, &coords);
374        assert_eq!(result, Some(CipCode::Z), "(Z)-but-2-ene should be Z");
375    }
376
377    #[test]
378    fn test_ez_but2ene_e() {
379        // (E)-but-2-ene: CH3/C=C/CH3
380        // C0 above-left, C3 below-right → opposite sides → E
381        //
382        //   C0
383        //     \
384        //      C1 = C2
385        //              \
386        //               C3
387        //
388        let mol = parse("CC=CC").unwrap();
389        let bond_idx = mol
390            .bonds()
391            .find(|(_, b)| b.order == BondOrder::Double)
392            .map(|(i, _)| i)
393            .unwrap();
394        let coords = vec![
395            (-0.866, 0.5), // C0 above
396            (0.0, 0.0),    // C1
397            (1.5, 0.0),    // C2
398            (2.366, -0.5), // C3 below → opposite side → E
399        ];
400        let result = cip_ez_descriptor(&mol, bond_idx, &coords);
401        assert_eq!(result, Some(CipCode::E), "(E)-but-2-ene should be E");
402    }
403
404    #[test]
405    fn test_assign_ez_from_2d_sets_cip_code() {
406        let mut mol = parse("CC=CC").unwrap();
407        let coords = vec![
408            (-0.866, 0.5),
409            (0.0, 0.0),
410            (1.5, 0.0),
411            (2.366, 0.5), // same side → Z
412        ];
413        assign_ez_from_2d(&mut mol, &coords);
414        // At least one atom should have a Z cip_code after assignment.
415        let has_z = mol
416            .atoms()
417            .any(|(_, atom)| atom.cip_code == Some(CipCode::Z));
418        assert!(has_z, "assign_ez_from_2d should set Z on but-2-ene");
419    }
420
421    // -----------------------------------------------------------------------
422    // R/S tests
423    // -----------------------------------------------------------------------
424
425    #[test]
426    fn test_no_stereo_ethane() {
427        let mol = parse("CC").unwrap();
428        let coords = vec![(0.0, 0.0), (1.0, 0.0)];
429        let result = assign_stereo_from_2d(&mol, &coords);
430        assert!(result.assignments.is_empty());
431    }
432
433    #[test]
434    fn test_r_s_bromochlorofluoromethane() {
435        // CHFClBr — 4 heavy-atom neighbors, wedge C-Br above plane
436        // Build: C bonded to F, Cl, Br, H via explicit H
437        // We won't test R vs S deterministically (depends on atom ordering),
438        // just that an assignment is returned when a wedge bond is present.
439        use chematic_core::{Atom, BondOrder as BO, Element, MoleculeBuilder};
440        let mut b = MoleculeBuilder::new();
441        let c = b.add_atom(Atom::new(Element::C));
442        let f = b.add_atom(Atom::new(Element::F));
443        let cl = b.add_atom(Atom::new(Element::CL));
444        let br = b.add_atom(Atom::new(Element::BR));
445        let h = b.add_atom(Atom::new(Element::H));
446        b.add_bond(c, f, BO::Single).unwrap();
447        b.add_bond(c, cl, BO::Single).unwrap();
448        b.add_bond(c, br, BO::Up).unwrap(); // Br is in front of plane
449        b.add_bond(c, h, BO::Down).unwrap(); // H is behind plane
450        let mol = b.build();
451        // Use non-degenerate 2D positions to avoid a zero-volume determinant.
452        let coords = vec![
453            (0.0, 0.0),   // C
454            (-1.0, -0.5), // F
455            (1.0, -0.5),  // Cl
456            (0.0, 1.0),   // Br  (z = +1 from Up bond)
457            (0.0, -1.0),  // H   (z = -1 from Down bond)
458        ];
459        let result = assign_stereo_from_2d(&mol, &coords);
460        // Should assign one chiral center.
461        assert_eq!(result.assignments.len(), 1);
462        assert_eq!(result.assignments[0].0, c);
463    }
464}