Skip to main content

chematic_perception/
stereo2d.rs

1//! v0.1.93: Stereochemistry assignment from 2D coordinates with full CIP prioritization.
2//!
3//! [`assign_stereo_from_2d`] infers R/S tetrahedral stereo descriptors by
4//! combining the 2D layout positions of substituents with the wedge-bond
5//! notation (`BondOrder::Up` / `BondOrder::Down`) already encoded in the
6//! molecule.
7//!
8//! **Convention:**
9//! - `BondOrder::Up` (solid wedge): the *far* atom (`bond.atom2`) is in
10//!   front of the plane (toward the viewer, z > 0).
11//! - `BondOrder::Down` (dashed wedge): the far atom is behind the plane (z < 0).
12//! - All other bonds: both endpoints are coplanar (z = 0).
13//!
14//! Priority is determined using full multi-sphere BFS CIP rules (v0.1.93+),
15//! superseding the simplified 1-sphere approach from v0.1.92.
16
17use std::cmp::Ordering;
18
19use chematic_core::{AtomIdx, BondIdx, BondOrder, CipCode, Molecule};
20use crate::cip_priority;
21
22// ---------------------------------------------------------------------------
23// Public types
24// ---------------------------------------------------------------------------
25
26/// Result of a 2D-based stereochemistry assignment.
27#[derive(Debug, Default, Clone)]
28pub struct StereoAssignment2D {
29    pub assignments: Vec<(AtomIdx, CipCode)>,
30}
31
32impl StereoAssignment2D {
33    /// Look up the CIP code for atom `idx`.
34    pub fn get(&self, idx: AtomIdx) -> Option<CipCode> {
35        self.assignments
36            .iter()
37            .find(|(i, _)| *i == idx)
38            .map(|(_, c)| *c)
39    }
40}
41
42// ---------------------------------------------------------------------------
43// Public entry points
44// ---------------------------------------------------------------------------
45
46/// Assign R/S stereo to atoms that carry wedge bonds, given the 2D layout.
47///
48/// `coords[i]` is the `(x, y)` position of atom `i` in any consistent unit
49/// (screen pixels, Å, etc.).  The z-component is derived from wedge bonds.
50///
51/// Returns a [`StereoAssignment2D`] containing one entry per successfully
52/// assigned chiral center.  Atoms where the chirality is ambiguous (tied
53/// priorities or too few heavy-atom neighbors) are omitted.
54pub fn assign_stereo_from_2d(mol: &Molecule, coords: &[(f64, f64)]) -> StereoAssignment2D {
55    let mut assignments = Vec::new();
56    for (idx, _) in mol.atoms() {
57        if let Some(code) = assign_rs(mol, coords, idx) {
58            assignments.push((idx, code));
59        }
60    }
61    StereoAssignment2D { assignments }
62}
63
64/// Assign E/Z stereo to double bonds using 2D atom coordinates.
65///
66/// Unlike the CIP-based approach (which reads `BondOrder::Up/Down` hints near
67/// sp2 carbons), this function derives E/Z purely from the 2D positions of
68/// atoms: it projects each substituent onto the perpendicular of the
69/// double-bond axis and compares which side each highest-priority substituent
70/// falls on.
71///
72/// Sets `cip_code` (E or Z) on atom1 of each resolved double bond in `mol`.
73/// Double bonds that are terminal (no substituent at one end) or have tied
74/// substituent priorities are skipped.
75pub fn assign_ez_from_2d(mol: &mut Molecule, coords: &[(f64, f64)]) {
76    let bond_indices: Vec<BondIdx> = mol
77        .bonds()
78        .filter(|(_, b)| b.order == BondOrder::Double)
79        .map(|(bidx, _)| bidx)
80        .collect();
81    for bidx in bond_indices {
82        if let Some((atom_idx, code)) = ez_from_coords(mol, bidx, coords) {
83            mol.set_cip_code(atom_idx, Some(code));
84        }
85    }
86}
87
88/// Return the E/Z descriptor for the double bond at `bond_idx` from 2D coordinates.
89///
90/// Returns `None` when the bond is not a double bond, is a terminal alkene,
91/// has an ambiguous geometry (substituent on the bond axis), or has tied
92/// CIP priorities.
93pub fn cip_ez_descriptor(
94    mol: &Molecule,
95    bond_idx: BondIdx,
96    coords: &[(f64, f64)],
97) -> Option<CipCode> {
98    ez_from_coords(mol, bond_idx, coords).map(|(_, code)| code)
99}
100
101/// Like [`assign_stereo_from_2d`] but writes `cip_code` directly onto each
102/// chiral atom in `mol`.
103pub fn apply_stereo_from_2d(mol: &mut Molecule, coords: &[(f64, f64)]) {
104    let result = assign_stereo_from_2d(mol, coords);
105    for (idx, code) in result.assignments {
106        // atom_mut requires access — we use set_cip_code helper if it exists,
107        // or rewrite the atom via add/remove.
108        // Since Molecule.atoms is private, use the new mutable set_* API:
109        // (molecule.rs exposes atoms[idx].cip_code via a dedicated setter)
110        mol.set_cip_code(idx, Some(code));
111    }
112}
113
114// ---------------------------------------------------------------------------
115// E/Z core logic
116// ---------------------------------------------------------------------------
117
118/// Compute E/Z for a single double bond from 2D coordinates.
119///
120/// Returns `Some((atom1_of_bond, E | Z))` or `None` if unresolvable.
121fn ez_from_coords(
122    mol: &Molecule,
123    bond_idx: BondIdx,
124    coords: &[(f64, f64)],
125) -> Option<(AtomIdx, CipCode)> {
126    let bond = mol.bond(bond_idx);
127    if bond.order != BondOrder::Double {
128        return None;
129    }
130
131    let a1 = bond.atom1;
132    let a2 = bond.atom2;
133
134    let (x1, y1) = coords.get(a1.0 as usize).copied()?;
135    let (x2, y2) = coords.get(a2.0 as usize).copied()?;
136
137    // Non-double-bond substituents at each alkene end.
138    let subs_a1: Vec<AtomIdx> = mol
139        .neighbors(a1)
140        .filter(|&(nb, bidx)| nb != a2 && mol.bond(bidx).order != BondOrder::Double)
141        .map(|(nb, _)| nb)
142        .collect();
143
144    let subs_a2: Vec<AtomIdx> = mol
145        .neighbors(a2)
146        .filter(|&(nb, bidx)| nb != a1 && mol.bond(bidx).order != BondOrder::Double)
147        .map(|(nb, _)| nb)
148        .collect();
149
150    if subs_a1.is_empty() || subs_a2.is_empty() {
151        return None; // terminal alkene — no geometric isomerism
152    }
153
154    // Highest-priority substituent (simplified 1-sphere CIP) at each end.
155    let ha1 = highest_ez_priority(mol, a1, &subs_a1)?;
156    let ha2 = highest_ez_priority(mol, a2, &subs_a2)?;
157
158    let (hx1, hy1) = coords.get(ha1.0 as usize).copied()?;
159    let (hx2, hy2) = coords.get(ha2.0 as usize).copied()?;
160
161    // Double-bond direction vector (a1 → a2).
162    let vx = x2 - x1;
163    let vy = y2 - y1;
164
165    // Signed 2D cross product: cross(v, u) = vx*uy - vy*ux.
166    // The sign tells which side of the bond axis each substituent lies on.
167    let side_a1 = cross2d(vx, vy, hx1 - x1, hy1 - y1);
168    let side_a2 = cross2d(vx, vy, hx2 - x1, hy2 - y1);
169
170    if side_a1.abs() < 1e-6 || side_a2.abs() < 1e-6 {
171        return None; // substituent falls on the bond axis — geometry ambiguous
172    }
173
174    // Same side → Z (zusammen); opposite sides → E (entgegen).
175    let code = if side_a1.signum() == side_a2.signum() {
176        CipCode::Z
177    } else {
178        CipCode::E
179    };
180    Some((a1, code))
181}
182
183/// Return the highest-priority substituent from `subs` at `center`.
184///
185/// Returns `None` when the top two substituents have equal priority (tied →
186/// E/Z is unspecified) or `subs` is empty.
187fn highest_ez_priority(mol: &Molecule, center: AtomIdx, subs: &[AtomIdx]) -> Option<AtomIdx> {
188    if subs.is_empty() {
189        return None;
190    }
191    if subs.len() == 1 {
192        return Some(subs[0]);
193    }
194    let mut sorted = subs.to_vec();
195    sorted.sort_by(|&a, &b| {
196        cip_priority::compare_branches(mol, center, a, b).reverse()
197    });
198    if cip_priority::compare_branches(mol, center, sorted[0], sorted[1]) == Ordering::Equal {
199        return None; // tied top priorities → E/Z not determinable
200    }
201    Some(sorted[0])
202}
203
204/// 2D cross product scalar: vx*uy - vy*ux.
205fn cross2d(vx: f64, vy: f64, ux: f64, uy: f64) -> f64 {
206    vx * uy - vy * ux
207}
208
209// ---------------------------------------------------------------------------
210// Core R/S logic
211// ---------------------------------------------------------------------------
212
213/// 3D point (x, y, z).
214#[derive(Clone, Copy)]
215struct P3 {
216    x: f64,
217    y: f64,
218    z: f64,
219}
220
221fn assign_rs(mol: &Molecule, coords: &[(f64, f64)], center: AtomIdx) -> Option<CipCode> {
222    let center_pos = coords.get(center.0 as usize)?;
223
224    // Collect heavy-atom neighbors with their 3D positions.
225    let nbs: Vec<AtomIdx> = mol.neighbors(center).map(|(nb, _)| nb).collect();
226    if nbs.len() != 4 {
227        return None; // need exactly 4 heavy-atom neighbors for tetrahedral
228    }
229    let subs: [AtomIdx; 4] = [nbs[0], nbs[1], nbs[2], nbs[3]];
230
231    // Build 3D positions: z from wedge bonds.
232    let z_for: [f64; 4] = [
233        wedge_z(mol, center, subs[0]),
234        wedge_z(mol, center, subs[1]),
235        wedge_z(mol, center, subs[2]),
236        wedge_z(mol, center, subs[3]),
237    ];
238
239    let pts: [P3; 4] = [0, 1, 2, 3].map(|i| {
240        let (x, y) = coords
241            .get(subs[i].0 as usize)
242            .copied()
243            .unwrap_or(*center_pos);
244        P3 { x, y, z: z_for[i] }
245    });
246
247    let ranks = rank4(mol, center, &subs)?;
248
249    // Sort substituents highest-priority first.
250    let mut order: [usize; 4] = [0, 1, 2, 3];
251    order.sort_by(|&i, &j| ranks[j].cmp(&ranks[i]));
252
253    let s: [P3; 4] = order.map(|i| pts[i]);
254    let v = signed_volume(s[0], s[1], s[2], s[3]);
255
256    if v.abs() < 1e-6 {
257        return None; // coplanar / degenerate
258    }
259
260    Some(if v > 0.0 { CipCode::S } else { CipCode::R })
261}
262
263/// Determine z-offset for `neighbor` as seen from `center`:
264/// Up (wedge toward viewer) → +1.0
265/// Down (dash away from viewer) → -1.0
266/// Anything else → 0.0
267fn wedge_z(mol: &Molecule, center: AtomIdx, neighbor: AtomIdx) -> f64 {
268    // Check bond center→neighbor
269    if let Some((_, bond)) = mol.bond_between(center, neighbor) {
270        match bond.order {
271            BondOrder::Up => {
272                // If bond.atom1 == center, neighbor is in front (+z).
273                // If bond.atom1 == neighbor (bond drawn away), neighbor is behind (−z).
274                // Standard convention: wedge tip at atom1, base at atom2 → atom2 is in front.
275                if bond.atom1 == center {
276                    return 1.0;
277                } else {
278                    return -1.0;
279                }
280            }
281            BondOrder::Down => {
282                if bond.atom1 == center {
283                    return -1.0;
284                } else {
285                    return 1.0;
286                }
287            }
288            _ => {}
289        }
290    }
291    0.0
292}
293
294// Note: 1-sphere CIP helpers removed in v0.1.93; now using cip_priority module
295
296fn rank4(mol: &Molecule, center: AtomIdx, subs: &[AtomIdx; 4]) -> Option<[u8; 4]> {
297    let mut order: [usize; 4] = [0, 1, 2, 3];
298    order.sort_by(|&i, &j| {
299        cip_priority::compare_branches(mol, center, subs[i], subs[j]).reverse()
300    });
301    for k in 0..3 {
302        if cip_priority::compare_branches(mol, center, subs[order[k]], subs[order[k + 1]])
303            == Ordering::Equal
304        {
305            return None;
306        }
307    }
308    let mut ranks = [0u8; 4];
309    for (rank_from_top, &idx) in order.iter().enumerate() {
310        ranks[idx] = (4 - rank_from_top) as u8;
311    }
312    Some(ranks)
313}
314
315// ---------------------------------------------------------------------------
316// Signed volume
317// ---------------------------------------------------------------------------
318
319fn signed_volume(p1: P3, p2: P3, p3: P3, p4: P3) -> f64 {
320    let (ax, ay, az) = (p1.x - p4.x, p1.y - p4.y, p1.z - p4.z);
321    let (bx, by, bz) = (p2.x - p4.x, p2.y - p4.y, p2.z - p4.z);
322    let (cx, cy, cz) = (p3.x - p4.x, p3.y - p4.y, p3.z - p4.z);
323    ax * (by * cz - bz * cy) - ay * (bx * cz - bz * cx) + az * (bx * cy - by * cx)
324}
325
326// ---------------------------------------------------------------------------
327// Tests
328// ---------------------------------------------------------------------------
329
330#[cfg(test)]
331mod tests {
332    use super::*;
333    use chematic_smiles::parse;
334
335    // -----------------------------------------------------------------------
336    // E/Z tests
337    // -----------------------------------------------------------------------
338
339    #[test]
340    fn test_ez_terminal_alkene_no_assignment() {
341        // CH2=CH2 — terminal, no E/Z possible
342        let mol = parse("C=C").unwrap();
343        let coords = vec![(0.0, 0.0), (1.5, 0.0)];
344        // No E/Z should be assigned for a terminal alkene
345        let bond_idx = mol
346            .bonds()
347            .find(|(_, b)| b.order == BondOrder::Double)
348            .map(|(i, _)| i);
349        assert!(bond_idx.is_some());
350        let result = cip_ez_descriptor(&mol, bond_idx.unwrap(), &coords);
351        assert!(result.is_none(), "terminal alkene should have no E/Z");
352    }
353
354    #[test]
355    fn test_ez_but2ene_z() {
356        // (Z)-but-2-ene: CH3\C=C/CH3
357        // Atoms: C0(Me)-C1=C2-C3(Me)
358        // Layout: C1 and C2 horizontal; C0 above-left, C3 above-right → same side → Z
359        //
360        //   C0          C3
361        //     \        /
362        //      C1 = C2
363        //
364        let mol = parse("CC=CC").unwrap();
365        // Double bond is between atoms 1 and 2 (0-indexed SMILES order).
366        let bond_idx = mol
367            .bonds()
368            .find(|(_, b)| b.order == BondOrder::Double)
369            .map(|(i, _)| i)
370            .unwrap();
371        let coords = vec![
372            (-0.866, 0.5), // C0 (methyl at a1 end, above)
373            (0.0, 0.0),    // C1 (left alkene carbon)
374            (1.5, 0.0),    // C2 (right alkene carbon)
375            (2.366, 0.5),  // C3 (methyl at a2 end, above — same side as C0)
376        ];
377        let result = cip_ez_descriptor(&mol, bond_idx, &coords);
378        assert_eq!(result, Some(CipCode::Z), "(Z)-but-2-ene should be Z");
379    }
380
381    #[test]
382    fn test_ez_but2ene_e() {
383        // (E)-but-2-ene: CH3/C=C/CH3
384        // C0 above-left, C3 below-right → opposite sides → E
385        //
386        //   C0
387        //     \
388        //      C1 = C2
389        //              \
390        //               C3
391        //
392        let mol = parse("CC=CC").unwrap();
393        let bond_idx = mol
394            .bonds()
395            .find(|(_, b)| b.order == BondOrder::Double)
396            .map(|(i, _)| i)
397            .unwrap();
398        let coords = vec![
399            (-0.866, 0.5), // C0 above
400            (0.0, 0.0),    // C1
401            (1.5, 0.0),    // C2
402            (2.366, -0.5), // C3 below → opposite side → E
403        ];
404        let result = cip_ez_descriptor(&mol, bond_idx, &coords);
405        assert_eq!(result, Some(CipCode::E), "(E)-but-2-ene should be E");
406    }
407
408    #[test]
409    fn test_assign_ez_from_2d_sets_cip_code() {
410        let mut mol = parse("CC=CC").unwrap();
411        let coords = vec![
412            (-0.866, 0.5),
413            (0.0, 0.0),
414            (1.5, 0.0),
415            (2.366, 0.5), // same side → Z
416        ];
417        assign_ez_from_2d(&mut mol, &coords);
418        // At least one atom should have a Z cip_code after assignment.
419        let has_z = mol
420            .atoms()
421            .any(|(_, atom)| atom.cip_code == Some(CipCode::Z));
422        assert!(has_z, "assign_ez_from_2d should set Z on but-2-ene");
423    }
424
425    // -----------------------------------------------------------------------
426    // R/S tests
427    // -----------------------------------------------------------------------
428
429    #[test]
430    fn test_no_stereo_ethane() {
431        let mol = parse("CC").unwrap();
432        let coords = vec![(0.0, 0.0), (1.0, 0.0)];
433        let result = assign_stereo_from_2d(&mol, &coords);
434        assert!(result.assignments.is_empty());
435    }
436
437    #[test]
438    fn test_r_s_bromochlorofluoromethane() {
439        // CHFClBr — 4 heavy-atom neighbors, wedge C-Br above plane
440        // Build: C bonded to F, Cl, Br, H via explicit H
441        // We won't test R vs S deterministically (depends on atom ordering),
442        // just that an assignment is returned when a wedge bond is present.
443        use chematic_core::{Atom, BondOrder as BO, Element, MoleculeBuilder};
444        let mut b = MoleculeBuilder::new();
445        let c = b.add_atom(Atom::new(Element::C));
446        let f = b.add_atom(Atom::new(Element::F));
447        let cl = b.add_atom(Atom::new(Element::CL));
448        let br = b.add_atom(Atom::new(Element::BR));
449        let h = b.add_atom(Atom::new(Element::H));
450        b.add_bond(c, f, BO::Single).unwrap();
451        b.add_bond(c, cl, BO::Single).unwrap();
452        b.add_bond(c, br, BO::Up).unwrap(); // Br is in front of plane
453        b.add_bond(c, h, BO::Down).unwrap(); // H is behind plane
454        let mol = b.build();
455        // Use non-degenerate 2D positions to avoid a zero-volume determinant.
456        let coords = vec![
457            (0.0, 0.0),   // C
458            (-1.0, -0.5), // F
459            (1.0, -0.5),  // Cl
460            (0.0, 1.0),   // Br  (z = +1 from Up bond)
461            (0.0, -1.0),  // H   (z = -1 from Down bond)
462        ];
463        let result = assign_stereo_from_2d(&mol, &coords);
464        // Should assign one chiral center.
465        assert_eq!(result.assignments.len(), 1);
466        assert_eq!(result.assignments[0].0, c);
467    }
468}