chematic_perception/stereo2d.rs
1//! v0.1.93: Stereochemistry assignment from 2D coordinates with full CIP prioritization.
2//!
3//! [`assign_stereo_from_2d`] infers R/S tetrahedral stereo descriptors by
4//! combining the 2D layout positions of substituents with the wedge-bond
5//! notation (`BondOrder::Up` / `BondOrder::Down`) already encoded in the
6//! molecule.
7//!
8//! **Convention:**
9//! - `BondOrder::Up` (solid wedge): the *far* atom (`bond.atom2`) is in
10//! front of the plane (toward the viewer, z > 0).
11//! - `BondOrder::Down` (dashed wedge): the far atom is behind the plane (z < 0).
12//! - All other bonds: both endpoints are coplanar (z = 0).
13//!
14//! Priority is determined using full multi-sphere BFS CIP rules (v0.1.93+),
15//! superseding the simplified 1-sphere approach from v0.1.92.
16
17use std::cmp::Ordering;
18
19use crate::cip_priority;
20use chematic_core::{AtomIdx, BondIdx, BondOrder, CipCode, Molecule};
21
22// ---------------------------------------------------------------------------
23// Public types
24// ---------------------------------------------------------------------------
25
26/// Result of a 2D-based stereochemistry assignment.
27#[derive(Debug, Default, Clone)]
28pub struct StereoAssignment2D {
29 pub assignments: Vec<(AtomIdx, CipCode)>,
30}
31
32impl StereoAssignment2D {
33 /// Look up the CIP code for atom `idx`.
34 pub fn get(&self, idx: AtomIdx) -> Option<CipCode> {
35 self.assignments
36 .iter()
37 .find(|(i, _)| *i == idx)
38 .map(|(_, c)| *c)
39 }
40}
41
42// ---------------------------------------------------------------------------
43// Public entry points
44// ---------------------------------------------------------------------------
45
46/// Assign R/S stereo to atoms that carry wedge bonds, given the 2D layout.
47///
48/// `coords[i]` is the `(x, y)` position of atom `i` in any consistent unit
49/// (screen pixels, Å, etc.). The z-component is derived from wedge bonds.
50///
51/// Returns a [`StereoAssignment2D`] containing one entry per successfully
52/// assigned chiral center. Atoms where the chirality is ambiguous (tied
53/// priorities or too few heavy-atom neighbors) are omitted.
54pub fn assign_stereo_from_2d(mol: &Molecule, coords: &[(f64, f64)]) -> StereoAssignment2D {
55 let mut assignments = Vec::new();
56 for (idx, _) in mol.atoms() {
57 if let Some(code) = assign_rs(mol, coords, idx) {
58 assignments.push((idx, code));
59 }
60 }
61 StereoAssignment2D { assignments }
62}
63
64/// Assign E/Z stereo to double bonds using 2D atom coordinates.
65///
66/// Unlike the CIP-based approach (which reads `BondOrder::Up/Down` hints near
67/// sp2 carbons), this function derives E/Z purely from the 2D positions of
68/// atoms: it projects each substituent onto the perpendicular of the
69/// double-bond axis and compares which side each highest-priority substituent
70/// falls on.
71///
72/// Sets `cip_code` (E or Z) on atom1 of each resolved double bond in `mol`.
73/// Double bonds that are terminal (no substituent at one end) or have tied
74/// substituent priorities are skipped.
75pub fn assign_ez_from_2d(mol: &mut Molecule, coords: &[(f64, f64)]) {
76 let bond_indices: Vec<BondIdx> = mol
77 .bonds()
78 .filter(|(_, b)| b.order == BondOrder::Double)
79 .map(|(bidx, _)| bidx)
80 .collect();
81 for bidx in bond_indices {
82 if let Some((atom_idx, code)) = ez_from_coords(mol, bidx, coords) {
83 mol.set_cip_code(atom_idx, Some(code));
84 }
85 }
86}
87
88/// Return the E/Z descriptor for the double bond at `bond_idx` from 2D coordinates.
89///
90/// Returns `None` when the bond is not a double bond, is a terminal alkene,
91/// has an ambiguous geometry (substituent on the bond axis), or has tied
92/// CIP priorities.
93pub fn cip_ez_descriptor(
94 mol: &Molecule,
95 bond_idx: BondIdx,
96 coords: &[(f64, f64)],
97) -> Option<CipCode> {
98 ez_from_coords(mol, bond_idx, coords).map(|(_, code)| code)
99}
100
101/// Like [`assign_stereo_from_2d`] but writes `cip_code` directly onto each
102/// chiral atom in `mol`.
103pub fn apply_stereo_from_2d(mol: &mut Molecule, coords: &[(f64, f64)]) {
104 let result = assign_stereo_from_2d(mol, coords);
105 for (idx, code) in result.assignments {
106 // atom_mut requires access — we use set_cip_code helper if it exists,
107 // or rewrite the atom via add/remove.
108 // Since Molecule.atoms is private, use the new mutable set_* API:
109 // (molecule.rs exposes atoms[idx].cip_code via a dedicated setter)
110 mol.set_cip_code(idx, Some(code));
111 }
112}
113
114// ---------------------------------------------------------------------------
115// E/Z core logic
116// ---------------------------------------------------------------------------
117
118/// Compute E/Z for a single double bond from 2D coordinates.
119///
120/// Returns `Some((atom1_of_bond, E | Z))` or `None` if unresolvable.
121fn ez_from_coords(
122 mol: &Molecule,
123 bond_idx: BondIdx,
124 coords: &[(f64, f64)],
125) -> Option<(AtomIdx, CipCode)> {
126 let bond = mol.bond(bond_idx);
127 if bond.order != BondOrder::Double {
128 return None;
129 }
130
131 let a1 = bond.atom1;
132 let a2 = bond.atom2;
133
134 let (x1, y1) = coords.get(a1.0 as usize).copied()?;
135 let (x2, y2) = coords.get(a2.0 as usize).copied()?;
136
137 // Non-double-bond substituents at each alkene end.
138 let subs_a1: Vec<AtomIdx> = mol
139 .neighbors(a1)
140 .filter(|&(nb, bidx)| nb != a2 && mol.bond(bidx).order != BondOrder::Double)
141 .map(|(nb, _)| nb)
142 .collect();
143
144 let subs_a2: Vec<AtomIdx> = mol
145 .neighbors(a2)
146 .filter(|&(nb, bidx)| nb != a1 && mol.bond(bidx).order != BondOrder::Double)
147 .map(|(nb, _)| nb)
148 .collect();
149
150 if subs_a1.is_empty() || subs_a2.is_empty() {
151 return None; // terminal alkene — no geometric isomerism
152 }
153
154 // Highest-priority substituent (simplified 1-sphere CIP) at each end.
155 let ha1 = highest_ez_priority(mol, a1, &subs_a1)?;
156 let ha2 = highest_ez_priority(mol, a2, &subs_a2)?;
157
158 let (hx1, hy1) = coords.get(ha1.0 as usize).copied()?;
159 let (hx2, hy2) = coords.get(ha2.0 as usize).copied()?;
160
161 // Double-bond direction vector (a1 → a2).
162 let vx = x2 - x1;
163 let vy = y2 - y1;
164
165 // Signed 2D cross product: cross(v, u) = vx*uy - vy*ux.
166 // The sign tells which side of the bond axis each substituent lies on.
167 let side_a1 = cross2d(vx, vy, hx1 - x1, hy1 - y1);
168 let side_a2 = cross2d(vx, vy, hx2 - x1, hy2 - y1);
169
170 if side_a1.abs() < 1e-6 || side_a2.abs() < 1e-6 {
171 return None; // substituent falls on the bond axis — geometry ambiguous
172 }
173
174 // Same side → Z (zusammen); opposite sides → E (entgegen).
175 let code = if side_a1.signum() == side_a2.signum() {
176 CipCode::Z
177 } else {
178 CipCode::E
179 };
180 Some((a1, code))
181}
182
183/// Return the highest-priority substituent from `subs` at `center`.
184///
185/// Returns `None` when the top two substituents have equal priority (tied →
186/// E/Z is unspecified) or `subs` is empty.
187fn highest_ez_priority(mol: &Molecule, center: AtomIdx, subs: &[AtomIdx]) -> Option<AtomIdx> {
188 if subs.is_empty() {
189 return None;
190 }
191 if subs.len() == 1 {
192 return Some(subs[0]);
193 }
194 let mut sorted = subs.to_vec();
195 sorted.sort_by(|&a, &b| cip_priority::compare_branches(mol, center, a, b).reverse());
196 if cip_priority::compare_branches(mol, center, sorted[0], sorted[1]) == Ordering::Equal {
197 return None; // tied top priorities → E/Z not determinable
198 }
199 Some(sorted[0])
200}
201
202/// 2D cross product scalar: vx*uy - vy*ux.
203fn cross2d(vx: f64, vy: f64, ux: f64, uy: f64) -> f64 {
204 vx * uy - vy * ux
205}
206
207// ---------------------------------------------------------------------------
208// Core R/S logic
209// ---------------------------------------------------------------------------
210
211/// 3D point (x, y, z).
212#[derive(Clone, Copy)]
213struct P3 {
214 x: f64,
215 y: f64,
216 z: f64,
217}
218
219fn assign_rs(mol: &Molecule, coords: &[(f64, f64)], center: AtomIdx) -> Option<CipCode> {
220 let center_pos = coords.get(center.0 as usize)?;
221
222 // Collect heavy-atom neighbors with their 3D positions.
223 let nbs: Vec<AtomIdx> = mol.neighbors(center).map(|(nb, _)| nb).collect();
224 if nbs.len() != 4 {
225 return None; // need exactly 4 heavy-atom neighbors for tetrahedral
226 }
227 let subs: [AtomIdx; 4] = [nbs[0], nbs[1], nbs[2], nbs[3]];
228
229 // Build 3D positions: z from wedge bonds.
230 let z_for: [f64; 4] = [
231 wedge_z(mol, center, subs[0]),
232 wedge_z(mol, center, subs[1]),
233 wedge_z(mol, center, subs[2]),
234 wedge_z(mol, center, subs[3]),
235 ];
236
237 let pts: [P3; 4] = [0, 1, 2, 3].map(|i| {
238 let (x, y) = coords
239 .get(subs[i].0 as usize)
240 .copied()
241 .unwrap_or(*center_pos);
242 P3 { x, y, z: z_for[i] }
243 });
244
245 let ranks = rank4(mol, center, &subs)?;
246
247 // Sort substituents highest-priority first.
248 let mut order: [usize; 4] = [0, 1, 2, 3];
249 order.sort_by(|&i, &j| ranks[j].cmp(&ranks[i]));
250
251 let s: [P3; 4] = order.map(|i| pts[i]);
252 let v = signed_volume(s[0], s[1], s[2], s[3]);
253
254 if v.abs() < 1e-6 {
255 return None; // coplanar / degenerate
256 }
257
258 Some(if v > 0.0 { CipCode::S } else { CipCode::R })
259}
260
261/// Determine z-offset for `neighbor` as seen from `center`:
262/// Up (wedge toward viewer) → +1.0
263/// Down (dash away from viewer) → -1.0
264/// Anything else → 0.0
265fn wedge_z(mol: &Molecule, center: AtomIdx, neighbor: AtomIdx) -> f64 {
266 // Check bond center→neighbor
267 if let Some((_, bond)) = mol.bond_between(center, neighbor) {
268 match bond.order {
269 BondOrder::Up => {
270 // If bond.atom1 == center, neighbor is in front (+z).
271 // If bond.atom1 == neighbor (bond drawn away), neighbor is behind (−z).
272 // Standard convention: wedge tip at atom1, base at atom2 → atom2 is in front.
273 if bond.atom1 == center {
274 return 1.0;
275 } else {
276 return -1.0;
277 }
278 }
279 BondOrder::Down => {
280 if bond.atom1 == center {
281 return -1.0;
282 } else {
283 return 1.0;
284 }
285 }
286 _ => {}
287 }
288 }
289 0.0
290}
291
292// Note: 1-sphere CIP helpers removed in v0.1.93; now using cip_priority module
293
294fn rank4(mol: &Molecule, center: AtomIdx, subs: &[AtomIdx; 4]) -> Option<[u8; 4]> {
295 let mut order: [usize; 4] = [0, 1, 2, 3];
296 order.sort_by(|&i, &j| cip_priority::compare_branches(mol, center, subs[i], subs[j]).reverse());
297 for k in 0..3 {
298 if cip_priority::compare_branches(mol, center, subs[order[k]], subs[order[k + 1]])
299 == Ordering::Equal
300 {
301 return None;
302 }
303 }
304 let mut ranks = [0u8; 4];
305 for (rank_from_top, &idx) in order.iter().enumerate() {
306 ranks[idx] = (4 - rank_from_top) as u8;
307 }
308 Some(ranks)
309}
310
311// ---------------------------------------------------------------------------
312// Signed volume
313// ---------------------------------------------------------------------------
314
315fn signed_volume(p1: P3, p2: P3, p3: P3, p4: P3) -> f64 {
316 let (ax, ay, az) = (p1.x - p4.x, p1.y - p4.y, p1.z - p4.z);
317 let (bx, by, bz) = (p2.x - p4.x, p2.y - p4.y, p2.z - p4.z);
318 let (cx, cy, cz) = (p3.x - p4.x, p3.y - p4.y, p3.z - p4.z);
319 ax * (by * cz - bz * cy) - ay * (bx * cz - bz * cx) + az * (bx * cy - by * cx)
320}
321
322// ---------------------------------------------------------------------------
323// Tests
324// ---------------------------------------------------------------------------
325
326#[cfg(test)]
327mod tests {
328 use super::*;
329 use chematic_smiles::parse;
330
331 // -----------------------------------------------------------------------
332 // E/Z tests
333 // -----------------------------------------------------------------------
334
335 #[test]
336 fn test_ez_terminal_alkene_no_assignment() {
337 // CH2=CH2 — terminal, no E/Z possible
338 let mol = parse("C=C").unwrap();
339 let coords = vec![(0.0, 0.0), (1.5, 0.0)];
340 // No E/Z should be assigned for a terminal alkene
341 let bond_idx = mol
342 .bonds()
343 .find(|(_, b)| b.order == BondOrder::Double)
344 .map(|(i, _)| i);
345 assert!(bond_idx.is_some());
346 let result = cip_ez_descriptor(&mol, bond_idx.unwrap(), &coords);
347 assert!(result.is_none(), "terminal alkene should have no E/Z");
348 }
349
350 #[test]
351 fn test_ez_but2ene_z() {
352 // (Z)-but-2-ene: CH3\C=C/CH3
353 // Atoms: C0(Me)-C1=C2-C3(Me)
354 // Layout: C1 and C2 horizontal; C0 above-left, C3 above-right → same side → Z
355 //
356 // C0 C3
357 // \ /
358 // C1 = C2
359 //
360 let mol = parse("CC=CC").unwrap();
361 // Double bond is between atoms 1 and 2 (0-indexed SMILES order).
362 let bond_idx = mol
363 .bonds()
364 .find(|(_, b)| b.order == BondOrder::Double)
365 .map(|(i, _)| i)
366 .unwrap();
367 let coords = vec![
368 (-0.866, 0.5), // C0 (methyl at a1 end, above)
369 (0.0, 0.0), // C1 (left alkene carbon)
370 (1.5, 0.0), // C2 (right alkene carbon)
371 (2.366, 0.5), // C3 (methyl at a2 end, above — same side as C0)
372 ];
373 let result = cip_ez_descriptor(&mol, bond_idx, &coords);
374 assert_eq!(result, Some(CipCode::Z), "(Z)-but-2-ene should be Z");
375 }
376
377 #[test]
378 fn test_ez_but2ene_e() {
379 // (E)-but-2-ene: CH3/C=C/CH3
380 // C0 above-left, C3 below-right → opposite sides → E
381 //
382 // C0
383 // \
384 // C1 = C2
385 // \
386 // C3
387 //
388 let mol = parse("CC=CC").unwrap();
389 let bond_idx = mol
390 .bonds()
391 .find(|(_, b)| b.order == BondOrder::Double)
392 .map(|(i, _)| i)
393 .unwrap();
394 let coords = vec![
395 (-0.866, 0.5), // C0 above
396 (0.0, 0.0), // C1
397 (1.5, 0.0), // C2
398 (2.366, -0.5), // C3 below → opposite side → E
399 ];
400 let result = cip_ez_descriptor(&mol, bond_idx, &coords);
401 assert_eq!(result, Some(CipCode::E), "(E)-but-2-ene should be E");
402 }
403
404 #[test]
405 fn test_assign_ez_from_2d_sets_cip_code() {
406 let mut mol = parse("CC=CC").unwrap();
407 let coords = vec![
408 (-0.866, 0.5),
409 (0.0, 0.0),
410 (1.5, 0.0),
411 (2.366, 0.5), // same side → Z
412 ];
413 assign_ez_from_2d(&mut mol, &coords);
414 // At least one atom should have a Z cip_code after assignment.
415 let has_z = mol
416 .atoms()
417 .any(|(_, atom)| atom.cip_code == Some(CipCode::Z));
418 assert!(has_z, "assign_ez_from_2d should set Z on but-2-ene");
419 }
420
421 // -----------------------------------------------------------------------
422 // R/S tests
423 // -----------------------------------------------------------------------
424
425 #[test]
426 fn test_no_stereo_ethane() {
427 let mol = parse("CC").unwrap();
428 let coords = vec![(0.0, 0.0), (1.0, 0.0)];
429 let result = assign_stereo_from_2d(&mol, &coords);
430 assert!(result.assignments.is_empty());
431 }
432
433 #[test]
434 fn test_r_s_bromochlorofluoromethane() {
435 // CHFClBr — 4 heavy-atom neighbors, wedge C-Br above plane
436 // Build: C bonded to F, Cl, Br, H via explicit H
437 // We won't test R vs S deterministically (depends on atom ordering),
438 // just that an assignment is returned when a wedge bond is present.
439 use chematic_core::{Atom, BondOrder as BO, Element, MoleculeBuilder};
440 let mut b = MoleculeBuilder::new();
441 let c = b.add_atom(Atom::new(Element::C));
442 let f = b.add_atom(Atom::new(Element::F));
443 let cl = b.add_atom(Atom::new(Element::CL));
444 let br = b.add_atom(Atom::new(Element::BR));
445 let h = b.add_atom(Atom::new(Element::H));
446 b.add_bond(c, f, BO::Single).unwrap();
447 b.add_bond(c, cl, BO::Single).unwrap();
448 b.add_bond(c, br, BO::Up).unwrap(); // Br is in front of plane
449 b.add_bond(c, h, BO::Down).unwrap(); // H is behind plane
450 let mol = b.build();
451 // Use non-degenerate 2D positions to avoid a zero-volume determinant.
452 let coords = vec![
453 (0.0, 0.0), // C
454 (-1.0, -0.5), // F
455 (1.0, -0.5), // Cl
456 (0.0, 1.0), // Br (z = +1 from Up bond)
457 (0.0, -1.0), // H (z = -1 from Down bond)
458 ];
459 let result = assign_stereo_from_2d(&mol, &coords);
460 // Should assign one chiral center.
461 assert_eq!(result.assignments.len(), 1);
462 assert_eq!(result.assignments[0].0, c);
463 }
464}