chematic_perception/stereo2d.rs
1//! v0.1.93: Stereochemistry assignment from 2D coordinates with full CIP prioritization.
2//!
3//! [`assign_stereo_from_2d`] infers R/S tetrahedral stereo descriptors by
4//! combining the 2D layout positions of substituents with the wedge-bond
5//! notation (`BondOrder::Up` / `BondOrder::Down`) already encoded in the
6//! molecule.
7//!
8//! **Convention:**
9//! - `BondOrder::Up` (solid wedge): the *far* atom (`bond.atom2`) is in
10//! front of the plane (toward the viewer, z > 0).
11//! - `BondOrder::Down` (dashed wedge): the far atom is behind the plane (z < 0).
12//! - All other bonds: both endpoints are coplanar (z = 0).
13//!
14//! Priority is determined using full multi-sphere BFS CIP rules (v0.1.93+),
15//! superseding the simplified 1-sphere approach from v0.1.92.
16
17use std::cmp::Ordering;
18
19use chematic_core::{AtomIdx, BondIdx, BondOrder, CipCode, Molecule};
20use crate::cip_priority;
21
22// ---------------------------------------------------------------------------
23// Public types
24// ---------------------------------------------------------------------------
25
26/// Result of a 2D-based stereochemistry assignment.
27#[derive(Debug, Default, Clone)]
28pub struct StereoAssignment2D {
29 pub assignments: Vec<(AtomIdx, CipCode)>,
30}
31
32impl StereoAssignment2D {
33 /// Look up the CIP code for atom `idx`.
34 pub fn get(&self, idx: AtomIdx) -> Option<CipCode> {
35 self.assignments
36 .iter()
37 .find(|(i, _)| *i == idx)
38 .map(|(_, c)| *c)
39 }
40}
41
42// ---------------------------------------------------------------------------
43// Public entry points
44// ---------------------------------------------------------------------------
45
46/// Assign R/S stereo to atoms that carry wedge bonds, given the 2D layout.
47///
48/// `coords[i]` is the `(x, y)` position of atom `i` in any consistent unit
49/// (screen pixels, Å, etc.). The z-component is derived from wedge bonds.
50///
51/// Returns a [`StereoAssignment2D`] containing one entry per successfully
52/// assigned chiral center. Atoms where the chirality is ambiguous (tied
53/// priorities or too few heavy-atom neighbors) are omitted.
54pub fn assign_stereo_from_2d(mol: &Molecule, coords: &[(f64, f64)]) -> StereoAssignment2D {
55 let mut assignments = Vec::new();
56 for (idx, _) in mol.atoms() {
57 if let Some(code) = assign_rs(mol, coords, idx) {
58 assignments.push((idx, code));
59 }
60 }
61 StereoAssignment2D { assignments }
62}
63
64/// Assign E/Z stereo to double bonds using 2D atom coordinates.
65///
66/// Unlike the CIP-based approach (which reads `BondOrder::Up/Down` hints near
67/// sp2 carbons), this function derives E/Z purely from the 2D positions of
68/// atoms: it projects each substituent onto the perpendicular of the
69/// double-bond axis and compares which side each highest-priority substituent
70/// falls on.
71///
72/// Sets `cip_code` (E or Z) on atom1 of each resolved double bond in `mol`.
73/// Double bonds that are terminal (no substituent at one end) or have tied
74/// substituent priorities are skipped.
75pub fn assign_ez_from_2d(mol: &mut Molecule, coords: &[(f64, f64)]) {
76 let bond_indices: Vec<BondIdx> = mol
77 .bonds()
78 .filter(|(_, b)| b.order == BondOrder::Double)
79 .map(|(bidx, _)| bidx)
80 .collect();
81 for bidx in bond_indices {
82 if let Some((atom_idx, code)) = ez_from_coords(mol, bidx, coords) {
83 mol.set_cip_code(atom_idx, Some(code));
84 }
85 }
86}
87
88/// Return the E/Z descriptor for the double bond at `bond_idx` from 2D coordinates.
89///
90/// Returns `None` when the bond is not a double bond, is a terminal alkene,
91/// has an ambiguous geometry (substituent on the bond axis), or has tied
92/// CIP priorities.
93pub fn cip_ez_descriptor(
94 mol: &Molecule,
95 bond_idx: BondIdx,
96 coords: &[(f64, f64)],
97) -> Option<CipCode> {
98 ez_from_coords(mol, bond_idx, coords).map(|(_, code)| code)
99}
100
101/// Like [`assign_stereo_from_2d`] but writes `cip_code` directly onto each
102/// chiral atom in `mol`.
103pub fn apply_stereo_from_2d(mol: &mut Molecule, coords: &[(f64, f64)]) {
104 let result = assign_stereo_from_2d(mol, coords);
105 for (idx, code) in result.assignments {
106 // atom_mut requires access — we use set_cip_code helper if it exists,
107 // or rewrite the atom via add/remove.
108 // Since Molecule.atoms is private, use the new mutable set_* API:
109 // (molecule.rs exposes atoms[idx].cip_code via a dedicated setter)
110 mol.set_cip_code(idx, Some(code));
111 }
112}
113
114// ---------------------------------------------------------------------------
115// E/Z core logic
116// ---------------------------------------------------------------------------
117
118/// Compute E/Z for a single double bond from 2D coordinates.
119///
120/// Returns `Some((atom1_of_bond, E | Z))` or `None` if unresolvable.
121fn ez_from_coords(
122 mol: &Molecule,
123 bond_idx: BondIdx,
124 coords: &[(f64, f64)],
125) -> Option<(AtomIdx, CipCode)> {
126 let bond = mol.bond(bond_idx);
127 if bond.order != BondOrder::Double {
128 return None;
129 }
130
131 let a1 = bond.atom1;
132 let a2 = bond.atom2;
133
134 let (x1, y1) = coords.get(a1.0 as usize).copied()?;
135 let (x2, y2) = coords.get(a2.0 as usize).copied()?;
136
137 // Non-double-bond substituents at each alkene end.
138 let subs_a1: Vec<AtomIdx> = mol
139 .neighbors(a1)
140 .filter(|&(nb, bidx)| nb != a2 && mol.bond(bidx).order != BondOrder::Double)
141 .map(|(nb, _)| nb)
142 .collect();
143
144 let subs_a2: Vec<AtomIdx> = mol
145 .neighbors(a2)
146 .filter(|&(nb, bidx)| nb != a1 && mol.bond(bidx).order != BondOrder::Double)
147 .map(|(nb, _)| nb)
148 .collect();
149
150 if subs_a1.is_empty() || subs_a2.is_empty() {
151 return None; // terminal alkene — no geometric isomerism
152 }
153
154 // Highest-priority substituent (simplified 1-sphere CIP) at each end.
155 let ha1 = highest_ez_priority(mol, a1, &subs_a1)?;
156 let ha2 = highest_ez_priority(mol, a2, &subs_a2)?;
157
158 let (hx1, hy1) = coords.get(ha1.0 as usize).copied()?;
159 let (hx2, hy2) = coords.get(ha2.0 as usize).copied()?;
160
161 // Double-bond direction vector (a1 → a2).
162 let vx = x2 - x1;
163 let vy = y2 - y1;
164
165 // Signed 2D cross product: cross(v, u) = vx*uy - vy*ux.
166 // The sign tells which side of the bond axis each substituent lies on.
167 let side_a1 = cross2d(vx, vy, hx1 - x1, hy1 - y1);
168 let side_a2 = cross2d(vx, vy, hx2 - x1, hy2 - y1);
169
170 if side_a1.abs() < 1e-6 || side_a2.abs() < 1e-6 {
171 return None; // substituent falls on the bond axis — geometry ambiguous
172 }
173
174 // Same side → Z (zusammen); opposite sides → E (entgegen).
175 let code = if side_a1.signum() == side_a2.signum() {
176 CipCode::Z
177 } else {
178 CipCode::E
179 };
180 Some((a1, code))
181}
182
183/// Return the highest-priority substituent from `subs` at `center`.
184///
185/// Returns `None` when the top two substituents have equal priority (tied →
186/// E/Z is unspecified) or `subs` is empty.
187fn highest_ez_priority(mol: &Molecule, center: AtomIdx, subs: &[AtomIdx]) -> Option<AtomIdx> {
188 if subs.is_empty() {
189 return None;
190 }
191 if subs.len() == 1 {
192 return Some(subs[0]);
193 }
194 let mut sorted = subs.to_vec();
195 sorted.sort_by(|&a, &b| {
196 cip_priority::compare_branches(mol, center, a, b).reverse()
197 });
198 if cip_priority::compare_branches(mol, center, sorted[0], sorted[1]) == Ordering::Equal {
199 return None; // tied top priorities → E/Z not determinable
200 }
201 Some(sorted[0])
202}
203
204/// 2D cross product scalar: vx*uy - vy*ux.
205fn cross2d(vx: f64, vy: f64, ux: f64, uy: f64) -> f64 {
206 vx * uy - vy * ux
207}
208
209// ---------------------------------------------------------------------------
210// Core R/S logic
211// ---------------------------------------------------------------------------
212
213/// 3D point (x, y, z).
214#[derive(Clone, Copy)]
215struct P3 {
216 x: f64,
217 y: f64,
218 z: f64,
219}
220
221fn assign_rs(mol: &Molecule, coords: &[(f64, f64)], center: AtomIdx) -> Option<CipCode> {
222 let center_pos = coords.get(center.0 as usize)?;
223
224 // Collect heavy-atom neighbors with their 3D positions.
225 let nbs: Vec<AtomIdx> = mol.neighbors(center).map(|(nb, _)| nb).collect();
226 if nbs.len() != 4 {
227 return None; // need exactly 4 heavy-atom neighbors for tetrahedral
228 }
229 let subs: [AtomIdx; 4] = [nbs[0], nbs[1], nbs[2], nbs[3]];
230
231 // Build 3D positions: z from wedge bonds.
232 let z_for: [f64; 4] = [
233 wedge_z(mol, center, subs[0]),
234 wedge_z(mol, center, subs[1]),
235 wedge_z(mol, center, subs[2]),
236 wedge_z(mol, center, subs[3]),
237 ];
238
239 let pts: [P3; 4] = [0, 1, 2, 3].map(|i| {
240 let (x, y) = coords
241 .get(subs[i].0 as usize)
242 .copied()
243 .unwrap_or(*center_pos);
244 P3 { x, y, z: z_for[i] }
245 });
246
247 let ranks = rank4(mol, center, &subs)?;
248
249 // Sort substituents highest-priority first.
250 let mut order: [usize; 4] = [0, 1, 2, 3];
251 order.sort_by(|&i, &j| ranks[j].cmp(&ranks[i]));
252
253 let s: [P3; 4] = order.map(|i| pts[i]);
254 let v = signed_volume(s[0], s[1], s[2], s[3]);
255
256 if v.abs() < 1e-6 {
257 return None; // coplanar / degenerate
258 }
259
260 Some(if v > 0.0 { CipCode::S } else { CipCode::R })
261}
262
263/// Determine z-offset for `neighbor` as seen from `center`:
264/// Up (wedge toward viewer) → +1.0
265/// Down (dash away from viewer) → -1.0
266/// Anything else → 0.0
267fn wedge_z(mol: &Molecule, center: AtomIdx, neighbor: AtomIdx) -> f64 {
268 // Check bond center→neighbor
269 if let Some((_, bond)) = mol.bond_between(center, neighbor) {
270 match bond.order {
271 BondOrder::Up => {
272 // If bond.atom1 == center, neighbor is in front (+z).
273 // If bond.atom1 == neighbor (bond drawn away), neighbor is behind (−z).
274 // Standard convention: wedge tip at atom1, base at atom2 → atom2 is in front.
275 if bond.atom1 == center {
276 return 1.0;
277 } else {
278 return -1.0;
279 }
280 }
281 BondOrder::Down => {
282 if bond.atom1 == center {
283 return -1.0;
284 } else {
285 return 1.0;
286 }
287 }
288 _ => {}
289 }
290 }
291 0.0
292}
293
294// Note: 1-sphere CIP helpers removed in v0.1.93; now using cip_priority module
295
296fn rank4(mol: &Molecule, center: AtomIdx, subs: &[AtomIdx; 4]) -> Option<[u8; 4]> {
297 let mut order: [usize; 4] = [0, 1, 2, 3];
298 order.sort_by(|&i, &j| {
299 cip_priority::compare_branches(mol, center, subs[i], subs[j]).reverse()
300 });
301 for k in 0..3 {
302 if cip_priority::compare_branches(mol, center, subs[order[k]], subs[order[k + 1]])
303 == Ordering::Equal
304 {
305 return None;
306 }
307 }
308 let mut ranks = [0u8; 4];
309 for (rank_from_top, &idx) in order.iter().enumerate() {
310 ranks[idx] = (4 - rank_from_top) as u8;
311 }
312 Some(ranks)
313}
314
315// ---------------------------------------------------------------------------
316// Signed volume
317// ---------------------------------------------------------------------------
318
319fn signed_volume(p1: P3, p2: P3, p3: P3, p4: P3) -> f64 {
320 let (ax, ay, az) = (p1.x - p4.x, p1.y - p4.y, p1.z - p4.z);
321 let (bx, by, bz) = (p2.x - p4.x, p2.y - p4.y, p2.z - p4.z);
322 let (cx, cy, cz) = (p3.x - p4.x, p3.y - p4.y, p3.z - p4.z);
323 ax * (by * cz - bz * cy) - ay * (bx * cz - bz * cx) + az * (bx * cy - by * cx)
324}
325
326// ---------------------------------------------------------------------------
327// Tests
328// ---------------------------------------------------------------------------
329
330#[cfg(test)]
331mod tests {
332 use super::*;
333 use chematic_smiles::parse;
334
335 // -----------------------------------------------------------------------
336 // E/Z tests
337 // -----------------------------------------------------------------------
338
339 #[test]
340 fn test_ez_terminal_alkene_no_assignment() {
341 // CH2=CH2 — terminal, no E/Z possible
342 let mol = parse("C=C").unwrap();
343 let coords = vec![(0.0, 0.0), (1.5, 0.0)];
344 // No E/Z should be assigned for a terminal alkene
345 let bond_idx = mol
346 .bonds()
347 .find(|(_, b)| b.order == BondOrder::Double)
348 .map(|(i, _)| i);
349 assert!(bond_idx.is_some());
350 let result = cip_ez_descriptor(&mol, bond_idx.unwrap(), &coords);
351 assert!(result.is_none(), "terminal alkene should have no E/Z");
352 }
353
354 #[test]
355 fn test_ez_but2ene_z() {
356 // (Z)-but-2-ene: CH3\C=C/CH3
357 // Atoms: C0(Me)-C1=C2-C3(Me)
358 // Layout: C1 and C2 horizontal; C0 above-left, C3 above-right → same side → Z
359 //
360 // C0 C3
361 // \ /
362 // C1 = C2
363 //
364 let mol = parse("CC=CC").unwrap();
365 // Double bond is between atoms 1 and 2 (0-indexed SMILES order).
366 let bond_idx = mol
367 .bonds()
368 .find(|(_, b)| b.order == BondOrder::Double)
369 .map(|(i, _)| i)
370 .unwrap();
371 let coords = vec![
372 (-0.866, 0.5), // C0 (methyl at a1 end, above)
373 (0.0, 0.0), // C1 (left alkene carbon)
374 (1.5, 0.0), // C2 (right alkene carbon)
375 (2.366, 0.5), // C3 (methyl at a2 end, above — same side as C0)
376 ];
377 let result = cip_ez_descriptor(&mol, bond_idx, &coords);
378 assert_eq!(result, Some(CipCode::Z), "(Z)-but-2-ene should be Z");
379 }
380
381 #[test]
382 fn test_ez_but2ene_e() {
383 // (E)-but-2-ene: CH3/C=C/CH3
384 // C0 above-left, C3 below-right → opposite sides → E
385 //
386 // C0
387 // \
388 // C1 = C2
389 // \
390 // C3
391 //
392 let mol = parse("CC=CC").unwrap();
393 let bond_idx = mol
394 .bonds()
395 .find(|(_, b)| b.order == BondOrder::Double)
396 .map(|(i, _)| i)
397 .unwrap();
398 let coords = vec![
399 (-0.866, 0.5), // C0 above
400 (0.0, 0.0), // C1
401 (1.5, 0.0), // C2
402 (2.366, -0.5), // C3 below → opposite side → E
403 ];
404 let result = cip_ez_descriptor(&mol, bond_idx, &coords);
405 assert_eq!(result, Some(CipCode::E), "(E)-but-2-ene should be E");
406 }
407
408 #[test]
409 fn test_assign_ez_from_2d_sets_cip_code() {
410 let mut mol = parse("CC=CC").unwrap();
411 let coords = vec![
412 (-0.866, 0.5),
413 (0.0, 0.0),
414 (1.5, 0.0),
415 (2.366, 0.5), // same side → Z
416 ];
417 assign_ez_from_2d(&mut mol, &coords);
418 // At least one atom should have a Z cip_code after assignment.
419 let has_z = mol
420 .atoms()
421 .any(|(_, atom)| atom.cip_code == Some(CipCode::Z));
422 assert!(has_z, "assign_ez_from_2d should set Z on but-2-ene");
423 }
424
425 // -----------------------------------------------------------------------
426 // R/S tests
427 // -----------------------------------------------------------------------
428
429 #[test]
430 fn test_no_stereo_ethane() {
431 let mol = parse("CC").unwrap();
432 let coords = vec![(0.0, 0.0), (1.0, 0.0)];
433 let result = assign_stereo_from_2d(&mol, &coords);
434 assert!(result.assignments.is_empty());
435 }
436
437 #[test]
438 fn test_r_s_bromochlorofluoromethane() {
439 // CHFClBr — 4 heavy-atom neighbors, wedge C-Br above plane
440 // Build: C bonded to F, Cl, Br, H via explicit H
441 // We won't test R vs S deterministically (depends on atom ordering),
442 // just that an assignment is returned when a wedge bond is present.
443 use chematic_core::{Atom, BondOrder as BO, Element, MoleculeBuilder};
444 let mut b = MoleculeBuilder::new();
445 let c = b.add_atom(Atom::new(Element::C));
446 let f = b.add_atom(Atom::new(Element::F));
447 let cl = b.add_atom(Atom::new(Element::CL));
448 let br = b.add_atom(Atom::new(Element::BR));
449 let h = b.add_atom(Atom::new(Element::H));
450 b.add_bond(c, f, BO::Single).unwrap();
451 b.add_bond(c, cl, BO::Single).unwrap();
452 b.add_bond(c, br, BO::Up).unwrap(); // Br is in front of plane
453 b.add_bond(c, h, BO::Down).unwrap(); // H is behind plane
454 let mol = b.build();
455 // Use non-degenerate 2D positions to avoid a zero-volume determinant.
456 let coords = vec![
457 (0.0, 0.0), // C
458 (-1.0, -0.5), // F
459 (1.0, -0.5), // Cl
460 (0.0, 1.0), // Br (z = +1 from Up bond)
461 (0.0, -1.0), // H (z = -1 from Down bond)
462 ];
463 let result = assign_stereo_from_2d(&mol, &coords);
464 // Should assign one chiral center.
465 assert_eq!(result.assignments.len(), 1);
466 assert_eq!(result.assignments[0].0, c);
467 }
468}