chematic_perception/
stereo_validation.rs1use chematic_core::{AtomIdx, BondOrder, Chirality, Molecule};
15use std::fmt;
16
17#[derive(Debug, Clone, PartialEq, Eq)]
23pub enum StereoErrorKind {
24 ImpossibleCenter,
27 ConflictingWedges,
30 RedundantStereo,
33}
34
35#[derive(Debug, Clone, PartialEq, Eq)]
37pub struct StereoError {
38 pub atom_idx: usize,
40 pub kind: StereoErrorKind,
41}
42
43impl fmt::Display for StereoError {
44 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45 let kind_str = match &self.kind {
46 StereoErrorKind::ImpossibleCenter => {
47 "impossible stereocenter (< 4 distinct neighbours)"
48 }
49 StereoErrorKind::ConflictingWedges => "conflicting wedge directions",
50 StereoErrorKind::RedundantStereo => "redundant stereo on symmetric atom",
51 };
52 write!(f, "atom {}: {}", self.atom_idx, kind_str)
53 }
54}
55
56impl std::error::Error for StereoError {}
57
58#[derive(Debug, Clone, PartialEq, Eq)]
60pub struct StereoCompleteness {
61 pub specified: usize,
63 pub unspecified: usize,
65 pub total_centers: usize,
67}
68
69fn simple_morgan_ranks(mol: &Molecule) -> Vec<u64> {
76 let n = mol.atom_count();
77 let mut ranks: Vec<u64> = (0..n)
78 .map(|i| {
79 let idx = AtomIdx(i as u32);
80 let atom = mol.atom(idx);
81 let deg = mol.neighbors(idx).count() as u64;
82 atom.element.atomic_number() as u64 * 1_000_000 + atom.charge as u64 * 1000 + deg
83 })
84 .collect();
85
86 let hash_round = |r: u64, nbrs: &[u64]| -> u64 {
87 let mut h: u64 = 14695981039346656037u64;
88 let prime: u64 = 1099511628211u64;
89 h ^= r;
90 h = h.wrapping_mul(prime);
91 for &nb in nbrs {
92 h ^= nb;
93 h = h.wrapping_mul(prime);
94 }
95 h
96 };
97
98 for _ in 0..(n + 2) {
99 let old_distinct = {
100 let mut v = ranks.clone();
101 v.sort_unstable();
102 v.dedup();
103 v.len()
104 };
105 let new_ranks: Vec<u64> = (0..n)
106 .map(|i| {
107 let idx = AtomIdx(i as u32);
108 let mut nb_ranks: Vec<u64> = mol
109 .neighbors(idx)
110 .map(|(nb, _)| ranks[nb.0 as usize])
111 .collect();
112 nb_ranks.sort_unstable();
113 hash_round(ranks[i], &nb_ranks)
114 })
115 .collect();
116 let new_distinct = {
117 let mut v = new_ranks.clone();
118 v.sort_unstable();
119 v.dedup();
120 v.len()
121 };
122 ranks = new_ranks;
123 if new_distinct <= old_distinct {
124 break;
125 }
126 }
127
128 let mut sorted = ranks.clone();
130 sorted.sort_unstable();
131 sorted.dedup();
132 ranks
133 .iter()
134 .map(|r| sorted.partition_point(|&u| u < *r) as u64)
135 .collect()
136}
137
138pub fn validate_stereo(mol: &Molecule) -> Vec<StereoError> {
146 let ranks = simple_morgan_ranks(mol);
147 let mut errors = Vec::new();
148
149 for (idx, atom) in mol.atoms() {
150 let i = idx.0 as usize;
151
152 if atom.chirality == Chirality::None {
154 continue;
155 }
156
157 let heavy_neighbors: Vec<AtomIdx> = mol
158 .neighbors(idx)
159 .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() != 1)
160 .map(|(nb, _)| nb)
161 .collect();
162
163 let implicit_h = chematic_core::implicit_hcount(mol, idx);
166 let total_groups = heavy_neighbors.len() + implicit_h as usize;
167 if total_groups < 4 {
168 errors.push(StereoError {
169 atom_idx: i,
170 kind: StereoErrorKind::ImpossibleCenter,
171 });
172 continue; }
174
175 let mut has_up = false;
177 let mut has_down = false;
178 for (_, bid) in mol.neighbors(idx) {
179 let bond = mol.bond(bid);
180 if bond.atom1 == idx {
181 match bond.order {
182 BondOrder::Up => has_up = true,
183 BondOrder::Down => has_down = true,
184 _ => {}
185 }
186 }
187 }
188 if has_up && has_down {
189 errors.push(StereoError {
190 atom_idx: i,
191 kind: StereoErrorKind::ConflictingWedges,
192 });
193 }
194
195 if !heavy_neighbors.is_empty() {
197 let first_rank = ranks[heavy_neighbors[0].0 as usize];
198 let all_same = heavy_neighbors
199 .iter()
200 .all(|nb| ranks[nb.0 as usize] == first_rank);
201 if all_same && implicit_h == 0 {
203 errors.push(StereoError {
204 atom_idx: i,
205 kind: StereoErrorKind::RedundantStereo,
206 });
207 }
208 }
209 }
210
211 errors
212}
213
214pub fn stereo_completeness(mol: &Molecule) -> StereoCompleteness {
219 let ranks = simple_morgan_ranks(mol);
220 let mut specified = 0usize;
221 let mut unspecified = 0usize;
222
223 for (idx, atom) in mol.atoms() {
224 if atom.aromatic {
226 continue;
227 }
228
229 let heavy_nbs: Vec<AtomIdx> = mol
230 .neighbors(idx)
231 .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() != 1)
232 .map(|(nb, _)| nb)
233 .collect();
234 let implicit_h = chematic_core::implicit_hcount(mol, idx) as usize;
235 let groups = heavy_nbs.len() + implicit_h;
236
237 if groups != 4 {
238 continue;
239 } let mut nb_ranks: Vec<u64> = heavy_nbs.iter().map(|nb| ranks[nb.0 as usize]).collect();
243 if implicit_h > 0 {
244 nb_ranks.push(0);
245 }
246
247 let mut sorted = nb_ranks.clone();
248 sorted.sort_unstable();
249 sorted.dedup();
250 if sorted.len() < 4 {
251 continue;
252 } if atom.chirality != Chirality::None {
255 specified += 1;
256 } else {
257 unspecified += 1;
258 }
259 }
260
261 StereoCompleteness {
262 specified,
263 unspecified,
264 total_centers: specified + unspecified,
265 }
266}
267
268#[cfg(test)]
273mod tests {
274 use super::*;
275 use chematic_smiles::parse;
276
277 #[test]
278 fn test_valid_chiral_center_no_errors() {
279 let mol = parse("N[C@@H](C)C(=O)O").unwrap();
281 let errors = validate_stereo(&mol);
282 assert!(
283 errors.is_empty(),
284 "L-alanine should have no stereo errors: {errors:?}"
285 );
286 }
287
288 #[test]
289 fn test_impossible_center_explicit_h_zero() {
290 use chematic_core::{Atom, BondOrder, Chirality, Element, MoleculeBuilder};
293 let mut b = MoleculeBuilder::new();
294 let mut c = Atom::new(Element::C);
295 c.chirality = Chirality::CounterClockwise;
296 c.hydrogen_count = Some(0); let ci = b.add_atom(c);
298 let cl = b.add_atom(Atom::new(Element::CL));
299 b.add_bond(ci, cl, BondOrder::Single).unwrap();
300 let mol = b.build();
301 let errors = validate_stereo(&mol);
302 assert!(
303 errors
304 .iter()
305 .any(|e| e.atom_idx == 0 && e.kind == StereoErrorKind::ImpossibleCenter),
306 "should detect ImpossibleCenter (1 group total): {errors:?}"
307 );
308 }
309
310 #[test]
311 fn test_stereo_completeness_alanine() {
312 let mol = parse("N[C@@H](C)C(=O)O").unwrap();
314 let sc = stereo_completeness(&mol);
315 assert_eq!(sc.specified, 1);
316 assert_eq!(sc.unspecified, 0);
317 assert_eq!(sc.total_centers, 1);
318 }
319
320 #[test]
321 fn test_stereo_completeness_unspecified() {
322 let mol = parse("NC(C)C(=O)O").unwrap();
324 let sc = stereo_completeness(&mol);
325 assert_eq!(sc.specified, 0);
326 assert!(sc.unspecified >= 1, "should detect unspecified center");
327 }
328
329 #[test]
330 fn test_no_centers_in_benzene() {
331 let mol = parse("c1ccccc1").unwrap();
332 let sc = stereo_completeness(&mol);
333 assert_eq!(sc.total_centers, 0);
334 }
335}