Skip to main content

chematic_perception/
stereo_validation.rs

1//! Stereochemistry quality validation.
2//!
3//! Detects common stereochemistry errors that arise when reading molecular
4//! files or manually editing structures:
5//!
6//! - [`StereoErrorKind::ImpossibleCenter`] — a chirality annotation (`@`/`@@`)
7//!   is present on an atom with fewer than 4 distinct heavy-atom neighbours.
8//! - [`StereoErrorKind::ConflictingWedges`] — the same atom is the base of two
9//!   or more stereo bonds (Up or Down) pointing in opposite directions.
10//! - [`StereoErrorKind::RedundantStereo`] — the annotated atom is topologically
11//!   equivalent to a neighbour (same Morgan rank), so the stereo specification
12//!   is chemically meaningless.
13
14use chematic_core::{AtomIdx, BondOrder, Chirality, Molecule};
15use std::fmt;
16
17// ---------------------------------------------------------------------------
18// Public types
19// ---------------------------------------------------------------------------
20
21/// Kind of stereochemistry error.
22#[derive(Debug, Clone, PartialEq, Eq)]
23pub enum StereoErrorKind {
24    /// Chirality annotation on an atom with < 4 heavy-atom neighbours (or all
25    /// neighbours identical).
26    ImpossibleCenter,
27    /// Two or more Up/Down bonds originate from the same atom with conflicting
28    /// directions (both Up and Down from the same center).
29    ConflictingWedges,
30    /// Stereo annotation on a topologically symmetric atom (all neighbours
31    /// have the same Morgan rank — no priority ordering possible).
32    RedundantStereo,
33}
34
35/// A detected stereochemistry error.
36#[derive(Debug, Clone, PartialEq, Eq)]
37pub struct StereoError {
38    /// 0-based atom index of the problematic center.
39    pub atom_idx: usize,
40    pub kind: StereoErrorKind,
41}
42
43impl fmt::Display for StereoError {
44    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45        let kind_str = match &self.kind {
46            StereoErrorKind::ImpossibleCenter => {
47                "impossible stereocenter (< 4 distinct neighbours)"
48            }
49            StereoErrorKind::ConflictingWedges => "conflicting wedge directions",
50            StereoErrorKind::RedundantStereo => "redundant stereo on symmetric atom",
51        };
52        write!(f, "atom {}: {}", self.atom_idx, kind_str)
53    }
54}
55
56impl std::error::Error for StereoError {}
57
58/// Summary of stereocenters in a molecule.
59#[derive(Debug, Clone, PartialEq, Eq)]
60pub struct StereoCompleteness {
61    /// Stereocenters with an explicit `@`/`@@` annotation.
62    pub specified: usize,
63    /// Stereocenters with 4 distinct heavy-atom neighbours but no annotation.
64    pub unspecified: usize,
65    /// `specified + unspecified`
66    pub total_centers: usize,
67}
68
69// ---------------------------------------------------------------------------
70// Internal: lightweight Morgan ranks (avoids chematic-smiles dependency)
71// ---------------------------------------------------------------------------
72
73/// Compute simple Morgan connectivity ranks for atoms in `mol`.
74/// Uses initial invariant = atomic_number * 1000 + degree.
75fn simple_morgan_ranks(mol: &Molecule) -> Vec<u64> {
76    let n = mol.atom_count();
77    let mut ranks: Vec<u64> = (0..n)
78        .map(|i| {
79            let idx = AtomIdx(i as u32);
80            let atom = mol.atom(idx);
81            let deg = mol.neighbors(idx).count() as u64;
82            atom.element.atomic_number() as u64 * 1_000_000 + atom.charge as u64 * 1000 + deg
83        })
84        .collect();
85
86    let hash_round = |r: u64, nbrs: &[u64]| -> u64 {
87        let mut h: u64 = 14695981039346656037u64;
88        let prime: u64 = 1099511628211u64;
89        h ^= r;
90        h = h.wrapping_mul(prime);
91        for &nb in nbrs {
92            h ^= nb;
93            h = h.wrapping_mul(prime);
94        }
95        h
96    };
97
98    for _ in 0..(n + 2) {
99        let old_distinct = {
100            let mut v = ranks.clone();
101            v.sort_unstable();
102            v.dedup();
103            v.len()
104        };
105        let new_ranks: Vec<u64> = (0..n)
106            .map(|i| {
107                let idx = AtomIdx(i as u32);
108                let mut nb_ranks: Vec<u64> = mol
109                    .neighbors(idx)
110                    .map(|(nb, _)| ranks[nb.0 as usize])
111                    .collect();
112                nb_ranks.sort_unstable();
113                hash_round(ranks[i], &nb_ranks)
114            })
115            .collect();
116        let new_distinct = {
117            let mut v = new_ranks.clone();
118            v.sort_unstable();
119            v.dedup();
120            v.len()
121        };
122        ranks = new_ranks;
123        if new_distinct <= old_distinct {
124            break;
125        }
126    }
127
128    // Normalise to consecutive ordinals.
129    let mut sorted = ranks.clone();
130    sorted.sort_unstable();
131    sorted.dedup();
132    ranks
133        .iter()
134        .map(|r| sorted.partition_point(|&u| u < *r) as u64)
135        .collect()
136}
137
138// ---------------------------------------------------------------------------
139// Public API
140// ---------------------------------------------------------------------------
141
142/// Validate the stereochemistry of `mol` and return any errors found.
143///
144/// An empty `Vec` means the stereo annotations are chemically consistent.
145pub fn validate_stereo(mol: &Molecule) -> Vec<StereoError> {
146    let ranks = simple_morgan_ranks(mol);
147    let mut errors = Vec::new();
148
149    for (idx, atom) in mol.atoms() {
150        let i = idx.0 as usize;
151
152        // Only inspect atoms with explicit chirality.
153        if atom.chirality == Chirality::None {
154            continue;
155        }
156
157        let heavy_neighbors: Vec<AtomIdx> = mol
158            .neighbors(idx)
159            .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() != 1)
160            .map(|(nb, _)| nb)
161            .collect();
162
163        // Rule 1: ImpossibleCenter — fewer than 4 distinct heavy neighbours.
164        // (3 heavy + 1 implicit H is OK; < 3 heavy is definitely impossible.)
165        let implicit_h = chematic_core::implicit_hcount(mol, idx);
166        let total_groups = heavy_neighbors.len() + implicit_h as usize;
167        if total_groups < 4 {
168            errors.push(StereoError {
169                atom_idx: i,
170                kind: StereoErrorKind::ImpossibleCenter,
171            });
172            continue; // no point checking further
173        }
174
175        // Rule 2: ConflictingWedges — Up and Down bonds from same center.
176        let mut has_up = false;
177        let mut has_down = false;
178        for (_, bid) in mol.neighbors(idx) {
179            let bond = mol.bond(bid);
180            if bond.atom1 == idx {
181                match bond.order {
182                    BondOrder::Up => has_up = true,
183                    BondOrder::Down => has_down = true,
184                    _ => {}
185                }
186            }
187        }
188        if has_up && has_down {
189            errors.push(StereoError {
190                atom_idx: i,
191                kind: StereoErrorKind::ConflictingWedges,
192            });
193        }
194
195        // Rule 3: RedundantStereo — all heavy neighbours have the same rank.
196        if !heavy_neighbors.is_empty() {
197            let first_rank = ranks[heavy_neighbors[0].0 as usize];
198            let all_same = heavy_neighbors
199                .iter()
200                .all(|nb| ranks[nb.0 as usize] == first_rank);
201            // Also check the center itself doesn't break ties via implicit H.
202            if all_same && implicit_h == 0 {
203                errors.push(StereoError {
204                    atom_idx: i,
205                    kind: StereoErrorKind::RedundantStereo,
206                });
207            }
208        }
209    }
210
211    errors
212}
213
214/// Summarise how many stereocenters in `mol` have been specified vs left open.
215///
216/// A potential stereocenter is an sp3 atom with 4 distinct heavy-atom
217/// neighbours (counting one implicit H as a distinct group when present).
218pub fn stereo_completeness(mol: &Molecule) -> StereoCompleteness {
219    let ranks = simple_morgan_ranks(mol);
220    let mut specified = 0usize;
221    let mut unspecified = 0usize;
222
223    for (idx, atom) in mol.atoms() {
224        // Skip aromatics and obvious non-centers.
225        if atom.aromatic {
226            continue;
227        }
228
229        let heavy_nbs: Vec<AtomIdx> = mol
230            .neighbors(idx)
231            .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() != 1)
232            .map(|(nb, _)| nb)
233            .collect();
234        let implicit_h = chematic_core::implicit_hcount(mol, idx) as usize;
235        let groups = heavy_nbs.len() + implicit_h;
236
237        if groups != 4 {
238            continue;
239        } // only tetrahedral candidates
240
241        // Check all neighbours have distinct ranks (including implicit H as rank 0).
242        let mut nb_ranks: Vec<u64> = heavy_nbs.iter().map(|nb| ranks[nb.0 as usize]).collect();
243        if implicit_h > 0 {
244            nb_ranks.push(0);
245        }
246
247        let mut sorted = nb_ranks.clone();
248        sorted.sort_unstable();
249        sorted.dedup();
250        if sorted.len() < 4 {
251            continue;
252        } // symmetric neighbours — not a stereocenter
253
254        if atom.chirality != Chirality::None {
255            specified += 1;
256        } else {
257            unspecified += 1;
258        }
259    }
260
261    StereoCompleteness {
262        specified,
263        unspecified,
264        total_centers: specified + unspecified,
265    }
266}
267
268// ---------------------------------------------------------------------------
269// Tests
270// ---------------------------------------------------------------------------
271
272#[cfg(test)]
273mod tests {
274    use super::*;
275    use chematic_smiles::parse;
276
277    #[test]
278    fn test_valid_chiral_center_no_errors() {
279        // L-alanine: valid R/S center with 4 distinct groups.
280        let mol = parse("N[C@@H](C)C(=O)O").unwrap();
281        let errors = validate_stereo(&mol);
282        assert!(
283            errors.is_empty(),
284            "L-alanine should have no stereo errors: {errors:?}"
285        );
286    }
287
288    #[test]
289    fn test_impossible_center_explicit_h_zero() {
290        // A carbon with chirality annotation, 1 heavy bond, and explicit H=0
291        // gives total_groups = 1 → ImpossibleCenter.
292        use chematic_core::{Atom, BondOrder, Chirality, Element, MoleculeBuilder};
293        let mut b = MoleculeBuilder::new();
294        let mut c = Atom::new(Element::C);
295        c.chirality = Chirality::CounterClockwise;
296        c.hydrogen_count = Some(0); // force 0 implicit H
297        let ci = b.add_atom(c);
298        let cl = b.add_atom(Atom::new(Element::CL));
299        b.add_bond(ci, cl, BondOrder::Single).unwrap();
300        let mol = b.build();
301        let errors = validate_stereo(&mol);
302        assert!(
303            errors
304                .iter()
305                .any(|e| e.atom_idx == 0 && e.kind == StereoErrorKind::ImpossibleCenter),
306            "should detect ImpossibleCenter (1 group total): {errors:?}"
307        );
308    }
309
310    #[test]
311    fn test_stereo_completeness_alanine() {
312        // L-alanine has 1 specified stereocenter, 0 unspecified.
313        let mol = parse("N[C@@H](C)C(=O)O").unwrap();
314        let sc = stereo_completeness(&mol);
315        assert_eq!(sc.specified, 1);
316        assert_eq!(sc.unspecified, 0);
317        assert_eq!(sc.total_centers, 1);
318    }
319
320    #[test]
321    fn test_stereo_completeness_unspecified() {
322        // Alanine without stereo annotation: 1 unspecified center.
323        let mol = parse("NC(C)C(=O)O").unwrap();
324        let sc = stereo_completeness(&mol);
325        assert_eq!(sc.specified, 0);
326        assert!(sc.unspecified >= 1, "should detect unspecified center");
327    }
328
329    #[test]
330    fn test_no_centers_in_benzene() {
331        let mol = parse("c1ccccc1").unwrap();
332        let sc = stereo_completeness(&mol);
333        assert_eq!(sc.total_centers, 0);
334    }
335}