Skip to main content

chematic_core/
valence.rs

1//! Valence model: implicit hydrogen count for organic-subset atoms.
2//!
3//! Reference: OpenSMILES specification, section 3.4 (Implicit hydrogen)
4//! <http://opensmiles.org/opensmiles-spec.html>
5
6use crate::bond::BondOrder;
7use crate::molecule::{AtomIdx, Molecule};
8use std::fmt;
9
10/// Compute the implicit hydrogen count for atom `idx`.
11///
12/// - Bracket atoms (`hydrogen_count.is_some()`): return the stored value directly.
13/// - Wildcard atoms: return 0.
14/// - Organic-subset atoms: derive from the normal-valence table.
15/// - All other atoms: return 0 (no implicit H rule defined).
16///
17/// # Algorithm
18/// 1. Sum the integer bond orders of all bonds on the atom.
19/// 2. Find the smallest normal valence >= bond_sum (adjusted for formal charge).
20/// 3. implicit_H = adjusted_valence - bond_sum.
21///
22/// Charge adjustment:
23/// - Positive charge: increases target valence (e.g. [NH4]+ has 4 bonds → valence 4).
24/// - Negative charge: decreases target valence.
25pub fn implicit_hcount(mol: &Molecule, idx: AtomIdx) -> u8 {
26    let atom = mol.atom(idx);
27
28    // Wildcards have no defined implicit H.
29    if atom.wildcard {
30        return 0;
31    }
32
33    // Bracket atoms store the explicit H count.
34    if let Some(h) = atom.hydrogen_count {
35        return h;
36    }
37
38    // Only the organic subset gets implicit H.
39    if !atom.element.is_organic_subset() {
40        return 0;
41    }
42
43    let normal_valences = atom.element.normal_valences();
44    if normal_valences.is_empty() {
45        return 0;
46    }
47
48    let charge = atom.charge as i32;
49
50    // Separate aromatic bonds from non-aromatic bonds.
51    let mut aromatic_count: usize = 0;
52    let mut non_aromatic_sum: i32 = 0;
53    for (_, bidx) in mol.neighbors(idx) {
54        let order = mol.bond(bidx).order;
55        if order == BondOrder::Aromatic {
56            aromatic_count += 1;
57        } else {
58            non_aromatic_sum += order.order_int() as i32;
59        }
60    }
61
62    if aromatic_count > 0 {
63        // Aromatic molecule (pre-Kekulization): each aromatic bond contributes 1.5
64        // to the effective bond order (OpenSMILES convention).
65        //
66        // floor(1.5 × n) gives the contribution from n aromatic bonds:
67        //   n=2 → 3  benzene CH:   4−3=1H ✓   pyridine N: 3−3=0H ✓
68        //   n=3 → 4  junction C:   4−4=0H ✓
69        //
70        // Combined with non-aromatic substituents (e.g. N−CH₃) this correctly yields
71        // 0 H for all substituted aromatic atoms without needing Kekulization.
72        // Always use the lowest normal valence; aromatic atoms cannot be hypervalent.
73        let aromatic_contribution = (aromatic_count as f64 * 1.5).floor() as i32;
74        let effective_sum = aromatic_contribution.saturating_add(non_aromatic_sum);
75        let v = normal_valences[0] as i32 + charge;
76        if v <= 0 || effective_sum >= v {
77            return 0;
78        }
79        return (v - effective_sum) as u8;
80    }
81
82    // Non-aromatic path (or post-Kekulization molecule where all bonds are explicit).
83    let bond_sum = non_aromatic_sum;
84
85    // For atoms that carry the aromatic flag but reside in a kekulized molecule
86    // (bonds are Single/Double, not Aromatic), use only the lowest normal valence.
87    // Rationale: after Kekulization, a substituted aromatic N (e.g. N−CH₃ in caffeine
88    // with one ring double bond) has bond_sum=4, which would select valence 5 and
89    // give 1 implicit H.  Capping at the primary valence (3) returns 0 H instead.
90    let valences_to_check: &[u8] = if atom.aromatic {
91        &normal_valences[..1]
92    } else {
93        normal_valences
94    };
95
96    // Iterate through valences (ascending) and pick the smallest ≥ bond_sum.
97    for &v in valences_to_check {
98        let target = v as i32 + charge;
99        if target < 0 {
100            continue;
101        }
102        if target >= bond_sum {
103            return (target - bond_sum) as u8;
104        }
105    }
106
107    // bond_sum exceeds all consulted valences → 0 implicit H.
108    0
109}
110
111#[deprecated(
112    since = "0.1.95",
113    note = "use `implicit_hcount` directly — the two functions are identical"
114)]
115/// Alias for [`implicit_hcount`]; kept for API compatibility.
116pub fn total_hcount(mol: &Molecule, idx: AtomIdx) -> u8 {
117    implicit_hcount(mol, idx)
118}
119
120/// Sum of integer bond orders for heavy-atom bonds on `idx`.
121/// Aromatic bonds count as 1 (pre-Kekulization representation).
122pub fn bond_order_sum(mol: &Molecule, idx: AtomIdx) -> u8 {
123    mol.neighbors(idx)
124        .map(|(_, bidx)| mol.bond(bidx).order.order_int())
125        .fold(0u8, |acc, x| acc.saturating_add(x))
126}
127
128/// Returns true if the bond is counted as a "double bond equivalent" in valence sums.
129pub fn is_pi_bond(order: BondOrder) -> bool {
130    matches!(
131        order,
132        BondOrder::Double | BondOrder::Triple | BondOrder::Quadruple
133    )
134}
135
136// ---------------------------------------------------------------------------
137// Valence validation
138// ---------------------------------------------------------------------------
139
140/// A valence violation on a specific atom.
141///
142/// Returned by [`validate_valence`] for each atom whose observed bond-order sum
143/// exceeds all allowed normal valences (after formal-charge adjustment).
144#[derive(Debug, Clone)]
145pub struct ValenceError {
146    /// Index of the over-valenced atom.
147    pub atom: AtomIdx,
148    /// Observed bond-order sum (+ explicit bracket H count).
149    pub actual: u8,
150    /// Allowed normal valences for the element (from [`crate::Element::normal_valences`]).
151    pub allowed: &'static [u8],
152}
153
154impl fmt::Display for ValenceError {
155    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156        let valences_str = self
157            .allowed
158            .iter()
159            .map(|v| v.to_string())
160            .collect::<Vec<_>>()
161            .join(", ");
162        write!(
163            f,
164            "atom {} has valence {} (allowed: [{}])",
165            self.atom.0, self.actual, valences_str
166        )
167    }
168}
169
170impl std::error::Error for ValenceError {}
171
172/// Check every atom in `mol` for valence violations.
173///
174/// Returns one [`ValenceError`] per over-valenced atom; an empty `Vec` means
175/// all atoms have valid valence.
176///
177/// Atoms without defined normal valences (transition metals, etc.) are skipped.
178/// Formal charge shifts the effective maximum: each unit of positive charge
179/// adds one to the allowed ceiling (e.g. `[NH4+]` with 4 bonds is valid).
180///
181/// Aromatic bonds are counted as 1 each (`order_int()`).  Molecules still
182/// written with `BondOrder::Aromatic` are handled correctly; fully kekulized
183/// molecules are also supported.
184pub fn validate_valence(mol: &Molecule) -> Vec<ValenceError> {
185    let mut errors = Vec::new();
186    for (idx, atom) in mol.atoms() {
187        if atom.wildcard {
188            continue;
189        }
190        let valences = atom.element.normal_valences();
191        if valences.is_empty() {
192            continue;
193        }
194
195        let bos = bond_order_sum(mol, idx);
196        let explicit_h = atom.hydrogen_count.unwrap_or(0);
197        let used = bos.saturating_add(explicit_h);
198        let charge = atom.charge as i16;
199
200        let has_valid = valences.iter().any(|&v| {
201            let effective = (v as i16 + charge).max(0) as u8;
202            effective >= used
203        });
204
205        if !has_valid {
206            errors.push(ValenceError {
207                atom: idx,
208                actual: used,
209                allowed: valences,
210            });
211        }
212    }
213    errors
214}
215
216#[cfg(test)]
217mod tests {
218    use super::*;
219    use crate::atom::Atom;
220    use crate::bond::BondOrder;
221    use crate::element::Element;
222    use crate::molecule::MoleculeBuilder;
223
224    fn single_atom(elem: Element) -> Molecule {
225        let mut b = MoleculeBuilder::new();
226        b.add_atom(Atom::organic(elem));
227        b.build()
228    }
229
230    fn two_atoms(e1: Element, e2: Element, order: BondOrder) -> Molecule {
231        let mut b = MoleculeBuilder::new();
232        let a = b.add_atom(Atom::organic(e1));
233        let c = b.add_atom(Atom::organic(e2));
234        b.add_bond(a, c, order).unwrap();
235        b.build()
236    }
237
238    #[test]
239    fn test_methane() {
240        // C alone: 0 bonds, valence 4 → 4 implicit H
241        let mol = single_atom(Element::C);
242        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 4);
243    }
244
245    #[test]
246    fn test_ethane_c() {
247        // CC: each C has 1 single bond → valence 4 → 3 implicit H
248        let mol = two_atoms(Element::C, Element::C, BondOrder::Single);
249        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 3);
250        assert_eq!(implicit_hcount(&mol, AtomIdx(1)), 3);
251    }
252
253    #[test]
254    fn test_ethylene_c() {
255        // C=C: double bond → bond_sum=2 → 4-2=2 implicit H
256        let mol = two_atoms(Element::C, Element::C, BondOrder::Double);
257        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 2);
258    }
259
260    #[test]
261    fn test_acetylene_c() {
262        // C#C: triple bond → bond_sum=3 → 4-3=1 implicit H
263        let mol = two_atoms(Element::C, Element::C, BondOrder::Triple);
264        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 1);
265    }
266
267    #[test]
268    fn test_nitrogen_amine() {
269        // N alone: 0 bonds, first normal valence=3 → 3 implicit H (NH3)
270        let mol = single_atom(Element::N);
271        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 3);
272    }
273
274    #[test]
275    fn test_nitrogen_triple() {
276        // N#C: N has triple bond → bond_sum=3 → 3-3=0 (nitrile N)
277        let mol = two_atoms(Element::N, Element::C, BondOrder::Triple);
278        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 0);
279    }
280
281    #[test]
282    fn test_oxygen_ether() {
283        // O alone: 0 bonds, valence 2 → 2 implicit H (water)
284        let mol = single_atom(Element::O);
285        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 2);
286    }
287
288    #[test]
289    fn test_fluorine() {
290        // F alone: valence 1 → 1 implicit H (HF)
291        let mol = single_atom(Element::F);
292        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 1);
293    }
294
295    #[test]
296    fn test_bracket_atom_explicit_h() {
297        // [NH4+] — bracket atom: explicit H=4 returned directly
298        let mut b = MoleculeBuilder::new();
299        let atom = Atom::bracket(Element::N, None, Default::default(), 4, 1, None);
300        b.add_atom(atom);
301        let mol = b.build();
302        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 4);
303    }
304
305    #[test]
306    fn test_hypervalent_sulfur() {
307        // S with four single bonds: bond_sum=4, S valences=[2,4,6] → target=4 → 0 H
308        let mut b = MoleculeBuilder::new();
309        let s = b.add_atom(Atom::organic(Element::S));
310        for _ in 0..4 {
311            let c = b.add_atom(Atom::organic(Element::C));
312            b.add_bond(s, c, BondOrder::Single).unwrap();
313        }
314        let mol = b.build();
315        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 0);
316    }
317
318    // ---------------------------------------------------------------------------
319    // validate_valence tests
320    // ---------------------------------------------------------------------------
321
322    #[test]
323    fn test_validate_valence_valid_molecules() {
324        // All normal molecules should produce no errors.
325        // methane (C, 0 bonds): valid
326        let mol = single_atom(Element::C);
327        assert!(
328            validate_valence(&mol).is_empty(),
329            "isolated C must be valid"
330        );
331
332        // water (O, 0 bonds): valid
333        let mol = single_atom(Element::O);
334        assert!(
335            validate_valence(&mol).is_empty(),
336            "isolated O must be valid"
337        );
338
339        // ethane (C–C): C has bond_sum=1, max valence 4 → valid
340        let mol = two_atoms(Element::C, Element::C, BondOrder::Single);
341        assert!(validate_valence(&mol).is_empty(), "ethane must be valid");
342
343        // formaldehyde (C=O): C bond_sum=2, O bond_sum=2 → both valid
344        let mol = two_atoms(Element::C, Element::O, BondOrder::Double);
345        assert!(
346            validate_valence(&mol).is_empty(),
347            "formaldehyde must be valid"
348        );
349    }
350
351    #[test]
352    fn test_validate_valence_pentavalent_carbon() {
353        // C with 5 single bonds: bond_sum=5 > max(C valences)=4 → error
354        let mut b = MoleculeBuilder::new();
355        let c = b.add_atom(Atom::organic(Element::C));
356        for _ in 0..5 {
357            let h = b.add_atom(Atom::new(Element::C));
358            b.add_bond(c, h, BondOrder::Single).unwrap();
359        }
360        let mol = b.build();
361        let errors = validate_valence(&mol);
362        assert_eq!(
363            errors.len(),
364            1,
365            "C with 5 bonds must produce exactly 1 error"
366        );
367        assert_eq!(errors[0].atom, AtomIdx(0));
368        assert_eq!(errors[0].actual, 5);
369    }
370
371    #[test]
372    fn test_validate_valence_trivalent_oxygen() {
373        // O with 3 single bonds: bond_sum=3 > max(O valences)=2 → error
374        let mut b = MoleculeBuilder::new();
375        let o = b.add_atom(Atom::organic(Element::O));
376        for _ in 0..3 {
377            let c = b.add_atom(Atom::organic(Element::C));
378            b.add_bond(o, c, BondOrder::Single).unwrap();
379        }
380        let mol = b.build();
381        let errors = validate_valence(&mol);
382        assert!(
383            !errors.is_empty(),
384            "O with 3 bonds must be flagged as over-valenced"
385        );
386        assert_eq!(errors[0].atom, AtomIdx(0));
387    }
388
389    #[test]
390    fn test_validate_valence_ammonium_valid() {
391        // [NH4+]: N with charge +1 and 4 bonds: effective max = 3+1=4 → valid
392        let mut b = MoleculeBuilder::new();
393        let mut n_atom = Atom::organic(Element::N);
394        n_atom.charge = 1;
395        let n = b.add_atom(n_atom);
396        for _ in 0..4 {
397            let c = b.add_atom(Atom::organic(Element::C));
398            b.add_bond(n, c, BondOrder::Single).unwrap();
399        }
400        let mol = b.build();
401        assert!(
402            validate_valence(&mol).is_empty(),
403            "N+ with 4 bonds must be valid (ammonium-like)"
404        );
405    }
406
407    #[test]
408    fn test_validate_valence_transition_metal_skipped() {
409        // Fe has no normal_valences → always valid regardless of bonds
410        let mut b = MoleculeBuilder::new();
411        let fe = b.add_atom(Atom::new(Element::FE));
412        for _ in 0..6 {
413            let c = b.add_atom(Atom::organic(Element::C));
414            b.add_bond(fe, c, BondOrder::Single).unwrap();
415        }
416        let mol = b.build();
417        assert!(
418            validate_valence(&mol).is_empty(),
419            "Fe with 6 bonds must be skipped"
420        );
421    }
422}