Skip to main content

chematic_core/
valence.rs

1//! Valence model: implicit hydrogen count for organic-subset atoms.
2//!
3//! Reference: OpenSMILES specification, section 3.4 (Implicit hydrogen)
4//! <http://opensmiles.org/opensmiles-spec.html>
5
6use crate::bond::BondOrder;
7use crate::molecule::{AtomIdx, Molecule};
8use std::fmt;
9
10/// Compute the implicit hydrogen count for atom `idx`.
11///
12/// - Bracket atoms (`hydrogen_count.is_some()`): return the stored value directly.
13/// - Wildcard atoms: return 0.
14/// - Organic-subset atoms: derive from the normal-valence table.
15/// - All other atoms: return 0 (no implicit H rule defined).
16///
17/// # Algorithm
18/// 1. Sum the integer bond orders of all bonds on the atom.
19/// 2. Find the smallest normal valence >= bond_sum (adjusted for formal charge).
20/// 3. implicit_H = adjusted_valence - bond_sum.
21///
22/// Charge adjustment:
23/// - Positive charge: increases target valence (e.g. [NH4]+ has 4 bonds → valence 4).
24/// - Negative charge: decreases target valence.
25pub fn implicit_hcount(mol: &Molecule, idx: AtomIdx) -> u8 {
26    let atom = mol.atom(idx);
27
28    // Wildcards have no defined implicit H.
29    if atom.wildcard {
30        return 0;
31    }
32
33    // Bracket atoms store the explicit H count.
34    if let Some(h) = atom.hydrogen_count {
35        return h;
36    }
37
38    // Only the organic subset gets implicit H.
39    if !atom.element.is_organic_subset() {
40        return 0;
41    }
42
43    let normal_valences = atom.element.normal_valences();
44    if normal_valences.is_empty() {
45        return 0;
46    }
47
48    let charge = atom.charge as i32;
49
50    // Separate aromatic bonds from non-aromatic bonds.
51    let mut aromatic_count: usize = 0;
52    let mut non_aromatic_sum: i32 = 0;
53    for (_, bidx) in mol.neighbors(idx) {
54        let order = mol.bond(bidx).order;
55        if order == BondOrder::Aromatic {
56            aromatic_count += 1;
57        } else {
58            non_aromatic_sum += order.order_int() as i32;
59        }
60    }
61
62    if aromatic_count > 0 {
63        // Aromatic molecule (pre-Kekulization): each aromatic bond contributes 1.5
64        // to the effective bond order (OpenSMILES convention).
65        //
66        // floor(1.5 × n) gives the contribution from n aromatic bonds:
67        //   n=2 → 3  benzene CH:   4−3=1H ✓   pyridine N: 3−3=0H ✓
68        //   n=3 → 4  junction C:   4−4=0H ✓
69        //
70        // Combined with non-aromatic substituents (e.g. N−CH₃) this correctly yields
71        // 0 H for all substituted aromatic atoms without needing Kekulization.
72        // Always use the lowest normal valence; aromatic atoms cannot be hypervalent.
73        let aromatic_contribution = (aromatic_count as f64 * 1.5).floor() as i32;
74        let effective_sum = aromatic_contribution.saturating_add(non_aromatic_sum);
75        let v = normal_valences[0] as i32 + charge;
76        if v <= 0 || effective_sum >= v {
77            return 0;
78        }
79        return (v - effective_sum) as u8;
80    }
81
82    // Non-aromatic path (or post-Kekulization molecule where all bonds are explicit).
83    let bond_sum = non_aromatic_sum;
84
85    // For atoms that carry the aromatic flag but reside in a kekulized molecule
86    // (bonds are Single/Double, not Aromatic), use only the lowest normal valence.
87    // Rationale: after Kekulization, a substituted aromatic N (e.g. N−CH₃ in caffeine
88    // with one ring double bond) has bond_sum=4, which would select valence 5 and
89    // give 1 implicit H.  Capping at the primary valence (3) returns 0 H instead.
90    let valences_to_check: &[u8] = if atom.aromatic {
91        &normal_valences[..1]
92    } else {
93        normal_valences
94    };
95
96    // Iterate through valences (ascending) and pick the smallest ≥ bond_sum.
97    for &v in valences_to_check {
98        let target = v as i32 + charge;
99        if target < 0 {
100            continue;
101        }
102        if target >= bond_sum {
103            return (target - bond_sum) as u8;
104        }
105    }
106
107    // bond_sum exceeds all consulted valences → 0 implicit H.
108    0
109}
110
111#[deprecated(since = "0.1.95", note = "use `implicit_hcount` directly — the two functions are identical")]
112/// Alias for [`implicit_hcount`]; kept for API compatibility.
113pub fn total_hcount(mol: &Molecule, idx: AtomIdx) -> u8 {
114    implicit_hcount(mol, idx)
115}
116
117/// Sum of integer bond orders for heavy-atom bonds on `idx`.
118/// Aromatic bonds count as 1 (pre-Kekulization representation).
119pub fn bond_order_sum(mol: &Molecule, idx: AtomIdx) -> u8 {
120    mol.neighbors(idx)
121        .map(|(_, bidx)| mol.bond(bidx).order.order_int())
122        .fold(0u8, |acc, x| acc.saturating_add(x))
123}
124
125/// Returns true if the bond is counted as a "double bond equivalent" in valence sums.
126pub fn is_pi_bond(order: BondOrder) -> bool {
127    matches!(
128        order,
129        BondOrder::Double | BondOrder::Triple | BondOrder::Quadruple
130    )
131}
132
133// ---------------------------------------------------------------------------
134// Valence validation
135// ---------------------------------------------------------------------------
136
137/// A valence violation on a specific atom.
138///
139/// Returned by [`validate_valence`] for each atom whose observed bond-order sum
140/// exceeds all allowed normal valences (after formal-charge adjustment).
141#[derive(Debug, Clone)]
142pub struct ValenceError {
143    /// Index of the over-valenced atom.
144    pub atom: AtomIdx,
145    /// Observed bond-order sum (+ explicit bracket H count).
146    pub actual: u8,
147    /// Allowed normal valences for the element (from [`crate::Element::normal_valences`]).
148    pub allowed: &'static [u8],
149}
150
151impl fmt::Display for ValenceError {
152    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
153        let valences_str = self
154            .allowed
155            .iter()
156            .map(|v| v.to_string())
157            .collect::<Vec<_>>()
158            .join(", ");
159        write!(
160            f,
161            "atom {} has valence {} (allowed: [{}])",
162            self.atom.0, self.actual, valences_str
163        )
164    }
165}
166
167impl std::error::Error for ValenceError {}
168
169/// Check every atom in `mol` for valence violations.
170///
171/// Returns one [`ValenceError`] per over-valenced atom; an empty `Vec` means
172/// all atoms have valid valence.
173///
174/// Atoms without defined normal valences (transition metals, etc.) are skipped.
175/// Formal charge shifts the effective maximum: each unit of positive charge
176/// adds one to the allowed ceiling (e.g. `[NH4+]` with 4 bonds is valid).
177///
178/// Aromatic bonds are counted as 1 each (`order_int()`).  Molecules still
179/// written with `BondOrder::Aromatic` are handled correctly; fully kekulized
180/// molecules are also supported.
181pub fn validate_valence(mol: &Molecule) -> Vec<ValenceError> {
182    let mut errors = Vec::new();
183    for (idx, atom) in mol.atoms() {
184        if atom.wildcard {
185            continue;
186        }
187        let valences = atom.element.normal_valences();
188        if valences.is_empty() {
189            continue;
190        }
191
192        let bos = bond_order_sum(mol, idx);
193        let explicit_h = atom.hydrogen_count.unwrap_or(0);
194        let used = bos.saturating_add(explicit_h);
195        let charge = atom.charge as i16;
196
197        let has_valid = valences.iter().any(|&v| {
198            let effective = (v as i16 + charge).max(0) as u8;
199            effective >= used
200        });
201
202        if !has_valid {
203            errors.push(ValenceError {
204                atom: idx,
205                actual: used,
206                allowed: valences,
207            });
208        }
209    }
210    errors
211}
212
213#[cfg(test)]
214mod tests {
215    use super::*;
216    use crate::atom::Atom;
217    use crate::bond::BondOrder;
218    use crate::element::Element;
219    use crate::molecule::MoleculeBuilder;
220
221    fn single_atom(elem: Element) -> Molecule {
222        let mut b = MoleculeBuilder::new();
223        b.add_atom(Atom::organic(elem));
224        b.build()
225    }
226
227    fn two_atoms(e1: Element, e2: Element, order: BondOrder) -> Molecule {
228        let mut b = MoleculeBuilder::new();
229        let a = b.add_atom(Atom::organic(e1));
230        let c = b.add_atom(Atom::organic(e2));
231        b.add_bond(a, c, order).unwrap();
232        b.build()
233    }
234
235    #[test]
236    fn test_methane() {
237        // C alone: 0 bonds, valence 4 → 4 implicit H
238        let mol = single_atom(Element::C);
239        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 4);
240    }
241
242    #[test]
243    fn test_ethane_c() {
244        // CC: each C has 1 single bond → valence 4 → 3 implicit H
245        let mol = two_atoms(Element::C, Element::C, BondOrder::Single);
246        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 3);
247        assert_eq!(implicit_hcount(&mol, AtomIdx(1)), 3);
248    }
249
250    #[test]
251    fn test_ethylene_c() {
252        // C=C: double bond → bond_sum=2 → 4-2=2 implicit H
253        let mol = two_atoms(Element::C, Element::C, BondOrder::Double);
254        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 2);
255    }
256
257    #[test]
258    fn test_acetylene_c() {
259        // C#C: triple bond → bond_sum=3 → 4-3=1 implicit H
260        let mol = two_atoms(Element::C, Element::C, BondOrder::Triple);
261        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 1);
262    }
263
264    #[test]
265    fn test_nitrogen_amine() {
266        // N alone: 0 bonds, first normal valence=3 → 3 implicit H (NH3)
267        let mol = single_atom(Element::N);
268        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 3);
269    }
270
271    #[test]
272    fn test_nitrogen_triple() {
273        // N#C: N has triple bond → bond_sum=3 → 3-3=0 (nitrile N)
274        let mol = two_atoms(Element::N, Element::C, BondOrder::Triple);
275        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 0);
276    }
277
278    #[test]
279    fn test_oxygen_ether() {
280        // O alone: 0 bonds, valence 2 → 2 implicit H (water)
281        let mol = single_atom(Element::O);
282        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 2);
283    }
284
285    #[test]
286    fn test_fluorine() {
287        // F alone: valence 1 → 1 implicit H (HF)
288        let mol = single_atom(Element::F);
289        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 1);
290    }
291
292    #[test]
293    fn test_bracket_atom_explicit_h() {
294        // [NH4+] — bracket atom: explicit H=4 returned directly
295        let mut b = MoleculeBuilder::new();
296        let atom = Atom::bracket(Element::N, None, Default::default(), 4, 1, None);
297        b.add_atom(atom);
298        let mol = b.build();
299        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 4);
300    }
301
302    #[test]
303    fn test_hypervalent_sulfur() {
304        // S with four single bonds: bond_sum=4, S valences=[2,4,6] → target=4 → 0 H
305        let mut b = MoleculeBuilder::new();
306        let s = b.add_atom(Atom::organic(Element::S));
307        for _ in 0..4 {
308            let c = b.add_atom(Atom::organic(Element::C));
309            b.add_bond(s, c, BondOrder::Single).unwrap();
310        }
311        let mol = b.build();
312        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 0);
313    }
314
315    // ---------------------------------------------------------------------------
316    // validate_valence tests
317    // ---------------------------------------------------------------------------
318
319    #[test]
320    fn test_validate_valence_valid_molecules() {
321        // All normal molecules should produce no errors.
322        // methane (C, 0 bonds): valid
323        let mol = single_atom(Element::C);
324        assert!(
325            validate_valence(&mol).is_empty(),
326            "isolated C must be valid"
327        );
328
329        // water (O, 0 bonds): valid
330        let mol = single_atom(Element::O);
331        assert!(
332            validate_valence(&mol).is_empty(),
333            "isolated O must be valid"
334        );
335
336        // ethane (C–C): C has bond_sum=1, max valence 4 → valid
337        let mol = two_atoms(Element::C, Element::C, BondOrder::Single);
338        assert!(validate_valence(&mol).is_empty(), "ethane must be valid");
339
340        // formaldehyde (C=O): C bond_sum=2, O bond_sum=2 → both valid
341        let mol = two_atoms(Element::C, Element::O, BondOrder::Double);
342        assert!(
343            validate_valence(&mol).is_empty(),
344            "formaldehyde must be valid"
345        );
346    }
347
348    #[test]
349    fn test_validate_valence_pentavalent_carbon() {
350        // C with 5 single bonds: bond_sum=5 > max(C valences)=4 → error
351        let mut b = MoleculeBuilder::new();
352        let c = b.add_atom(Atom::organic(Element::C));
353        for _ in 0..5 {
354            let h = b.add_atom(Atom::new(Element::C));
355            b.add_bond(c, h, BondOrder::Single).unwrap();
356        }
357        let mol = b.build();
358        let errors = validate_valence(&mol);
359        assert_eq!(
360            errors.len(),
361            1,
362            "C with 5 bonds must produce exactly 1 error"
363        );
364        assert_eq!(errors[0].atom, AtomIdx(0));
365        assert_eq!(errors[0].actual, 5);
366    }
367
368    #[test]
369    fn test_validate_valence_trivalent_oxygen() {
370        // O with 3 single bonds: bond_sum=3 > max(O valences)=2 → error
371        let mut b = MoleculeBuilder::new();
372        let o = b.add_atom(Atom::organic(Element::O));
373        for _ in 0..3 {
374            let c = b.add_atom(Atom::organic(Element::C));
375            b.add_bond(o, c, BondOrder::Single).unwrap();
376        }
377        let mol = b.build();
378        let errors = validate_valence(&mol);
379        assert!(
380            !errors.is_empty(),
381            "O with 3 bonds must be flagged as over-valenced"
382        );
383        assert_eq!(errors[0].atom, AtomIdx(0));
384    }
385
386    #[test]
387    fn test_validate_valence_ammonium_valid() {
388        // [NH4+]: N with charge +1 and 4 bonds: effective max = 3+1=4 → valid
389        let mut b = MoleculeBuilder::new();
390        let mut n_atom = Atom::organic(Element::N);
391        n_atom.charge = 1;
392        let n = b.add_atom(n_atom);
393        for _ in 0..4 {
394            let c = b.add_atom(Atom::organic(Element::C));
395            b.add_bond(n, c, BondOrder::Single).unwrap();
396        }
397        let mol = b.build();
398        assert!(
399            validate_valence(&mol).is_empty(),
400            "N+ with 4 bonds must be valid (ammonium-like)"
401        );
402    }
403
404    #[test]
405    fn test_validate_valence_transition_metal_skipped() {
406        // Fe has no normal_valences → always valid regardless of bonds
407        let mut b = MoleculeBuilder::new();
408        let fe = b.add_atom(Atom::new(Element::FE));
409        for _ in 0..6 {
410            let c = b.add_atom(Atom::organic(Element::C));
411            b.add_bond(fe, c, BondOrder::Single).unwrap();
412        }
413        let mol = b.build();
414        assert!(
415            validate_valence(&mol).is_empty(),
416            "Fe with 6 bonds must be skipped"
417        );
418    }
419}