Skip to main content

chematic_core/
valence.rs

1//! Valence model: implicit hydrogen count for organic-subset atoms.
2//!
3//! Reference: OpenSMILES specification, section 3.4 (Implicit hydrogen)
4//! <http://opensmiles.org/opensmiles-spec.html>
5
6use crate::bond::BondOrder;
7use crate::molecule::{AtomIdx, Molecule};
8use std::fmt;
9
10/// Compute the implicit hydrogen count for atom `idx`.
11///
12/// - Bracket atoms (`hydrogen_count.is_some()`): return the stored value directly.
13/// - Wildcard atoms: return 0.
14/// - Organic-subset atoms: derive from the normal-valence table.
15/// - All other atoms: return 0 (no implicit H rule defined).
16///
17/// # Algorithm
18/// 1. Sum the integer bond orders of all bonds on the atom.
19/// 2. Find the smallest normal valence >= bond_sum (adjusted for formal charge).
20/// 3. implicit_H = adjusted_valence - bond_sum.
21///
22/// Charge adjustment:
23/// - Positive charge: increases target valence (e.g. [NH4]+ has 4 bonds → valence 4).
24/// - Negative charge: decreases target valence.
25pub fn implicit_hcount(mol: &Molecule, idx: AtomIdx) -> u8 {
26    let atom = mol.atom(idx);
27
28    // Wildcards have no defined implicit H.
29    if atom.wildcard {
30        return 0;
31    }
32
33    // Bracket atoms store the explicit H count.
34    if let Some(h) = atom.hydrogen_count {
35        return h;
36    }
37
38    // Only the organic subset gets implicit H.
39    if !atom.element.is_organic_subset() {
40        return 0;
41    }
42
43    let normal_valences = atom.element.normal_valences();
44    if normal_valences.is_empty() {
45        return 0;
46    }
47
48    let charge = atom.charge as i32;
49
50    // Separate aromatic bonds from non-aromatic bonds.
51    let mut aromatic_count: usize = 0;
52    let mut non_aromatic_sum: i32 = 0;
53    for (_, bidx) in mol.neighbors(idx) {
54        let order = mol.bond(bidx).order;
55        if order == BondOrder::Aromatic {
56            aromatic_count += 1;
57        } else {
58            non_aromatic_sum += order.order_int() as i32;
59        }
60    }
61
62    if aromatic_count > 0 {
63        // Aromatic molecule (pre-Kekulization): each aromatic bond contributes 1.5
64        // to the effective bond order (OpenSMILES convention).
65        //
66        // floor(1.5 × n) gives the contribution from n aromatic bonds:
67        //   n=2 → 3  benzene CH:   4−3=1H ✓   pyridine N: 3−3=0H ✓
68        //   n=3 → 4  junction C:   4−4=0H ✓
69        //
70        // Combined with non-aromatic substituents (e.g. N−CH₃) this correctly yields
71        // 0 H for all substituted aromatic atoms without needing Kekulization.
72        // Always use the lowest normal valence; aromatic atoms cannot be hypervalent.
73        let effective_sum = (aromatic_count as f64 * 1.5).floor() as i32 + non_aromatic_sum;
74        let v = normal_valences[0] as i32 + charge;
75        if v <= 0 || effective_sum >= v {
76            return 0;
77        }
78        return (v - effective_sum) as u8;
79    }
80
81    // Non-aromatic path (or post-Kekulization molecule where all bonds are explicit).
82    let bond_sum = non_aromatic_sum;
83
84    // For atoms that carry the aromatic flag but reside in a kekulized molecule
85    // (bonds are Single/Double, not Aromatic), use only the lowest normal valence.
86    // Rationale: after Kekulization, a substituted aromatic N (e.g. N−CH₃ in caffeine
87    // with one ring double bond) has bond_sum=4, which would select valence 5 and
88    // give 1 implicit H.  Capping at the primary valence (3) returns 0 H instead.
89    let valences_to_check: &[u8] = if atom.aromatic {
90        &normal_valences[..1]
91    } else {
92        normal_valences
93    };
94
95    // Iterate through valences (ascending) and pick the smallest ≥ bond_sum.
96    for &v in valences_to_check {
97        let target = v as i32 + charge;
98        if target < 0 {
99            continue;
100        }
101        if target >= bond_sum {
102            return (target - bond_sum) as u8;
103        }
104    }
105
106    // bond_sum exceeds all consulted valences → 0 implicit H.
107    0
108}
109
110#[deprecated(since = "0.1.95", note = "use `implicit_hcount` directly — the two functions are identical")]
111/// Alias for [`implicit_hcount`]; kept for API compatibility.
112pub fn total_hcount(mol: &Molecule, idx: AtomIdx) -> u8 {
113    implicit_hcount(mol, idx)
114}
115
116/// Sum of integer bond orders for heavy-atom bonds on `idx`.
117/// Aromatic bonds count as 1 (pre-Kekulization representation).
118pub fn bond_order_sum(mol: &Molecule, idx: AtomIdx) -> u8 {
119    mol.neighbors(idx)
120        .map(|(_, bidx)| mol.bond(bidx).order.order_int())
121        .fold(0u8, |acc, x| acc.saturating_add(x))
122}
123
124/// Returns true if the bond is counted as a "double bond equivalent" in valence sums.
125pub fn is_pi_bond(order: BondOrder) -> bool {
126    matches!(
127        order,
128        BondOrder::Double | BondOrder::Triple | BondOrder::Quadruple
129    )
130}
131
132// ---------------------------------------------------------------------------
133// Valence validation
134// ---------------------------------------------------------------------------
135
136/// A valence violation on a specific atom.
137///
138/// Returned by [`validate_valence`] for each atom whose observed bond-order sum
139/// exceeds all allowed normal valences (after formal-charge adjustment).
140#[derive(Debug, Clone)]
141pub struct ValenceError {
142    /// Index of the over-valenced atom.
143    pub atom: AtomIdx,
144    /// Observed bond-order sum (+ explicit bracket H count).
145    pub actual: u8,
146    /// Allowed normal valences for the element (from [`crate::Element::normal_valences`]).
147    pub allowed: &'static [u8],
148}
149
150impl fmt::Display for ValenceError {
151    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
152        let valences_str = self
153            .allowed
154            .iter()
155            .map(|v| v.to_string())
156            .collect::<Vec<_>>()
157            .join(", ");
158        write!(
159            f,
160            "atom {} has valence {} (allowed: [{}])",
161            self.atom.0, self.actual, valences_str
162        )
163    }
164}
165
166impl std::error::Error for ValenceError {}
167
168/// Check every atom in `mol` for valence violations.
169///
170/// Returns one [`ValenceError`] per over-valenced atom; an empty `Vec` means
171/// all atoms have valid valence.
172///
173/// Atoms without defined normal valences (transition metals, etc.) are skipped.
174/// Formal charge shifts the effective maximum: each unit of positive charge
175/// adds one to the allowed ceiling (e.g. `[NH4+]` with 4 bonds is valid).
176///
177/// Aromatic bonds are counted as 1 each (`order_int()`).  Molecules still
178/// written with `BondOrder::Aromatic` are handled correctly; fully kekulized
179/// molecules are also supported.
180pub fn validate_valence(mol: &Molecule) -> Vec<ValenceError> {
181    let mut errors = Vec::new();
182    for (idx, atom) in mol.atoms() {
183        if atom.wildcard {
184            continue;
185        }
186        let valences = atom.element.normal_valences();
187        if valences.is_empty() {
188            continue;
189        }
190
191        let bos = bond_order_sum(mol, idx);
192        let explicit_h = atom.hydrogen_count.unwrap_or(0);
193        let used = bos.saturating_add(explicit_h);
194        let charge = atom.charge as i16;
195
196        let has_valid = valences.iter().any(|&v| {
197            let effective = (v as i16 + charge).max(0) as u8;
198            effective >= used
199        });
200
201        if !has_valid {
202            errors.push(ValenceError {
203                atom: idx,
204                actual: used,
205                allowed: valences,
206            });
207        }
208    }
209    errors
210}
211
212#[cfg(test)]
213mod tests {
214    use super::*;
215    use crate::atom::Atom;
216    use crate::bond::BondOrder;
217    use crate::element::Element;
218    use crate::molecule::MoleculeBuilder;
219
220    fn single_atom(elem: Element) -> Molecule {
221        let mut b = MoleculeBuilder::new();
222        b.add_atom(Atom::organic(elem));
223        b.build()
224    }
225
226    fn two_atoms(e1: Element, e2: Element, order: BondOrder) -> Molecule {
227        let mut b = MoleculeBuilder::new();
228        let a = b.add_atom(Atom::organic(e1));
229        let c = b.add_atom(Atom::organic(e2));
230        b.add_bond(a, c, order).unwrap();
231        b.build()
232    }
233
234    #[test]
235    fn test_methane() {
236        // C alone: 0 bonds, valence 4 → 4 implicit H
237        let mol = single_atom(Element::C);
238        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 4);
239    }
240
241    #[test]
242    fn test_ethane_c() {
243        // CC: each C has 1 single bond → valence 4 → 3 implicit H
244        let mol = two_atoms(Element::C, Element::C, BondOrder::Single);
245        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 3);
246        assert_eq!(implicit_hcount(&mol, AtomIdx(1)), 3);
247    }
248
249    #[test]
250    fn test_ethylene_c() {
251        // C=C: double bond → bond_sum=2 → 4-2=2 implicit H
252        let mol = two_atoms(Element::C, Element::C, BondOrder::Double);
253        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 2);
254    }
255
256    #[test]
257    fn test_acetylene_c() {
258        // C#C: triple bond → bond_sum=3 → 4-3=1 implicit H
259        let mol = two_atoms(Element::C, Element::C, BondOrder::Triple);
260        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 1);
261    }
262
263    #[test]
264    fn test_nitrogen_amine() {
265        // N alone: 0 bonds, first normal valence=3 → 3 implicit H (NH3)
266        let mol = single_atom(Element::N);
267        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 3);
268    }
269
270    #[test]
271    fn test_nitrogen_triple() {
272        // N#C: N has triple bond → bond_sum=3 → 3-3=0 (nitrile N)
273        let mol = two_atoms(Element::N, Element::C, BondOrder::Triple);
274        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 0);
275    }
276
277    #[test]
278    fn test_oxygen_ether() {
279        // O alone: 0 bonds, valence 2 → 2 implicit H (water)
280        let mol = single_atom(Element::O);
281        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 2);
282    }
283
284    #[test]
285    fn test_fluorine() {
286        // F alone: valence 1 → 1 implicit H (HF)
287        let mol = single_atom(Element::F);
288        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 1);
289    }
290
291    #[test]
292    fn test_bracket_atom_explicit_h() {
293        // [NH4+] — bracket atom: explicit H=4 returned directly
294        let mut b = MoleculeBuilder::new();
295        let atom = Atom::bracket(Element::N, None, Default::default(), 4, 1, None);
296        b.add_atom(atom);
297        let mol = b.build();
298        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 4);
299    }
300
301    #[test]
302    fn test_hypervalent_sulfur() {
303        // S with four single bonds: bond_sum=4, S valences=[2,4,6] → target=4 → 0 H
304        let mut b = MoleculeBuilder::new();
305        let s = b.add_atom(Atom::organic(Element::S));
306        for _ in 0..4 {
307            let c = b.add_atom(Atom::organic(Element::C));
308            b.add_bond(s, c, BondOrder::Single).unwrap();
309        }
310        let mol = b.build();
311        assert_eq!(implicit_hcount(&mol, AtomIdx(0)), 0);
312    }
313
314    // ---------------------------------------------------------------------------
315    // validate_valence tests
316    // ---------------------------------------------------------------------------
317
318    #[test]
319    fn test_validate_valence_valid_molecules() {
320        // All normal molecules should produce no errors.
321        // methane (C, 0 bonds): valid
322        let mol = single_atom(Element::C);
323        assert!(
324            validate_valence(&mol).is_empty(),
325            "isolated C must be valid"
326        );
327
328        // water (O, 0 bonds): valid
329        let mol = single_atom(Element::O);
330        assert!(
331            validate_valence(&mol).is_empty(),
332            "isolated O must be valid"
333        );
334
335        // ethane (C–C): C has bond_sum=1, max valence 4 → valid
336        let mol = two_atoms(Element::C, Element::C, BondOrder::Single);
337        assert!(validate_valence(&mol).is_empty(), "ethane must be valid");
338
339        // formaldehyde (C=O): C bond_sum=2, O bond_sum=2 → both valid
340        let mol = two_atoms(Element::C, Element::O, BondOrder::Double);
341        assert!(
342            validate_valence(&mol).is_empty(),
343            "formaldehyde must be valid"
344        );
345    }
346
347    #[test]
348    fn test_validate_valence_pentavalent_carbon() {
349        // C with 5 single bonds: bond_sum=5 > max(C valences)=4 → error
350        let mut b = MoleculeBuilder::new();
351        let c = b.add_atom(Atom::organic(Element::C));
352        for _ in 0..5 {
353            let h = b.add_atom(Atom::new(Element::C));
354            b.add_bond(c, h, BondOrder::Single).unwrap();
355        }
356        let mol = b.build();
357        let errors = validate_valence(&mol);
358        assert_eq!(
359            errors.len(),
360            1,
361            "C with 5 bonds must produce exactly 1 error"
362        );
363        assert_eq!(errors[0].atom, AtomIdx(0));
364        assert_eq!(errors[0].actual, 5);
365    }
366
367    #[test]
368    fn test_validate_valence_trivalent_oxygen() {
369        // O with 3 single bonds: bond_sum=3 > max(O valences)=2 → error
370        let mut b = MoleculeBuilder::new();
371        let o = b.add_atom(Atom::organic(Element::O));
372        for _ in 0..3 {
373            let c = b.add_atom(Atom::organic(Element::C));
374            b.add_bond(o, c, BondOrder::Single).unwrap();
375        }
376        let mol = b.build();
377        let errors = validate_valence(&mol);
378        assert!(
379            !errors.is_empty(),
380            "O with 3 bonds must be flagged as over-valenced"
381        );
382        assert_eq!(errors[0].atom, AtomIdx(0));
383    }
384
385    #[test]
386    fn test_validate_valence_ammonium_valid() {
387        // [NH4+]: N with charge +1 and 4 bonds: effective max = 3+1=4 → valid
388        let mut b = MoleculeBuilder::new();
389        let mut n_atom = Atom::organic(Element::N);
390        n_atom.charge = 1;
391        let n = b.add_atom(n_atom);
392        for _ in 0..4 {
393            let c = b.add_atom(Atom::organic(Element::C));
394            b.add_bond(n, c, BondOrder::Single).unwrap();
395        }
396        let mol = b.build();
397        assert!(
398            validate_valence(&mol).is_empty(),
399            "N+ with 4 bonds must be valid (ammonium-like)"
400        );
401    }
402
403    #[test]
404    fn test_validate_valence_transition_metal_skipped() {
405        // Fe has no normal_valences → always valid regardless of bonds
406        let mut b = MoleculeBuilder::new();
407        let fe = b.add_atom(Atom::new(Element::FE));
408        for _ in 0..6 {
409            let c = b.add_atom(Atom::organic(Element::C));
410            b.add_bond(fe, c, BondOrder::Single).unwrap();
411        }
412        let mol = b.build();
413        assert!(
414            validate_valence(&mol).is_empty(),
415            "Fe with 6 bonds must be skipped"
416        );
417    }
418}