Skip to main content

molprint_core/mol/
graph.rs

1use super::atom::Atom;
2use super::bond::BondType;
3use petgraph::graph::{NodeIndex, UnGraph};
4
5/// The core molecular graph type.
6pub type MolGraph = UnGraph<Atom, BondType>;
7
8/// Extension trait adding chemistry-aware methods to MolGraph.
9pub trait MolGraphExt {
10    /// Number of atoms (heavy atoms only, no implicit hydrogens).
11    fn num_atoms(&self) -> usize;
12    /// Number of bonds in the graph.
13    fn num_bonds(&self) -> usize;
14    /// Access atom data by node index.
15    fn atom(&self, idx: NodeIndex) -> &Atom;
16    /// Mutably access atom data by node index.
17    fn atom_mut(&mut self, idx: NodeIndex) -> &mut Atom;
18    /// Bond type between two atoms, or `None` if not bonded.
19    fn bond_between(&self, a: NodeIndex, b: NodeIndex) -> Option<BondType>;
20    /// All non-hydrogen neighbor indices.
21    fn heavy_neighbors(&self, idx: NodeIndex) -> Vec<NodeIndex>;
22    /// Number of neighbors (graph degree).
23    fn degree(&self, idx: NodeIndex) -> usize;
24
25    /// Compute implicit H count for an atom.
26    /// - If explicit_h is set (bracket atom with H spec), return that.
27    /// - For bracket atoms without H spec (explicit_h is None AND atom is bracket),
28    ///   we return 0 — caller decides based on context.
29    /// - For organic subset atoms, compute from default valences:
30    ///   implicit_H = min_valence_geq_bond_sum - bond_sum
31    fn compute_implicit_h(&self, idx: NodeIndex) -> u8;
32
33    /// Set h_count on all atoms. Call after graph is fully constructed.
34    fn assign_implicit_hydrogens(&mut self);
35}
36
37impl MolGraphExt for MolGraph {
38    fn num_atoms(&self) -> usize {
39        self.node_count()
40    }
41
42    fn num_bonds(&self) -> usize {
43        self.edge_count()
44    }
45
46    fn atom(&self, idx: NodeIndex) -> &Atom {
47        &self[idx]
48    }
49
50    fn atom_mut(&mut self, idx: NodeIndex) -> &mut Atom {
51        &mut self[idx]
52    }
53
54    fn bond_between(&self, a: NodeIndex, b: NodeIndex) -> Option<BondType> {
55        self.find_edge(a, b).map(|e| self[e])
56    }
57
58    fn heavy_neighbors(&self, idx: NodeIndex) -> Vec<NodeIndex> {
59        self.neighbors(idx).collect()
60    }
61
62    fn degree(&self, idx: NodeIndex) -> usize {
63        self.neighbors(idx).count()
64    }
65
66    fn compute_implicit_h(&self, idx: NodeIndex) -> u8 {
67        let atom = &self[idx];
68
69        // Bracket atoms with explicit H count
70        if let Some(h) = atom.explicit_h {
71            return h;
72        }
73
74        // Bracket atoms without H spec → 0 (e.g., [Fe], [C])
75        // We detect bracket vs organic by whether explicit_h was ever set.
76        // But we can't distinguish here without more context stored on the atom.
77        // Convention: if element is NOT in organic subset, return 0.
78        if !atom.element.is_organic_subset() {
79            return 0;
80        }
81
82        // Sum bond orders for the atom
83        let bond_sum: u8 = self
84            .edges(idx)
85            .map(|e| e.weight().valence_contribution())
86            .sum();
87
88        let valences = atom.element.default_valences();
89        if valences.is_empty() || (valences.len() == 1 && valences[0] == 0) {
90            return 0;
91        }
92
93        if atom.aromatic {
94            // For aromatic atoms, the pi system contributes 1 to the effective bond sum.
95            // However, if the atom's normal valence is already exactly satisfied by its bonds
96            // (e.g., aromatic N bonded to methyl + 2 ring atoms = 3 bonds, valence 3),
97            // do NOT add the +1 or we'd incorrectly jump to a higher valence (e.g., N valence 5).
98            for &v in valences {
99                if v >= bond_sum {
100                    let h_raw = v.saturating_sub(bond_sum);
101                    let charge_adj = atom.charge.unsigned_abs();
102                    if h_raw == 0 {
103                        // All valence bonds used; no pi-electron adjustment needed.
104                        return 0_u8.saturating_sub(charge_adj);
105                    }
106                    // Atom still has available valence: apply the +1 aromatic pi adjustment.
107                    let effective_sum = bond_sum + 1;
108                    for &v2 in valences {
109                        if v2 >= effective_sum {
110                            return v2.saturating_sub(effective_sum).saturating_sub(charge_adj);
111                        }
112                    }
113                    return 0;
114                }
115            }
116            return 0;
117        }
118
119        // Non-aromatic: find smallest valence >= bond_sum
120        for &v in valences {
121            if v >= bond_sum {
122                let charge_adj = atom.charge.unsigned_abs();
123                return v.saturating_sub(bond_sum).saturating_sub(charge_adj);
124            }
125        }
126
127        0
128    }
129
130    fn assign_implicit_hydrogens(&mut self) {
131        let indices: Vec<NodeIndex> = self.node_indices().collect();
132        for idx in indices {
133            if self[idx].element == super::atom::Element::H {
134                continue; // skip H/D atoms themselves
135            }
136            let h_implicit = self.compute_implicit_h(idx);
137            // Count explicit H/D neighbor atoms in the graph (e.g. [2H] deuterium)
138            let h_explicit: u8 = self
139                .neighbors(idx)
140                .filter(|&nb| self[nb].element == super::atom::Element::H)
141                .count()
142                .min(255) as u8;
143            self[idx].h_count = h_implicit + h_explicit;
144        }
145    }
146}
147
148#[cfg(test)]
149mod tests {
150    use super::super::atom::Element;
151    use super::*;
152
153    fn make_ethanol() -> MolGraph {
154        let mut g = MolGraph::new_undirected();
155        let c1 = g.add_node(Atom::new(Element::C));
156        let c2 = g.add_node(Atom::new(Element::C));
157        let o = g.add_node(Atom::new(Element::O));
158        g.add_edge(c1, c2, BondType::Single);
159        g.add_edge(c2, o, BondType::Single);
160        g.assign_implicit_hydrogens();
161        g
162    }
163
164    #[test]
165    fn test_ethanol_structure() {
166        let g = make_ethanol();
167        assert_eq!(g.num_atoms(), 3);
168        assert_eq!(g.num_bonds(), 2);
169    }
170
171    #[test]
172    fn test_ethanol_implicit_h() {
173        let g = make_ethanol();
174        // C-C-O: C1 has 1 heavy neighbor → 3H, C2 has 2 → 2H, O has 1 → 1H
175        assert_eq!(g[NodeIndex::new(0)].h_count, 3);
176        assert_eq!(g[NodeIndex::new(1)].h_count, 2);
177        assert_eq!(g[NodeIndex::new(2)].h_count, 1);
178    }
179
180    #[test]
181    fn test_benzene_implicit_h() {
182        let mut g = MolGraph::new_undirected();
183        let atoms: Vec<NodeIndex> = (0..6)
184            .map(|_| {
185                let mut a = Atom::new(Element::C);
186                a.aromatic = true;
187                g.add_node(a)
188            })
189            .collect();
190        for i in 0..5 {
191            g.add_edge(atoms[i], atoms[i + 1], BondType::Aromatic);
192        }
193        g.add_edge(atoms[5], atoms[0], BondType::Aromatic);
194        g.assign_implicit_hydrogens();
195        // Each aromatic C: bond_sum=2 (two aromatic bonds, each contrib 1),
196        // effective_sum = 2+1 = 3, valence 4, h = 4-3 = 1
197        for &a in &atoms {
198            assert_eq!(g[a].h_count, 1, "aromatic C should have 1H");
199        }
200    }
201
202    #[test]
203    fn test_heavy_neighbors() {
204        let g = make_ethanol();
205        let c2 = NodeIndex::new(1);
206        let neighbors = g.heavy_neighbors(c2);
207        assert_eq!(neighbors.len(), 2);
208    }
209}