Skip to main content

sci_form/
graph.rs

1//! Molecular graph representation backed by `petgraph`.
2//!
3//! Defines [`Atom`], [`Bond`], and [`Molecule`] — the core data model consumed by
4//! conformer generation, force fields, electronic methods, and all property modules.
5//! Atoms carry hybridization, chirality, and aromaticity; bonds carry order and stereo.
6
7use nalgebra::Vector3;
8pub use petgraph::graph::NodeIndex;
9use petgraph::graph::UnGraph;
10use serde::{Deserialize, Serialize};
11
12#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq)]
13/// Hybridization state of an atom, used to compute bond angles and geometry.
14pub enum Hybridization {
15    SP,
16    SP2,
17    SP3,
18    SP3D,
19    SP3D2,
20    Unknown,
21}
22
23#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
24/// Tetrahedral chirality type derived from the SMILES `@`/`@@` specification.
25pub enum ChiralType {
26    Unspecified,
27    TetrahedralCW,  // R usually
28    TetrahedralCCW, // S usually
29    Other,
30}
31
32#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq)]
33/// E/Z (cis/trans) stereochemistry of a double bond.
34pub enum BondStereo {
35    None,
36    E,
37    Z,
38    Any,
39}
40
41#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq)]
42/// Bond order: single, double, triple, aromatic, or unknown.
43pub enum BondOrder {
44    Single,
45    Double,
46    Triple,
47    Aromatic,
48    Unknown,
49}
50
51#[derive(Clone, Debug)]
52/// A single atom node in the molecular graph.
53pub struct Atom {
54    pub element: u8,
55    pub position: Vector3<f32>,
56    pub charge: f32,
57    pub formal_charge: i8,
58    pub hybridization: Hybridization,
59    pub chiral_tag: ChiralType,
60    pub explicit_h: u8,
61}
62
63#[derive(Clone, Debug)]
64/// A bond edge in the molecular graph.
65pub struct Bond {
66    pub order: BondOrder,
67    pub stereo: BondStereo,
68}
69
70impl Atom {
71    /// Construct a new atom with the given atomic number at the given 3D position.
72    /// Hybridization defaults to `Unknown` and is assigned during SMILES parsing.
73    pub fn new(atomic_number: u8, x: f32, y: f32, z: f32) -> Self {
74        Atom {
75            element: atomic_number,
76            position: Vector3::new(x, y, z),
77            charge: 0.0,
78            formal_charge: 0,
79            hybridization: Hybridization::Unknown,
80            chiral_tag: ChiralType::Unspecified,
81            explicit_h: 0,
82        }
83    }
84}
85
86// Representing Molecule utilizing petgraph
87/// Molecular graph: atoms as nodes, bonds as edges.
88/// Constructed from a SMILES string via [`Molecule::from_smiles`].
89pub struct Molecule {
90    pub graph: UnGraph<Atom, Bond>,
91    pub name: String,
92}
93
94impl Molecule {
95    /// Create an empty molecule with the given name.
96    pub fn new(name: &str) -> Self {
97        Self {
98            graph: UnGraph::new_undirected(),
99            name: name.to_string(),
100        }
101    }
102
103    /// Parse a SMILES string into a molecule.
104    ///
105    /// Automatically adds implicit hydrogens, assigns hybridization, detects
106    /// ring membership (SSSR), and retains only the largest fragment if the
107    /// SMILES contains multiple disconnected components (e.g. salts).
108    pub fn from_smiles(smiles: &str) -> Result<Self, String> {
109        let mut mol = Self::new(smiles);
110        let mut parser = crate::smiles::SmilesParser::new(smiles, &mut mol);
111        parser.parse()?;
112        // If molecule has disconnected fragments (salts etc.), keep only the largest
113        let components = petgraph::algo::connected_components(&mol.graph);
114        if components > 1 {
115            mol = mol.largest_fragment();
116        }
117        Ok(mol)
118    }
119
120    /// Returns a new Molecule containing only the largest connected component.
121    fn largest_fragment(self) -> Molecule {
122        use petgraph::visit::EdgeRef;
123        let n = self.graph.node_count();
124        // BFS to find connected components
125        let mut component = vec![usize::MAX; n];
126        let mut comp_sizes = Vec::new();
127        let mut comp_id = 0;
128        for start in 0..n {
129            if component[start] != usize::MAX {
130                continue;
131            }
132            let mut size = 0;
133            let mut queue = std::collections::VecDeque::new();
134            queue.push_back(start);
135            component[start] = comp_id;
136            while let Some(cur) = queue.pop_front() {
137                size += 1;
138                for nb in self.graph.neighbors(NodeIndex::new(cur)) {
139                    if component[nb.index()] == usize::MAX {
140                        component[nb.index()] = comp_id;
141                        queue.push_back(nb.index());
142                    }
143                }
144            }
145            comp_sizes.push(size);
146            comp_id += 1;
147        }
148        let best_comp = comp_sizes
149            .iter()
150            .enumerate()
151            .max_by_key(|(_, &s)| s)
152            .map(|(i, _)| i)
153            .unwrap_or(0);
154
155        // Build new molecule with only atoms from the largest component
156        let mut new_mol = Molecule::new(&self.name);
157        let mut old_to_new = vec![None; n];
158        for old_idx in 0..n {
159            if component[old_idx] == best_comp {
160                let atom = self.graph[NodeIndex::new(old_idx)].clone();
161                let new_idx = new_mol.graph.add_node(atom);
162                old_to_new[old_idx] = Some(new_idx);
163            }
164        }
165        for edge in self.graph.edge_references() {
166            let a = edge.source().index();
167            let b = edge.target().index();
168            if let (Some(na), Some(nb)) = (old_to_new[a], old_to_new[b]) {
169                new_mol.graph.add_edge(na, nb, edge.weight().clone());
170            }
171        }
172        new_mol
173    }
174
175    /// Add an atom node to the graph, returning its index.
176    pub fn add_atom(&mut self, atom: Atom) -> NodeIndex {
177        self.graph.add_node(atom)
178    }
179
180    /// Add a bond edge between two atom indices.
181    pub fn add_bond(&mut self, a: NodeIndex, b: NodeIndex, bond: Bond) {
182        self.graph.add_edge(a, b, bond);
183    }
184}
185
186// =============== ATOMIC CONSTANTS ==================
187
188/// Returns the covalent radius (in Å) for a given atomic number
189/// Data derived from RDKit's atomic_data.cpp
190pub fn get_covalent_radius(atomic_number: u8) -> f64 {
191    match atomic_number {
192        1 => 0.31,  // H
193        2 => 0.28,  // He
194        3 => 1.28,  // Li
195        4 => 0.96,  // Be
196        5 => 0.84,  // B
197        6 => 0.76,  // C
198        7 => 0.71,  // N
199        8 => 0.66,  // O
200        9 => 0.57,  // F
201        10 => 0.58, // Ne
202        11 => 1.66, // Na
203        12 => 1.41, // Mg
204        13 => 1.21, // Al
205        14 => 1.11, // Si
206        15 => 1.07, // P
207        16 => 1.05, // S
208        17 => 1.02, // Cl
209        18 => 1.06, // Ar
210        19 => 1.96, // K
211        20 => 1.71, // Ca
212        21 => 1.48, // Sc
213        22 => 1.36, // Ti
214        23 => 1.34, // V
215        24 => 1.22, // Cr
216        25 => 1.19, // Mn
217        26 => 1.16, // Fe
218        27 => 1.11, // Co
219        28 => 1.10, // Ni
220        29 => 1.12, // Cu
221        30 => 1.18, // Zn
222        31 => 1.24, // Ga
223        32 => 1.21, // Ge
224        33 => 1.21, // As
225        34 => 1.16, // Se
226        35 => 1.20, // Br
227        36 => 1.16, // Kr
228        37 => 2.10, // Rb
229        38 => 1.85, // Sr
230        39 => 1.63, // Y
231        40 => 1.54, // Zr
232        41 => 1.47, // Nb
233        42 => 1.38, // Mo
234        43 => 1.28, // Tc
235        44 => 1.25, // Ru
236        45 => 1.25, // Rh
237        46 => 1.20, // Pd
238        47 => 1.28, // Ag
239        48 => 1.36, // Cd
240        49 => 1.42, // In
241        50 => 1.40, // Sn
242        51 => 1.40, // Sb
243        52 => 1.36, // Te
244        53 => 1.39, // I
245        54 => 1.40, // Xe
246        55 => 2.32, // Cs
247        56 => 1.96, // Ba
248        72 => 1.50, // Hf
249        73 => 1.38, // Ta
250        74 => 1.37, // W
251        75 => 1.35, // Re
252        76 => 1.26, // Os
253        77 => 1.27, // Ir
254        78 => 1.23, // Pt
255        79 => 1.24, // Au
256        80 => 1.33, // Hg
257        81 => 1.44, // Tl
258        82 => 1.46, // Pb
259        83 => 1.48, // Bi
260        84 => 1.46, // Po
261        85 => 1.45, // At
262        86 => 1.50, // Rn
263        _ => 0.76,  // Fallback to Carbon's radius if unknown mostly
264    }
265}
266
267/// Returns the Van der Waals radius (in Å)
268pub fn get_vdw_radius(atomic_number: u8) -> f64 {
269    // RDKit's PeriodicTable::getRvdw values
270    match atomic_number {
271        1 => 1.20,  // H
272        2 => 1.40,  // He
273        3 => 1.82,  // Li
274        4 => 1.53,  // Be
275        5 => 1.92,  // B
276        6 => 1.70,  // C
277        7 => 1.60,  // N (was 1.55)
278        8 => 1.55,  // O (was 1.52)
279        9 => 1.50,  // F (was 1.47)
280        10 => 1.54, // Ne
281        11 => 2.27, // Na
282        12 => 1.73, // Mg
283        14 => 2.10, // Si
284        15 => 1.80, // P
285        16 => 1.80, // S
286        17 => 1.80, // Cl (was 1.75)
287        18 => 1.88, // Ar
288        35 => 1.90, // Br (was 1.85)
289        53 => 2.10, // I (was 1.98)
290        _ => 2.0,
291    }
292}
293
294/// Returns the ideal bond angle (in radians) for a given hybridization
295pub fn get_ideal_angle(hybridization: &Hybridization) -> f64 {
296    match hybridization {
297        Hybridization::SP => std::f64::consts::PI, // 180 degrees
298        Hybridization::SP2 => 2.0 * std::f64::consts::PI / 3.0, // 120 degrees
299        Hybridization::SP3 => 109.5 * std::f64::consts::PI / 180.0, // 109.5 degrees
300        Hybridization::SP3D => 105.0 * std::f64::consts::PI / 180.0, // 105 degrees (approx)
301        Hybridization::SP3D2 => 90.0 * std::f64::consts::PI / 180.0, // 90 degrees
302        Hybridization::Unknown => 109.5 * std::f64::consts::PI / 180.0, // Fallback to tetrahedral
303    }
304}
305
306/// BFS shortest path (in bonds) from `start` to `target` in the molecular graph,
307/// excluding the node `exclude`. Used to detect ring membership and ring sizes.
308/// Returns `None` if no path exists within `limit` bonds.
309pub fn min_path_excluding(
310    mol: &Molecule,
311    start: petgraph::graph::NodeIndex,
312    target: petgraph::graph::NodeIndex,
313    exclude: petgraph::graph::NodeIndex,
314    limit: usize,
315) -> Option<usize> {
316    if mol.graph.contains_edge(start, target) {
317        return Some(1);
318    }
319    let mut queue = std::collections::VecDeque::new();
320    let mut visited = std::collections::HashSet::new();
321    queue.push_back((start, 0));
322    visited.insert(start);
323
324    while let Some((curr, dist)) = queue.pop_front() {
325        if dist >= limit {
326            continue;
327        }
328        for n in mol.graph.neighbors(curr) {
329            if n == target {
330                return Some(dist + 1);
331            }
332            if n != exclude && !visited.contains(&n) {
333                visited.insert(n);
334                queue.push_back((n, dist + 1));
335            }
336        }
337    }
338    None
339}
340
341/// BFS shortest path from start to target, excluding two intermediate nodes.
342pub fn min_path_excluding2(
343    mol: &Molecule,
344    start: petgraph::graph::NodeIndex,
345    target: petgraph::graph::NodeIndex,
346    excl1: petgraph::graph::NodeIndex,
347    excl2: petgraph::graph::NodeIndex,
348    limit: usize,
349) -> Option<usize> {
350    let mut queue = std::collections::VecDeque::new();
351    let mut visited = std::collections::HashSet::new();
352    visited.insert(excl1);
353    visited.insert(excl2);
354    visited.insert(start);
355
356    // Always start from neighbors, skipping target to avoid counting the direct bond
357    for n in mol.graph.neighbors(start) {
358        if n == target {
359            continue; // Skip direct bond — we want an alternative path
360        }
361        if !visited.contains(&n) {
362            visited.insert(n);
363            queue.push_back((n, 1usize));
364        }
365    }
366
367    while let Some((curr, dist)) = queue.pop_front() {
368        if dist >= limit {
369            continue;
370        }
371        for n in mol.graph.neighbors(curr) {
372            if n == target {
373                return Some(dist + 1);
374            }
375            if !visited.contains(&n) {
376                visited.insert(n);
377                queue.push_back((n, dist + 1));
378            }
379        }
380    }
381    None
382}
383
384/// Check if an atom is in any ring (has an alternative path of length ≤ 8 through any neighbor).
385pub fn atom_in_ring(mol: &Molecule, atom: petgraph::graph::NodeIndex) -> bool {
386    for nb in mol.graph.neighbors(atom) {
387        if min_path_excluding(mol, nb, atom, atom, 8).is_some() {
388            return true;
389        }
390    }
391    false
392}
393
394/// Check if a bond (edge between a and b) is in a ring of exactly the given size.
395pub fn bond_in_ring_of_size(
396    mol: &Molecule,
397    a: petgraph::graph::NodeIndex,
398    b: petgraph::graph::NodeIndex,
399    ring_size: usize,
400) -> bool {
401    // min_path_excluding2 skips the direct a-b bond and finds the shortest alternative path.
402    // For a ring of size `ring_size`, the alternative path has ring_size - 1 edges.
403    min_path_excluding2(mol, a, b, a, a, ring_size).is_some_and(|d| d == ring_size - 1)
404}
405
406/// Returns the ideal bond angle accounting for rings topological constraints
407/// Helper: compute the ring interior angle for a given hybridization and ring size.
408/// Matches RDKit's _setRingAngle logic.
409fn ring_angle_for(ahyb: &Hybridization, ring_size: usize) -> f64 {
410    use std::f64::consts::PI;
411    if ring_size == 3 || ring_size == 4 || (*ahyb == Hybridization::SP2 && ring_size <= 8) {
412        PI * (1.0 - 2.0 / ring_size as f64)
413    } else if *ahyb == Hybridization::SP3 {
414        if ring_size == 5 {
415            104.0 * PI / 180.0
416        } else {
417            109.5 * PI / 180.0
418        }
419    } else if *ahyb == Hybridization::SP3D {
420        105.0 * PI / 180.0
421    } else {
422        // RDKit's _setRingAngle defaults to 120° for unhandled hybridizations (SP, SP3D2, etc.)
423        120.0 * PI / 180.0
424    }
425}
426
427/// Returns the ideal 1–2–3 bond angle (radians) between atoms `n1`–`center`–`n2`,
428/// accounting for ring strain in small rings (3-, 4-, 5-membered).
429pub fn get_corrected_ideal_angle(
430    mol: &Molecule,
431    center: petgraph::graph::NodeIndex,
432    n1: petgraph::graph::NodeIndex,
433    n2: petgraph::graph::NodeIndex,
434) -> f64 {
435    use std::f64::consts::PI;
436    let ahyb = &mol.graph[center].hybridization;
437
438    // Check if n1 and n2 are in a ring through center
439    let dist = min_path_excluding(mol, n1, n2, center, 8);
440
441    match dist {
442        Some(1) => return 60.0 * PI / 180.0, // 3-ring
443        Some(2) => return 90.0 * PI / 180.0, // 4-ring
444        _ => {}
445    }
446
447    // RDKit special case: exocyclic angles at SP3 atoms in small rings.
448    // When n1-n2 are NOT in a 3- or 4-ring through center, but center IS in a 3- or 4-ring,
449    // the exocyclic angle is widened: 116° for 3-ring, 112° for 4-ring.
450    // dist > 2 means the pair is not directly in any 3-ring (dist=1) or 4-ring (dist=2),
451    // so it's either in a larger ring envelope (fused system) or truly exocyclic.
452    if *ahyb == Hybridization::SP3 && dist.is_none_or(|d| d > 2) {
453        let nbs: Vec<_> = mol.graph.neighbors(center).collect();
454        let mut smallest_ring = 0usize;
455        for i in 0..nbs.len() {
456            for j in (i + 1)..nbs.len() {
457                if let Some(p) = min_path_excluding(mol, nbs[i], nbs[j], center, 8) {
458                    let rs = p + 2;
459                    if rs == 3 {
460                        smallest_ring = if smallest_ring == 0 {
461                            3
462                        } else {
463                            smallest_ring.min(3)
464                        };
465                    } else if rs == 4 && smallest_ring != 3 {
466                        smallest_ring = if smallest_ring == 0 {
467                            4
468                        } else {
469                            smallest_ring.min(4)
470                        };
471                    }
472                }
473            }
474        }
475        if smallest_ring == 3 {
476            return 116.0 * PI / 180.0;
477        } else if smallest_ring == 4 {
478            return 112.0 * PI / 180.0;
479        }
480    }
481
482    // For SP2 atoms with exactly 3 neighbors: use RDKit's approach of
483    // distributing 360° among all neighbor pairs based on ring membership.
484    // angleTaken = sum of ring angles, exo = (2π - angleTaken) / remaining_pairs
485    if *ahyb == Hybridization::SP2 {
486        let nbs: Vec<_> = mol.graph.neighbors(center).collect();
487        if nbs.len() == 3 {
488            // Find ring size for each pair
489            let pairs = [(0, 1, 2), (0, 2, 1), (1, 2, 0)];
490            let mut ring_angles = [0.0f64; 3]; // angles for pairs (0,1), (0,2), (1,2)
491            let mut in_ring = [false; 3];
492
493            for (idx, &(a, b, _)) in pairs.iter().enumerate() {
494                let d = min_path_excluding(mol, nbs[a], nbs[b], center, 8);
495                if let Some(p) = d {
496                    let rs = p + 2;
497                    if rs <= 8 {
498                        ring_angles[idx] = ring_angle_for(ahyb, rs);
499                        in_ring[idx] = true;
500                    }
501                }
502            }
503
504            let ring_count = in_ring.iter().filter(|&&x| x).count();
505            // Identify which pair index corresponds to (n1, n2)
506            let target_idx = if (nbs[0] == n1 && nbs[1] == n2) || (nbs[1] == n1 && nbs[0] == n2) {
507                0
508            } else if (nbs[0] == n1 && nbs[2] == n2) || (nbs[2] == n1 && nbs[0] == n2) {
509                1
510            } else {
511                2
512            };
513
514            if in_ring[target_idx] && ring_count <= 1 {
515                // Only this pair in a ring — return ring angle directly
516                return ring_angles[target_idx];
517            } else if !in_ring[target_idx] && ring_count > 0 {
518                // This pair is NOT in a ring, but other pairs are:
519                // exo angle = (2π - sum_of_ring_angles) / non_ring_pairs
520                let angle_taken: f64 = ring_angles.iter().sum();
521                let non_ring = 3 - ring_count;
522                if non_ring > 0 {
523                    let exo = (2.0 * PI - angle_taken) / non_ring as f64;
524                    if exo > 0.0 {
525                        return exo;
526                    }
527                }
528            } else if ring_count >= 2 && in_ring[target_idx] {
529                // This pair AND other pairs are in rings.
530                // For fused junctions: check if this pair's "ring" is
531                // actually the fused envelope. If the other two pairs
532                // account for the full angle, use ring angles directly.
533                let other_angle: f64 = (0..3)
534                    .filter(|&i| i != target_idx && in_ring[i])
535                    .map(|i| ring_angles[i])
536                    .sum();
537                let this_ring_expected = 2.0 * PI - other_angle;
538                // If this pair's ring angle differs significantly from expected exo,
539                // it's a fused envelope — use exo angle
540                if (ring_angles[target_idx] - this_ring_expected).abs() > 0.05 {
541                    return this_ring_expected;
542                }
543                return ring_angles[target_idx];
544            }
545        }
546    }
547
548    // Non-SP2 or fallback: use ring angle if in a ring
549    if let Some(ring_path) = dist {
550        let ring_size = ring_path + 2;
551        if ring_size <= 8 {
552            return ring_angle_for(ahyb, ring_size);
553        }
554    }
555
556    get_ideal_angle(ahyb)
557}