Skip to main content

sci_form/
graph.rs

1use nalgebra::Vector3;
2pub use petgraph::graph::NodeIndex;
3use petgraph::graph::UnGraph;
4use serde::{Deserialize, Serialize};
5
6#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq)]
7/// Hybridization state of an atom, used to compute bond angles and geometry.
8pub enum Hybridization {
9    SP,
10    SP2,
11    SP3,
12    SP3D,
13    SP3D2,
14    Unknown,
15}
16
17#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
18/// Tetrahedral chirality type derived from the SMILES `@`/`@@` specification.
19pub enum ChiralType {
20    Unspecified,
21    TetrahedralCW,  // R usually
22    TetrahedralCCW, // S usually
23    Other,
24}
25
26#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq)]
27/// E/Z (cis/trans) stereochemistry of a double bond.
28pub enum BondStereo {
29    None,
30    E,
31    Z,
32    Any,
33}
34
35#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq)]
36/// Bond order: single, double, triple, aromatic, or unknown.
37pub enum BondOrder {
38    Single,
39    Double,
40    Triple,
41    Aromatic,
42    Unknown,
43}
44
45#[derive(Clone, Debug)]
46/// A single atom node in the molecular graph.
47pub struct Atom {
48    pub element: u8,
49    pub position: Vector3<f32>,
50    pub charge: f32,
51    pub formal_charge: i8,
52    pub hybridization: Hybridization,
53    pub chiral_tag: ChiralType,
54    pub explicit_h: u8,
55}
56
57#[derive(Clone, Debug)]
58/// A bond edge in the molecular graph.
59pub struct Bond {
60    pub order: BondOrder,
61    pub stereo: BondStereo,
62}
63
64impl Atom {
65    /// Construct a new atom with the given atomic number at the given 3D position.
66    /// Hybridization defaults to `Unknown` and is assigned during SMILES parsing.
67    pub fn new(atomic_number: u8, x: f32, y: f32, z: f32) -> Self {
68        Atom {
69            element: atomic_number,
70            position: Vector3::new(x, y, z),
71            charge: 0.0,
72            formal_charge: 0,
73            hybridization: Hybridization::Unknown,
74            chiral_tag: ChiralType::Unspecified,
75            explicit_h: 0,
76        }
77    }
78}
79
80// Representing Molecule utilizing petgraph
81/// Molecular graph: atoms as nodes, bonds as edges.
82/// Constructed from a SMILES string via [`Molecule::from_smiles`].
83pub struct Molecule {
84    pub graph: UnGraph<Atom, Bond>,
85    pub name: String,
86}
87
88impl Molecule {
89    /// Create an empty molecule with the given name.
90    pub fn new(name: &str) -> Self {
91        Self {
92            graph: UnGraph::new_undirected(),
93            name: name.to_string(),
94        }
95    }
96
97    /// Parse a SMILES string into a molecule.
98    ///
99    /// Automatically adds implicit hydrogens, assigns hybridization, detects
100    /// ring membership (SSSR), and retains only the largest fragment if the
101    /// SMILES contains multiple disconnected components (e.g. salts).
102    pub fn from_smiles(smiles: &str) -> Result<Self, String> {
103        let mut mol = Self::new(smiles);
104        let mut parser = crate::smiles::SmilesParser::new(smiles, &mut mol);
105        parser.parse()?;
106        // If molecule has disconnected fragments (salts etc.), keep only the largest
107        let components = petgraph::algo::connected_components(&mol.graph);
108        if components > 1 {
109            mol = mol.largest_fragment();
110        }
111        Ok(mol)
112    }
113
114    /// Returns a new Molecule containing only the largest connected component.
115    fn largest_fragment(self) -> Molecule {
116        use petgraph::visit::EdgeRef;
117        let n = self.graph.node_count();
118        // BFS to find connected components
119        let mut component = vec![usize::MAX; n];
120        let mut comp_sizes = Vec::new();
121        let mut comp_id = 0;
122        for start in 0..n {
123            if component[start] != usize::MAX {
124                continue;
125            }
126            let mut size = 0;
127            let mut queue = std::collections::VecDeque::new();
128            queue.push_back(start);
129            component[start] = comp_id;
130            while let Some(cur) = queue.pop_front() {
131                size += 1;
132                for nb in self.graph.neighbors(NodeIndex::new(cur)) {
133                    if component[nb.index()] == usize::MAX {
134                        component[nb.index()] = comp_id;
135                        queue.push_back(nb.index());
136                    }
137                }
138            }
139            comp_sizes.push(size);
140            comp_id += 1;
141        }
142        let best_comp = comp_sizes
143            .iter()
144            .enumerate()
145            .max_by_key(|(_, &s)| s)
146            .map(|(i, _)| i)
147            .unwrap_or(0);
148
149        // Build new molecule with only atoms from the largest component
150        let mut new_mol = Molecule::new(&self.name);
151        let mut old_to_new = vec![None; n];
152        for old_idx in 0..n {
153            if component[old_idx] == best_comp {
154                let atom = self.graph[NodeIndex::new(old_idx)].clone();
155                let new_idx = new_mol.graph.add_node(atom);
156                old_to_new[old_idx] = Some(new_idx);
157            }
158        }
159        for edge in self.graph.edge_references() {
160            let a = edge.source().index();
161            let b = edge.target().index();
162            if let (Some(na), Some(nb)) = (old_to_new[a], old_to_new[b]) {
163                new_mol.graph.add_edge(na, nb, edge.weight().clone());
164            }
165        }
166        new_mol
167    }
168
169    /// Add an atom node to the graph, returning its index.
170    pub fn add_atom(&mut self, atom: Atom) -> NodeIndex {
171        self.graph.add_node(atom)
172    }
173
174    /// Add a bond edge between two atom indices.
175    pub fn add_bond(&mut self, a: NodeIndex, b: NodeIndex, bond: Bond) {
176        self.graph.add_edge(a, b, bond);
177    }
178}
179
180// =============== ATOMIC CONSTANTS ==================
181
182/// Returns the covalent radius (in Å) for a given atomic number
183/// Data derived from RDKit's atomic_data.cpp
184pub fn get_covalent_radius(atomic_number: u8) -> f64 {
185    match atomic_number {
186        1 => 0.31,  // H
187        2 => 0.28,  // He
188        3 => 1.28,  // Li
189        4 => 0.96,  // Be
190        5 => 0.84,  // B
191        6 => 0.76,  // C
192        7 => 0.71,  // N
193        8 => 0.66,  // O
194        9 => 0.57,  // F
195        10 => 0.58, // Ne
196        11 => 1.66, // Na
197        12 => 1.41, // Mg
198        13 => 1.21, // Al
199        14 => 1.11, // Si
200        15 => 1.07, // P
201        16 => 1.05, // S
202        17 => 1.02, // Cl
203        18 => 1.06, // Ar
204        19 => 1.96, // K
205        20 => 1.71, // Ca
206        21 => 1.48, // Sc
207        22 => 1.36, // Ti
208        23 => 1.34, // V
209        24 => 1.22, // Cr
210        25 => 1.19, // Mn
211        26 => 1.16, // Fe
212        27 => 1.11, // Co
213        28 => 1.10, // Ni
214        29 => 1.12, // Cu
215        30 => 1.18, // Zn
216        31 => 1.24, // Ga
217        32 => 1.21, // Ge
218        33 => 1.21, // As
219        34 => 1.16, // Se
220        35 => 1.20, // Br
221        36 => 1.16, // Kr
222        37 => 2.10, // Rb
223        38 => 1.85, // Sr
224        39 => 1.63, // Y
225        40 => 1.54, // Zr
226        41 => 1.47, // Nb
227        42 => 1.38, // Mo
228        43 => 1.28, // Tc
229        44 => 1.25, // Ru
230        45 => 1.25, // Rh
231        46 => 1.20, // Pd
232        47 => 1.28, // Ag
233        48 => 1.36, // Cd
234        49 => 1.42, // In
235        50 => 1.40, // Sn
236        51 => 1.40, // Sb
237        52 => 1.36, // Te
238        53 => 1.39, // I
239        54 => 1.40, // Xe
240        55 => 2.32, // Cs
241        56 => 1.96, // Ba
242        72 => 1.50, // Hf
243        73 => 1.38, // Ta
244        74 => 1.37, // W
245        75 => 1.35, // Re
246        76 => 1.26, // Os
247        77 => 1.27, // Ir
248        78 => 1.23, // Pt
249        79 => 1.24, // Au
250        80 => 1.33, // Hg
251        81 => 1.44, // Tl
252        82 => 1.46, // Pb
253        83 => 1.48, // Bi
254        84 => 1.46, // Po
255        85 => 1.45, // At
256        86 => 1.50, // Rn
257        _ => 0.76,  // Fallback to Carbon's radius if unknown mostly
258    }
259}
260
261/// Returns the Van der Waals radius (in Å)
262pub fn get_vdw_radius(atomic_number: u8) -> f64 {
263    // RDKit's PeriodicTable::getRvdw values
264    match atomic_number {
265        1 => 1.20,  // H
266        2 => 1.40,  // He
267        3 => 1.82,  // Li
268        4 => 1.53,  // Be
269        5 => 1.92,  // B
270        6 => 1.70,  // C
271        7 => 1.60,  // N (was 1.55)
272        8 => 1.55,  // O (was 1.52)
273        9 => 1.50,  // F (was 1.47)
274        10 => 1.54, // Ne
275        11 => 2.27, // Na
276        12 => 1.73, // Mg
277        14 => 2.10, // Si
278        15 => 1.80, // P
279        16 => 1.80, // S
280        17 => 1.80, // Cl (was 1.75)
281        18 => 1.88, // Ar
282        35 => 1.90, // Br (was 1.85)
283        53 => 2.10, // I (was 1.98)
284        _ => 2.0,
285    }
286}
287
288/// Returns the ideal bond angle (in radians) for a given hybridization
289pub fn get_ideal_angle(hybridization: &Hybridization) -> f64 {
290    match hybridization {
291        Hybridization::SP => std::f64::consts::PI, // 180 degrees
292        Hybridization::SP2 => 2.0 * std::f64::consts::PI / 3.0, // 120 degrees
293        Hybridization::SP3 => 109.5 * std::f64::consts::PI / 180.0, // 109.5 degrees
294        Hybridization::SP3D => 105.0 * std::f64::consts::PI / 180.0, // 105 degrees (approx)
295        Hybridization::SP3D2 => 90.0 * std::f64::consts::PI / 180.0, // 90 degrees
296        Hybridization::Unknown => 109.5 * std::f64::consts::PI / 180.0, // Fallback to tetrahedral
297    }
298}
299
300/// BFS shortest path (in bonds) from `start` to `target` in the molecular graph,
301/// excluding the node `exclude`. Used to detect ring membership and ring sizes.
302/// Returns `None` if no path exists within `limit` bonds.
303pub fn min_path_excluding(
304    mol: &Molecule,
305    start: petgraph::graph::NodeIndex,
306    target: petgraph::graph::NodeIndex,
307    exclude: petgraph::graph::NodeIndex,
308    limit: usize,
309) -> Option<usize> {
310    if mol.graph.contains_edge(start, target) {
311        return Some(1);
312    }
313    let mut queue = std::collections::VecDeque::new();
314    let mut visited = std::collections::HashSet::new();
315    queue.push_back((start, 0));
316    visited.insert(start);
317
318    while let Some((curr, dist)) = queue.pop_front() {
319        if dist >= limit {
320            continue;
321        }
322        for n in mol.graph.neighbors(curr) {
323            if n == target {
324                return Some(dist + 1);
325            }
326            if n != exclude && !visited.contains(&n) {
327                visited.insert(n);
328                queue.push_back((n, dist + 1));
329            }
330        }
331    }
332    None
333}
334
335/// BFS shortest path from start to target, excluding two intermediate nodes.
336pub fn min_path_excluding2(
337    mol: &Molecule,
338    start: petgraph::graph::NodeIndex,
339    target: petgraph::graph::NodeIndex,
340    excl1: petgraph::graph::NodeIndex,
341    excl2: petgraph::graph::NodeIndex,
342    limit: usize,
343) -> Option<usize> {
344    let mut queue = std::collections::VecDeque::new();
345    let mut visited = std::collections::HashSet::new();
346    visited.insert(excl1);
347    visited.insert(excl2);
348    visited.insert(start);
349
350    // Always start from neighbors, skipping target to avoid counting the direct bond
351    for n in mol.graph.neighbors(start) {
352        if n == target {
353            continue; // Skip direct bond — we want an alternative path
354        }
355        if !visited.contains(&n) {
356            visited.insert(n);
357            queue.push_back((n, 1usize));
358        }
359    }
360
361    while let Some((curr, dist)) = queue.pop_front() {
362        if dist >= limit {
363            continue;
364        }
365        for n in mol.graph.neighbors(curr) {
366            if n == target {
367                return Some(dist + 1);
368            }
369            if !visited.contains(&n) {
370                visited.insert(n);
371                queue.push_back((n, dist + 1));
372            }
373        }
374    }
375    None
376}
377
378/// Check if an atom is in any ring (has an alternative path of length ≤ 8 through any neighbor).
379pub fn atom_in_ring(mol: &Molecule, atom: petgraph::graph::NodeIndex) -> bool {
380    for nb in mol.graph.neighbors(atom) {
381        if min_path_excluding(mol, nb, atom, atom, 8).is_some() {
382            return true;
383        }
384    }
385    false
386}
387
388/// Check if a bond (edge between a and b) is in a ring of exactly the given size.
389pub fn bond_in_ring_of_size(
390    mol: &Molecule,
391    a: petgraph::graph::NodeIndex,
392    b: petgraph::graph::NodeIndex,
393    ring_size: usize,
394) -> bool {
395    // min_path_excluding2 skips the direct a-b bond and finds the shortest alternative path.
396    // For a ring of size `ring_size`, the alternative path has ring_size - 1 edges.
397    min_path_excluding2(mol, a, b, a, a, ring_size).is_some_and(|d| d == ring_size - 1)
398}
399
400/// Returns the ideal bond angle accounting for rings topological constraints
401/// Helper: compute the ring interior angle for a given hybridization and ring size.
402/// Matches RDKit's _setRingAngle logic.
403fn ring_angle_for(ahyb: &Hybridization, ring_size: usize) -> f64 {
404    use std::f64::consts::PI;
405    if ring_size == 3 || ring_size == 4 || (*ahyb == Hybridization::SP2 && ring_size <= 8) {
406        PI * (1.0 - 2.0 / ring_size as f64)
407    } else if *ahyb == Hybridization::SP3 {
408        if ring_size == 5 {
409            104.0 * PI / 180.0
410        } else {
411            109.5 * PI / 180.0
412        }
413    } else if *ahyb == Hybridization::SP3D {
414        105.0 * PI / 180.0
415    } else {
416        // RDKit's _setRingAngle defaults to 120° for unhandled hybridizations (SP, SP3D2, etc.)
417        120.0 * PI / 180.0
418    }
419}
420
421/// Returns the ideal 1–2–3 bond angle (radians) between atoms `n1`–`center`–`n2`,
422/// accounting for ring strain in small rings (3-, 4-, 5-membered).
423pub fn get_corrected_ideal_angle(
424    mol: &Molecule,
425    center: petgraph::graph::NodeIndex,
426    n1: petgraph::graph::NodeIndex,
427    n2: petgraph::graph::NodeIndex,
428) -> f64 {
429    use std::f64::consts::PI;
430    let ahyb = &mol.graph[center].hybridization;
431
432    // Check if n1 and n2 are in a ring through center
433    let dist = min_path_excluding(mol, n1, n2, center, 8);
434
435    match dist {
436        Some(1) => return 60.0 * PI / 180.0, // 3-ring
437        Some(2) => return 90.0 * PI / 180.0, // 4-ring
438        _ => {}
439    }
440
441    // RDKit special case: exocyclic angles at SP3 atoms in small rings.
442    // When n1-n2 are NOT in a 3- or 4-ring through center, but center IS in a 3- or 4-ring,
443    // the exocyclic angle is widened: 116° for 3-ring, 112° for 4-ring.
444    // dist > 2 means the pair is not directly in any 3-ring (dist=1) or 4-ring (dist=2),
445    // so it's either in a larger ring envelope (fused system) or truly exocyclic.
446    if *ahyb == Hybridization::SP3 && dist.is_none_or(|d| d > 2) {
447        let nbs: Vec<_> = mol.graph.neighbors(center).collect();
448        let mut smallest_ring = 0usize;
449        for i in 0..nbs.len() {
450            for j in (i + 1)..nbs.len() {
451                if let Some(p) = min_path_excluding(mol, nbs[i], nbs[j], center, 8) {
452                    let rs = p + 2;
453                    if rs == 3 {
454                        smallest_ring = if smallest_ring == 0 {
455                            3
456                        } else {
457                            smallest_ring.min(3)
458                        };
459                    } else if rs == 4 && smallest_ring != 3 {
460                        smallest_ring = if smallest_ring == 0 {
461                            4
462                        } else {
463                            smallest_ring.min(4)
464                        };
465                    }
466                }
467            }
468        }
469        if smallest_ring == 3 {
470            return 116.0 * PI / 180.0;
471        } else if smallest_ring == 4 {
472            return 112.0 * PI / 180.0;
473        }
474    }
475
476    // For SP2 atoms with exactly 3 neighbors: use RDKit's approach of
477    // distributing 360° among all neighbor pairs based on ring membership.
478    // angleTaken = sum of ring angles, exo = (2π - angleTaken) / remaining_pairs
479    if *ahyb == Hybridization::SP2 {
480        let nbs: Vec<_> = mol.graph.neighbors(center).collect();
481        if nbs.len() == 3 {
482            // Find ring size for each pair
483            let pairs = [(0, 1, 2), (0, 2, 1), (1, 2, 0)];
484            let mut ring_angles = [0.0f64; 3]; // angles for pairs (0,1), (0,2), (1,2)
485            let mut in_ring = [false; 3];
486
487            for (idx, &(a, b, _)) in pairs.iter().enumerate() {
488                let d = min_path_excluding(mol, nbs[a], nbs[b], center, 8);
489                if let Some(p) = d {
490                    let rs = p + 2;
491                    if rs <= 8 {
492                        ring_angles[idx] = ring_angle_for(ahyb, rs);
493                        in_ring[idx] = true;
494                    }
495                }
496            }
497
498            let ring_count = in_ring.iter().filter(|&&x| x).count();
499            // Identify which pair index corresponds to (n1, n2)
500            let target_idx = if (nbs[0] == n1 && nbs[1] == n2) || (nbs[1] == n1 && nbs[0] == n2) {
501                0
502            } else if (nbs[0] == n1 && nbs[2] == n2) || (nbs[2] == n1 && nbs[0] == n2) {
503                1
504            } else {
505                2
506            };
507
508            if in_ring[target_idx] && ring_count <= 1 {
509                // Only this pair in a ring — return ring angle directly
510                return ring_angles[target_idx];
511            } else if !in_ring[target_idx] && ring_count > 0 {
512                // This pair is NOT in a ring, but other pairs are:
513                // exo angle = (2π - sum_of_ring_angles) / non_ring_pairs
514                let angle_taken: f64 = ring_angles.iter().sum();
515                let non_ring = 3 - ring_count;
516                if non_ring > 0 {
517                    let exo = (2.0 * PI - angle_taken) / non_ring as f64;
518                    if exo > 0.0 {
519                        return exo;
520                    }
521                }
522            } else if ring_count >= 2 && in_ring[target_idx] {
523                // This pair AND other pairs are in rings.
524                // For fused junctions: check if this pair's "ring" is
525                // actually the fused envelope. If the other two pairs
526                // account for the full angle, use ring angles directly.
527                let other_angle: f64 = (0..3)
528                    .filter(|&i| i != target_idx && in_ring[i])
529                    .map(|i| ring_angles[i])
530                    .sum();
531                let this_ring_expected = 2.0 * PI - other_angle;
532                // If this pair's ring angle differs significantly from expected exo,
533                // it's a fused envelope — use exo angle
534                if (ring_angles[target_idx] - this_ring_expected).abs() > 0.05 {
535                    return this_ring_expected;
536                }
537                return ring_angles[target_idx];
538            }
539        }
540    }
541
542    // Non-SP2 or fallback: use ring angle if in a ring
543    if let Some(ring_path) = dist {
544        let ring_size = ring_path + 2;
545        if ring_size <= 8 {
546            return ring_angle_for(ahyb, ring_size);
547        }
548    }
549
550    get_ideal_angle(ahyb)
551}