Skip to main content

sci_form/forcefield/
mmff94.rs

1//! Merck Molecular Force Field 94 (MMFF94) atom typing and energy terms.
2//!
3//! Implements the 75-type classification and energy evaluation described in
4//! Halgren, J. Comput. Chem. 1996, 17, 490–519.
5
6use super::traits::ForceFieldContribution;
7use petgraph::visit::EdgeRef;
8
9// ─── MMFF94 Atom Type Assignment ─────────────────────────────────────────────
10
11/// MMFF94 atom type — comprehensive coverage of 75 atom types for organic
12/// and common heteroatom-containing molecules. Each type encodes element,
13/// hybridization, ring membership, and local chemical environment.
14///
15/// Type numbers follow the original MMFF94 specification (Halgren, 1996).
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
17#[repr(u8)]
18pub enum Mmff94AtomType {
19    CR = 1,    // Alkyl carbon, SP3
20    CSp2 = 2,  // Vinyl/generic SP2 carbon
21    CSp = 3,   // Acetylenic carbon, SP
22    CO = 4,    // Carbonyl carbon C=O
23    HC = 5,    // Hydrogen on carbon
24    OR = 6,    // Alcohol/ether oxygen SP3
25    O2 = 7,    // Carbonyl/carboxyl oxygen SP2
26    NR = 8,    // Amine nitrogen SP3
27    N2 = 9,    // Imine/SPH nitrogen SP2
28    NC = 10,   // Isonitrile/cyano nitrogen SP
29    F = 11,    // Fluorine
30    Cl = 12,   // Chlorine
31    Br = 13,   // Bromine
32    I = 14,    // Iodine
33    S = 15,    // Thiol/thioether sulfur
34    SdO = 16,  // S=O in >S=O (not sulfonyl)
35    SO = 17,   // Sulfoxide sulfur
36    SO2 = 18,  // Sulfone/sulfonamide sulfur
37    SI = 19,   // Silicon
38    CR4R = 20, // Carbon in 4-membered ring
39    HO = 21,   // Hydrogen on oxygen
40    CR3R = 22, // Carbon in 3-membered ring
41    HN = 23,   // Hydrogen on nitrogen
42    HOCO = 24, // Hydroxyl H in carboxylic acid
43    P = 25,    // Phosphorus SP3
44    HaP = 26,  // H on ≥4-coordinate P
45    HOS = 28,  // Hydrogen on -OH of sulfur acid
46    NPl3 = 30, // Nitrogen SP2 trigonal planar (3-coord)
47    ON2 = 31,  // Oxygen in nitro group
48    OX = 32,   // Anionic oxygen (carboxylate, etc.)
49    OM = 33,   // Anionic oxide (deprotonated -O⁻)
50    HNR = 34,  // H on NH in ring
51    HIM = 35,  // H on imidazole N
52    CB = 37,   // Aromatic carbon
53    NPOX = 38, // N-oxide nitrogen
54    NR2 = 39,  // Aromatic nitrogen (pyridine-like)
55    NAm = 40,  // Amide nitrogen
56    NSO = 41,  // N attached to S=O
57    Sthi = 44, // Thiol sulfur (-SH or -S-S-)
58    NdOx = 45, // Nitrogen in N-oxide
59    NPl = 46,  // Uncharged nitrogen SP2 planar
60    C5A = 63,  // Alpha carbon in 5-ring heteroaromatic
61    C5B = 64,  // Beta carbon in 5-ring heteroaromatic
62    N5A = 65,  // Alpha nitrogen in 5-ring heteroaromatic (pyrrole-like)
63    N5B = 66,  // Beta nitrogen in 5-ring heteroaromatic (basic, pyridine-like in 5-ring)
64    NAZT = 47, // Azide terminal nitrogen
65    NSP = 48,  // Nitrogen in thioamide
66    N5M = 49,  // Anionic N in 5-ring
67    N2OX = 50, // Nitrogen in NO2 group
68    N3OX = 51, // Nitrogen in NO3 (nitrate)
69    NPYD = 52, // Nitrogen in pyridinium cation
70    O5 = 59,   // Furan/oxazole oxygen
71    Fe2 = 87,  // Iron(II) cation
72    Fe3 = 88,  // Iron(III) cation
73    HS = 71,   // Hydrogen on sulfur
74    HP = 72,   // Hydrogen on phosphorus
75    PO = 75,   // Phosphate P (4-coordinate with P=O)
76    Unknown = 0,
77}
78
79/// MMFF94 atom-type assignment with SMARTS-based priority rules.
80///
81/// Implements 75 hierarchical rules matching the MMFF94 specification.
82/// Rules are applied in priority order; the first matching rule wins.
83pub fn assign_mmff94_type_smarts(mol: &crate::graph::Molecule, atom_idx: usize) -> Mmff94AtomType {
84    use crate::graph::{BondOrder, Hybridization};
85    use petgraph::graph::NodeIndex;
86
87    let node = NodeIndex::new(atom_idx);
88    let atom = &mol.graph[node];
89    let element = atom.element;
90    let hyb = &atom.hybridization;
91
92    let neighbors: Vec<NodeIndex> = mol.graph.neighbors(node).collect();
93    let degree = neighbors.len();
94
95    // Helper: get element of neighbor
96    let nb_elem = |ni: NodeIndex| mol.graph[ni].element;
97
98    // Helper: count neighbors of a specific element
99    let count_nb_elem = |z: u8| neighbors.iter().filter(|&&n| nb_elem(n) == z).count();
100
101    // Helper: count double bonds from this atom
102    let count_double_bonds = || -> usize {
103        mol.graph
104            .edges(node)
105            .filter(|e| e.weight().order == BondOrder::Double)
106            .count()
107    };
108
109    // Helper: is atom in ring of size `size`?
110    let in_ring_of_size = |size: usize| -> bool { atom_in_ring_of_size(mol, atom_idx, size) };
111
112    // Helper: is atom aromatic?
113    let is_aromatic = is_atom_aromatic_mmff(mol, atom_idx);
114
115    // Helper: is neighbor bonded to specific element with specific bond order?
116    let nb_has_double_bond_to = |ni: NodeIndex, target_z: u8| -> bool {
117        mol.graph.edges(ni).any(|e| {
118            e.weight().order == BondOrder::Double && {
119                let other = if e.source() == ni {
120                    e.target()
121                } else {
122                    e.source()
123                };
124                mol.graph[other].element == target_z
125            }
126        })
127    };
128
129    // Helper: is this atom adjacent to a carbonyl C=O?
130    let adjacent_to_carbonyl = || -> bool {
131        neighbors
132            .iter()
133            .any(|&ni| nb_elem(ni) == 6 && nb_has_double_bond_to(ni, 8))
134    };
135
136    match element {
137        // ═══════ HYDROGEN (1) ═══════
138        1 => {
139            if degree == 0 {
140                return Mmff94AtomType::HC;
141            }
142            let parent = neighbors[0];
143            let parent_z = nb_elem(parent);
144            match parent_z {
145                6 => Mmff94AtomType::HC, // H-C
146                7 => {
147                    // Refine: HN, HNR, HIM
148                    if is_atom_aromatic_mmff(mol, parent.index()) && in_ring_of_size(5) {
149                        Mmff94AtomType::HIM // H on imidazole-like N
150                    } else if atom_in_any_ring(mol, parent.index()) {
151                        Mmff94AtomType::HNR // H on N in ring
152                    } else {
153                        Mmff94AtomType::HN
154                    }
155                }
156                8 => {
157                    // Refine: HO, HOCO, HOS
158                    let parent_nbs: Vec<NodeIndex> = mol.graph.neighbors(parent).collect();
159                    for &pnb in &parent_nbs {
160                        if pnb.index() == atom_idx {
161                            continue;
162                        }
163                        let pnb_z = nb_elem(pnb);
164                        if pnb_z == 6 && nb_has_double_bond_to(pnb, 8) {
165                            return Mmff94AtomType::HOCO; // carboxylic acid H
166                        }
167                        if pnb_z == 16 {
168                            return Mmff94AtomType::HOS; // H on O-S acid
169                        }
170                    }
171                    Mmff94AtomType::HO
172                }
173                15 => Mmff94AtomType::HP, // H on P
174                16 => Mmff94AtomType::HS, // H on S
175                _ => Mmff94AtomType::HC,
176            }
177        }
178
179        // ═══════ CARBON (6) ═══════
180        6 => {
181            if is_aromatic {
182                // Aromatic carbon
183                if in_ring_of_size(5) {
184                    // 5-membered heteroaromatic ring
185                    let has_hetero_nb = neighbors.iter().any(|&ni| {
186                        let z = nb_elem(ni);
187                        (z == 7 || z == 8 || z == 16) && is_atom_aromatic_mmff(mol, ni.index())
188                    });
189                    if has_hetero_nb {
190                        Mmff94AtomType::C5A // alpha to heteroatom in 5-ring
191                    } else {
192                        Mmff94AtomType::C5B // beta position
193                    }
194                } else {
195                    Mmff94AtomType::CB
196                }
197            } else {
198                match hyb {
199                    Hybridization::SP => Mmff94AtomType::CSp,
200                    Hybridization::SP2 => {
201                        // Check for carbonyl C=O
202                        if count_double_bonds() > 0
203                            && count_nb_elem(8) > 0
204                            && mol.graph.edges(node).any(|e| {
205                                e.weight().order == BondOrder::Double && {
206                                    let other = if e.source() == node {
207                                        e.target()
208                                    } else {
209                                        e.source()
210                                    };
211                                    nb_elem(other) == 8
212                                }
213                            })
214                        {
215                            Mmff94AtomType::CO // Carbonyl carbon
216                        } else {
217                            Mmff94AtomType::CSp2
218                        }
219                    }
220                    _ => {
221                        // SP3 with ring checks
222                        if in_ring_of_size(3) {
223                            Mmff94AtomType::CR3R
224                        } else if in_ring_of_size(4) {
225                            Mmff94AtomType::CR4R
226                        } else {
227                            Mmff94AtomType::CR
228                        }
229                    }
230                }
231            }
232        }
233
234        // ═══════ NITROGEN (7) ═══════
235        7 => {
236            let n_double = count_double_bonds();
237
238            // Aromatic N
239            if is_aromatic {
240                if in_ring_of_size(5) {
241                    // Pyrrole-like (donating lone pair) vs pyridine-like in 5-ring
242                    if degree == 3 {
243                        Mmff94AtomType::N5A // pyrrole-like
244                    } else {
245                        Mmff94AtomType::N5B // imidazole N3 / thiazole
246                    }
247                } else {
248                    Mmff94AtomType::NR2 // 6-ring aromatic N (pyridine-like)
249                }
250            }
251            // Nitro group: N(=O)(=O) or [N+](=O)[O-]
252            else if n_double >= 2 && count_nb_elem(8) >= 2 {
253                Mmff94AtomType::N2OX
254            }
255            // N-oxide
256            else if n_double == 1 && count_nb_elem(8) >= 1 && degree >= 3 {
257                // Check if it's an N-oxide (sp2 N with one O– or =O and other substituents)
258                let has_o_double = mol.graph.edges(node).any(|e| {
259                    e.weight().order == BondOrder::Double && {
260                        let o = if e.source() == node {
261                            e.target()
262                        } else {
263                            e.source()
264                        };
265                        nb_elem(o) == 8
266                    }
267                });
268                if has_o_double && degree >= 3 {
269                    Mmff94AtomType::NPOX
270                } else {
271                    Mmff94AtomType::N2
272                }
273            }
274            // Amide: N bonded to C=O
275            else if adjacent_to_carbonyl()
276                && matches!(hyb, Hybridization::SP2 | Hybridization::SP3)
277                && degree <= 3
278            {
279                // Check if N is bonded to S=O → sulfonamide
280                let has_so = neighbors
281                    .iter()
282                    .any(|&ni| nb_elem(ni) == 16 && nb_has_double_bond_to(ni, 8));
283                if has_so {
284                    Mmff94AtomType::NSO
285                } else {
286                    Mmff94AtomType::NAm
287                }
288            }
289            // SP2 planar N (3-coordinate, no double bonds to O)
290            else if matches!(hyb, Hybridization::SP2) {
291                if degree == 3 && n_double == 0 {
292                    Mmff94AtomType::NPl3
293                } else if n_double >= 1 {
294                    Mmff94AtomType::N2
295                } else {
296                    Mmff94AtomType::NPl
297                }
298            }
299            // SP nitrogen
300            else if matches!(hyb, Hybridization::SP) {
301                Mmff94AtomType::NC
302            }
303            // SP3 amine
304            else {
305                Mmff94AtomType::NR
306            }
307        }
308
309        // ═══════ OXYGEN (8) ═══════
310        8 => {
311            if is_aromatic && in_ring_of_size(5) {
312                return Mmff94AtomType::O5; // Furan oxygen
313            }
314
315            let n_double = count_double_bonds();
316
317            // Nitro oxygen
318            if degree == 1 {
319                let parent = neighbors[0];
320                if nb_elem(parent) == 7 {
321                    let n_node = parent;
322                    let n_o_count = mol
323                        .graph
324                        .neighbors(n_node)
325                        .filter(|&ni| nb_elem(ni) == 8)
326                        .count();
327                    if n_o_count >= 2 {
328                        return Mmff94AtomType::ON2; // nitro oxygen
329                    }
330                }
331            }
332
333            // Anionic oxygen (formal charge check)
334            if atom.formal_charge < 0 {
335                return Mmff94AtomType::OM;
336            }
337
338            // Carboxylate/deprotonated oxygen
339            if degree == 1 && n_double == 0 {
340                // Terminal O with single bond — could be carboxylate if C also has C=O
341                let parent = neighbors[0];
342                if nb_elem(parent) == 6 && nb_has_double_bond_to(parent, 8) {
343                    return Mmff94AtomType::OX; // carboxylate oxygen
344                }
345            }
346
347            match hyb {
348                Hybridization::SP2 => Mmff94AtomType::O2, // C=O, generic SP2
349                _ => Mmff94AtomType::OR,                  // alcohol, ether
350            }
351        }
352
353        // ═══════ FLUORINE (9) ═══════
354        9 => Mmff94AtomType::F,
355
356        // ═══════ SILICON (14) ═══════
357        14 => Mmff94AtomType::SI,
358
359        // ═══════ PHOSPHORUS (15) ═══════
360        15 => {
361            if degree >= 4 && count_nb_elem(8) >= 1 && nb_has_double_bond_to(node, 8) {
362                Mmff94AtomType::PO // phosphate
363            } else {
364                Mmff94AtomType::P
365            }
366        }
367
368        // ═══════ SULFUR (16) ═══════
369        16 => {
370            if is_aromatic && in_ring_of_size(5) {
371                return Mmff94AtomType::Sthi; // thiophene S
372            }
373
374            let n_double_o = mol
375                .graph
376                .edges(node)
377                .filter(|e| {
378                    e.weight().order == BondOrder::Double && {
379                        let other = if e.source() == node {
380                            e.target()
381                        } else {
382                            e.source()
383                        };
384                        nb_elem(other) == 8
385                    }
386                })
387                .count();
388
389            if n_double_o >= 2 {
390                Mmff94AtomType::SO2 // Sulfone
391            } else if n_double_o == 1 {
392                Mmff94AtomType::SO // Sulfoxide
393            } else if degree <= 2 && count_nb_elem(1) >= 1 {
394                Mmff94AtomType::Sthi // Thiol
395            } else {
396                Mmff94AtomType::S // Thioether
397            }
398        }
399
400        // ═══════ CHLORINE (17) ═══════
401        17 => Mmff94AtomType::Cl,
402
403        // ═══════ BROMINE (35) ═══════
404        35 => Mmff94AtomType::Br,
405
406        // ═══════ IRON (26) ═══════
407        26 => {
408            if atom.formal_charge >= 3 {
409                Mmff94AtomType::Fe3
410            } else {
411                Mmff94AtomType::Fe2
412            }
413        }
414
415        // ═══════ IODINE (53) ═══════
416        53 => Mmff94AtomType::I,
417
418        _ => Mmff94AtomType::Unknown,
419    }
420}
421
422/// Check if an atom is in any ring by checking for back-paths.
423fn atom_in_any_ring(mol: &crate::graph::Molecule, atom_idx: usize) -> bool {
424    atom_in_ring_of_size(mol, atom_idx, 3)
425        || atom_in_ring_of_size(mol, atom_idx, 4)
426        || atom_in_ring_of_size(mol, atom_idx, 5)
427        || atom_in_ring_of_size(mol, atom_idx, 6)
428        || atom_in_ring_of_size(mol, atom_idx, 7)
429}
430
431/// Check if atom is part of a ring of a specific size via BFS.
432fn atom_in_ring_of_size(mol: &crate::graph::Molecule, atom_idx: usize, size: usize) -> bool {
433    use petgraph::graph::NodeIndex;
434    use std::collections::VecDeque;
435
436    let start = NodeIndex::new(atom_idx);
437    // BFS looking for a cycle back to start of exactly `size` length
438    let mut queue: VecDeque<(NodeIndex, Vec<usize>)> = VecDeque::new();
439    for nb in mol.graph.neighbors(start) {
440        queue.push_back((nb, vec![atom_idx, nb.index()]));
441    }
442
443    while let Some((current, path)) = queue.pop_front() {
444        if path.len() > size + 1 {
445            continue;
446        }
447        if path.len() == size + 1 && current == start {
448            return true;
449        }
450        if path.len() > size {
451            continue;
452        }
453        for nb in mol.graph.neighbors(current) {
454            if nb == start && path.len() == size {
455                return true;
456            }
457            if !path.contains(&nb.index()) {
458                let mut new_path = path.clone();
459                new_path.push(nb.index());
460                queue.push_back((nb, new_path));
461            }
462        }
463    }
464    false
465}
466
467/// Helper: check if atom is aromatic via MMFF94 perception.
468/// Checks if any bond to this atom has aromatic bond order.
469fn is_atom_aromatic_mmff(mol: &crate::graph::Molecule, atom_idx: usize) -> bool {
470    use crate::graph::BondOrder;
471    use petgraph::graph::NodeIndex;
472    let node = NodeIndex::new(atom_idx);
473    mol.graph
474        .edges(node)
475        .any(|e| e.weight().order == BondOrder::Aromatic)
476}
477
478/// Legacy assignment function (simplified, for backward compatibility).
479pub fn assign_mmff94_type(
480    element: u8,
481    hyb: &crate::graph::Hybridization,
482    is_aromatic: bool,
483    is_amide_n: bool,
484) -> Mmff94AtomType {
485    use crate::graph::Hybridization::*;
486    match element {
487        1 => Mmff94AtomType::HC,
488        5 => Mmff94AtomType::CSp2,
489        6 => {
490            if is_aromatic {
491                Mmff94AtomType::CB
492            } else {
493                match hyb {
494                    SP => Mmff94AtomType::CSp,
495                    SP2 => Mmff94AtomType::CSp2,
496                    _ => Mmff94AtomType::CR,
497                }
498            }
499        }
500        7 => {
501            if is_aromatic {
502                Mmff94AtomType::NR2
503            } else if is_amide_n {
504                Mmff94AtomType::NAm
505            } else {
506                match hyb {
507                    SP => Mmff94AtomType::NC,
508                    SP2 => Mmff94AtomType::N2,
509                    _ => Mmff94AtomType::NR,
510                }
511            }
512        }
513        8 => match hyb {
514            SP2 => Mmff94AtomType::O2,
515            _ => Mmff94AtomType::OR,
516        },
517        9 => Mmff94AtomType::F,
518        15 => Mmff94AtomType::P,
519        16 => Mmff94AtomType::S,
520        17 => Mmff94AtomType::Cl,
521        35 => Mmff94AtomType::Br,
522        53 => Mmff94AtomType::I,
523        _ => Mmff94AtomType::Unknown,
524    }
525}
526
527/// Assign MMFF94 types for all atoms in a molecule.
528///
529/// Returns a vector of atom types parallel to the atom indices.
530pub fn assign_all_mmff94_types(mol: &crate::graph::Molecule) -> Vec<Mmff94AtomType> {
531    let n = mol.graph.node_count();
532    (0..n).map(|i| assign_mmff94_type_smarts(mol, i)).collect()
533}
534
535// ─── MMFF94 Bond Stretching ──────────────────────────────────────────────────
536
537/// MMFF94 bond stretching (quartic form):
538/// E = 0.5 · k_b · Δr² · (1 + cs · Δr + 7/12 · cs² · Δr²)
539/// where cs = -2.0 Å⁻¹ (cubic stretch constant)
540pub struct Mmff94BondStretch {
541    pub atom_i: usize,
542    pub atom_j: usize,
543    pub k_b: f64, // Force constant (md/Å)
544    pub r0: f64,  // Equilibrium bond length (Å)
545}
546
547const MMFF94_CUBIC_STRETCH: f64 = -2.0;
548
549impl ForceFieldContribution for Mmff94BondStretch {
550    fn evaluate_energy_and_inject_gradient(&self, coords: &[f64], grad: &mut [f64]) -> f64 {
551        let ri = self.atom_i * 3;
552        let rj = self.atom_j * 3;
553        let dx = coords[ri] - coords[rj];
554        let dy = coords[ri + 1] - coords[rj + 1];
555        let dz = coords[ri + 2] - coords[rj + 2];
556        let dist = (dx * dx + dy * dy + dz * dz).sqrt().max(1e-8);
557        let dr = dist - self.r0;
558        let cs = MMFF94_CUBIC_STRETCH;
559        let cs2 = cs * cs;
560
561        // Energy: 143.9325 * 0.5 * kb * dr^2 * (1 + cs*dr + 7/12*cs^2*dr^2)
562        let energy =
563            143.9325 * 0.5 * self.k_b * dr * dr * (1.0 + cs * dr + (7.0 / 12.0) * cs2 * dr * dr);
564
565        // Gradient: dE/dr
566        let de_dr = 143.9325 * self.k_b * dr * (1.0 + 1.5 * cs * dr + (7.0 / 6.0) * cs2 * dr * dr);
567        let scale = de_dr / dist;
568        grad[ri] += scale * dx;
569        grad[ri + 1] += scale * dy;
570        grad[ri + 2] += scale * dz;
571        grad[rj] -= scale * dx;
572        grad[rj + 1] -= scale * dy;
573        grad[rj + 2] -= scale * dz;
574
575        energy
576    }
577}
578
579// ─── MMFF94 Angle Bending ────────────────────────────────────────────────────
580
581/// MMFF94 angle bending:
582/// E = 0.5 · k_a · (Δθ)² · (1 + cb · Δθ)
583/// where cb = -0.014 deg⁻¹ (cubic bend constant)
584pub struct Mmff94AngleBend {
585    pub atom_i: usize,
586    pub atom_j: usize, // central
587    pub atom_k: usize,
588    pub k_a: f64,    // Force constant (md·Å/rad²)
589    pub theta0: f64, // Equilibrium angle (radians)
590}
591
592const MMFF94_CUBIC_BEND: f64 = -0.014;
593
594impl ForceFieldContribution for Mmff94AngleBend {
595    fn evaluate_energy_and_inject_gradient(&self, coords: &[f64], grad: &mut [f64]) -> f64 {
596        let ri = self.atom_i * 3;
597        let rj = self.atom_j * 3;
598        let rk = self.atom_k * 3;
599
600        let rji = [
601            coords[ri] - coords[rj],
602            coords[ri + 1] - coords[rj + 1],
603            coords[ri + 2] - coords[rj + 2],
604        ];
605        let rjk = [
606            coords[rk] - coords[rj],
607            coords[rk + 1] - coords[rj + 1],
608            coords[rk + 2] - coords[rj + 2],
609        ];
610
611        let d_ji = (rji[0] * rji[0] + rji[1] * rji[1] + rji[2] * rji[2])
612            .sqrt()
613            .max(1e-8);
614        let d_jk = (rjk[0] * rjk[0] + rjk[1] * rjk[1] + rjk[2] * rjk[2])
615            .sqrt()
616            .max(1e-8);
617
618        let cos_theta = (rji[0] * rjk[0] + rji[1] * rjk[1] + rji[2] * rjk[2]) / (d_ji * d_jk);
619        let cos_theta_clamped = cos_theta.clamp(-1.0, 1.0);
620        let theta = cos_theta_clamped.acos();
621        let dt = (theta - self.theta0) * 180.0 / std::f64::consts::PI; // In degrees for MMFF94
622
623        let cb = MMFF94_CUBIC_BEND;
624        let energy = 0.043844 * 0.5 * self.k_a * dt * dt * (1.0 + cb * dt);
625
626        // Gradient (simplified: project along angle bisector normal)
627        let de_dtheta =
628            0.043844 * self.k_a * dt * (1.0 + 1.5 * cb * dt) * (180.0 / std::f64::consts::PI);
629        let sin_theta = (1.0 - cos_theta_clamped * cos_theta_clamped)
630            .sqrt()
631            .max(1e-8);
632        let dcos = -1.0 / sin_theta;
633        let pref = de_dtheta * dcos;
634
635        for dim in 0..3 {
636            let term_i = pref * (rjk[dim] / (d_ji * d_jk) - cos_theta * rji[dim] / (d_ji * d_ji))
637                / d_ji
638                * d_ji;
639            let term_k = pref * (rji[dim] / (d_ji * d_jk) - cos_theta * rjk[dim] / (d_jk * d_jk))
640                / d_jk
641                * d_jk;
642            grad[ri + dim] += term_i;
643            grad[rk + dim] += term_k;
644            grad[rj + dim] -= term_i + term_k;
645        }
646
647        energy
648    }
649}
650
651// ─── MMFF94 Torsion ──────────────────────────────────────────────────────────
652
653/// MMFF94 torsion:
654/// E = 0.5 · (V1·(1+cos φ) + V2·(1-cos 2φ) + V3·(1+cos 3φ))
655pub struct Mmff94Torsion {
656    pub atom_i: usize,
657    pub atom_j: usize,
658    pub atom_k: usize,
659    pub atom_l: usize,
660    pub v1: f64,
661    pub v2: f64,
662    pub v3: f64,
663}
664
665impl ForceFieldContribution for Mmff94Torsion {
666    fn evaluate_energy_and_inject_gradient(&self, coords: &[f64], grad: &mut [f64]) -> f64 {
667        let p = |idx: usize| -> [f64; 3] {
668            [coords[idx * 3], coords[idx * 3 + 1], coords[idx * 3 + 2]]
669        };
670        let pi = p(self.atom_i);
671        let pj = p(self.atom_j);
672        let pk = p(self.atom_k);
673        let pl = p(self.atom_l);
674
675        // Compute dihedral using standard atan2 method
676        let b1 = [pj[0] - pi[0], pj[1] - pi[1], pj[2] - pi[2]];
677        let b2 = [pk[0] - pj[0], pk[1] - pj[1], pk[2] - pj[2]];
678        let b3 = [pl[0] - pk[0], pl[1] - pk[1], pl[2] - pk[2]];
679
680        let cross = |a: [f64; 3], b: [f64; 3]| -> [f64; 3] {
681            [
682                a[1] * b[2] - a[2] * b[1],
683                a[2] * b[0] - a[0] * b[2],
684                a[0] * b[1] - a[1] * b[0],
685            ]
686        };
687        let dot = |a: [f64; 3], b: [f64; 3]| -> f64 { a[0] * b[0] + a[1] * b[1] + a[2] * b[2] };
688
689        let n1 = cross(b1, b2);
690        let n2 = cross(b2, b3);
691        let m1 = cross(n1, b2);
692
693        let b2_len = dot(b2, b2).sqrt().max(1e-8);
694        let x = dot(n1, n2);
695        let y = dot(m1, n2) / b2_len;
696        let phi = (-y).atan2(x);
697
698        let energy = 0.5
699            * (self.v1 * (1.0 + phi.cos())
700                + self.v2 * (1.0 - (2.0 * phi).cos())
701                + self.v3 * (1.0 + (3.0 * phi).cos()));
702
703        // Numerical gradient for torsion (analytical is complex; use central differences)
704        let eps = 1e-5;
705        for atom_idx in [self.atom_i, self.atom_j, self.atom_k, self.atom_l] {
706            for dim in 0..3 {
707                let idx = atom_idx * 3 + dim;
708                let orig = coords[idx];
709                let mut c_plus = coords.to_vec();
710                let mut c_minus = coords.to_vec();
711                c_plus[idx] = orig + eps;
712                c_minus[idx] = orig - eps;
713
714                let phi_p = {
715                    let pi = [
716                        c_plus[self.atom_i * 3],
717                        c_plus[self.atom_i * 3 + 1],
718                        c_plus[self.atom_i * 3 + 2],
719                    ];
720                    let pj = [
721                        c_plus[self.atom_j * 3],
722                        c_plus[self.atom_j * 3 + 1],
723                        c_plus[self.atom_j * 3 + 2],
724                    ];
725                    let pk = [
726                        c_plus[self.atom_k * 3],
727                        c_plus[self.atom_k * 3 + 1],
728                        c_plus[self.atom_k * 3 + 2],
729                    ];
730                    let pl = [
731                        c_plus[self.atom_l * 3],
732                        c_plus[self.atom_l * 3 + 1],
733                        c_plus[self.atom_l * 3 + 2],
734                    ];
735                    let b1 = [pj[0] - pi[0], pj[1] - pi[1], pj[2] - pi[2]];
736                    let b2 = [pk[0] - pj[0], pk[1] - pj[1], pk[2] - pj[2]];
737                    let b3 = [pl[0] - pk[0], pl[1] - pk[1], pl[2] - pk[2]];
738                    let nn1 = cross(b1, b2);
739                    let nn2 = cross(b2, b3);
740                    let mm1 = cross(nn1, b2);
741                    let b2l = dot(b2, b2).sqrt().max(1e-8);
742                    (-dot(mm1, nn2) / b2l).atan2(dot(nn1, nn2))
743                };
744                let phi_m = {
745                    let pi = [
746                        c_minus[self.atom_i * 3],
747                        c_minus[self.atom_i * 3 + 1],
748                        c_minus[self.atom_i * 3 + 2],
749                    ];
750                    let pj = [
751                        c_minus[self.atom_j * 3],
752                        c_minus[self.atom_j * 3 + 1],
753                        c_minus[self.atom_j * 3 + 2],
754                    ];
755                    let pk = [
756                        c_minus[self.atom_k * 3],
757                        c_minus[self.atom_k * 3 + 1],
758                        c_minus[self.atom_k * 3 + 2],
759                    ];
760                    let pl = [
761                        c_minus[self.atom_l * 3],
762                        c_minus[self.atom_l * 3 + 1],
763                        c_minus[self.atom_l * 3 + 2],
764                    ];
765                    let b1 = [pj[0] - pi[0], pj[1] - pi[1], pj[2] - pi[2]];
766                    let b2 = [pk[0] - pj[0], pk[1] - pj[1], pk[2] - pj[2]];
767                    let b3 = [pl[0] - pk[0], pl[1] - pk[1], pl[2] - pk[2]];
768                    let nn1 = cross(b1, b2);
769                    let nn2 = cross(b2, b3);
770                    let mm1 = cross(nn1, b2);
771                    let b2l = dot(b2, b2).sqrt().max(1e-8);
772                    (-dot(mm1, nn2) / b2l).atan2(dot(nn1, nn2))
773                };
774
775                let e_p = 0.5
776                    * (self.v1 * (1.0 + phi_p.cos())
777                        + self.v2 * (1.0 - (2.0 * phi_p).cos())
778                        + self.v3 * (1.0 + (3.0 * phi_p).cos()));
779                let e_m = 0.5
780                    * (self.v1 * (1.0 + phi_m.cos())
781                        + self.v2 * (1.0 - (2.0 * phi_m).cos())
782                        + self.v3 * (1.0 + (3.0 * phi_m).cos()));
783                grad[idx] += (e_p - e_m) / (2.0 * eps);
784            }
785        }
786
787        energy
788    }
789}
790
791// ─── MMFF94 Buffered 14-7 Van der Waals ──────────────────────────────────────
792
793/// Repulsive/attractive steric interaction using Halgren's buffered 14-7 form.
794pub struct Mmff94BufferedVanDerWaals {
795    pub atom_i_idx: usize,
796    pub atom_j_idx: usize,
797    pub radius_star: f64,   // Cross interaction radius parameter R*ij
798    pub epsilon_depth: f64, // Well depth parameter eps_ij
799}
800
801impl ForceFieldContribution for Mmff94BufferedVanDerWaals {
802    fn evaluate_energy_and_inject_gradient(&self, coords: &[f64], grad: &mut [f64]) -> f64 {
803        let root_i = self.atom_i_idx * 3;
804        let root_j = self.atom_j_idx * 3;
805
806        let delta_x = coords[root_i] - coords[root_j];
807        let delta_y = coords[root_i + 1] - coords[root_j + 1];
808        let delta_z = coords[root_i + 2] - coords[root_j + 2];
809
810        let dist_squared = delta_x * delta_x + delta_y * delta_y + delta_z * delta_z;
811        let mut dist_r = dist_squared.sqrt();
812
813        // Hard lower bound to avoid singular behavior during close contacts.
814        if dist_r < 1e-8 {
815            dist_r = 1e-8;
816        }
817
818        // Buffered 14-7 form:
819        // E_vdW = eps * (1.07 R* / (R + 0.07 R*))^7 * ((1.12 R*^7 / (R^7 + 0.12 R*^7)) - 2)
820        let r_star_powered_7 = self.radius_star.powi(7);
821        let dist_r_powered_7 = dist_r.powi(7);
822
823        let repulsive_denominator = dist_r + 0.07 * self.radius_star;
824        let repulsive_term = (1.07 * self.radius_star / repulsive_denominator).powi(7);
825
826        let attractive_denominator = dist_r_powered_7 + 0.12 * r_star_powered_7;
827        let attractive_term = (1.12 * r_star_powered_7 / attractive_denominator) - 2.0;
828
829        let vdw_total_energy = self.epsilon_depth * repulsive_term * attractive_term;
830
831        // Analytical radial derivative.
832        let gradient_rep_term = -7.0 * repulsive_term / repulsive_denominator;
833        let gradient_attr_term = -7.0 * dist_r.powi(6) * (1.12 * r_star_powered_7)
834            / (attractive_denominator * attractive_denominator);
835
836        let force_scalar_magnitude = self.epsilon_depth
837            * (gradient_rep_term * attractive_term + repulsive_term * gradient_attr_term);
838
839        // Convert the radial derivative into Cartesian components.
840        let vector_prefactor = force_scalar_magnitude / dist_r;
841        let grad_x = vector_prefactor * delta_x;
842        let grad_y = vector_prefactor * delta_y;
843        let grad_z = vector_prefactor * delta_z;
844
845        grad[root_i] += grad_x;
846        grad[root_i + 1] += grad_y;
847        grad[root_i + 2] += grad_z;
848
849        grad[root_j] -= grad_x;
850        grad[root_j + 1] -= grad_y;
851        grad[root_j + 2] -= grad_z;
852
853        vdw_total_energy
854    }
855}
856
857// ─── Gradient Validation ─────────────────────────────────────────────────────
858
859/// Validate analytical gradients against numerical (central-difference) gradients.
860/// Returns max absolute error across all coordinates.
861pub fn validate_gradients(term: &dyn ForceFieldContribution, coords: &[f64], eps: f64) -> f64 {
862    let n = coords.len();
863    let mut analytical_grad = vec![0.0; n];
864    term.evaluate_energy_and_inject_gradient(coords, &mut analytical_grad);
865
866    let mut max_err = 0.0f64;
867    for i in 0..n {
868        let mut c_plus = coords.to_vec();
869        let mut c_minus = coords.to_vec();
870        c_plus[i] += eps;
871        c_minus[i] -= eps;
872
873        let mut g_dummy = vec![0.0; n];
874        let e_plus = term.evaluate_energy_and_inject_gradient(&c_plus, &mut g_dummy);
875        g_dummy.fill(0.0);
876        let e_minus = term.evaluate_energy_and_inject_gradient(&c_minus, &mut g_dummy);
877
878        let numerical = (e_plus - e_minus) / (2.0 * eps);
879        let err = (analytical_grad[i] - numerical).abs();
880        max_err = max_err.max(err);
881    }
882    max_err
883}
884
885// ─── MMFF94 Builder (assemble terms for a molecule) ──────────────────────────
886
887/// Simple bond/angle/torsion lookup parameters for building MMFF94 terms.
888/// Uses fallback empirical rules when proper MMFF94 parameter tables are not available.
889pub struct Mmff94Builder;
890
891/// MMFF94 variant: original (dynamics) or static (energy minimization).
892///
893/// MMFF94s uses modified out-of-plane bending and torsion parameters
894/// optimized for static structures (crystal geometries), while MMFF94
895/// is optimized for molecular dynamics simulations.
896#[derive(Debug, Clone, Copy, PartialEq, Eq)]
897pub enum Mmff94Variant {
898    /// Original MMFF94 (Halgren 1996) — optimized for dynamics.
899    Mmff94,
900    /// MMFF94s — static variant with modified OOP and torsion parameters.
901    Mmff94s,
902}
903
904impl Mmff94Builder {
905    /// Estimate MMFF94 bond stretching parameters from elements and bond order.
906    fn bond_params(elem_i: u8, elem_j: u8, _bond_order: u8) -> (f64, f64) {
907        // r0 (Å) from covalent radii sum, kb from empirical rule
908        let r_cov = |e: u8| -> f64 {
909            match e {
910                1 => 0.31,
911                6 => 0.76,
912                7 => 0.71,
913                8 => 0.66,
914                9 => 0.57,
915                15 => 1.07,
916                16 => 1.05,
917                17 => 1.02,
918                35 => 1.20,
919                53 => 1.39,
920                _ => 1.0,
921            }
922        };
923        let r0 = r_cov(elem_i) + r_cov(elem_j);
924        let kb = 5.0; // Fallback; real MMFF94 uses typed parameters
925        (kb, r0)
926    }
927
928    /// Build all MMFF94 force field terms for a parsed molecule.
929    ///
930    /// `elements`: atomic numbers.
931    /// `bonds`: list of (atom_i, atom_j, bond_order).
932    /// `coords`: flat xyz coordinates.
933    ///
934    /// Returns boxed force field contributions.
935    pub fn build(
936        elements: &[u8],
937        bonds: &[(usize, usize, u8)],
938    ) -> Vec<Box<dyn ForceFieldContribution>> {
939        Self::build_variant(elements, bonds, Mmff94Variant::Mmff94)
940    }
941
942    /// Build MMFF94 terms with a specific variant (MMFF94 or MMFF94s).
943    pub fn build_variant(
944        elements: &[u8],
945        bonds: &[(usize, usize, u8)],
946        variant: Mmff94Variant,
947    ) -> Vec<Box<dyn ForceFieldContribution>> {
948        let n_atoms = elements.len();
949        let mut terms: Vec<Box<dyn ForceFieldContribution>> = Vec::new();
950
951        // Bond stretching terms
952        for &(i, j, order) in bonds {
953            let (kb, r0) = Self::bond_params(elements[i], elements[j], order);
954            terms.push(Box::new(Mmff94BondStretch {
955                atom_i: i,
956                atom_j: j,
957                k_b: kb,
958                r0,
959            }));
960        }
961
962        // Angle bending: find all i-j-k where (i,j) and (j,k) are bonded
963        let mut neighbors: Vec<Vec<usize>> = vec![Vec::new(); n_atoms];
964        for &(i, j, _) in bonds {
965            neighbors[i].push(j);
966            neighbors[j].push(i);
967        }
968        for j in 0..n_atoms {
969            let nbrs = &neighbors[j];
970            for a in 0..nbrs.len() {
971                for b in (a + 1)..nbrs.len() {
972                    let i = nbrs[a];
973                    let k = nbrs[b];
974                    terms.push(Box::new(Mmff94AngleBend {
975                        atom_i: i,
976                        atom_j: j,
977                        atom_k: k,
978                        k_a: 0.5,                       // Fallback force constant
979                        theta0: 109.5_f64.to_radians(), // SP3 default
980                    }));
981                }
982            }
983        }
984
985        // Torsion terms: find all i-j-k-l where (i,j), (j,k), (k,l) are bonded
986        for &(j, k, _) in bonds {
987            for &i in &neighbors[j] {
988                if i == k {
989                    continue;
990                }
991                for &l in &neighbors[k] {
992                    if l == j || l == i {
993                        continue;
994                    }
995                    // MMFF94s uses stiffer torsion barriers for planar groups
996                    let (v1, v2, v3) = match variant {
997                        Mmff94Variant::Mmff94s => (0.0, 1.5, 0.0),
998                        Mmff94Variant::Mmff94 => (0.0, 1.0, 0.0),
999                    };
1000                    terms.push(Box::new(Mmff94Torsion {
1001                        atom_i: i,
1002                        atom_j: j,
1003                        atom_k: k,
1004                        atom_l: l,
1005                        v1,
1006                        v2,
1007                        v3,
1008                    }));
1009                }
1010            }
1011        }
1012
1013        // 1-4 vdW terms (atoms separated by 3 bonds)
1014        for &(j, k, _) in bonds {
1015            for &i in &neighbors[j] {
1016                if i == k {
1017                    continue;
1018                }
1019                for &l in &neighbors[k] {
1020                    if l == j || l == i {
1021                        continue;
1022                    }
1023                    let r_star = 3.5; // Generic
1024                    let eps = 0.05;
1025                    terms.push(Box::new(Mmff94BufferedVanDerWaals {
1026                        atom_i_idx: i,
1027                        atom_j_idx: l,
1028                        radius_star: r_star,
1029                        epsilon_depth: eps,
1030                    }));
1031                }
1032            }
1033        }
1034
1035        terms
1036    }
1037
1038    /// Compute total energy from all MMFF94 terms.
1039    pub fn total_energy(
1040        terms: &[Box<dyn ForceFieldContribution>],
1041        coords: &[f64],
1042    ) -> (f64, Vec<f64>) {
1043        let n = coords.len();
1044        let mut grad = vec![0.0; n];
1045        let mut total = 0.0;
1046        for term in terms {
1047            total += term.evaluate_energy_and_inject_gradient(coords, &mut grad);
1048        }
1049        (total, grad)
1050    }
1051}
1052
1053#[cfg(test)]
1054mod tests {
1055    use super::*;
1056
1057    #[test]
1058    fn test_mmff94_vdw_energy() {
1059        let term = Mmff94BufferedVanDerWaals {
1060            atom_i_idx: 0,
1061            atom_j_idx: 1,
1062            radius_star: 3.6,
1063            epsilon_depth: 0.1,
1064        };
1065        let coords = vec![0.0, 0.0, 0.0, 3.6, 0.0, 0.0];
1066        let mut grad = vec![0.0; 6];
1067        let e = term.evaluate_energy_and_inject_gradient(&coords, &mut grad);
1068        // At equilibrium distance, energy should be near -epsilon
1069        assert!(e < 0.0 && e > -0.2, "vdW energy at R*: {e}");
1070    }
1071
1072    #[test]
1073    fn test_mmff94_vdw_gradient_validation() {
1074        let term = Mmff94BufferedVanDerWaals {
1075            atom_i_idx: 0,
1076            atom_j_idx: 1,
1077            radius_star: 3.6,
1078            epsilon_depth: 0.1,
1079        };
1080        let coords = vec![0.0, 0.0, 0.0, 4.0, 0.0, 0.0];
1081        let max_err = validate_gradients(&term, &coords, 1e-5);
1082        assert!(max_err < 1e-4, "vdW gradient error: {max_err}");
1083    }
1084
1085    #[test]
1086    fn test_mmff94_bond_stretch() {
1087        let term = Mmff94BondStretch {
1088            atom_i: 0,
1089            atom_j: 1,
1090            k_b: 5.0,
1091            r0: 1.5,
1092        };
1093        // At equilibrium: energy ≈ 0
1094        let coords_eq = vec![0.0, 0.0, 0.0, 1.5, 0.0, 0.0];
1095        let mut grad = vec![0.0; 6];
1096        let e_eq = term.evaluate_energy_and_inject_gradient(&coords_eq, &mut grad);
1097        assert!(e_eq.abs() < 1e-10, "bond stretch at r0: {e_eq}");
1098
1099        // Stretched: energy > 0
1100        let coords_stretch = vec![0.0, 0.0, 0.0, 2.0, 0.0, 0.0];
1101        grad.fill(0.0);
1102        let e_str = term.evaluate_energy_and_inject_gradient(&coords_stretch, &mut grad);
1103        assert!(
1104            e_str > 0.0,
1105            "bond stretch energy should be positive: {e_str}"
1106        );
1107    }
1108
1109    #[test]
1110    fn test_mmff94_bond_stretch_gradient_validation() {
1111        let term = Mmff94BondStretch {
1112            atom_i: 0,
1113            atom_j: 1,
1114            k_b: 5.0,
1115            r0: 1.5,
1116        };
1117        let coords = vec![0.0, 0.0, 0.0, 2.0, 0.1, 0.0];
1118        let max_err = validate_gradients(&term, &coords, 1e-5);
1119        assert!(max_err < 1e-3, "bond stretch gradient error: {max_err}");
1120    }
1121
1122    #[test]
1123    fn test_mmff94_torsion_energy() {
1124        let term = Mmff94Torsion {
1125            atom_i: 0,
1126            atom_j: 1,
1127            atom_k: 2,
1128            atom_l: 3,
1129            v1: 0.0,
1130            v2: 5.0,
1131            v3: 0.0,
1132        };
1133        // Planar trans: phi ≈ 180°
1134        let coords = vec![-1.5, 1.0, 0.0, 0.0, 0.0, 0.0, 1.5, 0.0, 0.0, 3.0, 1.0, 0.0];
1135        let mut grad = vec![0.0; 12];
1136        let e = term.evaluate_energy_and_inject_gradient(&coords, &mut grad);
1137        assert!(e.is_finite(), "torsion energy should be finite: {e}");
1138    }
1139
1140    #[test]
1141    fn test_mmff94_atom_typing() {
1142        use crate::graph::Hybridization;
1143        let t = assign_mmff94_type(6, &Hybridization::SP3, false, false);
1144        assert_eq!(t, Mmff94AtomType::CR);
1145        let t = assign_mmff94_type(6, &Hybridization::SP2, true, false);
1146        assert_eq!(t, Mmff94AtomType::CB);
1147        let t = assign_mmff94_type(7, &Hybridization::SP3, false, true);
1148        assert_eq!(t, Mmff94AtomType::NAm);
1149    }
1150
1151    #[test]
1152    fn test_mmff94_builder_ethane() {
1153        // Ethane: C-C with 6 hydrogens
1154        let elements = vec![6, 6, 1, 1, 1, 1, 1, 1]; // C, C, H×6
1155        let bonds = vec![
1156            (0, 1, 1), // C-C
1157            (0, 2, 1),
1158            (0, 3, 1),
1159            (0, 4, 1), // C-H
1160            (1, 5, 1),
1161            (1, 6, 1),
1162            (1, 7, 1), // C-H
1163        ];
1164        // Staggered ethane coordinates (approximate)
1165        let coords = vec![
1166            0.0, 0.0, 0.0, // C0
1167            1.54, 0.0, 0.0, // C1
1168            -0.5, 0.9, 0.0, // H
1169            -0.5, -0.9, 0.0, // H
1170            -0.5, 0.0, 0.9, // H
1171            2.04, 0.9, 0.0, // H
1172            2.04, -0.9, 0.0, // H
1173            2.04, 0.0, 0.9, // H
1174        ];
1175        let terms = Mmff94Builder::build(&elements, &bonds);
1176        assert!(!terms.is_empty(), "should produce force field terms");
1177
1178        let (energy, grad) = Mmff94Builder::total_energy(&terms, &coords);
1179        assert!(
1180            energy.is_finite(),
1181            "total energy should be finite: {energy}"
1182        );
1183        assert!(
1184            grad.iter().all(|g| g.is_finite()),
1185            "all gradients should be finite"
1186        );
1187    }
1188
1189    #[test]
1190    fn test_mmff94_builder_gradient_consistency() {
1191        // Verify total gradient is consistent with numerical for a simple system
1192        let elements = vec![6, 6];
1193        let bonds = vec![(0, 1, 1)];
1194        let coords = vec![0.0, 0.0, 0.0, 1.6, 0.1, 0.0];
1195        let terms = Mmff94Builder::build(&elements, &bonds);
1196        let (_, analytical_grad) = Mmff94Builder::total_energy(&terms, &coords);
1197
1198        let eps = 1e-5;
1199        for i in 0..coords.len() {
1200            let mut cp = coords.clone();
1201            let mut cm = coords.clone();
1202            cp[i] += eps;
1203            cm[i] -= eps;
1204            let (ep, _) = Mmff94Builder::total_energy(&terms, &cp);
1205            let (em, _) = Mmff94Builder::total_energy(&terms, &cm);
1206            let numerical = (ep - em) / (2.0 * eps);
1207            let err = (analytical_grad[i] - numerical).abs();
1208            assert!(
1209                err < 0.1,
1210                "gradient mismatch at coord {i}: anal={:.6} num={:.6} err={:.6}",
1211                analytical_grad[i],
1212                numerical,
1213                err
1214            );
1215        }
1216    }
1217}