Skip to main content

sci_form/
conformer.rs

1use crate::distgeom::{
2    calculate_bounds_matrix_opts, check_chiral_centers, check_double_bond_geometry,
3    check_tetrahedral_centers, compute_initial_coords_rdkit, identify_chiral_sets,
4    identify_tetrahedral_centers, pick_rdkit_distances, triangle_smooth_tol, MinstdRand,
5    MAX_MINIMIZED_E_PER_ATOM,
6};
7use crate::forcefield::bounds_ff::minimize_bfgs_rdkit;
8use crate::forcefield::etkdg_3d::{build_etkdg_3d_ff_with_torsions, minimize_etkdg_3d_bfgs};
9use crate::graph::Molecule;
10/// End-to-end 3D conformer generation pipeline.
11///
12/// Algorithm matching RDKit's ETKDG embedPoints() with retry-on-failure:
13///   1. Build distance-bounds matrix → Floyd-Warshall smoothing
14///   2. Identify chiral sets + tetrahedral centers (for validation)
15///   3. Retry loop (up to 10×N attempts):
16///      a. Sample random distances → metric matrix → 3D/4D embedding (4D only if chiral)
17///      b. First minimization: bounds FF (chiral_w=1.0, 4d_w=0.1, basin=5.0), loop until converged
18///      c. Energy/atom check: reject if energy/N ≥ 0.05
19///      d. Tetrahedral center volume check
20///      e. Chiral center sign check
21///      f. Second minimization: bounds FF (chiral_w=0.2, 4d_w=1.0), loop until converged
22///      g. Drop to 3D → ETKDG 3D FF minimization (300 iters, single pass)
23///      h. Planarity check (OOP energy)
24///      i. Double bond geometry check
25use nalgebra::DMatrix;
26
27const BASIN_THRESH: f32 = 5.0;
28const FORCE_TOL: f32 = 1e-3;
29const PLANARITY_TOLERANCE: f32 = 1.0;
30const ERROR_TOL: f64 = 1e-5; // RDKit's ERROR_TOL for energy pre-check
31
32/// Generate a 3D conformer from a SMILES string.
33pub fn generate_3d_conformer_from_smiles(smiles: &str, seed: u64) -> Result<DMatrix<f32>, String> {
34    let mol = Molecule::from_smiles(smiles)?;
35    generate_3d_conformer(&mol, seed)
36}
37
38/// Generate a 3D conformer for an already-parsed `Molecule`.
39///
40/// Implements RDKit's embedPoints() retry-on-failure loop.
41/// Returns the first valid 3D conformer, or an error if all attempts fail.
42pub fn generate_3d_conformer(mol: &Molecule, seed: u64) -> Result<DMatrix<f32>, String> {
43    let csd_torsions = crate::smarts::match_experimental_torsions(mol);
44    generate_3d_conformer_with_torsions(mol, seed, &csd_torsions)
45}
46
47/// Generate multiple conformers with different seeds and return the one
48/// with the lowest ETKDG 3D force field energy (best geometry).
49pub fn generate_3d_conformer_best_of_k(
50    mol: &Molecule,
51    seed: u64,
52    csd_torsions: &[crate::forcefield::etkdg_3d::M6TorsionContrib],
53    num_seeds: usize,
54) -> Result<DMatrix<f32>, String> {
55    if num_seeds <= 1 {
56        return generate_3d_conformer_with_torsions(mol, seed, csd_torsions);
57    }
58
59    let mut best: Option<(DMatrix<f32>, f64)> = None;
60    let mut last_err = String::new();
61
62    // Pre-compute bounds + FF scaffold once (topology-dependent, not seed-dependent)
63    let bounds = {
64        let raw = calculate_bounds_matrix_opts(mol, true);
65        let mut b = raw;
66        if triangle_smooth_tol(&mut b, 0.0) {
67            b
68        } else {
69            let raw2 = calculate_bounds_matrix_opts(mol, false);
70            let mut b2 = raw2.clone();
71            if triangle_smooth_tol(&mut b2, 0.0) {
72                b2
73            } else {
74                let mut b3 = raw2;
75                triangle_smooth_tol(&mut b3, 0.05);
76                b3
77            }
78        }
79    };
80
81    for k in 0..num_seeds {
82        let s = seed.wrapping_add(k as u64 * 1000);
83        match generate_3d_conformer_with_torsions(mol, s, csd_torsions) {
84            Ok(coords) => {
85                // Score using ETKDG 3D energy (topology-dependent, comparable across seeds)
86                let n = mol.graph.node_count();
87                let coords_f64 = coords.map(|v| v as f64);
88                let ff = build_etkdg_3d_ff_with_torsions(mol, &coords_f64, &bounds, csd_torsions);
89                let mut flat = vec![0.0f64; n * 3];
90                for a in 0..n {
91                    flat[a * 3] = coords[(a, 0)] as f64;
92                    flat[a * 3 + 1] = coords[(a, 1)] as f64;
93                    flat[a * 3 + 2] = coords[(a, 2)] as f64;
94                }
95                let energy = crate::forcefield::etkdg_3d::etkdg_3d_energy_f64(&flat, n, mol, &ff);
96                match &best {
97                    Some((_, best_e)) if energy >= *best_e => {}
98                    _ => {
99                        best = Some((coords, energy));
100                    }
101                }
102            }
103            Err(e) => {
104                last_err = e;
105            }
106        }
107    }
108
109    match best {
110        Some((coords, _)) => Ok(coords),
111        None => Err(last_err),
112    }
113}
114
115/// Generate a 3D conformer with optional CSD torsion overrides.
116///
117/// If `csd_torsions` is non-empty, they replace the default torsion terms
118/// in the 3D force field, providing much better torsion angle quality.
119pub fn generate_3d_conformer_with_torsions(
120    mol: &Molecule,
121    seed: u64,
122    csd_torsions: &[crate::forcefield::etkdg_3d::M6TorsionContrib],
123) -> Result<DMatrix<f32>, String> {
124    let n = mol.graph.node_count();
125    if n == 0 {
126        return Err("Empty molecule".to_string());
127    }
128
129    // RDKit-style two-pass bounds: try with set15 + strict smoothing,
130    // fall back to no set15 if triangle smoothing fails.
131    let bounds = {
132        let raw = calculate_bounds_matrix_opts(mol, true);
133        let mut b = raw;
134        if triangle_smooth_tol(&mut b, 0.0) {
135            b
136        } else {
137            #[cfg(test)]
138            eprintln!("  [FALLBACK] strict smoothing failed, retrying without set15");
139            let raw2 = calculate_bounds_matrix_opts(mol, false);
140            let mut b2 = raw2.clone();
141            if triangle_smooth_tol(&mut b2, 0.0) {
142                b2
143            } else {
144                #[cfg(test)]
145                eprintln!("  [FALLBACK] second smoothing also failed, using soft smooth");
146                let mut b3 = raw2;
147                triangle_smooth_tol(&mut b3, 0.05);
148                b3
149            }
150        }
151    };
152    let chiral_sets = identify_chiral_sets(mol);
153    let tetrahedral_centers = identify_tetrahedral_centers(mol);
154
155    let max_iterations = 10 * n;
156    let mut rng = MinstdRand::new(seed as u32);
157
158    // RDKit: 4D embedding only when chiral centers (CW/CCW) are present
159    // Otherwise 3D embedding (no 4th dimension overhead)
160    let use_4d = !chiral_sets.is_empty();
161    let embed_dim = if use_4d { 4 } else { 3 };
162
163    // Track consecutive embedding failures for random coord fallback
164    // For large molecules (100+ atoms), eigendecomposition is O(N³) and fails more often,
165    // so we switch to random coords sooner. For small molecules, we allow more eigen attempts.
166    let mut consecutive_embed_fails = 0u32;
167    let embed_fail_threshold = if n > 100 {
168        (n as u32 / 8).max(10)
169    } else {
170        (n as u32 / 4).max(20)
171    };
172    let mut random_coord_attempts = 0u32;
173    let max_random_coord_attempts = if n > 100 { 80u32 } else { 150u32 };
174    // Scale BFGS restart limit: large molecules converge with fewer restarts
175    let bfgs_restart_limit = if n > 100 { 20 } else { 50 };
176
177    // Track energy-check failures for progressive relaxation
178    let mut energy_check_failures = 0u32;
179    // After 30% of iterations fail at the energy check, relax from 0.05 to 0.125
180    let energy_relax_threshold = (max_iterations as f64 * 0.3) as u32;
181
182    for _iter in 0..max_iterations {
183        // Log attempt number if requested (works in both lib and integration tests)
184        let _log_attempts = std::env::var("LOG_ATTEMPTS").is_ok();
185
186        // Step 1: Generate initial coords
187        let use_random_coords = consecutive_embed_fails >= embed_fail_threshold
188            && random_coord_attempts < max_random_coord_attempts;
189        let (mut coords, basin_thresh) = if use_random_coords {
190            random_coord_attempts += 1;
191            // Random coordinate fallback: RDKit uses boxSizeMult * cube_root(N)
192            let box_size = 2.0 * (n as f64).cbrt().max(2.5);
193            let mut c = DMatrix::from_element(n, embed_dim, 0.0f64);
194            for i in 0..n {
195                for d in 0..embed_dim {
196                    c[(i, d)] = box_size * (rng.next_double() - 0.5);
197                }
198            }
199            // RDKit disables basin threshold for random coords
200            (c, 1e8f64)
201        } else {
202            let dists = pick_rdkit_distances(&mut rng, &bounds);
203            let coords_opt = compute_initial_coords_rdkit(&mut rng, &dists, embed_dim);
204            match coords_opt {
205                Some(c) => {
206                    consecutive_embed_fails = 0;
207                    (c, BASIN_THRESH as f64)
208                }
209                None => {
210                    consecutive_embed_fails += 1;
211                    // If we've exhausted random coord attempts and still failing, give up
212                    if random_coord_attempts >= max_random_coord_attempts {
213                        break;
214                    }
215                    if _log_attempts && consecutive_embed_fails == embed_fail_threshold {
216                        eprintln!(
217                            "  attempt {} → switching to random coords after {} failures",
218                            _iter, embed_fail_threshold
219                        );
220                    } else if _log_attempts {
221                        eprintln!("  attempt {} → embedding failed", _iter);
222                    }
223                    continue;
224                }
225            }
226        };
227
228        // Step 2: First minimization (bounds FF with chiral_w=1.0, 4d_w=0.1)
229        // RDKit: while(needMore) { needMore = field->minimize(400, forceTol); }
230        // Safety limit: max 50 restarts to prevent infinite loops (RDKit typically needs < 10)
231        {
232            let bt = basin_thresh as f32;
233            let initial_energy =
234                compute_total_bounds_energy_f64(&coords, &bounds, &chiral_sets, bt, 0.1, 1.0);
235            if initial_energy > ERROR_TOL {
236                let mut need_more = 1;
237                let mut restarts = 0;
238                while need_more != 0 && restarts < bfgs_restart_limit {
239                    need_more = minimize_bfgs_rdkit(
240                        &mut coords,
241                        &bounds,
242                        &chiral_sets,
243                        400,
244                        FORCE_TOL as f64,
245                        bt,
246                        0.1,
247                        1.0,
248                    );
249                    restarts += 1;
250                }
251            }
252        }
253
254        // Step 3: Energy per atom check with progressive relaxation
255        // For difficult molecules (complex polycyclic), relax energy threshold after many failures
256        let bt = basin_thresh as f32;
257        let energy = compute_total_bounds_energy_f64(&coords, &bounds, &chiral_sets, bt, 0.1, 1.0);
258        let effective_e_thresh = if energy_check_failures >= energy_relax_threshold {
259            MAX_MINIMIZED_E_PER_ATOM as f64 * 2.5 // 0.125 — still rejects truly bad conformers
260        } else {
261            MAX_MINIMIZED_E_PER_ATOM as f64
262        };
263        if energy / n as f64 >= effective_e_thresh {
264            energy_check_failures += 1;
265            if _log_attempts {
266                eprintln!(
267                    "  attempt {} → energy check failed: {:.6}/atom",
268                    _iter,
269                    energy / n as f64
270                );
271            }
272            continue;
273        }
274
275        // Step 4: Check tetrahedral centers (f64 coords matching RDKit's Point3D)
276        if !check_tetrahedral_centers(&coords, &tetrahedral_centers) {
277            if _log_attempts {
278                eprintln!("  attempt {} → tetrahedral check failed", _iter);
279            }
280            continue;
281        }
282
283        // Step 5: Check chiral center volumes (f64 coords)
284        if !chiral_sets.is_empty() && !check_chiral_centers(&coords, &chiral_sets) {
285            if _log_attempts {
286                eprintln!("  attempt {} → chiral check failed", _iter);
287            }
288            continue;
289        }
290
291        // Step 6: Second minimization (chiral_w=0.2, 4d_w=1.0) — only if 4D embedding
292        // RDKit: while(needMore) { needMore = field2->minimize(200, forceTol); }
293        if use_4d {
294            let energy2 =
295                compute_total_bounds_energy_f64(&coords, &bounds, &chiral_sets, bt, 1.0, 0.2);
296            if energy2 > ERROR_TOL {
297                let mut need_more = 1;
298                let mut restarts = 0;
299                while need_more != 0 && restarts < bfgs_restart_limit {
300                    need_more = minimize_bfgs_rdkit(
301                        &mut coords,
302                        &bounds,
303                        &chiral_sets,
304                        200,
305                        FORCE_TOL as f64,
306                        bt,
307                        1.0,
308                        0.2,
309                    );
310                    restarts += 1;
311                }
312            }
313        }
314
315        // Step 7: Drop to 3D (no-op for 3D embedding)
316        let coords3d = coords.columns(0, 3).into_owned();
317
318        // Step 8: ETKDG 3D FF minimization — single pass of 300 iterations (matching RDKit)
319        // Build FF using f64 coords for distance computation (matching RDKit's Point3D)
320        let ff = build_etkdg_3d_ff_with_torsions(mol, &coords3d, &bounds, csd_torsions);
321        // RDKit: only minimize if energy > ERROR_TOL
322        let e3d = crate::forcefield::etkdg_3d::etkdg_3d_energy_f64(
323            &{
324                let n = mol.graph.node_count();
325                let mut flat = vec![0.0f64; n * 3];
326                for a in 0..n {
327                    flat[a * 3] = coords3d[(a, 0)];
328                    flat[a * 3 + 1] = coords3d[(a, 1)];
329                    flat[a * 3 + 2] = coords3d[(a, 2)];
330                }
331                flat
332            },
333            mol.graph.node_count(),
334            mol,
335            &ff,
336        );
337        let refined = if e3d > ERROR_TOL {
338            minimize_etkdg_3d_bfgs(mol, &coords3d, &ff, 300, FORCE_TOL)
339        } else {
340            coords3d
341        };
342
343        // Step 9: Planarity check — matching RDKit's construct3DImproperForceField
344        // Uses UFF inversion energy + SP angle constraint energy (k=10.0) in f64
345        {
346            let n_improper_atoms = ff.inversion_contribs.len() / 3;
347            let flat_f64: Vec<f64> = {
348                let nr = refined.nrows();
349                let mut flat = vec![0.0f64; nr * 3];
350                for a in 0..nr {
351                    flat[a * 3] = refined[(a, 0)];
352                    flat[a * 3 + 1] = refined[(a, 1)];
353                    flat[a * 3 + 2] = refined[(a, 2)];
354                }
355                flat
356            };
357            let planarity_energy =
358                crate::forcefield::etkdg_3d::planarity_check_energy_f64(&flat_f64, n, &ff);
359            if planarity_energy > n_improper_atoms as f64 * PLANARITY_TOLERANCE as f64 {
360                if _log_attempts {
361                    eprintln!(
362                        "  attempt {} → planarity check failed (energy={:.4} > threshold={:.4})",
363                        _iter,
364                        planarity_energy,
365                        n_improper_atoms as f64 * PLANARITY_TOLERANCE as f64
366                    );
367                }
368                continue;
369            }
370        }
371
372        // Step 10: Double bond geometry check (f64 coords)
373        if !check_double_bond_geometry(mol, &refined) {
374            if _log_attempts {
375                eprintln!("  attempt {} → double bond check failed", _iter);
376            }
377            continue;
378        }
379
380        if _log_attempts {
381            eprintln!("  attempt {} → SUCCESS", _iter);
382        }
383
384        let refined_f32 = refined.map(|v| v as f32);
385        return Ok(refined_f32);
386    }
387
388    Err(format!(
389        "Failed to generate valid conformer after {} iterations",
390        max_iterations
391    ))
392}
393
394/// Compute bounds FF energy in f64, matching RDKit's field->calcEnergy() exactly.
395/// Used for the energy pre-check before minimization.
396pub fn compute_total_bounds_energy_f64(
397    coords: &DMatrix<f64>,
398    bounds: &DMatrix<f64>,
399    chiral_sets: &[crate::forcefield::bounds_ff::ChiralSet],
400    basin_thresh: f32,
401    weight_4d: f32,
402    weight_chiral: f32,
403) -> f64 {
404    let n = coords.nrows();
405    let dim_coords = coords.ncols();
406    let basin_thresh_f64 = basin_thresh as f64;
407    let weight_4d_f64 = weight_4d as f64;
408    let weight_chiral_f64 = weight_chiral as f64;
409
410    let mut energy = 0.0f64;
411    for i in 1..n {
412        for j in 0..i {
413            let ub = bounds[(j, i)];
414            let lb = bounds[(i, j)];
415            if ub - lb > basin_thresh_f64 {
416                continue;
417            }
418            let mut d2 = 0.0f64;
419            for d in 0..dim_coords {
420                let diff = coords[(i, d)] - coords[(j, d)];
421                d2 += diff * diff;
422            }
423            let ub2 = ub * ub;
424            let lb2 = lb * lb;
425            let val = if d2 > ub2 {
426                d2 / ub2 - 1.0
427            } else if d2 < lb2 {
428                2.0 * lb2 / (lb2 + d2) - 1.0
429            } else {
430                0.0
431            };
432            if val > 0.0 {
433                energy += val * val;
434            }
435        }
436    }
437    if !chiral_sets.is_empty() {
438        // Flatten coords to flat array for f64 chiral energy
439        let mut flat = vec![0.0f64; n * dim_coords];
440        for i in 0..n {
441            for d in 0..dim_coords {
442                flat[i * dim_coords + d] = coords[(i, d)];
443            }
444        }
445        energy += weight_chiral_f64
446            * crate::forcefield::bounds_ff::chiral_violation_energy_f64(
447                &flat,
448                dim_coords,
449                chiral_sets,
450            );
451    }
452    if dim_coords == 4 {
453        for i in 0..n {
454            let x4 = coords[(i, 3)];
455            energy += weight_4d_f64 * x4 * x4;
456        }
457    }
458    energy
459}