#![allow(
unused_imports,
unused_variables,
dead_code,
clippy::unnecessary_cast,
clippy::needless_range_loop,
clippy::manual_repeat_n,
clippy::manual_str_repeat,
clippy::manual_is_multiple_of,
clippy::redundant_field_names,
clippy::useless_vec,
clippy::single_range_in_vec_init
)]
use serde::Deserialize;
use std::fs;
#[derive(Deserialize)]
struct RefAtom {
element: u8,
x: f32,
y: f32,
z: f32,
formal_charge: i8,
hybridization: String,
}
#[derive(Deserialize)]
struct RefBond {
start: usize,
end: usize,
order: String,
}
#[derive(Deserialize)]
struct RefTorsion {
atoms: Vec<usize>,
v: Vec<f64>,
signs: Vec<i32>,
}
#[derive(Deserialize)]
struct RefMolecule {
smiles: String,
atoms: Vec<RefAtom>,
bonds: Vec<RefBond>,
torsions: Vec<RefTorsion>,
}
fn build_mol_from_ref(ref_mol: &RefMolecule) -> sci_form::graph::Molecule {
let mut mol = sci_form::graph::Molecule::new(&ref_mol.smiles);
let mut node_indices = Vec::with_capacity(ref_mol.atoms.len());
for atom in &ref_mol.atoms {
let hybridization = match atom.hybridization.as_str() {
"SP" => sci_form::graph::Hybridization::SP,
"SP2" => sci_form::graph::Hybridization::SP2,
"SP3" => sci_form::graph::Hybridization::SP3,
"SP3D" => sci_form::graph::Hybridization::SP3D,
"SP3D2" => sci_form::graph::Hybridization::SP3D2,
_ => sci_form::graph::Hybridization::Unknown,
};
let new_atom = sci_form::graph::Atom {
element: atom.element,
position: nalgebra::Vector3::new(0.0, 0.0, 0.0),
charge: 0.0,
formal_charge: atom.formal_charge,
hybridization,
chiral_tag: sci_form::graph::ChiralType::Unspecified,
explicit_h: if atom.element == 1 || atom.element == 0 {
1
} else {
0
},
};
node_indices.push(mol.add_atom(new_atom));
}
for bond in &ref_mol.bonds {
let order = match bond.order.as_str() {
"DOUBLE" => sci_form::graph::BondOrder::Double,
"TRIPLE" => sci_form::graph::BondOrder::Triple,
"AROMATIC" => sci_form::graph::BondOrder::Aromatic,
_ => sci_form::graph::BondOrder::Single,
};
mol.add_bond(
node_indices[bond.start],
node_indices[bond.end],
sci_form::graph::Bond {
order,
stereo: sci_form::graph::BondStereo::None,
},
);
}
mol
}
fn build_csd_torsions(
ref_torsions: &[RefTorsion],
) -> Vec<sci_form::forcefield::etkdg_3d::M6TorsionContrib> {
ref_torsions
.iter()
.filter_map(|t| {
if t.atoms.len() < 4 || t.v.len() < 6 || t.signs.len() < 6 {
return None;
}
let mut signs = [0.0f64; 6];
let mut v = [0.0f64; 6];
for k in 0..6 {
signs[k] = t.signs[k] as f64;
v[k] = t.v[k] as f64;
}
Some(sci_form::forcefield::etkdg_3d::M6TorsionContrib {
i: t.atoms[0],
j: t.atoms[1],
k: t.atoms[2],
l: t.atoms[3],
signs,
v,
})
})
.collect()
}
fn pairwise_rmsd(coords: &nalgebra::DMatrix<f32>, ref_atoms: &[RefAtom]) -> f32 {
let n = ref_atoms.len();
let mut sq_sum = 0.0f64;
let mut npairs = 0u64;
for a in 0..n {
for b in (a + 1)..n {
let dr = ((ref_atoms[a].x - ref_atoms[b].x).powi(2)
+ (ref_atoms[a].y - ref_atoms[b].y).powi(2)
+ (ref_atoms[a].z - ref_atoms[b].z).powi(2))
.sqrt() as f64;
let du = ((coords[(a, 0)] - coords[(b, 0)]).powi(2)
+ (coords[(a, 1)] - coords[(b, 1)]).powi(2)
+ (coords[(a, 2)] - coords[(b, 2)]).powi(2))
.sqrt() as f64;
sq_sum += (dr - du).powi(2);
npairs += 1;
}
}
if npairs > 0 {
(sq_sum / npairs as f64).sqrt() as f32
} else {
0.0
}
}
#[test]
fn test_ff_energy_comparison() {
use sci_form::distgeom::bounds::{calculate_bounds_matrix_opts, triangle_smooth_tol};
use sci_form::forcefield::etkdg_3d::{build_etkdg_3d_ff_with_torsions, etkdg_3d_energy_f64};
let ref_data =
sci_form::fixture_io::read_text_fixture("tests/fixtures/gdb20_reference_1k.json")
.expect("Run scripts/generate_gdb20_reference.py first");
let ref_mols: Vec<RefMolecule> =
serde_json::from_str(&ref_data).expect("Invalid gdb20_reference fixture");
let limit = std::env::var("GDB20_LIMIT")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(1000usize);
let ref_mols = &ref_mols[..limit.min(ref_mols.len())];
println!("\n=== FF ENERGY DIAGNOSTIC ===");
let mut results: Vec<(usize, f32, String)> = Vec::new();
for (idx, ref_mol) in ref_mols.iter().enumerate() {
let mol = build_mol_from_ref(ref_mol);
let csd_torsions = build_csd_torsions(&ref_mol.torsions);
let result =
sci_form::conformer::generate_3d_conformer_with_torsions(&mol, 42, &csd_torsions);
if let Ok(coords) = result {
let rmsd = pairwise_rmsd(&coords, &ref_mol.atoms);
if rmsd > 0.5 {
results.push((idx, rmsd, ref_mol.smiles.clone()));
}
}
}
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
println!("Found {} failing molecules", results.len());
for (rank, (idx, rmsd, smi)) in results.iter().take(10).enumerate() {
let ref_mol = &ref_mols[*idx];
let mol = build_mol_from_ref(ref_mol);
let n = mol.graph.node_count();
let csd_torsions = build_csd_torsions(&ref_mol.torsions);
let our_coords =
sci_form::conformer::generate_3d_conformer_with_torsions(&mol, 42, &csd_torsions)
.unwrap();
let ref_coords_f32 = nalgebra::DMatrix::from_fn(n, 3, |i, j| match j {
0 => ref_mol.atoms[i].x,
1 => ref_mol.atoms[i].y,
2 => ref_mol.atoms[i].z,
_ => unreachable!(),
});
let bounds = {
let raw = calculate_bounds_matrix_opts(&mol, true);
let mut b = raw;
if triangle_smooth_tol(&mut b, 0.0) {
b
} else {
let raw2 = calculate_bounds_matrix_opts(&mol, false);
let mut b2 = raw2.clone();
if triangle_smooth_tol(&mut b2, 0.0) {
b2
} else {
let mut b3 = raw2;
triangle_smooth_tol(&mut b3, 0.05);
b3
}
}
};
let ff_ours = build_etkdg_3d_ff_with_torsions(
&mol,
&our_coords.map(|v| v as f64),
&bounds,
&csd_torsions,
);
let ff_ref = build_etkdg_3d_ff_with_torsions(
&mol,
&ref_coords_f32.map(|v| v as f64),
&bounds,
&csd_torsions,
);
let our_coords_f64: Vec<f64> = (0..n)
.flat_map(|i| {
vec![
our_coords[(i, 0)] as f64,
our_coords[(i, 1)] as f64,
our_coords[(i, 2)] as f64,
]
})
.collect();
let e_our_at_our = etkdg_3d_energy_f64(&our_coords_f64, n, &mol, &ff_ours);
let ref_coords_f64: Vec<f64> = (0..n)
.flat_map(|i| {
vec![
ref_mol.atoms[i].x as f64,
ref_mol.atoms[i].y as f64,
ref_mol.atoms[i].z as f64,
]
})
.collect();
let e_our_at_ref = etkdg_3d_energy_f64(&ref_coords_f64, n, &mol, &ff_ours);
let e_ref_at_ref = etkdg_3d_energy_f64(&ref_coords_f64, n, &mol, &ff_ref);
let e_ref_at_our = etkdg_3d_energy_f64(&our_coords_f64, n, &mol, &ff_ref);
let n_torsions = ff_ours.torsion_contribs.len();
let n_inversions = ff_ours.inversion_contribs.len();
let n_dist = ff_ours.dist_12.len() + ff_ours.dist_13.len() + ff_ours.dist_long.len();
let n_angles = ff_ours.angle_constraints.len();
let n_bond_constraints = mol.graph.edge_count();
let n_long_range = n_dist - n_bond_constraints;
println!("\n--- #{} SMILES={} RMSD={:.4} ---", rank + 1, smi, rmsd);
println!(" Atoms: {}, Bonds: {}", n, mol.graph.edge_count());
println!(
" CSD torsions: {}, Ring/chain torsions: {}",
csd_torsions.len(),
n_torsions - csd_torsions.len()
);
println!(" Inversion contribs: {}", n_inversions);
println!(
" Dist constraints: {} (approx {} bond + {} other)",
n_dist,
n_bond_constraints,
n_dist - n_bond_constraints
);
println!(" Angle constraints: {}", n_angles);
println!(" FF(ours) @ our coords: {:.6}", e_our_at_our);
println!(" FF(ours) @ ref coords: {:.6}", e_our_at_ref);
println!(" FF(ref) @ ref coords: {:.6}", e_ref_at_ref);
println!(" FF(ref) @ our coords: {:.6}", e_ref_at_our);
if e_our_at_our < e_our_at_ref {
println!(" → Our optimum is LOWER than ref in our FF (FF landscape differs)");
} else {
println!(" → Our optimum is HIGHER than ref in our FF (optimizer issue)");
}
}
}