use crate::chemistry::forcefield::crystalff::{
CrystalFFDetails, TorsionAngleContribs, get_experimental_torsions_without_bonds,
};
use crate::chemistry::forcefield::mmff::nonbonded::NonbondedContrib;
#[cfg(test)]
use crate::chemistry::forcefield::uff::atom_typer::get_atom_label_for_uff as source_get_atom_label_for_uff;
use crate::chemistry::forcefield::uff::atom_typer::get_atom_types_for_uff;
use crate::chemistry::forcefield::uff::inversion::InversionContribs;
use crate::chemistry::forcefield::uff::params::AtomicParams;
use crate::chemistry::forcefield::{
AngleConstraintContribs, DistanceConstraintContribs, ForceField, ForceFieldContrib,
ForceFieldVec3,
};
use crate::chemistry::stereo::{get_ideal_angle_between_ligands, has_non_tetrahedral_stereo};
use crate::{
Atom, AtomId, Bond, BondId, BondOrder, BondQueryPredicate, BondStereo, ChiralTag, Conformer3D,
DerivedState, Hybridization, Molecule, MoleculeBuildError, QueryNode, SubstructMatchParams,
ValenceModel, assign_valence, get_substruct_matches_with_params, rdkit_valence_list,
};
use std::borrow::Cow;
use std::collections::{BTreeMap, HashSet};
use std::env;
use std::sync::{Arc, Mutex, OnceLock};
use std::time::{Duration, Instant};
const DIST12_DELTA: f64 = 0.01;
const DIST13_TOL: f64 = 0.04;
const GEN_DIST_TOL: f64 = 0.06;
const DIST15_TOL: f64 = 0.08;
const VDW_SCALE_15: f64 = 0.7;
const H_BOND_LENGTH: f64 = 1.8;
const MAX_UPPER: f64 = 1000.0;
const KNOWN_DIST_TOL: f64 = 0.01;
const KNOWN_DIST_FORCE_CONSTANT: f64 = 100.0;
const MIN_MACROCYCLE_RING_SIZE: usize = 9;
const DEFAULT_LOWER: f64 = 0.001;
const DEFAULT_UPPER: f64 = MAX_UPPER;
const UFF_LAMBDA: f64 = 0.1332;
const EIGVAL_TOL: f64 = 0.001;
const RDKIT_RANDOM_MODULUS: u64 = 2_147_483_647;
const RDKIT_RANDOM_MULTIPLIER: u64 = 48_271;
const EMBEDDER_ERROR_TOL: f64 = 0.00001;
const MAX_MINIMIZED_E_PER_ATOM: f64 = 0.05;
const MIN_TETRAHEDRAL_CHIRAL_VOL: f64 = 0.50;
const TETRAHEDRAL_CENTERINVOLUME_TOL: f64 = 0.30;
thread_local! {
static RDKIT_DISTGEOM_RNG: std::cell::RefCell<RdkitDistgeomMinStdRand> =
std::cell::RefCell::new(RdkitDistgeomMinStdRand::default());
}
fn have_opposite_sign(a: f64, b: f64) -> bool {
a.is_sign_negative() ^ b.is_sign_negative()
}
fn row64_distgeom_trace_enabled(num_points: usize) -> bool {
env::var("RDKIT_ROW64_TRACE").ok().as_deref() == Some("1") && num_points == 106
}
fn row61_distgeom_trace_enabled(num_points: usize) -> bool {
env::var("RDKIT_ROW61_TRACE").ok().as_deref() == Some("1") && num_points == 26
}
fn row103_distgeom_trace_enabled(num_points: usize) -> bool {
env::var("RDKIT_ROW103_TRACE").ok().as_deref() == Some("1") && num_points == 45
}
#[allow(dead_code)]
fn failmutex_get() -> &'static Mutex<()> {
static MUTEX: OnceLock<Mutex<()>> = OnceLock::new();
MUTEX.get_or_init(|| Mutex::new(()))
}
#[allow(dead_code)]
fn failmutex_create() {
let _test_lock = failmutex_get()
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
}
#[allow(dead_code)]
fn get_fail_mutex() -> &'static Mutex<()> {
static INIT: OnceLock<()> = OnceLock::new();
INIT.get_or_init(failmutex_create);
failmutex_get()
}
fn normalized_point_delta(p0: ForceFieldVec3, p1: ForceFieldVec3) -> ForceFieldVec3 {
let delta = p0 - p1;
let length = delta.length();
delta / length
}
fn embedder_volume_test(chiral_set: &ChiralSet, positions: &[ForceFieldVec3]) -> bool {
let p0 = positions[chiral_set.idx0];
let p1 = positions[chiral_set.idx1];
let p2 = positions[chiral_set.idx2];
let p3 = positions[chiral_set.idx3];
let p4 = positions[chiral_set.idx4];
let v1 = normalized_point_delta(p0, p1);
let v2 = normalized_point_delta(p0, p2);
let v3 = normalized_point_delta(p0, p3);
let v4 = normalized_point_delta(p0, p4);
let mut vol_scale = 1.0;
if chiral_set.structure_flags & ChiralSetStructureFlags::InFusedSmallRings as u64 != 0 {
vol_scale = 0.25;
}
let min_vol = vol_scale * MIN_TETRAHEDRAL_CHIRAL_VOL;
let mut crossp = v1.cross_product(v2);
let mut vol = crossp.dot_product(v3);
if vol.abs() < min_vol {
return false;
}
crossp = v1.cross_product(v2);
vol = crossp.dot_product(v4);
if vol.abs() < min_vol {
return false;
}
crossp = v1.cross_product(v3);
vol = crossp.dot_product(v4);
if vol.abs() < min_vol {
return false;
}
crossp = v2.cross_product(v3);
vol = crossp.dot_product(v4);
vol.abs() >= min_vol
}
fn embedder_same_side(
v1: ForceFieldVec3,
v2: ForceFieldVec3,
v3: ForceFieldVec3,
v4: ForceFieldVec3,
p0: ForceFieldVec3,
tol: f64,
) -> bool {
let normal = (v2 - v1).cross_product(v3 - v1);
let d1 = normal.dot_product(v4 - v1);
let d2 = normal.dot_product(p0 - v1);
if d1.abs() < tol || d2.abs() < tol {
return false;
}
!((d1 < 0.0) ^ (d2 < 0.0))
}
fn embedder_center_in_volume_indices(
idx0: usize,
idx1: usize,
idx2: usize,
idx3: usize,
idx4: usize,
positions: &[ForceFieldVec3],
tol: f64,
) -> bool {
let p0 = positions[idx0];
let p1 = positions[idx1];
let p2 = positions[idx2];
let p3 = positions[idx3];
let p4 = positions[idx4];
embedder_same_side(p1, p2, p3, p4, p0, tol)
&& embedder_same_side(p2, p3, p4, p1, p0, tol)
&& embedder_same_side(p3, p4, p1, p2, p0, tol)
&& embedder_same_side(p4, p1, p2, p3, p0, tol)
}
fn embedder_center_in_volume(
chiral_set: &ChiralSet,
positions: &[ForceFieldVec3],
tol: f64,
) -> bool {
if chiral_set.idx0 == chiral_set.idx4 {
return true;
}
embedder_center_in_volume_indices(
chiral_set.idx0,
chiral_set.idx1,
chiral_set.idx2,
chiral_set.idx3,
chiral_set.idx4,
positions,
tol,
)
}
fn embedder_bounds_fulfilled(
atoms: &[i32],
mmat: &BoundsMatrix,
positions: &[ForceFieldVec3],
) -> bool {
if atoms.len() < 2 {
return true;
}
for i in 0..atoms.len() - 1 {
for j in i + 1..atoms.len() {
let a1 = atoms[i] as usize;
let a2 = atoms[j] as usize;
let d2 = (positions[a1] - positions[a2]).length();
let lb = mmat.get_lower(a1, a2);
let ub = mmat.get_upper(a1, a2);
if (d2 < lb && (d2 - lb).abs() > 0.1 * ub) || (d2 > ub && (d2 - ub).abs() > 0.1 * ub) {
return false;
}
}
}
true
}
struct EmbedArgs<'a> {
mmat: &'a BoundsMatrix,
chiral_centers: &'a [ChiralSetPtr],
tetrahedral_carbons: &'a [ChiralSetPtr],
etkdg_details: Option<&'a CrystalFFDetails>,
double_bond_ends: Option<&'a [(usize, usize, usize)]>,
stereo_double_bonds: &'a [(Vec<usize>, i32)],
}
struct EmbedHelperArgs<'a> {
confs_ok: &'a mut [bool],
four_d: bool,
frag_mapping: Option<&'a [usize]>,
confs: &'a mut [Conformer3D],
frag_idx: usize,
mmat: &'a BoundsMatrix,
chiral_centers: &'a [ChiralSetPtr],
tetrahedral_carbons: &'a [ChiralSetPtr],
double_bond_ends: &'a [(usize, usize, usize)],
stereo_double_bonds: &'a [(Vec<usize>, i32)],
etkdg_details: &'a CrystalFFDetails,
}
impl<'a> EmbedHelperArgs<'a> {
fn embed_args(&self) -> EmbedArgs<'_> {
EmbedArgs {
mmat: self.mmat,
chiral_centers: self.chiral_centers,
tetrahedral_carbons: self.tetrahedral_carbons,
etkdg_details: Some(self.etkdg_details),
double_bond_ends: Some(self.double_bond_ends),
stereo_double_bonds: self.stereo_double_bonds,
}
}
}
fn embedder_generate_initial_coords<R: RdkitDoubleRng>(
positions: &mut [Vec<f64>],
eargs: &EmbedArgs<'_>,
embed_params: &EmbedParameters,
dist_mat: &mut SymmMatrix,
rng: &mut R,
) -> Result<bool, DgBoundsError> {
let got_coords = if !embed_params.use_random_coords {
let _largest_distance = pick_random_dist_mat_with_rng(eargs.mmat, dist_mat, rng);
compute_initial_coords_with_rng(
dist_mat,
positions,
rng,
embed_params.rand_neg_eig,
embed_params.num_zero_fail as usize,
)?
} else {
let box_size = if embed_params.box_size_mult > 0.0 {
5.0 * embed_params.box_size_mult
} else {
-embed_params.box_size_mult
};
let got_coords = compute_random_coords_with_rng(positions, box_size, rng);
if let Some(coord_map) = &embed_params.coord_map {
for (idx, mapped_point) in coord_map {
let point = &mut positions[*idx as usize];
for ci in 0..3 {
point[ci] = mapped_point[ci];
}
for coord in point.iter_mut().skip(3) {
*coord = 0.0;
}
}
}
got_coords
};
Ok(got_coords)
}
fn copy_forcefield_positions_to_point_vectors(field: &ForceField, positions: &mut [Vec<f64>]) {
for (point, field_point) in positions.iter_mut().zip(field.positions()) {
for ci in 0..point.len() {
point[ci] = field_point[ci];
}
}
}
fn point_vectors_to_forcefield_vec3(positions: &[Vec<f64>]) -> Vec<ForceFieldVec3> {
positions
.iter()
.map(|point| ForceFieldVec3::new(point[0], point[1], point[2]))
.collect()
}
fn embedder_first_minimization(
positions: &mut [Vec<f64>],
eargs: &EmbedArgs<'_>,
embed_params: &EmbedParameters,
) -> bool {
let mut got_coords = true;
let mut fixed_pts = vec![false; positions.len()];
if embed_params.use_random_coords
&& let Some(coord_map) = &embed_params.coord_map
{
for idx in coord_map.keys() {
fixed_pts[*idx as usize] = true;
}
}
let mut field = construct_distgeom_forcefield(
eargs.mmat,
positions,
eargs.chiral_centers,
1.0,
0.1,
None,
embed_params.basin_thresh,
Some(&fixed_pts),
);
if embed_params.use_random_coords
&& let Some(coord_map) = &embed_params.coord_map
{
for idx in coord_map.keys() {
field.fixed_points_mut().push(*idx as usize);
}
}
field.initialize();
if field.calc_energy_current(None) > EMBEDDER_ERROR_TOL {
let mut need_more = 1;
while need_more != 0 {
need_more = field.minimize(400, embed_params.optimizer_force_tol, 1.0e-6);
}
}
copy_forcefield_positions_to_point_vectors(&field, positions);
let mut e_contribs = Vec::new();
let local_e = field.calc_energy_current(Some(&mut e_contribs));
if local_e / positions.len() as f64 >= MAX_MINIMIZED_E_PER_ATOM {
got_coords = false;
}
got_coords
}
fn embedder_check_tetrahedral_centers(
positions: &[Vec<f64>],
eargs: &EmbedArgs<'_>,
_embed_params: &EmbedParameters,
) -> bool {
let positions = point_vectors_to_forcefield_vec3(positions);
let trace_row103 = env::var("RDKIT_ROW103_TRACE").ok().as_deref() == Some("1")
&& positions.len() == 40
&& eargs.tetrahedral_carbons.len() == 3;
for tet_set in eargs.tetrahedral_carbons {
let volume_ok = embedder_volume_test(tet_set, &positions);
let center_ok =
embedder_center_in_volume(tet_set, &positions, TETRAHEDRAL_CENTERINVOLUME_TOL);
if trace_row103 {
println!(
"row103_tetra_check idx0={} atoms=[{},{},{},{},{}] volume_ok={} center_ok={}",
tet_set.idx0,
tet_set.idx0,
tet_set.idx1,
tet_set.idx2,
tet_set.idx3,
tet_set.idx4,
volume_ok,
center_ok
);
}
if !volume_ok || !center_ok {
return false;
}
}
true
}
fn embedder_check_chiral_centers(
positions: &[Vec<f64>],
eargs: &EmbedArgs<'_>,
_embed_params: &EmbedParameters,
) -> bool {
let positions = point_vectors_to_forcefield_vec3(positions);
for chiral_set in eargs.chiral_centers {
let vol = calc_chiral_volume_points(
chiral_set.idx1,
chiral_set.idx2,
chiral_set.idx3,
chiral_set.idx4,
&positions,
);
let lb = chiral_set.get_lower_volume_bound();
let ub = chiral_set.get_upper_volume_bound();
if (lb > 0.0 && vol < lb && (vol / lb < 0.8 || have_opposite_sign(vol, lb)))
|| (ub < 0.0 && vol > ub && (vol / ub < 0.8 || have_opposite_sign(vol, ub)))
{
return false;
}
}
true
}
fn embedder_minimize_fourth_dimension(
positions: &mut [Vec<f64>],
eargs: &EmbedArgs<'_>,
embed_params: &mut EmbedParameters,
end_time: Option<Instant>,
) -> bool {
let mut field = construct_distgeom_forcefield(
eargs.mmat,
positions,
eargs.chiral_centers,
0.2,
1.0,
None,
embed_params.basin_thresh,
None,
);
if embed_params.use_random_coords
&& let Some(coord_map) = &embed_params.coord_map
{
for idx in coord_map.keys() {
field.fixed_points_mut().push(*idx as usize);
}
}
field.initialize();
let trace_row64 = row64_distgeom_trace_enabled(positions.len());
if trace_row64 {
println!(
"row64_fourth_dimension initial_energy={:.15}",
field.calc_energy_current(None)
);
}
if field.calc_energy_current(None) > EMBEDDER_ERROR_TOL {
let mut need_more = 1;
let mut pass = 0usize;
while need_more != 0 {
if let Some(deadline) = end_time
&& Instant::now() > deadline
{
if trace_row64 {
println!("row64_fourth_dimension timeout pass={pass}");
}
copy_forcefield_positions_to_point_vectors(&field, positions);
return false;
}
need_more = field.minimize(200, embed_params.optimizer_force_tol, 1.0e-6);
if trace_row64 {
println!(
"row64_fourth_dimension pass={} need_more={} energy={:.15}",
pass,
need_more,
field.calc_energy_current(None)
);
}
pass += 1;
}
}
copy_forcefield_positions_to_point_vectors(&field, positions);
if trace_row64 {
print_debug_like_row64_positions("row64_after_fourth_dimension_internal", positions);
}
true
}
fn print_debug_like_row64_positions(stage: &str, positions: &[Vec<f64>]) {
let coords: Vec<String> = positions
.iter()
.map(|point| {
format!(
"[{:.15},{:.15},{:.15},{:.15}]",
point[0],
point[1],
point[2],
*point.get(3).unwrap_or(&0.0)
)
})
.collect();
println!("{stage} coords={}", coords.join(";"));
}
fn embedder_minimize_with_exp_torsions(
positions: &mut [Vec<f64>],
eargs: &EmbedArgs<'_>,
embed_params: &EmbedParameters,
) -> bool {
let etkdg_details = eargs.etkdg_details.expect("bogus etkdgDetails pointer");
let mut planar = true;
let mut positions_3d = point_vectors_to_forcefield_vec3(positions);
let mut field = if embed_params.use_basic_knowledge {
if let Some(cpci) = &embed_params.cpci {
let cpci_usize: BTreeMap<(usize, usize), f64> = cpci
.iter()
.map(|(&(idx1, idx2), &charge)| ((idx1 as usize, idx2 as usize), charge))
.collect();
construct_3d_forcefield_with_cpci(eargs.mmat, &positions_3d, etkdg_details, &cpci_usize)
} else {
construct_3d_forcefield(eargs.mmat, &positions_3d, etkdg_details)
}
} else {
construct_plain_3d_forcefield(eargs.mmat, &positions_3d, etkdg_details)
};
if embed_params.use_random_coords
&& let Some(coord_map) = &embed_params.coord_map
{
for idx in coord_map.keys() {
field.fixed_points_mut().push(*idx as usize);
}
}
field.initialize();
if field.calc_energy_current(None) > EMBEDDER_ERROR_TOL {
field.minimize(300, embed_params.optimizer_force_tol, 1.0e-6);
}
positions_3d.clone_from_slice(field.positions());
if embed_params.use_basic_knowledge {
let mut field2 = construct_3d_improper_forcefield(eargs.mmat, &positions_3d, etkdg_details);
if embed_params.use_random_coords
&& let Some(coord_map) = &embed_params.coord_map
{
for idx in coord_map.keys() {
field2.fixed_points_mut().push(*idx as usize);
}
}
field2.initialize();
let planarity_tolerance = 0.7;
if field2.calc_energy_current(None)
> etkdg_details.improper_atoms.len() as f64 * planarity_tolerance
{
planar = false;
}
}
for (point, point3d) in positions.iter_mut().zip(positions_3d.iter()) {
point[0] = point3d[0];
point[1] = point3d[1];
point[2] = point3d[2];
}
planar
}
fn embedder_double_bond_geometry_checks(
positions: &[Vec<f64>],
eargs: &EmbedArgs<'_>,
_embed_params: &mut EmbedParameters,
linear_tol: f64,
) -> bool {
if let Some(double_bond_ends) = eargs.double_bond_ends {
for &(idx0, idx1, idx2) in double_bond_ends {
let p0 =
ForceFieldVec3::new(positions[idx0][0], positions[idx0][1], positions[idx0][2]);
let p1 =
ForceFieldVec3::new(positions[idx1][0], positions[idx1][1], positions[idx1][2]);
let p2 =
ForceFieldVec3::new(positions[idx2][0], positions[idx2][1], positions[idx2][2]);
let mut v1 = p1 - p0;
v1 /= v1.length();
let mut v2 = p1 - p2;
v2 /= v2.length();
if v1.dot_product(v2) + 1.0 < linear_tol {
return false;
}
}
}
true
}
fn embedder_double_bond_stereo_checks(
positions: &[Vec<f64>],
eargs: &EmbedArgs<'_>,
_embed_params: &mut EmbedParameters,
) -> bool {
const M_PI_2: f64 = std::f64::consts::FRAC_PI_2;
for (atoms, sign) in eargs.stereo_double_bonds {
let p0 = ForceFieldVec3::new(
positions[atoms[0]][0],
positions[atoms[0]][1],
positions[atoms[0]][2],
);
let p1 = ForceFieldVec3::new(
positions[atoms[1]][0],
positions[atoms[1]][1],
positions[atoms[1]][2],
);
let p2 = ForceFieldVec3::new(
positions[atoms[2]][0],
positions[atoms[2]][1],
positions[atoms[2]][2],
);
let p3 = ForceFieldVec3::new(
positions[atoms[3]][0],
positions[atoms[3]][1],
positions[atoms[3]][2],
);
let beg_end_vec = p2 - p1;
let beg_nbr_vec = p0 - p1;
let crs1 = beg_nbr_vec.cross_product(beg_end_vec);
let end_nbr_vec = p3 - p2;
let crs2 = end_nbr_vec.cross_product(beg_end_vec);
let dihedral = (crs1.dot_product(crs2) / (crs1.length() * crs2.length()))
.clamp(-1.0, 1.0)
.acos();
if (dihedral - M_PI_2) * f64::from(*sign) < 0.0 {
return false;
}
}
true
}
fn embedder_increment_failure(embed_params: &mut EmbedParameters, cause: EmbedFailureCause) {
if embed_params.track_failures {
let _guard = failmutex_get().lock().expect("failure mutex poisoned");
embed_params.failures[cause as usize] += 1;
}
}
fn embedder_final_chiral_checks(
positions: &mut [Vec<f64>],
eargs: &EmbedArgs<'_>,
embed_params: &mut EmbedParameters,
) -> bool {
if !embedder_check_chiral_centers(positions, eargs, embed_params) {
embedder_increment_failure(embed_params, EmbedFailureCause::CheckChiralCenters2);
return false;
}
let mut atoms = std::collections::BTreeSet::new();
for chiral_set in eargs.chiral_centers {
if chiral_set.idx0 != chiral_set.idx4 {
atoms.insert(chiral_set.idx0 as i32);
atoms.insert(chiral_set.idx1 as i32);
atoms.insert(chiral_set.idx2 as i32);
atoms.insert(chiral_set.idx3 as i32);
atoms.insert(chiral_set.idx4 as i32);
}
}
let atoms_to_check: Vec<i32> = atoms.into_iter().collect();
if !atoms_to_check.is_empty() {
let positions_3d = point_vectors_to_forcefield_vec3(positions);
if !embedder_bounds_fulfilled(&atoms_to_check, eargs.mmat, &positions_3d) {
embedder_increment_failure(embed_params, EmbedFailureCause::FinalChiralBounds);
return false;
}
}
let positions_3d = point_vectors_to_forcefield_vec3(positions);
for chiral_set in eargs.chiral_centers {
if !embedder_center_in_volume(chiral_set, &positions_3d, 0.1) {
embedder_increment_failure(embed_params, EmbedFailureCause::FinalCenterInVolume);
return false;
}
}
true
}
fn embedder_embed_points_with_rng<R: RdkitDoubleRng>(
positions: &mut [Vec<f64>],
eargs: &EmbedArgs<'_>,
embed_params: &mut EmbedParameters,
end_time: Option<Instant>,
rng: &mut R,
) -> Result<bool, DgBoundsError> {
let mut got_coords = false;
let mut iter = 0_u32;
let mut dist_mat = SymmMatrix::new(positions.len());
let trace_row64 = row64_distgeom_trace_enabled(positions.len());
let trace_row61 = row61_distgeom_trace_enabled(positions.len());
while !got_coords && iter < embed_params.max_iterations {
if let Some(deadline) = end_time
&& Instant::now() > deadline
{
if trace_row64 {
println!("row64_embed_points timeout_before_iter iter={iter}");
} else if trace_row61 {
println!("row61_embed_points timeout_before_iter iter={iter}");
}
break;
}
iter += 1;
if trace_row64 {
println!("row64_embed_points iter_start iter={iter}");
} else if trace_row61 {
println!("row61_embed_points iter_start iter={iter}");
}
if let Some(callback) = embed_params.callback {
callback(iter);
}
got_coords =
embedder_generate_initial_coords(positions, eargs, embed_params, &mut dist_mat, rng)?;
if !got_coords {
if trace_row64 {
println!("row64_embed_points iter={iter} stage=initial_coords ok=0");
} else if trace_row61 {
println!("row61_embed_points iter={iter} stage=initial_coords ok=0");
}
embedder_increment_failure(embed_params, EmbedFailureCause::InitialCoords);
} else {
if trace_row64 {
println!("row64_embed_points iter={iter} stage=initial_coords ok=1");
} else if trace_row61 {
println!("row61_embed_points iter={iter} stage=initial_coords ok=1");
}
got_coords = embedder_first_minimization(positions, eargs, embed_params);
if !got_coords {
if trace_row64 {
println!("row64_embed_points iter={iter} stage=first_minimization ok=0");
} else if trace_row61 {
println!("row61_embed_points iter={iter} stage=first_minimization ok=0");
}
embedder_increment_failure(embed_params, EmbedFailureCause::FirstMinimization);
} else {
if trace_row64 {
println!("row64_embed_points iter={iter} stage=first_minimization ok=1");
} else if trace_row61 {
println!("row61_embed_points iter={iter} stage=first_minimization ok=1");
}
got_coords = embedder_check_tetrahedral_centers(positions, eargs, embed_params);
if !got_coords {
embedder_increment_failure(
embed_params,
EmbedFailureCause::CheckTetrahedralCenters,
);
}
}
if got_coords && embed_params.enforce_chirality && !eargs.chiral_centers.is_empty() {
got_coords = embedder_check_chiral_centers(positions, eargs, embed_params);
if !got_coords {
embedder_increment_failure(embed_params, EmbedFailureCause::CheckChiralCenters);
}
}
if got_coords && (!eargs.chiral_centers.is_empty() || embed_params.use_random_coords) {
got_coords =
embedder_minimize_fourth_dimension(positions, eargs, embed_params, end_time);
if trace_row64 {
println!(
"row64_embed_points iter={iter} stage=fourth_dimension ok={}",
i32::from(got_coords)
);
} else if trace_row61 {
println!(
"row61_embed_points iter={iter} stage=fourth_dimension ok={}",
i32::from(got_coords)
);
}
if !got_coords {
if let Some(deadline) = end_time
&& Instant::now() > deadline
{
embedder_increment_failure(
embed_params,
EmbedFailureCause::ExceededTimeout,
);
}
embedder_increment_failure(
embed_params,
EmbedFailureCause::MinimizeFourthDimension,
);
}
}
if got_coords
&& (embed_params.use_exp_torsion_angle_prefs || embed_params.use_basic_knowledge)
{
got_coords = embedder_minimize_with_exp_torsions(positions, eargs, embed_params);
if !got_coords {
embedder_increment_failure(embed_params, EmbedFailureCause::EtkMinimization);
}
}
if got_coords {
got_coords =
embedder_double_bond_geometry_checks(positions, eargs, embed_params, 1.0e-3);
if !got_coords {
embedder_increment_failure(embed_params, EmbedFailureCause::LinearDoubleBond);
}
}
if embed_params.enforce_chirality && got_coords {
if !eargs.chiral_centers.is_empty() {
got_coords = embedder_final_chiral_checks(positions, eargs, embed_params);
}
if got_coords && !eargs.stereo_double_bonds.is_empty() {
got_coords = embedder_double_bond_stereo_checks(positions, eargs, embed_params);
if !got_coords {
embedder_increment_failure(
embed_params,
EmbedFailureCause::BadDoubleBondStereo,
);
}
}
}
}
}
Ok(got_coords)
}
fn embedder_embed_points(
positions: &mut [Vec<f64>],
eargs: EmbedArgs<'_>,
embed_params: &mut EmbedParameters,
seed: i32,
end_time: Option<Instant>,
) -> Result<bool, DgBoundsError> {
assert!(
seed >= -1,
"random seed must either be positive, zero, or negative one"
);
if embed_params.max_iterations == 0 {
embed_params.max_iterations = 10 * positions.len() as u32;
}
if embed_params.use_random_coords {
embed_params.basin_thresh = 1.0e8;
}
let got_coords = if seed > -1 {
let mut rng = RdkitDistgeomMinStdRand::new_from_embed_points_seed(seed);
embedder_embed_points_with_rng(positions, &eargs, embed_params, end_time, &mut rng)
} else {
RDKIT_DISTGEOM_RNG.with(|rng| {
embedder_embed_points_with_rng(
positions,
&eargs,
embed_params,
end_time,
&mut *rng.borrow_mut(),
)
})
}?;
Ok(got_coords)
}
fn embedder_find_double_bonds(
mol: &Molecule,
double_bond_ends: &mut Vec<(usize, usize, usize)>,
stereo_double_bonds: &mut Vec<(Vec<usize>, i32)>,
coord_map: Option<&BTreeMap<i32, ForceFieldVec3>>,
) {
double_bond_ends.clear();
stereo_double_bonds.clear();
for bnd in mol.bonds() {
if bnd.order() != BondOrder::Double {
continue;
}
for atm in [bnd.begin().index(), bnd.end().index()] {
let atm_degree = neighbors_for_atom(mol, atm).len();
if atm_degree < 2 {
continue;
}
let oatm = if atm == bnd.begin().index() {
bnd.end().index()
} else {
bnd.begin().index()
};
for nbr in neighbors_for_atom(mol, atm) {
if nbr == oatm {
continue;
}
let Some(obnd) = bond_between(mol, atm, nbr) else {
continue;
};
if obnd.order() != BondOrder::Single && atm_degree == 2 {
continue;
}
double_bond_ends.push((nbr, atm, oatm));
}
}
if !matches!(bnd.stereo(), BondStereo::None | BondStereo::Any) {
let stereo_atoms = bnd
.stereo_atoms()
.expect("stereo double bond requires two controlling atoms");
if let Some(coord_map) = coord_map
&& coord_map.contains_key(&(stereo_atoms[0].index() as i32))
&& coord_map.contains_key(&(stereo_atoms[1].index() as i32))
{
continue;
}
let sign = if matches!(bnd.stereo(), BondStereo::Cis | BondStereo::Z) {
-1
} else {
1
};
stereo_double_bonds.push((
vec![
stereo_atoms[0].index(),
bnd.begin().index(),
bnd.end().index(),
stereo_atoms[1].index(),
],
sign,
));
}
}
}
fn embedder_find_chiral_sets(
mol: &Molecule,
chiral_centers: &mut VectChiralSet,
tetrahedral_centers: &mut VectChiralSet,
coord_map: Option<&BTreeMap<i32, ForceFieldVec3>>,
) {
let ring_info = mol.derived_cache().rings.as_ref();
for atom in mol.atoms() {
if atom.atomic_number() == 1 {
continue;
}
let chiral_type = atom.chiral_tag();
let atom_idx = atom.id().index();
let nbrs0 = neighbors_for_atom(mol, atom_idx);
if matches!(
chiral_type,
ChiralTag::TetrahedralCw | ChiralTag::TetrahedralCcw
) || ((atom.atomic_number() == 6 || atom.atomic_number() == 7) && nbrs0.len() == 4)
{
let mut nbrs = nbrs0;
assert!(nbrs.len() >= 3, "Cannot be a chiral center");
let mut vol_lower_bound = 5.0;
let mut vol_upper_bound = 100.0;
if nbrs.len() < 4 {
vol_lower_bound = 2.0;
nbrs.push(atom_idx);
}
let num_small_rings = ring_info
.map(|rings| {
atom_ring_sizes(rings, atom_idx)
.into_iter()
.filter(|size| *size < 5)
.count()
})
.unwrap_or(0);
let structure_flags = if num_small_rings > 1 {
ChiralSetStructureFlags::InFusedSmallRings as u64
} else {
0
};
if chiral_type == ChiralTag::TetrahedralCcw {
chiral_centers.push(Arc::new(ChiralSet::new(
atom_idx,
nbrs[0],
nbrs[1],
nbrs[2],
nbrs[3],
vol_lower_bound,
vol_upper_bound,
structure_flags,
)));
} else if chiral_type == ChiralTag::TetrahedralCw {
chiral_centers.push(Arc::new(ChiralSet::new(
atom_idx,
nbrs[0],
nbrs[1],
nbrs[2],
nbrs[3],
-vol_upper_bound,
-vol_lower_bound,
structure_flags,
)));
} else {
let coord_mapped =
coord_map.is_some_and(|map| map.contains_key(&(atom_idx as i32)));
let ring_excluded = ring_info.is_some_and(|rings| {
rings.num_atom_rings(AtomId::new(atom_idx)) < 2
|| rings.is_atom_in_ring_of_size(AtomId::new(atom_idx), 3)
});
if !(coord_mapped || ring_excluded) {
tetrahedral_centers.push(Arc::new(ChiralSet::new(
atom_idx,
nbrs[0],
nbrs[1],
nbrs[2],
nbrs[3],
0.0,
0.0,
structure_flags,
)));
}
}
}
}
for bond in mol.bonds() {
if !matches!(bond.stereo(), BondStereo::AtropCcw | BondStereo::AtropCw) {
continue;
}
let idx0 = bond.begin().index();
let idx1 = bond.end().index();
let atoms_and_bonds0 = embedder_atropisomer_neighbor_bonds(mol, idx0, bond.id().index());
let atoms_and_bonds1 = embedder_atropisomer_neighbor_bonds(mol, idx1, bond.id().index());
if atoms_and_bonds0.len() != 2 && atoms_and_bonds1.len() != 2 {
continue;
}
let nbr1 = embedder_bond_other_atom_idx(mol, atoms_and_bonds0[0], idx0);
let (nbr2, nbr3, nbr4) = if atoms_and_bonds0.len() == 2 {
let nbr2 = embedder_bond_other_atom_idx(mol, atoms_and_bonds0[1], idx0);
let nbr3 = embedder_bond_other_atom_idx(mol, atoms_and_bonds1[0], idx1);
let nbr4 = if atoms_and_bonds1.len() == 2 {
embedder_bond_other_atom_idx(mol, atoms_and_bonds1[1], idx1)
} else {
idx0
};
(nbr2, nbr3, nbr4)
} else {
let nbr2 = embedder_bond_other_atom_idx(mol, atoms_and_bonds1[0], idx1);
let nbr3 = embedder_bond_other_atom_idx(mol, atoms_and_bonds1[1], idx1);
(nbr2, nbr3, idx0)
};
let (vol_lower_bound, vol_upper_bound) = if bond.stereo() == BondStereo::AtropCcw {
(-100.0, -1.0)
} else {
(1.0, 100.0)
};
chiral_centers.push(Arc::new(ChiralSet::with_default_structure_flags(
idx0,
nbr1,
nbr2,
nbr3,
nbr4,
vol_lower_bound,
vol_upper_bound,
)));
}
}
fn embedder_atropisomer_neighbor_bonds(
mol: &Molecule,
focus_atom: usize,
atrop_bond: usize,
) -> Vec<usize> {
let mut bonds: Vec<usize> = mol
.bonds()
.iter()
.filter(|bond| {
bond.id().index() != atrop_bond
&& (bond.begin().index() == focus_atom || bond.end().index() == focus_atom)
})
.map(|bond| bond.id().index())
.collect();
if bonds.len() == 2 {
let other0 = embedder_bond_other_atom_idx(mol, bonds[0], focus_atom);
let other1 = embedder_bond_other_atom_idx(mol, bonds[1], focus_atom);
if other1 < other0 {
bonds.swap(0, 1);
}
}
bonds
}
fn embedder_bond_other_atom_idx(mol: &Molecule, bond_idx: usize, atom_idx: usize) -> usize {
let bond = &mol.bonds()[bond_idx];
if bond.begin().index() == atom_idx {
bond.end().index()
} else {
assert_eq!(bond.end().index(), atom_idx, "atom is not an endpoint");
bond.begin().index()
}
}
fn embedder_adjust_bounds_mat_from_coord_map(
mmat: &mut BoundsMatrix,
_num_atoms: usize,
coord_map: &BTreeMap<i32, ForceFieldVec3>,
) {
let entries: Vec<(usize, ForceFieldVec3)> = coord_map
.iter()
.map(|(idx, point)| {
let idx = usize::try_from(*idx).expect("coordMap atom index must be non-negative");
(idx, *point)
})
.collect();
for i in 0..entries.len() {
let (i_idx, i_point) = entries[i];
for &(j_idx, j_point) in entries.iter().skip(i + 1) {
let dist = (i_point - j_point).length();
mmat.set_upper(i_idx, j_idx, dist);
mmat.set_lower(i_idx, j_idx, dist);
}
}
}
fn embedder_init_etkdg(
mol: &Molecule,
params: &EmbedParameters,
etkdg_details: &mut CrystalFFDetails,
) -> Result<(), DgBoundsError> {
let n_atoms = mol.num_atoms();
let trace_row64 = row64_distgeom_trace_enabled(n_atoms);
if params.use_exp_torsion_angle_prefs || params.use_basic_knowledge {
let start = trace_row64.then(Instant::now);
get_experimental_torsions_without_bonds(
mol,
etkdg_details,
params.use_exp_torsion_angle_prefs,
params.use_small_ring_torsions,
params.use_macrocycle_torsions,
params.use_basic_knowledge,
params.et_version,
params.verbose,
)
.map_err(|err| DgBoundsError::GenerationFailed(err.to_string()))?;
if let Some(start) = start {
println!(
"row64_init_etkdg get_experimental_torsions={:.6}",
start.elapsed().as_secs_f64()
);
}
etkdg_details.atom_nums.resize(n_atoms, 0);
for i in 0..n_atoms {
etkdg_details.atom_nums[i] = i32::from(mol.atoms()[i].atomic_number());
}
}
etkdg_details.bounds_mat_force_scaling = params.bounds_mat_force_scaling;
Ok(())
}
fn embedder_setup_initial_bounds_matrix(
mol: &Molecule,
mmat: &mut BoundsMatrix,
coord_map: Option<&BTreeMap<i32, ForceFieldVec3>>,
params: &EmbedParameters,
etkdg_details: &mut CrystalFFDetails,
) -> Result<bool, DgBoundsError> {
let n_atoms = mol.num_atoms();
let trace_row64 = row64_distgeom_trace_enabled(n_atoms);
if params.use_exp_torsion_angle_prefs || params.use_basic_knowledge {
let start = trace_row64.then(Instant::now);
set_topol_bounds_with_outputs(
mol,
mmat,
&mut etkdg_details.bonds,
&mut etkdg_details.angles,
true,
false,
params.use_macrocycle14config,
params.force_trans_amides,
true,
true,
)?;
if let Some(start) = start {
println!(
"row64_setup_bounds set_topol_bounds_with_outputs={:.6}",
start.elapsed().as_secs_f64()
);
}
} else {
let start = trace_row64.then(Instant::now);
set_topol_bounds(
mol,
mmat,
true,
false,
params.use_macrocycle14config,
params.force_trans_amides,
true,
true,
)?;
if let Some(start) = start {
println!(
"row64_setup_bounds set_topol_bounds={:.6}",
start.elapsed().as_secs_f64()
);
}
}
let mut tol = 0.0;
if let Some(coord_map) = coord_map {
embedder_adjust_bounds_mat_from_coord_map(mmat, n_atoms, coord_map);
tol = 0.05;
}
let smooth_start = trace_row64.then(Instant::now);
if !triangle_smooth_bounds_shared(mmat, tol) {
if let Some(start) = smooth_start {
println!(
"row64_setup_bounds triangle_smooth_first={:.6} ok=0",
start.elapsed().as_secs_f64()
);
}
init_bounds_mat_shared(mmat, DEFAULT_LOWER, DEFAULT_UPPER);
let retry_start = trace_row64.then(Instant::now);
set_topol_bounds(
mol,
mmat,
false,
true,
params.use_macrocycle14config,
params.force_trans_amides,
true,
true,
)?;
if let Some(start) = retry_start {
println!(
"row64_setup_bounds retry_set_topol_bounds={:.6}",
start.elapsed().as_secs_f64()
);
}
if let Some(coord_map) = coord_map {
embedder_adjust_bounds_mat_from_coord_map(mmat, n_atoms, coord_map);
}
let second_smooth_start = trace_row64.then(Instant::now);
if !triangle_smooth_bounds_shared(mmat, tol) {
if let Some(start) = second_smooth_start {
println!(
"row64_setup_bounds triangle_smooth_second={:.6} ok=0",
start.elapsed().as_secs_f64()
);
}
if params.ignore_smoothing_failures {
init_bounds_mat_shared(mmat, DEFAULT_LOWER, DEFAULT_UPPER);
let fallback_start = trace_row64.then(Instant::now);
set_topol_bounds(
mol,
mmat,
false,
true,
params.use_macrocycle14config,
params.force_trans_amides,
true,
true,
)?;
if let Some(start) = fallback_start {
println!(
"row64_setup_bounds fallback_set_topol_bounds={:.6}",
start.elapsed().as_secs_f64()
);
}
if let Some(coord_map) = coord_map {
embedder_adjust_bounds_mat_from_coord_map(mmat, n_atoms, coord_map);
}
} else {
return Ok(false);
}
} else if let Some(start) = second_smooth_start {
println!(
"row64_setup_bounds triangle_smooth_second={:.6} ok=1",
start.elapsed().as_secs_f64()
);
}
} else if let Some(start) = smooth_start {
println!(
"row64_setup_bounds triangle_smooth_first={:.6} ok=1",
start.elapsed().as_secs_f64()
);
}
Ok(true)
}
fn embedder_fill_atom_positions(
pts: &mut [ForceFieldVec3],
conf: &Conformer3D,
_mol: &Molecule,
match_atoms: &[usize],
) {
assert_eq!(pts.len(), match_atoms.len(), "bad pts size");
for (i, &atom_idx) in match_atoms.iter().enumerate() {
let coord = conf.coordinates()[atom_idx];
pts[i] = ForceFieldVec3::new(coord[0], coord[1], coord[2]);
}
}
fn alignments_weighted_sum_of_points(points: &[ForceFieldVec3]) -> ForceFieldVec3 {
points
.iter()
.copied()
.fold(ForceFieldVec3::default(), |acc, point| acc + point)
}
fn alignments_weighted_sum_of_len_sq(points: &[ForceFieldVec3]) -> f64 {
points.iter().map(|point| point.length_sq()).sum()
}
fn alignments_compute_covariance_mat(
ref_points: &[ForceFieldVec3],
probe_points: &[ForceFieldVec3],
) -> [[f64; 3]; 3] {
assert_eq!(
ref_points.len(),
probe_points.len(),
"Number of points mismatch"
);
let mut cov_mat = [[0.0; 3]; 3];
for (rpt, ppt) in ref_points.iter().zip(probe_points) {
let w = 1.0;
cov_mat[0][0] += w * ppt.x * rpt.x;
cov_mat[0][1] += w * ppt.x * rpt.y;
cov_mat[0][2] += w * ppt.x * rpt.z;
cov_mat[1][0] += w * ppt.y * rpt.x;
cov_mat[1][1] += w * ppt.y * rpt.y;
cov_mat[1][2] += w * ppt.y * rpt.z;
cov_mat[2][0] += w * ppt.z * rpt.x;
cov_mat[2][1] += w * ppt.z * rpt.y;
cov_mat[2][2] += w * ppt.z * rpt.z;
}
cov_mat
}
fn alignments_convert_cov_mat_to_quad(
cov_mat: [[f64; 3]; 3],
rpt_sum: ForceFieldVec3,
ppt_sum: ForceFieldVec3,
wts_sum: f64,
) -> [[f64; 4]; 4] {
let temp = ppt_sum.x / wts_sum;
let px_rx = cov_mat[0][0] - temp * rpt_sum.x;
let px_ry = cov_mat[0][1] - temp * rpt_sum.y;
let px_rz = cov_mat[0][2] - temp * rpt_sum.z;
let temp = ppt_sum.y / wts_sum;
let py_rx = cov_mat[1][0] - temp * rpt_sum.x;
let py_ry = cov_mat[1][1] - temp * rpt_sum.y;
let py_rz = cov_mat[1][2] - temp * rpt_sum.z;
let temp = ppt_sum.z / wts_sum;
let pz_rx = cov_mat[2][0] - temp * rpt_sum.x;
let pz_ry = cov_mat[2][1] - temp * rpt_sum.y;
let pz_rz = cov_mat[2][2] - temp * rpt_sum.z;
let mut quad = [[0.0; 4]; 4];
quad[0][0] = -2.0 * (px_rx + py_ry + pz_rz);
quad[1][1] = -2.0 * (px_rx - py_ry - pz_rz);
quad[2][2] = -2.0 * (py_ry - pz_rz - px_rx);
quad[3][3] = -2.0 * (pz_rz - px_rx - py_ry);
quad[0][1] = 2.0 * (py_rz - pz_ry);
quad[1][0] = quad[0][1];
quad[0][2] = 2.0 * (pz_rx - px_rz);
quad[2][0] = quad[0][2];
quad[0][3] = 2.0 * (px_ry - py_rx);
quad[3][0] = quad[0][3];
quad[1][2] = -2.0 * (px_ry + py_rx);
quad[2][1] = quad[1][2];
quad[1][3] = -2.0 * (pz_rx + px_rz);
quad[3][1] = quad[1][3];
quad[2][3] = -2.0 * (py_rz + pz_ry);
quad[3][2] = quad[2][3];
quad
}
fn alignments_jacobi(
mut quad: [[f64; 4]; 4],
eigen_vals: &mut [f64; 4],
eigen_vecs: &mut [[f64; 4]; 4],
max_iter: u32,
) -> u32 {
const TOLERANCE: f64 = 1.0e-6;
for j in 0..=3 {
for row in eigen_vecs.iter_mut().take(4) {
row[j] = 0.0;
}
eigen_vecs[j][j] = 1.0;
eigen_vals[j] = quad[j][j];
}
let mut l = 0;
'outer: while l < max_iter {
let mut diag_norm = 0.0;
let mut off_diag_norm = 0.0;
for j in 0..=3 {
diag_norm += eigen_vals[j].abs();
for row in quad.iter().take(j) {
off_diag_norm += row[j].abs();
}
}
if diag_norm.abs() > 1.0e-16 && (off_diag_norm / diag_norm) <= TOLERANCE {
break 'outer;
}
for j in 1..=3 {
for i in 0..=j - 1 {
let b = quad[i][j];
if b.abs() > 0.0 {
let dma = eigen_vals[j] - eigen_vals[i];
let t = if dma.abs() + b.abs() <= dma.abs() {
b / dma
} else {
let q = 0.5 * dma / b;
let mut t = 1.0 / (q.abs() + (1.0 + q * q).sqrt());
if q < 0.0 {
t = -t;
}
t
};
let c = 1.0 / (t * t + 1.0).sqrt();
let s = t * c;
quad[i][j] = 0.0;
for k in 0..i {
let atemp = c * quad[k][i] - s * quad[k][j];
quad[k][j] = s * quad[k][i] + c * quad[k][j];
quad[k][i] = atemp;
}
for k in i + 1..=j - 1 {
let atemp = c * quad[i][k] - s * quad[k][j];
quad[k][j] = s * quad[i][k] + c * quad[k][j];
quad[i][k] = atemp;
}
for k in j + 1..=3 {
let atemp = c * quad[i][k] - s * quad[j][k];
quad[j][k] = s * quad[i][k] + c * quad[j][k];
quad[i][k] = atemp;
}
for row in eigen_vecs.iter_mut().take(4) {
let vtemp = c * row[i] - s * row[j];
row[j] = s * row[i] + c * row[j];
row[i] = vtemp;
}
let dtemp = c * c * eigen_vals[i] + s * s * eigen_vals[j] - 2.0 * c * s * b;
eigen_vals[j] = s * s * eigen_vals[i] + c * c * eigen_vals[j] + 2.0 * c * s * b;
eigen_vals[i] = dtemp;
}
}
}
l += 1;
}
for j in 0..=2 {
let mut k = j;
let mut dtemp = eigen_vals[k];
for (i, &val) in eigen_vals.iter().enumerate().take(4).skip(j + 1) {
if val < dtemp {
k = i;
dtemp = val;
}
}
if k > j {
eigen_vals[k] = eigen_vals[j];
eigen_vals[j] = dtemp;
for row in eigen_vecs.iter_mut().take(4) {
let dtemp = row[k];
row[k] = row[j];
row[j] = dtemp;
}
}
}
l + 1
}
fn alignments_align_points_ssr(
ref_points: &[ForceFieldVec3],
probe_points: &[ForceFieldVec3],
) -> f64 {
assert_eq!(
ref_points.len(),
probe_points.len(),
"Mismatch in number of points"
);
let npt = ref_points.len();
let wts_sum = npt as f64;
let rpt_sum = alignments_weighted_sum_of_points(ref_points);
let ppt_sum = alignments_weighted_sum_of_points(probe_points);
let rpt_sum_len_sq = alignments_weighted_sum_of_len_sq(ref_points);
let ppt_sum_len_sq = alignments_weighted_sum_of_len_sq(probe_points);
let cov_mat = alignments_compute_covariance_mat(ref_points, probe_points);
let quad = alignments_convert_cov_mat_to_quad(cov_mat, rpt_sum, ppt_sum, wts_sum);
let mut eigen_vecs = [[0.0; 4]; 4];
let mut eigen_vals = [0.0; 4];
alignments_jacobi(quad, &mut eigen_vals, &mut eigen_vecs, 50);
let mut ssr = eigen_vals[0] - (ppt_sum.length_sq() + rpt_sum.length_sq()) / wts_sum
+ rpt_sum_len_sq
+ ppt_sum_len_sq;
if ssr < 0.0 && ssr.abs() < 1.0e-6 {
ssr = 0.0;
}
ssr
}
fn embedder_is_conf_far_from_rest(
mol: &Molecule,
conf: &Conformer3D,
threshold: f64,
self_matches: &[Vec<usize>],
) -> bool {
assert!(!self_matches.is_empty(), "expected at least one self match");
let mut ref_points = vec![ForceFieldVec3::default(); self_matches[0].len()];
let mut prb_points = vec![ForceFieldVec3::default(); self_matches[0].len()];
embedder_fill_atom_positions(&mut ref_points, conf, mol, &self_matches[0]);
let ssr_thres = self_matches[0].len() as f64 * threshold * threshold;
for match_atoms in self_matches {
for existing in mol.conformers_3d() {
embedder_fill_atom_positions(&mut prb_points, existing, mol, match_atoms);
let ssr = alignments_align_points_ssr(&ref_points, &prb_points);
if ssr < ssr_thres {
return false;
}
}
}
true
}
fn molecule_without_hs_for_pruning(
mol: &Molecule,
) -> Result<(Molecule, Vec<usize>), DgBoundsError> {
let hydrogens: Vec<_> = mol
.atoms()
.iter()
.filter_map(|atom| (atom.atomic_number() == 1).then_some(atom.id()))
.collect();
let mut builder = mol.to_builder();
let old_to_new = builder.remove_atoms_for_construction(&hydrogens);
let stripped = builder.build()?;
let new_to_old = old_to_new
.iter()
.enumerate()
.filter_map(|(old_idx, new_idx)| new_idx.map(|idx| (idx.index(), old_idx)))
.fold(
vec![0; stripped.num_atoms()],
|mut acc, (new_idx, old_idx)| {
acc[new_idx] = old_idx;
acc
},
);
Ok((stripped, new_to_old))
}
fn embedder_get_mol_self_matches(
mol: &Molecule,
params: &EmbedParameters,
) -> Result<Vec<Vec<usize>>, DgBoundsError> {
let mut res = Vec::new();
if params.prune_rms_thresh != 0.0 && params.use_symmetry_for_pruning {
let (tmol, stripped_to_original) = molecule_without_hs_for_pruning(mol)?;
let prb_mol_symm = params
.symmetrize_conjugated_terminal_groups_for_pruning
.then(|| symmetrize_terminal_atoms_for_pruning(&tmol))
.transpose()?;
let prb_mol_for_match = prb_mol_symm.as_ref().unwrap_or(&tmol);
let stripped_match = stripped_to_original;
let sssps = SubstructMatchParams {
max_matches: 1000,
uniquify: false,
};
let heavy_atom_matches =
get_substruct_matches_with_params(&tmol, prb_mol_for_match, &sssps);
for match_result in heavy_atom_matches {
let mut mapped = Vec::with_capacity(match_result.atom_mapping.len());
for &midx_second in &match_result.atom_mapping {
mapped.push(stripped_match[midx_second]);
}
res.push(mapped);
}
} else if params.only_heavy_atoms_for_rms {
let atoms = mol
.atoms()
.iter()
.filter_map(|atom| (atom.atomic_number() != 1).then_some(atom.id().index()))
.collect();
res.push(atoms);
} else {
res.push((0..mol.num_atoms()).collect());
}
Ok(res)
}
fn symmetrize_terminal_atoms_for_pruning(mol: &Molecule) -> Result<Molecule, DgBoundsError> {
let mut symmetrized = mol.clone();
let mut matched_pairs = Vec::new();
for atom_idx in 0..mol.num_atoms() {
if !matches_rdkit_terminal_atom_pattern(mol, atom_idx) {
continue;
}
let neighbors = neighbors_for_atom(mol, atom_idx);
debug_assert_eq!(neighbors.len(), 1);
let neighbor_idx = neighbors[0];
if matches_rdkit_terminal_group_symmetry_query(mol, atom_idx, neighbor_idx) {
matched_pairs.push((atom_idx, neighbor_idx));
}
}
if matched_pairs.is_empty() {
return Ok(symmetrized);
}
{
let topology = symmetrized.topology_block_mut();
for (terminal_atom_idx, neighbor_idx) in matched_pairs {
topology.atoms[terminal_atom_idx].set_formal_charge(0);
let Some(bond) = topology.bonds.iter_mut().find(|bond| {
(bond.begin().index() == terminal_atom_idx && bond.end().index() == neighbor_idx)
|| (bond.begin().index() == neighbor_idx
&& bond.end().index() == terminal_atom_idx)
}) else {
return Err(DgBoundsError::GenerationFailed(
"MolAlign::details::symmetrizeTerminalAtoms could not find expected bond"
.to_string(),
));
};
bond.set_query(Some(QueryNode::predicate(BondQueryPredicate::OrderIn(
vec![BondOrder::Single, BondOrder::Double],
))));
}
}
symmetrized
.derived_cache_mut()
.invalidate(DerivedState::VALENCE | DerivedState::AROMATICITY | DerivedState::STEREO);
Ok(symmetrized)
}
fn matches_rdkit_terminal_group_symmetry_query(
mol: &Molecule,
terminal_atom_idx: usize,
neighbor_idx: usize,
) -> bool {
let Some(terminal_bond) = bond_between(mol, terminal_atom_idx, neighbor_idx) else {
return false;
};
let opposite_order = match terminal_bond.order() {
BondOrder::Single => BondOrder::Double,
BondOrder::Double => BondOrder::Single,
_ => return false,
};
for remote_terminal_idx in neighbors_for_atom(mol, neighbor_idx) {
if remote_terminal_idx == terminal_atom_idx
|| !matches_rdkit_terminal_atom_pattern(mol, remote_terminal_idx)
{
continue;
}
if let Some(remote_bond) = bond_between(mol, neighbor_idx, remote_terminal_idx)
&& remote_bond.order() == opposite_order
{
return true;
}
}
false
}
fn matches_rdkit_terminal_atom_pattern(mol: &Molecule, atom_idx: usize) -> bool {
let atom = &mol.atoms()[atom_idx];
matches!(atom.atomic_number(), 7 | 8) && neighbors_for_atom(mol, atom_idx).len() == 1
}
fn embedder_embed_helper(
thread_id: i32,
num_threads: i32,
eargs: &mut EmbedHelperArgs<'_>,
params: &mut EmbedParameters,
end_time: Option<Instant>,
) -> Result<(), DgBoundsError> {
let n_atoms = eargs.mmat.num_rows();
let dim = if eargs.four_d { 4 } else { 3 };
let mut positions = vec![vec![0.0; dim]; n_atoms];
for ci in 0..eargs.confs.len() {
if let Some(deadline) = end_time
&& Instant::now() > deadline
{
return Ok(());
}
if (ci % num_threads as usize) as i32 != thread_id {
continue;
}
if !eargs.confs_ok[ci] {
continue;
}
let new_seed = rdkit_embedder_conformer_seed(
params.random_seed,
ci,
params.enable_sequential_random_seeds,
);
let got_coords = embedder_embed_points(
&mut positions,
eargs.embed_args(),
params,
new_seed,
end_time,
)?;
if got_coords {
let conf = &mut eargs.confs[ci];
let mut frag_atom_idx = 0;
for i in 0..conf.coordinates().len() {
if eargs
.frag_mapping
.is_none_or(|mapping| mapping[i] == eargs.frag_idx)
{
conf.coordinates_mut()[i] = [
positions[frag_atom_idx][0],
positions[frag_atom_idx][1],
positions[frag_atom_idx][2],
];
frag_atom_idx += 1;
}
}
} else {
eargs.confs_ok[ci] = false;
}
}
Ok(())
}
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum DgBoundsError {
#[error("distance bounds matrix generation failed: {0}")]
GenerationFailed(String),
#[error("invalid embed parameters JSON: {0}")]
InvalidEmbedParametersJson(String),
#[error("molecule has no atoms")]
EmptyMolecule,
#[error(
"Only version 1 and 2 of the experimental torsion-angle preferences (ETversion) supported"
)]
UnsupportedEtVersion,
#[error("coordinate block update failed: {0}")]
CoordinateUpdateFailed(String),
#[error(
"RDKit clock-derived implicit conformer seed is unsupported on wasm32; set EmbedParameters.random_seed to a non-negative explicit seed"
)]
WasmImplicitClockSeedUnsupported,
#[error(transparent)]
UnsupportedFeature(#[from] crate::UnsupportedFeatureError),
}
impl From<MoleculeBuildError> for DgBoundsError {
fn from(value: MoleculeBuildError) -> Self {
Self::CoordinateUpdateFailed(value.to_string())
}
}
impl From<crate::RingFindingError> for DgBoundsError {
fn from(value: crate::RingFindingError) -> Self {
Self::GenerationFailed(value.to_string())
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u32)]
pub enum EmbedFailureCause {
InitialCoords = 0,
FirstMinimization = 1,
CheckTetrahedralCenters = 2,
CheckChiralCenters = 3,
MinimizeFourthDimension = 4,
EtkMinimization = 5,
FinalChiralBounds = 6,
FinalCenterInVolume = 7,
LinearDoubleBond = 8,
BadDoubleBondStereo = 9,
CheckChiralCenters2 = 10,
ExceededTimeout = 11,
EndOfEnum = 12,
}
impl EmbedFailureCause {
pub const ALL: [Self; 13] = [
Self::InitialCoords,
Self::FirstMinimization,
Self::CheckTetrahedralCenters,
Self::CheckChiralCenters,
Self::MinimizeFourthDimension,
Self::EtkMinimization,
Self::FinalChiralBounds,
Self::FinalCenterInVolume,
Self::LinearDoubleBond,
Self::BadDoubleBondStereo,
Self::CheckChiralCenters2,
Self::ExceededTimeout,
Self::EndOfEnum,
];
#[must_use]
pub const fn rdkit_ordinal(self) -> u32 {
self as u32
}
#[must_use]
pub const fn from_rdkit_ordinal(value: u32) -> Option<Self> {
match value {
0 => Some(Self::InitialCoords),
1 => Some(Self::FirstMinimization),
2 => Some(Self::CheckTetrahedralCenters),
3 => Some(Self::CheckChiralCenters),
4 => Some(Self::MinimizeFourthDimension),
5 => Some(Self::EtkMinimization),
6 => Some(Self::FinalChiralBounds),
7 => Some(Self::FinalCenterInVolume),
8 => Some(Self::LinearDoubleBond),
9 => Some(Self::BadDoubleBondStereo),
10 => Some(Self::CheckChiralCenters2),
11 => Some(Self::ExceededTimeout),
12 => Some(Self::EndOfEnum),
_ => None,
}
}
#[must_use]
pub const fn rdkit_name(self) -> &'static str {
match self {
Self::InitialCoords => "INITIAL_COORDS",
Self::FirstMinimization => "FIRST_MINIMIZATION",
Self::CheckTetrahedralCenters => "CHECK_TETRAHEDRAL_CENTERS",
Self::CheckChiralCenters => "CHECK_CHIRAL_CENTERS",
Self::MinimizeFourthDimension => "MINIMIZE_FOURTH_DIMENSION",
Self::EtkMinimization => "ETK_MINIMIZATION",
Self::FinalChiralBounds => "FINAL_CHIRAL_BOUNDS",
Self::FinalCenterInVolume => "FINAL_CENTER_IN_VOLUME",
Self::LinearDoubleBond => "LINEAR_DOUBLE_BOND",
Self::BadDoubleBondStereo => "BAD_DOUBLE_BOND_STEREO",
Self::CheckChiralCenters2 => "CHECK_CHIRAL_CENTERS2",
Self::ExceededTimeout => "EXCEEDED_TIMEOUT",
Self::EndOfEnum => "END_OF_ENUM",
}
}
}
#[derive(Clone)]
pub struct EmbedParameters {
pub max_iterations: u32,
pub num_threads: i32,
pub random_seed: i32,
pub clear_confs: bool,
pub use_random_coords: bool,
pub box_size_mult: f64,
pub rand_neg_eig: bool,
pub num_zero_fail: u32,
pub coord_map: Option<BTreeMap<i32, ForceFieldVec3>>,
pub optimizer_force_tol: f64,
pub ignore_smoothing_failures: bool,
pub enforce_chirality: bool,
pub use_exp_torsion_angle_prefs: bool,
pub use_basic_knowledge: bool,
pub verbose: bool,
pub basin_thresh: f64,
pub prune_rms_thresh: f64,
pub only_heavy_atoms_for_rms: bool,
pub et_version: u32,
bounds_mat: Option<Arc<BoundsMatrix>>,
pub embed_fragments_separately: bool,
pub use_small_ring_torsions: bool,
pub use_macrocycle_torsions: bool,
pub use_macrocycle14config: bool,
pub timeout: u32,
pub cpci: Option<BTreeMap<(u32, u32), f64>>,
pub callback: Option<fn(u32)>,
pub force_trans_amides: bool,
pub use_symmetry_for_pruning: bool,
pub bounds_mat_force_scaling: f64,
pub track_failures: bool,
pub failures: Vec<u32>,
pub enable_sequential_random_seeds: bool,
pub symmetrize_conjugated_terminal_groups_for_pruning: bool,
}
#[derive(Clone)]
pub struct EmbedMoleculeResult {
pub molecule: Molecule,
pub conf_id: i32,
pub params: EmbedParameters,
}
impl EmbedMoleculeResult {
#[must_use]
pub fn ok(&self) -> bool {
self.conf_id >= 0
}
}
#[derive(Clone)]
pub struct EmbedMultipleConfsResult {
pub molecule: Molecule,
pub conf_ids: Vec<i32>,
pub requested_num_confs: u32,
pub params: EmbedParameters,
}
impl EmbedMultipleConfsResult {
#[must_use]
pub fn generated_count(&self) -> usize {
self.conf_ids.len()
}
}
impl Default for EmbedParameters {
fn default() -> Self {
Self {
max_iterations: 0,
num_threads: 1,
random_seed: -1,
clear_confs: true,
use_random_coords: false,
box_size_mult: 2.0,
rand_neg_eig: true,
num_zero_fail: 1,
coord_map: None,
optimizer_force_tol: 1e-3,
ignore_smoothing_failures: false,
enforce_chirality: true,
use_exp_torsion_angle_prefs: false,
use_basic_knowledge: false,
verbose: false,
basin_thresh: 5.0,
prune_rms_thresh: -1.0,
only_heavy_atoms_for_rms: true,
et_version: 2,
bounds_mat: None,
embed_fragments_separately: true,
use_small_ring_torsions: false,
use_macrocycle_torsions: false,
use_macrocycle14config: false,
timeout: 0,
cpci: None,
callback: None,
force_trans_amides: true,
use_symmetry_for_pruning: true,
bounds_mat_force_scaling: 1.0,
track_failures: false,
failures: Vec::new(),
enable_sequential_random_seeds: false,
symmetrize_conjugated_terminal_groups_for_pruning: true,
}
}
}
impl EmbedParameters {
#[must_use]
pub fn new() -> Self {
Self::default()
}
#[allow(clippy::too_many_arguments)]
fn from_rdkit_constructor(
max_iterations: u32,
num_threads: i32,
random_seed: i32,
clear_confs: bool,
use_random_coords: bool,
box_size_mult: f64,
rand_neg_eig: bool,
num_zero_fail: u32,
coord_map: Option<BTreeMap<i32, ForceFieldVec3>>,
optimizer_force_tol: f64,
ignore_smoothing_failures: bool,
enforce_chirality: bool,
use_exp_torsion_angle_prefs: bool,
use_basic_knowledge: bool,
verbose: bool,
basin_thresh: f64,
prune_rms_thresh: f64,
only_heavy_atoms_for_rms: bool,
et_version: u32,
bounds_mat: Option<Arc<BoundsMatrix>>,
embed_fragments_separately: bool,
use_small_ring_torsions: bool,
use_macrocycle_torsions: bool,
use_macrocycle14config: bool,
timeout: u32,
cpci: Option<BTreeMap<(u32, u32), f64>>,
callback: Option<fn(u32)>,
) -> Self {
let mut params = Self {
max_iterations,
num_threads,
random_seed,
clear_confs,
use_random_coords,
box_size_mult,
rand_neg_eig,
num_zero_fail,
coord_map,
optimizer_force_tol,
ignore_smoothing_failures,
enforce_chirality,
use_exp_torsion_angle_prefs,
use_basic_knowledge,
verbose,
basin_thresh,
prune_rms_thresh,
only_heavy_atoms_for_rms,
et_version,
bounds_mat,
embed_fragments_separately,
use_small_ring_torsions,
use_macrocycle_torsions,
use_macrocycle14config,
timeout,
cpci,
callback,
..Self::default()
};
params.failures = Vec::new();
params
}
#[must_use]
pub fn kdg() -> Self {
Self::from_rdkit_constructor(
0, 1, -1, true, false, 2.0, true, 1, None, 1e-3, false, true, false, true, false, 5.0,
-1.0, true, 1, None, true, false, false, false, 0, None, None,
)
}
#[must_use]
pub fn etdg() -> Self {
Self::from_rdkit_constructor(
0, 1, -1, true, false, 2.0, true, 1, None, 1e-3, false, false, true, false, false, 5.0,
-1.0, true, 1, None, true, false, false, false, 0, None, None,
)
}
#[must_use]
pub fn etdg_v2() -> Self {
Self::from_rdkit_constructor(
0, 1, -1, true, false, 2.0, true, 1, None, 1e-3, false, false, true, false, false, 5.0,
-1.0, true, 2, None, true, false, false, false, 0, None, None,
)
}
#[must_use]
pub fn etkdg() -> Self {
Self::from_rdkit_constructor(
0, 1, -1, true, false, 2.0, true, 1, None, 1e-3, false, true, true, true, false, 5.0,
-1.0, true, 1, None, true, false, false, false, 0, None, None,
)
}
#[must_use]
pub fn etkdg_v2() -> Self {
Self::from_rdkit_constructor(
0, 1, -1, true, false, 2.0, true, 1, None, 1e-3, false, true, true, true, false, 5.0,
-1.0, true, 2, None, true, false, false, false, 0, None, None,
)
}
#[must_use]
pub fn etkdg_v3() -> Self {
Self::from_rdkit_constructor(
0, 1, -1, true, false, 2.0, true, 1, None, 1e-3, false, true, true, true, false, 5.0,
-1.0, true, 2, None, true, false, true, true, 0, None, None,
)
}
#[must_use]
pub fn sr_etkdg_v3() -> Self {
Self::from_rdkit_constructor(
0, 1, -1, true, false, 2.0, true, 1, None, 1e-3, false, true, true, true, false, 5.0,
-1.0, true, 2, None, true, true, false, false, 0, None, None,
)
}
pub fn update_from_json(&mut self, json: &str) -> Result<(), DgBoundsError> {
if json.is_empty() {
return Ok(());
}
let value: serde_json::Value = serde_json::from_str(json)
.map_err(|err| DgBoundsError::InvalidEmbedParametersJson(err.to_string()))?;
update_f64_field(&value, "basinThresh", &mut self.basin_thresh)?;
update_f64_field(
&value,
"boundsMatForceScaling",
&mut self.bounds_mat_force_scaling,
)?;
update_f64_field(&value, "boxSizeMult", &mut self.box_size_mult)?;
update_bool_field(&value, "clearConfs", &mut self.clear_confs)?;
update_bool_field(
&value,
"embedFragmentsSeparately",
&mut self.embed_fragments_separately,
)?;
update_bool_field(
&value,
"enableSequentialRandomSeeds",
&mut self.enable_sequential_random_seeds,
)?;
update_bool_field(&value, "enforceChirality", &mut self.enforce_chirality)?;
update_u32_field(&value, "ETversion", &mut self.et_version)?;
update_bool_field(&value, "forceTransAmides", &mut self.force_trans_amides)?;
update_bool_field(
&value,
"ignoreSmoothingFailures",
&mut self.ignore_smoothing_failures,
)?;
update_u32_field(&value, "maxIterations", &mut self.max_iterations)?;
update_i32_field(&value, "numThreads", &mut self.num_threads)?;
update_u32_field(&value, "numZeroFail", &mut self.num_zero_fail)?;
update_bool_field(
&value,
"onlyHeavyAtomsForRMS",
&mut self.only_heavy_atoms_for_rms,
)?;
update_f64_field(&value, "optimizerForceTol", &mut self.optimizer_force_tol)?;
update_f64_field(&value, "pruneRmsThresh", &mut self.prune_rms_thresh)?;
update_bool_field(&value, "randNegEig", &mut self.rand_neg_eig)?;
update_i32_field(&value, "randomSeed", &mut self.random_seed)?;
update_bool_field(
&value,
"symmetrizeConjugatedTerminalGroupsForPruning",
&mut self.symmetrize_conjugated_terminal_groups_for_pruning,
)?;
update_u32_field(&value, "timeout", &mut self.timeout)?;
update_bool_field(&value, "trackFailures", &mut self.track_failures)?;
update_bool_field(&value, "useBasicKnowledge", &mut self.use_basic_knowledge)?;
update_bool_field(
&value,
"useExpTorsionAnglePrefs",
&mut self.use_exp_torsion_angle_prefs,
)?;
update_bool_field(
&value,
"useMacrocycle14config",
&mut self.use_macrocycle14config,
)?;
update_bool_field(
&value,
"useMacrocycleTorsions",
&mut self.use_macrocycle_torsions,
)?;
update_bool_field(&value, "useRandomCoords", &mut self.use_random_coords)?;
update_bool_field(
&value,
"useSmallRingTorsions",
&mut self.use_small_ring_torsions,
)?;
update_bool_field(
&value,
"useSymmetryForPruning",
&mut self.use_symmetry_for_pruning,
)?;
update_bool_field(&value, "verbose", &mut self.verbose)?;
if let Some(coord_map) = value.get("coordMap") {
self.coord_map = Some(parse_embed_parameters_coord_map(coord_map)?);
}
Ok(())
}
#[must_use]
pub fn to_json(&self) -> String {
let mut fields = Vec::with_capacity(31);
push_json_field(&mut fields, "basinThresh", self.basin_thresh);
push_json_field(
&mut fields,
"boundsMatForceScaling",
self.bounds_mat_force_scaling,
);
push_json_field(&mut fields, "boxSizeMult", self.box_size_mult);
push_json_field(&mut fields, "clearConfs", self.clear_confs);
push_json_field(
&mut fields,
"embedFragmentsSeparately",
self.embed_fragments_separately,
);
push_json_field(
&mut fields,
"enableSequentialRandomSeeds",
self.enable_sequential_random_seeds,
);
push_json_field(&mut fields, "enforceChirality", self.enforce_chirality);
push_json_field(&mut fields, "ETversion", self.et_version);
push_json_field(&mut fields, "forceTransAmides", self.force_trans_amides);
push_json_field(
&mut fields,
"ignoreSmoothingFailures",
self.ignore_smoothing_failures,
);
push_json_field(&mut fields, "maxIterations", self.max_iterations);
push_json_field(&mut fields, "numThreads", self.num_threads);
push_json_field(&mut fields, "numZeroFail", self.num_zero_fail);
push_json_field(
&mut fields,
"onlyHeavyAtomsForRMS",
self.only_heavy_atoms_for_rms,
);
push_json_field(&mut fields, "optimizerForceTol", self.optimizer_force_tol);
push_json_field(&mut fields, "pruneRmsThresh", self.prune_rms_thresh);
push_json_field(&mut fields, "randNegEig", self.rand_neg_eig);
push_json_field(&mut fields, "randomSeed", self.random_seed);
push_json_field(
&mut fields,
"symmetrizeConjugatedTerminalGroupsForPruning",
self.symmetrize_conjugated_terminal_groups_for_pruning,
);
push_json_field(&mut fields, "timeout", self.timeout);
push_json_field(&mut fields, "trackFailures", self.track_failures);
push_json_field(&mut fields, "useBasicKnowledge", self.use_basic_knowledge);
push_json_field(
&mut fields,
"useExpTorsionAnglePrefs",
self.use_exp_torsion_angle_prefs,
);
push_json_field(
&mut fields,
"useMacrocycle14config",
self.use_macrocycle14config,
);
push_json_field(
&mut fields,
"useMacrocycleTorsions",
self.use_macrocycle_torsions,
);
push_json_field(&mut fields, "useRandomCoords", self.use_random_coords);
push_json_field(
&mut fields,
"useSmallRingTorsions",
self.use_small_ring_torsions,
);
push_json_field(
&mut fields,
"useSymmetryForPruning",
self.use_symmetry_for_pruning,
);
push_json_field(&mut fields, "verbose", self.verbose);
if let Some(coord_map) = &self.coord_map {
let entries = coord_map
.iter()
.map(|(atom_idx, point)| {
format!(
"\"{}\":[\"{:.6}\",\"{:.6}\",\"{:.6}\"]",
atom_idx, point.x, point.y, point.z
)
})
.collect::<Vec<_>>()
.join(",");
fields.push(format!("\"coordMap\":{{{entries}}}"));
}
if let Some(bounds_mat) = &self.bounds_mat {
let rows = (0..bounds_mat.n)
.map(|i| {
let row = (0..bounds_mat.n)
.map(|j| format!("\"{}\"", bounds_mat.get_val(i, j)))
.collect::<Vec<_>>()
.join(",");
format!("[{row}]")
})
.collect::<Vec<_>>()
.join(",");
fields.push(format!("\"boundsMatrix\":[{rows}]"));
}
format!("{{{}}}", fields.join(","))
}
}
fn embed_parameters_json_field<'a>(
value: &'a serde_json::Value,
name: &str,
) -> Option<&'a serde_json::Value> {
value.as_object().and_then(|object| object.get(name))
}
fn push_json_field<T: ToString>(fields: &mut Vec<String>, name: &str, value: T) {
fields.push(format!("\"{}\":\"{}\"", name, value.to_string()));
}
fn embed_parameters_invalid_json(name: &str, expected: &str) -> DgBoundsError {
DgBoundsError::InvalidEmbedParametersJson(format!("{name} must be {expected}"))
}
fn json_value_as_f64(name: &str, value: &serde_json::Value) -> Result<f64, DgBoundsError> {
if let Some(number) = value.as_f64() {
Ok(number)
} else if let Some(text) = value.as_str() {
text.parse::<f64>()
.map_err(|_| embed_parameters_invalid_json(name, "a floating-point number"))
} else {
Err(embed_parameters_invalid_json(
name,
"a floating-point number",
))
}
}
fn json_value_as_i32(name: &str, value: &serde_json::Value) -> Result<i32, DgBoundsError> {
if let Some(number) = value.as_i64() {
i32::try_from(number).map_err(|_| embed_parameters_invalid_json(name, "a 32-bit integer"))
} else if let Some(text) = value.as_str() {
text.parse::<i32>()
.map_err(|_| embed_parameters_invalid_json(name, "a 32-bit integer"))
} else {
Err(embed_parameters_invalid_json(name, "a 32-bit integer"))
}
}
fn json_value_as_u32(name: &str, value: &serde_json::Value) -> Result<u32, DgBoundsError> {
if let Some(number) = value.as_u64() {
u32::try_from(number)
.map_err(|_| embed_parameters_invalid_json(name, "an unsigned 32-bit integer"))
} else if let Some(text) = value.as_str() {
text.parse::<u32>()
.map_err(|_| embed_parameters_invalid_json(name, "an unsigned 32-bit integer"))
} else {
Err(embed_parameters_invalid_json(
name,
"an unsigned 32-bit integer",
))
}
}
fn json_value_as_bool(name: &str, value: &serde_json::Value) -> Result<bool, DgBoundsError> {
if let Some(value) = value.as_bool() {
Ok(value)
} else if let Some(number) = value.as_u64() {
match number {
0 => Ok(false),
1 => Ok(true),
_ => Err(embed_parameters_invalid_json(name, "a boolean")),
}
} else if let Some(text) = value.as_str() {
match text {
"0" => Ok(false),
"1" => Ok(true),
"false" => Ok(false),
"true" => Ok(true),
_ => Err(embed_parameters_invalid_json(name, "a boolean")),
}
} else {
Err(embed_parameters_invalid_json(name, "a boolean"))
}
}
fn update_f64_field(
value: &serde_json::Value,
name: &str,
target: &mut f64,
) -> Result<(), DgBoundsError> {
if let Some(field) = embed_parameters_json_field(value, name) {
*target = json_value_as_f64(name, field)?;
}
Ok(())
}
fn update_i32_field(
value: &serde_json::Value,
name: &str,
target: &mut i32,
) -> Result<(), DgBoundsError> {
if let Some(field) = embed_parameters_json_field(value, name) {
*target = json_value_as_i32(name, field)?;
}
Ok(())
}
fn update_u32_field(
value: &serde_json::Value,
name: &str,
target: &mut u32,
) -> Result<(), DgBoundsError> {
if let Some(field) = embed_parameters_json_field(value, name) {
*target = json_value_as_u32(name, field)?;
}
Ok(())
}
fn update_bool_field(
value: &serde_json::Value,
name: &str,
target: &mut bool,
) -> Result<(), DgBoundsError> {
if let Some(field) = embed_parameters_json_field(value, name) {
*target = json_value_as_bool(name, field)?;
}
Ok(())
}
fn parse_embed_parameters_coord_map(
value: &serde_json::Value,
) -> Result<BTreeMap<i32, ForceFieldVec3>, DgBoundsError> {
let object = value
.as_object()
.ok_or_else(|| embed_parameters_invalid_json("coordMap", "an object"))?;
let mut coord_map = BTreeMap::new();
for (key, point_value) in object {
let atom_idx = key
.parse::<i32>()
.map_err(|_| embed_parameters_invalid_json("coordMap key", "a 32-bit integer"))?;
let point = point_value
.as_array()
.ok_or_else(|| embed_parameters_invalid_json("coordMap value", "an array"))?;
if point.len() < 3 {
return Err(embed_parameters_invalid_json(
"coordMap value",
"an array with at least three coordinates",
));
}
coord_map.insert(
atom_idx,
ForceFieldVec3::new(
json_value_as_f64("coordMap x", &point[0])?,
json_value_as_f64("coordMap y", &point[1])?,
json_value_as_f64("coordMap z", &point[2])?,
),
);
}
Ok(coord_map)
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u64)]
pub enum ChiralSetStructureFlags {
InFusedSmallRings = 1 << 0,
}
#[derive(Debug, Clone, PartialEq)]
pub struct ChiralSet {
pub idx0: usize,
pub idx1: usize,
pub idx2: usize,
pub idx3: usize,
pub idx4: usize,
pub volume_lower_bound: f64,
pub volume_upper_bound: f64,
pub structure_flags: u64,
}
impl ChiralSet {
#[must_use]
pub fn new(
pid0: usize,
pid1: usize,
pid2: usize,
pid3: usize,
pid4: usize,
lower_vol_bound: f64,
upper_vol_bound: f64,
structure_flags: u64,
) -> Self {
assert!(lower_vol_bound <= upper_vol_bound, "Inconsistent bounds\n");
Self {
idx0: pid0,
idx1: pid1,
idx2: pid2,
idx3: pid3,
idx4: pid4,
volume_lower_bound: lower_vol_bound,
volume_upper_bound: upper_vol_bound,
structure_flags,
}
}
#[must_use]
pub fn with_default_structure_flags(
pid0: usize,
pid1: usize,
pid2: usize,
pid3: usize,
pid4: usize,
lower_vol_bound: f64,
upper_vol_bound: f64,
) -> Self {
Self::new(
pid0,
pid1,
pid2,
pid3,
pid4,
lower_vol_bound,
upper_vol_bound,
0,
)
}
#[must_use]
pub fn get_upper_volume_bound(&self) -> f64 {
self.volume_upper_bound
}
#[must_use]
pub fn get_lower_volume_bound(&self) -> f64 {
self.volume_lower_bound
}
}
pub type ChiralSetPtr = Arc<ChiralSet>;
pub type VectChiralSet = Vec<ChiralSetPtr>;
#[must_use]
pub fn calc_chiral_volume_flat(
idx1: usize,
idx2: usize,
idx3: usize,
idx4: usize,
pos: &[f64],
dim: usize,
) -> f64 {
let v1 = ForceFieldVec3::new(
pos[idx1 * dim] - pos[idx4 * dim],
pos[idx1 * dim + 1] - pos[idx4 * dim + 1],
pos[idx1 * dim + 2] - pos[idx4 * dim + 2],
);
let v2 = ForceFieldVec3::new(
pos[idx2 * dim] - pos[idx4 * dim],
pos[idx2 * dim + 1] - pos[idx4 * dim + 1],
pos[idx2 * dim + 2] - pos[idx4 * dim + 2],
);
let v3 = ForceFieldVec3::new(
pos[idx3 * dim] - pos[idx4 * dim],
pos[idx3 * dim + 1] - pos[idx4 * dim + 1],
pos[idx3 * dim + 2] - pos[idx4 * dim + 2],
);
v1.dot_product(v2.cross_product(v3))
}
#[must_use]
pub fn calc_chiral_volume_points(
idx1: usize,
idx2: usize,
idx3: usize,
idx4: usize,
pts: &[ForceFieldVec3],
) -> f64 {
let v1 = pts[idx1] - pts[idx4];
let v2 = pts[idx2] - pts[idx4];
let v3 = pts[idx3] - pts[idx4];
v1.dot_product(v2.cross_product(v3))
}
#[derive(Debug, Clone, PartialEq)]
pub struct ChiralViolationContribsParams {
pub idx1: usize,
pub idx2: usize,
pub idx3: usize,
pub idx4: usize,
pub vol_upper: f64,
pub vol_lower: f64,
pub weight: f64,
}
impl ChiralViolationContribsParams {
#[must_use]
pub fn new(
idx1: usize,
idx2: usize,
idx3: usize,
idx4: usize,
vol_upper: f64,
vol_lower: f64,
weight: f64,
) -> Self {
Self {
idx1,
idx2,
idx3,
idx4,
vol_upper,
vol_lower,
weight,
}
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct ChiralViolationContribs {
owner: *const ForceField,
contribs: Vec<ChiralViolationContribsParams>,
}
impl Default for ChiralViolationContribs {
fn default() -> Self {
Self {
owner: std::ptr::null(),
contribs: Vec::new(),
}
}
}
impl ChiralViolationContribs {
#[must_use]
pub fn new(owner: &ForceField) -> Self {
Self {
owner,
contribs: Vec::new(),
}
}
pub fn add_contrib(&mut self, cset: &ChiralSet, weight: f64) {
let owner = self.owner();
assert!(cset.idx1 < owner.positions().len());
assert!(cset.idx2 < owner.positions().len());
assert!(cset.idx3 < owner.positions().len());
assert!(cset.idx4 < owner.positions().len());
self.contribs.push(ChiralViolationContribsParams::new(
cset.idx1,
cset.idx2,
cset.idx3,
cset.idx4,
cset.get_upper_volume_bound(),
cset.get_lower_volume_bound(),
weight,
));
}
#[must_use]
pub fn empty(&self) -> bool {
self.contribs.is_empty()
}
#[must_use]
pub fn size(&self) -> usize {
self.contribs.len()
}
#[must_use]
pub fn contribs(&self) -> &[ChiralViolationContribsParams] {
&self.contribs
}
fn owner(&self) -> &ForceField {
assert!(!self.owner.is_null(), "no owner");
unsafe { &*self.owner }
}
}
impl ForceFieldContrib for ChiralViolationContribs {
fn copy(&self) -> Box<dyn ForceFieldContrib> {
Box::new(self.clone())
}
fn set_force_field(&mut self, owner: *const ForceField) {
self.owner = owner;
}
fn get_energy(&self, pos: &[f64]) -> f64 {
assert!(!pos.is_empty(), "bad vector");
let dim = self.owner().dimension();
let mut res = 0.0;
for c in &self.contribs {
let vol = calc_chiral_volume_flat(c.idx1, c.idx2, c.idx3, c.idx4, pos, dim);
if vol < c.vol_lower {
res += c.weight * (vol - c.vol_lower) * (vol - c.vol_lower);
} else if vol > c.vol_upper {
res += c.weight * (vol - c.vol_upper) * (vol - c.vol_upper);
}
}
res
}
fn get_grad(&self, pos: &[f64], grad: &mut [f64]) {
assert!(!pos.is_empty(), "bad vector");
assert!(!grad.is_empty(), "bad vector");
let dim = self.owner().dimension();
for c in &self.contribs {
let v1 = ForceFieldVec3::new(
pos[c.idx1 * dim] - pos[c.idx4 * dim],
pos[c.idx1 * dim + 1] - pos[c.idx4 * dim + 1],
pos[c.idx1 * dim + 2] - pos[c.idx4 * dim + 2],
);
let v2 = ForceFieldVec3::new(
pos[c.idx2 * dim] - pos[c.idx4 * dim],
pos[c.idx2 * dim + 1] - pos[c.idx4 * dim + 1],
pos[c.idx2 * dim + 2] - pos[c.idx4 * dim + 2],
);
let v3 = ForceFieldVec3::new(
pos[c.idx3 * dim] - pos[c.idx4 * dim],
pos[c.idx3 * dim + 1] - pos[c.idx4 * dim + 1],
pos[c.idx3 * dim + 2] - pos[c.idx4 * dim + 2],
);
let vol = v1.dot_product(v2.cross_product(v3));
let pre_factor = if vol < c.vol_lower {
c.weight * (vol - c.vol_lower)
} else if vol > c.vol_upper {
c.weight * (vol - c.vol_upper)
} else {
continue;
};
grad[dim * c.idx1] += pre_factor * (v2.y * v3.z - v3.y * v2.z);
grad[dim * c.idx1 + 1] += pre_factor * (v3.x * v2.z - v2.x * v3.z);
grad[dim * c.idx1 + 2] += pre_factor * (v2.x * v3.y - v3.x * v2.y);
grad[dim * c.idx2] += pre_factor * (v3.y * v1.z - v3.z * v1.y);
grad[dim * c.idx2 + 1] += pre_factor * (v3.z * v1.x - v3.x * v1.z);
grad[dim * c.idx2 + 2] += pre_factor * (v3.x * v1.y - v3.y * v1.x);
grad[dim * c.idx3] += pre_factor * (v2.z * v1.y - v2.y * v1.z);
grad[dim * c.idx3 + 1] += pre_factor * (v2.x * v1.z - v2.z * v1.x);
grad[dim * c.idx3 + 2] += pre_factor * (v2.y * v1.x - v2.x * v1.y);
grad[dim * c.idx4] += pre_factor
* (pos[c.idx1 * dim + 2] * (pos[c.idx2 * dim + 1] - pos[c.idx3 * dim + 1])
+ pos[c.idx2 * dim + 2] * (pos[c.idx3 * dim + 1] - pos[c.idx1 * dim + 1])
+ pos[c.idx3 * dim + 2] * (pos[c.idx1 * dim + 1] - pos[c.idx2 * dim + 1]));
grad[dim * c.idx4 + 1] += pre_factor
* (pos[c.idx1 * dim] * (pos[c.idx2 * dim + 2] - pos[c.idx3 * dim + 2])
+ pos[c.idx2 * dim] * (pos[c.idx3 * dim + 2] - pos[c.idx1 * dim + 2])
+ pos[c.idx3 * dim] * (pos[c.idx1 * dim + 2] - pos[c.idx2 * dim + 2]));
grad[dim * c.idx4 + 2] += pre_factor
* (pos[c.idx1 * dim + 1] * (pos[c.idx2 * dim] - pos[c.idx3 * dim])
+ pos[c.idx2 * dim + 1] * (pos[c.idx3 * dim] - pos[c.idx1 * dim])
+ pos[c.idx3 * dim + 1] * (pos[c.idx1 * dim] - pos[c.idx2 * dim]));
}
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct DistViolationContribsParams {
pub idx1: usize,
pub idx2: usize,
pub ub: f64,
pub lb: f64,
pub ub2: f64,
pub lb2: f64,
pub weight: f64,
}
impl DistViolationContribsParams {
#[must_use]
pub fn new(idx1: usize, idx2: usize, ub: f64, lb: f64, weight: f64) -> Self {
Self {
idx1,
idx2,
ub,
lb,
ub2: ub * ub,
lb2: lb * lb,
weight,
}
}
}
fn dist_violation_distance2(idx1: usize, idx2: usize, pos: &[f64], dim: usize) -> f64 {
let mut d2 = 0.0;
for i in 0..dim {
let d = pos[dim * idx1 + i] - pos[dim * idx2 + i];
d2 += d * d;
}
d2
}
fn dist_violation_distance(idx1: usize, idx2: usize, pos: &[f64], dim: usize) -> f64 {
dist_violation_distance2(idx1, idx2, pos, dim).sqrt()
}
#[derive(Debug, Clone, PartialEq)]
pub struct DistViolationContribs {
owner: *const ForceField,
contribs: Vec<DistViolationContribsParams>,
}
impl Default for DistViolationContribs {
fn default() -> Self {
Self {
owner: std::ptr::null(),
contribs: Vec::new(),
}
}
}
impl DistViolationContribs {
#[must_use]
pub fn new(owner: &ForceField) -> Self {
Self {
owner,
contribs: Vec::new(),
}
}
pub fn add_contrib(&mut self, idx1: usize, idx2: usize, ub: f64, lb: f64, weight: f64) {
self.contribs
.push(DistViolationContribsParams::new(idx1, idx2, ub, lb, weight));
}
#[must_use]
pub fn empty(&self) -> bool {
self.contribs.is_empty()
}
#[must_use]
pub fn size(&self) -> usize {
self.contribs.len()
}
#[must_use]
pub fn contribs(&self) -> &[DistViolationContribsParams] {
&self.contribs
}
fn owner(&self) -> &ForceField {
assert!(!self.owner.is_null(), "no owner");
unsafe { &*self.owner }
}
}
impl ForceFieldContrib for DistViolationContribs {
fn copy(&self) -> Box<dyn ForceFieldContrib> {
Box::new(self.clone())
}
fn set_force_field(&mut self, owner: *const ForceField) {
self.owner = owner;
}
fn get_energy(&self, pos: &[f64]) -> f64 {
assert!(!pos.is_empty(), "bad vector");
let dim = self.owner().dimension();
let mut accum = 0.0;
for c in &self.contribs {
let d2 = dist_violation_distance2(c.idx1, c.idx2, pos, dim);
let mut val = 0.0;
if d2 > c.ub2 {
val = d2 / c.ub2 - 1.0;
} else if d2 < c.lb2 {
val = (2.0 * c.lb2) / (c.lb2 + d2) - 1.0;
}
if val > 0.0 {
accum += c.weight * val * val;
}
}
accum
}
fn get_grad(&self, pos: &[f64], grad: &mut [f64]) {
assert!(!pos.is_empty(), "bad vector");
assert!(!grad.is_empty(), "bad vector");
let dim = self.owner().dimension();
for c in &self.contribs {
let d2 = dist_violation_distance2(c.idx1, c.idx2, pos, dim);
let d;
let pre_factor;
if d2 > c.ub2 {
d = dist_violation_distance(c.idx1, c.idx2, pos, dim);
pre_factor = 4.0 * ((d * d) / c.ub2 - 1.0) * (d / c.ub2);
} else if d2 < c.lb2 {
d = dist_violation_distance(c.idx1, c.idx2, pos, dim);
let l2d2 = d2 + c.lb2;
pre_factor = 8.0 * c.lb2 * d * (1.0 - 2.0 * c.lb2 / l2d2) / (l2d2 * l2d2);
} else {
continue;
}
for i in 0..dim {
let p1 = dim * c.idx1 + i;
let p2 = dim * c.idx2 + i;
let d_grad = if d > 0.0 {
c.weight * pre_factor * (pos[p1] - pos[p2]) / d
} else {
c.weight * pre_factor * (pos[p1] - pos[p2])
};
grad[p1] += d_grad;
grad[p2] -= d_grad;
}
}
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct FourthDimContribsParams {
pub idx: usize,
pub weight: f64,
}
impl FourthDimContribsParams {
#[must_use]
pub fn new(idx: usize, weight: f64) -> Self {
Self { idx, weight }
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct FourthDimContribs {
owner: *const ForceField,
contribs: Vec<FourthDimContribsParams>,
}
impl Default for FourthDimContribs {
fn default() -> Self {
Self {
owner: std::ptr::null(),
contribs: Vec::new(),
}
}
}
impl FourthDimContribs {
#[must_use]
pub fn new(owner: &ForceField) -> Self {
assert_eq!(owner.dimension(), 4, "force field has wrong dimension");
Self {
owner,
contribs: Vec::new(),
}
}
pub fn add_contrib(&mut self, idx: usize, weight: f64) {
self.contribs
.push(FourthDimContribsParams::new(idx, weight));
}
#[must_use]
pub fn empty(&self) -> bool {
self.contribs.is_empty()
}
#[must_use]
pub fn size(&self) -> usize {
self.contribs.len()
}
#[must_use]
pub fn contribs(&self) -> &[FourthDimContribsParams] {
&self.contribs
}
fn owner(&self) -> &ForceField {
assert!(!self.owner.is_null(), "no owner");
unsafe { &*self.owner }
}
}
impl ForceFieldContrib for FourthDimContribs {
fn copy(&self) -> Box<dyn ForceFieldContrib> {
Box::new(self.clone())
}
fn set_force_field(&mut self, owner: *const ForceField) {
self.owner = owner;
}
fn get_energy(&self, pos: &[f64]) -> f64 {
assert!(!pos.is_empty(), "bad vector");
let _owner = self.owner();
let ffdim = 4;
let mut res = 0.0;
for contrib in &self.contribs {
let pid = contrib.idx * ffdim + 3;
res += contrib.weight * pos[pid] * pos[pid];
}
res
}
fn get_grad(&self, pos: &[f64], grad: &mut [f64]) {
assert!(!pos.is_empty(), "bad vector");
assert!(!grad.is_empty(), "bad vector");
let _owner = self.owner();
let ffdim = 4;
for contrib in &self.contribs {
let pid = contrib.idx * ffdim + 3;
grad[pid] += contrib.weight * pos[pid];
}
}
}
fn vdw_radius(atomic_num: u8) -> f64 {
match atomic_num {
0 => 0.0, 1 => 1.2, 2 => 1.4, 3 => 2.2, 4 => 1.9, 5 => 1.8, 6 => 1.7, 7 => 1.6, 8 => 1.55, 9 => 1.5, 10 => 1.54, 11 => 2.4, 12 => 2.2, 13 => 2.1, 14 => 2.1, 15 => 1.95, 16 => 1.8, 17 => 1.8, 18 => 1.88, 19 => 2.8, 20 => 2.4, 21 => 2.3, 22 => 2.15, 23 => 2.05, 24 => 2.05, 25 => 2.05, 26 => 2.05, 27 => 2.0, 28 => 2.0, 29 => 2.0, 30 => 2.1, 31 => 2.1, 32 => 2.1, 33 => 2.05, 34 => 1.9, 35 => 1.9, 36 => 2.02, 37 => 2.9, 38 => 2.55, 39 => 2.4, 40 => 2.3, 41 => 2.15, 42 => 2.1, 43 => 2.05, 44 => 2.05, 45 => 2.0, 46 => 2.05, 47 => 2.1, 48 => 2.2, 49 => 2.2, 50 => 2.25, 51 => 2.2, 52 => 2.1, 53 => 2.1, 54 => 2.16, 55 => 3.0, 56 => 2.7, 57 => 2.5, 58 => 2.48, 59 => 2.47, 60 => 2.45, 61 => 2.43, 62 => 2.42, 63 => 2.4, 64 => 2.38, 65 => 2.37, 66 => 2.35, 67 => 2.33, 68 => 2.32, 69 => 2.3, 70 => 2.28, 71 => 2.27, 72 => 2.25, 73 => 2.2, 74 => 2.1, 75 => 2.05, 76 => 2.0, 77 => 2.0, 78 => 2.05, 79 => 2.1, 80 => 2.05, 81 => 2.2, 82 => 2.3, 83 => 2.3, 84 => 2.0, 85 => 2.0, _ => 2.0, }
}
fn is_hbond_acceptor(atomic_num: u8) -> bool {
atomic_num == 7 || atomic_num == 8
}
fn is_hbond_donor(atom: &Atom, total_num_hydrogens: u32) -> bool {
is_hbond_acceptor(atom.atomic_number()) && total_num_hydrogens > 0
}
fn is_h_in_hbond_donor(mol: &Molecule, atom_idx: usize) -> bool {
if mol.atoms()[atom_idx].atomic_number() != 1 {
return false;
}
for bond in mol.bonds() {
let other = if bond.begin().index() == atom_idx {
bond.end().index()
} else if bond.end().index() == atom_idx {
bond.begin().index()
} else {
continue;
};
let an = mol.atoms()[other].atomic_number();
if an == 7 || an == 8 {
return true;
}
}
false
}
fn rdkit_default_valence(atomic_num: u8) -> Option<i32> {
let vals = rdkit_valence_list(atomic_num).ok()??;
vals.iter().copied().find(|v| *v >= 0)
}
fn rdkit_n_outer_electrons(atomic_num: u8) -> Option<i32> {
crate::chemistry::valence::periodic_table_outer_electrons(atomic_num).ok()
}
fn bond_valence_contrib_for_atom(bond: &Bond, atom_index: usize) -> f64 {
if bond.begin().index() != atom_index && bond.end().index() != atom_index {
return 0.0;
}
match bond.order() {
BondOrder::Null | BondOrder::Zero | BondOrder::Ionic => 0.0,
BondOrder::Single => 1.0,
BondOrder::Double => 2.0,
BondOrder::Triple => 3.0,
BondOrder::Quadruple => 4.0,
BondOrder::Quintuple => 5.0,
BondOrder::Hextuple => 6.0,
BondOrder::OneAndHalf => 1.5,
BondOrder::TwoAndHalf => 2.5,
BondOrder::ThreeAndHalf => 3.5,
BondOrder::FourAndHalf => 4.5,
BondOrder::FiveAndHalf => 5.5,
BondOrder::Aromatic => 1.5,
BondOrder::Hydrogen => 0.0,
BondOrder::Dative
| BondOrder::DativeOne
| BondOrder::DativeLeft
| BondOrder::DativeRight => {
if bond.end().index() == atom_index {
1.0
} else {
0.0
}
}
BondOrder::ThreeCenter | BondOrder::Unspecified | BondOrder::Other => 0.0,
}
}
fn count_atom_electrons_rdkit(
mol: &Molecule,
assignment: &crate::ValenceAssignment,
atom_degree: &[usize],
atom_index: usize,
) -> i32 {
let atom = &mol.atoms()[atom_index];
let Some(dv) = rdkit_default_valence(atom.atomic_number()) else {
return -1;
};
if dv <= 1 {
return -1;
}
let mut degree = atom_degree[atom_index] as i32
+ atom.explicit_hydrogens() as i32
+ assignment.implicit_hydrogens[atom_index] as i32;
for bond in mol.bonds() {
if (bond.begin().index() == atom_index || bond.end().index() == atom_index)
&& bond_valence_contrib_for_atom(bond, atom_index) == 0.0
{
degree -= 1;
}
}
if degree > 3 {
return -1;
}
let Some(nouter) = rdkit_n_outer_electrons(atom.atomic_number()) else {
return -1;
};
let nlp = (nouter - dv - atom.formal_charge() as i32).max(0);
let radicals = atom.radical_electrons() as i32;
let mut res = (dv - degree) + nlp - radicals;
if res > 1 {
let n_unsaturations =
assignment.explicit_valence[atom_index] - atom_degree[atom_index] as i32;
if n_unsaturations > 1 {
res = 1;
}
}
res
}
fn is_atom_conjugation_candidate(
mol: &Molecule,
assignment: &crate::ValenceAssignment,
atom_degree: &[usize],
atom_index: usize,
) -> bool {
let atom = &mol.atoms()[atom_index];
if let Ok(Some(vals)) = rdkit_valence_list(atom.atomic_number())
&& atom.formal_charge() == 0
&& !vals.is_empty()
&& vals[0] >= 0
{
let total_valence =
assignment.explicit_valence[atom_index] + assignment.implicit_hydrogens[atom_index];
if total_valence > vals[0] {
return false;
}
}
let nouter = rdkit_n_outer_electrons(atom.atomic_number()).unwrap_or(0);
let total_degree = atom_degree[atom_index]
+ atom.explicit_hydrogens() as usize
+ assignment.implicit_hydrogens[atom_index].max(0) as usize;
((atom.atomic_number() <= 10)
|| (nouter != 5 && nouter != 6)
|| (nouter == 6 && total_degree < 2))
&& count_atom_electrons_rdkit(mol, assignment, atom_degree, atom_index) > 0
}
fn compute_conjugated_bonds_for_uff(
mol: &Molecule,
assignment: &crate::ValenceAssignment,
atom_degree: &[usize],
) -> Vec<bool> {
let mut conjugated = vec![false; mol.bonds().len()];
for (bi, bond) in mol.bonds().iter().enumerate() {
conjugated[bi] = bond.is_aromatic();
}
for atom_index in 0..mol.atoms().len() {
if !is_atom_conjugation_candidate(mol, assignment, atom_degree, atom_index) {
continue;
}
let sbo = atom_degree[atom_index]
+ mol.atoms()[atom_index].explicit_hydrogens() as usize
+ assignment.implicit_hydrogens[atom_index].max(0) as usize;
if !(2..=3).contains(&sbo) {
continue;
}
let incident_bonds: Vec<usize> = mol
.bonds()
.iter()
.enumerate()
.filter_map(|(bond_index, bond)| {
if bond.begin().index() == atom_index || bond.end().index() == atom_index {
Some(bond_index)
} else {
None
}
})
.collect();
for &bond1_idx in &incident_bonds {
let bond1 = &mol.bonds()[bond1_idx];
if bond_valence_contrib_for_atom(bond1, atom_index) < 1.5 {
continue;
}
let other1 = if bond1.begin().index() == atom_index {
bond1.end().index()
} else {
bond1.begin().index()
};
if !is_atom_conjugation_candidate(mol, assignment, atom_degree, other1) {
continue;
}
for &bond2_idx in &incident_bonds {
if bond1_idx == bond2_idx {
continue;
}
let bond2 = &mol.bonds()[bond2_idx];
let other2 = if bond2.begin().index() == atom_index {
bond2.end().index()
} else {
bond2.begin().index()
};
let sbo2 = atom_degree[other2]
+ mol.atoms()[other2].explicit_hydrogens() as usize
+ assignment.implicit_hydrogens[other2].max(0) as usize;
if sbo2 > 3 {
continue;
}
if is_atom_conjugation_candidate(mol, assignment, atom_degree, other2) {
conjugated[bond1_idx] = true;
conjugated[bond2_idx] = true;
}
}
}
}
conjugated
}
fn compute_hybridizations_for_uff(
mol: &Molecule,
assignment: &crate::ValenceAssignment,
atom_degree: &[usize],
atom_has_conjugated_bond: &[bool],
) -> Vec<Hybridization> {
let mut out = Vec::with_capacity(mol.atoms().len());
for (atom_index, atom) in mol.atoms().iter().enumerate() {
if atom.atomic_number() == 0 {
out.push(Hybridization::Unspecified);
continue;
}
let mut deg = atom_degree[atom_index] as i32
+ atom.explicit_hydrogens() as i32
+ assignment.implicit_hydrogens[atom_index];
for bond in mol.bonds() {
if (bond.begin().index() == atom_index || bond.end().index() == atom_index)
&& matches!(bond.order(), BondOrder::Dative | BondOrder::DativeOne)
&& bond.end().index() != atom_index
{
deg -= 1;
}
}
let hyb = if atom.atomic_number() <= 1 {
match deg {
0 | 1 => Hybridization::S,
2 => Hybridization::Sp,
3 => Hybridization::Sp2,
4 => Hybridization::Sp3,
5 => Hybridization::Sp3d,
6 => Hybridization::Sp3d2,
_ => Hybridization::Unspecified,
}
} else {
let nouter = rdkit_n_outer_electrons(atom.atomic_number()).unwrap_or(0);
let total_valence =
assignment.explicit_valence[atom_index] + assignment.implicit_hydrogens[atom_index];
let num_free = nouter - (total_valence + atom.formal_charge() as i32);
let norbs = if total_valence + nouter - (atom.formal_charge() as i32) < 8 {
let radicals = atom.radical_electrons() as i32;
let lone_pairs = (num_free - radicals) / 2;
deg + lone_pairs + radicals
} else {
let lone_pairs = num_free / 2;
deg + lone_pairs
};
match norbs {
0 | 1 => Hybridization::S,
2 => Hybridization::Sp,
3 => Hybridization::Sp2,
4 => {
let total_degree = atom_degree[atom_index]
+ atom.explicit_hydrogens() as usize
+ assignment.implicit_hydrogens[atom_index].max(0) as usize;
if total_degree > 3 || !atom_has_conjugated_bond[atom_index] {
Hybridization::Sp3
} else {
Hybridization::Sp2
}
}
5 => Hybridization::Sp3d,
6 => Hybridization::Sp3d2,
_ => Hybridization::Unspecified,
}
};
out.push(hyb);
}
out
}
fn atom_total_valence_for_uff(
mol: &Molecule,
assignment: &crate::ValenceAssignment,
atom_index: usize,
) -> i32 {
mol.bonds()
.iter()
.map(|bond| bond_valence_contrib_for_atom(bond, atom_index))
.sum::<f64>()
.round() as i32
+ assignment.implicit_hydrogens[atom_index]
}
fn total_num_hydrogens_for_distgeom(
mol: &Molecule,
atom: &Atom,
assignment: &crate::ValenceAssignment,
atom_index: usize,
) -> u32 {
let mut res =
atom.explicit_hydrogens() as u32 + assignment.implicit_hydrogens[atom_index].max(0) as u32;
res += neighbors_for_atom(mol, atom_index)
.into_iter()
.filter(|&nbr| mol.atoms()[nbr].atomic_number() == 1)
.count() as u32;
res
}
#[cfg(test)]
fn get_atom_label_for_uff(
mol: &Molecule,
assignment: &crate::ValenceAssignment,
hybridizations: &[Hybridization],
atom_has_conjugated_bond: &[bool],
atom_index: usize,
) -> Result<String, DgBoundsError> {
let atom = &mol.atoms()[atom_index];
let total_valence = atom_total_valence_for_uff(mol, assignment, atom_index);
source_get_atom_label_for_uff(
atom,
atom_index,
total_valence,
hybridizations[atom_index],
atom_has_conjugated_bond[atom_index],
true,
)
.map_err(|err| {
DgBoundsError::GenerationFailed(format!(
"UFF atom symbol lookup failed for atomic number {}: {err}",
atom.atomic_number()
))
})
}
fn calc_bond_rest_length(bond_order: f64, end1: &AtomicParams, end2: &AtomicParams) -> f64 {
let ri = end1.r1;
let rj = end2.r1;
let r_bo = -UFF_LAMBDA * (ri + rj) * bond_order.ln();
let xi = end1.gmp_xi;
let xj = end2.gmp_xi;
let root_delta = xi.sqrt() - xj.sqrt();
let r_en = ri * rj * root_delta * root_delta / (xi * ri + xj * rj);
ri + rj + r_bo - r_en
}
fn ideal_bond_angle(hybridization: &crate::Hybridization, ring_size: Option<usize>) -> f64 {
const DEG_TO_RAD: f64 = std::f64::consts::PI / 180.0;
if let Some(rsize) = ring_size {
match hybridization {
crate::Hybridization::Sp2 if rsize <= 8 || rsize == 3 || rsize == 4 => {
std::f64::consts::PI * (1.0 - 2.0 / rsize as f64)
}
crate::Hybridization::Sp3 if rsize == 5 => 104.0 * DEG_TO_RAD,
crate::Hybridization::Sp3 => 109.5 * DEG_TO_RAD,
crate::Hybridization::Sp3d => 105.0 * DEG_TO_RAD,
crate::Hybridization::Sp3d2 => 90.0 * DEG_TO_RAD,
_ => 120.0 * DEG_TO_RAD,
}
} else {
match hybridization {
crate::Hybridization::Sp => 180.0 * DEG_TO_RAD,
crate::Hybridization::Sp2 => 120.0 * DEG_TO_RAD,
crate::Hybridization::Sp3 => 109.5 * DEG_TO_RAD,
crate::Hybridization::Sp3d => 90.0 * DEG_TO_RAD,
crate::Hybridization::Sp3d2 => 90.0 * DEG_TO_RAD,
crate::Hybridization::Sp2d => 120.0 * DEG_TO_RAD,
_ => 120.0 * DEG_TO_RAD,
}
}
}
const LOCAL_INF_DIST: f64 = 1.0e8;
fn compute_topological_distances(mol: &Molecule) -> Vec<f64> {
let n_atoms = mol.num_atoms();
let mut last_dist = vec![LOCAL_INF_DIST; n_atoms * n_atoms];
let mut last_path = vec![0_i32; n_atoms * n_atoms];
for i in 0..n_atoms {
last_dist[i * n_atoms + i] = 0.0;
}
for bond in mol.bonds() {
let i = bond.begin().index();
let j = bond.end().index();
last_dist[i * n_atoms + j] = 1.0;
last_dist[j * n_atoms + i] = 1.0;
}
for i in 0..n_atoms {
let itab = i * n_atoms;
for j in 0..n_atoms {
if i == j || last_dist[itab + j] == LOCAL_INF_DIST {
last_path[itab + j] = -1;
} else {
last_path[itab + j] = i as i32;
}
}
}
let mut curr_dist = vec![0.0; n_atoms * n_atoms];
let mut curr_path = vec![0_i32; n_atoms * n_atoms];
for k in 0..n_atoms {
let ktab = k * n_atoms;
for i in 0..n_atoms {
let itab = i * n_atoms;
for j in 0..n_atoms {
let v1 = last_dist[itab + j];
let v2 = last_dist[itab + k] + last_dist[ktab + j];
if v1 <= v2 {
curr_dist[itab + j] = v1;
curr_path[itab + j] = last_path[itab + j];
} else {
curr_dist[itab + j] = v2;
curr_path[itab + j] = last_path[ktab + j];
}
}
}
std::mem::swap(&mut curr_dist, &mut last_dist);
std::mem::swap(&mut curr_path, &mut last_path);
}
last_dist
}
fn flatten_topological_distances_matrix(mol: &Molecule) -> Vec<f64> {
compute_topological_distances(mol)
}
fn neighbors_for_atom(mol: &Molecule, idx: usize) -> Vec<usize> {
let mut ns = Vec::new();
for bond in mol.bonds() {
if bond.begin().index() == idx {
ns.push(bond.end().index());
} else if bond.end().index() == idx {
ns.push(bond.begin().index());
}
}
ns
}
fn bond_between(mol: &Molecule, a: usize, b: usize) -> Option<&Bond> {
mol.bonds().iter().find(|bond| {
(bond.begin().index() == a && bond.end().index() == b)
|| (bond.begin().index() == b && bond.end().index() == a)
})
}
fn common_middle_atom(mol: &Molecule, a1: usize, a3: usize) -> Option<usize> {
let n1 = neighbors_for_atom(mol, a1);
let n3 = neighbors_for_atom(mol, a3);
for &n in &n1 {
if n3.contains(&n) {
return Some(n);
}
}
None
}
#[derive(Clone)]
struct BoundsMatrix {
data: Vec<Vec<f64>>,
n: usize,
}
impl BoundsMatrix {
fn new(n: usize) -> Self {
let mut matrix = Self {
data: vec![vec![0.0; n]; n],
n,
};
init_bounds_mat_shared(&mut matrix, DEFAULT_LOWER, DEFAULT_UPPER);
matrix
}
fn get_val(&self, i: usize, j: usize) -> f64 {
self.data[i][j]
}
fn set_val(&mut self, i: usize, j: usize, value: f64) {
self.data[i][j] = value;
}
fn get_upper(&self, i: usize, j: usize) -> f64 {
if i < j {
self.get_val(i, j)
} else {
self.get_val(j, i)
}
}
fn set_upper(&mut self, i: usize, j: usize, v: f64) {
assert!(v >= 0.0, "Negative upper bound");
if i < j {
self.set_val(i, j, v);
} else {
self.set_val(j, i, v);
}
}
fn set_upper_if_better(&mut self, i: usize, j: usize, val: f64) {
if val < self.get_upper(i, j) && val > self.get_lower(i, j) {
self.set_upper(i, j, val);
}
}
fn set_lower(&mut self, i: usize, j: usize, v: f64) {
assert!(v >= 0.0, "Negative lower bound");
if i < j {
self.set_val(j, i, v);
} else {
self.set_val(i, j, v);
}
}
fn set_lower_if_better(&mut self, i: usize, j: usize, val: f64) {
if val > self.get_lower(i, j) && val < self.get_upper(i, j) {
self.set_lower(i, j, val);
}
}
fn get_lower(&self, i: usize, j: usize) -> f64 {
if i < j {
self.get_val(j, i)
} else {
self.get_val(i, j)
}
}
fn check_valid(&self) -> bool {
for i in 1..self.n {
for j in 0..i {
if self.get_upper(i, j) < self.get_lower(i, j) {
return false;
}
}
}
true
}
fn num_rows(&self) -> usize {
self.n
}
}
#[derive(Debug, Clone, PartialEq)]
struct SymmMatrix {
data: Vec<f64>,
n: usize,
}
impl SymmMatrix {
fn new(n: usize) -> Self {
Self {
data: vec![0.0; n * (n + 1) / 2],
n,
}
}
fn with_value(n: usize, value: f64) -> Self {
Self {
data: vec![value; n * (n + 1) / 2],
n,
}
}
fn num_rows(&self) -> usize {
self.n
}
fn get_data_size(&self) -> usize {
self.data.len()
}
fn get_data(&self) -> &[f64] {
&self.data
}
fn get_val(&self, i: usize, j: usize) -> f64 {
self.data[SymmMatrix::data_index(i, j)]
}
fn set_val(&mut self, i: usize, j: usize, value: f64) {
let idx = SymmMatrix::data_index(i, j);
self.data[idx] = value;
}
fn data_index(i: usize, j: usize) -> usize {
let (row, col) = if i >= j { (i, j) } else { (j, i) };
row * (row + 1) / 2 + col
}
}
#[derive(Debug, Clone, PartialEq)]
struct DoubleMatrix {
data: Vec<f64>,
n_rows: usize,
n_cols: usize,
}
impl DoubleMatrix {
fn new(n_rows: usize, n_cols: usize) -> Self {
Self {
data: vec![0.0; n_rows * n_cols],
n_rows,
n_cols,
}
}
fn num_rows(&self) -> usize {
self.n_rows
}
fn num_cols(&self) -> usize {
self.n_cols
}
fn get_val(&self, i: usize, j: usize) -> f64 {
self.data[i * self.n_cols + j]
}
}
trait RdkitDoubleRng {
fn next_unit_f64(&mut self) -> f64;
}
#[derive(Debug, Clone)]
struct RdkitDistgeomMinStdRand {
state: u64,
}
impl Default for RdkitDistgeomMinStdRand {
fn default() -> Self {
Self { state: 42 }
}
}
impl RdkitDistgeomMinStdRand {
fn new(seed: i32) -> Self {
if seed > 0 {
let mut state = seed as u64 % RDKIT_RANDOM_MODULUS;
if state == 0 {
state = 1;
}
Self { state }
} else {
Self::default()
}
}
fn new_from_embed_points_seed(seed: i32) -> Self {
assert!(seed >= 0);
let mut state = seed as u64 % RDKIT_RANDOM_MODULUS;
if state == 0 {
state = 1;
}
Self { state }
}
fn next_raw(&mut self) -> u32 {
self.state = (self.state * RDKIT_RANDOM_MULTIPLIER) % RDKIT_RANDOM_MODULUS;
self.state as u32
}
}
#[cfg(not(target_arch = "wasm32"))]
unsafe extern "C" {
fn clock() -> libc::clock_t;
}
#[cfg(not(target_arch = "wasm32"))]
fn rdkit_clock_seed() -> Result<i32, DgBoundsError> {
Ok(unsafe { clock() as i32 })
}
#[cfg(target_arch = "wasm32")]
fn rdkit_clock_seed() -> Result<i32, DgBoundsError> {
Err(DgBoundsError::WasmImplicitClockSeedUnsupported)
}
impl RdkitDoubleRng for RdkitDistgeomMinStdRand {
fn next_unit_f64(&mut self) -> f64 {
let raw = self.next_raw() as f64;
(raw - 1.0) / (RDKIT_RANDOM_MODULUS as f64 - 1.0)
}
}
fn rdkit_embedder_multiplication_overflows(a: i32, b: i32) -> bool {
if a == 0 || b == 0 {
return false;
}
a > i32::MAX / b
}
fn rdkit_embedder_conformer_seed(
random_seed: i32,
conformer_index: usize,
enable_sequential_random_seeds: bool,
) -> i32 {
assert!(
random_seed >= -1,
"random seed must either be positive, zero, or negative one"
);
let ci_plus_one = i32::try_from(conformer_index + 1).expect("conformer index too large");
let mut new_seed = random_seed;
if new_seed > -1 {
if enable_sequential_random_seeds {
new_seed = new_seed
.checked_add(ci_plus_one)
.expect("sequential conformer seed overflow");
} else if !rdkit_embedder_multiplication_overflows(ci_plus_one, random_seed) {
new_seed = ci_plus_one * random_seed;
} else {
let big_seed = random_seed as usize;
let max_val = (conformer_index + 1).max(big_seed);
let big_num = big_seed + max_val * (conformer_index + 1);
const POSITIVE_INT_MASK: usize = 0x7fff_ffff;
let folded_num = (big_num & POSITIVE_INT_MASK) ^ (big_num >> 31);
new_seed = (folded_num & POSITIVE_INT_MASK) as i32;
}
}
assert!(
new_seed >= -1,
"Something went wrong calculating a new seed"
);
new_seed
}
fn pick_random_dist_mat(mmat: &BoundsMatrix, dist_mat: &mut SymmMatrix, seed: i32) -> f64 {
if seed > 0 {
RDKIT_DISTGEOM_RNG.with(|rng| {
*rng.borrow_mut() = RdkitDistgeomMinStdRand::new(seed);
});
}
RDKIT_DISTGEOM_RNG
.with(|rng| pick_random_dist_mat_with_rng(mmat, dist_mat, &mut *rng.borrow_mut()))
}
fn pick_random_dist_mat_with_rng<R: RdkitDoubleRng>(
mmat: &BoundsMatrix,
dist_mat: &mut SymmMatrix,
rng: &mut R,
) -> f64 {
let npt = mmat.num_rows();
assert_eq!(npt, dist_mat.num_rows(), "Size mismatch");
let mut largest_val = -1.0;
for i in 1..npt {
let id = i * (i + 1) / 2;
for j in 0..i {
let ub = mmat.get_upper(i, j);
let lb = mmat.get_lower(i, j);
assert!(ub >= lb);
let rval = rng.next_unit_f64();
let d = lb + rval * (ub - lb);
dist_mat.data[id + j] = d;
if d > largest_val {
largest_val = d;
}
}
}
largest_val
}
fn rdkit_symm_matrix_vector_multiply(a: &SymmMatrix, x: &[f64], y: &mut [f64]) {
let a_size = a.num_rows();
assert_eq!(a_size, x.len(), "Size mismatch during multiplication");
assert_eq!(a_size, y.len(), "Size mismatch during multiplication");
for i in 0..a_size {
y[i] = 0.0;
let mut id_a = i * (i + 1) / 2;
for xj in x.iter().take(i + 1) {
y[i] += a.data[id_a] * *xj;
id_a += 1;
}
id_a -= 1;
for (j, xj) in x.iter().enumerate().take(a_size).skip(i + 1) {
id_a += j;
y[i] += a.data[id_a] * *xj;
}
}
}
fn rdkit_vector_largest_abs_val_id(data: &[f64]) -> usize {
let mut res = -1.0;
let mut id = data.len();
for (i, value) in data.iter().enumerate() {
if value.abs() > res {
res = value.abs();
id = i;
}
}
id
}
fn rdkit_vector_normalize(data: &mut [f64]) {
let val = data.iter().map(|v| v * v).sum::<f64>().sqrt();
assert!(val >= f64::EPSILON, "Cannot normalize a zero length vector");
for item in data {
*item /= val;
}
}
fn rdkit_vector_set_to_random(size: usize, seed: i32) -> Result<Vec<f64>, DgBoundsError> {
let effective_seed = if seed > 0 {
seed
} else {
rdkit_clock_seed()?.wrapping_add(1)
};
let mut rng = RdkitDistgeomMinStdRand::new(effective_seed);
let mut data = (0..size).map(|_| rng.next_unit_f64()).collect::<Vec<_>>();
rdkit_vector_normalize(&mut data);
Ok(data)
}
fn power_eigen_solver(
num_eig: usize,
mat: &mut SymmMatrix,
eigen_values: &mut [f64],
mut eigen_vectors: Option<&mut DoubleMatrix>,
seed: i32,
) -> Result<bool, DgBoundsError> {
const MAX_ITERATIONS: usize = 1000;
const TOLERANCE: f64 = 0.001;
const HUGE_EIGVAL: f64 = 1.0e10;
const TINY_EIGVAL: f64 = 1.0e-10;
let n = mat.num_rows();
assert!(eigen_values.len() >= num_eig);
assert!(num_eig <= n);
if let Some(eig_vecs) = eigen_vectors.as_ref() {
assert!(eig_vecs.num_cols() >= n);
assert!(eig_vecs.num_rows() >= num_eig);
}
let mut effective_seed = if seed > 0 { seed } else { rdkit_clock_seed()? };
let mut converged = false;
let mut z = vec![0.0; n];
for (ei, eig_value_slot) in eigen_values.iter_mut().enumerate().take(num_eig) {
let mut eig_val = -HUGE_EIGVAL;
effective_seed += ei as i32;
let mut v = rdkit_vector_set_to_random(n, effective_seed)?;
converged = false;
for _iter in 0..MAX_ITERATIONS {
rdkit_symm_matrix_vector_multiply(mat, &v, &mut z);
let prev_val = eig_val;
let eval_id = rdkit_vector_largest_abs_val_id(&z);
eig_val = z[eval_id];
if eig_val.abs() < TINY_EIGVAL {
break;
}
v.copy_from_slice(&z);
for item in &mut v {
*item /= eig_val;
}
if (eig_val - prev_val).abs() < TOLERANCE {
converged = true;
break;
}
}
if !converged {
break;
}
rdkit_vector_normalize(&mut v);
if let Some(eig_vecs) = eigen_vectors.as_deref_mut() {
let id = ei * eig_vecs.num_cols();
eig_vecs.data[id..id + n].copy_from_slice(&v);
}
*eig_value_slot = eig_val;
for i in 0..n {
let id = i * (i + 1) / 2;
for j in 0..=i {
mat.data[id + j] -= eig_val * v[i] * v[j];
}
}
}
Ok(converged)
}
fn compute_initial_coords(
dist_mat: &SymmMatrix,
positions: &mut [Vec<f64>],
rand_neg_eig: bool,
num_zero_fail: usize,
seed: i32,
) -> Result<bool, DgBoundsError> {
if seed > 0 {
RDKIT_DISTGEOM_RNG.with(|rng| {
*rng.borrow_mut() = RdkitDistgeomMinStdRand::new(seed);
});
}
RDKIT_DISTGEOM_RNG.with(|rng| {
compute_initial_coords_with_rng(
dist_mat,
positions,
&mut *rng.borrow_mut(),
rand_neg_eig,
num_zero_fail,
)
})
}
fn compute_initial_coords_with_rng<R: RdkitDoubleRng>(
dist_mat: &SymmMatrix,
positions: &mut [Vec<f64>],
rng: &mut R,
rand_neg_eig: bool,
num_zero_fail: usize,
) -> Result<bool, DgBoundsError> {
let n = dist_mat.num_rows();
assert_eq!(positions.len(), n, "Size mismatch");
assert!(!positions.is_empty());
let dim = positions[0].len();
let trace_row64 = row64_distgeom_trace_enabled(n);
let trace_row61 = row61_distgeom_trace_enabled(n);
let data = dist_mat.get_data();
let mut sq_mat = SymmMatrix::new(n);
let mut t = SymmMatrix::with_value(n, 0.0);
let mut eig_vecs = DoubleMatrix::new(dim, n);
let mut eig_vals = vec![0.0; dim];
let d_size = dist_mat.get_data_size();
let mut sum_sq_d2 = 0.0;
for (i, value) in data.iter().enumerate().take(d_size) {
sq_mat.data[i] = value * value;
sum_sq_d2 += sq_mat.data[i];
}
sum_sq_d2 /= (n * n) as f64;
if trace_row64 {
let preview_len = data.len().min(12);
let payload = data[..preview_len]
.iter()
.map(|v| format!("{v:.15}"))
.collect::<Vec<_>>()
.join(",");
println!(
"row64_compute_initial_coords dist_data=[{}] sum_sq_d2={:.15}",
payload, sum_sq_d2
);
} else if trace_row61 {
let preview_len = data.len().min(12);
let payload = data[..preview_len]
.iter()
.map(|v| format!("{v:.15}"))
.collect::<Vec<_>>()
.join(",");
println!(
"row61_compute_initial_coords dist_data=[{}] sum_sq_d2={:.15}",
payload, sum_sq_d2
);
}
let mut sq_d0i = vec![0.0; n];
for (i, sq_d0i_value) in sq_d0i.iter_mut().enumerate().take(n) {
for j in 0..n {
*sq_d0i_value += sq_mat.get_val(i, j);
}
*sq_d0i_value /= n as f64;
*sq_d0i_value -= sum_sq_d2;
if *sq_d0i_value < EIGVAL_TOL && n > 3 {
if trace_row64 {
let first_fail_val = *sq_d0i_value;
let preview_len = (i + 1).min(12);
let payload = sq_d0i[..preview_len]
.iter()
.map(|v| format!("{v:.15}"))
.collect::<Vec<_>>()
.join(",");
println!(
"row64_compute_initial_coords sq_d0i=[{}] first_fail_idx={} first_fail_val={:.15}",
payload, i, first_fail_val
);
} else if trace_row61 {
let first_fail_val = *sq_d0i_value;
let preview_len = (i + 1).min(12);
let payload = sq_d0i[..preview_len]
.iter()
.map(|v| format!("{v:.15}"))
.collect::<Vec<_>>()
.join(",");
println!(
"row61_compute_initial_coords sq_d0i=[{}] first_fail_idx={} first_fail_val={:.15}",
payload, i, first_fail_val
);
}
return Ok(false);
}
}
if trace_row64 {
let preview_len = sq_d0i.len().min(12);
let payload = sq_d0i[..preview_len]
.iter()
.map(|v| format!("{v:.15}"))
.collect::<Vec<_>>()
.join(",");
println!("row64_compute_initial_coords sq_d0i=[{}]", payload);
} else if trace_row61 {
let preview_len = sq_d0i.len().min(12);
let payload = sq_d0i[..preview_len]
.iter()
.map(|v| format!("{v:.15}"))
.collect::<Vec<_>>()
.join(",");
println!("row61_compute_initial_coords sq_d0i=[{}]", payload);
}
for i in 0..n {
for j in 0..=i {
let val = 0.5 * (sq_d0i[i] + sq_d0i[j] - sq_mat.get_val(i, j));
t.set_val(i, j, val);
}
}
let n_eigs = dim.min(n);
power_eigen_solver(
n_eigs,
&mut t,
&mut eig_vals,
Some(&mut eig_vecs),
(sum_sq_d2 * n as f64) as i32,
)?;
if trace_row64 {
let payload = eig_vals
.iter()
.map(|v| format!("{v:.15}"))
.collect::<Vec<_>>()
.join(",");
println!(
"row64_compute_initial_coords eig_vals_before_sqrt=[{}]",
payload
);
} else if trace_row61 {
let payload = eig_vals
.iter()
.map(|v| format!("{v:.15}"))
.collect::<Vec<_>>()
.join(",");
println!(
"row61_compute_initial_coords eig_vals_before_sqrt=[{}]",
payload
);
}
let mut found_neg = false;
let mut zero_eigs = 0;
for eig_val in eig_vals.iter_mut().take(dim) {
if *eig_val > EIGVAL_TOL {
*eig_val = eig_val.sqrt();
} else if eig_val.abs() < EIGVAL_TOL {
*eig_val = 0.0;
zero_eigs += 1;
} else {
found_neg = true;
}
}
if trace_row64 {
let payload = eig_vals
.iter()
.map(|v| format!("{v:.15}"))
.collect::<Vec<_>>()
.join(",");
println!(
"row64_compute_initial_coords eig_vals_after_sqrt=[{}] found_neg={} zero_eigs={}",
payload, found_neg, zero_eigs
);
} else if trace_row61 {
let payload = eig_vals
.iter()
.map(|v| format!("{v:.15}"))
.collect::<Vec<_>>()
.join(",");
println!(
"row61_compute_initial_coords eig_vals_after_sqrt=[{}] found_neg={} zero_eigs={}",
payload, found_neg, zero_eigs
);
}
if found_neg && !rand_neg_eig {
return Ok(false);
}
if zero_eigs >= num_zero_fail && n > 3 {
return Ok(false);
}
for (i, point) in positions.iter_mut().enumerate().take(n) {
for j in 0..dim {
if eig_vals[j] >= 0.0 {
point[j] = eig_vals[j] * eig_vecs.get_val(j, i);
} else {
point[j] = 1.0 - 2.0 * rng.next_unit_f64();
}
}
}
Ok(true)
}
fn compute_random_coords(positions: &mut [Vec<f64>], box_size: f64, seed: i32) -> bool {
if seed > 0 {
RDKIT_DISTGEOM_RNG.with(|rng| {
*rng.borrow_mut() = RdkitDistgeomMinStdRand::new(seed);
});
}
RDKIT_DISTGEOM_RNG
.with(|rng| compute_random_coords_with_rng(positions, box_size, &mut *rng.borrow_mut()))
}
fn compute_random_coords_with_rng<R: RdkitDoubleRng>(
positions: &mut [Vec<f64>],
box_size: f64,
rng: &mut R,
) -> bool {
assert!(box_size > 0.0, "bad boxSize");
for point in positions {
for coord in point {
*coord = box_size * (rng.next_unit_f64() - 0.5);
}
}
true
}
fn forcefield_vec_from_position(point: &[f64]) -> ForceFieldVec3 {
assert!(
(3..=4).contains(&point.len()),
"unsupported point dimension"
);
if point.len() == 4 {
ForceFieldVec3::new4(point[0], point[1], point[2], point[3])
} else {
ForceFieldVec3::new(point[0], point[1], point[2])
}
}
fn construct_distgeom_forcefield(
mmat: &BoundsMatrix,
positions: &[Vec<f64>],
csets: &[ChiralSetPtr],
weight_chiral: f64,
weight_fourth_dim: f64,
extra_weights: Option<&BTreeMap<(usize, usize), f64>>,
basin_size_tol: f64,
fixed_pts: Option<&[bool]>,
) -> ForceField {
let n = mmat.num_rows();
assert_eq!(n, positions.len());
assert!(!positions.is_empty());
let dimension = positions[0].len();
assert!((3..=4).contains(&dimension), "unsupported point dimension");
assert!(
positions.iter().all(|point| point.len() == dimension),
"inconsistent point dimension"
);
if let Some(fixed_pts) = fixed_pts {
assert!(fixed_pts.len() >= n, "bad fixed point bitset");
}
let mut field = ForceField::new(dimension);
field.positions_mut().extend(
positions
.iter()
.map(|point| forcefield_vec_from_position(point)),
);
let mut dist_contrib = DistViolationContribs::new(&field);
for i in 1..n {
for j in 0..i {
if fixed_pts.is_some_and(|fixed_pts| fixed_pts[i] && fixed_pts[j]) {
continue;
}
let mut weight = 1.0;
let lower = mmat.get_lower(i, j);
let upper = mmat.get_upper(i, j);
let mut include_it = false;
if let Some(extra_weights) = extra_weights
&& let Some(extra_weight) = extra_weights.get(&(i, j))
{
weight = *extra_weight;
include_it = true;
}
if upper - lower <= basin_size_tol {
include_it = true;
}
if include_it {
dist_contrib.add_contrib(i, j, upper, lower, weight);
}
}
}
if !dist_contrib.empty() {
field.add_contrib(Box::new(dist_contrib));
}
if weight_chiral > 1.0e-8 {
let mut chiral_contrib = ChiralViolationContribs::new(&field);
for cset in csets {
chiral_contrib.add_contrib(cset, weight_chiral);
}
if !chiral_contrib.empty() {
field.add_contrib(Box::new(chiral_contrib));
}
}
if field.dimension() == 4 && weight_fourth_dim > 1.0e-8 {
let mut fourth_dim_contrib = FourthDimContribs::new(&field);
for i in 0..n {
fourth_dim_contrib.add_contrib(i, weight_fourth_dim);
}
if !fourth_dim_contrib.empty() {
field.add_contrib(Box::new(fourth_dim_contrib));
}
}
field
}
fn add_improper_torsion_terms(
ff: &mut ForceField,
force_scaling_factor: f64,
improper_atoms: &[Vec<i32>],
is_improper_constrained: &mut [bool],
) {
let mut inversion_contribs = InversionContribs::new(ff);
for improper_atom in improper_atoms {
let mut n = [0_usize; 4];
for i in 0..3 {
n[1] = 1;
match i {
0 => {
n[0] = 0;
n[2] = 2;
n[3] = 3;
}
1 => {
n[0] = 0;
n[2] = 3;
n[3] = 2;
}
2 => {
n[0] = 2;
n[2] = 3;
n[3] = 0;
}
_ => unreachable!("loop bounds guarantee 0..3"),
}
inversion_contribs.add_contrib(
improper_atom[n[0]] as usize,
improper_atom[n[1]] as usize,
improper_atom[n[2]] as usize,
improper_atom[n[3]] as usize,
improper_atom[4],
improper_atom[5] != 0,
force_scaling_factor,
);
is_improper_constrained[improper_atom[n[1]] as usize] = true;
}
}
if !inversion_contribs.empty() {
ff.add_contrib(Box::new(inversion_contribs));
}
}
fn add_experimental_torsion_terms(
ff: &mut ForceField,
etkdg_details: &CrystalFFDetails,
atom_pairs: &mut [bool],
num_atoms: usize,
) {
let mut torsion_contribs = TorsionAngleContribs::new(ff);
for t in 0..etkdg_details.exp_torsion_atoms.len() {
let i = etkdg_details.exp_torsion_atoms[t][0] as usize;
let j = etkdg_details.exp_torsion_atoms[t][1] as usize;
let k = etkdg_details.exp_torsion_atoms[t][2] as usize;
let l = etkdg_details.exp_torsion_atoms[t][3] as usize;
if i < l {
atom_pairs[i * num_atoms + l] = true;
} else {
atom_pairs[l * num_atoms + i] = true;
}
torsion_contribs.add_contrib(
i,
j,
k,
l,
etkdg_details.exp_torsion_angles[t].1.clone(),
etkdg_details.exp_torsion_angles[t].0.clone(),
);
}
if !torsion_contribs.is_empty() {
ff.add_contrib(Box::new(torsion_contribs));
}
}
fn add_12_terms(
ff: &mut ForceField,
etkdg_details: &CrystalFFDetails,
atom_pairs: &mut [bool],
positions: &[ForceFieldVec3],
force_constant: f64,
num_atoms: usize,
) {
let mut dist_contribs = DistanceConstraintContribs::new(ff);
for &(first, second) in &etkdg_details.bonds {
let i = first as usize;
let j = second as usize;
if i < j {
atom_pairs[i * num_atoms + j] = true;
} else {
atom_pairs[j * num_atoms + i] = true;
}
let d = (positions[i] - positions[j]).length();
dist_contribs.add_contrib(i, j, d - KNOWN_DIST_TOL, d + KNOWN_DIST_TOL, force_constant);
}
if !dist_contribs.empty() {
ff.add_contrib(Box::new(dist_contribs));
}
}
fn add_13_terms(
ff: &mut ForceField,
etkdg_details: &CrystalFFDetails,
atom_pairs: &mut [bool],
positions: &[ForceFieldVec3],
force_constant: f64,
is_improper_constrained: &[bool],
use_basic_knowledge: bool,
mmat: &BoundsMatrix,
num_atoms: usize,
) {
let mut dist_contribs = DistanceConstraintContribs::new(ff);
let mut angle_contribs = AngleConstraintContribs::new(ff);
for angle in &etkdg_details.angles {
let i = angle[0] as usize;
let j = angle[1] as usize;
let k = angle[2] as usize;
if i < k {
atom_pairs[i * num_atoms + k] = true;
} else {
atom_pairs[k * num_atoms + i] = true;
}
if use_basic_knowledge && angle[3] != 0 {
angle_contribs.add_contrib(i, j, k, 179.0, 180.0, 1.0);
} else if is_improper_constrained[j] {
dist_contribs.add_contrib(
i,
k,
mmat.get_lower(i, k),
mmat.get_upper(i, k),
force_constant,
);
} else {
let d = (positions[i] - positions[k]).length();
dist_contribs.add_contrib(i, k, d - KNOWN_DIST_TOL, d + KNOWN_DIST_TOL, force_constant);
}
}
if !angle_contribs.empty() {
ff.add_contrib(Box::new(angle_contribs));
}
if !dist_contribs.empty() {
ff.add_contrib(Box::new(dist_contribs));
}
}
fn add_long_range_distance_constraints(
ff: &mut ForceField,
etkdg_details: &CrystalFFDetails,
atom_pairs: &[bool],
positions: &[ForceFieldVec3],
known_distance_force_constant: f64,
mmat: &BoundsMatrix,
num_atoms: usize,
) {
let mut dist_contribs = DistanceConstraintContribs::new(ff);
for i in 1..num_atoms {
for j in 0..i {
if !atom_pairs[j * num_atoms + i] {
let mut fdist = etkdg_details.bounds_mat_force_scaling * 10.0;
let mut l = mmat.get_lower(i, j);
let mut u = mmat.get_upper(i, j);
if !etkdg_details.constrained_atoms.is_empty()
&& etkdg_details.constrained_atoms[i]
&& etkdg_details.constrained_atoms[j]
{
let d = (positions[i] - positions[j]).length();
l = d;
u = d;
l -= KNOWN_DIST_TOL;
u += KNOWN_DIST_TOL;
fdist = known_distance_force_constant;
}
dist_contribs.add_contrib(i, j, l, u, fdist);
}
}
}
if !dist_contribs.empty() {
ff.add_contrib(Box::new(dist_contribs));
}
}
fn construct_3d_forcefield(
mmat: &BoundsMatrix,
positions: &[ForceFieldVec3],
etkdg_details: &CrystalFFDetails,
) -> ForceField {
let n = mmat.num_rows();
assert_eq!(n, positions.len());
assert_eq!(
etkdg_details.exp_torsion_atoms.len(),
etkdg_details.exp_torsion_angles.len()
);
assert!(!positions.is_empty());
let mut field = ForceField::new(3);
field.positions_mut().extend_from_slice(positions);
let mut atom_pairs = vec![false; n * n];
let mut is_improper_constrained = vec![false; n];
add_experimental_torsion_terms(&mut field, etkdg_details, &mut atom_pairs, n);
add_improper_torsion_terms(
&mut field,
10.0,
&etkdg_details.improper_atoms,
&mut is_improper_constrained,
);
add_12_terms(
&mut field,
etkdg_details,
&mut atom_pairs,
positions,
KNOWN_DIST_FORCE_CONSTANT,
n,
);
add_13_terms(
&mut field,
etkdg_details,
&mut atom_pairs,
positions,
KNOWN_DIST_FORCE_CONSTANT,
&is_improper_constrained,
true,
mmat,
n,
);
add_long_range_distance_constraints(
&mut field,
etkdg_details,
&atom_pairs,
positions,
KNOWN_DIST_FORCE_CONSTANT,
mmat,
n,
);
field
}
fn construct_plain_3d_forcefield(
mmat: &BoundsMatrix,
positions: &[ForceFieldVec3],
etkdg_details: &CrystalFFDetails,
) -> ForceField {
let n = mmat.num_rows();
assert_eq!(n, positions.len());
assert_eq!(
etkdg_details.exp_torsion_atoms.len(),
etkdg_details.exp_torsion_angles.len()
);
assert!(!positions.is_empty());
let mut field = ForceField::new(3);
field.positions_mut().extend_from_slice(positions);
let mut atom_pairs = vec![false; n * n];
let is_improper_constrained = vec![false; n];
add_experimental_torsion_terms(&mut field, etkdg_details, &mut atom_pairs, n);
add_12_terms(
&mut field,
etkdg_details,
&mut atom_pairs,
positions,
KNOWN_DIST_FORCE_CONSTANT,
n,
);
add_13_terms(
&mut field,
etkdg_details,
&mut atom_pairs,
positions,
KNOWN_DIST_FORCE_CONSTANT,
&is_improper_constrained,
false,
mmat,
n,
);
add_long_range_distance_constraints(
&mut field,
etkdg_details,
&atom_pairs,
positions,
KNOWN_DIST_FORCE_CONSTANT,
mmat,
n,
);
field
}
fn construct_3d_improper_forcefield_from_parts(
mmat: &BoundsMatrix,
positions: &[ForceFieldVec3],
improper_atoms: &[Vec<i32>],
angles: &[Vec<i32>],
atom_nums: &[i32],
) -> ForceField {
let _ = atom_nums;
let n = mmat.num_rows();
assert_eq!(n, positions.len());
assert!(!positions.is_empty());
let mut field = ForceField::new(3);
field.positions_mut().extend_from_slice(positions);
let oob_force_scaling_factor = 10.0;
let mut is_improper_constrained = vec![false; n];
add_improper_torsion_terms(
&mut field,
oob_force_scaling_factor,
improper_atoms,
&mut is_improper_constrained,
);
let mut angle_contribs = AngleConstraintContribs::new(&field);
for angle in angles {
if angle[3] != 0 {
angle_contribs.add_contrib(
angle[0] as usize,
angle[1] as usize,
angle[2] as usize,
179.0,
180.0,
oob_force_scaling_factor,
);
}
}
if !angle_contribs.empty() {
field.add_contrib(Box::new(angle_contribs));
}
field
}
fn construct_3d_improper_forcefield(
mmat: &BoundsMatrix,
positions: &[ForceFieldVec3],
etkdg_details: &CrystalFFDetails,
) -> ForceField {
construct_3d_improper_forcefield_from_parts(
mmat,
positions,
&etkdg_details.improper_atoms,
&etkdg_details.angles,
&etkdg_details.atom_nums,
)
}
fn construct_3d_forcefield_with_cpci(
mmat: &BoundsMatrix,
positions: &[ForceFieldVec3],
etkdg_details: &CrystalFFDetails,
cpci: &BTreeMap<(usize, usize), f64>,
) -> ForceField {
let mut field = construct_3d_forcefield(mmat, positions, etkdg_details);
let is_1_4 = false;
let diel_model = 1;
let mut contrib = NonbondedContrib::new(&field);
for (&(idx1, idx2), &charge_term) in cpci {
contrib.add_term(idx1, idx2, None, true, charge_term, diel_model, is_1_4);
}
field.add_contrib(Box::new(contrib));
field
}
impl BoundsMatrix {
fn check_and_set_bounds(&mut self, i: usize, j: usize, lb: f64, ub: f64) {
self.check_and_set_bounds_with_mode(i, j, lb, ub, false);
}
fn check_and_set_bounds_with_mode(
&mut self,
i: usize,
j: usize,
lb: f64,
ub: f64,
set_if_better: bool,
) {
let clb = self.get_lower(i, j);
let cub = self.get_upper(i, j);
assert!(ub > lb, "upper bound not greater than lower bound");
assert!(lb > DIST12_DELTA || clb > DIST12_DELTA, "bad lower bound");
if set_if_better {
let mut nlb = clb.max(lb);
let mut nub = cub.min(ub);
if nub <= nlb {
nlb = clb.min(lb);
nub = cub.max(ub);
}
self.set_lower(i, j, nlb);
self.set_upper(i, j, nub);
} else {
if clb <= DIST12_DELTA {
self.set_lower(i, j, lb);
} else if lb < clb && lb > DIST12_DELTA {
self.set_lower(i, j, lb);
}
if cub >= MAX_UPPER {
self.set_upper(i, j, ub);
} else if ub > cub && ub < MAX_UPPER {
self.set_upper(i, j, ub);
}
}
}
fn triangle_smooth(&mut self, tol: f64) -> bool {
triangle_smooth_bounds_ptr(self, tol)
}
fn to_vec_vec(self) -> Vec<Vec<f64>> {
self.data
}
}
fn triangle_smooth_bounds_shared(bounds_mat: &mut BoundsMatrix, tol: f64) -> bool {
triangle_smooth_bounds_ptr(bounds_mat, tol)
}
fn triangle_smooth_bounds_ptr(bounds_mat: &mut BoundsMatrix, tol: f64) -> bool {
let npt = bounds_mat.num_rows();
if npt < 2 {
return true;
}
for k in 0..npt {
for i in 0..(npt - 1) {
if i == k {
continue;
}
let (ii, ik) = if i > k { (k, i) } else { (i, k) };
let uik = bounds_mat.get_val(ii, ik);
let lik = bounds_mat.get_val(ik, ii);
for j in (i + 1)..npt {
if j == k {
continue;
}
let (jj, jk) = if j > k { (k, j) } else { (j, k) };
let ukj = bounds_mat.get_val(jj, jk);
let sum_uik_ukj = uik + ukj;
if bounds_mat.get_val(i, j) > sum_uik_ukj {
bounds_mat.set_val(i, j, sum_uik_ukj);
}
let diff_lik_ujk = lik - ukj;
let diff_ljk_uik = bounds_mat.get_val(jk, jj) - uik;
if bounds_mat.get_val(j, i) < diff_lik_ujk {
bounds_mat.set_val(j, i, diff_lik_ujk);
} else if bounds_mat.get_val(j, i) < diff_ljk_uik {
bounds_mat.set_val(j, i, diff_ljk_uik);
}
let l_bound = bounds_mat.get_val(j, i);
let u_bound = bounds_mat.get_val(i, j);
let rel_gap = (l_bound - u_bound) / l_bound;
if tol > 0.0 && rel_gap > 0.0 && rel_gap < tol {
bounds_mat.set_val(i, j, l_bound);
} else if l_bound - u_bound > 0.0 {
return false;
}
}
}
}
true
}
fn init_bounds_mat_ptr(mmat: &mut BoundsMatrix, default_min: f64, default_max: f64) {
let npt = mmat.num_rows();
for i in 1..npt {
for j in 0..i {
mmat.set_upper(i, j, default_max);
mmat.set_lower(i, j, default_min);
}
}
}
fn init_bounds_mat_shared(mmat: &mut BoundsMatrix, default_min: f64, default_max: f64) {
init_bounds_mat_ptr(mmat, default_min, default_max);
}
fn atom_ring_sizes(ring_info: &crate::RingInfo, atom_idx: usize) -> Vec<usize> {
let aid = AtomId::new(atom_idx);
let mut sizes = Vec::new();
for r in ring_info.atom_rings() {
if r.contains(&aid) {
sizes.push(r.len());
}
}
sizes
}
fn is_atom_in_ring_of_size(ring_info: &crate::RingInfo, atom_idx: usize, size: usize) -> bool {
atom_ring_sizes(ring_info, atom_idx).contains(&size)
}
fn is_bond_in_ring_of_size(ring_info: &crate::RingInfo, bond_idx: usize, size: usize) -> bool {
let bid = BondId::new(bond_idx);
ring_info
.bond_rings()
.iter()
.any(|r| r.len() == size && r.contains(&bid))
}
fn set_12_bounds(
mol: &Molecule,
mmat: &mut BoundsMatrix,
accum_data: &mut ComputedData,
) -> Result<(), DgBoundsError> {
let npt = mmat.num_rows();
if npt != mol.atoms().len() {
return Err(DgBoundsError::GenerationFailed(
"Wrong size metric matrix".to_string(),
));
}
if accum_data.bond_lengths.len() < mol.bonds().len() {
return Err(DgBoundsError::GenerationFailed(
"Wrong size accumData".to_string(),
));
}
let assignment = assign_valence(mol, ValenceModel::RdkitLike).map_err(|err| {
DgBoundsError::GenerationFailed(format!(
"RDKit UFF atom typing valence assignment failed: {err}"
))
})?;
let mut atom_degree = vec![0usize; mol.atoms().len()];
for bond in mol.bonds() {
atom_degree[bond.begin().index()] += 1;
atom_degree[bond.end().index()] += 1;
}
let conjugated = compute_conjugated_bonds_for_uff(mol, &assignment, &atom_degree);
let mut atom_has_conjugated_bond = vec![false; mol.atoms().len()];
for (bond_index, bond) in mol.bonds().iter().enumerate() {
if conjugated[bond_index] {
atom_has_conjugated_bond[bond.begin().index()] = true;
atom_has_conjugated_bond[bond.end().index()] = true;
}
}
let hybridizations =
compute_hybridizations_for_uff(mol, &assignment, &atom_degree, &atom_has_conjugated_bond);
let total_valences = (0..mol.atoms().len())
.map(|atom_index| atom_total_valence_for_uff(mol, &assignment, atom_index))
.collect::<Vec<_>>();
let (atom_params, _found_all) = get_atom_types_for_uff(
mol,
&total_valences,
&hybridizations,
&atom_has_conjugated_bond,
)
.map_err(|err| {
DgBoundsError::GenerationFailed(format!("RDKit UFF atom typing failed: {err}"))
})?;
let mut squish_atoms = vec![false; mol.atoms().len()];
let ring_info = mol.derived_cache().rings.as_ref();
for (bond_index, bond) in mol.bonds().iter().enumerate() {
if conjugated[bond_index]
&& (mol.atoms()[bond.begin().index()].atomic_number() > 10
|| mol.atoms()[bond.end().index()].atomic_number() > 10)
&& ring_info.is_some_and(|ri| is_bond_in_ring_of_size(ri, bond.id().index(), 5))
{
squish_atoms[bond.begin().index()] = true;
squish_atoms[bond.end().index()] = true;
}
}
for bond in mol.bonds() {
let beg_id = bond.begin().index();
let end_id = bond.end().index();
let bond_order =
crate::chemistry::valence::bond_type_as_double(bond.order()).map_err(|err| {
DgBoundsError::GenerationFailed(format!(
"RDKit set12Bounds bond-order conversion failed: {err}"
))
})?;
if let (Some(param1), Some(param2)) = (atom_params[beg_id], atom_params[end_id]) {
if bond_order > 0.0 {
let bl = calc_bond_rest_length(bond_order, ¶m1, ¶m2);
let extra_squish = if squish_atoms[beg_id] || squish_atoms[end_id] {
0.2
} else {
0.0
};
accum_data.bond_lengths[bond.id().index()] = bl;
mmat.set_upper(beg_id, end_id, bl + extra_squish + DIST12_DELTA);
mmat.set_lower(beg_id, end_id, bl - extra_squish - DIST12_DELTA);
} else {
let bl = (vdw_radius(mol.atoms()[beg_id].atomic_number())
+ vdw_radius(mol.atoms()[end_id].atomic_number()))
/ 2.0;
accum_data.bond_lengths[bond.id().index()] = bl;
mmat.set_upper(beg_id, end_id, 1.5 * bl);
mmat.set_lower(beg_id, end_id, 0.5 * bl);
}
} else {
let bl = (vdw_radius(mol.atoms()[beg_id].atomic_number())
+ vdw_radius(mol.atoms()[end_id].atomic_number()))
/ 2.0;
accum_data.bond_lengths[bond.id().index()] = bl;
mmat.set_upper(beg_id, end_id, 1.5 * bl);
mmat.set_lower(beg_id, end_id, 0.5 * bl);
}
let pid = beg_id.min(end_id) * mol.atoms().len() + beg_id.max(end_id);
accum_data.visited12_bounds[pid] = true;
}
Ok(())
}
fn compute_13_dist(bl1: f64, bl2: f64, angle: f64) -> f64 {
let res = bl1 * bl1 + bl2 * bl2 - 2.0 * bl1 * bl2 * angle.cos();
res.sqrt()
}
fn compute_14_dist_3d(d1: f64, d2: f64, d3: f64, ang12: f64, ang23: f64, tor_ang: f64) -> f64 {
let p1x = d1 * ang12.cos();
let p1y = d1 * ang12.sin();
let p4x = d2 - d3 * ang23.cos();
let p4y = d3 * ang23.sin() * tor_ang.cos();
let p4z = d3 * ang23.sin() * tor_ang.sin();
let dx = p4x - p1x;
let dy = p4y - p1y;
let dz = p4z;
(dx * dx + dy * dy + dz * dz).sqrt()
}
fn compute_14_dist_cis(d1: f64, d2: f64, d3: f64, ang12: f64, ang23: f64) -> f64 {
let dx = d2 - d3 * ang23.cos() - d1 * ang12.cos();
let dy = d3 * ang23.sin() - d1 * ang12.sin();
(dx * dx + dy * dy).sqrt()
}
fn compute_14_dist_trans(d1: f64, d2: f64, d3: f64, ang12: f64, ang23: f64) -> f64 {
let dx = d2 - d3 * ang23.cos() - d1 * ang12.cos();
let dy = d3 * ang23.sin() + d1 * ang12.sin();
(dx * dx + dy * dy).sqrt()
}
fn set_13_bounds_helper(
aid1: usize,
aid: usize,
aid3: usize,
angle: f64,
bond_lengths: &[f64],
mmat: &mut BoundsMatrix,
mol: &Molecule,
) {
let Some(bid1) = bond_between_idx_simple(mol, aid1, aid) else {
return;
};
let Some(bid2) = bond_between_idx_simple(mol, aid, aid3) else {
return;
};
let mut dl = compute_13_dist(bond_lengths[bid1], bond_lengths[bid2], angle);
let mut dist_tol = DIST13_TOL;
if is_larger_sp2_atom_idx(mol, aid1) {
dist_tol *= 2.0;
}
if is_larger_sp2_atom_idx(mol, aid) {
dist_tol *= 2.0;
}
if is_larger_sp2_atom_idx(mol, aid3) {
dist_tol *= 2.0;
}
let du = dl + dist_tol;
dl -= dist_tol;
mmat.check_and_set_bounds(aid1, aid3, dl, du);
}
fn bond_between_idx(bond_idx: usize) -> Option<usize> {
None
}
fn is_larger_sp2_atom_idx(mol: &Molecule, idx: usize) -> bool {
let atom = &mol.atoms()[idx];
atom.atomic_number() > 13
&& atom.hybridization() == Hybridization::Sp2
&& mol
.derived_cache()
.rings
.as_ref()
.is_some_and(|rings| rings.num_atom_rings(AtomId::new(idx)) > 0)
}
#[derive(Debug, Clone, Copy)]
struct Path14Configuration {
bid1: usize,
bid2: usize,
bid3: usize,
kind: Path14Kind,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Path14Kind {
Cis,
Trans,
Other,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DistType {
Dist12,
Dist13,
Dist14,
}
#[derive(Debug, Clone)]
struct ComputedData {
bond_lengths: Vec<f64>,
bond_adj: Vec<i32>, bond_angles: Vec<f64>, paths14: Vec<Path14Configuration>,
cis_paths: Vec<u64>,
trans_paths: Vec<u64>,
set15_atoms: Vec<bool>, visited12_bounds: Vec<bool>,
visited13_bounds: Vec<bool>,
visited14_bounds: Vec<bool>,
}
impl ComputedData {
fn new(n_atoms: usize, n_bonds: usize) -> Self {
Self {
bond_lengths: vec![0.0; n_bonds],
bond_adj: vec![-1; n_bonds * n_bonds],
bond_angles: vec![-1.0; n_bonds * n_bonds],
paths14: Vec::new(),
cis_paths: Vec::new(),
trans_paths: Vec::new(),
set15_atoms: vec![false; n_atoms * n_atoms],
visited12_bounds: vec![false; n_atoms * n_atoms],
visited13_bounds: vec![false; n_atoms * n_atoms],
visited14_bounds: vec![false; n_atoms * n_atoms],
}
}
fn bond_mat_idx(&self, n_bonds: usize, i: usize, j: usize) -> usize {
i * n_bonds + j
}
fn set_bond_adj(&mut self, n_bonds: usize, i: usize, j: usize, value: i32) {
let idx = self.bond_mat_idx(n_bonds, i, j);
let rev = self.bond_mat_idx(n_bonds, j, i);
self.bond_adj[idx] = value;
self.bond_adj[rev] = value;
}
fn get_bond_adj(&self, n_bonds: usize, i: usize, j: usize) -> i32 {
self.bond_adj[self.bond_mat_idx(n_bonds, i, j)]
}
fn set_bond_angle(&mut self, n_bonds: usize, i: usize, j: usize, value: f64) {
let idx = self.bond_mat_idx(n_bonds, i, j);
let rev = self.bond_mat_idx(n_bonds, j, i);
self.bond_angles[idx] = value;
self.bond_angles[rev] = value;
}
fn get_bond_angle(&self, n_bonds: usize, i: usize, j: usize) -> f64 {
self.bond_angles[self.bond_mat_idx(n_bonds, i, j)]
}
fn visited_bound(&self, pid: usize, dist_type: DistType) -> bool {
self.visited12_bounds[pid]
|| (matches!(dist_type, DistType::Dist13 | DistType::Dist14)
&& self.visited13_bounds[pid])
|| (matches!(dist_type, DistType::Dist14) && self.visited14_bounds[pid])
}
}
fn record_path_flag(path_store: &mut Vec<u64>, path_id: u64) {
if !path_store.contains(&path_id) {
path_store.push(path_id);
}
}
fn has_path_flag(path_store: &[u64], path_id: u64) -> bool {
path_store.contains(&path_id)
}
fn path14_id(nb: usize, bid1: usize, bid2: usize, bid3: usize) -> u64 {
bid1 as u64 * nb as u64 * nb as u64 + bid2 as u64 * nb as u64 + bid3 as u64
}
fn ring_info_for_distgeom(mol: &Molecule) -> Result<Cow<'_, crate::RingInfo>, DgBoundsError> {
if let Some(ring_info) = mol.derived_cache().rings.as_ref() {
Ok(Cow::Borrowed(ring_info))
} else {
Ok(Cow::Owned(crate::symmetrize_sssr(mol)?))
}
}
fn bond_pair_shared_atom(
mol: &Molecule,
accum_data: &ComputedData,
bid1: usize,
bid2: usize,
) -> usize {
let nb = mol.num_bonds();
let aid = accum_data.get_bond_adj(nb, bid1, bid2);
assert!(aid >= 0, "missing shared atom for bond pair");
aid as usize
}
fn get_atom_stereo(bond: &Bond, aid1: usize, aid4: usize) -> BondStereo {
let mut stype = bond.stereo();
if matches!(
stype,
BondStereo::Z | BondStereo::E | BondStereo::Cis | BondStereo::Trans
) && bond.stereo_atoms().is_some_and(|atoms| atoms.len() >= 2)
{
let stereo_atoms = bond.stereo_atoms().expect("checked stereo atoms");
let needs_flip = (stereo_atoms[0].index() != aid1) ^ (stereo_atoms[1].index() != aid4);
if needs_flip {
stype = match stype {
BondStereo::Z => BondStereo::E,
BondStereo::E => BondStereo::Z,
BondStereo::Cis => BondStereo::Trans,
BondStereo::Trans => BondStereo::Cis,
other => other,
};
}
}
stype
}
fn set_ring_angle(mol: &Molecule, aid2: usize, ring_size: usize) -> f64 {
let hyb = mol.atoms()[aid2].hybridization();
let deg_to_rad = std::f64::consts::PI / 180.0;
if (hyb == Hybridization::Sp2 && ring_size <= 8) || ring_size == 3 || ring_size == 4 {
std::f64::consts::PI * (1.0 - 2.0 / ring_size as f64)
} else if hyb == Hybridization::Sp3 {
if ring_size == 5 {
104.0 * deg_to_rad
} else {
109.5 * deg_to_rad
}
} else if hyb == Hybridization::Sp3d {
105.0 * deg_to_rad
} else if hyb == Hybridization::Sp3d2 {
90.0 * deg_to_rad
} else {
120.0 * deg_to_rad
}
}
fn set_13_bounds(
mol: &Molecule,
mmat: &mut BoundsMatrix,
accum_data: &mut ComputedData,
rinfo: &crate::RingInfo,
) {
let npt = mmat.num_rows();
assert_eq!(npt, mol.atoms().len(), "Wrong size metric matrix");
let nb = mol.bonds().len();
assert_eq!(
accum_data.bond_angles.len(),
nb * nb,
"Wrong size bond angle matrix"
);
assert_eq!(
accum_data.bond_adj.len(),
nb * nb,
"Wrong size bond adjacency matrix"
);
let mut atom_rings: Vec<Vec<usize>> = rinfo
.atom_rings()
.iter()
.map(|ring| ring.iter().map(|aid| aid.index()).collect())
.collect();
atom_rings.sort_by_key(Vec::len);
let mut visited = vec![0usize; npt];
let mut angle_taken = vec![0.0; npt];
let mut done_paths = vec![false; nb * nb];
for ring in &atom_rings {
let r_size = ring.len();
let mut aid1 = ring[r_size - 1];
for i in 0..r_size {
let aid2 = ring[i];
let aid3 = if i == r_size - 1 {
ring[0]
} else {
ring[i + 1]
};
let b1 = bond_between_idx_simple(mol, aid1, aid2).expect("no bond found");
let b2 = bond_between_idx_simple(mol, aid2, aid3).expect("no bond found");
let id1 = nb * b1 + b2;
let id2 = nb * b2 + b1;
let pid = aid1.min(aid3) * npt + aid1.max(aid3);
if !done_paths[id1] && !done_paths[id2] {
let angle = set_ring_angle(mol, aid2, r_size);
if !accum_data.visited_bound(pid, DistType::Dist12) {
set_13_bounds_helper(
aid1,
aid2,
aid3,
angle,
&accum_data.bond_lengths,
mmat,
mol,
);
accum_data.visited13_bounds[pid] = true;
}
accum_data.set_bond_angle(nb, b1, b2, angle);
accum_data.set_bond_adj(nb, b1, b2, aid2 as i32);
visited[aid2] += 1;
angle_taken[aid2] += angle;
done_paths[id1] = true;
done_paths[id2] = true;
}
aid1 = aid2;
}
}
for aid2 in 0..npt {
let atom = &mol.atoms()[aid2];
let nbrs = neighbors_for_atom(mol, aid2);
let deg = nbrs.len();
let n13 = deg * (deg.saturating_sub(1)) / 2;
if n13 == visited[aid2] {
continue;
}
let ahyb = atom.hybridization();
if visited[aid2] >= 1 {
for left in 0..nbrs.len() {
let aid1 = nbrs[left];
let bid1 = bond_between_idx_simple(mol, aid1, aid2).expect("missing bond");
for right in 0..left {
let aid3 = nbrs[right];
let bid2 = bond_between_idx_simple(mol, aid3, aid2).expect("missing bond");
if accum_data.get_bond_angle(nb, bid1, bid2) >= 0.0 {
continue;
}
let angle = if ahyb == Hybridization::Sp2 {
(2.0 * std::f64::consts::PI - angle_taken[aid2])
/ (n13 - visited[aid2]) as f64
} else if ahyb == Hybridization::Sp3 {
if rinfo.is_atom_in_ring_of_size(AtomId::new(aid2), 3) {
116.0_f64.to_radians()
} else if rinfo.is_atom_in_ring_of_size(AtomId::new(aid2), 4) {
112.0_f64.to_radians()
} else {
109.5_f64.to_radians()
}
} else if has_non_tetrahedral_stereo(atom) {
get_ideal_angle_between_ligands(aid2, aid1, aid3, mol).to_radians()
} else if deg == 5 {
105.0_f64.to_radians()
} else if deg == 6 {
135.0_f64.to_radians()
} else {
120.0_f64.to_radians()
};
let pid = aid1.min(aid3) * npt + aid1.max(aid3);
if !accum_data.visited_bound(pid, DistType::Dist12) {
set_13_bounds_helper(
aid1,
aid2,
aid3,
angle,
&accum_data.bond_lengths,
mmat,
mol,
);
accum_data.visited13_bounds[pid] = true;
}
accum_data.set_bond_angle(nb, bid1, bid2, angle);
accum_data.set_bond_adj(nb, bid1, bid2, aid2 as i32);
angle_taken[aid2] += angle;
visited[aid2] += 1;
}
}
} else {
for left in 0..nbrs.len() {
let aid1 = nbrs[left];
let bid1 = bond_between_idx_simple(mol, aid1, aid2).expect("missing bond");
for right in 0..left {
let aid3 = nbrs[right];
let bid2 = bond_between_idx_simple(mol, aid3, aid2).expect("missing bond");
let angle = if has_non_tetrahedral_stereo(atom) {
get_ideal_angle_between_ligands(aid2, aid1, aid3, mol).to_radians()
} else if ahyb == Hybridization::Sp {
std::f64::consts::PI
} else if ahyb == Hybridization::Sp2 {
2.0 * std::f64::consts::PI / 3.0
} else if ahyb == Hybridization::Sp3 {
109.5_f64.to_radians()
} else if ahyb == Hybridization::Sp3d {
105.0_f64.to_radians()
} else if ahyb == Hybridization::Sp3d2 {
135.0_f64.to_radians()
} else {
120.0_f64.to_radians()
};
let pid = aid1.min(aid3) * npt + aid1.max(aid3);
if !accum_data.visited_bound(pid, DistType::Dist12) {
if deg <= 4
|| (has_non_tetrahedral_stereo(atom)
&& atom.chiral_permutation().is_some())
{
set_13_bounds_helper(
aid1,
aid2,
aid3,
angle,
&accum_data.bond_lengths,
mmat,
mol,
);
} else {
let dmax =
accum_data.bond_lengths[bid1] + accum_data.bond_lengths[bid2];
let dl = 1.0;
let du = dmax * 1.2;
mmat.check_and_set_bounds(aid1, aid3, dl, du);
}
accum_data.visited13_bounds[pid] = true;
}
accum_data.set_bond_angle(nb, bid1, bid2, angle);
accum_data.set_bond_adj(nb, bid1, bid2, aid2 as i32);
angle_taken[aid2] += angle;
visited[aid2] += 1;
}
}
}
}
}
fn total_num_hydrogens_rdkit_like(mol: &Molecule, atom_idx: usize) -> Option<u32> {
let assignment = assign_valence(mol, ValenceModel::RdkitLike).ok()?;
Some(total_num_hydrogens_for_distgeom(
mol,
&mol.atoms()[atom_idx],
&assignment,
atom_idx,
))
}
fn record_14_path(
mol: &Molecule,
bid1: usize,
bid2: usize,
bid3: usize,
accum_data: &mut ComputedData,
) {
let atm2 = bond_pair_shared_atom(mol, accum_data, bid1, bid2);
let ahyb2 = mol.atoms()[atm2].hybridization();
let atm3 = bond_pair_shared_atom(mol, accum_data, bid2, bid3);
let ahyb3 = mol.atoms()[atm3].hybridization();
let nb = mol.num_bonds();
let kind = if ahyb2 == Hybridization::Sp2 && ahyb3 == Hybridization::Sp2 {
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid3, bid2, bid1));
Path14Kind::Cis
} else {
Path14Kind::Other
};
accum_data.paths14.push(Path14Configuration {
bid1,
bid2,
bid3,
kind,
});
}
fn set_in_ring_14_bounds(
mol: &Molecule,
bid1: usize,
bid2: usize,
bid3: usize,
accum_data: &mut ComputedData,
mmat: &mut BoundsMatrix,
dmat: &[f64],
ring_size: usize,
ring_info: &crate::RingInfo,
) {
let atm2 = bond_pair_shared_atom(mol, accum_data, bid1, bid2);
let ahyb2 = mol.atoms()[atm2].hybridization();
let atm3 = bond_pair_shared_atom(mol, accum_data, bid2, bid3);
let ahyb3 = mol.atoms()[atm3].hybridization();
let bnd1 = &mol.bonds()[bid1];
let bnd3 = &mol.bonds()[bid3];
let aid1 = if bnd1.begin().index() == atm2 {
bnd1.end().index()
} else {
bnd1.begin().index()
};
let aid4 = if bnd3.begin().index() == atm3 {
bnd3.end().index()
} else {
bnd3.begin().index()
};
let pid = aid1.min(aid4) * mol.num_atoms() + aid1.max(aid4);
if accum_data.visited_bound(pid, DistType::Dist13) {
return;
}
if dmat[aid1.max(aid4) * mmat.num_rows() + aid1.min(aid4)] < 2.9 {
return;
}
let bl1 = accum_data.bond_lengths[bid1];
let bl2 = accum_data.bond_lengths[bid2];
let bl3 = accum_data.bond_lengths[bid3];
let ba12 = accum_data.get_bond_angle(mol.num_bonds(), bid1, bid2);
let ba23 = accum_data.get_bond_angle(mol.num_bonds(), bid2, bid3);
assert!(ba12 > 0.0);
assert!(ba23 > 0.0);
let stype = get_atom_stereo(&mol.bonds()[bid2], aid1, aid4);
let mut prefer_cis = false;
let mut prefer_trans = false;
if ring_size <= 8
&& ahyb2 == Hybridization::Sp2
&& ahyb3 == Hybridization::Sp2
&& !matches!(stype, BondStereo::E | BondStereo::Trans)
{
if ring_info.num_bond_rings(BondId::new(bid2)) > 1 {
if ring_info.num_bond_rings(BondId::new(bid1)) == 1
&& ring_info.num_bond_rings(BondId::new(bid3)) == 1
{
for br in ring_info.bond_rings() {
if br.contains(&BondId::new(bid1)) {
if br.contains(&BondId::new(bid3)) {
prefer_cis = true;
}
break;
}
}
}
} else {
prefer_cis = true;
}
} else if matches!(stype, BondStereo::Z | BondStereo::Cis) {
prefer_cis = true;
} else if matches!(stype, BondStereo::E | BondStereo::Trans) {
prefer_trans = true;
}
let nb = mol.num_bonds();
let kind = if prefer_cis {
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid3, bid2, bid1));
Path14Kind::Cis
} else if prefer_trans {
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid3, bid2, bid1));
Path14Kind::Trans
} else {
Path14Kind::Other
};
accum_data.paths14.push(Path14Configuration {
bid1,
bid2,
bid3,
kind,
});
let (mut dl, mut du) = if prefer_cis {
let dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23) - GEN_DIST_TOL;
(dl, dl + 2.0 * GEN_DIST_TOL)
} else if prefer_trans {
let dl = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23) - GEN_DIST_TOL;
(dl, dl + 2.0 * GEN_DIST_TOL)
} else {
let mut dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23);
let mut du = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
if du < dl {
std::mem::swap(&mut dl, &mut du);
}
if (du - dl).abs() < DIST12_DELTA {
dl -= GEN_DIST_TOL;
du += GEN_DIST_TOL;
}
(dl, du)
};
accum_data.visited14_bounds[pid] = true;
mmat.check_and_set_bounds(aid1, aid4, dl, du);
}
fn set_two_in_same_ring_14_bounds(
mol: &Molecule,
bid1: usize,
bid2: usize,
bid3: usize,
accum_data: &mut ComputedData,
mmat: &mut BoundsMatrix,
dmat: &[f64],
) {
let atm2 = bond_pair_shared_atom(mol, accum_data, bid1, bid2);
let atm3 = bond_pair_shared_atom(mol, accum_data, bid2, bid3);
let bnd1 = &mol.bonds()[bid1];
let bnd3 = &mol.bonds()[bid3];
let aid1 = if bnd1.begin().index() == atm2 {
bnd1.end().index()
} else {
bnd1.begin().index()
};
let aid4 = if bnd3.begin().index() == atm3 {
bnd3.end().index()
} else {
bnd3.begin().index()
};
let pid = aid1.min(aid4) * mol.num_atoms() + aid1.max(aid4);
if accum_data.visited_bound(pid, DistType::Dist13) {
return;
}
if dmat[aid1.max(aid4) * mmat.num_rows() + aid1.min(aid4)] < 2.9 {
return;
}
if bond_between(mol, aid1, atm3).is_some() || bond_between(mol, aid4, atm2).is_some() {
return;
}
let ahyb2 = mol.atoms()[atm2].hybridization();
let ahyb3 = mol.atoms()[atm3].hybridization();
let bl1 = accum_data.bond_lengths[bid1];
let bl2 = accum_data.bond_lengths[bid2];
let bl3 = accum_data.bond_lengths[bid3];
let ba12 = accum_data.get_bond_angle(mol.num_bonds(), bid1, bid2);
let ba23 = accum_data.get_bond_angle(mol.num_bonds(), bid2, bid3);
assert!(ba12 > 0.0);
assert!(ba23 > 0.0);
let nb = mol.num_bonds();
let (mut dl, mut du, kind) = if ahyb2 == Hybridization::Sp2 && ahyb3 == Hybridization::Sp2 {
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid3, bid2, bid1));
let du = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
(du - GEN_DIST_TOL, du + GEN_DIST_TOL, Path14Kind::Trans)
} else {
let mut dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23);
let mut du = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
if du < dl {
std::mem::swap(&mut dl, &mut du);
}
if (du - dl).abs() < DIST12_DELTA {
dl -= GEN_DIST_TOL;
du += GEN_DIST_TOL;
}
(dl, du, Path14Kind::Other)
};
mmat.check_and_set_bounds(aid1, aid4, dl, du);
accum_data.paths14.push(Path14Configuration {
bid1,
bid2,
bid3,
kind,
});
accum_data.visited14_bounds[pid] = true;
}
fn set_two_in_diff_ring_14_bounds(
mol: &Molecule,
bid1: usize,
bid2: usize,
bid3: usize,
accum_data: &mut ComputedData,
mmat: &mut BoundsMatrix,
dmat: &[f64],
ring_info: &crate::RingInfo,
) {
set_in_ring_14_bounds(mol, bid1, bid2, bid3, accum_data, mmat, dmat, 0, ring_info);
}
fn set_share_ring_bond_14_bounds(
mol: &Molecule,
bid1: usize,
bid2: usize,
bid3: usize,
accum_data: &mut ComputedData,
mmat: &mut BoundsMatrix,
dmat: &[f64],
ring_info: &crate::RingInfo,
) {
set_in_ring_14_bounds(mol, bid1, bid2, bid3, accum_data, mmat, dmat, 0, ring_info);
}
fn check_macrocycle_two_in_same_ring_amide_ester_14(
mol: &Molecule,
bnd1_idx: usize,
bnd3_idx: usize,
atm1: usize,
atm2: usize,
atm3: usize,
atm4: usize,
) -> bool {
let a1_num = mol.atoms()[atm1].atomic_number();
let a2_num = mol.atoms()[atm2].atomic_number();
let a3_num = mol.atoms()[atm3].atomic_number();
let a4_num = mol.atoms()[atm4].atomic_number();
a1_num != 1
&& a3_num == 6
&& mol.bonds()[bnd3_idx].order() == BondOrder::Double
&& (a4_num == 8 || a4_num == 7)
&& mol.bonds()[bnd1_idx].order() == BondOrder::Single
&& (a2_num == 8 || a2_num == 7)
}
fn set_macrocycle_two_in_same_ring_14_bounds(
mol: &Molecule,
bid1: usize,
bid2: usize,
bid3: usize,
accum_data: &mut ComputedData,
mmat: &mut BoundsMatrix,
dmat: &[f64],
) {
let atm2 = bond_pair_shared_atom(mol, accum_data, bid1, bid2);
let atm3 = bond_pair_shared_atom(mol, accum_data, bid2, bid3);
let bnd1 = &mol.bonds()[bid1];
let bnd3 = &mol.bonds()[bid3];
let aid1 = if bnd1.begin().index() == atm2 {
bnd1.end().index()
} else {
bnd1.begin().index()
};
let aid4 = if bnd3.begin().index() == atm3 {
bnd3.end().index()
} else {
bnd3.begin().index()
};
let atm1 = aid1;
let atm4 = aid4;
let pid = aid1.min(aid4) * mol.num_atoms() + aid1.max(aid4);
if accum_data.visited_bound(pid, DistType::Dist13) {
return;
}
if dmat[aid1.max(aid4) * mmat.num_rows() + aid1.min(aid4)] < 2.9 {
return;
}
if bond_between(mol, aid1, atm3).is_some() || bond_between(mol, aid4, atm2).is_some() {
return;
}
let bl1 = accum_data.bond_lengths[bid1];
let bl2 = accum_data.bond_lengths[bid2];
let bl3 = accum_data.bond_lengths[bid3];
let ba12 = accum_data.get_bond_angle(mol.num_bonds(), bid1, bid2);
let ba23 = accum_data.get_bond_angle(mol.num_bonds(), bid2, bid3);
assert!(ba12 > 0.0);
assert!(ba23 > 0.0);
let nb = mol.num_bonds();
let (mut dl, mut du, kind) = if check_macrocycle_two_in_same_ring_amide_ester_14(
mol, bid1, bid3, atm1, atm2, atm3, atm4,
) || check_macrocycle_two_in_same_ring_amide_ester_14(
mol, bid3, bid1, atm4, atm3, atm2, atm1,
) {
let dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23);
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid3, bid2, bid1));
(dl - GEN_DIST_TOL, dl + GEN_DIST_TOL, Path14Kind::Cis)
} else {
let mut dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23);
let mut du = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
if du < dl {
std::mem::swap(&mut dl, &mut du);
}
if (du - dl).abs() < DIST12_DELTA {
dl -= GEN_DIST_TOL;
du += GEN_DIST_TOL;
}
(dl, du, Path14Kind::Other)
};
mmat.check_and_set_bounds(aid1, aid4, dl, du);
accum_data.paths14.push(Path14Configuration {
bid1,
bid2,
bid3,
kind,
});
accum_data.visited14_bounds[pid] = true;
}
fn set_macrocycle_all_in_same_ring_14_bounds(
mol: &Molecule,
bid1: usize,
bid2: usize,
bid3: usize,
accum_data: &mut ComputedData,
mmat: &mut BoundsMatrix,
) {
let atm2 = bond_pair_shared_atom(mol, accum_data, bid1, bid2);
let atm3 = bond_pair_shared_atom(mol, accum_data, bid2, bid3);
let bnd1 = &mol.bonds()[bid1];
let bnd2 = &mol.bonds()[bid2];
let bnd3 = &mol.bonds()[bid3];
let aid1 = if bnd1.begin().index() == atm2 {
bnd1.end().index()
} else {
bnd1.begin().index()
};
let aid4 = if bnd3.begin().index() == atm3 {
bnd3.end().index()
} else {
bnd3.begin().index()
};
let pid = aid1.min(aid4) * mol.num_atoms() + aid1.max(aid4);
if accum_data.visited_bound(pid, DistType::Dist13) {
return;
}
let atm1 = aid1;
let atm4 = aid4;
let bl1 = accum_data.bond_lengths[bid1];
let bl2 = accum_data.bond_lengths[bid2];
let bl3 = accum_data.bond_lengths[bid3];
let ba12 = accum_data.get_bond_angle(mol.num_bonds(), bid1, bid2);
let ba23 = accum_data.get_bond_angle(mol.num_bonds(), bid2, bid3);
assert!(ba12 > 0.0);
assert!(ba23 > 0.0);
let mut set_the_bound = true;
let nb = mol.num_bonds();
let (mut dl, mut du, kind) = match bnd2.order() {
BondOrder::Double => {
if bnd1.order() == BondOrder::Double || bnd3.order() == BondOrder::Double {
let dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23) - GEN_DIST_TOL;
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid3, bid2, bid1));
(dl, dl + 2.0 * GEN_DIST_TOL, Path14Kind::Cis)
} else if matches!(
bnd2.stereo(),
BondStereo::Z
| BondStereo::E
| BondStereo::Cis
| BondStereo::Trans
| BondStereo::AtropCw
| BondStereo::AtropCcw
) {
let stype = get_atom_stereo(bnd2, aid1, aid4);
if matches!(stype, BondStereo::Z | BondStereo::Cis) {
let dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23) - GEN_DIST_TOL;
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid3, bid2, bid1));
(dl, dl + 2.0 * GEN_DIST_TOL, Path14Kind::Cis)
} else {
let du = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid3, bid2, bid1));
(du - GEN_DIST_TOL, du + GEN_DIST_TOL, Path14Kind::Trans)
}
} else {
let mut dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23);
let mut du = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
if (du - dl).abs() < DIST12_DELTA {
dl -= GEN_DIST_TOL;
du += GEN_DIST_TOL;
}
(dl, du, Path14Kind::Other)
}
}
BondOrder::Single => {
if mol.atoms()[atm2].atomic_number() == 16
&& mol.atoms()[atm3].atomic_number() == 16
&& neighbors_for_atom(mol, atm2).len() == 2
&& neighbors_for_atom(mol, atm3).len() == 2
{
let dl = compute_14_dist_3d(bl1, bl2, bl3, ba12, ba23, std::f64::consts::PI / 2.0)
- GEN_DIST_TOL;
(dl, dl + 2.0 * GEN_DIST_TOL, Path14Kind::Other)
} else if check_macrocycle_all_in_same_ring_amide_ester_14(mol, atm1, atm2, atm3, atm4)
|| check_macrocycle_all_in_same_ring_amide_ester_14(mol, atm4, atm3, atm2, atm1)
{
let dl = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23) + 0.1;
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid3, bid2, bid1));
(dl - GEN_DIST_TOL, dl + GEN_DIST_TOL, Path14Kind::Trans)
} else if check_amide_ester_15(mol, bid1, bid3, atm2, atm3)
|| check_amide_ester_15(mol, bid3, bid1, atm3, atm2)
{
let total_hs_atm2 = total_num_hydrogens_rdkit_like(mol, atm2).unwrap_or(0);
if mol.atoms()[atm2].atomic_number() == 7
&& neighbors_for_atom(mol, atm2).len() == 3
&& mol.atoms()[atm1].atomic_number() == 1
&& total_hs_atm2 == 1
{
set_the_bound = false;
(0.0, 0.0, Path14Kind::Other)
} else {
let dl = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid3, bid2, bid1));
(dl - GEN_DIST_TOL, dl + GEN_DIST_TOL, Path14Kind::Trans)
}
} else {
(
compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23),
compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23),
Path14Kind::Other,
)
}
}
_ => (
compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23),
compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23),
Path14Kind::Other,
),
};
if set_the_bound {
if (du - dl).abs() < DIST12_DELTA {
dl -= GEN_DIST_TOL;
du += GEN_DIST_TOL;
}
mmat.check_and_set_bounds(aid1, aid4, dl, du);
accum_data.paths14.push(Path14Configuration {
bid1,
bid2,
bid3,
kind,
});
accum_data.visited14_bounds[pid] = true;
}
}
fn set_chain_14_bounds(
mol: &Molecule,
bid1: usize,
bid2: usize,
bid3: usize,
accum_data: &mut ComputedData,
mmat: &mut BoundsMatrix,
force_trans_amides: bool,
) {
let atm2 = bond_pair_shared_atom(mol, accum_data, bid1, bid2);
let atm3 = bond_pair_shared_atom(mol, accum_data, bid2, bid3);
let bnd1 = &mol.bonds()[bid1];
let bnd2 = &mol.bonds()[bid2];
let bnd3 = &mol.bonds()[bid3];
let aid1 = if bnd1.begin().index() == atm2 {
bnd1.end().index()
} else {
bnd1.begin().index()
};
let aid4 = if bnd3.begin().index() == atm3 {
bnd3.end().index()
} else {
bnd3.begin().index()
};
let pid = aid1.min(aid4) * mol.num_atoms() + aid1.max(aid4);
if accum_data.visited_bound(pid, DistType::Dist13) {
return;
}
let atm1 = aid1;
let atm4 = aid4;
let bl1 = accum_data.bond_lengths[bid1];
let bl2 = accum_data.bond_lengths[bid2];
let bl3 = accum_data.bond_lengths[bid3];
let ba12 = accum_data.get_bond_angle(mol.num_bonds(), bid1, bid2);
let ba23 = accum_data.get_bond_angle(mol.num_bonds(), bid2, bid3);
assert!(ba12 > 0.0);
assert!(ba23 > 0.0);
let mut set_the_bound = true;
let nb = mol.num_bonds();
let (mut dl, mut du, kind) = match bnd2.order() {
BondOrder::Double => {
if bnd1.order() == BondOrder::Double || bnd3.order() == BondOrder::Double {
let dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23) - GEN_DIST_TOL;
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid3, bid2, bid1));
(dl, dl + 2.0 * GEN_DIST_TOL, Path14Kind::Cis)
} else if matches!(
bnd2.stereo(),
BondStereo::Z
| BondStereo::E
| BondStereo::Cis
| BondStereo::Trans
| BondStereo::AtropCw
| BondStereo::AtropCcw
) {
let stype = get_atom_stereo(bnd2, aid1, aid4);
if matches!(stype, BondStereo::Z | BondStereo::Cis) {
let dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23) - GEN_DIST_TOL;
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid3, bid2, bid1));
(dl, dl + 2.0 * GEN_DIST_TOL, Path14Kind::Cis)
} else {
let du = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid3, bid2, bid1));
(du - GEN_DIST_TOL, du + GEN_DIST_TOL, Path14Kind::Trans)
}
} else {
let mut dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23);
let mut du = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
if (du - dl).abs() < DIST12_DELTA {
dl -= GEN_DIST_TOL;
du += GEN_DIST_TOL;
}
(dl, du, Path14Kind::Other)
}
}
BondOrder::Single => {
if mol.atoms()[atm2].atomic_number() == 16
&& mol.atoms()[atm3].atomic_number() == 16
&& neighbors_for_atom(mol, atm2).len() == 2
&& neighbors_for_atom(mol, atm3).len() == 2
{
let dl = compute_14_dist_3d(bl1, bl2, bl3, ba12, ba23, std::f64::consts::PI / 2.0)
- GEN_DIST_TOL;
(dl, dl + 2.0 * GEN_DIST_TOL, Path14Kind::Other)
} else if check_amide_ester_14(mol, bid1, bid3, atm2, atm3, atm4)
|| check_amide_ester_14(mol, bid3, bid1, atm3, atm2, atm1)
{
if force_trans_amides {
let total_hs_atm2 = total_num_hydrogens_rdkit_like(mol, atm2).unwrap_or(0);
let total_hs_atm3 = total_num_hydrogens_rdkit_like(mol, atm3).unwrap_or(0);
let secondary_left = mol.atoms()[atm1].atomic_number() == 1
&& mol.atoms()[atm2].atomic_number() == 7
&& neighbors_for_atom(mol, atm2).len() == 3
&& total_hs_atm2 == 1;
let secondary_right = mol.atoms()[atm4].atomic_number() == 1
&& mol.atoms()[atm3].atomic_number() == 7
&& neighbors_for_atom(mol, atm3).len() == 3
&& total_hs_atm3 == 1;
if secondary_left || secondary_right {
let dl = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
record_path_flag(
&mut accum_data.trans_paths,
path14_id(nb, bid1, bid2, bid3),
);
record_path_flag(
&mut accum_data.trans_paths,
path14_id(nb, bid3, bid2, bid1),
);
(dl - GEN_DIST_TOL, dl + GEN_DIST_TOL, Path14Kind::Trans)
} else {
let dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23);
record_path_flag(
&mut accum_data.cis_paths,
path14_id(nb, bid1, bid2, bid3),
);
record_path_flag(
&mut accum_data.cis_paths,
path14_id(nb, bid3, bid2, bid1),
);
(dl - GEN_DIST_TOL, dl + GEN_DIST_TOL, Path14Kind::Cis)
}
} else {
(
compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23),
compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23),
Path14Kind::Other,
)
}
} else if check_amide_ester_15(mol, bid1, bid3, atm2, atm3)
|| check_amide_ester_15(mol, bid3, bid1, atm3, atm2)
{
if force_trans_amides {
let total_hs_atm2 = total_num_hydrogens_rdkit_like(mol, atm2).unwrap_or(0);
let total_hs_atm3 = total_num_hydrogens_rdkit_like(mol, atm3).unwrap_or(0);
let secondary_left = mol.atoms()[atm1].atomic_number() == 1
&& mol.atoms()[atm2].atomic_number() == 7
&& neighbors_for_atom(mol, atm2).len() == 3
&& total_hs_atm2 == 1;
let secondary_right = mol.atoms()[atm4].atomic_number() == 1
&& mol.atoms()[atm3].atomic_number() == 7
&& neighbors_for_atom(mol, atm3).len() == 3
&& total_hs_atm3 == 1;
if secondary_left || secondary_right {
let dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23);
record_path_flag(
&mut accum_data.cis_paths,
path14_id(nb, bid1, bid2, bid3),
);
record_path_flag(
&mut accum_data.cis_paths,
path14_id(nb, bid3, bid2, bid1),
);
(dl - GEN_DIST_TOL, dl + GEN_DIST_TOL, Path14Kind::Cis)
} else {
let dl = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
record_path_flag(
&mut accum_data.trans_paths,
path14_id(nb, bid1, bid2, bid3),
);
record_path_flag(
&mut accum_data.trans_paths,
path14_id(nb, bid3, bid2, bid1),
);
(dl - GEN_DIST_TOL, dl + GEN_DIST_TOL, Path14Kind::Trans)
}
} else {
(
compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23),
compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23),
Path14Kind::Other,
)
}
} else {
(
compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23),
compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23),
Path14Kind::Other,
)
}
}
_ => (
compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23),
compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23),
Path14Kind::Other,
),
};
if set_the_bound {
if (du - dl).abs() < DIST12_DELTA {
dl -= GEN_DIST_TOL;
du += GEN_DIST_TOL;
}
mmat.check_and_set_bounds(aid1, aid4, dl, du);
accum_data.paths14.push(Path14Configuration {
bid1,
bid2,
bid3,
kind,
});
accum_data.visited14_bounds[pid] = true;
}
}
fn check_h2_nx3_h1_ox2(mol: &Molecule, atom_idx: usize) -> bool {
let atom = &mol.atoms()[atom_idx];
let Some(total_hs) = total_num_hydrogens_rdkit_like(mol, atom_idx) else {
return false;
};
(atom.atomic_number() == 6 && total_hs == 2)
|| (atom.atomic_number() == 8 && total_hs == 0)
|| (atom.atomic_number() == 7
&& neighbors_for_atom(mol, atom_idx).len() == 3
&& total_hs == 1)
}
fn check_nh_ch_ch_nh(mol: &Molecule, atm1: usize, atm2: usize, atm3: usize, atm4: usize) -> bool {
mol.atoms()[atm1].atomic_number() != 1
&& mol.atoms()[atm4].atomic_number() != 1
&& check_h2_nx3_h1_ox2(mol, atm2)
&& check_h2_nx3_h1_ox2(mol, atm3)
}
fn check_amide_ester_14(
mol: &Molecule,
bnd1_idx: usize,
bnd3_idx: usize,
atm2: usize,
atm3: usize,
atm4: usize,
) -> bool {
let bnd1 = &mol.bonds()[bnd1_idx];
let bnd3 = &mol.bonds()[bnd3_idx];
let a2_num = mol.atoms()[atm2].atomic_number();
let a3_num = mol.atoms()[atm3].atomic_number();
let a4_num = mol.atoms()[atm4].atomic_number();
let total_hs_atm2 = total_num_hydrogens_rdkit_like(mol, atm2).unwrap_or(0);
a3_num == 6
&& bnd3.order() == BondOrder::Double
&& (a4_num == 8 || a4_num == 7)
&& bnd1.order() == BondOrder::Single
&& (a2_num == 8 || (a2_num == 7 && total_hs_atm2 == 1))
}
fn check_macrocycle_all_in_same_ring_amide_ester_14(
mol: &Molecule,
atm1: usize,
atm2: usize,
atm3: usize,
atm4: usize,
) -> bool {
let a2_num = mol.atoms()[atm2].atomic_number();
let a3_num = mol.atoms()[atm3].atomic_number();
if a3_num != 6 {
return false;
}
if a2_num != 7 && a2_num != 8 {
return false;
}
if neighbors_for_atom(mol, atm2).len() != 3 || neighbors_for_atom(mol, atm3).len() != 3 {
return false;
}
for nbr_idx in neighbors_for_atom(mol, atm2) {
if nbr_idx != atm1 && nbr_idx != atm3 {
let res = &mol.atoms()[nbr_idx];
let res_bnd = &mol.bonds()[bond_between_idx_simple(mol, atm2, nbr_idx).expect("bond")];
if (res.atomic_number() != 6 && res.atomic_number() != 1)
|| res_bnd.order() != BondOrder::Single
{
return false;
}
break;
}
}
for nbr_idx in neighbors_for_atom(mol, atm3) {
if nbr_idx != atm2 && nbr_idx != atm4 {
let res = &mol.atoms()[nbr_idx];
let res_bnd = &mol.bonds()[bond_between_idx_simple(mol, atm3, nbr_idx).expect("bond")];
if res.atomic_number() != 8 || res_bnd.order() != BondOrder::Double {
return false;
}
break;
}
}
true
}
fn is_carbonyl(mol: &Molecule, atom_idx: usize) -> bool {
let atom = &mol.atoms()[atom_idx];
if atom.atomic_number() != 6 || neighbors_for_atom(mol, atom_idx).len() <= 2 {
return false;
}
neighbors_for_atom(mol, atom_idx)
.into_iter()
.any(|nbr_idx| {
let at_num = mol.atoms()[nbr_idx].atomic_number();
(at_num == 8 || at_num == 7)
&& mol.bonds()[bond_between_idx_simple(mol, atom_idx, nbr_idx).expect("bond")]
.order()
== BondOrder::Double
})
}
fn check_amide_ester_15(
mol: &Molecule,
bnd1_idx: usize,
bnd3_idx: usize,
atm2: usize,
atm3: usize,
) -> bool {
let a2_num = mol.atoms()[atm2].atomic_number();
let total_hs_atm2 = total_num_hydrogens_rdkit_like(mol, atm2).unwrap_or(0);
((a2_num == 8) || (a2_num == 7 && total_hs_atm2 == 1))
&& mol.bonds()[bnd1_idx].order() == BondOrder::Single
&& mol.atoms()[atm3].atomic_number() == 6
&& mol.bonds()[bnd3_idx].order() == BondOrder::Single
&& is_carbonyl(mol, atm3)
}
fn compute_15_dist_cis_cis(
d1: f64,
d2: f64,
d3: f64,
d4: f64,
ang12: f64,
ang23: f64,
ang34: f64,
) -> f64 {
let dx14 = d2 - d3 * ang23.cos() - d1 * ang12.cos();
let dy14 = d3 * ang23.sin() - d1 * ang12.sin();
let d14 = (dx14 * dx14 + dy14 * dy14).sqrt();
let cval = ((d3 - d2 * ang23.cos() + d1 * (ang12 + ang23).cos()) / d14).clamp(-1.0, 1.0);
let ang143 = cval.acos();
let ang145 = ang34 - ang143;
compute_13_dist(d14, d4, ang145)
}
fn compute_15_dist_cis_trans(
d1: f64,
d2: f64,
d3: f64,
d4: f64,
ang12: f64,
ang23: f64,
ang34: f64,
) -> f64 {
let dx14 = d2 - d3 * ang23.cos() - d1 * ang12.cos();
let dy14 = d3 * ang23.sin() - d1 * ang12.sin();
let d14 = (dx14 * dx14 + dy14 * dy14).sqrt();
let cval = ((d3 - d2 * ang23.cos() + d1 * (ang12 + ang23).cos()) / d14).clamp(-1.0, 1.0);
let ang143 = cval.acos();
let ang145 = ang34 + ang143;
compute_13_dist(d14, d4, ang145)
}
fn compute_15_dist_trans_trans(
d1: f64,
d2: f64,
d3: f64,
d4: f64,
ang12: f64,
ang23: f64,
ang34: f64,
) -> f64 {
let dx14 = d2 - d3 * ang23.cos() - d1 * ang12.cos();
let dy14 = d3 * ang23.sin() + d1 * ang12.sin();
let d14 = (dx14 * dx14 + dy14 * dy14).sqrt();
let cval = ((d3 - d2 * ang23.cos() + d1 * (ang12 - ang23).cos()) / d14).clamp(-1.0, 1.0);
let ang143 = cval.acos();
let ang145 = ang34 + ang143;
compute_13_dist(d14, d4, ang145)
}
fn compute_15_dist_trans_cis(
d1: f64,
d2: f64,
d3: f64,
d4: f64,
ang12: f64,
ang23: f64,
ang34: f64,
) -> f64 {
let dx14 = d2 - d3 * ang23.cos() - d1 * ang12.cos();
let dy14 = d3 * ang23.sin() + d1 * ang12.sin();
let d14 = (dx14 * dx14 + dy14 * dy14).sqrt();
let cval = ((d3 - d2 * ang23.cos() + d1 * (ang12 - ang23).cos()) / d14).clamp(-1.0, 1.0);
let ang143 = cval.acos();
let ang145 = ang34 - ang143;
compute_13_dist(d14, d4, ang145)
}
fn set_15_bounds(
mol: &Molecule,
mmat: &mut BoundsMatrix,
accum_data: &mut ComputedData,
dmat: &[f64],
) {
let nb = mol.bonds().len();
let na = mol.atoms().len();
for path_idx in 0..accum_data.paths14.len() {
let path = accum_data.paths14[path_idx];
set_15_bounds_helper(
mol, mmat, accum_data, dmat, nb, na, path.bid1, path.bid2, path.bid3, path.kind,
);
set_15_bounds_helper(
mol, mmat, accum_data, dmat, nb, na, path.bid3, path.bid2, path.bid1, path.kind,
);
}
}
#[allow(clippy::too_many_arguments)]
fn set_15_bounds_helper(
mol: &Molecule,
mmat: &mut BoundsMatrix,
accum_data: &mut ComputedData,
dmat: &[f64],
nb: usize,
na: usize,
bid1: usize,
bid2: usize,
bid3: usize,
kind: Path14Kind,
) {
let aid2 = accum_data.get_bond_adj(nb, bid1, bid2) as usize;
let aid1 = if mol.bonds()[bid1].begin().index() == aid2 {
mol.bonds()[bid1].end().index()
} else {
mol.bonds()[bid1].begin().index()
};
let aid3 = accum_data.get_bond_adj(nb, bid2, bid3) as usize;
let aid4 = if mol.bonds()[bid3].begin().index() == aid3 {
mol.bonds()[bid3].end().index()
} else {
mol.bonds()[bid3].begin().index()
};
let d1 = accum_data.bond_lengths[bid1];
let d2 = accum_data.bond_lengths[bid2];
let d3 = accum_data.bond_lengths[bid3];
let ang12 = accum_data.get_bond_angle(nb, bid1, bid2);
let ang23 = accum_data.get_bond_angle(nb, bid2, bid3);
for i in 0..nb {
if accum_data.get_bond_adj(nb, bid3, i) != aid4 as i32 {
continue;
}
let aid5 = if mol.bonds()[i].begin().index() == aid4 {
mol.bonds()[i].end().index()
} else {
mol.bonds()[i].begin().index()
};
let pid = aid1.min(aid5) * na + aid1.max(aid5);
if accum_data.visited_bound(pid, DistType::Dist14) {
return;
}
if dmat[aid1.max(aid5) * na + aid1.min(aid5)] < 3.9 {
continue;
}
if aid1 == aid5 {
continue;
}
let pid1 = aid1 * na + aid5;
let pid2 = aid5 * na + aid1;
if !(mmat.get_lower(aid1, aid5) < DIST12_DELTA
|| accum_data.set15_atoms[pid1]
|| accum_data.set15_atoms[pid2])
{
continue;
}
let d4 = accum_data.bond_lengths[i];
let ang34 = accum_data.get_bond_angle(nb, bid3, i);
let path_id = bid2 as u64 * nb as u64 * nb as u64 + bid3 as u64 * nb as u64 + i as u64;
let (mut dl, mut du) = match kind {
Path14Kind::Cis => {
if has_path_flag(&accum_data.cis_paths, path_id) {
let base = compute_15_dist_cis_cis(d1, d2, d3, d4, ang12, ang23, ang34);
(base - DIST15_TOL, base + DIST15_TOL)
} else if has_path_flag(&accum_data.trans_paths, path_id) {
let base = compute_15_dist_cis_trans(d1, d2, d3, d4, ang12, ang23, ang34);
(base - DIST15_TOL, base + DIST15_TOL)
} else {
(
compute_15_dist_cis_cis(d1, d2, d3, d4, ang12, ang23, ang34) - DIST15_TOL,
compute_15_dist_cis_trans(d1, d2, d3, d4, ang12, ang23, ang34) + DIST15_TOL,
)
}
}
Path14Kind::Trans => {
if has_path_flag(&accum_data.cis_paths, path_id) {
let base = compute_15_dist_trans_cis(d1, d2, d3, d4, ang12, ang23, ang34);
(base - DIST15_TOL, base + DIST15_TOL)
} else if has_path_flag(&accum_data.trans_paths, path_id) {
let base = compute_15_dist_trans_trans(d1, d2, d3, d4, ang12, ang23, ang34);
(base - DIST15_TOL, base + DIST15_TOL)
} else {
(
compute_15_dist_trans_cis(d1, d2, d3, d4, ang12, ang23, ang34) - DIST15_TOL,
compute_15_dist_trans_trans(d1, d2, d3, d4, ang12, ang23, ang34)
+ DIST15_TOL,
)
}
}
Path14Kind::Other => {
if has_path_flag(&accum_data.cis_paths, path_id) {
(
compute_15_dist_cis_cis(d4, d3, d2, d1, ang34, ang23, ang12) - DIST15_TOL,
compute_15_dist_cis_trans(d4, d3, d2, d1, ang34, ang23, ang12) + DIST15_TOL,
)
} else if has_path_flag(&accum_data.trans_paths, path_id) {
(
compute_15_dist_trans_cis(d4, d3, d2, d1, ang34, ang23, ang12) - DIST15_TOL,
compute_15_dist_trans_trans(d4, d3, d2, d1, ang34, ang23, ang12)
+ DIST15_TOL,
)
} else {
let vw1 = vdw_radius(mol.atoms()[aid1].atomic_number());
let vw5 = vdw_radius(mol.atoms()[aid5].atomic_number());
(VDW_SCALE_15 * (vw1 + vw5), MAX_UPPER)
}
}
};
if du < 0.0 {
du = MAX_UPPER;
}
mmat.check_and_set_bounds(aid1, aid5, dl, du);
accum_data.set15_atoms[pid1] = true;
accum_data.set15_atoms[pid2] = true;
}
}
fn set_14_bounds(
mol: &Molecule,
mmat: &mut BoundsMatrix,
accum_data: &mut ComputedData,
dmat: &[f64],
use_macrocycle_14config: bool,
force_trans_amides: bool,
rinfo: &crate::RingInfo,
) {
let npt = mmat.num_rows();
assert_eq!(npt, mol.num_atoms(), "Wrong size metric matrix");
let max_num_bonds = (u64::MAX as f64).cbrt() as usize;
if mol.num_bonds() >= max_num_bonds {
panic!("Too many bonds in the molecule, cannot compute 1-4 bounds");
}
let bond_rings = rinfo.bond_rings();
let mut bid_is_macrocycle: HashSet<usize> = HashSet::new();
let mut ring_bond_pairs: HashSet<u64> = HashSet::new();
let mut done_paths: HashSet<u64> = HashSet::new();
let nb = mol.num_bonds() as u64;
for bring in bond_rings {
let r_size = bring.len();
if r_size < 3 {
continue;
}
let mut bid1 = bring[r_size - 1].index();
for i in 0..r_size {
let bid2 = bring[i].index();
let bid3 = bring[(i + 1) % r_size].index();
let pid1 = bid1 as u64 * nb + bid2 as u64;
let pid2 = bid2 as u64 * nb + bid1 as u64;
let id1 = bid1 as u64 * nb * nb + bid2 as u64 * nb + bid3 as u64;
let id2 = bid3 as u64 * nb * nb + bid2 as u64 * nb + bid1 as u64;
ring_bond_pairs.insert(pid1);
ring_bond_pairs.insert(pid2);
done_paths.insert(id1);
done_paths.insert(id2);
if r_size > 5 {
if use_macrocycle_14config && r_size >= MIN_MACROCYCLE_RING_SIZE {
set_macrocycle_all_in_same_ring_14_bounds(
mol, bid1, bid2, bid3, accum_data, mmat,
);
bid_is_macrocycle.insert(bid2);
} else {
set_in_ring_14_bounds(
mol, bid1, bid2, bid3, accum_data, mmat, dmat, r_size, rinfo,
);
}
} else {
record_14_path(mol, bid1, bid2, bid3, accum_data);
}
bid1 = bid2;
}
}
for bond in mol.bonds() {
let bid2 = bond.id().index();
let aid2 = bond.begin().index();
let aid3 = bond.end().index();
for nbr1 in neighbors_for_atom(mol, aid2) {
let Some(bid1) = bond_between_idx_simple(mol, aid2, nbr1) else {
continue;
};
if bid1 == bid2 {
continue;
}
for nbr3 in neighbors_for_atom(mol, aid3) {
let Some(bid3) = bond_between_idx_simple(mol, aid3, nbr3) else {
continue;
};
if bid3 == bid2 {
continue;
}
let id1 = bid1 as u64 * nb * nb + bid2 as u64 * nb + bid3 as u64;
let id2 = bid3 as u64 * nb * nb + bid2 as u64 * nb + bid1 as u64;
if done_paths.contains(&id1) || done_paths.contains(&id2) {
continue;
}
let pid1 = bid1 as u64 * nb + bid2 as u64;
let pid2 = bid2 as u64 * nb + bid1 as u64;
let pid3 = bid2 as u64 * nb + bid3 as u64;
let pid4 = bid3 as u64 * nb + bid2 as u64;
if ring_bond_pairs.contains(&pid1)
|| ring_bond_pairs.contains(&pid2)
|| ring_bond_pairs.contains(&pid3)
|| ring_bond_pairs.contains(&pid4)
{
if use_macrocycle_14config && bid_is_macrocycle.contains(&bid2) {
set_macrocycle_two_in_same_ring_14_bounds(
mol, bid1, bid2, bid3, accum_data, mmat, dmat,
);
} else {
set_two_in_same_ring_14_bounds(
mol, bid1, bid2, bid3, accum_data, mmat, dmat,
);
}
} else if (rinfo.num_bond_rings(BondId::new(bid1)) > 0
&& rinfo.num_bond_rings(BondId::new(bid2)) > 0)
|| (rinfo.num_bond_rings(BondId::new(bid2)) > 0
&& rinfo.num_bond_rings(BondId::new(bid3)) > 0)
{
set_two_in_diff_ring_14_bounds(
mol, bid1, bid2, bid3, accum_data, mmat, dmat, rinfo,
);
} else if rinfo.num_bond_rings(BondId::new(bid2)) > 0 {
set_share_ring_bond_14_bounds(
mol, bid1, bid2, bid3, accum_data, mmat, dmat, rinfo,
);
} else {
set_chain_14_bounds(
mol,
bid1,
bid2,
bid3,
accum_data,
mmat,
force_trans_amides,
);
}
}
}
}
}
fn bond_lengths_from_accum(accum_data: &ComputedData, bid: usize) -> f64 {
let bl = accum_data.bond_lengths[bid];
if bl > 0.0 { bl } else { 1.5 }
}
fn bond_between_idx_simple(mol: &Molecule, a: usize, b: usize) -> Option<usize> {
mol.bonds()
.iter()
.find(|bond| {
(bond.begin().index() == a && bond.end().index() == b)
|| (bond.begin().index() == b && bond.end().index() == a)
})
.map(|b| b.id().index())
}
fn collect_bonds_and_angles(
mol: &Molecule,
bonds: &mut Vec<(i32, i32)>,
angles: &mut Vec<Vec<i32>>,
) {
bonds.clear();
angles.clear();
bonds.reserve(mol.num_bonds());
for bondi in mol.bonds() {
bonds.push((bondi.begin().index() as i32, bondi.end().index() as i32));
for j in (bondi.id().index() + 1)..mol.num_bonds() {
let bondj = &mol.bonds()[j];
let aid11 = bondi.begin().index() as i32;
let aid12 = bondi.end().index() as i32;
let aid21 = bondj.begin().index() as i32;
let aid22 = bondj.end().index() as i32;
if aid11 != aid21 && aid11 != aid22 && aid12 != aid21 && aid12 != aid22 {
continue;
}
let mut tmp = vec![0; 4];
if aid12 == aid21 {
tmp[0] = aid11;
tmp[1] = aid12;
tmp[2] = aid22;
} else if aid12 == aid22 {
tmp[0] = aid11;
tmp[1] = aid12;
tmp[2] = aid21;
} else if aid11 == aid21 {
tmp[0] = aid12;
tmp[1] = aid11;
tmp[2] = aid22;
} else if aid11 == aid22 {
tmp[0] = aid12;
tmp[1] = aid11;
tmp[2] = aid21;
}
if bondi.order() == BondOrder::Triple || bondj.order() == BondOrder::Triple {
tmp[3] = 1;
} else if bondi.order() == BondOrder::Double
&& bondj.order() == BondOrder::Double
&& neighbors_for_atom(mol, tmp[1] as usize).len() == 2
{
tmp[3] = 1;
}
angles.push(tmp);
}
}
}
fn set_lower_bound_vdw(mol: &Molecule, mmat: &mut BoundsMatrix, _scale_vdw: bool, dmat: &[f64]) {
let n = mol.atoms().len();
let npt = mmat.num_rows();
assert_eq!(npt, n, "Wrong size metric matrix");
let mut h_in_hbond_donors = vec![false; n];
let mut hbond_acceptors = vec![false; n];
for i in 1..n {
h_in_hbond_donors[i] = is_h_in_hbond_donor(mol, i);
hbond_acceptors[i] = is_hbond_acceptor(mol.atoms()[i].atomic_number());
}
for i in 1..n {
let vw1 = vdw_radius(mol.atoms()[i].atomic_number());
for j in 0..i {
if mmat.get_lower(i, j) > DIST12_DELTA {
continue;
}
let vw2 = vdw_radius(mol.atoms()[j].atomic_number());
let td = dmat[i * npt + j];
if (h_in_hbond_donors[i] && hbond_acceptors[j])
|| (hbond_acceptors[i] && h_in_hbond_donors[j])
{
mmat.set_lower(i, j, H_BOND_LENGTH);
} else if td == 4.0 {
mmat.set_lower(i, j, VDW_SCALE_15 * (vw1 + vw2));
} else if td == 5.0 {
mmat.set_lower(
i,
j,
(VDW_SCALE_15 + 0.5 * (1.0 - VDW_SCALE_15)) * (vw1 + vw2),
);
} else {
mmat.set_lower(i, j, vw1 + vw2);
}
}
}
}
#[allow(clippy::too_many_arguments)]
fn set_topol_bounds(
mol: &Molecule,
mmat: &mut BoundsMatrix,
set15bounds: bool,
scale_vdw: bool,
use_macrocycle_14config: bool,
force_trans_amides: bool,
set14bounds: bool,
set13bounds: bool,
) -> Result<(), DgBoundsError> {
let nb = mol.num_bonds();
let na = mol.num_atoms();
if na == 0 {
return Err(DgBoundsError::GenerationFailed(
"molecule has no atoms".to_string(),
));
}
let max_num_bonds = (u64::MAX as f64).cbrt() as usize;
if nb >= max_num_bonds {
return Err(DgBoundsError::GenerationFailed(
"Too many bonds in the molecule, cannot compute 1-4 bounds".to_string(),
));
}
let mut accum_data = ComputedData::new(na, nb);
let dist_matrix = flatten_topological_distances_matrix(mol);
let ring_info = if set13bounds || set14bounds {
Some(ring_info_for_distgeom(mol)?)
} else {
None
};
set_12_bounds(mol, mmat, &mut accum_data)?;
if set13bounds {
set_13_bounds(
mol,
mmat,
&mut accum_data,
ring_info.as_deref().expect("ring info loaded"),
);
}
if set14bounds {
set_14_bounds(
mol,
mmat,
&mut accum_data,
&dist_matrix,
use_macrocycle_14config,
force_trans_amides,
ring_info.as_deref().expect("ring info loaded"),
);
}
if set15bounds {
set_15_bounds(mol, mmat, &mut accum_data, &dist_matrix);
}
set_lower_bound_vdw(mol, mmat, scale_vdw, &dist_matrix);
Ok(())
}
#[allow(clippy::too_many_arguments)]
fn set_topol_bounds_with_outputs(
mol: &Molecule,
mmat: &mut BoundsMatrix,
bonds: &mut Vec<(i32, i32)>,
angles: &mut Vec<Vec<i32>>,
set15bounds: bool,
scale_vdw: bool,
use_macrocycle_14config: bool,
force_trans_amides: bool,
set14bounds: bool,
set13bounds: bool,
) -> Result<(), DgBoundsError> {
bonds.clear();
angles.clear();
set_topol_bounds(
mol,
mmat,
set15bounds,
scale_vdw,
use_macrocycle_14config,
force_trans_amides,
set14bounds,
set13bounds,
)?;
collect_bonds_and_angles(mol, bonds, angles);
Ok(())
}
pub fn dg_bounds_matrix_with_options(
molecule: &Molecule,
set15bounds: bool,
scale_vdw: bool,
do_triangle_smoothing: bool,
use_macrocycle14config: bool,
) -> Result<Vec<Vec<f64>>, DgBoundsError> {
let n = molecule.atoms().len();
let mut mmat = BoundsMatrix::new(n);
set_topol_bounds(
molecule,
&mut mmat,
set15bounds,
scale_vdw,
use_macrocycle14config,
true,
true,
true,
)?;
if do_triangle_smoothing && !mmat.triangle_smooth(0.0) {
return Err(DgBoundsError::GenerationFailed(
"triangle smoothing found inconsistent bounds".to_string(),
));
}
Ok(mmat.to_vec_vec())
}
pub fn dg_bounds_matrix(molecule: &Molecule) -> Result<Vec<Vec<f64>>, DgBoundsError> {
dg_bounds_matrix_with_options(molecule, true, false, true, false)
}
fn molecule_fragment_mapping(mol: &Molecule) -> Vec<usize> {
let mut mapping = vec![usize::MAX; mol.num_atoms()];
let mut frag_idx = 0;
for atom_idx in 0..mol.num_atoms() {
if mapping[atom_idx] != usize::MAX {
continue;
}
let mut stack = vec![atom_idx];
mapping[atom_idx] = frag_idx;
while let Some(current) = stack.pop() {
for nbr in neighbors_for_atom(mol, current) {
if mapping[nbr] == usize::MAX {
mapping[nbr] = frag_idx;
stack.push(nbr);
}
}
}
frag_idx += 1;
}
mapping
}
fn molecule_fragments_for_embed(
mol: &Molecule,
embed_fragments_separately: bool,
) -> Result<(Vec<Molecule>, Vec<usize>), DgBoundsError> {
if !embed_fragments_separately {
return Ok((vec![mol.clone()], vec![0; mol.num_atoms()]));
}
let frag_mapping = molecule_fragment_mapping(mol);
let fragment_count = frag_mapping
.iter()
.copied()
.max()
.map_or(0, |max_frag| max_frag + 1);
if fragment_count <= 1 {
return Ok((vec![mol.clone()], vec![0; mol.num_atoms()]));
}
let mut fragments = Vec::with_capacity(fragment_count);
for frag_idx in 0..fragment_count {
let atoms_to_remove: Vec<_> = mol
.atoms()
.iter()
.filter_map(|atom| (frag_mapping[atom.id().index()] != frag_idx).then_some(atom.id()))
.collect();
let mut builder = mol.to_builder();
builder.remove_atoms_for_construction(&atoms_to_remove);
fragments.push(builder.build()?);
}
Ok((fragments, frag_mapping))
}
fn conformer_from_positions(id: usize, atom_count: usize) -> Conformer3D {
Conformer3D::new(id, vec![[0.0, 0.0, 0.0]; atom_count], true)
}
fn embedder_add_conformers(
mol: &Molecule,
new_conformers: Vec<Conformer3D>,
clear_confs: bool,
) -> Result<(Molecule, Vec<i32>), DgBoundsError> {
let mut out = mol.clone();
let coordinates = out.coordinate_block_mut();
if clear_confs {
coordinates.conformers_3d.clear();
}
let mut ids = Vec::with_capacity(new_conformers.len());
for conformer in new_conformers {
let conf_id = coordinates.conformers_3d.len();
ids.push(conf_id as i32);
coordinates.conformers_3d.push(Conformer3D::new(
conf_id,
conformer.coordinates().to_vec(),
conformer.is_3d(),
));
}
Ok((out, ids))
}
pub fn embed_multiple_confs(
mol: &Molecule,
num_confs: u32,
params: &mut EmbedParameters,
) -> Result<(Molecule, Vec<i32>), DgBoundsError> {
let end_time = (params.timeout > 0)
.then(|| Instant::now() + Duration::from_secs(u64::from(params.timeout)));
if params.track_failures {
params
.failures
.resize(EmbedFailureCause::EndOfEnum as usize, 0);
params.failures.fill(0);
}
if mol.num_atoms() == 0 {
return Err(DgBoundsError::EmptyMolecule);
}
if params.et_version < 1 || params.et_version > 2 {
return Err(DgBoundsError::UnsupportedEtVersion);
}
let mut confs: Vec<Conformer3D> = (0..num_confs as usize)
.map(|i| conformer_from_positions(i, mol.num_atoms()))
.collect();
let mut confs_ok = vec![true; num_confs as usize];
let (mol_frags, frag_mapping) =
molecule_fragments_for_embed(mol, params.embed_fragments_separately)?;
let coord_map_storage = params.coord_map.clone();
let mut coord_map = coord_map_storage.as_ref();
if mol_frags.len() > 1 && coord_map.is_some() {
coord_map = None;
}
if mol_frags.len() > 1 && params.bounds_mat.is_some() {
coord_map = None;
}
for (frag_idx, piece) in mol_frags.iter().enumerate() {
let n_atoms = piece.num_atoms();
let mut etkdg_details = CrystalFFDetails::default();
embedder_init_etkdg(piece, params, &mut etkdg_details)?;
let mut mmat;
if params.bounds_mat.is_none() || mol_frags.len() > 1 {
mmat = BoundsMatrix::new(n_atoms);
if !embedder_setup_initial_bounds_matrix(
piece,
&mut mmat,
coord_map,
params,
&mut etkdg_details,
)? {
return embedder_add_conformers(mol, Vec::new(), params.clear_confs);
}
} else {
let bounds = params.bounds_mat.as_ref().expect("checked above");
if bounds.num_rows() != n_atoms {
return Err(DgBoundsError::GenerationFailed(
"size of boundsMat provided does not match the number of atoms in the molecule."
.to_string(),
));
}
collect_bonds_and_angles(piece, &mut etkdg_details.bonds, &mut etkdg_details.angles);
mmat = (**bounds).clone();
}
let piece_storage;
let piece = if piece
.derived_cache()
.rings
.as_ref()
.is_some_and(crate::RingInfo::is_initialized)
{
piece
} else {
let mut ringed_piece = piece.clone();
let ring_info = ring_info_for_distgeom(piece)?.into_owned();
ringed_piece.derived_cache_mut().rings = Some(ring_info);
piece_storage = ringed_piece;
&piece_storage
};
let mut chiral_centers = Vec::new();
let mut tetrahedral_carbons = Vec::new();
embedder_find_chiral_sets(
piece,
&mut chiral_centers,
&mut tetrahedral_carbons,
coord_map,
);
let mut double_bond_ends = Vec::new();
let mut stereo_double_bonds = Vec::new();
embedder_find_double_bonds(
piece,
&mut double_bond_ends,
&mut stereo_double_bonds,
coord_map,
);
let four_d = params.use_random_coords || !chiral_centers.is_empty();
let num_threads = if params.num_threads <= 0 {
std::thread::available_parallelism().map_or(1, std::num::NonZeroUsize::get) as i32
} else {
params.num_threads
};
let mut helper_args = EmbedHelperArgs {
confs_ok: &mut confs_ok,
four_d,
frag_mapping: Some(&frag_mapping),
confs: &mut confs,
frag_idx,
mmat: &mmat,
chiral_centers: &chiral_centers,
tetrahedral_carbons: &tetrahedral_carbons,
double_bond_ends: &double_bond_ends,
stereo_double_bonds: &stereo_double_bonds,
etkdg_details: &etkdg_details,
};
for thread_id in 0..num_threads {
embedder_embed_helper(thread_id, num_threads, &mut helper_args, params, end_time)?;
}
if let Some(deadline) = end_time
&& Instant::now() > deadline
{
embedder_increment_failure(params, EmbedFailureCause::ExceededTimeout);
let (out, mut res) = embedder_add_conformers(mol, Vec::new(), params.clear_confs)?;
res.push(-1);
return Ok((out, res));
}
}
let self_matches = if params.prune_rms_thresh > 0.0 {
embedder_get_mol_self_matches(mol, params)?
} else {
Vec::new()
};
let mut out = mol.clone();
if params.clear_confs {
out.coordinate_block_mut().conformers_3d.clear();
}
let mut res = Vec::new();
for (ci, conf) in confs.into_iter().enumerate() {
if confs_ok[ci]
&& (params.prune_rms_thresh <= 0.0
|| embedder_is_conf_far_from_rest(
&out,
&conf,
params.prune_rms_thresh,
&self_matches,
))
{
let conf_id = out.coordinate_block_mut().conformers_3d.len();
out.coordinate_block_mut()
.conformers_3d
.push(Conformer3D::new(
conf_id,
conf.coordinates().to_vec(),
conf.is_3d(),
));
res.push(conf_id as i32);
}
}
Ok((out, res))
}
pub fn embed_multiple_confs_return_vector(
mol: &Molecule,
num_confs: u32,
params: &mut EmbedParameters,
) -> Result<Vec<i32>, DgBoundsError> {
let (_out, res) = embed_multiple_confs(mol, num_confs, params)?;
Ok(res)
}
pub fn embed_multiple_confs_result(
mol: &Molecule,
num_confs: u32,
params: &mut EmbedParameters,
) -> Result<EmbedMultipleConfsResult, DgBoundsError> {
let (molecule, conf_ids) = embed_multiple_confs(mol, num_confs, params)?;
Ok(EmbedMultipleConfsResult {
molecule,
conf_ids,
requested_num_confs: num_confs,
params: params.clone(),
})
}
pub fn embed_molecule(
mol: &Molecule,
params: &mut EmbedParameters,
) -> Result<(Molecule, i32), DgBoundsError> {
let (out, conf_ids) = embed_multiple_confs(mol, 1, params)?;
Ok((out, conf_ids.first().copied().unwrap_or(-1)))
}
pub fn embed_molecule_result(
mol: &Molecule,
params: &mut EmbedParameters,
) -> Result<EmbedMoleculeResult, DgBoundsError> {
let (molecule, conf_id) = embed_molecule(mol, params)?;
Ok(EmbedMoleculeResult {
molecule,
conf_id,
params: params.clone(),
})
}
#[allow(clippy::too_many_arguments)]
pub fn rd_distgeom_embed_molecule_wrapper(
mol: &Molecule,
max_attempts: u32,
seed: i32,
clear_confs: bool,
use_random_coords: bool,
box_size_mult: f64,
rand_neg_eig: bool,
num_zero_fail: u32,
coord_map: BTreeMap<i32, ForceFieldVec3>,
force_tol: f64,
ignore_smoothing_failures: bool,
enforce_chirality: bool,
use_exp_torsion_angle_prefs: bool,
use_basic_knowledge: bool,
print_exp_torsion_angles: bool,
use_small_ring_torsions: bool,
use_macrocycle_torsions: bool,
et_version: u32,
use_macrocycle14config: bool,
) -> Result<(Molecule, i32), DgBoundsError> {
let mut params = EmbedParameters::from_rdkit_constructor(
max_attempts,
1,
seed,
clear_confs,
use_random_coords,
box_size_mult,
rand_neg_eig,
num_zero_fail,
(!coord_map.is_empty()).then_some(coord_map),
force_tol,
ignore_smoothing_failures,
enforce_chirality,
use_exp_torsion_angle_prefs,
use_basic_knowledge,
print_exp_torsion_angles,
EmbedParameters::default().basin_thresh,
-1.0,
false,
et_version,
None,
true,
use_small_ring_torsions,
use_macrocycle_torsions,
use_macrocycle14config,
0,
None,
None,
);
embed_molecule(mol, &mut params)
}
pub fn rd_distgeom_embed_molecule_wrapper_with_params(
mol: &Molecule,
params: &mut EmbedParameters,
) -> Result<(Molecule, i32), DgBoundsError> {
embed_molecule(mol, params)
}
pub fn rd_distgeom_get_kdg() -> EmbedParameters {
EmbedParameters::kdg()
}
pub fn rd_distgeom_get_etdg() -> EmbedParameters {
EmbedParameters::etdg()
}
pub fn rd_distgeom_get_etdg_v2() -> EmbedParameters {
EmbedParameters::etdg_v2()
}
pub fn rd_distgeom_get_etkdg() -> EmbedParameters {
EmbedParameters::etkdg()
}
pub fn rd_distgeom_get_etkdg_v2() -> EmbedParameters {
EmbedParameters::etkdg_v2()
}
pub fn rd_distgeom_get_etkdg_v3() -> EmbedParameters {
EmbedParameters::etkdg_v3()
}
pub fn rd_distgeom_get_sr_etkdg_v3() -> EmbedParameters {
EmbedParameters::sr_etkdg_v3()
}
pub fn rd_distgeom_get_exp_tors_helper(
mol: &Molecule,
use_exp_torsions: bool,
use_small_ring_torsions: bool,
use_macrocycle_torsions: bool,
use_basic_knowledge: bool,
et_version: u32,
verbose: bool,
) -> Result<CrystalFFDetails, DgBoundsError> {
let mut details = CrystalFFDetails::default();
get_experimental_torsions_without_bonds(
mol,
&mut details,
use_exp_torsions,
use_small_ring_torsions,
use_macrocycle_torsions,
use_basic_knowledge,
et_version,
verbose,
)
.map_err(|err| DgBoundsError::GenerationFailed(err.to_string()))?;
Ok(details)
}
pub fn rd_distgeom_get_exp_tors_helper_with_params(
mol: &Molecule,
params: &EmbedParameters,
) -> Result<CrystalFFDetails, DgBoundsError> {
rd_distgeom_get_exp_tors_helper(
mol,
params.use_exp_torsion_angle_prefs,
params.use_small_ring_torsions,
params.use_macrocycle_torsions,
params.use_basic_knowledge,
params.et_version,
params.verbose,
)
}
pub fn rd_distgeom_embed_parameters_to_json_helper(params: &EmbedParameters) -> String {
params.to_json()
}
#[cfg(test)]
mod tests;