#[cfg(test)]
use crate::chemistry::forcefield::uff::atom_typer::get_atom_label_for_uff as source_get_atom_label_for_uff;
use crate::chemistry::forcefield::uff::atom_typer::get_atom_types_for_uff;
use crate::chemistry::forcefield::uff::params::AtomicParams;
use crate::chemistry::stereo::{get_ideal_angle_between_ligands, has_non_tetrahedral_stereo};
use crate::{
Atom, AtomId, Bond, BondId, BondOrder, BondStereo, Hybridization, Molecule, ValenceModel,
assign_valence, rdkit_valence_list,
};
use std::collections::HashSet;
const DIST12_DELTA: f64 = 0.01;
const DIST13_TOL: f64 = 0.04;
const GEN_DIST_TOL: f64 = 0.06;
const DIST15_TOL: f64 = 0.08;
const VDW_SCALE_15: f64 = 0.7;
const H_BOND_LENGTH: f64 = 1.8;
const MAX_UPPER: f64 = 1000.0;
const MIN_MACROCYCLE_RING_SIZE: usize = 9;
const DEFAULT_LOWER: f64 = 0.001;
const DEFAULT_UPPER: f64 = MAX_UPPER;
const UFF_LAMBDA: f64 = 0.1332;
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum DgBoundsError {
#[error("distance bounds matrix generation failed: {0}")]
GenerationFailed(String),
#[error(transparent)]
UnsupportedFeature(#[from] crate::UnsupportedFeatureError),
}
fn vdw_radius(atomic_num: u8) -> f64 {
match atomic_num {
0 => 0.0, 1 => 1.2, 2 => 1.4, 3 => 2.2, 4 => 1.9, 5 => 1.8, 6 => 1.7, 7 => 1.6, 8 => 1.55, 9 => 1.5, 10 => 1.54, 11 => 2.4, 12 => 2.2, 13 => 2.1, 14 => 2.1, 15 => 1.95, 16 => 1.8, 17 => 1.8, 18 => 1.88, 19 => 2.8, 20 => 2.4, 21 => 2.3, 22 => 2.15, 23 => 2.05, 24 => 2.05, 25 => 2.05, 26 => 2.05, 27 => 2.0, 28 => 2.0, 29 => 2.0, 30 => 2.1, 31 => 2.1, 32 => 2.1, 33 => 2.05, 34 => 1.9, 35 => 1.9, 36 => 2.02, 37 => 2.9, 38 => 2.55, 39 => 2.4, 40 => 2.3, 41 => 2.15, 42 => 2.1, 43 => 2.05, 44 => 2.05, 45 => 2.0, 46 => 2.05, 47 => 2.1, 48 => 2.2, 49 => 2.2, 50 => 2.25, 51 => 2.2, 52 => 2.1, 53 => 2.1, 54 => 2.16, 55 => 3.0, 56 => 2.7, 57 => 2.5, 58 => 2.48, 59 => 2.47, 60 => 2.45, 61 => 2.43, 62 => 2.42, 63 => 2.4, 64 => 2.38, 65 => 2.37, 66 => 2.35, 67 => 2.33, 68 => 2.32, 69 => 2.3, 70 => 2.28, 71 => 2.27, 72 => 2.25, 73 => 2.2, 74 => 2.1, 75 => 2.05, 76 => 2.0, 77 => 2.0, 78 => 2.05, 79 => 2.1, 80 => 2.05, 81 => 2.2, 82 => 2.3, 83 => 2.3, 84 => 2.0, 85 => 2.0, _ => 2.0, }
}
fn is_hbond_acceptor(atomic_num: u8) -> bool {
atomic_num == 7 || atomic_num == 8
}
fn is_hbond_donor(atom: &Atom, total_num_hydrogens: u32) -> bool {
is_hbond_acceptor(atom.atomic_number()) && total_num_hydrogens > 0
}
fn is_h_in_hbond_donor(mol: &Molecule, atom_idx: usize) -> bool {
if mol.atoms()[atom_idx].atomic_number() != 1 {
return false;
}
for bond in mol.bonds() {
let other = if bond.begin().index() == atom_idx {
bond.end().index()
} else if bond.end().index() == atom_idx {
bond.begin().index()
} else {
continue;
};
let an = mol.atoms()[other].atomic_number();
if an == 7 || an == 8 {
return true;
}
}
false
}
fn rdkit_default_valence(atomic_num: u8) -> Option<i32> {
let vals = rdkit_valence_list(atomic_num).ok()??;
vals.iter().copied().find(|v| *v >= 0)
}
fn rdkit_n_outer_electrons(atomic_num: u8) -> Option<i32> {
crate::chemistry::valence::periodic_table_outer_electrons(atomic_num).ok()
}
fn bond_valence_contrib_for_atom(bond: &Bond, atom_index: usize) -> f64 {
if bond.begin().index() != atom_index && bond.end().index() != atom_index {
return 0.0;
}
match bond.order() {
BondOrder::Null | BondOrder::Zero | BondOrder::Ionic => 0.0,
BondOrder::Single => 1.0,
BondOrder::Double => 2.0,
BondOrder::Triple => 3.0,
BondOrder::Quadruple => 4.0,
BondOrder::Quintuple => 5.0,
BondOrder::Hextuple => 6.0,
BondOrder::OneAndHalf => 1.5,
BondOrder::TwoAndHalf => 2.5,
BondOrder::ThreeAndHalf => 3.5,
BondOrder::FourAndHalf => 4.5,
BondOrder::FiveAndHalf => 5.5,
BondOrder::Aromatic => 1.5,
BondOrder::Hydrogen => 0.0,
BondOrder::Dative
| BondOrder::DativeOne
| BondOrder::DativeLeft
| BondOrder::DativeRight => {
if bond.end().index() == atom_index {
1.0
} else {
0.0
}
}
BondOrder::ThreeCenter | BondOrder::Unspecified | BondOrder::Other => 0.0,
}
}
fn count_atom_electrons_rdkit(
mol: &Molecule,
assignment: &crate::ValenceAssignment,
atom_degree: &[usize],
atom_index: usize,
) -> i32 {
let atom = &mol.atoms()[atom_index];
let Some(dv) = rdkit_default_valence(atom.atomic_number()) else {
return -1;
};
if dv <= 1 {
return -1;
}
let mut degree = atom_degree[atom_index] as i32
+ atom.explicit_hydrogens() as i32
+ assignment.implicit_hydrogens[atom_index] as i32;
for bond in mol.bonds() {
if (bond.begin().index() == atom_index || bond.end().index() == atom_index)
&& bond_valence_contrib_for_atom(bond, atom_index) == 0.0
{
degree -= 1;
}
}
if degree > 3 {
return -1;
}
let Some(nouter) = rdkit_n_outer_electrons(atom.atomic_number()) else {
return -1;
};
let nlp = (nouter - dv - atom.formal_charge() as i32).max(0);
let radicals = atom.radical_electrons() as i32;
let mut res = (dv - degree) + nlp - radicals;
if res > 1 {
let n_unsaturations =
assignment.explicit_valence[atom_index] - atom_degree[atom_index] as i32;
if n_unsaturations > 1 {
res = 1;
}
}
res
}
fn is_atom_conjugation_candidate(
mol: &Molecule,
assignment: &crate::ValenceAssignment,
atom_degree: &[usize],
atom_index: usize,
) -> bool {
let atom = &mol.atoms()[atom_index];
if let Ok(Some(vals)) = rdkit_valence_list(atom.atomic_number())
&& atom.formal_charge() == 0
&& !vals.is_empty()
&& vals[0] >= 0
{
let total_valence =
assignment.explicit_valence[atom_index] + assignment.implicit_hydrogens[atom_index];
if total_valence > vals[0] {
return false;
}
}
let nouter = rdkit_n_outer_electrons(atom.atomic_number()).unwrap_or(0);
let total_degree = atom_degree[atom_index]
+ atom.explicit_hydrogens() as usize
+ assignment.implicit_hydrogens[atom_index].max(0) as usize;
((atom.atomic_number() <= 10)
|| (nouter != 5 && nouter != 6)
|| (nouter == 6 && total_degree < 2))
&& count_atom_electrons_rdkit(mol, assignment, atom_degree, atom_index) > 0
}
fn compute_conjugated_bonds_for_uff(
mol: &Molecule,
assignment: &crate::ValenceAssignment,
atom_degree: &[usize],
) -> Vec<bool> {
let mut conjugated = vec![false; mol.bonds().len()];
for (bi, bond) in mol.bonds().iter().enumerate() {
conjugated[bi] = bond.is_aromatic();
}
for atom_index in 0..mol.atoms().len() {
if !is_atom_conjugation_candidate(mol, assignment, atom_degree, atom_index) {
continue;
}
let sbo = atom_degree[atom_index]
+ mol.atoms()[atom_index].explicit_hydrogens() as usize
+ assignment.implicit_hydrogens[atom_index].max(0) as usize;
if !(2..=3).contains(&sbo) {
continue;
}
let incident_bonds: Vec<usize> = mol
.bonds()
.iter()
.enumerate()
.filter_map(|(bond_index, bond)| {
if bond.begin().index() == atom_index || bond.end().index() == atom_index {
Some(bond_index)
} else {
None
}
})
.collect();
for &bond1_idx in &incident_bonds {
let bond1 = &mol.bonds()[bond1_idx];
if bond_valence_contrib_for_atom(bond1, atom_index) < 1.5 {
continue;
}
let other1 = if bond1.begin().index() == atom_index {
bond1.end().index()
} else {
bond1.begin().index()
};
if !is_atom_conjugation_candidate(mol, assignment, atom_degree, other1) {
continue;
}
for &bond2_idx in &incident_bonds {
if bond1_idx == bond2_idx {
continue;
}
let bond2 = &mol.bonds()[bond2_idx];
let other2 = if bond2.begin().index() == atom_index {
bond2.end().index()
} else {
bond2.begin().index()
};
let sbo2 = atom_degree[other2]
+ mol.atoms()[other2].explicit_hydrogens() as usize
+ assignment.implicit_hydrogens[other2].max(0) as usize;
if sbo2 > 3 {
continue;
}
if is_atom_conjugation_candidate(mol, assignment, atom_degree, other2) {
conjugated[bond1_idx] = true;
conjugated[bond2_idx] = true;
}
}
}
}
conjugated
}
fn compute_hybridizations_for_uff(
mol: &Molecule,
assignment: &crate::ValenceAssignment,
atom_degree: &[usize],
atom_has_conjugated_bond: &[bool],
) -> Vec<Hybridization> {
let mut out = Vec::with_capacity(mol.atoms().len());
for (atom_index, atom) in mol.atoms().iter().enumerate() {
if atom.atomic_number() == 0 {
out.push(Hybridization::Unspecified);
continue;
}
let mut deg = atom_degree[atom_index] as i32
+ atom.explicit_hydrogens() as i32
+ assignment.implicit_hydrogens[atom_index];
for bond in mol.bonds() {
if (bond.begin().index() == atom_index || bond.end().index() == atom_index)
&& matches!(bond.order(), BondOrder::Dative | BondOrder::DativeOne)
&& bond.end().index() != atom_index
{
deg -= 1;
}
}
let hyb = if atom.atomic_number() <= 1 {
match deg {
0 | 1 => Hybridization::S,
2 => Hybridization::Sp,
3 => Hybridization::Sp2,
4 => Hybridization::Sp3,
5 => Hybridization::Sp3d,
6 => Hybridization::Sp3d2,
_ => Hybridization::Unspecified,
}
} else {
let nouter = rdkit_n_outer_electrons(atom.atomic_number()).unwrap_or(0);
let total_valence =
assignment.explicit_valence[atom_index] + assignment.implicit_hydrogens[atom_index];
let num_free = nouter - (total_valence + atom.formal_charge() as i32);
let norbs = if total_valence + nouter - (atom.formal_charge() as i32) < 8 {
let radicals = atom.radical_electrons() as i32;
let lone_pairs = (num_free - radicals) / 2;
deg + lone_pairs + radicals
} else {
let lone_pairs = num_free / 2;
deg + lone_pairs
};
match norbs {
0 | 1 => Hybridization::S,
2 => Hybridization::Sp,
3 => Hybridization::Sp2,
4 => {
let total_degree = atom_degree[atom_index]
+ atom.explicit_hydrogens() as usize
+ assignment.implicit_hydrogens[atom_index].max(0) as usize;
if total_degree > 3 || !atom_has_conjugated_bond[atom_index] {
Hybridization::Sp3
} else {
Hybridization::Sp2
}
}
5 => Hybridization::Sp3d,
6 => Hybridization::Sp3d2,
_ => Hybridization::Unspecified,
}
};
out.push(hyb);
}
out
}
fn atom_total_valence_for_uff(
mol: &Molecule,
assignment: &crate::ValenceAssignment,
atom_index: usize,
) -> i32 {
mol.bonds()
.iter()
.map(|bond| bond_valence_contrib_for_atom(bond, atom_index))
.sum::<f64>()
.round() as i32
+ assignment.implicit_hydrogens[atom_index]
}
fn total_num_hydrogens_for_distgeom(
mol: &Molecule,
atom: &Atom,
assignment: &crate::ValenceAssignment,
atom_index: usize,
) -> u32 {
let mut res =
atom.explicit_hydrogens() as u32 + assignment.implicit_hydrogens[atom_index].max(0) as u32;
res += neighbors_for_atom(mol, atom_index)
.into_iter()
.filter(|&nbr| mol.atoms()[nbr].atomic_number() == 1)
.count() as u32;
res
}
#[cfg(test)]
fn get_atom_label_for_uff(
mol: &Molecule,
assignment: &crate::ValenceAssignment,
hybridizations: &[Hybridization],
atom_has_conjugated_bond: &[bool],
atom_index: usize,
) -> Result<String, DgBoundsError> {
let atom = &mol.atoms()[atom_index];
let total_valence = atom_total_valence_for_uff(mol, assignment, atom_index);
source_get_atom_label_for_uff(
atom,
atom_index,
total_valence,
hybridizations[atom_index],
atom_has_conjugated_bond[atom_index],
true,
)
.map_err(|err| {
DgBoundsError::GenerationFailed(format!(
"UFF atom symbol lookup failed for atomic number {}: {err}",
atom.atomic_number()
))
})
}
fn calc_bond_rest_length(bond_order: f64, end1: &AtomicParams, end2: &AtomicParams) -> f64 {
let ri = end1.r1;
let rj = end2.r1;
let r_bo = -UFF_LAMBDA * (ri + rj) * bond_order.ln();
let xi = end1.gmp_xi;
let xj = end2.gmp_xi;
let root_delta = xi.sqrt() - xj.sqrt();
let r_en = ri * rj * root_delta * root_delta / (xi * ri + xj * rj);
ri + rj + r_bo - r_en
}
fn ideal_bond_angle(hybridization: &crate::Hybridization, ring_size: Option<usize>) -> f64 {
const DEG_TO_RAD: f64 = std::f64::consts::PI / 180.0;
if let Some(rsize) = ring_size {
match hybridization {
crate::Hybridization::Sp2 if rsize <= 8 || rsize == 3 || rsize == 4 => {
std::f64::consts::PI * (1.0 - 2.0 / rsize as f64)
}
crate::Hybridization::Sp3 if rsize == 5 => 104.0 * DEG_TO_RAD,
crate::Hybridization::Sp3 => 109.5 * DEG_TO_RAD,
crate::Hybridization::Sp3d => 105.0 * DEG_TO_RAD,
crate::Hybridization::Sp3d2 => 90.0 * DEG_TO_RAD,
_ => 120.0 * DEG_TO_RAD,
}
} else {
match hybridization {
crate::Hybridization::Sp => 180.0 * DEG_TO_RAD,
crate::Hybridization::Sp2 => 120.0 * DEG_TO_RAD,
crate::Hybridization::Sp3 => 109.5 * DEG_TO_RAD,
crate::Hybridization::Sp3d => 90.0 * DEG_TO_RAD,
crate::Hybridization::Sp3d2 => 90.0 * DEG_TO_RAD,
crate::Hybridization::Sp2d => 120.0 * DEG_TO_RAD,
_ => 120.0 * DEG_TO_RAD,
}
}
}
const LOCAL_INF_DIST: f64 = 1.0e8;
fn compute_topological_distances(mol: &Molecule) -> Vec<f64> {
let n_atoms = mol.num_atoms();
let mut last_dist = vec![LOCAL_INF_DIST; n_atoms * n_atoms];
let mut last_path = vec![0_i32; n_atoms * n_atoms];
for i in 0..n_atoms {
last_dist[i * n_atoms + i] = 0.0;
}
for bond in mol.bonds() {
let i = bond.begin().index();
let j = bond.end().index();
last_dist[i * n_atoms + j] = 1.0;
last_dist[j * n_atoms + i] = 1.0;
}
for i in 0..n_atoms {
let itab = i * n_atoms;
for j in 0..n_atoms {
if i == j || last_dist[itab + j] == LOCAL_INF_DIST {
last_path[itab + j] = -1;
} else {
last_path[itab + j] = i as i32;
}
}
}
let mut curr_dist = vec![0.0; n_atoms * n_atoms];
let mut curr_path = vec![0_i32; n_atoms * n_atoms];
for k in 0..n_atoms {
let ktab = k * n_atoms;
for i in 0..n_atoms {
let itab = i * n_atoms;
for j in 0..n_atoms {
let v1 = last_dist[itab + j];
let v2 = last_dist[itab + k] + last_dist[ktab + j];
if v1 <= v2 {
curr_dist[itab + j] = v1;
curr_path[itab + j] = last_path[itab + j];
} else {
curr_dist[itab + j] = v2;
curr_path[itab + j] = last_path[ktab + j];
}
}
}
std::mem::swap(&mut curr_dist, &mut last_dist);
std::mem::swap(&mut curr_path, &mut last_path);
}
last_dist
}
fn flatten_topological_distances_matrix(mol: &Molecule) -> Vec<f64> {
compute_topological_distances(mol)
}
fn neighbors_for_atom(mol: &Molecule, idx: usize) -> Vec<usize> {
let mut ns = Vec::new();
for bond in mol.bonds() {
if bond.begin().index() == idx {
ns.push(bond.end().index());
} else if bond.end().index() == idx {
ns.push(bond.begin().index());
}
}
ns
}
fn bond_between(mol: &Molecule, a: usize, b: usize) -> Option<&Bond> {
mol.bonds().iter().find(|bond| {
(bond.begin().index() == a && bond.end().index() == b)
|| (bond.begin().index() == b && bond.end().index() == a)
})
}
fn common_middle_atom(mol: &Molecule, a1: usize, a3: usize) -> Option<usize> {
let n1 = neighbors_for_atom(mol, a1);
let n3 = neighbors_for_atom(mol, a3);
for &n in &n1 {
if n3.contains(&n) {
return Some(n);
}
}
None
}
struct BoundsMatrix {
data: Vec<Vec<f64>>,
n: usize,
}
impl BoundsMatrix {
fn new(n: usize) -> Self {
let mut matrix = Self {
data: vec![vec![0.0; n]; n],
n,
};
init_bounds_mat_shared(&mut matrix, DEFAULT_LOWER, DEFAULT_UPPER);
matrix
}
fn get_val(&self, i: usize, j: usize) -> f64 {
self.data[i][j]
}
fn set_val(&mut self, i: usize, j: usize, value: f64) {
self.data[i][j] = value;
}
fn get_upper(&self, i: usize, j: usize) -> f64 {
if i < j {
self.get_val(i, j)
} else {
self.get_val(j, i)
}
}
fn set_upper(&mut self, i: usize, j: usize, v: f64) {
assert!(v >= 0.0, "Negative upper bound");
if i < j {
self.set_val(i, j, v);
} else {
self.set_val(j, i, v);
}
}
fn set_upper_if_better(&mut self, i: usize, j: usize, val: f64) {
if val < self.get_upper(i, j) && val > self.get_lower(i, j) {
self.set_upper(i, j, val);
}
}
fn set_lower(&mut self, i: usize, j: usize, v: f64) {
assert!(v >= 0.0, "Negative lower bound");
if i < j {
self.set_val(j, i, v);
} else {
self.set_val(i, j, v);
}
}
fn set_lower_if_better(&mut self, i: usize, j: usize, val: f64) {
if val > self.get_lower(i, j) && val < self.get_upper(i, j) {
self.set_lower(i, j, val);
}
}
fn get_lower(&self, i: usize, j: usize) -> f64 {
if i < j {
self.get_val(j, i)
} else {
self.get_val(i, j)
}
}
fn check_valid(&self) -> bool {
for i in 1..self.n {
for j in 0..i {
if self.get_upper(i, j) < self.get_lower(i, j) {
return false;
}
}
}
true
}
fn num_rows(&self) -> usize {
self.n
}
fn check_and_set_bounds(&mut self, i: usize, j: usize, lb: f64, ub: f64) {
self.check_and_set_bounds_with_mode(i, j, lb, ub, false);
}
fn check_and_set_bounds_with_mode(
&mut self,
i: usize,
j: usize,
lb: f64,
ub: f64,
set_if_better: bool,
) {
let clb = self.get_lower(i, j);
let cub = self.get_upper(i, j);
assert!(ub > lb, "upper bound not greater than lower bound");
assert!(lb > DIST12_DELTA || clb > DIST12_DELTA, "bad lower bound");
if set_if_better {
let mut nlb = clb.max(lb);
let mut nub = cub.min(ub);
if nub <= nlb {
nlb = clb.min(lb);
nub = cub.max(ub);
}
self.set_lower(i, j, nlb);
self.set_upper(i, j, nub);
} else {
if clb <= DIST12_DELTA {
self.set_lower(i, j, lb);
} else if lb < clb && lb > DIST12_DELTA {
self.set_lower(i, j, lb);
}
if cub >= MAX_UPPER {
self.set_upper(i, j, ub);
} else if ub > cub && ub < MAX_UPPER {
self.set_upper(i, j, ub);
}
}
}
fn triangle_smooth(&mut self, tol: f64) -> bool {
triangle_smooth_bounds_ptr(self, tol)
}
fn to_vec_vec(self) -> Vec<Vec<f64>> {
self.data
}
}
fn triangle_smooth_bounds_shared(bounds_mat: &mut BoundsMatrix, tol: f64) -> bool {
triangle_smooth_bounds_ptr(bounds_mat, tol)
}
fn triangle_smooth_bounds_ptr(bounds_mat: &mut BoundsMatrix, tol: f64) -> bool {
let npt = bounds_mat.num_rows();
if npt < 2 {
return true;
}
for k in 0..npt {
for i in 0..(npt - 1) {
if i == k {
continue;
}
let (ii, ik) = if i > k { (k, i) } else { (i, k) };
let uik = bounds_mat.get_val(ii, ik);
let lik = bounds_mat.get_val(ik, ii);
for j in (i + 1)..npt {
if j == k {
continue;
}
let (jj, jk) = if j > k { (k, j) } else { (j, k) };
let ukj = bounds_mat.get_val(jj, jk);
let sum_uik_ukj = uik + ukj;
if bounds_mat.get_val(i, j) > sum_uik_ukj {
bounds_mat.set_val(i, j, sum_uik_ukj);
}
let diff_lik_ujk = lik - ukj;
let diff_ljk_uik = bounds_mat.get_val(jk, jj) - uik;
if bounds_mat.get_val(j, i) < diff_lik_ujk {
bounds_mat.set_val(j, i, diff_lik_ujk);
} else if bounds_mat.get_val(j, i) < diff_ljk_uik {
bounds_mat.set_val(j, i, diff_ljk_uik);
}
let l_bound = bounds_mat.get_val(j, i);
let u_bound = bounds_mat.get_val(i, j);
let rel_gap = (l_bound - u_bound) / l_bound;
if tol > 0.0 && rel_gap > 0.0 && rel_gap < tol {
bounds_mat.set_val(i, j, l_bound);
} else if l_bound - u_bound > 0.0 {
return false;
}
}
}
}
true
}
fn init_bounds_mat_ptr(mmat: &mut BoundsMatrix, default_min: f64, default_max: f64) {
let npt = mmat.num_rows();
for i in 1..npt {
for j in 0..i {
mmat.set_upper(i, j, default_max);
mmat.set_lower(i, j, default_min);
}
}
}
fn init_bounds_mat_shared(mmat: &mut BoundsMatrix, default_min: f64, default_max: f64) {
init_bounds_mat_ptr(mmat, default_min, default_max);
}
fn atom_ring_sizes(ring_info: &crate::RingInfo, atom_idx: usize) -> Vec<usize> {
let aid = AtomId::new(atom_idx);
let mut sizes = Vec::new();
for r in ring_info.atom_rings() {
if r.contains(&aid) {
sizes.push(r.len());
}
}
sizes
}
fn is_atom_in_ring_of_size(ring_info: &crate::RingInfo, atom_idx: usize, size: usize) -> bool {
atom_ring_sizes(ring_info, atom_idx).contains(&size)
}
fn is_bond_in_ring_of_size(ring_info: &crate::RingInfo, bond_idx: usize, size: usize) -> bool {
let bid = BondId::new(bond_idx);
ring_info
.bond_rings()
.iter()
.any(|r| r.len() == size && r.contains(&bid))
}
fn set_12_bounds(
mol: &Molecule,
mmat: &mut BoundsMatrix,
accum_data: &mut ComputedData,
) -> Result<(), DgBoundsError> {
let npt = mmat.num_rows();
if npt != mol.atoms().len() {
return Err(DgBoundsError::GenerationFailed(
"Wrong size metric matrix".to_string(),
));
}
if accum_data.bond_lengths.len() < mol.bonds().len() {
return Err(DgBoundsError::GenerationFailed(
"Wrong size accumData".to_string(),
));
}
let assignment = assign_valence(mol, ValenceModel::RdkitLike).map_err(|err| {
DgBoundsError::GenerationFailed(format!(
"RDKit UFF atom typing valence assignment failed: {err}"
))
})?;
let mut atom_degree = vec![0usize; mol.atoms().len()];
for bond in mol.bonds() {
atom_degree[bond.begin().index()] += 1;
atom_degree[bond.end().index()] += 1;
}
let conjugated = compute_conjugated_bonds_for_uff(mol, &assignment, &atom_degree);
let mut atom_has_conjugated_bond = vec![false; mol.atoms().len()];
for (bond_index, bond) in mol.bonds().iter().enumerate() {
if conjugated[bond_index] {
atom_has_conjugated_bond[bond.begin().index()] = true;
atom_has_conjugated_bond[bond.end().index()] = true;
}
}
let hybridizations =
compute_hybridizations_for_uff(mol, &assignment, &atom_degree, &atom_has_conjugated_bond);
let total_valences = (0..mol.atoms().len())
.map(|atom_index| atom_total_valence_for_uff(mol, &assignment, atom_index))
.collect::<Vec<_>>();
let (atom_params, _found_all) = get_atom_types_for_uff(
mol,
&total_valences,
&hybridizations,
&atom_has_conjugated_bond,
)
.map_err(|err| {
DgBoundsError::GenerationFailed(format!("RDKit UFF atom typing failed: {err}"))
})?;
let mut squish_atoms = vec![false; mol.atoms().len()];
let ring_info = mol.derived_cache().rings.as_ref();
for (bond_index, bond) in mol.bonds().iter().enumerate() {
if conjugated[bond_index]
&& (mol.atoms()[bond.begin().index()].atomic_number() > 10
|| mol.atoms()[bond.end().index()].atomic_number() > 10)
&& ring_info.is_some_and(|ri| is_bond_in_ring_of_size(ri, bond.id().index(), 5))
{
squish_atoms[bond.begin().index()] = true;
squish_atoms[bond.end().index()] = true;
}
}
for bond in mol.bonds() {
let beg_id = bond.begin().index();
let end_id = bond.end().index();
let bond_order =
crate::chemistry::valence::bond_type_as_double(bond.order()).map_err(|err| {
DgBoundsError::GenerationFailed(format!(
"RDKit set12Bounds bond-order conversion failed: {err}"
))
})?;
if let (Some(param1), Some(param2)) = (atom_params[beg_id], atom_params[end_id]) {
if bond_order > 0.0 {
let bl = calc_bond_rest_length(bond_order, ¶m1, ¶m2);
let extra_squish = if squish_atoms[beg_id] || squish_atoms[end_id] {
0.2
} else {
0.0
};
accum_data.bond_lengths[bond.id().index()] = bl;
mmat.set_upper(beg_id, end_id, bl + extra_squish + DIST12_DELTA);
mmat.set_lower(beg_id, end_id, bl - extra_squish - DIST12_DELTA);
} else {
let bl = (vdw_radius(mol.atoms()[beg_id].atomic_number())
+ vdw_radius(mol.atoms()[end_id].atomic_number()))
/ 2.0;
accum_data.bond_lengths[bond.id().index()] = bl;
mmat.set_upper(beg_id, end_id, 1.5 * bl);
mmat.set_lower(beg_id, end_id, 0.5 * bl);
}
} else {
let bl = (vdw_radius(mol.atoms()[beg_id].atomic_number())
+ vdw_radius(mol.atoms()[end_id].atomic_number()))
/ 2.0;
accum_data.bond_lengths[bond.id().index()] = bl;
mmat.set_upper(beg_id, end_id, 1.5 * bl);
mmat.set_lower(beg_id, end_id, 0.5 * bl);
}
let pid = beg_id.min(end_id) * mol.atoms().len() + beg_id.max(end_id);
accum_data.visited12_bounds[pid] = true;
}
Ok(())
}
fn compute_13_dist(bl1: f64, bl2: f64, angle: f64) -> f64 {
let res = bl1 * bl1 + bl2 * bl2 - 2.0 * bl1 * bl2 * angle.cos();
res.sqrt()
}
fn compute_14_dist_3d(d1: f64, d2: f64, d3: f64, ang12: f64, ang23: f64, tor_ang: f64) -> f64 {
let p1x = d1 * ang12.cos();
let p1y = d1 * ang12.sin();
let p4x = d2 - d3 * ang23.cos();
let p4y = d3 * ang23.sin() * tor_ang.cos();
let p4z = d3 * ang23.sin() * tor_ang.sin();
let dx = p4x - p1x;
let dy = p4y - p1y;
let dz = p4z;
(dx * dx + dy * dy + dz * dz).sqrt()
}
fn compute_14_dist_cis(d1: f64, d2: f64, d3: f64, ang12: f64, ang23: f64) -> f64 {
let dx = d2 - d3 * ang23.cos() - d1 * ang12.cos();
let dy = d3 * ang23.sin() - d1 * ang12.sin();
(dx * dx + dy * dy).sqrt()
}
fn compute_14_dist_trans(d1: f64, d2: f64, d3: f64, ang12: f64, ang23: f64) -> f64 {
let dx = d2 - d3 * ang23.cos() - d1 * ang12.cos();
let dy = d3 * ang23.sin() + d1 * ang12.sin();
(dx * dx + dy * dy).sqrt()
}
fn set_13_bounds_helper(
aid1: usize,
aid: usize,
aid3: usize,
angle: f64,
bond_lengths: &[f64],
mmat: &mut BoundsMatrix,
mol: &Molecule,
) {
let Some(bid1) = bond_between_idx_simple(mol, aid1, aid) else {
return;
};
let Some(bid2) = bond_between_idx_simple(mol, aid, aid3) else {
return;
};
let mut dl = compute_13_dist(bond_lengths[bid1], bond_lengths[bid2], angle);
let mut dist_tol = DIST13_TOL;
if is_larger_sp2_atom_idx(mol, aid1) {
dist_tol *= 2.0;
}
if is_larger_sp2_atom_idx(mol, aid) {
dist_tol *= 2.0;
}
if is_larger_sp2_atom_idx(mol, aid3) {
dist_tol *= 2.0;
}
let du = dl + dist_tol;
dl -= dist_tol;
mmat.check_and_set_bounds(aid1, aid3, dl, du);
}
fn bond_between_idx(bond_idx: usize) -> Option<usize> {
None
}
fn is_larger_sp2_atom_idx(mol: &Molecule, idx: usize) -> bool {
let atom = &mol.atoms()[idx];
atom.atomic_number() > 13
&& atom.hybridization() == Hybridization::Sp2
&& mol
.derived_cache()
.rings
.as_ref()
.is_some_and(|rings| rings.num_atom_rings(AtomId::new(idx)) > 0)
}
#[derive(Debug, Clone, Copy)]
struct Path14Configuration {
bid1: usize,
bid2: usize,
bid3: usize,
kind: Path14Kind,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Path14Kind {
Cis,
Trans,
Other,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DistType {
Dist12,
Dist13,
Dist14,
}
#[derive(Debug, Clone)]
struct ComputedData {
bond_lengths: Vec<f64>,
bond_adj: Vec<i32>, bond_angles: Vec<f64>, paths14: Vec<Path14Configuration>,
cis_paths: Vec<u64>,
trans_paths: Vec<u64>,
set15_atoms: Vec<bool>, visited12_bounds: Vec<bool>,
visited13_bounds: Vec<bool>,
visited14_bounds: Vec<bool>,
}
impl ComputedData {
fn new(n_atoms: usize, n_bonds: usize) -> Self {
Self {
bond_lengths: vec![0.0; n_bonds],
bond_adj: vec![-1; n_bonds * n_bonds],
bond_angles: vec![-1.0; n_bonds * n_bonds],
paths14: Vec::new(),
cis_paths: Vec::new(),
trans_paths: Vec::new(),
set15_atoms: vec![false; n_atoms * n_atoms],
visited12_bounds: vec![false; n_atoms * n_atoms],
visited13_bounds: vec![false; n_atoms * n_atoms],
visited14_bounds: vec![false; n_atoms * n_atoms],
}
}
fn bond_mat_idx(&self, n_bonds: usize, i: usize, j: usize) -> usize {
i * n_bonds + j
}
fn set_bond_adj(&mut self, n_bonds: usize, i: usize, j: usize, value: i32) {
let idx = self.bond_mat_idx(n_bonds, i, j);
let rev = self.bond_mat_idx(n_bonds, j, i);
self.bond_adj[idx] = value;
self.bond_adj[rev] = value;
}
fn get_bond_adj(&self, n_bonds: usize, i: usize, j: usize) -> i32 {
self.bond_adj[self.bond_mat_idx(n_bonds, i, j)]
}
fn set_bond_angle(&mut self, n_bonds: usize, i: usize, j: usize, value: f64) {
let idx = self.bond_mat_idx(n_bonds, i, j);
let rev = self.bond_mat_idx(n_bonds, j, i);
self.bond_angles[idx] = value;
self.bond_angles[rev] = value;
}
fn get_bond_angle(&self, n_bonds: usize, i: usize, j: usize) -> f64 {
self.bond_angles[self.bond_mat_idx(n_bonds, i, j)]
}
fn visited_bound(&self, pid: usize, dist_type: DistType) -> bool {
self.visited12_bounds[pid]
|| (matches!(dist_type, DistType::Dist13 | DistType::Dist14)
&& self.visited13_bounds[pid])
|| (matches!(dist_type, DistType::Dist14) && self.visited14_bounds[pid])
}
}
fn record_path_flag(path_store: &mut Vec<u64>, path_id: u64) {
if !path_store.contains(&path_id) {
path_store.push(path_id);
}
}
fn has_path_flag(path_store: &[u64], path_id: u64) -> bool {
path_store.contains(&path_id)
}
fn path14_id(nb: usize, bid1: usize, bid2: usize, bid3: usize) -> u64 {
bid1 as u64 * nb as u64 * nb as u64 + bid2 as u64 * nb as u64 + bid3 as u64
}
fn ring_info_for_distgeom(mol: &Molecule) -> &crate::RingInfo {
mol.derived_cache()
.rings
.as_ref()
.expect("distgeom requires ring info")
}
fn bond_pair_shared_atom(
mol: &Molecule,
accum_data: &ComputedData,
bid1: usize,
bid2: usize,
) -> usize {
let nb = mol.num_bonds();
let aid = accum_data.get_bond_adj(nb, bid1, bid2);
assert!(aid >= 0, "missing shared atom for bond pair");
aid as usize
}
fn get_atom_stereo(bond: &Bond, aid1: usize, aid4: usize) -> BondStereo {
let mut stype = bond.stereo();
if matches!(
stype,
BondStereo::Z | BondStereo::E | BondStereo::Cis | BondStereo::Trans
) && bond.stereo_atoms().is_some_and(|atoms| atoms.len() >= 2)
{
let stereo_atoms = bond.stereo_atoms().expect("checked stereo atoms");
let needs_flip = (stereo_atoms[0].index() != aid1) ^ (stereo_atoms[1].index() != aid4);
if needs_flip {
stype = match stype {
BondStereo::Z => BondStereo::E,
BondStereo::E => BondStereo::Z,
BondStereo::Cis => BondStereo::Trans,
BondStereo::Trans => BondStereo::Cis,
other => other,
};
}
}
stype
}
fn set_ring_angle(mol: &Molecule, aid2: usize, ring_size: usize) -> f64 {
let hyb = mol.atoms()[aid2].hybridization();
let deg_to_rad = std::f64::consts::PI / 180.0;
if (hyb == Hybridization::Sp2 && ring_size <= 8) || ring_size == 3 || ring_size == 4 {
std::f64::consts::PI * (1.0 - 2.0 / ring_size as f64)
} else if hyb == Hybridization::Sp3 {
if ring_size == 5 {
104.0 * deg_to_rad
} else {
109.5 * deg_to_rad
}
} else if hyb == Hybridization::Sp3d {
105.0 * deg_to_rad
} else if hyb == Hybridization::Sp3d2 {
90.0 * deg_to_rad
} else {
120.0 * deg_to_rad
}
}
fn set_13_bounds(mol: &Molecule, mmat: &mut BoundsMatrix, accum_data: &mut ComputedData) {
let npt = mmat.num_rows();
assert_eq!(npt, mol.atoms().len(), "Wrong size metric matrix");
let nb = mol.bonds().len();
assert_eq!(
accum_data.bond_angles.len(),
nb * nb,
"Wrong size bond angle matrix"
);
assert_eq!(
accum_data.bond_adj.len(),
nb * nb,
"Wrong size bond adjacency matrix"
);
let rinfo = mol
.derived_cache()
.rings
.as_ref()
.expect("set13Bounds requires ring info");
let mut atom_rings: Vec<Vec<usize>> = rinfo
.atom_rings()
.iter()
.map(|ring| ring.iter().map(|aid| aid.index()).collect())
.collect();
atom_rings.sort_by_key(Vec::len);
let mut visited = vec![0usize; npt];
let mut angle_taken = vec![0.0; npt];
let mut done_paths = vec![false; nb * nb];
for ring in &atom_rings {
let r_size = ring.len();
let mut aid1 = ring[r_size - 1];
for i in 0..r_size {
let aid2 = ring[i];
let aid3 = if i == r_size - 1 {
ring[0]
} else {
ring[i + 1]
};
let b1 = bond_between_idx_simple(mol, aid1, aid2).expect("no bond found");
let b2 = bond_between_idx_simple(mol, aid2, aid3).expect("no bond found");
let id1 = nb * b1 + b2;
let id2 = nb * b2 + b1;
let pid = aid1.min(aid3) * npt + aid1.max(aid3);
if !done_paths[id1] && !done_paths[id2] {
let angle = set_ring_angle(mol, aid2, r_size);
if !accum_data.visited_bound(pid, DistType::Dist12) {
set_13_bounds_helper(
aid1,
aid2,
aid3,
angle,
&accum_data.bond_lengths,
mmat,
mol,
);
accum_data.visited13_bounds[pid] = true;
}
accum_data.set_bond_angle(nb, b1, b2, angle);
accum_data.set_bond_adj(nb, b1, b2, aid2 as i32);
visited[aid2] += 1;
angle_taken[aid2] += angle;
done_paths[id1] = true;
done_paths[id2] = true;
}
aid1 = aid2;
}
}
for aid2 in 0..npt {
let atom = &mol.atoms()[aid2];
let nbrs = neighbors_for_atom(mol, aid2);
let deg = nbrs.len();
let n13 = deg * (deg.saturating_sub(1)) / 2;
if n13 == visited[aid2] {
continue;
}
let ahyb = atom.hybridization();
if visited[aid2] >= 1 {
for left in 0..nbrs.len() {
let aid1 = nbrs[left];
let bid1 = bond_between_idx_simple(mol, aid1, aid2).expect("missing bond");
for right in 0..left {
let aid3 = nbrs[right];
let bid2 = bond_between_idx_simple(mol, aid3, aid2).expect("missing bond");
if accum_data.get_bond_angle(nb, bid1, bid2) >= 0.0 {
continue;
}
let angle = if ahyb == Hybridization::Sp2 {
(2.0 * std::f64::consts::PI - angle_taken[aid2])
/ (n13 - visited[aid2]) as f64
} else if ahyb == Hybridization::Sp3 {
if rinfo.is_atom_in_ring_of_size(AtomId::new(aid2), 3) {
116.0_f64.to_radians()
} else if rinfo.is_atom_in_ring_of_size(AtomId::new(aid2), 4) {
112.0_f64.to_radians()
} else {
109.5_f64.to_radians()
}
} else if has_non_tetrahedral_stereo(atom) {
get_ideal_angle_between_ligands(aid2, aid1, aid3, mol).to_radians()
} else if deg == 5 {
105.0_f64.to_radians()
} else if deg == 6 {
135.0_f64.to_radians()
} else {
120.0_f64.to_radians()
};
let pid = aid1.min(aid3) * npt + aid1.max(aid3);
if !accum_data.visited_bound(pid, DistType::Dist12) {
set_13_bounds_helper(
aid1,
aid2,
aid3,
angle,
&accum_data.bond_lengths,
mmat,
mol,
);
accum_data.visited13_bounds[pid] = true;
}
accum_data.set_bond_angle(nb, bid1, bid2, angle);
accum_data.set_bond_adj(nb, bid1, bid2, aid2 as i32);
angle_taken[aid2] += angle;
visited[aid2] += 1;
}
}
} else {
for left in 0..nbrs.len() {
let aid1 = nbrs[left];
let bid1 = bond_between_idx_simple(mol, aid1, aid2).expect("missing bond");
for right in 0..left {
let aid3 = nbrs[right];
let bid2 = bond_between_idx_simple(mol, aid3, aid2).expect("missing bond");
let angle = if has_non_tetrahedral_stereo(atom) {
get_ideal_angle_between_ligands(aid2, aid1, aid3, mol).to_radians()
} else if ahyb == Hybridization::Sp {
std::f64::consts::PI
} else if ahyb == Hybridization::Sp2 {
2.0 * std::f64::consts::PI / 3.0
} else if ahyb == Hybridization::Sp3 {
109.5_f64.to_radians()
} else if ahyb == Hybridization::Sp3d {
105.0_f64.to_radians()
} else if ahyb == Hybridization::Sp3d2 {
135.0_f64.to_radians()
} else {
120.0_f64.to_radians()
};
let pid = aid1.min(aid3) * npt + aid1.max(aid3);
if !accum_data.visited_bound(pid, DistType::Dist12) {
if deg <= 4
|| (has_non_tetrahedral_stereo(atom)
&& atom.chiral_permutation().is_some())
{
set_13_bounds_helper(
aid1,
aid2,
aid3,
angle,
&accum_data.bond_lengths,
mmat,
mol,
);
} else {
let dmax =
accum_data.bond_lengths[bid1] + accum_data.bond_lengths[bid2];
let dl = 1.0;
let du = dmax * 1.2;
mmat.check_and_set_bounds(aid1, aid3, dl, du);
}
accum_data.visited13_bounds[pid] = true;
}
accum_data.set_bond_angle(nb, bid1, bid2, angle);
accum_data.set_bond_adj(nb, bid1, bid2, aid2 as i32);
angle_taken[aid2] += angle;
visited[aid2] += 1;
}
}
}
}
}
fn total_num_hydrogens_rdkit_like(mol: &Molecule, atom_idx: usize) -> Option<u32> {
let assignment = assign_valence(mol, ValenceModel::RdkitLike).ok()?;
Some(total_num_hydrogens_for_distgeom(
mol,
&mol.atoms()[atom_idx],
&assignment,
atom_idx,
))
}
fn record_14_path(
mol: &Molecule,
bid1: usize,
bid2: usize,
bid3: usize,
accum_data: &mut ComputedData,
) {
let atm2 = bond_pair_shared_atom(mol, accum_data, bid1, bid2);
let ahyb2 = mol.atoms()[atm2].hybridization();
let atm3 = bond_pair_shared_atom(mol, accum_data, bid2, bid3);
let ahyb3 = mol.atoms()[atm3].hybridization();
let nb = mol.num_bonds();
let kind = if ahyb2 == Hybridization::Sp2 && ahyb3 == Hybridization::Sp2 {
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid3, bid2, bid1));
Path14Kind::Cis
} else {
Path14Kind::Other
};
accum_data.paths14.push(Path14Configuration {
bid1,
bid2,
bid3,
kind,
});
}
fn set_in_ring_14_bounds(
mol: &Molecule,
bid1: usize,
bid2: usize,
bid3: usize,
accum_data: &mut ComputedData,
mmat: &mut BoundsMatrix,
dmat: &[f64],
ring_size: usize,
) {
let atm2 = bond_pair_shared_atom(mol, accum_data, bid1, bid2);
let ahyb2 = mol.atoms()[atm2].hybridization();
let atm3 = bond_pair_shared_atom(mol, accum_data, bid2, bid3);
let ahyb3 = mol.atoms()[atm3].hybridization();
let bnd1 = &mol.bonds()[bid1];
let bnd3 = &mol.bonds()[bid3];
let aid1 = if bnd1.begin().index() == atm2 {
bnd1.end().index()
} else {
bnd1.begin().index()
};
let aid4 = if bnd3.begin().index() == atm3 {
bnd3.end().index()
} else {
bnd3.begin().index()
};
let pid = aid1.min(aid4) * mol.num_atoms() + aid1.max(aid4);
if accum_data.visited_bound(pid, DistType::Dist13) {
return;
}
if dmat[aid1.max(aid4) * mmat.num_rows() + aid1.min(aid4)] < 2.9 {
return;
}
let bl1 = accum_data.bond_lengths[bid1];
let bl2 = accum_data.bond_lengths[bid2];
let bl3 = accum_data.bond_lengths[bid3];
let ba12 = accum_data.get_bond_angle(mol.num_bonds(), bid1, bid2);
let ba23 = accum_data.get_bond_angle(mol.num_bonds(), bid2, bid3);
assert!(ba12 > 0.0);
assert!(ba23 > 0.0);
let stype = get_atom_stereo(&mol.bonds()[bid2], aid1, aid4);
let ring_info = ring_info_for_distgeom(mol);
let mut prefer_cis = false;
let mut prefer_trans = false;
if ring_size <= 8
&& ahyb2 == Hybridization::Sp2
&& ahyb3 == Hybridization::Sp2
&& !matches!(stype, BondStereo::E | BondStereo::Trans)
{
if ring_info.num_bond_rings(BondId::new(bid2)) > 1 {
if ring_info.num_bond_rings(BondId::new(bid1)) == 1
&& ring_info.num_bond_rings(BondId::new(bid3)) == 1
{
for br in ring_info.bond_rings() {
if br.contains(&BondId::new(bid1)) {
if br.contains(&BondId::new(bid3)) {
prefer_cis = true;
}
break;
}
}
}
} else {
prefer_cis = true;
}
} else if matches!(stype, BondStereo::Z | BondStereo::Cis) {
prefer_cis = true;
} else if matches!(stype, BondStereo::E | BondStereo::Trans) {
prefer_trans = true;
}
let nb = mol.num_bonds();
let kind = if prefer_cis {
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid3, bid2, bid1));
Path14Kind::Cis
} else if prefer_trans {
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid3, bid2, bid1));
Path14Kind::Trans
} else {
Path14Kind::Other
};
accum_data.paths14.push(Path14Configuration {
bid1,
bid2,
bid3,
kind,
});
let (mut dl, mut du) = if prefer_cis {
let dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23) - GEN_DIST_TOL;
(dl, dl + 2.0 * GEN_DIST_TOL)
} else if prefer_trans {
let dl = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23) - GEN_DIST_TOL;
(dl, dl + 2.0 * GEN_DIST_TOL)
} else {
let mut dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23);
let mut du = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
if du < dl {
std::mem::swap(&mut dl, &mut du);
}
if (du - dl).abs() < DIST12_DELTA {
dl -= GEN_DIST_TOL;
du += GEN_DIST_TOL;
}
(dl, du)
};
accum_data.visited14_bounds[pid] = true;
mmat.check_and_set_bounds(aid1, aid4, dl, du);
}
fn set_two_in_same_ring_14_bounds(
mol: &Molecule,
bid1: usize,
bid2: usize,
bid3: usize,
accum_data: &mut ComputedData,
mmat: &mut BoundsMatrix,
dmat: &[f64],
) {
let atm2 = bond_pair_shared_atom(mol, accum_data, bid1, bid2);
let atm3 = bond_pair_shared_atom(mol, accum_data, bid2, bid3);
let bnd1 = &mol.bonds()[bid1];
let bnd3 = &mol.bonds()[bid3];
let aid1 = if bnd1.begin().index() == atm2 {
bnd1.end().index()
} else {
bnd1.begin().index()
};
let aid4 = if bnd3.begin().index() == atm3 {
bnd3.end().index()
} else {
bnd3.begin().index()
};
let pid = aid1.min(aid4) * mol.num_atoms() + aid1.max(aid4);
if accum_data.visited_bound(pid, DistType::Dist13) {
return;
}
if dmat[aid1.max(aid4) * mmat.num_rows() + aid1.min(aid4)] < 2.9 {
return;
}
if bond_between(mol, aid1, atm3).is_some() || bond_between(mol, aid4, atm2).is_some() {
return;
}
let ahyb2 = mol.atoms()[atm2].hybridization();
let ahyb3 = mol.atoms()[atm3].hybridization();
let bl1 = accum_data.bond_lengths[bid1];
let bl2 = accum_data.bond_lengths[bid2];
let bl3 = accum_data.bond_lengths[bid3];
let ba12 = accum_data.get_bond_angle(mol.num_bonds(), bid1, bid2);
let ba23 = accum_data.get_bond_angle(mol.num_bonds(), bid2, bid3);
assert!(ba12 > 0.0);
assert!(ba23 > 0.0);
let nb = mol.num_bonds();
let (mut dl, mut du, kind) = if ahyb2 == Hybridization::Sp2 && ahyb3 == Hybridization::Sp2 {
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid3, bid2, bid1));
let du = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
(du - GEN_DIST_TOL, du + GEN_DIST_TOL, Path14Kind::Trans)
} else {
let mut dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23);
let mut du = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
if du < dl {
std::mem::swap(&mut dl, &mut du);
}
if (du - dl).abs() < DIST12_DELTA {
dl -= GEN_DIST_TOL;
du += GEN_DIST_TOL;
}
(dl, du, Path14Kind::Other)
};
mmat.check_and_set_bounds(aid1, aid4, dl, du);
accum_data.paths14.push(Path14Configuration {
bid1,
bid2,
bid3,
kind,
});
accum_data.visited14_bounds[pid] = true;
}
fn set_two_in_diff_ring_14_bounds(
mol: &Molecule,
bid1: usize,
bid2: usize,
bid3: usize,
accum_data: &mut ComputedData,
mmat: &mut BoundsMatrix,
dmat: &[f64],
) {
set_in_ring_14_bounds(mol, bid1, bid2, bid3, accum_data, mmat, dmat, 0);
}
fn set_share_ring_bond_14_bounds(
mol: &Molecule,
bid1: usize,
bid2: usize,
bid3: usize,
accum_data: &mut ComputedData,
mmat: &mut BoundsMatrix,
dmat: &[f64],
) {
set_in_ring_14_bounds(mol, bid1, bid2, bid3, accum_data, mmat, dmat, 0);
}
fn check_macrocycle_two_in_same_ring_amide_ester_14(
mol: &Molecule,
bnd1_idx: usize,
bnd3_idx: usize,
atm1: usize,
atm2: usize,
atm3: usize,
atm4: usize,
) -> bool {
let a1_num = mol.atoms()[atm1].atomic_number();
let a2_num = mol.atoms()[atm2].atomic_number();
let a3_num = mol.atoms()[atm3].atomic_number();
let a4_num = mol.atoms()[atm4].atomic_number();
a1_num != 1
&& a3_num == 6
&& mol.bonds()[bnd3_idx].order() == BondOrder::Double
&& (a4_num == 8 || a4_num == 7)
&& mol.bonds()[bnd1_idx].order() == BondOrder::Single
&& (a2_num == 8 || a2_num == 7)
}
fn set_macrocycle_two_in_same_ring_14_bounds(
mol: &Molecule,
bid1: usize,
bid2: usize,
bid3: usize,
accum_data: &mut ComputedData,
mmat: &mut BoundsMatrix,
dmat: &[f64],
) {
let atm2 = bond_pair_shared_atom(mol, accum_data, bid1, bid2);
let atm3 = bond_pair_shared_atom(mol, accum_data, bid2, bid3);
let bnd1 = &mol.bonds()[bid1];
let bnd3 = &mol.bonds()[bid3];
let aid1 = if bnd1.begin().index() == atm2 {
bnd1.end().index()
} else {
bnd1.begin().index()
};
let aid4 = if bnd3.begin().index() == atm3 {
bnd3.end().index()
} else {
bnd3.begin().index()
};
let atm1 = aid1;
let atm4 = aid4;
let pid = aid1.min(aid4) * mol.num_atoms() + aid1.max(aid4);
if accum_data.visited_bound(pid, DistType::Dist13) {
return;
}
if dmat[aid1.max(aid4) * mmat.num_rows() + aid1.min(aid4)] < 2.9 {
return;
}
if bond_between(mol, aid1, atm3).is_some() || bond_between(mol, aid4, atm2).is_some() {
return;
}
let bl1 = accum_data.bond_lengths[bid1];
let bl2 = accum_data.bond_lengths[bid2];
let bl3 = accum_data.bond_lengths[bid3];
let ba12 = accum_data.get_bond_angle(mol.num_bonds(), bid1, bid2);
let ba23 = accum_data.get_bond_angle(mol.num_bonds(), bid2, bid3);
assert!(ba12 > 0.0);
assert!(ba23 > 0.0);
let nb = mol.num_bonds();
let (mut dl, mut du, kind) = if check_macrocycle_two_in_same_ring_amide_ester_14(
mol, bid1, bid3, atm1, atm2, atm3, atm4,
) || check_macrocycle_two_in_same_ring_amide_ester_14(
mol, bid3, bid1, atm4, atm3, atm2, atm1,
) {
let dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23);
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid3, bid2, bid1));
(dl - GEN_DIST_TOL, dl + GEN_DIST_TOL, Path14Kind::Cis)
} else {
let mut dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23);
let mut du = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
if du < dl {
std::mem::swap(&mut dl, &mut du);
}
if (du - dl).abs() < DIST12_DELTA {
dl -= GEN_DIST_TOL;
du += GEN_DIST_TOL;
}
(dl, du, Path14Kind::Other)
};
mmat.check_and_set_bounds(aid1, aid4, dl, du);
accum_data.paths14.push(Path14Configuration {
bid1,
bid2,
bid3,
kind,
});
accum_data.visited14_bounds[pid] = true;
}
fn set_macrocycle_all_in_same_ring_14_bounds(
mol: &Molecule,
bid1: usize,
bid2: usize,
bid3: usize,
accum_data: &mut ComputedData,
mmat: &mut BoundsMatrix,
) {
let atm2 = bond_pair_shared_atom(mol, accum_data, bid1, bid2);
let atm3 = bond_pair_shared_atom(mol, accum_data, bid2, bid3);
let bnd1 = &mol.bonds()[bid1];
let bnd2 = &mol.bonds()[bid2];
let bnd3 = &mol.bonds()[bid3];
let aid1 = if bnd1.begin().index() == atm2 {
bnd1.end().index()
} else {
bnd1.begin().index()
};
let aid4 = if bnd3.begin().index() == atm3 {
bnd3.end().index()
} else {
bnd3.begin().index()
};
let pid = aid1.min(aid4) * mol.num_atoms() + aid1.max(aid4);
if accum_data.visited_bound(pid, DistType::Dist13) {
return;
}
let atm1 = aid1;
let atm4 = aid4;
let bl1 = accum_data.bond_lengths[bid1];
let bl2 = accum_data.bond_lengths[bid2];
let bl3 = accum_data.bond_lengths[bid3];
let ba12 = accum_data.get_bond_angle(mol.num_bonds(), bid1, bid2);
let ba23 = accum_data.get_bond_angle(mol.num_bonds(), bid2, bid3);
assert!(ba12 > 0.0);
assert!(ba23 > 0.0);
let mut set_the_bound = true;
let nb = mol.num_bonds();
let (mut dl, mut du, kind) = match bnd2.order() {
BondOrder::Double => {
if bnd1.order() == BondOrder::Double || bnd3.order() == BondOrder::Double {
let dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23) - GEN_DIST_TOL;
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid3, bid2, bid1));
(dl, dl + 2.0 * GEN_DIST_TOL, Path14Kind::Cis)
} else if matches!(
bnd2.stereo(),
BondStereo::Z
| BondStereo::E
| BondStereo::Cis
| BondStereo::Trans
| BondStereo::AtropCw
| BondStereo::AtropCcw
) {
let stype = get_atom_stereo(bnd2, aid1, aid4);
if matches!(stype, BondStereo::Z | BondStereo::Cis) {
let dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23) - GEN_DIST_TOL;
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid3, bid2, bid1));
(dl, dl + 2.0 * GEN_DIST_TOL, Path14Kind::Cis)
} else {
let du = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid3, bid2, bid1));
(du - GEN_DIST_TOL, du + GEN_DIST_TOL, Path14Kind::Trans)
}
} else {
let mut dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23);
let mut du = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
if (du - dl).abs() < DIST12_DELTA {
dl -= GEN_DIST_TOL;
du += GEN_DIST_TOL;
}
(dl, du, Path14Kind::Other)
}
}
BondOrder::Single => {
if mol.atoms()[atm2].atomic_number() == 16
&& mol.atoms()[atm3].atomic_number() == 16
&& neighbors_for_atom(mol, atm2).len() == 2
&& neighbors_for_atom(mol, atm3).len() == 2
{
let dl = compute_14_dist_3d(bl1, bl2, bl3, ba12, ba23, std::f64::consts::PI / 2.0)
- GEN_DIST_TOL;
(dl, dl + 2.0 * GEN_DIST_TOL, Path14Kind::Other)
} else if check_macrocycle_all_in_same_ring_amide_ester_14(mol, atm1, atm2, atm3, atm4)
|| check_macrocycle_all_in_same_ring_amide_ester_14(mol, atm4, atm3, atm2, atm1)
{
let dl = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23) + 0.1;
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid3, bid2, bid1));
(dl - GEN_DIST_TOL, dl + GEN_DIST_TOL, Path14Kind::Trans)
} else if check_amide_ester_15(mol, bid1, bid3, atm2, atm3)
|| check_amide_ester_15(mol, bid3, bid1, atm3, atm2)
{
let total_hs_atm2 = total_num_hydrogens_rdkit_like(mol, atm2).unwrap_or(0);
if mol.atoms()[atm2].atomic_number() == 7
&& neighbors_for_atom(mol, atm2).len() == 3
&& mol.atoms()[atm1].atomic_number() == 1
&& total_hs_atm2 == 1
{
set_the_bound = false;
(0.0, 0.0, Path14Kind::Other)
} else {
let dl = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid3, bid2, bid1));
(dl - GEN_DIST_TOL, dl + GEN_DIST_TOL, Path14Kind::Trans)
}
} else {
(
compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23),
compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23),
Path14Kind::Other,
)
}
}
_ => (
compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23),
compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23),
Path14Kind::Other,
),
};
if set_the_bound {
if (du - dl).abs() < DIST12_DELTA {
dl -= GEN_DIST_TOL;
du += GEN_DIST_TOL;
}
mmat.check_and_set_bounds(aid1, aid4, dl, du);
accum_data.paths14.push(Path14Configuration {
bid1,
bid2,
bid3,
kind,
});
accum_data.visited14_bounds[pid] = true;
}
}
fn set_chain_14_bounds(
mol: &Molecule,
bid1: usize,
bid2: usize,
bid3: usize,
accum_data: &mut ComputedData,
mmat: &mut BoundsMatrix,
force_trans_amides: bool,
) {
let atm2 = bond_pair_shared_atom(mol, accum_data, bid1, bid2);
let atm3 = bond_pair_shared_atom(mol, accum_data, bid2, bid3);
let bnd1 = &mol.bonds()[bid1];
let bnd2 = &mol.bonds()[bid2];
let bnd3 = &mol.bonds()[bid3];
let aid1 = if bnd1.begin().index() == atm2 {
bnd1.end().index()
} else {
bnd1.begin().index()
};
let aid4 = if bnd3.begin().index() == atm3 {
bnd3.end().index()
} else {
bnd3.begin().index()
};
let pid = aid1.min(aid4) * mol.num_atoms() + aid1.max(aid4);
if accum_data.visited_bound(pid, DistType::Dist13) {
return;
}
let atm1 = aid1;
let atm4 = aid4;
let bl1 = accum_data.bond_lengths[bid1];
let bl2 = accum_data.bond_lengths[bid2];
let bl3 = accum_data.bond_lengths[bid3];
let ba12 = accum_data.get_bond_angle(mol.num_bonds(), bid1, bid2);
let ba23 = accum_data.get_bond_angle(mol.num_bonds(), bid2, bid3);
assert!(ba12 > 0.0);
assert!(ba23 > 0.0);
let mut set_the_bound = true;
let nb = mol.num_bonds();
let (mut dl, mut du, kind) = match bnd2.order() {
BondOrder::Double => {
if bnd1.order() == BondOrder::Double || bnd3.order() == BondOrder::Double {
let dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23) - GEN_DIST_TOL;
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid3, bid2, bid1));
(dl, dl + 2.0 * GEN_DIST_TOL, Path14Kind::Cis)
} else if matches!(
bnd2.stereo(),
BondStereo::Z
| BondStereo::E
| BondStereo::Cis
| BondStereo::Trans
| BondStereo::AtropCw
| BondStereo::AtropCcw
) {
let stype = get_atom_stereo(bnd2, aid1, aid4);
if matches!(stype, BondStereo::Z | BondStereo::Cis) {
let dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23) - GEN_DIST_TOL;
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.cis_paths, path14_id(nb, bid3, bid2, bid1));
(dl, dl + 2.0 * GEN_DIST_TOL, Path14Kind::Cis)
} else {
let du = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid1, bid2, bid3));
record_path_flag(&mut accum_data.trans_paths, path14_id(nb, bid3, bid2, bid1));
(du - GEN_DIST_TOL, du + GEN_DIST_TOL, Path14Kind::Trans)
}
} else {
let mut dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23);
let mut du = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
if (du - dl).abs() < DIST12_DELTA {
dl -= GEN_DIST_TOL;
du += GEN_DIST_TOL;
}
(dl, du, Path14Kind::Other)
}
}
BondOrder::Single => {
if mol.atoms()[atm2].atomic_number() == 16
&& mol.atoms()[atm3].atomic_number() == 16
&& neighbors_for_atom(mol, atm2).len() == 2
&& neighbors_for_atom(mol, atm3).len() == 2
{
let dl = compute_14_dist_3d(bl1, bl2, bl3, ba12, ba23, std::f64::consts::PI / 2.0)
- GEN_DIST_TOL;
(dl, dl + 2.0 * GEN_DIST_TOL, Path14Kind::Other)
} else if check_amide_ester_14(mol, bid1, bid3, atm2, atm3, atm4)
|| check_amide_ester_14(mol, bid3, bid1, atm3, atm2, atm1)
{
if force_trans_amides {
let total_hs_atm2 = total_num_hydrogens_rdkit_like(mol, atm2).unwrap_or(0);
let total_hs_atm3 = total_num_hydrogens_rdkit_like(mol, atm3).unwrap_or(0);
let secondary_left = mol.atoms()[atm1].atomic_number() == 1
&& mol.atoms()[atm2].atomic_number() == 7
&& neighbors_for_atom(mol, atm2).len() == 3
&& total_hs_atm2 == 1;
let secondary_right = mol.atoms()[atm4].atomic_number() == 1
&& mol.atoms()[atm3].atomic_number() == 7
&& neighbors_for_atom(mol, atm3).len() == 3
&& total_hs_atm3 == 1;
if secondary_left || secondary_right {
let dl = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
record_path_flag(
&mut accum_data.trans_paths,
path14_id(nb, bid1, bid2, bid3),
);
record_path_flag(
&mut accum_data.trans_paths,
path14_id(nb, bid3, bid2, bid1),
);
(dl - GEN_DIST_TOL, dl + GEN_DIST_TOL, Path14Kind::Trans)
} else {
let dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23);
record_path_flag(
&mut accum_data.cis_paths,
path14_id(nb, bid1, bid2, bid3),
);
record_path_flag(
&mut accum_data.cis_paths,
path14_id(nb, bid3, bid2, bid1),
);
(dl - GEN_DIST_TOL, dl + GEN_DIST_TOL, Path14Kind::Cis)
}
} else {
(
compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23),
compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23),
Path14Kind::Other,
)
}
} else if check_amide_ester_15(mol, bid1, bid3, atm2, atm3)
|| check_amide_ester_15(mol, bid3, bid1, atm3, atm2)
{
if force_trans_amides {
let total_hs_atm2 = total_num_hydrogens_rdkit_like(mol, atm2).unwrap_or(0);
let total_hs_atm3 = total_num_hydrogens_rdkit_like(mol, atm3).unwrap_or(0);
let secondary_left = mol.atoms()[atm1].atomic_number() == 1
&& mol.atoms()[atm2].atomic_number() == 7
&& neighbors_for_atom(mol, atm2).len() == 3
&& total_hs_atm2 == 1;
let secondary_right = mol.atoms()[atm4].atomic_number() == 1
&& mol.atoms()[atm3].atomic_number() == 7
&& neighbors_for_atom(mol, atm3).len() == 3
&& total_hs_atm3 == 1;
if secondary_left || secondary_right {
let dl = compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23);
record_path_flag(
&mut accum_data.cis_paths,
path14_id(nb, bid1, bid2, bid3),
);
record_path_flag(
&mut accum_data.cis_paths,
path14_id(nb, bid3, bid2, bid1),
);
(dl - GEN_DIST_TOL, dl + GEN_DIST_TOL, Path14Kind::Cis)
} else {
let dl = compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23);
record_path_flag(
&mut accum_data.trans_paths,
path14_id(nb, bid1, bid2, bid3),
);
record_path_flag(
&mut accum_data.trans_paths,
path14_id(nb, bid3, bid2, bid1),
);
(dl - GEN_DIST_TOL, dl + GEN_DIST_TOL, Path14Kind::Trans)
}
} else {
(
compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23),
compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23),
Path14Kind::Other,
)
}
} else {
(
compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23),
compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23),
Path14Kind::Other,
)
}
}
_ => (
compute_14_dist_cis(bl1, bl2, bl3, ba12, ba23),
compute_14_dist_trans(bl1, bl2, bl3, ba12, ba23),
Path14Kind::Other,
),
};
if set_the_bound {
if (du - dl).abs() < DIST12_DELTA {
dl -= GEN_DIST_TOL;
du += GEN_DIST_TOL;
}
mmat.check_and_set_bounds(aid1, aid4, dl, du);
accum_data.paths14.push(Path14Configuration {
bid1,
bid2,
bid3,
kind,
});
accum_data.visited14_bounds[pid] = true;
}
}
fn check_h2_nx3_h1_ox2(mol: &Molecule, atom_idx: usize) -> bool {
let atom = &mol.atoms()[atom_idx];
let Some(total_hs) = total_num_hydrogens_rdkit_like(mol, atom_idx) else {
return false;
};
(atom.atomic_number() == 6 && total_hs == 2)
|| (atom.atomic_number() == 8 && total_hs == 0)
|| (atom.atomic_number() == 7
&& neighbors_for_atom(mol, atom_idx).len() == 3
&& total_hs == 1)
}
fn check_nh_ch_ch_nh(mol: &Molecule, atm1: usize, atm2: usize, atm3: usize, atm4: usize) -> bool {
mol.atoms()[atm1].atomic_number() != 1
&& mol.atoms()[atm4].atomic_number() != 1
&& check_h2_nx3_h1_ox2(mol, atm2)
&& check_h2_nx3_h1_ox2(mol, atm3)
}
fn check_amide_ester_14(
mol: &Molecule,
bnd1_idx: usize,
bnd3_idx: usize,
atm2: usize,
atm3: usize,
atm4: usize,
) -> bool {
let bnd1 = &mol.bonds()[bnd1_idx];
let bnd3 = &mol.bonds()[bnd3_idx];
let a2_num = mol.atoms()[atm2].atomic_number();
let a3_num = mol.atoms()[atm3].atomic_number();
let a4_num = mol.atoms()[atm4].atomic_number();
let total_hs_atm2 = total_num_hydrogens_rdkit_like(mol, atm2).unwrap_or(0);
a3_num == 6
&& bnd3.order() == BondOrder::Double
&& (a4_num == 8 || a4_num == 7)
&& bnd1.order() == BondOrder::Single
&& (a2_num == 8 || (a2_num == 7 && total_hs_atm2 == 1))
}
fn check_macrocycle_all_in_same_ring_amide_ester_14(
mol: &Molecule,
atm1: usize,
atm2: usize,
atm3: usize,
atm4: usize,
) -> bool {
let a2_num = mol.atoms()[atm2].atomic_number();
let a3_num = mol.atoms()[atm3].atomic_number();
if a3_num != 6 {
return false;
}
if a2_num != 7 && a2_num != 8 {
return false;
}
if neighbors_for_atom(mol, atm2).len() != 3 || neighbors_for_atom(mol, atm3).len() != 3 {
return false;
}
for nbr_idx in neighbors_for_atom(mol, atm2) {
if nbr_idx != atm1 && nbr_idx != atm3 {
let res = &mol.atoms()[nbr_idx];
let res_bnd = &mol.bonds()[bond_between_idx_simple(mol, atm2, nbr_idx).expect("bond")];
if (res.atomic_number() != 6 && res.atomic_number() != 1)
|| res_bnd.order() != BondOrder::Single
{
return false;
}
break;
}
}
for nbr_idx in neighbors_for_atom(mol, atm3) {
if nbr_idx != atm2 && nbr_idx != atm4 {
let res = &mol.atoms()[nbr_idx];
let res_bnd = &mol.bonds()[bond_between_idx_simple(mol, atm3, nbr_idx).expect("bond")];
if res.atomic_number() != 8 || res_bnd.order() != BondOrder::Double {
return false;
}
break;
}
}
true
}
fn is_carbonyl(mol: &Molecule, atom_idx: usize) -> bool {
let atom = &mol.atoms()[atom_idx];
if atom.atomic_number() != 6 || neighbors_for_atom(mol, atom_idx).len() <= 2 {
return false;
}
neighbors_for_atom(mol, atom_idx)
.into_iter()
.any(|nbr_idx| {
let at_num = mol.atoms()[nbr_idx].atomic_number();
(at_num == 8 || at_num == 7)
&& mol.bonds()[bond_between_idx_simple(mol, atom_idx, nbr_idx).expect("bond")]
.order()
== BondOrder::Double
})
}
fn check_amide_ester_15(
mol: &Molecule,
bnd1_idx: usize,
bnd3_idx: usize,
atm2: usize,
atm3: usize,
) -> bool {
let a2_num = mol.atoms()[atm2].atomic_number();
let total_hs_atm2 = total_num_hydrogens_rdkit_like(mol, atm2).unwrap_or(0);
((a2_num == 8) || (a2_num == 7 && total_hs_atm2 == 1))
&& mol.bonds()[bnd1_idx].order() == BondOrder::Single
&& mol.atoms()[atm3].atomic_number() == 6
&& mol.bonds()[bnd3_idx].order() == BondOrder::Single
&& is_carbonyl(mol, atm3)
}
fn compute_15_dist_cis_cis(
d1: f64,
d2: f64,
d3: f64,
d4: f64,
ang12: f64,
ang23: f64,
ang34: f64,
) -> f64 {
let dx14 = d2 - d3 * ang23.cos() - d1 * ang12.cos();
let dy14 = d3 * ang23.sin() - d1 * ang12.sin();
let d14 = (dx14 * dx14 + dy14 * dy14).sqrt();
let cval = ((d3 - d2 * ang23.cos() + d1 * (ang12 + ang23).cos()) / d14).clamp(-1.0, 1.0);
let ang143 = cval.acos();
let ang145 = ang34 - ang143;
compute_13_dist(d14, d4, ang145)
}
fn compute_15_dist_cis_trans(
d1: f64,
d2: f64,
d3: f64,
d4: f64,
ang12: f64,
ang23: f64,
ang34: f64,
) -> f64 {
let dx14 = d2 - d3 * ang23.cos() - d1 * ang12.cos();
let dy14 = d3 * ang23.sin() - d1 * ang12.sin();
let d14 = (dx14 * dx14 + dy14 * dy14).sqrt();
let cval = ((d3 - d2 * ang23.cos() + d1 * (ang12 + ang23).cos()) / d14).clamp(-1.0, 1.0);
let ang143 = cval.acos();
let ang145 = ang34 + ang143;
compute_13_dist(d14, d4, ang145)
}
fn compute_15_dist_trans_trans(
d1: f64,
d2: f64,
d3: f64,
d4: f64,
ang12: f64,
ang23: f64,
ang34: f64,
) -> f64 {
let dx14 = d2 - d3 * ang23.cos() - d1 * ang12.cos();
let dy14 = d3 * ang23.sin() + d1 * ang12.sin();
let d14 = (dx14 * dx14 + dy14 * dy14).sqrt();
let cval = ((d3 - d2 * ang23.cos() + d1 * (ang12 - ang23).cos()) / d14).clamp(-1.0, 1.0);
let ang143 = cval.acos();
let ang145 = ang34 + ang143;
compute_13_dist(d14, d4, ang145)
}
fn compute_15_dist_trans_cis(
d1: f64,
d2: f64,
d3: f64,
d4: f64,
ang12: f64,
ang23: f64,
ang34: f64,
) -> f64 {
let dx14 = d2 - d3 * ang23.cos() - d1 * ang12.cos();
let dy14 = d3 * ang23.sin() + d1 * ang12.sin();
let d14 = (dx14 * dx14 + dy14 * dy14).sqrt();
let cval = ((d3 - d2 * ang23.cos() + d1 * (ang12 - ang23).cos()) / d14).clamp(-1.0, 1.0);
let ang143 = cval.acos();
let ang145 = ang34 - ang143;
compute_13_dist(d14, d4, ang145)
}
fn set_15_bounds(
mol: &Molecule,
mmat: &mut BoundsMatrix,
accum_data: &mut ComputedData,
dmat: &[f64],
) {
let nb = mol.bonds().len();
let na = mol.atoms().len();
for path_idx in 0..accum_data.paths14.len() {
let path = accum_data.paths14[path_idx];
set_15_bounds_helper(
mol, mmat, accum_data, dmat, nb, na, path.bid1, path.bid2, path.bid3, path.kind,
);
set_15_bounds_helper(
mol, mmat, accum_data, dmat, nb, na, path.bid3, path.bid2, path.bid1, path.kind,
);
}
}
#[allow(clippy::too_many_arguments)]
fn set_15_bounds_helper(
mol: &Molecule,
mmat: &mut BoundsMatrix,
accum_data: &mut ComputedData,
dmat: &[f64],
nb: usize,
na: usize,
bid1: usize,
bid2: usize,
bid3: usize,
kind: Path14Kind,
) {
let aid2 = accum_data.get_bond_adj(nb, bid1, bid2) as usize;
let aid1 = if mol.bonds()[bid1].begin().index() == aid2 {
mol.bonds()[bid1].end().index()
} else {
mol.bonds()[bid1].begin().index()
};
let aid3 = accum_data.get_bond_adj(nb, bid2, bid3) as usize;
let aid4 = if mol.bonds()[bid3].begin().index() == aid3 {
mol.bonds()[bid3].end().index()
} else {
mol.bonds()[bid3].begin().index()
};
let d1 = accum_data.bond_lengths[bid1];
let d2 = accum_data.bond_lengths[bid2];
let d3 = accum_data.bond_lengths[bid3];
let ang12 = accum_data.get_bond_angle(nb, bid1, bid2);
let ang23 = accum_data.get_bond_angle(nb, bid2, bid3);
for i in 0..nb {
if accum_data.get_bond_adj(nb, bid3, i) != aid4 as i32 {
continue;
}
let aid5 = if mol.bonds()[i].begin().index() == aid4 {
mol.bonds()[i].end().index()
} else {
mol.bonds()[i].begin().index()
};
let pid = aid1.min(aid5) * na + aid1.max(aid5);
if accum_data.visited_bound(pid, DistType::Dist14) {
return;
}
if dmat[aid1.max(aid5) * na + aid1.min(aid5)] < 3.9 {
continue;
}
if aid1 == aid5 {
continue;
}
let pid1 = aid1 * na + aid5;
let pid2 = aid5 * na + aid1;
if !(mmat.get_lower(aid1, aid5) < DIST12_DELTA
|| accum_data.set15_atoms[pid1]
|| accum_data.set15_atoms[pid2])
{
continue;
}
let d4 = accum_data.bond_lengths[i];
let ang34 = accum_data.get_bond_angle(nb, bid3, i);
let path_id = bid2 as u64 * nb as u64 * nb as u64 + bid3 as u64 * nb as u64 + i as u64;
let (mut dl, mut du) = match kind {
Path14Kind::Cis => {
if has_path_flag(&accum_data.cis_paths, path_id) {
let base = compute_15_dist_cis_cis(d1, d2, d3, d4, ang12, ang23, ang34);
(base - DIST15_TOL, base + DIST15_TOL)
} else if has_path_flag(&accum_data.trans_paths, path_id) {
let base = compute_15_dist_cis_trans(d1, d2, d3, d4, ang12, ang23, ang34);
(base - DIST15_TOL, base + DIST15_TOL)
} else {
(
compute_15_dist_cis_cis(d1, d2, d3, d4, ang12, ang23, ang34) - DIST15_TOL,
compute_15_dist_cis_trans(d1, d2, d3, d4, ang12, ang23, ang34) + DIST15_TOL,
)
}
}
Path14Kind::Trans => {
if has_path_flag(&accum_data.cis_paths, path_id) {
let base = compute_15_dist_trans_cis(d1, d2, d3, d4, ang12, ang23, ang34);
(base - DIST15_TOL, base + DIST15_TOL)
} else if has_path_flag(&accum_data.trans_paths, path_id) {
let base = compute_15_dist_trans_trans(d1, d2, d3, d4, ang12, ang23, ang34);
(base - DIST15_TOL, base + DIST15_TOL)
} else {
(
compute_15_dist_trans_cis(d1, d2, d3, d4, ang12, ang23, ang34) - DIST15_TOL,
compute_15_dist_trans_trans(d1, d2, d3, d4, ang12, ang23, ang34)
+ DIST15_TOL,
)
}
}
Path14Kind::Other => {
if has_path_flag(&accum_data.cis_paths, path_id) {
(
compute_15_dist_cis_cis(d4, d3, d2, d1, ang34, ang23, ang12) - DIST15_TOL,
compute_15_dist_cis_trans(d4, d3, d2, d1, ang34, ang23, ang12) + DIST15_TOL,
)
} else if has_path_flag(&accum_data.trans_paths, path_id) {
(
compute_15_dist_trans_cis(d4, d3, d2, d1, ang34, ang23, ang12) - DIST15_TOL,
compute_15_dist_trans_trans(d4, d3, d2, d1, ang34, ang23, ang12)
+ DIST15_TOL,
)
} else {
let vw1 = vdw_radius(mol.atoms()[aid1].atomic_number());
let vw5 = vdw_radius(mol.atoms()[aid5].atomic_number());
(VDW_SCALE_15 * (vw1 + vw5), MAX_UPPER)
}
}
};
if du < 0.0 {
du = MAX_UPPER;
}
mmat.check_and_set_bounds(aid1, aid5, dl, du);
accum_data.set15_atoms[pid1] = true;
accum_data.set15_atoms[pid2] = true;
}
}
fn set_14_bounds(
mol: &Molecule,
mmat: &mut BoundsMatrix,
accum_data: &mut ComputedData,
dmat: &[f64],
use_macrocycle_14config: bool,
force_trans_amides: bool,
) {
let npt = mmat.num_rows();
assert_eq!(npt, mol.num_atoms(), "Wrong size metric matrix");
let max_num_bonds = (u64::MAX as f64).cbrt() as usize;
if mol.num_bonds() >= max_num_bonds {
panic!("Too many bonds in the molecule, cannot compute 1-4 bounds");
}
let rinfo = ring_info_for_distgeom(mol);
let bond_rings = rinfo.bond_rings();
let mut bid_is_macrocycle: HashSet<usize> = HashSet::new();
let mut ring_bond_pairs: HashSet<u64> = HashSet::new();
let mut done_paths: HashSet<u64> = HashSet::new();
let nb = mol.num_bonds() as u64;
for bring in bond_rings {
let r_size = bring.len();
if r_size < 3 {
continue;
}
let mut bid1 = bring[r_size - 1].index();
for i in 0..r_size {
let bid2 = bring[i].index();
let bid3 = bring[(i + 1) % r_size].index();
let pid1 = bid1 as u64 * nb + bid2 as u64;
let pid2 = bid2 as u64 * nb + bid1 as u64;
let id1 = bid1 as u64 * nb * nb + bid2 as u64 * nb + bid3 as u64;
let id2 = bid3 as u64 * nb * nb + bid2 as u64 * nb + bid1 as u64;
ring_bond_pairs.insert(pid1);
ring_bond_pairs.insert(pid2);
done_paths.insert(id1);
done_paths.insert(id2);
if r_size > 5 {
if use_macrocycle_14config && r_size >= MIN_MACROCYCLE_RING_SIZE {
set_macrocycle_all_in_same_ring_14_bounds(
mol, bid1, bid2, bid3, accum_data, mmat,
);
bid_is_macrocycle.insert(bid2);
} else {
set_in_ring_14_bounds(mol, bid1, bid2, bid3, accum_data, mmat, dmat, r_size);
}
} else {
record_14_path(mol, bid1, bid2, bid3, accum_data);
}
bid1 = bid2;
}
}
for bond in mol.bonds() {
let bid2 = bond.id().index();
let aid2 = bond.begin().index();
let aid3 = bond.end().index();
for nbr1 in neighbors_for_atom(mol, aid2) {
let Some(bid1) = bond_between_idx_simple(mol, aid2, nbr1) else {
continue;
};
if bid1 == bid2 {
continue;
}
for nbr3 in neighbors_for_atom(mol, aid3) {
let Some(bid3) = bond_between_idx_simple(mol, aid3, nbr3) else {
continue;
};
if bid3 == bid2 {
continue;
}
let id1 = bid1 as u64 * nb * nb + bid2 as u64 * nb + bid3 as u64;
let id2 = bid3 as u64 * nb * nb + bid2 as u64 * nb + bid1 as u64;
if done_paths.contains(&id1) || done_paths.contains(&id2) {
continue;
}
let pid1 = bid1 as u64 * nb + bid2 as u64;
let pid2 = bid2 as u64 * nb + bid1 as u64;
let pid3 = bid2 as u64 * nb + bid3 as u64;
let pid4 = bid3 as u64 * nb + bid2 as u64;
if ring_bond_pairs.contains(&pid1)
|| ring_bond_pairs.contains(&pid2)
|| ring_bond_pairs.contains(&pid3)
|| ring_bond_pairs.contains(&pid4)
{
if use_macrocycle_14config && bid_is_macrocycle.contains(&bid2) {
set_macrocycle_two_in_same_ring_14_bounds(
mol, bid1, bid2, bid3, accum_data, mmat, dmat,
);
} else {
set_two_in_same_ring_14_bounds(
mol, bid1, bid2, bid3, accum_data, mmat, dmat,
);
}
} else if (rinfo.num_bond_rings(BondId::new(bid1)) > 0
&& rinfo.num_bond_rings(BondId::new(bid2)) > 0)
|| (rinfo.num_bond_rings(BondId::new(bid2)) > 0
&& rinfo.num_bond_rings(BondId::new(bid3)) > 0)
{
set_two_in_diff_ring_14_bounds(mol, bid1, bid2, bid3, accum_data, mmat, dmat);
} else if rinfo.num_bond_rings(BondId::new(bid2)) > 0 {
set_share_ring_bond_14_bounds(mol, bid1, bid2, bid3, accum_data, mmat, dmat);
} else {
set_chain_14_bounds(
mol,
bid1,
bid2,
bid3,
accum_data,
mmat,
force_trans_amides,
);
}
}
}
}
}
fn bond_lengths_from_accum(accum_data: &ComputedData, bid: usize) -> f64 {
let bl = accum_data.bond_lengths[bid];
if bl > 0.0 { bl } else { 1.5 }
}
fn bond_between_idx_simple(mol: &Molecule, a: usize, b: usize) -> Option<usize> {
mol.bonds()
.iter()
.find(|bond| {
(bond.begin().index() == a && bond.end().index() == b)
|| (bond.begin().index() == b && bond.end().index() == a)
})
.map(|b| b.id().index())
}
fn collect_bonds_and_angles(
mol: &Molecule,
bonds: &mut Vec<(i32, i32)>,
angles: &mut Vec<Vec<i32>>,
) {
bonds.clear();
angles.clear();
bonds.reserve(mol.num_bonds());
for bondi in mol.bonds() {
bonds.push((bondi.begin().index() as i32, bondi.end().index() as i32));
for j in (bondi.id().index() + 1)..mol.num_bonds() {
let bondj = &mol.bonds()[j];
let aid11 = bondi.begin().index() as i32;
let aid12 = bondi.end().index() as i32;
let aid21 = bondj.begin().index() as i32;
let aid22 = bondj.end().index() as i32;
if aid11 != aid21 && aid11 != aid22 && aid12 != aid21 && aid12 != aid22 {
continue;
}
let mut tmp = vec![0; 4];
if aid12 == aid21 {
tmp[0] = aid11;
tmp[1] = aid12;
tmp[2] = aid22;
} else if aid12 == aid22 {
tmp[0] = aid11;
tmp[1] = aid12;
tmp[2] = aid21;
} else if aid11 == aid21 {
tmp[0] = aid12;
tmp[1] = aid11;
tmp[2] = aid22;
} else if aid11 == aid22 {
tmp[0] = aid12;
tmp[1] = aid11;
tmp[2] = aid21;
}
if bondi.order() == BondOrder::Triple || bondj.order() == BondOrder::Triple {
tmp[3] = 1;
} else if bondi.order() == BondOrder::Double
&& bondj.order() == BondOrder::Double
&& neighbors_for_atom(mol, tmp[1] as usize).len() == 2
{
tmp[3] = 1;
}
angles.push(tmp);
}
}
}
fn set_lower_bound_vdw(mol: &Molecule, mmat: &mut BoundsMatrix, _scale_vdw: bool, dmat: &[f64]) {
let n = mol.atoms().len();
let npt = mmat.num_rows();
assert_eq!(npt, n, "Wrong size metric matrix");
let mut h_in_hbond_donors = vec![false; n];
let mut hbond_acceptors = vec![false; n];
for i in 1..n {
h_in_hbond_donors[i] = is_h_in_hbond_donor(mol, i);
hbond_acceptors[i] = is_hbond_acceptor(mol.atoms()[i].atomic_number());
}
for i in 1..n {
let vw1 = vdw_radius(mol.atoms()[i].atomic_number());
for j in 0..i {
if mmat.get_lower(i, j) > DIST12_DELTA {
continue;
}
let vw2 = vdw_radius(mol.atoms()[j].atomic_number());
let td = dmat[i * npt + j];
if (h_in_hbond_donors[i] && hbond_acceptors[j])
|| (hbond_acceptors[i] && h_in_hbond_donors[j])
{
mmat.set_lower(i, j, H_BOND_LENGTH);
} else if td == 4.0 {
mmat.set_lower(i, j, VDW_SCALE_15 * (vw1 + vw2));
} else if td == 5.0 {
mmat.set_lower(
i,
j,
(VDW_SCALE_15 + 0.5 * (1.0 - VDW_SCALE_15)) * (vw1 + vw2),
);
} else {
mmat.set_lower(i, j, vw1 + vw2);
}
}
}
}
#[allow(clippy::too_many_arguments)]
fn set_topol_bounds(
mol: &Molecule,
mmat: &mut BoundsMatrix,
set15bounds: bool,
scale_vdw: bool,
use_macrocycle_14config: bool,
force_trans_amides: bool,
set14bounds: bool,
set13bounds: bool,
) -> Result<(), DgBoundsError> {
let nb = mol.num_bonds();
let na = mol.num_atoms();
if na == 0 {
return Err(DgBoundsError::GenerationFailed(
"molecule has no atoms".to_string(),
));
}
let max_num_bonds = (u64::MAX as f64).cbrt() as usize;
if nb >= max_num_bonds {
return Err(DgBoundsError::GenerationFailed(
"Too many bonds in the molecule, cannot compute 1-4 bounds".to_string(),
));
}
let mut accum_data = ComputedData::new(na, nb);
let dist_matrix = flatten_topological_distances_matrix(mol);
set_12_bounds(mol, mmat, &mut accum_data)?;
if set13bounds {
set_13_bounds(mol, mmat, &mut accum_data);
}
if set14bounds {
set_14_bounds(
mol,
mmat,
&mut accum_data,
&dist_matrix,
use_macrocycle_14config,
force_trans_amides,
);
}
if set15bounds {
set_15_bounds(mol, mmat, &mut accum_data, &dist_matrix);
}
set_lower_bound_vdw(mol, mmat, scale_vdw, &dist_matrix);
Ok(())
}
#[allow(clippy::too_many_arguments)]
fn set_topol_bounds_with_outputs(
mol: &Molecule,
mmat: &mut BoundsMatrix,
bonds: &mut Vec<(i32, i32)>,
angles: &mut Vec<Vec<i32>>,
set15bounds: bool,
scale_vdw: bool,
use_macrocycle_14config: bool,
force_trans_amides: bool,
set14bounds: bool,
set13bounds: bool,
) -> Result<(), DgBoundsError> {
bonds.clear();
angles.clear();
set_topol_bounds(
mol,
mmat,
set15bounds,
scale_vdw,
use_macrocycle_14config,
force_trans_amides,
set14bounds,
set13bounds,
)?;
collect_bonds_and_angles(mol, bonds, angles);
Ok(())
}
pub fn dg_bounds_matrix_with_options(
molecule: &Molecule,
set15bounds: bool,
scale_vdw: bool,
do_triangle_smoothing: bool,
use_macrocycle14config: bool,
) -> Result<Vec<Vec<f64>>, DgBoundsError> {
let n = molecule.atoms().len();
let mut mmat = BoundsMatrix::new(n);
set_topol_bounds(
molecule,
&mut mmat,
set15bounds,
scale_vdw,
use_macrocycle14config,
true,
true,
true,
)?;
if do_triangle_smoothing && !mmat.triangle_smooth(0.0) {
return Err(DgBoundsError::GenerationFailed(
"triangle smoothing found inconsistent bounds".to_string(),
));
}
Ok(mmat.to_vec_vec())
}
pub fn dg_bounds_matrix(molecule: &Molecule) -> Result<Vec<Vec<f64>>, DgBoundsError> {
dg_bounds_matrix_with_options(molecule, true, false, true, false)
}
#[cfg(test)]
mod tests;