mod chem_env;
pub(crate) mod frcmod;
mod frcmod_missing_params;
mod parmchk_parse;
mod post_process;
use std::io;
use bio_files::{
AtomGeneric, BondGeneric, BondType,
amber_typedef::{AmberDef, AtomTypeDef},
md_params::ForceFieldParams,
};
use chem_env::*;
pub use frcmod::assign_missing_params;
use na_seq::Element;
use post_process::*;
use crate::{partial_charge_inference::infer_charge, util::build_adjacency_list};
const DEF_ABCG2: &str = include_str!("../../param_data/antechamber_defs/ATOMTYPE_ABCG2.DEF");
const DEF_GFF2: &str = include_str!("../../param_data/antechamber_defs/ATOMTYPE_GFF2.DEF");
pub struct AmberDefSet {
pub abcg2: AmberDef,
pub gff2: AmberDef,
}
impl AmberDefSet {
pub fn new() -> io::Result<Self> {
let abcg2 = AmberDef::new(DEF_ABCG2)?;
let gff2 = AmberDef::new(DEF_GFF2)?;
Ok(Self {
abcg2,
gff2,
})
}
}
#[derive(Debug)]
pub struct AtomEnvData {
degree: u8,
num_attached_h: u8,
ring_sizes: Vec<u8>,
is_aromatic: bool,
num_double_bonds: u8,
num_triple_bonds: u8,
has_double_to_hetero: bool,
}
#[allow(clippy::too_many_arguments)]
fn dfs_find_rings(
start: usize,
current: usize,
depth: u8,
max_size: u8,
visited: &mut [bool],
path: &mut Vec<usize>,
adj: &[Vec<usize>],
ring_sizes: &mut [Vec<u8>],
) {
if depth >= max_size {
return;
}
for &nbr in &adj[current] {
if nbr == start && depth + 1 >= 3 {
let size = depth + 1;
for &idx in path.iter() {
if !ring_sizes[idx].contains(&size) {
ring_sizes[idx].push(size);
}
}
} else if !visited[nbr] {
visited[nbr] = true;
path.push(nbr);
dfs_find_rings(
start,
nbr,
depth + 1,
max_size,
visited,
path,
adj,
ring_sizes,
);
path.pop();
visited[nbr] = false;
}
}
}
fn detect_rings(adj: &[Vec<usize>], max_size: u8) -> Vec<Vec<u8>> {
let n = adj.len();
let mut ring_sizes = vec![Vec::<u8>::new(); n];
for start in 0..n {
let mut visited = vec![false; n];
visited[start] = true;
let mut path = vec![start];
dfs_find_rings(
start,
start,
0,
max_size,
&mut visited,
&mut path,
adj,
&mut ring_sizes,
);
}
ring_sizes
}
fn build_env(atoms: &[AtomGeneric], bonds: &[BondGeneric], adj: &[Vec<usize>]) -> Vec<AtomEnvData> {
let ring_sizes = detect_rings(adj, 8);
let mut aromatic_bond_counts = vec![0u8; atoms.len()];
let mut num_double_bonds = vec![0u8; atoms.len()];
let mut num_triple_bonds = vec![0u8; atoms.len()];
let mut has_double_to_hetero = vec![false; atoms.len()];
for bond in bonds {
if bond.atom_0_sn == 0 || bond.atom_1_sn == 0 {
eprintln!("Error: Invalid (0) bond SN when building env. Aborting");
return Vec::new();
}
let i = bond.atom_0_sn as usize - 1;
let j = bond.atom_1_sn as usize - 1;
match bond.bond_type {
BondType::Aromatic => {
if i >= ring_sizes.len() || j >= ring_sizes.len() {
eprintln!("Invalid ring indices i: {i} j: {j}");
return Vec::new();
}
let in_ring_i = ring_sizes[i].iter().any(|s| (5..=7).contains(s));
let in_ring_j = ring_sizes[j].iter().any(|s| (5..=7).contains(s));
if in_ring_i && in_ring_j {
aromatic_bond_counts[i] = aromatic_bond_counts[i].saturating_add(1);
aromatic_bond_counts[j] = aromatic_bond_counts[j].saturating_add(1);
}
}
BondType::Double => {
if i >= num_double_bonds.len() || j >= num_double_bonds.len() {
eprintln!("Invalid db bond indices i: {i} j: {j}");
return Vec::new();
}
num_double_bonds[i] = num_double_bonds[i].saturating_add(1);
num_double_bonds[j] = num_double_bonds[j].saturating_add(1);
let ei = atoms[i].element;
let ej = atoms[j].element;
let i_hetero = matches!(
ei,
Element::Oxygen | Element::Nitrogen | Element::Sulfur | Element::Phosphorus
);
let j_hetero = matches!(
ej,
Element::Oxygen | Element::Nitrogen | Element::Sulfur | Element::Phosphorus
);
if i_hetero && !j_hetero {
has_double_to_hetero[j] = true;
} else if j_hetero && !i_hetero {
has_double_to_hetero[i] = true;
}
}
BondType::Triple => {
if i >= num_triple_bonds.len() || j >= num_triple_bonds.len() {
eprintln!("Invalid db triple indices i: {i} j: {j}");
return Vec::new();
}
num_triple_bonds[i] = num_triple_bonds[i].saturating_add(1);
num_triple_bonds[j] = num_triple_bonds[j].saturating_add(1);
}
_ => {}
}
}
atoms
.iter()
.enumerate()
.map(|(idx, _atom)| {
let neighbors = &adj[idx];
let degree = neighbors.len() as u8;
let num_attached_h = neighbors
.iter()
.filter(|&&j| atoms[j].element == Element::Hydrogen)
.count() as u8;
let is_in_ring_5_7 = ring_sizes[idx].iter().any(|s| (5..=7).contains(s));
let is_aromatic = is_in_ring_5_7 && aromatic_bond_counts[idx] >= 2;
AtomEnvData {
degree,
num_attached_h,
ring_sizes: ring_sizes[idx].clone(),
is_aromatic,
num_double_bonds: num_double_bonds[idx],
num_triple_bonds: num_triple_bonds[idx],
has_double_to_hetero: has_double_to_hetero[idx],
}
})
.collect()
}
fn parse_ring_size(s: &str) -> Option<u8> {
let s = s.trim();
let pos = s.find("RG")?;
let start = pos + 2;
let mut digits = String::new();
for ch in s[start..].chars() {
if ch.is_ascii_digit() {
digits.push(ch);
} else {
break;
}
}
if digits.is_empty() {
None
} else {
digits.parse().ok()
}
}
fn is_elec_withdrawing_element(el: Element) -> bool {
use Element::*;
matches!(
el,
Oxygen | Nitrogen | Sulfur | Fluorine | Chlorine | Bromine | Iodine
)
}
fn ew_count_for_heavy(idx: usize, atoms: &[AtomGeneric], adj: &[Vec<usize>]) -> u8 {
let mut count = 0u8;
for &j in &adj[idx] {
let nb = &atoms[j];
if nb.element == Element::Hydrogen {
continue;
}
if is_elec_withdrawing_element(nb.element) {
count = count.saturating_add(1);
}
}
count.min(3)
}
fn ew_count_for_atom(idx: usize, atoms: &[AtomGeneric], adj: &[Vec<usize>]) -> u8 {
if atoms[idx].element == Element::Hydrogen {
if let Some(nb) = single_heavy_neighbor(idx, atoms, adj) {
ew_count_for_heavy(nb, atoms, adj)
} else {
0
}
} else {
ew_count_for_heavy(idx, atoms, adj)
}
}
fn _non_h_env_matches(
env_str: &str,
idx: usize,
_atoms: &[AtomGeneric],
env_all: &[AtomEnvData],
adj: &[Vec<usize>],
) -> bool {
let env_str = env_str.trim();
if env_str == "&" {
return true;
}
if !env_str.starts_with('(') || !env_str.ends_with(')') {
return false;
}
let inner = &env_str[1..env_str.len() - 1].trim();
let parts: Vec<&str> = inner
.split(',')
.map(str::trim)
.filter(|p| !p.is_empty())
.collect();
if parts.is_empty() {
return false;
}
let all_xx = parts
.iter()
.all(|p| p.starts_with("XX[") && p.ends_with(']'));
if !all_xx {
return false;
}
let mut required_aromatic_neighbors = 0usize;
for part in &parts {
let inside = &part[3..part.len() - 1]; let tags: Vec<&str> = inside.split('.').map(str::trim).collect();
if tags.iter().any(|t| t.starts_with("AR")) {
required_aromatic_neighbors += 1;
} else {
return false;
}
}
if required_aromatic_neighbors == 0 {
return false;
}
let aromatic_neighbors = adj[idx].iter().filter(|&&j| env_all[j].is_aromatic).count();
aromatic_neighbors >= required_aromatic_neighbors
}
pub fn matches_def(
def: &AtomTypeDef,
idx: usize,
atoms: &[AtomGeneric],
env_all: &[AtomEnvData],
bonds: &[BondGeneric],
adj: &[Vec<usize>],
) -> bool {
if idx >= atoms.len() || idx >= env_all.len() {
return false;
}
let atom = &atoms[idx];
let env = &env_all[idx];
if def.name == "cd" {
return false;
}
if let Some(e) = def.element
&& atom.element != e
{
return false;
}
if let Some(n) = def.attached_atoms
&& env.degree != n
{
return false;
}
if let Some(nh) = def.attached_h
&& env.num_attached_h != nh
{
return false;
}
if def.name == "ce" {
if env.num_double_bonds == 0 {
return false;
}
if env.ring_sizes.is_empty() && !env.has_double_to_hetero {
return false;
}
}
if def.name == "nh" {
let has_aromatic_c_neighbor = adj[idx]
.iter()
.any(|&j| atoms[j].element == Element::Carbon && env_all[j].is_aromatic);
if !has_aromatic_c_neighbor {
return false;
}
}
if def.name == "cp" {
return false;
}
if def.name == "ca" {
if !env.is_aromatic {
return false;
}
if !env.ring_sizes.contains(&6) {
return false;
}
}
if def.name == "c" {
if !is_carbonyl_carbon(idx, atoms, bonds) {
return false;
}
return true;
}
if def.name == "nb" && !env.ring_sizes.is_empty() {
return false;
}
if def.name == "nc" {
if env.ring_sizes.is_empty() {
return false;
}
if env.degree != 2 {
return false;
}
let mut has_db = false;
for b in bonds {
let i = b.atom_0_sn as usize - 1;
let j = b.atom_1_sn as usize - 1;
if i == idx || j == idx && matches!(b.bond_type, BondType::Double) {
let other = if i == idx { j } else { i };
if atoms[other].element != Element::Hydrogen {
has_db = true;
}
}
}
if !has_db {
return false;
}
}
if let Some(ew_needed) = def.electron_withdrawal_count {
let ew_here = ew_count_for_atom(idx, atoms, adj);
if ew_here != ew_needed {
return false;
}
}
if let Some(ref prop) = def.atomic_property {
if def.name != "c" && def.name != "cd" {
if let Some(ring_size) = parse_ring_size(prop)
&& !env.ring_sizes.contains(&ring_size)
{
return false;
}
if prop.contains("AR") {
let mut aromatic = env.is_aromatic;
if !aromatic {
let mut has_db_like = false;
for b in bonds {
let i = b.atom_0_sn as usize - 1;
let j = b.atom_1_sn as usize - 1;
if i == idx || j == idx {
match b.bond_type {
BondType::Double | BondType::Aromatic => {
has_db_like = true;
}
_ => {}
}
}
}
if !env.ring_sizes.is_empty() && has_db_like {
aromatic = true;
}
}
if !aromatic {
return false;
}
}
if prop.contains("[DB]") || prop.contains("[TB]") {
let mut has_db = false;
let mut has_tb = false;
for b in bonds {
let i = b.atom_0_sn as usize - 1;
let j = b.atom_1_sn as usize - 1;
if i == idx || j == idx {
match b.bond_type {
BondType::Double => has_db = true,
BondType::Triple => has_tb = true,
_ => {}
}
}
}
if prop.contains("[DB]") && !has_db {
return false;
}
if prop.contains("[TB]") && !has_tb {
return false;
}
}
}
}
if let Some(ref env_str) = def.chem_env
&& env_str != "&"
{
if atom.element == Element::Hydrogen {
if !hydrogen_env_matches(env_str, idx, atoms, env_all, adj) {
return false;
}
} else {
let pattern: ChemEnvPattern = env_str.as_str().into();
if !pattern.matches(idx, atoms, env_all, bonds, adj) {
return false;
}
}
}
true
}
fn single_heavy_neighbor(idx: usize, atoms: &[AtomGeneric], adj: &[Vec<usize>]) -> Option<usize> {
let neighbors = &adj[idx];
let mut heavy = neighbors
.iter()
.copied()
.filter(|&j| atoms[j].element != Element::Hydrogen);
let first = heavy.next()?;
if heavy.next().is_some() {
return None;
}
Some(first)
}
fn hydrogen_env_matches(
env_str: &str,
idx: usize,
atoms: &[AtomGeneric],
env_all: &[AtomEnvData],
adj: &[Vec<usize>],
) -> bool {
let env_str = env_str.trim();
if env_str == "&" {
return true;
}
if !env_str.starts_with('(') || !env_str.ends_with(')') {
return false;
}
let inner = env_str[1..env_str.len() - 1].trim();
let nb = match single_heavy_neighbor(idx, atoms, adj) {
Some(i) => i,
None => return false,
};
let nb_atom = &atoms[nb];
let nb_env = &env_all[nb];
match inner {
"O" => nb_atom.element == Element::Oxygen,
"N" => nb_atom.element == Element::Nitrogen,
"S" => nb_atom.element == Element::Sulfur,
"P" => nb_atom.element == Element::Phosphorus,
inner if inner.starts_with('C') => {
let digits = &inner[1..].trim();
if digits.is_empty() {
nb_atom.element == Element::Carbon
} else if let Ok(target_deg) = digits.parse::<u8>() {
nb_atom.element == Element::Carbon && nb_env.degree == target_deg
} else {
false
}
}
_ => false,
}
}
pub fn find_ff_types(
atoms: &[AtomGeneric],
bonds: &[BondGeneric],
defs: &AmberDefSet,
) -> Vec<String> {
let adj = build_adjacency_list(atoms, bonds).unwrap();
let env = build_env(atoms, bonds, &adj);
let mut result = assign_types(&defs.gff2.atomtypes, atoms, &env, bonds, &adj);
postprocess_carbonyl_c(atoms, bonds, &mut result);
postprocess_ring_n_types(atoms, bonds, &adj, &env, &mut result);
postprocess_nd_sp2_hetero(atoms, bonds, &adj, &env, &mut result);
postprocess_ne_to_n2(atoms, bonds, &mut result);
postprocess_na_ring_bridge(atoms, &adj, &env, &mut result);
postprocess_nb_aromatic(atoms, &adj, &env, &mut result);
postprocess_p5(atoms, bonds, &mut result);
postprocess_nu_to_n7(atoms, bonds, &mut result);
postprocess_nb_to_na_ring_with_h(atoms, &env, &mut result);
postprocess_n7_to_nu(atoms, &adj, &mut result);
postprocess_c2_to_c_three_oxygens(atoms, &adj, &mut result);
postprocess_na_to_n3(atoms, &adj, &env, &mut result);
postprocess_sy_to_s6(atoms, bonds, &mut result);
postprocess_sy_to_s6_if_nonaryl_sulfonamide(atoms, bonds, &mut result);
postprocess_py_to_p5_by_o_count(atoms, &adj, &mut result);
postprocess_cz_demote_ring_nd(atoms, bonds, &mut result);
postprocess_cc_to_cd_ring_hetero(atoms, &adj, &env, &mut result);
postprocess_s6_to_sy(atoms, bonds, &mut result);
postprocess_s6_to_sy_if_attached_to_nh2_only(atoms, bonds, &adj, &mut result);
postprocess_n8_to_nv_guanidinium(atoms, bonds, &mut result);
postprocess_nv_to_n8_non_guanidinium(atoms, bonds, &mut result);
postprocess_n3_to_na_bridge_nd(atoms, &adj, &env, &mut result);
postprocess_cz_to_c2_guanidinium_mixed_n(atoms, bonds, &mut result);
postprocess_tris_n_c_to_c2(atoms, bonds, &mut result);
postprocess_nd_to_nc_ring_no_n_neighbor(atoms, bonds, &mut result);
postprocess_cz_to_cd_if_has_explicit_multibond(atoms, bonds, &mut result);
postprocess_nd_to_nc_if_double_to_cd(atoms, bonds, &mut result);
postprocess_nd_to_nc_only_for_c_s_motifs(atoms, bonds, &mut result);
postprocess_n7_to_nu_if_exocyclic(atoms, bonds, &mut result);
postprocess_n3_to_n_if_attached_to_acyl_carbon(atoms, bonds, &mut result);
postprocess_n3_to_nh_if_conjugated(atoms, bonds, &adj, &mut result);
postprocess_cz_to_ca_if_ring_no_n_neighbors(atoms, bonds, &mut result);
postprocess_cz_to_ca_if_has_aromatic_bond(atoms, bonds, &mut result);
postprocess_c2_to_cf_if_conjugated_to_carbonyl(atoms, bonds, &adj, &mut result);
postprocess_c2_to_ce_if_conjugated_to_carbonyl(atoms, bonds, &adj, &mut result);
postprocess_cc_to_ca_if_has_aromatic_bond(atoms, bonds, &mut result);
postprocess_cd_to_ca_if_has_aromatic_bond(atoms, bonds, &mut result);
postprocess_h_to_hx_alpha_carbon(atoms, &adj, &mut result);
postprocess_n7_to_n6_if_small_ring(atoms, bonds, &adj, &mut result);
postprocess_sy_to_s6_if_nonaryl_sulfonyl(atoms, bonds, &mut result);
postprocess_s6_to_sy_if_primary_sulfonamide(atoms, &adj, &mut result);
postprocess_n3_to_na_if_attached_to_alkenyl_c(atoms, bonds, &adj, &mut result);
postprocess_c2_to_ce_if_vinylic_attached_to_aromatic(atoms, bonds, &adj, &mut result);
postprocess_n1_to_n2_unless_sp_like(atoms, bonds, &adj, &mut result);
result
}
fn assign_types(
defs: &[AtomTypeDef],
atoms: &[AtomGeneric],
env_all: &[AtomEnvData],
bonds: &[BondGeneric],
adj: &[Vec<usize>],
) -> Vec<String> {
let mut types = Vec::with_capacity(atoms.len());
for idx in 0..atoms.len() {
let mut ty = "du".to_owned();
for def in defs {
if def.name == "cd" {
continue;
}
if matches_def(def, idx, atoms, env_all, bonds, adj) {
ty = def.name.clone();
break;
}
}
types.push(ty);
}
types
}
pub fn update_small_mol_params(
atoms: &mut [AtomGeneric],
bonds: &[BondGeneric],
adjacency_list: Option<&[Vec<usize>]>,
gaff2: &ForceFieldParams,
) -> io::Result<ForceFieldParams> {
let defs = AmberDefSet::new()?;
let ff_types = find_ff_types(atoms, bonds, &defs);
for (i, atom) in atoms.iter_mut().enumerate() {
atom.force_field_type = Some(ff_types[i].clone());
}
let charge = infer_charge(atoms, bonds).map_err(|e| io::Error::other(e))?;
for (i, atom) in atoms.iter_mut().enumerate() {
atom.partial_charge = Some(charge[i]);
}
let adj_list = match adjacency_list {
Some(a) => a,
None => &build_adjacency_list(atoms, bonds)
.map_err(|_| io::Error::other("Problem building adjacency list"))?,
};
let params = assign_missing_params(atoms, adj_list, gaff2)?;
Ok(params)
}