use std::{
collections::{HashMap, HashSet},
io,
path::PathBuf,
};
use bio_files::{
AtomGeneric, BondGeneric, ChainGeneric, LipidStandard, MmCif, ResidueEnd, ResidueGeneric,
ResidueType, create_bonds,
md_params::{
AngleBendingParams, BondStretchingParams, ChargeParams, ChargeParamsProtein,
DihedralParams, ForceFieldParams, LjParams, MassParams, NucleotideTemplate,
load_amino_charges, parse_lib_lipid, parse_lib_nucleic_acid, parse_lib_peptide,
},
};
use na_seq::{AminoAcid, AminoAcidGeneral, AminoAcidProtenationVariant, AtomTypeInRes, Element};
use crate::{Dihedral, ParamError, merge_params, populate_hydrogens_dihedrals};
pub type ProtFfChargeMap = HashMap<AminoAcidGeneral, Vec<ChargeParamsProtein>>;
pub type LipidFfChargeMap = HashMap<LipidStandard, Vec<ChargeParams>>;
pub type NucleicAcidFfChargeMap = HashMap<NucleotideTemplate, Vec<ChargeParams>>;
const PARM_19: &str = include_str!("../param_data/parm19.dat"); const FRCMOD_FF19SB: &str = include_str!("../param_data/frcmod.ff19SB"); pub const AMINO_19: &str = include_str!("../param_data/amino19.lib"); const AMINO_NT12: &str = include_str!("../param_data/aminont12.lib"); const AMINO_CT12: &str = include_str!("../param_data/aminoct12.lib");
const GAFF2: &str = include_str!("../param_data/gaff2.dat");
const LIPID_21: &str = include_str!("../param_data/lipid21.dat");
pub const LIPID_21_LIB: &str = include_str!("../param_data/lipid21.lib");
pub const OL24_LIB: &str = include_str!("../param_data/ff-nucleic-OL24.lib");
const OL24_FRCMOD: &str = include_str!("../param_data/ff-nucleic-OL24.frcmod");
pub const RNA_LIB: &str = include_str!("../param_data/RNA.lib");
#[derive(Default, Debug)]
pub struct FfParamSet {
pub peptide: Option<ForceFieldParams>,
pub small_mol: Option<ForceFieldParams>,
pub dna: Option<ForceFieldParams>,
pub rna: Option<ForceFieldParams>,
pub lipids: Option<ForceFieldParams>,
pub carbohydrates: Option<ForceFieldParams>,
pub peptide_ff_q_map: Option<ProtFfChargeMapSet>,
pub lipid_ff_q_map: Option<LipidFfChargeMap>,
pub dna_ff_q_map: Option<NucleicAcidFfChargeMap>,
pub rna_ff_q_map: Option<NucleicAcidFfChargeMap>,
}
#[derive(Clone, Debug, Default)]
pub struct ParamGeneralPaths {
pub peptide: Option<PathBuf>,
pub peptide_mod: Option<PathBuf>,
pub peptide_ff_q: Option<PathBuf>,
pub peptide_ff_q_c: Option<PathBuf>,
pub peptide_ff_q_n: Option<PathBuf>,
pub small_organic: Option<PathBuf>,
pub dna: Option<PathBuf>,
pub dna_mod: Option<PathBuf>,
pub rna: Option<PathBuf>,
pub lipid: Option<PathBuf>,
pub carbohydrate: Option<PathBuf>,
}
impl FfParamSet {
pub fn new(paths: &ParamGeneralPaths) -> io::Result<Self> {
let mut result = FfParamSet::default();
if let Some(p) = &paths.peptide {
let peptide = ForceFieldParams::load_dat(p)?;
if let Some(p_mod) = &paths.peptide_mod {
let frcmod = ForceFieldParams::load_frcmod(p_mod)?;
result.peptide = Some(merge_params(&peptide, &frcmod));
} else {
result.peptide = Some(peptide);
}
}
let mut ff_map = ProtFfChargeMapSet::default();
if let Some(p) = &paths.peptide_ff_q {
ff_map.internal = load_amino_charges(p)?;
}
if let Some(p) = &paths.peptide_ff_q_c {
ff_map.internal = load_amino_charges(p)?;
}
if let Some(p) = &paths.peptide_ff_q_n {
ff_map.internal = load_amino_charges(p)?;
}
result.peptide_ff_q_map = Some(ff_map);
if let Some(p) = &paths.small_organic {
result.small_mol = Some(ForceFieldParams::load_dat(p)?);
}
if let Some(p) = &paths.dna {
let peptide = ForceFieldParams::load_dat(p)?;
if let Some(p_mod) = &paths.dna_mod {
let frcmod = ForceFieldParams::load_frcmod(p_mod)?;
result.dna = Some(merge_params(&peptide, &frcmod));
} else {
result.dna = Some(peptide);
}
}
if let Some(p) = &paths.rna {
result.rna = Some(ForceFieldParams::load_dat(p)?);
}
if let Some(p) = &paths.lipid {
result.lipids = Some(ForceFieldParams::load_dat(p)?);
}
if let Some(p) = &paths.carbohydrate {
result.carbohydrates = Some(ForceFieldParams::load_dat(p)?);
}
Ok(result)
}
pub fn new_amber() -> io::Result<Self> {
let mut result = FfParamSet::default();
let parm19 = ForceFieldParams::from_dat(PARM_19)?;
let peptide_frcmod = ForceFieldParams::from_frcmod(FRCMOD_FF19SB)?;
result.peptide = Some(merge_params(&parm19, &peptide_frcmod));
{
let internal = parse_lib_peptide(AMINO_19)?;
let n_terminus = parse_lib_peptide(AMINO_NT12)?;
let c_terminus = parse_lib_peptide(AMINO_CT12)?;
result.peptide_ff_q_map = Some(ProtFfChargeMapSet {
internal,
n_terminus,
c_terminus,
});
}
let lipid_dat = ForceFieldParams::from_dat(LIPID_21)?;
result.lipids = Some(lipid_dat);
let lipid_charges = parse_lib_lipid(LIPID_21_LIB)?;
result.lipid_ff_q_map = Some(lipid_charges);
result.small_mol = Some(ForceFieldParams::from_dat(GAFF2)?);
let dna_frcmod = ForceFieldParams::from_frcmod(OL24_FRCMOD)?;
result.dna = Some(merge_params(&parm19, &dna_frcmod));
result.rna = Some(parm19.clone());
let dna_charges = parse_lib_nucleic_acid(OL24_LIB)?;
result.dna_ff_q_map = Some(dna_charges);
let rna_charges = parse_lib_nucleic_acid(RNA_LIB)?;
result.rna_ff_q_map = Some(rna_charges);
Ok(result)
}
}
#[derive(Clone, Debug, Default)]
pub(crate) struct ForceFieldParamsIndexed {
pub mass: HashMap<usize, MassParams>,
pub bond_stretching: HashMap<(usize, usize), BondStretchingParams>,
pub bond_rigid_constraints: HashMap<(usize, usize), (f32, f32)>,
pub angle: HashMap<(usize, usize, usize), AngleBendingParams>,
pub dihedral: HashMap<(usize, usize, usize, usize), Vec<DihedralParams>>,
pub improper: HashMap<(usize, usize, usize, usize), Vec<DihedralParams>>,
pub bonds_topology: HashSet<(usize, usize)>,
pub lennard_jones: HashMap<usize, LjParams>,
}
#[derive(Clone, Default, Debug)]
pub struct ProtFfChargeMapSet {
pub internal: ProtFfChargeMap,
pub n_terminus: ProtFfChargeMap,
pub c_terminus: ProtFfChargeMap,
}
pub fn populate_peptide_ff_and_q(
atoms: &mut [AtomGeneric],
residues: &[ResidueGeneric],
ff_type_charge: &ProtFfChargeMapSet,
) -> Result<(), ParamError> {
let mut index_map = HashMap::new();
for (i, atom) in atoms.iter().enumerate() {
index_map.insert(atom.serial_number, i);
}
for res in residues {
for sn in &res.atom_sns {
let atom = match atoms.get_mut(index_map[sn]) {
Some(a) => a,
None => {
return Err(ParamError::new(&format!(
"Unable to populate Charge or FF type for atom {sn}"
)));
}
};
if atom.hetero {
continue;
}
let Some(type_in_res) = &atom.type_in_res else {
return Err(ParamError::new(&format!(
"MD failure: Missing type in residue for atom: {atom}"
)));
};
let ResidueType::AminoAcid(aa) = &res.res_type else {
continue;
};
let aa_gen = AminoAcidGeneral::Standard(*aa);
let charge_map = match res.end {
ResidueEnd::Internal => &ff_type_charge.internal,
ResidueEnd::NTerminus => &ff_type_charge.n_terminus,
ResidueEnd::CTerminus => &ff_type_charge.c_terminus,
ResidueEnd::Hetero => {
return Err(ParamError::new(&format!(
"Error: Encountered hetero atom when parsing amino acid FF types: {atom}"
)));
}
};
let charges = match charge_map.get(&aa_gen) {
Some(c) => c,
None if aa_gen == AminoAcidGeneral::Standard(AminoAcid::His) => charge_map
.get(&AminoAcidGeneral::Variant(AminoAcidProtenationVariant::Hid))
.ok_or_else(|| ParamError::new("Unable to find AA mapping"))?,
None => return Err(ParamError::new("Unable to find AA mapping")),
};
let mut found = false;
for charge in charges {
if charge.type_in_res == *type_in_res {
atom.force_field_type = Some(charge.ff_type.clone());
atom.partial_charge = Some(charge.charge);
found = true;
break;
}
}
if !found {
match type_in_res {
AtomTypeInRes::H(_) => {
if aa_gen == AminoAcidGeneral::Standard(AminoAcid::His) {
let charges = charge_map
.get(&AminoAcidGeneral::Variant(AminoAcidProtenationVariant::Hie))
.ok_or_else(|| {
ParamError::new("Unable to find AA mapping for HIE")
})?;
for charge in charges {
if charge.type_in_res == *type_in_res {
atom.force_field_type = Some(charge.ff_type.clone());
atom.partial_charge = Some(charge.charge);
found = true;
break;
}
}
if found {
break;
}
}
if aa_gen == AminoAcidGeneral::Standard(AminoAcid::Arg)
&& *type_in_res == AtomTypeInRes::H("HH23".to_owned())
{
for charge in charges {
if charge.type_in_res == AtomTypeInRes::H("HH22".to_string()) {
atom.force_field_type = Some("HH22".to_string());
atom.partial_charge = Some(charge.charge);
found = true;
break;
}
}
if found {
break;
}
}
eprintln!(
"Error assigning FF type and q based on atom type in res: Failed to match H type. Res #{}, Atom #{}, {type_in_res}, {aa_gen:?}. \
Falling back to a generic H",
res.serial_number, atom.serial_number,
);
for charge in charges {
if charge.type_in_res == AtomTypeInRes::H("H".to_string())
|| charge.type_in_res == AtomTypeInRes::H("HA".to_string())
{
atom.force_field_type = Some("HB2".to_string());
atom.partial_charge = Some(charge.charge);
found = true;
break;
}
}
}
_ => (),
}
if !found {
eprintln!("Problem populating FF or Q: {}", atom);
continue;
}
}
}
}
Ok(())
}
pub fn prepare_peptide(
atoms: &mut Vec<AtomGeneric>,
bonds: &mut Vec<BondGeneric>,
residues: &mut Vec<ResidueGeneric>,
chains: &mut [ChainGeneric],
ff_map: &ProtFfChargeMapSet,
ph: f32, ) -> Result<Vec<Dihedral>, ParamError> {
let mut dihedrals = Vec::new();
let h_count = atoms
.iter()
.filter(|a| a.element == Element::Hydrogen)
.count();
if h_count < 10 {
dihedrals = populate_hydrogens_dihedrals(atoms, residues, chains, ff_map, ph)?;
}
populate_peptide_ff_and_q(atoms, residues, ff_map)?;
if bonds.is_empty() {
*bonds = create_bonds(atoms);
}
Ok(dihedrals)
}
pub fn prepare_peptide_mmcif(
mol: &mut MmCif,
ff_map: &ProtFfChargeMapSet,
ph: f32, ) -> Result<(Vec<BondGeneric>, Vec<Dihedral>), ParamError> {
let mut dihedrals = Vec::new();
let h_count = mol
.atoms
.iter()
.filter(|a| a.element == Element::Hydrogen)
.count();
if h_count < 10 {
dihedrals = populate_hydrogens_dihedrals(
&mut mol.atoms,
&mut mol.residues,
&mut mol.chains,
ff_map,
ph,
)?;
}
populate_peptide_ff_and_q(&mut mol.atoms, &mol.residues, ff_map)?;
let bonds = create_bonds(&mol.atoms);
Ok((bonds, dihedrals))
}