#![allow(clippy::excessive_precision)]
use std::{
collections::HashMap,
fs::File,
io::{self, ErrorKind, Read},
path::Path,
str::FromStr,
};
use na_seq::{AminoAcidGeneral, AtomTypeInRes, Nucleotide};
use crate::{LipidStandard, ResidueEnd};
#[derive(Debug, Clone)]
pub struct MassParams {
pub atom_type: String,
pub mass: f32,
pub comment: Option<String>,
}
impl MassParams {
pub fn from_line(line: &str) -> io::Result<Self> {
let cols: Vec<_> = line.split_whitespace().collect();
if cols.len() < 2 {
return Err(io::Error::new(
ErrorKind::InvalidData,
format!("Not enough cols (Mass) when parsing line {line}"),
));
}
let atom_type = cols[0].to_string();
let mass = parse_float(cols[1])?;
let mut comment = None;
if cols.len() >= 4 {
comment = Some(cols[3..].join(" "));
}
Ok(Self {
atom_type,
mass,
comment,
})
}
}
#[derive(Debug, Clone)]
pub struct BondStretchingParams {
pub atom_types: (String, String),
pub k_b: f32,
pub r_0: f32,
pub comment: Option<String>,
}
impl BondStretchingParams {
pub fn from_line(line: &str) -> io::Result<Self> {
let cols: Vec<_> = line.split_whitespace().collect();
if cols.len() < 3 {
return Err(io::Error::new(
ErrorKind::InvalidData,
format!("Not enough cols (Bond) when parsing line {line}"),
));
}
let (atom_types, col1_i) = get_atom_types(&cols);
let atom_types = (atom_types[0].to_owned(), atom_types[1].to_owned());
let k = parse_float(cols[col1_i])?;
let r_0 = parse_float(cols[col1_i + 1])?;
let mut comment = None;
if cols.len() >= col1_i + 2 {
comment = Some(cols[col1_i + 2..].join(" "));
}
Ok(Self {
atom_types,
k_b: k,
r_0,
comment,
})
}
}
#[derive(Debug, Clone)]
pub struct AngleBendingParams {
pub atom_types: (String, String, String),
pub k: f32,
pub theta_0: f32,
pub comment: Option<String>,
}
impl AngleBendingParams {
pub fn from_line(line: &str) -> io::Result<Self> {
let cols: Vec<_> = line.split_whitespace().collect();
if cols.len() < 3 {
return Err(io::Error::new(
ErrorKind::InvalidData,
format!("Not enough cols (Angle) when parsing line {line}"),
));
}
let (atom_types, col1_i) = get_atom_types(&cols);
let atom_types = (
atom_types[0].to_owned(),
atom_types[1].to_owned(),
atom_types[2].to_owned(),
);
let k = parse_float(cols[col1_i])?;
let angle = parse_float(cols[col1_i + 1])?.to_radians();
let mut comment = None;
if cols.len() >= col1_i + 2 {
comment = Some(cols[col1_i + 2..].join(" "));
}
Ok(Self {
atom_types,
k,
theta_0: angle,
comment,
})
}
}
#[derive(Debug, Clone, Default)]
pub struct DihedralParams {
pub atom_types: (String, String, String, String),
pub divider: u8,
pub barrier_height: f32,
pub phase: f32,
pub periodicity: u8,
pub comment: Option<String>,
}
impl DihedralParams {
pub fn from_line(line: &str) -> io::Result<(Self, bool)> {
let cols: Vec<_> = line.split_whitespace().collect();
if cols.len() < 4 {
return Err(io::Error::new(
ErrorKind::InvalidData,
format!("Not enough cols (Dihedral) when parsing line {line}"),
));
}
let (atom_types, mut col1_i) = get_atom_types(&cols);
let atom_types = (
atom_types[0].to_owned(),
atom_types[1].to_owned(),
atom_types[2].to_owned(),
atom_types[3].to_owned(),
);
let mut improper = true;
let mut integer_divisor = 1;
if !cols[col1_i].contains(".") {
integer_divisor = parse_float(cols[col1_i])? as u8;
col1_i += 1;
improper = false;
}
let barrier_height_vn = parse_float(cols[col1_i])?;
let phase = parse_float(cols[col1_i + 1])?.to_radians();
let periodicity = parse_float(cols[col1_i + 2])?.abs() as u8;
let mut comment = None;
if cols.len() >= col1_i + 3 {
comment = Some(cols[col1_i + 3..].join(" "));
}
Ok((
Self {
atom_types,
divider: integer_divisor,
barrier_height: barrier_height_vn,
phase,
periodicity,
comment,
},
improper,
))
}
}
#[derive(Debug, Clone)]
pub struct LjParams {
pub atom_type: String,
pub sigma: f32,
pub eps: f32,
}
impl LjParams {
pub fn from_line(line: &str) -> io::Result<Self> {
const SIGMA_FACTOR: f32 = 2. / 1.122_462_048_309_373;
let cols: Vec<_> = line.split_whitespace().collect();
if cols.len() < 3 {
return Err(io::Error::new(
ErrorKind::InvalidData,
format!("Not enough cols (Lennard Jones) when parsing line {line}"),
));
}
let atom_type = cols[0].to_string();
let r_star = parse_float(cols[1])?;
let eps = parse_float(cols[2])?;
let sigma = r_star * SIGMA_FACTOR;
Ok(Self {
atom_type,
sigma,
eps,
})
}
}
#[derive(Clone, Debug)]
pub struct ChargeParamsProtein {
pub type_in_res: AtomTypeInRes,
pub ff_type: String,
pub charge: f32,
}
#[derive(Clone, Debug)]
pub struct ChargeParams {
pub type_in_res: String,
pub ff_type: String,
pub charge: f32,
}
#[derive(Debug, Default)]
pub struct ForceFieldParamsVec {
pub bond: Vec<BondStretchingParams>,
pub angle: Vec<AngleBendingParams>,
pub dihedral: Vec<DihedralParams>,
pub improper: Vec<DihedralParams>,
pub mass: Vec<MassParams>,
pub lennard_jones: Vec<LjParams>,
pub remarks: Vec<String>,
}
#[derive(Clone, Debug, Default)]
pub struct ForceFieldParams {
pub bond: HashMap<(String, String), BondStretchingParams>,
pub angle: HashMap<(String, String, String), AngleBendingParams>,
pub dihedral: HashMap<(String, String, String, String), Vec<DihedralParams>>,
pub improper: HashMap<(String, String, String, String), Vec<DihedralParams>>,
pub mass: HashMap<String, MassParams>,
pub lennard_jones: HashMap<String, LjParams>,
}
impl ForceFieldParams {
pub fn new(params: &ForceFieldParamsVec) -> Self {
let mut result = Self::default();
for val in ¶ms.mass {
result.mass.insert(val.atom_type.clone(), val.clone());
}
for val in ¶ms.bond {
result.bond.insert(val.atom_types.clone(), val.clone());
}
for val in ¶ms.angle {
result.angle.insert(val.atom_types.clone(), val.clone());
}
for val in ¶ms.dihedral {
result
.dihedral
.entry(val.atom_types.clone())
.and_modify(|v| v.push(val.clone()))
.or_insert_with(|| vec![val.clone()]);
}
for val in ¶ms.improper {
result
.improper
.entry(val.atom_types.clone())
.and_modify(|v| v.push(val.clone()))
.or_insert_with(|| vec![val.clone()]);
}
for val in ¶ms.lennard_jones {
result
.lennard_jones
.insert(val.atom_type.clone(), val.clone());
}
result
}
pub fn merge_with(&self, other: &Self) -> Self {
let mut result = other.clone();
result.mass.extend(self.mass.clone());
result.lennard_jones.extend(self.lennard_jones.clone());
result.bond.extend(self.bond.clone());
result.angle.extend(self.angle.clone());
result.dihedral.extend(self.dihedral.clone());
result.improper.extend(self.improper.clone());
result
}
pub fn from_frcmod(text: &str) -> io::Result<Self> {
Ok(Self::new(&ForceFieldParamsVec::from_frcmod(text)?))
}
pub fn from_dat(text: &str) -> io::Result<Self> {
Ok(Self::new(&ForceFieldParamsVec::from_dat(text)?))
}
pub fn load_frcmod(path: &Path) -> io::Result<Self> {
Ok(Self::new(&ForceFieldParamsVec::load_frcmod(path)?))
}
pub fn load_dat(path: &Path) -> io::Result<Self> {
Ok(Self::new(&ForceFieldParamsVec::load_dat(path)?))
}
fn wildcard_variants(atom: &str) -> Vec<String> {
let mut out = Vec::new();
out.push(atom.to_string());
if !atom.is_empty() {
let first = atom.chars().next().unwrap();
if first.is_ascii_alphabetic() {
out.push(format!("{}*", first));
}
}
out.push("X".to_string());
out
}
pub fn get_bond(
&self,
atom_types: &(String, String),
wildcard_allowed: bool,
) -> Option<&BondStretchingParams> {
let (a_variants, b_variants) = if wildcard_allowed {
(
Self::wildcard_variants(&atom_types.0),
Self::wildcard_variants(&atom_types.1),
)
} else {
(
vec![atom_types.0.to_string()],
vec![atom_types.1.to_string()],
)
};
for a in &a_variants {
for b in &b_variants {
for &(k0, k1) in &[(a, b), (b, a)] {
let key = (k0.clone(), k1.clone());
if let Some(hit) = self.bond.get(&key) {
return Some(hit);
}
}
}
}
None
}
pub fn get_valence_angle(
&self,
atom_types: &(String, String, String),
wildcard_allowed: bool,
) -> Option<&AngleBendingParams> {
let (a_variants, b_variants, c_variants) = if wildcard_allowed {
(
Self::wildcard_variants(&atom_types.0),
Self::wildcard_variants(&atom_types.1),
Self::wildcard_variants(&atom_types.2),
)
} else {
(
vec![atom_types.0.to_string()],
vec![atom_types.1.to_string()],
vec![atom_types.2.to_string()],
)
};
for a in &a_variants {
for b in &b_variants {
for c in &c_variants {
for &(k0, k1, k2) in &[(a, b, c), (c, b, a)] {
let key = (k0.clone(), k1.clone(), k2.clone());
if let Some(hit) = self.angle.get(&key) {
return Some(hit);
}
}
}
}
}
None
}
pub fn get_dihedral(
&self,
atom_types: &(String, String, String, String),
proper: bool,
wildcard_allowed: bool,
) -> Option<&Vec<DihedralParams>> {
let (a_variants, b_variants, c_variants, d_variants) = if wildcard_allowed {
(
Self::wildcard_variants(&atom_types.0),
Self::wildcard_variants(&atom_types.1),
Self::wildcard_variants(&atom_types.2),
Self::wildcard_variants(&atom_types.3),
)
} else {
(
vec![atom_types.0.to_string()],
vec![atom_types.1.to_string()],
vec![atom_types.2.to_string()],
vec![atom_types.3.to_string()],
)
};
for a in &a_variants {
for b in &b_variants {
for c in &c_variants {
for d in &d_variants {
for &(k0, k1, k2, k3) in &[(a, b, c, d), (d, c, b, a)] {
let key = (k0.clone(), k1.clone(), k2.clone(), k3.clone());
let hit = if proper {
self.dihedral.get(&key)
} else {
self.improper.get(&key)
};
if let Some(h) = hit {
return Some(h);
}
}
}
}
}
}
None
}
}
pub(crate) fn get_atom_types(cols: &[&str]) -> (Vec<String>, usize) {
let mut atom_types = cols[0].to_string();
let mut col_1_i = 1;
for col in &cols[1..] {
if col.parse::<f32>().is_ok() || col_1_i >= 4 {
break; }
if col.starts_with("-") {
atom_types += col;
col_1_i += 1;
}
}
let atom_types: Vec<_> = atom_types.split("-").map(|v| v.to_owned()).collect();
(atom_types, col_1_i)
}
fn parse_float(v: &str) -> io::Result<f32> {
v.parse()
.map_err(|_| io::Error::new(ErrorKind::InvalidData, format!("Invalid float: {v}")))
}
pub fn parse_lib_peptide(
text: &str,
) -> io::Result<HashMap<AminoAcidGeneral, Vec<ChargeParamsProtein>>> {
let parsed = parse_lib(text)?;
let mut result = HashMap::new();
for (tag, v) in parsed {
let Ok(aa) = AminoAcidGeneral::from_str(&tag) else {
return Err(io::Error::new(
ErrorKind::InvalidData,
"Unable to parse amino acid from lib",
));
};
let mut v_prot = Vec::new();
for param in v {
v_prot.push(ChargeParamsProtein {
type_in_res: AtomTypeInRes::from_str(¶m.type_in_res)?,
ff_type: param.ff_type,
charge: param.charge,
});
}
result.insert(aa, v_prot);
}
Ok(result)
}
pub fn parse_lib_lipid(text: &str) -> io::Result<HashMap<LipidStandard, Vec<ChargeParams>>> {
let parsed = parse_lib(text)?;
let mut result = HashMap::new();
for (tag, v) in parsed {
let Ok(s) = LipidStandard::from_str(&tag) else {
return Err(io::Error::new(
ErrorKind::InvalidData,
format!("Unable to parse lipid from lib: {tag}"),
));
};
result.insert(s, v);
}
Ok(result)
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct NucleotideTemplate {
pub nt: Nucleotide,
pub end: ResidueEnd,
}
impl FromStr for NucleotideTemplate {
type Err = io::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let s = s.to_owned().replace("D", "");
let first = match s.chars().next() {
Some(c) => c,
None => {
return Err(io::Error::new(
ErrorKind::InvalidInput,
format!("Nucleotide in template too short: {s}"),
));
}
};
let nt = match first {
'A' => Nucleotide::A,
'C' => Nucleotide::C,
'T' | 'U' => Nucleotide::T,
'G' => Nucleotide::G,
_ => {
return Err(io::Error::new(
ErrorKind::InvalidInput,
format!("Unrecognized nucleotide in template: {s}"),
));
}
};
let end = if s.ends_with('5') {
ResidueEnd::NTerminus
} else if s.ends_with('3') {
ResidueEnd::CTerminus
} else if s.ends_with('N') {
ResidueEnd::Hetero } else {
ResidueEnd::Internal
};
Ok(Self { nt, end })
}
}
pub fn parse_lib_nucleic_acid(
text: &str,
) -> io::Result<HashMap<NucleotideTemplate, Vec<ChargeParams>>> {
let parsed = parse_lib(text)?;
let mut result = HashMap::new();
for (tag, v) in parsed {
if &tag == "OHE" {
continue;
}
let Ok(s) = NucleotideTemplate::from_str(&tag) else {
return Err(io::Error::new(
ErrorKind::InvalidData,
format!("Unable to parse nucleotide from lib: {tag}"),
));
};
result.insert(s, v);
}
Ok(result)
}
pub fn parse_lib(text: &str) -> io::Result<HashMap<String, Vec<ChargeParams>>> {
enum Mode {
Scan, InAtoms { res: String }, }
let mut state = Mode::Scan;
let mut result: HashMap<String, Vec<ChargeParams>> = HashMap::new();
let lines: Vec<&str> = text.lines().collect();
for line in lines {
let ltrim = line.trim_start();
if let Some(rest) = ltrim.strip_prefix("!entry.") {
state = Mode::Scan;
if let Some((tag, tail)) = rest.split_once('.') {
if tail.starts_with("unit.atoms table") {
state = Mode::InAtoms {
res: tag.to_owned(),
};
result.entry(tag.to_owned()).or_default(); }
}
continue;
}
if let Mode::InAtoms { ref res } = state {
if ltrim.is_empty() || ltrim.starts_with('!') {
state = Mode::Scan;
continue;
}
let mut tokens = Vec::<&str>::new();
let mut in_quote = false;
let mut start = 0usize;
let bytes = ltrim.as_bytes();
for (i, &b) in bytes.iter().enumerate() {
match b {
b'"' => in_quote = !in_quote,
b' ' | b'\t' if !in_quote => {
if start < i {
tokens.push(<rim[start..i]);
}
start = i + 1;
}
_ => {}
}
}
if start < ltrim.len() {
tokens.push(<rim[start..]);
}
let type_in_res = tokens[0].trim_matches('"').to_string();
let ff_type = tokens[1].trim_matches('"').to_string();
let charge = parse_float(tokens.last().unwrap())?;
result.get_mut(res).unwrap().push(ChargeParams {
type_in_res,
ff_type,
charge,
});
}
}
Ok(result)
}
pub fn load_amino_charges(
path: &Path,
) -> io::Result<HashMap<AminoAcidGeneral, Vec<ChargeParamsProtein>>> {
let mut file = File::open(path)?;
let mut buffer = Vec::new();
file.read_to_end(&mut buffer)?;
let data_str: String = String::from_utf8(buffer)
.map_err(|_| io::Error::new(ErrorKind::InvalidData, "Invalid UTF8"))?;
parse_lib_peptide(&data_str)
}