use crate::PdbStructure;
use crate::records::Atom;
use std::collections::{HashMap, HashSet};
type ResidueContactInfo = (f64, usize, String);
#[derive(Debug, Clone)]
pub struct ContactResidue {
pub chain_id: String,
pub residue_seq: i32,
pub residue_name: String,
pub ins_code: Option<char>,
pub min_distance: f64,
pub num_contacts: usize,
}
#[derive(Debug, Clone)]
pub struct ProteinLigandHBond {
pub protein_chain: String,
pub protein_resid: i32,
pub protein_resname: String,
pub protein_atom: String,
pub ligand_name: String,
pub ligand_atom: String,
pub distance: f64,
pub is_protein_donor: bool,
}
#[derive(Debug, Clone)]
pub struct SaltBridge {
pub protein_chain: String,
pub protein_resid: i32,
pub protein_resname: String,
pub protein_atom: String,
pub ligand_name: String,
pub ligand_atom: String,
pub distance: f64,
pub protein_positive: bool,
}
#[derive(Debug, Clone)]
pub struct HydrophobicContact {
pub protein_chain: String,
pub protein_resid: i32,
pub protein_resname: String,
pub protein_atom: String,
pub ligand_name: String,
pub ligand_atom: String,
pub distance: f64,
}
#[derive(Debug, Clone)]
pub struct LigandInteractionProfile {
pub ligand_name: String,
pub ligand_chain: String,
pub ligand_resid: i32,
pub contact_residues: Vec<ContactResidue>,
pub hydrogen_bonds: Vec<ProteinLigandHBond>,
pub salt_bridges: Vec<SaltBridge>,
pub hydrophobic_contacts: Vec<HydrophobicContact>,
}
impl LigandInteractionProfile {
pub fn total_interactions(&self) -> usize {
self.hydrogen_bonds.len() + self.salt_bridges.len() + self.hydrophobic_contacts.len()
}
pub fn has_interactions(&self) -> bool {
!self.hydrogen_bonds.is_empty()
|| !self.salt_bridges.is_empty()
|| !self.hydrophobic_contacts.is_empty()
}
}
#[derive(Debug, Clone)]
pub struct BindingSite {
pub ligand_name: String,
pub ligand_chain: String,
pub ligand_resid: i32,
pub distance_cutoff: f64,
pub contact_residues: Vec<ContactResidue>,
}
impl BindingSite {
pub fn num_residues(&self) -> usize {
self.contact_residues.len()
}
pub fn residues_by_distance(&self) -> Vec<&ContactResidue> {
let mut sorted: Vec<_> = self.contact_residues.iter().collect();
sorted.sort_by(|a, b| a.min_distance.partial_cmp(&b.min_distance).unwrap());
sorted
}
}
const HBOND_DISTANCE_CUTOFF: f64 = 3.5;
const SALT_BRIDGE_CUTOFF: f64 = 4.0;
const HYDROPHOBIC_CUTOFF: f64 = 4.0;
const HBOND_DONORS: &[&str] = &[
"N", "NE", "NH1", "NH2", "NZ", "ND1", "ND2", "NE1", "NE2", "OG", "OG1", "OH",
];
const HBOND_ACCEPTORS: &[&str] = &[
"O", "OD1", "OD2", "OE1", "OE2", "OG", "OG1", "OH", "ND1", "NE2",
];
const POSITIVE_ATOMS: &[(&str, &str)] = &[
("LYS", "NZ"),
("ARG", "NH1"),
("ARG", "NH2"),
("ARG", "NE"),
("HIS", "ND1"),
("HIS", "NE2"),
];
const NEGATIVE_ATOMS: &[(&str, &str)] = &[
("ASP", "OD1"),
("ASP", "OD2"),
("GLU", "OE1"),
("GLU", "OE2"),
];
const HYDROPHOBIC_RESIDUES: &[&str] = &["ALA", "VAL", "LEU", "ILE", "MET", "PHE", "TRP", "PRO"];
fn atom_distance(a1: &Atom, a2: &Atom) -> f64 {
let dx = a1.x - a2.x;
let dy = a1.y - a2.y;
let dz = a1.z - a2.z;
(dx * dx + dy * dy + dz * dz).sqrt()
}
fn is_hbond_donor(atom: &Atom) -> bool {
HBOND_DONORS.contains(&atom.name.trim())
}
fn is_hbond_acceptor(atom: &Atom) -> bool {
HBOND_ACCEPTORS.contains(&atom.name.trim())
}
fn is_positive_atom(atom: &Atom) -> bool {
POSITIVE_ATOMS
.iter()
.any(|(res, atm)| atom.residue_name == *res && atom.name.trim() == *atm)
}
fn is_negative_atom(atom: &Atom) -> bool {
NEGATIVE_ATOMS
.iter()
.any(|(res, atm)| atom.residue_name == *res && atom.name.trim() == *atm)
}
fn is_hydrophobic_atom(atom: &Atom) -> bool {
if atom.element.trim() == "C" && HYDROPHOBIC_RESIDUES.contains(&atom.residue_name.as_str()) {
return true;
}
if atom.element.trim() == "C" && !["CA", "C"].contains(&atom.name.trim()) {
return true;
}
false
}
fn ligand_might_donate(atom: &Atom) -> bool {
let elem = atom.element.trim();
elem == "N" || elem == "O"
}
fn ligand_might_accept(atom: &Atom) -> bool {
let elem = atom.element.trim();
elem == "N" || elem == "O" || elem == "S"
}
fn ligand_might_be_charged(atom: &Atom) -> bool {
let elem = atom.element.trim();
let name = atom.name.trim();
(elem == "N" && (name.contains("+") || name.starts_with("N")))
|| (elem == "O" && (name.contains("-") || name.starts_with("O")))
}
fn ligand_is_hydrophobic(atom: &Atom) -> bool {
atom.element.trim() == "C"
}
impl PdbStructure {
pub fn binding_site(&self, ligand_name: &str, distance_cutoff: f64) -> Option<BindingSite> {
let ligand_atoms: Vec<&Atom> = self
.atoms
.iter()
.filter(|a| a.residue_name == ligand_name)
.collect();
if ligand_atoms.is_empty() {
return None;
}
let ligand_chain = ligand_atoms[0].chain_id.clone();
let ligand_resid = ligand_atoms[0].residue_seq;
let protein_atoms: Vec<&Atom> = self
.atoms
.iter()
.filter(|a| is_standard_amino_acid(&a.residue_name))
.collect();
let mut residue_contacts: HashMap<(String, i32, Option<char>), ResidueContactInfo> =
HashMap::new();
for prot_atom in &protein_atoms {
for lig_atom in &ligand_atoms {
let dist = atom_distance(prot_atom, lig_atom);
if dist <= distance_cutoff {
let key = (
prot_atom.chain_id.clone(),
prot_atom.residue_seq,
prot_atom.ins_code,
);
let entry = residue_contacts.entry(key).or_insert((
f64::MAX,
0,
prot_atom.residue_name.clone(),
));
if dist < entry.0 {
entry.0 = dist;
}
entry.1 += 1;
}
}
}
let contact_residues: Vec<ContactResidue> = residue_contacts
.into_iter()
.map(
|((chain_id, residue_seq, ins_code), (min_dist, contacts, res_name))| {
ContactResidue {
chain_id,
residue_seq,
residue_name: res_name,
ins_code,
min_distance: min_dist,
num_contacts: contacts,
}
},
)
.collect();
Some(BindingSite {
ligand_name: ligand_name.to_string(),
ligand_chain,
ligand_resid,
distance_cutoff,
contact_residues,
})
}
pub fn ligand_interactions(&self, ligand_name: &str) -> Option<LigandInteractionProfile> {
let ligand_atoms: Vec<&Atom> = self
.atoms
.iter()
.filter(|a| a.residue_name == ligand_name)
.collect();
if ligand_atoms.is_empty() {
return None;
}
let ligand_chain = ligand_atoms[0].chain_id.clone();
let ligand_resid = ligand_atoms[0].residue_seq;
let binding_site = self.binding_site(ligand_name, 6.0)?;
let protein_atoms: Vec<&Atom> = self
.atoms
.iter()
.filter(|a| is_standard_amino_acid(&a.residue_name))
.collect();
let mut hydrogen_bonds = Vec::new();
let mut salt_bridges = Vec::new();
let mut hydrophobic_contacts = Vec::new();
for prot_atom in &protein_atoms {
for lig_atom in &ligand_atoms {
let dist = atom_distance(prot_atom, lig_atom);
if dist <= HBOND_DISTANCE_CUTOFF {
if is_hbond_donor(prot_atom) && ligand_might_accept(lig_atom) {
hydrogen_bonds.push(ProteinLigandHBond {
protein_chain: prot_atom.chain_id.clone(),
protein_resid: prot_atom.residue_seq,
protein_resname: prot_atom.residue_name.clone(),
protein_atom: prot_atom.name.clone(),
ligand_name: ligand_name.to_string(),
ligand_atom: lig_atom.name.clone(),
distance: dist,
is_protein_donor: true,
});
}
if is_hbond_acceptor(prot_atom) && ligand_might_donate(lig_atom) {
hydrogen_bonds.push(ProteinLigandHBond {
protein_chain: prot_atom.chain_id.clone(),
protein_resid: prot_atom.residue_seq,
protein_resname: prot_atom.residue_name.clone(),
protein_atom: prot_atom.name.clone(),
ligand_name: ligand_name.to_string(),
ligand_atom: lig_atom.name.clone(),
distance: dist,
is_protein_donor: false,
});
}
}
if dist <= SALT_BRIDGE_CUTOFF {
let prot_positive = is_positive_atom(prot_atom);
let prot_negative = is_negative_atom(prot_atom);
let lig_charged = ligand_might_be_charged(lig_atom);
if (prot_positive || prot_negative) && lig_charged {
salt_bridges.push(SaltBridge {
protein_chain: prot_atom.chain_id.clone(),
protein_resid: prot_atom.residue_seq,
protein_resname: prot_atom.residue_name.clone(),
protein_atom: prot_atom.name.clone(),
ligand_name: ligand_name.to_string(),
ligand_atom: lig_atom.name.clone(),
distance: dist,
protein_positive: prot_positive,
});
}
}
if dist <= HYDROPHOBIC_CUTOFF
&& is_hydrophobic_atom(prot_atom)
&& ligand_is_hydrophobic(lig_atom)
{
hydrophobic_contacts.push(HydrophobicContact {
protein_chain: prot_atom.chain_id.clone(),
protein_resid: prot_atom.residue_seq,
protein_resname: prot_atom.residue_name.clone(),
protein_atom: prot_atom.name.clone(),
ligand_name: ligand_name.to_string(),
ligand_atom: lig_atom.name.clone(),
distance: dist,
});
}
}
}
Some(LigandInteractionProfile {
ligand_name: ligand_name.to_string(),
ligand_chain,
ligand_resid,
contact_residues: binding_site.contact_residues,
hydrogen_bonds,
salt_bridges,
hydrophobic_contacts,
})
}
pub fn all_ligand_interactions(&self) -> Vec<LigandInteractionProfile> {
let ligand_names: HashSet<String> = self
.atoms
.iter()
.filter(|a| !is_standard_amino_acid(&a.residue_name))
.filter(|a| !["HOH", "WAT", "H2O", "DOD"].contains(&a.residue_name.as_str()))
.map(|a| a.residue_name.clone())
.collect();
ligand_names
.into_iter()
.filter_map(|name| self.ligand_interactions(&name))
.filter(|profile| !profile.contact_residues.is_empty())
.collect()
}
}
fn is_standard_amino_acid(name: &str) -> bool {
const AMINO_ACIDS: &[&str] = &[
"ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY", "HIS", "ILE", "LEU", "LYS", "MET",
"PHE", "PRO", "SER", "THR", "TRP", "TYR", "VAL", "SEC", "PYL",
];
AMINO_ACIDS.contains(&name)
}
#[cfg(test)]
mod tests {
use super::*;
#[allow(clippy::too_many_arguments)]
fn make_atom(
chain: &str,
resid: i32,
res_name: &str,
name: &str,
element: &str,
x: f64,
y: f64,
z: f64,
) -> Atom {
Atom {
serial: resid,
name: name.to_string(),
alt_loc: None,
residue_name: res_name.to_string(),
chain_id: chain.to_string(),
residue_seq: resid,
ins_code: None,
is_hetatm: false,
x,
y,
z,
occupancy: 1.0,
temp_factor: 20.0,
element: element.to_string(),
}
}
#[test]
fn test_is_standard_amino_acid() {
assert!(is_standard_amino_acid("ALA"));
assert!(is_standard_amino_acid("GLY"));
assert!(is_standard_amino_acid("TRP"));
assert!(!is_standard_amino_acid("ATP"));
assert!(!is_standard_amino_acid("HOH"));
}
#[test]
fn test_atom_distance() {
let a1 = make_atom("A", 1, "ALA", "CA", "C", 0.0, 0.0, 0.0);
let a2 = make_atom("A", 2, "GLY", "CA", "C", 3.0, 4.0, 0.0);
assert!((atom_distance(&a1, &a2) - 5.0).abs() < 0.001);
}
#[test]
fn test_is_hbond_donor() {
let donor = make_atom("A", 1, "ALA", "N", "N", 0.0, 0.0, 0.0);
assert!(is_hbond_donor(&donor));
let non_donor = make_atom("A", 1, "ALA", "CA", "C", 0.0, 0.0, 0.0);
assert!(!is_hbond_donor(&non_donor));
}
#[test]
fn test_is_positive_atom() {
let lys_nz = make_atom("A", 1, "LYS", "NZ", "N", 0.0, 0.0, 0.0);
assert!(is_positive_atom(&lys_nz));
let ala_n = make_atom("A", 1, "ALA", "N", "N", 0.0, 0.0, 0.0);
assert!(!is_positive_atom(&ala_n));
}
#[test]
fn test_is_negative_atom() {
let asp_od1 = make_atom("A", 1, "ASP", "OD1", "O", 0.0, 0.0, 0.0);
assert!(is_negative_atom(&asp_od1));
let ala_o = make_atom("A", 1, "ALA", "O", "O", 0.0, 0.0, 0.0);
assert!(!is_negative_atom(&ala_o));
}
#[test]
fn test_binding_site_not_found() {
let structure = PdbStructure::new();
assert!(structure.binding_site("ATP", 5.0).is_none());
}
#[test]
fn test_binding_site_detection() {
let mut structure = PdbStructure::new();
structure
.atoms
.push(make_atom("A", 1, "ALA", "CA", "C", 0.0, 0.0, 0.0));
structure
.atoms
.push(make_atom("A", 2, "GLY", "CA", "C", 3.0, 0.0, 0.0));
structure
.atoms
.push(make_atom("A", 3, "VAL", "CA", "C", 20.0, 0.0, 0.0));
structure
.atoms
.push(make_atom("A", 100, "ATP", "C1", "C", 2.0, 0.0, 0.0));
let site = structure.binding_site("ATP", 5.0);
assert!(site.is_some());
let site = site.unwrap();
assert_eq!(site.ligand_name, "ATP");
assert!(!site.contact_residues.is_empty());
let res_ids: Vec<i32> = site
.contact_residues
.iter()
.map(|r| r.residue_seq)
.collect();
assert!(res_ids.contains(&1));
assert!(res_ids.contains(&2));
assert!(!res_ids.contains(&3));
}
#[test]
fn test_ligand_interaction_profile() {
let profile = LigandInteractionProfile {
ligand_name: "ATP".to_string(),
ligand_chain: "A".to_string(),
ligand_resid: 100,
contact_residues: vec![],
hydrogen_bonds: vec![ProteinLigandHBond {
protein_chain: "A".to_string(),
protein_resid: 1,
protein_resname: "SER".to_string(),
protein_atom: "OG".to_string(),
ligand_name: "ATP".to_string(),
ligand_atom: "O1".to_string(),
distance: 2.8,
is_protein_donor: true,
}],
salt_bridges: vec![],
hydrophobic_contacts: vec![],
};
assert_eq!(profile.total_interactions(), 1);
assert!(profile.has_interactions());
}
#[test]
fn test_binding_site_by_distance() {
let site = BindingSite {
ligand_name: "ATP".to_string(),
ligand_chain: "A".to_string(),
ligand_resid: 100,
distance_cutoff: 5.0,
contact_residues: vec![
ContactResidue {
chain_id: "A".to_string(),
residue_seq: 1,
residue_name: "ALA".to_string(),
ins_code: None,
min_distance: 3.5,
num_contacts: 2,
},
ContactResidue {
chain_id: "A".to_string(),
residue_seq: 2,
residue_name: "GLY".to_string(),
ins_code: None,
min_distance: 2.5,
num_contacts: 1,
},
],
};
let sorted = site.residues_by_distance();
assert_eq!(sorted[0].residue_seq, 2); assert_eq!(sorted[1].residue_seq, 1);
}
}