use super::clash::{AtomClash, detect_clashes, detect_cofactor_clashes, find_min_distance};
use super::overlap::{calculate_volume_overlap, filter_nearby_protein_atoms};
use super::{MAX_VOLUME_OVERLAP_PCT, WATER_RESIDUES};
use crate::core::PdbStructure;
use crate::records::Atom;
use std::collections::HashSet;
#[derive(Debug, Clone)]
pub struct LigandPoseReport {
pub ligand_name: String,
pub ligand_chain_id: String,
pub ligand_residue_seq: i32,
pub ligand_atom_count: usize,
pub min_protein_ligand_distance: f64,
pub clashes: Vec<AtomClash>,
pub has_protein_clash: bool,
pub num_clashes: usize,
pub worst_clash_severity: f64,
pub protein_volume_overlap_pct: f64,
pub cofactor_clashes: Vec<AtomClash>,
pub has_cofactor_clash: bool,
pub num_cofactor_clashes: usize,
pub passes_distance_check: bool,
pub passes_overlap_check: bool,
pub is_geometry_valid: bool,
}
impl LigandPoseReport {
pub fn new(ligand_name: &str, chain_id: &str, residue_seq: i32) -> Self {
Self {
ligand_name: ligand_name.to_string(),
ligand_chain_id: chain_id.to_string(),
ligand_residue_seq: residue_seq,
ligand_atom_count: 0,
min_protein_ligand_distance: f64::INFINITY,
clashes: Vec::new(),
has_protein_clash: false,
num_clashes: 0,
worst_clash_severity: 0.0,
protein_volume_overlap_pct: 0.0,
cofactor_clashes: Vec::new(),
has_cofactor_clash: false,
num_cofactor_clashes: 0,
passes_distance_check: true,
passes_overlap_check: true,
is_geometry_valid: true,
}
}
pub fn summary(&self) -> String {
let status = if self.is_geometry_valid {
"PASS"
} else {
"FAIL"
};
format!(
"{} ({}{}): {} - {} clashes, {:.1}% overlap, min_dist={:.2}Å",
self.ligand_name,
self.ligand_chain_id,
self.ligand_residue_seq,
status,
self.num_clashes,
self.protein_volume_overlap_pct,
self.min_protein_ligand_distance
)
}
}
pub fn compute_ligand_pose_report(
structure: &PdbStructure,
ligand_name: &str,
) -> Option<LigandPoseReport> {
let ligand_atoms: Vec<&Atom> = structure
.atoms
.iter()
.filter(|atom| atom.is_hetatm && atom.residue_name == ligand_name)
.collect();
if ligand_atoms.is_empty() {
return None;
}
let first_lig = ligand_atoms[0];
let mut report = LigandPoseReport::new(ligand_name, &first_lig.chain_id, first_lig.residue_seq);
report.ligand_atom_count = ligand_atoms.len();
let protein_atoms: Vec<&Atom> = structure
.atoms
.iter()
.filter(|atom| !atom.is_hetatm)
.collect();
let cofactor_atoms: Vec<&Atom> = structure
.atoms
.iter()
.filter(|atom| {
atom.is_hetatm
&& atom.residue_name != ligand_name
&& !WATER_RESIDUES.contains(&atom.residue_name.as_str())
})
.collect();
let connected_pairs = build_connectivity_set(structure);
report.min_protein_ligand_distance = find_min_distance(&ligand_atoms, &protein_atoms);
report.clashes = detect_clashes(&ligand_atoms, &protein_atoms, &connected_pairs);
report.num_clashes = report.clashes.len();
report.has_protein_clash = !report.clashes.is_empty();
report.worst_clash_severity = report
.clashes
.iter()
.map(|c| c.severity)
.fold(0.0f64, f64::max);
report.cofactor_clashes =
detect_cofactor_clashes(&ligand_atoms, &cofactor_atoms, &connected_pairs);
report.num_cofactor_clashes = report.cofactor_clashes.len();
report.has_cofactor_clash = !report.cofactor_clashes.is_empty();
let nearby_protein = filter_nearby_protein_atoms(&ligand_atoms, &protein_atoms, 8.0);
let nearby_refs: Vec<&Atom> = nearby_protein.to_vec();
report.protein_volume_overlap_pct = calculate_volume_overlap(&ligand_atoms, &nearby_refs);
report.passes_distance_check = !report.has_protein_clash;
report.passes_overlap_check = report.protein_volume_overlap_pct < MAX_VOLUME_OVERLAP_PCT;
report.is_geometry_valid = report.passes_distance_check && report.passes_overlap_check;
Some(report)
}
pub fn compute_all_ligand_reports(structure: &PdbStructure) -> Vec<LigandPoseReport> {
let mut seen: HashSet<(String, String, i32)> = HashSet::new();
let mut ligand_ids: Vec<(String, String, i32)> = Vec::new();
for atom in &structure.atoms {
if atom.is_hetatm && !WATER_RESIDUES.contains(&atom.residue_name.as_str()) {
let id = (
atom.residue_name.clone(),
atom.chain_id.clone(),
atom.residue_seq,
);
if !seen.contains(&id) {
seen.insert(id.clone());
ligand_ids.push(id);
}
}
}
ligand_ids.sort_by(|a, b| match a.1.cmp(&b.1) {
std::cmp::Ordering::Equal => a.2.cmp(&b.2),
other => other,
});
ligand_ids
.iter()
.filter_map(|(name, _chain, _seq)| compute_ligand_pose_report(structure, name))
.collect()
}
fn build_connectivity_set(structure: &PdbStructure) -> HashSet<(i32, i32)> {
let mut connected: HashSet<(i32, i32)> = HashSet::new();
for conect in &structure.connects {
connected.insert((conect.atom1, conect.atom2));
connected.insert((conect.atom2, conect.atom1));
if let Some(atom3) = conect.atom3 {
connected.insert((conect.atom1, atom3));
connected.insert((atom3, conect.atom1));
}
if let Some(atom4) = conect.atom4 {
connected.insert((conect.atom1, atom4));
connected.insert((atom4, conect.atom1));
}
}
connected
}
#[cfg(test)]
mod tests {
use super::*;
#[allow(clippy::too_many_arguments)]
fn create_test_atom(
serial: i32,
name: &str,
residue_name: &str,
chain_id: &str,
residue_seq: i32,
x: f64,
y: f64,
z: f64,
element: &str,
is_hetatm: bool,
) -> Atom {
Atom {
serial,
name: name.to_string(),
alt_loc: None,
residue_name: residue_name.to_string(),
chain_id: chain_id.to_string(),
residue_seq,
ins_code: None,
is_hetatm,
x,
y,
z,
occupancy: 1.0,
temp_factor: 20.0,
element: element.to_string(),
}
}
fn create_valid_complex() -> PdbStructure {
let mut structure = PdbStructure::new();
structure.atoms.push(create_test_atom(
1, "N", "ALA", "A", 1, 0.0, 0.0, 0.0, "N", false,
));
structure.atoms.push(create_test_atom(
2, "CA", "ALA", "A", 1, 1.5, 0.0, 0.0, "C", false,
));
structure.atoms.push(create_test_atom(
3, "C", "ALA", "A", 1, 3.0, 0.0, 0.0, "C", false,
));
structure.atoms.push(create_test_atom(
4, "O", "ALA", "A", 1, 3.0, 1.2, 0.0, "O", false,
));
structure.atoms.push(create_test_atom(
10, "C1", "LIG", "A", 100, 10.0, 10.0, 10.0, "C", true,
));
structure.atoms.push(create_test_atom(
11, "O1", "LIG", "A", 100, 11.0, 10.0, 10.0, "O", true,
));
structure.atoms.push(create_test_atom(
12, "N1", "LIG", "A", 100, 10.0, 11.0, 10.0, "N", true,
));
structure
}
fn create_clashing_complex() -> PdbStructure {
let mut structure = PdbStructure::new();
structure.atoms.push(create_test_atom(
1, "CA", "ALA", "A", 1, 0.0, 0.0, 0.0, "C", false,
));
structure.atoms.push(create_test_atom(
10, "C1", "BAD", "A", 100, 1.5, 0.0, 0.0, "C", true,
));
structure
}
#[test]
fn test_report_new() {
let report = LigandPoseReport::new("LIG", "A", 100);
assert_eq!(report.ligand_name, "LIG");
assert_eq!(report.ligand_chain_id, "A");
assert_eq!(report.ligand_residue_seq, 100);
assert!(report.is_geometry_valid);
}
#[test]
fn test_compute_valid_pose_report() {
let structure = create_valid_complex();
let report = compute_ligand_pose_report(&structure, "LIG");
assert!(report.is_some());
let report = report.unwrap();
assert_eq!(report.ligand_name, "LIG");
assert_eq!(report.ligand_atom_count, 3);
assert!(!report.has_protein_clash);
assert_eq!(report.num_clashes, 0);
assert!(report.passes_distance_check);
assert!(report.is_geometry_valid);
}
#[test]
fn test_compute_clashing_pose_report() {
let structure = create_clashing_complex();
let report = compute_ligand_pose_report(&structure, "BAD");
assert!(report.is_some());
let report = report.unwrap();
assert!(report.has_protein_clash);
assert!(report.num_clashes > 0);
assert!(!report.passes_distance_check);
assert!(!report.is_geometry_valid);
}
#[test]
fn test_compute_nonexistent_ligand() {
let structure = create_valid_complex();
let report = compute_ligand_pose_report(&structure, "XYZ");
assert!(report.is_none());
}
#[test]
fn test_compute_all_reports() {
let structure = create_valid_complex();
let reports = compute_all_ligand_reports(&structure);
assert_eq!(reports.len(), 1);
assert_eq!(reports[0].ligand_name, "LIG");
}
#[test]
fn test_report_summary() {
let structure = create_valid_complex();
let report = compute_ligand_pose_report(&structure, "LIG").unwrap();
let summary = report.summary();
assert!(summary.contains("LIG"));
assert!(summary.contains("PASS") || summary.contains("FAIL"));
}
#[test]
fn test_connectivity_excludes_bonded() {
let mut structure = create_clashing_complex();
structure.connects.push(crate::records::Conect {
atom1: 1,
atom2: 10,
atom3: None,
atom4: None,
});
let report = compute_ligand_pose_report(&structure, "BAD");
assert!(report.is_some());
let report = report.unwrap();
assert!(!report.has_protein_clash);
assert_eq!(report.num_clashes, 0);
}
#[test]
fn test_water_excluded() {
let mut structure = create_valid_complex();
structure.atoms.push(create_test_atom(
20, "O", "HOH", "A", 200, 5.0, 5.0, 5.0, "O", true,
));
let reports = compute_all_ligand_reports(&structure);
assert_eq!(reports.len(), 1);
assert_eq!(reports[0].ligand_name, "LIG");
}
}