use crate::core::PdbStructure;
use std::collections::HashMap;
/// Residue names whose composition fractions are summed by
/// `PdbStructure::hydrophobic_ratio`.
///
/// Entries are upper-case three-letter codes and are looked up verbatim in the
/// composition map.
pub const HYDROPHOBIC_RESIDUES: &[&str] = &[
    "ALA", "VAL", "ILE", "LEU", "MET", "PHE", "TRP", "PRO",
];

/// Residue names whose composition fractions are summed by
/// `PdbStructure::polar_ratio`.
pub const POLAR_RESIDUES: &[&str] = &["SER", "THR", "CYS", "TYR", "ASN", "GLN"];

/// Residue names whose composition fractions are summed by
/// `PdbStructure::charged_ratio`.
pub const CHARGED_RESIDUES: &[&str] = &["ASP", "GLU", "LYS", "ARG", "HIS"];

/// Residue names whose composition fractions are summed by
/// `PdbStructure::aromatic_ratio`.
///
/// This list overlaps the others: `PHE` and `TRP` also appear in
/// [`HYDROPHOBIC_RESIDUES`], and `TYR` in [`POLAR_RESIDUES`].
pub const AROMATIC_RESIDUES: &[&str] = &["PHE", "TYR", "TRP"];

/// Residue names whose composition fractions are summed by
/// `PdbStructure::small_ratio`.
///
/// `GLY` appears only here, so the hydrophobic, polar and charged lists do not
/// cover it. `ALA` and `PRO` also appear in [`HYDROPHOBIC_RESIDUES`], and `SER`
/// in [`POLAR_RESIDUES`].
pub const SMALL_RESIDUES: &[&str] = &["GLY", "ALA", "SER", "PRO"];
impl PdbStructure {
    /// Returns the fraction of alpha-carbon records carrying each residue name.
    ///
    /// Every atom accepted by `is_alpha_carbon` adds one count under its
    /// `residue_name` (used as stored, without trimming); each count is then
    /// divided by the number of accepted atoms. The map is empty when no atom
    /// qualifies.
    ///
    /// Counting is per atom record, so a residue that contributes several `CA`
    /// records is counted once per record.
    pub fn aa_composition(&self) -> HashMap<String, f64> {
        // Borrow the names while counting; only the distinct keys are copied
        // into the returned map.
        let mut counts: HashMap<&str, usize> = HashMap::new();
        let mut total = 0usize;
        for atom in &self.atoms {
            if is_alpha_carbon(&atom.name, &atom.residue_name) {
                *counts.entry(atom.residue_name.as_str()).or_insert(0) += 1;
                total += 1;
            }
        }

        // `counts` is empty exactly when `total` is zero, so the division
        // below never runs with a zero denominator.
        counts
            .into_iter()
            .map(|(residue, count)| (residue.to_owned(), count as f64 / total as f64))
            .collect()
    }

    /// Returns the composition fraction of `GLY`, or `0.0` when it is absent.
    pub fn glycine_ratio(&self) -> f64 {
        self.aa_composition().get("GLY").copied().unwrap_or(0.0)
    }

    /// Returns the summed composition fraction of [`HYDROPHOBIC_RESIDUES`].
    pub fn hydrophobic_ratio(&self) -> f64 {
        class_fraction(&self.aa_composition(), HYDROPHOBIC_RESIDUES)
    }

    /// Returns the summed composition fraction of [`POLAR_RESIDUES`].
    pub fn polar_ratio(&self) -> f64 {
        class_fraction(&self.aa_composition(), POLAR_RESIDUES)
    }

    /// Returns the summed composition fraction of [`CHARGED_RESIDUES`].
    pub fn charged_ratio(&self) -> f64 {
        class_fraction(&self.aa_composition(), CHARGED_RESIDUES)
    }

    /// Returns the summed composition fraction of [`AROMATIC_RESIDUES`].
    pub fn aromatic_ratio(&self) -> f64 {
        class_fraction(&self.aa_composition(), AROMATIC_RESIDUES)
    }

    /// Returns the summed composition fraction of [`SMALL_RESIDUES`].
    pub fn small_ratio(&self) -> f64 {
        class_fraction(&self.aa_composition(), SMALL_RESIDUES)
    }

    /// Returns the number of atoms accepted by `is_alpha_carbon`.
    ///
    /// This is a count of atom records: several `CA` records that belong to
    /// the same residue are each counted.
    pub fn count_ca_residues(&self) -> usize {
        self.atoms
            .iter()
            .filter(|atom| is_alpha_carbon(&atom.name, &atom.residue_name))
            .count()
    }

    /// Returns the fraction of residue numbers that are absent from the chains.
    ///
    /// For each `chain_id`, the residue numbers of its alpha carbons are
    /// collected and de-duplicated. The chain is expected to cover every
    /// number from its smallest to its largest, and any number in that range
    /// without an alpha carbon is missing. The result is the number of missing
    /// residues summed over all chains, divided by the summed expected count.
    /// It is `0.0` when the structure has no alpha carbons.
    ///
    /// Residue numbers are de-duplicated because several `CA` records can
    /// share one number (alternate locations, insertion codes). Counting each
    /// record as a separate residue would hide real gaps, both in the same
    /// chain and, through the pooled totals, in other chains.
    pub fn missing_residue_ratio(&self) -> f64 {
        let mut chain_residues: HashMap<&str, Vec<i32>> = HashMap::new();
        for atom in &self.atoms {
            if is_alpha_carbon(&atom.name, &atom.residue_name) {
                chain_residues
                    .entry(atom.chain_id.as_str())
                    .or_default()
                    .push(atom.residue_seq);
            }
        }

        let mut total_expected = 0u64;
        let mut total_present = 0u64;
        for residues in chain_residues.values_mut() {
            residues.sort_unstable();
            residues.dedup();
            if let (Some(&first), Some(&last)) = (residues.first(), residues.last()) {
                // Widened to i64 so the span cannot overflow even for the most
                // extreme i32 numbering; the difference is non-negative because
                // the list is sorted.
                let span = (i64::from(last) - i64::from(first)) as u64 + 1;
                total_expected += span;
                total_present += residues.len() as u64;
            }
        }

        if total_expected == 0 {
            return 0.0;
        }
        // Distinct numbers inside [first, last] can never outnumber the span,
        // so this subtraction cannot underflow.
        let missing = total_expected - total_present;
        missing as f64 / total_expected as f64
    }
}

/// Returns `true` when an atom record should be treated as a residue's alpha carbon.
///
/// The atom name is compared after trimming, so column padding does not matter.
/// A calcium ion's atom is also named `CA` once trimmed; it is told apart by its
/// residue name, which is likewise `CA`, and is rejected here so that ions are
/// not counted as amino-acid residues.
fn is_alpha_carbon(atom_name: &str, residue_name: &str) -> bool {
    atom_name.trim() == "CA" && residue_name.trim() != "CA"
}

/// Sums the composition fractions of the listed residue names.
///
/// Names missing from `composition` contribute nothing.
fn class_fraction(composition: &HashMap<String, f64>, residues: &[&str]) -> f64 {
    residues
        .iter()
        .filter_map(|&aa| composition.get(aa))
        .sum()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::records::Atom;

    /// Absolute tolerance used when comparing computed ratios.
    const TOLERANCE: f64 = 1e-10;

    /// Asserts that `actual` lies within [`TOLERANCE`] of `expected`.
    #[track_caller]
    fn assert_close(actual: f64, expected: f64) {
        assert!((actual - expected).abs() < TOLERANCE);
    }

    /// Builds a single-chain structure of five consecutively numbered CA atoms:
    /// two ALA, two GLY and one VAL.
    fn create_test_structure() -> PdbStructure {
        let residue_names = ["ALA", "ALA", "GLY", "GLY", "VAL"];
        let mut structure = PdbStructure::new();
        structure.atoms = residue_names
            .iter()
            .enumerate()
            .map(|(index, &residue_name)| {
                // Serial and residue numbers both run 1..=5.
                let number = index as i32 + 1;
                create_ca_atom(number, residue_name, "A", number)
            })
            .collect();
        structure
    }

    /// Builds a CA atom record with fixed filler values for every field the
    /// composition code does not read.
    fn create_ca_atom(serial: i32, residue_name: &str, chain_id: &str, residue_seq: i32) -> Atom {
        Atom {
            serial,
            residue_seq,
            name: String::from(" CA "),
            residue_name: String::from(residue_name),
            chain_id: String::from(chain_id),
            element: String::from("C"),
            alt_loc: None,
            ins_code: None,
            is_hetatm: false,
            x: f64::from(serial),
            y: 0.0,
            z: 0.0,
            occupancy: 1.0,
            temp_factor: 20.0,
        }
    }

    #[test]
    fn test_aa_composition() {
        let composition = create_test_structure().aa_composition();

        assert_eq!(composition.len(), 3);
        assert_close(composition["ALA"], 0.4);
        assert_close(composition["GLY"], 0.4);
        assert_close(composition["VAL"], 0.2);
    }

    #[test]
    fn test_glycine_ratio() {
        assert_close(create_test_structure().glycine_ratio(), 0.4);
    }

    #[test]
    fn test_hydrophobic_ratio() {
        // ALA (0.4) and VAL (0.2) are in the hydrophobic list; GLY is not.
        assert_close(create_test_structure().hydrophobic_ratio(), 0.6);
    }

    #[test]
    fn test_count_ca_residues() {
        assert_eq!(create_test_structure().count_ca_residues(), 5);
    }

    #[test]
    fn test_missing_residue_ratio_no_gaps() {
        assert_close(create_test_structure().missing_residue_ratio(), 0.0);
    }

    #[test]
    fn test_missing_residue_ratio_with_gaps() {
        // Residues 1, 3 and 5 are present, so 2 of the 5 numbers in 1..=5 are missing.
        let mut structure = PdbStructure::new();
        structure.atoms = [1, 3, 5]
            .iter()
            .enumerate()
            .map(|(index, &residue_seq)| create_ca_atom(index as i32 + 1, "ALA", "A", residue_seq))
            .collect();

        assert_close(structure.missing_residue_ratio(), 0.4);
    }

    #[test]
    fn test_empty_structure_composition() {
        let structure = PdbStructure::new();

        assert!(structure.aa_composition().is_empty());
        assert_eq!(structure.glycine_ratio(), 0.0);
        assert_eq!(structure.hydrophobic_ratio(), 0.0);
        assert_eq!(structure.count_ca_residues(), 0);
        assert_eq!(structure.missing_residue_ratio(), 0.0);
    }

    #[test]
    fn test_composition_ignores_non_ca_atoms() {
        // Same residue as the first CA atom, but a different atom name, so it
        // must not be counted.
        let non_ca_atom = Atom {
            name: String::from(" N "),
            element: String::from("N"),
            x: 0.0,
            ..create_ca_atom(2, "ALA", "A", 1)
        };

        let mut structure = PdbStructure::new();
        structure.atoms = vec![
            create_ca_atom(1, "ALA", "A", 1),
            non_ca_atom,
            create_ca_atom(3, "GLY", "A", 2),
        ];

        let composition = structure.aa_composition();
        assert_eq!(structure.count_ca_residues(), 2);
        assert_close(composition["ALA"], 0.5);
        assert_close(composition["GLY"], 0.5);
    }
}