use crate::core::PdbStructure;
use crate::descriptors::StructureDescriptors;
use crate::quality::QualityReport;
use std::collections::HashMap;
/// Combined view of a structure's quality flags and numeric descriptors.
///
/// Values are normally filled in by `StructureSummary::from_parts`, which
/// copies the quality-related fields from a `QualityReport` and the remaining
/// fields from a `StructureDescriptors`.
///
/// `Default` is derived: every flag is `false`, every count is `0`, every
/// floating-point descriptor is `0.0`, and `aa_composition` is an empty map.
#[derive(Debug, Clone, Default)]
pub struct StructureSummary {
    // --- Fields copied from the quality report ---
    /// Quality flag: the structure contains CA atoms only.
    pub has_ca_only: bool,
    /// Quality flag: the structure contains more than one model.
    pub has_multiple_models: bool,
    /// Quality flag: alternate locations are present.
    pub has_altlocs: bool,
    /// Number of chains reported by the quality check.
    pub num_chains: usize,
    /// Number of models reported by the quality check.
    pub num_models: usize,
    /// Quality flag: HETATM records are present.
    pub has_hetatm: bool,
    /// Quality flag: hydrogen atoms are present.
    pub has_hydrogens: bool,
    /// Quality flag: SSBOND information is present.
    pub has_ssbonds: bool,

    // --- Fields copied from the structure descriptors ---
    /// Residue count as reported by the descriptors.
    pub num_residues: usize,
    /// Atom count as reported by the descriptors.
    pub num_atoms: usize,
    /// Missing-residue ratio as reported by the descriptors.
    pub missing_residue_ratio: f64,
    /// Glycine ratio as reported by the descriptors.
    pub glycine_ratio: f64,
    /// Hydrophobic-residue ratio as reported by the descriptors.
    pub hydrophobic_ratio: f64,
    /// Amino-acid composition keyed by a string label.
    ///
    /// This is the only field that is not part of the CSV export
    /// (`field_names` / `to_csv_values`).
    pub aa_composition: HashMap<String, f64>,
    /// Radius of gyration as reported by the descriptors.
    pub radius_of_gyration: f64,
    /// Maximum CA-CA distance as reported by the descriptors.
    pub max_ca_distance: f64,
    /// Secondary-structure ratio as reported by the descriptors.
    pub secondary_structure_ratio: f64,
    /// Compactness index as reported by the descriptors.
    pub compactness_index: f64,
    /// CA density as reported by the descriptors.
    pub ca_density: f64,
}
impl StructureSummary {
    /// Returns `true` when the structure has at least one atom and none of the
    /// multiple-model, alternate-location or CA-only flags is set.
    pub fn is_analysis_ready(&self) -> bool {
        // Same conditions as `is_clean`, plus the single-model requirement.
        self.is_clean() && !self.has_multiple_models
    }

    /// Returns `true` when the structure has at least one atom and neither the
    /// CA-only nor the alternate-location flag is set.
    pub fn is_clean(&self) -> bool {
        let flagged = self.has_ca_only || self.has_altlocs;
        self.num_atoms > 0 && !flagged
    }

    /// Column names for the CSV export, in the same order as the values
    /// produced by [`to_csv_values`](Self::to_csv_values).
    ///
    /// `aa_composition` is not included.
    pub fn field_names() -> Vec<&'static str> {
        const COLUMNS: [&str; 18] = [
            "has_ca_only",
            "has_multiple_models",
            "has_altlocs",
            "num_chains",
            "num_models",
            "has_hetatm",
            "has_hydrogens",
            "has_ssbonds",
            "num_residues",
            "num_atoms",
            "missing_residue_ratio",
            "glycine_ratio",
            "hydrophobic_ratio",
            "radius_of_gyration",
            "max_ca_distance",
            "secondary_structure_ratio",
            "compactness_index",
            "ca_density",
        ];
        COLUMNS.to_vec()
    }

    /// Renders every exported field as a string, one entry per column of
    /// [`field_names`](Self::field_names).
    ///
    /// Booleans and counts use their `to_string` form. Floating-point values
    /// use a fixed number of decimals: 6 for the ratios and the compactness
    /// index, 4 for the radius of gyration and the maximum CA distance, and 8
    /// for the CA density.
    pub fn to_csv_values(&self) -> Vec<String> {
        let six_dp = |value: f64| format!("{:.6}", value);
        let four_dp = |value: f64| format!("{:.4}", value);

        let mut row = Vec::with_capacity(18);

        // Quality flags and counts.
        row.push(self.has_ca_only.to_string());
        row.push(self.has_multiple_models.to_string());
        row.push(self.has_altlocs.to_string());
        row.push(self.num_chains.to_string());
        row.push(self.num_models.to_string());
        row.push(self.has_hetatm.to_string());
        row.push(self.has_hydrogens.to_string());
        row.push(self.has_ssbonds.to_string());
        row.push(self.num_residues.to_string());
        row.push(self.num_atoms.to_string());

        // Numeric descriptors.
        row.push(six_dp(self.missing_residue_ratio));
        row.push(six_dp(self.glycine_ratio));
        row.push(six_dp(self.hydrophobic_ratio));
        row.push(four_dp(self.radius_of_gyration));
        row.push(four_dp(self.max_ca_distance));
        row.push(six_dp(self.secondary_structure_ratio));
        row.push(six_dp(self.compactness_index));
        row.push(format!("{:.8}", self.ca_density));

        row
    }

    /// Assembles a summary from a quality report and a set of descriptors.
    ///
    /// Flags, chain count and model count come from `quality`; everything
    /// else, including `num_atoms` and `num_residues`, comes from
    /// `descriptors`.
    pub fn from_parts(quality: QualityReport, descriptors: StructureDescriptors) -> Self {
        Self {
            // Taken from the descriptors.
            num_residues: descriptors.num_residues,
            num_atoms: descriptors.num_atoms,
            missing_residue_ratio: descriptors.missing_residue_ratio,
            glycine_ratio: descriptors.glycine_ratio,
            hydrophobic_ratio: descriptors.hydrophobic_ratio,
            aa_composition: descriptors.aa_composition,
            radius_of_gyration: descriptors.radius_of_gyration,
            max_ca_distance: descriptors.max_ca_distance,
            secondary_structure_ratio: descriptors.secondary_structure_ratio,
            compactness_index: descriptors.compactness_index,
            ca_density: descriptors.ca_density,
            // Taken from the quality report.
            has_ca_only: quality.has_ca_only,
            has_multiple_models: quality.has_multiple_models,
            has_altlocs: quality.has_altlocs,
            num_chains: quality.num_chains,
            num_models: quality.num_models,
            has_hetatm: quality.has_hetatm,
            has_hydrogens: quality.has_hydrogens,
            has_ssbonds: quality.has_ssbonds,
        }
    }
}
impl PdbStructure {
    /// Builds a [`StructureSummary`] for this structure by running the quality
    /// report first, then the structure descriptors, and merging the two.
    pub fn summary(&self) -> StructureSummary {
        StructureSummary::from_parts(self.quality_report(), self.structure_descriptors())
    }
}
/// Computes a summary for every structure in `structures`.
///
/// The returned vector has one entry per input, in input order.
pub fn batch_summarize(structures: &[PdbStructure]) -> Vec<StructureSummary> {
    let mut summaries = Vec::with_capacity(structures.len());
    for structure in structures {
        summaries.push(structure.summary());
    }
    summaries
}
/// Serializes `summaries` as comma-separated text, one summary per line.
///
/// When `include_header` is `true` the first line holds the column names from
/// `StructureSummary::field_names`. Every emitted line, including the last,
/// ends with `'\n'`; with no header and no summaries the result is empty.
pub fn summaries_to_csv(summaries: &[StructureSummary], include_header: bool) -> String {
    let mut lines: Vec<String> = Vec::with_capacity(summaries.len() + 1);
    if include_header {
        lines.push(StructureSummary::field_names().join(","));
    }
    lines.extend(summaries.iter().map(|entry| entry.to_csv_values().join(",")));

    let mut csv = lines.join("\n");
    // `join` only places separators between lines; add the terminator of the
    // final line so every line ends with a newline.
    if !lines.is_empty() {
        csv.push('\n');
    }
    csv
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::records::Atom;

    /// Builds a single non-HETATM atom with occupancy 1.0, temperature factor
    /// 20.0, element "C", and no alternate location or insertion code.
    #[allow(clippy::too_many_arguments)]
    fn create_atom(
        serial: i32,
        name: &str,
        residue_name: &str,
        chain_id: &str,
        residue_seq: i32,
        x: f64,
        y: f64,
        z: f64,
    ) -> Atom {
        Atom {
            serial,
            name: name.to_owned(),
            alt_loc: None,
            residue_name: residue_name.to_owned(),
            chain_id: chain_id.to_owned(),
            residue_seq,
            ins_code: None,
            is_hetatm: false,
            x,
            y,
            z,
            occupancy: 1.0,
            temp_factor: 20.0,
            element: "C".to_owned(),
        }
    }

    /// Five atoms on chain "A", all on the x axis: N/CA/C of ALA 1, then the
    /// CA atoms of GLY 2 and VAL 3.
    fn create_test_structure() -> PdbStructure {
        // (serial, atom name, residue name, residue number, x coordinate)
        let layout = [
            (1, " N ", "ALA", 1, 0.0),
            (2, " CA ", "ALA", 1, 1.5),
            (3, " C ", "ALA", 1, 3.0),
            (4, " CA ", "GLY", 2, 6.8),
            (5, " CA ", "VAL", 3, 10.6),
        ];

        let mut structure = PdbStructure::new();
        structure.atoms = layout
            .iter()
            .map(|&(serial, name, residue, seq, x)| {
                create_atom(serial, name, residue, "A", seq, x, 0.0, 0.0)
            })
            .collect();
        structure
    }

    #[test]
    fn test_summary() {
        let report = create_test_structure().summary();

        assert_eq!(report.num_atoms, 5);
        assert_eq!(report.num_chains, 1);
        assert!(!report.has_ca_only);
        assert!(!report.has_multiple_models);
        assert!(!report.has_altlocs);
    }

    #[test]
    fn test_summary_descriptors() {
        let report = create_test_structure().summary();

        assert_eq!(report.num_residues, 3);
        assert!(report.radius_of_gyration > 0.0);
        assert!(report.max_ca_distance > 0.0);
    }

    #[test]
    fn test_summary_is_analysis_ready() {
        let report = create_test_structure().summary();

        assert!(report.is_analysis_ready());
        assert!(report.is_clean());
    }

    #[test]
    fn test_summary_empty_structure() {
        let report = PdbStructure::new().summary();

        assert_eq!(report.num_atoms, 0);
        assert_eq!(report.num_residues, 0);
        assert!(!report.is_analysis_ready());
    }

    #[test]
    fn test_summary_default() {
        let blank = StructureSummary::default();

        assert_eq!(blank.num_atoms, 0);
        assert!(!blank.has_ca_only);
        assert!(blank.aa_composition.is_empty());
    }

    #[test]
    fn test_field_names() {
        let columns = StructureSummary::field_names();

        assert!(columns.contains(&"num_atoms"));
        assert!(columns.contains(&"radius_of_gyration"));
        assert!(columns.contains(&"has_altlocs"));
    }

    #[test]
    fn test_to_csv_values() {
        let row = create_test_structure().summary().to_csv_values();

        // Every header column must have exactly one value.
        assert_eq!(row.len(), StructureSummary::field_names().len());
    }

    #[test]
    fn test_batch_summarize() {
        let inputs = [create_test_structure(), create_test_structure()];
        let results = batch_summarize(&inputs);

        assert_eq!(results.len(), 2);
        assert_eq!(results[0].num_atoms, results[1].num_atoms);
    }

    #[test]
    fn test_summaries_to_csv() {
        let inputs = [create_test_structure()];
        let results = batch_summarize(&inputs);

        let with_header = summaries_to_csv(&results, true);
        let without_header = summaries_to_csv(&results, false);

        assert!(with_header.len() > without_header.len());
        assert!(with_header.contains("num_atoms"));
        assert!(with_header.contains("radius_of_gyration"));
    }

    #[test]
    fn test_from_parts() {
        // The atom/residue counts deliberately differ between the two inputs
        // so the assertions show which source the summary reads from.
        let quality_input = QualityReport {
            has_ca_only: false,
            has_multiple_models: true,
            has_altlocs: false,
            num_chains: 2,
            num_models: 5,
            num_atoms: 100,
            num_residues: 10,
            has_hetatm: true,
            has_hydrogens: false,
            has_ssbonds: true,
            has_conect: false,
        };
        let descriptor_input = StructureDescriptors {
            num_residues: 50,
            num_atoms: 500,
            aa_composition: HashMap::new(),
            glycine_ratio: 0.1,
            hydrophobic_ratio: 0.4,
            radius_of_gyration: 15.0,
            max_ca_distance: 40.0,
            missing_residue_ratio: 0.05,
            secondary_structure_ratio: 0.8,
            compactness_index: 2.5,
            ca_density: 0.005,
            b_factor_mean: 25.0,
            b_factor_mean_ca: 22.0,
            b_factor_min: 10.0,
            b_factor_max: 60.0,
            b_factor_std: 12.0,
        };

        let merged = StructureSummary::from_parts(quality_input, descriptor_input);

        assert!(merged.has_multiple_models);
        assert_eq!(merged.num_models, 5);
        assert!(merged.has_ssbonds);
        assert_eq!(merged.num_residues, 50);
        assert!((merged.radius_of_gyration - 15.0).abs() < 1e-10);
    }
}