use crate::structure;
use std::collections::HashMap;
use std::fs::File;
use std::io::Write;
use std::io::{BufRead, BufReader};
/// Reads a two-chain complex from a PDB file and returns one molecule per chain.
///
/// The input is split into per-chain temporary files with [`split_complex`]. The
/// first two files (chains in sorted order) are each parsed with
/// `structure::read_pdb`, and the first element of `.0` of each parse result is
/// cloned and returned as `(first_chain, second_chain)`.
///
/// Every temporary file produced by the split is removed afterwards, including
/// files for chains beyond the first two, followed by a best-effort removal of
/// the directory that held them.
///
/// # Panics
///
/// Panics if the input yields fewer than two chains, if a parse result is empty,
/// if a temporary chain file cannot be removed, or if [`split_complex`] panics.
pub fn read_complex(pdbf: &str) -> (structure::Molecule, structure::Molecule) {
    let chain_files = split_complex(pdbf);

    // Parse before cleaning up, but defer the "not enough chains" panic until
    // after cleanup so that the temporary files are not left behind.
    let parsed = match chain_files.as_slice() {
        [first, second, ..] => Some((
            structure::read_pdb(first).0[0].clone(),
            structure::read_pdb(second).0[0].clone(),
        )),
        _ => None,
    };

    // Remove every chain file, not only the two that were parsed; otherwise the
    // non-recursive directory removal below fails and the directory leaks.
    for path in &chain_files {
        std::fs::remove_file(path).unwrap();
    }
    // Deliberately non-recursive: the parent is derived from a generated file
    // name, so only an (expected to be empty) directory is ever removed here.
    if let Some(parent) = chain_files
        .first()
        .and_then(|path| std::path::Path::new(path).parent())
    {
        let _ = std::fs::remove_dir(parent);
    }

    parsed.unwrap_or_else(|| {
        panic!(
            "Expected at least two chains in {}, found {}",
            pdbf,
            chain_files.len()
        )
    })
}
/// Splits the `ATOM` records of a PDB file into one temporary PDB file per chain.
///
/// Every line that starts with `ATOM` and is at least 22 bytes long is assigned
/// to the chain identifier found at character index 21 (column 22). A missing or
/// whitespace identifier is stored under the single-space chain `" "`. Lines keep
/// their input order within each chain; all other lines are dropped.
///
/// Each chain is written to `<temp_dir>/<chain>.pdb`, where `<temp_dir>` is the
/// value returned by `crate::utils::get_unique_tempdir()` and the chain
/// identifier is used verbatim as the file stem. The returned paths are sorted by
/// chain identifier. Removing the files and their directory is left to the
/// caller.
///
/// Reading stops silently at the first line that cannot be read (for example a
/// line that is not valid UTF-8); only the records before that point are written.
///
/// If no matching `ATOM` line is found, an empty vector is returned and no
/// temporary directory is created.
///
/// # Panics
///
/// Panics if `pdb_file` cannot be opened, if the temporary directory or a chain
/// file cannot be created or written, or if an output path is not valid UTF-8.
pub fn split_complex(pdb_file: &str) -> Vec<String> {
    // Open the input first so that a bad path does not leave an empty temp
    // directory behind.
    let file = File::open(pdb_file).expect("Cannot open file");

    let mut atom_map: HashMap<String, Vec<String>> = HashMap::new();
    for line in BufReader::new(file).lines().map_while(Result::ok) {
        if !line.starts_with("ATOM") || line.len() < 22 {
            continue;
        }
        // `len()` counts bytes while `nth` counts characters, so the lookup can
        // still miss on multi-byte input; treat that like a blank chain.
        let chain = match line.chars().nth(21) {
            Some(id) if !id.is_whitespace() => id.to_string(),
            _ => " ".to_string(),
        };
        atom_map.entry(chain).or_default().push(line);
    }

    // Without any chain there is nothing to write, and the caller would have no
    // path from which to discover (and remove) a created directory.
    if atom_map.is_empty() {
        return Vec::new();
    }

    let temp_dir = crate::utils::get_unique_tempdir();
    std::fs::create_dir_all(&temp_dir).expect("Cannot create temp directory");

    // Hash map iteration order is unspecified; sort for a deterministic result.
    let mut chains: Vec<(String, Vec<String>)> = atom_map.into_iter().collect();
    chains.sort_unstable_by(|a, b| a.0.cmp(&b.0));

    let mut result = Vec::with_capacity(chains.len());
    for (chain, atoms) in chains {
        let file_path = temp_dir.join(format!("{}.pdb", chain));

        // Assemble the whole chain in memory so the file is written in one call.
        let capacity: usize = atoms.iter().map(|atom| atom.len() + 1).sum();
        let mut pdb_string = String::with_capacity(capacity);
        for atom in &atoms {
            pdb_string.push_str(atom);
            pdb_string.push('\n');
        }

        let mut file = File::create(&file_path).expect("Cannot create file");
        file.write_all(pdb_string.as_bytes())
            .expect("Cannot write to file");

        result.push(
            file_path
                .to_str()
                .expect("Temp file path is not valid UTF-8")
                .to_string(),
        );
    }
    result
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::Path;

    /// PDB fixture used by these tests; the assertions below expect it to split
    /// into exactly two chains, `A` and `B`.
    const COMPLEX_PDB: &str = "data/2oob.pdb";

    /// Owns the paths returned by `split_complex` and removes their parent
    /// directory on drop, so the temp directory is cleaned up even when an
    /// assertion panics part-way through a test.
    struct SplitFiles(Vec<String>);

    impl SplitFiles {
        /// Splits `pdb_file` and takes ownership of the resulting paths.
        fn from_pdb(pdb_file: &str) -> Self {
            SplitFiles(split_complex(pdb_file))
        }
    }

    impl Drop for SplitFiles {
        fn drop(&mut self) {
            // `first()` rather than indexing: panicking inside `drop` while a
            // test is already unwinding would abort the whole test binary.
            if let Some(parent) = self.0.first().and_then(|path| Path::new(path).parent()) {
                let _ = fs::remove_dir_all(parent);
            }
        }
    }

    #[test]
    fn test_split_complex_creates_two_chains() {
        let split = SplitFiles::from_pdb(COMPLEX_PDB);
        let result = &split.0;
        assert_eq!(result.len(), 2);
        assert!(Path::new(&result[0]).exists());
        assert!(Path::new(&result[1]).exists());
        for path in result {
            assert!(path.contains("gdock_"));
        }
    }

    #[test]
    fn test_split_complex_chain_names() {
        let split = SplitFiles::from_pdb(COMPLEX_PDB);
        let result = &split.0;
        assert!(result[0].ends_with("A.pdb"));
        assert!(result[1].ends_with("B.pdb"));
    }

    #[test]
    fn test_split_complex_files_have_content() {
        let split = SplitFiles::from_pdb(COMPLEX_PDB);
        let result = &split.0;
        let content_a = fs::read_to_string(&result[0]).expect("Failed to read chain A file");
        let content_b = fs::read_to_string(&result[1]).expect("Failed to read chain B file");
        assert!(!content_a.is_empty());
        assert!(!content_b.is_empty());
        assert!(content_a.contains("ATOM"));
        assert!(content_b.contains("ATOM"));
    }

    #[test]
    fn test_read_complex_returns_two_molecules() {
        let (mol_a, mol_b) = read_complex(COMPLEX_PDB);
        assert!(!mol_a.0.is_empty());
        assert!(!mol_b.0.is_empty());
        assert_eq!(mol_a.0[0].chainid, 'A');
        assert_eq!(mol_b.0[0].chainid, 'B');
    }

    #[test]
    fn test_read_complex_cleans_up_temp_files() {
        // NOTE(review): `read_complex` does not expose the temp paths it used, so
        // this only checks that the call succeeds (it panics if a temp file
        // cannot be removed); it does not verify that nothing is left on disk.
        let (mol_a, mol_b) = read_complex(COMPLEX_PDB);
        assert!(!mol_a.0.is_empty());
        assert!(!mol_b.0.is_empty());
    }

    #[test]
    fn test_read_complex_preserves_coordinates() {
        let (mol_a, mol_b) = read_complex(COMPLEX_PDB);
        for atom in mol_a.0.iter().chain(mol_b.0.iter()) {
            assert!(atom.x.is_finite());
            assert!(atom.y.is_finite());
            assert!(atom.z.is_finite());
        }
    }

    #[test]
    #[should_panic(expected = "Cannot open file")]
    fn test_split_complex_nonexistent_file() {
        split_complex("nonexistent_file_xyz123.pdb");
    }

    #[test]
    fn test_split_complex_deterministic_order() {
        let first_run = SplitFiles::from_pdb(COMPLEX_PDB);
        let second_run = SplitFiles::from_pdb(COMPLEX_PDB);
        // Directories differ between runs, so compare the file names only.
        let name1_a = Path::new(&first_run.0[0]).file_name().unwrap();
        let name1_b = Path::new(&first_run.0[1]).file_name().unwrap();
        let name2_a = Path::new(&second_run.0[0]).file_name().unwrap();
        let name2_b = Path::new(&second_run.0[1]).file_name().unwrap();
        assert_eq!(name1_a, name2_a);
        assert_eq!(name1_b, name2_b);
    }
}