use crate::core::PdbStructure;
use crate::core::mmcif::MmcifParser;
use crate::core::mmcif_converter::mmcif_to_pdb_structure;
use crate::error::PdbError;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;
/// Parses the mmCIF file at `path` and converts it into a [`PdbStructure`].
///
/// The path is handed to [`MmcifParser::parse_file`] as a `&str`, so it must
/// be representable as UTF-8.
///
/// # Errors
///
/// * [`PdbError::InvalidRecord`] with the message `"Invalid file path"` when
///   `path` cannot be converted to a `&str`.
/// * [`PdbError::IoError`] wrapping whatever error `MmcifParser::parse_file`
///   reports.
/// * Any error returned by [`mmcif_to_pdb_structure`] during conversion.
pub fn parse_mmcif_file<P: AsRef<Path>>(path: P) -> Result<PdbStructure, PdbError> {
    let mut mmcif = MmcifParser::new();

    // The parser API takes a string slice, so a non-UTF-8 path is rejected here.
    let path_str = match path.as_ref().to_str() {
        Some(text) => text,
        None => return Err(PdbError::InvalidRecord("Invalid file path".to_string())),
    };

    if let Err(cause) = mmcif.parse_file(path_str) {
        return Err(PdbError::IoError(cause));
    }

    mmcif_to_pdb_structure(&mmcif)
}
/// Parses mmCIF data from any buffered reader and converts it into a
/// [`PdbStructure`].
///
/// # Errors
///
/// * [`PdbError::IoError`] wrapping whatever error
///   `MmcifParser::parse_reader` reports.
/// * Any error returned by [`mmcif_to_pdb_structure`] during conversion.
pub fn parse_mmcif_reader<R: BufRead>(reader: R) -> Result<PdbStructure, PdbError> {
    let mut mmcif = MmcifParser::new();

    if let Err(cause) = mmcif.parse_reader(reader) {
        return Err(PdbError::IoError(cause));
    }

    mmcif_to_pdb_structure(&mmcif)
}
/// Parses mmCIF data held in memory as a string slice.
///
/// This is a thin convenience wrapper around [`parse_mmcif_reader`]; it
/// returns exactly what that function returns for the bytes of `data`.
///
/// # Errors
///
/// Propagates every error produced by [`parse_mmcif_reader`].
pub fn parse_mmcif_string(data: &str) -> Result<PdbStructure, PdbError> {
    // A byte slice is itself a `BufRead`, so it can feed the reader-based
    // entry point directly.
    let bytes: &[u8] = data.as_bytes();
    parse_mmcif_reader(bytes)
}
/// Parses a structure file, choosing the parser from the file extension and
/// falling back to content sniffing.
///
/// Dispatch rules (extension comparison is exact, i.e. case-sensitive):
///
/// * `cif`, `mmcif` → [`parse_mmcif_file`]
/// * `pdb`, `ent` → `crate::parser::parse_pdb_file`
/// * anything else, no extension, or a non-UTF-8 extension →
///   `detect_and_parse_file`, which inspects the file contents.
///
/// # Errors
///
/// Propagates whatever error the selected parser returns.
pub fn parse_structure_file<P: AsRef<Path>>(path: P) -> Result<PdbStructure, PdbError> {
    // `None` covers both "no extension" and "extension is not valid UTF-8".
    let ext = path.as_ref().extension().and_then(|raw| raw.to_str());

    match ext {
        Some("cif") | Some("mmcif") => parse_mmcif_file(path),
        Some("pdb") | Some("ent") => crate::parser::parse_pdb_file(path),
        _ => detect_and_parse_file(path),
    }
}
/// Picks a parser by inspecting the first meaningful line of the file.
///
/// Blank lines and lines whose trimmed text starts with `#` are skipped. If
/// the first remaining line (after trimming) starts with `data_` or `_`, the
/// file is treated as mmCIF and parsed with [`parse_mmcif_file`]. In every
/// other case, including an empty file or one with only blank and `#` lines,
/// the file is parsed with `crate::parser::parse_pdb_file`.
///
/// Only the path is passed on to the chosen parser; the handle opened here
/// is used for sniffing alone.
///
/// # Errors
///
/// * An I/O error converted into [`PdbError`] if the file cannot be opened or
///   a line cannot be read while sniffing.
/// * Whatever error the selected parser returns.
fn detect_and_parse_file<P: AsRef<Path>>(path: P) -> Result<PdbStructure, PdbError> {
    let reader = BufReader::new(File::open(&path)?);

    // Stays `false` when the file has no meaningful line at all, which sends
    // the file down the PDB path.
    let mut looks_like_mmcif = false;
    for line in reader.lines() {
        let line = line?;
        let trimmed = line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }
        // Only the first meaningful line decides the format.
        looks_like_mmcif = trimmed.starts_with("data_") || trimmed.starts_with('_');
        break;
    }

    if looks_like_mmcif {
        parse_mmcif_file(path)
    } else {
        // PDB is the default for everything that is not recognisably mmCIF;
        // recognised PDB record names (HEADER, ATOM, HETATM, TITLE, REMARK)
        // and unknown content are handled identically.
        crate::parser::parse_pdb_file(path)
    }
}
// Unit tests for the mmCIF entry points and for the extension/content based
// dispatch in `parse_structure_file`. File-based tests write their fixture to
// a `tempfile::NamedTempFile` and parse it back through the public API.
#[cfg(test)]
mod tests {
use super::*;
use std::io::Cursor;
use std::io::Write;
use tempfile::NamedTempFile;
// Parses an in-memory mmCIF document containing two `_atom_site` rows and
// checks the atom count, the header/title, and the fields of the first atom.
#[test]
fn test_parse_mmcif_string() {
let mmcif_data = r#"
data_test
_entry.id TEST_ENTRY
_struct.title "Test Structure"
loop_
_atom_site.group_PDB
_atom_site.id
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.label_comp_id
_atom_site.label_asym_id
_atom_site.label_seq_id
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
ATOM 1 N N . MET A 1 20.154 10.000 5.000 1.00 25.00
ATOM 2 C CA . MET A 1 21.500 10.500 5.500 1.00 24.50
"#;
let structure = parse_mmcif_string(mmcif_data).unwrap();
assert_eq!(structure.atoms.len(), 2);
assert!(structure.header.is_some());
assert_eq!(structure.title, Some("Test Structure".to_string()));
// Spot-check that the first row was mapped onto the expected atom fields.
let atom1 = &structure.atoms[0];
assert_eq!(atom1.serial, 1);
assert_eq!(atom1.name, "N");
assert_eq!(atom1.element, "N");
assert_eq!(atom1.residue_name, "MET");
assert_eq!(atom1.chain_id, "A");
assert_eq!(atom1.residue_seq, 1);
}
// Feeds a single-atom mmCIF document through the reader-based entry point
// using an in-memory `Cursor`.
#[test]
fn test_parse_mmcif_reader() {
let mmcif_data = r#"
data_test
loop_
_atom_site.group_PDB
_atom_site.id
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.label_comp_id
_atom_site.label_asym_id
_atom_site.label_seq_id
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
ATOM 1 N N . ALA A 1 0.000 0.000 0.000 1.00 20.00
"#;
let cursor = Cursor::new(mmcif_data);
let structure = parse_mmcif_reader(cursor).unwrap();
assert_eq!(structure.atoms.len(), 1);
assert_eq!(structure.atoms[0].residue_name, "ALA");
}
// Writes a single-atom mmCIF document to a temporary file and parses it via
// the path-based entry point, checking residue name, chain and sequence id.
#[test]
fn test_parse_mmcif_file() {
let mmcif_data = r#"
data_test
_entry.id TEST
loop_
_atom_site.group_PDB
_atom_site.id
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.label_comp_id
_atom_site.label_asym_id
_atom_site.label_seq_id
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
ATOM 1 C CA . VAL B 2 1.000 2.000 3.000 0.90 30.00
"#;
let mut temp_file = NamedTempFile::new().unwrap();
temp_file.write_all(mmcif_data.as_bytes()).unwrap();
let structure = parse_mmcif_file(temp_file.path()).unwrap();
assert_eq!(structure.atoms.len(), 1);
assert_eq!(structure.atoms[0].residue_name, "VAL");
assert_eq!(structure.atoms[0].chain_id, "B");
assert_eq!(structure.atoms[0].residue_seq, 2);
}
// mmCIF content behind an extension that `parse_structure_file` does not
// recognise, so the format has to be detected from the file contents.
#[test]
fn test_auto_detect_mmcif() {
let mmcif_data = r#"data_test
_entry.id TEST
loop_
_atom_site.group_PDB
_atom_site.id
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.label_comp_id
_atom_site.label_asym_id
_atom_site.label_seq_id
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
ATOM 1 N N . ALA A 1 0.0 0.0 0.0 1.0 20.0"#;
let mut temp_file = NamedTempFile::with_suffix(".unknown").unwrap();
temp_file.write_all(mmcif_data.as_bytes()).unwrap();
let structure = parse_structure_file(temp_file.path()).unwrap();
assert_eq!(structure.atoms.len(), 1);
}
// PDB content behind an unrecognised extension; the first line starts with
// `ATOM`, so content detection is expected to route it to the PDB parser.
#[test]
fn test_auto_detect_pdb() {
let pdb_data =
"ATOM 1 N ALA A 1 20.154 16.967 23.486 1.00 25.00 N \n";
let mut temp_file = NamedTempFile::with_suffix(".unknown").unwrap();
temp_file.write_all(pdb_data.as_bytes()).unwrap();
let structure = parse_structure_file(temp_file.path()).unwrap();
assert_eq!(structure.atoms.len(), 1);
}
// Exercises the extension-based dispatch: a `.cif` file and a `.pdb` file
// must each parse successfully and yield a structure with a header.
#[test]
fn test_extension_detection() {
let mmcif_data = "data_test\n_entry.id TEST\n";
let mut cif_file = NamedTempFile::with_suffix(".cif").unwrap();
cif_file.write_all(mmcif_data.as_bytes()).unwrap();
let structure = parse_structure_file(cif_file.path()).unwrap();
assert!(structure.header.is_some());
let pdb_data = "HEADER TEST STRUCTURE\n";
let mut pdb_file = NamedTempFile::with_suffix(".pdb").unwrap();
pdb_file.write_all(pdb_data.as_bytes()).unwrap();
let structure = parse_structure_file(pdb_file.path()).unwrap();
assert!(structure.header.is_some());
}
}