use crate::core::PdbStructure;
use crate::error::PdbError;
use flate2::read::GzDecoder;
use std::fs::File;
use std::io::{BufReader, Read};
use std::path::Path;
/// Parses a gzip-compressed PDB file.
///
/// The file at `path` is opened, wrapped in a [`GzDecoder`] and a [`BufReader`],
/// and the resulting reader is handed to `crate::parser::pdb::parse_pdb_reader`.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, or whatever error the PDB
/// parser reports.
pub fn parse_gzip_pdb_file<P: AsRef<Path>>(path: P) -> Result<PdbStructure, PdbError> {
    let gz_stream = GzDecoder::new(File::open(&path)?);
    crate::parser::pdb::parse_pdb_reader(BufReader::new(gz_stream))
}
/// Parses a gzip-compressed mmCIF file.
///
/// The file at `path` is opened, wrapped in a [`GzDecoder`] and a [`BufReader`],
/// and the resulting reader is handed to `crate::parser::mmcif::parse_mmcif_reader`.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, or whatever error the mmCIF
/// parser reports.
pub fn parse_gzip_mmcif_file<P: AsRef<Path>>(path: P) -> Result<PdbStructure, PdbError> {
    let gz_stream = GzDecoder::new(File::open(&path)?);
    crate::parser::mmcif::parse_mmcif_reader(BufReader::new(gz_stream))
}
/// Parses a gzip-compressed structure file, choosing between the PDB and mmCIF
/// parsers automatically.
///
/// The format is decided in two stages:
///
/// 1. **File name.** The file stem (the file name with its final extension,
///    typically `.gz`, removed) is lowercased and inspected. A stem ending in
///    `.ent` or `.pdb` selects the PDB parser; one ending in `.cif` or `.mmcif`
///    selects the mmCIF parser; any other stem starting with `pdb` selects the
///    PDB parser.
/// 2. **Content.** Otherwise up to the first 512 decompressed bytes are
///    examined. The first line that is neither blank nor a `#` comment decides:
///    if it starts with `data_` or `_` (ignoring surrounding whitespace) the
///    file is treated as mmCIF, otherwise as PDB. If no such line is found the
///    file is treated as PDB.
///
/// After sniffing, the file is re-opened by the selected `parse_gzip_*_file`
/// function, so the parser always sees the stream from its beginning.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, if its leading bytes cannot
/// be read or decompressed while sniffing the format, or whatever error the
/// selected parser reports.
pub fn parse_gzip_structure_file<P: AsRef<Path>>(path: P) -> Result<PdbStructure, PdbError> {
    /// Upper bound on the number of decompressed bytes inspected when sniffing.
    const SNIFF_LEN: u64 = 512;

    // Stage 1: decide from the file name when it is unambiguous.
    let stem = path
        .as_ref()
        .file_stem()
        .map(|stem| stem.to_string_lossy().to_lowercase());
    if let Some(stem) = stem.as_deref() {
        if stem.ends_with(".ent") || stem.ends_with(".pdb") {
            return parse_gzip_pdb_file(path);
        }
        if stem.ends_with(".cif") || stem.ends_with(".mmcif") {
            return parse_gzip_mmcif_file(path);
        }
        if stem.starts_with("pdb") {
            return parse_gzip_pdb_file(path);
        }
    }

    // Stage 2: sniff the decompressed content.
    //
    // A single `read` call is allowed to return fewer bytes than requested, so
    // `take` + `read_to_end` is used to keep reading until either the limit or
    // the end of the stream is reached. Errors are propagated instead of being
    // treated as "empty file", so a corrupt archive is reported as such.
    let mut head = Vec::new();
    GzDecoder::new(File::open(&path)?)
        .take(SNIFF_LEN)
        .read_to_end(&mut head)?;
    let head = String::from_utf8_lossy(&head);

    // The prefix test runs on the trimmed line so that it agrees with the
    // blank/comment filter, which also looks at the trimmed text.
    let is_mmcif = head
        .lines()
        .map(str::trim)
        .find(|line| !line.is_empty() && !line.starts_with('#'))
        .is_some_and(|line| line.starts_with("data_") || line.starts_with('_'));

    if is_mmcif {
        parse_gzip_mmcif_file(path)
    } else {
        parse_gzip_pdb_file(path)
    }
}
/// Parses gzip-compressed PDB data from an arbitrary reader.
///
/// `reader` is wrapped in a [`GzDecoder`] and a [`BufReader`], and the result is
/// passed to `crate::parser::pdb::parse_pdb_reader`.
///
/// # Errors
///
/// Returns whatever error the PDB parser reports.
pub fn parse_gzip_pdb_reader<R: Read>(reader: R) -> Result<PdbStructure, PdbError> {
    let gz_stream = GzDecoder::new(reader);
    crate::parser::pdb::parse_pdb_reader(BufReader::new(gz_stream))
}
/// Parses gzip-compressed mmCIF data from an arbitrary reader.
///
/// `reader` is wrapped in a [`GzDecoder`] and a [`BufReader`], and the result is
/// passed to `crate::parser::mmcif::parse_mmcif_reader`.
///
/// # Errors
///
/// Returns whatever error the mmCIF parser reports.
pub fn parse_gzip_mmcif_reader<R: Read>(reader: R) -> Result<PdbStructure, PdbError> {
    let gz_stream = GzDecoder::new(reader);
    crate::parser::mmcif::parse_mmcif_reader(BufReader::new(gz_stream))
}
#[cfg(test)]
mod tests {
    use super::*;
    use flate2::Compression;
    use flate2::write::GzEncoder;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Gzip-compresses `content` into the given temporary file.
    fn write_gzip(file: &NamedTempFile, content: &str) {
        let mut encoder = GzEncoder::new(file.reopen().unwrap(), Compression::default());
        encoder.write_all(content.as_bytes()).unwrap();
        encoder.finish().unwrap();
    }

    /// Creates a temporary file holding the gzip-compressed form of `content`.
    fn create_gzip_file(content: &str) -> NamedTempFile {
        let file = NamedTempFile::new().unwrap();
        write_gzip(&file, content);
        file
    }

    /// A gzip-compressed PDB file is parsed by the PDB-specific entry point.
    #[test]
    fn test_parse_gzip_pdb_simple() {
        let pdb_content = r#"HEADER TEST STRUCTURE
ATOM 1 N ALA A 1 0.000 0.000 0.000 1.00 0.00 N
ATOM 2 CA ALA A 1 1.458 0.000 0.000 1.00 0.00 C
END
"#;
        let file = create_gzip_file(pdb_content);
        let structure = parse_gzip_pdb_file(file.path()).unwrap();
        assert_eq!(structure.atoms.len(), 2);
    }

    /// A `.pdb.gz` suffix routes the auto-detecting entry point to the PDB parser.
    #[test]
    fn test_parse_gzip_structure_file_pdb_extension() {
        let pdb_content = r#"ATOM 1 N ALA A 1 0.000 0.000 0.000 1.00 0.00 N
END
"#;
        let file = NamedTempFile::with_suffix(".pdb.gz").unwrap();
        write_gzip(&file, pdb_content);
        let structure = parse_gzip_structure_file(file.path()).unwrap();
        assert_eq!(structure.atoms.len(), 1);
    }

    /// Gzip-compressed PDB data can be parsed from an in-memory reader.
    #[test]
    fn test_parse_gzip_reader() {
        use std::io::Cursor;
        let pdb_content = r#"ATOM 1 N ALA A 1 0.000 0.000 0.000 1.00 0.00 N
END
"#;
        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(pdb_content.as_bytes()).unwrap();
        // Finish explicitly (as the file-based tests do) rather than relying on
        // the encoder being dropped, so a failure to complete the stream fails
        // the test here instead of surfacing later as a confusing parse error.
        let compressed = encoder.finish().unwrap();
        let structure = parse_gzip_pdb_reader(Cursor::new(compressed)).unwrap();
        assert_eq!(structure.atoms.len(), 1);
    }
}