use crate::core::PdbStructure;
use crate::error::PdbError;
use crate::records::{Atom, Conect, Model, Remark, SSBond, SeqRes};
use crate::utils::{parse_float, parse_int};
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;
/// Opens the file at `path` and parses its contents as PDB text.
///
/// The file is read through a [`BufReader`] and handed to
/// `parse_pdb_reader`, which does the actual parsing.
///
/// # Errors
///
/// Returns an error if the file cannot be opened (the I/O error is converted
/// by `?`), or whatever error `parse_pdb_reader` reports.
pub fn parse_pdb_file<P: AsRef<Path>>(path: P) -> Result<PdbStructure, PdbError> {
    parse_pdb_reader(BufReader::new(File::open(path)?))
}
/// Parses PDB text that is already held in memory.
///
/// # Errors
///
/// Returns whatever error `parse_pdb_reader` reports for `content`.
pub fn parse_pdb_string(content: &str) -> Result<PdbStructure, PdbError> {
    // A byte slice implements `BufRead` on its own, so it can be read line by
    // line directly; wrapping it in a `BufReader` would only add a second
    // buffer and an extra copy of data that is already in memory.
    parse_pdb_reader(content.as_bytes())
}
/// Parses PDB-formatted text from any buffered reader into a [`PdbStructure`].
///
/// The first six bytes of each line (trimmed) select the record type.
/// ATOM/HETATM, SEQRES, CONECT, SSBOND, REMARK, HEADER, TITLE, MODEL, ENDMDL
/// and END are handled; every other record type is ignored, as is any line
/// whose first six bytes cannot be taken (shorter than six bytes, or not
/// ending on a character boundary). Reading stops at the first END record.
///
/// While a MODEL is open, atoms and remarks are stored both on that model and
/// on the structure-wide lists. An open model is pushed onto
/// `structure.models` by ENDMDL, by the next MODEL, by END, or at end of input.
///
/// # Errors
///
/// Propagates I/O errors from `reader`, errors from the per-record parsers,
/// and errors from `parse_int` on a non-blank MODEL serial.
pub fn parse_pdb_reader<R: BufRead>(reader: R) -> Result<PdbStructure, PdbError> {
    // Free text of a HEADER/TITLE record: everything from byte 10 onwards,
    // trimmed. `get` yields an empty string for a line that ends before byte
    // 10 (for example a bare "HEADER") instead of panicking like `line[10..]`.
    let text_from_byte_10 = |line: &str| line.get(10..).unwrap_or("").trim().to_string();

    let mut structure = PdbStructure::new();
    let mut current_model: Option<Model> = None;

    for line in reader.lines() {
        let line = line?;

        // Checked slicing also covers lines where byte 6 falls inside a
        // multi-byte character, which would make `line[0..6]` panic.
        let record_type = match line.get(0..6) {
            Some(tag) => tag.trim(),
            None => continue,
        };

        match record_type {
            "ATOM" | "HETATM" => {
                let is_hetatm = record_type == "HETATM";
                let atom = parse_atom_record(&line, is_hetatm)?;
                if let Some(model) = &mut current_model {
                    model.atoms.push(atom.clone());
                }
                structure.atoms.push(atom);
            }
            "SEQRES" => {
                structure.seqres.push(parse_seqres_record(&line)?);
            }
            "CONECT" => {
                structure.connects.push(parse_conect_record(&line)?);
            }
            "SSBOND" => {
                structure.ssbonds.push(parse_ssbond_record(&line)?);
            }
            "REMARK" => {
                let remark = parse_remark_record(&line)?;
                if let Some(model) = &mut current_model {
                    model.remarks.push(remark.clone());
                }
                structure.remarks.push(remark);
            }
            "HEADER" => {
                structure.header = Some(text_from_byte_10(&line));
            }
            "TITLE" => {
                structure.title = Some(text_from_byte_10(&line));
            }
            "MODEL" => {
                // A new MODEL implicitly closes one that never saw ENDMDL.
                if let Some(model) = current_model.take() {
                    structure.models.push(model);
                }
                // The serial lives in bytes 10..14; a missing or blank field
                // falls back to model number 1.
                let serial = match line.get(10..14).map(str::trim) {
                    Some(digits) if !digits.is_empty() => parse_int(digits)?,
                    _ => 1,
                };
                current_model = Some(Model {
                    serial,
                    atoms: Vec::new(),
                    remarks: Vec::new(),
                });
                structure.current_model = Some(serial);
            }
            "ENDMDL" => {
                if let Some(model) = current_model.take() {
                    structure.models.push(model);
                }
                structure.current_model = None;
            }
            "END" => {
                if let Some(model) = current_model.take() {
                    structure.models.push(model);
                }
                break;
            }
            _ => {}
        }
    }

    // Input ended while a model was still open: keep what was collected.
    if let Some(model) = current_model {
        structure.models.push(model);
    }
    Ok(structure)
}
/// Builds an [`Atom`] from a single ATOM or HETATM line.
///
/// Fields are taken from fixed byte offsets of `line`: serial `6..11`, name
/// `12..16`, alternate location `16`, residue name `17..20`, chain id
/// `21..22`, residue sequence number `22..26`, insertion code `26`, and the
/// x/y/z coordinates at `30..38`, `38..46` and `46..54`. `is_hetatm` is stored
/// on the atom unchanged.
///
/// The trailing fields are optional: occupancy (`54..60`) defaults to `1.0`,
/// the temperature factor (`60..66`) to `0.0`, and the element (`76..78`) to
/// an empty string when the line is too short to contain them.
///
/// # Errors
///
/// Returns `PdbError::InvalidRecord` when the line is shorter than 54 bytes,
/// and propagates any error from `parse_int` / `parse_float`.
fn parse_atom_record(line: &str, is_hetatm: bool) -> Result<Atom, PdbError> {
    if line.len() < 54 {
        return Err(PdbError::InvalidRecord(
            "ATOM/HETATM record too short".to_string(),
        ));
    }

    // Character at `position`, with a blank (or missing) character reported
    // as `None`. The guard above already guarantees the line is longer than
    // both positions queried below, so no separate length test is needed.
    let non_blank_char = |position: usize| line.chars().nth(position).filter(|&ch| ch != ' ');

    let serial = parse_int(&line[6..11])?;
    let name = line[12..16].trim().to_string();
    let alt_loc = non_blank_char(16);
    let residue_name = line[17..20].trim().to_string();
    let chain_id = line[21..22].to_string();
    let residue_seq = parse_int(&line[22..26])?;
    let ins_code = non_blank_char(26);

    let x = parse_float(&line[30..38])?;
    let y = parse_float(&line[38..46])?;
    let z = parse_float(&line[46..54])?;

    // Everything past the coordinates is optional and has a fallback value.
    let occupancy = if line.len() < 60 {
        1.0
    } else {
        parse_float(&line[54..60])?
    };
    let temp_factor = if line.len() < 66 {
        0.0
    } else {
        parse_float(&line[60..66])?
    };
    let element = if line.len() < 78 {
        String::new()
    } else {
        line[76..78].trim().to_string()
    };

    Ok(Atom {
        serial,
        name,
        alt_loc,
        residue_name,
        chain_id,
        residue_seq,
        x,
        y,
        z,
        occupancy,
        temp_factor,
        element,
        ins_code,
        is_hetatm,
    })
}
/// Builds a [`SeqRes`] from a single SEQRES line.
///
/// Fields are taken from fixed byte offsets: the record serial number from
/// `7..10` (PDB columns 8–10), the chain id from `11..12`, and the residue
/// count from `13..17`. Residue names follow from byte 19 onwards in
/// four-byte slots, each holding a three-character name; blank slots are
/// skipped.
///
/// # Errors
///
/// Returns `PdbError::InvalidRecord` when the line is shorter than 19 bytes,
/// and propagates any error from `parse_int`.
fn parse_seqres_record(line: &str) -> Result<SeqRes, PdbError> {
    if line.len() < 19 {
        return Err(PdbError::InvalidRecord(
            "SEQRES record too short".to_string(),
        ));
    }

    // The serial number is three columns wide. Reading only bytes 8..10 would
    // drop the hundreds digit, turning serial 100 into 0 for long chains.
    let serial = parse_int(&line[7..10])?;
    let chain_id = line[11..12].to_string();
    let num_residues = parse_int(&line[13..17])?;

    let residue_section = line.get(19..).unwrap_or("");
    let residues: Vec<String> = (0..residue_section.len())
        .step_by(4)
        // A trailing slot with fewer than three bytes left is ignored.
        .filter_map(|start| residue_section.get(start..start + 3))
        .map(str::trim)
        .filter(|residue| !residue.is_empty())
        .map(str::to_string)
        .collect();

    Ok(SeqRes {
        serial,
        chain_id,
        num_residues,
        residues,
    })
}
/// Builds a [`Conect`] from a single CONECT line.
///
/// Atom serials are read from consecutive five-byte fields: `atom1` from
/// `6..11` and `atom2` from `11..16` are mandatory, while `atom3` (`16..21`)
/// and `atom4` (`21..26`) are `None` when the line is too short to contain
/// the field or the field is blank.
///
/// # Errors
///
/// Returns `PdbError::InvalidRecord` when the line is shorter than 11 bytes
/// or has no room for the second atom field, and propagates any error from
/// `parse_int`.
fn parse_conect_record(line: &str) -> Result<Conect, PdbError> {
    let width = line.len();

    if width < 11 {
        return Err(PdbError::InvalidRecord(
            "CONECT record too short".to_string(),
        ));
    }
    let atom1 = parse_int(&line[6..11])?;

    if width < 16 {
        return Err(PdbError::InvalidRecord(
            "CONECT record missing second atom".to_string(),
        ));
    }
    let atom2 = parse_int(&line[11..16])?;

    // The length test must come first so the slice is never taken on a line
    // that is too short.
    let atom3 = if width < 21 || line[16..21].trim().is_empty() {
        None
    } else {
        Some(parse_int(&line[16..21])?)
    };
    let atom4 = if width < 26 || line[21..26].trim().is_empty() {
        None
    } else {
        Some(parse_int(&line[21..26])?)
    };

    Ok(Conect {
        atom1,
        atom2,
        atom3,
        atom4,
    })
}
/// Builds an [`SSBond`] from a single SSBOND line.
///
/// Mandatory fields are taken from fixed byte offsets: serial `7..10`, first
/// residue name `11..14`, chain `15..16`, sequence number `17..21`; second
/// residue name `25..28`, chain `29..30`, sequence number `31..35`. The
/// insertion codes sit at bytes 21 and 35 and are `None` when blank or absent.
///
/// The trailing fields are optional. The two symmetry operators occupy bytes
/// `59..65` and `66..72` (PDB columns 60–65 and 67–72) and default to `1555`;
/// the bond length occupies `73..78` and defaults to `2.04`. A field that is
/// missing because the line is short, or that is present but blank, takes its
/// default.
///
/// # Errors
///
/// Returns `PdbError::InvalidRecord` when the line is shorter than 35 bytes,
/// and propagates any error from `parse_int` / `parse_float`.
fn parse_ssbond_record(line: &str) -> Result<SSBond, PdbError> {
    if line.len() < 35 {
        return Err(PdbError::InvalidRecord(
            "SSBOND record too short".to_string(),
        ));
    }

    // One-character field at a byte offset; blank or absent maps to `None`.
    // Addressed by byte offset so it agrees with the slices used elsewhere.
    let code_at = |offset: usize| {
        line.get(offset..offset + 1)
            .and_then(|field| field.chars().next())
            .filter(|&code| code != ' ')
    };
    // Trimmed text of an optional trailing field; `None` if absent or blank.
    let optional_field = |start: usize, end: usize| {
        line.get(start..end)
            .map(str::trim)
            .filter(|text| !text.is_empty())
    };

    let serial = parse_int(&line[7..10])?;

    let residue1_name = line[11..14].trim().to_string();
    let chain1_id = line[15..16].to_string();
    let residue1_seq = parse_int(&line[17..21])?;
    let icode1 = code_at(21);

    let residue2_name = line[25..28].trim().to_string();
    let chain2_id = line[29..30].to_string();
    let residue2_seq = parse_int(&line[31..35])?;
    let icode2 = code_at(35);

    // Symmetry operators are right-justified in six-column fields. Reading
    // only the first four bytes would turn "  1555" into 15.
    let sym1 = match optional_field(59, 65) {
        Some(text) => parse_int(text)?,
        None => 1555,
    };
    let sym2 = match optional_field(66, 72) {
        Some(text) => parse_int(text)?,
        None => 1555,
    };
    let length = match optional_field(73, 78) {
        Some(text) => parse_float(text)?,
        None => 2.04,
    };

    Ok(SSBond {
        serial,
        residue1_name,
        chain1_id,
        residue1_seq,
        icode1,
        residue2_name,
        chain2_id,
        residue2_seq,
        icode2,
        sym1,
        sym2,
        length,
    })
}
/// Builds a [`Remark`] from a single REMARK line.
///
/// The remark number is parsed from bytes `6..10`; the content is everything
/// from byte 11 onwards, trimmed. A line that ends before byte 11 yields an
/// empty content string.
///
/// # Errors
///
/// Returns `PdbError::InvalidRecord` when the line is shorter than 10 bytes,
/// and propagates any error from `parse_int`.
fn parse_remark_record(line: &str) -> Result<Remark, PdbError> {
    if line.len() < 10 {
        return Err(PdbError::InvalidRecord(
            "REMARK record too short".to_string(),
        ));
    }
    let number = parse_int(&line[6..10])?;
    // The guard admits a 10-byte line such as "REMARK   2", for which
    // `line[11..]` would be out of range and panic; `get` returns `None`.
    let content = line.get(11..).unwrap_or("").trim().to_string();
    Ok(Remark { number, content })
}