1use cyanea_core::{CyaneaError, Result};
7
8use crate::types::{Atom, Chain, Point3D, Residue, Structure};
9
10use alloc::string::{String, ToString};
11use alloc::vec::Vec;
12
13pub fn parse_pdb(input: &str) -> Result<Structure> {
20 let mut id = String::from("UNKN");
21 let mut current_chain_id: Option<char> = None;
22 let mut current_residue_key: Option<(i32, Option<char>, String)> = None;
23 let mut chains: Vec<Chain> = Vec::new();
24 let mut residues: Vec<Residue> = Vec::new();
25 let mut atoms: Vec<Atom> = Vec::new();
26 let mut atom_count = 0u32;
27 let mut in_first_model = true;
28
29 for line in input.lines() {
30 if line.starts_with("ENDMDL") {
31 break; }
33 if line.starts_with("MODEL") {
34 if !in_first_model {
35 break;
36 }
37 in_first_model = false;
38 continue;
39 }
40
41 if line.starts_with("HEADER") && line.len() >= 66 {
42 let pdb_id = line[62..66].trim();
43 if !pdb_id.is_empty() {
44 id = pdb_id.into();
45 }
46 }
47
48 let is_atom = line.starts_with("ATOM ");
49 let is_hetatm = line.starts_with("HETATM");
50
51 if is_atom || is_hetatm {
52 let atom = parse_atom_record(line, is_hetatm)?;
53 atom_count += 1;
54
55 let chain_id = parse_chain_id(line);
56 let seq_num = parse_residue_seq(line)?;
57 let i_code = parse_insertion_code(line);
58 let res_name = parse_residue_name(line);
59
60 let res_key = (seq_num, i_code, res_name.clone());
61
62 if current_chain_id != Some(chain_id) {
64 if !atoms.is_empty() {
66 if let Some((sn, ic, rn)) = current_residue_key.take() {
67 residues.push(Residue {
68 name: rn,
69 seq_num: sn,
70 i_code: ic,
71 atoms: core::mem::take(&mut atoms),
72 });
73 }
74 }
75 if let Some(cid) = current_chain_id {
77 if !residues.is_empty() {
78 chains.push(Chain::new(cid, core::mem::take(&mut residues)));
79 }
80 }
81 current_chain_id = Some(chain_id);
82 current_residue_key = Some(res_key);
83 } else if current_residue_key.as_ref() != Some(&res_key) {
84 if !atoms.is_empty() {
86 if let Some((sn, ic, rn)) = current_residue_key.take() {
87 residues.push(Residue {
88 name: rn,
89 seq_num: sn,
90 i_code: ic,
91 atoms: core::mem::take(&mut atoms),
92 });
93 }
94 }
95 current_residue_key = Some(res_key);
96 }
97
98 atoms.push(atom);
99 }
100
101 if line.starts_with("TER") {
102 if !atoms.is_empty() {
104 if let Some((sn, ic, rn)) = current_residue_key.take() {
105 residues.push(Residue {
106 name: rn,
107 seq_num: sn,
108 i_code: ic,
109 atoms: core::mem::take(&mut atoms),
110 });
111 }
112 }
113 if let Some(cid) = current_chain_id.take() {
114 if !residues.is_empty() {
115 chains.push(Chain::new(cid, core::mem::take(&mut residues)));
116 }
117 }
118 }
119 }
120
121 if !atoms.is_empty() {
123 if let Some((sn, ic, rn)) = current_residue_key.take() {
124 residues.push(Residue {
125 name: rn,
126 seq_num: sn,
127 i_code: ic,
128 atoms,
129 });
130 }
131 }
132 if let Some(cid) = current_chain_id {
133 if !residues.is_empty() {
134 chains.push(Chain::new(cid, residues));
135 }
136 }
137
138 if atom_count == 0 {
139 return Err(CyaneaError::Parse("no ATOM records found".into()));
140 }
141
142 Ok(Structure { id, chains })
143}
144
145#[cfg(feature = "std")]
147pub fn parse_pdb_file(path: impl AsRef<::std::path::Path>) -> Result<Structure> {
148 let contents = ::std::fs::read_to_string(path)?;
149 parse_pdb(&contents)
150}
151
152fn parse_atom_record(line: &str, is_hetatm: bool) -> Result<Atom> {
153 if line.len() < 54 {
155 return Err(CyaneaError::Parse(alloc::format!(
156 "ATOM record too short ({} chars): {}",
157 line.len(),
158 line
159 )));
160 }
161
162 let serial = safe_slice(line, 6, 11)
163 .trim()
164 .parse::<u32>()
165 .map_err(|e| CyaneaError::Parse(alloc::format!("bad atom serial: {}", e)))?;
166
167 let name = safe_slice(line, 12, 16).to_string();
168
169 let alt_loc = {
170 let c = safe_slice(line, 16, 17).chars().next().unwrap_or(' ');
171 if c == ' ' {
172 None
173 } else {
174 Some(c)
175 }
176 };
177
178 let x = safe_slice(line, 30, 38)
179 .trim()
180 .parse::<f64>()
181 .map_err(|e| CyaneaError::Parse(alloc::format!("bad x coordinate: {}", e)))?;
182 let y = safe_slice(line, 38, 46)
183 .trim()
184 .parse::<f64>()
185 .map_err(|e| CyaneaError::Parse(alloc::format!("bad y coordinate: {}", e)))?;
186 let z = safe_slice(line, 46, 54)
187 .trim()
188 .parse::<f64>()
189 .map_err(|e| CyaneaError::Parse(alloc::format!("bad z coordinate: {}", e)))?;
190
191 let occupancy = if line.len() >= 60 {
192 safe_slice(line, 54, 60).trim().parse::<f64>().unwrap_or(1.0)
193 } else {
194 1.0
195 };
196
197 let temp_factor = if line.len() >= 66 {
198 safe_slice(line, 60, 66).trim().parse::<f64>().unwrap_or(0.0)
199 } else {
200 0.0
201 };
202
203 let element = if line.len() >= 78 {
204 let e = safe_slice(line, 76, 78).trim();
205 if e.is_empty() {
206 None
207 } else {
208 Some(e.to_string())
209 }
210 } else {
211 None
212 };
213
214 let charge = if line.len() >= 80 {
215 let ch = safe_slice(line, 78, 80).trim();
216 parse_pdb_charge(ch)
217 } else {
218 None
219 };
220
221 Ok(Atom {
222 serial,
223 name,
224 alt_loc,
225 coords: Point3D::new(x, y, z),
226 occupancy,
227 temp_factor,
228 element,
229 charge,
230 is_hetatm,
231 })
232}
233
234fn parse_chain_id(line: &str) -> char {
235 safe_slice(line, 21, 22).chars().next().unwrap_or(' ')
236}
237
238fn parse_residue_seq(line: &str) -> Result<i32> {
239 safe_slice(line, 22, 26)
240 .trim()
241 .parse::<i32>()
242 .map_err(|e| CyaneaError::Parse(alloc::format!("bad residue seq number: {}", e)))
243}
244
245fn parse_insertion_code(line: &str) -> Option<char> {
246 let c = safe_slice(line, 26, 27).chars().next().unwrap_or(' ');
247 if c == ' ' {
248 None
249 } else {
250 Some(c)
251 }
252}
253
254fn parse_residue_name(line: &str) -> String {
255 safe_slice(line, 17, 20).trim().to_string()
256}
257
258fn parse_pdb_charge(s: &str) -> Option<i8> {
259 if s.is_empty() {
260 return None;
261 }
262 let s = s.trim();
264 if s.is_empty() {
265 return None;
266 }
267 if s.len() == 2 {
269 let chars: Vec<char> = s.chars().collect();
270 if chars[1] == '+' || chars[1] == '-' {
271 if let Some(d) = chars[0].to_digit(10) {
272 let sign: i8 = if chars[1] == '+' { 1 } else { -1 };
273 return Some(sign * d as i8);
274 }
275 }
276 if chars[0] == '+' || chars[0] == '-' {
277 if let Some(d) = chars[1].to_digit(10) {
278 let sign: i8 = if chars[0] == '+' { 1 } else { -1 };
279 return Some(sign * d as i8);
280 }
281 }
282 }
283 None
284}
285
286fn safe_slice(s: &str, start: usize, end: usize) -> &str {
288 let bytes = s.as_bytes();
289 let len = bytes.len();
290 if start >= len {
291 return "";
292 }
293 let actual_end = end.min(len);
294 &s[start..actual_end]
296}
297
298#[cfg(test)]
299mod tests {
300 use super::*;
301
302 fn minimal_pdb() -> &'static str {
303 "\
304HEADER 1CRN\n\
305ATOM 1 N THR A 1 2.464 9.901 13.546 1.00 10.00 N\n\
306ATOM 2 CA THR A 1 2.135 10.226 12.120 1.00 10.00 C\n\
307ATOM 3 C THR A 1 3.427 10.018 11.354 1.00 10.00 C\n\
308ATOM 4 O THR A 1 3.426 10.335 10.184 1.00 10.00 O\n\
309ATOM 5 N ILE A 2 4.462 9.470 11.952 1.00 10.00 N\n\
310ATOM 6 CA ILE A 2 5.735 9.197 11.275 1.00 10.00 C\n\
311TER 7 ILE A 2\n\
312END\n"
313 }
314
315 #[test]
316 fn parse_single_chain() {
317 let s = parse_pdb(minimal_pdb()).unwrap();
318 assert_eq!(s.id, "1CRN");
319 assert_eq!(s.chain_count(), 1);
320 assert_eq!(s.get_chain('A').unwrap().residue_count(), 2);
321 assert_eq!(s.atom_count(), 6);
322 }
323
324 #[test]
325 fn parse_multi_chain() {
326 let input = "\
327ATOM 1 CA ALA A 1 1.000 2.000 3.000 1.00 0.00 C\n\
328TER\n\
329ATOM 2 CA ALA B 1 4.000 5.000 6.000 1.00 0.00 C\n\
330TER\n\
331END\n";
332 let s = parse_pdb(input).unwrap();
333 assert_eq!(s.chain_count(), 2);
334 assert_eq!(s.get_chain('A').unwrap().atom_count(), 1);
335 assert_eq!(s.get_chain('B').unwrap().atom_count(), 1);
336 }
337
338 #[test]
339 fn parse_hetatm() {
340 let input = "\
341ATOM 1 CA ALA A 1 1.000 2.000 3.000 1.00 0.00 C\n\
342HETATM 2 O HOH A 2 4.000 5.000 6.000 1.00 0.00 O\n\
343END\n";
344 let s = parse_pdb(input).unwrap();
345 let chain = s.get_chain('A').unwrap();
346 assert_eq!(chain.residue_count(), 2);
347 let water = &chain.residues[1];
348 assert_eq!(water.name, "HOH");
349 assert!(water.atoms[0].is_hetatm);
350 }
351
352 #[test]
353 fn parse_insertion_codes() {
354 let input = "\
368ATOM 1 CA ALA A 10 1.000 2.000 3.000 1.00 0.00 C\n\
369ATOM 2 CA ALA A 10A 4.000 5.000 6.000 1.00 0.00 C\n\
370END\n";
371 let s = parse_pdb(input).unwrap();
372 let chain = s.get_chain('A').unwrap();
373 assert_eq!(chain.residue_count(), 2);
374 assert_eq!(chain.residues[0].i_code, None);
375 assert_eq!(chain.residues[1].i_code, Some('A'));
376 }
377
378 #[test]
379 fn parse_malformed_record() {
380 let input = "ATOM BAD\n";
381 assert!(parse_pdb(input).is_err());
382 }
383}