gchemol_readwrite/formats/
pdb.rs1use super::*;
13use super::parser::*;
14fn read_lattice(s: &str) -> IResult<&str, Lattice> {
18 let mut cryst1 = tag("CRYST1");
19 let mut read_length = map_res(take_s(9), |x| x.trim().parse::<f64>());
20 let mut read_angle = map_res(take_s(7), |x| x.trim().parse::<f64>());
21 let mut take1 = take_s(1);
22 let mut take11 = take_s(11);
23 do_parse!(
24 s,
25 cryst1 >> a : read_length >> b : read_length >> c : read_length >> alpha : read_angle >> beta : read_angle >> gamma : read_angle >> take1 >> take11 >> read_line >> (
36 {
37 let mut lat = Lattice::from_params(a, b, c, alpha, beta, gamma);
38
39 lat
40 }
41 )
42 )
43}
44
45#[test]
46fn test_pdb_lattice() {
47 let lines = "CRYST1 18.126 18.126 7.567 90.00 90.00 120.00 P6/MMM
48ORIGX1 1.000000 0.000000 0.000000 0.00000
49ORIGX2 0.000000 1.000000 0.000000 0.00000
50ORIGX3 0.000000 0.000000 1.000000 0.00000
51SCALE1 0.055169 0.031852 0.000000 0.00000
52SCALE2 0.000000 0.063704 0.000000 0.00000
53SCALE3 0.000000 0.000000 0.132153 0.00000
54ATOM 1 O2 MOL 2 -4.808 4.768 2.469 1.00 0.00 O
55ATOM 2 O3 MOL 2 -6.684 6.549 1.983 1.00 0.00 O
56ATOM 3 T1 MOL 2 -5.234 6.009 1.536 1.00 0.00 Si1+
57";
58 let (_, mut v) = read_lattice(lines).expect("pdb lattice");
59 let abc = v.lengths();
60 assert_eq!(abc[1], 18.126);
61}
62fn guess_element<'a>(name: &'a str, r: &'a str) -> Option<&'a str> {
66 if let Some(sym) = r.get(22..24).and_then(|s| Some(s.trim())) {
68 if !sym.is_empty() {
69 return Some(sym);
70 }
71 }
72
73 if let Some(e) = name.chars().next() {
76 if !e.is_alphabetic() {
77 return name.get(1..2);
78 }
79 }
80 return name.get(0..1);
81}
82
83#[test]
84fn test_guess_element() {
85 let x = guess_element("1CA ", " 1.00 0.00 UC1 SI");
87 assert_eq!(Some("SI"), x);
88 let x = guess_element("1CA ", " 1.00 0.00 UC1 I");
89 assert_eq!(Some("I"), x);
90
91 let x = guess_element("CA ", "");
93 assert_eq!(Some("C"), x);
94 let x = guess_element("1SA ", "");
95 assert_eq!(Some("S"), x);
96 let x = guess_element(" N B ", "");
97 assert_eq!(Some("N"), x);
98 let x = guess_element(" H ", " ");
100 assert_eq!(Some("H"), x);
101}
102fn read_atom_record(s: &str) -> IResult<&str, (usize, Atom)> {
107 let mut tag_atom = alt((tag("ATOM "), tag("HETATM")));
108 let mut take1 = take_s(1);
109 let mut take3 = take_s(3);
110 let mut take4 = take_s(4);
111 let mut read_coord = map_res(take_s(8), |x| x.trim().parse::<f64>());
112 let mut read_sn = map_res(take_s(5), |x| x.trim().parse::<usize>());
113 do_parse!(
114 s,
115 tag_atom >> sn : read_sn >> take1 >> name : take4 >> alt_loc : take1 >> res_name: take3 >> take1 >> chain_id: take1 >> res_seq : take4 >> icode : take1 >> take3 >> x : read_coord >> y : read_coord >> z : read_coord >> rest : read_line >>
130 (
131 {
132 let sym = guess_element(name, rest).unwrap();
134 let mut a = Atom::new(sym, [x, y, z]);
135
136 (sn, a)
137 }
138 )
139 )
140}
141
142fn format_atom(i: usize, a: &Atom) -> String {
144 let [x, y, z] = a.position();
145 format!(
146 "ATOM {index:>5} {name:<4}{alt_loc:1}{res_name:<3} {chain_id:1}{res_seq:>4}{icode:>1} {x:-8.3}{y:-8.3}{z:-8.3} 1.00 0.00 {symbol:>2}\n",
147 index=i,
148 alt_loc=" ",
149 res_name="xx",
150 name=a.get_label().unwrap_or(a.symbol()),
151 chain_id=1,
152 res_seq=1,
153 icode=" ",
154 symbol=a.symbol(),
155 x=x,
156 y=y,
157 z=z,
158 )
159}
160
161#[test]
162fn test_pdb_atom() {
163 let line = "ATOM 3 SI2 SIO2X 1 3.484 3.484 3.474\n";
164 let (_, (i, a)) = read_atom_record(line).expect("pdb atom");
165 assert_eq!(3, i);
166 assert_eq!("S", a.symbol());
167 assert_eq!([3.484, 3.484, 3.474], a.position());
168
169 let line = "ATOM 3 SI2 SIO2X 1 3.484 3.484 3.474 1.00 0.00 UC1 SI\n";
170 let (_, (i, a)) = read_atom_record(line).expect("pdb atom");
171 assert_eq!("Si", a.symbol());
172
173 let line = "HETATM 1632 O1S MID E 5 -6.883 5.767 26.435 1.00 26.56 O \n";
174 let (_, (i, a)) = read_atom_record(line).expect("pdb atom");
175 assert_eq!(1632, i);
176 assert_eq!("O", a.symbol());
177 assert_eq!([-6.883, 5.767, 26.435], a.position());
178
179 let line = format_atom(3, &a);
180 let (_, (i, b)) = read_atom_record(&line).expect("pdb atom");
181 assert_eq!(3, i);
182 assert_eq!(a.symbol(), b.symbol());
183 assert_eq!(a.position(), b.position());
184}
185
186fn read_atoms(s: &str) -> IResult<&str, Vec<(usize, Atom)>> {
187 let mut read_atom_list = many0(read_atom_record);
188 do_parse!(s, atoms: read_atom_list >> (atoms))
189}
190
191#[test]
192fn test_pdb_get_atoms() {
193 let lines = "HETATM 1631 S MID E 5 -5.827 4.782 25.917 1.00 24.57 S
194HETATM 1634 C1 MID E 5 -3.761 3.904 27.580 1.00 28.14 C
195ATOM 1634 C1 MID E 5 -3.761 3.904 27.580 1.00 28.14 C
196HETATM 1641 C8 MID E 5 -2.096 3.018 29.071 1.00 30.82 C\n\n";
197 let (_, atoms) = read_atoms(lines).expect("pdb atoms");
198 assert_eq!(4, atoms.len());
199}
200fn read_bond_record(s: &str) -> IResult<&str, Vec<(usize, usize)>> {
204 let mut tag_conect = tag("CONECT");
205 let mut atom_sn = map_res(take_s(5), |x| x.trim().parse::<usize>());
206 let mut atom_sn2 = map_res(take_s(5), |x| x.trim().parse::<usize>());
207 let mut bonded_atoms = many1(atom_sn2);
208 do_parse!(
209 s,
210 tag_conect >> current: atom_sn >> others : bonded_atoms >> eol >> (
214 {
215 let mut pairs = vec![];
216 for other in others {
217 pairs.push((current, other));
218 }
219
220 pairs
221 }
222 )
223 )
224}
225
226fn format_bonds(mol: &Molecule) -> String {
227 let mut lines = String::new();
228
229 let mut map = std::collections::HashMap::new();
232 for (i, j, b) in mol.bonds() {
233 let mut neighbors = map.entry(i).or_insert(vec![]);
234 neighbors.push((j, b.order()));
235 }
236 for (i, a) in mol.atoms() {
237 if let Some(neighbors) = map.get(&i) {
238 let mut line = format!("CONECT{:>5}", i);
239 for (j, _) in neighbors {
240 line.push_str(&format!("{:>5}", j));
241 }
242 lines.push_str(&format!("{}\n", line));
243 }
244 }
245
246 lines
247}
248
249#[test]
250fn test_pdb_read_bond() {
251 let line = "CONECT 1179 1211 1222 \n";
252 let (_, x) = read_bond_record(line).expect("pdb bond record test1");
253 assert_eq!(2, x.len());
254
255 let line = "CONECT 2041 2040 2042\n";
256 let (_, x) = read_bond_record(line).unwrap();
257 assert_eq!(2, x.len());
258
259 let line = "CONECT 1179 746 11 \n";
260 let (r, x) = read_bond_record(line).unwrap();
261 assert_eq!(2, x.len());
262}
263
264fn read_bonds(s: &str) -> IResult<&str, Vec<(usize, usize)>> {
265 let mut bond_list = many1(read_bond_record);
266 do_parse!(
267 s,
268 bonds: bond_list >> (bonds.into_iter().flat_map(|x| x).collect())
269 )
270}
271
272#[test]
273fn test_pdb_get_bonds() {
274 let lines = "CONECT 2028 2027 2029
275CONECT 2041 2040 2042
276CONECT 2043 2042 2044
277\n";
278
279 let (_, x) = read_bonds(lines).expect("pdb bonds");
280 assert_eq!(6, x.len());
281
282 let lines = "CONECT 2028 2027 2029
283\n";
284
285 let (_, x) = read_bonds(lines).expect("pdb missing bonds");
286 assert_eq!(2, x.len());
287}
288fn jump1(s: &str) -> IResult<&str, ()> {
293 let possible_tags = alt((tag("CRYST1"), tag("ATOM "), tag("HETATM")));
294 let (r, _) = many_till(read_line, peek(possible_tags))(s)?;
295
296 Ok((r, ()))
297}
298
299fn read_molecule(s: &str) -> IResult<&str, Molecule> {
300 let mut read_lattice = opt(read_lattice);
301 let mut read_bonds = opt(read_bonds);
302 let mut sep_atoms_bonds = opt(alt((preceded(tag("TER"), read_line), tag("END\n"))));
304 do_parse!(
305 s,
306 jump1 >> lat: read_lattice >> atoms: read_atoms >> sep_atoms_bonds >> bonds: read_bonds >> ({
312 let mut mol = Molecule::new("for pdb");
314 mol.add_atoms_from(atoms);
315
316 if let Some(lat) = lat {
318 mol.set_lattice(lat);
319 }
320
321 if let Some(bonds) = bonds {
323 let bonds = bonds.into_iter().map(|(u, v)| (u, v, Bond::single()));
325 mol.add_bonds_from(bonds);
326 }
327 mol
328 })
329 )
330}
331
332#[test]
333fn test_pdb_molecule() {
334 let lines = "\
335SCALE3 0.000000 0.000000 0.132153 0.00000
336ATOM 1 O2 MOL 2 -4.808 4.768 2.469 1.00 0.00 O
337ATOM 2 O3 MOL 2 -6.684 6.549 1.983 1.00 0.00 O
338ATOM 3 T1 MOL 2 -5.234 6.009 1.536 1.00 0.00 Si1+
339ATOM 4 O1 MOL 2 -4.152 10.936 1.688 1.00 0.00 O
340ATOM 5 O1 MOL 2 -4.150 10.935 1.688 1.00 0.00 O
341ATOM 6 O2 MOL 2 -1.725 11.578 2.469 1.00 0.00 O
342ATOM 7 O2 MOL 2 -9.164 10.843 2.469 1.00 0.00 O
343ATOM 8 T1 MOL 2 -2.587 10.589 1.536 1.00 0.00 Si1+
344ATOM 9 T1 MOL 2 -7.877 10.591 1.536 1.00 0.00 Si1+
345ATOM 10 O2 MOL 2 -1.725 -6.548 2.469 1.00 0.00 O
346ATOM 11 O3 MOL 2 -2.330 -9.063 1.983 1.00 0.00 O
347ATOM 12 T1 MOL 2 -2.587 -7.537 1.536 1.00 0.00 Si1+
348ATOM 13 O1 MOL 2 -7.395 -9.064 1.688 1.00 0.00 O
349TER 367
350CONECT 2 4
351CONECT 3 4
352END\n\n";
353
354 let (r, v) = read_molecule(lines).expect("pdb molecule");
355 assert_eq!(13, v.natoms());
356 assert_eq!(2, v.nbonds());
357
358 let lines = "\
359REMARK Created: 2018-10-22T12:36:28Z
360SCALE3 0.000000 0.000000 0.132153 0.00000
361ATOM 1 O2 MOL 2 -4.808 4.768 2.469 1.00 0.00 O
362ATOM 2 O3 MOL 2 -6.684 6.549 1.983 1.00 0.00 O
363ATOM 3 T1 MOL 2 -5.234 6.009 1.536 1.00 0.00 Si1+
364ATOM 4 O1 MOL 2 -4.152 10.936 1.688 1.00 0.00 O
365ATOM 5 O1 MOL 2 -4.150 10.935 1.688 1.00 0.00 O
366ATOM 6 O2 MOL 2 -1.725 11.578 2.469 1.00 0.00 O
367ATOM 7 O2 MOL 2 -9.164 10.843 2.469 1.00 0.00 O
368ATOM 8 T1 MOL 2 -2.587 10.589 1.536 1.00 0.00 Si1+
369ATOM 9 T1 MOL 2 -7.877 10.591 1.536 1.00 0.00 Si1+
370ATOM 10 O2 MOL 2 -1.725 -6.548 2.469 1.00 0.00 O
371ATOM 11 O3 MOL 2 -2.330 -9.063 1.983 1.00 0.00 O
372ATOM 12 T1 MOL 2 -2.587 -7.537 1.536 1.00 0.00 Si1+
373ATOM 13 O1 MOL 2 -7.395 -9.064 1.688 1.00 0.00 O
374\n\n\n";
375
376 let (r, v) = read_molecule(&lines).expect("pdb molecule no bonds");
377 assert_eq!(13, v.natoms());
378 assert_eq!(0, v.nbonds());
379 let txt = "\
380CRYST1 54.758 54.758 55.584 90.00 90.00 90.00 P 1 1
381ATOM 1 SI1 SIO2X 1 1.494 1.494 0.000 1.00 0.00 UC1 SI
382ATOM 2 O11 SIO2X 1 1.194 0.514 1.240 1.00 0.00 UC1 O
383ATOM 3 SI2 SIO2X 1 3.484 3.484 3.474 1.00 0.00 UC1 SI
384ATOM 4 O12 SIO2X 1 3.784 4.464 4.714 1.00 0.00 UC1 O
385ATOM 5 SI3 SIO2X 1 0.995 3.983 1.737 1.00 0.00 UC1 SI
386ATOM 6 O13 SIO2X 1 1.975 3.683 2.977 1.00 0.00 UC1 O
387ATOM 7 SI4 SIO2X 1 3.983 0.995 5.211 1.00 0.00 UC1 SI
388ATOM 8 O14 SIO2X 1 3.003 1.295 6.451 1.00 0.00 UC1 O
389ATOM 9 O21 SIO2X 1 1.295 3.003 0.497 1.00 0.00 UC1 O
390ATOM 10 O22 SIO2X 1 3.683 1.975 3.971 1.00 0.00 UC1 O
391ATOM 11 O23 SIO2X 1 0.514 1.194 5.708 1.00 0.00 UC1 O
392ATOM 12 O24 SIO2X 1 4.464 3.784 2.234 1.00 0.00 UC1 O
393END\n
394";
395 let (_, v) = read_molecule(&txt).expect("pdb crystal");
396 assert_eq!(12, v.natoms());
397 assert!(v.lattice.is_some());
398}
399fn format_molecule(mol: &Molecule) -> String {
403 if mol.natoms() > 9999 {
404 warn!("PDB format is incapable for large molecule (natoms < 9999)");
405 }
406
407 let mut lines = String::from("REMARK Created by gchemol\n");
409 if let Some(lat) = mol.get_lattice() {
411 let [a, b, c] = lat.lengths();
412 let [alpha, beta, gamma] = lat.angles();
413 lines.push_str(&format!("CRYST1{a:9.4}{b:9.4}{c:9.4}{alpha:7.2}{beta:7.2}{gamma:7.2} P1 1\n"))
414 }
415 for (i, a) in mol.atoms() {
416 let line = format_atom(i, a);
417 lines.push_str(&line);
418 }
419
420 if mol.nbonds() > 0 {
422 lines.push_str(&format_bonds(&mol));
423 }
424
425 lines.push_str("END\n");
426
427 lines
428}
429#[derive(Clone, Copy, Debug)]
433pub struct PdbFile();
434
435impl ChemicalFile for PdbFile {
436 fn ftype(&self) -> &str {
437 "text/pdb"
438 }
439
440 fn possible_extensions(&self) -> Vec<&str> {
441 vec![".pdb", ".ent"]
442 }
443
444 fn format_molecule(&self, mol: &Molecule) -> Result<String> {
445 Ok(format_molecule(mol))
446 }
447}
448
449impl ParseMolecule for PdbFile {
450 fn parse_molecule(&self, input: &str) -> Result<Molecule> {
451 let (_, mol) = read_molecule(input).map_err(|e| format_err!("parse PDB format failure: {:?}", e))?;
452 Ok(mol)
453 }
454}
455impl ReadPart for PdbFile {
459 fn read_next(&self, context: ReadContext) -> ReadAction {
461 Terminated(|line: &str| line == "ENDMDL\n").read_next(context)
462 }
463}
464
465impl PdbFile {
466 pub fn partitions<R: BufRead + Seek>(&self, mut r: TextReader<R>) -> Result<impl Iterator<Item = String>> {
467 Ok(r.partitions(*self))
468 }
469}
470