gchemol_readwrite/formats/
pdb.rs

1// [[file:../../gchemol-readwrite.note::*header][header:1]]
2// parses the following record types in a PDB file:
3//
4// CRYST1
5// ATOM or HETATM
6// TER
7// END or ENDMDL
8// CONECT
9// header:1 ends here
10
11// [[file:../../gchemol-readwrite.note::*imports][imports:1]]
12use super::*;
13use super::parser::*;
14// imports:1 ends here
15
16// [[file:../../gchemol-readwrite.note::*crystal][crystal:1]]
17fn read_lattice(s: &str) -> IResult<&str, Lattice> {
18    let mut cryst1 = tag("CRYST1");
19    let mut read_length = map_res(take_s(9), |x| x.trim().parse::<f64>());
20    let mut read_angle = map_res(take_s(7), |x| x.trim().parse::<f64>());
21    let mut take1 = take_s(1);
22    let mut take11 = take_s(11);
23    do_parse!(
24        s,
25        cryst1 >> // CRYST1
26        a     : read_length  >> // vector a
27        b     : read_length  >> // vector b
28        c     : read_length  >> // vector c
29        alpha : read_angle   >> // angle alpha
30        beta  : read_angle   >> // angle beta
31        gamma : read_angle   >> // angle gamma
32                take1        >> // skip one char
33                take11       >> // space group
34                read_line    >> // ignore remaing part
35        (
36            {
37                let mut lat = Lattice::from_params(a, b, c, alpha, beta, gamma);
38
39                lat
40            }
41        )
42    )
43}
44
45#[test]
46fn test_pdb_lattice() {
47    let lines = "CRYST1   18.126   18.126    7.567  90.00  90.00 120.00 P6/MMM
48ORIGX1      1.000000  0.000000  0.000000        0.00000
49ORIGX2      0.000000  1.000000  0.000000        0.00000
50ORIGX3      0.000000  0.000000  1.000000        0.00000
51SCALE1      0.055169  0.031852  0.000000        0.00000
52SCALE2      0.000000  0.063704  0.000000        0.00000
53SCALE3      0.000000  0.000000  0.132153        0.00000
54ATOM      1  O2  MOL     2      -4.808   4.768   2.469  1.00  0.00           O
55ATOM      2  O3  MOL     2      -6.684   6.549   1.983  1.00  0.00           O
56ATOM      3 T1   MOL     2      -5.234   6.009   1.536  1.00  0.00          Si1+
57";
58    let (_, mut v) = read_lattice(lines).expect("pdb lattice");
59    let abc = v.lengths();
60    assert_eq!(abc[1], 18.126);
61}
62// crystal:1 ends here
63
64// [[file:../../gchemol-readwrite.note::*element][element:1]]
65fn guess_element<'a>(name: &'a str, r: &'a str) -> Option<&'a str> {
66    // 1. return element symbol without whitespace
67    if let Some(sym) = r.get(22..24).and_then(|s| Some(s.trim())) {
68        if !sym.is_empty() {
69            return Some(sym);
70        }
71    }
72
73    // 2. check atom name
74    // ignore the first char if it is a digit
75    if let Some(e) = name.chars().next() {
76        if !e.is_alphabetic() {
77            return name.get(1..2);
78        }
79    }
80    return name.get(0..1);
81}
82
83#[test]
84fn test_guess_element() {
85    // case 1: with columns containing element symbols
86    let x = guess_element("1CA ", "  1.00  0.00      UC1 SI");
87    assert_eq!(Some("SI"), x);
88    let x = guess_element("1CA ", "  1.00  0.00      UC1  I");
89    assert_eq!(Some("I"), x);
90
91    // case 2: without columns containing element symbols
92    let x = guess_element("CA  ", "");
93    assert_eq!(Some("C"), x);
94    let x = guess_element("1SA  ", "");
95    assert_eq!(Some("S"), x);
96    let x = guess_element(" N B ", "");
97    assert_eq!(Some("N"), x);
98    // when the remained is just whitespace
99    let x = guess_element(" H   ", "                        ");
100    assert_eq!(Some("H"), x);
101}
102// element:1 ends here
103
104// [[file:../../gchemol-readwrite.note::1bfea000][1bfea000]]
105// Return Atom index (sn) and Atom object
106fn read_atom_record(s: &str) -> IResult<&str, (usize, Atom)> {
107    let mut tag_atom = alt((tag("ATOM  "), tag("HETATM")));
108    let mut take1 = take_s(1);
109    let mut take3 = take_s(3);
110    let mut take4 = take_s(4);
111    let mut read_coord = map_res(take_s(8), |x| x.trim().parse::<f64>());
112    let mut read_sn = map_res(take_s(5), |x| x.trim().parse::<usize>());
113    do_parse!(
114        s,
115        tag_atom >> // 1-6
116        sn      : read_sn >> // 7-11
117                  take1   >> // 12
118        name    : take4   >> // 13-16
119        alt_loc : take1   >> // 17
120        res_name: take3   >> // 18-20
121                  take1   >> // 21
122        chain_id: take1   >> // 22
123        res_seq : take4   >> // 23-26
124        icode   : take1   >> // 27
125                  take3   >> // 28-30
126        x       : read_coord   >> // 31-38
127        y       : read_coord   >> // 39-46
128        z       : read_coord   >> // 47-54
129        rest    : read_line                                  >>
130        (
131            {
132                // TODO: take more attributes
133                let sym = guess_element(name, rest).unwrap();
134                let mut a = Atom::new(sym, [x, y, z]);
135
136                (sn, a)
137            }
138        )
139    )
140}
141
142// Render atom in pdb format
143fn format_atom(i: usize, a: &Atom) -> String {
144    let [x, y, z] = a.position();
145    format!(
146        "ATOM  {index:>5} {name:<4}{alt_loc:1}{res_name:<3} {chain_id:1}{res_seq:>4}{icode:>1}   {x:-8.3}{y:-8.3}{z:-8.3}  1.00  0.00          {symbol:>2}\n",
147        index=i,
148        alt_loc=" ",
149        res_name="xx",
150        name=a.get_label().unwrap_or(a.symbol()),
151        chain_id=1,
152        res_seq=1,
153        icode=" ",
154        symbol=a.symbol(),
155        x=x,
156        y=y,
157        z=z,
158    )
159}
160
161#[test]
162fn test_pdb_atom() {
163    let line = "ATOM      3  SI2 SIO2X   1       3.484   3.484   3.474\n";
164    let (_, (i, a)) = read_atom_record(line).expect("pdb atom");
165    assert_eq!(3, i);
166    assert_eq!("S", a.symbol());
167    assert_eq!([3.484, 3.484, 3.474], a.position());
168
169    let line = "ATOM      3  SI2 SIO2X   1       3.484   3.484   3.474  1.00  0.00      UC1 SI\n";
170    let (_, (i, a)) = read_atom_record(line).expect("pdb atom");
171    assert_eq!("Si", a.symbol());
172
173    let line = "HETATM 1632  O1S MID E   5      -6.883   5.767  26.435  1.00 26.56           O \n";
174    let (_, (i, a)) = read_atom_record(line).expect("pdb atom");
175    assert_eq!(1632, i);
176    assert_eq!("O", a.symbol());
177    assert_eq!([-6.883, 5.767, 26.435], a.position());
178
179    let line = format_atom(3, &a);
180    let (_, (i, b)) = read_atom_record(&line).expect("pdb atom");
181    assert_eq!(3, i);
182    assert_eq!(a.symbol(), b.symbol());
183    assert_eq!(a.position(), b.position());
184}
185
186fn read_atoms(s: &str) -> IResult<&str, Vec<(usize, Atom)>> {
187    let mut read_atom_list = many0(read_atom_record);
188    do_parse!(s, atoms: read_atom_list >> (atoms))
189}
190
191#[test]
192fn test_pdb_get_atoms() {
193    let lines = "HETATM 1631  S   MID E   5      -5.827   4.782  25.917  1.00 24.57           S
194HETATM 1634  C1  MID E   5      -3.761   3.904  27.580  1.00 28.14           C
195ATOM   1634  C1  MID E   5      -3.761   3.904  27.580  1.00 28.14           C
196HETATM 1641  C8  MID E   5      -2.096   3.018  29.071  1.00 30.82           C\n\n";
197    let (_, atoms) = read_atoms(lines).expect("pdb atoms");
198    assert_eq!(4, atoms.len());
199}
200// 1bfea000 ends here
201
202// [[file:../../gchemol-readwrite.note::*bond records][bond records:1]]
203fn read_bond_record(s: &str) -> IResult<&str, Vec<(usize, usize)>> {
204    let mut tag_conect = tag("CONECT");
205    let mut atom_sn = map_res(take_s(5), |x| x.trim().parse::<usize>());
206    let mut atom_sn2 = map_res(take_s(5), |x| x.trim().parse::<usize>());
207    let mut bonded_atoms = many1(atom_sn2);
208    do_parse!(
209        s,
210        tag_conect >>                    // CONECT
211        current: atom_sn       >>        // serial number of current atom
212        others : bonded_atoms  >> eol >> // serial number of bonded atoms
213        (
214            {
215                let mut pairs = vec![];
216                for other in others {
217                    pairs.push((current, other));
218                }
219
220                pairs
221            }
222        )
223    )
224}
225
226fn format_bonds(mol: &Molecule) -> String {
227    let mut lines = String::new();
228
229    // connectivity
230    // FIXME: add new method in molecule
231    let mut map = std::collections::HashMap::new();
232    for (i, j, b) in mol.bonds() {
233        let mut neighbors = map.entry(i).or_insert(vec![]);
234        neighbors.push((j, b.order()));
235    }
236    for (i, a) in mol.atoms() {
237        if let Some(neighbors) = map.get(&i) {
238            let mut line = format!("CONECT{:>5}", i);
239            for (j, _) in neighbors {
240                line.push_str(&format!("{:>5}", j));
241            }
242            lines.push_str(&format!("{}\n", line));
243        }
244    }
245
246    lines
247}
248
249#[test]
250fn test_pdb_read_bond() {
251    let line = "CONECT 1179 1211 1222 \n";
252    let (_, x) = read_bond_record(line).expect("pdb bond record test1");
253    assert_eq!(2, x.len());
254
255    let line = "CONECT 2041 2040 2042\n";
256    let (_, x) = read_bond_record(line).unwrap();
257    assert_eq!(2, x.len());
258
259    let line = "CONECT 1179  746 11        \n";
260    let (r, x) = read_bond_record(line).unwrap();
261    assert_eq!(2, x.len());
262}
263
264fn read_bonds(s: &str) -> IResult<&str, Vec<(usize, usize)>> {
265    let mut bond_list = many1(read_bond_record);
266    do_parse!(
267        s,
268        bonds: bond_list >> (bonds.into_iter().flat_map(|x| x).collect())
269    )
270}
271
272#[test]
273fn test_pdb_get_bonds() {
274    let lines = "CONECT 2028 2027 2029
275CONECT 2041 2040 2042
276CONECT 2043 2042 2044
277\n";
278
279    let (_, x) = read_bonds(lines).expect("pdb bonds");
280    assert_eq!(6, x.len());
281
282    let lines = "CONECT 2028 2027 2029
283\n";
284
285    let (_, x) = read_bonds(lines).expect("pdb missing bonds");
286    assert_eq!(2, x.len());
287}
288// bond records:1 ends here
289
290// [[file:../../gchemol-readwrite.note::*parse][parse:1]]
291// quick jump to starting position
292fn jump1(s: &str) -> IResult<&str, ()> {
293    let possible_tags = alt((tag("CRYST1"), tag("ATOM  "), tag("HETATM")));
294    let (r, _) = many_till(read_line, peek(possible_tags))(s)?;
295
296    Ok((r, ()))
297}
298
299fn read_molecule(s: &str) -> IResult<&str, Molecule> {
300    let mut read_lattice = opt(read_lattice);
301    let mut read_bonds = opt(read_bonds);
302    // recognize optional record between Atom and Bond
303    let mut sep_atoms_bonds = opt(alt((preceded(tag("TER"), read_line), tag("END\n"))));
304    do_parse!(
305        s,
306        jump1 >>             // seeking
307        lat: read_lattice >> // crystal info, optional
308        atoms: read_atoms >> // atoms, required
309        sep_atoms_bonds   >> // separator, optinal
310        bonds: read_bonds >> // bonds, optional
311        ({
312            // assign atoms
313            let mut mol = Molecule::new("for pdb");
314            mol.add_atoms_from(atoms);
315
316            // assign lattice
317            if let Some(lat) = lat {
318                mol.set_lattice(lat);
319            }
320
321            // assign bonds
322            if let Some(bonds) = bonds {
323                // FIXME: bond type
324                let bonds = bonds.into_iter().map(|(u, v)| (u, v, Bond::single()));
325                mol.add_bonds_from(bonds);
326            }
327            mol
328        })
329    )
330}
331
332#[test]
333fn test_pdb_molecule() {
334    let lines = "\
335SCALE3      0.000000  0.000000  0.132153        0.00000
336ATOM      1  O2  MOL     2      -4.808   4.768   2.469  1.00  0.00           O
337ATOM      2  O3  MOL     2      -6.684   6.549   1.983  1.00  0.00           O
338ATOM      3 T1   MOL     2      -5.234   6.009   1.536  1.00  0.00          Si1+
339ATOM      4  O1  MOL     2      -4.152  10.936   1.688  1.00  0.00           O
340ATOM      5  O1  MOL     2      -4.150  10.935   1.688  1.00  0.00           O
341ATOM      6  O2  MOL     2      -1.725  11.578   2.469  1.00  0.00           O
342ATOM      7  O2  MOL     2      -9.164  10.843   2.469  1.00  0.00           O
343ATOM      8 T1   MOL     2      -2.587  10.589   1.536  1.00  0.00          Si1+
344ATOM      9 T1   MOL     2      -7.877  10.591   1.536  1.00  0.00          Si1+
345ATOM     10  O2  MOL     2      -1.725  -6.548   2.469  1.00  0.00           O
346ATOM     11  O3  MOL     2      -2.330  -9.063   1.983  1.00  0.00           O
347ATOM     12 T1   MOL     2      -2.587  -7.537   1.536  1.00  0.00          Si1+
348ATOM     13  O1  MOL     2      -7.395  -9.064   1.688  1.00  0.00           O
349TER     367
350CONECT    2    4
351CONECT    3    4
352END\n\n";
353
354    let (r, v) = read_molecule(lines).expect("pdb molecule");
355    assert_eq!(13, v.natoms());
356    assert_eq!(2, v.nbonds());
357
358    let lines = "\
359REMARK   Created:  2018-10-22T12:36:28Z
360SCALE3      0.000000  0.000000  0.132153        0.00000
361ATOM      1  O2  MOL     2      -4.808   4.768   2.469  1.00  0.00           O
362ATOM      2  O3  MOL     2      -6.684   6.549   1.983  1.00  0.00           O
363ATOM      3 T1   MOL     2      -5.234   6.009   1.536  1.00  0.00          Si1+
364ATOM      4  O1  MOL     2      -4.152  10.936   1.688  1.00  0.00           O
365ATOM      5  O1  MOL     2      -4.150  10.935   1.688  1.00  0.00           O
366ATOM      6  O2  MOL     2      -1.725  11.578   2.469  1.00  0.00           O
367ATOM      7  O2  MOL     2      -9.164  10.843   2.469  1.00  0.00           O
368ATOM      8 T1   MOL     2      -2.587  10.589   1.536  1.00  0.00          Si1+
369ATOM      9 T1   MOL     2      -7.877  10.591   1.536  1.00  0.00          Si1+
370ATOM     10  O2  MOL     2      -1.725  -6.548   2.469  1.00  0.00           O
371ATOM     11  O3  MOL     2      -2.330  -9.063   1.983  1.00  0.00           O
372ATOM     12 T1   MOL     2      -2.587  -7.537   1.536  1.00  0.00          Si1+
373ATOM     13  O1  MOL     2      -7.395  -9.064   1.688  1.00  0.00           O
374\n\n\n";
375
376    let (r, v) = read_molecule(&lines).expect("pdb molecule no bonds");
377    assert_eq!(13, v.natoms());
378    assert_eq!(0, v.nbonds());
379    let txt = "\
380CRYST1   54.758   54.758   55.584  90.00  90.00  90.00 P 1           1
381ATOM      1  SI1 SIO2X   1       1.494   1.494   0.000  1.00  0.00      UC1 SI
382ATOM      2  O11 SIO2X   1       1.194   0.514   1.240  1.00  0.00      UC1  O
383ATOM      3  SI2 SIO2X   1       3.484   3.484   3.474  1.00  0.00      UC1 SI
384ATOM      4  O12 SIO2X   1       3.784   4.464   4.714  1.00  0.00      UC1  O
385ATOM      5  SI3 SIO2X   1       0.995   3.983   1.737  1.00  0.00      UC1 SI
386ATOM      6  O13 SIO2X   1       1.975   3.683   2.977  1.00  0.00      UC1  O
387ATOM      7  SI4 SIO2X   1       3.983   0.995   5.211  1.00  0.00      UC1 SI
388ATOM      8  O14 SIO2X   1       3.003   1.295   6.451  1.00  0.00      UC1  O
389ATOM      9  O21 SIO2X   1       1.295   3.003   0.497  1.00  0.00      UC1  O
390ATOM     10  O22 SIO2X   1       3.683   1.975   3.971  1.00  0.00      UC1  O
391ATOM     11  O23 SIO2X   1       0.514   1.194   5.708  1.00  0.00      UC1  O
392ATOM     12  O24 SIO2X   1       4.464   3.784   2.234  1.00  0.00      UC1  O
393END\n
394";
395    let (_, v) = read_molecule(&txt).expect("pdb crystal");
396    assert_eq!(12, v.natoms());
397    assert!(v.lattice.is_some());
398}
399// parse:1 ends here
400
401// [[file:../../gchemol-readwrite.note::ccd72c38][ccd72c38]]
402fn format_molecule(mol: &Molecule) -> String {
403    if mol.natoms() > 9999 {
404        warn!("PDB format is incapable for large molecule (natoms < 9999)");
405    }
406
407    // atoms
408    let mut lines = String::from("REMARK Created by gchemol\n");
409    // write crystal info
410    if let Some(lat) = mol.get_lattice() {
411        let [a, b, c] = lat.lengths();
412        let [alpha, beta, gamma] = lat.angles();
413        lines.push_str(&format!("CRYST1{a:9.4}{b:9.4}{c:9.4}{alpha:7.2}{beta:7.2}{gamma:7.2} P1            1\n"))
414    }
415    for (i, a) in mol.atoms() {
416        let line = format_atom(i, a);
417        lines.push_str(&line);
418    }
419
420    // bonds
421    if mol.nbonds() > 0 {
422        lines.push_str(&format_bonds(&mol));
423    }
424
425    lines.push_str("END\n");
426
427    lines
428}
429// ccd72c38 ends here
430
431// [[file:../../gchemol-readwrite.note::5436d589][5436d589]]
432#[derive(Clone, Copy, Debug)]
433pub struct PdbFile();
434
435impl ChemicalFile for PdbFile {
436    fn ftype(&self) -> &str {
437        "text/pdb"
438    }
439
440    fn possible_extensions(&self) -> Vec<&str> {
441        vec![".pdb", ".ent"]
442    }
443
444    fn format_molecule(&self, mol: &Molecule) -> Result<String> {
445        Ok(format_molecule(mol))
446    }
447}
448
449impl ParseMolecule for PdbFile {
450    fn parse_molecule(&self, input: &str) -> Result<Molecule> {
451        let (_, mol) = read_molecule(input).map_err(|e| format_err!("parse PDB format failure: {:?}", e))?;
452        Ok(mol)
453    }
454}
455// 5436d589 ends here
456
457// [[file:../../gchemol-readwrite.note::cc0cbfc6][cc0cbfc6]]
458impl ReadPart for PdbFile {
459    // for multi-model records
460    fn read_next(&self, context: ReadContext) -> ReadAction {
461        Terminated(|line: &str| line == "ENDMDL\n").read_next(context)
462    }
463}
464
465impl PdbFile {
466    pub fn partitions<R: BufRead + Seek>(&self, mut r: TextReader<R>) -> Result<impl Iterator<Item = String>> {
467        Ok(r.partitions(*self))
468    }
469}
470// cc0cbfc6 ends here