Skip to main content

nom_pdb/
complete.rs

1// Copyright (c) 2020 Tianyi Shi
2//
3// This software is released under the MIT License.
4// https://opensource.org/licenses/MIT
5
//! A simple single-threaded parser.
7use crate::{
8    coordinate::*, crystallography::*, primary_structure::*, secondary_structure::*,
9    title_section::*,
10};
11
12// use crate::common::error::PdbParseError;
13use crate::common::parser::FieldParser;
14use nom::bytes::complete::take;
15use nom::character::complete::{line_ending, not_line_ending};
16
17use crate::types::{
18    Connect, Helix, Model, ModifiedAminoAcidTable, ModifiedNucleotideTable, Sheet, Ssbond,
19    Structure,
20};
21
22use protein_core::metadata::*;
23
/// Entry point of the single-threaded PDB parser.
///
/// The type has no fields and holds no state; parsing is exposed through
/// associated functions that do not take `self`.
pub struct Parser {}
25
26impl Parser {
27    pub fn parse(input: &[u8]) -> Result<Structure, Box<dyn std::error::Error + '_>> {
28        let (_, result) = Self::_parse(input)?;
29        Ok(result)
30    }
31
32    fn _parse(mut inp: &[u8]) -> nom::IResult<&[u8], Structure> {
33        let mut metadata = Metadata::default();
34
35        let mut seqres_buffer: Vec<u8> = Default::default();
36        let mut ssbonds: Vec<Ssbond> = Default::default();
37
38        let mut helices: Vec<Helix> = Vec::new();
39        let mut sheets: Vec<Sheet> = Vec::new();
40
41        let mut connect: Vec<Connect> = Vec::new();
42
43        let mut models: Vec<Model> = vec![Model::default()];
44
45        let mut modified_aa: ModifiedAminoAcidTable = Default::default();
46        let mut modified_nuc: ModifiedNucleotideTable = Default::default();
47
48        let mut model_idx = 0;
49
50        loop {
51            let (i, tag) = take(6usize)(inp)?;
52            inp = match tag {
53                b"HEADER" => HeaderParser::parse_into_option(&i, &mut metadata.header),
54                b"TITLE " => TitleParser::parse_into_option(&i, &mut metadata.title),
55                b"AUTHOR" => AuthorsParser::parse_into_option(&i, &mut metadata.authors),
56                b"CRYST1" => Cryst1Parser::parse_into_option(&i, &mut metadata.cryst1),
57                b"SEQRES" => SeqResParser::buffer_seqres(&i, &mut seqres_buffer)?.0,
58                b"MODRES" => ModresParser::parse_into(&i, &mut modified_aa, &mut modified_nuc)?.0,
59                b"SSBOND" => SsbondParser::parse_into_vec(&i, &mut ssbonds),
60                b"EXPDTA" => ExperimentalTechniquesParser::parse_into_option(
61                    &i,
62                    &mut metadata.experimental_techniques,
63                ),
64                b"ATOM  " | b"HETATM" => {
65                    let (i, atom) = GenericAtomParser::parse(&i, &modified_aa, &modified_nuc)?;
66                    models[model_idx].atoms.push(atom);
67                    i
68                }
69                b"ANISOU" => AnisouParser::parse_into_vec(&i, &mut models[model_idx].anisou),
70                b"CONECT" => {
71                    let (i, cnct) = ConectParser::parse(&i)?;
72                    for c in cnct {
73                        if !connect.contains(&c) {
74                            connect.push(c); // ! is this reliable?
75                        }
76                    }
77                    i
78                }
79                b"MODEL " => {
80                    if models.len() != 1 {
81                        // * if there's one model, there would be no "MODEL"
82                        models.push(Model::default());
83                        model_idx += 1;
84                    }
85                    let (i, _) = not_line_ending(i)?;
86                    let (i, _) = line_ending(i)?;
87                    i
88                }
89                b"SHEET " => SheetParser::parse_into_vec(&i, &mut sheets),
90                b"HELIX " => HelixParser::parse_into_vec(&i, &mut helices),
91                b"END   " => {
92                    inp = b"";
93                    break;
94                }
95                _ => {
96                    // new line
97                    let (i, _) = not_line_ending(i)?;
98                    let (i, _) = line_ending(i)?;
99                    i
100                } //panic!("Unkown field"),
101            }
102        }
103        let (_, (chains_aa, chains_nuc)) =
104            SeqResParser::parse(&seqres_buffer, &modified_aa, &modified_nuc).unwrap();
105        Ok((
106            inp,
107            Structure {
108                chains_aa,
109                chains_nuc,
110                helices,
111                sheets,
112                ssbonds,
113                modified_aa,
114                modified_nuc,
115                connect,
116                models,
117                metadata: Some(metadata),
118            },
119        ))
120    }
121}