1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
use crate::{
coordinate::*, crystallography::*, primary_structure::*, secondary_structure::*,
title_section::*,
};
use crate::common::parser::FieldParser;
use nom::bytes::complete::take;
use nom::character::complete::{line_ending, not_line_ending};
use crate::types::{
Connect, Helix, Model, ModifiedAminoAcidTable, ModifiedNucleotideTable, Sheet, Ssbond,
Structure,
};
use protein_core::metadata::*;
/// Stateless entry point for parsing record-oriented structure files.
///
/// The type carries no data; it only groups the parsing functions defined in
/// its `impl` block.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Parser {}
impl Parser {
    /// Parses `input` into a [`Structure`].
    ///
    /// Thin wrapper around [`Parser::_parse`]: it discards the remaining input
    /// (which is always empty on success) and boxes the parser error.
    ///
    /// # Errors
    ///
    /// Returns the boxed `nom` error produced by the record loop, e.g. when the
    /// input runs out before an `END` record or a record parser fails.
    ///
    /// # Panics
    ///
    /// Panics if the buffered `SEQRES` data cannot be parsed after the record
    /// loop has finished (see the note in `_parse`).
    pub fn parse(input: &[u8]) -> Result<Structure, Box<dyn std::error::Error + '_>> {
        let (_, structure) = Self::_parse(input)?;
        Ok(structure)
    }

    /// Consumes the rest of the current line, including its line ending.
    fn skip_line(i: &[u8]) -> nom::IResult<&[u8], ()> {
        let (i, _) = not_line_ending(i)?;
        let (i, _) = line_ending(i)?;
        Ok((i, ()))
    }

    /// Record loop: reads a 6-byte record name, dispatches to the matching
    /// record parser, and repeats until an `END` record is found.
    ///
    /// Records with an unrecognised name are skipped up to and including the
    /// next line ending. On success the returned remainder is always empty,
    /// because everything after `END` is discarded.
    ///
    /// # Errors
    ///
    /// Fails when fewer than six bytes remain before an `END` record has been
    /// seen, or when any of the `?`-propagated record parsers fails.
    fn _parse(mut inp: &[u8]) -> nom::IResult<&[u8], Structure> {
        let mut metadata = Metadata::default();
        let mut seqres_buffer: Vec<u8> = Default::default();
        let mut ssbonds: Vec<Ssbond> = Default::default();
        let mut helices: Vec<Helix> = Vec::new();
        let mut sheets: Vec<Sheet> = Vec::new();
        let mut connect: Vec<Connect> = Vec::new();
        // One model always exists so that files without MODEL records still
        // have somewhere to put their atoms.
        let mut models: Vec<Model> = vec![Model::default()];
        let mut modified_aa: ModifiedAminoAcidTable = Default::default();
        let mut modified_nuc: ModifiedNucleotideTable = Default::default();
        let mut model_idx = 0;
        // Whether a MODEL record has been encountered yet. The first one reuses
        // the model created above; each later one opens a fresh model.
        let mut model_seen = false;
        loop {
            // Record names occupy exactly six bytes and are space-padded, so
            // every pattern below must be six bytes long to ever match.
            let (i, tag) = take(6usize)(inp)?;
            inp = match tag {
                b"HEADER" => HeaderParser::parse_into_option(&i, &mut metadata.header),
                b"TITLE " => TitleParser::parse_into_option(&i, &mut metadata.title),
                b"AUTHOR" => AuthorsParser::parse_into_option(&i, &mut metadata.authors),
                b"CRYST1" => Cryst1Parser::parse_into_option(&i, &mut metadata.cryst1),
                // SEQRES lines are only buffered here; they are parsed in one
                // go after the loop, once the MODRES tables are complete.
                b"SEQRES" => SeqResParser::buffer_seqres(&i, &mut seqres_buffer)?.0,
                b"MODRES" => ModresParser::parse_into(&i, &mut modified_aa, &mut modified_nuc)?.0,
                b"SSBOND" => SsbondParser::parse_into_vec(&i, &mut ssbonds),
                b"EXPDTA" => ExperimentalTechniquesParser::parse_into_option(
                    &i,
                    &mut metadata.experimental_techniques,
                ),
                b"ATOM  " | b"HETATM" => {
                    let (i, atom) = GenericAtomParser::parse(&i, &modified_aa, &modified_nuc)?;
                    models[model_idx].atoms.push(atom);
                    i
                }
                b"ANISOU" => AnisouParser::parse_into_vec(&i, &mut models[model_idx].anisou),
                b"CONECT" => {
                    let (i, cnct) = ConectParser::parse(&i)?;
                    // Keep only connections that have not been recorded yet.
                    for c in cnct {
                        if !connect.contains(&c) {
                            connect.push(c);
                        }
                    }
                    i
                }
                b"MODEL " => {
                    if model_seen {
                        models.push(Model::default());
                        model_idx += 1;
                    } else {
                        model_seen = true;
                    }
                    // The rest of the MODEL line is not interpreted.
                    Self::skip_line(i)?.0
                }
                b"SHEET " => SheetParser::parse_into_vec(&i, &mut sheets),
                b"HELIX " => HelixParser::parse_into_vec(&i, &mut helices),
                b"END   " => {
                    // Stop here; whatever follows END is dropped.
                    inp = b"";
                    break;
                }
                // Unhandled record type: ignore the whole line.
                _ => Self::skip_line(i)?.0,
            }
        }
        // NOTE(review): this still panics on SEQRES data that fails to parse.
        // Presumably the error borrows the local `seqres_buffer` and therefore
        // cannot simply be propagated with `?` — confirm before changing.
        let (_, (chains_aa, chains_nuc)) =
            SeqResParser::parse(&seqres_buffer, &modified_aa, &modified_nuc).unwrap();
        Ok((
            inp,
            Structure {
                chains_aa,
                chains_nuc,
                helices,
                sheets,
                ssbonds,
                modified_aa,
                modified_nuc,
                connect,
                models,
                metadata: Some(metadata),
            },
        ))
    }
}