fastobo/parser/
sequential.rs

1use std::convert::TryFrom;
2use std::fs::File;
3use std::io::BufRead;
4use std::io::BufReader;
5use std::iter::Iterator;
6
7use crate::ast::EntityFrame;
8use crate::ast::Frame;
9use crate::ast::HeaderClause;
10use crate::ast::HeaderFrame;
11use crate::ast::OboDoc;
12use crate::error::Error;
13use crate::error::SyntaxError;
14use crate::syntax::Lexer;
15use crate::syntax::Rule;
16
17use super::Cache;
18use super::FromPair;
19use super::Parser;
20
21/// An iterator reading entity frames contained in an OBO stream sequentially.
22pub struct SequentialParser<B: BufRead> {
23    stream: B,
24    line: String,
25    offset: usize,
26    line_offset: usize,
27    header: Option<Result<Frame, Error>>,
28    cache: Cache,
29}
30
31impl<B: BufRead> AsRef<B> for SequentialParser<B> {
32    fn as_ref(&self) -> &B {
33        &self.stream
34    }
35}
36
37impl<B: BufRead> AsRef<B> for Box<SequentialParser<B>> {
38    fn as_ref(&self) -> &B {
39        (**self).as_ref()
40    }
41}
42
43impl<B: BufRead> AsMut<B> for SequentialParser<B> {
44    fn as_mut(&mut self) -> &mut B {
45        &mut self.stream
46    }
47}
48
49impl<B: BufRead> AsMut<B> for Box<SequentialParser<B>> {
50    fn as_mut(&mut self) -> &mut B {
51        (**self).as_mut()
52    }
53}
54
55impl<B: BufRead> From<B> for SequentialParser<B> {
56    fn from(reader: B) -> Self {
57        <Self as Parser<B>>::new(reader)
58    }
59}
60
61impl<B: BufRead> From<B> for Box<SequentialParser<B>> {
62    fn from(stream: B) -> Self {
63        Box::new(SequentialParser::from(stream))
64    }
65}
66
67impl<B: BufRead> Iterator for SequentialParser<B> {
68    type Item = Result<Frame, Error>;
69
70    fn next(&mut self) -> Option<Self::Item> {
71        let mut l: &str;
72        let mut frame_lines = String::new();
73        let mut local_line_offset = 0;
74        let mut local_offset = 0;
75
76        if let Some(res) = self.header.take() {
77            return Some(res);
78        }
79
80        while !self.line.is_empty() {
81            // Store the line in the frame lines and clear the buffer.
82            frame_lines.push_str(&self.line);
83            self.line.clear();
84
85            // Read the next line.
86            if let Err(e) = self.stream.read_line(&mut self.line) {
87                return Some(Err(Error::from(e)));
88            }
89
90            // Process the frame if we reached the next frame.
91            l = self.line.trim_start();
92            if l.starts_with('[') || self.line.is_empty() {
93                let res = unsafe {
94                    match Lexer::tokenize(Rule::EntitySingle, &frame_lines) {
95                        Ok(mut pairs) => {
96                            EntityFrame::from_pair_unchecked(pairs.next().unwrap(), &self.cache)
97                                .map_err(Error::from)
98                        }
99                        Err(e) => Err(Error::from(
100                            SyntaxError::from(e).with_offsets(self.line_offset, self.offset),
101                        )),
102                    }
103                };
104
105                // Update offsets
106                self.line_offset += local_line_offset + 1;
107                self.offset += local_offset + self.line.len();
108                return Some(res.map(Frame::from));
109            }
110
111            // Update local offsets
112            local_line_offset += 1;
113            local_offset += self.line.len();
114        }
115
116        None
117    }
118}
119
120impl<B: BufRead> Parser<B> for SequentialParser<B> {
121    /// Create a new `SequentialParser` from the given stream.
122    ///
123    /// The constructor will parse the header frame right away, and return an
124    /// error if it fails. The header can then be accessed using the `header`
125    /// method.
126    fn new(mut stream: B) -> Self {
127        let cache = Cache::default();
128        let mut line = String::new();
129        let mut l: &str;
130        let mut offset = 0;
131        let mut line_offset = 0;
132        let mut frame_clauses = Vec::new();
133
134        let header = loop {
135            // Read the next line
136            line.clear();
137            if let Err(e) = stream.read_line(&mut line) {
138                break Some(Err(Error::from(e)));
139            };
140            l = line.trim_start();
141
142            // Parse header as long as we didn't reach EOL or first frame.
143            if !l.starts_with('[') && !l.is_empty() {
144                unsafe {
145                    // use `fastobo_syntax::Lexer` to tokenize the input
146                    let p = match Lexer::tokenize(Rule::HeaderClause, &line) {
147                        Ok(mut pairs) => pairs.next().unwrap(),
148                        Err(e) => {
149                            let err = SyntaxError::from(e).with_offsets(line_offset, offset);
150                            break Some(Err(Error::from(err)));
151                        }
152                    };
153                    // produce a header clause from the token stream
154                    match HeaderClause::from_pair_unchecked(p, &cache) {
155                        Ok(clause) => frame_clauses.push(clause),
156                        Err(e) => {
157                            let err = e.with_offsets(line_offset, offset);
158                            break Some(Err(Error::from(err)));
159                        }
160                    }
161                }
162            }
163
164            if l.starts_with('[') || line.is_empty() {
165                // Bail out if we reached EOL or first frame.
166                let frame = Frame::from(HeaderFrame::from(frame_clauses));
167                break Some(Ok(frame));
168            } else {
169                // Update offsets
170                line_offset += 1;
171                offset += line.len();
172            }
173        };
174
175        Self {
176            stream,
177            line,
178            offset,
179            line_offset,
180            header,
181            cache,
182        }
183    }
184
185    /// Make the parser yield frames in the order they appear in the document.
186    ///
187    /// This has no effect on `SequentialParser` since the frames are always
188    /// processed in the document order, but this method is provided for
189    /// consistency of the [`FrameReader`](./type.FrameReader.html) type.
190    fn ordered(&mut self, _ordered: bool) -> &mut Self {
191        self
192    }
193
194    /// Consume the reader and extract the internal reader.
195    fn into_inner(self) -> B {
196        self.stream
197    }
198}
199
200impl<B: BufRead> Parser<B> for Box<SequentialParser<B>> {
201    fn new(stream: B) -> Self {
202        Box::new(SequentialParser::new(stream))
203    }
204
205    fn ordered(&mut self, ordered: bool) -> &mut Self {
206        (**self).ordered(ordered);
207        self
208    }
209
210    fn into_inner(self) -> B {
211        (*self).into_inner()
212    }
213}
214
215impl<B: BufRead> TryFrom<SequentialParser<B>> for OboDoc {
216    type Error = Error;
217    fn try_from(mut parser: SequentialParser<B>) -> Result<Self, Self::Error> {
218        OboDoc::try_from(&mut parser)
219    }
220}
221
222impl<B: BufRead> TryFrom<&mut SequentialParser<B>> for OboDoc {
223    type Error = Error;
224    fn try_from(parser: &mut SequentialParser<B>) -> Result<Self, Self::Error> {
225        // extract the header and create the doc
226        let header = parser.next().unwrap()?.into_header().unwrap();
227
228        // extract the remaining entities
229        let entities = parser
230            .map(|r| r.map(|f| f.into_entity().unwrap()))
231            .collect::<Result<Vec<EntityFrame>, Error>>()?;
232
233        // return the doc
234        Ok(OboDoc::with_header(header).and_entities(entities))
235    }
236}
237
238impl<B: BufRead> TryFrom<Box<SequentialParser<B>>> for OboDoc {
239    type Error = Error;
240    fn try_from(mut reader: Box<SequentialParser<B>>) -> Result<Self, Self::Error> {
241        OboDoc::try_from(&mut (*reader))
242    }
243}
244
245impl From<File> for SequentialParser<BufReader<File>> {
246    fn from(f: File) -> Self {
247        Self::new(BufReader::new(f))
248    }
249}
250
251impl From<File> for Box<SequentialParser<BufReader<File>>> {
252    fn from(f: File) -> Self {
253        Box::new(SequentialParser::new(BufReader::new(f)))
254    }
255}