hml_rs/hml_reader/
parser.rs

1//a Imports
2use super::{CloseTag, OpenTag, StackElement, Token, TokenType};
3use crate::markup::{ContentType, Event};
4use crate::names::NamespaceStack;
5use crate::{HmlError, HmlResult, Posn, Span};
6
7//a Internal types
//ti TagExtra
/// Per-tag data that the parser attaches to the open and close tags
/// it builds from tokens
///
/// It is a plain value type (two small `Copy` fields), so it derives
/// the usual value traits as well as `Debug`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct TagExtra {
    /// Depth reported by the token that produced the tag
    depth: usize,
    /// True if the token that produced the tag was marked as boxed;
    /// the parser always passes `false` for close tags
    boxed: bool,
}
impl TagExtra {
    /// Bundle a tag depth and a boxed flag
    fn new(depth: usize, boxed: bool) -> Self {
        Self { depth, boxed }
    }
}
19
20//a Public types: Parser and TokenFn
//tp Parser
/// A parser that turns a stream of `Token`s into markup `Event`s,
/// generic over the file position type `P`
///
/// The parser is driven by repeated calls to `next_event`; all of its
/// fields are bookkeeping for work that has been started by one token
/// but must be completed over several events.
pub struct Parser<P>
where
    P: Posn,
{
    /// Version number reported in the start-of-document event (100 by
    /// default; changed with `set_version`)
    version: usize,
    /// Set when an end-of-file token has been handled; from then on
    /// `next_event` closes any elements still open and then ends the
    /// document
    pending_eof: bool,
    /// True once the start-of-document event has been returned
    start_emitted: bool,
    /// Set once end-of-file has been seen and the tag stack is empty;
    /// makes `next_event` return the end-of-document event
    end_emitted: bool,
    /// True once the end-of-document event has been returned; any
    /// further call to `next_event` is an error
    finished: bool,
    /// Depth that incoming open tags are compared against; it is reset
    /// to 0 when a boxed element is opened and restored when an
    /// element is popped
    tag_depth: usize,
    /// Stack of the elements that are currently open
    tag_stack: Vec<StackElement<P, TagExtra>>,
    /// An open tag that has been read but not yet opened - it is put
    /// back here each time an element has to be closed first
    pending_open_tag: Option<OpenTag<P, TagExtra>>,
    /// A close tag that has been read but not yet fully handled - it
    /// is put back here each time an element has to be closed first
    pending_close_tag: Option<CloseTag<P, TagExtra>>,
    /// A token held back because the start-element event for the
    /// element being built had to be returned before handling it
    pending_token: Option<Token<P>>,
    /// True from when an element is pushed on the tag stack until its
    /// start-element event is returned; attributes are only accepted
    /// while this is set
    start_element_building: bool,
    /// End position of the most recent token that was handled as
    /// content; used to place document start/end, end-of-file and
    /// implicit end-element spans
    token_pos: P,
}
41
42//ip Default for Parser
43impl<P> Default for Parser<P>
44where
45    P: Posn,
46{
47    fn default() -> Self {
48        Parser {
49            version: 100,
50            start_emitted: false,
51            end_emitted: false,
52            finished: false,
53            tag_depth: 0,
54            tag_stack: Vec::new(),
55            pending_eof: false,
56            pending_open_tag: None,
57            pending_close_tag: None,
58            pending_token: None,
59            start_element_building: false,
60            token_pos: P::default(),
61        }
62    }
63}
64
65//ip Parser
66impl<P> Parser<P>
67where
68    P: Posn,
69{
70    //mp set_version
71    /// Set the target XML version number - 100 for 1.00, or 110 for
72    /// 1.10
73    #[inline]
74    pub fn set_version(mut self, version: usize) -> Self {
75        self.version = version;
76        self
77    }
78
79    //mi pop_tag_stack
80    /// Pops the tag stack and returns an Event of an end of that element
81    fn pop_tag_stack(
82        &mut self,
83        ns_stack: &mut NamespaceStack,
84        span: &Span<P>,
85    ) -> HmlResult<Option<Event<P>>, P> {
86        assert!(!self.tag_stack.is_empty());
87        let (e, depth) = self.tag_stack.pop().unwrap().as_end_element(ns_stack, span);
88        self.tag_depth = depth;
89        Ok(Some(e))
90    }
91
92    //mi handle_pending_eof
93    fn handle_pending_eof(
94        &mut self,
95        ns_stack: &mut NamespaceStack,
96    ) -> HmlResult<Option<Event<P>>, P> {
97        if self.tag_stack.is_empty() {
98            self.end_emitted = true;
99            Ok(None)
100        } else {
101            let span = Span::new_at(&self.token_pos);
102            self.pop_tag_stack(ns_stack, &span)
103        }
104    }
105
106    //mi handle_close_tag
107    /// A close tag closes all elements whose tag depth is > 0
108    ///
109    /// If the tag depth is 0 then the close tag should match the top of the tag stack
110    fn handle_close_tag(
111        &mut self,
112        ns_stack: &mut NamespaceStack,
113        close_tag: CloseTag<P, TagExtra>,
114    ) -> HmlResult<Option<Event<P>>, P> {
115        // If there are tags that are close the current element at the top of the stack
116        if self.tag_depth > 0 {
117            let span = Span::new_at(close_tag.span().start());
118            self.pending_close_tag = Some(close_tag);
119            self.pop_tag_stack(ns_stack, &span)
120        } else {
121            // should validate close_tag matches the StackElement at the top of the tag stack
122            self.pop_tag_stack(ns_stack, close_tag.span())
123        }
124    }
125
126    //mi handle_open_tag
127    /// If the OpenTag has a depth <= the current then close the top of the tag stack
128    ///
129    /// If the OpenTag has a depth == current+1 then open it up
130    ///
131    /// If the OpenTag has a depth > current+1 then it has too much depth
132    fn handle_open_tag(
133        &mut self,
134        ns_stack: &mut NamespaceStack,
135        open_tag: OpenTag<P, TagExtra>,
136    ) -> HmlResult<Option<Event<P>>, P> {
137        if open_tag.extra.depth <= self.tag_depth {
138            let span = Span::new_at(open_tag.span().start());
139            self.pending_open_tag = Some(open_tag);
140            self.pop_tag_stack(ns_stack, &span)
141        } else if open_tag.extra.depth == self.tag_depth + 1 {
142            // open the new element
143            let boxed = open_tag.extra.boxed;
144            self.tag_stack
145                .push(StackElement::new(ns_stack, self.tag_depth, open_tag));
146            self.start_element_building = true;
147            self.tag_depth += 1;
148            if boxed {
149                self.tag_depth = 0;
150            }
151            Ok(None)
152        } else {
153            // tag with too much depth
154            HmlError::unexpected_tag_indent(*open_tag.span(), self.tag_depth + 1)
155        }
156    }
157
158    //mi handle_token
159    fn handle_token(
160        &mut self,
161        ns_stack: &mut NamespaceStack,
162        mut token: Token<P>,
163    ) -> HmlResult<Option<Event<P>>, P> {
164        if token.is_whitespace() {
165            return Ok(None);
166        }
167        if self.start_element_building && !token.is_attribute() {
168            self.start_element_building = false;
169            self.pending_token = Some(token);
170            Ok(Some(
171                self.tag_stack
172                    .last_mut()
173                    .unwrap()
174                    .as_start_element(ns_stack)?,
175            ))
176        } else {
177            self.token_pos = *token.get_span().end();
178            match token.token_type() {
179                TokenType::Comment => {
180                    let mut lengths = Vec::new();
181                    let mut s = String::new();
182                    for (i, c) in token.take_contents().into_iter().enumerate() {
183                        lengths.push(c.len());
184                        if i > 0 {
185                            s.push('\n');
186                        }
187                        s += &c;
188                    }
189                    Ok(Some(Event::comment(*token.get_span(), s, lengths)))
190                }
191                TokenType::TagOpen => {
192                    let span = *token.get_span();
193                    let mut args = token.take_contents();
194                    let prefix = args.pop_front().unwrap();
195                    let name = args.pop_front().unwrap();
196                    self.pending_open_tag = Some(OpenTag::new(
197                        span,
198                        prefix,
199                        name,
200                        TagExtra::new(token.get_depth(), token.get_boxed()),
201                    ));
202                    Ok(None)
203                }
204                TokenType::TagClose => {
205                    let span = *token.get_span();
206                    let mut args = token.take_contents();
207                    let prefix = args.pop_front().unwrap();
208                    let name = args.pop_front().unwrap();
209                    let close_tag = CloseTag::new(
210                        span,
211                        ns_stack,
212                        &prefix,
213                        &name,
214                        TagExtra::new(token.get_depth(), false),
215                    )?;
216                    self.pending_close_tag = Some(close_tag);
217                    Ok(None)
218                }
219                TokenType::Attribute => {
220                    let span = *token.get_span();
221                    let mut args = token.take_contents();
222                    let prefix = args.pop_front().unwrap();
223                    let name = args.pop_front().unwrap();
224                    let value = args.pop_front().unwrap();
225                    if self.start_element_building {
226                        self.tag_stack
227                            .last_mut()
228                            .unwrap()
229                            .add_attribute(span, ns_stack, &prefix, &name, value)?;
230                        Ok(None)
231                    } else {
232                        HmlError::unexpected_attribute(span, &prefix, &name)
233                    }
234                }
235                TokenType::Characters => {
236                    let mut data = token.take_contents();
237                    let data = data.pop_front().unwrap();
238                    Ok(Some(Event::content(
239                        *token.get_span(),
240                        ContentType::Interpretable,
241                        data,
242                    )))
243                }
244                TokenType::RawCharacters => {
245                    let mut data = token.take_contents();
246                    let data = data.pop_front().unwrap();
247                    Ok(Some(Event::content(
248                        *token.get_span(),
249                        ContentType::Raw,
250                        data,
251                    )))
252                }
253                TokenType::Whitespace => Ok(None),
254                TokenType::EndOfFile => {
255                    self.pending_eof = true;
256                    Ok(None)
257                }
258            }
259        }
260    }
261
262    //mp next_event
263    /// next_event
264    pub fn next_event<T>(
265        &mut self,
266        ns_stack: &mut NamespaceStack,
267        mut get_token: T,
268    ) -> HmlResult<Event<P>, P>
269    where
270        T: FnMut() -> Option<HmlResult<Token<P>, P>>,
271    {
272        loop {
273            if !self.start_emitted {
274                self.start_emitted = true;
275                let span = Span::new_at(&self.token_pos);
276                return Ok(Event::start_document(span, self.version));
277            } else if self.finished {
278                return HmlError::no_more_events();
279            } else if self.end_emitted {
280                self.finished = true;
281                let span = Span::new_at(&self.token_pos);
282                return Ok(Event::end_document(span));
283            }
284            if let Some(event) = {
285                if self.pending_eof {
286                    self.handle_pending_eof(ns_stack)
287                } else if let Some(close_tag) = self.pending_close_tag.take() {
288                    self.handle_close_tag(ns_stack, close_tag)
289                } else if let Some(open_tag) = self.pending_open_tag.take() {
290                    self.handle_open_tag(ns_stack, open_tag)
291                } else if let Some(token) = self.pending_token.take() {
292                    self.handle_token(ns_stack, token)
293                } else if let Some(token) = get_token() {
294                    self.handle_token(ns_stack, token?)
295                } else {
296                    let span = Span::new_at(&self.token_pos);
297                    let token = Token::eof(span);
298                    self.handle_token(ns_stack, token)
299                }
300            }? {
301                return Ok(event);
302            }
303        }
304    }
305}