quick_xml/parser/
element.rs

1//! Contains a parser for an XML element.
2
3use crate::errors::SyntaxError;
4use crate::parser::Parser;
5
/// A parser that searches a byte slice for the `>` symbol which is located
/// outside of quoted regions.
///
/// Two kinds of quoted regions are recognized: double-quoted (`"..."`) and
/// single-quoted (`'...'`). A `>` found inside such a region is not reported
/// as a match. A region is opened and closed by the same quote symbol, and
/// that symbol cannot be escaped inside the region (it can be written as an
/// XML character reference or a named entity, but such an encoding never
/// contains a literal quote).
///
/// To use the parser, create an instance and [`feed`] data into it. When the
/// search succeeds, [`Some`] with the position of the found symbol is
/// returned; otherwise [`None`] is returned. Usually a match is expected
/// eventually, so keep feeding new chunks of data until one is found.
///
/// NOTE: after a successful match the parser is not reset to its initial
/// state and should not be used anymore. Create a new parser to perform
/// a new search.
///
/// # Example
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::parser::{ElementParser, Parser};
///
/// let mut parser = ElementParser::default();
///
/// // Parse `<my-element  with = 'some > inside'>and the text follow...`
/// // split into three chunks
/// assert_eq!(parser.feed(b"<my-element"), None);
/// // ...get new chunk of data
/// assert_eq!(parser.feed(b" with = 'some >"), None);
/// // ...get another chunk of data
/// assert_eq!(parser.feed(b" inside'>and the text follow..."), Some(8));
/// //                       ^       ^
/// //                       0       8
/// ```
///
/// [`feed`]: Self::feed()
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ElementParser {
    /// The initial state: inside an element, but outside of any attribute value.
    Outside,
    /// Inside a single-quoted region (`'...'`).
    SingleQ,
    /// Inside a double-quoted region (`"..."`).
    DoubleQ,
}
53
54impl Parser for ElementParser {
55    /// Returns number of consumed bytes or `None` if `>` was not found in `bytes`.
56    #[inline]
57    fn feed(&mut self, bytes: &[u8]) -> Option<usize> {
58        for i in memchr::memchr3_iter(b'>', b'\'', b'"', bytes) {
59            *self = match (*self, bytes[i]) {
60                // only allowed to match `>` while we are in state `Outside`
61                (Self::Outside, b'>') => return Some(i),
62                (Self::Outside, b'\'') => Self::SingleQ,
63                (Self::Outside, b'\"') => Self::DoubleQ,
64
65                // the only end_byte that gets us out if the same character
66                (Self::SingleQ, b'\'') | (Self::DoubleQ, b'"') => Self::Outside,
67
68                // all other bytes: no state change
69                _ => continue,
70            };
71        }
72        None
73    }
74
75    #[inline]
76    fn eof_error(self, _content: &[u8]) -> SyntaxError {
77        match self {
78            Self::Outside => SyntaxError::UnclosedTag,
79            Self::SingleQ => SyntaxError::UnclosedSingleQuotedAttributeValue,
80            Self::DoubleQ => SyntaxError::UnclosedDoubleQuotedAttributeValue,
81        }
82    }
83}
84
85impl Default for ElementParser {
86    #[inline]
87    fn default() -> Self {
88        Self::Outside
89    }
90}
91
#[test]
fn parse() {
    use pretty_assertions::assert_eq;
    use ElementParser::*;

    /// Feeds `bytes` into `parser` once.
    ///
    /// Returns `Ok(pos)` with the position in the buffer where the element ends.
    ///
    /// Returns `Err(internal_state)` with the state the parser ended in if
    /// parsing is not finished yet.
    fn parse_element(bytes: &[u8], mut parser: ElementParser) -> Result<usize, ElementParser> {
        let found = parser.feed(bytes);
        // `parser` is read only after `feed` had a chance to update it
        found.ok_or(parser)
    }

    // (input, initial state, expected outcome)
    let cases: &[(&[u8], ElementParser, Result<usize, ElementParser>)] = &[
        // empty input never changes the state
        (b"", Outside, Err(Outside)),
        (b"", SingleQ, Err(SingleQ)),
        (b"", DoubleQ, Err(DoubleQ)),
        // a single quote opens or closes only a single-quoted region
        (b"'", Outside, Err(SingleQ)),
        (b"'", SingleQ, Err(Outside)),
        (b"'", DoubleQ, Err(DoubleQ)),
        // a double quote opens or closes only a double-quoted region
        (b"\"", Outside, Err(DoubleQ)),
        (b"\"", SingleQ, Err(SingleQ)),
        (b"\"", DoubleQ, Err(Outside)),
        // `>` is matched only outside of quoted regions
        (b">", Outside, Ok(0)),
        (b">", SingleQ, Err(SingleQ)),
        (b">", DoubleQ, Err(DoubleQ)),
        // quotes followed by `>`
        (b"''>", Outside, Ok(2)),
        (b"''>", SingleQ, Err(SingleQ)),
        (b"''>", DoubleQ, Err(DoubleQ)),
    ];

    for &(input, initial, expected) in cases {
        assert_eq!(parse_element(input, initial), expected);
    }
}