quick_xml/parser/element.rs
1//! Contains a parser for an XML element.
2
3use crate::errors::SyntaxError;
4use crate::parser::Parser;
5
/// A parser that searches for a `>` symbol in a slice, skipping quoted regions.
///
/// Two kinds of quoted regions are recognized: double-quoted (`"..."`) and
/// single-quoted (`'...'`). A `>` found inside such a region is not reported
/// as a result. Each region starts and ends with its own quote symbol, and that
/// symbol cannot be escaped inside the region (it can be encoded as an XML
/// character entity or a named entity, but such an encoding never contains
/// a literal quote).
///
/// To use the parser, create an instance and [`feed`] data into it. When the
/// search succeeds, the parser returns [`Some`] with the position of the found
/// symbol. When the search fails, [`None`] is returned. Usually a match is
/// expected eventually, so keep feeding new data until you get one.
///
/// NOTE: after a successful match the parser should not be used anymore.
/// Create a new parser if you want to perform a new search.
///
/// # Example
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::parser::{ElementParser, Parser};
///
/// let mut parser = ElementParser::default();
///
/// // Parse `<my-element  with = 'some > inside'>and the text follow...`
/// // split into three chunks
/// assert_eq!(parser.feed(b"<my-element"), None);
/// // ...get new chunk of data
/// assert_eq!(parser.feed(b" with = 'some >"), None);
/// // ...get another chunk of data
/// assert_eq!(parser.feed(b" inside'>and the text follow..."), Some(8));
/// //                       ^       ^
/// //                       0       8
/// ```
///
/// [`feed`]: Self::feed()
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ElementParser {
    /// The initial state: inside an element, but outside of any attribute value.
    Outside,
    /// Currently inside a single-quoted region (`'...'`).
    SingleQ,
    /// Currently inside a double-quoted region (`"..."`).
    DoubleQ,
}
53
54impl Parser for ElementParser {
55 /// Returns number of consumed bytes or `None` if `>` was not found in `bytes`.
56 #[inline]
57 fn feed(&mut self, bytes: &[u8]) -> Option<usize> {
58 for i in memchr::memchr3_iter(b'>', b'\'', b'"', bytes) {
59 *self = match (*self, bytes[i]) {
60 // only allowed to match `>` while we are in state `Outside`
61 (Self::Outside, b'>') => return Some(i),
62 (Self::Outside, b'\'') => Self::SingleQ,
63 (Self::Outside, b'\"') => Self::DoubleQ,
64
65 // the only end_byte that gets us out if the same character
66 (Self::SingleQ, b'\'') | (Self::DoubleQ, b'"') => Self::Outside,
67
68 // all other bytes: no state change
69 _ => continue,
70 };
71 }
72 None
73 }
74
75 #[inline]
76 fn eof_error(self, _content: &[u8]) -> SyntaxError {
77 match self {
78 Self::Outside => SyntaxError::UnclosedTag,
79 Self::SingleQ => SyntaxError::UnclosedSingleQuotedAttributeValue,
80 Self::DoubleQ => SyntaxError::UnclosedDoubleQuotedAttributeValue,
81 }
82 }
83}
84
85impl Default for ElementParser {
86 #[inline]
87 fn default() -> Self {
88 Self::Outside
89 }
90}
91
#[test]
fn parse() {
    use pretty_assertions::assert_eq;
    use ElementParser::*;

    /// Feeds `input` to a parser that starts in state `state`.
    ///
    /// Returns `Ok(pos)` with the position in the buffer where the element ends,
    /// or `Err(state)` with the parser's internal state if the end has not been
    /// found yet.
    fn run(input: &[u8], mut state: ElementParser) -> Result<usize, ElementParser> {
        // `feed` updates `state` first; the (Copy) state is read afterwards.
        state.feed(input).ok_or(state)
    }

    // (input, initial state, expected outcome)
    let cases: [(&str, ElementParser, Result<usize, ElementParser>); 15] = [
        ("", Outside, Err(Outside)),
        ("", SingleQ, Err(SingleQ)),
        ("", DoubleQ, Err(DoubleQ)),
        ("'", Outside, Err(SingleQ)),
        ("'", SingleQ, Err(Outside)),
        ("'", DoubleQ, Err(DoubleQ)),
        ("\"", Outside, Err(DoubleQ)),
        ("\"", SingleQ, Err(SingleQ)),
        ("\"", DoubleQ, Err(Outside)),
        (">", Outside, Ok(0)),
        (">", SingleQ, Err(SingleQ)),
        (">", DoubleQ, Err(DoubleQ)),
        ("''>", Outside, Ok(2)),
        ("''>", SingleQ, Err(SingleQ)),
        ("''>", DoubleQ, Err(DoubleQ)),
    ];

    for &(input, start, expected) in cases.iter() {
        assert_eq!(
            run(input.as_bytes(), start),
            expected,
            "input: {:?}, initial state: {:?}",
            input,
            start
        );
    }
}
126}