quick_xml/parser/comment.rs
1//! Contains a parser for an XML comment.
2
3use crate::errors::SyntaxError;
4use crate::parser::Parser;
5
/// A parser that searches for a `-->` sequence in the slice.
///
/// To use a parser create an instance of parser and [`feed`] data into it.
/// After successful search the parser will return [`Some`] with position where
/// comment is ended (the position after `-->`). If search was unsuccessful,
/// a [`None`] will be returned. You typically would expect positive result of
/// search, so that you should feed new data until you'll get it.
///
/// NOTE: after successful match the parser does not return to the initial
/// state and should not be used anymore. Create a new parser if you want to perform
/// new search.
///
/// # Example
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::parser::{CommentParser, Parser};
///
/// let mut parser = CommentParser::default();
///
/// // Parse `<!-- comment with some -> and --- inside-->and the text follow...`
/// // split into three chunks
/// assert_eq!(parser.feed(b"<!-- comment"), None);
/// // ...get new chunk of data
/// assert_eq!(parser.feed(b" with some -> and -"), None);
/// // ...get another chunk of data
/// assert_eq!(parser.feed(b"-- inside-->and the text follow..."), Some(12));
/// //                       ^          ^
/// //                       0          11
/// ```
///
/// [`feed`]: Self::feed()
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CommentParser {
    /// The parser has not yet seen any dashes at the end of the previous slice.
    Seen0,
    /// The parser has already seen one dash at the end of the previous slice.
    Seen1,
    /// The parser has already seen two dashes at the end of the previous slice.
    Seen2,
}
47
48impl Default for CommentParser {
49 #[inline]
50 fn default() -> Self {
51 Self::Seen0
52 }
53}
54
55impl Parser for CommentParser {
56 /// Determines the end position of an XML comment in the provided slice.
57 /// Comments is a pieces of text enclosed in `<!--` and `-->` braces.
58 /// Comment ends on the first occurrence of `-->` which cannot be escaped.
59 ///
60 /// Returns position after the `-->` or `None` if such sequence was not found.
61 ///
62 /// # Parameters
63 /// - `bytes`: a slice to find the end of a comment.
64 /// Should contain text in ASCII-compatible encoding
65 #[inline]
66 fn feed(&mut self, bytes: &[u8]) -> Option<usize> {
67 let result = match self {
68 Self::Seen0 => seen0(bytes),
69 Self::Seen1 => seen1(bytes),
70 Self::Seen2 => seen2(bytes),
71 };
72 if let Some(r) = result {
73 return Some(r);
74 }
75 if bytes.ends_with(b"--") {
76 *self = Self::Seen2;
77 } else {
78 self.next_state(bytes.last().copied());
79 }
80 None
81 }
82
83 #[inline]
84 fn eof_error(self, _content: &[u8]) -> SyntaxError {
85 SyntaxError::UnclosedComment
86 }
87}
88
89impl CommentParser {
90 #[inline]
91 fn next_state(&mut self, last: Option<u8>) {
92 match (*self, last) {
93 (Self::Seen0, Some(b'-')) => *self = Self::Seen1,
94
95 (Self::Seen1, Some(b'-')) => *self = Self::Seen2,
96 (Self::Seen1, Some(_)) => *self = Self::Seen0,
97
98 (Self::Seen2, Some(b'-')) => {}
99 (Self::Seen2, Some(_)) => *self = Self::Seen0,
100
101 _ => {}
102 }
103 }
104}
105
106#[inline]
107fn seen0(bytes: &[u8]) -> Option<usize> {
108 for i in memchr::memchr_iter(b'>', bytes) {
109 if bytes[..i].ends_with(b"--") {
110 // +1 for `>` which should be included in event
111 return Some(i + 1);
112 }
113 }
114 None
115}
116
117#[inline]
118fn seen1(bytes: &[u8]) -> Option<usize> {
119 // -|->
120 if bytes.starts_with(b"->") {
121 return Some(2);
122 }
123 // Even if the first character is `-` it cannot be part of close sequence,
124 // because we checked that condition above. That means that we can forgot that
125 // we seen one `-` at the end of the previous chunk.
126 // -|x...
127 seen0(bytes)
128}
129
130#[inline]
131fn seen2(bytes: &[u8]) -> Option<usize> {
132 match bytes.get(0) {
133 // --|
134 None => None,
135 // --|>
136 Some(b'>') => Some(1),
137 // The end sequence here can be matched only if bytes starts with `->`
138 // which is handled in seen1().
139 // --|x...
140 Some(_) => seen1(bytes),
141 }
142}
143
#[test]
fn parse() {
    use pretty_assertions::assert_eq;
    use CommentParser::*;

    /// Feeds `bytes` to a fresh parser in each of the states `Seen0`, `Seen1`
    /// and `Seen2` (in that order) and compares the outcome with the matching
    /// entry of `expected`:
    ///
    /// - `Ok(pos)` is the position in the buffer where the comment ended;
    /// - `Err(state)` is the parser state when the end was not found.
    #[track_caller]
    fn check(bytes: &[u8], expected: [Result<usize, CommentParser>; 3]) {
        for (start, want) in [Seen0, Seen1, Seen2].iter().zip(expected.iter()) {
            let mut parser = *start;
            let got = parser.feed(bytes).ok_or(parser);
            assert_eq!(
                got,
                *want,
                "feeding {:?} in state {:?}",
                String::from_utf8_lossy(bytes),
                start
            );
        }
    }

    // Columns below are the results for a parser that previously saw
    //           xx|              x-|              --|
    check(b"", [Err(Seen0), Err(Seen1), Err(Seen2)]);
    check(b"-", [Err(Seen1), Err(Seen2), Err(Seen2)]);
    check(b">", [Err(Seen0), Err(Seen0), Ok(1)]);
    check(b"--", [Err(Seen2), Err(Seen2), Err(Seen2)]);
    check(b"->", [Err(Seen0), Ok(2), Ok(2)]);
    check(b"-->", [Ok(3), Ok(3), Ok(3)]);
    check(b">-->", [Ok(4), Ok(4), Ok(1)]);
    check(b"->-->", [Ok(5), Ok(2), Ok(2)]);
}
190}