quick_xml/parser/pi.rs
1//! Contains a parser for an XML processing instruction.
2
3use crate::errors::SyntaxError;
4use crate::parser::Parser;
5use crate::utils::is_whitespace;
6
/// A parser that searches a byte stream for the `?>` sequence which terminates
/// a processing instruction.
///
/// Create an instance of the parser and [`feed`] consecutive chunks of data
/// into it. While the terminator has not been seen, [`feed`] returns [`None`]
/// and you are expected to supply the next chunk. Once `?>` has been found,
/// [`feed`] returns [`Some`] holding the offset of the closing `>` inside the
/// chunk that was just fed (the `?` may have been the last byte of the
/// previous chunk).
///
/// NOTE: after a successful match the parser is not reset to its initial
/// state and should not be used anymore. Create a new parser if you want to
/// perform a new search.
///
/// # Example
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::parser::{Parser, PiParser};
///
/// let mut parser = PiParser::default();
///
/// // Parse `<?instruction with = 'some > and ?' inside?>and the text follow...`
/// // split into three chunks
/// assert_eq!(parser.feed(b"<?instruction"), None);
/// // ...get new chunk of data
/// assert_eq!(parser.feed(b" with = 'some > and ?"), None);
/// // ...get another chunk of data
/// assert_eq!(parser.feed(b"' inside?>and the text follow..."), Some(9));
/// //                       ^        ^
/// //                       0        9
/// ```
///
/// [`feed`]: Self::feed()
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct PiParser(
    /// `true` when the chunk passed to the previous search attempt ended with
    /// `?`, so that a `>` at the very start of the next chunk completes `?>`.
    pub bool,
);
45
46impl Parser for PiParser {
47 /// Determines the end position of a processing instruction in the provided slice.
48 /// Processing instruction ends on the first occurrence of `?>` which cannot be
49 /// escaped.
50 ///
51 /// Returns position after the `?>` or `None` if such sequence was not found.
52 ///
53 /// [Section 2.6]: Parameter entity references MUST NOT be recognized within
54 /// processing instructions, so parser do not search for them.
55 ///
56 /// # Parameters
57 /// - `bytes`: a slice to find the end of a processing instruction.
58 /// Should contain text in ASCII-compatible encoding
59 ///
60 /// [Section 2.6]: https://www.w3.org/TR/xml11/#sec-pi
61 #[inline]
62 fn feed(&mut self, bytes: &[u8]) -> Option<usize> {
63 for i in memchr::memchr_iter(b'>', bytes) {
64 match i {
65 0 if self.0 => return Some(0),
66 // If the previous byte is `?`, then we found `?>`
67 i if i > 0 && bytes[i - 1] == b'?' => return Some(i),
68 _ => {}
69 }
70 }
71 self.0 = bytes.last().copied() == Some(b'?');
72 None
73 }
74
75 #[inline]
76 fn eof_error(self, content: &[u8]) -> SyntaxError {
77 // Check if content starts with "?xml" followed by whitespace, '?' or end.
78 // This determines whether to report an unclosed XML declaration or PI.
79 // FIXME: Add support for UTF-8/ASCII incompatible encodings (UTF-16)
80 let is_xml_decl = content.starts_with(b"?xml")
81 && content
82 .get(4)
83 .map_or(true, |&b| is_whitespace(b) || b == b'?');
84 if is_xml_decl {
85 SyntaxError::UnclosedXmlDecl
86 } else {
87 SyntaxError::UnclosedPI
88 }
89 }
90}
91
#[test]
fn pi() {
    use pretty_assertions::assert_eq;

    /// Feeds `chunk` into a parser whose "previous chunk ended with `?`" flag
    /// is preset to `seen_question_mark`.
    ///
    /// Returns `Ok(pos)` with the position reported by `feed`, or
    /// `Err(internal_state)` with the parser flag if parsing is not done yet.
    fn run(chunk: &[u8], seen_question_mark: bool) -> Result<usize, bool> {
        let mut parser = PiParser(seen_question_mark);
        let outcome = parser.feed(chunk);
        outcome.ok_or(parser.0)
    }

    // Each row is (chunk, flag before the call, expected result).
    // The trailing comment shows which character was seen last before calling
    // `feed`: `x` means any character, the pipe denotes the start of the buffer
    // that is passed to `feed`.
    let cases: [(&[u8], bool, Result<usize, bool>); 10] = [
        (b"", false, Err(false)),  // x|
        (b"", true, Err(false)),   // ?|
        (b"?", false, Err(true)),  // x|?
        (b"?", true, Err(true)),   // ?|?
        (b">", false, Err(false)), // x|>
        (b">", true, Ok(0)),       // ?|>
        (b"?>", false, Ok(1)),     // x|?>
        (b"?>", true, Ok(1)),      // ?|?>
        (b">?>", false, Ok(2)),    // x|>?>
        (b">?>", true, Ok(0)),     // ?|>?>
    ];

    for &(chunk, seen_question_mark, expected) in cases.iter() {
        assert_eq!(
            run(chunk, seen_question_mark),
            expected,
            "chunk = {:?}, previous chunk ended with `?` = {}",
            chunk,
            seen_question_mark
        );
    }
}
125}