1use std::borrow::Cow;
2use std::iter::{Iterator, Peekable};
3
4use xmlparser::ElementEnd;
5use xmlparser::Error;
6use xmlparser::Token;
7use xmlparser::Tokenizer;
8
9use crate::xml_unescape::xml_unescape;
10use crate::{XmlError, XmlResult};
11
12pub struct XmlReader<'a> {
17 tokenizer: Peekable<Tokenizer<'a>>,
18}
19
20impl<'a> XmlReader<'a> {
21 #[inline]
22 pub fn new(text: &'a str) -> XmlReader<'a> {
23 XmlReader {
24 tokenizer: Tokenizer::from(text).peekable(),
25 }
26 }
27
28 #[inline]
29 pub fn next(&mut self) -> Option<Result<Token<'a>, Error>> {
30 self.tokenizer.next()
31 }
32
33 #[inline]
34 pub fn peek(&mut self) -> Option<&Result<Token<'a>, Error>> {
35 self.tokenizer.peek()
36 }
37
38 #[inline]
39 pub fn read_text(&mut self, end_tag: &str) -> XmlResult<Cow<'a, str>> {
40 let mut res = None;
41
42 while let Some(token) = self.next() {
43 match token? {
44 Token::ElementEnd {
45 end: ElementEnd::Open,
46 ..
47 }
48 | Token::Attribute { .. } => (),
49 Token::Text { text } => {
50 res = Some(xml_unescape(text.as_str())?);
51 }
52 Token::Cdata { text, .. } => {
53 res = Some(Cow::Borrowed(text.as_str()));
54 }
55 Token::ElementEnd {
56 end: ElementEnd::Close(_, _),
57 span,
58 } => {
59 let span = span.as_str(); let tag = &span[2..span.len() - 1]; if end_tag == tag {
62 break;
63 } else {
64 return Err(XmlError::TagMismatch {
65 expected: end_tag.to_owned(),
66 found: tag.to_owned(),
67 });
68 }
69 }
70 token => {
71 return Err(XmlError::UnexpectedToken {
72 token: format!("{:?}", token),
73 });
74 }
75 }
76 }
77
78 Ok(res.unwrap_or_default())
79 }
80
81 #[inline]
82 pub fn read_till_element_start(&mut self, end_tag: &str) -> XmlResult<()> {
83 while let Some(token) = self.next() {
84 match token? {
85 Token::ElementStart { span, .. } => {
86 let tag = &span.as_str()[1..];
87 if end_tag == tag {
88 break;
89 } else {
90 self.read_to_end(tag)?;
91 }
92 }
93 Token::ElementEnd { .. }
94 | Token::Attribute { .. }
95 | Token::Text { .. }
96 | Token::Cdata { .. } => {
97 return Err(XmlError::UnexpectedToken {
98 token: format!("{:?}", token),
99 });
100 }
101 _ => (),
102 }
103 }
104 Ok(())
105 }
106
107 #[inline]
108 pub fn find_attribute(&mut self) -> XmlResult<Option<(&'a str, Cow<'a, str>)>> {
109 if let Some(token) = self.tokenizer.peek() {
110 match token {
111 Ok(Token::Attribute { span, value, .. }) => {
112 let value = value.as_str();
113 let span = span.as_str(); let key = &span[0..span.len() - value.len() - 3]; let value = Cow::Borrowed(value);
116 self.next();
117 return Ok(Some((key, value)));
118 }
119 Ok(Token::ElementEnd {
120 end: ElementEnd::Open,
121 ..
122 })
123 | Ok(Token::ElementEnd {
124 end: ElementEnd::Empty,
125 ..
126 }) => return Ok(None),
127 Ok(token) => {
128 return Err(XmlError::UnexpectedToken {
129 token: format!("{:?}", token),
130 })
131 }
132 Err(_) => {
133 self.next().unwrap()?;
135 }
136 }
137 }
138
139 Err(XmlError::UnexpectedEof)
140 }
141
142 #[inline]
143 pub fn find_element_start(&mut self, end_tag: Option<&str>) -> XmlResult<Option<&'a str>> {
144 while let Some(token) = self.tokenizer.peek() {
145 match token {
146 Ok(Token::ElementStart { span, .. }) => {
147 return Ok(Some(&span.as_str()[1..]));
148 }
149 Ok(Token::ElementEnd {
150 end: ElementEnd::Close(_, _),
151 span,
152 }) if end_tag.is_some() => {
153 let end_tag = end_tag.unwrap();
154 let span = span.as_str(); let tag = &span[2..span.len() - 1]; if tag == end_tag {
157 self.next();
158 return Ok(None);
159 } else {
160 return Err(XmlError::TagMismatch {
161 expected: end_tag.to_owned(),
162 found: tag.to_owned(),
163 });
164 }
165 }
166 Ok(Token::ElementEnd { .. }) | Ok(Token::Attribute { .. }) => {
167 return Err(XmlError::UnexpectedToken {
168 token: format!("{:?}", token),
169 })
170 }
171 _ => {
172 self.next().unwrap()?;
174 }
175 }
176 }
177
178 Err(XmlError::UnexpectedEof)
179 }
180
181 #[inline]
182 pub fn read_to_end(&mut self, end_tag: &str) -> XmlResult<()> {
183 while let Some(token) = self.next() {
184 match token? {
185 Token::ElementEnd {
187 end: ElementEnd::Empty,
188 ..
189 } => return Ok(()),
190 Token::ElementEnd {
191 end: ElementEnd::Open,
192 ..
193 } => break,
194 Token::Attribute { .. } => (),
195 token => {
197 return Err(XmlError::UnexpectedToken {
198 token: format!("{:?}", token),
199 })
200 }
201 }
202 }
203
204 let mut depth = 1;
205
206 while let Some(token) = self.next() {
207 match token? {
208 Token::ElementStart { span, .. } if end_tag == &span.as_str()[1..] => {
209 while let Some(token) = self.next() {
210 match token? {
211 Token::ElementEnd {
212 end: ElementEnd::Empty,
213 ..
214 } => {
215 if depth == 0 {
216 return Ok(());
217 } else {
218 break;
220 }
221 }
222 Token::ElementEnd {
223 end: ElementEnd::Open,
224 ..
225 } => {
226 depth += 1;
227 break;
228 }
229 Token::Attribute { .. } => (),
230 token => {
232 return Err(XmlError::UnexpectedToken {
233 token: format!("{:?}", token),
234 });
235 }
236 }
237 }
238 }
239 Token::ElementEnd {
240 end: ElementEnd::Close(_, _),
241 span,
242 } if end_tag == &span.as_str()[2..span.as_str().len() - 1] => {
243 depth -= 1;
244 if depth == 0 {
245 return Ok(());
246 }
247 }
248 _ => (),
249 }
250 }
251
252 Err(XmlError::UnexpectedEof)
253 }
254}
255
// Checks `XmlReader::read_text` for plain text, escaped text and CDATA.
#[test]
fn read_text() -> XmlResult<()> {
    // (document, text expected from `read_text("parent")`)
    let cases: [(&str, &str); 9] = [
        ("<parent></parent>", ""),
        ("<parent>text</parent>", "text"),
        ("<parent attr=\"value\">text</parent>", "text"),
        // Entity references in text are unescaped.
        (
            "<parent attr=\"value\">&quot;&apos;&lt;&gt;&amp;</parent>",
            r#""'<>&"#,
        ),
        ("<parent><![CDATA[]]></parent>", ""),
        ("<parent><![CDATA[text]]></parent>", "text"),
        ("<parent attr=\"value\"><![CDATA[text]]></parent>", "text"),
        (
            "<parent attr=\"value\"><![CDATA[<foo></foo>]]></parent>",
            "<foo></foo>",
        ),
        // CDATA content is returned verbatim, so entities stay escaped.
        (
            "<parent attr=\"value\"><![CDATA[&quot;&apos;&lt;&gt;&amp;]]></parent>",
            "&quot;&apos;&lt;&gt;&amp;",
        ),
    ];

    for &(xml, expected) in cases.iter() {
        let mut reader = XmlReader::new(xml);

        assert!(reader.next().is_some()); // "<parent"
        assert_eq!(reader.read_text("parent")?, expected);
        assert!(reader.next().is_none());
    }

    Ok(())
}
315
// Checks that `XmlReader::read_till_element_start` stops right after the
// requested element start and skips sibling elements that precede it.
#[test]
fn read_till_element_start() -> XmlResult<()> {
    // (document, tokens consumed before the call, tokens left after it)
    let cases: [(&str, usize, usize); 5] = [
        // Only "/>" remains.
        ("<tag/>", 0, 1),
        // "<parent" and ">" are consumed first; "/>" and "</parent>" remain.
        ("<parent><skip/><tag/></parent>", 2, 2),
        ("<parent><skip></skip><tag/></parent>", 2, 2),
        ("<parent><skip><skip/></skip><tag/></parent>", 2, 2),
        ("<parent><skip><skip></skip></skip><tag/></parent>", 2, 2),
    ];

    for &(xml, before, after) in cases.iter() {
        let mut reader = XmlReader::new(xml);

        for _ in 0..before {
            assert!(reader.next().is_some());
        }
        reader.read_till_element_start("tag")?;
        for _ in 0..after {
            assert!(reader.next().is_some());
        }
        assert!(reader.next().is_none());
    }

    Ok(())
}
362
// Checks that `XmlReader::read_to_end` skips a whole `child` element,
// including nested elements that reuse the same name.
#[test]
fn read_to_end() -> XmlResult<()> {
    let documents: [&str; 4] = [
        "<parent><child/></parent>",
        "<parent><child></child></parent>",
        "<parent><child><child/></child></parent>",
        "<parent><child><child></child></child></parent>",
    ];

    for &xml in documents.iter() {
        let mut reader = XmlReader::new(xml);

        // Consume "<parent", ">" and "<child" before skipping.
        for _ in 0..3 {
            assert!(reader.next().is_some());
        }
        reader.read_to_end("child")?;

        // Only "</parent>" is left.
        assert!(reader.next().is_some());
        assert!(reader.next().is_none());
    }

    Ok(())
}