1use std::borrow::Cow;
2use std::iter::{Iterator, Peekable};
3
4use xmlparser::ElementEnd;
5use xmlparser::Error;
6use xmlparser::Token;
7use xmlparser::Tokenizer;
8
9use crate::xml_unescape::xml_unescape;
10use crate::{XmlError, XmlResult};
11
12pub struct XmlReader<'a> {
17 tokenizer: Peekable<Tokenizer<'a>>,
18}
19
20impl<'a> XmlReader<'a> {
21 #[inline]
22 pub fn new(text: &'a str) -> XmlReader<'a> {
23 XmlReader {
24 tokenizer: Tokenizer::from(text).peekable(),
25 }
26 }
27
28 #[inline]
29 pub fn next(&mut self) -> Option<Result<Token<'a>, Error>> {
30 self.tokenizer.next()
31 }
32
33 #[inline]
34 pub fn peek(&mut self) -> Option<&Result<Token<'a>, Error>> {
35 self.tokenizer.peek()
36 }
37
38 #[inline]
39 pub fn read_text(&mut self, end_tag: &str) -> XmlResult<Cow<'a, str>> {
40 let mut res = Cow::Borrowed("");
41
42 while let Some(token) = self.next() {
43 match token? {
44 Token::ElementEnd {
45 end: ElementEnd::Open,
46 ..
47 }
48 | Token::Attribute { .. } => (),
49 Token::Text { text } => {
50 let text = xml_unescape(text.as_str())?;
51 if res.is_empty() {
52 res = text;
53 } else {
54 res.to_mut().push_str(&text);
55 }
56 }
57 Token::Cdata { text, .. } => {
58 if res.is_empty() {
59 res = Cow::Borrowed(text.as_str());
60 } else {
61 res.to_mut().push_str(&text);
62 }
63 }
64 Token::ElementEnd {
65 end: ElementEnd::Close(_, _),
66 span,
67 } => {
68 let span = span.as_str(); let tag = &span[2..span.len() - 1]; if end_tag == tag {
71 break;
72 } else {
73 return Err(XmlError::TagMismatch {
74 expected: end_tag.to_owned(),
75 found: tag.to_owned(),
76 });
77 }
78 }
79 Token::ElementEnd {
80 end: ElementEnd::Empty,
81 ..
82 } => {
83 break;
84 }
85 token => {
86 return Err(XmlError::UnexpectedToken {
87 token: format!("{:?}", token),
88 });
89 }
90 }
91 }
92
93 Ok(res)
94 }
95
96 #[inline]
97 pub fn read_till_element_start(&mut self, end_tag: &str) -> XmlResult<()> {
98 while let Some(token) = self.next() {
99 match token? {
100 Token::ElementStart { span, .. } => {
101 let tag = &span.as_str()[1..];
102 if end_tag == tag {
103 break;
104 } else {
105 self.read_to_end(tag)?;
106 }
107 }
108 Token::ElementEnd { .. }
109 | Token::Attribute { .. }
110 | Token::Text { .. }
111 | Token::Cdata { .. } => {
112 return Err(XmlError::UnexpectedToken {
113 token: format!("{:?}", token),
114 });
115 }
116 _ => (),
117 }
118 }
119 Ok(())
120 }
121
122 #[inline]
123 pub fn find_attribute(&mut self) -> XmlResult<Option<(&'a str, Cow<'a, str>)>> {
124 if let Some(token) = self.tokenizer.peek() {
125 match token {
126 Ok(Token::Attribute { span, value, .. }) => {
127 let value = value.as_str();
128 let span = span.as_str(); let key = &span[0..span.len() - value.len() - 3]; let value = xml_unescape(value)?;
131 self.next();
132 return Ok(Some((key, value)));
133 }
134 Ok(Token::ElementEnd {
135 end: ElementEnd::Open,
136 ..
137 })
138 | Ok(Token::ElementEnd {
139 end: ElementEnd::Empty,
140 ..
141 }) => return Ok(None),
142 Ok(token) => {
143 return Err(XmlError::UnexpectedToken {
144 token: format!("{:?}", token),
145 })
146 }
147 Err(_) => {
148 self.next().unwrap()?;
150 }
151 }
152 }
153
154 Err(XmlError::UnexpectedEof)
155 }
156
157 #[inline]
158 pub fn find_element_start(&mut self, end_tag: Option<&str>) -> XmlResult<Option<&'a str>> {
159 while let Some(token) = self.tokenizer.peek() {
160 match token {
161 Ok(Token::ElementStart { span, .. }) => {
162 return Ok(Some(&span.as_str()[1..]));
163 }
164 Ok(Token::ElementEnd {
165 end: ElementEnd::Close(_, _),
166 span,
167 }) if end_tag.is_some() => {
168 let end_tag = end_tag.unwrap();
169 let span = span.as_str(); let tag = &span[2..span.len() - 1]; if tag == end_tag {
172 self.next();
173 return Ok(None);
174 } else {
175 return Err(XmlError::TagMismatch {
176 expected: end_tag.to_owned(),
177 found: tag.to_owned(),
178 });
179 }
180 }
181 Ok(Token::ElementEnd { .. }) | Ok(Token::Attribute { .. }) => {
182 return Err(XmlError::UnexpectedToken {
183 token: format!("{:?}", token),
184 })
185 }
186 _ => {
187 self.next().unwrap()?;
189 }
190 }
191 }
192
193 Err(XmlError::UnexpectedEof)
194 }
195
196 #[inline]
197 pub fn read_to_end(&mut self, end_tag: &str) -> XmlResult<()> {
198 while let Some(token) = self.next() {
199 match token? {
200 Token::ElementEnd {
202 end: ElementEnd::Empty,
203 ..
204 } => return Ok(()),
205 Token::ElementEnd {
206 end: ElementEnd::Open,
207 ..
208 } => break,
209 Token::Attribute { .. } => (),
210 token => {
212 return Err(XmlError::UnexpectedToken {
213 token: format!("{:?}", token),
214 })
215 }
216 }
217 }
218
219 let mut depth = 1;
220
221 while let Some(token) = self.next() {
222 match token? {
223 Token::ElementStart { span, .. } if end_tag == &span.as_str()[1..] => {
224 while let Some(token) = self.next() {
225 match token? {
226 Token::ElementEnd {
227 end: ElementEnd::Empty,
228 ..
229 } => {
230 if depth == 0 {
231 return Ok(());
232 } else {
233 break;
235 }
236 }
237 Token::ElementEnd {
238 end: ElementEnd::Open,
239 ..
240 } => {
241 depth += 1;
242 break;
243 }
244 Token::Attribute { .. } => (),
245 token => {
247 return Err(XmlError::UnexpectedToken {
248 token: format!("{:?}", token),
249 });
250 }
251 }
252 }
253 }
254 Token::ElementEnd {
255 end: ElementEnd::Close(_, _),
256 span,
257 } if end_tag == &span.as_str()[2..span.as_str().len() - 1] => {
258 depth -= 1;
259 if depth == 0 {
260 return Ok(());
261 }
262 }
263 _ => (),
264 }
265 }
266
267 Err(XmlError::UnexpectedEof)
268 }
269}
270
/// Exercises `XmlReader::read_text` on empty, plain, entity-escaped, CDATA
/// and mixed text/CDATA content.
#[test]
fn read_text() -> XmlResult<()> {
    // (document, expected text content of `<parent>`)
    let cases: &[(&str, &str)] = &[
        ("<parent></parent>", ""),
        ("<parent>text</parent>", "text"),
        ("<parent attr=\"value\">text</parent>", "text"),
        // Entity references in text are unescaped.
        (
            "<parent attr=\"value\">&quot;&apos;&lt;&gt;&amp;</parent>",
            r#""'<>&"#,
        ),
        ("<parent><![CDATA[]]></parent>", ""),
        ("<parent><![CDATA[text]]></parent>", "text"),
        ("<parent attr=\"value\"><![CDATA[text]]></parent>", "text"),
        (
            "<parent attr=\"value\"><![CDATA[<foo></foo>]]></parent>",
            "<foo></foo>",
        ),
        // CDATA content is returned verbatim, entity references included.
        (
            "<parent attr=\"value\"><![CDATA[&quot;&apos;&lt;&gt;&amp;]]></parent>",
            "&quot;&apos;&lt;&gt;&amp;",
        ),
        ("<parent>\n text\n \n</parent>", "\n text\n \n"),
        ("<parent>\n <![CDATA[text]]>\n \n</parent>", "\n text\n \n"),
        (
            "<parent>\n <![CDATA[text1]]>\n <![CDATA[text2]]>\n \n</parent>",
            "\n text1\n text2\n \n",
        ),
    ];

    for &(xml, expected) in cases {
        let mut reader = XmlReader::new(xml);

        // Consume "<parent" so the reader sits inside the start tag.
        assert!(reader.next().is_some());
        assert_eq!(reader.read_text("parent")?, expected);
        // The closing tag was consumed by `read_text`.
        assert!(reader.next().is_none());
    }

    Ok(())
}
345
/// Exercises `XmlReader::read_till_element_start`, both at the document root
/// and when sibling elements have to be skipped first.
#[test]
fn read_till_element_start() -> XmlResult<()> {
    // Target element is the root itself.
    let mut root_only = XmlReader::new("<tag/>");
    root_only.read_till_element_start("tag")?;
    assert!(root_only.next().is_some()); // "/>"
    assert!(root_only.next().is_none());

    // Target element is preceded by a `<skip>` sibling of varying shape.
    let documents = [
        "<parent><skip/><tag/></parent>",
        "<parent><skip></skip><tag/></parent>",
        "<parent><skip><skip/></skip><tag/></parent>",
        "<parent><skip><skip></skip></skip><tag/></parent>",
    ];

    for &xml in &documents {
        let mut reader = XmlReader::new(xml);

        assert!(reader.next().is_some()); // "<parent"
        assert!(reader.next().is_some()); // ">"
        reader.read_till_element_start("tag")?;
        assert!(reader.next().is_some()); // "/>"
        assert!(reader.next().is_some()); // "</parent>"
        assert!(reader.next().is_none());
    }

    Ok(())
}
392
/// Exercises `XmlReader::read_to_end` on self-closing, empty and nested
/// same-named child elements.
#[test]
fn read_to_end() -> XmlResult<()> {
    let documents = [
        "<parent><child/></parent>",
        "<parent><child></child></parent>",
        "<parent><child><child/></child></parent>",
        "<parent><child><child></child></child></parent>",
    ];

    for &xml in &documents {
        let mut reader = XmlReader::new(xml);

        // Consume "<parent", ">" and "<child" to land inside the child's start tag.
        for _ in 0..3 {
            assert!(reader.next().is_some());
        }
        reader.read_to_end("child")?;
        assert!(reader.next().is_some()); // "</parent>"
        assert!(reader.next().is_none());
    }

    Ok(())
}