xml/
parser.rs

1// RustyXML
2// Copyright 2013-2016 RustyXML developers
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9//
10// The parser herein is derived from OFXMLParser as included with
11// ObjFW, Copyright (c) 2008-2013 Jonathan Schleifer.
12// Permission to license this derived work under MIT license has been granted by ObjFW's author.
13
14use crate::{unescape, AttrMap, EndTag, StartTag};
15use std::collections::{HashMap, VecDeque};
16use std::error::Error;
17use std::fmt;
18use std::iter::Iterator;
19use std::mem;
20
#[derive(PartialEq, Eq, Debug)]
/// Events returned by the `Parser`
///
/// One event is produced for every construct the parser finishes reading.
pub enum Event {
    /// Event indicating a processing instruction was found; carries the text
    /// between `<?` and `?>`
    PI(String),
    /// Event indicating a start tag was found
    ElementStart(StartTag),
    /// Event indicating an end tag was found (also produced for the closing
    /// half of an empty-element tag such as `<a/>`)
    ElementEnd(EndTag),
    /// Event indicating character data was found; the text has already been
    /// passed through `unescape`
    Characters(String),
    /// Event indicating CDATA was found; carries the text between
    /// `<![CDATA[` and `]]>` without unescaping
    CDATA(String),
    /// Event indicating a comment was found; carries the text between
    /// `<!--` and `-->`
    Comment(String),
}
37
38#[derive(PartialEq, Debug, Clone)]
39#[allow(missing_copy_implementations)]
40/// The structure returned, when erroneous XML is read
41pub struct ParserError {
42    /// The line number at which the error occurred
43    pub line: u32,
44    /// The column number at which the error occurred
45    pub col: u32,
46    /// The kind of error encountered
47    pub kind: ParserErrorKind,
48}
49
50impl Error for ParserError {}
51
52impl fmt::Display for ParserError {
53    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
54        write!(
55            f,
56            "Parse error; Line: {}, Column: {}, Reason: {}",
57            self.line, self.col, self.kind,
58        )
59    }
60}
61
/// The specific reason a `ParserError` was produced
#[derive(PartialEq, Debug, Copy, Clone)]
#[non_exhaustive]
pub enum ParserErrorKind {
    /// An element name uses a namespace prefix with no binding in scope
    UnboundNsPrefixInTagName,
    /// An attribute name uses a namespace prefix with no binding in scope
    UnboundNsPrefixInAttributeName,
    /// Whitespace was followed by something other than `=` in an attribute name
    SpaceInAttributeName,
    /// The same attribute (name and namespace) appeared twice on one element
    DuplicateAttribute,
    /// An attribute value did not start with `'` or `"`
    UndelimitedAttribute,
    /// Character data or an attribute value could not be unescaped
    InvalidEntity,
    /// `<![` was not followed by `CDATA[`
    InvalidCdataStart,
    /// `<!-` was not followed by a second `-`
    InvalidCommentStart,
    /// `--` inside a comment was not immediately followed by `>`
    InvalidCommentContent,
    /// `<!D` was not followed by `OCTYPE` and whitespace
    InvalidDoctype,
    /// The `/` of an empty-element tag was not immediately followed by `>`
    ExpectedTagClose,
    /// Something other than whitespace or `>` followed an end tag's name
    ExpectedLwsOrTagClose,
    /// `<!` was followed by a character that starts no known construct
    MalformedXml,
}

impl fmt::Display for ParserErrorKind {
    /// Writes the fixed, human readable description of the error kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `Formatter::pad` is what `str`'s own `Display` uses, so width,
        // alignment and precision flags are honoured.
        f.pad(match *self {
            Self::UnboundNsPrefixInTagName => "Unbound namespace prefix in tag name",
            Self::UnboundNsPrefixInAttributeName => "Unbound namespace prefix in attribute name",
            // NOTE(review): "occured" is misspelled; kept verbatim because
            // callers may compare against the rendered text.
            Self::SpaceInAttributeName => "Space occured in attribute name",
            Self::DuplicateAttribute => "Duplicate attribute",
            Self::UndelimitedAttribute => "Attribute value not enclosed in ' or \"",
            Self::InvalidEntity => "Found invalid entity",
            Self::InvalidCdataStart => "Invalid CDATA opening sequence",
            Self::InvalidCommentStart => "Expected 2nd '-' to start comment",
            Self::InvalidCommentContent => "No more than one adjacent '-' allowed in a comment",
            Self::InvalidDoctype => "Invalid DOCTYPE",
            Self::ExpectedTagClose => "Expected '>' to close tag",
            Self::ExpectedLwsOrTagClose => "Expected '>' to close tag, or LWS",
            Self::MalformedXml => "Malformed XML",
        })
    }
}
104
// Event based parser
//
// Each variant names the construct the parser is currently inside;
// `Parser::parse_character` picks the handler for the next character from it.
enum State {
    // Between constructs, accumulating character data
    OutsideTag,
    // A '<' was just read; the next character decides the construct
    TagOpened,
    // Inside `<? ... ?>`
    InProcessingInstructions,
    // Reading the name of a start tag
    InTagName,
    // Reading the name of an end tag
    InCloseTagName,
    // Inside a start tag, between attributes
    InTag,
    // Reading an attribute name, up to the '='
    InAttrName,
    // Reading an attribute value, up to the closing quote
    InAttrValue,
    // After '=', waiting for the opening quote of the attribute value
    ExpectDelimiter,
    // After the '/' of an empty-element tag, waiting for '>'
    ExpectClose,
    // After the name of an end tag, waiting for whitespace or '>'
    ExpectSpaceOrClose,
    // After `<!`, deciding between comment, CDATA and DOCTYPE
    InExclamationMark,
    // Matching the `CDATA[` that follows `<![`
    InCDATAOpening,
    // Inside a CDATA section, looking for `]]>`
    InCDATA,
    // After `<!-`, waiting for the second '-'
    InCommentOpening,
    // Inside a comment, looking for `--`
    InComment1,
    // After `--` inside a comment, waiting for '>'
    InComment2,
    // Inside `<!DOCTYPE ... >`
    InDoctype,
}
126
/// A streaming XML parser
///
/// Data is fed to the parser using the `feed_str()` method.
/// The `Event`s, and `ParserError`s generated while parsing the string
/// can be requested by iterating over the parser
///
/// ~~~
/// use xml::Parser;
///
/// let mut p = Parser::new();
/// p.feed_str("<a href='http://rust-lang.org'>Rust</a>");
/// for event in p {
///     match event {
///        // [...]
///        _ => ()
///     }
/// }
/// ~~~
pub struct Parser {
    // Current line; starts at 1 and is bumped for every '\n' consumed
    line: u32,
    // Characters consumed on the current line; reset to 0 on '\n'
    col: u32,
    // Set once an error has been yielded; iteration then stops for good
    has_error: bool,
    // Input that has been fed but not parsed yet
    data: VecDeque<char>,
    // Text of the construct currently being read
    buf: String,
    // Stack of prefix -> namespace bindings; the bottom entry holds the
    // predefined `xml` and `xmlns` prefixes
    namespaces: Vec<HashMap<String, String>>,
    // Attributes collected for the start tag being read, stored as
    // (local name, prefix, value)
    attributes: Vec<(String, Option<String>, String)>,
    // Construct the parser is currently inside
    st: State,
    // (prefix, local name) of the element whose tag is being read
    name: Option<(Option<String>, String)>,
    // (prefix, local name) of the attribute whose value is being read
    attr: Option<(Option<String>, String)>,
    // Quote character that opened the current attribute value
    delim: Option<char>,
    // Small scratch counter shared by several states (progress through an
    // opening/closing sequence, or a "seen" flag)
    level: u8,
}
159
160impl Parser {
161    /// Returns a new `Parser`
162    pub fn new() -> Parser {
163        let mut ns = HashMap::with_capacity(2);
164        // Add standard namespaces
165        ns.insert(
166            "xml".to_owned(),
167            "http://www.w3.org/XML/1998/namespace".to_owned(),
168        );
169        ns.insert(
170            "xmlns".to_owned(),
171            "http://www.w3.org/2000/xmlns/".to_owned(),
172        );
173
174        Parser {
175            line: 1,
176            col: 0,
177            has_error: false,
178            data: VecDeque::with_capacity(4096),
179            buf: String::new(),
180            namespaces: vec![ns],
181            attributes: Vec::new(),
182            st: State::OutsideTag,
183            name: None,
184            attr: None,
185            delim: None,
186            level: 0,
187        }
188    }
189
190    /// Feeds a string slice to the parser
191    pub fn feed_str(&mut self, data: &str) {
192        self.data.extend(data.chars());
193    }
194}
195
196impl Iterator for Parser {
197    type Item = Result<Event, ParserError>;
198
199    fn next(&mut self) -> Option<Result<Event, ParserError>> {
200        if self.has_error {
201            return None;
202        }
203
204        loop {
205            let c = match self.data.pop_front() {
206                Some(c) => c,
207                None => return None,
208            };
209
210            if c == '\n' {
211                self.line += 1;
212                self.col = 0;
213            } else {
214                self.col += 1;
215            }
216
217            match self.parse_character(c) {
218                Ok(None) => continue,
219                Ok(Some(event)) => {
220                    return Some(Ok(event));
221                }
222                Err(e) => {
223                    self.has_error = true;
224                    return Some(Err(e));
225                }
226            }
227        }
228    }
229}
230
// Split a QName into its optional Prefix and its LocalPart.
// The split happens at the first ':'; without one the whole input is the LocalPart.
#[inline]
fn parse_qname(mut qname: String) -> (Option<String>, String) {
    match qname.find(':') {
        None => (None, qname),
        Some(colon) => {
            let local = qname[colon + 1..].to_owned();
            // Drop the ':' and everything after it, leaving only the prefix
            qname.truncate(colon);
            (Some(qname), local)
        }
    }
}
242
243fn unescape_owned(input: String) -> Result<String, String> {
244    if input.find('&').is_none() {
245        Ok(input)
246    } else {
247        unescape(&input)
248    }
249}
250
251impl Parser {
252    // Get the namespace currently bound to a prefix.
253    // Bindings are stored as a stack of HashMaps, we start searching in the top most HashMap
254    // and traverse down until the prefix is found.
255    fn namespace_for_prefix(&self, prefix: &str) -> Option<String> {
256        for ns in self.namespaces.iter().rev() {
257            if let Some(namespace) = ns.get(prefix) {
258                if namespace.is_empty() {
259                    return None;
260                }
261                return Some(namespace.clone());
262            }
263        }
264        None
265    }
266
267    fn take_buf(&mut self) -> String {
268        self.buf.split_off(0)
269    }
270
271    fn error(&self, kind: ParserErrorKind) -> Result<Option<Event>, ParserError> {
272        Err(ParserError {
273            line: self.line,
274            col: self.col,
275            kind,
276        })
277    }
278
279    fn parse_character(&mut self, c: char) -> Result<Option<Event>, ParserError> {
280        // println(fmt!("Now in state: %?", self.st));
281        match self.st {
282            State::OutsideTag => self.outside_tag(c),
283            State::TagOpened => self.tag_opened(c),
284            State::InProcessingInstructions => self.in_processing_instructions(c),
285            State::InTagName => self.in_tag_name(c),
286            State::InCloseTagName => self.in_close_tag_name(c),
287            State::InTag => self.in_tag(c),
288            State::InAttrName => self.in_attr_name(c),
289            State::InAttrValue => self.in_attr_value(c),
290            State::ExpectDelimiter => self.expect_delimiter(c),
291            State::ExpectClose => self.expect_close(c),
292            State::ExpectSpaceOrClose => self.expect_space_or_close(c),
293            State::InExclamationMark => self.in_exclamation_mark(c),
294            State::InCDATAOpening => self.in_cdata_opening(c),
295            State::InCDATA => self.in_cdata(c),
296            State::InCommentOpening => self.in_comment_opening(c),
297            State::InComment1 => self.in_comment1(c),
298            State::InComment2 => self.in_comment2(c),
299            State::InDoctype => self.in_doctype(c),
300        }
301    }
302
303    // Outside any tag, or other construct
304    // '<' => TagOpened, producing Event::Characters
305    fn outside_tag(&mut self, c: char) -> Result<Option<Event>, ParserError> {
306        match c {
307            '<' if self.buf.is_empty() => self.st = State::TagOpened,
308            '<' => {
309                self.st = State::TagOpened;
310                let buf = match unescape_owned(self.take_buf()) {
311                    Ok(unescaped) => unescaped,
312                    Err(_) => return self.error(ParserErrorKind::InvalidEntity),
313                };
314                return Ok(Some(Event::Characters(buf)));
315            }
316            _ => self.buf.push(c),
317        }
318        Ok(None)
319    }
320
321    // Character following a '<', starting a tag or other construct
322    // '?' => InProcessingInstructions
323    // '!' => InExclamationMark
324    // '/' => InCloseTagName
325    //  _  => InTagName
326    fn tag_opened(&mut self, c: char) -> Result<Option<Event>, ParserError> {
327        self.st = match c {
328            '?' => State::InProcessingInstructions,
329            '!' => State::InExclamationMark,
330            '/' => State::InCloseTagName,
331            _ => {
332                self.buf.push(c);
333                State::InTagName
334            }
335        };
336        Ok(None)
337    }
338
339    // Inside a processing instruction
340    // '?' '>' => OutsideTag, producing PI
341    fn in_processing_instructions(&mut self, c: char) -> Result<Option<Event>, ParserError> {
342        match c {
343            '?' => {
344                self.level = 1;
345                self.buf.push(c);
346            }
347            '>' if self.level == 1 => {
348                self.level = 0;
349                self.st = State::OutsideTag;
350                let _ = self.buf.pop();
351                let buf = self.take_buf();
352                return Ok(Some(Event::PI(buf)));
353            }
354            _ => self.buf.push(c),
355        }
356        Ok(None)
357    }
358
359    // Inside a tag name (opening tag)
360    // '/' => ExpectClose, producing Event::ElementStart
361    // '>' => OutsideTag, producing Event::ElementStart
362    // ' ' or '\t' or '\r' or '\n' => InTag
363    fn in_tag_name(&mut self, c: char) -> Result<Option<Event>, ParserError> {
364        match c {
365            '/' | '>' => {
366                let (prefix, name) = parse_qname(self.take_buf());
367                let ns = match prefix {
368                    None => self.namespace_for_prefix(""),
369                    Some(ref pre) => match self.namespace_for_prefix(pre) {
370                        None => return self.error(ParserErrorKind::UnboundNsPrefixInTagName),
371                        ns => ns,
372                    },
373                };
374
375                self.namespaces.push(HashMap::new());
376                self.st = if c == '/' {
377                    self.name = Some((prefix.clone(), name.clone()));
378                    State::ExpectClose
379                } else {
380                    State::OutsideTag
381                };
382
383                return Ok(Some(Event::ElementStart(StartTag {
384                    name,
385                    ns,
386                    prefix,
387                    attributes: AttrMap::new(),
388                })));
389            }
390            ' ' | '\t' | '\r' | '\n' => {
391                self.namespaces.push(HashMap::new());
392                self.name = Some(parse_qname(self.take_buf()));
393                self.st = State::InTag;
394            }
395            _ => self.buf.push(c),
396        }
397        Ok(None)
398    }
399
400    // Inside a tag name (closing tag)
401    // '>' => OutsideTag, producing ElementEnd
402    // ' ' or '\t' or '\r' or '\n' => ExpectSpaceOrClose, producing ElementEnd
403    fn in_close_tag_name(&mut self, c: char) -> Result<Option<Event>, ParserError> {
404        match c {
405            ' ' | '\t' | '\r' | '\n' | '>' => {
406                let (prefix, name) = parse_qname(self.take_buf());
407
408                let ns = match prefix {
409                    None => self.namespace_for_prefix(""),
410                    Some(ref pre) => match self.namespace_for_prefix(pre) {
411                        None => return self.error(ParserErrorKind::UnboundNsPrefixInTagName),
412                        ns => ns,
413                    },
414                };
415
416                self.namespaces.pop();
417                self.st = if c == '>' {
418                    State::OutsideTag
419                } else {
420                    State::ExpectSpaceOrClose
421                };
422
423                Ok(Some(Event::ElementEnd(EndTag { name, ns, prefix })))
424            }
425            _ => {
426                self.buf.push(c);
427                Ok(None)
428            }
429        }
430    }
431
432    // Inside a tag, parsing attributes
433    // '/' => ExpectClose, producing StartTag
434    // '>' => OutsideTag, producing StartTag
435    // ' ' or '\t' or '\r' or '\n' => InAttrName
436    fn in_tag(&mut self, c: char) -> Result<Option<Event>, ParserError> {
437        match c {
438            '/' | '>' => {
439                let attributes = mem::take(&mut self.attributes);
440                let (prefix, name) = self
441                    .name
442                    .take()
443                    .expect("Internal error: No element name set");
444                let ns = match prefix {
445                    None => self.namespace_for_prefix(""),
446                    Some(ref pre) => match self.namespace_for_prefix(pre) {
447                        None => return self.error(ParserErrorKind::UnboundNsPrefixInTagName),
448                        ns => ns,
449                    },
450                };
451
452                let mut attributes_map: AttrMap<(String, Option<String>), String> = AttrMap::new();
453
454                // At this point attribute namespaces are really just prefixes,
455                // map them to the actual namespace
456                for (name, ns, value) in attributes {
457                    let ns = match ns {
458                        None => None,
459                        Some(ref prefix) => match self.namespace_for_prefix(prefix) {
460                            None => {
461                                return self.error(ParserErrorKind::UnboundNsPrefixInAttributeName)
462                            }
463                            ns => ns,
464                        },
465                    };
466                    if attributes_map.insert((name, ns), value).is_some() {
467                        return self.error(ParserErrorKind::DuplicateAttribute);
468                    }
469                }
470
471                self.st = if c == '/' {
472                    self.name = Some((prefix.clone(), name.clone()));
473                    State::ExpectClose
474                } else {
475                    State::OutsideTag
476                };
477
478                return Ok(Some(Event::ElementStart(StartTag {
479                    name,
480                    ns,
481                    prefix,
482                    attributes: attributes_map,
483                })));
484            }
485            ' ' | '\t' | '\r' | '\n' => (),
486            _ => {
487                self.buf.push(c);
488                self.st = State::InAttrName;
489            }
490        }
491        Ok(None)
492    }
493
494    // Inside an attribute name
495    // '=' => ExpectDelimiter
496    fn in_attr_name(&mut self, c: char) -> Result<Option<Event>, ParserError> {
497        match c {
498            '=' => {
499                self.level = 0;
500                self.attr = Some(parse_qname(self.take_buf()));
501                self.st = State::ExpectDelimiter;
502            }
503            ' ' | '\t' | '\r' | '\n' => self.level = 1,
504            _ if self.level == 0 => self.buf.push(c),
505            _ => return self.error(ParserErrorKind::SpaceInAttributeName),
506        }
507        Ok(None)
508    }
509
510    // Inside an attribute value
511    // delimiter => InTag, adds attribute
512    fn in_attr_value(&mut self, c: char) -> Result<Option<Event>, ParserError> {
513        if c == self
514            .delim
515            .expect("Internal error: In attribute value, but no delimiter set")
516        {
517            self.delim = None;
518            self.st = State::InTag;
519            let attr = self.attr.take();
520            let (prefix, name) =
521                attr.expect("Internal error: In attribute value, but no attribute name set");
522            let value = match unescape_owned(self.take_buf()) {
523                Ok(unescaped) => unescaped,
524                Err(_) => return self.error(ParserErrorKind::InvalidEntity),
525            };
526
527            let last = self
528                .namespaces
529                .last_mut()
530                .expect("Internal error: Empty namespace stack");
531            match prefix {
532                None if name == "xmlns" => {
533                    last.insert(String::new(), value.clone());
534                }
535                Some(ref prefix) if prefix == "xmlns" => {
536                    last.insert(name.clone(), value.clone());
537                }
538                _ => (),
539            }
540
541            self.attributes.push((name, prefix, value));
542        } else {
543            self.buf.push(c);
544        }
545        Ok(None)
546    }
547
548    // Looking for an attribute value delimiter
549    // '"' or '\'' => InAttrValue, sets delimiter
550    fn expect_delimiter(&mut self, c: char) -> Result<Option<Event>, ParserError> {
551        match c {
552            '"' | '\'' => {
553                self.delim = Some(c);
554                self.st = State::InAttrValue;
555            }
556            ' ' | '\t' | '\r' | '\n' => (),
557            _ => return self.error(ParserErrorKind::UndelimitedAttribute),
558        }
559        Ok(None)
560    }
561
562    // Expect closing '>' of an empty-element tag (no whitespace allowed)
563    // '>' => OutsideTag
564    fn expect_close(&mut self, c: char) -> Result<Option<Event>, ParserError> {
565        match c {
566            '>' => {
567                self.st = State::OutsideTag;
568                let (prefix, name) = self
569                    .name
570                    .take()
571                    .expect("Internal error: No element name set");
572                let ns = match prefix {
573                    None => self.namespace_for_prefix(""),
574                    Some(ref pre) => match self.namespace_for_prefix(pre) {
575                        None => return self.error(ParserErrorKind::UnboundNsPrefixInTagName),
576                        ns => ns,
577                    },
578                };
579                self.namespaces.pop();
580                Ok(Some(Event::ElementEnd(EndTag { name, ns, prefix })))
581            }
582            _ => self.error(ParserErrorKind::ExpectedTagClose),
583        }
584    }
585
586    // Expect closing '>' of a start tag
587    // '>' => OutsideTag
588    fn expect_space_or_close(&mut self, c: char) -> Result<Option<Event>, ParserError> {
589        match c {
590            ' ' | '\t' | '\r' | '\n' => Ok(None),
591            '>' => {
592                self.st = State::OutsideTag;
593                Ok(None)
594            }
595            _ => self.error(ParserErrorKind::ExpectedLwsOrTagClose),
596        }
597    }
598
599    // After an '!' trying to determine the type of the following construct
600    // '-' => InCommentOpening
601    // '[' => InCDATAOpening
602    // 'D' => InDoctype
603    fn in_exclamation_mark(&mut self, c: char) -> Result<Option<Event>, ParserError> {
604        self.st = match c {
605            '-' => State::InCommentOpening,
606            '[' => State::InCDATAOpening,
607            'D' => State::InDoctype,
608            _ => return self.error(ParserErrorKind::MalformedXml),
609        };
610        Ok(None)
611    }
612
613    // Opening sequence of Event::CDATA
614    // 'C' 'D' 'A' 'T' 'A' '[' => InCDATA
615    fn in_cdata_opening(&mut self, c: char) -> Result<Option<Event>, ParserError> {
616        static CDATA_PATTERN: [char; 6] = ['C', 'D', 'A', 'T', 'A', '['];
617        if c == CDATA_PATTERN[self.level as usize] {
618            self.level += 1;
619        } else {
620            return self.error(ParserErrorKind::InvalidCdataStart);
621        }
622
623        if self.level == 6 {
624            self.level = 0;
625            self.st = State::InCDATA;
626        }
627        Ok(None)
628    }
629
630    // Inside CDATA
631    // ']' ']' '>' => OutsideTag, producing Event::CDATA
632    fn in_cdata(&mut self, c: char) -> Result<Option<Event>, ParserError> {
633        match c {
634            ']' => {
635                self.buf.push(c);
636                self.level += 1;
637            }
638            '>' if self.level >= 2 => {
639                self.st = State::OutsideTag;
640                self.level = 0;
641                let len = self.buf.len();
642                self.buf.truncate(len - 2);
643                let buf = self.take_buf();
644                return Ok(Some(Event::CDATA(buf)));
645            }
646            _ => {
647                self.buf.push(c);
648                self.level = 0;
649            }
650        }
651        Ok(None)
652    }
653
654    // Opening sequence of a comment
655    // '-' => InComment1
656    fn in_comment_opening(&mut self, c: char) -> Result<Option<Event>, ParserError> {
657        if c == '-' {
658            self.st = State::InComment1;
659            self.level = 0;
660            Ok(None)
661        } else {
662            self.error(ParserErrorKind::InvalidCommentStart)
663        }
664    }
665
666    // Inside a comment
667    // '-' '-' => InComment2
668    fn in_comment1(&mut self, c: char) -> Result<Option<Event>, ParserError> {
669        if c == '-' {
670            self.level += 1;
671        } else {
672            self.level = 0;
673        }
674
675        if self.level == 2 {
676            self.level = 0;
677            self.st = State::InComment2;
678        }
679
680        self.buf.push(c);
681
682        Ok(None)
683    }
684
685    // Closing a comment
686    // '>' => OutsideTag, producing Comment
687    fn in_comment2(&mut self, c: char) -> Result<Option<Event>, ParserError> {
688        if c != '>' {
689            self.error(ParserErrorKind::InvalidCommentContent)
690        } else {
691            self.st = State::OutsideTag;
692            let len = self.buf.len();
693            self.buf.truncate(len - 2);
694            let buf = self.take_buf();
695            Ok(Some(Event::Comment(buf)))
696        }
697    }
698
699    // Inside a doctype
700    // '>' after appropriate opening => OutsideTag
701    fn in_doctype(&mut self, c: char) -> Result<Option<Event>, ParserError> {
702        static DOCTYPE_PATTERN: [char; 6] = ['O', 'C', 'T', 'Y', 'P', 'E'];
703        match self.level {
704            0..=5 => {
705                if c == DOCTYPE_PATTERN[self.level as usize] {
706                    self.level += 1;
707                } else {
708                    return self.error(ParserErrorKind::InvalidDoctype);
709                }
710            }
711            6 => {
712                match c {
713                    ' ' | '\t' | '\r' | '\n' => (),
714                    _ => return self.error(ParserErrorKind::InvalidDoctype),
715                }
716                self.level += 1;
717            }
718            _ if c == '>' => {
719                self.level = 0;
720                self.st = State::OutsideTag;
721            }
722            _ => (),
723        }
724        Ok(None)
725    }
726}
727
#[cfg(test)]
mod parser_tests {
    use super::Parser;
    use crate::{AttrMap, EndTag, Event, ParserError, StartTag};

    // Runs a fresh parser over `input` and gathers everything it yields.
    fn parse_all(input: &str) -> Vec<Result<Event, ParserError>> {
        let mut parser = Parser::new();
        parser.feed_str(input);
        parser.collect()
    }

    // Expected start event for an element without prefix, namespace or attributes.
    fn plain_start(name: &str) -> Result<Event, ParserError> {
        Ok(Event::ElementStart(StartTag {
            name: name.to_owned(),
            ns: None,
            prefix: None,
            attributes: AttrMap::new(),
        }))
    }

    // Expected end event for an element without prefix or namespace.
    fn plain_end(name: &str) -> Result<Event, ParserError> {
        Ok(Event::ElementEnd(EndTag {
            name: name.to_owned(),
            ns: None,
            prefix: None,
        }))
    }

    #[test]
    fn test_start_tag() {
        assert_eq!(parse_all("<a>"), vec![plain_start("a")]);
    }

    #[test]
    fn test_end_tag() {
        assert_eq!(parse_all("</a>"), vec![plain_end("a")]);
    }

    #[test]
    fn test_self_closing_with_space() {
        assert_eq!(
            parse_all("<register />"),
            vec![plain_start("register"), plain_end("register")],
        );
    }

    #[test]
    fn test_self_closing_without_space() {
        assert_eq!(
            parse_all("<register/>"),
            vec![plain_start("register"), plain_end("register")],
        );
    }

    #[test]
    fn test_self_closing_namespace() {
        // The declaration itself shows up as an attribute in the xmlns namespace
        let mut attr: AttrMap<(String, Option<String>), String> = AttrMap::new();
        attr.insert(
            (
                "foo".to_owned(),
                Some("http://www.w3.org/2000/xmlns/".to_owned()),
            ),
            "urn:foo".to_owned(),
        );

        let expected = vec![
            Ok(Event::ElementStart(StartTag {
                name: "a".to_owned(),
                ns: Some("urn:foo".to_owned()),
                prefix: Some("foo".to_owned()),
                attributes: attr,
            })),
            Ok(Event::ElementEnd(EndTag {
                name: "a".to_owned(),
                ns: Some("urn:foo".to_owned()),
                prefix: Some("foo".to_owned()),
            })),
        ];
        assert_eq!(parse_all("<foo:a xmlns:foo='urn:foo'/>"), expected);
    }

    #[test]
    fn test_pi() {
        assert_eq!(
            parse_all("<?xml version='1.0' encoding='utf-8'?>"),
            vec![Ok(Event::PI(
                "xml version='1.0' encoding='utf-8'".to_owned()
            ))],
        );
    }

    #[test]
    fn test_comment() {
        assert_eq!(
            parse_all("<!--Nothing to see-->"),
            vec![Ok(Event::Comment("Nothing to see".to_owned()))],
        );
    }

    #[test]
    fn test_cdata() {
        assert_eq!(
            parse_all("<![CDATA[<html><head><title>x</title></head><body/></html>]]>"),
            vec![Ok(Event::CDATA(
                "<html><head><title>x</title></head><body/></html>".to_owned()
            ))],
        );
    }

    #[test]
    fn test_characters() {
        // Only the middle event (the text) is inspected; the surrounding
        // tags are covered by other tests.
        let events = parse_all("<text>Hello World, it&apos;s a nice day</text>");
        assert_eq!(events.len(), 3);
        assert_eq!(
            events[1],
            Ok(Event::Characters("Hello World, it's a nice day".to_owned())),
        );
    }

    #[test]
    fn test_doctype() {
        // A doctype is consumed silently
        assert_eq!(parse_all("<!DOCTYPE html>").len(), 0);
    }

    #[test]
    #[cfg(feature = "ordered_attrs")]
    fn test_attribute_order() {
        let input = "<a href='/' title='Home' target='_blank'>";
        let expected_attributes = vec![
            (("href".to_owned(), None), "/".to_owned()),
            (("title".to_owned(), None), "Home".to_owned()),
            (("target".to_owned(), None), "_blank".to_owned()),
        ];

        // Run this 5 times to make it unlikely this test succeeds at random
        for _ in 0..5 {
            let mut p = Parser::new();
            p.feed_str(input);
            match p.next() {
                Some(Ok(Event::ElementStart(tag))) => {
                    for (expected, actual) in expected_attributes.iter().zip(tag.attributes) {
                        assert_eq!(expected, &actual);
                    }
                }
                _ => panic!("Missing ElementStart event"),
            }
            assert!(p.next().is_none());
        }
    }
}