xml/reader/
parser.rs

1//! Contains an implementation of pull-based XML parser.
2
3use crate::common::{is_xml10_char, is_xml11_char, is_xml11_char_not_restricted, is_name_char, is_name_start_char, is_whitespace_char};
4use crate::common::{Position, TextPosition, XmlVersion};
5use crate::name::OwnedName;
6use crate::namespace::NamespaceStack;
7use crate::reader::config::ParserConfig2;
8use crate::reader::error::SyntaxError;
9use crate::reader::events::XmlEvent;
10use crate::reader::indexset::AttributesSet;
11use crate::reader::lexer::{Lexer, Token};
12use super::{Error, ErrorKind};
13
14use std::collections::HashMap;
15use std::io::Read;
16
// Generates `take_*` accessors on `MarkupData`.
//
// Every `field -> method, Type, default` entry (entries are separated by `;`)
// becomes a method that moves the current value out of `field` and leaves
// `default` behind in its place.
macro_rules! gen_takes {
    ($($slot:ident -> $taker:ident, $ty:ty, $empty:expr);+) => {
        impl MarkupData {
            $(
                #[inline]
                #[allow(clippy::mem_replace_option_with_none)]
                #[allow(clippy::mem_replace_with_default)]
                fn $taker(&mut self) -> $ty {
                    std::mem::replace(&mut self.$slot, $empty)
                }
            )+
        }
    };
}
31
// Instantiates the `take_*` accessors of `MarkupData`.
// Each entry reads: `field -> generated method, field type, value left behind`.
gen_takes!(
    name         -> take_name, String, String::new();
    ref_data     -> take_ref_data, String, String::new();

    encoding     -> take_encoding, Option<String>, None;

    element_name -> take_element_name, Option<OwnedName>, None;

    attr_name    -> take_attr_name, Option<OwnedName>, None;
    attributes   -> take_attributes, AttributesSet, AttributesSet::new()
);
43
44mod inside_cdata;
45mod inside_closing_tag_name;
46mod inside_comment;
47mod inside_declaration;
48mod inside_doctype;
49mod inside_opening_tag;
50mod inside_processing_instruction;
51mod inside_reference;
52mod outside_tag;
53
/// XML version reported by the `StartDocument` event that `set_encountered` synthesizes.
static DEFAULT_VERSION: XmlVersion = XmlVersion::Version10;
/// `standalone` value reported by that synthesized `StartDocument` event.
static DEFAULT_STANDALONE: Option<bool> = None;

/// Names of the elements pushed by `emit_start_element` and popped by `emit_end_element`.
type ElementStack = Vec<OwnedName>;
/// Outcome of one parsing step: an [`XmlEvent`] wrapped in the reader's `Result` alias.
pub type Result = super::Result<XmlEvent>;
59
/// Pull-based XML parser.
///
/// Tokens are pulled from the lexer one at a time and routed to the handler of
/// the current [`State`] until an event (or an error) is ready to be returned.
pub(crate) struct PullParser {
    /// Parser configuration supplied at construction.
    config: ParserConfig2,
    /// Token source; `next` asks it for one token at a time.
    lexer: Lexer,
    /// Current state; selects the handler in `dispatch_token`.
    st: State,
    /// State saved before switching to `State::InsideReference`
    /// (presumably restored by the reference handler -- confirm in `inside_reference`).
    state_after_reference: State,
    /// Scratch buffer accumulating the text of the name or attribute value being read.
    buf: String,

    /// From DTD internal subset
    entities: HashMap<String, String>,

    /// Namespace stack used to resolve element and attribute prefixes.
    nst: NamespaceStack,

    /// Pieces of the markup construct that is currently being parsed.
    data: MarkupData,
    /// Terminal result; once set, every later `next` call returns a clone of it.
    final_result: Option<Result>,
    /// Event queued to be returned by the following `next` call.
    next_event: Option<Result>,
    /// Stack of the names of currently open elements; its length is the depth.
    est: ElementStack,
    /// Queue of text positions; the front entry is what `position()` reports.
    pos: Vec<TextPosition>,

    /// Furthest part of the document seen so far (see `set_encountered`).
    encountered: Encountered,
    // NOTE(review): not read in this file; presumably maintained by the state
    // handlers in the submodules -- confirm there.
    inside_whitespace: bool,
    /// Set once `read_qualified_name` has accepted the `:` of the current name.
    seen_prefix_separator: bool,
    /// When set, the next `next` call pops `nst` before reading further tokens.
    pop_namespace: bool,
}
84
// Keeps track when XML declaration can happen
//
// The variant order is significant: `set_encountered` compares values using the
// derived ordering and only ever moves forward.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum Encountered {
    None = 0,
    AnyChars, // whitespace before <?xml is not allowed
    Declaration,
    Comment,
    Doctype,
    Element,
}
95
96impl PullParser {
97    /// Returns a new parser using the given config.
98    #[inline]
99    pub fn new(config: impl Into<ParserConfig2>) -> Self {
100        let config = config.into();
101        Self::new_with_config2(config)
102    }
103
104    #[inline]
105    fn new_with_config2(config: ParserConfig2) -> Self {
106        let mut lexer = Lexer::new(&config);
107        if let Some(enc) = config.override_encoding {
108            lexer.set_encoding(enc);
109        }
110
111        let mut pos = Vec::with_capacity(16);
112        pos.push(TextPosition::new());
113
114        Self {
115            config,
116            lexer,
117            st: State::DocumentStart,
118            state_after_reference: State::OutsideTag,
119            buf: String::new(),
120            entities: HashMap::new(),
121            nst: NamespaceStack::default(),
122
123            data: MarkupData {
124                name: String::new(),
125                doctype: None,
126                version: None,
127                encoding: None,
128                standalone: None,
129                ref_data: String::new(),
130                element_name: None,
131                quote: None,
132                attr_name: None,
133                attributes: AttributesSet::new(),
134            },
135            final_result: None,
136            next_event: None,
137            est: Vec::new(),
138            pos,
139
140            encountered: Encountered::None,
141            inside_whitespace: true,
142            seen_prefix_separator: false,
143            pop_namespace: false,
144        }
145    }
146
147    /// Checks if this parser ignores the end of stream errors.
148    pub fn is_ignoring_end_of_stream(&self) -> bool { self.config.c.ignore_end_of_stream }
149
150    /// Retrieves the Doctype from the document if any
151    #[inline]
152    pub fn doctype(&self) -> Option<&str> {
153        self.data.doctype.as_deref()
154    }
155
156    #[inline(never)]
157    fn set_encountered(&mut self, new_encounter: Encountered) -> Option<Result> {
158        if new_encounter <= self.encountered {
159            return None;
160        }
161        let prev_enc = self.encountered;
162        self.encountered = new_encounter;
163
164        // If declaration was not parsed and we have encountered an element,
165        // emit this declaration as the next event.
166        if prev_enc == Encountered::None {
167            self.push_pos();
168            Some(Ok(XmlEvent::StartDocument {
169                version: DEFAULT_VERSION,
170                encoding: self.lexer.encoding().to_string(),
171                standalone: DEFAULT_STANDALONE,
172            }))
173        } else {
174            None
175        }
176    }
177}
178
179impl Position for PullParser {
180    /// Returns the position of the last event produced by the parser
181    #[inline]
182    fn position(&self) -> TextPosition {
183        self.pos.first().copied().unwrap_or_else(TextPosition::new)
184    }
185}
186
/// Top-level state of the parser's state machine.
///
/// `PullParser::dispatch_token` hands every token to the handler matching the
/// current variant; variants with a payload pass that substate to the handler.
#[derive(Copy, Clone, PartialEq)]
pub enum State {
    OutsideTag,
    InsideOpeningTag(OpeningTagSubstate),
    InsideClosingTag(ClosingTagSubstate),
    InsideProcessingInstruction(ProcessingInstructionSubstate),
    InsideComment,
    InsideCData,
    InsideDeclaration(DeclarationSubstate),
    InsideDoctype(DoctypeSubstate),
    InsideReference,
    // Initial state of a freshly constructed parser.
    DocumentStart,
}
200
/// Substate carried by [`State::InsideDoctype`] and passed to the doctype handler.
#[derive(Copy, Clone, PartialEq)]
pub enum DoctypeSubstate {
    Outside,
    String,
    InsideName,
    BeforeEntityName,
    EntityName,
    BeforeEntityValue,
    EntityValue,
    NumericReferenceStart,
    NumericReference,
    /// expansion
    PEReferenceInValue,
    PEReferenceInDtd,
    /// name definition
    PEReferenceDefinitionStart,
    PEReferenceDefinition,
    SkipDeclaration,
    Comment,
}
221
/// Substate carried by [`State::InsideOpeningTag`] and passed to the opening-tag handler.
#[derive(Copy, Clone, PartialEq)]
pub enum OpeningTagSubstate {
    InsideName,

    InsideTag,

    InsideAttributeName,
    AfterAttributeName,

    InsideAttributeValue,
    AfterAttributeValue,
}
234
/// Substate carried by [`State::InsideClosingTag`] and passed to the closing-tag handler.
#[derive(Copy, Clone, PartialEq)]
pub enum ClosingTagSubstate {
    CTInsideName,
    CTAfterName,
}
240
/// Substate carried by [`State::InsideProcessingInstruction`] and passed to its handler.
#[derive(Copy, Clone, PartialEq)]
pub enum ProcessingInstructionSubstate {
    PIInsideName,
    PIInsideData,
}
246
/// Substate carried by [`State::InsideDeclaration`] and passed to the declaration handler.
///
/// The variants are grouped by the declaration attribute they belong to:
/// version, encoding and standalone.
#[derive(Copy, Clone, PartialEq)]
pub enum DeclarationSubstate {
    BeforeVersion,
    InsideVersion,
    AfterVersion,

    InsideVersionValue,
    AfterVersionValue,

    BeforeEncoding,
    InsideEncoding,
    AfterEncoding,

    InsideEncodingValue,
    AfterEncodingValue,

    BeforeStandaloneDecl,
    InsideStandaloneDecl,
    AfterStandaloneDecl,

    InsideStandaloneDeclValue,
    AfterStandaloneDeclValue,
}
270
/// Tells `read_qualified_name` which kind of name is being read.
///
/// The target decides which tokens may terminate the name: `=` for attribute
/// names, `/>` for opening tag names, and `>` for opening or closing tag names.
#[derive(Copy, Clone, PartialEq)]
enum QualifiedNameTarget {
    AttributeNameTarget,
    OpeningTagNameTarget,
    ClosingTagNameTarget,
}
277
278#[derive(Copy, Clone, PartialEq, Eq)]
279enum QuoteToken {
280    SingleQuoteToken,
281    DoubleQuoteToken,
282}
283
284impl QuoteToken {
285    #[inline]
286    fn from_token(t: Token) -> Option<Self> {
287        match t {
288            Token::SingleQuote => Some(Self::SingleQuoteToken),
289            Token::DoubleQuote => Some(Self::DoubleQuoteToken),
290            _ => {
291                debug_assert!(false);
292                None
293            },
294        }
295    }
296
297    const fn as_token(self) -> Token {
298        match self {
299            Self::SingleQuoteToken => Token::SingleQuote,
300            Self::DoubleQuoteToken => Token::DoubleQuote,
301        }
302    }
303}
304
// Pieces of the markup construct that is currently being parsed.
//
// Several fields have a `take_*` accessor generated by `gen_takes!`, which moves
// the value out and leaves an empty default behind.
struct MarkupData {
    name: String,     // used for processing instruction name
    ref_data: String,  // used for reference content

    doctype: Option<String>, // keeps a copy of the original doctype
    version: Option<XmlVersion>,  // used for XML declaration version
    encoding: Option<String>,  // used for XML declaration encoding
    standalone: Option<bool>,  // used for XML declaration standalone parameter

    element_name: Option<OwnedName>,  // used for element name

    quote: Option<QuoteToken>,  // used to hold opening quote for attribute value
    attr_name: Option<OwnedName>,  // used to hold attribute name
    attributes: AttributesSet,   // used to hold all accumulated attributes
}
320
321impl PullParser {
322    /// Returns next event read from the given buffer.
323    ///
324    /// This method should be always called with the same buffer. If you call it
325    /// providing different buffers each time, the result will be undefined.
326    pub fn next<R: Read>(&mut self, r: &mut R) -> Result {
327        if let Some(ref ev) = self.final_result {
328            return ev.clone();
329        }
330
331        if let Some(ev) = self.next_event.take() {
332            return ev;
333        }
334
335        if self.pop_namespace {
336            self.pop_namespace = false;
337            self.nst.pop();
338        }
339
340        loop {
341            debug_assert!(self.next_event.is_none());
342            debug_assert!(!self.pop_namespace);
343
344            // While lexer gives us Ok(maybe_token) -- we loop.
345            // Upon having a complete XML-event -- we return from the whole function.
346            match self.lexer.next_token(r) {
347                Ok(Token::Eof) => {
348                    // Forward pos to the lexer head
349                    self.next_pos();
350                    return self.handle_eof();
351                },
352                Ok(token) => match self.dispatch_token(token) {
353                    None => continue,
354                    Some(Ok(xml_event)) => {
355                        self.next_pos();
356                        return Ok(xml_event);
357                    },
358                    Some(Err(xml_error)) => {
359                        self.next_pos();
360                        return self.set_final_result(Err(xml_error));
361                    },
362                },
363                Err(lexer_error) => {
364                    self.next_pos();
365                    return self.set_final_result(Err(lexer_error));
366                },
367            }
368        }
369    }
370
371    /// Handle end of stream
372    #[cold]
373    fn handle_eof(&mut self) -> std::result::Result<XmlEvent, super::Error> {
374        let ev = if self.depth() == 0 {
375            if self.encountered == Encountered::Element && self.st == State::OutsideTag {  // all is ok
376                Ok(XmlEvent::EndDocument)
377            } else if self.encountered < Encountered::Element {
378                self.error(SyntaxError::NoRootElement)
379            } else {  // self.st != State::OutsideTag
380                self.error(SyntaxError::UnexpectedEof)  // TODO: add expected hint?
381            }
382        } else if self.config.c.ignore_end_of_stream {
383            self.final_result = None;
384            self.lexer.reset_eof_handled();
385            return self.error(SyntaxError::UnbalancedRootElement);
386        } else {
387            self.error(SyntaxError::UnbalancedRootElement)
388        };
389        self.set_final_result(ev)
390    }
391
392    // This function is to be called when a terminal event is reached.
393    // The function sets up the `self.final_result` into `Some(result)` and return `result`.
394    #[inline]
395    fn set_final_result(&mut self, result: Result) -> Result {
396        self.final_result = Some(result.clone());
397        result
398    }
399
400    #[cold]
401    fn error(&self, e: SyntaxError) -> Result {
402        Err(Error {
403            pos: self.lexer.position(),
404            kind: ErrorKind::Syntax(e.to_cow()),
405        })
406    }
407
408    #[inline]
409    fn next_pos(&mut self) {
410        // unfortunately calls to next_pos will never be perfectly balanced with push_pos,
411        // at very least because parse errors and EOF can happen unexpectedly without a prior push.
412        if !self.pos.is_empty() {
413            if self.pos.len() > 1 {
414                self.pos.remove(0);
415            } else {
416                self.pos[0] = self.lexer.position();
417            }
418        }
419    }
420
421    #[inline]
422    #[track_caller]
423    fn push_pos(&mut self) {
424        debug_assert!(self.pos.len() != self.pos.capacity(), "You've found a bug in xml-rs, caused by calls to push_pos() in states that don't end up emitting events.
425            This case is ignored in release mode, and merely causes document positions to be out of sync.
426            Please file a bug and include the XML document that triggers this assert.");
427
428        // it has capacity preallocated for more than it ever needs, so this reduces code size
429        if self.pos.len() != self.pos.capacity() {
430            self.pos.push(self.lexer.position());
431        } else if self.pos.len() > 1 {
432            self.pos.remove(0); // this mitigates the excessive push_pos() call
433        }
434    }
435
436    #[inline(never)]
437    fn dispatch_token(&mut self, t: Token) -> Option<Result> {
438        match self.st {
439            State::OutsideTag                     => self.outside_tag(t),
440            State::InsideOpeningTag(s)            => self.inside_opening_tag(t, s),
441            State::InsideClosingTag(s)            => self.inside_closing_tag_name(t, s),
442            State::InsideReference                => self.inside_reference(t),
443            State::InsideComment                  => self.inside_comment(t),
444            State::InsideCData                    => self.inside_cdata(t),
445            State::InsideProcessingInstruction(s) => self.inside_processing_instruction(t, s),
446            State::InsideDoctype(s)               => self.inside_doctype(t, s),
447            State::InsideDeclaration(s)           => self.inside_declaration(t, s),
448            State::DocumentStart                  => self.document_start(t),
449        }
450    }
451
452    #[inline]
453    fn depth(&self) -> usize {
454        self.est.len()
455    }
456
457    #[inline]
458    fn buf_has_data(&self) -> bool {
459        !self.buf.is_empty()
460    }
461
462    #[inline]
463    fn take_buf(&mut self) -> String {
464        std::mem::take(&mut self.buf)
465    }
466
467    #[inline]
468    fn into_state(&mut self, st: State, ev: Option<Result>) -> Option<Result> {
469        self.st = st;
470        ev
471    }
472
473    #[inline]
474    fn into_state_continue(&mut self, st: State) -> Option<Result> {
475        self.into_state(st, None)
476    }
477
478    #[inline]
479    fn into_state_emit(&mut self, st: State, ev: Result) -> Option<Result> {
480        self.into_state(st, Some(ev))
481    }
482
483    /// Dispatches tokens in order to process qualified name. If qualified name cannot be parsed,
484    /// an error is returned.
485    ///
486    /// # Parameters
487    /// * `t`       --- next token;
488    /// * `on_name` --- a callback which is executed when whitespace is encountered.
489    fn read_qualified_name<F>(&mut self, t: Token, target: QualifiedNameTarget, on_name: F) -> Option<Result>
490      where F: Fn(&mut Self, Token, OwnedName) -> Option<Result> {
491
492        let invoke_callback = move |this: &mut Self, t| {
493            let name = this.take_buf();
494            this.seen_prefix_separator = false;
495            match name.parse() {
496                Ok(name) => on_name(this, t, name),
497                Err(()) => Some(this.error(SyntaxError::InvalidQualifiedName(name.into()))),
498            }
499        };
500
501        match t {
502            // There can be only one colon, and not as the first character
503            Token::Character(':') if self.buf_has_data() && !self.seen_prefix_separator => {
504                self.buf.push(':');
505                self.seen_prefix_separator = true;
506                None
507            },
508
509            Token::Character(c) if c != ':' && (self.buf.is_empty() && is_name_start_char(c) ||
510                                          self.buf_has_data() && is_name_char(c)) => {
511                if self.buf.len() > self.config.max_name_length {
512                    return Some(self.error(SyntaxError::ExceededConfiguredLimit));
513                }
514                self.buf.push(c);
515                None
516            },
517
518            Token::EqualsSign if target == QualifiedNameTarget::AttributeNameTarget => invoke_callback(self, t),
519
520            Token::EmptyTagEnd if target == QualifiedNameTarget::OpeningTagNameTarget => invoke_callback(self, t),
521
522            Token::TagEnd if target == QualifiedNameTarget::OpeningTagNameTarget ||
523                      target == QualifiedNameTarget::ClosingTagNameTarget => invoke_callback(self, t),
524
525            Token::Character(c) if is_whitespace_char(c) => invoke_callback(self, t),
526
527            _ => Some(self.error(SyntaxError::UnexpectedQualifiedName(t))),
528        }
529    }
530
531    /// Dispatches tokens in order to process attribute value.
532    ///
533    /// # Parameters
534    /// * `t`        --- next token;
535    /// * `on_value` --- a callback which is called when terminating quote is encountered.
536    fn read_attribute_value<F>(&mut self, t: Token, on_value: F) -> Option<Result>
537      where F: Fn(&mut Self, String) -> Option<Result> {
538        match t {
539            Token::Character(c) if self.data.quote.is_none() && is_whitespace_char(c) => None, // skip leading whitespace
540
541            Token::DoubleQuote | Token::SingleQuote => match self.data.quote {
542                None => {  // Entered attribute value
543                    self.data.quote = QuoteToken::from_token(t);
544                    None
545                },
546                Some(q) if q.as_token() == t => {
547                    self.data.quote = None;
548                    let value = self.take_buf();
549                    on_value(self, value)
550                },
551                _ => {
552                    if let Token::Character(c) = t {
553                        if !self.is_valid_xml_char_not_restricted(c) {
554                            return Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)));
555                        }
556                    }
557                    if self.buf.len() > self.config.max_attribute_length {
558                        return Some(self.error(SyntaxError::ExceededConfiguredLimit));
559                    }
560                    t.push_to_string(&mut self.buf);
561                    None
562                },
563            },
564
565            Token::ReferenceStart if self.data.quote.is_some() => {
566                self.state_after_reference = self.st;
567                self.into_state_continue(State::InsideReference)
568            },
569
570            Token::OpeningTagStart | Token::ProcessingInstructionStart => Some(self.error(SyntaxError::UnexpectedOpeningTag)),
571
572            Token::Character(c) if !self.is_valid_xml_char_not_restricted(c) => {
573                Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
574            },
575
576            // Every character except " and ' and < is okay
577            _ if self.data.quote.is_some() => {
578                if self.buf.len() > self.config.max_attribute_length {
579                    return Some(self.error(SyntaxError::ExceededConfiguredLimit));
580                }
581                t.push_to_string(&mut self.buf);
582                None
583            },
584
585            _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
586        }
587    }
588
589    fn emit_start_element(&mut self, emit_end_element: bool) -> Option<Result> {
590        let mut name = self.data.take_element_name()?;
591        let mut attributes = self.data.take_attributes().into_vec();
592
593        // check whether the name prefix is bound and fix its namespace
594        match self.nst.get(name.borrow().prefix_repr()) {
595            Some("") => name.namespace = None, // default namespace
596            Some(ns) => name.namespace = Some(ns.into()),
597            None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into()))),
598        }
599
600        // check and fix accumulated attributes prefixes
601        for attr in &mut attributes {
602            if let Some(ref pfx) = attr.name.prefix {
603                let new_ns = match self.nst.get(pfx) {
604                    Some("") => None, // default namespace
605                    Some(ns) => Some(ns.into()),
606                    None => return Some(self.error(SyntaxError::UnboundAttribute(attr.name.to_string().into()))),
607                };
608                attr.name.namespace = new_ns;
609            }
610        }
611
612        if emit_end_element {
613            self.pop_namespace = true;
614            self.next_event = Some(Ok(XmlEvent::EndElement {
615                name: name.clone()
616            }));
617        } else {
618            self.est.push(name.clone());
619        }
620        let namespace = self.nst.squash();
621        self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartElement {
622            name,
623            attributes,
624            namespace
625        }))
626    }
627
628    fn emit_end_element(&mut self) -> Option<Result> {
629        let mut name = self.data.take_element_name()?;
630
631        // check whether the name prefix is bound and fix its namespace
632        match self.nst.get(name.borrow().prefix_repr()) {
633            Some("") => name.namespace = None, // default namespace
634            Some(ns) => name.namespace = Some(ns.into()),
635            None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into()))),
636        }
637
638        let op_name = self.est.pop()?;
639
640        if name == op_name {
641            self.pop_namespace = true;
642            self.into_state_emit(State::OutsideTag, Ok(XmlEvent::EndElement { name }))
643        } else {
644            Some(self.error(SyntaxError::UnexpectedClosingTag(format!("{name} != {op_name}").into())))
645        }
646    }
647
648    #[inline]
649    fn is_valid_xml_char(&self, c: char) -> bool {
650        if Some(XmlVersion::Version11) == self.data.version {
651            is_xml11_char(c)
652        } else {
653            is_xml10_char(c)
654        }
655    }
656
657    #[inline]
658    fn is_valid_xml_char_not_restricted(&self, c: char) -> bool {
659        if Some(XmlVersion::Version11) == self.data.version {
660            is_xml11_char_not_restricted(c)
661        } else {
662            is_xml10_char(c)
663        }
664    }
665}
666
// Regression tests that drive `PullParser` directly over in-memory documents.
#[cfg(test)]
mod tests {
    use crate::attribute::OwnedAttribute;
    use crate::common::TextPosition;
    use crate::name::OwnedName;
    use crate::reader::events::XmlEvent;
    use crate::reader::parser::PullParser;
    use crate::reader::ParserConfig;
    use std::io::BufReader;

    // Parser with the default configuration.
    fn new_parser() -> PullParser {
        PullParser::new(ParserConfig::new())
    }

    // Pulls the next event from parser `$p` reading from `$r` and panics unless
    // it matches pattern `$t` (and, in the second form, condition `$c` holds).
    macro_rules! expect_event(
        ($r:expr, $p:expr, $t:pat) => (
            match $p.next(&mut $r) {
                $t => {}
                e => panic!("Unexpected event: {e:?}\nExpected: {}", stringify!($t))
            }
        );
        ($r:expr, $p:expr, $t:pat => $c:expr ) => (
            match $p.next(&mut $r) {
                $t if $c => {}
                e => panic!("Unexpected event: {e:?}\nExpected: {} if {}", stringify!($t), stringify!($c))
            }
        )
    );

    // Builds a `(reader, parser)` pair over the given document text.
    macro_rules! test_data(
        ($d:expr) => ({
            static DATA: &'static str = $d;
            let r = BufReader::new(DATA.as_bytes());
            let p = new_parser();
            (r, p)
        })
    );

    // A `;` inside an attribute value is plain content.
    #[test]
    fn issue_3_semicolon_in_attribute_value() {
        let (mut r, mut p) = test_data!(r#"
            <a attr="zzz;zzz" />
        "#);

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, ref attributes, ref namespace }) =>
            *name == OwnedName::local("a") &&
             attributes.len() == 1 &&
             attributes[0] == OwnedAttribute::new(OwnedName::local("attr"), "zzz;zzz") &&
             namespace.is_essentially_empty()
        );
        expect_event!(r, p, Ok(XmlEvent::EndElement { ref name }) => *name == OwnedName::local("a"));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    // A numeric character reference in element content is expanded to its character.
    #[test]
    fn issue_140_entity_reference_inside_tag() {
        let (mut r, mut p) = test_data!(r"
            <bla>&#9835;</bla>
        ");

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, .. }) => *name == OwnedName::local("bla"));
        expect_event!(r, p, Ok(XmlEvent::Characters(ref s)) => s == "\u{266b}");
        expect_event!(r, p, Ok(XmlEvent::EndElement { ref name, .. }) => *name == OwnedName::local("bla"));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    // Comments may contain `<!--`, must not end in `--->`, and are reported
    // verbatim when `ignore_comments` is turned off.
    #[test]
    fn issue_220_comment() {
        let (mut r, mut p) = test_data!(r"<x><!-- <!--></x>");
        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::EndElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));

        let (mut r, mut p) = test_data!(r"<x><!-- <!---></x>");
        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Err(_)); // ---> is forbidden in comments

        let (mut r, mut p) = test_data!(r"<x><!--<text&x;> <!--></x>");
        p.config.c.ignore_comments = false;
        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::Comment(s)) => s == "<text&x;> <!");
        expect_event!(r, p, Ok(XmlEvent::EndElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    // Malformed XML declarations (stray or duplicated attributes, missing
    // whitespace between attributes) are rejected.
    #[test]
    fn malformed_declaration_attrs() {
        let (mut r, mut p) = test_data!(r#"<?xml version x="1.0"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0" version="1.0"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0"encoding="utf-8"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0"standalone="yes"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0" encoding="utf-8"standalone="yes"?>"#);
        expect_event!(r, p, Err(_));
    }

    // `<` inside an attribute value is an error reported at the expected position.
    #[test]
    fn opening_tag_in_attribute_value() {
        use crate::reader::error::{SyntaxError, Error, ErrorKind};

        let (mut r, mut p) = test_data!(r#"
            <a attr="zzz<zzz" />
        "#);

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Err(ref e) =>
            *e == Error {
                kind: ErrorKind::Syntax(SyntaxError::UnexpectedOpeningTag.to_cow()),
                pos: TextPosition { row: 1, column: 24 }
            }
        );
    }

    // `<?` inside an attribute value is reported as the same error.
    #[test]
    fn processing_instruction_in_attribute_value() {
        use crate::reader::error::{SyntaxError, Error, ErrorKind};

        let (mut r, mut p) = test_data!(r#"
            <y F="<?abc"><x G="/">
        "#);

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Err(ref e) =>
            *e == Error {
                kind: ErrorKind::Syntax(SyntaxError::UnexpectedOpeningTag.to_cow()),
                pos: TextPosition { row: 1, column: 18 }
            }
        );
    }

    // A bare `&` directly followed by another reference is an error.
    #[test]
    fn reference_err() {
        let (mut r, mut p) = test_data!(r"
            <a>&&amp;</a>
        ");

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Err(_));
    }

    // Guards against the state enums growing accidentally.
    #[test]
    fn state_size() {
        assert_eq!(2, std::mem::size_of::<super::State>());
        assert_eq!(1, std::mem::size_of::<super::DoctypeSubstate>());
    }
}