Skip to main content

quick_xml/reader/
mod.rs

1//! Contains high-level interface for a pull-based XML parser.
2
3#[cfg(feature = "encoding")]
4use encoding_rs::Encoding;
5use std::io;
6use std::ops::Range;
7
8use crate::encoding::Decoder;
9use crate::errors::{Error, IllFormedError, SyntaxError};
10use crate::events::{BytesRef, Event};
11use crate::parser::{DtdParser, ElementParser, Parser, PiParser};
12use crate::reader::state::ReaderState;
13
/// A struct that holds a parser configuration.
///
/// Current parser configuration can be retrieved by calling [`Reader::config()`]
/// and changed by changing properties of the object returned by a call to
/// [`Reader::config_mut()`].
///
/// [`Reader::config()`]: crate::reader::Reader::config
/// [`Reader::config_mut()`]: crate::reader::Reader::config_mut
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))]
#[non_exhaustive]
pub struct Config {
    /// Whether a lone ampersand character (without a paired semicolon) should be
    /// allowed in textual content. Unless enabled, in case of a dangling ampersand,
    /// the [`Error::IllFormed(UnclosedReference)`] is returned from read methods.
    ///
    /// Default: `false`
    ///
    /// # Example
    ///
    /// ```
    /// # use quick_xml::events::{BytesRef, BytesText, Event};
    /// # use quick_xml::reader::Reader;
    /// # use pretty_assertions::assert_eq;
    /// let mut reader = Reader::from_str("text with & &amp; & alone");
    /// reader.config_mut().allow_dangling_amp = true;
    ///
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new("text with ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(BytesRef::new("amp")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new(" ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& alone")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
    /// ```
    ///
    /// [`Error::IllFormed(UnclosedReference)`]: crate::errors::IllFormedError::UnclosedReference
    pub allow_dangling_amp: bool,

    /// Whether unmatched closing tag names should be allowed. Unless enabled,
    /// in case of a dangling end tag, the [`Error::IllFormed(UnmatchedEndTag)`]
    /// is returned from read methods.
    ///
    /// When set to `true`, it won't check if a closing tag has a corresponding
    /// opening tag at all. For example, `<a></a></b>` will be permitted.
    ///
    /// Note that the emitted [`End`] event will not be modified if this is enabled,
    /// i.e. it will contain the data of the unmatched end tag.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event.
    ///
    /// Default: `false`
    ///
    /// [`Error::IllFormed(UnmatchedEndTag)`]: crate::errors::IllFormedError::UnmatchedEndTag
    /// [`End`]: crate::events::Event::End
    pub allow_unmatched_ends: bool,

    /// Whether comments should be validated. If enabled, in case of an invalid comment
    /// [`Error::IllFormed(DoubleHyphenInComment)`] is returned from read methods.
    ///
    /// When set to `true`, every [`Comment`] event will be checked for not
    /// containing `--`, which [is not allowed] in XML comments. Most of the time
    /// we don't want comments at all so we don't really care about comment
    /// correctness, thus the default value is `false` to improve performance.
    ///
    /// Default: `false`
    ///
    /// [`Error::IllFormed(DoubleHyphenInComment)`]: crate::errors::IllFormedError::DoubleHyphenInComment
    /// [`Comment`]: crate::events::Event::Comment
    /// [is not allowed]: https://www.w3.org/TR/xml11/#sec-comments
    pub check_comments: bool,

    /// Whether mismatched closing tag names should be detected. If enabled, in
    /// case of mismatch the [`Error::IllFormed(MismatchedEndTag)`] is returned from
    /// read methods.
    ///
    /// Note that start and end tags [should match literally][spec], they cannot
    /// have different prefixes even if both prefixes resolve to the same namespace.
    /// The XML
    ///
    /// ```xml
    /// <outer xmlns="namespace" xmlns:p="namespace">
    /// </p:outer>
    /// ```
    ///
    /// is not valid, even though semantically the start tag is the same as the
    /// end tag. The reason is that namespaces are an extension of the original
    /// XML specification (without namespaces) and it should be backward-compatible.
    ///
    /// When set to `false`, it won't check if a closing tag matches the corresponding
    /// opening tag. For example, `<mytag></different_tag>` will be permitted.
    ///
    /// If the XML is known to be sane (already processed, etc.) this saves extra time.
    ///
    /// Note that the emitted [`End`] event will not be modified if this is disabled,
    /// i.e. it will contain the data of the mismatched end tag.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event. However if
    /// [`expand_empty_elements`] is also set, only one additional allocation will
    /// be performed that supports both these options.
    ///
    /// Default: `true`
    ///
    /// [`Error::IllFormed(MismatchedEndTag)`]: crate::errors::IllFormedError::MismatchedEndTag
    /// [spec]: https://www.w3.org/TR/xml11/#dt-etag
    /// [`End`]: crate::events::Event::End
    /// [`expand_empty_elements`]: Self::expand_empty_elements
    pub check_end_names: bool,

    /// Whether empty elements should be split into a `Start` and an `End` event.
    ///
    /// When set to `true`, all [`Empty`] events produced by a self-closing tag
    /// like `<tag/>` are expanded into a [`Start`] event followed by an [`End`]
    /// event. When set to `false` (the default), those tags are represented by
    /// an [`Empty`] event instead.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event. However if
    /// [`check_end_names`] is also set, only one additional allocation will be
    /// performed that supports both these options.
    ///
    /// Default: `false`
    ///
    /// [`Empty`]: crate::events::Event::Empty
    /// [`Start`]: crate::events::Event::Start
    /// [`End`]: crate::events::Event::End
    /// [`check_end_names`]: Self::check_end_names
    pub expand_empty_elements: bool,

    /// Whether trailing whitespace after the markup name is trimmed in closing
    /// tags `</a >`.
    ///
    /// If `true` the emitted [`End`] event is stripped of trailing whitespace
    /// after the markup name.
    ///
    /// Note that if set to `false` and [`check_end_names`] is `true` the comparison
    /// of markup names is going to fail erroneously if a closing tag contains
    /// trailing whitespace.
    ///
    /// Default: `true`
    ///
    /// [`End`]: crate::events::Event::End
    /// [`check_end_names`]: Self::check_end_names
    pub trim_markup_names_in_closing_tags: bool,

    /// Whether whitespace before character data should be removed.
    ///
    /// When set to `true`, leading whitespace is trimmed in [`Text`] events.
    /// If after that the event is empty it will not be pushed.
    ///
    /// Default: `false`
    ///
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
    ///
    /// WARNING: With this option every text event will be trimmed, which is
    /// incorrect behavior when text events are delimited by comments, processing
    /// instructions or CDATA sections. To correctly trim data manually apply
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
    /// only to necessary events.
    /// </div>
    ///
    /// [`Text`]: crate::events::Event::Text
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
    pub trim_text_start: bool,

    /// Whether whitespace after character data should be removed.
    ///
    /// When set to `true`, trailing whitespace is trimmed in [`Text`] events.
    /// If after that the event is empty it will not be pushed.
    ///
    /// Default: `false`
    ///
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
    ///
    /// WARNING: With this option every text event will be trimmed, which is
    /// incorrect behavior when text events are delimited by comments, processing
    /// instructions or CDATA sections. To correctly trim data manually apply
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
    /// only to necessary events.
    /// </div>
    ///
    /// [`Text`]: crate::events::Event::Text
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
    pub trim_text_end: bool,
}
203
204impl Config {
205    /// Set both [`trim_text_start`] and [`trim_text_end`] to the same value.
206    ///
207    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
208    ///
209    /// WARNING: With this option every text events will be trimmed which is
210    /// incorrect behavior when text events delimited by comments, processing
211    /// instructions or CDATA sections. To correctly trim data manually apply
212    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
213    /// only to necessary events.
214    /// </div>
215    ///
216    /// [`trim_text_start`]: Self::trim_text_start
217    /// [`trim_text_end`]: Self::trim_text_end
218    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
219    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
220    #[inline]
221    pub fn trim_text(&mut self, trim: bool) {
222        self.trim_text_start = trim;
223        self.trim_text_end = trim;
224    }
225
226    /// Turn on or off all checks for well-formedness. Currently it is that settings:
227    /// - [`check_comments`](Self::check_comments)
228    /// - [`check_end_names`](Self::check_end_names)
229    #[inline]
230    pub fn enable_all_checks(&mut self, enable: bool) {
231        self.check_comments = enable;
232        self.check_end_names = enable;
233    }
234}
235
236impl Default for Config {
237    fn default() -> Self {
238        Self {
239            allow_dangling_amp: false,
240            allow_unmatched_ends: false,
241            check_comments: false,
242            check_end_names: true,
243            expand_empty_elements: false,
244            trim_markup_names_in_closing_tags: true,
245            trim_text_start: false,
246            trim_text_end: false,
247        }
248    }
249}
250
251////////////////////////////////////////////////////////////////////////////////////////////////////
252
/// Body of the `read_event` family of methods, written as a macro so that it
/// can be expanded both with and without `.await` on the read calls.
///
/// Loops over the state stored in `$self.state.state` until one event or error
/// is produced and evaluates to that result.
///
/// Parameters:
/// - `$self`: the reader; its `state` field is read and updated
/// - `$buf`: buffer identifier handed to the low-level read methods; it is
///   re-assigned when a read hands the buffer back without producing an event
/// - `$reader`: expression that yields the underlying source
/// - `$read_until_close`: name of the method of `$self` that is called in the
///   `InsideMarkup` state
/// - `$await`: optional token appended (as `.$await`) to every read call
macro_rules! read_event_impl {
    (
        $self:ident, $buf:ident,
        $reader:expr,
        $read_until_close:ident
        $(, $await:ident)?
    ) => {{
        let event = loop {
            break match $self.state.state {
                ParseState::Init => { // Go to InsideText state
                    // If encoding is set explicitly, we do not need to detect it. For example,
                    // explicit UTF-8 is set automatically if Reader was created using `from_str`.
                    // But we still need to remove BOM for consistency with no encoding
                    // feature enabled path
                    #[cfg(feature = "encoding")]
                    if let Some(encoding) = $reader.detect_encoding() $(.$await)? ? {
                        if $self.state.encoding.can_be_refined() {
                            $self.state.encoding = crate::reader::EncodingRef::BomDetected(encoding);
                        }
                    }

                    // Removes UTF-8 BOM if it is present
                    #[cfg(not(feature = "encoding"))]
                    $reader.remove_utf8_bom() $(.$await)? ?;

                    $self.state.state = ParseState::InsideText;
                    continue;
                },
                ParseState::InsideRef => { // Go to InsideText
                    // Remember where the reference started to report errors at `&`
                    let start = $self.state.offset;
                    match $reader.read_ref($buf, &mut $self.state.offset) $(.$await)? {
                        // Emit reference, go to InsideText state
                        ReadRefResult::Ref(bytes) => {
                            $self.state.state = ParseState::InsideText;
                            // +1 to skip start `&`
                            Ok(Event::GeneralRef(BytesRef::wrap(&bytes[1..], $self.decoder())))
                        }
                        // Go to Done state
                        ReadRefResult::UpToEof(bytes) if $self.state.config.allow_dangling_amp => {
                            $self.state.state = ParseState::Done;
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToEof(_) => {
                            $self.state.state = ParseState::Done;
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        // Do not change state, stay in InsideRef
                        ReadRefResult::UpToRef(bytes) if $self.state.config.allow_dangling_amp => {
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToRef(_) => {
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        // Go to InsideMarkup state
                        ReadRefResult::UpToMarkup(bytes) if $self.state.config.allow_dangling_amp => {
                            $self.state.state = ParseState::InsideMarkup;
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToMarkup(_) => {
                            $self.state.state = ParseState::InsideMarkup;
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        ReadRefResult::Err(e) => Err(Error::Io(e.into())),
                    }
                }
                ParseState::InsideText => { // Go to InsideMarkup, InsideRef or Done state
                    if $self.state.config.trim_text_start {
                        $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?;
                    }

                    match $reader.read_text($buf, &mut $self.state.offset) $(.$await)? {
                        ReadTextResult::Markup(buf) => {
                            $self.state.state = ParseState::InsideMarkup;
                            // Pass `buf` to the next iteration of parsing loop
                            $buf = buf;
                            continue;
                        }
                        ReadTextResult::Ref(buf) => {
                            $self.state.state = ParseState::InsideRef;
                            // Pass `buf` to the next iteration of parsing loop
                            $buf = buf;
                            continue;
                        }
                        ReadTextResult::UpToMarkup(bytes) => {
                            $self.state.state = ParseState::InsideMarkup;
                            // FIXME: Can produce an empty event if:
                            // - event contains only spaces
                            // - trim_text_start = false
                            // - trim_text_end = true
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadTextResult::UpToRef(bytes) => {
                            $self.state.state = ParseState::InsideRef;
                            // Return Text event with `bytes` content
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadTextResult::UpToEof(bytes) => {
                            $self.state.state = ParseState::Done;
                            // Trim bytes from end if required
                            let event = $self.state.emit_text(bytes);
                            // Nothing left after trimming: report end of input instead
                            if event.is_empty() {
                                Ok(Event::Eof)
                            } else {
                                Ok(Event::Text(event))
                            }
                        }
                        ReadTextResult::Err(e) => Err(Error::Io(e.into())),
                    }
                },
                // Go to InsideText state in next two arms
                ParseState::InsideMarkup => $self.$read_until_close($buf) $(.$await)?,
                ParseState::InsideEmpty => Ok(Event::End($self.state.close_expanded_empty())),
                ParseState::Done => Ok(Event::Eof),
            };
        };
        match event {
            // #513: In case of ill-formed errors we already consume the wrong data
            // and change the state. We can continue parsing if we wish
            Err(Error::IllFormed(_)) => {}
            // Any other error, as well as Eof, stops the reader for good
            Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Done,
            _ => {}
        }
        event
    }};
}
381
/// Read bytes up to the `>` and skip it. This method is expected to be called
/// after seeing the `<` symbol and skipping it. Inspects the next (current)
/// symbol and returns an appropriate [`Event`]:
///
/// |Symbol |Event
/// |-------|-------------------------------------
/// |`!`    |[`Comment`], [`CData`] or [`DocType`]
/// |`/`    |[`End`]
/// |`?`    |[`PI`]
/// |_other_|[`Start`] or [`Empty`]
///
/// Moves parser to the `InsideText` state.
///
/// If reading the markup fails, `last_error_offset` is set to the offset that
/// was current when the macro was entered.
///
/// Parameters:
/// - `$self`: the reader; its `state` field is read and updated
/// - `$buf`: buffer identifier handed to the low-level read methods
/// - `$reader`: expression that yields the underlying source
/// - `$await`: optional token appended (as `.$await`) to every read call
///
/// [`Comment`]: Event::Comment
/// [`CData`]: Event::CData
/// [`DocType`]: Event::DocType
/// [`End`]: Event::End
/// [`PI`]: Event::PI
/// [`Start`]: Event::Start
/// [`Empty`]: Event::Empty
macro_rules! read_until_close {
    (
        $self:ident, $buf:ident,
        $reader:expr
        $(, $await:ident)?
    ) => {{
        $self.state.state = ParseState::InsideText;

        let start = $self.state.offset;
        match $reader.peek_one() $(.$await)? {
            // `<!` - comment, CDATA or DOCTYPE declaration
            Ok(Some(b'!')) => match $reader
                .read_bang_element($buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok((bang_type, bytes)) => $self.state.emit_bang(bang_type, bytes),
                Err(e) => {
                    // We want to report error at `<`
                    $self.state.last_error_offset = start;
                    Err(e)
                }
            },
            // `</` - closing tag
            // #776: We parse using ElementParser which allows us to have attributes
            // in close tags. While such tags are not allowed by the specification,
            // we anyway allow to parse them because:
            // - we do not check constraints during parsing. This is performed by the
            //   optional validate step which user should call manually
            // - if we just look for `>` we will parse `</tag attr=">" >` as end tag
            //   `</tag attr=">` and text `" >` which probably no one existing parser
            //   does. This is malformed XML, however it is tolerated by some parsers
            //   (e.g. the one used by Adobe Flash) and such documents do exist in the wild.
            Ok(Some(b'/')) => match $reader
                .read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => $self.state.emit_end(bytes),
                Err(e) => {
                    // We want to report error at `<`
                    $self.state.last_error_offset = start;
                    Err(e)
                }
            },
            // `<?` - processing instruction
            Ok(Some(b'?')) => match $reader
                .read_with(PiParser(false), $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => $self.state.emit_question_mark(bytes),
                Err(e) => {
                    // We want to report error at `<`
                    $self.state.last_error_offset = start;
                    Err(e)
                }
            },
            // `<...` - opening or self-closed tag
            Ok(Some(_)) => match $reader
                .read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => Ok($self.state.emit_start(bytes)),
                Err(e) => {
                    // We want to report error at `<`
                    $self.state.last_error_offset = start;
                    Err(e)
                }
            },
            // `<` - syntax error, tag not closed
            Ok(None) => {
                // We want to report error at `<`
                $self.state.last_error_offset = start;
                Err(Error::Syntax(SyntaxError::UnclosedTag))
            }
            Err(e) => Err(Error::Io(e.into())),
        }
    }};
}
479
/// Generalization of `read_to_end` method for buffered and borrowed readers.
///
/// Reads events until the `End` event named `$end` that is not paired with a
/// `Start` event of the same name seen by this macro, and evaluates to the
/// range `start..end`, where `start` is the reader position on entry and `end`
/// is the position taken right before that `End` event was read.
///
/// On a read error, or when `Eof` is reached first, the enclosing function
/// returns with an `Err`. In every case the original value of
/// `trim_text_start` is restored before leaving.
///
/// Parameters:
/// - `$self`: `&mut Reader`
/// - `$end`: name of the element whose end is searched
/// - `$buf`: buffer expression passed to every `$read_event` call
/// - `$read_event`: name of the method of `$self` used to read one event
/// - `$clear`: code block executed before each event is read
/// - `$await`: optional token appended (as `.$await`) to the read call
macro_rules! read_to_end {
    (
        // $self: &mut Reader
        $self:expr, $end:expr, $buf:expr,
        $read_event:ident,
        // Code block that performs clearing of internal buffer after read of each event
        $clear:block
        $(, $await:ident)?
    ) => {{
        // Because we take position after the event before the End event,
        // it is important that this position indicates beginning of the End event.
        // If between last event and the End event would be only spaces, then we
        // take position before the spaces, but spaces would be skipped without
        // generating event if `trim_text_start` is set to `true`. To prevent that
        // we temporarily disable start text trimming.
        //
        // We also cannot take position after getting End event, because if
        // `trim_markup_names_in_closing_tags` is set to `true` (which is the default),
        // we do not know the real size that the End event occupies in
        // the source and cannot correct the position after the End event.
        // So, we in any case should tweak parser configuration.
        let config = $self.config_mut();
        let trim = config.trim_text_start;
        config.trim_text_start = false;

        let start = $self.buffer_position();
        // Number of currently open nested elements that have the same name as `$end`
        let mut depth = 0;
        loop {
            $clear
            let end = $self.buffer_position();
            match $self.$read_event($buf) $(.$await)? {
                Err(e) => {
                    $self.config_mut().trim_text_start = trim;
                    return Err(e);
                }

                Ok(Event::Start(e)) if e.name() == $end => depth += 1,
                Ok(Event::End(e)) if e.name() == $end => {
                    if depth == 0 {
                        $self.config_mut().trim_text_start = trim;
                        break start..end;
                    }
                    depth -= 1;
                }
                Ok(Event::Eof) => {
                    $self.config_mut().trim_text_start = trim;
                    return Err(Error::missed_end($end, $self.decoder()));
                }
                _ => (),
            }
        }
    }};
}
534
535#[cfg(feature = "async-tokio")]
536mod async_tokio;
537mod buffered_reader;
538mod ns_reader;
539mod slice_reader;
540mod state;
541
542pub use ns_reader::NsReader;
543
/// Range of input in bytes that corresponds to some piece of XML.
pub type Span = Range<u64>;
546
547////////////////////////////////////////////////////////////////////////////////////////////////////
548
/// Possible reader states. The state transition diagram (`true` and `false` shows
/// value of [`Config::expand_empty_elements`] option):
///
/// ```mermaid
/// flowchart LR
///   subgraph _
///     direction LR
///
///     Init         -- "(no event)"\n                                       --> InsideText
///     InsideMarkup -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> InsideText
///     InsideText   -- "#lt;false#gt;\n(no event)"\nText                    --> InsideMarkup
///     InsideText   -- "(no event)"\nText                                   --> InsideRef
///     InsideRef    -- GeneralRef                                           --> InsideText
///     InsideRef    -- Text                                                 --> InsideMarkup
///   end
///   InsideText     -- "#lt;true#gt;"\nStart --> InsideEmpty
///   InsideEmpty    -- End                   --> InsideText
///   _ -. Eof .-> Done
/// ```
#[derive(Clone, Debug)]
enum ParseState {
    /// Initial state in which reader stays after creation. Leaving this state
    /// does not emit an event: the reader only detects the encoding (or removes
    /// the UTF-8 BOM) and moves to the `InsideText` state. The reader will never
    /// return to this state.
    Init,
    /// State after seeing the `&` symbol in textual content.
    ///
    /// If the reference is complete, a `GeneralRef` event is emitted and the
    /// reader moves to the `InsideText` state. Otherwise either a `Text` event
    /// (if [`Config::allow_dangling_amp`] is set) or an `UnclosedReference`
    /// error is produced, and the reader moves to `InsideMarkup` (a markup
    /// follows), `Done` (end of input) or stays in `InsideRef` (another
    /// reference follows).
    InsideRef,
    /// State after seeing the `<` symbol. Depending on the next symbol all other
    /// events could be generated.
    ///
    /// After generating one event the reader moves to the `InsideText` state.
    InsideMarkup,
    /// State in which reader searches the `<` symbol of a markup. All bytes before
    /// that symbol will be returned in the [`Event::Text`] event. After that
    /// the reader moves to the `InsideMarkup` state. If a reference is found
    /// first, the reader moves to the `InsideRef` state instead, and at the end
    /// of input it moves to the `Done` state.
    InsideText,
    /// This state is used only if option [`expand_empty_elements`] is set to `true`.
    /// Reader enters this state when it is in the `InsideText` state and emits an
    /// [`Event::Start`] event. The next event emitted will be an [`Event::End`],
    /// after which reader returns to the `InsideText` state.
    ///
    /// [`expand_empty_elements`]: Config::expand_empty_elements
    InsideEmpty,
    /// Reader enters this state when `Eof` event generated or an error occurred.
    /// This is the last state, the reader stays in it forever.
    Done,
}
599
/// A reference to an encoding together with information about how it was retrieved.
///
/// The state transition diagram:
///
/// ```mermaid
/// flowchart LR
///   Implicit    -- from_str       --> Explicit
///   Implicit    -- BOM            --> BomDetected
///   Implicit    -- "encoding=..." --> XmlDetected
///   BomDetected -- "encoding=..." --> XmlDetected
/// ```
#[cfg(feature = "encoding")]
#[derive(Clone, Copy, Debug)]
enum EncodingRef {
    /// Encoding was implicitly assumed to have a specified value. It can be refined
    /// using BOM or by the XML declaration event (`<?xml encoding=... ?>`)
    Implicit(&'static Encoding),
    /// Encoding was explicitly set to the desired value. It cannot be changed
    /// either by BOM or by parsing XML declaration (`<?xml encoding=... ?>`)
    Explicit(&'static Encoding),
    /// Encoding was detected from a byte order mark (BOM) or by the first bytes
    /// of the content. It can be refined by the XML declaration event (`<?xml encoding=... ?>`)
    BomDetected(&'static Encoding),
    /// Encoding was detected using XML declaration event (`<?xml encoding=... ?>`).
    /// It can no longer change
    XmlDetected(&'static Encoding),
}
#[cfg(feature = "encoding")]
impl EncodingRef {
    /// Returns the stored encoding regardless of how it was obtained.
    #[inline]
    const fn encoding(&self) -> &'static Encoding {
        match self {
            Self::Implicit(e) => e,
            Self::Explicit(e) => e,
            Self::BomDetected(e) => e,
            Self::XmlDetected(e) => e,
        }
    }
    /// Returns `true` for the `Implicit` and `BomDetected` variants and `false`
    /// for the `Explicit` and `XmlDetected` variants.
    #[inline]
    const fn can_be_refined(&self) -> bool {
        match self {
            Self::Implicit(_) | Self::BomDetected(_) => true,
            Self::Explicit(_) | Self::XmlDetected(_) => false,
        }
    }
}
646
647////////////////////////////////////////////////////////////////////////////////////////////////////
648
/// A direct stream to the underlying [`Reader`]s reader which updates
/// [`Reader::buffer_position()`] when read from it.
#[derive(Debug)]
#[must_use = "streams do nothing unless read or polled"]
pub struct BinaryStream<'r, R> {
    /// The wrapped reader that all reads are forwarded to
    inner: &'r mut R,
    /// Position counter, advanced by the number of bytes read or consumed
    /// through this stream
    offset: &'r mut u64,
}
657
658impl<'r, R> BinaryStream<'r, R> {
659    /// Returns current position in bytes in the original source.
660    #[inline]
661    pub const fn offset(&self) -> u64 {
662        *self.offset
663    }
664
665    /// Gets a reference to the underlying reader.
666    #[inline]
667    pub const fn get_ref(&self) -> &R {
668        self.inner
669    }
670
671    /// Gets a mutable reference to the underlying reader.
672    ///
673    /// Avoid read from this reader because this will not update reader's position
674    /// and will lead to incorrect positions of errors. Read from this stream instead.
675    #[inline]
676    pub fn get_mut(&mut self) -> &mut R {
677        self.inner
678    }
679}
680
681impl<'r, R> io::Read for BinaryStream<'r, R>
682where
683    R: io::Read,
684{
685    #[inline]
686    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
687        let amt = self.inner.read(buf)?;
688        *self.offset += amt as u64;
689        Ok(amt)
690    }
691}
692
693impl<'r, R> io::BufRead for BinaryStream<'r, R>
694where
695    R: io::BufRead,
696{
697    #[inline]
698    fn fill_buf(&mut self) -> io::Result<&[u8]> {
699        self.inner.fill_buf()
700    }
701
702    #[inline]
703    fn consume(&mut self, amt: usize) {
704        self.inner.consume(amt);
705        *self.offset += amt as u64;
706    }
707}
708
709////////////////////////////////////////////////////////////////////////////////////////////////////
710
/// A low level encoding-agnostic XML event reader.
///
/// Consumes bytes and streams XML [`Event`]s.
///
/// This reader does not manage namespace declarations and is not able to resolve
/// prefixes. If you want these features, use the [`NsReader`].
///
/// # Examples
///
/// ```
/// use quick_xml::events::Event;
/// use quick_xml::reader::Reader;
///
/// let xml = r#"<tag1 att1 = "test">
///                 <tag2><!--Test comment-->Test</tag2>
///                 <tag2>Test 2</tag2>
///              </tag1>"#;
/// let mut reader = Reader::from_str(xml);
/// reader.config_mut().trim_text(true);
///
/// let mut count = 0;
/// let mut txt = Vec::new();
/// let mut buf = Vec::new();
///
/// // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
/// loop {
///     // NOTE: this is the generic case when we don't know about the input BufRead.
///     // when the input is a &str or a &[u8], we don't actually need to use another
///     // buffer, we could directly call `reader.read_event()`
///     match reader.read_event_into(&mut buf) {
///         Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
///         // exits the loop when reaching end of file
///         Ok(Event::Eof) => break,
///
///         Ok(Event::Start(e)) => {
///             match e.name().as_ref() {
///                 b"tag1" => println!("attributes values: {:?}",
///                                     e.attributes().map(|a| a.unwrap().value)
///                                     .collect::<Vec<_>>()),
///                 b"tag2" => count += 1,
///                 _ => (),
///             }
///         }
///         Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
///
///         // There are several other `Event`s we do not consider here
///         _ => (),
///     }
///     // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
///     buf.clear();
/// }
/// ```
///
/// [`NsReader`]: crate::reader::NsReader
#[derive(Debug, Clone)]
pub struct Reader<R> {
    /// Source of the data to parse
    reader: R,
    /// Configuration and current parse state
    state: ReaderState,
}
772
773/// Builder methods
774impl<R> Reader<R> {
775    /// Creates a `Reader` that reads from a given reader.
776    pub fn from_reader(reader: R) -> Self {
777        Self {
778            reader,
779            state: ReaderState::default(),
780        }
781    }
782
783    /// Returns reference to the parser configuration
784    pub const fn config(&self) -> &Config {
785        &self.state.config
786    }
787
788    /// Returns mutable reference to the parser configuration
789    pub fn config_mut(&mut self) -> &mut Config {
790        &mut self.state.config
791    }
792}
793
794/// Getters
795impl<R> Reader<R> {
796    /// Consumes `Reader` returning the underlying reader
797    ///
798    /// Can be used to compute line and column of a parsing error position
799    ///
800    /// # Examples
801    ///
802    /// ```
803    /// # use pretty_assertions::assert_eq;
804    /// use std::{str, io::Cursor};
805    /// use quick_xml::events::Event;
806    /// use quick_xml::reader::Reader;
807    ///
808    /// let xml = r#"<tag1 att1 = "test">
809    ///                 <tag2><!--Test comment-->Test</tag2>
810    ///                 <tag3>Test 2</tag3>
811    ///              </tag1>"#;
812    /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
813    /// let mut buf = Vec::new();
814    ///
815    /// fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) {
816    ///     // We known that size cannot exceed usize::MAX because we created parser from single &[u8]
817    ///     let end_pos = reader.buffer_position() as usize;
818    ///     let mut cursor = reader.into_inner();
819    ///     let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned())
820    ///         .expect("can't make a string");
821    ///     let mut line = 1;
822    ///     let mut column = 0;
823    ///     for c in s.chars() {
824    ///         if c == '\n' {
825    ///             line += 1;
826    ///             column = 0;
827    ///         } else {
828    ///             column += 1;
829    ///         }
830    ///     }
831    ///     (line, column)
832    /// }
833    ///
834    /// loop {
835    ///     match reader.read_event_into(&mut buf) {
836    ///         Ok(Event::Start(ref e)) => match e.name().as_ref() {
837    ///             b"tag1" | b"tag2" => (),
838    ///             tag => {
839    ///                 assert_eq!(b"tag3", tag);
840    ///                 assert_eq!((3, 22), into_line_and_column(reader));
841    ///                 break;
842    ///             }
843    ///         },
844    ///         Ok(Event::Eof) => unreachable!(),
845    ///         _ => (),
846    ///     }
847    ///     buf.clear();
848    /// }
849    /// ```
850    pub fn into_inner(self) -> R {
851        self.reader
852    }
853
854    /// Gets a reference to the underlying reader.
855    pub const fn get_ref(&self) -> &R {
856        &self.reader
857    }
858
859    /// Gets a mutable reference to the underlying reader.
860    ///
861    /// Avoid read from this reader because this will not update reader's position
862    /// and will lead to incorrect positions of errors. If you want to read, use
863    /// [`stream()`] instead.
864    ///
865    /// [`stream()`]: Self::stream
866    pub fn get_mut(&mut self) -> &mut R {
867        &mut self.reader
868    }
869
870    /// Gets the byte position in the input data just after the last emitted event
871    /// (i.e. this is position where data of last event ends).
872    ///
873    /// Note, that for text events which is originally ended with whitespace characters
874    /// (` `, `\t`, `\r`, and `\n`) if [`Config::trim_text_end`] is set this is position
875    /// before trim, not the position of the last byte of the [`Event::Text`] content.
876    pub const fn buffer_position(&self) -> u64 {
877        self.state.offset
878    }
879
880    /// Gets the last error byte position in the input data. If there is no errors
881    /// yet, returns `0`.
882    ///
883    /// Unlike `buffer_position` it will point to the place where it is rational
884    /// to report error to the end user. For example, all [`SyntaxError`]s are
885    /// reported when the parser sees EOF inside of some kind of markup. The
886    /// `buffer_position()` will point to the last byte of input which is not
887    /// very useful. `error_position()` will point to the start of corresponding
888    /// markup element (i. e. to the `<` character).
889    ///
890    /// This position is always `<= buffer_position()`.
891    pub const fn error_position(&self) -> u64 {
892        self.state.last_error_offset
893    }
894
895    /// Get the decoder, used to decode bytes, read by this reader, to the strings.
896    ///
897    /// If [`encoding`] feature is enabled, the used encoding may change after
898    /// parsing the XML declaration, otherwise encoding is fixed to UTF-8.
899    ///
900    /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
901    /// defaults to UTF-8.
902    ///
903    /// [`encoding`]: ../index.html#encoding
904    #[inline]
905    pub const fn decoder(&self) -> Decoder {
906        self.state.decoder()
907    }
908
909    /// Get the direct access to the underlying reader, but tracks the amount of
910    /// read data and update [`Reader::buffer_position()`] accordingly.
911    ///
912    /// Note, that this method gives you access to the internal reader and read
913    /// data will not be returned in any subsequent events read by `read_event`
914    /// family of methods.
915    ///
916    /// # Example
917    ///
918    /// This example demonstrates how to read stream raw bytes from an XML document.
919    /// This could be used to implement streaming read of text, or to read raw binary
920    /// bytes embedded in an XML document. (Documents with embedded raw bytes are not
921    /// valid XML, but XML-derived file formats exist where such documents are valid).
922    ///
923    /// ```
924    /// # use pretty_assertions::assert_eq;
925    /// use std::io::{BufRead, Read};
926    /// use quick_xml::events::{BytesEnd, BytesStart, Event};
927    /// use quick_xml::reader::Reader;
928    ///
929    /// let mut reader = Reader::from_str("<tag>binary << data&></tag>");
930    /// //                                 ^    ^               ^     ^
931    /// //                                 0    5              21    27
932    ///
933    /// assert_eq!(
934    ///     (reader.read_event().unwrap(), reader.buffer_position()),
935    ///     // 5 - end of the `<tag>`
936    ///     (Event::Start(BytesStart::new("tag")), 5)
937    /// );
938    ///
939    /// // Reading directly from underlying reader will not update position
940    /// // let mut inner = reader.get_mut();
941    ///
942    /// // Reading from the stream() advances position
943    /// let mut inner = reader.stream();
944    ///
945    /// // Read binary data. We must know its size
946    /// let mut binary = [0u8; 16];
947    /// inner.read_exact(&mut binary).unwrap();
948    /// assert_eq!(&binary, b"binary << data&>");
949    /// // 21 - end of the `binary << data&>`
950    /// assert_eq!(inner.offset(), 21);
951    /// assert_eq!(reader.buffer_position(), 21);
952    ///
953    /// assert_eq!(
954    ///     (reader.read_event().unwrap(), reader.buffer_position()),
955    ///     // 27 - end of the `</tag>`
956    ///     (Event::End(BytesEnd::new("tag")), 27)
957    /// );
958    ///
959    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
960    /// ```
961    #[inline]
962    pub fn stream(&mut self) -> BinaryStream<'_, R> {
963        BinaryStream {
964            inner: &mut self.reader,
965            offset: &mut self.state.offset,
966        }
967    }
968}
969
/// Private sync reading methods
impl<R> Reader<R> {
    /// Read text into the given buffer, and return an event that borrows from
    /// either that buffer or from the input itself, based on the type of the
    /// reader.
    ///
    /// The body is produced by the `read_event_impl!` macro, which is invoked
    /// with this reader, the buffer, the data source and the name of the
    /// [`read_until_close`](Self::read_until_close) method.
    fn read_event_impl<'i, B>(&mut self, mut buf: B) -> Result<Event<'i>, Error>
    where
        R: XmlSource<'i, B>,
    {
        read_event_impl!(self, buf, self.reader, read_until_close)
    }

    /// Private function to read until `>` is found. This function expects that
    /// it was called just after encountering a `<` symbol.
    ///
    /// The body is produced by the `read_until_close!` macro, which is invoked
    /// with this reader, the buffer and the data source.
    fn read_until_close<'i, B>(&mut self, buf: B) -> Result<Event<'i>, Error>
    where
        R: XmlSource<'i, B>,
    {
        read_until_close!(self, buf, self.reader)
    }
}
991
992////////////////////////////////////////////////////////////////////////////////////////////////////
993
/// Result of an attempt to read XML textual data from the source.
///
/// # Parameters
/// - `'r`: lifetime of the data returned in the `UpTo*` variants
/// - `B`: type of the buffer that is handed back by the [`Markup`](Self::Markup)
///   and [`Ref`](Self::Ref) variants
#[derive(Debug)]
enum ReadTextResult<'r, B> {
    /// Start of markup (`<` character) was found in the first byte. `<` was consumed.
    /// Contains the buffer that should be returned back to the next iteration cycle
    /// to satisfy borrow checker requirements.
    Markup(B),
    /// Start of reference (`&` character) was found in the first byte.
    /// `&` was not consumed.
    /// Contains the buffer that should be returned back to the next iteration cycle
    /// to satisfy borrow checker requirements.
    Ref(B),
    /// Contains the text block up to the start of markup (`<` character). `<` was consumed.
    UpToMarkup(&'r [u8]),
    /// Contains the text block up to the start of reference (`&` character).
    /// `&` was not consumed.
    UpToRef(&'r [u8]),
    /// Contains the text block up to EOF; neither the start of markup (`<` character)
    /// nor the start of reference (`&` character) was found.
    UpToEof(&'r [u8]),
    /// IO error occurred.
    Err(io::Error),
}
1017
/// Result of an attempt to read general reference from the reader.
///
/// Every variant that carries data includes the `&` character that starts the
/// reference.
#[derive(Debug)]
enum ReadRefResult<'r> {
    /// Contains the text block up to the end of reference (`;` character).
    /// Result includes the start `&`, but not the end `;`.
    Ref(&'r [u8]),
    /// Contains the text block up to EOF. Neither the end of reference (`;`), the
    /// start of another reference (`&`) nor the start of markup (`<`) was found.
    /// Result includes the start `&`.
    UpToEof(&'r [u8]),
    /// Contains the text block up to the next possible reference (`&` character).
    /// Result includes the start `&`.
    UpToRef(&'r [u8]),
    /// Contains the text block up to the start of markup (`<` character).
    /// Result includes the start `&`.
    UpToMarkup(&'r [u8]),
    /// IO error occurred.
    Err(io::Error),
}
1037
/// Represents an input for a reader that can return borrowed data.
///
/// There are two implementors of this trait. The generic one reads data from
/// `Self`, copies some part of it into a provided buffer of type `B` and then
/// returns data that borrows from that buffer.
///
/// The other implementor is for `&[u8]`: instead of copying data it returns
/// data borrowed from `Self` itself. This implementation allows zero-copy
/// deserialization.
///
/// # Parameters
/// - `'r`: lifetime of a buffer from which events will borrow
/// - `B`: a type of a buffer that can be used to store data read from `Self` and
///   from which events can borrow
trait XmlSource<'r, B> {
    /// Removes UTF-8 BOM if it is present
    #[cfg(not(feature = "encoding"))]
    fn remove_utf8_bom(&mut self) -> io::Result<()>;

    /// Determines encoding from the start of input and removes BOM if it is present
    #[cfg(feature = "encoding")]
    fn detect_encoding(&mut self) -> io::Result<Option<&'static Encoding>>;

    /// Read input until start of markup (the `<`) is found, start of general entity
    /// reference (the `&`) is found or end of input is reached.
    ///
    /// See [`ReadTextResult`] for the description of possible outcomes.
    ///
    /// # Parameters
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by amount of bytes consumed
    ///
    /// [events]: crate::events::Event
    fn read_text(&mut self, buf: B, position: &mut u64) -> ReadTextResult<'r, B>;

    /// Read input until end of general reference (the `;`) is found, start of
    /// another general reference (the `&`) is found, start of markup (the `<`)
    /// is found or end of input is reached.
    ///
    /// This method must be called when current character is `&`.
    ///
    /// See [`ReadRefResult`] for the description of possible outcomes.
    ///
    /// # Parameters
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by amount of bytes consumed
    ///
    /// [events]: crate::events::Event
    fn read_ref(&mut self, buf: B, position: &mut u64) -> ReadRefResult<'r>;

    /// Read input until the thing recognized by the given `parser` (for example,
    /// a processing instruction) is finished.
    ///
    /// This method expects that the start sequence of a parser was already read.
    ///
    /// Returns a slice of data read up to the end of the thing being parsed.
    /// The end of thing and the returned content is determined by the used parser.
    ///
    /// If input (`Self`) is exhausted and no bytes were read, or if the specified
    /// parser could not find the ending sequence of the thing, returns `SyntaxError`.
    ///
    /// # Parameters
    /// - `parser`: Parser that decides where the thing being read ends
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by amount of bytes consumed
    ///
    /// A `P` type parameter is used to preserve state between calls to the underlying
    /// reader which provides bytes fed into the parser.
    ///
    /// [events]: crate::events::Event
    fn read_with<P>(&mut self, parser: P, buf: B, position: &mut u64) -> Result<&'r [u8], Error>
    where
        P: Parser;

    /// Read input until a markup that starts with `<!` (comment, CDATA section
    /// or DOCTYPE declaration, see [`BangType`]) is finished.
    ///
    /// Returns the detected kind of the markup together with a slice of data
    /// read up to its closing `>`; the `>` itself is not included in the result.
    ///
    /// If the closing sequence is not found, an error is returned instead; the
    /// tests of this module expect an [`Error::Syntax`] in that case.
    ///
    /// NOTE(review): earlier wording stated that `<` is already read when this
    /// method is called. The tests of this module call it on input that still
    /// starts with `<` and expect the returned slice to start with `<!` —
    /// confirm the actual contract against the implementations.
    ///
    /// # Parameters
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by amount of bytes consumed
    ///
    /// [events]: crate::events::Event
    fn read_bang_element(
        &mut self,
        buf: B,
        position: &mut u64,
    ) -> Result<(BangType, &'r [u8]), Error>;

    /// Consume and discard all the whitespace until the next non-whitespace
    /// character or EOF.
    ///
    /// # Parameters
    /// - `position`: Will be increased by amount of bytes consumed
    fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()>;

    /// Return one byte without consuming it, so that future `read_*` calls
    /// will still include it. On EOF, return `None`.
    fn peek_one(&mut self) -> io::Result<Option<u8>>;
}
1140
/// Possible elements started with `<!`
#[derive(Debug, PartialEq)]
enum BangType {
    /// `<![CDATA[...]]>`
    CData,
    /// `<!--...-->`
    Comment,
    /// `<!DOCTYPE...>`. Contains the [`DtdParser`] to which the read data is fed
    /// in order to find the end of the declaration.
    // NOTE(review): this comment used to say "Contains balance of '<' (+1) and
    // '>' (-1)"; presumably that bookkeeping now lives inside `DtdParser` —
    // confirm against its documentation.
    DocType(DtdParser),
}
1151impl BangType {
1152    #[inline(always)]
1153    const fn new(byte: Option<u8>) -> Result<Self, SyntaxError> {
1154        Ok(match byte {
1155            Some(b'[') => Self::CData,
1156            Some(b'-') => Self::Comment,
1157            Some(b'D') | Some(b'd') => Self::DocType(DtdParser::BeforeInternalSubset(0)),
1158            _ => return Err(SyntaxError::InvalidBangMarkup),
1159        })
1160    }
1161
1162    /// If element is finished, returns its content up to `>` symbol and
1163    /// an index of this symbol, otherwise returns `None`
1164    ///
1165    /// # Parameters
1166    /// - `buf`: buffer with data consumed on previous iterations
1167    /// - `chunk`: data read on current iteration and not yet consumed from reader
1168    #[inline(always)]
1169    fn parse<'b>(&mut self, buf: &[u8], chunk: &'b [u8]) -> Option<(&'b [u8], usize)> {
1170        match self {
1171            Self::Comment => {
1172                for i in memchr::memchr_iter(b'>', chunk) {
1173                    // Need to read at least 6 symbols (`!---->`) for properly finished comment
1174                    // <!----> - XML comment
1175                    // 0123456 - i
1176                    if buf.len() + i > 5 {
1177                        if chunk[..i].ends_with(b"--") {
1178                            // We cannot strip last `--` from the buffer because we need it in case of
1179                            // check_comments enabled option. XML standard requires that comment
1180                            // will not end with `--->` sequence because this is a special case of
1181                            // `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments)
1182                            return Some((&chunk[..i], i + 1)); // +1 for `>`
1183                        }
1184                        // End sequence `-|->` was splitted at |
1185                        //        buf --/   \-- chunk
1186                        if i == 1 && buf.ends_with(b"-") && chunk[0] == b'-' {
1187                            return Some((&chunk[..i], i + 1)); // +1 for `>`
1188                        }
1189                        // End sequence `--|>` was splitted at |
1190                        //         buf --/   \-- chunk
1191                        if i == 0 && buf.ends_with(b"--") {
1192                            return Some((&[], i + 1)); // +1 for `>`
1193                        }
1194                    }
1195                }
1196            }
1197            Self::CData => {
1198                for i in memchr::memchr_iter(b'>', chunk) {
1199                    if chunk[..i].ends_with(b"]]") {
1200                        return Some((&chunk[..i], i + 1)); // +1 for `>`
1201                    }
1202                    // End sequence `]|]>` was splitted at |
1203                    //        buf --/   \-- chunk
1204                    if i == 1 && buf.ends_with(b"]") && chunk[0] == b']' {
1205                        return Some((&chunk[..i], i + 1)); // +1 for `>`
1206                    }
1207                    // End sequence `]]|>` was splitted at |
1208                    //         buf --/   \-- chunk
1209                    if i == 0 && buf.ends_with(b"]]") {
1210                        return Some((&[], i + 1)); // +1 for `>`
1211                    }
1212                }
1213            }
1214            Self::DocType(ref mut parser) => return parser.feed(buf, chunk),
1215        }
1216        None
1217    }
1218    #[inline]
1219    const fn to_err(&self) -> SyntaxError {
1220        match self {
1221            Self::CData => SyntaxError::UnclosedCData,
1222            Self::Comment => SyntaxError::UnclosedComment,
1223            Self::DocType(_) => SyntaxError::UnclosedDoctype,
1224        }
1225    }
1226}
1227
1228////////////////////////////////////////////////////////////////////////////////////////////////////
1229
1230#[cfg(test)]
1231mod test {
1232    /// Checks the internal implementation of the various reader methods
1233    macro_rules! check {
1234        (
1235            #[$test:meta]
1236            $read_event:ident,
1237            $read_until_close:ident,
1238            // constructor of the XML source on which internal functions will be called
1239            $source:path,
1240            // constructor of the buffer to which read data will stored
1241            $buf:expr
1242            $(, $async:ident, $await:ident)?
1243        ) => {
1244            mod read_bang_element {
1245                use super::*;
1246                use crate::errors::{Error, SyntaxError};
1247                use crate::reader::{BangType, DtdParser};
1248                use crate::utils::Bytes;
1249
1250                /// Checks that reading CDATA content works correctly
1251                mod cdata {
1252                    use super::*;
1253                    use pretty_assertions::assert_eq;
1254
1255                    /// Checks that if input begins like CDATA element, but CDATA start sequence
1256                    /// is not finished, parsing ends with an error
1257                    #[$test]
1258                    #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"]
1259                    $($async)? fn not_properly_start() {
1260                        let buf = $buf;
1261                        let mut position = 1;
1262                        let mut input = b"<![]]>other content".as_ref();
1263                        //                ^= 1
1264
1265                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1266                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData),
1267                            x => panic!(
1268                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1269                                x
1270                            ),
1271                        }
1272                        assert_eq!(position, 1);
1273                    }
1274
1275                    /// Checks that if CDATA startup sequence was matched, but an end sequence
1276                    /// is not found, parsing ends with an error
1277                    #[$test]
1278                    $($async)? fn not_closed() {
1279                        let buf = $buf;
1280                        let mut position = 0;
1281                        let mut input = b"<![CDATA[other content".as_ref();
1282                        //                ^= 0                  ^= 22
1283
1284                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1285                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData),
1286                            x => panic!(
1287                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1288                                x
1289                            ),
1290                        }
1291                        assert_eq!(position, 22);
1292                    }
1293
1294                    /// Checks that CDATA element without content inside parsed successfully
1295                    #[$test]
1296                    $($async)? fn empty() {
1297                        let buf = $buf;
1298                        let mut position = 0;
1299                        let mut input = b"<![CDATA[]]>other content".as_ref();
1300                        //                ^= 0        ^= 12
1301
1302                        let (ty, bytes) = $source(&mut input)
1303                            .read_bang_element(buf, &mut position)
1304                            $(.$await)?
1305                            .unwrap();
1306                        assert_eq!(
1307                            (ty, Bytes(bytes)),
1308                            (BangType::CData, Bytes(b"<![CDATA[]]"))
1309                        );
1310                        assert_eq!(position, 12);
1311                    }
1312
1313                    /// Checks that CDATA element with content parsed successfully.
1314                    /// Additionally checks that sequences inside CDATA that may look like
1315                    /// a CDATA end sequence do not interrupt CDATA parsing
1316                    #[$test]
1317                    $($async)? fn with_content() {
1318                        let buf = $buf;
1319                        let mut position = 0;
1320                        let mut input = b"<![CDATA[cdata]] ]>content]]>other content]]>".as_ref();
1321                        //                ^= 0                         ^= 29
1322
1323                        let (ty, bytes) = $source(&mut input)
1324                            .read_bang_element(buf, &mut position)
1325                            $(.$await)?
1326                            .unwrap();
1327                        assert_eq!(
1328                            (ty, Bytes(bytes)),
1329                            (BangType::CData, Bytes(b"<![CDATA[cdata]] ]>content]]"))
1330                        );
1331                        assert_eq!(position, 29);
1332                    }
1333                }
1334
1335                /// Checks that reading XML comments works correctly. According to the [specification],
1336                /// comment data can contain any sequence except `--`:
1337                ///
1338                /// ```peg
                /// comment = '<!--' (!'--' char)* '-->';
1340                /// char = [#x1-#x2C]
1341                ///      / [#x2E-#xD7FF]
1342                ///      / [#xE000-#xFFFD]
1343                ///      / [#x10000-#x10FFFF]
1344                /// ```
1345                ///
1346                /// The presence of this limitation, however, is simply a poorly designed specification
1347                /// (maybe for purpose of building of LL(1) XML parser) and quick-xml does not check for
                /// presence of these sequences by default. These tests allow such content.
1349                ///
1350                /// [specification]: https://www.w3.org/TR/xml11/#dt-comment
1351                mod comment {
1352                    use super::*;
1353                    use pretty_assertions::assert_eq;
1354
1355                    #[$test]
1356                    #[ignore = "start comment sequence fully checked outside of `read_bang_element`"]
1357                    $($async)? fn not_properly_start() {
1358                        let buf = $buf;
1359                        let mut position = 1;
1360                        let mut input = b"<!- -->other content".as_ref();
1361                        //                ^= 1
1362
1363                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1364                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1365                            x => panic!(
1366                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1367                                x
1368                            ),
1369                        }
1370                        assert_eq!(position, 1);
1371                    }
1372
1373                    #[$test]
1374                    $($async)? fn not_properly_end() {
1375                        let buf = $buf;
1376                        let mut position = 0;
1377                        let mut input = b"<!->other content".as_ref();
1378                        //                ^= 0             ^= 17
1379
1380                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1381                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1382                            x => panic!(
1383                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1384                                x
1385                            ),
1386                        }
1387                        assert_eq!(position, 17);
1388                    }
1389
1390                    #[$test]
1391                    $($async)? fn not_closed1() {
1392                        let buf = $buf;
1393                        let mut position = 0;
1394                        let mut input = b"<!--other content".as_ref();
1395                        //                ^= 0             ^= 17
1396
1397                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1398                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1399                            x => panic!(
1400                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1401                                x
1402                            ),
1403                        }
1404                        assert_eq!(position, 17);
1405                    }
1406
1407                    #[$test]
1408                    $($async)? fn not_closed2() {
1409                        let buf = $buf;
1410                        let mut position = 0;
1411                        let mut input = b"<!-->other content".as_ref();
1412                        //                ^= 0              ^= 18
1413
1414                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1415                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1416                            x => panic!(
1417                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1418                                x
1419                            ),
1420                        }
1421                        assert_eq!(position, 18);
1422                    }
1423
1424                    #[$test]
1425                    $($async)? fn not_closed3() {
1426                        let buf = $buf;
1427                        let mut position = 0;
1428                        let mut input = b"<!--->other content".as_ref();
1429                        //                ^= 0               ^= 19
1430
1431                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1432                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1433                            x => panic!(
1434                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1435                                x
1436                            ),
1437                        }
1438                        assert_eq!(position, 19);
1439                    }
1440
1441                    #[$test]
1442                    $($async)? fn empty() {
1443                        let buf = $buf;
1444                        let mut position = 0;
1445                        let mut input = b"<!---->other content".as_ref();
1446                        //                ^= 0   ^= 7
1447
1448                        let (ty, bytes) = $source(&mut input)
1449                            .read_bang_element(buf, &mut position)
1450                            $(.$await)?
1451                            .unwrap();
1452                        assert_eq!(
1453                            (ty, Bytes(bytes)),
1454                            (BangType::Comment, Bytes(b"<!----"))
1455                        );
1456                        assert_eq!(position, 7);
1457                    }
1458
1459                    #[$test]
1460                    $($async)? fn with_content() {
1461                        let buf = $buf;
1462                        let mut position = 0;
1463                        let mut input = b"<!--->comment<--->other content".as_ref();
1464                        //                ^= 0              ^= 18
1465
1466                        let (ty, bytes) = $source(&mut input)
1467                            .read_bang_element(buf, &mut position)
1468                            $(.$await)?
1469                            .unwrap();
1470                        assert_eq!(
1471                            (ty, Bytes(bytes)),
1472                            (BangType::Comment, Bytes(b"<!--->comment<---"))
1473                        );
1474                        assert_eq!(position, 18);
1475                    }
1476                }
1477
1478                /// Checks that reading DOCTYPE definition works correctly
1479                mod doctype {
1480                    use super::*;
1481
1482                    mod uppercase {
1483                        use super::*;
1484                        use pretty_assertions::assert_eq;
1485
1486                        #[$test]
1487                        $($async)? fn not_properly_start() {
1488                            let buf = $buf;
1489                            let mut position = 0;
1490                            let mut input = b"<!D other content".as_ref();
1491                            //                ^= 0             ^= 17
1492
1493                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1494                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1495                                x => panic!(
1496                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1497                                    x
1498                                ),
1499                            }
1500                            assert_eq!(position, 17);
1501                        }
1502
1503                        #[$test]
1504                        $($async)? fn without_space() {
1505                            let buf = $buf;
1506                            let mut position = 0;
1507                            let mut input = b"<!DOCTYPEother content".as_ref();
1508                            //                ^= 0                  ^= 22
1509
1510                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1511                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1512                                x => panic!(
1513                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1514                                    x
1515                                ),
1516                            }
1517                            assert_eq!(position, 22);
1518                        }
1519
1520                        #[$test]
1521                        $($async)? fn empty() {
1522                            let buf = $buf;
1523                            let mut position = 0;
1524                            let mut input = b"<!DOCTYPE>other content".as_ref();
1525                            //                ^= 0      ^= 10
1526
1527                            let (ty, bytes) = $source(&mut input)
1528                                .read_bang_element(buf, &mut position)
1529                                $(.$await)?
1530                                .unwrap();
1531                            assert_eq!(
1532                                (ty, Bytes(bytes)),
1533                                (BangType::DocType(DtdParser::Finished), Bytes(b"<!DOCTYPE"))
1534                            );
1535                            assert_eq!(position, 10);
1536                        }
1537
1538                        #[$test]
1539                        $($async)? fn not_closed() {
1540                            let buf = $buf;
1541                            let mut position = 0;
1542                            let mut input = b"<!DOCTYPE other content".as_ref();
1543                            //                ^= 0                   ^23
1544
1545                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1546                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1547                                x => panic!(
1548                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1549                                    x
1550                                ),
1551                            }
1552                            assert_eq!(position, 23);
1553                        }
1554                    }
1555
1556                    mod lowercase {
1557                        use super::*;
1558                        use pretty_assertions::assert_eq;
1559
1560                        #[$test]
1561                        $($async)? fn not_properly_start() {
1562                            let buf = $buf;
1563                            let mut position = 0;
1564                            let mut input = b"<!d other content".as_ref();
1565                            //                ^= 0             ^= 17
1566
1567                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1568                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1569                                x => panic!(
1570                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1571                                    x
1572                                ),
1573                            }
1574                            assert_eq!(position, 17);
1575                        }
1576
1577                        #[$test]
1578                        $($async)? fn without_space() {
1579                            let buf = $buf;
1580                            let mut position = 0;
1581                            let mut input = b"<!doctypeother content".as_ref();
1582                            //                ^= 0                  ^= 22
1583
1584                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1585                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1586                                x => panic!(
1587                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1588                                    x
1589                                ),
1590                            }
1591                            assert_eq!(position, 22);
1592                        }
1593
1594                        #[$test]
1595                        $($async)? fn empty() {
1596                            let buf = $buf;
1597                            let mut position = 0;
1598                            let mut input = b"<!doctype>other content".as_ref();
1599                            //                ^= 0      ^= 10
1600
1601                            let (ty, bytes) = $source(&mut input)
1602                                .read_bang_element(buf, &mut position)
1603                                $(.$await)?
1604                                .unwrap();
1605                            assert_eq!(
1606                                (ty, Bytes(bytes)),
1607                                (BangType::DocType(DtdParser::Finished), Bytes(b"<!doctype"))
1608                            );
1609                            assert_eq!(position, 10);
1610                        }
1611
1612                        #[$test]
1613                        $($async)? fn not_closed() {
1614                            let buf = $buf;
1615                            let mut position = 0;
1616                            let mut input = b"<!doctype other content".as_ref();
1617                            //                ^= 0                   ^= 23
1618
1619                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1620                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1621                                x => panic!(
1622                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1623                                    x
1624                                ),
1625                            }
1626                            assert_eq!(position, 23);
1627                        }
1628                    }
1629                }
1630            }
1631
1632            mod read_text {
1633                use super::*;
1634                use crate::reader::ReadTextResult;
1635                use crate::utils::Bytes;
1636                use pretty_assertions::assert_eq;
1637
1638                #[$test]
1639                $($async)? fn empty() {
1640                    let buf = $buf;
1641                    let mut position = 1;
1642                    let mut input = b"".as_ref();
1643                    //                ^= 1
1644
1645                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1646                        ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"")),
1647                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1648                    }
1649                    assert_eq!(position, 1);
1650                }
1651
1652                #[$test]
1653                $($async)? fn markup() {
1654                    let buf = $buf;
1655                    let mut position = 1;
1656                    let mut input = b"<".as_ref();
1657                    //                 ^= 1
1658
1659                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1660                        ReadTextResult::Markup(b) => assert_eq!(b, $buf),
1661                        x => panic!("Expected `Markup(_)`, but got `{:?}`", x),
1662                    }
1663                    assert_eq!(position, 1);
1664                }
1665
1666                #[$test]
1667                $($async)? fn ref_() {
1668                    let buf = $buf;
1669                    let mut position = 1;
1670                    let mut input = b"&".as_ref();
1671                    //                ^= 1
1672
1673                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1674                        ReadTextResult::Ref(b) => assert_eq!(b, $buf),
1675                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1676                    }
1677                    assert_eq!(position, 1);
1678                }
1679
1680                #[$test]
1681                $($async)? fn up_to_markup() {
1682                    let buf = $buf;
1683                    let mut position = 1;
1684                    let mut input = b"a<".as_ref();
1685                    //                  ^= 2
1686
1687                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1688                        ReadTextResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1689                        x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
1690                    }
1691                    assert_eq!(position, 2);
1692                }
1693
1694                #[$test]
1695                $($async)? fn up_to_ref() {
1696                    let buf = $buf;
1697                    let mut position = 1;
1698                    let mut input = b"a&".as_ref();
1699                    //                 ^= 2
1700
1701                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1702                        ReadTextResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1703                        x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
1704                    }
1705                    assert_eq!(position, 2);
1706                }
1707
1708                #[$test]
1709                $($async)? fn up_to_eof() {
1710                    let buf = $buf;
1711                    let mut position = 1;
1712                    let mut input = b"a".as_ref();
1713                    //                 ^= 2
1714
1715                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1716                        ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1717                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1718                    }
1719                    assert_eq!(position, 2);
1720                }
1721            }
1722
1723            mod read_ref {
1724                use super::*;
1725                use crate::reader::ReadRefResult;
1726                use crate::utils::Bytes;
1727                use pretty_assertions::assert_eq;
1728
1729                // Empty input is not allowed for `read_ref` so not tested.
1730                // Borrowed source triggers debug assertion,
1731                // buffered do nothing due to implementation details.
1732
1733                #[$test]
1734                $($async)? fn up_to_eof() {
1735                    let buf = $buf;
1736                    let mut position = 1;
1737                    let mut input = b"&".as_ref();
1738                    //                 ^= 2
1739
1740                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1741                        ReadRefResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1742                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1743                    }
1744                    assert_eq!(position, 2);
1745                }
1746
1747                #[$test]
1748                $($async)? fn up_to_ref() {
1749                    let buf = $buf;
1750                    let mut position = 1;
1751                    let mut input = b"&&".as_ref();
1752                    //                 ^= 2
1753
1754                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1755                        ReadRefResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1756                        x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
1757                    }
1758                    assert_eq!(position, 2);
1759                }
1760
1761                #[$test]
1762                $($async)? fn up_to_markup() {
1763                    let buf = $buf;
1764                    let mut position = 1;
1765                    let mut input = b"&<".as_ref();
1766                    //                 ^= 2
1767
1768                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1769                        ReadRefResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1770                        x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
1771                    }
1772                    assert_eq!(position, 2);
1773                }
1774
1775                #[$test]
1776                $($async)? fn empty_ref() {
1777                    let buf = $buf;
1778                    let mut position = 1;
1779                    let mut input = b"&;".as_ref();
1780                    //                  ^= 3
1781
1782                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1783                        ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1784                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1785                    }
1786                    assert_eq!(position, 3);
1787                }
1788
1789                #[$test]
1790                $($async)? fn normal() {
1791                    let buf = $buf;
1792                    let mut position = 1;
1793                    let mut input = b"&lt;".as_ref();
1794                    //                    ^= 5
1795
1796                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1797                        ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&lt")),
1798                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1799                    }
1800                    assert_eq!(position, 5);
1801                }
1802            }
1803
1804            mod read_element {
1805                use super::*;
1806                use crate::errors::{Error, SyntaxError};
1807                use crate::parser::ElementParser;
1808                use crate::utils::Bytes;
1809                use pretty_assertions::assert_eq;
1810
1811                /// Checks that nothing was read from empty buffer
1812                #[$test]
1813                $($async)? fn empty() {
1814                    let buf = $buf;
1815                    let mut position = 1;
1816                    let mut input = b"".as_ref();
1817                    //                ^= 1
1818
1819                    match $source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? {
1820                        Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedTag),
1821                        x => panic!(
1822                            "Expected `Err(Syntax(_))`, but got `{:?}`",
1823                            x
1824                        ),
1825                    }
1826                    assert_eq!(position, 1);
1827                }
1828
1829                mod open {
1830                    use super::*;
1831                    use pretty_assertions::assert_eq;
1832
1833                    #[$test]
1834                    $($async)? fn empty_tag() {
1835                        let buf = $buf;
1836                        let mut position = 1;
1837                        let mut input = b">".as_ref();
1838                        //                 ^= 2
1839
1840                        assert_eq!(
1841                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1842                            Bytes(b"")
1843                        );
1844                        assert_eq!(position, 2);
1845                    }
1846
1847                    #[$test]
1848                    $($async)? fn normal() {
1849                        let buf = $buf;
1850                        let mut position = 1;
1851                        let mut input = b"tag>".as_ref();
1852                        //                    ^= 5
1853
1854                        assert_eq!(
1855                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1856                            Bytes(b"tag")
1857                        );
1858                        assert_eq!(position, 5);
1859                    }
1860
1861                    #[$test]
1862                    $($async)? fn empty_ns_empty_tag() {
1863                        let buf = $buf;
1864                        let mut position = 1;
1865                        let mut input = b":>".as_ref();
1866                        //                  ^= 3
1867
1868                        assert_eq!(
1869                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1870                            Bytes(b":")
1871                        );
1872                        assert_eq!(position, 3);
1873                    }
1874
1875                    #[$test]
1876                    $($async)? fn empty_ns() {
1877                        let buf = $buf;
1878                        let mut position = 1;
1879                        let mut input = b":tag>".as_ref();
1880                        //                     ^= 6
1881
1882                        assert_eq!(
1883                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1884                            Bytes(b":tag")
1885                        );
1886                        assert_eq!(position, 6);
1887                    }
1888
1889                    #[$test]
1890                    $($async)? fn with_attributes() {
1891                        let buf = $buf;
1892                        let mut position = 1;
1893                        let mut input = br#"tag  attr-1=">"  attr2  =  '>'  3attr>"#.as_ref();
1894                        //                                                        ^= 39
1895
1896                        assert_eq!(
1897                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1898                            Bytes(br#"tag  attr-1=">"  attr2  =  '>'  3attr"#)
1899                        );
1900                        assert_eq!(position, 39);
1901                    }
1902                }
1903
1904                mod self_closed {
1905                    use super::*;
1906                    use pretty_assertions::assert_eq;
1907
1908                    #[$test]
1909                    $($async)? fn empty_tag() {
1910                        let buf = $buf;
1911                        let mut position = 1;
1912                        let mut input = b"/>".as_ref();
1913                        //                  ^= 3
1914
1915                        assert_eq!(
1916                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1917                            Bytes(b"/")
1918                        );
1919                        assert_eq!(position, 3);
1920                    }
1921
1922                    #[$test]
1923                    $($async)? fn normal() {
1924                        let buf = $buf;
1925                        let mut position = 1;
1926                        let mut input = b"tag/>".as_ref();
1927                        //                     ^= 6
1928
1929                        assert_eq!(
1930                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1931                            Bytes(b"tag/")
1932                        );
1933                        assert_eq!(position, 6);
1934                    }
1935
1936                    #[$test]
1937                    $($async)? fn empty_ns_empty_tag() {
1938                        let buf = $buf;
1939                        let mut position = 1;
1940                        let mut input = b":/>".as_ref();
1941                        //                   ^= 4
1942
1943                        assert_eq!(
1944                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1945                            Bytes(b":/")
1946                        );
1947                        assert_eq!(position, 4);
1948                    }
1949
1950                    #[$test]
1951                    $($async)? fn empty_ns() {
1952                        let buf = $buf;
1953                        let mut position = 1;
1954                        let mut input = b":tag/>".as_ref();
1955                        //                      ^= 7
1956
1957                        assert_eq!(
1958                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1959                            Bytes(b":tag/")
1960                        );
1961                        assert_eq!(position, 7);
1962                    }
1963
1964                    #[$test]
1965                    $($async)? fn with_attributes() {
1966                        let buf = $buf;
1967                        let mut position = 1;
1968                        let mut input = br#"tag  attr-1="/>"  attr2  =  '/>'  3attr/>"#.as_ref();
1969                        //                                                           ^= 42
1970
1971                        assert_eq!(
1972                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1973                            Bytes(br#"tag  attr-1="/>"  attr2  =  '/>'  3attr/"#)
1974                        );
1975                        assert_eq!(position, 42);
1976                    }
1977                }
1978
1979                mod close {
1980                    use super::*;
1981                    use pretty_assertions::assert_eq;
1982
1983                    #[$test]
1984                    $($async)? fn empty_tag() {
1985                        let buf = $buf;
1986                        let mut position = 1;
1987                        let mut input = b"/ >".as_ref();
1988                        //                   ^= 4
1989
1990                        assert_eq!(
1991                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1992                            Bytes(b"/ ")
1993                        );
1994                        assert_eq!(position, 4);
1995                    }
1996
1997                    #[$test]
1998                    $($async)? fn normal() {
1999                        let buf = $buf;
2000                        let mut position = 1;
2001                        let mut input = b"/tag>".as_ref();
2002                        //                     ^= 6
2003
2004                        assert_eq!(
2005                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2006                            Bytes(b"/tag")
2007                        );
2008                        assert_eq!(position, 6);
2009                    }
2010
2011                    #[$test]
2012                    $($async)? fn empty_ns_empty_tag() {
2013                        let buf = $buf;
2014                        let mut position = 1;
2015                        let mut input = b"/:>".as_ref();
2016                        //                   ^= 4
2017
2018                        assert_eq!(
2019                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2020                            Bytes(b"/:")
2021                        );
2022                        assert_eq!(position, 4);
2023                    }
2024
2025                    #[$test]
2026                    $($async)? fn empty_ns() {
2027                        let buf = $buf;
2028                        let mut position = 1;
2029                        let mut input = b"/:tag>".as_ref();
2030                        //                      ^= 7
2031
2032                        assert_eq!(
2033                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2034                            Bytes(b"/:tag")
2035                        );
2036                        assert_eq!(position, 7);
2037                    }
2038
2039                    #[$test]
2040                    $($async)? fn with_attributes() {
2041                        let buf = $buf;
2042                        let mut position = 1;
2043                        let mut input = br#"/tag  attr-1=">"  attr2  =  '>'  3attr>"#.as_ref();
2044                        //                                                         ^= 40
2045
2046                        assert_eq!(
2047                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2048                            Bytes(br#"/tag  attr-1=">"  attr2  =  '>'  3attr"#)
2049                        );
2050                        assert_eq!(position, 40);
2051                    }
2052                }
2053            }
2054
            /// Ensures that no empty `Text` events are generated
2056            mod $read_event {
2057                use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
2058                use crate::reader::Reader;
2059                use pretty_assertions::assert_eq;
2060
2061                /// When `encoding` feature is enabled, encoding should be detected
2062                /// from BOM (UTF-8) and BOM should be stripped.
2063                ///
2064                /// When `encoding` feature is disabled, UTF-8 is assumed and BOM
2065                /// character should be stripped for consistency
2066                #[$test]
2067                $($async)? fn bom_from_reader() {
2068                    let mut reader = Reader::from_reader("\u{feff}\u{feff}".as_bytes());
2069
2070                    assert_eq!(
2071                        reader.$read_event($buf) $(.$await)? .unwrap(),
2072                        Event::Text(BytesText::from_escaped("\u{feff}"))
2073                    );
2074
2075                    assert_eq!(
2076                        reader.$read_event($buf) $(.$await)? .unwrap(),
2077                        Event::Eof
2078                    );
2079                }
2080
2081                /// When parsing from &str, encoding is fixed (UTF-8), so
2082                /// - when `encoding` feature is disabled, the behavior the
2083                ///   same as in `bom_from_reader` text
2084                /// - when `encoding` feature is enabled, the behavior should
2085                ///   stay consistent, so the first BOM character is stripped
2086                #[$test]
2087                $($async)? fn bom_from_str() {
2088                    let mut reader = Reader::from_str("\u{feff}\u{feff}");
2089
2090                    assert_eq!(
2091                        reader.$read_event($buf) $(.$await)? .unwrap(),
2092                        Event::Text(BytesText::from_escaped("\u{feff}"))
2093                    );
2094
2095                    assert_eq!(
2096                        reader.$read_event($buf) $(.$await)? .unwrap(),
2097                        Event::Eof
2098                    );
2099                }
2100
2101                #[$test]
2102                $($async)? fn declaration() {
2103                    let mut reader = Reader::from_str("<?xml ?>");
2104
2105                    assert_eq!(
2106                        reader.$read_event($buf) $(.$await)? .unwrap(),
2107                        Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
2108                    );
2109                }
2110
2111                #[$test]
2112                $($async)? fn doctype() {
2113                    let mut reader = Reader::from_str("<!DOCTYPE x>");
2114
2115                    assert_eq!(
2116                        reader.$read_event($buf) $(.$await)? .unwrap(),
2117                        Event::DocType(BytesText::from_escaped("x"))
2118                    );
2119                }
2120
2121                #[$test]
2122                $($async)? fn processing_instruction() {
2123                    let mut reader = Reader::from_str("<?xml-stylesheet '? >\" ?>");
2124
2125                    assert_eq!(
2126                        reader.$read_event($buf) $(.$await)? .unwrap(),
2127                        Event::PI(BytesPI::new("xml-stylesheet '? >\" "))
2128                    );
2129                }
2130
2131                /// Lone closing tags are not allowed, so testing it together with start tag
2132                #[$test]
2133                $($async)? fn start_and_end() {
2134                    let mut reader = Reader::from_str("<tag></tag>");
2135
2136                    assert_eq!(
2137                        reader.$read_event($buf) $(.$await)? .unwrap(),
2138                        Event::Start(BytesStart::new("tag"))
2139                    );
2140
2141                    assert_eq!(
2142                        reader.$read_event($buf) $(.$await)? .unwrap(),
2143                        Event::End(BytesEnd::new("tag"))
2144                    );
2145                }
2146
2147                #[$test]
2148                $($async)? fn empty() {
2149                    let mut reader = Reader::from_str("<tag/>");
2150
2151                    assert_eq!(
2152                        reader.$read_event($buf) $(.$await)? .unwrap(),
2153                        Event::Empty(BytesStart::new("tag"))
2154                    );
2155                }
2156
2157                #[$test]
2158                $($async)? fn text() {
2159                    let mut reader = Reader::from_str("text");
2160
2161                    assert_eq!(
2162                        reader.$read_event($buf) $(.$await)? .unwrap(),
2163                        Event::Text(BytesText::from_escaped("text"))
2164                    );
2165                }
2166
2167                #[$test]
2168                $($async)? fn cdata() {
2169                    let mut reader = Reader::from_str("<![CDATA[]]>");
2170
2171                    assert_eq!(
2172                        reader.$read_event($buf) $(.$await)? .unwrap(),
2173                        Event::CData(BytesCData::new(""))
2174                    );
2175                }
2176
2177                #[$test]
2178                $($async)? fn comment() {
2179                    let mut reader = Reader::from_str("<!---->");
2180
2181                    assert_eq!(
2182                        reader.$read_event($buf) $(.$await)? .unwrap(),
2183                        Event::Comment(BytesText::from_escaped(""))
2184                    );
2185                }
2186
2187                #[$test]
2188                $($async)? fn eof() {
2189                    let mut reader = Reader::from_str("");
2190
2191                    assert_eq!(
2192                        reader.$read_event($buf) $(.$await)? .unwrap(),
2193                        Event::Eof
2194                    );
2195                }
2196            }
2197        };
2198    }
2199
2200    // Export macros for the child modules:
2201    // - buffered_reader
2202    // - slice_reader
2203    pub(super) use check;
2204}