quick_xml/reader/
mod.rs

1//! Contains high-level interface for a pull-based XML parser.
2
3#[cfg(feature = "encoding")]
4use encoding_rs::Encoding;
5use std::io;
6use std::ops::Range;
7
8use crate::encoding::Decoder;
9#[cfg(feature = "encoding")]
10use crate::encoding::DetectedEncoding;
11use crate::errors::{Error, IllFormedError, SyntaxError};
12use crate::events::{BytesRef, Event};
13use crate::parser::{DtdParser, ElementParser, Parser, PiParser};
14use crate::reader::state::ReaderState;
15
/// A struct that holds a parser configuration.
///
/// Current parser configuration can be retrieved by calling [`Reader::config()`]
/// and changed by changing properties of the object returned by a call to
/// [`Reader::config_mut()`].
///
/// [`Reader::config()`]: crate::reader::Reader::config
/// [`Reader::config_mut()`]: crate::reader::Reader::config_mut
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))]
#[non_exhaustive]
pub struct Config {
    /// Whether a lone ampersand character (without a paired semicolon) should be
    /// allowed in textual content. Unless enabled, in case of a dangling ampersand,
    /// the [`Error::IllFormed(UnclosedReference)`] is returned from read methods.
    ///
    /// Default: `false`
    ///
    /// # Example
    ///
    /// ```
    /// # use quick_xml::events::{BytesRef, BytesText, Event};
    /// # use quick_xml::reader::Reader;
    /// # use pretty_assertions::assert_eq;
    /// let mut reader = Reader::from_str("text with & &amp; & alone");
    /// reader.config_mut().allow_dangling_amp = true;
    ///
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new("text with ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(BytesRef::new("amp")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new(" ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& alone")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
    /// ```
    ///
    /// [`Error::IllFormed(UnclosedReference)`]: crate::errors::IllFormedError::UnclosedReference
    pub allow_dangling_amp: bool,

    /// Whether unmatched closing tag names should be allowed. Unless enabled,
    /// in case of a dangling end tag, the [`Error::IllFormed(UnmatchedEndTag)`]
    /// is returned from read methods.
    ///
    /// When set to `true`, it won't check if a closing tag has a corresponding
    /// opening tag at all. For example, `<a></a></b>` will be permitted.
    ///
    /// Note that the emitted [`End`] event will not be modified if this is enabled,
    /// i.e. it will contain the data of the unmatched end tag.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event.
    ///
    /// Default: `false`
    ///
    /// [`Error::IllFormed(UnmatchedEndTag)`]: crate::errors::IllFormedError::UnmatchedEndTag
    /// [`End`]: crate::events::Event::End
    pub allow_unmatched_ends: bool,

    /// Whether comments should be validated. If enabled, in case of an invalid comment
    /// [`Error::IllFormed(DoubleHyphenInComment)`] is returned from read methods.
    ///
    /// When set to `true`, every [`Comment`] event will be checked for not
    /// containing `--`, which [is not allowed] in XML comments. Most of the time
    /// we don't want comments at all so we don't really care about comment
    /// correctness, thus the default value is `false` to improve performance.
    ///
    /// Default: `false`
    ///
    /// [`Error::IllFormed(DoubleHyphenInComment)`]: crate::errors::IllFormedError::DoubleHyphenInComment
    /// [`Comment`]: crate::events::Event::Comment
    /// [is not allowed]: https://www.w3.org/TR/xml11/#sec-comments
    pub check_comments: bool,

    /// Whether mismatched closing tag names should be detected. If enabled, in
    /// case of mismatch the [`Error::IllFormed(MismatchedEndTag)`] is returned from
    /// read methods.
    ///
    /// Note that start and end tags [should match literally][spec], they cannot
    /// have different prefixes even if both prefixes resolve to the same namespace.
    /// The XML
    ///
    /// ```xml
    /// <outer xmlns="namespace" xmlns:p="namespace">
    /// </p:outer>
    /// ```
    ///
    /// is not valid, even though semantically the start tag is the same as the
    /// end tag. The reason is that namespaces are an extension of the original
    /// XML specification (without namespaces) and it should be backward-compatible.
    ///
    /// When set to `false`, it won't check if a closing tag matches the corresponding
    /// opening tag. For example, `<mytag></different_tag>` will be permitted.
    ///
    /// If the XML is known to be sane (already processed, etc.) this saves extra time.
    ///
    /// Note that the emitted [`End`] event will not be modified if this is disabled,
    /// i.e. it will contain the data of the mismatched end tag.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event. However, if
    /// [`expand_empty_elements`] is also set, only one additional allocation will
    /// be performed that supports both these options.
    ///
    /// Default: `true`
    ///
    /// [`Error::IllFormed(MismatchedEndTag)`]: crate::errors::IllFormedError::MismatchedEndTag
    /// [spec]: https://www.w3.org/TR/xml11/#dt-etag
    /// [`End`]: crate::events::Event::End
    /// [`expand_empty_elements`]: Self::expand_empty_elements
    pub check_end_names: bool,

    /// Whether empty elements should be split into a [`Start`] and an [`End`] event.
    ///
    /// When set to `true`, all [`Empty`] events produced by a self-closing tag
    /// like `<tag/>` are expanded into a [`Start`] event followed by an [`End`]
    /// event. When set to `false` (the default), those tags are represented by
    /// an [`Empty`] event instead.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event. However, if
    /// [`check_end_names`] is also set, only one additional allocation will be
    /// performed that supports both these options.
    ///
    /// Default: `false`
    ///
    /// [`Empty`]: crate::events::Event::Empty
    /// [`Start`]: crate::events::Event::Start
    /// [`End`]: crate::events::Event::End
    /// [`check_end_names`]: Self::check_end_names
    pub expand_empty_elements: bool,

    /// Whether trailing whitespace after the markup name is trimmed in closing
    /// tags `</a >`.
    ///
    /// If `true` the emitted [`End`] event is stripped of trailing whitespace
    /// after the markup name.
    ///
    /// Note that if set to `false` and [`check_end_names`] is `true` the comparison
    /// of markup names is going to fail erroneously if a closing tag contains
    /// trailing whitespace.
    ///
    /// Default: `true`
    ///
    /// [`End`]: crate::events::Event::End
    /// [`check_end_names`]: Self::check_end_names
    pub trim_markup_names_in_closing_tags: bool,

    /// Whether whitespace before character data should be removed.
    ///
    /// When set to `true`, leading whitespace is trimmed in [`Text`] events.
    /// If after that the event is empty it will not be pushed.
    ///
    /// Default: `false`
    ///
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
    ///
    /// WARNING: With this option every text event will be trimmed, which is
    /// incorrect behavior when text events are delimited by comments, processing
    /// instructions or CDATA sections. To correctly trim data manually apply
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
    /// only to necessary events.
    /// </div>
    ///
    /// [`Text`]: crate::events::Event::Text
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
    pub trim_text_start: bool,

    /// Whether whitespace after character data should be removed.
    ///
    /// When set to `true`, trailing whitespace is trimmed in [`Text`] events.
    /// If after that the event is empty it will not be pushed.
    ///
    /// Default: `false`
    ///
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
    ///
    /// WARNING: With this option every text event will be trimmed, which is
    /// incorrect behavior when text events are delimited by comments, processing
    /// instructions or CDATA sections. To correctly trim data manually apply
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
    /// only to necessary events.
    /// </div>
    ///
    /// [`Text`]: crate::events::Event::Text
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
    pub trim_text_end: bool,
}
205
206impl Config {
207    /// Set both [`trim_text_start`] and [`trim_text_end`] to the same value.
208    ///
209    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
210    ///
211    /// WARNING: With this option every text events will be trimmed which is
212    /// incorrect behavior when text events delimited by comments, processing
213    /// instructions or CDATA sections. To correctly trim data manually apply
214    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
215    /// only to necessary events.
216    /// </div>
217    ///
218    /// [`trim_text_start`]: Self::trim_text_start
219    /// [`trim_text_end`]: Self::trim_text_end
220    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
221    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
222    #[inline]
223    pub fn trim_text(&mut self, trim: bool) {
224        self.trim_text_start = trim;
225        self.trim_text_end = trim;
226    }
227
228    /// Turn on or off all checks for well-formedness. Currently it is that settings:
229    /// - [`check_comments`](Self::check_comments)
230    /// - [`check_end_names`](Self::check_end_names)
231    #[inline]
232    pub fn enable_all_checks(&mut self, enable: bool) {
233        self.check_comments = enable;
234        self.check_end_names = enable;
235    }
236}
237
238impl Default for Config {
239    fn default() -> Self {
240        Self {
241            allow_dangling_amp: false,
242            allow_unmatched_ends: false,
243            check_comments: false,
244            check_end_names: true,
245            expand_empty_elements: false,
246            trim_markup_names_in_closing_tags: true,
247            trim_text_start: false,
248            trim_text_end: false,
249        }
250    }
251}
252
253////////////////////////////////////////////////////////////////////////////////////////////////////
254
/// Shared body of the "read one event" methods: a state machine driven by
/// `$self.state.state` that expands to a block evaluating to the `Result`
/// of reading the next [`Event`].
///
/// Parameters:
/// - `$self`: the reader whose `state` field is inspected and updated;
/// - `$buf`: identifier of the buffer binding passed to the read methods. It is
///   reassigned inside the loop, so it has to be a mutable binding;
/// - `$reader`: expression for the underlying source; the macro calls
///   `detect_encoding` / `remove_utf8_bom`, `read_ref`, `skip_whitespace` and
///   `read_text` on it;
/// - `$read_until_close`: name of the method of `$self` that is called when the
///   reader is in the `InsideMarkup` state;
/// - `$await`: optional `await` token that is appended to every read call.
macro_rules! read_event_impl {
    (
        $self:ident, $buf:ident,
        $reader:expr,
        $read_until_close:ident
        $(, $await:ident)?
    ) => {{
        // Each iteration either `break`s with the event / error to return or
        // `continue`s after a state change that does not produce an event
        let event = loop {
            break match $self.state.state {
                ParseState::Init => { // Go to InsideText state
                    // If the encoding was set explicitly, we do not need to detect it.
                    // For example, explicit UTF-8 is set automatically if the Reader was
                    // created using `from_str`. But we still need to remove the BOM for
                    // consistency with the path where the `encoding` feature is disabled
                    #[cfg(feature = "encoding")]
                    if let Some(encoding) = $reader.detect_encoding() $(.$await)? ? {
                        if $self.state.encoding.can_be_refined() {
                            $self.state.encoding = crate::reader::EncodingRef::BomDetected(encoding.encoding());
                        }
                    }

                    // Removes UTF-8 BOM if it is present
                    #[cfg(not(feature = "encoding"))]
                    $reader.remove_utf8_bom() $(.$await)? ?;

                    $self.state.state = ParseState::InsideText;
                    continue;
                },
                ParseState::InsideRef => { // Go to InsideText
                    // Remember where the reference started to report errors at that position
                    let start = $self.state.offset;
                    match $reader.read_ref($buf, &mut $self.state.offset) $(.$await)? {
                        // Emit reference, go to InsideText state
                        ReadRefResult::Ref(bytes) => {
                            $self.state.state = ParseState::InsideText;
                            // +1 to skip start `&`
                            // -1 to skip end `;`
                            Ok(Event::GeneralRef(BytesRef::wrap(&bytes[1..bytes.len() - 1], $self.decoder())))
                        }
                        // Go to Done state
                        ReadRefResult::UpToEof(bytes) if $self.state.config.allow_dangling_amp => {
                            $self.state.state = ParseState::Done;
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToEof(_) => {
                            $self.state.state = ParseState::Done;
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        // Do not change state, stay in InsideRef
                        ReadRefResult::UpToRef(bytes) if $self.state.config.allow_dangling_amp => {
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToRef(_) => {
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        // Go to InsideMarkup state
                        ReadRefResult::UpToMarkup(bytes) if $self.state.config.allow_dangling_amp => {
                            $self.state.state = ParseState::InsideMarkup;
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToMarkup(_) => {
                            $self.state.state = ParseState::InsideMarkup;
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        ReadRefResult::Err(e) => Err(Error::from(e)),
                    }
                }
                ParseState::InsideText => { // Go to InsideMarkup, InsideRef or Done state
                    if $self.state.config.trim_text_start {
                        $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?;
                    }

                    match $reader.read_text($buf, &mut $self.state.offset) $(.$await)? {
                        // No event is emitted here, only the state is switched
                        ReadTextResult::Markup(buf) => {
                            $self.state.state = ParseState::InsideMarkup;
                            // Pass `buf` to the next iteration of the parsing loop
                            $buf = buf;
                            continue;
                        }
                        // No event is emitted here, only the state is switched
                        ReadTextResult::Ref(buf) => {
                            $self.state.state = ParseState::InsideRef;
                            // Pass `buf` to the next iteration of the parsing loop
                            $buf = buf;
                            continue;
                        }
                        ReadTextResult::UpToMarkup(bytes) => {
                            $self.state.state = ParseState::InsideMarkup;
                            // FIXME: Can produce an empty event if:
                            // - event contains only spaces
                            // - trim_text_start = false
                            // - trim_text_end = true
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadTextResult::UpToRef(bytes) => {
                            $self.state.state = ParseState::InsideRef;
                            // Always return a Text event with the `bytes` content
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadTextResult::UpToEof(bytes) => {
                            $self.state.state = ParseState::Done;
                            // Return Text event with `bytes` content or Eof if the
                            // produced text event is empty
                            let event = $self.state.emit_text(bytes);
                            if event.is_empty() {
                                Ok(Event::Eof)
                            } else {
                                Ok(Event::Text(event))
                            }
                        }
                        ReadTextResult::Err(e) => Err(Error::from(e)),
                    }
                },
                // Go to InsideText state in next two arms
                ParseState::InsideMarkup => $self.$read_until_close($buf) $(.$await)?,
                ParseState::InsideEmpty => Ok(Event::End($self.state.close_expanded_empty())),
                ParseState::Done => Ok(Event::Eof),
            };
        };
        match event {
            // #513: In case of ill-formed errors we already consume the wrong data
            // and change the state. We can continue parsing if we wish
            Err(Error::IllFormed(_)) => {}
            // Any other error and the end of input stop the reader
            Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Done,
            _ => {}
        }
        event
    }};
}
384
/// Read bytes up to the `>` and skip it. This method is expected to be called
/// after seeing the `<` symbol and skipping it. Inspects the next (current)
/// symbol and returns an appropriate [`Event`]:
///
/// |Symbol |Event
/// |-------|-------------------------------------
/// |`!`    |[`Comment`], [`CData`] or [`DocType`]
/// |`/`    |[`End`]
/// |`?`    |[`PI`]
/// |_other_|[`Start`] or [`Empty`]
///
/// Moves parser to the `InsideText` state.
///
/// Parameters:
/// - `$self`: the reader whose `state` field is updated and used to emit events;
/// - `$buf`: identifier of the buffer passed to the read methods of `$reader`;
/// - `$reader`: expression for the underlying source; the macro calls `peek_one`,
///   `read_bang_element` and `read_with` on it;
/// - `$await`: optional `await` token that is appended to every read call.
///
/// If reading of the markup fails, or the input ends right after `<`,
/// `last_error_offset` is set to the offset remembered at the start of the macro.
///
/// [`Comment`]: Event::Comment
/// [`CData`]: Event::CData
/// [`DocType`]: Event::DocType
/// [`End`]: Event::End
/// [`PI`]: Event::PI
/// [`Start`]: Event::Start
/// [`Empty`]: Event::Empty
macro_rules! read_until_close {
    (
        $self:ident, $buf:ident,
        $reader:expr
        $(, $await:ident)?
    ) => {{
        // The state is switched before reading, so it is already set even if reading fails
        $self.state.state = ParseState::InsideText;

        // Offset before the markup is read, used as the error position below
        let start = $self.state.offset;
        match $reader.peek_one() $(.$await)? {
            // `<!` - comment, CDATA or DOCTYPE declaration
            Ok(Some(b'!')) => match $reader
                .read_bang_element($buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok((bang_type, bytes)) => $self.state.emit_bang(bang_type, bytes),
                Err(e) => {
                    // We want to report error at `<`
                    $self.state.last_error_offset = start;
                    Err(e)
                }
            },
            // `</` - closing tag
            // #776: We parse using ElementParser which allows us to have attributes
            // in close tags. While such tags are not allowed by the specification,
            // we allow parsing them anyway because:
            // - we do not check constraints during parsing. This is performed by the
            //   optional validate step which the user should call manually
            // - if we just look for `>` we will parse `</tag attr=">" >` as end tag
            //   `</tag attr=">` and text `" >` which probably no existing parser
            //   does. This is malformed XML, however it is tolerated by some parsers
            //   (e.g. the one used by Adobe Flash) and such documents do exist in the wild.
            Ok(Some(b'/')) => match $reader
                .read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => $self.state.emit_end(bytes),
                Err(e) => {
                    // We want to report error at `<`
                    $self.state.last_error_offset = start;
                    Err(e)
                }
            },
            // `<?` - processing instruction
            Ok(Some(b'?')) => match $reader
                .read_with(PiParser(false), $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => $self.state.emit_question_mark(bytes),
                Err(e) => {
                    // We want to report error at `<`
                    $self.state.last_error_offset = start;
                    Err(e)
                }
            },
            // `<...` - opening or self-closed tag
            Ok(Some(_)) => match $reader
                .read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => Ok($self.state.emit_start(bytes)),
                Err(e) => {
                    // We want to report error at `<`
                    $self.state.last_error_offset = start;
                    Err(e)
                }
            },
            // `<` - syntax error, tag not closed
            Ok(None) => {
                // We want to report error at `<`
                $self.state.last_error_offset = start;
                Err(Error::Syntax(SyntaxError::UnclosedTag))
            }
            // Failure of the peek itself; `last_error_offset` is not touched here
            Err(e) => Err(Error::from(e)),
        }
    }};
}
482
/// Generalization of `read_to_end` method for buffered and borrowed readers.
///
/// Reads events until the end tag named `$end` that closes the current nesting
/// level is found and evaluates to the range from the reader position at the
/// start of the macro up to the position right before that end tag. If reading
/// fails or the input ends first, the expansion `return`s an `Err` from the
/// enclosing function. In every case the `trim_text_start` option is restored
/// to the value it had on entry.
macro_rules! read_to_end {
    (
        // $self: &mut Reader
        $self:expr, $end:expr, $buf:expr,
        $read_event:ident,
        // Code block that performs clearing of internal buffer after read of each event
        $clear:block
        $(, $await:ident)?
    ) => {{
        // The end of the resulting range is the position taken *before* reading
        // the End event, so that position must really be the start of that event.
        // With `trim_text_start` enabled, whitespace between the previous event
        // and the End event would be skipped without generating an event and
        // the position would point before the whitespace instead. Therefore start
        // trimming is temporarily switched off.
        //
        // Taking the position *after* the End event is not an option either:
        // when `trim_markup_names_in_closing_tags` is enabled (the default), the
        // real size that the End event occupies in the source is not known, so
        // the position cannot be corrected afterwards.
        let config = $self.config_mut();
        let saved_trim = config.trim_text_start;
        config.trim_text_start = false;

        let span_start = $self.buffer_position();
        // Number of currently open nested elements that have the same name as `$end`
        let mut nesting = 0;
        let outcome = loop {
            $clear
            // Position right before the event that is read next
            let event_start = $self.buffer_position();
            match $self.$read_event($buf) $(.$await)? {
                Err(e) => break Err(e),
                Ok(Event::Eof) => break Err(Error::missed_end($end, $self.decoder())),

                Ok(Event::Start(e)) if e.name() == $end => nesting += 1,
                Ok(Event::End(e)) if e.name() == $end => {
                    if nesting == 0 {
                        break Ok(span_start..event_start);
                    }
                    nesting -= 1;
                }
                _ => (),
            }
        };

        // Single place where the caller's configuration is restored
        $self.config_mut().trim_text_start = saved_trim;
        match outcome {
            Ok(span) => span,
            Err(e) => return Err(e),
        }
    }};
}
537
538#[cfg(feature = "async-tokio")]
539mod async_tokio;
540mod buffered_reader;
541mod ns_reader;
542mod slice_reader;
543mod state;
544
545pub use ns_reader::NsReader;
546
/// Range of the input, in bytes, that corresponds to some piece of XML.
///
/// This is a plain [`Range`] of `u64` offsets, so `start` is inclusive and
/// `end` is exclusive.
pub type Span = Range<u64>;
549
550////////////////////////////////////////////////////////////////////////////////////////////////////
551
/// Possible reader states. The state transition diagram (`true` and `false` shows
/// value of [`Config::expand_empty_elements`] option):
///
/// ```mermaid
/// flowchart LR
///   subgraph _
///     direction LR
///
///     Init         -- "(no event)"\n                                       --> InsideText
///     InsideMarkup -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> InsideText
///     InsideText   -- "#lt;false#gt;\n(no event)"\nText                    --> InsideMarkup
///     InsideText   -- "(no event)"\nText                                   --> InsideRef
///     InsideRef    -- "(no event)"\nGeneralRef                             --> InsideText
///   end
///   InsideText     -- "#lt;true#gt;"\nStart --> InsideEmpty
///   InsideEmpty    -- End                   --> InsideText
///   _ -. Eof .-> Done
/// ```
#[derive(Clone, Debug)]
enum ParseState {
    /// Initial state in which the reader stays after creation. On the first read
    /// the byte order mark is handled (the encoding is detected when the `encoding`
    /// feature is enabled, otherwise a UTF-8 BOM is removed) and the reader moves
    /// to the `InsideText` state without emitting an event. The reader will never
    /// return to this state.
    Init,
    /// State after seeing the `&` symbol in textual content.
    ///
    /// If a complete reference is read, an [`Event::GeneralRef`] is emitted and
    /// the reader moves to the `InsideText` state. If the reference is not closed,
    /// either an [`Event::Text`] (when [`Config::allow_dangling_amp`] is set) or an
    /// ill-formed error is produced, and the next state depends on what stopped
    /// the reference: another `&` (stay in `InsideRef`), a markup (`InsideMarkup`)
    /// or the end of input (`Done`).
    InsideRef,
    /// State after seeing the `<` symbol. Depending on the next symbol all other
    /// events could be generated.
    ///
    /// After generating one event the reader moves to the `InsideText` state.
    InsideMarkup,
    /// State in which the reader searches for the `<` symbol of a markup or the `&`
    /// symbol of a reference. All bytes before that symbol will be returned in
    /// the [`Event::Text`] event. After that the reader moves to the `InsideMarkup`
    /// or `InsideRef` state respectively, or to the `Done` state if the input ends.
    InsideText,
    /// This state is used only if option [`expand_empty_elements`] is set to `true`.
    /// Reader enters this state when it emits an [`Event::Start`] event for a
    /// self-closed tag (NOTE(review): that transition happens outside of the code
    /// visible next to this enum -- confirm in the reader state implementation).
    /// The next event emitted will be an [`Event::End`], after which the reader
    /// returns to the `InsideText` state.
    ///
    /// [`expand_empty_elements`]: Config::expand_empty_elements
    InsideEmpty,
    /// Reader enters this state when an `Eof` event is generated or when an error
    /// other than an ill-formed error occurs (after an ill-formed error parsing
    /// can usually be continued). This is the last state, the reader stays in
    /// it forever.
    Done,
}
602
/// A reference to an encoding together with information about how it was retrieved.
///
/// The state transition diagram:
///
/// ```mermaid
/// flowchart LR
///   Implicit    -- from_str       --> Explicit
///   Implicit    -- BOM            --> BomDetected
///   Implicit    -- "encoding=..." --> XmlDetected
///   BomDetected -- "encoding=..." --> XmlDetected
/// ```
#[cfg(feature = "encoding")]
#[derive(Clone, Copy, Debug)]
enum EncodingRef {
    /// Encoding was implicitly assumed to have a specified value. It can be refined
    /// using a BOM or by the XML declaration event (`<?xml encoding=... ?>`)
    Implicit(&'static Encoding),
    /// Encoding was explicitly set to the desired value. It can be changed
    /// neither by a BOM nor by parsing the XML declaration (`<?xml encoding=... ?>`)
    Explicit(&'static Encoding),
    /// Encoding was detected from a byte order mark (BOM) or by the first bytes
    /// of the content. It can be refined by the XML declaration event (`<?xml encoding=... ?>`)
    BomDetected(&'static Encoding),
    /// Encoding was detected using the XML declaration event (`<?xml encoding=... ?>`).
    /// It can no longer change
    XmlDetected(&'static Encoding),
}
#[cfg(feature = "encoding")]
impl EncodingRef {
    /// Returns the stored encoding, no matter how it was obtained.
    #[inline]
    const fn encoding(&self) -> &'static Encoding {
        // Every variant carries exactly one encoding reference
        match *self {
            Self::Implicit(enc)
            | Self::Explicit(enc)
            | Self::BomDetected(enc)
            | Self::XmlDetected(enc) => enc,
        }
    }

    /// Returns `true` if the encoding is only assumed or detected from a BOM,
    /// and `false` if it was set explicitly or taken from the XML declaration.
    #[inline]
    const fn can_be_refined(&self) -> bool {
        match self {
            Self::Explicit(_) | Self::XmlDetected(_) => false,
            Self::Implicit(_) | Self::BomDetected(_) => true,
        }
    }
}
649
650////////////////////////////////////////////////////////////////////////////////////////////////////
651
/// A direct stream to the underlying [`Reader`]s reader which updates
/// [`Reader::buffer_position()`] when read from it.
///
/// The stream borrows both the wrapped reader and the position counter, and
/// adds every byte that is read or consumed through it to that counter.
#[derive(Debug)]
#[must_use = "streams do nothing unless read or polled"]
pub struct BinaryStream<'r, R> {
    /// The wrapped reader that supplies the bytes
    inner: &'r mut R,
    /// Position counter that is advanced by reads through this stream
    offset: &'r mut u64,
}

impl<'r, R> BinaryStream<'r, R> {
    /// Returns the current position, in bytes, in the original source.
    #[inline]
    pub const fn offset(&self) -> u64 {
        *self.offset
    }

    /// Gives shared access to the underlying reader.
    #[inline]
    pub const fn get_ref(&self) -> &R {
        self.inner
    }

    /// Gives mutable access to the underlying reader.
    ///
    /// Avoid reading from the returned reader directly: such reads do not update
    /// the position and lead to incorrect positions of errors. Read from this
    /// stream instead.
    #[inline]
    pub fn get_mut(&mut self) -> &mut R {
        self.inner
    }
}

impl<'r, R> io::Read for BinaryStream<'r, R>
where
    R: io::Read,
{
    /// Reads from the wrapped reader and, on success, advances the position by
    /// the number of bytes read.
    #[inline]
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let position = &mut *self.offset;
        self.inner.read(buf).map(|count| {
            *position += count as u64;
            count
        })
    }
}

impl<'r, R> io::BufRead for BinaryStream<'r, R>
where
    R: io::BufRead,
{
    /// Exposes the buffer of the wrapped reader; the position is not changed.
    #[inline]
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        self.inner.fill_buf()
    }

    /// Consumes `amt` bytes of the wrapped reader and advances the position by
    /// the same amount.
    #[inline]
    fn consume(&mut self, amt: usize) {
        self.inner.consume(amt);
        let advanced = amt as u64;
        *self.offset += advanced;
    }
}
711
712////////////////////////////////////////////////////////////////////////////////////////////////////
713
/// A low level encoding-agnostic XML event reader.
///
/// Consumes bytes and streams XML [`Event`]s.
///
/// This reader does not manage namespace declarations and is not able to resolve
/// prefixes. If you want these features, use the [`NsReader`].
///
/// # Examples
///
/// ```
/// use quick_xml::events::Event;
/// use quick_xml::reader::Reader;
///
/// let xml = r#"<tag1 att1 = "test">
///                 <tag2><!--Test comment-->Test</tag2>
///                 <tag2>Test 2</tag2>
///              </tag1>"#;
/// let mut reader = Reader::from_str(xml);
/// reader.config_mut().trim_text(true);
///
/// let mut count = 0;
/// let mut txt = Vec::new();
/// let mut buf = Vec::new();
///
/// // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
/// loop {
///     // NOTE: this is the generic case when we don't know about the input BufRead.
///     // when the input is a &str or a &[u8], we don't actually need to use another
///     // buffer, we could directly call `reader.read_event()`
///     match reader.read_event_into(&mut buf) {
///         Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
///         // exits the loop when reaching end of file
///         Ok(Event::Eof) => break,
///
///         Ok(Event::Start(e)) => {
///             match e.name().as_ref() {
///                 b"tag1" => println!("attributes values: {:?}",
///                                     e.attributes().map(|a| a.unwrap().value)
///                                     .collect::<Vec<_>>()),
///                 b"tag2" => count += 1,
///                 _ => (),
///             }
///         }
///         Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
///
///         // There are several other `Event`s we do not consider here
///         _ => (),
///     }
///     // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
///     buf.clear();
/// }
/// ```
///
/// [`NsReader`]: crate::reader::NsReader
#[derive(Debug, Clone)]
pub struct Reader<R> {
    /// Source of the data to parse
    reader: R,
    /// Configuration and current parse state
    state: ReaderState,
}
775
776/// Builder methods
777impl<R> Reader<R> {
778    /// Creates a `Reader` that reads from a given reader.
779    pub fn from_reader(reader: R) -> Self {
780        Self {
781            reader,
782            state: ReaderState::default(),
783        }
784    }
785
786    /// Returns reference to the parser configuration
787    pub const fn config(&self) -> &Config {
788        &self.state.config
789    }
790
791    /// Returns mutable reference to the parser configuration
792    pub fn config_mut(&mut self) -> &mut Config {
793        &mut self.state.config
794    }
795}
796
797/// Getters
798impl<R> Reader<R> {
799    /// Consumes `Reader` returning the underlying reader
800    ///
801    /// Can be used to compute line and column of a parsing error position
802    ///
803    /// # Examples
804    ///
805    /// ```
806    /// # use pretty_assertions::assert_eq;
807    /// use std::{str, io::Cursor};
808    /// use quick_xml::events::Event;
809    /// use quick_xml::reader::Reader;
810    ///
811    /// let xml = r#"<tag1 att1 = "test">
812    ///                 <tag2><!--Test comment-->Test</tag2>
813    ///                 <tag3>Test 2</tag3>
814    ///              </tag1>"#;
815    /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
816    /// let mut buf = Vec::new();
817    ///
818    /// fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) {
819    ///     // We known that size cannot exceed usize::MAX because we created parser from single &[u8]
820    ///     let end_pos = reader.buffer_position() as usize;
821    ///     let mut cursor = reader.into_inner();
822    ///     let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned())
823    ///         .expect("can't make a string");
824    ///     let mut line = 1;
825    ///     let mut column = 0;
826    ///     for c in s.chars() {
827    ///         if c == '\n' {
828    ///             line += 1;
829    ///             column = 0;
830    ///         } else {
831    ///             column += 1;
832    ///         }
833    ///     }
834    ///     (line, column)
835    /// }
836    ///
837    /// loop {
838    ///     match reader.read_event_into(&mut buf) {
839    ///         Ok(Event::Start(ref e)) => match e.name().as_ref() {
840    ///             b"tag1" | b"tag2" => (),
841    ///             tag => {
842    ///                 assert_eq!(b"tag3", tag);
843    ///                 assert_eq!((3, 22), into_line_and_column(reader));
844    ///                 break;
845    ///             }
846    ///         },
847    ///         Ok(Event::Eof) => unreachable!(),
848    ///         _ => (),
849    ///     }
850    ///     buf.clear();
851    /// }
852    /// ```
853    pub fn into_inner(self) -> R {
854        self.reader
855    }
856
857    /// Gets a reference to the underlying reader.
858    pub const fn get_ref(&self) -> &R {
859        &self.reader
860    }
861
862    /// Gets a mutable reference to the underlying reader.
863    ///
864    /// Avoid read from this reader because this will not update reader's position
865    /// and will lead to incorrect positions of errors. If you want to read, use
866    /// [`stream()`] instead.
867    ///
868    /// [`stream()`]: Self::stream
869    pub fn get_mut(&mut self) -> &mut R {
870        &mut self.reader
871    }
872
873    /// Gets the byte position in the input data just after the last emitted event
874    /// (i.e. this is position where data of last event ends).
875    ///
876    /// Note, that for text events which is originally ended with whitespace characters
877    /// (` `, `\t`, `\r`, and `\n`) if [`Config::trim_text_end`] is set this is position
878    /// before trim, not the position of the last byte of the [`Event::Text`] content.
879    pub const fn buffer_position(&self) -> u64 {
880        self.state.offset
881    }
882
883    /// Gets the last error byte position in the input data. If there is no errors
884    /// yet, returns `0`.
885    ///
886    /// Unlike `buffer_position` it will point to the place where it is rational
887    /// to report error to the end user. For example, all [`SyntaxError`]s are
888    /// reported when the parser sees EOF inside of some kind of markup. The
889    /// `buffer_position()` will point to the last byte of input which is not
890    /// very useful. `error_position()` will point to the start of corresponding
891    /// markup element (i. e. to the `<` character).
892    ///
893    /// This position is always `<= buffer_position()`.
894    pub const fn error_position(&self) -> u64 {
895        self.state.last_error_offset
896    }
897
898    /// Get the decoder, used to decode bytes, read by this reader, to the strings.
899    ///
900    /// If [`encoding`] feature is enabled, the used encoding may change after
901    /// parsing the XML declaration, otherwise encoding is fixed to UTF-8.
902    ///
903    /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
904    /// defaults to UTF-8.
905    ///
906    /// [`encoding`]: ../index.html#encoding
907    #[inline]
908    pub const fn decoder(&self) -> Decoder {
909        self.state.decoder()
910    }
911
912    /// Get the direct access to the underlying reader, but tracks the amount of
913    /// read data and update [`Reader::buffer_position()`] accordingly.
914    ///
915    /// Note, that this method gives you access to the internal reader and read
916    /// data will not be returned in any subsequent events read by `read_event`
917    /// family of methods.
918    ///
919    /// # Example
920    ///
921    /// This example demonstrates how to read stream raw bytes from an XML document.
922    /// This could be used to implement streaming read of text, or to read raw binary
923    /// bytes embedded in an XML document. (Documents with embedded raw bytes are not
924    /// valid XML, but XML-derived file formats exist where such documents are valid).
925    ///
926    /// ```
927    /// # use pretty_assertions::assert_eq;
928    /// use std::io::{BufRead, Read};
929    /// use quick_xml::events::{BytesEnd, BytesStart, Event};
930    /// use quick_xml::reader::Reader;
931    ///
932    /// let mut reader = Reader::from_str("<tag>binary << data&></tag>");
933    /// //                                 ^    ^               ^     ^
934    /// //                                 0    5              21    27
935    ///
936    /// assert_eq!(
937    ///     (reader.read_event().unwrap(), reader.buffer_position()),
938    ///     // 5 - end of the `<tag>`
939    ///     (Event::Start(BytesStart::new("tag")), 5)
940    /// );
941    ///
942    /// // Reading directly from underlying reader will not update position
943    /// // let mut inner = reader.get_mut();
944    ///
945    /// // Reading from the stream() advances position
946    /// let mut inner = reader.stream();
947    ///
948    /// // Read binary data. We must know its size
949    /// let mut binary = [0u8; 16];
950    /// inner.read_exact(&mut binary).unwrap();
951    /// assert_eq!(&binary, b"binary << data&>");
952    /// // 21 - end of the `binary << data&>`
953    /// assert_eq!(inner.offset(), 21);
954    /// assert_eq!(reader.buffer_position(), 21);
955    ///
956    /// assert_eq!(
957    ///     (reader.read_event().unwrap(), reader.buffer_position()),
958    ///     // 27 - end of the `</tag>`
959    ///     (Event::End(BytesEnd::new("tag")), 27)
960    /// );
961    ///
962    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
963    /// ```
964    #[inline]
965    pub fn stream(&mut self) -> BinaryStream<'_, R> {
966        BinaryStream {
967            inner: &mut self.reader,
968            offset: &mut self.state.offset,
969        }
970    }
971}
972
/// Private sync reading methods
impl<R> Reader<R> {
    /// Read text into the given buffer, and return an event that borrows from
    /// either that buffer or from the input itself, based on the type of the
    /// reader.
    ///
    /// The body is generated by the `read_event_impl!` macro, which receives
    /// this reader, the buffer, the underlying source and the `read_until_close`
    /// identifier.
    fn read_event_impl<'i, B>(&mut self, mut buf: B) -> Result<Event<'i>, Error>
    where
        R: XmlSource<'i, B>,
    {
        read_event_impl!(self, buf, self.reader, read_until_close)
    }

    /// Private function to read until `>` is found. This function expects that
    /// it is called just after a `<` symbol was encountered.
    ///
    /// The body is generated by the `read_until_close!` macro, which receives
    /// this reader, the buffer and the underlying source.
    fn read_until_close<'i, B>(&mut self, buf: B) -> Result<Event<'i>, Error>
    where
        R: XmlSource<'i, B>,
    {
        read_until_close!(self, buf, self.reader)
    }
}
994
995////////////////////////////////////////////////////////////////////////////////////////////////////
996
/// Result of an attempt to read XML textual data from the source.
///
/// # Parameters
/// - `'r`: lifetime of the returned text slices
/// - `B`: type of the buffer that is handed back in the [`Markup`](Self::Markup)
///   and [`Ref`](Self::Ref) variants
#[derive(Debug)]
enum ReadTextResult<'r, B> {
    /// Start of markup (`<` character) was found in the first byte. `<` was consumed.
    /// Contains buffer that should be returned back to the next iteration cycle
    /// to satisfy borrow checker requirements.
    Markup(B),
    /// Start of reference (`&` character) was found in the first byte.
    /// `&` was not consumed.
    /// Contains buffer that should be returned back to the next iteration cycle
    /// to satisfy borrow checker requirements.
    Ref(B),
    /// Contains text block up to start of markup (`<` character). `<` was consumed.
    UpToMarkup(&'r [u8]),
    /// Contains text block up to start of reference (`&` character).
    /// `&` was not consumed.
    UpToRef(&'r [u8]),
    /// Contains text block up to EOF, neither start of markup (`<` character)
    /// nor start of reference (`&` character) was found.
    UpToEof(&'r [u8]),
    /// IO error occurred.
    Err(io::Error),
}
1020
/// Result of an attempt to read general reference from the reader.
///
/// # Parameters
/// - `'r`: lifetime of the returned slices
#[derive(Debug)]
enum ReadRefResult<'r> {
    /// Contains text block up to end of reference (`;` character).
    /// Result includes start `&`, but not end `;`.
    Ref(&'r [u8]),
    /// Contains text block up to EOF. Neither end of reference (`;`), start of
    /// another reference (`&`) nor start of markup (`<`) characters were found.
    /// Result includes start `&`.
    UpToEof(&'r [u8]),
    /// Contains text block up to next possible reference (`&` character).
    /// Result includes start `&`.
    UpToRef(&'r [u8]),
    /// Contains text block up to start of markup (`<` character).
    /// Result includes start `&`.
    UpToMarkup(&'r [u8]),
    /// IO error occurred.
    Err(io::Error),
}
1040
/// Represents an input for a reader that can return borrowed data.
///
/// There are two implementors of this trait: a generic one that reads data from
/// `Self`, copies some part of it into a provided buffer of type `B` and then
/// returns data that borrows from that buffer.
///
/// The other implementor is for `&[u8]`; instead of copying data it returns
/// data borrowed from `Self`. This implementation allows zero-copy
/// deserialization.
///
/// # Parameters
/// - `'r`: lifetime of a buffer from which events will borrow
/// - `B`: a type of a buffer that can be used to store data read from `Self` and
///   from which events can borrow
trait XmlSource<'r, B> {
    /// Removes UTF-8 BOM if it is present
    #[cfg(not(feature = "encoding"))]
    fn remove_utf8_bom(&mut self) -> io::Result<()>;

    /// Determines encoding from the start of input and removes BOM if it is present
    #[cfg(feature = "encoding")]
    fn detect_encoding(&mut self) -> io::Result<Option<DetectedEncoding>>;

    /// Read input until start of markup (the `<`) is found, start of general entity
    /// reference (the `&`) is found or end of input is reached.
    ///
    /// # Parameters
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by amount of bytes consumed
    ///
    /// [events]: crate::events::Event
    fn read_text(&mut self, buf: B, position: &mut u64) -> ReadTextResult<'r, B>;

    /// Read input until end of general reference (the `;`) is found, start of
    /// another general reference (the `&`) is found or end of input is reached.
    ///
    /// This method must be called when current character is `&`.
    ///
    /// # Parameters
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by amount of bytes consumed
    ///
    /// [events]: crate::events::Event
    fn read_ref(&mut self, buf: B, position: &mut u64) -> ReadRefResult<'r>;

    /// Read input until the construct recognized by `parser` (for example, a
    /// processing instruction) is finished.
    ///
    /// This method expects that the start sequence of the construct was already read.
    ///
    /// Returns a slice of data read up to the end of the thing being parsed.
    /// The end of the thing and the returned content are determined by the used parser.
    ///
    /// If input (`Self`) is exhausted and no bytes were read, or if the specified
    /// parser could not find the ending sequence of the thing, returns `SyntaxError`.
    ///
    /// # Parameters
    /// - `parser`: The parser that decides where the thing being read ends
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by amount of bytes consumed
    ///
    /// A `P` type parameter is used to preserve state between calls to the underlying
    /// reader which provides bytes fed into the parser.
    ///
    /// [events]: crate::events::Event
    fn read_with<P>(&mut self, parser: P, buf: B, position: &mut u64) -> Result<&'r [u8], Error>
    where
        P: Parser;

    /// Read input until comment, CDATA or DOCTYPE is finished.
    ///
    /// Returns the detected [`BangType`] together with a slice of the data that
    /// was read for it.
    ///
    /// If the end of the element is not found before input (`Self`) is exhausted,
    /// an `Err` is returned (the signature has no `None` case).
    ///
    /// NOTE(review): earlier documentation stated that `<` is already read when
    /// this method is called and that the closing `>` is not included in the
    /// result, while the tests in this file expect the returned slice to span
    /// from `<!` up to and including `>` — confirm against the implementations.
    ///
    /// # Parameters
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by amount of bytes consumed
    ///
    /// [events]: crate::events::Event
    fn read_bang_element(
        &mut self,
        buf: B,
        position: &mut u64,
    ) -> Result<(BangType, &'r [u8]), Error>;

    /// Consume and discard all the whitespace until the next non-whitespace
    /// character or EOF.
    ///
    /// # Parameters
    /// - `position`: Will be increased by amount of bytes consumed
    fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()>;

    /// Return one character without consuming it, so that future `read_*` calls
    /// will still include it. On EOF, return `None`.
    fn peek_one(&mut self) -> io::Result<Option<u8>>;
}
1143
/// Possible elements started with `<!`
#[derive(Debug, PartialEq)]
enum BangType {
    /// `<![CDATA[...]]>`
    CData,
    /// `<!--...-->`
    Comment,
    /// `<!DOCTYPE...>`. Contains the [`DtdParser`] to which the search for the
    /// end of the declaration is delegated.
    ///
    /// NOTE(review): earlier documentation described the payload as a balance
    /// of `<` (+1) and `>` (-1) — confirm against `DtdParser`.
    DocType(DtdParser),
}
1154impl BangType {
1155    #[inline(always)]
1156    const fn new(byte: Option<u8>) -> Result<Self, SyntaxError> {
1157        Ok(match byte {
1158            Some(b'[') => Self::CData,
1159            Some(b'-') => Self::Comment,
1160            Some(b'D') | Some(b'd') => Self::DocType(DtdParser::BeforeInternalSubset(0)),
1161            _ => return Err(SyntaxError::InvalidBangMarkup),
1162        })
1163    }
1164
1165    /// If element is finished, returns its content up to `>` symbol and
1166    /// an index of this symbol, otherwise returns `None`
1167    ///
1168    /// # Parameters
1169    /// - `buf`: buffer with data consumed on previous iterations
1170    /// - `chunk`: data read on current iteration and not yet consumed from reader
1171    #[inline(always)]
1172    fn feed(&mut self, buf: &[u8], chunk: &[u8]) -> Option<usize> {
1173        match self {
1174            Self::Comment => {
1175                for i in memchr::memchr_iter(b'>', chunk) {
1176                    // Need to read at least 6 symbols (`!---->`) for properly finished comment
1177                    // <!----> - XML comment
1178                    // 0123456 - i
1179                    if buf.len() + i > 5 {
1180                        if chunk[..i].ends_with(b"--") {
1181                            // We cannot strip last `--` from the buffer because we need it in case of
1182                            // check_comments enabled option. XML standard requires that comment
1183                            // will not end with `--->` sequence because this is a special case of
1184                            // `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments)
1185                            return Some(i);
1186                        }
1187                        // End sequence `-|->` was splitted at |
1188                        //        buf --/   \-- chunk
1189                        if i == 1 && buf.ends_with(b"-") && chunk[0] == b'-' {
1190                            return Some(i);
1191                        }
1192                        // End sequence `--|>` was splitted at |
1193                        //         buf --/   \-- chunk
1194                        if i == 0 && buf.ends_with(b"--") {
1195                            return Some(i);
1196                        }
1197                    }
1198                }
1199            }
1200            Self::CData => {
1201                for i in memchr::memchr_iter(b'>', chunk) {
1202                    if chunk[..i].ends_with(b"]]") {
1203                        return Some(i);
1204                    }
1205                    // End sequence `]|]>` was splitted at |
1206                    //        buf --/   \-- chunk
1207                    if i == 1 && buf.ends_with(b"]") && chunk[0] == b']' {
1208                        return Some(i);
1209                    }
1210                    // End sequence `]]|>` was splitted at |
1211                    //         buf --/   \-- chunk
1212                    if i == 0 && buf.ends_with(b"]]") {
1213                        return Some(i);
1214                    }
1215                }
1216            }
1217            Self::DocType(ref mut parser) => return parser.feed(buf, chunk),
1218        }
1219        None
1220    }
1221    #[inline]
1222    const fn to_err(&self) -> SyntaxError {
1223        match self {
1224            Self::CData => SyntaxError::UnclosedCData,
1225            Self::Comment => SyntaxError::UnclosedComment,
1226            Self::DocType(_) => SyntaxError::UnclosedDoctype,
1227        }
1228    }
1229}
1230
1231////////////////////////////////////////////////////////////////////////////////////////////////////
1232
1233#[cfg(test)]
1234mod test {
1235    /// Checks the internal implementation of the various reader methods
1236    macro_rules! check {
1237        (
1238            #[$test:meta]
1239            $read_event:ident,
1240            // constructor of the XML source on which internal functions will be called
1241            $source:path,
1242            $skip:literal,
1243            // constructor of the buffer to which read data will stored
1244            $buf:expr
1245            $(, $async:ident, $await:ident)?
1246        ) => {
1247            mod read_bang_element {
1248                use super::*;
1249                use crate::errors::{Error, SyntaxError};
1250                use crate::reader::{BangType, DtdParser};
1251                use crate::utils::Bytes;
1252
1253                /// Checks that reading CDATA content works correctly
1254                mod cdata {
1255                    use super::*;
1256                    use pretty_assertions::assert_eq;
1257
1258                    /// Checks that if input begins like CDATA element, but CDATA start sequence
1259                    /// is not finished, parsing ends with an error
1260                    #[$test]
1261                    #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"]
1262                    $($async)? fn not_properly_start() {
1263                        let buf = $buf;
1264                        let mut position = 0;
1265                        let mut input = &b"<![]]>other content"[$skip..];
1266                        //                 ^= 0
1267
1268                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1269                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData),
1270                            x => panic!(
1271                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1272                                x
1273                            ),
1274                        }
1275                        assert_eq!(position, 1);
1276                    }
1277
1278                    /// Checks that if CDATA startup sequence was matched, but an end sequence
1279                    /// is not found, parsing ends with an error
1280                    #[$test]
1281                    $($async)? fn not_closed() {
1282                        let buf = $buf;
1283                        let mut position = 0;
1284                        let mut input = &b"<![CDATA[other content"[$skip..];
1285                        //                 ^= 0                  ^= 22
1286
1287                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1288                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData),
1289                            x => panic!(
1290                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1291                                x
1292                            ),
1293                        }
1294                        assert_eq!(position, 22);
1295                    }
1296
1297                    /// Checks that CDATA element without content inside parsed successfully
1298                    #[$test]
1299                    $($async)? fn empty() {
1300                        let buf = $buf;
1301                        let mut position = 0;
1302                        let mut input = &b"<![CDATA[]]>other content"[$skip..];
1303                        //                ^= 0        ^= 12
1304
1305                        let (ty, bytes) = $source(&mut input)
1306                            .read_bang_element(buf, &mut position)
1307                            $(.$await)?
1308                            .unwrap();
1309                        assert_eq!(
1310                            (ty, Bytes(bytes)),
1311                            (BangType::CData, Bytes(b"<![CDATA[]]>"))
1312                        );
1313                        assert_eq!(position, 12);
1314                    }
1315
1316                    /// Checks that CDATA element with content parsed successfully.
1317                    /// Additionally checks that sequences inside CDATA that may look like
1318                    /// a CDATA end sequence do not interrupt CDATA parsing
1319                    #[$test]
1320                    $($async)? fn with_content() {
1321                        let buf = $buf;
1322                        let mut position = 0;
1323                        let mut input = &b"<![CDATA[cdata]] ]>content]]>other content]]>"[$skip..];
1324                        //                 ^= 0                         ^= 29
1325
1326                        let (ty, bytes) = $source(&mut input)
1327                            .read_bang_element(buf, &mut position)
1328                            $(.$await)?
1329                            .unwrap();
1330                        assert_eq!(
1331                            (ty, Bytes(bytes)),
1332                            (BangType::CData, Bytes(b"<![CDATA[cdata]] ]>content]]>"))
1333                        );
1334                        assert_eq!(position, 29);
1335                    }
1336                }
1337
1338                /// Checks that reading XML comments works correctly. According to the [specification],
1339                /// comment data can contain any sequence except `--`:
1340                ///
1341                /// ```peg
                /// comment = '<!--' (!'--' char)* '-->';
1343                /// char = [#x1-#x2C]
1344                ///      / [#x2E-#xD7FF]
1345                ///      / [#xE000-#xFFFD]
1346                ///      / [#x10000-#x10FFFF]
1347                /// ```
1348                ///
                /// This limitation, however, is simply a consequence of a poorly designed specification
                /// (perhaps intended to make building an LL(1) XML parser easier), and quick-xml does not
                /// check for the presence of these sequences by default. These tests allow such content.
1352                ///
1353                /// [specification]: https://www.w3.org/TR/xml11/#dt-comment
1354                mod comment {
1355                    use super::*;
1356                    use pretty_assertions::assert_eq;
1357
1358                    #[$test]
1359                    #[ignore = "start comment sequence fully checked outside of `read_bang_element`"]
1360                    $($async)? fn not_properly_start() {
1361                        let buf = $buf;
1362                        let mut position = 0;
1363                        let mut input = &b"<!- -->other content"[$skip..];
1364                        //                  ^= 1
1365
1366                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1367                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1368                            x => panic!(
1369                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1370                                x
1371                            ),
1372                        }
1373                        assert_eq!(position, 1);
1374                    }
1375
1376                    #[$test]
1377                    $($async)? fn not_properly_end() {
1378                        let buf = $buf;
1379                        let mut position = 0;
1380                        let mut input = &b"<!->other content"[$skip..];
1381                        //                 ^= 0             ^= 17
1382
1383                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1384                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1385                            x => panic!(
1386                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1387                                x
1388                            ),
1389                        }
1390                        assert_eq!(position, 17);
1391                    }
1392
1393                    #[$test]
1394                    $($async)? fn not_closed1() {
1395                        let buf = $buf;
1396                        let mut position = 0;
1397                        let mut input = &b"<!--other content"[$skip..];
1398                        //                 ^= 0             ^= 17
1399
1400                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1401                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1402                            x => panic!(
1403                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1404                                x
1405                            ),
1406                        }
1407                        assert_eq!(position, 17);
1408                    }
1409
1410                    #[$test]
1411                    $($async)? fn not_closed2() {
1412                        let buf = $buf;
1413                        let mut position = 0;
1414                        let mut input = &b"<!-->other content"[$skip..];
1415                        //                 ^= 0              ^= 18
1416
1417                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1418                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1419                            x => panic!(
1420                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1421                                x
1422                            ),
1423                        }
1424                        assert_eq!(position, 18);
1425                    }
1426
                        /// `<!--->` is not accepted as a complete comment either: the test
                        /// expects `SyntaxError::UnclosedComment` and `position` equal to 19
                        /// (the length of the input literal).
1427                    #[$test]
1428                    $($async)? fn not_closed3() {
1429                        let buf = $buf;
1430                        let mut position = 0;
1431                        let mut input = &b"<!--->other content"[$skip..];
1432                        //                 ^= 0               ^= 19
1433
1434                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1435                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1436                            x => panic!(
1437                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1438                                x
1439                            ),
1440                        }
1441                        assert_eq!(position, 19);
1442                    }
1443
                        /// The shortest complete comment, `<!---->`, is returned as
                        /// `BangType::Comment` together with exactly those 7 bytes;
                        /// `position` is expected to be 7, i.e. the trailing `other content`
                        /// is not counted.
1444                    #[$test]
1445                    $($async)? fn empty() {
1446                        let buf = $buf;
1447                        let mut position = 0;
1448                        let mut input = &b"<!---->other content"[$skip..];
1449                        //                 ^= 0   ^= 7
1450
1451                        let (ty, bytes) = $source(&mut input)
1452                            .read_bang_element(buf, &mut position)
1453                            $(.$await)?
1454                            .unwrap();
1455                        assert_eq!(
1456                            (ty, Bytes(bytes)),
1457                            (BangType::Comment, Bytes(b"<!---->"))
1458                        );
1459                        assert_eq!(position, 7);
1460                    }
1461
                        /// A comment with content. The `>` directly after `<!---` is expected
                        /// NOT to end the comment: the returned bytes must span the whole
                        /// `<!--->comment<--->` and `position` must be 18.
1462                    #[$test]
1463                    $($async)? fn with_content() {
1464                        let buf = $buf;
1465                        let mut position = 0;
1466                        let mut input = &b"<!--->comment<--->other content"[$skip..];
1467                        //                 ^= 0              ^= 18
1468
1469                        let (ty, bytes) = $source(&mut input)
1470                            .read_bang_element(buf, &mut position)
1471                            $(.$await)?
1472                            .unwrap();
1473                        assert_eq!(
1474                            (ty, Bytes(bytes)),
1475                            (BangType::Comment, Bytes(b"<!--->comment<--->"))
1476                        );
1477                        assert_eq!(position, 18);
1478                    }
1479                }
1480
1481                /// Checks that reading DOCTYPE definition works correctly
                    ///
                    /// The same four scenarios are exercised for the keyword written in
                    /// upper case (`<!DOCTYPE`) and in lower case (`<!doctype`).
1482                mod doctype {
1483                    use super::*;
1484
                        /// Scenarios with the keyword written as `DOCTYPE`
1485                    mod uppercase {
1486                        use super::*;
1487                        use pretty_assertions::assert_eq;
1488
                            /// `<!D` followed by a space and text, without any `>`: the test
                            /// expects `SyntaxError::UnclosedDoctype` and `position` equal to 17.
1489                        #[$test]
1490                        $($async)? fn not_properly_start() {
1491                            let buf = $buf;
1492                            let mut position = 0;
1493                            let mut input = &b"<!D other content"[$skip..];
1494                            //                 ^= 0             ^= 17
1495
1496                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1497                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1498                                x => panic!(
1499                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1500                                    x
1501                                ),
1502                            }
1503                            assert_eq!(position, 17);
1504                        }
1505
                            /// `<!DOCTYPE` immediately followed by text and never closed: the
                            /// test expects `SyntaxError::UnclosedDoctype` and `position` equal to 22.
1506                        #[$test]
1507                        $($async)? fn without_space() {
1508                            let buf = $buf;
1509                            let mut position = 0;
1510                            let mut input = &b"<!DOCTYPEother content"[$skip..];
1511                            //                 ^= 0                  ^= 22
1512
1513                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1514                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1515                                x => panic!(
1516                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1517                                    x
1518                                ),
1519                            }
1520                            assert_eq!(position, 22);
1521                        }
1522
                            /// `<!DOCTYPE>` without any content is returned as
                            /// `BangType::DocType(DtdParser::Finished)` with exactly those 10 bytes;
                            /// `position` is expected to be 10.
1523                        #[$test]
1524                        $($async)? fn empty() {
1525                            let buf = $buf;
1526                            let mut position = 0;
1527                            let mut input = &b"<!DOCTYPE>other content"[$skip..];
1528                            //                 ^= 0      ^= 10
1529
1530                            let (ty, bytes) = $source(&mut input)
1531                                .read_bang_element(buf, &mut position)
1532                                $(.$await)?
1533                                .unwrap();
1534                            assert_eq!(
1535                                (ty, Bytes(bytes)),
1536                                (BangType::DocType(DtdParser::Finished), Bytes(b"<!DOCTYPE>"))
1537                            );
1538                            assert_eq!(position, 10);
1539                        }
1540
                            /// `<!DOCTYPE ` followed by text but no closing `>`: the test expects
                            /// `SyntaxError::UnclosedDoctype` and `position` equal to 23.
1541                        #[$test]
1542                        $($async)? fn not_closed() {
1543                            let buf = $buf;
1544                            let mut position = 0;
1545                            let mut input = &b"<!DOCTYPE other content"[$skip..];
1546                            //                 ^= 0                   ^= 23
1547
1548                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1549                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1550                                x => panic!(
1551                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1552                                    x
1553                                ),
1554                            }
1555                            assert_eq!(position, 23);
1556                        }
1557                    }
1558
                        /// Scenarios with the keyword written as `doctype`
1559                    mod lowercase {
1560                        use super::*;
1561                        use pretty_assertions::assert_eq;
1562
                            /// `<!d` followed by a space and text, without any `>`: the test
                            /// expects `SyntaxError::UnclosedDoctype` and `position` equal to 17.
1563                        #[$test]
1564                        $($async)? fn not_properly_start() {
1565                            let buf = $buf;
1566                            let mut position = 0;
1567                            let mut input = &b"<!d other content"[$skip..];
1568                            //                 ^= 0             ^= 17
1569
1570                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1571                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1572                                x => panic!(
1573                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1574                                    x
1575                                ),
1576                            }
1577                            assert_eq!(position, 17);
1578                        }
1579
                            /// `<!doctype` immediately followed by text and never closed: the
                            /// test expects `SyntaxError::UnclosedDoctype` and `position` equal to 22.
1580                        #[$test]
1581                        $($async)? fn without_space() {
1582                            let buf = $buf;
1583                            let mut position = 0;
1584                            let mut input = &b"<!doctypeother content"[$skip..];
1585                            //                 ^= 0                  ^= 22
1586
1587                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1588                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1589                                x => panic!(
1590                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1591                                    x
1592                                ),
1593                            }
1594                            assert_eq!(position, 22);
1595                        }
1596
                            /// `<!doctype>` without any content is returned as
                            /// `BangType::DocType(DtdParser::Finished)` with exactly those 10 bytes;
                            /// `position` is expected to be 10.
1597                        #[$test]
1598                        $($async)? fn empty() {
1599                            let buf = $buf;
1600                            let mut position = 0;
1601                            let mut input = &b"<!doctype>other content"[$skip..];
1602                            //                 ^= 0      ^= 10
1603
1604                            let (ty, bytes) = $source(&mut input)
1605                                .read_bang_element(buf, &mut position)
1606                                $(.$await)?
1607                                .unwrap();
1608                            assert_eq!(
1609                                (ty, Bytes(bytes)),
1610                                (BangType::DocType(DtdParser::Finished), Bytes(b"<!doctype>"))
1611                            );
1612                            assert_eq!(position, 10);
1613                        }
1614
                            /// `<!doctype ` followed by text but no closing `>`: the test expects
                            /// `SyntaxError::UnclosedDoctype` and `position` equal to 23.
1615                        #[$test]
1616                        $($async)? fn not_closed() {
1617                            let buf = $buf;
1618                            let mut position = 0;
1619                            let mut input = &b"<!doctype other content"[$skip..];
1620                            //                 ^= 0                   ^= 23
1621
1622                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1623                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1624                                x => panic!(
1625                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1626                                    x
1627                                ),
1628                            }
1629                            assert_eq!(position, 23);
1630                        }
1631                    }
1632                }
1633            }
1634
                    /// Checks which `ReadTextResult` variant `read_text` returns for minimal
                    /// inputs and how far `position` is advanced. Every test starts with
                    /// `position = 1`.
1635            mod read_text {
1636                use super::*;
1637                use crate::reader::ReadTextResult;
1638                use crate::utils::Bytes;
1639                use pretty_assertions::assert_eq;
1640
                    /// Empty input: `UpToEof` with empty bytes is expected and `position`
                    /// must stay at 1.
1641                #[$test]
1642                $($async)? fn empty() {
1643                    let buf = $buf;
1644                    let mut position = 1;
1645                    let mut input = b"".as_ref();
1646                    //                ^= 1
1647
1648                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1649                        ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"")),
1650                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1651                    }
1652                    assert_eq!(position, 1);
1653                }
1654
                    /// Input that starts with `<`: `Markup` is expected, its payload must
                    /// compare equal to `$buf`, and `position` must stay at 1.
1655                #[$test]
1656                $($async)? fn markup() {
1657                    let buf = $buf;
1658                    let mut position = 1;
1659                    let mut input = b"<".as_ref();
1660                    //                 ^= 1
1661
1662                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1663                        ReadTextResult::Markup(b) => assert_eq!(b, $buf),
1664                        x => panic!("Expected `Markup(_)`, but got `{:?}`", x),
1665                    }
1666                    assert_eq!(position, 1);
1667                }
1668
                    /// Input that starts with `&`: `Ref` is expected, its payload must
                    /// compare equal to `$buf`, and `position` must stay at 1.
1669                #[$test]
1670                $($async)? fn ref_() {
1671                    let buf = $buf;
1672                    let mut position = 1;
1673                    let mut input = b"&".as_ref();
1674                    //                ^= 1
1675
1676                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1677                        ReadTextResult::Ref(b) => assert_eq!(b, $buf),
1678                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1679                    }
1680                    assert_eq!(position, 1);
1681                }
1682
                    /// Text followed by `<`: `UpToMarkup` carrying only the text (`a`) is
                    /// expected and `position` must become 2.
1683                #[$test]
1684                $($async)? fn up_to_markup() {
1685                    let buf = $buf;
1686                    let mut position = 1;
1687                    let mut input = b"a<".as_ref();
1688                    //                  ^= 2
1689
1690                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1691                        ReadTextResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1692                        x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
1693                    }
1694                    assert_eq!(position, 2);
1695                }
1696
                    /// Text followed by `&`: `UpToRef` carrying only the text (`a`) is
                    /// expected and `position` must become 2.
1697                #[$test]
1698                $($async)? fn up_to_ref() {
1699                    let buf = $buf;
1700                    let mut position = 1;
1701                    let mut input = b"a&".as_ref();
1702                    //                 ^= 2
1703
1704                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1705                        ReadTextResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1706                        x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
1707                    }
1708                    assert_eq!(position, 2);
1709                }
1710
                    /// Text that runs to the end of input: `UpToEof` carrying the text
                    /// (`a`) is expected and `position` must become 2.
1711                #[$test]
1712                $($async)? fn up_to_eof() {
1713                    let buf = $buf;
1714                    let mut position = 1;
1715                    let mut input = b"a".as_ref();
1716                    //                 ^= 2
1717
1718                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1719                        ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1720                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1721                    }
1722                    assert_eq!(position, 2);
1723                }
1724            }
1725
                    /// Checks which `ReadRefResult` variant `read_ref` returns for inputs that
                    /// start with `&` and how far `position` is advanced. Every test starts
                    /// with `position = 1`.
1726            mod read_ref {
1727                use super::*;
1728                use crate::reader::ReadRefResult;
1729                use crate::utils::Bytes;
1730                use pretty_assertions::assert_eq;
1731
1732                // Empty input is not allowed for `read_ref` so not tested.
1733                // Borrowed source triggers debug assertion,
1734                // buffered do nothing due to implementation details.
1735
                    /// A lone `&` at the end of input: `UpToEof` carrying `&` is expected
                    /// and `position` must become 2.
1736                #[$test]
1737                $($async)? fn up_to_eof() {
1738                    let buf = $buf;
1739                    let mut position = 1;
1740                    let mut input = b"&".as_ref();
1741                    //                 ^= 2
1742
1743                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1744                        ReadRefResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1745                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1746                    }
1747                    assert_eq!(position, 2);
1748                }
1749
                    /// `&` directly followed by another `&`: `UpToRef` carrying only the
                    /// first `&` is expected and `position` must become 2.
1750                #[$test]
1751                $($async)? fn up_to_ref() {
1752                    let buf = $buf;
1753                    let mut position = 1;
1754                    let mut input = b"&&".as_ref();
1755                    //                 ^= 2
1756
1757                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1758                        ReadRefResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1759                        x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
1760                    }
1761                    assert_eq!(position, 2);
1762                }
1763
                    /// `&` directly followed by `<`: `UpToMarkup` carrying only the `&` is
                    /// expected and `position` must become 2.
1764                #[$test]
1765                $($async)? fn up_to_markup() {
1766                    let buf = $buf;
1767                    let mut position = 1;
1768                    let mut input = b"&<".as_ref();
1769                    //                 ^= 2
1770
1771                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1772                        ReadRefResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1773                        x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
1774                    }
1775                    assert_eq!(position, 2);
1776                }
1777
                    /// A reference with an empty name, `&;`: `Ref` carrying both bytes is
                    /// expected and `position` must become 3.
1778                #[$test]
1779                $($async)? fn empty_ref() {
1780                    let buf = $buf;
1781                    let mut position = 1;
1782                    let mut input = b"&;".as_ref();
1783                    //                  ^= 3
1784
1785                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1786                        ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&;")),
1787                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1788                    }
1789                    assert_eq!(position, 3);
1790                }
1791
                    /// A regular reference, `&lt;`: `Ref` carrying the whole reference
                    /// (including `&` and `;`) is expected and `position` must become 5.
1792                #[$test]
1793                $($async)? fn normal() {
1794                    let buf = $buf;
1795                    let mut position = 1;
1796                    let mut input = b"&lt;".as_ref();
1797                    //                    ^= 5
1798
1799                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1800                        ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&lt;")),
1801                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1802                    }
1803                    assert_eq!(position, 5);
1804                }
1805            }
1806
                    /// Checks `read_with(ElementParser::default(), ..)` on open, self-closed
                    /// and close tags: the returned bytes must be the whole tag and
                    /// `position` must equal the length of the input literal.
1807            mod read_element {
1808                use super::*;
1809                use crate::errors::{Error, SyntaxError};
1810                use crate::parser::ElementParser;
1811                use crate::utils::Bytes;
1812                use pretty_assertions::assert_eq;
1813
1814                /// Checks that nothing was read from empty buffer.
1815                /// `<` is read in `peek_one`, which is called before `read_with`; that is why it is in the input buffer.
1816                /// `peek_one`, however, does not increment the position, for simplicity of the code.
                    ///
                    /// The test expects `SyntaxError::UnclosedTag` and `position` equal to 1.
1817                #[$test]
1818                $($async)? fn empty() {
1819                    let buf = $buf;
1820                    let mut position = 0;
1821                    let mut input = &b"<"[$skip..];
1822                    //                  ^= 1
1823
1824                    match $source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? {
1825                        Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedTag),
1826                        x => panic!(
1827                            "Expected `Err(Syntax(_))`, but got `{:?}`",
1828                            x
1829                        ),
1830                    }
1831                    assert_eq!(position, 1);
1832                }
1833
                    /// Open tags (`<...>`)
1834                mod open {
1835                    use super::*;
1836                    use pretty_assertions::assert_eq;
1837
                        /// A tag without a name, `<>`, is returned as-is; `position` must be 2.
1838                    #[$test]
1839                    $($async)? fn empty_tag() {
1840                        let buf = $buf;
1841                        let mut position = 0;
1842                        let mut input = &b"<>"[$skip..];
1843                        //                   ^= 2
1844
1845                        assert_eq!(
1846                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1847                            Bytes(b"<>")
1848                        );
1849                        assert_eq!(position, 2);
1850                    }
1851
                        /// A plain `<tag>` is returned as-is; `position` must be 5.
1852                    #[$test]
1853                    $($async)? fn normal() {
1854                        let buf = $buf;
1855                        let mut position = 0;
1856                        let mut input = &b"<tag>"[$skip..];
1857                        //                      ^= 5
1858
1859                        assert_eq!(
1860                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1861                            Bytes(b"<tag>")
1862                        );
1863                        assert_eq!(position, 5);
1864                    }
1865
                        /// A tag consisting only of a colon, `<:>`, is returned as-is;
                        /// `position` must be 3.
1866                    #[$test]
1867                    $($async)? fn empty_ns_empty_tag() {
1868                        let buf = $buf;
1869                        let mut position = 0;
1870                        let mut input = &b"<:>"[$skip..];
1871                        //                    ^= 3
1872
1873                        assert_eq!(
1874                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1875                            Bytes(b"<:>")
1876                        );
1877                        assert_eq!(position, 3);
1878                    }
1879
                        /// A tag whose name starts with a colon, `<:tag>`, is returned as-is;
                        /// `position` must be 6.
1880                    #[$test]
1881                    $($async)? fn empty_ns() {
1882                        let buf = $buf;
1883                        let mut position = 0;
1884                        let mut input = &b"<:tag>"[$skip..];
1885                        //                       ^= 6
1886
1887                        assert_eq!(
1888                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1889                            Bytes(b"<:tag>")
1890                        );
1891                        assert_eq!(position, 6);
1892                    }
1893
                        /// `>` inside double- and single-quoted attribute values is expected
                        /// NOT to end the tag: the whole 39-byte tag must be returned.
1894                    #[$test]
1895                    $($async)? fn with_attributes() {
1896                        let buf = $buf;
1897                        let mut position = 0;
1898                        let mut input = &br#"<tag  attr-1=">"  attr2  =  '>'  3attr>"#[$skip..];
1899                        //                                                          ^= 39
1900
1901                        assert_eq!(
1902                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1903                            Bytes(br#"<tag  attr-1=">"  attr2  =  '>'  3attr>"#)
1904                        );
1905                        assert_eq!(position, 39);
1906                    }
1907                }
1908
                    /// Self-closed tags (`<.../>`)
1909                mod self_closed {
1910                    use super::*;
1911                    use pretty_assertions::assert_eq;
1912
                        /// A self-closed tag without a name, `</>`, is returned as-is;
                        /// `position` must be 3.
1913                    #[$test]
1914                    $($async)? fn empty_tag() {
1915                        let buf = $buf;
1916                        let mut position = 0;
1917                        let mut input = &b"</>"[$skip..];
1918                        //                    ^= 3
1919
1920                        assert_eq!(
1921                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1922                            Bytes(b"</>")
1923                        );
1924                        assert_eq!(position, 3);
1925                    }
1926
                        /// A plain `<tag/>` is returned as-is; `position` must be 6.
1927                    #[$test]
1928                    $($async)? fn normal() {
1929                        let buf = $buf;
1930                        let mut position = 0;
1931                        let mut input = &b"<tag/>"[$skip..];
1932                        //                       ^= 6
1933
1934                        assert_eq!(
1935                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1936                            Bytes(b"<tag/>")
1937                        );
1938                        assert_eq!(position, 6);
1939                    }
1940
                        /// A self-closed tag consisting only of a colon, `<:/>`, is returned
                        /// as-is; `position` must be 4.
1941                    #[$test]
1942                    $($async)? fn empty_ns_empty_tag() {
1943                        let buf = $buf;
1944                        let mut position = 0;
1945                        let mut input = &b"<:/>"[$skip..];
1946                        //                     ^= 4
1947
1948                        assert_eq!(
1949                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1950                            Bytes(b"<:/>")
1951                        );
1952                        assert_eq!(position, 4);
1953                    }
1954
                        /// A self-closed tag whose name starts with a colon, `<:tag/>`, is
                        /// returned as-is; `position` must be 7.
1955                    #[$test]
1956                    $($async)? fn empty_ns() {
1957                        let buf = $buf;
1958                        let mut position = 0;
1959                        let mut input = &b"<:tag/>"[$skip..];
1960                        //                        ^= 7
1961
1962                        assert_eq!(
1963                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1964                            Bytes(b"<:tag/>")
1965                        );
1966                        assert_eq!(position, 7);
1967                    }
1968
                        /// `/>` inside double- and single-quoted attribute values is expected
                        /// NOT to end the tag: the whole 42-byte tag must be returned.
1969                    #[$test]
1970                    $($async)? fn with_attributes() {
1971                        let buf = $buf;
1972                        let mut position = 0;
1973                        let mut input = &br#"<tag  attr-1="/>"  attr2  =  '/>'  3attr/>"#[$skip..];
1974                        //                                                             ^= 42
1975
1976                        assert_eq!(
1977                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1978                            Bytes(br#"<tag  attr-1="/>"  attr2  =  '/>'  3attr/>"#)
1979                        );
1980                        assert_eq!(position, 42);
1981                    }
1982                }
1983
                    /// Close tags (`</...>`)
1984                mod close {
1985                    use super::*;
1986                    use pretty_assertions::assert_eq;
1987
                        /// A close tag whose name is a single space, `</ >`, is returned
                        /// as-is; `position` must be 4.
1988                    #[$test]
1989                    $($async)? fn empty_tag() {
1990                        let buf = $buf;
1991                        let mut position = 0;
1992                        let mut input = &b"</ >"[$skip..];
1993                        //                     ^= 4
1994
1995                        assert_eq!(
1996                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1997                            Bytes(b"</ >")
1998                        );
1999                        assert_eq!(position, 4);
2000                    }
2001
                        /// A plain `</tag>` is returned as-is; `position` must be 6.
2002                    #[$test]
2003                    $($async)? fn normal() {
2004                        let buf = $buf;
2005                        let mut position = 0;
2006                        let mut input = &b"</tag>"[$skip..];
2007                        //                       ^= 6
2008
2009                        assert_eq!(
2010                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2011                            Bytes(b"</tag>")
2012                        );
2013                        assert_eq!(position, 6);
2014                    }
2015
                        /// A close tag consisting only of a colon, `</:>`, is returned as-is;
                        /// `position` must be 4.
2016                    #[$test]
2017                    $($async)? fn empty_ns_empty_tag() {
2018                        let buf = $buf;
2019                        let mut position = 0;
2020                        let mut input = &b"</:>"[$skip..];
2021                        //                     ^= 4
2022
2023                        assert_eq!(
2024                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2025                            Bytes(b"</:>")
2026                        );
2027                        assert_eq!(position, 4);
2028                    }
2029
                        /// A close tag whose name starts with a colon, `</:tag>`, is returned
                        /// as-is; `position` must be 7.
2030                    #[$test]
2031                    $($async)? fn empty_ns() {
2032                        let buf = $buf;
2033                        let mut position = 0;
2034                        let mut input = &b"</:tag>"[$skip..];
2035                        //                        ^= 7
2036
2037                        assert_eq!(
2038                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2039                            Bytes(b"</:tag>")
2040                        );
2041                        assert_eq!(position, 7);
2042                    }
2043
                        /// A close tag carrying attribute-like content: `>` inside the quoted
                        /// values is expected NOT to end the tag, so the whole 40-byte tag
                        /// must be returned.
2044                    #[$test]
2045                    $($async)? fn with_attributes() {
2046                        let buf = $buf;
2047                        let mut position = 0;
2048                        let mut input = &br#"</tag  attr-1=">"  attr2  =  '>'  3attr>"#[$skip..];
2049                        //                                                           ^= 40
2050
2051                        assert_eq!(
2052                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2053                            Bytes(br#"</tag  attr-1=">"  attr2  =  '>'  3attr>"#)
2054                        );
2055                        assert_eq!(position, 40);
2056                    }
2057                }
2058            }
2059
2060            /// Ensures, that no empty `Text` events are generated
2061            mod $read_event {
2062                use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
2063                use crate::reader::Reader;
2064                use pretty_assertions::assert_eq;
2065
2066                /// When `encoding` feature is enabled, encoding should be detected
2067                /// from BOM (UTF-8) and BOM should be stripped.
2068                ///
2069                /// When `encoding` feature is disabled, UTF-8 is assumed and BOM
2070                /// character should be stripped for consistency
                    ///
                    /// The input consists of two BOM characters: the test expects a single
                    /// `Text` event holding one BOM character, followed by `Eof`.
2071                #[$test]
2072                $($async)? fn bom_from_reader() {
2073                    let mut reader = Reader::from_reader("\u{feff}\u{feff}".as_bytes());
2074
2075                    assert_eq!(
2076                        reader.$read_event($buf) $(.$await)? .unwrap(),
2077                        Event::Text(BytesText::from_escaped("\u{feff}"))
2078                    );
2079
2080                    assert_eq!(
2081                        reader.$read_event($buf) $(.$await)? .unwrap(),
2082                        Event::Eof
2083                    );
2084                }
2085
2086                /// When parsing from &str, encoding is fixed (UTF-8), so
2087                /// - when `encoding` feature is disabled, the behavior is the
2088                ///   same as in `bom_from_reader` test
2089                /// - when `encoding` feature is enabled, the behavior should
2090                ///   stay consistent, so the first BOM character is stripped
                    ///
                    /// The input consists of two BOM characters: the test expects a single
                    /// `Text` event holding one BOM character, followed by `Eof`.
2091                #[$test]
2092                $($async)? fn bom_from_str() {
2093                    let mut reader = Reader::from_str("\u{feff}\u{feff}");
2094
2095                    assert_eq!(
2096                        reader.$read_event($buf) $(.$await)? .unwrap(),
2097                        Event::Text(BytesText::from_escaped("\u{feff}"))
2098                    );
2099
2100                    assert_eq!(
2101                        reader.$read_event($buf) $(.$await)? .unwrap(),
2102                        Event::Eof
2103                    );
2104                }
2105
2106                #[$test]
2107                $($async)? fn declaration() {
                        // A document consisting only of an XML declaration: the very first
                        // event read must be `Decl`, not an empty `Text` event
2108                    let mut reader = Reader::from_str("<?xml ?>");
2109
2110                    assert_eq!(
2111                        reader.$read_event($buf) $(.$await)? .unwrap(),
                            // Expected content is `xml ` (the text between `<?` and `?>`,
                            // trailing space included); `3` is presumably the length of the
                            // `xml` name -- confirm against `BytesStart::from_content`
2112                        Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
2113                    );
2114                }
2115
2116                #[$test]
2117                $($async)? fn doctype() {
                        // A document consisting only of a DOCTYPE: the very first event read
                        // must be `DocType`, not an empty `Text` event
2118                    let mut reader = Reader::from_str("<!DOCTYPE x>");
2119
2120                    assert_eq!(
2121                        reader.$read_event($buf) $(.$await)? .unwrap(),
                            // Only `x` is expected as the payload: the `<!DOCTYPE ` prefix
                            // and the closing `>` are not part of the event content
2122                        Event::DocType(BytesText::from_escaped("x"))
2123                    );
2124                }
2125
2126                #[$test]
2127                $($async)? fn processing_instruction() {
                        // The PI body deliberately contains quote characters and a `?` and `>`
                        // that are separated by a space, so they do not form the `?>` terminator.
                        // The very first event read must be `PI`, not an empty `Text` event
2128                    let mut reader = Reader::from_str("<?xml-stylesheet '? >\" ?>");
2129
2130                    assert_eq!(
2131                        reader.$read_event($buf) $(.$await)? .unwrap(),
                            // Expected content is everything between `<?` and the final `?>`,
                            // including the trailing space
2132                        Event::PI(BytesPI::new("xml-stylesheet '? >\" "))
2133                    );
2134                }
2135
2136                /// Lone closing tags are not allowed, so the end tag is tested together with a start tag
2137                #[$test]
2138                $($async)? fn start_and_end() {
2139                    let mut reader = Reader::from_str("<tag></tag>");
2140
                        // The first event must be `Start`, not an empty `Text` event
2141                    assert_eq!(
2142                        reader.$read_event($buf) $(.$await)? .unwrap(),
2143                        Event::Start(BytesStart::new("tag"))
2144                    );
2145
                        // `End` must follow immediately: there is nothing between the two tags,
                        // so no empty `Text` event is expected in between
2146                    assert_eq!(
2147                        reader.$read_event($buf) $(.$await)? .unwrap(),
2148                        Event::End(BytesEnd::new("tag"))
2149                    );
2150                }
2151
2152                #[$test]
2153                $($async)? fn empty() {
                        // A document consisting only of a self-closed tag: the very first
                        // event read must be `Empty`, not an empty `Text` event
2154                    let mut reader = Reader::from_str("<tag/>");
2155
2156                    assert_eq!(
2157                        reader.$read_event($buf) $(.$await)? .unwrap(),
2158                        Event::Empty(BytesStart::new("tag"))
2159                    );
2160                }
2161
2162                #[$test]
2163                $($async)? fn text() {
                        // A document consisting only of character data without any markup
2164                    let mut reader = Reader::from_str("text");
2165
                        // The whole input is expected to be reported as one `Text` event
2166                    assert_eq!(
2167                        reader.$read_event($buf) $(.$await)? .unwrap(),
2168                        Event::Text(BytesText::from_escaped("text"))
2169                    );
2170                }
2171
2172                #[$test]
2173                $($async)? fn cdata() {
                        // A document consisting only of a CDATA section with no content: the
                        // very first event read must be `CData`, not an empty `Text` event
2174                    let mut reader = Reader::from_str("<![CDATA[]]>");
2175
2176                    assert_eq!(
2177                        reader.$read_event($buf) $(.$await)? .unwrap(),
                            // The section is still reported, with an empty payload
2178                        Event::CData(BytesCData::new(""))
2179                    );
2180                }
2181
2182                #[$test]
2183                $($async)? fn comment() {
                        // A document consisting only of a comment with no content: the very
                        // first event read must be `Comment`, not an empty `Text` event
2184                    let mut reader = Reader::from_str("<!---->");
2185
2186                    assert_eq!(
2187                        reader.$read_event($buf) $(.$await)? .unwrap(),
                            // The comment is still reported, with an empty payload
2188                        Event::Comment(BytesText::from_escaped(""))
2189                    );
2190                }
2191
2192                #[$test]
2193                $($async)? fn eof() {
                        // Completely empty input
2194                    let mut reader = Reader::from_str("");
2195
                        // `Eof` must be the very first event: no empty `Text` event is
                        // expected before it
2196                    assert_eq!(
2197                        reader.$read_event($buf) $(.$await)? .unwrap(),
2198                        Event::Eof
2199                    );
2200                }
2201            }
2202        };
2203    }
2204
2205    // Export macros for the child modules:
2206    // - buffered_reader
2207    // - slice_reader
2208    pub(super) use check;
2209}
quick_xml/reader/mod.rs

quick_xml/reader/
mod.rs