Skip to main content

quick_xml/reader/
mod.rs

1//! Contains high-level interface for a pull-based XML parser.
2
3#[cfg(feature = "encoding")]
4use encoding_rs::Encoding;
5use std::io;
6use std::ops::Range;
7
8use crate::encoding::Decoder;
9#[cfg(feature = "encoding")]
10use crate::encoding::DetectedEncoding;
11use crate::errors::{Error, IllFormedError, SyntaxError};
12use crate::events::{BytesRef, Event};
13use crate::parser::{DtdParser, ElementParser, Parser, PiParser};
14use crate::reader::state::ReaderState;
15
/// A struct that holds a parser configuration.
///
/// Current parser configuration can be retrieved by calling [`Reader::config()`]
/// and changed by changing properties of the object returned by a call to
/// [`Reader::config_mut()`].
///
/// [`Reader::config()`]: crate::reader::Reader::config
/// [`Reader::config_mut()`]: crate::reader::Reader::config_mut
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))]
#[non_exhaustive]
pub struct Config {
    /// Whether a lone ampersand character (without a paired semicolon) should be
    /// allowed in textual content. Unless enabled, in case of a dangling ampersand,
    /// the [`Error::IllFormed(UnclosedReference)`] is returned from read methods.
    ///
    /// Default: `false`
    ///
    /// # Example
    ///
    /// ```
    /// # use quick_xml::events::{BytesRef, BytesText, Event};
    /// # use quick_xml::reader::Reader;
    /// # use pretty_assertions::assert_eq;
    /// let mut reader = Reader::from_str("text with & & & alone");
    /// reader.config_mut().allow_dangling_amp = true;
    ///
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new("text with ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(BytesRef::new("amp")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new(" ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& alone")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
    /// ```
    ///
    /// [`Error::IllFormed(UnclosedReference)`]: crate::errors::IllFormedError::UnclosedReference
    pub allow_dangling_amp: bool,

    /// Whether unmatched closing tag names should be allowed. Unless enabled,
    /// in case of a dangling end tag, the [`Error::IllFormed(UnmatchedEndTag)`]
    /// is returned from read methods.
    ///
    /// When set to `true`, it won't check if a closing tag has a corresponding
    /// opening tag at all. For example, `<a></a></b>` will be permitted.
    ///
    /// Note that the emitted [`End`] event will not be modified if this is enabled,
    /// i.e. it will contain the data of the unmatched end tag.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event.
    ///
    /// Default: `false`
    ///
    /// [`Error::IllFormed(UnmatchedEndTag)`]: crate::errors::IllFormedError::UnmatchedEndTag
    /// [`End`]: crate::events::Event::End
    pub allow_unmatched_ends: bool,

    /// Whether comments should be validated. If enabled, in case of an invalid comment
    /// [`Error::IllFormed(DoubleHyphenInComment)`] is returned from read methods.
    ///
    /// When set to `true`, every [`Comment`] event will be checked for not
    /// containing `--`, which [is not allowed] in XML comments. Most of the time
    /// we don't want comments at all so we don't really care about comment
    /// correctness, thus the default value is `false` to improve performance.
    ///
    /// Default: `false`
    ///
    /// [`Error::IllFormed(DoubleHyphenInComment)`]: crate::errors::IllFormedError::DoubleHyphenInComment
    /// [`Comment`]: crate::events::Event::Comment
    /// [is not allowed]: https://www.w3.org/TR/xml11/#sec-comments
    pub check_comments: bool,

    /// Whether mismatched closing tag names should be detected. If enabled, in
    /// case of mismatch the [`Error::IllFormed(MismatchedEndTag)`] is returned from
    /// read methods.
    ///
    /// Note that start and end tags [should match literally][spec], they cannot
    /// have different prefixes even if both prefixes resolve to the same namespace.
    /// The XML
    ///
    /// ```xml
    /// <outer xmlns="namespace" xmlns:p="namespace">
    /// </p:outer>
    /// ```
    ///
    /// is not valid, even though semantically the start tag is the same as the
    /// end tag. The reason is that namespaces are an extension of the original
    /// XML specification (without namespaces) and it should be backward-compatible.
    ///
    /// When set to `false`, it won't check if a closing tag matches the corresponding
    /// opening tag. For example, `<mytag></different_tag>` will be permitted.
    ///
    /// If the XML is known to be sane (already processed, etc.) this saves extra time.
    ///
    /// Note that the emitted [`End`] event will not be modified if this is disabled,
    /// i.e. it will contain the data of the mismatched end tag.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event. However if
    /// [`expand_empty_elements`] is also set, only one additional allocation will
    /// be performed that supports both of these options.
    ///
    /// Default: `true`
    ///
    /// [`Error::IllFormed(MismatchedEndTag)`]: crate::errors::IllFormedError::MismatchedEndTag
    /// [spec]: https://www.w3.org/TR/xml11/#dt-etag
    /// [`End`]: crate::events::Event::End
    /// [`expand_empty_elements`]: Self::expand_empty_elements
    pub check_end_names: bool,

    /// Whether empty elements should be split into a [`Start`] and an [`End`] event.
    ///
    /// When set to `true`, all [`Empty`] events produced by a self-closing tag
    /// like `<tag/>` are expanded into a [`Start`] event followed by an [`End`]
    /// event. When set to `false` (the default), those tags are represented by
    /// an [`Empty`] event instead.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event. However if
    /// [`check_end_names`] is also set, only one additional allocation will be
    /// performed that supports both of these options.
    ///
    /// Default: `false`
    ///
    /// [`Empty`]: crate::events::Event::Empty
    /// [`Start`]: crate::events::Event::Start
    /// [`End`]: crate::events::Event::End
    /// [`check_end_names`]: Self::check_end_names
    pub expand_empty_elements: bool,

    /// Whether trailing whitespace after the markup name is trimmed in closing
    /// tags `</a >`.
    ///
    /// If `true` the emitted [`End`] event is stripped of trailing whitespace
    /// after the markup name.
    ///
    /// Note that if set to `false` and [`check_end_names`] is `true` the comparison
    /// of markup names is going to fail erroneously if a closing tag contains
    /// trailing whitespace.
    ///
    /// Default: `true`
    ///
    /// [`End`]: crate::events::Event::End
    /// [`check_end_names`]: Self::check_end_names
    pub trim_markup_names_in_closing_tags: bool,

    /// Whether whitespace before character data should be removed.
    ///
    /// When set to `true`, leading whitespace is trimmed in [`Text`] events.
    /// If after that the event is empty it will not be pushed.
    ///
    /// Default: `false`
    ///
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
    ///
    /// WARNING: With this option every text event will be trimmed, which is
    /// incorrect behavior when text events are delimited by comments, processing
    /// instructions or CDATA sections. To correctly trim data manually apply
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
    /// only to necessary events.
    /// </div>
    ///
    /// [`Text`]: crate::events::Event::Text
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
    pub trim_text_start: bool,

    /// Whether whitespace after character data should be removed.
    ///
    /// When set to `true`, trailing whitespace is trimmed in [`Text`] events.
    /// If after that the event is empty it will not be pushed.
    ///
    /// Default: `false`
    ///
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
    ///
    /// WARNING: With this option every text event will be trimmed, which is
    /// incorrect behavior when text events are delimited by comments, processing
    /// instructions or CDATA sections. To correctly trim data manually apply
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
    /// only to necessary events.
    /// </div>
    ///
    /// [`Text`]: crate::events::Event::Text
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
    pub trim_text_end: bool,
}
205
206impl Config {
207    /// Set both [`trim_text_start`] and [`trim_text_end`] to the same value.
208    ///
209    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
210    ///
211    /// WARNING: With this option every text events will be trimmed which is
212    /// incorrect behavior when text events delimited by comments, processing
213    /// instructions or CDATA sections. To correctly trim data manually apply
214    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
215    /// only to necessary events.
216    /// </div>
217    ///
218    /// [`trim_text_start`]: Self::trim_text_start
219    /// [`trim_text_end`]: Self::trim_text_end
220    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
221    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
222    #[inline]
223    pub fn trim_text(&mut self, trim: bool) {
224        self.trim_text_start = trim;
225        self.trim_text_end = trim;
226    }
227
228    /// Turn on or off all checks for well-formedness. Currently it is that settings:
229    /// - [`check_comments`](Self::check_comments)
230    /// - [`check_end_names`](Self::check_end_names)
231    #[inline]
232    pub fn enable_all_checks(&mut self, enable: bool) {
233        self.check_comments = enable;
234        self.check_end_names = enable;
235    }
236}
237
238impl Default for Config {
239    fn default() -> Self {
240        Self {
241            allow_dangling_amp: false,
242            allow_unmatched_ends: false,
243            check_comments: false,
244            check_end_names: true,
245            expand_empty_elements: false,
246            trim_markup_names_in_closing_tags: true,
247            trim_text_start: false,
248            trim_text_end: false,
249        }
250    }
251}
252
253////////////////////////////////////////////////////////////////////////////////////////////////////
254
/// Generates the body of a `read_event`-like method: drives the [`ParseState`]
/// machine stored in `$self.state.state` until one event or one error is produced.
///
/// Parameters:
/// - `$self`: the reader whose `state` (parse state, offset, config, last error
///   offset) is read and updated
/// - `$buf`: buffer handed to the low-level read methods; it is reassigned when
///   `read_text` hands a buffer back so the next loop iteration can reuse it
/// - `$reader`: expression yielding the source that provides `read_ref`,
///   `read_text`, `skip_whitespace` and the BOM/encoding detection methods
/// - `$read_until_close`: name of the method called on `$self` while in the
///   `InsideMarkup` state
/// - `$await`: optional `await` token; when given it is appended to every read call
///
/// After the loop, any error other than `Error::IllFormed`, and the `Eof` event,
/// move the reader to the `Done` state.
macro_rules! read_event_impl {
    (
        $self:ident, $buf:ident,
        $reader:expr,
        $read_until_close:ident
        $(, $await:ident)?
    ) => {{
        let event = loop {
            break match $self.state.state {
                ParseState::Init => { // Go to InsideText state
                    // If the encoding is set explicitly, we do not need to detect it. For example,
                    // explicit UTF-8 is set automatically if the Reader was created using `from_str`.
                    // But we still need to remove the BOM for consistency with the path where
                    // the encoding feature is not enabled
                    #[cfg(feature = "encoding")]
                    if let Some(encoding) = $reader.detect_encoding() $(.$await)? ? {
                        if $self.state.encoding.can_be_refined() {
                            $self.state.encoding = crate::reader::EncodingRef::BomDetected(encoding.encoding());
                        }
                    }

                    // Removes UTF-8 BOM if it is present
                    #[cfg(not(feature = "encoding"))]
                    $reader.remove_utf8_bom() $(.$await)? ?;

                    // No event is produced by this arm: re-run the loop in the new state
                    $self.state.state = ParseState::InsideText;
                    continue;
                },
                ParseState::InsideRef => { // Go to InsideText
                    // Remember where the reference starts so that errors point at it
                    let start = $self.state.offset;
                    match $reader.read_ref($buf, &mut $self.state.offset) $(.$await)? {
                        // Emit reference, go to InsideText state
                        ReadRefResult::Ref(bytes) => {
                            $self.state.state = ParseState::InsideText;
                            // +1 to skip start `&`
                            // -1 to skip end `;`
                            Ok(Event::GeneralRef(BytesRef::wrap(&bytes[1..bytes.len() - 1], $self.decoder())))
                        }
                        // Go to Done state
                        ReadRefResult::UpToEof(bytes) if $self.state.config.allow_dangling_amp => {
                            $self.state.state = ParseState::Done;
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToEof(_) => {
                            $self.state.state = ParseState::Done;
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        // Do not change state, stay in InsideRef
                        ReadRefResult::UpToRef(bytes) if $self.state.config.allow_dangling_amp => {
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToRef(_) => {
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        // Go to InsideMarkup state
                        ReadRefResult::UpToMarkup(bytes) if $self.state.config.allow_dangling_amp => {
                            $self.state.state = ParseState::InsideMarkup;
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToMarkup(_) => {
                            $self.state.state = ParseState::InsideMarkup;
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        ReadRefResult::Err(e) => Err(Error::from(e)),
                    }
                }
                ParseState::InsideText => { // Go to InsideMarkup, InsideRef or Done state
                    if $self.state.config.trim_text_start {
                        $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?;
                    }

                    match $reader.read_text($buf, &mut $self.state.offset) $(.$await)? {
                        ReadTextResult::Markup(buf) => {
                            $self.state.state = ParseState::InsideMarkup;
                            // Pass `buf` to the next iteration of the parsing loop
                            $buf = buf;
                            continue;
                        }
                        ReadTextResult::Ref(buf) => {
                            $self.state.state = ParseState::InsideRef;
                            // Pass `buf` to the next iteration of the parsing loop
                            $buf = buf;
                            continue;
                        }
                        ReadTextResult::UpToMarkup(bytes) => {
                            $self.state.state = ParseState::InsideMarkup;
                            // FIXME: Can produce an empty event if:
                            // - event contains only spaces
                            // - trim_text_start = false
                            // - trim_text_end = true
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadTextResult::UpToRef(bytes) => {
                            $self.state.state = ParseState::InsideRef;
                            // Return Text event with `bytes` content; the reference itself
                            // is handled by the InsideRef arm on the next call
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadTextResult::UpToEof(bytes) => {
                            $self.state.state = ParseState::Done;
                            // Trim bytes from end if required
                            let event = $self.state.emit_text(bytes);
                            // Nothing left to report as text: signal the end of input instead
                            if event.is_empty() {
                                Ok(Event::Eof)
                            } else {
                                Ok(Event::Text(event))
                            }
                        }
                        ReadTextResult::Err(e) => Err(Error::from(e)),
                    }
                },
                // Go to InsideText state in next two arms
                ParseState::InsideMarkup => $self.$read_until_close($buf) $(.$await)?,
                ParseState::InsideEmpty => Ok(Event::End($self.state.close_expanded_empty())),
                ParseState::Done => Ok(Event::Eof),
            };
        };
        match event {
            // #513: In case of ill-formed errors we already consume the wrong data
            // and change the state. We can continue parsing if we wish
            Err(Error::IllFormed(_)) => {}
            // Any other error and the end of input are terminal
            Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Done,
            _ => {}
        }
        event
    }};
}
384
/// Read bytes up to the `>` and skip it. This method is expected to be called
/// after seeing the `<` symbol and skipping it. Inspects the next (current)
/// symbol and returns an appropriate [`Event`]:
///
/// |Symbol |Event
/// |-------|-------------------------------------
/// |`!`    |[`Comment`], [`CData`] or [`DocType`]
/// |`/`    |[`End`]
/// |`?`    |[`PI`]
/// |_other_|[`Start`] or [`Empty`]
///
/// Moves parser to the `InsideText` state.
///
/// Parameters:
/// - `$self`: the reader whose `state` is updated and whose `emit_*` methods
///   build the resulting event
/// - `$buf`: buffer passed to the low-level read methods
/// - `$reader`: expression yielding the source that provides `peek_one`,
///   `read_bang_element` and `read_with`
/// - `$await`: optional `await` token; when given it is appended to every read call
///
/// When reading of the markup fails, `last_error_offset` is set to the offset
/// captured before the read, so the error is reported at the start of the markup.
///
/// [`Comment`]: Event::Comment
/// [`CData`]: Event::CData
/// [`DocType`]: Event::DocType
/// [`End`]: Event::End
/// [`PI`]: Event::PI
/// [`Start`]: Event::Start
/// [`Empty`]: Event::Empty
macro_rules! read_until_close {
    (
        $self:ident, $buf:ident,
        $reader:expr
        $(, $await:ident)?
    ) => {{
        // The state is switched up front, before the kind of markup is known
        $self.state.state = ParseState::InsideText;

        let start = $self.state.offset;
        match $reader.peek_one() $(.$await)? {
            // `<!` - comment, CDATA or DOCTYPE declaration
            Ok(Some(b'!')) => match $reader
                .read_bang_element($buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok((bang_type, bytes)) => $self.state.emit_bang(bang_type, bytes),
                Err(e) => {
                    // We want to report error at `<`
                    $self.state.last_error_offset = start;
                    Err(e)
                }
            },
            // `</` - closing tag
            // #776: We parse using ElementParser which allows us to have attributes
            // in close tags. While such tags are not allowed by the specification,
            // we allow parsing them anyway because:
            // - we do not check constraints during parsing. This is performed by the
            //   optional validate step which the user should call manually
            // - if we just look for `>` we will parse `</tag attr=">" >` as end tag
            //   `</tag attr=">` and text `" >`, which probably no existing parser
            //   does. This is malformed XML, however it is tolerated by some parsers
            //   (e.g. the one used by Adobe Flash) and such documents do exist in the wild.
            Ok(Some(b'/')) => match $reader
                .read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => $self.state.emit_end(bytes),
                Err(e) => {
                    // We want to report error at `<`
                    $self.state.last_error_offset = start;
                    Err(e)
                }
            },
            // `<?` - processing instruction
            Ok(Some(b'?')) => match $reader
                .read_with(PiParser(false), $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => $self.state.emit_question_mark(bytes),
                Err(e) => {
                    // We want to report error at `<`
                    $self.state.last_error_offset = start;
                    Err(e)
                }
            },
            // `<...` - opening or self-closed tag
            Ok(Some(_)) => match $reader
                .read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => Ok($self.state.emit_start(bytes)),
                Err(e) => {
                    // We want to report error at `<`
                    $self.state.last_error_offset = start;
                    Err(e)
                }
            },
            // `<` - syntax error, tag not closed
            Ok(None) => {
                // We want to report error at `<`
                $self.state.last_error_offset = start;
                Err(Error::Syntax(SyntaxError::UnclosedTag))
            }
            Err(e) => Err(Error::from(e)),
        }
    }};
}
482
/// Shared implementation of the `read_to_end` method for buffered and borrowed
/// readers. Evaluates to the span between the position at the time of the call
/// and the start of the matching `End` event; on a read error or a premature
/// `Eof` it returns the error from the enclosing function.
macro_rules! read_to_end {
    (
        // $self: &mut Reader
        $self:expr, $end:expr, $buf:expr,
        $read_event:ident,
        // Block run at the top of every iteration to reset the internal buffer
        $clear:block
        $(, $await:ident)?
    ) => {{
        // The resulting span must stop exactly where the End event begins, and
        // that position is sampled *before* the End event is read. If only
        // whitespace separates the previous event from the End event, the
        // sampled position is before that whitespace; with `trim_text_start`
        // enabled the whitespace would be swallowed without producing an event,
        // so trimming at the start of text is switched off for the duration.
        //
        // Sampling the position after the End event is not an option either:
        // with `trim_markup_names_in_closing_tags` enabled (the default) the
        // size the End event occupies in the source is unknown, so its start
        // could not be recovered. Hence the configuration is tweaked in any case.
        let saved_trim = ::std::mem::replace(&mut $self.config_mut().trim_text_start, false);

        let first = $self.buffer_position();
        let mut nesting = 0;
        let outcome = loop {
            $clear
            let before_event = $self.buffer_position();
            match $self.$read_event($buf) $(.$await)? {
                Err(e) => break Err(e),
                Ok(Event::Eof) => break Err(Error::missed_end($end, $self.decoder())),

                // Track same-named nested elements so their End events are skipped
                Ok(Event::Start(e)) if e.name() == $end => nesting += 1,
                Ok(Event::End(e)) if e.name() == $end => {
                    if nesting == 0 {
                        break Ok(first..before_event);
                    }
                    nesting -= 1;
                }
                _ => (),
            }
        };

        // Every way out of the loop puts the caller's trimming preference back
        $self.config_mut().trim_text_start = saved_trim;
        match outcome {
            Ok(span) => span,
            Err(e) => return Err(e),
        }
    }};
}
537
538#[cfg(feature = "async-tokio")]
539mod async_tokio;
540mod buffered_reader;
541mod ns_reader;
542mod slice_reader;
543mod state;
544
545pub use ns_reader::NsReader;
546
/// Range of the input, in bytes, that corresponds to some piece of XML.
///
/// Both bounds are `u64` byte offsets.
pub type Span = Range<u64>;
549
550////////////////////////////////////////////////////////////////////////////////////////////////////
551
/// Possible reader states. The state transition diagram (`true` and `false` show
/// the value of the [`Config::expand_empty_elements`] option):
///
/// ```mermaid
/// flowchart LR
///   subgraph _
///     direction LR
///
///     Init         -- "(no event)"\n                                       --> InsideText
///     InsideMarkup -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> InsideText
///     InsideText   -- "#lt;false#gt;\n(no event)"\nText                    --> InsideMarkup
///     InsideText   -- "(no event)"\nText                                   --> InsideRef
///     InsideRef    -- "(no event)"\nGeneralRef                             --> InsideText
///   end
///   InsideText     -- "#lt;true#gt;"\nStart --> InsideEmpty
///   InsideEmpty    -- End                   --> InsideText
///   _ -. Eof .-> Done
/// ```
#[derive(Clone, Debug)]
enum ParseState {
    /// Initial state in which the reader stays after creation. Leaving this state
    /// emits no event: the reader only detects the encoding / removes the BOM and
    /// then always moves to the `InsideText` state. The reader will never return
    /// to this state.
    Init,
    /// State after seeing the `&` symbol in textual content. A well-formed
    /// reference produces a `GeneralRef` event and moves the reader to the
    /// `InsideText` state.
    ///
    /// An unclosed reference produces either a `Text` event (when
    /// [`Config::allow_dangling_amp`] is set) or an `UnclosedReference` error;
    /// depending on what terminated it, the reader then stays in `InsideRef` or
    /// moves to the `InsideMarkup` or `Done` state.
    InsideRef,
    /// State after seeing the `<` symbol. Depending on the next symbol all other
    /// events could be generated.
    ///
    /// After generating one event the reader moves to the `InsideText` state.
    InsideMarkup,
    /// State in which the reader searches for the `<` symbol of a markup (or the
    /// `&` symbol of a reference). All bytes before that symbol will be returned
    /// in the [`Event::Text`] event. After that the reader moves to the
    /// `InsideMarkup` (or, respectively, `InsideRef`) state.
    InsideText,
    /// This state is used only if option [`expand_empty_elements`] is set to `true`.
    /// Reader enters this state when it is in the `InsideText` state and emits an
    /// [`Event::Start`] event. The next event emitted will be an [`Event::End`],
    /// after which the reader returns to the `InsideText` state.
    ///
    /// [`expand_empty_elements`]: Config::expand_empty_elements
    InsideEmpty,
    /// Reader enters this state when the `Eof` event is generated or an error
    /// other than `Error::IllFormed` occurs.
    /// This is the last state, the reader stays in it forever.
    Done,
}
602
/// A reference to an encoding together with information about how it was retrieved.
///
/// The state transition diagram:
///
/// ```mermaid
/// flowchart LR
///   Implicit    -- from_str       --> Explicit
///   Implicit    -- BOM            --> BomDetected
///   Implicit    -- "encoding=..." --> XmlDetected
///   BomDetected -- "encoding=..." --> XmlDetected
/// ```
#[cfg(feature = "encoding")]
#[derive(Clone, Copy, Debug)]
enum EncodingRef {
    /// Encoding was implicitly assumed to have a specified value. It can be refined
    /// using BOM or by the XML declaration event (`<?xml encoding=... ?>`)
    Implicit(&'static Encoding),
    /// Encoding was explicitly set to the desired value. It can be changed
    /// neither by BOM, nor by parsing the XML declaration (`<?xml encoding=... ?>`)
    Explicit(&'static Encoding),
    /// Encoding was detected from a byte order mark (BOM) or by the first bytes
    /// of the content. It can be refined by the XML declaration event (`<?xml encoding=... ?>`)
    BomDetected(&'static Encoding),
    /// Encoding was detected using the XML declaration event (`<?xml encoding=... ?>`).
    /// It can no longer change
    XmlDetected(&'static Encoding),
}
#[cfg(feature = "encoding")]
impl EncodingRef {
    /// Returns the wrapped encoding, regardless of how it was obtained.
    #[inline]
    const fn encoding(&self) -> &'static Encoding {
        match self {
            Self::Implicit(enc)
            | Self::Explicit(enc)
            | Self::BomDetected(enc)
            | Self::XmlDetected(enc) => *enc,
        }
    }
    /// Tells whether a later detection step may still replace this encoding:
    /// `false` for `Explicit` and `XmlDetected`, `true` for `Implicit` and
    /// `BomDetected`.
    #[inline]
    const fn can_be_refined(&self) -> bool {
        match self {
            Self::Explicit(_) | Self::XmlDetected(_) => false,
            Self::Implicit(_) | Self::BomDetected(_) => true,
        }
    }
}
649
650////////////////////////////////////////////////////////////////////////////////////////////////////
651
/// Byte-level access to the reader wrapped by a [`Reader`]. Bytes consumed
/// through this stream are added to the position reported by
/// [`Reader::buffer_position()`].
#[derive(Debug)]
#[must_use = "streams do nothing unless read or polled"]
pub struct BinaryStream<'r, R> {
    inner: &'r mut R,
    offset: &'r mut u64,
}

impl<'r, R> BinaryStream<'r, R> {
    /// Current position, in bytes, within the original source.
    #[inline]
    pub const fn offset(&self) -> u64 {
        *self.offset
    }

    /// Shared access to the wrapped reader.
    #[inline]
    pub const fn get_ref(&self) -> &R {
        &*self.inner
    }

    /// Exclusive access to the wrapped reader.
    ///
    /// Do not read through the returned reference: such reads bypass the position
    /// tracking and make reported error positions wrong. Read from this stream
    /// instead.
    #[inline]
    pub fn get_mut(&mut self) -> &mut R {
        &mut *self.inner
    }
}

impl<'r, R> io::Read for BinaryStream<'r, R>
where
    R: io::Read,
{
    /// Reads from the wrapped reader and advances the tracked position by the
    /// number of bytes actually read. A failed read leaves the position as is.
    #[inline]
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        self.inner.read(buf).map(|count| {
            *self.offset += count as u64;
            count
        })
    }
}

impl<'r, R> io::BufRead for BinaryStream<'r, R>
where
    R: io::BufRead,
{
    /// Exposes the wrapped reader's buffer; the tracked position is not touched.
    #[inline]
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        self.inner.fill_buf()
    }

    /// Marks `amt` bytes as consumed in the wrapped reader and advances the
    /// tracked position by the same amount.
    #[inline]
    fn consume(&mut self, amt: usize) {
        let advanced = amt as u64;
        self.inner.consume(amt);
        *self.offset += advanced;
    }
}
711
712////////////////////////////////////////////////////////////////////////////////////////////////////
713
/// A low-level encoding-agnostic XML event reader.
///
/// Consumes bytes and streams XML [`Event`]s.
///
/// This reader does not manage namespace declarations and is not able to resolve
/// prefixes. If you want these features, use the [`NsReader`].
///
/// # Examples
///
/// ```
/// use quick_xml::events::Event;
/// use quick_xml::reader::Reader;
///
/// let xml = r#"<tag1 att1 = "test">
///                 <tag2><!--Test comment-->Test</tag2>
///                 <tag2>Test 2</tag2>
///              </tag1>"#;
/// let mut reader = Reader::from_str(xml);
/// reader.config_mut().trim_text(true);
///
/// let mut count = 0;
/// let mut txt = Vec::new();
/// let mut buf = Vec::new();
///
/// // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
/// loop {
///     // NOTE: this is the generic case when we don't know about the input BufRead.
///     // when the input is a &str or a &[u8], we don't actually need to use another
///     // buffer, we could directly call `reader.read_event()`
///     match reader.read_event_into(&mut buf) {
///         Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
///         // exits the loop when reaching end of file
///         Ok(Event::Eof) => break,
///
///         Ok(Event::Start(e)) => {
///             match e.name().as_ref() {
///                 b"tag1" => println!("attributes values: {:?}",
///                                     e.attributes().map(|a| a.unwrap().value)
///                                     .collect::<Vec<_>>()),
///                 b"tag2" => count += 1,
///                 _ => (),
///             }
///         }
///         Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
///
///         // There are several other `Event`s we do not consider here
///         _ => (),
///     }
///     // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
///     buf.clear();
/// }
/// ```
///
/// [`NsReader`]: crate::reader::NsReader
#[derive(Debug, Clone)]
pub struct Reader<R> {
    /// Source of the data to parse
    reader: R,
    /// Configuration and current state of the parser
    state: ReaderState,
}
775
776/// Builder methods
777impl<R> Reader<R> {
778    /// Creates a `Reader` that reads from a given reader.
779    pub fn from_reader(reader: R) -> Self {
780        Self {
781            reader,
782            state: ReaderState::default(),
783        }
784    }
785
786    /// Returns reference to the parser configuration
787    pub const fn config(&self) -> &Config {
788        &self.state.config
789    }
790
791    /// Returns mutable reference to the parser configuration
792    pub fn config_mut(&mut self) -> &mut Config {
793        &mut self.state.config
794    }
795}
796
797/// Getters
798impl<R> Reader<R> {
799    /// Consumes `Reader` returning the underlying reader
800    ///
801    /// Can be used to compute line and column of a parsing error position
802    ///
803    /// # Examples
804    ///
805    /// ```
806    /// # use pretty_assertions::assert_eq;
807    /// use std::{str, io::Cursor};
808    /// use quick_xml::events::Event;
809    /// use quick_xml::reader::Reader;
810    ///
811    /// let xml = r#"<tag1 att1 = "test">
812    ///                 <tag2><!--Test comment-->Test</tag2>
813    ///                 <tag3>Test 2</tag3>
814    ///              </tag1>"#;
815    /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
816    /// let mut buf = Vec::new();
817    ///
818    /// fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) {
819    ///     // We known that size cannot exceed usize::MAX because we created parser from single &[u8]
820    ///     let end_pos = reader.buffer_position() as usize;
821    ///     let mut cursor = reader.into_inner();
822    ///     let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned())
823    ///         .expect("can't make a string");
824    ///     let mut line = 1;
825    ///     let mut column = 0;
826    ///     for c in s.chars() {
827    ///         if c == '\n' {
828    ///             line += 1;
829    ///             column = 0;
830    ///         } else {
831    ///             column += 1;
832    ///         }
833    ///     }
834    ///     (line, column)
835    /// }
836    ///
837    /// loop {
838    ///     match reader.read_event_into(&mut buf) {
839    ///         Ok(Event::Start(ref e)) => match e.name().as_ref() {
840    ///             b"tag1" | b"tag2" => (),
841    ///             tag => {
842    ///                 assert_eq!(b"tag3", tag);
843    ///                 assert_eq!((3, 22), into_line_and_column(reader));
844    ///                 break;
845    ///             }
846    ///         },
847    ///         Ok(Event::Eof) => unreachable!(),
848    ///         _ => (),
849    ///     }
850    ///     buf.clear();
851    /// }
852    /// ```
853    pub fn into_inner(self) -> R {
854        self.reader
855    }
856
857    /// Gets a reference to the underlying reader.
858    pub const fn get_ref(&self) -> &R {
859        &self.reader
860    }
861
862    /// Gets a mutable reference to the underlying reader.
863    ///
864    /// Avoid read from this reader because this will not update reader's position
865    /// and will lead to incorrect positions of errors. If you want to read, use
866    /// [`stream()`] instead.
867    ///
868    /// [`stream()`]: Self::stream
869    pub fn get_mut(&mut self) -> &mut R {
870        &mut self.reader
871    }
872
873    /// Gets the byte position in the input data just after the last emitted event
874    /// (i.e. this is position where data of last event ends).
875    ///
876    /// Note, that for text events which is originally ended with whitespace characters
877    /// (` `, `\t`, `\r`, and `\n`) if [`Config::trim_text_end`] is set this is position
878    /// before trim, not the position of the last byte of the [`Event::Text`] content.
879    pub const fn buffer_position(&self) -> u64 {
880        self.state.offset
881    }
882
883    /// Gets the last error byte position in the input data. If there is no errors
884    /// yet, returns `0`.
885    ///
886    /// Unlike `buffer_position` it will point to the place where it is rational
887    /// to report error to the end user. For example, all [`SyntaxError`]s are
888    /// reported when the parser sees EOF inside of some kind of markup. The
889    /// `buffer_position()` will point to the last byte of input which is not
890    /// very useful. `error_position()` will point to the start of corresponding
891    /// markup element (i. e. to the `<` character).
892    ///
893    /// This position is always `<= buffer_position()`.
894    pub const fn error_position(&self) -> u64 {
895        self.state.last_error_offset
896    }
897
898    /// Get the decoder, used to decode bytes, read by this reader, to the strings.
899    ///
900    /// If [`encoding`] feature is enabled, the used encoding may change after
901    /// parsing the XML declaration, otherwise encoding is fixed to UTF-8.
902    ///
903    /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
904    /// defaults to UTF-8.
905    ///
906    /// [`encoding`]: ../index.html#encoding
907    #[inline]
908    pub const fn decoder(&self) -> Decoder {
909        self.state.decoder()
910    }
911
912    /// Get the direct access to the underlying reader, but tracks the amount of
913    /// read data and update [`Reader::buffer_position()`] accordingly.
914    ///
915    /// Note, that this method gives you access to the internal reader and read
916    /// data will not be returned in any subsequent events read by `read_event`
917    /// family of methods.
918    ///
919    /// # Example
920    ///
921    /// This example demonstrates how to read stream raw bytes from an XML document.
922    /// This could be used to implement streaming read of text, or to read raw binary
923    /// bytes embedded in an XML document. (Documents with embedded raw bytes are not
924    /// valid XML, but XML-derived file formats exist where such documents are valid).
925    ///
926    /// ```
927    /// # use pretty_assertions::assert_eq;
928    /// use std::io::{BufRead, Read};
929    /// use quick_xml::events::{BytesEnd, BytesStart, Event};
930    /// use quick_xml::reader::Reader;
931    ///
932    /// let mut reader = Reader::from_str("<tag>binary << data&></tag>");
933    /// //                                 ^    ^               ^     ^
934    /// //                                 0    5              21    27
935    ///
936    /// assert_eq!(
937    ///     (reader.read_event().unwrap(), reader.buffer_position()),
938    ///     // 5 - end of the `<tag>`
939    ///     (Event::Start(BytesStart::new("tag")), 5)
940    /// );
941    ///
942    /// // Reading directly from underlying reader will not update position
943    /// // let mut inner = reader.get_mut();
944    ///
945    /// // Reading from the stream() advances position
946    /// let mut inner = reader.stream();
947    ///
948    /// // Read binary data. We must know its size
949    /// let mut binary = [0u8; 16];
950    /// inner.read_exact(&mut binary).unwrap();
951    /// assert_eq!(&binary, b"binary << data&>");
952    /// // 21 - end of the `binary << data&>`
953    /// assert_eq!(inner.offset(), 21);
954    /// assert_eq!(reader.buffer_position(), 21);
955    ///
956    /// assert_eq!(
957    ///     (reader.read_event().unwrap(), reader.buffer_position()),
958    ///     // 27 - end of the `</tag>`
959    ///     (Event::End(BytesEnd::new("tag")), 27)
960    /// );
961    ///
962    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
963    /// ```
964    #[inline]
965    pub fn stream(&mut self) -> BinaryStream<'_, R> {
966        BinaryStream {
967            inner: &mut self.reader,
968            offset: &mut self.state.offset,
969        }
970    }
971}
972
/// Private sync reading methods
impl<R> Reader<R> {
    /// Reads the next event. Depending on the type of the reader, the returned
    /// event borrows either from the given buffer or from the input itself.
    ///
    /// The body is produced by the `read_event_impl!` macro, which is given this
    /// reader, the buffer, the data source and the name of the
    /// [`read_until_close`](Self::read_until_close) method.
    ///
    /// NOTE(review): the macro is defined outside of this part of the file;
    /// presumably it calls `read_until_close` once a `<` is found -- confirm
    /// against the macro definition.
    fn read_event_impl<'i, B>(&mut self, mut buf: B) -> Result<Event<'i>, Error>
    where
        R: XmlSource<'i, B>,
    {
        read_event_impl!(self, buf, self.reader, read_until_close)
    }

    /// Private function to read until `>` is found. This function expects that
    /// it is called just after a `<` symbol was encountered.
    ///
    /// The body is produced by the `read_until_close!` macro.
    fn read_until_close<'i, B>(&mut self, buf: B) -> Result<Event<'i>, Error>
    where
        R: XmlSource<'i, B>,
    {
        read_until_close!(self, buf, self.reader)
    }
}
994
995////////////////////////////////////////////////////////////////////////////////////////////////////
996
/// Result of an attempt to read XML textual data from the source.
///
/// # Parameters
/// - `'r`: lifetime of the returned text blocks
/// - `B`: type of the buffer that is handed back when no text was read
#[derive(Debug)]
enum ReadTextResult<'r, B> {
    /// Start of markup (`<` character) was found in the first byte. `<` was consumed.
    /// Contains the buffer that should be returned back to the next iteration cycle
    /// to satisfy borrow checker requirements.
    Markup(B),
    /// Start of reference (`&` character) was found in the first byte.
    /// `&` was not consumed.
    /// Contains the buffer that should be returned back to the next iteration cycle
    /// to satisfy borrow checker requirements.
    Ref(B),
    /// Contains text block up to start of markup (`<` character). `<` was consumed.
    UpToMarkup(&'r [u8]),
    /// Contains text block up to start of reference (`&` character).
    /// `&` was not consumed.
    UpToRef(&'r [u8]),
    /// Contains text block up to EOF, neither start of markup (`<` character)
    /// nor start of reference (`&` character) was found.
    UpToEof(&'r [u8]),
    /// IO error occurred.
    Err(io::Error),
}
1020
/// Result of an attempt to read a general reference from the reader.
///
/// Every variant that carries data includes the starting `&` in it.
#[derive(Debug)]
enum ReadRefResult<'r> {
    /// Contains text block up to end of reference (`;` character).
    /// Result includes start `&`, but not end `;`.
    Ref(&'r [u8]),
    /// Contains text block up to EOF. Neither end of reference (`;`), start of
    /// another reference (`&`) nor start of markup (`<`) characters were found.
    /// Result includes start `&`.
    UpToEof(&'r [u8]),
    /// Contains text block up to next possible reference (`&` character).
    /// Result includes start `&`.
    UpToRef(&'r [u8]),
    /// Contains text block up to start of markup (`<` character).
    /// Result includes start `&`.
    UpToMarkup(&'r [u8]),
    /// IO error occurred.
    Err(io::Error),
}
1040
/// Represents an input for a reader that can return borrowed data.
///
/// There are two implementors of this trait: the generic one reads data from
/// `Self`, copies some part of it into a provided buffer of type `B` and then
/// returns data that borrows from that buffer.
///
/// The other implementor is for `&[u8]`: instead of copying data, it returns
/// data borrowed from `Self`. This implementation allows zero-copy
/// deserialization.
///
/// # Parameters
/// - `'r`: lifetime of a buffer from which events will borrow
/// - `B`: a type of a buffer that can be used to store data read from `Self` and
///   from which events can borrow
trait XmlSource<'r, B> {
    /// Removes UTF-8 BOM if it is present
    #[cfg(not(feature = "encoding"))]
    fn remove_utf8_bom(&mut self) -> io::Result<()>;

    /// Determines encoding from the start of input and removes BOM if it is present
    #[cfg(feature = "encoding")]
    fn detect_encoding(&mut self) -> io::Result<Option<DetectedEncoding>>;

    /// Read input until start of markup (the `<`) is found, start of general entity
    /// reference (the `&`) is found or end of input is reached.
    ///
    /// # Parameters
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by amount of bytes consumed
    ///
    /// [events]: crate::events::Event
    fn read_text(&mut self, buf: B, position: &mut u64) -> ReadTextResult<'r, B>;

    /// Read input until end of general reference (the `;`) is found, start of
    /// another general reference (the `&`) is found or end of input is reached.
    ///
    /// This method must be called when current character is `&`.
    ///
    /// # Parameters
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by amount of bytes consumed
    ///
    /// [events]: crate::events::Event
    fn read_ref(&mut self, buf: B, position: &mut u64) -> ReadRefResult<'r>;

    /// Read input until the thing recognized by the given `parser` (for example,
    /// a processing instruction) is finished.
    ///
    /// This method expects that the start sequence of the parsed thing was already read.
    ///
    /// Returns a slice of data read up to the end of the thing being parsed.
    /// The end of the thing and the returned content are determined by the used parser.
    ///
    /// If input (`Self`) is exhausted and no bytes were read, or if the specified
    /// parser could not find the ending sequence of the thing, returns `SyntaxError`.
    ///
    /// # Parameters
    /// - `parser`: The parser that determines where the thing ends
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by amount of bytes consumed
    ///
    /// A `P` type parameter is used to preserve state between calls to the underlying
    /// reader which provides bytes fed into the parser.
    ///
    /// [events]: crate::events::Event
    fn read_with<P>(&mut self, parser: P, buf: B, position: &mut u64) -> Result<&'r [u8], Error>
    where
        P: Parser;

    /// Read input until comment, CDATA or DOCTYPE is finished.
    ///
    /// This method expects that `<` was already read.
    ///
    /// Returns the detected kind of the markup together with a slice of data
    /// read up to its end (`>`).
    ///
    /// NOTE(review): the tests in this file expect the returned slice to span
    /// the whole markup, including the leading `<` and the trailing `>`, while
    /// the previous wording of this comment said that `>` is not included --
    /// confirm against the implementations.
    ///
    /// If input (`Self`) is exhausted before the end of the markup is found,
    /// returns an error (the tests in this file expect a `SyntaxError` that
    /// corresponds to the unclosed markup).
    ///
    /// # Parameters
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by amount of bytes consumed
    ///
    /// [events]: crate::events::Event
    fn read_bang_element(
        &mut self,
        buf: B,
        position: &mut u64,
    ) -> Result<(BangType, &'r [u8]), Error>;

    /// Consume and discard all the whitespace until the next non-whitespace
    /// character or EOF.
    ///
    /// # Parameters
    /// - `position`: Will be increased by amount of bytes consumed
    fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()>;

    /// Return one character without consuming it, so that future `read_*` calls
    /// will still include it. On EOF, return `None`.
    fn peek_one(&mut self) -> io::Result<Option<u8>>;
}
1143
/// Possible elements started with `<!`
#[derive(Debug, PartialEq)]
enum BangType {
    /// `<![CDATA[...]]>`
    CData,
    /// `<!--...-->`
    Comment,
    /// `<!DOCTYPE...>`. Contains the parser to which the data of the DOCTYPE
    /// is fed in order to find its end
    DocType(DtdParser),
}
1154impl BangType {
1155    #[inline(always)]
1156    const fn new(byte: Option<u8>) -> Result<Self, SyntaxError> {
1157        Ok(match byte {
1158            Some(b'[') => Self::CData,
1159            Some(b'-') => Self::Comment,
1160            Some(b'D') | Some(b'd') => Self::DocType(DtdParser::BeforeInternalSubset(0)),
1161            _ => return Err(SyntaxError::InvalidBangMarkup),
1162        })
1163    }
1164
1165    /// If element is finished, returns its content up to `>` symbol and
1166    /// an index of this symbol, otherwise returns `None`
1167    ///
1168    /// # Parameters
1169    /// - `buf`: buffer with data consumed on previous iterations
1170    /// - `chunk`: data read on current iteration and not yet consumed from reader
1171    #[inline(always)]
1172    fn feed(&mut self, buf: &[u8], chunk: &[u8]) -> Option<usize> {
1173        match self {
1174            Self::Comment => {
1175                for i in memchr::memchr_iter(b'>', chunk) {
1176                    // Need to read at least 6 symbols (`!---->`) for properly finished comment
1177                    // <!----> - XML comment
1178                    // 0123456 - i
1179                    if buf.len() + i > 5 {
1180                        if chunk[..i].ends_with(b"--") {
1181                            // We cannot strip last `--` from the buffer because we need it in case of
1182                            // check_comments enabled option. XML standard requires that comment
1183                            // will not end with `--->` sequence because this is a special case of
1184                            // `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments)
1185                            return Some(i);
1186                        }
1187                        // End sequence `-|->` was splitted at |
1188                        //        buf --/   \-- chunk
1189                        if i == 1 && buf.ends_with(b"-") && chunk[0] == b'-' {
1190                            return Some(i);
1191                        }
1192                        // End sequence `--|>` was splitted at |
1193                        //         buf --/   \-- chunk
1194                        if i == 0 && buf.ends_with(b"--") {
1195                            return Some(i);
1196                        }
1197                    }
1198                }
1199            }
1200            Self::CData => {
1201                for i in memchr::memchr_iter(b'>', chunk) {
1202                    if chunk[..i].ends_with(b"]]") {
1203                        return Some(i);
1204                    }
1205                    // End sequence `]|]>` was splitted at |
1206                    //        buf --/   \-- chunk
1207                    if i == 1 && buf.ends_with(b"]") && chunk[0] == b']' {
1208                        return Some(i);
1209                    }
1210                    // End sequence `]]|>` was splitted at |
1211                    //         buf --/   \-- chunk
1212                    if i == 0 && buf.ends_with(b"]]") {
1213                        return Some(i);
1214                    }
1215                }
1216            }
1217            Self::DocType(ref mut parser) => return parser.feed(buf, chunk),
1218        }
1219        None
1220    }
1221    #[inline]
1222    const fn to_err(&self) -> SyntaxError {
1223        match self {
1224            Self::CData => SyntaxError::UnclosedCData,
1225            Self::Comment => SyntaxError::UnclosedComment,
1226            Self::DocType(_) => SyntaxError::UnclosedDoctype,
1227        }
1228    }
1229}
1230
1231////////////////////////////////////////////////////////////////////////////////////////////////////
1232
1233#[cfg(test)]
1234mod test {
1235    /// Checks the internal implementation of the various reader methods
1236    macro_rules! check {
1237        (
1238            #[$test:meta]
1239            $read_event:ident,
1240            $read_until_close:ident,
1241            // constructor of the XML source on which internal functions will be called
1242            $source:path,
1243            $skip:literal,
1244            // constructor of the buffer to which read data will stored
1245            $buf:expr
1246            $(, $async:ident, $await:ident)?
1247        ) => {
1248            mod read_bang_element {
1249                use super::*;
1250                use crate::errors::{Error, SyntaxError};
1251                use crate::reader::{BangType, DtdParser};
1252                use crate::utils::Bytes;
1253
1254                /// Checks that reading CDATA content works correctly
1255                mod cdata {
1256                    use super::*;
1257                    use pretty_assertions::assert_eq;
1258
1259                    /// Checks that if input begins like CDATA element, but CDATA start sequence
1260                    /// is not finished, parsing ends with an error
1261                    #[$test]
1262                    #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"]
1263                    $($async)? fn not_properly_start() {
1264                        let buf = $buf;
1265                        let mut position = 0;
1266                        let mut input = &b"<![]]>other content"[$skip..];
1267                        //                 ^= 0
1268
1269                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1270                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData),
1271                            x => panic!(
1272                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1273                                x
1274                            ),
1275                        }
1276                        assert_eq!(position, 1);
1277                    }
1278
1279                    /// Checks that if CDATA startup sequence was matched, but an end sequence
1280                    /// is not found, parsing ends with an error
1281                    #[$test]
1282                    $($async)? fn not_closed() {
1283                        let buf = $buf;
1284                        let mut position = 0;
1285                        let mut input = &b"<![CDATA[other content"[$skip..];
1286                        //                 ^= 0                  ^= 22
1287
1288                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1289                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData),
1290                            x => panic!(
1291                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1292                                x
1293                            ),
1294                        }
1295                        assert_eq!(position, 22);
1296                    }
1297
1298                    /// Checks that CDATA element without content inside parsed successfully
1299                    #[$test]
1300                    $($async)? fn empty() {
1301                        let buf = $buf;
1302                        let mut position = 0;
1303                        let mut input = &b"<![CDATA[]]>other content"[$skip..];
1304                        //                ^= 0        ^= 12
1305
1306                        let (ty, bytes) = $source(&mut input)
1307                            .read_bang_element(buf, &mut position)
1308                            $(.$await)?
1309                            .unwrap();
1310                        assert_eq!(
1311                            (ty, Bytes(bytes)),
1312                            (BangType::CData, Bytes(b"<![CDATA[]]>"))
1313                        );
1314                        assert_eq!(position, 12);
1315                    }
1316
1317                    /// Checks that CDATA element with content parsed successfully.
1318                    /// Additionally checks that sequences inside CDATA that may look like
1319                    /// a CDATA end sequence do not interrupt CDATA parsing
1320                    #[$test]
1321                    $($async)? fn with_content() {
1322                        let buf = $buf;
1323                        let mut position = 0;
1324                        let mut input = &b"<![CDATA[cdata]] ]>content]]>other content]]>"[$skip..];
1325                        //                 ^= 0                         ^= 29
1326
1327                        let (ty, bytes) = $source(&mut input)
1328                            .read_bang_element(buf, &mut position)
1329                            $(.$await)?
1330                            .unwrap();
1331                        assert_eq!(
1332                            (ty, Bytes(bytes)),
1333                            (BangType::CData, Bytes(b"<![CDATA[cdata]] ]>content]]>"))
1334                        );
1335                        assert_eq!(position, 29);
1336                    }
1337                }
1338
1339                /// Checks that reading XML comments works correctly. According to the [specification],
1340                /// comment data can contain any sequence except `--`:
1341                ///
1342                /// ```peg
                /// comment = '<!--' (!'--' char)* '-->';
1344                /// char = [#x1-#x2C]
1345                ///      / [#x2E-#xD7FF]
1346                ///      / [#xE000-#xFFFD]
1347                ///      / [#x10000-#x10FFFF]
1348                /// ```
1349                ///
                /// This limitation, however, is simply a result of a poorly designed specification
                /// (maybe for the purpose of building an LL(1) XML parser) and quick-xml does not check
                /// for the presence of these sequences by default. These tests allow such content.
1353                ///
1354                /// [specification]: https://www.w3.org/TR/xml11/#dt-comment
1355                mod comment {
1356                    use super::*;
1357                    use pretty_assertions::assert_eq;
1358
1359                    #[$test]
1360                    #[ignore = "start comment sequence fully checked outside of `read_bang_element`"]
1361                    $($async)? fn not_properly_start() {
1362                        let buf = $buf;
1363                        let mut position = 0;
1364                        let mut input = &b"<!- -->other content"[$skip..];
1365                        //                  ^= 1
1366
1367                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1368                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1369                            x => panic!(
1370                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1371                                x
1372                            ),
1373                        }
1374                        assert_eq!(position, 1);
1375                    }
1376
1377                    #[$test]
1378                    $($async)? fn not_properly_end() {
1379                        let buf = $buf;
1380                        let mut position = 0;
1381                        let mut input = &b"<!->other content"[$skip..];
1382                        //                 ^= 0             ^= 17
1383
1384                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1385                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1386                            x => panic!(
1387                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1388                                x
1389                            ),
1390                        }
1391                        assert_eq!(position, 17);
1392                    }
1393
1394                    #[$test]
1395                    $($async)? fn not_closed1() {
1396                        let buf = $buf;
1397                        let mut position = 0;
1398                        let mut input = &b"<!--other content"[$skip..];
1399                        //                 ^= 0             ^= 17
1400
1401                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1402                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1403                            x => panic!(
1404                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1405                                x
1406                            ),
1407                        }
1408                        assert_eq!(position, 17);
1409                    }
1410
1411                    #[$test]
1412                    $($async)? fn not_closed2() {
1413                        let buf = $buf;
1414                        let mut position = 0;
1415                        let mut input = &b"<!-->other content"[$skip..];
1416                        //                 ^= 0              ^= 18
1417
1418                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1419                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1420                            x => panic!(
1421                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1422                                x
1423                            ),
1424                        }
1425                        assert_eq!(position, 18);
1426                    }
1427
1428                    #[$test]
1429                    $($async)? fn not_closed3() {
                            // `<!--->` (one extra `-` and then `>` after the opening `<!--`) must not
                            // be taken as a complete comment either: the test expects
                            // `UnclosedComment` and `position` at 19, the length of the full literal.
1430                        let buf = $buf;
1431                        let mut position = 0;
1432                        let mut input = &b"<!--->other content"[$skip..];
1433                        //                 ^= 0               ^= 19
1434
1435                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1436                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1437                            x => panic!(
1438                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1439                                x
1440                            ),
1441                        }
1442                        assert_eq!(position, 19);
1443                    }
1444
1445                    #[$test]
1446                    $($async)? fn empty() {
                            // `<!---->` is the shortest input accepted as a comment here: the call
                            // must return exactly those 7 bytes tagged as `BangType::Comment` and
                            // leave `position` at 7, i.e. the trailing text is not consumed.
1447                        let buf = $buf;
1448                        let mut position = 0;
1449                        let mut input = &b"<!---->other content"[$skip..];
1450                        //                 ^= 0   ^= 7
1451
1452                        let (ty, bytes) = $source(&mut input)
1453                            .read_bang_element(buf, &mut position)
1454                            $(.$await)?
1455                            .unwrap();
1456                        assert_eq!(
1457                            (ty, Bytes(bytes)),
1458                            (BangType::Comment, Bytes(b"<!---->"))
1459                        );
1460                        assert_eq!(position, 7);
1461                    }
1462
1463                    #[$test]
1464                    $($async)? fn with_content() {
                            // The comment body itself contains `->` and `<-` fragments
                            // (`<!--->comment<--->`); the test expects the whole 18-byte comment to
                            // be returned as `BangType::Comment`, with `position` at 18.
1465                        let buf = $buf;
1466                        let mut position = 0;
1467                        let mut input = &b"<!--->comment<--->other content"[$skip..];
1468                        //                 ^= 0              ^= 18
1469
1470                        let (ty, bytes) = $source(&mut input)
1471                            .read_bang_element(buf, &mut position)
1472                            $(.$await)?
1473                            .unwrap();
1474                        assert_eq!(
1475                            (ty, Bytes(bytes)),
1476                            (BangType::Comment, Bytes(b"<!--->comment<--->"))
1477                        );
1478                        assert_eq!(position, 18);
1479                    }
1480                }
1481
1482                /// Checks that reading DOCTYPE definition works correctly
1483                mod doctype {
1484                    use super::*;
1485
                        /// Inputs that spell the keyword in upper case (`<!D...` / `<!DOCTYPE...`).
1486                    mod uppercase {
1487                        use super::*;
1488                        use pretty_assertions::assert_eq;
1489
                            /// Input `<!D other content` contains no closing `>`: expects
                            /// `UnclosedDoctype` and `position == 17` (length of the full literal).
1490                        #[$test]
1491                        $($async)? fn not_properly_start() {
1492                            let buf = $buf;
1493                            let mut position = 0;
1494                            let mut input = &b"<!D other content"[$skip..];
1495                            //                 ^= 0             ^= 17
1496
1497                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1498                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1499                                x => panic!(
1500                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1501                                    x
1502                                ),
1503                            }
1504                            assert_eq!(position, 17);
1505                        }
1506
                            /// Input `<!DOCTYPEother content` (no space after the keyword, no closing
                            /// `>`): expects `UnclosedDoctype` and `position == 22`.
1507                        #[$test]
1508                        $($async)? fn without_space() {
1509                            let buf = $buf;
1510                            let mut position = 0;
1511                            let mut input = &b"<!DOCTYPEother content"[$skip..];
1512                            //                 ^= 0                  ^= 22
1513
1514                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1515                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1516                                x => panic!(
1517                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1518                                    x
1519                                ),
1520                            }
1521                            assert_eq!(position, 22);
1522                        }
1523
                            /// `<!DOCTYPE>` closed right after the keyword: expects those 10 bytes
                            /// tagged as `BangType::DocType(DtdParser::Finished)` and `position == 10`,
                            /// i.e. the trailing text is not consumed.
1524                        #[$test]
1525                        $($async)? fn empty() {
1526                            let buf = $buf;
1527                            let mut position = 0;
1528                            let mut input = &b"<!DOCTYPE>other content"[$skip..];
1529                            //                 ^= 0      ^= 10
1530
1531                            let (ty, bytes) = $source(&mut input)
1532                                .read_bang_element(buf, &mut position)
1533                                $(.$await)?
1534                                .unwrap();
1535                            assert_eq!(
1536                                (ty, Bytes(bytes)),
1537                                (BangType::DocType(DtdParser::Finished), Bytes(b"<!DOCTYPE>"))
1538                            );
1539                            assert_eq!(position, 10);
1540                        }
1541
                            /// Input `<!DOCTYPE other content` contains no closing `>`: expects
                            /// `UnclosedDoctype` and `position == 23` (length of the full literal).
1542                        #[$test]
1543                        $($async)? fn not_closed() {
1544                            let buf = $buf;
1545                            let mut position = 0;
1546                            let mut input = &b"<!DOCTYPE other content"[$skip..];
1547                            //                 ^= 0                   ^= 23
1548
1549                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1550                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1551                                x => panic!(
1552                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1553                                    x
1554                                ),
1555                            }
1556                            assert_eq!(position, 23);
1557                        }
1558                    }
1559
                        /// The same four cases as in `uppercase`, with the keyword spelled in
                        /// lower case (`<!d...` / `<!doctype...`); the expected results are identical.
1560                    mod lowercase {
1561                        use super::*;
1562                        use pretty_assertions::assert_eq;
1563
                            /// Input `<!d other content` contains no closing `>`: expects
                            /// `UnclosedDoctype` and `position == 17` (length of the full literal).
1564                        #[$test]
1565                        $($async)? fn not_properly_start() {
1566                            let buf = $buf;
1567                            let mut position = 0;
1568                            let mut input = &b"<!d other content"[$skip..];
1569                            //                 ^= 0             ^= 17
1570
1571                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1572                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1573                                x => panic!(
1574                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1575                                    x
1576                                ),
1577                            }
1578                            assert_eq!(position, 17);
1579                        }
1580
                            /// Input `<!doctypeother content` (no space after the keyword, no closing
                            /// `>`): expects `UnclosedDoctype` and `position == 22`.
1581                        #[$test]
1582                        $($async)? fn without_space() {
1583                            let buf = $buf;
1584                            let mut position = 0;
1585                            let mut input = &b"<!doctypeother content"[$skip..];
1586                            //                 ^= 0                  ^= 22
1587
1588                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1589                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1590                                x => panic!(
1591                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1592                                    x
1593                                ),
1594                            }
1595                            assert_eq!(position, 22);
1596                        }
1597
                            /// `<!doctype>` closed right after the keyword: expects those 10 bytes
                            /// tagged as `BangType::DocType(DtdParser::Finished)` and `position == 10`,
                            /// i.e. the trailing text is not consumed.
1598                        #[$test]
1599                        $($async)? fn empty() {
1600                            let buf = $buf;
1601                            let mut position = 0;
1602                            let mut input = &b"<!doctype>other content"[$skip..];
1603                            //                 ^= 0      ^= 10
1604
1605                            let (ty, bytes) = $source(&mut input)
1606                                .read_bang_element(buf, &mut position)
1607                                $(.$await)?
1608                                .unwrap();
1609                            assert_eq!(
1610                                (ty, Bytes(bytes)),
1611                                (BangType::DocType(DtdParser::Finished), Bytes(b"<!doctype>"))
1612                            );
1613                            assert_eq!(position, 10);
1614                        }
1615
                            /// Input `<!doctype other content` contains no closing `>`: expects
                            /// `UnclosedDoctype` and `position == 23` (length of the full literal).
1616                        #[$test]
1617                        $($async)? fn not_closed() {
1618                            let buf = $buf;
1619                            let mut position = 0;
1620                            let mut input = &b"<!doctype other content"[$skip..];
1621                            //                 ^= 0                   ^= 23
1622
1623                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1624                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1625                                x => panic!(
1626                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1627                                    x
1628                                ),
1629                            }
1630                            assert_eq!(position, 23);
1631                        }
1632                    }
1633                }
1634            }
1635
                    /// Checks the variant returned by `read_text` and the resulting `position`
                    /// for minimal inputs. Every test starts with `position = 1` and passes
                    /// the whole input (no `$skip` slicing).
1636            mod read_text {
1637                use super::*;
1638                use crate::reader::ReadTextResult;
1639                use crate::utils::Bytes;
1640                use pretty_assertions::assert_eq;
1641
                    /// Empty input: expects `UpToEof` with empty bytes and an unchanged `position`.
1642                #[$test]
1643                $($async)? fn empty() {
1644                    let buf = $buf;
1645                    let mut position = 1;
1646                    let mut input = b"".as_ref();
1647                    //                ^= 1
1648
1649                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1650                        ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"")),
1651                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1652                    }
1653                    assert_eq!(position, 1);
1654                }
1655
                    /// Input is a lone `<`: expects `Markup` carrying a value equal to a fresh
                    /// `$buf`, and an unchanged `position`.
1656                #[$test]
1657                $($async)? fn markup() {
1658                    let buf = $buf;
1659                    let mut position = 1;
1660                    let mut input = b"<".as_ref();
1661                    //                 ^= 1
1662
1663                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1664                        ReadTextResult::Markup(b) => assert_eq!(b, $buf),
1665                        x => panic!("Expected `Markup(_)`, but got `{:?}`", x),
1666                    }
1667                    assert_eq!(position, 1);
1668                }
1669
                    /// Input is a lone `&`: expects `Ref` carrying a value equal to a fresh
                    /// `$buf`, and an unchanged `position`.
1670                #[$test]
1671                $($async)? fn ref_() {
1672                    let buf = $buf;
1673                    let mut position = 1;
1674                    let mut input = b"&".as_ref();
1675                    //                ^= 1
1676
1677                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1678                        ReadTextResult::Ref(b) => assert_eq!(b, $buf),
1679                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1680                    }
1681                    assert_eq!(position, 1);
1682                }
1683
                    /// Text followed by `<`: expects `UpToMarkup(b"a")` and `position` advanced
                    /// by one byte (1 -> 2).
1684                #[$test]
1685                $($async)? fn up_to_markup() {
1686                    let buf = $buf;
1687                    let mut position = 1;
1688                    let mut input = b"a<".as_ref();
1689                    //                  ^= 2
1690
1691                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1692                        ReadTextResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1693                        x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
1694                    }
1695                    assert_eq!(position, 2);
1696                }
1697
                    /// Text followed by `&`: expects `UpToRef(b"a")` and `position` advanced
                    /// by one byte (1 -> 2).
1698                #[$test]
1699                $($async)? fn up_to_ref() {
1700                    let buf = $buf;
1701                    let mut position = 1;
1702                    let mut input = b"a&".as_ref();
1703                    //                 ^= 2
1704
1705                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1706                        ReadTextResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1707                        x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
1708                    }
1709                    assert_eq!(position, 2);
1710                }
1711
                    /// Text that runs to the end of input: expects `UpToEof(b"a")` and `position`
                    /// advanced by one byte (1 -> 2).
1712                #[$test]
1713                $($async)? fn up_to_eof() {
1714                    let buf = $buf;
1715                    let mut position = 1;
1716                    let mut input = b"a".as_ref();
1717                    //                 ^= 2
1718
1719                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1720                        ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1721                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1722                    }
1723                    assert_eq!(position, 2);
1724                }
1725            }
1726
                    /// Checks the variant returned by `read_ref` and the resulting `position`
                    /// for inputs that start with `&`. Every test starts with `position = 1`.
1727            mod read_ref {
1728                use super::*;
1729                use crate::reader::ReadRefResult;
1730                use crate::utils::Bytes;
1731                use pretty_assertions::assert_eq;
1732
1733                // Empty input is not allowed for `read_ref`, so it is not tested.
1734                // The borrowed source triggers a debug assertion,
1735                // the buffered one does nothing due to implementation details.
1736
                    /// A lone `&` at the end of input: expects `UpToEof(b"&")` and `position == 2`.
1737                #[$test]
1738                $($async)? fn up_to_eof() {
1739                    let buf = $buf;
1740                    let mut position = 1;
1741                    let mut input = b"&".as_ref();
1742                    //                 ^= 2
1743
1744                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1745                        ReadRefResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1746                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1747                    }
1748                    assert_eq!(position, 2);
1749                }
1750
                    /// `&` followed by another `&`: expects `UpToRef(b"&")` (only the first `&`
                    /// is returned) and `position == 2`.
1751                #[$test]
1752                $($async)? fn up_to_ref() {
1753                    let buf = $buf;
1754                    let mut position = 1;
1755                    let mut input = b"&&".as_ref();
1756                    //                 ^= 2
1757
1758                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1759                        ReadRefResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1760                        x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
1761                    }
1762                    assert_eq!(position, 2);
1763                }
1764
                    /// `&` followed by `<`: expects `UpToMarkup(b"&")` (the `<` is not returned)
                    /// and `position == 2`.
1765                #[$test]
1766                $($async)? fn up_to_markup() {
1767                    let buf = $buf;
1768                    let mut position = 1;
1769                    let mut input = b"&<".as_ref();
1770                    //                 ^= 2
1771
1772                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1773                        ReadRefResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1774                        x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
1775                    }
1776                    assert_eq!(position, 2);
1777                }
1778
                    /// `&;` (a reference with an empty name): expects `Ref(b"&;")`, including
                    /// both delimiters, and `position == 3`.
1779                #[$test]
1780                $($async)? fn empty_ref() {
1781                    let buf = $buf;
1782                    let mut position = 1;
1783                    let mut input = b"&;".as_ref();
1784                    //                  ^= 3
1785
1786                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1787                        ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&;")),
1788                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1789                    }
1790                    assert_eq!(position, 3);
1791                }
1792
                    /// `&lt;`: expects `Ref(b"&lt;")`, including both delimiters, and
                    /// `position == 5`.
1793                #[$test]
1794                $($async)? fn normal() {
1795                    let buf = $buf;
1796                    let mut position = 1;
1797                    let mut input = b"&lt;".as_ref();
1798                    //                    ^= 5
1799
1800                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1801                        ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&lt;")),
1802                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1803                    }
1804                    assert_eq!(position, 5);
1805                }
1806            }
1807
                    /// Checks `read_with(ElementParser::default(), ...)` on tag-shaped inputs.
                    /// Every successful case expects the complete tag bytes (including `<` and
                    /// `>`) to be returned and `position` to equal the length of the full literal.
1808            mod read_element {
1809                use super::*;
1810                use crate::errors::{Error, SyntaxError};
1811                use crate::parser::ElementParser;
1812                use crate::utils::Bytes;
1813                use pretty_assertions::assert_eq;
1814
1815                /// Checks that nothing was read from empty buffer.
1816                /// `<` is read by `peek_one`, which is called before `read_with`; that is why it is in the input buffer.
1817                /// `peek_one`, however, does not increment position, for simplicity of the code.
1818                #[$test]
1819                $($async)? fn empty() {
1820                    let buf = $buf;
1821                    let mut position = 0;
1822                    let mut input = &b"<"[$skip..];
1823                    //                  ^= 1
1824
1825                    match $source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? {
1826                        Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedTag),
1827                        x => panic!(
1828                            "Expected `Err(Syntax(_))`, but got `{:?}`",
1829                            x
1830                        ),
1831                    }
1832                    assert_eq!(position, 1);
1833                }
1834
                    /// Inputs of the form `<...>`.
1835                mod open {
1836                    use super::*;
1837                    use pretty_assertions::assert_eq;
1838
                        /// `<>`: a tag with an empty name is returned as is; `position == 2`.
1839                    #[$test]
1840                    $($async)? fn empty_tag() {
1841                        let buf = $buf;
1842                        let mut position = 0;
1843                        let mut input = &b"<>"[$skip..];
1844                        //                   ^= 2
1845
1846                        assert_eq!(
1847                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1848                            Bytes(b"<>")
1849                        );
1850                        assert_eq!(position, 2);
1851                    }
1852
                        /// `<tag>`: a plain tag is returned as is; `position == 5`.
1853                    #[$test]
1854                    $($async)? fn normal() {
1855                        let buf = $buf;
1856                        let mut position = 0;
1857                        let mut input = &b"<tag>"[$skip..];
1858                        //                      ^= 5
1859
1860                        assert_eq!(
1861                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1862                            Bytes(b"<tag>")
1863                        );
1864                        assert_eq!(position, 5);
1865                    }
1866
                        /// `<:>`: only a `:` between the brackets; returned as is, `position == 3`.
1867                    #[$test]
1868                    $($async)? fn empty_ns_empty_tag() {
1869                        let buf = $buf;
1870                        let mut position = 0;
1871                        let mut input = &b"<:>"[$skip..];
1872                        //                    ^= 3
1873
1874                        assert_eq!(
1875                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1876                            Bytes(b"<:>")
1877                        );
1878                        assert_eq!(position, 3);
1879                    }
1880
                        /// `<:tag>`: name starts with `:`; returned as is, `position == 6`.
1881                    #[$test]
1882                    $($async)? fn empty_ns() {
1883                        let buf = $buf;
1884                        let mut position = 0;
1885                        let mut input = &b"<:tag>"[$skip..];
1886                        //                       ^= 6
1887
1888                        assert_eq!(
1889                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1890                            Bytes(b"<:tag>")
1891                        );
1892                        assert_eq!(position, 6);
1893                    }
1894
                        /// `>` characters inside double- and single-quoted attribute values must
                        /// not end the tag: the whole 39-byte input is expected back.
1895                    #[$test]
1896                    $($async)? fn with_attributes() {
1897                        let buf = $buf;
1898                        let mut position = 0;
1899                        let mut input = &br#"<tag  attr-1=">"  attr2  =  '>'  3attr>"#[$skip..];
1900                        //                                                          ^= 39
1901
1902                        assert_eq!(
1903                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1904                            Bytes(br#"<tag  attr-1=">"  attr2  =  '>'  3attr>"#)
1905                        );
1906                        assert_eq!(position, 39);
1907                    }
1908                }
1909
                    /// Inputs that end with `/>`.
1910                mod self_closed {
1911                    use super::*;
1912                    use pretty_assertions::assert_eq;
1913
                        /// `</>`: `/>` directly after `<`; returned as is, `position == 3`.
1914                    #[$test]
1915                    $($async)? fn empty_tag() {
1916                        let buf = $buf;
1917                        let mut position = 0;
1918                        let mut input = &b"</>"[$skip..];
1919                        //                    ^= 3
1920
1921                        assert_eq!(
1922                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1923                            Bytes(b"</>")
1924                        );
1925                        assert_eq!(position, 3);
1926                    }
1927
                        /// `<tag/>`: returned as is, `position == 6`.
1928                    #[$test]
1929                    $($async)? fn normal() {
1930                        let buf = $buf;
1931                        let mut position = 0;
1932                        let mut input = &b"<tag/>"[$skip..];
1933                        //                       ^= 6
1934
1935                        assert_eq!(
1936                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1937                            Bytes(b"<tag/>")
1938                        );
1939                        assert_eq!(position, 6);
1940                    }
1941
                        /// `<:/>`: only a `:` before `/>`; returned as is, `position == 4`.
1942                    #[$test]
1943                    $($async)? fn empty_ns_empty_tag() {
1944                        let buf = $buf;
1945                        let mut position = 0;
1946                        let mut input = &b"<:/>"[$skip..];
1947                        //                     ^= 4
1948
1949                        assert_eq!(
1950                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1951                            Bytes(b"<:/>")
1952                        );
1953                        assert_eq!(position, 4);
1954                    }
1955
                        /// `<:tag/>`: name starts with `:`; returned as is, `position == 7`.
1956                    #[$test]
1957                    $($async)? fn empty_ns() {
1958                        let buf = $buf;
1959                        let mut position = 0;
1960                        let mut input = &b"<:tag/>"[$skip..];
1961                        //                        ^= 7
1962
1963                        assert_eq!(
1964                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1965                            Bytes(b"<:tag/>")
1966                        );
1967                        assert_eq!(position, 7);
1968                    }
1969
                        /// `/>` sequences inside double- and single-quoted attribute values must
                        /// not end the tag: the whole 42-byte input is expected back.
1970                    #[$test]
1971                    $($async)? fn with_attributes() {
1972                        let buf = $buf;
1973                        let mut position = 0;
1974                        let mut input = &br#"<tag  attr-1="/>"  attr2  =  '/>'  3attr/>"#[$skip..];
1975                        //                                                             ^= 42
1976
1977                        assert_eq!(
1978                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1979                            Bytes(br#"<tag  attr-1="/>"  attr2  =  '/>'  3attr/>"#)
1980                        );
1981                        assert_eq!(position, 42);
1982                    }
1983                }
1984
                    /// Inputs that start with `</`.
1985                mod close {
1986                    use super::*;
1987                    use pretty_assertions::assert_eq;
1988
                        /// `</ >`: only a space between `</` and `>`; returned as is,
                        /// `position == 4`.
1989                    #[$test]
1990                    $($async)? fn empty_tag() {
1991                        let buf = $buf;
1992                        let mut position = 0;
1993                        let mut input = &b"</ >"[$skip..];
1994                        //                     ^= 4
1995
1996                        assert_eq!(
1997                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1998                            Bytes(b"</ >")
1999                        );
2000                        assert_eq!(position, 4);
2001                    }
2002
                        /// `</tag>`: returned as is, `position == 6`.
2003                    #[$test]
2004                    $($async)? fn normal() {
2005                        let buf = $buf;
2006                        let mut position = 0;
2007                        let mut input = &b"</tag>"[$skip..];
2008                        //                       ^= 6
2009
2010                        assert_eq!(
2011                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2012                            Bytes(b"</tag>")
2013                        );
2014                        assert_eq!(position, 6);
2015                    }
2016
                        /// `</:>`: only a `:` between `</` and `>`; returned as is,
                        /// `position == 4`.
2017                    #[$test]
2018                    $($async)? fn empty_ns_empty_tag() {
2019                        let buf = $buf;
2020                        let mut position = 0;
2021                        let mut input = &b"</:>"[$skip..];
2022                        //                     ^= 4
2023
2024                        assert_eq!(
2025                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2026                            Bytes(b"</:>")
2027                        );
2028                        assert_eq!(position, 4);
2029                    }
2030
                        /// `</:tag>`: name starts with `:`; returned as is, `position == 7`.
2031                    #[$test]
2032                    $($async)? fn empty_ns() {
2033                        let buf = $buf;
2034                        let mut position = 0;
2035                        let mut input = &b"</:tag>"[$skip..];
2036                        //                        ^= 7
2037
2038                        assert_eq!(
2039                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2040                            Bytes(b"</:tag>")
2041                        );
2042                        assert_eq!(position, 7);
2043                    }
2044
                        /// `>` characters inside double- and single-quoted attribute-like values
                        /// must not end the tag: the whole 40-byte input is expected back.
2045                    #[$test]
2046                    $($async)? fn with_attributes() {
2047                        let buf = $buf;
2048                        let mut position = 0;
2049                        let mut input = &br#"</tag  attr-1=">"  attr2  =  '>'  3attr>"#[$skip..];
2050                        //                                                           ^= 40
2051
2052                        assert_eq!(
2053                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2054                            Bytes(br#"</tag  attr-1=">"  attr2  =  '>'  3attr>"#)
2055                        );
2056                        assert_eq!(position, 40);
2057                    }
2058                }
2059            }
2060
2061            /// Ensures that no empty `Text` events are generated
2062            mod $read_event {
2063                use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
2064                use crate::reader::Reader;
2065                use pretty_assertions::assert_eq;
2066
2067                /// When `encoding` feature is enabled, encoding should be detected
2068                /// from BOM (UTF-8) and BOM should be stripped.
2069                ///
2070                /// When `encoding` feature is disabled, UTF-8 is assumed and BOM
2071                /// character should be stripped for consistency
                    ///
                    /// The input consists of two BOM characters; the test expects a single
                    /// `Text` event holding one BOM character, followed by `Eof`.
2072                #[$test]
2073                $($async)? fn bom_from_reader() {
2074                    let mut reader = Reader::from_reader("\u{feff}\u{feff}".as_bytes());
2075
2076                    assert_eq!(
2077                        reader.$read_event($buf) $(.$await)? .unwrap(),
2078                        Event::Text(BytesText::from_escaped("\u{feff}"))
2079                    );
2080
2081                    assert_eq!(
2082                        reader.$read_event($buf) $(.$await)? .unwrap(),
2083                        Event::Eof
2084                    );
2085                }
2086
2087                /// When parsing from &str, encoding is fixed (UTF-8), so
2088                /// - when `encoding` feature is disabled, the behavior is the
2089                ///   same as in the `bom_from_reader` test
2090                /// - when `encoding` feature is enabled, the behavior should
2091                ///   stay consistent, so the first BOM character is stripped
                    ///
                    /// The input consists of two BOM characters; the test expects a single
                    /// `Text` event holding one BOM character, followed by `Eof`.
2092                #[$test]
2093                $($async)? fn bom_from_str() {
2094                    let mut reader = Reader::from_str("\u{feff}\u{feff}");
2095
2096                    assert_eq!(
2097                        reader.$read_event($buf) $(.$await)? .unwrap(),
2098                        Event::Text(BytesText::from_escaped("\u{feff}"))
2099                    );
2100
2101                    assert_eq!(
2102                        reader.$read_event($buf) $(.$await)? .unwrap(),
2103                        Event::Eof
2104                    );
2105                }
2106
2107                #[$test]
2108                $($async)? fn declaration() {
2109                    let mut reader = Reader::from_str("<?xml ?>");
2110
2111                    assert_eq!(
2112                        reader.$read_event($buf) $(.$await)? .unwrap(),
2113                        Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
2114                    );
2115                }
2116
2117                #[$test]
2118                $($async)? fn doctype() {
2119                    let mut reader = Reader::from_str("<!DOCTYPE x>");
2120
2121                    assert_eq!(
2122                        reader.$read_event($buf) $(.$await)? .unwrap(),
2123                        Event::DocType(BytesText::from_escaped("x"))
2124                    );
2125                }
2126
2127                #[$test]
2128                $($async)? fn processing_instruction() {
2129                    let mut reader = Reader::from_str("<?xml-stylesheet '? >\" ?>");
2130
2131                    assert_eq!(
2132                        reader.$read_event($buf) $(.$await)? .unwrap(),
2133                        Event::PI(BytesPI::new("xml-stylesheet '? >\" "))
2134                    );
2135                }
2136
2137                /// Lone closing tags are not allowed, so testing it together with start tag
2138                #[$test]
2139                $($async)? fn start_and_end() {
2140                    let mut reader = Reader::from_str("<tag></tag>");
2141
2142                    assert_eq!(
2143                        reader.$read_event($buf) $(.$await)? .unwrap(),
2144                        Event::Start(BytesStart::new("tag"))
2145                    );
2146
2147                    assert_eq!(
2148                        reader.$read_event($buf) $(.$await)? .unwrap(),
2149                        Event::End(BytesEnd::new("tag"))
2150                    );
2151                }
2152
2153                #[$test]
2154                $($async)? fn empty() {
2155                    let mut reader = Reader::from_str("<tag/>");
2156
2157                    assert_eq!(
2158                        reader.$read_event($buf) $(.$await)? .unwrap(),
2159                        Event::Empty(BytesStart::new("tag"))
2160                    );
2161                }
2162
2163                #[$test]
2164                $($async)? fn text() {
2165                    let mut reader = Reader::from_str("text");
2166
2167                    assert_eq!(
2168                        reader.$read_event($buf) $(.$await)? .unwrap(),
2169                        Event::Text(BytesText::from_escaped("text"))
2170                    );
2171                }
2172
2173                #[$test]
2174                $($async)? fn cdata() {
2175                    let mut reader = Reader::from_str("<![CDATA[]]>");
2176
2177                    assert_eq!(
2178                        reader.$read_event($buf) $(.$await)? .unwrap(),
2179                        Event::CData(BytesCData::new(""))
2180                    );
2181                }
2182
2183                #[$test]
2184                $($async)? fn comment() {
2185                    let mut reader = Reader::from_str("<!---->");
2186
2187                    assert_eq!(
2188                        reader.$read_event($buf) $(.$await)? .unwrap(),
2189                        Event::Comment(BytesText::from_escaped(""))
2190                    );
2191                }
2192
2193                #[$test]
2194                $($async)? fn eof() {
2195                    let mut reader = Reader::from_str("");
2196
2197                    assert_eq!(
2198                        reader.$read_event($buf) $(.$await)? .unwrap(),
2199                        Event::Eof
2200                    );
2201                }
2202            }
2203        };
2204    }
2205
2206    // Export macros for the child modules:
2207    // - buffered_reader
2208    // - slice_reader
2209    pub(super) use check;
2210}