quick_xml/reader/
mod.rs

1//! Contains high-level interface for a pull-based XML parser.
2
3#[cfg(feature = "encoding")]
4use encoding_rs::Encoding;
5use std::io;
6use std::ops::Range;
7
8use crate::encoding::Decoder;
9use crate::errors::{Error, IllFormedError, SyntaxError};
10use crate::events::{BytesRef, Event};
11use crate::parser::{DtdParser, ElementParser, Parser, PiParser};
12use crate::reader::state::ReaderState;
13
/// A struct that holds a parser configuration.
///
/// Current parser configuration can be retrieved by calling [`Reader::config()`]
/// and changed by changing properties of the object returned by a call to
/// [`Reader::config_mut()`].
///
/// [`Reader::config()`]: crate::reader::Reader::config
/// [`Reader::config_mut()`]: crate::reader::Reader::config_mut
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))]
#[non_exhaustive]
pub struct Config {
    /// Whether a lone ampersand character (without a paired semicolon) should be
    /// allowed in textual content. Unless enabled, in case of a dangling ampersand,
    /// the [`Error::IllFormed(UnclosedReference)`] is returned from read methods.
    ///
    /// Default: `false`
    ///
    /// # Example
    ///
    /// ```
    /// # use quick_xml::events::{BytesRef, BytesText, Event};
    /// # use quick_xml::reader::Reader;
    /// # use pretty_assertions::assert_eq;
    /// let mut reader = Reader::from_str("text with & &amp; & alone");
    /// reader.config_mut().allow_dangling_amp = true;
    ///
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new("text with ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(BytesRef::new("amp")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new(" ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& alone")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
    /// ```
    ///
    /// [`Error::IllFormed(UnclosedReference)`]: crate::errors::IllFormedError::UnclosedReference
    pub allow_dangling_amp: bool,

    /// Whether unmatched closing tag names should be allowed. Unless enabled,
    /// in case of a dangling end tag, the [`Error::IllFormed(UnmatchedEndTag)`]
    /// is returned from read methods.
    ///
    /// When set to `true`, it won't check if a closing tag has a corresponding
    /// opening tag at all. For example, `<a></a></b>` will be permitted.
    ///
    /// Note that the emitted [`End`] event will not be modified if this is enabled,
    /// i.e. it will contain the data of the unmatched end tag.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event.
    ///
    /// Default: `false`
    ///
    /// [`Error::IllFormed(UnmatchedEndTag)`]: crate::errors::IllFormedError::UnmatchedEndTag
    /// [`End`]: crate::events::Event::End
    pub allow_unmatched_ends: bool,

    /// Whether comments should be validated. If enabled, in case of an invalid comment
    /// the [`Error::IllFormed(DoubleHyphenInComment)`] is returned from read methods.
    ///
    /// When set to `true`, every [`Comment`] event will be checked for not
    /// containing `--`, which [is not allowed] in XML comments. Most of the time
    /// we don't want comments at all so we don't really care about comment
    /// correctness, thus the default value is `false` to improve performance.
    ///
    /// Default: `false`
    ///
    /// [`Error::IllFormed(DoubleHyphenInComment)`]: crate::errors::IllFormedError::DoubleHyphenInComment
    /// [`Comment`]: crate::events::Event::Comment
    /// [is not allowed]: https://www.w3.org/TR/xml11/#sec-comments
    pub check_comments: bool,

    /// Whether mismatched closing tag names should be detected. If enabled, in
    /// case of mismatch the [`Error::IllFormed(MismatchedEndTag)`] is returned from
    /// read methods.
    ///
    /// Note that start and end tags [should match literally][spec], they cannot
    /// have different prefixes even if both prefixes resolve to the same namespace.
    /// The XML
    ///
    /// ```xml
    /// <outer xmlns="namespace" xmlns:p="namespace">
    /// </p:outer>
    /// ```
    ///
    /// is not valid, even though semantically the start tag is the same as the
    /// end tag. The reason is that namespaces are an extension of the original
    /// XML specification (without namespaces) and it should be backward-compatible.
    ///
    /// When set to `false`, it won't check if a closing tag matches the corresponding
    /// opening tag. For example, `<mytag></different_tag>` will be permitted.
    ///
    /// If the XML is known to be sane (already processed, etc.) this saves extra time.
    ///
    /// Note that the emitted [`End`] event will not be modified if this is disabled,
    /// i.e. it will contain the data of the mismatched end tag.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event. However, if
    /// [`expand_empty_elements`] is also set, only one additional allocation will
    /// be performed that supports both these options.
    ///
    /// Default: `true`
    ///
    /// [`Error::IllFormed(MismatchedEndTag)`]: crate::errors::IllFormedError::MismatchedEndTag
    /// [spec]: https://www.w3.org/TR/xml11/#dt-etag
    /// [`End`]: crate::events::Event::End
    /// [`expand_empty_elements`]: Self::expand_empty_elements
    pub check_end_names: bool,

    /// Whether empty elements should be split into a [`Start`] and an [`End`] event.
    ///
    /// When set to `true`, all [`Empty`] events produced by a self-closing tag
    /// like `<tag/>` are expanded into a [`Start`] event followed by an [`End`]
    /// event. When set to `false` (the default), those tags are represented by
    /// an [`Empty`] event instead.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event. However, if
    /// [`check_end_names`] is also set, only one additional allocation will be
    /// performed that supports both these options.
    ///
    /// Default: `false`
    ///
    /// [`Empty`]: crate::events::Event::Empty
    /// [`Start`]: crate::events::Event::Start
    /// [`End`]: crate::events::Event::End
    /// [`check_end_names`]: Self::check_end_names
    pub expand_empty_elements: bool,

    /// Whether trailing whitespace after the markup name is trimmed in closing
    /// tags `</a >`.
    ///
    /// If `true` the emitted [`End`] event is stripped of trailing whitespace
    /// after the markup name.
    ///
    /// Note that if set to `false` and [`check_end_names`] is `true` the comparison
    /// of markup names is going to fail erroneously if a closing tag contains
    /// trailing whitespace.
    ///
    /// Default: `true`
    ///
    /// [`End`]: crate::events::Event::End
    /// [`check_end_names`]: Self::check_end_names
    pub trim_markup_names_in_closing_tags: bool,

    /// Whether whitespace before character data should be removed.
    ///
    /// When set to `true`, leading whitespace is trimmed in [`Text`] events.
    /// If after that the event is empty it will not be pushed.
    ///
    /// Default: `false`
    ///
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
    ///
    /// WARNING: With this option every text event will be trimmed, which is
    /// incorrect behavior when text events are delimited by comments, processing
    /// instructions or CDATA sections. To correctly trim data manually apply
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
    /// only to necessary events.
    /// </div>
    ///
    /// [`Text`]: crate::events::Event::Text
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
    pub trim_text_start: bool,

    /// Whether whitespace after character data should be removed.
    ///
    /// When set to `true`, trailing whitespace is trimmed in [`Text`] events.
    /// If after that the event is empty it will not be pushed.
    ///
    /// Default: `false`
    ///
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
    ///
    /// WARNING: With this option every text event will be trimmed, which is
    /// incorrect behavior when text events are delimited by comments, processing
    /// instructions or CDATA sections. To correctly trim data manually apply
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
    /// only to necessary events.
    /// </div>
    ///
    /// [`Text`]: crate::events::Event::Text
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
    pub trim_text_end: bool,
}
203
204impl Config {
205    /// Set both [`trim_text_start`] and [`trim_text_end`] to the same value.
206    ///
207    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
208    ///
209    /// WARNING: With this option every text events will be trimmed which is
210    /// incorrect behavior when text events delimited by comments, processing
211    /// instructions or CDATA sections. To correctly trim data manually apply
212    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
213    /// only to necessary events.
214    /// </div>
215    ///
216    /// [`trim_text_start`]: Self::trim_text_start
217    /// [`trim_text_end`]: Self::trim_text_end
218    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
219    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
220    #[inline]
221    pub fn trim_text(&mut self, trim: bool) {
222        self.trim_text_start = trim;
223        self.trim_text_end = trim;
224    }
225
226    /// Turn on or off all checks for well-formedness. Currently it is that settings:
227    /// - [`check_comments`](Self::check_comments)
228    /// - [`check_end_names`](Self::check_end_names)
229    #[inline]
230    pub fn enable_all_checks(&mut self, enable: bool) {
231        self.check_comments = enable;
232        self.check_end_names = enable;
233    }
234}
235
236impl Default for Config {
237    fn default() -> Self {
238        Self {
239            allow_dangling_amp: false,
240            allow_unmatched_ends: false,
241            check_comments: false,
242            check_end_names: true,
243            expand_empty_elements: false,
244            trim_markup_names_in_closing_tags: true,
245            trim_text_start: false,
246            trim_text_end: false,
247        }
248    }
249}
250
251////////////////////////////////////////////////////////////////////////////////////////////////////
252
/// Generalization of the event-reading loop shared by the reader implementations.
///
/// Drives the [`ParseState`] machine stored in `$self.state.state` until one
/// event or one error is produced, and evaluates to that result. After the
/// loop, any error except `Error::IllFormed`, as well as an `Eof` event,
/// switches the state to `Done`.
///
/// # Parameters
/// - `$self`: the reader; its `state` field holds the parse state, the
///   current offset, the configuration and the last error offset
/// - `$buf`: identifier of the buffer handed to the `$reader` methods. It is
///   re-assigned with the buffer returned by `read_text` when no event was
///   produced and the loop continues
/// - `$reader`: expression for the underlying source. `detect_encoding` (or
///   `remove_utf8_bom` without the `encoding` feature), `read_ref`,
///   `skip_whitespace` and `read_text` are called on it
/// - `$read_until_close`: name of the method of `$self` that is called with
///   `$buf` in the `InsideMarkup` state
/// - `$await`: optional `await` keyword that is appended to the reader calls
macro_rules! read_event_impl {
    (
        $self:ident, $buf:ident,
        $reader:expr,
        $read_until_close:ident
        $(, $await:ident)?
    ) => {{
        let event = loop {
            break match $self.state.state {
                ParseState::Init => { // Go to InsideText state
                    // If the encoding was set explicitly, we do not need to detect it.
                    // For example, explicit UTF-8 is set automatically if the Reader was
                    // created using `from_str`. But we still need to remove the BOM for
                    // consistency with the code path where the `encoding` feature is disabled
                    #[cfg(feature = "encoding")]
                    if let Some(encoding) = $reader.detect_encoding() $(.$await)? ? {
                        if $self.state.encoding.can_be_refined() {
                            $self.state.encoding = crate::reader::EncodingRef::BomDetected(encoding);
                        }
                    }

                    // Removes UTF-8 BOM if it is present
                    #[cfg(not(feature = "encoding"))]
                    $reader.remove_utf8_bom() $(.$await)? ?;

                    $self.state.state = ParseState::InsideText;
                    continue;
                },
                ParseState::InsideRef => { // Go to InsideText
                    // Remember where the reference started: errors are reported at this offset
                    let start = $self.state.offset;
                    match $reader.read_ref($buf, &mut $self.state.offset) $(.$await)? {
                        // Emit reference, go to InsideText state
                        ReadRefResult::Ref(bytes) => {
                            $self.state.state = ParseState::InsideText;
                            // +1 to skip start `&`
                            // -1 to skip end `;`
                            Ok(Event::GeneralRef(BytesRef::wrap(&bytes[1..bytes.len() - 1], $self.decoder())))
                        }
                        // Go to Done state
                        ReadRefResult::UpToEof(bytes) if $self.state.config.allow_dangling_amp => {
                            $self.state.state = ParseState::Done;
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToEof(_) => {
                            $self.state.state = ParseState::Done;
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        // Do not change state, stay in InsideRef
                        ReadRefResult::UpToRef(bytes) if $self.state.config.allow_dangling_amp => {
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToRef(_) => {
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        // Go to InsideMarkup state
                        ReadRefResult::UpToMarkup(bytes) if $self.state.config.allow_dangling_amp => {
                            $self.state.state = ParseState::InsideMarkup;
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToMarkup(_) => {
                            $self.state.state = ParseState::InsideMarkup;
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        ReadRefResult::Err(e) => Err(Error::Io(e.into())),
                    }
                }
                ParseState::InsideText => { // Go to InsideMarkup, InsideRef or Done state
                    if $self.state.config.trim_text_start {
                        $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?;
                    }

                    match $reader.read_text($buf, &mut $self.state.offset) $(.$await)? {
                        // No event is produced in the next two arms, only the state changes
                        ReadTextResult::Markup(buf) => {
                            $self.state.state = ParseState::InsideMarkup;
                            // Pass `buf` to the next iteration of the parsing loop
                            $buf = buf;
                            continue;
                        }
                        ReadTextResult::Ref(buf) => {
                            $self.state.state = ParseState::InsideRef;
                            // Pass `buf` to the next iteration of the parsing loop
                            $buf = buf;
                            continue;
                        }
                        ReadTextResult::UpToMarkup(bytes) => {
                            $self.state.state = ParseState::InsideMarkup;
                            // FIXME: Can produce an empty event if:
                            // - event contains only spaces
                            // - trim_text_start = false
                            // - trim_text_end = true
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadTextResult::UpToRef(bytes) => {
                            $self.state.state = ParseState::InsideRef;
                            // Emit a Text event with `bytes` content; the reference itself
                            // will be processed by the next call, in the InsideRef state
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadTextResult::UpToEof(bytes) => {
                            $self.state.state = ParseState::Done;
                            // Trim bytes from end if required. If nothing is left,
                            // report Eof instead of an empty Text event
                            let event = $self.state.emit_text(bytes);
                            if event.is_empty() {
                                Ok(Event::Eof)
                            } else {
                                Ok(Event::Text(event))
                            }
                        }
                        ReadTextResult::Err(e) => Err(Error::Io(e.into())),
                    }
                },
                // Go to InsideText state in next two arms
                ParseState::InsideMarkup => $self.$read_until_close($buf) $(.$await)?,
                ParseState::InsideEmpty => Ok(Event::End($self.state.close_expanded_empty())),
                ParseState::Done => Ok(Event::Eof),
            };
        };
        match event {
            // #513: In case of ill-formed errors we already consumed the wrong data
            // and changed the state. We can continue parsing if we wish
            Err(Error::IllFormed(_)) => {}
            // Any other error, as well as the end of input, stops the reader for good
            Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Done,
            _ => {}
        }
        event
    }};
}
382
/// Read bytes up to the `>` and skip it. This method is expected to be called
/// after seeing the `<` symbol and skipping it. Inspects the next (current)
/// symbol and returns an appropriate [`Event`]:
///
/// |Symbol |Event
/// |-------|-------------------------------------
/// |`!`    |[`Comment`], [`CData`] or [`DocType`]
/// |`/`    |[`End`]
/// |`?`    |[`PI`]
/// |_other_|[`Start`] or [`Empty`]
///
/// Moves parser to the `InsideText` state.
///
/// If reading of the markup fails, the offset of the error is set to the
/// position that the reader had when this macro was entered.
///
/// # Parameters
/// - `$self`: the reader; its `state` field holds the parse state, the
///   current offset and the last error offset
/// - `$buf`: identifier of the buffer handed to the `$reader` methods
/// - `$reader`: expression for the underlying source. `peek_one`,
///   `read_bang_element` and `read_with` are called on it
/// - `$await`: optional `await` keyword that is appended to the reader calls
///
/// [`Comment`]: Event::Comment
/// [`CData`]: Event::CData
/// [`DocType`]: Event::DocType
/// [`End`]: Event::End
/// [`PI`]: Event::PI
/// [`Start`]: Event::Start
/// [`Empty`]: Event::Empty
macro_rules! read_until_close {
    (
        $self:ident, $buf:ident,
        $reader:expr
        $(, $await:ident)?
    ) => {{
        // Whatever is read below, the markup is followed by textual content
        $self.state.state = ParseState::InsideText;

        let start = $self.state.offset;
        match $reader.peek_one() $(.$await)? {
            // `<!` - comment, CDATA or DOCTYPE declaration
            Ok(Some(b'!')) => match $reader
                .read_bang_element($buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok((bang_type, bytes)) => $self.state.emit_bang(bang_type, bytes),
                Err(e) => {
                    // We want to report error at `<`
                    $self.state.last_error_offset = start;
                    Err(e)
                }
            },
            // `</` - closing tag
            // #776: We parse using ElementParser, which allows us to have attributes
            // in close tags. While such tags are not allowed by the specification,
            // we allow parsing them anyway because:
            // - we do not check constraints during parsing. This is performed by the
            //   optional validate step which the user should call manually
            // - if we just looked for `>` we would parse `</tag attr=">" >` as end tag
            //   `</tag attr=">` and text `" >`, which probably no existing parser
            //   does. This is malformed XML, however it is tolerated by some parsers
            //   (e.g. the one used by Adobe Flash) and such documents do exist in the wild.
            Ok(Some(b'/')) => match $reader
                .read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => $self.state.emit_end(bytes),
                Err(e) => {
                    // We want to report error at `<`
                    $self.state.last_error_offset = start;
                    Err(e)
                }
            },
            // `<?` - processing instruction
            Ok(Some(b'?')) => match $reader
                .read_with(PiParser(false), $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => $self.state.emit_question_mark(bytes),
                Err(e) => {
                    // We want to report error at `<`
                    $self.state.last_error_offset = start;
                    Err(e)
                }
            },
            // `<...` - opening or self-closed tag
            Ok(Some(_)) => match $reader
                .read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => Ok($self.state.emit_start(bytes)),
                Err(e) => {
                    // We want to report error at `<`
                    $self.state.last_error_offset = start;
                    Err(e)
                }
            },
            // `<` - syntax error, tag not closed
            Ok(None) => {
                // We want to report error at `<`
                $self.state.last_error_offset = start;
                Err(Error::Syntax(SyntaxError::UnclosedTag))
            }
            Err(e) => Err(Error::Io(e.into())),
        }
    }};
}
480
/// Generalization of `read_to_end` method for buffered and borrowed readers.
///
/// Reads events until the `End` event with the name `$end` that is not paired
/// with a `Start` event of the same name seen during this read. Evaluates to
/// the range `start..end`, where `start` is the reader position on entry and
/// `end` is the position just before that `End` event was read.
///
/// On a read error, or when `Eof` is reached first, the macro executes
/// `return Err(..)`, i.e. it returns from the function in which it is expanded.
/// The `trim_text_start` option is restored on every exit path.
///
/// # Parameters
/// - `$self`: the reader (`config_mut`, `buffer_position`, `decoder` and
///   `$read_event` are called on it)
/// - `$end`: name of the element to search the end of
/// - `$buf`: buffer expression passed to each `$read_event` call
/// - `$read_event`: name of the method of `$self` that reads one event
/// - `$clear`: code block that is executed before reading of each event
/// - `$await`: optional `await` keyword that is appended to the read call
macro_rules! read_to_end {
    (
        // $self: &mut Reader
        $self:expr, $end:expr, $buf:expr,
        $read_event:ident,
        // Code block that performs clearing of internal buffer after read of each event
        $clear:block
        $(, $await:ident)?
    ) => {{
        // Because we take the position after the event that precedes the End event,
        // it is important that this position indicates the beginning of the End event.
        // If there are only spaces between the last event and the End event, then we
        // take the position before the spaces, but the spaces would be skipped without
        // generating an event if `trim_text_start` is set to `true`. To prevent that
        // we temporarily disable start text trimming.
        //
        // We also cannot take the position after getting the End event, because if
        // `trim_markup_names_in_closing_tags` is set to `true` (which is the default),
        // we do not know the real size that the End event occupies in the source
        // and cannot correct the position after the End event.
        // So, in any case we should tweak the parser configuration.
        let config = $self.config_mut();
        let trim = config.trim_text_start;
        config.trim_text_start = false;

        let start = $self.buffer_position();
        // Number of currently open nested elements that have the same name as `$end`
        let mut depth = 0;
        loop {
            $clear
            // Position before the next event; becomes the end of the span
            // if that event is the End event we are looking for
            let end = $self.buffer_position();
            match $self.$read_event($buf) $(.$await)? {
                Err(e) => {
                    $self.config_mut().trim_text_start = trim;
                    return Err(e);
                }

                Ok(Event::Start(e)) if e.name() == $end => depth += 1,
                Ok(Event::End(e)) if e.name() == $end => {
                    if depth == 0 {
                        $self.config_mut().trim_text_start = trim;
                        break start..end;
                    }
                    depth -= 1;
                }
                Ok(Event::Eof) => {
                    $self.config_mut().trim_text_start = trim;
                    return Err(Error::missed_end($end, $self.decoder()));
                }
                _ => (),
            }
        }
    }};
}
535
536#[cfg(feature = "async-tokio")]
537mod async_tokio;
538mod buffered_reader;
539mod ns_reader;
540mod slice_reader;
541mod state;
542
543pub use ns_reader::NsReader;
544
/// Half-open range of byte offsets in the input that corresponds to some piece of XML
pub type Span = Range<u64>;
547
548////////////////////////////////////////////////////////////////////////////////////////////////////
549
/// Possible reader states. The state transition diagram (`true` and `false` shows
/// value of [`Config::expand_empty_elements`] option):
///
/// ```mermaid
/// flowchart LR
///   subgraph _
///     direction LR
///
///     Init         -- "(no event)"\n                                       --> InsideText
///     InsideMarkup -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> InsideText
///     InsideText   -- "#lt;false#gt;\n(no event)"\nText                    --> InsideMarkup
///     InsideText   -- "(no event)"\nText                                   --> InsideRef
///     InsideRef    -- "(no event)"\nGeneralRef                             --> InsideText
///   end
///   InsideText     -- "#lt;true#gt;"\nStart --> InsideEmpty
///   InsideEmpty    -- End                   --> InsideText
///   _ -. Eof .-> Done
/// ```
#[derive(Clone, Debug)]
enum ParseState {
    /// Initial state in which the reader stays after creation. On the first read
    /// the reader detects the encoding (or, when the `encoding` feature is
    /// disabled, only removes the UTF-8 BOM) and moves to the `InsideText` state.
    /// No event is emitted during that transition. The reader will never return
    /// to this state.
    Init,
    /// State after seeing the `&` symbol in textual content.
    ///
    /// If a complete reference is read, the [`Event::GeneralRef`] event is emitted
    /// and the reader moves to the `InsideText` state. Otherwise the reference is
    /// unclosed: depending on [`Config::allow_dangling_amp`] either an
    /// [`Event::Text`] event is emitted or an error is returned, and the reader
    /// stays in the `InsideRef` state, moves to the `InsideMarkup` state or moves
    /// to the `Done` state, depending on what follows the unclosed reference.
    InsideRef,
    /// State after seeing the `<` symbol. Depending on the next symbol all other
    /// events could be generated.
    ///
    /// After generating one event the reader moves to the `InsideText` state.
    InsideMarkup,
    /// State in which the reader searches for the `<` symbol of a markup or the
    /// `&` symbol of a reference. All bytes before that symbol will be returned
    /// in the [`Event::Text`] event. After that the reader moves to the
    /// `InsideMarkup` or to the `InsideRef` state respectively, or to the `Done`
    /// state if the end of input was reached.
    InsideText,
    /// This state is used only if option [`expand_empty_elements`] is set to `true`.
    /// Reader enters this state when it is in the `InsideText` state and emits an
    /// [`Event::Start`] event. The next event emitted will be an [`Event::End`],
    /// after which the reader returns to the `InsideText` state.
    ///
    /// [`expand_empty_elements`]: Config::expand_empty_elements
    InsideEmpty,
    /// Reader enters this state when the `Eof` event is generated or when an error
    /// occurs after which parsing cannot be continued: any error other than
    /// `Error::IllFormed`, or an unclosed reference at the end of input.
    /// This is the last state, the reader stays in it forever.
    Done,
}
600
/// A reference to an encoding together with information about how it was retrieved.
///
/// The state transition diagram:
///
/// ```mermaid
/// flowchart LR
///   Implicit    -- from_str       --> Explicit
///   Implicit    -- BOM            --> BomDetected
///   Implicit    -- "encoding=..." --> XmlDetected
///   BomDetected -- "encoding=..." --> XmlDetected
/// ```
#[cfg(feature = "encoding")]
#[derive(Clone, Copy, Debug)]
enum EncodingRef {
    /// Encoding was implicitly assumed to have a specified value. It can be refined
    /// using BOM or by the XML declaration event (`<?xml encoding=... ?>`)
    Implicit(&'static Encoding),
    /// Encoding was explicitly set to the desired value. It can be changed
    /// neither by BOM, nor by parsing the XML declaration (`<?xml encoding=... ?>`)
    Explicit(&'static Encoding),
    /// Encoding was detected from a byte order mark (BOM) or by the first bytes
    /// of the content. It can be refined by the XML declaration event (`<?xml encoding=... ?>`)
    BomDetected(&'static Encoding),
    /// Encoding was detected using the XML declaration event (`<?xml encoding=... ?>`).
    /// It can no longer change
    XmlDetected(&'static Encoding),
}
#[cfg(feature = "encoding")]
impl EncodingRef {
    /// Returns the stored encoding, no matter how it was obtained.
    #[inline]
    const fn encoding(&self) -> &'static Encoding {
        match *self {
            Self::Implicit(encoding)
            | Self::Explicit(encoding)
            | Self::BomDetected(encoding)
            | Self::XmlDetected(encoding) => encoding,
        }
    }

    /// Returns `true` for the `Implicit` and `BomDetected` variants, i.e. when
    /// the encoding still may be replaced by a more precise one, and `false`
    /// for the `Explicit` and `XmlDetected` variants.
    #[inline]
    const fn can_be_refined(&self) -> bool {
        match *self {
            Self::Explicit(_) | Self::XmlDetected(_) => false,
            Self::Implicit(_) | Self::BomDetected(_) => true,
        }
    }
}
647
648////////////////////////////////////////////////////////////////////////////////////////////////////
649
/// A direct stream to the underlying [`Reader`]s reader which updates
/// [`Reader::buffer_position()`] when read from it.
#[derive(Debug)]
#[must_use = "streams do nothing unless read or polled"]
pub struct BinaryStream<'r, R> {
    /// The underlying reader that all reads are forwarded to
    inner: &'r mut R,
    /// Position in bytes; increased by the amount of data read or consumed
    /// through this stream
    offset: &'r mut u64,
}
658
659impl<'r, R> BinaryStream<'r, R> {
660    /// Returns current position in bytes in the original source.
661    #[inline]
662    pub const fn offset(&self) -> u64 {
663        *self.offset
664    }
665
666    /// Gets a reference to the underlying reader.
667    #[inline]
668    pub const fn get_ref(&self) -> &R {
669        self.inner
670    }
671
672    /// Gets a mutable reference to the underlying reader.
673    ///
674    /// Avoid read from this reader because this will not update reader's position
675    /// and will lead to incorrect positions of errors. Read from this stream instead.
676    #[inline]
677    pub fn get_mut(&mut self) -> &mut R {
678        self.inner
679    }
680}
681
682impl<'r, R> io::Read for BinaryStream<'r, R>
683where
684    R: io::Read,
685{
686    #[inline]
687    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
688        let amt = self.inner.read(buf)?;
689        *self.offset += amt as u64;
690        Ok(amt)
691    }
692}
693
694impl<'r, R> io::BufRead for BinaryStream<'r, R>
695where
696    R: io::BufRead,
697{
698    #[inline]
699    fn fill_buf(&mut self) -> io::Result<&[u8]> {
700        self.inner.fill_buf()
701    }
702
703    #[inline]
704    fn consume(&mut self, amt: usize) {
705        self.inner.consume(amt);
706        *self.offset += amt as u64;
707    }
708}
709
710////////////////////////////////////////////////////////////////////////////////////////////////////
711
/// A low level encoding-agnostic XML event reader.
///
/// Consumes bytes and streams XML [`Event`]s.
///
/// This reader does not manage namespace declarations and is not able to resolve
/// prefixes. If you want these features, use the [`NsReader`].
///
/// # Examples
///
/// ```
/// use quick_xml::events::Event;
/// use quick_xml::reader::Reader;
///
/// let xml = r#"<tag1 att1 = "test">
///                 <tag2><!--Test comment-->Test</tag2>
///                 <tag2>Test 2</tag2>
///              </tag1>"#;
/// let mut reader = Reader::from_str(xml);
/// reader.config_mut().trim_text(true);
///
/// let mut count = 0;
/// let mut txt = Vec::new();
/// let mut buf = Vec::new();
///
/// // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
/// loop {
///     // NOTE: this is the generic case when we don't know about the input BufRead.
///     // when the input is a &str or a &[u8], we don't actually need to use another
///     // buffer, we could directly call `reader.read_event()`
///     match reader.read_event_into(&mut buf) {
///         Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
///         // exits the loop when reaching end of file
///         Ok(Event::Eof) => break,
///
///         Ok(Event::Start(e)) => {
///             match e.name().as_ref() {
///                 b"tag1" => println!("attributes values: {:?}",
///                                     e.attributes().map(|a| a.unwrap().value)
///                                     .collect::<Vec<_>>()),
///                 b"tag2" => count += 1,
///                 _ => (),
///             }
///         }
///         Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
///
///         // There are several other `Event`s we do not consider here
///         _ => (),
///     }
///     // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
///     buf.clear();
/// }
/// ```
///
/// [`NsReader`]: crate::reader::NsReader
#[derive(Debug, Clone)]
pub struct Reader<R> {
    /// Source of the data to parse
    reader: R,
    /// Configuration and current parse state
    state: ReaderState,
}
773
774/// Builder methods
775impl<R> Reader<R> {
776    /// Creates a `Reader` that reads from a given reader.
777    pub fn from_reader(reader: R) -> Self {
778        Self {
779            reader,
780            state: ReaderState::default(),
781        }
782    }
783
784    /// Returns reference to the parser configuration
785    pub const fn config(&self) -> &Config {
786        &self.state.config
787    }
788
789    /// Returns mutable reference to the parser configuration
790    pub fn config_mut(&mut self) -> &mut Config {
791        &mut self.state.config
792    }
793}
794
/// Getters
impl<R> Reader<R> {
    /// Consumes `Reader` returning the underlying reader
    ///
    /// Can be used to compute line and column of a parsing error position
    ///
    /// # Examples
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use std::{str, io::Cursor};
    /// use quick_xml::events::Event;
    /// use quick_xml::reader::Reader;
    ///
    /// let xml = r#"<tag1 att1 = "test">
    ///                 <tag2><!--Test comment-->Test</tag2>
    ///                 <tag3>Test 2</tag3>
    ///              </tag1>"#;
    /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
    /// let mut buf = Vec::new();
    ///
    /// fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) {
    ///     // We know that the size cannot exceed usize::MAX because we created parser from single &[u8]
    ///     let end_pos = reader.buffer_position() as usize;
    ///     let mut cursor = reader.into_inner();
    ///     let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned())
    ///         .expect("can't make a string");
    ///     let mut line = 1;
    ///     let mut column = 0;
    ///     for c in s.chars() {
    ///         if c == '\n' {
    ///             line += 1;
    ///             column = 0;
    ///         } else {
    ///             column += 1;
    ///         }
    ///     }
    ///     (line, column)
    /// }
    ///
    /// loop {
    ///     match reader.read_event_into(&mut buf) {
    ///         Ok(Event::Start(ref e)) => match e.name().as_ref() {
    ///             b"tag1" | b"tag2" => (),
    ///             tag => {
    ///                 assert_eq!(b"tag3", tag);
    ///                 assert_eq!((3, 22), into_line_and_column(reader));
    ///                 break;
    ///             }
    ///         },
    ///         Ok(Event::Eof) => unreachable!(),
    ///         _ => (),
    ///     }
    ///     buf.clear();
    /// }
    /// ```
    pub fn into_inner(self) -> R {
        self.reader
    }

    /// Gets a reference to the underlying reader.
    pub const fn get_ref(&self) -> &R {
        &self.reader
    }

    /// Gets a mutable reference to the underlying reader.
    ///
    /// Avoid reading from this reader directly because this will not update the
    /// reader's position and will lead to incorrect positions of errors. If you
    /// want to read, use [`stream()`] instead.
    ///
    /// [`stream()`]: Self::stream
    pub fn get_mut(&mut self) -> &mut R {
        &mut self.reader
    }

    /// Gets the byte position in the input data just after the last emitted event
    /// (i.e. this is the position where the data of the last event ends).
    ///
    /// Note that for text events which originally ended with whitespace characters
    /// (` `, `\t`, `\r`, and `\n`), if [`Config::trim_text_end`] is set, this is the
    /// position before the trim, not the position of the last byte of the
    /// [`Event::Text`] content.
    pub const fn buffer_position(&self) -> u64 {
        self.state.offset
    }

    /// Gets the last error byte position in the input data. If there were no
    /// errors yet, returns `0`.
    ///
    /// Unlike `buffer_position` it will point to the place where it is rational
    /// to report error to the end user. For example, all [`SyntaxError`]s are
    /// reported when the parser sees EOF inside of some kind of markup. The
    /// `buffer_position()` will point to the last byte of input which is not
    /// very useful. `error_position()` will point to the start of corresponding
    /// markup element (i.e. to the `<` character).
    ///
    /// This position is always `<= buffer_position()`.
    pub const fn error_position(&self) -> u64 {
        self.state.last_error_offset
    }

    /// Gets the decoder that is used to decode bytes read by this reader into strings.
    ///
    /// If [`encoding`] feature is enabled, the used encoding may change after
    /// parsing the XML declaration, otherwise encoding is fixed to UTF-8.
    ///
    /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
    /// defaults to UTF-8.
    ///
    /// [`encoding`]: ../index.html#encoding
    #[inline]
    pub const fn decoder(&self) -> Decoder {
        self.state.decoder()
    }

    /// Gets direct access to the underlying reader, while tracking the amount of
    /// read data and updating [`Reader::buffer_position()`] accordingly.
    ///
    /// Note that this method gives you access to the internal reader, and the data
    /// read through it will not be returned in any subsequent events read by the
    /// `read_event` family of methods.
    ///
    /// # Example
    ///
    /// This example demonstrates how to stream raw bytes from an XML document.
    /// This could be used to implement streaming read of text, or to read raw binary
    /// bytes embedded in an XML document. (Documents with embedded raw bytes are not
    /// valid XML, but XML-derived file formats exist where such documents are valid).
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use std::io::{BufRead, Read};
    /// use quick_xml::events::{BytesEnd, BytesStart, Event};
    /// use quick_xml::reader::Reader;
    ///
    /// let mut reader = Reader::from_str("<tag>binary << data&></tag>");
    /// //                                 ^    ^               ^     ^
    /// //                                 0    5              21    27
    ///
    /// assert_eq!(
    ///     (reader.read_event().unwrap(), reader.buffer_position()),
    ///     // 5 - end of the `<tag>`
    ///     (Event::Start(BytesStart::new("tag")), 5)
    /// );
    ///
    /// // Reading directly from underlying reader will not update position
    /// // let mut inner = reader.get_mut();
    ///
    /// // Reading from the stream() advances position
    /// let mut inner = reader.stream();
    ///
    /// // Read binary data. We must know its size
    /// let mut binary = [0u8; 16];
    /// inner.read_exact(&mut binary).unwrap();
    /// assert_eq!(&binary, b"binary << data&>");
    /// // 21 - end of the `binary << data&>`
    /// assert_eq!(inner.offset(), 21);
    /// assert_eq!(reader.buffer_position(), 21);
    ///
    /// assert_eq!(
    ///     (reader.read_event().unwrap(), reader.buffer_position()),
    ///     // 27 - end of the `</tag>`
    ///     (Event::End(BytesEnd::new("tag")), 27)
    /// );
    ///
    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
    /// ```
    #[inline]
    pub fn stream(&mut self) -> BinaryStream<'_, R> {
        BinaryStream {
            inner: &mut self.reader,
            offset: &mut self.state.offset,
        }
    }
}
970
/// Private sync reading methods
impl<R> Reader<R> {
    /// Read text into the given buffer, and return an event that borrows from
    /// either that buffer or from the input itself, based on the type of the
    /// reader.
    ///
    /// The body is produced by the `read_event_impl!` macro, which receives the
    /// name of the sibling method `read_until_close` (presumably to call it when
    /// markup is found — confirm in the macro definition).
    fn read_event_impl<'i, B>(&mut self, mut buf: B) -> Result<Event<'i>, Error>
    where
        R: XmlSource<'i, B>,
    {
        read_event_impl!(self, buf, self.reader, read_until_close)
    }

    /// Private function to read until `>` is found. This function expects that
    /// it was called just after encountering a `<` symbol.
    ///
    /// The body is produced by the `read_until_close!` macro.
    fn read_until_close<'i, B>(&mut self, buf: B) -> Result<Event<'i>, Error>
    where
        R: XmlSource<'i, B>,
    {
        read_until_close!(self, buf, self.reader)
    }
}
992
993////////////////////////////////////////////////////////////////////////////////////////////////////
994
/// Result of an attempt to read XML textual data from the source.
///
/// # Parameters
/// - `'r`: lifetime of the returned text slices
/// - `B`: type of the buffer that is handed back to the caller by the
///   [`Markup`](Self::Markup) and [`Ref`](Self::Ref) variants
#[derive(Debug)]
enum ReadTextResult<'r, B> {
    /// Start of markup (`<` character) was found in the first byte. `<` was consumed.
    /// Contains buffer that should be returned back to the next iteration cycle
    /// to satisfy borrow checker requirements.
    Markup(B),
    /// Start of reference (`&` character) was found in the first byte.
    /// `&` was not consumed.
    /// Contains buffer that should be returned back to the next iteration cycle
    /// to satisfy borrow checker requirements.
    Ref(B),
    /// Contains text block up to start of markup (`<` character). `<` was consumed.
    UpToMarkup(&'r [u8]),
    /// Contains text block up to start of reference (`&` character).
    /// `&` was not consumed.
    UpToRef(&'r [u8]),
    /// Contains text block up to EOF; neither start of markup (`<` character)
    /// nor start of reference (`&` character) was found.
    UpToEof(&'r [u8]),
    /// IO error occurred.
    Err(io::Error),
}
1018
/// Result of an attempt to read general reference from the reader.
///
/// Every variant that carries data includes the starting `&` in it.
#[derive(Debug)]
enum ReadRefResult<'r> {
    /// Contains text block up to end of reference (`;` character).
    /// Result includes start `&`, but not end `;`.
    Ref(&'r [u8]),
    /// Contains text block up to EOF. None of the end of reference (`;`), start of
    /// another reference (`&`) or start of markup (`<`) characters were found.
    /// Result includes start `&`.
    UpToEof(&'r [u8]),
    /// Contains text block up to next possible reference (`&` character).
    /// Result includes start `&`.
    UpToRef(&'r [u8]),
    /// Contains text block up to start of markup (`<` character).
    /// Result includes start `&`.
    UpToMarkup(&'r [u8]),
    /// IO error occurred.
    Err(io::Error),
}
1038
/// Represents an input for a reader that can return borrowed data.
///
/// There are two implementors of this trait. The generic one reads data from
/// `Self`, copies some part of it into a provided buffer of type `B` and then
/// returns data that borrows from that buffer.
///
/// The other implementor is for `&[u8]`: instead of copying data it returns
/// data borrowed from `Self`. This implementation allows zero-copy
/// deserialization.
///
/// # Parameters
/// - `'r`: lifetime of a buffer from which events will borrow
/// - `B`: a type of a buffer that can be used to store data read from `Self` and
///   from which events can borrow
trait XmlSource<'r, B> {
    /// Removes UTF-8 BOM if it is present
    #[cfg(not(feature = "encoding"))]
    fn remove_utf8_bom(&mut self) -> io::Result<()>;

    /// Determines encoding from the start of input and removes BOM if it is present
    #[cfg(feature = "encoding")]
    fn detect_encoding(&mut self) -> io::Result<Option<&'static Encoding>>;

    /// Read input until start of markup (the `<`) is found, start of general entity
    /// reference (the `&`) is found or end of input is reached.
    ///
    /// # Parameters
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by amount of bytes consumed
    ///
    /// [events]: crate::events::Event
    fn read_text(&mut self, buf: B, position: &mut u64) -> ReadTextResult<'r, B>;

    /// Read input until end of general reference (the `;`) is found, start of
    /// another general reference (the `&`) is found or end of input is reached.
    ///
    /// This method must be called when current character is `&`.
    ///
    /// # Parameters
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by amount of bytes consumed
    ///
    /// [events]: crate::events::Event
    fn read_ref(&mut self, buf: B, position: &mut u64) -> ReadRefResult<'r>;

    /// Read input until the thing recognized by the given `parser` (for example,
    /// a processing instruction) is finished.
    ///
    /// This method expects that the start sequence of a parser already was read.
    ///
    /// Returns a slice of data read up to the end of the thing being parsed.
    /// The end of thing and the returned content is determined by the used parser.
    ///
    /// If input (`Self`) is exhausted and no bytes were read, or if the specified
    /// parser could not find the ending sequence of the thing, returns `SyntaxError`.
    ///
    /// # Parameters
    /// - `parser`: Determines where the thing being read ends
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by amount of bytes consumed
    ///
    /// A `P` type parameter is used to preserve state between calls to the underlying
    /// reader which provides bytes fed into the parser.
    ///
    /// [events]: crate::events::Event
    fn read_with<P>(&mut self, parser: P, buf: B, position: &mut u64) -> Result<&'r [u8], Error>
    where
        P: Parser;

    /// Read input until markup started with `<!` (comment, CDATA or DOCTYPE)
    /// is finished.
    ///
    /// This method expects that `<` already was read.
    ///
    /// Returns the kind of markup that was found together with a slice of the
    /// data that was read.
    ///
    /// NOTE(review): the tests in this module expect the returned slice to hold
    /// the whole markup, including the final `>` — confirm against the implementations.
    ///
    /// If the end of the markup is not found, an error is returned; the tests in
    /// this module expect [`Error::Syntax`] for an unclosed CDATA or comment.
    ///
    /// # Parameters
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by amount of bytes consumed
    ///
    /// [events]: crate::events::Event
    fn read_bang_element(
        &mut self,
        buf: B,
        position: &mut u64,
    ) -> Result<(BangType, &'r [u8]), Error>;

    /// Consume and discard all the whitespace until the next non-whitespace
    /// character or EOF.
    ///
    /// # Parameters
    /// - `position`: Will be increased by amount of bytes consumed
    fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()>;

    /// Return one character without consuming it, so that future `read_*` calls
    /// will still include it. On EOF, return `None`.
    fn peek_one(&mut self) -> io::Result<Option<u8>>;
}
1141
/// Possible elements started with `<!`
#[derive(Debug, PartialEq)]
enum BangType {
    /// `<![CDATA[...]]>`
    CData,
    /// `<!--...-->`
    Comment,
    /// `<!DOCTYPE...>`. Contains the state of the parser that is used to find
    /// the end of the DOCTYPE declaration
    DocType(DtdParser),
}
1152impl BangType {
1153    #[inline(always)]
1154    const fn new(byte: Option<u8>) -> Result<Self, SyntaxError> {
1155        Ok(match byte {
1156            Some(b'[') => Self::CData,
1157            Some(b'-') => Self::Comment,
1158            Some(b'D') | Some(b'd') => Self::DocType(DtdParser::BeforeInternalSubset(0)),
1159            _ => return Err(SyntaxError::InvalidBangMarkup),
1160        })
1161    }
1162
1163    /// If element is finished, returns its content up to `>` symbol and
1164    /// an index of this symbol, otherwise returns `None`
1165    ///
1166    /// # Parameters
1167    /// - `buf`: buffer with data consumed on previous iterations
1168    /// - `chunk`: data read on current iteration and not yet consumed from reader
1169    #[inline(always)]
1170    fn feed<'b>(&mut self, buf: &[u8], chunk: &'b [u8]) -> Option<usize> {
1171        match self {
1172            Self::Comment => {
1173                for i in memchr::memchr_iter(b'>', chunk) {
1174                    // Need to read at least 6 symbols (`!---->`) for properly finished comment
1175                    // <!----> - XML comment
1176                    // 0123456 - i
1177                    if buf.len() + i > 5 {
1178                        if chunk[..i].ends_with(b"--") {
1179                            // We cannot strip last `--` from the buffer because we need it in case of
1180                            // check_comments enabled option. XML standard requires that comment
1181                            // will not end with `--->` sequence because this is a special case of
1182                            // `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments)
1183                            return Some(i);
1184                        }
1185                        // End sequence `-|->` was splitted at |
1186                        //        buf --/   \-- chunk
1187                        if i == 1 && buf.ends_with(b"-") && chunk[0] == b'-' {
1188                            return Some(i);
1189                        }
1190                        // End sequence `--|>` was splitted at |
1191                        //         buf --/   \-- chunk
1192                        if i == 0 && buf.ends_with(b"--") {
1193                            return Some(i);
1194                        }
1195                    }
1196                }
1197            }
1198            Self::CData => {
1199                for i in memchr::memchr_iter(b'>', chunk) {
1200                    if chunk[..i].ends_with(b"]]") {
1201                        return Some(i);
1202                    }
1203                    // End sequence `]|]>` was splitted at |
1204                    //        buf --/   \-- chunk
1205                    if i == 1 && buf.ends_with(b"]") && chunk[0] == b']' {
1206                        return Some(i);
1207                    }
1208                    // End sequence `]]|>` was splitted at |
1209                    //         buf --/   \-- chunk
1210                    if i == 0 && buf.ends_with(b"]]") {
1211                        return Some(i);
1212                    }
1213                }
1214            }
1215            Self::DocType(ref mut parser) => return parser.feed(buf, chunk),
1216        }
1217        None
1218    }
1219    #[inline]
1220    const fn to_err(&self) -> SyntaxError {
1221        match self {
1222            Self::CData => SyntaxError::UnclosedCData,
1223            Self::Comment => SyntaxError::UnclosedComment,
1224            Self::DocType(_) => SyntaxError::UnclosedDoctype,
1225        }
1226    }
1227}
1228
1229////////////////////////////////////////////////////////////////////////////////////////////////////
1230
1231#[cfg(test)]
1232mod test {
1233    /// Checks the internal implementation of the various reader methods
1234    macro_rules! check {
1235        (
1236            #[$test:meta]
1237            $read_event:ident,
1238            $read_until_close:ident,
1239            // constructor of the XML source on which internal functions will be called
1240            $source:path,
1241            $skip:literal,
1242            // constructor of the buffer to which read data will stored
1243            $buf:expr
1244            $(, $async:ident, $await:ident)?
1245        ) => {
1246            mod read_bang_element {
1247                use super::*;
1248                use crate::errors::{Error, SyntaxError};
1249                use crate::reader::{BangType, DtdParser};
1250                use crate::utils::Bytes;
1251
1252                /// Checks that reading CDATA content works correctly
1253                mod cdata {
1254                    use super::*;
1255                    use pretty_assertions::assert_eq;
1256
1257                    /// Checks that if input begins like CDATA element, but CDATA start sequence
1258                    /// is not finished, parsing ends with an error
1259                    #[$test]
1260                    #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"]
1261                    $($async)? fn not_properly_start() {
1262                        let buf = $buf;
1263                        let mut position = 0;
1264                        let mut input = &b"<![]]>other content"[$skip..];
1265                        //                 ^= 0
1266
1267                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1268                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData),
1269                            x => panic!(
1270                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1271                                x
1272                            ),
1273                        }
1274                        assert_eq!(position, 1);
1275                    }
1276
1277                    /// Checks that if CDATA startup sequence was matched, but an end sequence
1278                    /// is not found, parsing ends with an error
1279                    #[$test]
1280                    $($async)? fn not_closed() {
1281                        let buf = $buf;
1282                        let mut position = 0;
1283                        let mut input = &b"<![CDATA[other content"[$skip..];
1284                        //                 ^= 0                  ^= 22
1285
1286                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1287                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData),
1288                            x => panic!(
1289                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1290                                x
1291                            ),
1292                        }
1293                        assert_eq!(position, 22);
1294                    }
1295
1296                    /// Checks that CDATA element without content inside parsed successfully
1297                    #[$test]
1298                    $($async)? fn empty() {
1299                        let buf = $buf;
1300                        let mut position = 0;
1301                        let mut input = &b"<![CDATA[]]>other content"[$skip..];
1302                        //                ^= 0        ^= 12
1303
1304                        let (ty, bytes) = $source(&mut input)
1305                            .read_bang_element(buf, &mut position)
1306                            $(.$await)?
1307                            .unwrap();
1308                        assert_eq!(
1309                            (ty, Bytes(bytes)),
1310                            (BangType::CData, Bytes(b"<![CDATA[]]>"))
1311                        );
1312                        assert_eq!(position, 12);
1313                    }
1314
1315                    /// Checks that CDATA element with content parsed successfully.
1316                    /// Additionally checks that sequences inside CDATA that may look like
1317                    /// a CDATA end sequence do not interrupt CDATA parsing
1318                    #[$test]
1319                    $($async)? fn with_content() {
1320                        let buf = $buf;
1321                        let mut position = 0;
1322                        let mut input = &b"<![CDATA[cdata]] ]>content]]>other content]]>"[$skip..];
1323                        //                 ^= 0                         ^= 29
1324
1325                        let (ty, bytes) = $source(&mut input)
1326                            .read_bang_element(buf, &mut position)
1327                            $(.$await)?
1328                            .unwrap();
1329                        assert_eq!(
1330                            (ty, Bytes(bytes)),
1331                            (BangType::CData, Bytes(b"<![CDATA[cdata]] ]>content]]>"))
1332                        );
1333                        assert_eq!(position, 29);
1334                    }
1335                }
1336
1337                /// Checks that reading XML comments works correctly. According to the [specification],
1338                /// comment data can contain any sequence except `--`:
1339                ///
1340                /// ```peg
                /// comment = '<!--' (!'--' char)* '-->';
1342                /// char = [#x1-#x2C]
1343                ///      / [#x2E-#xD7FF]
1344                ///      / [#xE000-#xFFFD]
1345                ///      / [#x10000-#x10FFFF]
1346                /// ```
1347                ///
1348                /// The presence of this limitation, however, is simply a poorly designed specification
1349                /// (maybe for purpose of building of LL(1) XML parser) and quick-xml does not check for
1350                /// presence of these sequences by default. This tests allow such content.
1351                ///
1352                /// [specification]: https://www.w3.org/TR/xml11/#dt-comment
1353                mod comment {
1354                    use super::*;
1355                    use pretty_assertions::assert_eq;
1356
1357                    #[$test]
1358                    #[ignore = "start comment sequence fully checked outside of `read_bang_element`"]
1359                    $($async)? fn not_properly_start() {
1360                        let buf = $buf;
1361                        let mut position = 0;
1362                        let mut input = &b"<!- -->other content"[$skip..];
1363                        //                  ^= 1
1364
1365                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1366                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1367                            x => panic!(
1368                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1369                                x
1370                            ),
1371                        }
1372                        assert_eq!(position, 1);
1373                    }
1374
1375                    #[$test]
1376                    $($async)? fn not_properly_end() {
1377                        let buf = $buf;
1378                        let mut position = 0;
1379                        let mut input = &b"<!->other content"[$skip..];
1380                        //                 ^= 0             ^= 17
1381
1382                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1383                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1384                            x => panic!(
1385                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1386                                x
1387                            ),
1388                        }
1389                        assert_eq!(position, 17);
1390                    }
1391
1392                    #[$test]
1393                    $($async)? fn not_closed1() {
1394                        let buf = $buf;
1395                        let mut position = 0;
1396                        let mut input = &b"<!--other content"[$skip..];
1397                        //                 ^= 0             ^= 17
1398
1399                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1400                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1401                            x => panic!(
1402                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1403                                x
1404                            ),
1405                        }
1406                        assert_eq!(position, 17);
1407                    }
1408
1409                    #[$test]
1410                    $($async)? fn not_closed2() {
1411                        let buf = $buf;
1412                        let mut position = 0;
1413                        let mut input = &b"<!-->other content"[$skip..];
1414                        //                 ^= 0              ^= 18
1415
1416                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1417                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1418                            x => panic!(
1419                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1420                                x
1421                            ),
1422                        }
1423                        assert_eq!(position, 18);
1424                    }
1425
1426                    #[$test]
1427                    $($async)? fn not_closed3() {
1428                        let buf = $buf;
1429                        let mut position = 0;
1430                        let mut input = &b"<!--->other content"[$skip..];
1431                        //                 ^= 0               ^= 19
1432
1433                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1434                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1435                            x => panic!(
1436                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1437                                x
1438                            ),
1439                        }
1440                        assert_eq!(position, 19);
1441                    }
1442
1443                    #[$test]
1444                    $($async)? fn empty() {
1445                        let buf = $buf;
1446                        let mut position = 0;
1447                        let mut input = &b"<!---->other content"[$skip..];
1448                        //                 ^= 0   ^= 7
1449
1450                        let (ty, bytes) = $source(&mut input)
1451                            .read_bang_element(buf, &mut position)
1452                            $(.$await)?
1453                            .unwrap();
1454                        assert_eq!(
1455                            (ty, Bytes(bytes)),
1456                            (BangType::Comment, Bytes(b"<!---->"))
1457                        );
1458                        assert_eq!(position, 7);
1459                    }
1460
1461                    #[$test]
1462                    $($async)? fn with_content() {
1463                        let buf = $buf;
1464                        let mut position = 0;
1465                        let mut input = &b"<!--->comment<--->other content"[$skip..];
1466                        //                 ^= 0              ^= 18
1467
1468                        let (ty, bytes) = $source(&mut input)
1469                            .read_bang_element(buf, &mut position)
1470                            $(.$await)?
1471                            .unwrap();
1472                        assert_eq!(
1473                            (ty, Bytes(bytes)),
1474                            (BangType::Comment, Bytes(b"<!--->comment<--->"))
1475                        );
1476                        assert_eq!(position, 18);
1477                    }
1478                }
1479
1480                /// Checks that reading DOCTYPE definition works correctly
1481                mod doctype {
1482                    use super::*;
1483
1484                    mod uppercase {
1485                        use super::*;
1486                        use pretty_assertions::assert_eq;
1487
1488                        #[$test]
1489                        $($async)? fn not_properly_start() {
1490                            let buf = $buf;
1491                            let mut position = 0;
1492                            let mut input = &b"<!D other content"[$skip..];
1493                            //                 ^= 0             ^= 17
1494
1495                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1496                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1497                                x => panic!(
1498                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1499                                    x
1500                                ),
1501                            }
1502                            assert_eq!(position, 17);
1503                        }
1504
1505                        #[$test]
1506                        $($async)? fn without_space() {
1507                            let buf = $buf;
1508                            let mut position = 0;
1509                            let mut input = &b"<!DOCTYPEother content"[$skip..];
1510                            //                 ^= 0                  ^= 22
1511
1512                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1513                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1514                                x => panic!(
1515                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1516                                    x
1517                                ),
1518                            }
1519                            assert_eq!(position, 22);
1520                        }
1521
1522                        #[$test]
1523                        $($async)? fn empty() {
1524                            let buf = $buf;
1525                            let mut position = 0;
1526                            let mut input = &b"<!DOCTYPE>other content"[$skip..];
1527                            //                 ^= 0      ^= 10
1528
1529                            let (ty, bytes) = $source(&mut input)
1530                                .read_bang_element(buf, &mut position)
1531                                $(.$await)?
1532                                .unwrap();
1533                            assert_eq!(
1534                                (ty, Bytes(bytes)),
1535                                (BangType::DocType(DtdParser::Finished), Bytes(b"<!DOCTYPE>"))
1536                            );
1537                            assert_eq!(position, 10);
1538                        }
1539
1540                        #[$test]
1541                        $($async)? fn not_closed() {
1542                            let buf = $buf;
1543                            let mut position = 0;
1544                            let mut input = &b"<!DOCTYPE other content"[$skip..];
1545                            //                 ^= 0                   ^23
1546
1547                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1548                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1549                                x => panic!(
1550                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1551                                    x
1552                                ),
1553                            }
1554                            assert_eq!(position, 23);
1555                        }
1556                    }
1557
1558                    mod lowercase {
1559                        use super::*;
1560                        use pretty_assertions::assert_eq;
1561
1562                        #[$test]
1563                        $($async)? fn not_properly_start() {
1564                            let buf = $buf;
1565                            let mut position = 0;
1566                            let mut input = &b"<!d other content"[$skip..];
1567                            //                 ^= 0             ^= 17
1568
1569                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1570                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1571                                x => panic!(
1572                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1573                                    x
1574                                ),
1575                            }
1576                            assert_eq!(position, 17);
1577                        }
1578
1579                        #[$test]
1580                        $($async)? fn without_space() {
1581                            let buf = $buf;
1582                            let mut position = 0;
1583                            let mut input = &b"<!doctypeother content"[$skip..];
1584                            //                 ^= 0                  ^= 22
1585
1586                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1587                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1588                                x => panic!(
1589                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1590                                    x
1591                                ),
1592                            }
1593                            assert_eq!(position, 22);
1594                        }
1595
1596                        #[$test]
1597                        $($async)? fn empty() {
1598                            let buf = $buf;
1599                            let mut position = 0;
1600                            let mut input = &b"<!doctype>other content"[$skip..];
1601                            //                 ^= 0      ^= 10
1602
1603                            let (ty, bytes) = $source(&mut input)
1604                                .read_bang_element(buf, &mut position)
1605                                $(.$await)?
1606                                .unwrap();
1607                            assert_eq!(
1608                                (ty, Bytes(bytes)),
1609                                (BangType::DocType(DtdParser::Finished), Bytes(b"<!doctype>"))
1610                            );
1611                            assert_eq!(position, 10);
1612                        }
1613
1614                        #[$test]
1615                        $($async)? fn not_closed() {
1616                            let buf = $buf;
1617                            let mut position = 0;
1618                            let mut input = &b"<!doctype other content"[$skip..];
1619                            //                 ^= 0                   ^= 23
1620
1621                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1622                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1623                                x => panic!(
1624                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1625                                    x
1626                                ),
1627                            }
1628                            assert_eq!(position, 23);
1629                        }
1630                    }
1631                }
1632            }
1633
1634            mod read_text {
1635                use super::*;
1636                use crate::reader::ReadTextResult;
1637                use crate::utils::Bytes;
1638                use pretty_assertions::assert_eq;
1639
1640                #[$test]
1641                $($async)? fn empty() {
1642                    let buf = $buf;
1643                    let mut position = 1;
1644                    let mut input = b"".as_ref();
1645                    //                ^= 1
1646
1647                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1648                        ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"")),
1649                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1650                    }
1651                    assert_eq!(position, 1);
1652                }
1653
1654                #[$test]
1655                $($async)? fn markup() {
1656                    let buf = $buf;
1657                    let mut position = 1;
1658                    let mut input = b"<".as_ref();
1659                    //                 ^= 1
1660
1661                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1662                        ReadTextResult::Markup(b) => assert_eq!(b, $buf),
1663                        x => panic!("Expected `Markup(_)`, but got `{:?}`", x),
1664                    }
1665                    assert_eq!(position, 1);
1666                }
1667
1668                #[$test]
1669                $($async)? fn ref_() {
1670                    let buf = $buf;
1671                    let mut position = 1;
1672                    let mut input = b"&".as_ref();
1673                    //                ^= 1
1674
1675                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1676                        ReadTextResult::Ref(b) => assert_eq!(b, $buf),
1677                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1678                    }
1679                    assert_eq!(position, 1);
1680                }
1681
1682                #[$test]
1683                $($async)? fn up_to_markup() {
1684                    let buf = $buf;
1685                    let mut position = 1;
1686                    let mut input = b"a<".as_ref();
1687                    //                  ^= 2
1688
1689                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1690                        ReadTextResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1691                        x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
1692                    }
1693                    assert_eq!(position, 2);
1694                }
1695
1696                #[$test]
1697                $($async)? fn up_to_ref() {
1698                    let buf = $buf;
1699                    let mut position = 1;
1700                    let mut input = b"a&".as_ref();
1701                    //                 ^= 2
1702
1703                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1704                        ReadTextResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1705                        x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
1706                    }
1707                    assert_eq!(position, 2);
1708                }
1709
1710                #[$test]
1711                $($async)? fn up_to_eof() {
1712                    let buf = $buf;
1713                    let mut position = 1;
1714                    let mut input = b"a".as_ref();
1715                    //                 ^= 2
1716
1717                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1718                        ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1719                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1720                    }
1721                    assert_eq!(position, 2);
1722                }
1723            }
1724
1725            mod read_ref {
1726                use super::*;
1727                use crate::reader::ReadRefResult;
1728                use crate::utils::Bytes;
1729                use pretty_assertions::assert_eq;
1730
1731                // Empty input is not allowed for `read_ref` so not tested.
1732                // Borrowed source triggers debug assertion,
1733                // buffered do nothing due to implementation details.
1734
1735                #[$test]
1736                $($async)? fn up_to_eof() {
1737                    let buf = $buf;
1738                    let mut position = 1;
1739                    let mut input = b"&".as_ref();
1740                    //                 ^= 2
1741
1742                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1743                        ReadRefResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1744                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1745                    }
1746                    assert_eq!(position, 2);
1747                }
1748
1749                #[$test]
1750                $($async)? fn up_to_ref() {
1751                    let buf = $buf;
1752                    let mut position = 1;
1753                    let mut input = b"&&".as_ref();
1754                    //                 ^= 2
1755
1756                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1757                        ReadRefResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1758                        x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
1759                    }
1760                    assert_eq!(position, 2);
1761                }
1762
1763                #[$test]
1764                $($async)? fn up_to_markup() {
1765                    let buf = $buf;
1766                    let mut position = 1;
1767                    let mut input = b"&<".as_ref();
1768                    //                 ^= 2
1769
1770                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1771                        ReadRefResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1772                        x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
1773                    }
1774                    assert_eq!(position, 2);
1775                }
1776
1777                #[$test]
1778                $($async)? fn empty_ref() {
1779                    let buf = $buf;
1780                    let mut position = 1;
1781                    let mut input = b"&;".as_ref();
1782                    //                  ^= 3
1783
1784                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1785                        ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&;")),
1786                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1787                    }
1788                    assert_eq!(position, 3);
1789                }
1790
1791                #[$test]
1792                $($async)? fn normal() {
1793                    let buf = $buf;
1794                    let mut position = 1;
1795                    let mut input = b"&lt;".as_ref();
1796                    //                    ^= 5
1797
1798                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1799                        ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&lt;")),
1800                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1801                    }
1802                    assert_eq!(position, 5);
1803                }
1804            }
1805
1806            mod read_element {
1807                use super::*;
1808                use crate::errors::{Error, SyntaxError};
1809                use crate::parser::ElementParser;
1810                use crate::utils::Bytes;
1811                use pretty_assertions::assert_eq;
1812
1813                /// Checks that nothing was read from empty buffer
1814                /// `<` read in peek_one that is called before read_with, that is why it in the input buffer
1815                /// peek_one, however, does not increment position for simplicity of the code
1816                #[$test]
1817                $($async)? fn empty() {
1818                    let buf = $buf;
1819                    let mut position = 0;
1820                    let mut input = &b"<"[$skip..];
1821                    //                  ^= 1
1822
1823                    match $source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? {
1824                        Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedTag),
1825                        x => panic!(
1826                            "Expected `Err(Syntax(_))`, but got `{:?}`",
1827                            x
1828                        ),
1829                    }
1830                    assert_eq!(position, 1);
1831                }
1832
1833                mod open {
1834                    use super::*;
1835                    use pretty_assertions::assert_eq;
1836
1837                    #[$test]
1838                    $($async)? fn empty_tag() {
1839                        let buf = $buf;
1840                        let mut position = 0;
1841                        let mut input = &b"<>"[$skip..];
1842                        //                   ^= 2
1843
1844                        assert_eq!(
1845                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1846                            Bytes(b"<>")
1847                        );
1848                        assert_eq!(position, 2);
1849                    }
1850
1851                    #[$test]
1852                    $($async)? fn normal() {
1853                        let buf = $buf;
1854                        let mut position = 0;
1855                        let mut input = &b"<tag>"[$skip..];
1856                        //                      ^= 5
1857
1858                        assert_eq!(
1859                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1860                            Bytes(b"<tag>")
1861                        );
1862                        assert_eq!(position, 5);
1863                    }
1864
1865                    #[$test]
1866                    $($async)? fn empty_ns_empty_tag() {
1867                        let buf = $buf;
1868                        let mut position = 0;
1869                        let mut input = &b"<:>"[$skip..];
1870                        //                    ^= 3
1871
1872                        assert_eq!(
1873                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1874                            Bytes(b"<:>")
1875                        );
1876                        assert_eq!(position, 3);
1877                    }
1878
1879                    #[$test]
1880                    $($async)? fn empty_ns() {
1881                        let buf = $buf;
1882                        let mut position = 0;
1883                        let mut input = &b"<:tag>"[$skip..];
1884                        //                       ^= 6
1885
1886                        assert_eq!(
1887                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1888                            Bytes(b"<:tag>")
1889                        );
1890                        assert_eq!(position, 6);
1891                    }
1892
1893                    #[$test]
1894                    $($async)? fn with_attributes() {
1895                        let buf = $buf;
1896                        let mut position = 0;
1897                        let mut input = &br#"<tag  attr-1=">"  attr2  =  '>'  3attr>"#[$skip..];
1898                        //                                                          ^= 39
1899
1900                        assert_eq!(
1901                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1902                            Bytes(br#"<tag  attr-1=">"  attr2  =  '>'  3attr>"#)
1903                        );
1904                        assert_eq!(position, 39);
1905                    }
1906                }
1907
1908                mod self_closed {
1909                    use super::*;
1910                    use pretty_assertions::assert_eq;
1911
1912                    #[$test]
1913                    $($async)? fn empty_tag() {
1914                        let buf = $buf;
1915                        let mut position = 0;
1916                        let mut input = &b"</>"[$skip..];
1917                        //                    ^= 3
1918
1919                        assert_eq!(
1920                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1921                            Bytes(b"</>")
1922                        );
1923                        assert_eq!(position, 3);
1924                    }
1925
1926                    #[$test]
1927                    $($async)? fn normal() {
1928                        let buf = $buf;
1929                        let mut position = 0;
1930                        let mut input = &b"<tag/>"[$skip..];
1931                        //                       ^= 6
1932
1933                        assert_eq!(
1934                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1935                            Bytes(b"<tag/>")
1936                        );
1937                        assert_eq!(position, 6);
1938                    }
1939
1940                    #[$test]
1941                    $($async)? fn empty_ns_empty_tag() {
1942                        let buf = $buf;
1943                        let mut position = 0;
1944                        let mut input = &b"<:/>"[$skip..];
1945                        //                     ^= 4
1946
1947                        assert_eq!(
1948                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1949                            Bytes(b"<:/>")
1950                        );
1951                        assert_eq!(position, 4);
1952                    }
1953
1954                    #[$test]
1955                    $($async)? fn empty_ns() {
1956                        let buf = $buf;
1957                        let mut position = 0;
1958                        let mut input = &b"<:tag/>"[$skip..];
1959                        //                        ^= 7
1960
1961                        assert_eq!(
1962                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1963                            Bytes(b"<:tag/>")
1964                        );
1965                        assert_eq!(position, 7);
1966                    }
1967
1968                    #[$test]
1969                    $($async)? fn with_attributes() {
1970                        let buf = $buf;
1971                        let mut position = 0;
1972                        let mut input = &br#"<tag  attr-1="/>"  attr2  =  '/>'  3attr/>"#[$skip..];
1973                        //                                                             ^= 42
1974
1975                        assert_eq!(
1976                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1977                            Bytes(br#"<tag  attr-1="/>"  attr2  =  '/>'  3attr/>"#)
1978                        );
1979                        assert_eq!(position, 42);
1980                    }
1981                }
1982
1983                mod close {
1984                    use super::*;
1985                    use pretty_assertions::assert_eq;
1986
1987                    #[$test]
1988                    $($async)? fn empty_tag() {
1989                        let buf = $buf;
1990                        let mut position = 0;
1991                        let mut input = &b"</ >"[$skip..];
1992                        //                     ^= 4
1993
1994                        assert_eq!(
1995                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1996                            Bytes(b"</ >")
1997                        );
1998                        assert_eq!(position, 4);
1999                    }
2000
2001                    #[$test]
2002                    $($async)? fn normal() {
2003                        let buf = $buf;
2004                        let mut position = 0;
2005                        let mut input = &b"</tag>"[$skip..];
2006                        //                       ^= 6
2007
2008                        assert_eq!(
2009                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2010                            Bytes(b"</tag>")
2011                        );
2012                        assert_eq!(position, 6);
2013                    }
2014
2015                    #[$test]
2016                    $($async)? fn empty_ns_empty_tag() {
2017                        let buf = $buf;
2018                        let mut position = 0;
2019                        let mut input = &b"</:>"[$skip..];
2020                        //                     ^= 4
2021
2022                        assert_eq!(
2023                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2024                            Bytes(b"</:>")
2025                        );
2026                        assert_eq!(position, 4);
2027                    }
2028
2029                    #[$test]
2030                    $($async)? fn empty_ns() {
2031                        let buf = $buf;
2032                        let mut position = 0;
2033                        let mut input = &b"</:tag>"[$skip..];
2034                        //                        ^= 7
2035
2036                        assert_eq!(
2037                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2038                            Bytes(b"</:tag>")
2039                        );
2040                        assert_eq!(position, 7);
2041                    }
2042
2043                    #[$test]
2044                    $($async)? fn with_attributes() {
2045                        let buf = $buf;
2046                        let mut position = 0;
2047                        let mut input = &br#"</tag  attr-1=">"  attr2  =  '>'  3attr>"#[$skip..];
2048                        //                                                           ^= 40
2049
2050                        assert_eq!(
2051                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2052                            Bytes(br#"</tag  attr-1=">"  attr2  =  '>'  3attr>"#)
2053                        );
2054                        assert_eq!(position, 40);
2055                    }
2056                }
2057            }
2058
            /// Ensures that no empty `Text` events are generated
2060            mod $read_event {
2061                use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
2062                use crate::reader::Reader;
2063                use pretty_assertions::assert_eq;
2064
2065                /// When `encoding` feature is enabled, encoding should be detected
2066                /// from BOM (UTF-8) and BOM should be stripped.
2067                ///
2068                /// When `encoding` feature is disabled, UTF-8 is assumed and BOM
2069                /// character should be stripped for consistency
2070                #[$test]
2071                $($async)? fn bom_from_reader() {
2072                    let mut reader = Reader::from_reader("\u{feff}\u{feff}".as_bytes());
2073
2074                    assert_eq!(
2075                        reader.$read_event($buf) $(.$await)? .unwrap(),
2076                        Event::Text(BytesText::from_escaped("\u{feff}"))
2077                    );
2078
2079                    assert_eq!(
2080                        reader.$read_event($buf) $(.$await)? .unwrap(),
2081                        Event::Eof
2082                    );
2083                }
2084
2085                /// When parsing from &str, encoding is fixed (UTF-8), so
2086                /// - when `encoding` feature is disabled, the behavior the
2087                ///   same as in `bom_from_reader` text
2088                /// - when `encoding` feature is enabled, the behavior should
2089                ///   stay consistent, so the first BOM character is stripped
2090                #[$test]
2091                $($async)? fn bom_from_str() {
2092                    let mut reader = Reader::from_str("\u{feff}\u{feff}");
2093
2094                    assert_eq!(
2095                        reader.$read_event($buf) $(.$await)? .unwrap(),
2096                        Event::Text(BytesText::from_escaped("\u{feff}"))
2097                    );
2098
2099                    assert_eq!(
2100                        reader.$read_event($buf) $(.$await)? .unwrap(),
2101                        Event::Eof
2102                    );
2103                }
2104
2105                #[$test]
2106                $($async)? fn declaration() {
2107                    let mut reader = Reader::from_str("<?xml ?>");
2108
2109                    assert_eq!(
2110                        reader.$read_event($buf) $(.$await)? .unwrap(),
2111                        Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
2112                    );
2113                }
2114
2115                #[$test]
2116                $($async)? fn doctype() {
2117                    let mut reader = Reader::from_str("<!DOCTYPE x>");
2118
2119                    assert_eq!(
2120                        reader.$read_event($buf) $(.$await)? .unwrap(),
2121                        Event::DocType(BytesText::from_escaped("x"))
2122                    );
2123                }
2124
2125                #[$test]
2126                $($async)? fn processing_instruction() {
2127                    let mut reader = Reader::from_str("<?xml-stylesheet '? >\" ?>");
2128
2129                    assert_eq!(
2130                        reader.$read_event($buf) $(.$await)? .unwrap(),
2131                        Event::PI(BytesPI::new("xml-stylesheet '? >\" "))
2132                    );
2133                }
2134
2135                /// Lone closing tags are not allowed, so testing it together with start tag
2136                #[$test]
2137                $($async)? fn start_and_end() {
2138                    let mut reader = Reader::from_str("<tag></tag>");
2139
2140                    assert_eq!(
2141                        reader.$read_event($buf) $(.$await)? .unwrap(),
2142                        Event::Start(BytesStart::new("tag"))
2143                    );
2144
2145                    assert_eq!(
2146                        reader.$read_event($buf) $(.$await)? .unwrap(),
2147                        Event::End(BytesEnd::new("tag"))
2148                    );
2149                }
2150
2151                #[$test]
2152                $($async)? fn empty() {
2153                    let mut reader = Reader::from_str("<tag/>");
2154
2155                    assert_eq!(
2156                        reader.$read_event($buf) $(.$await)? .unwrap(),
2157                        Event::Empty(BytesStart::new("tag"))
2158                    );
2159                }
2160
2161                #[$test]
2162                $($async)? fn text() {
2163                    let mut reader = Reader::from_str("text");
2164
2165                    assert_eq!(
2166                        reader.$read_event($buf) $(.$await)? .unwrap(),
2167                        Event::Text(BytesText::from_escaped("text"))
2168                    );
2169                }
2170
2171                #[$test]
2172                $($async)? fn cdata() {
2173                    let mut reader = Reader::from_str("<![CDATA[]]>");
2174
2175                    assert_eq!(
2176                        reader.$read_event($buf) $(.$await)? .unwrap(),
2177                        Event::CData(BytesCData::new(""))
2178                    );
2179                }
2180
2181                #[$test]
2182                $($async)? fn comment() {
2183                    let mut reader = Reader::from_str("<!---->");
2184
2185                    assert_eq!(
2186                        reader.$read_event($buf) $(.$await)? .unwrap(),
2187                        Event::Comment(BytesText::from_escaped(""))
2188                    );
2189                }
2190
2191                #[$test]
2192                $($async)? fn eof() {
2193                    let mut reader = Reader::from_str("");
2194
2195                    assert_eq!(
2196                        reader.$read_event($buf) $(.$await)? .unwrap(),
2197                        Event::Eof
2198                    );
2199                }
2200            }
2201        };
2202    }
2203
2204    // Export macros for the child modules:
2205    // - buffered_reader
2206    // - slice_reader
2207    pub(super) use check;
2208}
quick_xml/reader/mod.rs

quick_xml/reader/
mod.rs