// quick_xml/reader/mod.rs

//! Contains a high-level interface for a pull-based XML parser.
2
#[cfg(feature = "encoding")]
use encoding_rs::Encoding;
use std::io;
use std::ops::Range;

use crate::encoding::Decoder;
use crate::errors::{Error, IllFormedError, SyntaxError};
use crate::events::{BytesRef, Event};
use crate::parser::{DtdParser, ElementParser, Parser, PiParser};
use crate::reader::state::ReaderState;
13
/// A struct that holds a parser configuration.
///
/// Current parser configuration can be retrieved by calling [`Reader::config()`]
/// and changed by changing properties of the object returned by a call to
/// [`Reader::config_mut()`].
///
/// [`Reader::config()`]: crate::reader::Reader::config
/// [`Reader::config_mut()`]: crate::reader::Reader::config_mut
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))]
#[non_exhaustive]
pub struct Config {
    /// Whether a lone ampersand character (without a paired semicolon) should be
    /// allowed in textual content. Unless enabled, in case of a dangling ampersand,
    /// the [`Error::IllFormed(UnclosedReference)`] is returned from read methods.
    ///
    /// Default: `false`
    ///
    /// # Example
    ///
    /// ```
    /// # use quick_xml::events::{BytesRef, BytesText, Event};
    /// # use quick_xml::reader::Reader;
    /// # use pretty_assertions::assert_eq;
    /// let mut reader = Reader::from_str("text with & &amp; & alone");
    /// reader.config_mut().allow_dangling_amp = true;
    ///
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new("text with ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(BytesRef::new("amp")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new(" ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& alone")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
    /// ```
    ///
    /// [`Error::IllFormed(UnclosedReference)`]: crate::errors::IllFormedError::UnclosedReference
    pub allow_dangling_amp: bool,

    /// Whether unmatched closing tag names should be allowed. Unless enabled,
    /// in case of a dangling end tag, the [`Error::IllFormed(UnmatchedEndTag)`]
    /// is returned from read methods.
    ///
    /// When set to `true`, it won't check if a closing tag has a corresponding
    /// opening tag at all. For example, `<a></a></b>` will be permitted.
    ///
    /// Note that the emitted [`End`] event will not be modified if this is enabled,
    /// i.e. it will contain the data of the unmatched end tag.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event.
    ///
    /// Default: `false`
    ///
    /// [`Error::IllFormed(UnmatchedEndTag)`]: crate::errors::IllFormedError::UnmatchedEndTag
    /// [`End`]: crate::events::Event::End
    pub allow_unmatched_ends: bool,

    /// Whether comments should be validated. If enabled, in case of an invalid comment
    /// [`Error::IllFormed(DoubleHyphenInComment)`] is returned from read methods.
    ///
    /// When set to `true`, every [`Comment`] event will be checked for not
    /// containing `--`, which [is not allowed] in XML comments. Most of the time
    /// we don't want comments at all so we don't really care about comment
    /// correctness, thus the default value is `false` to improve performance.
    ///
    /// Default: `false`
    ///
    /// [`Error::IllFormed(DoubleHyphenInComment)`]: crate::errors::IllFormedError::DoubleHyphenInComment
    /// [`Comment`]: crate::events::Event::Comment
    /// [is not allowed]: https://www.w3.org/TR/xml11/#sec-comments
    pub check_comments: bool,

    /// Whether mismatched closing tag names should be detected. If enabled, in
    /// case of mismatch the [`Error::IllFormed(MismatchedEndTag)`] is returned from
    /// read methods.
    ///
    /// Note that start and end tags [should match literally][spec], they cannot
    /// have different prefixes even if both prefixes resolve to the same namespace.
    /// The XML
    ///
    /// ```xml
    /// <outer xmlns="namespace" xmlns:p="namespace">
    /// </p:outer>
    /// ```
    ///
    /// is not valid, even though semantically the start tag is the same as the
    /// end tag. The reason is that namespaces are an extension of the original
    /// XML specification (without namespaces) and it should be backward-compatible.
    ///
    /// When set to `false`, it won't check if a closing tag matches the corresponding
    /// opening tag. For example, `<mytag></different_tag>` will be permitted.
    ///
    /// If the XML is known to be sane (already processed, etc.) this saves extra time.
    ///
    /// Note that the emitted [`End`] event will not be modified if this is disabled,
    /// i.e. it will contain the data of the mismatched end tag.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event. However, if
    /// [`expand_empty_elements`] is also set, only one additional allocation will
    /// be performed that supports both these options.
    ///
    /// Default: `true`
    ///
    /// [`Error::IllFormed(MismatchedEndTag)`]: crate::errors::IllFormedError::MismatchedEndTag
    /// [spec]: https://www.w3.org/TR/xml11/#dt-etag
    /// [`End`]: crate::events::Event::End
    /// [`expand_empty_elements`]: Self::expand_empty_elements
    pub check_end_names: bool,

    /// Whether empty elements should be split into a `Start` and an `End` event.
    ///
    /// When set to `true`, all [`Empty`] events produced by a self-closing tag
    /// like `<tag/>` are expanded into a [`Start`] event followed by an [`End`]
    /// event. When set to `false` (the default), those tags are represented by
    /// an [`Empty`] event instead.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event. However, if
    /// [`check_end_names`] is also set, only one additional allocation will be
    /// performed that supports both these options.
    ///
    /// Default: `false`
    ///
    /// [`Empty`]: crate::events::Event::Empty
    /// [`Start`]: crate::events::Event::Start
    /// [`End`]: crate::events::Event::End
    /// [`check_end_names`]: Self::check_end_names
    pub expand_empty_elements: bool,

    /// Whether trailing whitespace after the markup name is trimmed in closing
    /// tags `</a >`.
    ///
    /// If `true` the emitted [`End`] event is stripped of trailing whitespace
    /// after the markup name.
    ///
    /// Note that if set to `false` and [`check_end_names`] is `true` the comparison
    /// of markup names is going to fail erroneously if a closing tag contains
    /// trailing whitespace.
    ///
    /// Default: `true`
    ///
    /// [`End`]: crate::events::Event::End
    /// [`check_end_names`]: Self::check_end_names
    pub trim_markup_names_in_closing_tags: bool,

    /// Whether whitespace before character data should be removed.
    ///
    /// When set to `true`, leading whitespace is trimmed in [`Text`] events.
    /// If after that the event is empty it will not be pushed.
    ///
    /// Default: `false`
    ///
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
    ///
    /// WARNING: With this option every text event will be trimmed, which is
    /// incorrect behavior when text events are delimited by comments, processing
    /// instructions or CDATA sections. To correctly trim data manually apply
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
    /// only to necessary events.
    /// </div>
    ///
    /// [`Text`]: crate::events::Event::Text
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
    pub trim_text_start: bool,

    /// Whether whitespace after character data should be removed.
    ///
    /// When set to `true`, trailing whitespace is trimmed in [`Text`] events.
    /// If after that the event is empty it will not be pushed.
    ///
    /// Default: `false`
    ///
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
    ///
    /// WARNING: With this option every text event will be trimmed, which is
    /// incorrect behavior when text events are delimited by comments, processing
    /// instructions or CDATA sections. To correctly trim data manually apply
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
    /// only to necessary events.
    /// </div>
    ///
    /// [`Text`]: crate::events::Event::Text
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
    pub trim_text_end: bool,
}

impl Config {
    /// Set both [`trim_text_start`] and [`trim_text_end`] to the same value.
    ///
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
    ///
    /// WARNING: With this option every text event will be trimmed, which is
    /// incorrect behavior when text events are delimited by comments, processing
    /// instructions or CDATA sections. To correctly trim data manually apply
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
    /// only to necessary events.
    /// </div>
    ///
    /// [`trim_text_start`]: Self::trim_text_start
    /// [`trim_text_end`]: Self::trim_text_end
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
    #[inline]
    pub fn trim_text(&mut self, trim: bool) {
        self.trim_text_start = trim;
        self.trim_text_end = trim;
    }

    /// Turn on or off all checks for well-formedness. Currently these are the
    /// following settings:
    /// - [`check_comments`](Self::check_comments)
    /// - [`check_end_names`](Self::check_end_names)
    #[inline]
    pub fn enable_all_checks(&mut self, enable: bool) {
        self.check_comments = enable;
        self.check_end_names = enable;
    }
}

impl Default for Config {
    /// Returns the configuration with the defaults documented on each field:
    /// only `check_end_names` and `trim_markup_names_in_closing_tags` are enabled.
    fn default() -> Self {
        Self {
            allow_dangling_amp: false,
            allow_unmatched_ends: false,
            check_comments: false,
            check_end_names: true,
            expand_empty_elements: false,
            trim_markup_names_in_closing_tags: true,
            trim_text_start: false,
            trim_text_end: false,
        }
    }
}
250
////////////////////////////////////////////////////////////////////////////////////////////////////
252
/// Body of the event-reading loop, shared by every reader that invokes this macro.
///
/// Drives the `ParseState` machine stored in `$self.state.state` until one
/// event or one error is produced, and evaluates to that `Result`.
///
/// Parameters:
/// - `$self`: the reader; its `state` field holds the parse state, the
///   current offset, the last error offset and the configuration
/// - `$buf`: buffer handed to the `$reader.read_*` methods; it is re-assigned
///   when a read method hands the buffer back without producing an event
/// - `$reader`: expression that yields the underlying input source
/// - `$read_until_close`: name of the `$self` method called in the
///   `InsideMarkup` state
/// - `$await`: optional `await` token, supplied by asynchronous readers
macro_rules! read_event_impl {
    (
        $self:ident, $buf:ident,
        $reader:expr,
        $read_until_close:ident
        $(, $await:ident)?
    ) => {{
        let event = loop {
            break match $self.state.state {
                ParseState::Init => { // Go to InsideText state
                    // If the encoding was set explicitly, we do not need to detect it.
                    // For example, explicit UTF-8 is set automatically if the Reader was
                    // created using `from_str`. But we still need to remove the BOM for
                    // consistency with the code path where the `encoding` feature is disabled
                    #[cfg(feature = "encoding")]
                    if let Some(encoding) = $reader.detect_encoding() $(.$await)? ? {
                        if $self.state.encoding.can_be_refined() {
                            $self.state.encoding = crate::reader::EncodingRef::BomDetected(encoding);
                        }
                    }

                    // Removes UTF-8 BOM if it is present
                    #[cfg(not(feature = "encoding"))]
                    $reader.remove_utf8_bom() $(.$await)? ?;

                    $self.state.state = ParseState::InsideText;
                    continue;
                },
                ParseState::InsideRef => { // Go to InsideText
                    // Remember where the `&` is so errors can be reported at it
                    let start = $self.state.offset;
                    match $reader.read_ref($buf, &mut $self.state.offset) $(.$await)? {
                        // Emit reference, go to InsideText state
                        ReadRefResult::Ref(bytes) => {
                            $self.state.state = ParseState::InsideText;
                            // +1 to skip start `&`
                            Ok(Event::GeneralRef(BytesRef::wrap(&bytes[1..], $self.decoder())))
                        }
                        // Go to Done state
                        ReadRefResult::UpToEof(bytes) if $self.state.config.allow_dangling_amp => {
                            $self.state.state = ParseState::Done;
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToEof(_) => {
                            $self.state.state = ParseState::Done;
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        // Do not change state, stay in InsideRef
                        ReadRefResult::UpToRef(bytes) if $self.state.config.allow_dangling_amp => {
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToRef(_) => {
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        // Go to InsideMarkup state
                        ReadRefResult::UpToMarkup(bytes) if $self.state.config.allow_dangling_amp => {
                            $self.state.state = ParseState::InsideMarkup;
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToMarkup(_) => {
                            $self.state.state = ParseState::InsideMarkup;
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        ReadRefResult::Err(e) => Err(Error::Io(e.into())),
                    }
                }
                ParseState::InsideText => { // Go to InsideMarkup, InsideRef or Done state
                    if $self.state.config.trim_text_start {
                        $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?;
                    }

                    match $reader.read_text($buf, &mut $self.state.offset) $(.$await)? {
                        ReadTextResult::Markup(buf) => {
                            $self.state.state = ParseState::InsideMarkup;
                            // Pass `buf` to the next iteration of the parsing loop
                            $buf = buf;
                            continue;
                        }
                        ReadTextResult::Ref(buf) => {
                            $self.state.state = ParseState::InsideRef;
                            // Pass `buf` to the next iteration of the parsing loop
                            $buf = buf;
                            continue;
                        }
                        ReadTextResult::UpToMarkup(bytes) => {
                            $self.state.state = ParseState::InsideMarkup;
                            // FIXME: Can produce an empty event if:
                            // - event contains only spaces
                            // - trim_text_start = false
                            // - trim_text_end = true
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadTextResult::UpToRef(bytes) => {
                            $self.state.state = ParseState::InsideRef;
                            // Return Text event with `bytes` content; the reference
                            // itself is handled by the InsideRef arm on the next call
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadTextResult::UpToEof(bytes) => {
                            $self.state.state = ParseState::Done;
                            // Trim bytes from end if required
                            let event = $self.state.emit_text(bytes);
                            // Do not report an empty trailing Text event, report Eof instead
                            if event.is_empty() {
                                Ok(Event::Eof)
                            } else {
                                Ok(Event::Text(event))
                            }
                        }
                        ReadTextResult::Err(e) => Err(Error::Io(e.into())),
                    }
                },
                // Go to InsideText state in next two arms
                ParseState::InsideMarkup => $self.$read_until_close($buf) $(.$await)?,
                ParseState::InsideEmpty => Ok(Event::End($self.state.close_expanded_empty())),
                ParseState::Done => Ok(Event::Eof),
            };
        };
        match event {
            // #513: In case of ill-formed errors we already consumed the wrong data
            // and changed the state. We can continue parsing if we wish
            Err(Error::IllFormed(_)) => {}
            // Any other error, as well as Eof, is final
            Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Done,
            _ => {}
        }
        event
    }};
}
381
/// Read bytes up to the `>` and skip it. This macro is expected to be expanded
/// after seeing the `<` symbol and skipping it. Inspects the next (current)
/// symbol and returns an appropriate [`Event`]:
///
/// |Symbol |Event
/// |-------|-------------------------------------
/// |`!`    |[`Comment`], [`CData`] or [`DocType`]
/// |`/`    |[`End`]
/// |`?`    |[`PI`]
/// |_other_|[`Start`] or [`Empty`]
///
/// Sets the parser state to `InsideText` before reading; the `emit_*` helpers
/// it delegates to may change the state again.
///
/// If reading fails, `last_error_offset` is set to the position of the `<`
/// that started the markup. An I/O error returned by `peek_one` is wrapped
/// into `Error::Io` and leaves `last_error_offset` unchanged.
///
/// Parameters:
/// - `$self`: the reader whose `state` is updated
/// - `$buf`: buffer handed to the `$reader.read_*` methods
/// - `$reader`: expression that yields the underlying input source
/// - `$await`: optional `await` token, supplied by asynchronous readers
///
/// [`Comment`]: Event::Comment
/// [`CData`]: Event::CData
/// [`DocType`]: Event::DocType
/// [`End`]: Event::End
/// [`PI`]: Event::PI
/// [`Start`]: Event::Start
/// [`Empty`]: Event::Empty
macro_rules! read_until_close {
    (
        $self:ident, $buf:ident,
        $reader:expr
        $(, $await:ident)?
    ) => {{
        $self.state.state = ParseState::InsideText;

        // Offset just after the already consumed `<`
        let start = $self.state.offset;
        match $reader.peek_one() $(.$await)? {
            // `<!` - comment, CDATA or DOCTYPE declaration
            Ok(Some(b'!')) => match $reader
                .read_bang_element($buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok((bang_type, bytes)) => $self.state.emit_bang(bang_type, bytes),
                Err(e) => {
                    // We want to report error at `<`, but offset was increased,
                    // so return it back (-1 for `<`)
                    $self.state.last_error_offset = start - 1;
                    Err(e)
                }
            },
            // `</` - closing tag
            // #776: We parse using ElementParser which allows us to have attributes
            // in close tags. While such tags are not allowed by the specification,
            // we anyway allow to parse them because:
            // - we do not check constraints during parsing. This is performed by the
            //   optional validate step which user should call manually
            // - if we just look for `>` we will parse `</tag attr=">" >` as end tag
            //   `</tag attr=">` and text `" >` which probably no existing parser
            //   does. This is malformed XML, however it is tolerated by some parsers
            //   (e.g. the one used by Adobe Flash) and such documents do exist in the wild.
            Ok(Some(b'/')) => match $reader
                .read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => $self.state.emit_end(bytes),
                Err(e) => {
                    // We want to report error at `<`, but offset was increased,
                    // so return it back (-1 for `<`)
                    $self.state.last_error_offset = start - 1;
                    Err(e)
                }
            },
            // `<?` - processing instruction
            Ok(Some(b'?')) => match $reader
                .read_with(PiParser(false), $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => $self.state.emit_question_mark(bytes),
                Err(e) => {
                    // We want to report error at `<`, but offset was increased,
                    // so return it back (-1 for `<`)
                    $self.state.last_error_offset = start - 1;
                    Err(e)
                }
            },
            // `<...` - opening or self-closed tag
            Ok(Some(_)) => match $reader
                .read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => Ok($self.state.emit_start(bytes)),
                Err(e) => {
                    // We want to report error at `<`, but offset was increased,
                    // so return it back (-1 for `<`)
                    $self.state.last_error_offset = start - 1;
                    Err(e)
                }
            },
            // `<` - syntax error, tag not closed
            Ok(None) => {
                // We want to report error at `<`, but offset was increased,
                // so return it back (-1 for `<`)
                $self.state.last_error_offset = start - 1;
                Err(Error::Syntax(SyntaxError::UnclosedTag))
            }
            Err(e) => Err(Error::Io(e.into())),
        }
    }};
}
484
/// Generalization of the `read_to_end` method for buffered and borrowed readers.
///
/// Reads events until the `End` event that matches `$end` at nesting depth
/// zero and evaluates to the range `start..end`, where `start` is the buffer
/// position at the moment of expansion and `end` is the buffer position just
/// before the matching `End` event was read.
///
/// On a read error or on `Eof` the macro performs an early `return Err(..)`
/// from the enclosing function. The original value of `trim_text_start` is
/// restored on every exit path.
///
/// Parameters:
/// - `$self`: `&mut Reader`
/// - `$end`: name of the element whose end is searched for
/// - `$buf`: argument passed to each `$read_event` call
/// - `$read_event`: name of the `$self` method that reads one event
/// - `$clear`: code block that clears the internal buffer before each read
/// - `$await`: optional `await` token, supplied by asynchronous readers
macro_rules! read_to_end {
    (
        // $self: &mut Reader
        $self:expr, $end:expr, $buf:expr,
        $read_event:ident,
        // Code block that performs clearing of internal buffer after read of each event
        $clear:block
        $(, $await:ident)?
    ) => {{
        // Because we take the position after the event that precedes the End event,
        // it is important that this position indicates the beginning of the End event.
        // If there are only spaces between the last event and the End event, then we
        // take the position before the spaces, but the spaces would be skipped without
        // generating an event if `trim_text_start` is set to `true`. To prevent that
        // we temporarily disable start text trimming.
        //
        // We also cannot take the position after getting the End event, because if
        // `trim_markup_names_in_closing_tags` is set to `true` (which is the default),
        // we do not know the real size that the End event occupies in the source
        // and cannot correct the position after the End event.
        // So, in any case we should tweak the parser configuration.
        let config = $self.config_mut();
        let trim = config.trim_text_start;
        config.trim_text_start = false;

        let start = $self.buffer_position();
        // Number of currently open nested elements that have the same name as `$end`
        let mut depth = 0;
        loop {
            $clear
            // Position before the next event; this is the end of the span
            // if the next event is the End event we are looking for
            let end = $self.buffer_position();
            match $self.$read_event($buf) $(.$await)? {
                Err(e) => {
                    $self.config_mut().trim_text_start = trim;
                    return Err(e);
                }

                Ok(Event::Start(e)) if e.name() == $end => depth += 1,
                Ok(Event::End(e)) if e.name() == $end => {
                    if depth == 0 {
                        $self.config_mut().trim_text_start = trim;
                        break start..end;
                    }
                    depth -= 1;
                }
                Ok(Event::Eof) => {
                    $self.config_mut().trim_text_start = trim;
                    return Err(Error::missed_end($end, $self.decoder()));
                }
                _ => (),
            }
        }
    }};
}
539
540#[cfg(feature = "async-tokio")]
541mod async_tokio;
542mod buffered_reader;
543mod ns_reader;
544mod slice_reader;
545mod state;
546
547pub use ns_reader::NsReader;
548
/// Range of input in bytes that corresponds to some piece of XML.
pub type Span = Range<u64>;
551
////////////////////////////////////////////////////////////////////////////////////////////////////
553
/// Possible reader states. The state transition diagram (`true` and `false` show
/// the value of the [`Config::expand_empty_elements`] option):
///
/// ```mermaid
/// flowchart LR
///   subgraph _
///     direction LR
///
///     Init         -- "(no event)"\n                                       --> InsideText
///     InsideMarkup -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> InsideText
///     InsideText   -- "#lt;false#gt;\n(no event)"\nText                    --> InsideMarkup
///     InsideText   -- "(no event)"\nText                                   --> InsideRef
///     InsideRef    -- "(no event)"\nGeneralRef                             --> InsideText
///   end
///   InsideText     -- "#lt;true#gt;"\nStart --> InsideEmpty
///   InsideEmpty    -- End                   --> InsideText
///   _ -. Eof .-> Done
/// ```
#[derive(Clone, Debug)]
enum ParseState {
    /// Initial state in which the reader stays after creation. On the first
    /// read the encoding is detected (or the UTF-8 BOM is removed when the
    /// `encoding` feature is disabled), no event is emitted and the reader
    /// moves to the `InsideText` state. The reader will never return to this
    /// state.
    Init,
    /// State after seeing the `&` symbol in textual content.
    ///
    /// If a complete reference is read, a `GeneralRef` event is emitted and the
    /// reader moves to the `InsideText` state. Otherwise, depending on the
    /// [`Config::allow_dangling_amp`] option, either a `Text` event or an
    /// `UnclosedReference` error is produced and the reader moves to the
    /// `InsideMarkup` state (markup follows), to the `Done` state (end of input)
    /// or stays in the `InsideRef` state (another reference follows).
    InsideRef,
    /// State after seeing the `<` symbol. Depending on the next symbol all other
    /// events could be generated.
    ///
    /// After generating one event the reader moves to the `InsideText` state.
    InsideMarkup,
    /// State in which the reader searches for the `<` symbol of a markup or the
    /// `&` symbol of a reference. All bytes before that symbol will be returned
    /// in the [`Event::Text`] event. After that the reader moves to the
    /// `InsideMarkup` or `InsideRef` state respectively, or to the `Done` state
    /// if the end of input was reached.
    InsideText,
    /// This state is used only if option [`expand_empty_elements`] is set to `true`.
    /// Reader enters this state when it is in the `InsideText` state and emits an
    /// [`Event::Start`] event. The next event emitted will be an [`Event::End`],
    /// after which the reader returns to the `InsideText` state.
    ///
    /// [`expand_empty_elements`]: Config::expand_empty_elements
    InsideEmpty,
    /// Reader enters this state when the `Eof` event is generated or an error
    /// other than [`Error::IllFormed`] occurs (after an ill-formed error the
    /// parsing can be continued).
    /// This is the last state, the reader stays in it forever.
    Done,
}
604
/// A reference to an encoding together with information about how it was retrieved.
///
/// The state transition diagram:
///
/// ```mermaid
/// flowchart LR
///   Implicit    -- from_str       --> Explicit
///   Implicit    -- BOM            --> BomDetected
///   Implicit    -- "encoding=..." --> XmlDetected
///   BomDetected -- "encoding=..." --> XmlDetected
/// ```
#[cfg(feature = "encoding")]
#[derive(Clone, Copy, Debug)]
enum EncodingRef {
    /// Encoding was implicitly assumed to have a specified value. It can be refined
    /// using BOM or by the XML declaration event (`<?xml encoding=... ?>`)
    Implicit(&'static Encoding),
    /// Encoding was explicitly set to the desired value. It cannot be changed
    /// either by BOM or by parsing the XML declaration (`<?xml encoding=... ?>`)
    Explicit(&'static Encoding),
    /// Encoding was detected from a byte order mark (BOM) or by the first bytes
    /// of the content. It can be refined by the XML declaration event (`<?xml encoding=... ?>`)
    BomDetected(&'static Encoding),
    /// Encoding was detected using XML declaration event (`<?xml encoding=... ?>`).
    /// It can no longer change
    XmlDetected(&'static Encoding),
}
#[cfg(feature = "encoding")]
impl EncodingRef {
    /// Returns the wrapped encoding regardless of how it was obtained.
    #[inline]
    const fn encoding(&self) -> &'static Encoding {
        match self {
            Self::Implicit(e) => e,
            Self::Explicit(e) => e,
            Self::BomDetected(e) => e,
            Self::XmlDetected(e) => e,
        }
    }
    /// Returns `true` if the encoding may still be replaced by a more precise
    /// one, which is the case for the `Implicit` and `BomDetected` variants.
    #[inline]
    const fn can_be_refined(&self) -> bool {
        match self {
            Self::Implicit(_) | Self::BomDetected(_) => true,
            Self::Explicit(_) | Self::XmlDetected(_) => false,
        }
    }
}
651
////////////////////////////////////////////////////////////////////////////////////////////////////
653
/// A direct stream to the underlying [`Reader`]'s reader which updates
/// [`Reader::buffer_position()`] when read from it.
#[derive(Debug)]
#[must_use = "streams do nothing unless read or polled"]
pub struct BinaryStream<'r, R> {
    /// The wrapped reader that all reads are forwarded to
    inner: &'r mut R,
    /// Position counter that is advanced by the number of bytes read or consumed
    offset: &'r mut u64,
}

impl<'r, R> BinaryStream<'r, R> {
    /// Returns current position in bytes in the original source.
    #[inline]
    pub const fn offset(&self) -> u64 {
        *self.offset
    }

    /// Gets a reference to the underlying reader.
    #[inline]
    pub const fn get_ref(&self) -> &R {
        self.inner
    }

    /// Gets a mutable reference to the underlying reader.
    ///
    /// Avoid reading from this reader because this will not update the reader's
    /// position and will lead to incorrect positions of errors. Read from this
    /// stream instead.
    #[inline]
    pub fn get_mut(&mut self) -> &mut R {
        self.inner
    }
}

impl<'r, R> io::Read for BinaryStream<'r, R>
where
    R: io::Read,
{
    /// Reads from the underlying reader and advances the offset by the number
    /// of bytes actually read. On error the offset is left untouched.
    #[inline]
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let amt = self.inner.read(buf)?;
        *self.offset += amt as u64;
        Ok(amt)
    }
}

impl<'r, R> io::BufRead for BinaryStream<'r, R>
where
    R: io::BufRead,
{
    /// Forwards to the underlying reader. Does not change the offset, because
    /// nothing is consumed until [`consume`](io::BufRead::consume) is called.
    #[inline]
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        self.inner.fill_buf()
    }

    /// Consumes `amt` bytes of the underlying reader and advances the offset
    /// by the same amount.
    #[inline]
    fn consume(&mut self, amt: usize) {
        self.inner.consume(amt);
        *self.offset += amt as u64;
    }
}
713
714////////////////////////////////////////////////////////////////////////////////////////////////////
715
/// A low level encoding-agnostic XML event reader.
///
/// Consumes bytes and streams XML [`Event`]s.
///
/// This reader does not manage namespace declarations and is not able to resolve
/// prefixes. If you want these features, use the [`NsReader`].
///
/// # Examples
///
/// ```
/// use quick_xml::events::Event;
/// use quick_xml::reader::Reader;
///
/// let xml = r#"<tag1 att1 = "test">
///                 <tag2><!--Test comment-->Test</tag2>
///                 <tag2>Test 2</tag2>
///              </tag1>"#;
/// let mut reader = Reader::from_str(xml);
/// reader.config_mut().trim_text(true);
///
/// let mut count = 0;
/// let mut txt = Vec::new();
/// let mut buf = Vec::new();
///
/// // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
/// loop {
///     // NOTE: this is the generic case when we don't know about the input BufRead.
///     // when the input is a &str or a &[u8], we don't actually need to use another
///     // buffer, we could directly call `reader.read_event()`
///     match reader.read_event_into(&mut buf) {
///         Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
///         // exits the loop when reaching end of file
///         Ok(Event::Eof) => break,
///
///         Ok(Event::Start(e)) => {
///             match e.name().as_ref() {
///                 b"tag1" => println!("attributes values: {:?}",
///                                     e.attributes().map(|a| a.unwrap().value)
///                                     .collect::<Vec<_>>()),
///                 b"tag2" => count += 1,
///                 _ => (),
///             }
///         }
///         Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
///
///         // There are several other `Event`s we do not consider here
///         _ => (),
///     }
///     // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
///     buf.clear();
/// }
/// ```
///
/// [`NsReader`]: crate::reader::NsReader
#[derive(Debug, Clone)]
pub struct Reader<R> {
    /// Source of the data to parse
    reader: R,
    /// Configuration and current parse state. Among other things it holds the
    /// byte offset reported by `buffer_position()` and the offset of the last
    /// error reported by `error_position()`
    state: ReaderState,
}
777
778/// Builder methods
779impl<R> Reader<R> {
780    /// Creates a `Reader` that reads from a given reader.
781    pub fn from_reader(reader: R) -> Self {
782        Self {
783            reader,
784            state: ReaderState::default(),
785        }
786    }
787
788    /// Returns reference to the parser configuration
789    pub const fn config(&self) -> &Config {
790        &self.state.config
791    }
792
793    /// Returns mutable reference to the parser configuration
794    pub fn config_mut(&mut self) -> &mut Config {
795        &mut self.state.config
796    }
797}
798
799/// Getters
800impl<R> Reader<R> {
801    /// Consumes `Reader` returning the underlying reader
802    ///
803    /// Can be used to compute line and column of a parsing error position
804    ///
805    /// # Examples
806    ///
807    /// ```
808    /// # use pretty_assertions::assert_eq;
809    /// use std::{str, io::Cursor};
810    /// use quick_xml::events::Event;
811    /// use quick_xml::reader::Reader;
812    ///
813    /// let xml = r#"<tag1 att1 = "test">
814    ///                 <tag2><!--Test comment-->Test</tag2>
815    ///                 <tag3>Test 2</tag3>
816    ///              </tag1>"#;
817    /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
818    /// let mut buf = Vec::new();
819    ///
820    /// fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) {
821    ///     // We known that size cannot exceed usize::MAX because we created parser from single &[u8]
822    ///     let end_pos = reader.buffer_position() as usize;
823    ///     let mut cursor = reader.into_inner();
824    ///     let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned())
825    ///         .expect("can't make a string");
826    ///     let mut line = 1;
827    ///     let mut column = 0;
828    ///     for c in s.chars() {
829    ///         if c == '\n' {
830    ///             line += 1;
831    ///             column = 0;
832    ///         } else {
833    ///             column += 1;
834    ///         }
835    ///     }
836    ///     (line, column)
837    /// }
838    ///
839    /// loop {
840    ///     match reader.read_event_into(&mut buf) {
841    ///         Ok(Event::Start(ref e)) => match e.name().as_ref() {
842    ///             b"tag1" | b"tag2" => (),
843    ///             tag => {
844    ///                 assert_eq!(b"tag3", tag);
845    ///                 assert_eq!((3, 22), into_line_and_column(reader));
846    ///                 break;
847    ///             }
848    ///         },
849    ///         Ok(Event::Eof) => unreachable!(),
850    ///         _ => (),
851    ///     }
852    ///     buf.clear();
853    /// }
854    /// ```
855    pub fn into_inner(self) -> R {
856        self.reader
857    }
858
859    /// Gets a reference to the underlying reader.
860    pub const fn get_ref(&self) -> &R {
861        &self.reader
862    }
863
864    /// Gets a mutable reference to the underlying reader.
865    ///
866    /// Avoid read from this reader because this will not update reader's position
867    /// and will lead to incorrect positions of errors. If you want to read, use
868    /// [`stream()`] instead.
869    ///
870    /// [`stream()`]: Self::stream
871    pub fn get_mut(&mut self) -> &mut R {
872        &mut self.reader
873    }
874
875    /// Gets the byte position in the input data just after the last emitted event
876    /// (i.e. this is position where data of last event ends).
877    ///
878    /// Note, that for text events which is originally ended with whitespace characters
879    /// (` `, `\t`, `\r`, and `\n`) if [`Config::trim_text_end`] is set this is position
880    /// before trim, not the position of the last byte of the [`Event::Text`] content.
881    pub const fn buffer_position(&self) -> u64 {
882        // when internal state is InsideMarkup, we have actually read until '<',
883        // which we don't want to show
884        if let ParseState::InsideMarkup = self.state.state {
885            self.state.offset - 1
886        } else {
887            self.state.offset
888        }
889    }
890
891    /// Gets the last error byte position in the input data. If there is no errors
892    /// yet, returns `0`.
893    ///
894    /// Unlike `buffer_position` it will point to the place where it is rational
895    /// to report error to the end user. For example, all [`SyntaxError`]s are
896    /// reported when the parser sees EOF inside of some kind of markup. The
897    /// `buffer_position()` will point to the last byte of input which is not
898    /// very useful. `error_position()` will point to the start of corresponding
899    /// markup element (i. e. to the `<` character).
900    ///
901    /// This position is always `<= buffer_position()`.
902    pub const fn error_position(&self) -> u64 {
903        self.state.last_error_offset
904    }
905
906    /// Get the decoder, used to decode bytes, read by this reader, to the strings.
907    ///
908    /// If [`encoding`] feature is enabled, the used encoding may change after
909    /// parsing the XML declaration, otherwise encoding is fixed to UTF-8.
910    ///
911    /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
912    /// defaults to UTF-8.
913    ///
914    /// [`encoding`]: ../index.html#encoding
915    #[inline]
916    pub const fn decoder(&self) -> Decoder {
917        self.state.decoder()
918    }
919
920    /// Get the direct access to the underlying reader, but tracks the amount of
921    /// read data and update [`Reader::buffer_position()`] accordingly.
922    ///
923    /// Note, that this method gives you access to the internal reader and read
924    /// data will not be returned in any subsequent events read by `read_event`
925    /// family of methods.
926    ///
927    /// # Example
928    ///
929    /// This example demonstrates how to read stream raw bytes from an XML document.
930    /// This could be used to implement streaming read of text, or to read raw binary
931    /// bytes embedded in an XML document. (Documents with embedded raw bytes are not
932    /// valid XML, but XML-derived file formats exist where such documents are valid).
933    ///
934    /// ```
935    /// # use pretty_assertions::assert_eq;
936    /// use std::io::{BufRead, Read};
937    /// use quick_xml::events::{BytesEnd, BytesStart, Event};
938    /// use quick_xml::reader::Reader;
939    ///
940    /// let mut reader = Reader::from_str("<tag>binary << data&></tag>");
941    /// //                                 ^    ^               ^     ^
942    /// //                                 0    5              21    27
943    ///
944    /// assert_eq!(
945    ///     (reader.read_event().unwrap(), reader.buffer_position()),
946    ///     // 5 - end of the `<tag>`
947    ///     (Event::Start(BytesStart::new("tag")), 5)
948    /// );
949    ///
950    /// // Reading directly from underlying reader will not update position
951    /// // let mut inner = reader.get_mut();
952    ///
953    /// // Reading from the stream() advances position
954    /// let mut inner = reader.stream();
955    ///
956    /// // Read binary data. We must know its size
957    /// let mut binary = [0u8; 16];
958    /// inner.read_exact(&mut binary).unwrap();
959    /// assert_eq!(&binary, b"binary << data&>");
960    /// // 21 - end of the `binary << data&>`
961    /// assert_eq!(inner.offset(), 21);
962    /// assert_eq!(reader.buffer_position(), 21);
963    ///
964    /// assert_eq!(
965    ///     (reader.read_event().unwrap(), reader.buffer_position()),
966    ///     // 27 - end of the `</tag>`
967    ///     (Event::End(BytesEnd::new("tag")), 27)
968    /// );
969    ///
970    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
971    /// ```
972    #[inline]
973    pub fn stream(&mut self) -> BinaryStream<'_, R> {
974        BinaryStream {
975            inner: &mut self.reader,
976            offset: &mut self.state.offset,
977        }
978    }
979}
980
/// Private sync reading methods
impl<R> Reader<R> {
    /// Reads the next event, using `buf` as the storage the event may borrow
    /// from. Depending on the type of the reader, the returned event borrows
    /// either from that buffer or from the input itself.
    ///
    /// The body is generated by the `read_event_impl!` macro (defined outside
    /// of this block), which is given `self.reader` as the source and
    /// `read_until_close` as the name of the method used to read markup.
    fn read_event_impl<'i, B>(&mut self, mut buf: B) -> Result<Event<'i>, Error>
    where
        R: XmlSource<'i, B>,
    {
        read_event_impl!(self, buf, self.reader, read_until_close)
    }

    /// Private function to read until `>` is found. This function expects that
    /// it was called just after a `<` symbol was encountered.
    ///
    /// The body is generated by the `read_until_close!` macro (defined outside
    /// of this block), which is given `self.reader` as the source.
    fn read_until_close<'i, B>(&mut self, buf: B) -> Result<Event<'i>, Error>
    where
        R: XmlSource<'i, B>,
    {
        read_until_close!(self, buf, self.reader)
    }
}
1002
1003////////////////////////////////////////////////////////////////////////////////////////////////////
1004
/// Result of an attempt to read XML textual data from the source.
///
/// # Parameters
/// - `'r`: lifetime of the data from which the returned text blocks borrow
/// - `B`: type of the buffer that is handed back when no text was read
#[derive(Debug)]
enum ReadTextResult<'r, B> {
    /// Start of markup (`<` character) was found in the first byte. `<` was consumed.
    /// Contains the buffer that should be returned back to the next iteration cycle
    /// to satisfy borrow checker requirements.
    Markup(B),
    /// Start of reference (`&` character) was found in the first byte.
    /// `&` was not consumed.
    /// Contains the buffer that should be returned back to the next iteration cycle
    /// to satisfy borrow checker requirements.
    Ref(B),
    /// Contains the text block up to the start of markup (`<` character). `<` was consumed.
    UpToMarkup(&'r [u8]),
    /// Contains the text block up to the start of a reference (`&` character).
    /// `&` was not consumed.
    UpToRef(&'r [u8]),
    /// Contains the text block up to EOF; neither the start of markup (`<` character)
    /// nor the start of a reference (`&` character) was found.
    UpToEof(&'r [u8]),
    /// An IO error occurred.
    Err(io::Error),
}
1028
/// Result of an attempt to read a general reference from the reader.
///
/// Every data-carrying variant borrows a block that starts with the `&`
/// character which opened the reference.
#[derive(Debug)]
enum ReadRefResult<'r> {
    /// Contains the text block up to the end of the reference (`;` character).
    /// Result includes the starting `&`, but not the ending `;`.
    Ref(&'r [u8]),
    /// Contains the text block up to EOF. Neither the end of the reference (`;`),
    /// the start of another reference (`&`) nor the start of markup (`<`) was found.
    /// Result includes the starting `&`.
    UpToEof(&'r [u8]),
    /// Contains the text block up to the next possible reference (`&` character).
    /// Result includes the starting `&`.
    UpToRef(&'r [u8]),
    /// Contains the text block up to the start of markup (`<` character).
    /// Result includes the starting `&`.
    UpToMarkup(&'r [u8]),
    /// An IO error occurred.
    Err(io::Error),
}
1048
/// Represents an input for a reader that can return borrowed data.
///
/// There are two implementors of this trait: a generic one that reads data from
/// `Self`, copies some part of it into a provided buffer of type `B` and then
/// returns data that borrows from that buffer.
///
/// The other implementor is for `&[u8]` and, instead of copying data, returns
/// data borrowed from `Self`. This implementation allows zero-copy
/// deserialization.
///
/// # Parameters
/// - `'r`: lifetime of a buffer from which events will borrow
/// - `B`: a type of a buffer that can be used to store data read from `Self` and
///   from which events can borrow
trait XmlSource<'r, B> {
    /// Removes the UTF-8 BOM if it is present
    #[cfg(not(feature = "encoding"))]
    fn remove_utf8_bom(&mut self) -> io::Result<()>;

    /// Determines the encoding from the start of input and removes the BOM if it is present
    #[cfg(feature = "encoding")]
    fn detect_encoding(&mut self) -> io::Result<Option<&'static Encoding>>;

    /// Read input until the start of markup (the `<`) is found, the start of a general
    /// entity reference (the `&`) is found or the end of input is reached.
    ///
    /// # Parameters
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by the amount of bytes consumed
    ///
    /// [events]: crate::events::Event
    fn read_text(&mut self, buf: B, position: &mut u64) -> ReadTextResult<'r, B>;

    /// Read input until the end of a general reference (the `;`) is found, the start
    /// of another general reference (the `&`) is found or the end of input is reached.
    ///
    /// This method must be called when the current character is `&`.
    ///
    /// # Parameters
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by the amount of bytes consumed
    ///
    /// [events]: crate::events::Event
    fn read_ref(&mut self, buf: B, position: &mut u64) -> ReadRefResult<'r>;

    /// Read input until the thing recognized by `parser` (for example, a
    /// processing instruction) is finished.
    ///
    /// This method expects that the start sequence of a parser was already read.
    ///
    /// Returns a slice of data read up to the end of the thing being parsed.
    /// The end of the thing and the returned content are determined by the used parser.
    ///
    /// If input (`Self`) is exhausted and no bytes were read, or if the specified
    /// parser could not find the ending sequence of the thing, returns `SyntaxError`.
    ///
    /// # Parameters
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by the amount of bytes consumed
    ///
    /// A `P` type parameter is used to preserve state between calls to the underlying
    /// reader which provides bytes fed into the parser.
    ///
    /// [events]: crate::events::Event
    fn read_with<P>(&mut self, parser: P, buf: B, position: &mut u64) -> Result<&'r [u8], Error>
    where
        P: Parser;

    /// Read input until a comment, CDATA or DOCTYPE (see [`BangType`]) is finished.
    ///
    /// This method expects that `<` was already read.
    ///
    /// Returns the kind of the element together with a slice of data read up to
    /// its end (`>`), which is not included into the result.
    ///
    /// If the end of the element is not found before the input (`Self`) is
    /// exhausted, an error is returned; the tests in this file expect an
    /// `Error::Syntax` with the `Unclosed*` cause matching the element kind.
    ///
    /// # Parameters
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
    ///   from which [events] could borrow their data
    /// - `position`: Will be increased by the amount of bytes consumed
    ///
    /// [events]: crate::events::Event
    fn read_bang_element(
        &mut self,
        buf: B,
        position: &mut u64,
    ) -> Result<(BangType, &'r [u8]), Error>;

    /// Consume and discard all the whitespace until the next non-whitespace
    /// character or EOF.
    ///
    /// # Parameters
    /// - `position`: Will be increased by the amount of bytes consumed
    fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()>;

    /// Return one character without consuming it, so that future `read_*` calls
    /// will still include it. On EOF, return `None`.
    fn peek_one(&mut self) -> io::Result<Option<u8>>;
}
1151
/// Possible elements started with `<!`
#[derive(Debug, PartialEq)]
enum BangType {
    /// `<![CDATA[...]]>`
    CData,
    /// `<!--...-->`
    Comment,
    /// `<!DOCTYPE...>`. Holds the state of the [`DtdParser`] to which the
    /// input chunks are fed while looking for the end of the declaration
    DocType(DtdParser),
}
1162impl BangType {
1163    #[inline(always)]
1164    const fn new(byte: Option<u8>) -> Result<Self, SyntaxError> {
1165        Ok(match byte {
1166            Some(b'[') => Self::CData,
1167            Some(b'-') => Self::Comment,
1168            Some(b'D') | Some(b'd') => Self::DocType(DtdParser::BeforeInternalSubset(0)),
1169            _ => return Err(SyntaxError::InvalidBangMarkup),
1170        })
1171    }
1172
1173    /// If element is finished, returns its content up to `>` symbol and
1174    /// an index of this symbol, otherwise returns `None`
1175    ///
1176    /// # Parameters
1177    /// - `buf`: buffer with data consumed on previous iterations
1178    /// - `chunk`: data read on current iteration and not yet consumed from reader
1179    #[inline(always)]
1180    fn parse<'b>(&mut self, buf: &[u8], chunk: &'b [u8]) -> Option<(&'b [u8], usize)> {
1181        match self {
1182            Self::Comment => {
1183                for i in memchr::memchr_iter(b'>', chunk) {
1184                    // Need to read at least 6 symbols (`!---->`) for properly finished comment
1185                    // <!----> - XML comment
1186                    //  012345 - i
1187                    if buf.len() + i > 4 {
1188                        if chunk[..i].ends_with(b"--") {
1189                            // We cannot strip last `--` from the buffer because we need it in case of
1190                            // check_comments enabled option. XML standard requires that comment
1191                            // will not end with `--->` sequence because this is a special case of
1192                            // `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments)
1193                            return Some((&chunk[..i], i + 1)); // +1 for `>`
1194                        }
1195                        // End sequence `-|->` was splitted at |
1196                        //        buf --/   \-- chunk
1197                        if i == 1 && buf.ends_with(b"-") && chunk[0] == b'-' {
1198                            return Some((&chunk[..i], i + 1)); // +1 for `>`
1199                        }
1200                        // End sequence `--|>` was splitted at |
1201                        //         buf --/   \-- chunk
1202                        if i == 0 && buf.ends_with(b"--") {
1203                            return Some((&[], i + 1)); // +1 for `>`
1204                        }
1205                    }
1206                }
1207            }
1208            Self::CData => {
1209                for i in memchr::memchr_iter(b'>', chunk) {
1210                    if chunk[..i].ends_with(b"]]") {
1211                        return Some((&chunk[..i], i + 1)); // +1 for `>`
1212                    }
1213                    // End sequence `]|]>` was splitted at |
1214                    //        buf --/   \-- chunk
1215                    if i == 1 && buf.ends_with(b"]") && chunk[0] == b']' {
1216                        return Some((&chunk[..i], i + 1)); // +1 for `>`
1217                    }
1218                    // End sequence `]]|>` was splitted at |
1219                    //         buf --/   \-- chunk
1220                    if i == 0 && buf.ends_with(b"]]") {
1221                        return Some((&[], i + 1)); // +1 for `>`
1222                    }
1223                }
1224            }
1225            Self::DocType(ref mut parser) => return parser.feed(buf, chunk),
1226        }
1227        None
1228    }
1229    #[inline]
1230    const fn to_err(&self) -> SyntaxError {
1231        match self {
1232            Self::CData => SyntaxError::UnclosedCData,
1233            Self::Comment => SyntaxError::UnclosedComment,
1234            Self::DocType(_) => SyntaxError::UnclosedDoctype,
1235        }
1236    }
1237}
1238
1239////////////////////////////////////////////////////////////////////////////////////////////////////
1240
1241#[cfg(test)]
1242mod test {
1243    /// Checks the internal implementation of the various reader methods
1244    macro_rules! check {
1245        (
1246            #[$test:meta]
1247            $read_event:ident,
1248            $read_until_close:ident,
1249            // constructor of the XML source on which internal functions will be called
1250            $source:path,
1251            // constructor of the buffer to which read data will stored
1252            $buf:expr
1253            $(, $async:ident, $await:ident)?
1254        ) => {
1255            mod read_bang_element {
1256                use super::*;
1257                use crate::errors::{Error, SyntaxError};
1258                use crate::reader::{BangType, DtdParser};
1259                use crate::utils::Bytes;
1260
1261                /// Checks that reading CDATA content works correctly
1262                mod cdata {
1263                    use super::*;
1264                    use pretty_assertions::assert_eq;
1265
1266                    /// Checks that if input begins like CDATA element, but CDATA start sequence
1267                    /// is not finished, parsing ends with an error
1268                    #[$test]
1269                    #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"]
1270                    $($async)? fn not_properly_start() {
1271                        let buf = $buf;
1272                        let mut position = 1;
1273                        let mut input = b"![]]>other content".as_ref();
1274                        //                ^= 1
1275
1276                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1277                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData),
1278                            x => panic!(
1279                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1280                                x
1281                            ),
1282                        }
1283                        assert_eq!(position, 1);
1284                    }
1285
1286                    /// Checks that if CDATA startup sequence was matched, but an end sequence
1287                    /// is not found, parsing ends with an error
1288                    #[$test]
1289                    $($async)? fn not_closed() {
1290                        let buf = $buf;
1291                        let mut position = 1;
1292                        let mut input = b"![CDATA[other content".as_ref();
1293                        //                ^= 1                 ^= 22
1294
1295                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1296                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData),
1297                            x => panic!(
1298                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1299                                x
1300                            ),
1301                        }
1302                        assert_eq!(position, 22);
1303                    }
1304
1305                    /// Checks that CDATA element without content inside parsed successfully
1306                    #[$test]
1307                    $($async)? fn empty() {
1308                        let buf = $buf;
1309                        let mut position = 1;
1310                        let mut input = b"![CDATA[]]>other content".as_ref();
1311                        //                ^= 1       ^= 12
1312
1313                        let (ty, bytes) = $source(&mut input)
1314                            .read_bang_element(buf, &mut position)
1315                            $(.$await)?
1316                            .unwrap();
1317                        assert_eq!(
1318                            (ty, Bytes(bytes)),
1319                            (BangType::CData, Bytes(b"![CDATA[]]"))
1320                        );
1321                        assert_eq!(position, 12);
1322                    }
1323
1324                    /// Checks that CDATA element with content parsed successfully.
1325                    /// Additionally checks that sequences inside CDATA that may look like
1326                    /// a CDATA end sequence do not interrupt CDATA parsing
1327                    #[$test]
1328                    $($async)? fn with_content() {
1329                        let buf = $buf;
1330                        let mut position = 1;
1331                        let mut input = b"![CDATA[cdata]] ]>content]]>other content]]>".as_ref();
1332                        //                ^= 1                        ^= 29
1333
1334                        let (ty, bytes) = $source(&mut input)
1335                            .read_bang_element(buf, &mut position)
1336                            $(.$await)?
1337                            .unwrap();
1338                        assert_eq!(
1339                            (ty, Bytes(bytes)),
1340                            (BangType::CData, Bytes(b"![CDATA[cdata]] ]>content]]"))
1341                        );
1342                        assert_eq!(position, 29);
1343                    }
1344                }
1345
1346                /// Checks that reading XML comments works correctly. According to the [specification],
1347                /// comment data can contain any sequence except `--`:
1348                ///
1349                /// ```peg
1350                /// comment = '<--' (!'--' char)* '-->';
1351                /// char = [#x1-#x2C]
1352                ///      / [#x2E-#xD7FF]
1353                ///      / [#xE000-#xFFFD]
1354                ///      / [#x10000-#x10FFFF]
1355                /// ```
1356                ///
1357                /// The presence of this limitation, however, is simply a poorly designed specification
1358                /// (maybe for purpose of building of LL(1) XML parser) and quick-xml does not check for
1359                /// presence of these sequences by default. This tests allow such content.
1360                ///
1361                /// [specification]: https://www.w3.org/TR/xml11/#dt-comment
1362                mod comment {
1363                    use super::*;
1364                    use pretty_assertions::assert_eq;
1365
1366                    #[$test]
1367                    #[ignore = "start comment sequence fully checked outside of `read_bang_element`"]
1368                    $($async)? fn not_properly_start() {
1369                        let buf = $buf;
1370                        let mut position = 1;
1371                        let mut input = b"!- -->other content".as_ref();
1372                        //                ^= 1
1373
1374                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1375                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1376                            x => panic!(
1377                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1378                                x
1379                            ),
1380                        }
1381                        assert_eq!(position, 1);
1382                    }
1383
1384                    #[$test]
1385                    $($async)? fn not_properly_end() {
1386                        let buf = $buf;
1387                        let mut position = 1;
1388                        let mut input = b"!->other content".as_ref();
1389                        //                ^= 1            ^= 17
1390
1391                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1392                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1393                            x => panic!(
1394                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1395                                x
1396                            ),
1397                        }
1398                        assert_eq!(position, 17);
1399                    }
1400
1401                    #[$test]
1402                    $($async)? fn not_closed1() {
1403                        let buf = $buf;
1404                        let mut position = 1;
1405                        let mut input = b"!--other content".as_ref();
1406                        //                ^= 1            ^= 17
1407
1408                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1409                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1410                            x => panic!(
1411                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1412                                x
1413                            ),
1414                        }
1415                        assert_eq!(position, 17);
1416                    }
1417
1418                    #[$test]
1419                    $($async)? fn not_closed2() {
1420                        let buf = $buf;
1421                        let mut position = 1;
1422                        let mut input = b"!-->other content".as_ref();
1423                        //                ^= 1             ^= 18
1424
1425                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1426                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1427                            x => panic!(
1428                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1429                                x
1430                            ),
1431                        }
1432                        assert_eq!(position, 18);
1433                    }
1434
1435                    #[$test]
1436                    $($async)? fn not_closed3() {
1437                        let buf = $buf;
1438                        let mut position = 1;
1439                        let mut input = b"!--->other content".as_ref();
1440                        //                ^= 1              ^= 19
1441
1442                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1443                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1444                            x => panic!(
1445                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1446                                x
1447                            ),
1448                        }
1449                        assert_eq!(position, 19);
1450                    }
1451
1452                    #[$test]
1453                    $($async)? fn empty() {
1454                        let buf = $buf;
1455                        let mut position = 1;
1456                        let mut input = b"!---->other content".as_ref();
1457                        //                ^= 1  ^= 7
1458
1459                        let (ty, bytes) = $source(&mut input)
1460                            .read_bang_element(buf, &mut position)
1461                            $(.$await)?
1462                            .unwrap();
1463                        assert_eq!(
1464                            (ty, Bytes(bytes)),
1465                            (BangType::Comment, Bytes(b"!----"))
1466                        );
1467                        assert_eq!(position, 7);
1468                    }
1469
1470                    #[$test]
1471                    $($async)? fn with_content() {
1472                        let buf = $buf;
1473                        let mut position = 1;
1474                        let mut input = b"!--->comment<--->other content".as_ref();
1475                        //                ^= 1             ^= 18
1476
1477                        let (ty, bytes) = $source(&mut input)
1478                            .read_bang_element(buf, &mut position)
1479                            $(.$await)?
1480                            .unwrap();
1481                        assert_eq!(
1482                            (ty, Bytes(bytes)),
1483                            (BangType::Comment, Bytes(b"!--->comment<---"))
1484                        );
1485                        assert_eq!(position, 18);
1486                    }
1487                }
1488
1489                /// Checks that reading DOCTYPE definition works correctly
1490                mod doctype {
1491                    use super::*;
1492
1493                    mod uppercase {
1494                        use super::*;
1495                        use pretty_assertions::assert_eq;
1496
1497                        #[$test]
1498                        $($async)? fn not_properly_start() {
1499                            let buf = $buf;
1500                            let mut position = 1;
1501                            let mut input = b"!D other content".as_ref();
1502                            //                ^= 1            ^= 17
1503
1504                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1505                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1506                                x => panic!(
1507                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1508                                    x
1509                                ),
1510                            }
1511                            assert_eq!(position, 17);
1512                        }
1513
1514                        #[$test]
1515                        $($async)? fn without_space() {
1516                            let buf = $buf;
1517                            let mut position = 1;
1518                            let mut input = b"!DOCTYPEother content".as_ref();
1519                            //                ^= 1                 ^= 22
1520
1521                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1522                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1523                                x => panic!(
1524                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1525                                    x
1526                                ),
1527                            }
1528                            assert_eq!(position, 22);
1529                        }
1530
1531                        #[$test]
1532                        $($async)? fn empty() {
1533                            let buf = $buf;
1534                            let mut position = 1;
1535                            let mut input = b"!DOCTYPE>other content".as_ref();
1536                            //                ^= 1     ^= 10
1537
1538                            let (ty, bytes) = $source(&mut input)
1539                                .read_bang_element(buf, &mut position)
1540                                $(.$await)?
1541                                .unwrap();
1542                            assert_eq!(
1543                                (ty, Bytes(bytes)),
1544                                (BangType::DocType(DtdParser::Finished), Bytes(b"!DOCTYPE"))
1545                            );
1546                            assert_eq!(position, 10);
1547                        }
1548
1549                        #[$test]
1550                        $($async)? fn not_closed() {
1551                            let buf = $buf;
1552                            let mut position = 1;
1553                            let mut input = b"!DOCTYPE other content".as_ref();
1554                            //                ^= 1                  ^23
1555
1556                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1557                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1558                                x => panic!(
1559                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1560                                    x
1561                                ),
1562                            }
1563                            assert_eq!(position, 23);
1564                        }
1565                    }
1566
1567                    mod lowercase {
1568                        use super::*;
1569                        use pretty_assertions::assert_eq;
1570
1571                        #[$test]
1572                        $($async)? fn not_properly_start() {
1573                            let buf = $buf;
1574                            let mut position = 1;
1575                            let mut input = b"!d other content".as_ref();
1576                            //                ^= 1            ^= 17
1577
1578                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1579                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1580                                x => panic!(
1581                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1582                                    x
1583                                ),
1584                            }
1585                            assert_eq!(position, 17);
1586                        }
1587
1588                        #[$test]
1589                        $($async)? fn without_space() {
1590                            let buf = $buf;
1591                            let mut position = 1;
1592                            let mut input = b"!doctypeother content".as_ref();
1593                            //                ^= 1                 ^= 22
1594
1595                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1596                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1597                                x => panic!(
1598                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1599                                    x
1600                                ),
1601                            }
1602                            assert_eq!(position, 22);
1603                        }
1604
1605                        #[$test]
1606                        $($async)? fn empty() {
1607                            let buf = $buf;
1608                            let mut position = 1;
1609                            let mut input = b"!doctype>other content".as_ref();
1610                            //                ^= 1     ^= 10
1611
1612                            let (ty, bytes) = $source(&mut input)
1613                                .read_bang_element(buf, &mut position)
1614                                $(.$await)?
1615                                .unwrap();
1616                            assert_eq!(
1617                                (ty, Bytes(bytes)),
1618                                (BangType::DocType(DtdParser::Finished), Bytes(b"!doctype"))
1619                            );
1620                            assert_eq!(position, 10);
1621                        }
1622
1623                        #[$test]
1624                        $($async)? fn not_closed() {
1625                            let buf = $buf;
1626                            let mut position = 1;
1627                            let mut input = b"!doctype other content".as_ref();
1628                            //                ^= 1                  ^= 23
1629
1630                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1631                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1632                                x => panic!(
1633                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1634                                    x
1635                                ),
1636                            }
1637                            assert_eq!(position, 23);
1638                        }
1639                    }
1640                }
1641            }
1642
1643            mod read_text {
1644                use super::*;
1645                use crate::reader::ReadTextResult;
1646                use crate::utils::Bytes;
1647                use pretty_assertions::assert_eq;
1648
1649                #[$test]
1650                $($async)? fn empty() {
1651                    let buf = $buf;
1652                    let mut position = 1;
1653                    let mut input = b"".as_ref();
1654                    //                ^= 1
1655
1656                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1657                        ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"")),
1658                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1659                    }
1660                    assert_eq!(position, 1);
1661                }
1662
1663                #[$test]
1664                $($async)? fn markup() {
1665                    let buf = $buf;
1666                    let mut position = 1;
1667                    let mut input = b"<".as_ref();
1668                    //                 ^= 2
1669
1670                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1671                        ReadTextResult::Markup(b) => assert_eq!(b, $buf),
1672                        x => panic!("Expected `Markup(_)`, but got `{:?}`", x),
1673                    }
1674                    assert_eq!(position, 2);
1675                }
1676
1677                #[$test]
1678                $($async)? fn ref_() {
1679                    let buf = $buf;
1680                    let mut position = 1;
1681                    let mut input = b"&".as_ref();
1682                    //                ^= 1
1683
1684                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1685                        ReadTextResult::Ref(b) => assert_eq!(b, $buf),
1686                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1687                    }
1688                    assert_eq!(position, 1);
1689                }
1690
1691                #[$test]
1692                $($async)? fn up_to_markup() {
1693                    let buf = $buf;
1694                    let mut position = 1;
1695                    let mut input = b"a<".as_ref();
1696                    //                1 ^= 3
1697
1698                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1699                        ReadTextResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1700                        x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
1701                    }
1702                    assert_eq!(position, 3);
1703                }
1704
1705                #[$test]
1706                $($async)? fn up_to_ref() {
1707                    let buf = $buf;
1708                    let mut position = 1;
1709                    let mut input = b"a&".as_ref();
1710                    //                 ^= 2
1711
1712                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1713                        ReadTextResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1714                        x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
1715                    }
1716                    assert_eq!(position, 2);
1717                }
1718
1719                #[$test]
1720                $($async)? fn up_to_eof() {
1721                    let buf = $buf;
1722                    let mut position = 1;
1723                    let mut input = b"a".as_ref();
1724                    //                 ^= 2
1725
1726                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1727                        ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1728                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1729                    }
1730                    assert_eq!(position, 2);
1731                }
1732            }
1733
1734            mod read_ref {
1735                use super::*;
1736                use crate::reader::ReadRefResult;
1737                use crate::utils::Bytes;
1738                use pretty_assertions::assert_eq;
1739
1740                // Empty input is not allowed for `read_ref` so not tested.
1741                // Borrowed source triggers debug assertion,
1742                // buffered do nothing due to implementation details.
1743
1744                #[$test]
1745                $($async)? fn up_to_eof() {
1746                    let buf = $buf;
1747                    let mut position = 1;
1748                    let mut input = b"&".as_ref();
1749                    //                 ^= 2
1750
1751                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1752                        ReadRefResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1753                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1754                    }
1755                    assert_eq!(position, 2);
1756                }
1757
1758                #[$test]
1759                $($async)? fn up_to_ref() {
1760                    let buf = $buf;
1761                    let mut position = 1;
1762                    let mut input = b"&&".as_ref();
1763                    //                 ^= 2
1764
1765                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1766                        ReadRefResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1767                        x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
1768                    }
1769                    assert_eq!(position, 2);
1770                }
1771
1772                #[$test]
1773                $($async)? fn up_to_markup() {
1774                    let buf = $buf;
1775                    let mut position = 1;
1776                    let mut input = b"&<".as_ref();
1777                    //                  ^= 3
1778
1779                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1780                        ReadRefResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1781                        x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
1782                    }
1783                    assert_eq!(position, 3);
1784                }
1785
1786                #[$test]
1787                $($async)? fn empty_ref() {
1788                    let buf = $buf;
1789                    let mut position = 1;
1790                    let mut input = b"&;".as_ref();
1791                    //                  ^= 3
1792
1793                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1794                        ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1795                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1796                    }
1797                    assert_eq!(position, 3);
1798                }
1799
1800                #[$test]
1801                $($async)? fn normal() {
1802                    let buf = $buf;
1803                    let mut position = 1;
1804                    let mut input = b"&lt;".as_ref();
1805                    //                    ^= 5
1806
1807                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1808                        ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&lt")),
1809                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1810                    }
1811                    assert_eq!(position, 5);
1812                }
1813            }
1814
1815            mod read_element {
1816                use super::*;
1817                use crate::errors::{Error, SyntaxError};
1818                use crate::parser::ElementParser;
1819                use crate::utils::Bytes;
1820                use pretty_assertions::assert_eq;
1821
1822                /// Checks that nothing was read from empty buffer
1823                #[$test]
1824                $($async)? fn empty() {
1825                    let buf = $buf;
1826                    let mut position = 1;
1827                    let mut input = b"".as_ref();
1828                    //                ^= 1
1829
1830                    match $source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? {
1831                        Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedTag),
1832                        x => panic!(
1833                            "Expected `Err(Syntax(_))`, but got `{:?}`",
1834                            x
1835                        ),
1836                    }
1837                    assert_eq!(position, 1);
1838                }
1839
1840                mod open {
1841                    use super::*;
1842                    use pretty_assertions::assert_eq;
1843
1844                    #[$test]
1845                    $($async)? fn empty_tag() {
1846                        let buf = $buf;
1847                        let mut position = 1;
1848                        let mut input = b">".as_ref();
1849                        //                 ^= 2
1850
1851                        assert_eq!(
1852                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1853                            Bytes(b"")
1854                        );
1855                        assert_eq!(position, 2);
1856                    }
1857
1858                    #[$test]
1859                    $($async)? fn normal() {
1860                        let buf = $buf;
1861                        let mut position = 1;
1862                        let mut input = b"tag>".as_ref();
1863                        //                    ^= 5
1864
1865                        assert_eq!(
1866                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1867                            Bytes(b"tag")
1868                        );
1869                        assert_eq!(position, 5);
1870                    }
1871
1872                    #[$test]
1873                    $($async)? fn empty_ns_empty_tag() {
1874                        let buf = $buf;
1875                        let mut position = 1;
1876                        let mut input = b":>".as_ref();
1877                        //                  ^= 3
1878
1879                        assert_eq!(
1880                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1881                            Bytes(b":")
1882                        );
1883                        assert_eq!(position, 3);
1884                    }
1885
1886                    #[$test]
1887                    $($async)? fn empty_ns() {
1888                        let buf = $buf;
1889                        let mut position = 1;
1890                        let mut input = b":tag>".as_ref();
1891                        //                     ^= 6
1892
1893                        assert_eq!(
1894                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1895                            Bytes(b":tag")
1896                        );
1897                        assert_eq!(position, 6);
1898                    }
1899
1900                    #[$test]
1901                    $($async)? fn with_attributes() {
1902                        let buf = $buf;
1903                        let mut position = 1;
1904                        let mut input = br#"tag  attr-1=">"  attr2  =  '>'  3attr>"#.as_ref();
1905                        //                                                        ^= 39
1906
1907                        assert_eq!(
1908                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1909                            Bytes(br#"tag  attr-1=">"  attr2  =  '>'  3attr"#)
1910                        );
1911                        assert_eq!(position, 39);
1912                    }
1913                }
1914
1915                mod self_closed {
1916                    use super::*;
1917                    use pretty_assertions::assert_eq;
1918
1919                    #[$test]
1920                    $($async)? fn empty_tag() {
1921                        let buf = $buf;
1922                        let mut position = 1;
1923                        let mut input = b"/>".as_ref();
1924                        //                  ^= 3
1925
1926                        assert_eq!(
1927                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1928                            Bytes(b"/")
1929                        );
1930                        assert_eq!(position, 3);
1931                    }
1932
1933                    #[$test]
1934                    $($async)? fn normal() {
1935                        let buf = $buf;
1936                        let mut position = 1;
1937                        let mut input = b"tag/>".as_ref();
1938                        //                     ^= 6
1939
1940                        assert_eq!(
1941                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1942                            Bytes(b"tag/")
1943                        );
1944                        assert_eq!(position, 6);
1945                    }
1946
1947                    #[$test]
1948                    $($async)? fn empty_ns_empty_tag() {
1949                        let buf = $buf;
1950                        let mut position = 1;
1951                        let mut input = b":/>".as_ref();
1952                        //                   ^= 4
1953
1954                        assert_eq!(
1955                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1956                            Bytes(b":/")
1957                        );
1958                        assert_eq!(position, 4);
1959                    }
1960
1961                    #[$test]
1962                    $($async)? fn empty_ns() {
1963                        let buf = $buf;
1964                        let mut position = 1;
1965                        let mut input = b":tag/>".as_ref();
1966                        //                      ^= 7
1967
1968                        assert_eq!(
1969                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1970                            Bytes(b":tag/")
1971                        );
1972                        assert_eq!(position, 7);
1973                    }
1974
1975                    #[$test]
1976                    $($async)? fn with_attributes() {
1977                        let buf = $buf;
1978                        let mut position = 1;
1979                        let mut input = br#"tag  attr-1="/>"  attr2  =  '/>'  3attr/>"#.as_ref();
1980                        //                                                           ^= 42
1981
1982                        assert_eq!(
1983                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1984                            Bytes(br#"tag  attr-1="/>"  attr2  =  '/>'  3attr/"#)
1985                        );
1986                        assert_eq!(position, 42);
1987                    }
1988                }
1989
1990                mod close {
1991                    use super::*;
1992                    use pretty_assertions::assert_eq;
1993
1994                    #[$test]
1995                    $($async)? fn empty_tag() {
1996                        let buf = $buf;
1997                        let mut position = 1;
1998                        let mut input = b"/ >".as_ref();
1999                        //                   ^= 4
2000
2001                        assert_eq!(
2002                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2003                            Bytes(b"/ ")
2004                        );
2005                        assert_eq!(position, 4);
2006                    }
2007
2008                    #[$test]
2009                    $($async)? fn normal() {
2010                        let buf = $buf;
2011                        let mut position = 1;
2012                        let mut input = b"/tag>".as_ref();
2013                        //                     ^= 6
2014
2015                        assert_eq!(
2016                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2017                            Bytes(b"/tag")
2018                        );
2019                        assert_eq!(position, 6);
2020                    }
2021
2022                    #[$test]
2023                    $($async)? fn empty_ns_empty_tag() {
2024                        let buf = $buf;
2025                        let mut position = 1;
2026                        let mut input = b"/:>".as_ref();
2027                        //                   ^= 4
2028
2029                        assert_eq!(
2030                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2031                            Bytes(b"/:")
2032                        );
2033                        assert_eq!(position, 4);
2034                    }
2035
2036                    #[$test]
2037                    $($async)? fn empty_ns() {
2038                        let buf = $buf;
2039                        let mut position = 1;
2040                        let mut input = b"/:tag>".as_ref();
2041                        //                      ^= 7
2042
2043                        assert_eq!(
2044                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2045                            Bytes(b"/:tag")
2046                        );
2047                        assert_eq!(position, 7);
2048                    }
2049
2050                    #[$test]
2051                    $($async)? fn with_attributes() {
2052                        let buf = $buf;
2053                        let mut position = 1;
2054                        let mut input = br#"/tag  attr-1=">"  attr2  =  '>'  3attr>"#.as_ref();
2055                        //                                                         ^= 40
2056
2057                        assert_eq!(
2058                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2059                            Bytes(br#"/tag  attr-1=">"  attr2  =  '>'  3attr"#)
2060                        );
2061                        assert_eq!(position, 40);
2062                    }
2063                }
2064            }
2065
2066            /// Ensures, that no empty `Text` events are generated
2067            mod $read_event {
2068                use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
2069                use crate::reader::Reader;
2070                use pretty_assertions::assert_eq;
2071
2072                /// When `encoding` feature is enabled, encoding should be detected
2073                /// from BOM (UTF-8) and BOM should be stripped.
2074                ///
2075                /// When `encoding` feature is disabled, UTF-8 is assumed and BOM
2076                /// character should be stripped for consistency
2077                #[$test]
2078                $($async)? fn bom_from_reader() {
2079                    let mut reader = Reader::from_reader("\u{feff}\u{feff}".as_bytes());
2080
2081                    assert_eq!(
2082                        reader.$read_event($buf) $(.$await)? .unwrap(),
2083                        Event::Text(BytesText::from_escaped("\u{feff}"))
2084                    );
2085
2086                    assert_eq!(
2087                        reader.$read_event($buf) $(.$await)? .unwrap(),
2088                        Event::Eof
2089                    );
2090                }
2091
2092                /// When parsing from &str, encoding is fixed (UTF-8), so
2093                /// - when `encoding` feature is disabled, the behavior is the
2094                ///   same as in the `bom_from_reader` test
2095                /// - when `encoding` feature is enabled, the behavior should
2096                ///   stay consistent, so the first BOM character is stripped
                    ///
                    /// As with the reader-based variant, the input holds two BOM
                    /// characters and the test expects a `Text` event with a single BOM
                    /// character, followed by `Eof`.
2097                #[$test]
2098                $($async)? fn bom_from_str() {
2099                    let mut reader = Reader::from_str("\u{feff}\u{feff}");
2100
2101                    assert_eq!(
2102                        reader.$read_event($buf) $(.$await)? .unwrap(),
2103                        Event::Text(BytesText::from_escaped("\u{feff}"))
2104                    );
2105
2106                    assert_eq!(
2107                        reader.$read_event($buf) $(.$await)? .unwrap(),
2108                        Event::Eof
2109                    );
2110                }
2111
2112                #[$test]
2113                $($async)? fn declaration() {
                        // The document consists only of an XML declaration: the very
                        // first event must be `Decl`, not an empty `Text`. The expected
                        // content is the text between `<?` and `?>`, i.e. `xml ` with
                        // its trailing space.
                        // NOTE(review): `3` is presumably the length of the `xml` name
                        // inside that content -- confirm against `BytesStart::from_content`.
2114                    let mut reader = Reader::from_str("<?xml ?>");
2115
2116                    assert_eq!(
2117                        reader.$read_event($buf) $(.$await)? .unwrap(),
2118                        Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
2119                    );
2120                }
2121
2122                #[$test]
2123                $($async)? fn doctype() {
                        // The document consists only of a DOCTYPE: the very first event
                        // must be `DocType`, not an empty `Text`. The expected content
                        // is `x`, i.e. neither the `<!DOCTYPE ` prefix nor the closing
                        // `>` is part of the event.
2124                    let mut reader = Reader::from_str("<!DOCTYPE x>");
2125
2126                    assert_eq!(
2127                        reader.$read_event($buf) $(.$await)? .unwrap(),
2128                        Event::DocType(BytesText::from_escaped("x"))
2129                    );
2130                }
2131
2132                #[$test]
2133                $($async)? fn processing_instruction() {
                        // The instruction content deliberately contains quote characters
                        // and a `?` and `>` that are separated by a space. The expected
                        // `PI` event carries everything between `<?` and the final `?>`
                        // unchanged, so none of these characters may terminate the
                        // instruction early. It must also be the very first event (no
                        // empty `Text` before it).
2134                    let mut reader = Reader::from_str("<?xml-stylesheet '? >\" ?>");
2135
2136                    assert_eq!(
2137                        reader.$read_event($buf) $(.$await)? .unwrap(),
2138                        Event::PI(BytesPI::new("xml-stylesheet '? >\" "))
2139                    );
2140                }
2141
2142                /// Lone closing tags are not allowed, so the end tag is tested together with a start tag.
                    ///
                    /// `<tag></tag>` must produce a `Start` event immediately followed by
                    /// an `End` event: no empty `Text` event before or between them.
2143                #[$test]
2144                $($async)? fn start_and_end() {
2145                    let mut reader = Reader::from_str("<tag></tag>");
2146
2147                    assert_eq!(
2148                        reader.$read_event($buf) $(.$await)? .unwrap(),
2149                        Event::Start(BytesStart::new("tag"))
2150                    );
2151
2152                    assert_eq!(
2153                        reader.$read_event($buf) $(.$await)? .unwrap(),
2154                        Event::End(BytesEnd::new("tag"))
2155                    );
2156                }
2157
2158                #[$test]
2159                $($async)? fn empty() {
                        // A self-closed tag must be reported as an `Empty` event named
                        // `tag`, and it must be the very first event (no empty `Text`
                        // before it).
2160                    let mut reader = Reader::from_str("<tag/>");
2161
2162                    assert_eq!(
2163                        reader.$read_event($buf) $(.$await)? .unwrap(),
2164                        Event::Empty(BytesStart::new("tag"))
2165                    );
2166                }
2167
2168                #[$test]
2169                $($async)? fn text() {
                        // Input without any markup: the first event must be a `Text`
                        // event carrying the whole input unchanged.
2170                    let mut reader = Reader::from_str("text");
2171
2172                    assert_eq!(
2173                        reader.$read_event($buf) $(.$await)? .unwrap(),
2174                        Event::Text(BytesText::from_escaped("text"))
2175                    );
2176                }
2177
2178                #[$test]
2179                $($async)? fn cdata() {
                        // A CDATA section with no content: the first event must be a
                        // `CData` event with empty content, i.e. the empty section is
                        // still reported and no empty `Text` event precedes it.
2180                    let mut reader = Reader::from_str("<![CDATA[]]>");
2181
2182                    assert_eq!(
2183                        reader.$read_event($buf) $(.$await)? .unwrap(),
2184                        Event::CData(BytesCData::new(""))
2185                    );
2186                }
2187
2188                #[$test]
2189                $($async)? fn comment() {
                        // A comment with no content: the first event must be a `Comment`
                        // event with empty content, i.e. the empty comment is still
                        // reported and no empty `Text` event precedes it.
2190                    let mut reader = Reader::from_str("<!---->");
2191
2192                    assert_eq!(
2193                        reader.$read_event($buf) $(.$await)? .unwrap(),
2194                        Event::Comment(BytesText::from_escaped(""))
2195                    );
2196                }
2197
2198                #[$test]
2199                $($async)? fn eof() {
                        // Empty input: the very first event must already be `Eof`,
                        // without an empty `Text` event before it.
2200                    let mut reader = Reader::from_str("");
2201
2202                    assert_eq!(
2203                        reader.$read_event($buf) $(.$await)? .unwrap(),
2204                        Event::Eof
2205                    );
2206                }
2207            }
2208        };
2209    }
2210
2211    // Export macros for the child modules:
2212    // - buffered_reader
2213    // - slice_reader
2214    pub(super) use check;
2215}
quick_xml/reader/mod.rs

quick_xml/reader/
mod.rs