// quick_xml/reader/mod.rs

//! Contains the high-level interface for a pull-based XML parser.
2
#[cfg(feature = "encoding")]
use encoding_rs::Encoding;
use std::io;
use std::ops::Range;

use crate::encoding::Decoder;
use crate::errors::{Error, IllFormedError, SyntaxError};
use crate::events::{BytesRef, Event};
use crate::parser::{ElementParser, Parser, PiParser};
use crate::reader::state::ReaderState;
13
/// A struct that holds a parser configuration.
///
/// The current parser configuration can be retrieved by calling [`Reader::config()`]
/// and changed by modifying the properties of the object returned by a call to
/// [`Reader::config_mut()`].
///
/// [`Reader::config()`]: crate::reader::Reader::config
/// [`Reader::config_mut()`]: crate::reader::Reader::config_mut
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))]
#[non_exhaustive]
pub struct Config {
    /// Whether a lone ampersand character (without a paired semicolon) should be
    /// allowed in textual content. Unless enabled, in case of a dangling ampersand,
    /// the [`Error::IllFormed(UnclosedReference)`] is returned from read methods.
    ///
    /// Default: `false`
    ///
    /// # Example
    ///
    /// ```
    /// # use quick_xml::events::{BytesRef, BytesText, Event};
    /// # use quick_xml::reader::Reader;
    /// # use pretty_assertions::assert_eq;
    /// let mut reader = Reader::from_str("text with & &amp; & alone");
    /// reader.config_mut().allow_dangling_amp = true;
    ///
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new("text with ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(BytesRef::new("amp")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new(" ")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& alone")));
    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
    /// ```
    ///
    /// [`Error::IllFormed(UnclosedReference)`]: crate::errors::IllFormedError::UnclosedReference
    pub allow_dangling_amp: bool,

    /// Whether unmatched closing tag names should be allowed. Unless enabled,
    /// in case of a dangling end tag, the [`Error::IllFormed(UnmatchedEndTag)`]
    /// is returned from read methods.
    ///
    /// When set to `true`, it won't check if a closing tag has a corresponding
    /// opening tag at all. For example, `<a></a></b>` will be permitted.
    ///
    /// Note that the emitted [`End`] event will not be modified if this is enabled,
    /// i.e. it will contain the data of the unmatched end tag.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event.
    ///
    /// Default: `false`
    ///
    /// [`Error::IllFormed(UnmatchedEndTag)`]: crate::errors::IllFormedError::UnmatchedEndTag
    /// [`End`]: crate::events::Event::End
    pub allow_unmatched_ends: bool,

    /// Whether comments should be validated. If enabled, in case of an invalid comment
    /// [`Error::IllFormed(DoubleHyphenInComment)`] is returned from read methods.
    ///
    /// When set to `true`, every [`Comment`] event will be checked for not
    /// containing `--`, which [is not allowed] in XML comments. Most of the time
    /// we don't want comments at all so we don't really care about comment
    /// correctness, thus the default value is `false` to improve performance.
    ///
    /// Default: `false`
    ///
    /// [`Error::IllFormed(DoubleHyphenInComment)`]: crate::errors::IllFormedError::DoubleHyphenInComment
    /// [`Comment`]: crate::events::Event::Comment
    /// [is not allowed]: https://www.w3.org/TR/xml11/#sec-comments
    pub check_comments: bool,

    /// Whether mismatched closing tag names should be detected. If enabled, in
    /// case of mismatch the [`Error::IllFormed(MismatchedEndTag)`] is returned from
    /// read methods.
    ///
    /// Note that start and end tags [should match literally][spec], they cannot
    /// have different prefixes even if both prefixes resolve to the same namespace.
    /// The XML
    ///
    /// ```xml
    /// <outer xmlns="namespace" xmlns:p="namespace">
    /// </p:outer>
    /// ```
    ///
    /// is not valid, even though semantically the start tag is the same as the
    /// end tag. The reason is that namespaces are an extension of the original
    /// XML specification (without namespaces) and it should be backward-compatible.
    ///
    /// When set to `false`, it won't check if a closing tag matches the corresponding
    /// opening tag. For example, `<mytag></different_tag>` will be permitted.
    ///
    /// If the XML is known to be sane (already processed, etc.) this saves extra time.
    ///
    /// Note that the emitted [`End`] event will not be modified if this is disabled,
    /// i.e. it will contain the data of the mismatched end tag.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event. However, if
    /// [`expand_empty_elements`] is also set, only one additional allocation will
    /// be performed that supports both these options.
    ///
    /// Default: `true`
    ///
    /// [`Error::IllFormed(MismatchedEndTag)`]: crate::errors::IllFormedError::MismatchedEndTag
    /// [spec]: https://www.w3.org/TR/xml11/#dt-etag
    /// [`End`]: crate::events::Event::End
    /// [`expand_empty_elements`]: Self::expand_empty_elements
    pub check_end_names: bool,

    /// Whether empty elements should be split into a [`Start`] and an [`End`] event.
    ///
    /// When set to `true`, all [`Empty`] events produced by a self-closing tag
    /// like `<tag/>` are expanded into a [`Start`] event followed by an [`End`]
    /// event. When set to `false` (the default), those tags are represented by
    /// an [`Empty`] event instead.
    ///
    /// Note that setting this to `true` will lead to additional allocations that
    /// are needed to store the tag name for an [`End`] event. However, if
    /// [`check_end_names`] is also set, only one additional allocation will be
    /// performed that supports both these options.
    ///
    /// Default: `false`
    ///
    /// [`Empty`]: crate::events::Event::Empty
    /// [`Start`]: crate::events::Event::Start
    /// [`End`]: crate::events::Event::End
    /// [`check_end_names`]: Self::check_end_names
    pub expand_empty_elements: bool,

    /// Whether trailing whitespace after the markup name is trimmed in closing
    /// tags `</a >`.
    ///
    /// If `true` the emitted [`End`] event is stripped of trailing whitespace
    /// after the markup name.
    ///
    /// Note that if set to `false` and [`check_end_names`] is `true` the comparison
    /// of markup names is going to fail erroneously if a closing tag contains
    /// trailing whitespace.
    ///
    /// Default: `true`
    ///
    /// [`End`]: crate::events::Event::End
    /// [`check_end_names`]: Self::check_end_names
    pub trim_markup_names_in_closing_tags: bool,

    /// Whether whitespace before character data should be removed.
    ///
    /// When set to `true`, leading whitespace is trimmed in [`Text`] events.
    /// If after that the event is empty it will not be pushed.
    ///
    /// Default: `false`
    ///
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
    ///
    /// WARNING: With this option every text event will be trimmed, which is
    /// incorrect behavior when text events are delimited by comments, processing
    /// instructions or CDATA sections. To correctly trim data, manually apply
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
    /// only to the necessary events.
    /// </div>
    ///
    /// [`Text`]: crate::events::Event::Text
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
    pub trim_text_start: bool,

    /// Whether whitespace after character data should be removed.
    ///
    /// When set to `true`, trailing whitespace is trimmed in [`Text`] events.
    /// If after that the event is empty it will not be pushed.
    ///
    /// Default: `false`
    ///
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
    ///
    /// WARNING: With this option every text event will be trimmed, which is
    /// incorrect behavior when text events are delimited by comments, processing
    /// instructions or CDATA sections. To correctly trim data, manually apply
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
    /// only to the necessary events.
    /// </div>
    ///
    /// [`Text`]: crate::events::Event::Text
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
    pub trim_text_end: bool,
}

impl Config {
    /// Set both [`trim_text_start`] and [`trim_text_end`] to the same value.
    ///
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
    ///
    /// WARNING: With this option every text event will be trimmed, which is
    /// incorrect behavior when text events are delimited by comments, processing
    /// instructions or CDATA sections. To correctly trim data, manually apply
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
    /// only to the necessary events.
    /// </div>
    ///
    /// [`trim_text_start`]: Self::trim_text_start
    /// [`trim_text_end`]: Self::trim_text_end
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
    #[inline]
    pub fn trim_text(&mut self, trim: bool) {
        self.trim_text_start = trim;
        self.trim_text_end = trim;
    }

    /// Turn on or off all checks for well-formedness. Currently these are the
    /// following settings:
    /// - [`check_comments`](Self::check_comments)
    /// - [`check_end_names`](Self::check_end_names)
    #[inline]
    pub fn enable_all_checks(&mut self, enable: bool) {
        self.check_comments = enable;
        self.check_end_names = enable;
    }
}

impl Default for Config {
    /// Returns the configuration with every option set to the default value
    /// documented on the corresponding field.
    fn default() -> Self {
        Self {
            allow_dangling_amp: false,
            allow_unmatched_ends: false,
            check_comments: false,
            check_end_names: true,
            expand_empty_elements: false,
            trim_markup_names_in_closing_tags: true,
            trim_text_start: false,
            trim_text_end: false,
        }
    }
}
250
251////////////////////////////////////////////////////////////////////////////////////////////////////
252
/// Shared body of the "read next event" methods: drives the `ParseState`
/// state machine stored in `$self.state.state` until one event (or an error)
/// is produced.
///
/// Parameters:
/// - `$self`: the reader; its `state` field holds the parse state, offset,
///   configuration and last error offset
/// - `$buf`: the buffer binding passed to the low-level read methods. It is
///   reassigned inside the loop, so it must be a mutable binding
/// - `$reader`: expression that provides the low-level read methods
///   (`read_text`, `read_ref`, `skip_whitespace`, ...)
/// - `$read_until_close`: name of the method of `$self` that is called to
///   parse markup after `<`
/// - `$await`: optional `await` token for the asynchronous variant
///
/// After the loop, any error other than `Error::IllFormed`, as well as `Eof`,
/// moves the state machine to `ParseState::Done`.
macro_rules! read_event_impl {
    (
        $self:ident, $buf:ident,
        $reader:expr,
        $read_until_close:ident
        $(, $await:ident)?
    ) => {{
        let event = loop {
            break match $self.state.state {
                ParseState::Init => { // Go to InsideText state
                    // If the encoding was set explicitly, we do not need to detect it.
                    // For example, explicit UTF-8 is set automatically if the Reader was
                    // created using `from_str`. But we still need to remove the BOM for
                    // consistency with the path where the encoding feature is disabled
                    #[cfg(feature = "encoding")]
                    if let Some(encoding) = $reader.detect_encoding() $(.$await)? ? {
                        if $self.state.encoding.can_be_refined() {
                            $self.state.encoding = crate::reader::EncodingRef::BomDetected(encoding);
                        }
                    }

                    // Removes UTF-8 BOM if it is present
                    #[cfg(not(feature = "encoding"))]
                    $reader.remove_utf8_bom() $(.$await)? ?;

                    $self.state.state = ParseState::InsideText;
                    continue;
                },
                ParseState::InsideRef => { // Go to InsideText
                    // Remember where the reference starts to report errors at that position
                    let start = $self.state.offset;
                    match $reader.read_ref($buf, &mut $self.state.offset) $(.$await)? {
                        // Emit reference, go to InsideText state
                        ReadRefResult::Ref(bytes) => {
                            $self.state.state = ParseState::InsideText;
                            // +1 to skip start `&`
                            Ok(Event::GeneralRef(BytesRef::wrap(&bytes[1..], $self.decoder())))
                        }
                        // Go to Done state
                        ReadRefResult::UpToEof(bytes) if $self.state.config.allow_dangling_amp => {
                            $self.state.state = ParseState::Done;
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToEof(_) => {
                            $self.state.state = ParseState::Done;
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        // Do not change state, stay in InsideRef
                        ReadRefResult::UpToRef(bytes) if $self.state.config.allow_dangling_amp => {
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToRef(_) => {
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        // Go to InsideMarkup state
                        ReadRefResult::UpToMarkup(bytes) if $self.state.config.allow_dangling_amp => {
                            $self.state.state = ParseState::InsideMarkup;
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToMarkup(_) => {
                            $self.state.state = ParseState::InsideMarkup;
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        ReadRefResult::Err(e) => Err(Error::Io(e.into())),
                    }
                }
                ParseState::InsideText => { // Go to InsideMarkup, InsideRef or Done state
                    if $self.state.config.trim_text_start {
                        $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?;
                    }

                    match $reader.read_text($buf, &mut $self.state.offset) $(.$await)? {
                        ReadTextResult::Markup(buf) => {
                            $self.state.state = ParseState::InsideMarkup;
                            // Pass `buf` to the next iteration of the parsing loop
                            $buf = buf;
                            continue;
                        }
                        ReadTextResult::Ref(buf) => {
                            $self.state.state = ParseState::InsideRef;
                            // Pass `buf` to the next iteration of the parsing loop
                            $buf = buf;
                            continue;
                        }
                        ReadTextResult::UpToMarkup(bytes) => {
                            $self.state.state = ParseState::InsideMarkup;
                            // FIXME: Can produce an empty event if:
                            // - event contains only spaces
                            // - trim_text_start = false
                            // - trim_text_end = true
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadTextResult::UpToRef(bytes) => {
                            $self.state.state = ParseState::InsideRef;
                            // Emit the text collected so far as a Text event
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadTextResult::UpToEof(bytes) => {
                            $self.state.state = ParseState::Done;
                            // Trim bytes from end if required
                            let event = $self.state.emit_text(bytes);
                            // Nothing left to report as text: signal end of input instead
                            if event.is_empty() {
                                Ok(Event::Eof)
                            } else {
                                Ok(Event::Text(event))
                            }
                        }
                        ReadTextResult::Err(e) => Err(Error::Io(e.into())),
                    }
                },
                // Go to InsideText state in next two arms
                ParseState::InsideMarkup => $self.$read_until_close($buf) $(.$await)?,
                ParseState::InsideEmpty => Ok(Event::End($self.state.close_expanded_empty())),
                ParseState::Done => Ok(Event::Eof),
            };
        };
        match event {
            // #513: In case of ill-formed errors we already consumed the wrong data
            // and changed the state. We can continue parsing if we wish
            Err(Error::IllFormed(_)) => {}
            Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Done,
            _ => {}
        }
        event
    }};
}
381
/// Read bytes up to the `>` and skip it. This method is expected to be called
/// after seeing the `<` symbol and skipping it. Inspects the next (current)
/// symbol and returns an appropriate [`Event`]:
///
/// |Symbol |Event
/// |-------|-------------------------------------
/// |`!`    |[`Comment`], [`CData`] or [`DocType`]
/// |`/`    |[`End`]
/// |`?`    |[`PI`]
/// |_other_|[`Start`] or [`Empty`]
///
/// Moves parser to the `InsideText` state before anything is read.
///
/// If reading the markup fails, or the input ends right after `<`, the
/// position of the `<` is stored in `$self.state.last_error_offset`.
///
/// [`Comment`]: Event::Comment
/// [`CData`]: Event::CData
/// [`DocType`]: Event::DocType
/// [`End`]: Event::End
/// [`PI`]: Event::PI
/// [`Start`]: Event::Start
/// [`Empty`]: Event::Empty
macro_rules! read_until_close {
    (
        $self:ident, $buf:ident,
        $reader:expr
        $(, $await:ident)?
    ) => {{
        $self.state.state = ParseState::InsideText;

        // Offset of the symbol following the already consumed `<`
        let start = $self.state.offset;
        match $reader.peek_one() $(.$await)? {
            // `<!` - comment, CDATA or DOCTYPE declaration
            Ok(Some(b'!')) => match $reader
                .read_bang_element($buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok((bang_type, bytes)) => $self.state.emit_bang(bang_type, bytes),
                Err(e) => {
                    // We want to report error at `<`, but offset was increased,
                    // so return it back (-1 for `<`)
                    $self.state.last_error_offset = start - 1;
                    Err(e)
                }
            },
            // `</` - closing tag
            // #776: We parse using ElementParser which allows us to have attributes
            // in close tags. While such tags are not allowed by the specification,
            // we allow parsing them anyway because:
            // - we do not check constraints during parsing. This is performed by the
            //   optional validate step which the user should call manually
            // - if we just looked for `>` we would parse `</tag attr=">" >` as end tag
            //   `</tag attr=">` and text `" >`, which probably no existing parser
            //   does. This is malformed XML, however it is tolerated by some parsers
            //   (e.g. the one used by Adobe Flash) and such documents do exist in the wild.
            Ok(Some(b'/')) => match $reader
                .read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => $self.state.emit_end(bytes),
                Err(e) => {
                    // We want to report error at `<`, but offset was increased,
                    // so return it back (-1 for `<`)
                    $self.state.last_error_offset = start - 1;
                    Err(e)
                }
            },
            // `<?` - processing instruction
            Ok(Some(b'?')) => match $reader
                .read_with(PiParser(false), $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => $self.state.emit_question_mark(bytes),
                Err(e) => {
                    // We want to report error at `<`, but offset was increased,
                    // so return it back (-1 for `<`)
                    $self.state.last_error_offset = start - 1;
                    Err(e)
                }
            },
            // `<...` - opening or self-closed tag
            Ok(Some(_)) => match $reader
                .read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => Ok($self.state.emit_start(bytes)),
                Err(e) => {
                    // We want to report error at `<`, but offset was increased,
                    // so return it back (-1 for `<`)
                    $self.state.last_error_offset = start - 1;
                    Err(e)
                }
            },
            // `<` - syntax error, tag not closed
            Ok(None) => {
                // We want to report error at `<`, but offset was increased,
                // so return it back (-1 for `<`)
                $self.state.last_error_offset = start - 1;
                Err(Error::Syntax(SyntaxError::UnclosedTag))
            }
            Err(e) => Err(Error::Io(e.into())),
        }
    }};
}
484
/// Generalization of the `read_to_end` method for buffered and borrowed readers.
///
/// Reads events with `$self.$read_event($buf)` until the `End` event that
/// closes the element named `$end` is found, tracking nested `Start` events
/// with the same name. Evaluates to the range from the buffer position at
/// entry up to the buffer position just before that `End` event was read.
///
/// Returns early from the enclosing function with an error if reading an
/// event fails or if `Eof` is reached before the matching `End` event.
/// `trim_text_start` is disabled while the macro runs and restored on every
/// exit path.
macro_rules! read_to_end {
    (
        // $self: &mut Reader
        $self:expr, $end:expr, $buf:expr,
        $read_event:ident,
        // Code block that performs clearing of internal buffer after read of each event
        $clear:block
        $(, $await:ident)?
    ) => {{
        // Because we take the position after the event before the End event,
        // it is important that this position indicates the beginning of the End event.
        // If there are only spaces between the last event and the End event, then we
        // take the position before the spaces, but the spaces would be skipped without
        // generating an event if `trim_text_start` is set to `true`. To prevent that
        // we temporarily disable start text trimming.
        //
        // We also cannot take the position after getting the End event, because if
        // `trim_markup_names_in_closing_tags` is set to `true` (which is the default),
        // we do not know the real size that the End event occupies in the source
        // and cannot correct the position after the End event.
        // So, in any case we have to tweak the parser configuration.
        let config = $self.config_mut();
        let trim = config.trim_text_start;
        config.trim_text_start = false;

        let start = $self.buffer_position();
        // Number of currently open nested elements with the same name as `$end`
        let mut depth = 0;
        loop {
            $clear
            // Position before the next event; becomes the end of the span
            // if that event is the matching End event
            let end = $self.buffer_position();
            match $self.$read_event($buf) $(.$await)? {
                Err(e) => {
                    $self.config_mut().trim_text_start = trim;
                    return Err(e);
                }

                Ok(Event::Start(e)) if e.name() == $end => depth += 1,
                Ok(Event::End(e)) if e.name() == $end => {
                    if depth == 0 {
                        $self.config_mut().trim_text_start = trim;
                        break start..end;
                    }
                    depth -= 1;
                }
                Ok(Event::Eof) => {
                    $self.config_mut().trim_text_start = trim;
                    return Err(Error::missed_end($end, $self.decoder()));
                }
                _ => (),
            }
        }
    }};
}
539
540#[cfg(feature = "async-tokio")]
541mod async_tokio;
542mod buffered_reader;
543mod ns_reader;
544mod slice_reader;
545mod state;
546
547pub use ns_reader::NsReader;
548
/// Range of input in bytes that corresponds to some piece of XML.
///
/// As with any [`Range`], the start offset is inclusive and the end offset
/// is exclusive.
pub type Span = Range<u64>;
551
552////////////////////////////////////////////////////////////////////////////////////////////////////
553
/// Possible reader states. The state transition diagram (`true` and `false` shows
/// value of [`Config::expand_empty_elements`] option):
///
/// ```mermaid
/// flowchart LR
///   subgraph _
///     direction LR
///
///     Init         -- "(no event)"\n                                       --> InsideText
///     InsideMarkup -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> InsideText
///     InsideText   -- "#lt;false#gt;\n(no event)"\nText                    --> InsideMarkup
///     InsideText   -- "(no event)"\nText                                   --> InsideRef
///     InsideRef    -- "(no event)"\nGeneralRef                             --> InsideText
///   end
///   InsideText     -- "#lt;true#gt;"\nStart --> InsideEmpty
///   InsideEmpty    -- End                   --> InsideText
///   _ -. Eof .-> Done
/// ```
#[derive(Clone, Debug)]
enum ParseState {
    /// Initial state in which the reader stays after creation. Leaving this
    /// state never emits an event: the reader only detects the encoding (or
    /// removes the UTF-8 BOM when the `encoding` feature is disabled) and then
    /// moves to the `InsideText` state. The reader will never return to this state.
    Init,
    /// State after seeing the `&` symbol in textual content.
    ///
    /// When a complete reference is read, an [`Event::GeneralRef`] is emitted
    /// and the reader moves to the `InsideText` state. If the reference is not
    /// closed, either an [`Event::Text`] is emitted (when
    /// [`Config::allow_dangling_amp`] is set) or an unclosed reference error is
    /// returned. Depending on what terminated the unclosed reference, the reader
    /// then stays in `InsideRef` or moves to the `InsideMarkup` or `Done` state.
    InsideRef,
    /// State after seeing the `<` symbol. Depending on the next symbol all other
    /// events could be generated.
    ///
    /// After generating one event the reader moves to the `InsideText` state.
    InsideMarkup,
    /// State in which the reader searches for the `<` symbol of a markup or the
    /// `&` symbol of a reference. All bytes before that symbol will be returned
    /// in the [`Event::Text`] event. After that the reader moves to the
    /// `InsideMarkup` or `InsideRef` state respectively, or to the `Done` state
    /// if the end of input was reached.
    InsideText,
    /// This state is used only if option [`expand_empty_elements`] is set to `true`.
    /// Reader enters this state when it is in the `InsideText` state and emits an
    /// [`Event::Start`] event. The next event emitted will be an [`Event::End`],
    /// after which the reader returns to the `InsideText` state.
    ///
    /// [`expand_empty_elements`]: Config::expand_empty_elements
    InsideEmpty,
    /// Reader enters this state when an `Eof` event is generated or when a read
    /// method returns an error other than `Error::IllFormed` (after ill-formed
    /// errors parsing can usually be continued).
    /// This is the last state, the reader stays in it forever.
    Done,
}
604
/// A reference to an encoding together with information about how it was retrieved.
///
/// The state transition diagram:
///
/// ```mermaid
/// flowchart LR
///   Implicit    -- from_str       --> Explicit
///   Implicit    -- BOM            --> BomDetected
///   Implicit    -- "encoding=..." --> XmlDetected
///   BomDetected -- "encoding=..." --> XmlDetected
/// ```
#[cfg(feature = "encoding")]
#[derive(Clone, Copy, Debug)]
enum EncodingRef {
    /// Encoding was implicitly assumed to have a specified value. It can be refined
    /// using BOM or by the XML declaration event (`<?xml encoding=... ?>`)
    Implicit(&'static Encoding),
    /// Encoding was explicitly set to the desired value. It cannot be changed
    /// either by BOM or by parsing the XML declaration (`<?xml encoding=... ?>`)
    Explicit(&'static Encoding),
    /// Encoding was detected from a byte order mark (BOM) or by the first bytes
    /// of the content. It can be refined by the XML declaration event (`<?xml encoding=... ?>`)
    BomDetected(&'static Encoding),
    /// Encoding was detected using XML declaration event (`<?xml encoding=... ?>`).
    /// It can no longer change
    XmlDetected(&'static Encoding),
}
#[cfg(feature = "encoding")]
impl EncodingRef {
    /// Returns the wrapped encoding regardless of how it was obtained.
    #[inline]
    const fn encoding(&self) -> &'static Encoding {
        // Every variant carries the encoding as its only field. The or-pattern
        // stays exhaustive, so adding a variant is still a compile error here.
        match self {
            Self::Implicit(e)
            | Self::Explicit(e)
            | Self::BomDetected(e)
            | Self::XmlDetected(e) => e,
        }
    }
    /// Returns `true` if the encoding may still be replaced by a more
    /// authoritative source: only the implicitly assumed and the BOM-detected
    /// encodings can be refined.
    #[inline]
    const fn can_be_refined(&self) -> bool {
        match self {
            Self::Implicit(_) | Self::BomDetected(_) => true,
            Self::Explicit(_) | Self::XmlDetected(_) => false,
        }
    }
}
651
652////////////////////////////////////////////////////////////////////////////////////////////////////
653
/// A direct stream to the underlying [`Reader`]s reader which updates
/// [`Reader::buffer_position()`] when read from it.
#[derive(Debug)]
#[must_use = "streams do nothing unless read or polled"]
pub struct BinaryStream<'r, R> {
    /// The wrapped reader all reads are forwarded to
    inner: &'r mut R,
    /// Position counter that is advanced by every byte read through this stream
    offset: &'r mut u64,
}

impl<'r, R> BinaryStream<'r, R> {
    /// Returns current position in bytes in the original source.
    #[inline]
    pub const fn offset(&self) -> u64 {
        *self.offset
    }

    /// Gets a reference to the underlying reader.
    #[inline]
    pub const fn get_ref(&self) -> &R {
        self.inner
    }

    /// Gets a mutable reference to the underlying reader.
    ///
    /// Avoid reading from this reader because that will not update the reader's
    /// position and will lead to incorrect positions of errors. Read from this
    /// stream instead.
    #[inline]
    pub fn get_mut(&mut self) -> &mut R {
        self.inner
    }
}

impl<'r, R> io::Read for BinaryStream<'r, R>
where
    R: io::Read,
{
    /// Reads from the underlying reader and advances the tracked offset by
    /// the number of bytes actually read. On error the offset is left unchanged.
    #[inline]
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let amt = self.inner.read(buf)?;
        *self.offset += amt as u64;
        Ok(amt)
    }
}

impl<'r, R> io::BufRead for BinaryStream<'r, R>
where
    R: io::BufRead,
{
    /// Forwards to the underlying reader. The offset is not changed here
    /// because no bytes are consumed until [`consume`](Self::consume) is called.
    #[inline]
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        self.inner.fill_buf()
    }

    /// Marks `amt` bytes of the underlying reader as consumed and advances
    /// the tracked offset by the same amount.
    #[inline]
    fn consume(&mut self, amt: usize) {
        self.inner.consume(amt);
        *self.offset += amt as u64;
    }
}
713
714////////////////////////////////////////////////////////////////////////////////////////////////////
715
716/// A low level encoding-agnostic XML event reader.
717///
718/// Consumes bytes and streams XML [`Event`]s.
719///
720/// This reader does not manage namespace declarations and not able to resolve
721/// prefixes. If you want these features, use the [`NsReader`].
722///
723/// # Examples
724///
725/// ```
726/// use quick_xml::events::Event;
727/// use quick_xml::reader::Reader;
728///
729/// let xml = r#"<tag1 att1 = "test">
730///                 <tag2><!--Test comment-->Test</tag2>
731///                 <tag2>Test 2</tag2>
732///              </tag1>"#;
733/// let mut reader = Reader::from_str(xml);
734/// reader.config_mut().trim_text(true);
735///
736/// let mut count = 0;
737/// let mut txt = Vec::new();
738/// let mut buf = Vec::new();
739///
740/// // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
741/// loop {
742///     // NOTE: this is the generic case when we don't know about the input BufRead.
743///     // when the input is a &str or a &[u8], we don't actually need to use another
744///     // buffer, we could directly call `reader.read_event()`
745///     match reader.read_event_into(&mut buf) {
746///         Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
747///         // exits the loop when reaching end of file
748///         Ok(Event::Eof) => break,
749///
750///         Ok(Event::Start(e)) => {
751///             match e.name().as_ref() {
752///                 b"tag1" => println!("attributes values: {:?}",
753///                                     e.attributes().map(|a| a.unwrap().value)
754///                                     .collect::<Vec<_>>()),
755///                 b"tag2" => count += 1,
756///                 _ => (),
757///             }
758///         }
759///         Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
760///
761///         // There are several other `Event`s we do not consider here
762///         _ => (),
763///     }
764///     // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
765///     buf.clear();
766/// }
767/// ```
768///
769/// [`NsReader`]: crate::reader::NsReader
770#[derive(Debug, Clone)]
771pub struct Reader<R> {
772    /// Source of data for parse
773    reader: R,
774    /// Configuration and current parse state
775    state: ReaderState,
776}
777
778/// Builder methods
779impl<R> Reader<R> {
780    /// Creates a `Reader` that reads from a given reader.
781    pub fn from_reader(reader: R) -> Self {
782        Self {
783            reader,
784            state: ReaderState::default(),
785        }
786    }
787
788    /// Returns reference to the parser configuration
789    pub const fn config(&self) -> &Config {
790        &self.state.config
791    }
792
793    /// Returns mutable reference to the parser configuration
794    pub fn config_mut(&mut self) -> &mut Config {
795        &mut self.state.config
796    }
797}
798
799/// Getters
800impl<R> Reader<R> {
801    /// Consumes `Reader` returning the underlying reader
802    ///
803    /// Can be used to compute line and column of a parsing error position
804    ///
805    /// # Examples
806    ///
807    /// ```
808    /// # use pretty_assertions::assert_eq;
809    /// use std::{str, io::Cursor};
810    /// use quick_xml::events::Event;
811    /// use quick_xml::reader::Reader;
812    ///
813    /// let xml = r#"<tag1 att1 = "test">
814    ///                 <tag2><!--Test comment-->Test</tag2>
815    ///                 <tag3>Test 2</tag3>
816    ///              </tag1>"#;
817    /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
818    /// let mut buf = Vec::new();
819    ///
820    /// fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) {
821    ///     // We known that size cannot exceed usize::MAX because we created parser from single &[u8]
822    ///     let end_pos = reader.buffer_position() as usize;
823    ///     let mut cursor = reader.into_inner();
824    ///     let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned())
825    ///         .expect("can't make a string");
826    ///     let mut line = 1;
827    ///     let mut column = 0;
828    ///     for c in s.chars() {
829    ///         if c == '\n' {
830    ///             line += 1;
831    ///             column = 0;
832    ///         } else {
833    ///             column += 1;
834    ///         }
835    ///     }
836    ///     (line, column)
837    /// }
838    ///
839    /// loop {
840    ///     match reader.read_event_into(&mut buf) {
841    ///         Ok(Event::Start(ref e)) => match e.name().as_ref() {
842    ///             b"tag1" | b"tag2" => (),
843    ///             tag => {
844    ///                 assert_eq!(b"tag3", tag);
845    ///                 assert_eq!((3, 22), into_line_and_column(reader));
846    ///                 break;
847    ///             }
848    ///         },
849    ///         Ok(Event::Eof) => unreachable!(),
850    ///         _ => (),
851    ///     }
852    ///     buf.clear();
853    /// }
854    /// ```
855    pub fn into_inner(self) -> R {
856        self.reader
857    }
858
859    /// Gets a reference to the underlying reader.
860    pub const fn get_ref(&self) -> &R {
861        &self.reader
862    }
863
864    /// Gets a mutable reference to the underlying reader.
865    ///
866    /// Avoid read from this reader because this will not update reader's position
867    /// and will lead to incorrect positions of errors. If you want to read, use
868    /// [`stream()`] instead.
869    ///
870    /// [`stream()`]: Self::stream
871    pub fn get_mut(&mut self) -> &mut R {
872        &mut self.reader
873    }
874
875    /// Gets the byte position in the input data just after the last emitted event
876    /// (i.e. this is position where data of last event ends).
877    ///
878    /// Note, that for text events which is originally ended with whitespace characters
879    /// (` `, `\t`, `\r`, and `\n`) if [`Config::trim_text_end`] is set this is position
880    /// before trim, not the position of the last byte of the [`Event::Text`] content.
881    pub const fn buffer_position(&self) -> u64 {
882        // when internal state is InsideMarkup, we have actually read until '<',
883        // which we don't want to show
884        if let ParseState::InsideMarkup = self.state.state {
885            self.state.offset - 1
886        } else {
887            self.state.offset
888        }
889    }
890
891    /// Gets the last error byte position in the input data. If there is no errors
892    /// yet, returns `0`.
893    ///
894    /// Unlike `buffer_position` it will point to the place where it is rational
895    /// to report error to the end user. For example, all [`SyntaxError`]s are
896    /// reported when the parser sees EOF inside of some kind of markup. The
897    /// `buffer_position()` will point to the last byte of input which is not
898    /// very useful. `error_position()` will point to the start of corresponding
899    /// markup element (i. e. to the `<` character).
900    ///
901    /// This position is always `<= buffer_position()`.
902    pub const fn error_position(&self) -> u64 {
903        self.state.last_error_offset
904    }
905
906    /// Get the decoder, used to decode bytes, read by this reader, to the strings.
907    ///
908    /// If [`encoding`] feature is enabled, the used encoding may change after
909    /// parsing the XML declaration, otherwise encoding is fixed to UTF-8.
910    ///
911    /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
912    /// defaults to UTF-8.
913    ///
914    /// [`encoding`]: ../index.html#encoding
915    #[inline]
916    pub const fn decoder(&self) -> Decoder {
917        self.state.decoder()
918    }
919
920    /// Get the direct access to the underlying reader, but tracks the amount of
921    /// read data and update [`Reader::buffer_position()`] accordingly.
922    ///
923    /// Note, that this method gives you access to the internal reader and read
924    /// data will not be returned in any subsequent events read by `read_event`
925    /// family of methods.
926    ///
927    /// # Example
928    ///
929    /// This example demonstrates how to read stream raw bytes from an XML document.
930    /// This could be used to implement streaming read of text, or to read raw binary
931    /// bytes embedded in an XML document. (Documents with embedded raw bytes are not
932    /// valid XML, but XML-derived file formats exist where such documents are valid).
933    ///
934    /// ```
935    /// # use pretty_assertions::assert_eq;
936    /// use std::io::{BufRead, Read};
937    /// use quick_xml::events::{BytesEnd, BytesStart, Event};
938    /// use quick_xml::reader::Reader;
939    ///
940    /// let mut reader = Reader::from_str("<tag>binary << data&></tag>");
941    /// //                                 ^    ^               ^     ^
942    /// //                                 0    5              21    27
943    ///
944    /// assert_eq!(
945    ///     (reader.read_event().unwrap(), reader.buffer_position()),
946    ///     // 5 - end of the `<tag>`
947    ///     (Event::Start(BytesStart::new("tag")), 5)
948    /// );
949    ///
950    /// // Reading directly from underlying reader will not update position
951    /// // let mut inner = reader.get_mut();
952    ///
953    /// // Reading from the stream() advances position
954    /// let mut inner = reader.stream();
955    ///
956    /// // Read binary data. We must know its size
957    /// let mut binary = [0u8; 16];
958    /// inner.read_exact(&mut binary).unwrap();
959    /// assert_eq!(&binary, b"binary << data&>");
960    /// // 21 - end of the `binary << data&>`
961    /// assert_eq!(inner.offset(), 21);
962    /// assert_eq!(reader.buffer_position(), 21);
963    ///
964    /// assert_eq!(
965    ///     (reader.read_event().unwrap(), reader.buffer_position()),
966    ///     // 27 - end of the `</tag>`
967    ///     (Event::End(BytesEnd::new("tag")), 27)
968    /// );
969    ///
970    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
971    /// ```
972    #[inline]
973    pub fn stream(&mut self) -> BinaryStream<R> {
974        BinaryStream {
975            inner: &mut self.reader,
976            offset: &mut self.state.offset,
977        }
978    }
979}
980
981/// Private sync reading methods
982impl<R> Reader<R> {
983    /// Read text into the given buffer, and return an event that borrows from
984    /// either that buffer or from the input itself, based on the type of the
985    /// reader.
986    fn read_event_impl<'i, B>(&mut self, mut buf: B) -> Result<Event<'i>, Error>
987    where
988        R: XmlSource<'i, B>,
989    {
990        read_event_impl!(self, buf, self.reader, read_until_close)
991    }
992
993    /// Private function to read until `>` is found. This function expects that
994    /// it was called just after encounter a `<` symbol.
995    fn read_until_close<'i, B>(&mut self, buf: B) -> Result<Event<'i>, Error>
996    where
997        R: XmlSource<'i, B>,
998    {
999        read_until_close!(self, buf, self.reader)
1000    }
1001}
1002
1003////////////////////////////////////////////////////////////////////////////////////////////////////
1004
/// Outcome of trying to read XML textual data from the source.
#[derive(Debug)]
enum ReadTextResult<'r, B> {
    /// The very first byte was the start of markup (`<`), and that `<` has
    /// been consumed.
    /// The buffer is handed back so that the next iteration cycle can reuse
    /// it (needed to satisfy the borrow checker).
    Markup(B),
    /// The very first byte was the start of a reference (`&`), and that `&`
    /// has **not** been consumed.
    /// The buffer is handed back so that the next iteration cycle can reuse
    /// it (needed to satisfy the borrow checker).
    Ref(B),
    /// Text that precedes the start of markup (`<`). The `<` has been consumed.
    UpToMarkup(&'r [u8]),
    /// Text that precedes the start of a reference (`&`).
    /// The `&` has **not** been consumed.
    UpToRef(&'r [u8]),
    /// Text that runs to EOF: neither the start of markup (`<`) nor the
    /// start of a reference (`&`) was found.
    UpToEof(&'r [u8]),
    /// The read failed with an IO error.
    Err(io::Error),
}
1028
/// Outcome of trying to read a general reference from the reader.
#[derive(Debug)]
enum ReadRefResult<'r> {
    /// Data up to the end of the reference (`;`).
    /// The leading `&` is included, the trailing `;` is not.
    Ref(&'r [u8]),
    /// Data up to EOF: no end of reference (`;`), no start of another
    /// reference (`&`) and no start of markup (`<`) was found.
    /// The leading `&` is included.
    UpToEof(&'r [u8]),
    /// Data up to the next possible reference (`&` character).
    /// The leading `&` is included.
    UpToRef(&'r [u8]),
    /// Data up to the start of markup (`<` character).
    /// The leading `&` is included.
    UpToMarkup(&'r [u8]),
    /// The read failed with an IO error.
    Err(io::Error),
}
1048
1049/// Represents an input for a reader that can return borrowed data.
1050///
1051/// There are two implementors of this trait: generic one that read data from
1052/// `Self`, copies some part of it into a provided buffer of type `B` and then
1053/// returns data that borrow from that buffer.
1054///
1055/// The other implementor is for `&[u8]` and instead of copying data returns
1056/// borrowed data from `Self` instead. This implementation allows zero-copy
1057/// deserialization.
1058///
1059/// # Parameters
1060/// - `'r`: lifetime of a buffer from which events will borrow
1061/// - `B`: a type of a buffer that can be used to store data read from `Self` and
1062///   from which events can borrow
1063trait XmlSource<'r, B> {
1064    /// Removes UTF-8 BOM if it is present
1065    #[cfg(not(feature = "encoding"))]
1066    fn remove_utf8_bom(&mut self) -> io::Result<()>;
1067
1068    /// Determines encoding from the start of input and removes BOM if it is present
1069    #[cfg(feature = "encoding")]
1070    fn detect_encoding(&mut self) -> io::Result<Option<&'static Encoding>>;
1071
1072    /// Read input until start of markup (the `<`) is found, start of general entity
1073    /// reference (the `&`) is found or end of input is reached.
1074    ///
1075    /// # Parameters
1076    /// - `buf`: Buffer that could be filled from an input (`Self`) and
1077    ///   from which [events] could borrow their data
1078    /// - `position`: Will be increased by amount of bytes consumed
1079    ///
1080    /// [events]: crate::events::Event
1081    fn read_text(&mut self, buf: B, position: &mut u64) -> ReadTextResult<'r, B>;
1082
1083    /// Read input until end of general reference (the `;`) is found, start of
1084    /// another general reference (the `&`) is found or end of input is reached.
1085    ///
1086    /// This method must be called when current character is `&`.
1087    ///
1088    /// # Parameters
1089    /// - `buf`: Buffer that could be filled from an input (`Self`) and
1090    ///   from which [events] could borrow their data
1091    /// - `position`: Will be increased by amount of bytes consumed
1092    ///
1093    /// [events]: crate::events::Event
1094    fn read_ref(&mut self, buf: B, position: &mut u64) -> ReadRefResult<'r>;
1095
1096    /// Read input until processing instruction is finished.
1097    ///
1098    /// This method expect that start sequence of a parser already was read.
1099    ///
1100    /// Returns a slice of data read up to the end of the thing being parsed.
1101    /// The end of thing and the returned content is determined by the used parser.
1102    ///
1103    /// If input (`Self`) is exhausted and no bytes was read, or if the specified
1104    /// parser could not find the ending sequence of the thing, returns `SyntaxError`.
1105    ///
1106    /// # Parameters
1107    /// - `buf`: Buffer that could be filled from an input (`Self`) and
1108    ///   from which [events] could borrow their data
1109    /// - `position`: Will be increased by amount of bytes consumed
1110    ///
1111    /// A `P` type parameter is used to preserve state between calls to the underlying
1112    /// reader which provides bytes fed into the parser.
1113    ///
1114    /// [events]: crate::events::Event
1115    fn read_with<P>(&mut self, parser: P, buf: B, position: &mut u64) -> Result<&'r [u8], Error>
1116    where
1117        P: Parser;
1118
1119    /// Read input until comment or CDATA is finished.
1120    ///
1121    /// This method expect that `<` already was read.
1122    ///
1123    /// Returns a slice of data read up to end of comment or CDATA (`>`),
1124    /// which does not include into result.
1125    ///
1126    /// If input (`Self`) is exhausted and nothing was read, returns `None`.
1127    ///
1128    /// # Parameters
1129    /// - `buf`: Buffer that could be filled from an input (`Self`) and
1130    ///   from which [events] could borrow their data
1131    /// - `position`: Will be increased by amount of bytes consumed
1132    ///
1133    /// [events]: crate::events::Event
1134    fn read_bang_element(
1135        &mut self,
1136        buf: B,
1137        position: &mut u64,
1138    ) -> Result<(BangType, &'r [u8]), Error>;
1139
1140    /// Consume and discard all the whitespace until the next non-whitespace
1141    /// character or EOF.
1142    ///
1143    /// # Parameters
1144    /// - `position`: Will be increased by amount of bytes consumed
1145    fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()>;
1146
1147    /// Return one character without consuming it, so that future `read_*` calls
1148    /// will still include it. On EOF, return `None`.
1149    fn peek_one(&mut self) -> io::Result<Option<u8>>;
1150}
1151
/// Kinds of markup that begin with `<!`
#[derive(Debug, PartialEq)]
enum BangType {
    /// `<![CDATA[...]]>`
    CData,
    /// `<!--...-->`
    Comment,
    /// `<!DOCTYPE...>`. Holds the running balance of `<` (+1) and `>` (-1)
    DocType(i32),
}
1162impl BangType {
1163    #[inline(always)]
1164    const fn new(byte: Option<u8>) -> Result<Self, SyntaxError> {
1165        Ok(match byte {
1166            Some(b'[') => Self::CData,
1167            Some(b'-') => Self::Comment,
1168            Some(b'D') | Some(b'd') => Self::DocType(0),
1169            _ => return Err(SyntaxError::InvalidBangMarkup),
1170        })
1171    }
1172
1173    /// If element is finished, returns its content up to `>` symbol and
1174    /// an index of this symbol, otherwise returns `None`
1175    ///
1176    /// # Parameters
1177    /// - `buf`: buffer with data consumed on previous iterations
1178    /// - `chunk`: data read on current iteration and not yet consumed from reader
1179    #[inline(always)]
1180    fn parse<'b>(&mut self, buf: &[u8], chunk: &'b [u8]) -> Option<(&'b [u8], usize)> {
1181        match self {
1182            Self::Comment => {
1183                for i in memchr::memchr_iter(b'>', chunk) {
1184                    // Need to read at least 6 symbols (`!---->`) for properly finished comment
1185                    // <!----> - XML comment
1186                    //  012345 - i
1187                    if buf.len() + i > 4 {
1188                        if chunk[..i].ends_with(b"--") {
1189                            // We cannot strip last `--` from the buffer because we need it in case of
1190                            // check_comments enabled option. XML standard requires that comment
1191                            // will not end with `--->` sequence because this is a special case of
1192                            // `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments)
1193                            return Some((&chunk[..i], i + 1)); // +1 for `>`
1194                        }
1195                        // End sequence `-|->` was splitted at |
1196                        //        buf --/   \-- chunk
1197                        if i == 1 && buf.ends_with(b"-") && chunk[0] == b'-' {
1198                            return Some((&chunk[..i], i + 1)); // +1 for `>`
1199                        }
1200                        // End sequence `--|>` was splitted at |
1201                        //         buf --/   \-- chunk
1202                        if i == 0 && buf.ends_with(b"--") {
1203                            return Some((&[], i + 1)); // +1 for `>`
1204                        }
1205                    }
1206                }
1207            }
1208            Self::CData => {
1209                for i in memchr::memchr_iter(b'>', chunk) {
1210                    if chunk[..i].ends_with(b"]]") {
1211                        return Some((&chunk[..i], i + 1)); // +1 for `>`
1212                    }
1213                    // End sequence `]|]>` was splitted at |
1214                    //        buf --/   \-- chunk
1215                    if i == 1 && buf.ends_with(b"]") && chunk[0] == b']' {
1216                        return Some((&chunk[..i], i + 1)); // +1 for `>`
1217                    }
1218                    // End sequence `]]|>` was splitted at |
1219                    //         buf --/   \-- chunk
1220                    if i == 0 && buf.ends_with(b"]]") {
1221                        return Some((&[], i + 1)); // +1 for `>`
1222                    }
1223                }
1224            }
1225            Self::DocType(ref mut balance) => {
1226                for i in memchr::memchr2_iter(b'<', b'>', chunk) {
1227                    if chunk[i] == b'<' {
1228                        *balance += 1;
1229                    } else {
1230                        if *balance == 0 {
1231                            return Some((&chunk[..i], i + 1)); // +1 for `>`
1232                        }
1233                        *balance -= 1;
1234                    }
1235                }
1236            }
1237        }
1238        None
1239    }
1240    #[inline]
1241    const fn to_err(&self) -> SyntaxError {
1242        match self {
1243            Self::CData => SyntaxError::UnclosedCData,
1244            Self::Comment => SyntaxError::UnclosedComment,
1245            Self::DocType(_) => SyntaxError::UnclosedDoctype,
1246        }
1247    }
1248}
1249
1250////////////////////////////////////////////////////////////////////////////////////////////////////
1251
1252#[cfg(test)]
1253mod test {
1254    /// Checks the internal implementation of the various reader methods
1255    macro_rules! check {
1256        (
1257            #[$test:meta]
1258            $read_event:ident,
1259            $read_until_close:ident,
1260            // constructor of the XML source on which internal functions will be called
1261            $source:path,
1262            // constructor of the buffer to which read data will stored
1263            $buf:expr
1264            $(, $async:ident, $await:ident)?
1265        ) => {
1266            mod read_bang_element {
1267                use super::*;
1268                use crate::errors::{Error, SyntaxError};
1269                use crate::reader::BangType;
1270                use crate::utils::Bytes;
1271
1272                /// Checks that reading CDATA content works correctly
1273                mod cdata {
1274                    use super::*;
1275                    use pretty_assertions::assert_eq;
1276
1277                    /// Checks that if input begins like CDATA element, but CDATA start sequence
1278                    /// is not finished, parsing ends with an error
1279                    #[$test]
1280                    #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"]
1281                    $($async)? fn not_properly_start() {
1282                        let buf = $buf;
1283                        let mut position = 1;
1284                        let mut input = b"![]]>other content".as_ref();
1285                        //                ^= 1
1286
1287                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1288                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData),
1289                            x => panic!(
1290                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1291                                x
1292                            ),
1293                        }
1294                        assert_eq!(position, 1);
1295                    }
1296
1297                    /// Checks that if CDATA startup sequence was matched, but an end sequence
1298                    /// is not found, parsing ends with an error
1299                    #[$test]
1300                    $($async)? fn not_closed() {
1301                        let buf = $buf;
1302                        let mut position = 1;
1303                        let mut input = b"![CDATA[other content".as_ref();
1304                        //                ^= 1                 ^= 22
1305
1306                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1307                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData),
1308                            x => panic!(
1309                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1310                                x
1311                            ),
1312                        }
1313                        assert_eq!(position, 22);
1314                    }
1315
1316                    /// Checks that CDATA element without content inside parsed successfully
1317                    #[$test]
1318                    $($async)? fn empty() {
1319                        let buf = $buf;
1320                        let mut position = 1;
1321                        let mut input = b"![CDATA[]]>other content".as_ref();
1322                        //                ^= 1       ^= 12
1323
1324                        let (ty, bytes) = $source(&mut input)
1325                            .read_bang_element(buf, &mut position)
1326                            $(.$await)?
1327                            .unwrap();
1328                        assert_eq!(
1329                            (ty, Bytes(bytes)),
1330                            (BangType::CData, Bytes(b"![CDATA[]]"))
1331                        );
1332                        assert_eq!(position, 12);
1333                    }
1334
1335                    /// Checks that CDATA element with content parsed successfully.
1336                    /// Additionally checks that sequences inside CDATA that may look like
1337                    /// a CDATA end sequence do not interrupt CDATA parsing
1338                    #[$test]
1339                    $($async)? fn with_content() {
1340                        let buf = $buf;
1341                        let mut position = 1;
1342                        let mut input = b"![CDATA[cdata]] ]>content]]>other content]]>".as_ref();
1343                        //                ^= 1                        ^= 29
1344
1345                        let (ty, bytes) = $source(&mut input)
1346                            .read_bang_element(buf, &mut position)
1347                            $(.$await)?
1348                            .unwrap();
1349                        assert_eq!(
1350                            (ty, Bytes(bytes)),
1351                            (BangType::CData, Bytes(b"![CDATA[cdata]] ]>content]]"))
1352                        );
1353                        assert_eq!(position, 29);
1354                    }
1355                }
1356
1357                /// Checks that reading XML comments works correctly. According to the [specification],
1358                /// comment data can contain any sequence except `--`:
1359                ///
1360                /// ```peg
1361                /// comment = '<--' (!'--' char)* '-->';
1362                /// char = [#x1-#x2C]
1363                ///      / [#x2E-#xD7FF]
1364                ///      / [#xE000-#xFFFD]
1365                ///      / [#x10000-#x10FFFF]
1366                /// ```
1367                ///
1368                /// The presence of this limitation, however, is simply a poorly designed specification
1369                /// (maybe for purpose of building of LL(1) XML parser) and quick-xml does not check for
1370                /// presence of these sequences by default. This tests allow such content.
1371                ///
1372                /// [specification]: https://www.w3.org/TR/xml11/#dt-comment
1373                mod comment {
1374                    use super::*;
1375                    use pretty_assertions::assert_eq;
1376
1377                    #[$test]
1378                    #[ignore = "start comment sequence fully checked outside of `read_bang_element`"]
1379                    $($async)? fn not_properly_start() {
1380                        let buf = $buf;
1381                        let mut position = 1;
1382                        let mut input = b"!- -->other content".as_ref();
1383                        //                ^= 1
1384
1385                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1386                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1387                            x => panic!(
1388                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1389                                x
1390                            ),
1391                        }
1392                        assert_eq!(position, 1);
1393                    }
1394
1395                    #[$test]
1396                    $($async)? fn not_properly_end() {
1397                        let buf = $buf;
1398                        let mut position = 1;
1399                        let mut input = b"!->other content".as_ref();
1400                        //                ^= 1            ^= 17
1401
1402                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1403                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1404                            x => panic!(
1405                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1406                                x
1407                            ),
1408                        }
1409                        assert_eq!(position, 17);
1410                    }
1411
1412                    #[$test]
1413                    $($async)? fn not_closed1() {
1414                        let buf = $buf;
1415                        let mut position = 1;
1416                        let mut input = b"!--other content".as_ref();
1417                        //                ^= 1            ^= 17
1418
1419                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1420                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1421                            x => panic!(
1422                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1423                                x
1424                            ),
1425                        }
1426                        assert_eq!(position, 17);
1427                    }
1428
1429                    #[$test]
1430                    $($async)? fn not_closed2() {
1431                        let buf = $buf;
1432                        let mut position = 1;
1433                        let mut input = b"!-->other content".as_ref();
1434                        //                ^= 1             ^= 18
1435
1436                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1437                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1438                            x => panic!(
1439                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1440                                x
1441                            ),
1442                        }
1443                        assert_eq!(position, 18);
1444                    }
1445
1446                    #[$test]
1447                    $($async)? fn not_closed3() {
1448                        let buf = $buf;
1449                        let mut position = 1;
1450                        let mut input = b"!--->other content".as_ref();
1451                        //                ^= 1              ^= 19
1452
1453                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1454                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1455                            x => panic!(
1456                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1457                                x
1458                            ),
1459                        }
1460                        assert_eq!(position, 19);
1461                    }
1462
1463                    #[$test]
1464                    $($async)? fn empty() {
1465                        let buf = $buf;
1466                        let mut position = 1;
1467                        let mut input = b"!---->other content".as_ref();
1468                        //                ^= 1  ^= 7
1469
1470                        let (ty, bytes) = $source(&mut input)
1471                            .read_bang_element(buf, &mut position)
1472                            $(.$await)?
1473                            .unwrap();
1474                        assert_eq!(
1475                            (ty, Bytes(bytes)),
1476                            (BangType::Comment, Bytes(b"!----"))
1477                        );
1478                        assert_eq!(position, 7);
1479                    }
1480
1481                    #[$test]
1482                    $($async)? fn with_content() {
1483                        let buf = $buf;
1484                        let mut position = 1;
1485                        let mut input = b"!--->comment<--->other content".as_ref();
1486                        //                ^= 1             ^= 18
1487
1488                        let (ty, bytes) = $source(&mut input)
1489                            .read_bang_element(buf, &mut position)
1490                            $(.$await)?
1491                            .unwrap();
1492                        assert_eq!(
1493                            (ty, Bytes(bytes)),
1494                            (BangType::Comment, Bytes(b"!--->comment<---"))
1495                        );
1496                        assert_eq!(position, 18);
1497                    }
1498                }
1499
1500                /// Checks that reading DOCTYPE definition works correctly
1501                mod doctype {
1502                    use super::*;
1503
1504                    mod uppercase {
1505                        use super::*;
1506                        use pretty_assertions::assert_eq;
1507
1508                        #[$test]
1509                        $($async)? fn not_properly_start() {
1510                            let buf = $buf;
1511                            let mut position = 1;
1512                            let mut input = b"!D other content".as_ref();
1513                            //                ^= 1            ^= 17
1514
1515                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1516                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1517                                x => panic!(
1518                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1519                                    x
1520                                ),
1521                            }
1522                            assert_eq!(position, 17);
1523                        }
1524
1525                        #[$test]
1526                        $($async)? fn without_space() {
1527                            let buf = $buf;
1528                            let mut position = 1;
1529                            let mut input = b"!DOCTYPEother content".as_ref();
1530                            //                ^= 1                 ^= 22
1531
1532                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1533                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1534                                x => panic!(
1535                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1536                                    x
1537                                ),
1538                            }
1539                            assert_eq!(position, 22);
1540                        }
1541
1542                        #[$test]
1543                        $($async)? fn empty() {
1544                            let buf = $buf;
1545                            let mut position = 1;
1546                            let mut input = b"!DOCTYPE>other content".as_ref();
1547                            //                ^= 1     ^= 10
1548
1549                            let (ty, bytes) = $source(&mut input)
1550                                .read_bang_element(buf, &mut position)
1551                                $(.$await)?
1552                                .unwrap();
1553                            assert_eq!(
1554                                (ty, Bytes(bytes)),
1555                                (BangType::DocType(0), Bytes(b"!DOCTYPE"))
1556                            );
1557                            assert_eq!(position, 10);
1558                        }
1559
1560                        #[$test]
1561                        $($async)? fn not_closed() {
1562                            let buf = $buf;
1563                            let mut position = 1;
1564                            let mut input = b"!DOCTYPE other content".as_ref();
1565                            //                ^= 1                  ^23
1566
1567                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1568                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1569                                x => panic!(
1570                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1571                                    x
1572                                ),
1573                            }
1574                            assert_eq!(position, 23);
1575                        }
1576                    }
1577
1578                    mod lowercase {
1579                        use super::*;
1580                        use pretty_assertions::assert_eq;
1581
1582                        #[$test]
1583                        $($async)? fn not_properly_start() {
1584                            let buf = $buf;
1585                            let mut position = 1;
1586                            let mut input = b"!d other content".as_ref();
1587                            //                ^= 1            ^= 17
1588
1589                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1590                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1591                                x => panic!(
1592                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1593                                    x
1594                                ),
1595                            }
1596                            assert_eq!(position, 17);
1597                        }
1598
1599                        #[$test]
1600                        $($async)? fn without_space() {
1601                            let buf = $buf;
1602                            let mut position = 1;
1603                            let mut input = b"!doctypeother content".as_ref();
1604                            //                ^= 1                 ^= 22
1605
1606                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1607                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1608                                x => panic!(
1609                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1610                                    x
1611                                ),
1612                            }
1613                            assert_eq!(position, 22);
1614                        }
1615
1616                        #[$test]
1617                        $($async)? fn empty() {
1618                            let buf = $buf;
1619                            let mut position = 1;
1620                            let mut input = b"!doctype>other content".as_ref();
1621                            //                ^= 1     ^= 10
1622
1623                            let (ty, bytes) = $source(&mut input)
1624                                .read_bang_element(buf, &mut position)
1625                                $(.$await)?
1626                                .unwrap();
1627                            assert_eq!(
1628                                (ty, Bytes(bytes)),
1629                                (BangType::DocType(0), Bytes(b"!doctype"))
1630                            );
1631                            assert_eq!(position, 10);
1632                        }
1633
1634                        #[$test]
1635                        $($async)? fn not_closed() {
1636                            let buf = $buf;
1637                            let mut position = 1;
1638                            let mut input = b"!doctype other content".as_ref();
1639                            //                ^= 1                  ^= 23
1640
1641                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1642                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1643                                x => panic!(
1644                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1645                                    x
1646                                ),
1647                            }
1648                            assert_eq!(position, 23);
1649                        }
1650                    }
1651                }
1652            }
1653
1654            mod read_text {
1655                use super::*;
1656                use crate::reader::ReadTextResult;
1657                use crate::utils::Bytes;
1658                use pretty_assertions::assert_eq;
1659
1660                #[$test]
1661                $($async)? fn empty() {
1662                    let buf = $buf;
1663                    let mut position = 1;
1664                    let mut input = b"".as_ref();
1665                    //                ^= 1
1666
1667                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1668                        ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"")),
1669                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1670                    }
1671                    assert_eq!(position, 1);
1672                }
1673
1674                #[$test]
1675                $($async)? fn markup() {
1676                    let buf = $buf;
1677                    let mut position = 1;
1678                    let mut input = b"<".as_ref();
1679                    //                 ^= 2
1680
1681                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1682                        ReadTextResult::Markup(b) => assert_eq!(b, $buf),
1683                        x => panic!("Expected `Markup(_)`, but got `{:?}`", x),
1684                    }
1685                    assert_eq!(position, 2);
1686                }
1687
1688                #[$test]
1689                $($async)? fn ref_() {
1690                    let buf = $buf;
1691                    let mut position = 1;
1692                    let mut input = b"&".as_ref();
1693                    //                ^= 1
1694
1695                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1696                        ReadTextResult::Ref(b) => assert_eq!(b, $buf),
1697                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1698                    }
1699                    assert_eq!(position, 1);
1700                }
1701
1702                #[$test]
1703                $($async)? fn up_to_markup() {
1704                    let buf = $buf;
1705                    let mut position = 1;
1706                    let mut input = b"a<".as_ref();
1707                    //                1 ^= 3
1708
1709                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1710                        ReadTextResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1711                        x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
1712                    }
1713                    assert_eq!(position, 3);
1714                }
1715
1716                #[$test]
1717                $($async)? fn up_to_ref() {
1718                    let buf = $buf;
1719                    let mut position = 1;
1720                    let mut input = b"a&".as_ref();
1721                    //                 ^= 2
1722
1723                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1724                        ReadTextResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1725                        x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
1726                    }
1727                    assert_eq!(position, 2);
1728                }
1729
1730                #[$test]
1731                $($async)? fn up_to_eof() {
1732                    let buf = $buf;
1733                    let mut position = 1;
1734                    let mut input = b"a".as_ref();
1735                    //                 ^= 2
1736
1737                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1738                        ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1739                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1740                    }
1741                    assert_eq!(position, 2);
1742                }
1743            }
1744
1745            mod read_ref {
1746                use super::*;
1747                use crate::reader::ReadRefResult;
1748                use crate::utils::Bytes;
1749                use pretty_assertions::assert_eq;
1750
1751                // Empty input is not allowed for `read_ref` so not tested.
1752                // Borrowed source triggers debug assertion,
1753                // buffered do nothing due to implementation details.
1754
1755                #[$test]
1756                $($async)? fn up_to_eof() {
1757                    let buf = $buf;
1758                    let mut position = 1;
1759                    let mut input = b"&".as_ref();
1760                    //                 ^= 2
1761
1762                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1763                        ReadRefResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1764                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1765                    }
1766                    assert_eq!(position, 2);
1767                }
1768
1769                #[$test]
1770                $($async)? fn up_to_ref() {
1771                    let buf = $buf;
1772                    let mut position = 1;
1773                    let mut input = b"&&".as_ref();
1774                    //                 ^= 2
1775
1776                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1777                        ReadRefResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1778                        x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
1779                    }
1780                    assert_eq!(position, 2);
1781                }
1782
1783                #[$test]
1784                $($async)? fn up_to_markup() {
1785                    let buf = $buf;
1786                    let mut position = 1;
1787                    let mut input = b"&<".as_ref();
1788                    //                  ^= 3
1789
1790                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1791                        ReadRefResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1792                        x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
1793                    }
1794                    assert_eq!(position, 3);
1795                }
1796
1797                #[$test]
1798                $($async)? fn empty_ref() {
1799                    let buf = $buf;
1800                    let mut position = 1;
1801                    let mut input = b"&;".as_ref();
1802                    //                  ^= 3
1803
1804                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1805                        ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1806                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1807                    }
1808                    assert_eq!(position, 3);
1809                }
1810
1811                #[$test]
1812                $($async)? fn normal() {
1813                    let buf = $buf;
1814                    let mut position = 1;
1815                    let mut input = b"&lt;".as_ref();
1816                    //                    ^= 5
1817
1818                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1819                        ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&lt")),
1820                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1821                    }
1822                    assert_eq!(position, 5);
1823                }
1824            }
1825
1826            mod read_element {
1827                use super::*;
1828                use crate::errors::{Error, SyntaxError};
1829                use crate::parser::ElementParser;
1830                use crate::utils::Bytes;
1831                use pretty_assertions::assert_eq;
1832
1833                /// Checks that nothing was read from empty buffer
1834                #[$test]
1835                $($async)? fn empty() {
1836                    let buf = $buf;
1837                    let mut position = 1;
1838                    let mut input = b"".as_ref();
1839                    //                ^= 1
1840
1841                    match $source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? {
1842                        Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedTag),
1843                        x => panic!(
1844                            "Expected `Err(Syntax(_))`, but got `{:?}`",
1845                            x
1846                        ),
1847                    }
1848                    assert_eq!(position, 1);
1849                }
1850
1851                mod open {
1852                    use super::*;
1853                    use pretty_assertions::assert_eq;
1854
1855                    #[$test]
1856                    $($async)? fn empty_tag() {
1857                        let buf = $buf;
1858                        let mut position = 1;
1859                        let mut input = b">".as_ref();
1860                        //                 ^= 2
1861
1862                        assert_eq!(
1863                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1864                            Bytes(b"")
1865                        );
1866                        assert_eq!(position, 2);
1867                    }
1868
1869                    #[$test]
1870                    $($async)? fn normal() {
1871                        let buf = $buf;
1872                        let mut position = 1;
1873                        let mut input = b"tag>".as_ref();
1874                        //                    ^= 5
1875
1876                        assert_eq!(
1877                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1878                            Bytes(b"tag")
1879                        );
1880                        assert_eq!(position, 5);
1881                    }
1882
1883                    #[$test]
1884                    $($async)? fn empty_ns_empty_tag() {
1885                        let buf = $buf;
1886                        let mut position = 1;
1887                        let mut input = b":>".as_ref();
1888                        //                  ^= 3
1889
1890                        assert_eq!(
1891                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1892                            Bytes(b":")
1893                        );
1894                        assert_eq!(position, 3);
1895                    }
1896
1897                    #[$test]
1898                    $($async)? fn empty_ns() {
1899                        let buf = $buf;
1900                        let mut position = 1;
1901                        let mut input = b":tag>".as_ref();
1902                        //                     ^= 6
1903
1904                        assert_eq!(
1905                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1906                            Bytes(b":tag")
1907                        );
1908                        assert_eq!(position, 6);
1909                    }
1910
1911                    #[$test]
1912                    $($async)? fn with_attributes() {
1913                        let buf = $buf;
1914                        let mut position = 1;
1915                        let mut input = br#"tag  attr-1=">"  attr2  =  '>'  3attr>"#.as_ref();
1916                        //                                                        ^= 39
1917
1918                        assert_eq!(
1919                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1920                            Bytes(br#"tag  attr-1=">"  attr2  =  '>'  3attr"#)
1921                        );
1922                        assert_eq!(position, 39);
1923                    }
1924                }
1925
1926                mod self_closed {
1927                    use super::*;
1928                    use pretty_assertions::assert_eq;
1929
1930                    #[$test]
1931                    $($async)? fn empty_tag() {
1932                        let buf = $buf;
1933                        let mut position = 1;
1934                        let mut input = b"/>".as_ref();
1935                        //                  ^= 3
1936
1937                        assert_eq!(
1938                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1939                            Bytes(b"/")
1940                        );
1941                        assert_eq!(position, 3);
1942                    }
1943
1944                    #[$test]
1945                    $($async)? fn normal() {
1946                        let buf = $buf;
1947                        let mut position = 1;
1948                        let mut input = b"tag/>".as_ref();
1949                        //                     ^= 6
1950
1951                        assert_eq!(
1952                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1953                            Bytes(b"tag/")
1954                        );
1955                        assert_eq!(position, 6);
1956                    }
1957
1958                    #[$test]
1959                    $($async)? fn empty_ns_empty_tag() {
1960                        let buf = $buf;
1961                        let mut position = 1;
1962                        let mut input = b":/>".as_ref();
1963                        //                   ^= 4
1964
1965                        assert_eq!(
1966                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1967                            Bytes(b":/")
1968                        );
1969                        assert_eq!(position, 4);
1970                    }
1971
1972                    #[$test]
1973                    $($async)? fn empty_ns() {
1974                        let buf = $buf;
1975                        let mut position = 1;
1976                        let mut input = b":tag/>".as_ref();
1977                        //                      ^= 7
1978
1979                        assert_eq!(
1980                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1981                            Bytes(b":tag/")
1982                        );
1983                        assert_eq!(position, 7);
1984                    }
1985
1986                    #[$test]
1987                    $($async)? fn with_attributes() {
1988                        let buf = $buf;
1989                        let mut position = 1;
1990                        let mut input = br#"tag  attr-1="/>"  attr2  =  '/>'  3attr/>"#.as_ref();
1991                        //                                                           ^= 42
1992
1993                        assert_eq!(
1994                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1995                            Bytes(br#"tag  attr-1="/>"  attr2  =  '/>'  3attr/"#)
1996                        );
1997                        assert_eq!(position, 42);
1998                    }
1999                }
2000
2001                mod close {
2002                    use super::*;
2003                    use pretty_assertions::assert_eq;
2004
2005                    #[$test]
2006                    $($async)? fn empty_tag() {
2007                        let buf = $buf;
2008                        let mut position = 1;
2009                        let mut input = b"/ >".as_ref();
2010                        //                   ^= 4
2011
2012                        assert_eq!(
2013                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2014                            Bytes(b"/ ")
2015                        );
2016                        assert_eq!(position, 4);
2017                    }
2018
2019                    #[$test]
2020                    $($async)? fn normal() {
2021                        let buf = $buf;
2022                        let mut position = 1;
2023                        let mut input = b"/tag>".as_ref();
2024                        //                     ^= 6
2025
2026                        assert_eq!(
2027                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2028                            Bytes(b"/tag")
2029                        );
2030                        assert_eq!(position, 6);
2031                    }
2032
2033                    #[$test]
2034                    $($async)? fn empty_ns_empty_tag() {
2035                        let buf = $buf;
2036                        let mut position = 1;
2037                        let mut input = b"/:>".as_ref();
2038                        //                   ^= 4
2039
2040                        assert_eq!(
2041                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2042                            Bytes(b"/:")
2043                        );
2044                        assert_eq!(position, 4);
2045                    }
2046
2047                    #[$test]
2048                    $($async)? fn empty_ns() {
2049                        let buf = $buf;
2050                        let mut position = 1;
2051                        let mut input = b"/:tag>".as_ref();
2052                        //                      ^= 7
2053
2054                        assert_eq!(
2055                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2056                            Bytes(b"/:tag")
2057                        );
2058                        assert_eq!(position, 7);
2059                    }
2060
2061                    #[$test]
2062                    $($async)? fn with_attributes() {
2063                        let buf = $buf;
2064                        let mut position = 1;
2065                        let mut input = br#"/tag  attr-1=">"  attr2  =  '>'  3attr>"#.as_ref();
2066                        //                                                         ^= 40
2067
2068                        assert_eq!(
2069                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2070                            Bytes(br#"/tag  attr-1=">"  attr2  =  '>'  3attr"#)
2071                        );
2072                        assert_eq!(position, 40);
2073                    }
2074                }
2075            }
2076
            /// Ensures that no empty `Text` events are generated
2078            mod $read_event {
2079                use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
2080                use crate::reader::Reader;
2081                use pretty_assertions::assert_eq;
2082
2083                /// When the `encoding` feature is enabled, the encoding should be detected
2084                /// from the BOM (UTF-8) and the BOM should be stripped.
2085                ///
2086                /// When the `encoding` feature is disabled, UTF-8 is assumed and the BOM
2087                /// character should be stripped for consistency.
2088                #[$test]
2089                $($async)? fn bom_from_reader() {
                        // The input is two BOM characters in a row. Only one `\u{feff}` is
                        // expected to come back as text, followed directly by `Eof`.
2090                    let mut reader = Reader::from_reader("\u{feff}\u{feff}".as_bytes());
2091
2092                    assert_eq!(
2093                        reader.$read_event($buf) $(.$await)? .unwrap(),
2094                        Event::Text(BytesText::from_escaped("\u{feff}"))
2095                    );
2096
2097                    assert_eq!(
2098                        reader.$read_event($buf) $(.$await)? .unwrap(),
2099                        Event::Eof
2100                    );
2101                }
2102
2103                /// When parsing from `&str`, the encoding is fixed (UTF-8), so
2104                /// - when the `encoding` feature is disabled, the behavior is the
2105                ///   same as in the `bom_from_reader` test
2106                /// - when the `encoding` feature is enabled, the behavior should
2107                ///   stay consistent, so the first BOM character is stripped
2108                #[$test]
2109                $($async)? fn bom_from_str() {
                        // Same input as `bom_from_reader`, but supplied as a `&str`: one
                        // `\u{feff}` is expected as text, followed directly by `Eof`.
2110                    let mut reader = Reader::from_str("\u{feff}\u{feff}");
2111
2112                    assert_eq!(
2113                        reader.$read_event($buf) $(.$await)? .unwrap(),
2114                        Event::Text(BytesText::from_escaped("\u{feff}"))
2115                    );
2116
2117                    assert_eq!(
2118                        reader.$read_event($buf) $(.$await)? .unwrap(),
2119                        Event::Eof
2120                    );
2121                }
2122
2123                #[$test]
2124                $($async)? fn declaration() {
                        // An XML declaration at the very start of the input must be the first
                        // event reported, i.e. no empty `Text` event may precede it.
2125                    let mut reader = Reader::from_str("<?xml ?>");
2126
2127                    assert_eq!(
2128                        reader.$read_event($buf) $(.$await)? .unwrap(),
                            // The expected content keeps the trailing space of `xml `; the `3` is
                            // presumably the length of the name `xml` -- TODO confirm against
                            // `BytesStart::from_content`.
2129                        Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
2130                    );
2131                }
2132
2133                #[$test]
2134                $($async)? fn doctype() {
                        // A DOCTYPE at the very start of the input must be the first event
                        // reported. The expected event carries only `x`, without the
                        // `<!DOCTYPE ` prefix and the closing `>`.
2135                    let mut reader = Reader::from_str("<!DOCTYPE x>");
2136
2137                    assert_eq!(
2138                        reader.$read_event($buf) $(.$await)? .unwrap(),
2139                        Event::DocType(BytesText::from_escaped("x"))
2140                    );
2141                }
2142
2143                #[$test]
2144                $($async)? fn processing_instruction() {
                        // The instruction body contains unbalanced quotes and a `?` and `>`
                        // separated by a space. The expected event shows that only the final
                        // `?>` closes the instruction and that the trailing space is kept.
2145                    let mut reader = Reader::from_str("<?xml-stylesheet '? >\" ?>");
2146
2147                    assert_eq!(
2148                        reader.$read_event($buf) $(.$await)? .unwrap(),
2149                        Event::PI(BytesPI::new("xml-stylesheet '? >\" "))
2150                    );
2151                }
2152
2153                /// A lone closing tag is not allowed, so it is tested together with its start tag
2154                #[$test]
2155                $($async)? fn start_and_end() {
                        // `<tag></tag>` has nothing between the tags, so `Start` must be
                        // followed directly by `End`, with no empty `Text` event in between.
2156                    let mut reader = Reader::from_str("<tag></tag>");
2157
2158                    assert_eq!(
2159                        reader.$read_event($buf) $(.$await)? .unwrap(),
2160                        Event::Start(BytesStart::new("tag"))
2161                    );
2162
2163                    assert_eq!(
2164                        reader.$read_event($buf) $(.$await)? .unwrap(),
2165                        Event::End(BytesEnd::new("tag"))
2166                    );
2167                }
2168
2169                #[$test]
2170                $($async)? fn empty() {
                        // A self-closed tag at the very start of the input must be reported as
                        // the first event, as a single `Empty` event. Only that first event is
                        // checked.
2171                    let mut reader = Reader::from_str("<tag/>");
2172
2173                    assert_eq!(
2174                        reader.$read_event($buf) $(.$await)? .unwrap(),
2175                        Event::Empty(BytesStart::new("tag"))
2176                    );
2177                }
2178
2179                #[$test]
2180                $($async)? fn text() {
                        // Input made only of character data: the first event is expected to be
                        // a `Text` event holding the whole input. Only that first event is
                        // checked.
2181                    let mut reader = Reader::from_str("text");
2182
2183                    assert_eq!(
2184                        reader.$read_event($buf) $(.$await)? .unwrap(),
2185                        Event::Text(BytesText::from_escaped("text"))
2186                    );
2187                }
2188
2189                #[$test]
2190                $($async)? fn cdata() {
                        // An empty CDATA section at the very start of the input must be the
                        // first event reported, as a `CData` event with empty content.
2191                    let mut reader = Reader::from_str("<![CDATA[]]>");
2192
2193                    assert_eq!(
2194                        reader.$read_event($buf) $(.$await)? .unwrap(),
2195                        Event::CData(BytesCData::new(""))
2196                    );
2197                }
2198
2199                #[$test]
2200                $($async)? fn comment() {
                        // An empty comment at the very start of the input must be the first
                        // event reported, as a `Comment` event with empty content.
2201                    let mut reader = Reader::from_str("<!---->");
2202
2203                    assert_eq!(
2204                        reader.$read_event($buf) $(.$await)? .unwrap(),
2205                        Event::Comment(BytesText::from_escaped(""))
2206                    );
2207                }
2208
2209                #[$test]
2210                $($async)? fn eof() {
                        // Empty input: the very first event must already be `Eof`, with no
                        // empty `Text` event before it.
2211                    let mut reader = Reader::from_str("");
2212
2213                    assert_eq!(
2214                        reader.$read_event($buf) $(.$await)? .unwrap(),
2215                        Event::Eof
2216                    );
2217                }
2218            }
2219        };
2220    }
2221
2222    // Export macros for the child modules:
2223    // - buffered_reader
2224    // - slice_reader
2225    pub(super) use check;
2226}
quick_xml/reader/mod.rs

quick_xml/reader/
mod.rs