fast_xml/de/
mod.rs

1//! Serde `Deserializer` module
2//!
3//! # Examples
4//!
5//! Here is a simple example parsing [crates.io](https://crates.io/) source code.
6//!
7//! ```
8//! // Cargo.toml
9//! // [dependencies]
10//! // serde = { version = "1.0", features = [ "derive" ] }
11//! // fast-xml = { version = "0.22", features = [ "serialize" ] }
12//! # use pretty_assertions::assert_eq;
13//! use serde::Deserialize;
14//! use fast_xml::de::{from_str, DeError};
15//!
16//! #[derive(Debug, Deserialize, PartialEq)]
17//! struct Link {
18//!     rel: String,
19//!     href: String,
20//!     sizes: Option<String>,
21//! }
22//!
23//! #[derive(Debug, Deserialize, PartialEq)]
24//! #[serde(rename_all = "lowercase")]
25//! enum Lang {
26//!     En,
27//!     Fr,
28//!     De,
29//! }
30//!
31//! #[derive(Debug, Deserialize, PartialEq)]
32//! struct Head {
33//!     title: String,
34//!     #[serde(rename = "link", default)]
35//!     links: Vec<Link>,
36//! }
37//!
38//! #[derive(Debug, Deserialize, PartialEq)]
39//! struct Script {
40//!     src: String,
41//!     integrity: String,
42//! }
43//!
44//! #[derive(Debug, Deserialize, PartialEq)]
45//! struct Body {
46//!     #[serde(rename = "script", default)]
47//!     scripts: Vec<Script>,
48//! }
49//!
50//! #[derive(Debug, Deserialize, PartialEq)]
51//! struct Html {
52//!     lang: Option<String>,
53//!     head: Head,
54//!     body: Body,
55//! }
56//!
57//! fn crates_io() -> Result<Html, DeError> {
58//!     let xml = "<!DOCTYPE html>
59//!         <html lang=\"en\">
60//!           <head>
61//!             <meta charset=\"utf-8\">
62//!             <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">
63//!             <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">
64//!
65//!             <title>crates.io: Rust Package Registry</title>
66//!
67//!
68//!         <meta name=\"cargo/config/environment\" content=\"%7B%22modulePrefix%22%3A%22cargo%22%2C%22environment%22%3A%22production%22%2C%22rootURL%22%3A%22%2F%22%2C%22locationType%22%3A%22router-scroll%22%2C%22historySupportMiddleware%22%3Atrue%2C%22EmberENV%22%3A%7B%22FEATURES%22%3A%7B%7D%2C%22EXTEND_PROTOTYPES%22%3A%7B%22Date%22%3Afalse%7D%7D%2C%22APP%22%3A%7B%22name%22%3A%22cargo%22%2C%22version%22%3A%22b7796c9%22%7D%2C%22fastboot%22%3A%7B%22hostWhitelist%22%3A%5B%22crates.io%22%2C%7B%7D%2C%7B%7D%5D%7D%2C%22ember-cli-app-version%22%3A%7B%22version%22%3A%22b7796c9%22%7D%2C%22ember-cli-mirage%22%3A%7B%22usingProxy%22%3Afalse%2C%22useDefaultPassthroughs%22%3Atrue%7D%2C%22exportApplicationGlobal%22%3Afalse%7D\" />
69//!         <!-- EMBER_CLI_FASTBOOT_TITLE --><!-- EMBER_CLI_FASTBOOT_HEAD -->
70//!         <link rel=\"manifest\" href=\"/manifest.webmanifest\">
71//!         <link rel=\"apple-touch-icon\" href=\"/cargo-835dd6a18132048a52ac569f2615b59d.png\" sizes=\"227x227\">
72//!         <meta name=\"theme-color\" content=\"#f9f7ec\">
73//!         <meta name=\"apple-mobile-web-app-capable\" content=\"yes\">
74//!         <meta name=\"apple-mobile-web-app-title\" content=\"crates.io: Rust Package Registry\">
75//!         <meta name=\"apple-mobile-web-app-status-bar-style\" content=\"default\">
76//!
77//!             <link rel=\"stylesheet\" href=\"/assets/vendor-8d023d47762d5431764f589a6012123e.css\" integrity=\"sha256-EoB7fsYkdS7BZba47+C/9D7yxwPZojsE4pO7RIuUXdE= sha512-/SzGQGR0yj5AG6YPehZB3b6MjpnuNCTOGREQTStETobVRrpYPZKneJwcL/14B8ufcvobJGFDvnTKdcDDxbh6/A==\" >
78//!             <link rel=\"stylesheet\" href=\"/assets/cargo-cedb8082b232ce89dd449d869fb54b98.css\" integrity=\"sha256-S9K9jZr6nSyYicYad3JdiTKrvsstXZrvYqmLUX9i3tc= sha512-CDGjy3xeyiqBgUMa+GelihW394pqAARXwsU+HIiOotlnp1sLBVgO6v2ZszL0arwKU8CpvL9wHyLYBIdfX92YbQ==\" >
79//!
80//!
81//!             <link rel=\"shortcut icon\" href=\"/favicon.ico\" type=\"image/x-icon\">
82//!             <link rel=\"icon\" href=\"/cargo-835dd6a18132048a52ac569f2615b59d.png\" type=\"image/png\">
83//!             <link rel=\"search\" href=\"/opensearch.xml\" type=\"application/opensearchdescription+xml\" title=\"Cargo\">
84//!           </head>
85//!           <body>
86//!             <!-- EMBER_CLI_FASTBOOT_BODY -->
87//!             <noscript>
88//!                 <div id=\"main\">
89//!                     <div class='noscript'>
90//!                         This site requires JavaScript to be enabled.
91//!                     </div>
92//!                 </div>
93//!             </noscript>
94//!
95//!             <script src=\"/assets/vendor-bfe89101b20262535de5a5ccdc276965.js\" integrity=\"sha256-U12Xuwhz1bhJXWyFW/hRr+Wa8B6FFDheTowik5VLkbw= sha512-J/cUUuUN55TrdG8P6Zk3/slI0nTgzYb8pOQlrXfaLgzr9aEumr9D1EzmFyLy1nrhaDGpRN1T8EQrU21Jl81pJQ==\" ></script>
96//!             <script src=\"/assets/cargo-4023b68501b7b3e17b2bb31f50f5eeea.js\" integrity=\"sha256-9atimKc1KC6HMJF/B07lP3Cjtgr2tmET8Vau0Re5mVI= sha512-XJyBDQU4wtA1aPyPXaFzTE5Wh/mYJwkKHqZ/Fn4p/ezgdKzSCFu6FYn81raBCnCBNsihfhrkb88uF6H5VraHMA==\" ></script>
97//!
98//!
99//!           </body>
100//!         </html>
101//! }";
102//!     let html: Html = from_str(xml)?;
103//!     assert_eq!(&html.head.title, "crates.io: Rust Package Registry");
104//!     Ok(html)
105//! }
106//! ```
107
108// Macros must be defined before the modules that use them.
109// Likewise, macros must be imported before they are used.
110use serde::serde_if_integer128;
111
/// Generates a `$deserialize` method for a numeric type.
///
/// The generated method reads the next text without unescaping it, decodes it
/// with the decoder of the reader, parses the result and passes the parsed
/// value to the `$visit` method of the visitor. The optional trailing token
/// (for example `mut`) is inserted before `self` in the signature.
macro_rules! deserialize_type {
    ($deserialize:ident => $visit:ident, $($mut:tt)?) => {
        fn $deserialize<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // Unescaping is skipped: a valid number never contains escaped characters
            let raw = self.next_text(false)?;
            let decoded = raw.decode(self.decoder())?;
            // The target type is inferred from the parameter of `$visit`
            let number = decoded.parse()?;
            visitor.$visit(number)
        }
    };
}
125
/// Implements the deserialization methods for scalar types: numbers, booleans,
/// strings, characters, byte arrays and identifiers.
///
/// The optional token (for example `mut`) is inserted before `self` in the
/// signature of every generated method that calls `next_text`.
macro_rules! deserialize_primitives {
    ($($mut:tt)?) => {
        // Signed integers
        deserialize_type!(deserialize_i8 => visit_i8, $($mut)?);
        deserialize_type!(deserialize_i16 => visit_i16, $($mut)?);
        deserialize_type!(deserialize_i32 => visit_i32, $($mut)?);
        deserialize_type!(deserialize_i64 => visit_i64, $($mut)?);

        // Unsigned integers
        deserialize_type!(deserialize_u8 => visit_u8, $($mut)?);
        deserialize_type!(deserialize_u16 => visit_u16, $($mut)?);
        deserialize_type!(deserialize_u32 => visit_u32, $($mut)?);
        deserialize_type!(deserialize_u64 => visit_u64, $($mut)?);

        // 128-bit integers, only when serde supports them
        serde_if_integer128! {
            deserialize_type!(deserialize_i128 => visit_i128, $($mut)?);
            deserialize_type!(deserialize_u128 => visit_u128, $($mut)?);
        }

        // Floating point numbers
        deserialize_type!(deserialize_f32 => visit_f32, $($mut)?);
        deserialize_type!(deserialize_f64 => visit_f64, $($mut)?);

        fn deserialize_bool<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // Unescaping is skipped: a valid boolean never contains escaped characters
            let raw = self.next_text(false)?;

            deserialize_bool(raw.as_ref(), self.decoder(), visitor)
        }

        /// Owned strings are represented the same way as [borrowed ones](#method.deserialize_str).
        fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// A character is represented as a [string](#method.deserialize_str).
        fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        fn deserialize_str<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            let raw = self.next_text(true)?;
            // Hand borrowed data to the visitor without copying whenever possible
            match raw.decode(self.decoder())? {
                Cow::Borrowed(borrowed) => visitor.visit_borrowed_str(borrowed),
                Cow::Owned(owned) => visitor.visit_string(owned),
            }
        }

        fn deserialize_bytes<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // Unescaping is skipped: bytes give access to the raw XML input
            let raw = self.next_text(false)?;
            visitor.visit_bytes(&raw)
        }

        fn deserialize_byte_buf<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // Unescaping is skipped: bytes give access to the raw XML input
            let raw = self.next_text(false)?;
            visitor.visit_byte_buf(raw.into_inner().into_owned())
        }

        /// An identifier is represented as a [string](#method.deserialize_str).
        fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }
    };
}
214
215mod escape;
216mod map;
217mod seq;
218mod var;
219
220pub use crate::errors::serialize::DeError;
221use crate::{
222    errors::Error,
223    events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
224    reader::Decoder,
225    Reader,
226};
227use serde::de::{self, Deserialize, DeserializeOwned, Visitor};
228use std::borrow::Cow;
229use std::io::BufRead;
230
/// Special field name: `deserialize_struct` checks whether the list of struct
/// fields contains it and records the answer in `Deserializer::has_value_field`.
/// Such a field is deserialized from the text content of the XML node.
231pub(crate) const INNER_VALUE: &str = "$value";
/// NOTE(review): not used in this module; presumably a field-name prefix that is
/// interpreted by one of the submodules — confirm against `map.rs`.
232pub(crate) const UNFLATTEN_PREFIX: &str = "$unflatten=";
/// NOTE(review): not used in this module; presumably a field-name prefix that is
/// interpreted by one of the submodules — confirm against `map.rs`.
233pub(crate) const PRIMITIVE_PREFIX: &str = "$primitive=";
234
235/// Simplified event which contains only those variants that are used by the deserializer
236#[derive(Debug, PartialEq)]
237pub enum DeEvent<'a> {
238    /// Start tag (with attributes) `<tag attr="value">`.
239    Start(BytesStart<'a>),
240    /// End tag `</tag>`.
241    End(BytesEnd<'a>),
242    /// Escaped character data between `Start` and `End` element.
243    Text(BytesText<'a>),
244    /// Unescaped character data between `Start` and `End` element,
245    /// stored in `<![CDATA[...]]>`.
246    CData(BytesCData<'a>),
247    /// End of XML document.
248    Eof,
249}
250
251/// An XML deserializer that pulls [`DeEvent`]s from an [`XmlRead`] source
252pub struct Deserializer<'de, R>
253where
254    R: XmlRead<'de>,
255{
    /// Source of the XML events
256    reader: R,
    /// Event that was read ahead by `peek()`; it is handed out by the following
    /// `next()` call before the reader is asked for a new event
257    peek: Option<DeEvent<'de>>,
258    /// Special sign that the struct being deserialized has a field with the special
259    /// name (see constant `INNER_VALUE`). That field should be deserialized
260    /// from the text content of the XML node:
261    ///
262    /// ```xml
263    /// <tag>value for INNER_VALUE field</tag>
264    /// ```
265    has_value_field: bool,
266}
267
268/// Deserialize an instance of type `T` from a string of XML text.
269pub fn from_str<'de, T>(s: &'de str) -> Result<T, DeError>
270where
271    T: Deserialize<'de>,
272{
273    from_slice(s.as_bytes())
274}
275
276/// Deserialize an instance of type `T` from bytes of XML text.
277#[deprecated = "Use `from_slice` instead"]
278pub fn from_bytes<'de, T>(s: &'de [u8]) -> Result<T, DeError>
279where
280    T: Deserialize<'de>,
281{
282    from_slice(s)
283}
284
285/// Deserialize an instance of type `T` from bytes of XML text.
286pub fn from_slice<'de, T>(s: &'de [u8]) -> Result<T, DeError>
287where
288    T: Deserialize<'de>,
289{
290    let mut de = Deserializer::from_slice(s);
291    T::deserialize(&mut de)
292}
293
294/// Deserialize from a reader. This method will do internal copies of data
295/// readed from `reader`. If you want have a `&[u8]` or `&str` input and want
296/// to borrow as much as possible, use [`from_slice`] or [`from_str`]
297pub fn from_reader<R, T>(reader: R) -> Result<T, DeError>
298where
299    R: BufRead,
300    T: DeserializeOwned,
301{
302    let mut de = Deserializer::from_reader(reader);
303    T::deserialize(&mut de)
304}
305
306// TODO: According to the https://www.w3.org/TR/xmlschema-2/#boolean,
307// valid boolean representations are only "true", "false", "1", and "0"
308fn deserialize_bool<'de, V>(value: &[u8], decoder: Decoder, visitor: V) -> Result<V::Value, DeError>
309where
310    V: Visitor<'de>,
311{
312    #[cfg(feature = "encoding")]
313    {
314        let value = decoder.decode(value);
315        // No need to unescape because valid boolean representations cannot be escaped
316        match value.as_ref() {
317            "true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => {
318                visitor.visit_bool(true)
319            }
320            "false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => {
321                visitor.visit_bool(false)
322            }
323            _ => Err(DeError::InvalidBoolean(value.into())),
324        }
325    }
326
327    #[cfg(not(feature = "encoding"))]
328    {
329        // No need to unescape because valid boolean representations cannot be escaped
330        match value {
331            b"true" | b"1" | b"True" | b"TRUE" | b"t" | b"Yes" | b"YES" | b"yes" | b"y" => {
332                visitor.visit_bool(true)
333            }
334            b"false" | b"0" | b"False" | b"FALSE" | b"f" | b"No" | b"NO" | b"no" | b"n" => {
335                visitor.visit_bool(false)
336            }
337            e => Err(DeError::InvalidBoolean(decoder.decode(e)?.into())),
338        }
339    }
340}
341
342impl<'de, R> Deserializer<'de, R>
343where
344    R: XmlRead<'de>,
345{
346    /// Create an XML deserializer from one of the possible quick_xml input sources.
347    ///
348    /// Typically it is more convenient to use one of these methods instead:
349    ///
350    ///  - [`Deserializer::from_str`]
351    ///  - [`Deserializer::from_slice`]
352    ///  - [`Deserializer::from_reader`]
353    pub fn new(reader: R) -> Self {
354        Deserializer {
355            reader,
356            peek: None,
357            has_value_field: false,
358        }
359    }
360
361    /// Get a new deserializer from a regular BufRead
362    #[deprecated = "Use `Deserializer::new` instead"]
363    pub fn from_borrowing_reader(reader: R) -> Self {
364        Self::new(reader)
365    }
366
367    fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
368        if self.peek.is_none() {
369            self.peek = Some(self.reader.next()?);
370        }
371        match self.peek.as_ref() {
372            Some(v) => Ok(v),
373            // SAFETY: a `None` variant for `self.peek` would have been replaced
374            // by a `Some` variant in the code above.
375            // TODO: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
376            // if unsafe code will be allowed
377            None => unreachable!(),
378        }
379    }
380
381    fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
382        if let Some(e) = self.peek.take() {
383            return Ok(e);
384        }
385        self.reader.next()
386    }
387
388    fn next_start(&mut self) -> Result<Option<BytesStart<'de>>, DeError> {
389        loop {
390            let e = self.next()?;
391            match e {
392                DeEvent::Start(e) => return Ok(Some(e)),
393                DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().to_owned())),
394                DeEvent::Eof => return Ok(None),
395                _ => (), // ignore texts
396            }
397        }
398    }
399
400    #[inline]
401    fn next_text(&mut self, unescape: bool) -> Result<BytesCData<'de>, DeError> {
402        self.next_text_impl(unescape, true)
403    }
404
405    /// Consumes a one XML element or an XML tree, returns associated text or
406    /// an empty string.
407    ///
408    /// If `allow_start` is `false`, then only one event is consumed. If that
409    /// event is [`DeEvent::Start`], then [`DeError::UnexpectedStart`] is returned.
410    ///
411    /// If `allow_start` is `true`, then first text of CDATA event inside it is
412    /// returned and all other content is skipped until corresponding end tag
413    /// will be consumed.
414    ///
415    /// # Handling events
416    ///
417    /// The table below shows how events is handled by this method:
418    ///
419    /// |Event             |XML                        |Handling
420    /// |------------------|---------------------------|----------------------------------------
421    /// |[`DeEvent::Start`]|`<tag>...</tag>`           |if `allow_start == true`, result determined by the second table, otherwise emits [`UnexpectedStart("tag")`](DeError::UnexpectedStart)
422    /// |[`DeEvent::End`]  |`</any-tag>`               |Emits [`UnexpectedEnd("any-tag")`](DeError::UnexpectedEnd)
423    /// |[`DeEvent::Text`] |`text content`             |Unescapes `text content` and returns it
424    /// |[`DeEvent::CData`]|`<![CDATA[cdata content]]>`|Returns `cdata content` unchanged
425    /// |[`DeEvent::Eof`]  |                           |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
426    ///
427    /// Second event, consumed if [`DeEvent::Start`] was received and `allow_start == true`:
428    ///
429    /// |Event             |XML                        |Handling
430    /// |------------------|---------------------------|----------------------------------------------------------------------------------
431    /// |[`DeEvent::Start`]|`<any-tag>...</any-tag>`   |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
432    /// |[`DeEvent::End`]  |`</tag>`                   |Returns an empty slice, if close tag matched the open one
433    /// |[`DeEvent::End`]  |`</any-tag>`               |Emits [`UnexpectedEnd("any-tag")`](DeError::UnexpectedEnd)
434    /// |[`DeEvent::Text`] |`text content`             |Unescapes `text content` and returns it, consumes events up to `</tag>`
435    /// |[`DeEvent::CData`]|`<![CDATA[cdata content]]>`|Returns `cdata content` unchanged, consumes events up to `</tag>`
436    /// |[`DeEvent::Eof`]  |                           |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
437    fn next_text_impl(
438        &mut self,
439        unescape: bool,
440        allow_start: bool,
441    ) -> Result<BytesCData<'de>, DeError> {
442        match self.next()? {
443            DeEvent::Text(e) if unescape => e.unescape().map_err(|e| DeError::InvalidXml(e.into())),
444            DeEvent::Text(e) => Ok(BytesCData::new(e.into_inner())),
445            DeEvent::CData(e) => Ok(e),
446            DeEvent::Start(e) if allow_start => {
447                // allow one nested level
448                let inner = self.next()?;
449                let t = match inner {
450                    DeEvent::Text(t) if unescape => t.unescape()?,
451                    DeEvent::Text(t) => BytesCData::new(t.into_inner()),
452                    DeEvent::CData(t) => t,
453                    DeEvent::Start(s) => return Err(DeError::UnexpectedStart(s.name().to_owned())),
454                    // We can get End event in case of `<tag></tag>` or `<tag/>` input
455                    // Return empty text in that case
456                    DeEvent::End(end) if end.name() == e.name() => {
457                        return Ok(BytesCData::new(&[] as &[u8]));
458                    }
459                    DeEvent::End(end) => return Err(DeError::UnexpectedEnd(end.name().to_owned())),
460                    DeEvent::Eof => return Err(DeError::UnexpectedEof),
461                };
462                self.read_to_end(e.name())?;
463                Ok(t)
464            }
465            DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().to_owned())),
466            DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().to_owned())),
467            DeEvent::Eof => Err(DeError::UnexpectedEof),
468        }
469    }
470
471    /// Returns a decoder, used inside `deserialize_primitives!()`
472    #[inline]
473    fn decoder(&self) -> Decoder {
474        self.reader.decoder()
475    }
476
477    fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> {
478        // First one might be in self.peek
479        match self.next()? {
480            DeEvent::Start(e) => self.reader.read_to_end(e.name())?,
481            DeEvent::End(e) if e.name() == name => return Ok(()),
482            _ => (),
483        }
484        self.reader.read_to_end(name)
485    }
486}
487
488impl<'de> Deserializer<'de, SliceReader<'de>> {
489    /// Create new deserializer that will borrow data from the specified string
490    pub fn from_str(s: &'de str) -> Self {
491        Self::from_slice(s.as_bytes())
492    }
493
494    /// Create new deserializer that will borrow data from the specified byte array
495    pub fn from_slice(bytes: &'de [u8]) -> Self {
496        let mut reader = Reader::from_bytes(bytes);
497        reader
498            .expand_empty_elements(true)
499            .check_end_names(true)
500            .trim_text(true);
501        Self::new(SliceReader { reader })
502    }
503}
504
505impl<'de, R> Deserializer<'de, IoReader<R>>
506where
507    R: BufRead,
508{
509    /// Create new deserializer that will copy data from the specified reader
510    /// into internal buffer. If you already have a string or a byte array, use
511    /// [`Self::from_str`] or [`Self::from_slice`] instead, because they will
512    /// borrow instead of copy, whenever possible
513    pub fn from_reader(reader: R) -> Self {
514        let mut reader = Reader::from_reader(reader);
515        reader
516            .expand_empty_elements(true)
517            .check_end_names(true)
518            .trim_text(true);
519
520        Self::new(IoReader {
521            reader,
522            buf: Vec::new(),
523        })
524    }
525}
526
527impl<'de, 'a, R> de::Deserializer<'de> for &'a mut Deserializer<'de, R>
528where
529    R: XmlRead<'de>,
530{
531    type Error = DeError;
532
533    deserialize_primitives!();
534
535    fn deserialize_struct<V>(
536        self,
537        _name: &'static str,
538        fields: &'static [&'static str],
539        visitor: V,
540    ) -> Result<V::Value, DeError>
541    where
542        V: Visitor<'de>,
543    {
544        // Try to go to the next `<tag ...>...</tag>` or `<tag .../>`
545        if let Some(e) = self.next_start()? {
546            let name = e.name().to_vec();
547            self.has_value_field = fields.contains(&INNER_VALUE);
548            let map = map::MapAccess::new(self, e, fields)?;
549            let value = visitor.visit_map(map)?;
550            self.has_value_field = false;
551            self.read_to_end(&name)?;
552            Ok(value)
553        } else {
554            Err(DeError::ExpectedStart)
555        }
556    }
557
558    /// Unit represented in XML as a `xs:element` or text/CDATA content.
559    /// Any content inside `xs:element` is ignored and skipped.
560    ///
561    /// Produces unit struct from any of following inputs:
562    /// - any `<tag ...>...</tag>`
563    /// - any `<tag .../>`
564    /// - any text content
565    /// - any CDATA content
566    ///
567    /// # Events handling
568    ///
569    /// |Event             |XML                        |Handling
570    /// |------------------|---------------------------|-------------------------------------------
571    /// |[`DeEvent::Start`]|`<tag>...</tag>`           |Calls `visitor.visit_unit()`, consumes all events up to corresponding `End` event
572    /// |[`DeEvent::End`]  |`</tag>`                   |Emits [`UnexpectedEnd("tag")`](DeError::UnexpectedEnd)
573    /// |[`DeEvent::Text`] |`text content`             |Calls `visitor.visit_unit()`. Text content is ignored
574    /// |[`DeEvent::CData`]|`<![CDATA[cdata content]]>`|Calls `visitor.visit_unit()`. CDATA content is ignored
575    /// |[`DeEvent::Eof`]  |                           |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
576    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, DeError>
577    where
578        V: Visitor<'de>,
579    {
580        match self.next()? {
581            DeEvent::Start(s) => {
582                self.read_to_end(s.name())?;
583                visitor.visit_unit()
584            }
585            DeEvent::Text(_) | DeEvent::CData(_) => visitor.visit_unit(),
586            DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().to_owned())),
587            DeEvent::Eof => Err(DeError::UnexpectedEof),
588        }
589    }
590
591    /// Representation of the names units the same as [unnamed units](#method.deserialize_unit)
592    fn deserialize_unit_struct<V>(
593        self,
594        _name: &'static str,
595        visitor: V,
596    ) -> Result<V::Value, DeError>
597    where
598        V: Visitor<'de>,
599    {
600        self.deserialize_unit(visitor)
601    }
602
603    fn deserialize_newtype_struct<V>(
604        self,
605        _name: &'static str,
606        visitor: V,
607    ) -> Result<V::Value, DeError>
608    where
609        V: Visitor<'de>,
610    {
611        self.deserialize_tuple(1, visitor)
612    }
613
614    /// Representation of tuples the same as [sequences](#method.deserialize_seq).
615    fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value, DeError>
616    where
617        V: Visitor<'de>,
618    {
619        self.deserialize_seq(visitor)
620    }
621
622    /// Representation of named tuples the same as [unnamed tuples](#method.deserialize_tuple).
623    fn deserialize_tuple_struct<V>(
624        self,
625        _name: &'static str,
626        len: usize,
627        visitor: V,
628    ) -> Result<V::Value, DeError>
629    where
630        V: Visitor<'de>,
631    {
632        self.deserialize_tuple(len, visitor)
633    }
634
635    fn deserialize_enum<V>(
636        self,
637        _name: &'static str,
638        _variants: &'static [&'static str],
639        visitor: V,
640    ) -> Result<V::Value, DeError>
641    where
642        V: Visitor<'de>,
643    {
644        let value = visitor.visit_enum(var::EnumAccess::new(self))?;
645        Ok(value)
646    }
647
648    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
649    where
650        V: Visitor<'de>,
651    {
652        visitor.visit_seq(seq::SeqAccess::new(self)?)
653    }
654
655    fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>
656    where
657        V: Visitor<'de>,
658    {
659        self.deserialize_struct("", &[], visitor)
660    }
661
662    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
663    where
664        V: Visitor<'de>,
665    {
666        match self.peek()? {
667            DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
668            DeEvent::CData(t) if t.is_empty() => visitor.visit_none(),
669            DeEvent::Eof => visitor.visit_none(),
670            _ => visitor.visit_some(self),
671        }
672    }
673
674    /// Always call `visitor.visit_unit()` because returned value ignored in any case.
675    ///
676    /// This method consumes any single [event][DeEvent] except the [`Start`][DeEvent::Start]
677    /// event, in which case all events up to corresponding [`End`][DeEvent::End] event will
678    /// be consumed.
679    ///
680    /// This method returns error if current event is [`End`][DeEvent::End] or [`Eof`][DeEvent::Eof]
681    fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, DeError>
682    where
683        V: Visitor<'de>,
684    {
685        match self.next()? {
686            DeEvent::Start(e) => self.read_to_end(e.name())?,
687            DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().to_owned())),
688            DeEvent::Eof => return Err(DeError::UnexpectedEof),
689            _ => (),
690        }
691        visitor.visit_unit()
692    }
693
694    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeError>
695    where
696        V: Visitor<'de>,
697    {
698        match self.peek()? {
699            DeEvent::Start(_) => self.deserialize_map(visitor),
700            // Redirect to deserialize_unit in order to consume an event and return an appropriate error
701            DeEvent::End(_) | DeEvent::Eof => self.deserialize_unit(visitor),
702            _ => self.deserialize_string(visitor),
703        }
704    }
705}
706
707/// Trait used by the deserializer for iterating over input. This is manually
708/// "specialized" for iterating over `&[u8]`.
709///
710/// You do not need to implement this trait, it is needed to abstract from
711/// [borrowing](SliceReader) and [copying](IoReader) data sources and reuse code in
712/// deserializer
713pub trait XmlRead<'i> {
714    /// Return an input-borrowing event.
    ///
    /// The implementations in this module skip every reader event that has no
    /// counterpart in [`DeEvent`].
715    fn next(&mut self) -> Result<DeEvent<'i>, DeError>;
716
717    /// Skips until end element is found. Unlike `next()` it will not allocate
718    /// when it cannot satisfy the lifetime.
719    fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError>;
720
721    /// A copy of the reader's decoder used to decode strings.
722    fn decoder(&self) -> Decoder;
723}
724
725/// XML input source that reads from a std::io input stream.
726///
727/// You cannot create it, it is created automatically when you call
728/// [`Deserializer::from_reader`]
729pub struct IoReader<R: BufRead> {
    /// Underlying XML reader
730    reader: Reader<R>,
    /// Buffer that is handed to the reader on every read; `next()` clears it
    /// after each event
731    buf: Vec<u8>,
732}
733
734impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
735    fn next(&mut self) -> Result<DeEvent<'static>, DeError> {
736        let event = loop {
737            let e = self.reader.read_event(&mut self.buf)?;
738            match e {
739                Event::Start(e) => break Ok(DeEvent::Start(e.into_owned())),
740                Event::End(e) => break Ok(DeEvent::End(e.into_owned())),
741                Event::Text(e) => break Ok(DeEvent::Text(e.into_owned())),
742                Event::CData(e) => break Ok(DeEvent::CData(e.into_owned())),
743                Event::Eof => break Ok(DeEvent::Eof),
744
745                _ => self.buf.clear(),
746            }
747        };
748
749        self.buf.clear();
750
751        event
752    }
753
754    fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> {
755        match self.reader.read_to_end(name, &mut self.buf) {
756            Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
757            other => Ok(other?),
758        }
759    }
760
761    fn decoder(&self) -> Decoder {
762        self.reader.decoder()
763    }
764}
765
766/// XML input source that reads from a slice of bytes and can borrow from it.
767///
768/// You cannot create it, it is created automatically when you call
769/// [`Deserializer::from_str`] or [`Deserializer::from_slice`]
770pub struct SliceReader<'de> {
    /// Underlying XML reader over the borrowed input bytes
771    reader: Reader<&'de [u8]>,
772}
773
774impl<'de> XmlRead<'de> for SliceReader<'de> {
775    fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
776        loop {
777            let e = self.reader.read_event_unbuffered()?;
778            match e {
779                Event::Start(e) => break Ok(DeEvent::Start(e)),
780                Event::End(e) => break Ok(DeEvent::End(e)),
781                Event::Text(e) => break Ok(DeEvent::Text(e)),
782                Event::CData(e) => break Ok(DeEvent::CData(e)),
783                Event::Eof => break Ok(DeEvent::Eof),
784
785                _ => (),
786            }
787        }
788    }
789
790    fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> {
791        match self.reader.read_to_end_unbuffered(name) {
792            Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
793            other => Ok(other?),
794        }
795    }
796
797    fn decoder(&self) -> Decoder {
798        self.reader.decoder()
799    }
800}
801
802#[cfg(test)]
803mod tests {
804    use super::*;
805    use pretty_assertions::assert_eq;
806
807    #[test]
808    fn read_to_end() {
809        use crate::de::DeEvent::*;
810
811        let mut de = Deserializer::from_slice(
812            br#"
813            <root>
814                <tag a="1"><tag>text</tag>content</tag>
815                <tag a="2"><![CDATA[cdata content]]></tag>
816                <self-closed/>
817            </root>
818            "#,
819        );
820
821        assert_eq!(
822            de.next().unwrap(),
823            Start(BytesStart::borrowed_name(b"root"))
824        );
825
826        assert_eq!(
827            de.next().unwrap(),
828            Start(BytesStart::borrowed(br#"tag a="1""#, 3))
829        );
830        assert_eq!(de.read_to_end(b"tag").unwrap(), ());
831
832        assert_eq!(
833            de.next().unwrap(),
834            Start(BytesStart::borrowed(br#"tag a="2""#, 3))
835        );
836        assert_eq!(
837            de.next().unwrap(),
838            CData(BytesCData::from_str("cdata content"))
839        );
840        assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"tag")));
841
842        assert_eq!(
843            de.next().unwrap(),
844            Start(BytesStart::borrowed(b"self-closed", 11))
845        );
846        assert_eq!(de.read_to_end(b"self-closed").unwrap(), ());
847
848        assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
849        assert_eq!(de.next().unwrap(), Eof);
850    }
851
852    #[test]
853    fn borrowing_reader_parity() {
854        let s = r##"
855            <item name="hello" source="world.rs">Some text</item>
856            <item2/>
857            <item3 value="world" />
858    	"##
859        .as_bytes();
860
861        let mut reader1 = IoReader {
862            reader: Reader::from_reader(s),
863            buf: Vec::new(),
864        };
865        let mut reader2 = SliceReader {
866            reader: Reader::from_bytes(s),
867        };
868
869        loop {
870            let event1 = reader1.next().unwrap();
871            let event2 = reader2.next().unwrap();
872
873            if let (DeEvent::Eof, DeEvent::Eof) = (&event1, &event2) {
874                break;
875            }
876
877            assert_eq!(event1, event2);
878        }
879    }
880
881    #[test]
882    fn borrowing_reader_events() {
883        let s = r##"
884            <item name="hello" source="world.rs">Some text</item>
885            <item2></item2>
886            <item3/>
887            <item4 value="world" />
888        "##
889        .as_bytes();
890
891        let mut reader = SliceReader {
892            reader: Reader::from_bytes(s),
893        };
894
895        reader
896            .reader
897            .trim_text(true)
898            .expand_empty_elements(true)
899            .check_end_names(true);
900
901        let mut events = Vec::new();
902
903        loop {
904            let event = reader.next().unwrap();
905            if let DeEvent::Eof = event {
906                break;
907            }
908            events.push(event);
909        }
910
911        use crate::de::DeEvent::*;
912
913        assert_eq!(
914            events,
915            vec![
916                Start(BytesStart::borrowed(
917                    br#"item name="hello" source="world.rs""#,
918                    4
919                )),
920                Text(BytesText::from_escaped(b"Some text".as_ref())),
921                End(BytesEnd::borrowed(b"item")),
922                Start(BytesStart::borrowed(b"item2", 5)),
923                End(BytesEnd::borrowed(b"item2")),
924                Start(BytesStart::borrowed(b"item3", 5)),
925                End(BytesEnd::borrowed(b"item3")),
926                Start(BytesStart::borrowed(br#"item4 value="world" "#, 5)),
927                End(BytesEnd::borrowed(b"item4")),
928            ]
929        )
930    }
931
932    #[test]
933    fn borrowing_read_to_end() {
934        let s = " <item /> ";
935        let mut reader = SliceReader {
936            reader: Reader::from_str(s),
937        };
938
939        reader
940            .reader
941            .trim_text(true)
942            .expand_empty_elements(true)
943            .check_end_names(true);
944
945        assert_eq!(
946            reader.next().unwrap(),
947            DeEvent::Start(BytesStart::borrowed(b"item ", 4))
948        );
949        reader.read_to_end(b"item").unwrap();
950        assert_eq!(reader.next().unwrap(), DeEvent::Eof);
951    }
952
953    /// Ensures, that [`Deserializer::next_text()`] never can get an `End` event,
954    /// because parser reports error early
955    #[test]
956    fn next_text() {
957        match from_str::<String>(r#"</root>"#) {
958            Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
959                assert_eq!(expected, "");
960                assert_eq!(found, "root");
961            }
962            x => panic!(
963                r#"Expected `Err(InvalidXml(EndEventMismatch("", "root")))`, but found {:?}"#,
964                x
965            ),
966        }
967
968        let s: String = from_str(r#"<root></root>"#).unwrap();
969        assert_eq!(s, "");
970
971        match from_str::<String>(r#"<root></other>"#) {
972            Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
973                assert_eq!(expected, "root");
974                assert_eq!(found, "other");
975            }
976            x => panic!(
977                r#"Expected `Err(InvalidXml(EndEventMismatch("root", "other")))`, but found {:?}"#,
978                x
979            ),
980        }
981    }
982}