fast_xml/de/mod.rs
1//! Serde `Deserializer` module
2//!
3//! # Examples
4//!
5//! Here is a simple example parsing [crates.io](https://crates.io/) source code.
6//!
7//! ```
8//! // Cargo.toml
9//! // [dependencies]
10//! // serde = { version = "1.0", features = [ "derive" ] }
11//! // fast-xml = { version = "0.22", features = [ "serialize" ] }
12//! # use pretty_assertions::assert_eq;
13//! use serde::Deserialize;
14//! use fast_xml::de::{from_str, DeError};
15//!
16//! #[derive(Debug, Deserialize, PartialEq)]
17//! struct Link {
18//! rel: String,
19//! href: String,
20//! sizes: Option<String>,
21//! }
22//!
23//! #[derive(Debug, Deserialize, PartialEq)]
24//! #[serde(rename_all = "lowercase")]
25//! enum Lang {
26//! En,
27//! Fr,
28//! De,
29//! }
30//!
31//! #[derive(Debug, Deserialize, PartialEq)]
32//! struct Head {
33//! title: String,
34//! #[serde(rename = "link", default)]
35//! links: Vec<Link>,
36//! }
37//!
38//! #[derive(Debug, Deserialize, PartialEq)]
39//! struct Script {
40//! src: String,
41//! integrity: String,
42//! }
43//!
44//! #[derive(Debug, Deserialize, PartialEq)]
45//! struct Body {
46//! #[serde(rename = "script", default)]
47//! scripts: Vec<Script>,
48//! }
49//!
50//! #[derive(Debug, Deserialize, PartialEq)]
51//! struct Html {
52//! lang: Option<String>,
53//! head: Head,
54//! body: Body,
55//! }
56//!
57//! fn crates_io() -> Result<Html, DeError> {
58//! let xml = "<!DOCTYPE html>
59//! <html lang=\"en\">
60//! <head>
61//! <meta charset=\"utf-8\">
62//! <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">
63//! <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">
64//!
65//! <title>crates.io: Rust Package Registry</title>
66//!
67//!
68//! <meta name=\"cargo/config/environment\" content=\"%7B%22modulePrefix%22%3A%22cargo%22%2C%22environment%22%3A%22production%22%2C%22rootURL%22%3A%22%2F%22%2C%22locationType%22%3A%22router-scroll%22%2C%22historySupportMiddleware%22%3Atrue%2C%22EmberENV%22%3A%7B%22FEATURES%22%3A%7B%7D%2C%22EXTEND_PROTOTYPES%22%3A%7B%22Date%22%3Afalse%7D%7D%2C%22APP%22%3A%7B%22name%22%3A%22cargo%22%2C%22version%22%3A%22b7796c9%22%7D%2C%22fastboot%22%3A%7B%22hostWhitelist%22%3A%5B%22crates.io%22%2C%7B%7D%2C%7B%7D%5D%7D%2C%22ember-cli-app-version%22%3A%7B%22version%22%3A%22b7796c9%22%7D%2C%22ember-cli-mirage%22%3A%7B%22usingProxy%22%3Afalse%2C%22useDefaultPassthroughs%22%3Atrue%7D%2C%22exportApplicationGlobal%22%3Afalse%7D\" />
69//! <!-- EMBER_CLI_FASTBOOT_TITLE --><!-- EMBER_CLI_FASTBOOT_HEAD -->
70//! <link rel=\"manifest\" href=\"/manifest.webmanifest\">
71//! <link rel=\"apple-touch-icon\" href=\"/cargo-835dd6a18132048a52ac569f2615b59d.png\" sizes=\"227x227\">
72//! <meta name=\"theme-color\" content=\"#f9f7ec\">
73//! <meta name=\"apple-mobile-web-app-capable\" content=\"yes\">
74//! <meta name=\"apple-mobile-web-app-title\" content=\"crates.io: Rust Package Registry\">
75//! <meta name=\"apple-mobile-web-app-status-bar-style\" content=\"default\">
76//!
77//! <link rel=\"stylesheet\" href=\"/assets/vendor-8d023d47762d5431764f589a6012123e.css\" integrity=\"sha256-EoB7fsYkdS7BZba47+C/9D7yxwPZojsE4pO7RIuUXdE= sha512-/SzGQGR0yj5AG6YPehZB3b6MjpnuNCTOGREQTStETobVRrpYPZKneJwcL/14B8ufcvobJGFDvnTKdcDDxbh6/A==\" >
78//! <link rel=\"stylesheet\" href=\"/assets/cargo-cedb8082b232ce89dd449d869fb54b98.css\" integrity=\"sha256-S9K9jZr6nSyYicYad3JdiTKrvsstXZrvYqmLUX9i3tc= sha512-CDGjy3xeyiqBgUMa+GelihW394pqAARXwsU+HIiOotlnp1sLBVgO6v2ZszL0arwKU8CpvL9wHyLYBIdfX92YbQ==\" >
79//!
80//!
81//! <link rel=\"shortcut icon\" href=\"/favicon.ico\" type=\"image/x-icon\">
82//! <link rel=\"icon\" href=\"/cargo-835dd6a18132048a52ac569f2615b59d.png\" type=\"image/png\">
83//! <link rel=\"search\" href=\"/opensearch.xml\" type=\"application/opensearchdescription+xml\" title=\"Cargo\">
84//! </head>
85//! <body>
86//! <!-- EMBER_CLI_FASTBOOT_BODY -->
87//! <noscript>
88//! <div id=\"main\">
89//! <div class='noscript'>
90//! This site requires JavaScript to be enabled.
91//! </div>
92//! </div>
93//! </noscript>
94//!
95//! <script src=\"/assets/vendor-bfe89101b20262535de5a5ccdc276965.js\" integrity=\"sha256-U12Xuwhz1bhJXWyFW/hRr+Wa8B6FFDheTowik5VLkbw= sha512-J/cUUuUN55TrdG8P6Zk3/slI0nTgzYb8pOQlrXfaLgzr9aEumr9D1EzmFyLy1nrhaDGpRN1T8EQrU21Jl81pJQ==\" ></script>
96//! <script src=\"/assets/cargo-4023b68501b7b3e17b2bb31f50f5eeea.js\" integrity=\"sha256-9atimKc1KC6HMJF/B07lP3Cjtgr2tmET8Vau0Re5mVI= sha512-XJyBDQU4wtA1aPyPXaFzTE5Wh/mYJwkKHqZ/Fn4p/ezgdKzSCFu6FYn81raBCnCBNsihfhrkb88uF6H5VraHMA==\" ></script>
97//!
98//!
99//! </body>
100//! </html>
101//! }";
102//! let html: Html = from_str(xml)?;
//! assert_eq!(&html.head.title, "crates.io: Rust Package Registry");
104//! Ok(html)
105//! }
106//! ```
107
// Macros must be defined before the modules that use them.
// Also, macros must be imported before they are used.
110use serde::serde_if_integer128;
111
/// Generates a single `deserialize_*` method for a value that is parsed from
/// its textual representation.
///
/// `$deserialize` is the name of the generated method, `$visit` is the visitor
/// method that receives the parsed value, and the optional `$mut` token is put
/// in front of `self` in the generated signature.
macro_rules! deserialize_type {
    ($deserialize:ident => $visit:ident, $($mut:tt)?) => {
        fn $deserialize<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // Valid number representations cannot contain escape sequences,
            // so the text is requested without unescaping
            let raw = self.next_text(false)?;
            let decoded = raw.decode(self.decoder())?;
            let parsed = decoded.parse()?;
            visitor.$visit(parsed)
        }
    };
}
125
/// Generates the `deserialize_*` methods for scalar values: integers, floats,
/// booleans, characters, strings, byte arrays and identifiers.
///
/// The optional `$mut` token is put in front of `self` in the signatures of
/// the generated methods that read text through `self.next_text()`.
macro_rules! deserialize_primitives {
    ($($mut:tt)?) => {
        deserialize_type!(deserialize_i8 => visit_i8, $($mut)?);
        deserialize_type!(deserialize_i16 => visit_i16, $($mut)?);
        deserialize_type!(deserialize_i32 => visit_i32, $($mut)?);
        deserialize_type!(deserialize_i64 => visit_i64, $($mut)?);

        deserialize_type!(deserialize_u8 => visit_u8, $($mut)?);
        deserialize_type!(deserialize_u16 => visit_u16, $($mut)?);
        deserialize_type!(deserialize_u32 => visit_u32, $($mut)?);
        deserialize_type!(deserialize_u64 => visit_u64, $($mut)?);

        serde_if_integer128! {
            deserialize_type!(deserialize_i128 => visit_i128, $($mut)?);
            deserialize_type!(deserialize_u128 => visit_u128, $($mut)?);
        }

        deserialize_type!(deserialize_f32 => visit_f32, $($mut)?);
        deserialize_type!(deserialize_f64 => visit_f64, $($mut)?);

        /// Reads the next text and hands it to the module-level
        /// `deserialize_bool` helper, which maps it to `true` / `false`.
        fn deserialize_bool<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // Valid boolean representations cannot contain escape sequences,
            // so the text is requested without unescaping
            let raw = self.next_text(false)?;

            deserialize_bool(raw.as_ref(), self.decoder(), visitor)
        }

        /// Owned strings are represented the same way as
        /// [non-owned](#method.deserialize_str) ones.
        fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// A character is represented as a [string](#method.deserialize_str).
        fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Reads the next text with unescaping enabled, decodes it and passes
        /// it to the visitor: as a borrowed string when decoding did not have
        /// to allocate, as an owned `String` otherwise.
        fn deserialize_str<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            let raw = self.next_text(true)?;
            match raw.decode(self.decoder())? {
                Cow::Borrowed(borrowed) => visitor.visit_borrowed_str(borrowed),
                Cow::Owned(owned) => visitor.visit_string(owned),
            }
        }

        /// Passes the bytes of the next text to the visitor as they are.
        fn deserialize_bytes<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // Bytes give access to the raw XML input, so nothing is unescaped
            let raw = self.next_text(false)?;
            visitor.visit_bytes(&raw)
        }

        /// Passes an owned copy of the bytes of the next text to the visitor.
        fn deserialize_byte_buf<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // Bytes give access to the raw XML input, so nothing is unescaped
            let owned = self.next_text(false)?.into_inner().into_owned();
            visitor.visit_byte_buf(owned)
        }

        /// An identifier is represented as a [string](#method.deserialize_str).
        fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }
    };
}
214
215mod escape;
216mod map;
217mod seq;
218mod var;
219
220pub use crate::errors::serialize::DeError;
221use crate::{
222 errors::Error,
223 events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
224 reader::Decoder,
225 Reader,
226};
227use serde::de::{self, Deserialize, DeserializeOwned, Visitor};
228use std::borrow::Cow;
229use std::io::BufRead;
230
/// Special field name: a struct field with this name is deserialized from the
/// text content of the XML node (see `Deserializer::has_value_field`).
pub(crate) const INNER_VALUE: &str = "$value";
// NOTE(review): not used in this file; presumably a field-name prefix that is
// interpreted by one of the submodules -- confirm against `map`.
pub(crate) const UNFLATTEN_PREFIX: &str = "$unflatten=";
// NOTE(review): not used in this file; presumably a field-name prefix that is
// interpreted by one of the submodules -- confirm against `map`.
pub(crate) const PRIMITIVE_PREFIX: &str = "$primitive=";
234
/// Simplified event that contains only the variants used by the deserializer.
///
/// Readers implementing [`XmlRead`] convert the reader's `Event`s into this
/// type and skip every event kind that has no counterpart here.
#[derive(Debug, PartialEq)]
pub enum DeEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Escaped character data between `Start` and `End` element.
    Text(BytesText<'a>),
    /// Unescaped character data between `Start` and `End` element,
    /// stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// End of XML document.
    Eof,
}
250
/// An XML deserializer.
///
/// Pulls [`DeEvent`]s from the underlying [`XmlRead`] source and supports a
/// one-event lookahead.
pub struct Deserializer<'de, R>
where
    R: XmlRead<'de>,
{
    /// Source of the events.
    reader: R,
    /// Event that was read ahead by `peek()` and not consumed by `next()` yet.
    peek: Option<DeEvent<'de>>,
    /// Special sign that the deserialized struct has a field with the special
    /// name (see constant `INNER_VALUE`). That field should be deserialized
    /// from the text content of the XML node:
    ///
    /// ```xml
    /// <tag>value for INNER_VALUE field</tag>
    /// ```
    has_value_field: bool,
}
267
268/// Deserialize an instance of type `T` from a string of XML text.
269pub fn from_str<'de, T>(s: &'de str) -> Result<T, DeError>
270where
271 T: Deserialize<'de>,
272{
273 from_slice(s.as_bytes())
274}
275
276/// Deserialize an instance of type `T` from bytes of XML text.
277#[deprecated = "Use `from_slice` instead"]
278pub fn from_bytes<'de, T>(s: &'de [u8]) -> Result<T, DeError>
279where
280 T: Deserialize<'de>,
281{
282 from_slice(s)
283}
284
285/// Deserialize an instance of type `T` from bytes of XML text.
286pub fn from_slice<'de, T>(s: &'de [u8]) -> Result<T, DeError>
287where
288 T: Deserialize<'de>,
289{
290 let mut de = Deserializer::from_slice(s);
291 T::deserialize(&mut de)
292}
293
294/// Deserialize from a reader. This method will do internal copies of data
295/// readed from `reader`. If you want have a `&[u8]` or `&str` input and want
296/// to borrow as much as possible, use [`from_slice`] or [`from_str`]
297pub fn from_reader<R, T>(reader: R) -> Result<T, DeError>
298where
299 R: BufRead,
300 T: DeserializeOwned,
301{
302 let mut de = Deserializer::from_reader(reader);
303 T::deserialize(&mut de)
304}
305
306// TODO: According to the https://www.w3.org/TR/xmlschema-2/#boolean,
307// valid boolean representations are only "true", "false", "1", and "0"
308fn deserialize_bool<'de, V>(value: &[u8], decoder: Decoder, visitor: V) -> Result<V::Value, DeError>
309where
310 V: Visitor<'de>,
311{
312 #[cfg(feature = "encoding")]
313 {
314 let value = decoder.decode(value);
315 // No need to unescape because valid boolean representations cannot be escaped
316 match value.as_ref() {
317 "true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => {
318 visitor.visit_bool(true)
319 }
320 "false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => {
321 visitor.visit_bool(false)
322 }
323 _ => Err(DeError::InvalidBoolean(value.into())),
324 }
325 }
326
327 #[cfg(not(feature = "encoding"))]
328 {
329 // No need to unescape because valid boolean representations cannot be escaped
330 match value {
331 b"true" | b"1" | b"True" | b"TRUE" | b"t" | b"Yes" | b"YES" | b"yes" | b"y" => {
332 visitor.visit_bool(true)
333 }
334 b"false" | b"0" | b"False" | b"FALSE" | b"f" | b"No" | b"NO" | b"no" | b"n" => {
335 visitor.visit_bool(false)
336 }
337 e => Err(DeError::InvalidBoolean(decoder.decode(e)?.into())),
338 }
339 }
340}
341
342impl<'de, R> Deserializer<'de, R>
343where
344 R: XmlRead<'de>,
345{
346 /// Create an XML deserializer from one of the possible quick_xml input sources.
347 ///
348 /// Typically it is more convenient to use one of these methods instead:
349 ///
350 /// - [`Deserializer::from_str`]
351 /// - [`Deserializer::from_slice`]
352 /// - [`Deserializer::from_reader`]
353 pub fn new(reader: R) -> Self {
354 Deserializer {
355 reader,
356 peek: None,
357 has_value_field: false,
358 }
359 }
360
361 /// Get a new deserializer from a regular BufRead
362 #[deprecated = "Use `Deserializer::new` instead"]
363 pub fn from_borrowing_reader(reader: R) -> Self {
364 Self::new(reader)
365 }
366
367 fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
368 if self.peek.is_none() {
369 self.peek = Some(self.reader.next()?);
370 }
371 match self.peek.as_ref() {
372 Some(v) => Ok(v),
373 // SAFETY: a `None` variant for `self.peek` would have been replaced
374 // by a `Some` variant in the code above.
375 // TODO: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
376 // if unsafe code will be allowed
377 None => unreachable!(),
378 }
379 }
380
381 fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
382 if let Some(e) = self.peek.take() {
383 return Ok(e);
384 }
385 self.reader.next()
386 }
387
388 fn next_start(&mut self) -> Result<Option<BytesStart<'de>>, DeError> {
389 loop {
390 let e = self.next()?;
391 match e {
392 DeEvent::Start(e) => return Ok(Some(e)),
393 DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().to_owned())),
394 DeEvent::Eof => return Ok(None),
395 _ => (), // ignore texts
396 }
397 }
398 }
399
400 #[inline]
401 fn next_text(&mut self, unescape: bool) -> Result<BytesCData<'de>, DeError> {
402 self.next_text_impl(unescape, true)
403 }
404
405 /// Consumes a one XML element or an XML tree, returns associated text or
406 /// an empty string.
407 ///
408 /// If `allow_start` is `false`, then only one event is consumed. If that
409 /// event is [`DeEvent::Start`], then [`DeError::UnexpectedStart`] is returned.
410 ///
411 /// If `allow_start` is `true`, then first text of CDATA event inside it is
412 /// returned and all other content is skipped until corresponding end tag
413 /// will be consumed.
414 ///
415 /// # Handling events
416 ///
417 /// The table below shows how events is handled by this method:
418 ///
419 /// |Event |XML |Handling
420 /// |------------------|---------------------------|----------------------------------------
421 /// |[`DeEvent::Start`]|`<tag>...</tag>` |if `allow_start == true`, result determined by the second table, otherwise emits [`UnexpectedStart("tag")`](DeError::UnexpectedStart)
422 /// |[`DeEvent::End`] |`</any-tag>` |Emits [`UnexpectedEnd("any-tag")`](DeError::UnexpectedEnd)
423 /// |[`DeEvent::Text`] |`text content` |Unescapes `text content` and returns it
424 /// |[`DeEvent::CData`]|`<![CDATA[cdata content]]>`|Returns `cdata content` unchanged
425 /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
426 ///
427 /// Second event, consumed if [`DeEvent::Start`] was received and `allow_start == true`:
428 ///
429 /// |Event |XML |Handling
430 /// |------------------|---------------------------|----------------------------------------------------------------------------------
431 /// |[`DeEvent::Start`]|`<any-tag>...</any-tag>` |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
432 /// |[`DeEvent::End`] |`</tag>` |Returns an empty slice, if close tag matched the open one
433 /// |[`DeEvent::End`] |`</any-tag>` |Emits [`UnexpectedEnd("any-tag")`](DeError::UnexpectedEnd)
434 /// |[`DeEvent::Text`] |`text content` |Unescapes `text content` and returns it, consumes events up to `</tag>`
435 /// |[`DeEvent::CData`]|`<![CDATA[cdata content]]>`|Returns `cdata content` unchanged, consumes events up to `</tag>`
436 /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
437 fn next_text_impl(
438 &mut self,
439 unescape: bool,
440 allow_start: bool,
441 ) -> Result<BytesCData<'de>, DeError> {
442 match self.next()? {
443 DeEvent::Text(e) if unescape => e.unescape().map_err(|e| DeError::InvalidXml(e.into())),
444 DeEvent::Text(e) => Ok(BytesCData::new(e.into_inner())),
445 DeEvent::CData(e) => Ok(e),
446 DeEvent::Start(e) if allow_start => {
447 // allow one nested level
448 let inner = self.next()?;
449 let t = match inner {
450 DeEvent::Text(t) if unescape => t.unescape()?,
451 DeEvent::Text(t) => BytesCData::new(t.into_inner()),
452 DeEvent::CData(t) => t,
453 DeEvent::Start(s) => return Err(DeError::UnexpectedStart(s.name().to_owned())),
454 // We can get End event in case of `<tag></tag>` or `<tag/>` input
455 // Return empty text in that case
456 DeEvent::End(end) if end.name() == e.name() => {
457 return Ok(BytesCData::new(&[] as &[u8]));
458 }
459 DeEvent::End(end) => return Err(DeError::UnexpectedEnd(end.name().to_owned())),
460 DeEvent::Eof => return Err(DeError::UnexpectedEof),
461 };
462 self.read_to_end(e.name())?;
463 Ok(t)
464 }
465 DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().to_owned())),
466 DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().to_owned())),
467 DeEvent::Eof => Err(DeError::UnexpectedEof),
468 }
469 }
470
471 /// Returns a decoder, used inside `deserialize_primitives!()`
472 #[inline]
473 fn decoder(&self) -> Decoder {
474 self.reader.decoder()
475 }
476
477 fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> {
478 // First one might be in self.peek
479 match self.next()? {
480 DeEvent::Start(e) => self.reader.read_to_end(e.name())?,
481 DeEvent::End(e) if e.name() == name => return Ok(()),
482 _ => (),
483 }
484 self.reader.read_to_end(name)
485 }
486}
487
488impl<'de> Deserializer<'de, SliceReader<'de>> {
489 /// Create new deserializer that will borrow data from the specified string
490 pub fn from_str(s: &'de str) -> Self {
491 Self::from_slice(s.as_bytes())
492 }
493
494 /// Create new deserializer that will borrow data from the specified byte array
495 pub fn from_slice(bytes: &'de [u8]) -> Self {
496 let mut reader = Reader::from_bytes(bytes);
497 reader
498 .expand_empty_elements(true)
499 .check_end_names(true)
500 .trim_text(true);
501 Self::new(SliceReader { reader })
502 }
503}
504
505impl<'de, R> Deserializer<'de, IoReader<R>>
506where
507 R: BufRead,
508{
509 /// Create new deserializer that will copy data from the specified reader
510 /// into internal buffer. If you already have a string or a byte array, use
511 /// [`Self::from_str`] or [`Self::from_slice`] instead, because they will
512 /// borrow instead of copy, whenever possible
513 pub fn from_reader(reader: R) -> Self {
514 let mut reader = Reader::from_reader(reader);
515 reader
516 .expand_empty_elements(true)
517 .check_end_names(true)
518 .trim_text(true);
519
520 Self::new(IoReader {
521 reader,
522 buf: Vec::new(),
523 })
524 }
525}
526
527impl<'de, 'a, R> de::Deserializer<'de> for &'a mut Deserializer<'de, R>
528where
529 R: XmlRead<'de>,
530{
531 type Error = DeError;
532
533 deserialize_primitives!();
534
535 fn deserialize_struct<V>(
536 self,
537 _name: &'static str,
538 fields: &'static [&'static str],
539 visitor: V,
540 ) -> Result<V::Value, DeError>
541 where
542 V: Visitor<'de>,
543 {
544 // Try to go to the next `<tag ...>...</tag>` or `<tag .../>`
545 if let Some(e) = self.next_start()? {
546 let name = e.name().to_vec();
547 self.has_value_field = fields.contains(&INNER_VALUE);
548 let map = map::MapAccess::new(self, e, fields)?;
549 let value = visitor.visit_map(map)?;
550 self.has_value_field = false;
551 self.read_to_end(&name)?;
552 Ok(value)
553 } else {
554 Err(DeError::ExpectedStart)
555 }
556 }
557
558 /// Unit represented in XML as a `xs:element` or text/CDATA content.
559 /// Any content inside `xs:element` is ignored and skipped.
560 ///
561 /// Produces unit struct from any of following inputs:
562 /// - any `<tag ...>...</tag>`
563 /// - any `<tag .../>`
564 /// - any text content
565 /// - any CDATA content
566 ///
567 /// # Events handling
568 ///
569 /// |Event |XML |Handling
570 /// |------------------|---------------------------|-------------------------------------------
571 /// |[`DeEvent::Start`]|`<tag>...</tag>` |Calls `visitor.visit_unit()`, consumes all events up to corresponding `End` event
572 /// |[`DeEvent::End`] |`</tag>` |Emits [`UnexpectedEnd("tag")`](DeError::UnexpectedEnd)
573 /// |[`DeEvent::Text`] |`text content` |Calls `visitor.visit_unit()`. Text content is ignored
574 /// |[`DeEvent::CData`]|`<![CDATA[cdata content]]>`|Calls `visitor.visit_unit()`. CDATA content is ignored
575 /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
576 fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, DeError>
577 where
578 V: Visitor<'de>,
579 {
580 match self.next()? {
581 DeEvent::Start(s) => {
582 self.read_to_end(s.name())?;
583 visitor.visit_unit()
584 }
585 DeEvent::Text(_) | DeEvent::CData(_) => visitor.visit_unit(),
586 DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().to_owned())),
587 DeEvent::Eof => Err(DeError::UnexpectedEof),
588 }
589 }
590
591 /// Representation of the names units the same as [unnamed units](#method.deserialize_unit)
592 fn deserialize_unit_struct<V>(
593 self,
594 _name: &'static str,
595 visitor: V,
596 ) -> Result<V::Value, DeError>
597 where
598 V: Visitor<'de>,
599 {
600 self.deserialize_unit(visitor)
601 }
602
603 fn deserialize_newtype_struct<V>(
604 self,
605 _name: &'static str,
606 visitor: V,
607 ) -> Result<V::Value, DeError>
608 where
609 V: Visitor<'de>,
610 {
611 self.deserialize_tuple(1, visitor)
612 }
613
614 /// Representation of tuples the same as [sequences](#method.deserialize_seq).
615 fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value, DeError>
616 where
617 V: Visitor<'de>,
618 {
619 self.deserialize_seq(visitor)
620 }
621
622 /// Representation of named tuples the same as [unnamed tuples](#method.deserialize_tuple).
623 fn deserialize_tuple_struct<V>(
624 self,
625 _name: &'static str,
626 len: usize,
627 visitor: V,
628 ) -> Result<V::Value, DeError>
629 where
630 V: Visitor<'de>,
631 {
632 self.deserialize_tuple(len, visitor)
633 }
634
635 fn deserialize_enum<V>(
636 self,
637 _name: &'static str,
638 _variants: &'static [&'static str],
639 visitor: V,
640 ) -> Result<V::Value, DeError>
641 where
642 V: Visitor<'de>,
643 {
644 let value = visitor.visit_enum(var::EnumAccess::new(self))?;
645 Ok(value)
646 }
647
648 fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
649 where
650 V: Visitor<'de>,
651 {
652 visitor.visit_seq(seq::SeqAccess::new(self)?)
653 }
654
655 fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>
656 where
657 V: Visitor<'de>,
658 {
659 self.deserialize_struct("", &[], visitor)
660 }
661
662 fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
663 where
664 V: Visitor<'de>,
665 {
666 match self.peek()? {
667 DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
668 DeEvent::CData(t) if t.is_empty() => visitor.visit_none(),
669 DeEvent::Eof => visitor.visit_none(),
670 _ => visitor.visit_some(self),
671 }
672 }
673
674 /// Always call `visitor.visit_unit()` because returned value ignored in any case.
675 ///
676 /// This method consumes any single [event][DeEvent] except the [`Start`][DeEvent::Start]
677 /// event, in which case all events up to corresponding [`End`][DeEvent::End] event will
678 /// be consumed.
679 ///
680 /// This method returns error if current event is [`End`][DeEvent::End] or [`Eof`][DeEvent::Eof]
681 fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, DeError>
682 where
683 V: Visitor<'de>,
684 {
685 match self.next()? {
686 DeEvent::Start(e) => self.read_to_end(e.name())?,
687 DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().to_owned())),
688 DeEvent::Eof => return Err(DeError::UnexpectedEof),
689 _ => (),
690 }
691 visitor.visit_unit()
692 }
693
694 fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeError>
695 where
696 V: Visitor<'de>,
697 {
698 match self.peek()? {
699 DeEvent::Start(_) => self.deserialize_map(visitor),
700 // Redirect to deserialize_unit in order to consume an event and return an appropriate error
701 DeEvent::End(_) | DeEvent::Eof => self.deserialize_unit(visitor),
702 _ => self.deserialize_string(visitor),
703 }
704 }
705}
706
/// Trait used by the deserializer for iterating over input. This is manually
/// "specialized" for iterating over `&[u8]`.
///
/// You do not need to implement this trait; it exists to abstract over the
/// [borrowing](SliceReader) and [copying](IoReader) data sources so that the
/// deserializer code can be reused for both.
pub trait XmlRead<'i> {
    /// Return an input-borrowing event.
    fn next(&mut self) -> Result<DeEvent<'i>, DeError>;

    /// Skips until end element is found. Unlike `next()` it will not allocate
    /// when it cannot satisfy the lifetime.
    fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError>;

    /// A copy of the reader's decoder used to decode strings.
    fn decoder(&self) -> Decoder;
}
724
/// XML input source that reads from a std::io input stream.
///
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_reader`]
pub struct IoReader<R: BufRead> {
    /// Underlying XML reader.
    reader: Reader<R>,
    /// Buffer handed to the reader on every read; it is cleared after each
    /// event has been converted into an owned one.
    buf: Vec<u8>,
}
733
734impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
735 fn next(&mut self) -> Result<DeEvent<'static>, DeError> {
736 let event = loop {
737 let e = self.reader.read_event(&mut self.buf)?;
738 match e {
739 Event::Start(e) => break Ok(DeEvent::Start(e.into_owned())),
740 Event::End(e) => break Ok(DeEvent::End(e.into_owned())),
741 Event::Text(e) => break Ok(DeEvent::Text(e.into_owned())),
742 Event::CData(e) => break Ok(DeEvent::CData(e.into_owned())),
743 Event::Eof => break Ok(DeEvent::Eof),
744
745 _ => self.buf.clear(),
746 }
747 };
748
749 self.buf.clear();
750
751 event
752 }
753
754 fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> {
755 match self.reader.read_to_end(name, &mut self.buf) {
756 Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
757 other => Ok(other?),
758 }
759 }
760
761 fn decoder(&self) -> Decoder {
762 self.reader.decoder()
763 }
764}
765
/// XML input source that reads from a slice of bytes and can borrow from it.
///
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_str`] or [`Deserializer::from_slice`]
pub struct SliceReader<'de> {
    /// Underlying XML reader over the borrowed input.
    reader: Reader<&'de [u8]>,
}
773
774impl<'de> XmlRead<'de> for SliceReader<'de> {
775 fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
776 loop {
777 let e = self.reader.read_event_unbuffered()?;
778 match e {
779 Event::Start(e) => break Ok(DeEvent::Start(e)),
780 Event::End(e) => break Ok(DeEvent::End(e)),
781 Event::Text(e) => break Ok(DeEvent::Text(e)),
782 Event::CData(e) => break Ok(DeEvent::CData(e)),
783 Event::Eof => break Ok(DeEvent::Eof),
784
785 _ => (),
786 }
787 }
788 }
789
790 fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> {
791 match self.reader.read_to_end_unbuffered(name) {
792 Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
793 other => Ok(other?),
794 }
795 }
796
797 fn decoder(&self) -> Decoder {
798 self.reader.decoder()
799 }
800}
801
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Checks that `Deserializer::read_to_end` skips the rest of an element,
    /// nested elements included, and leaves the following events untouched.
    #[test]
    fn read_to_end() {
        use crate::de::DeEvent::*;

        let mut de = Deserializer::from_slice(
            br#"
 <root>
 <tag a="1"><tag>text</tag>content</tag>
 <tag a="2"><![CDATA[cdata content]]></tag>
 <self-closed/>
 </root>
 "#,
        );

        assert_eq!(
            de.next().unwrap(),
            Start(BytesStart::borrowed_name(b"root"))
        );

        assert_eq!(
            de.next().unwrap(),
            Start(BytesStart::borrowed(br#"tag a="1""#, 3))
        );
        // Skips the nested `<tag>`, the trailing text and the closing `</tag>`
        assert_eq!(de.read_to_end(b"tag").unwrap(), ());

        assert_eq!(
            de.next().unwrap(),
            Start(BytesStart::borrowed(br#"tag a="2""#, 3))
        );
        assert_eq!(
            de.next().unwrap(),
            CData(BytesCData::from_str("cdata content"))
        );
        assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"tag")));

        // `from_slice` enables expanding of empty elements, so `<self-closed/>`
        // is reported as a Start event whose End is consumed by `read_to_end`
        assert_eq!(
            de.next().unwrap(),
            Start(BytesStart::borrowed(b"self-closed", 11))
        );
        assert_eq!(de.read_to_end(b"self-closed").unwrap(), ());

        assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
        assert_eq!(de.next().unwrap(), Eof);
    }

    /// Checks that `IoReader` and `SliceReader` produce the same sequence of
    /// events for the same input.
    #[test]
    fn borrowing_reader_parity() {
        let s = r##"
 <item name="hello" source="world.rs">Some text</item>
 <item2/>
 <item3 value="world" />
 "##
        .as_bytes();

        let mut reader1 = IoReader {
            reader: Reader::from_reader(s),
            buf: Vec::new(),
        };
        let mut reader2 = SliceReader {
            reader: Reader::from_bytes(s),
        };

        loop {
            let event1 = reader1.next().unwrap();
            let event2 = reader2.next().unwrap();

            // Stop only when both readers are exhausted; if only one of them
            // reports `Eof`, the assertion below fails
            if let (DeEvent::Eof, DeEvent::Eof) = (&event1, &event2) {
                break;
            }

            assert_eq!(event1, event2);
        }
    }

    /// Checks the exact events produced by `SliceReader` when the reader is
    /// configured the same way as in `Deserializer::from_slice`.
    #[test]
    fn borrowing_reader_events() {
        let s = r##"
 <item name="hello" source="world.rs">Some text</item>
 <item2></item2>
 <item3/>
 <item4 value="world" />
 "##
        .as_bytes();

        let mut reader = SliceReader {
            reader: Reader::from_bytes(s),
        };

        reader
            .reader
            .trim_text(true)
            .expand_empty_elements(true)
            .check_end_names(true);

        let mut events = Vec::new();

        // Collect everything up to (but not including) `Eof`
        loop {
            let event = reader.next().unwrap();
            if let DeEvent::Eof = event {
                break;
            }
            events.push(event);
        }

        use crate::de::DeEvent::*;

        assert_eq!(
            events,
            vec![
                Start(BytesStart::borrowed(
                    br#"item name="hello" source="world.rs""#,
                    4
                )),
                Text(BytesText::from_escaped(b"Some text".as_ref())),
                End(BytesEnd::borrowed(b"item")),
                Start(BytesStart::borrowed(b"item2", 5)),
                End(BytesEnd::borrowed(b"item2")),
                Start(BytesStart::borrowed(b"item3", 5)),
                End(BytesEnd::borrowed(b"item3")),
                Start(BytesStart::borrowed(br#"item4 value="world" "#, 5)),
                End(BytesEnd::borrowed(b"item4")),
            ]
        )
    }

    /// Checks that `SliceReader::read_to_end` consumes the End event that is
    /// generated for an expanded empty element.
    #[test]
    fn borrowing_read_to_end() {
        let s = " <item /> ";
        let mut reader = SliceReader {
            reader: Reader::from_str(s),
        };

        reader
            .reader
            .trim_text(true)
            .expand_empty_elements(true)
            .check_end_names(true);

        assert_eq!(
            reader.next().unwrap(),
            DeEvent::Start(BytesStart::borrowed(b"item ", 4))
        );
        reader.read_to_end(b"item").unwrap();
        assert_eq!(reader.next().unwrap(), DeEvent::Eof);
    }

    /// Ensures, that [`Deserializer::next_text()`] never can get an `End` event,
    /// because parser reports error early
    #[test]
    fn next_text() {
        // A lone end tag is rejected by the reader itself
        match from_str::<String>(r#"</root>"#) {
            Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
                assert_eq!(expected, "");
                assert_eq!(found, "root");
            }
            x => panic!(
                r#"Expected `Err(InvalidXml(EndEventMismatch("", "root")))`, but found {:?}"#,
                x
            ),
        }

        // An empty element yields an empty string
        let s: String = from_str(r#"<root></root>"#).unwrap();
        assert_eq!(s, "");

        // A mismatched end tag is rejected by the reader as well
        match from_str::<String>(r#"<root></other>"#) {
            Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
                assert_eq!(expected, "root");
                assert_eq!(found, "other");
            }
            x => panic!(
                r#"Expected `Err(InvalidXml(EndEventMismatch("root", "other")))`, but found {:?}"#,
                x
            ),
        }
    }
}