quick_xml/de/mod.rs
1//! Serde `Deserializer` module.
2//!
3//! Due to the complexity of the XML standard and the fact that Serde was developed
4//! with JSON in mind, not all Serde concepts apply smoothly to XML. This leads to
5//! the fact that some XML concepts are inexpressible in terms of Serde derives
6//! and may require manual deserialization.
7//!
8//! The most notable restriction is the ability to distinguish between _elements_
9//! and _attributes_, as no other format used by serde has such a concept.
10//!
11//! Due to that the mapping is performed in a best effort manner.
12//!
13//!
14//!
15//! Table of Contents
16//! =================
17//! - [Mapping XML to Rust types](#mapping-xml-to-rust-types)
18//! - [Basics](#basics)
19//! - [Optional attributes and elements](#optional-attributes-and-elements)
20//! - [Choices (`xs:choice` XML Schema type)](#choices-xschoice-xml-schema-type)
21//! - [Sequences (`xs:all` and `xs:sequence` XML Schema types)](#sequences-xsall-and-xssequence-xml-schema-types)
22//! - [Mapping of `xsi:nil`](#mapping-of-xsinil)
23//! - [Generate Rust types from XML](#generate-rust-types-from-xml)
24//! - [Composition Rules](#composition-rules)
25//! - [Enum Representations](#enum-representations)
26//! - [Normal enum variant](#normal-enum-variant)
27//! - [`$text` enum variant](#text-enum-variant)
28//! - [`$text` and `$value` special names](#text-and-value-special-names)
29//! - [`$text`](#text)
30//! - [`$value`](#value)
31//! - [Primitives and sequences of primitives](#primitives-and-sequences-of-primitives)
32//! - [Structs and sequences of structs](#structs-and-sequences-of-structs)
33//! - [Enums and sequences of enums](#enums-and-sequences-of-enums)
34//! - [Frequently Used Patterns](#frequently-used-patterns)
35//! - [`<element>` lists](#element-lists)
36//! - [Overlapped (Out-of-Order) Elements](#overlapped-out-of-order-elements)
37//! - [Internally Tagged Enums](#internally-tagged-enums)
38//!
39//!
40//!
41//! Mapping XML to Rust types
42//! =========================
43//!
44//! Type names are never considered when deserializing, so you can name your
45//! types as you wish. Other general rules:
46//! - `struct` field name could be represented in XML only as an attribute name
47//! or an element name;
48//! - `enum` variant name could be represented in XML only as an attribute name
49//! or an element name;
50//! - the unit struct, unit type `()` and unit enum variant can be deserialized
51//! from any valid XML content:
52//! - attribute and element names;
53//! - attribute and element values;
54//! - text or CDATA content (including mixed text and CDATA content).
55//!
56//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
57//!
58//! NOTE: All tests are marked with an `ignore` option, even though they do
59//! compile. This is because rustdoc marks such blocks with an information
60//! icon unlike `no_run` blocks.
61//!
62//! </div>
63//!
64//! <table>
65//! <thead>
66//! <tr><th colspan="2">
67//!
68//! ## Basics
69//!
70//! </th></tr>
71//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
72//! </thead>
73//! <tbody style="vertical-align:top;">
74//! <tr>
75//! <td>
76//! Content of attributes and text / CDATA content of elements (including mixed
77//! text and CDATA content):
78//!
79//! ```xml
80//! <... ...="content" />
81//! ```
82//! ```xml
83//! <...>content</...>
84//! ```
85//! ```xml
86//! <...><![CDATA[content]]></...>
87//! ```
88//! ```xml
89//! <...>text<![CDATA[cdata]]>text</...>
90//! ```
91//! Mixed text / CDATA content represents one logical string, `"textcdatatext"` in that case.
92//! </td>
93//! <td>
94//!
95//! You can use any type that can be deserialized from an `&str`, for example:
96//! - [`String`] and [`&str`]
97//! - [`Cow<str>`]
98//! - [`u32`], [`f32`] and other numeric types
99//! - `enum`s, like
100//! ```
101//! # use pretty_assertions::assert_eq;
102//! # use serde::Deserialize;
103//! # #[derive(Debug, PartialEq)]
104//! #[derive(Deserialize)]
105//! enum Language {
106//! Rust,
107//! Cpp,
108//! #[serde(other)]
109//! Other,
110//! }
111//! # #[derive(Debug, PartialEq, Deserialize)]
112//! # struct X { #[serde(rename = "$text")] x: Language }
113//! # assert_eq!(X { x: Language::Rust }, quick_xml::de::from_str("<x>Rust</x>").unwrap());
114//! # assert_eq!(X { x: Language::Cpp }, quick_xml::de::from_str("<x>C<![CDATA[p]]>p</x>").unwrap());
115//! # assert_eq!(X { x: Language::Other }, quick_xml::de::from_str("<x><![CDATA[other]]></x>").unwrap());
116//! ```
117//!
118//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
119//!
120//! NOTE: deserialization to non-owned types (i.e. borrow from the input),
121//! such as `&str`, is possible only if you parse document in the UTF-8
122//! encoding and content does not contain entity references such as `&amp;`,
123//! or character references such as `&#xD;`, as well as text content represented
124//! by one piece of [text] or [CDATA] element.
125//! </div>
126//! <!-- TODO: document an error type returned -->
127//!
128//! [text]: Event::Text
129//! [CDATA]: Event::CData
130//! </td>
131//! </tr>
132//! <!-- 2 ===================================================================================== -->
133//! <tr>
134//! <td>
135//!
136//! Content of attributes and text / CDATA content of elements (including mixed
137//! text and CDATA content), which represents a space-delimited list, as
138//! specified in the XML Schema specification for [`xs:list`] `simpleType`:
139//!
140//! ```xml
141//! <... ...="element1 element2 ..." />
142//! ```
143//! ```xml
144//! <...>
145//! element1
146//! element2
147//! ...
148//! </...>
149//! ```
150//! ```xml
151//! <...><![CDATA[
152//! element1
153//! element2
154//! ...
155//! ]]></...>
156//! ```
157//!
158//! [`xs:list`]: https://www.w3.org/TR/xmlschema11-2/#list-datatypes
159//! </td>
160//! <td>
161//!
162//! Use any type that is deserialized using a [`deserialize_seq()`] call, for example:
163//!
164//! ```
165//! type List = Vec<u32>;
166//! ```
167//!
168//! See the next row to learn where in your struct definition you should
169//! use that type.
170//!
171//! According to the XML Schema specification, the delimiter between elements is one
172//! or more space (`' '`, `'\r'`, `'\n'`, and `'\t'`) character(s).
173//!
174//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
175//!
176//! NOTE: according to the XML Schema restrictions, you cannot escape those
177//! white-space characters, so list elements will _never_ contain them.
178//! In practice you will usually use `xs:list`s for lists of numbers or enumerated
179//! values which look like identifiers in many languages, for example, `item`,
180//! `some_item` or `some-item`, so that shouldn't be a problem.
181//!
182//! NOTE: according to the XML Schema specification, list elements can be
183//! delimited only by spaces. Other delimiters (for example, commas) are not
184//! allowed.
185//!
186//! </div>
187//!
188//! [`deserialize_seq()`]: de::Deserializer::deserialize_seq
189//! </td>
190//! </tr>
191//! <!-- 3 ===================================================================================== -->
192//! <tr>
193//! <td>
194//! A typical XML with attributes. The root tag name does not matter:
195//!
196//! ```xml
197//! <any-tag one="..." two="..."/>
198//! ```
199//! </td>
200//! <td>
201//!
202//! A structure where each XML attribute is mapped to a field with a name
203//! starting with `@`. Because Rust identifiers do not permit the `@` character,
204//! you should use the `#[serde(rename = "@...")]` attribute to rename it.
205//! The name of the struct itself does not matter:
206//!
207//! ```
208//! # use serde::Deserialize;
209//! # type T = ();
210//! # type U = ();
211//! // Get both attributes
212//! # #[derive(Debug, PartialEq)]
213//! #[derive(Deserialize)]
214//! struct AnyName {
215//! #[serde(rename = "@one")]
216//! one: T,
217//!
218//! #[serde(rename = "@two")]
219//! two: U,
220//! }
221//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
222//! ```
223//! ```
224//! # use serde::Deserialize;
225//! # type T = ();
226//! // Get only the one attribute, ignore the other
227//! # #[derive(Debug, PartialEq)]
228//! #[derive(Deserialize)]
229//! struct AnyName {
230//! #[serde(rename = "@one")]
231//! one: T,
232//! }
233//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
234//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."/>"#).unwrap();
235//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
236//! ```
237//! ```
238//! # use serde::Deserialize;
239//! // Ignore all attributes
240//! // You can also use the `()` type (unit type)
241//! # #[derive(Debug, PartialEq)]
242//! #[derive(Deserialize)]
243//! struct AnyName;
244//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
245//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
246//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
247//! ```
248//!
249//! All these structs can be used to deserialize from an XML on the
250//! left side depending on the amount of information that you want to get.
251//! Of course, you can combine them with elements extractor structs (see below).
252//!
253//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
254//!
255//! NOTE: XML allows you to have an attribute and an element with the same name
256//! inside the one element. quick-xml deals with that by prepending a `@` prefix
257//! to the name of attributes.
258//! </div>
259//! </td>
260//! </tr>
261//! <!-- 4 ===================================================================================== -->
262//! <tr>
263//! <td>
264//! A typical XML with child elements. The root tag name does not matter:
265//!
266//! ```xml
267//! <any-tag>
268//! <one>...</one>
269//! <two>...</two>
270//! </any-tag>
271//! ```
272//! </td>
273//! <td>
274//! A structure where each XML child element is mapped to the field.
275//! Each element name becomes a name of field. The name of the struct itself
276//! does not matter:
277//!
278//! ```
279//! # use serde::Deserialize;
280//! # type T = ();
281//! # type U = ();
282//! // Get both elements
283//! # #[derive(Debug, PartialEq)]
284//! #[derive(Deserialize)]
285//! struct AnyName {
286//! one: T,
287//! two: U,
288//! }
289//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
290//! #
291//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap_err();
292//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap_err();
293//! ```
294//! ```
295//! # use serde::Deserialize;
296//! # type T = ();
297//! // Get only the one element, ignore the other
298//! # #[derive(Debug, PartialEq)]
299//! #[derive(Deserialize)]
300//! struct AnyName {
301//! one: T,
302//! }
303//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
304//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
305//! ```
306//! ```
307//! # use serde::Deserialize;
308//! // Ignore all elements
309//! // You can also use the `()` type (unit type)
310//! # #[derive(Debug, PartialEq)]
311//! #[derive(Deserialize)]
312//! struct AnyName;
313//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
314//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
315//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap();
316//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
317//! ```
318//!
319//! All these structs can be used to deserialize from an XML on the
320//! left side depending on the amount of information that you want to get.
321//! Of course, you can combine them with attributes extractor structs (see above).
322//!
323//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
324//!
325//! NOTE: XML allows you to have an attribute and an element with the same name
326//! inside the one element. quick-xml deals with that by prepending a `@` prefix
327//! to the name of attributes.
328//! </div>
329//! </td>
330//! </tr>
331//! <!-- 5 ===================================================================================== -->
332//! <tr>
333//! <td>
334//! An XML with an attribute and a child element named equally:
335//!
336//! ```xml
337//! <any-tag field="...">
338//! <field>...</field>
339//! </any-tag>
340//! ```
341//! </td>
342//! <td>
343//!
344//! You MUST specify `#[serde(rename = "@field")]` on a field that will be used
345//! for an attribute:
346//!
347//! ```
348//! # use pretty_assertions::assert_eq;
349//! # use serde::Deserialize;
350//! # type T = ();
351//! # type U = ();
352//! # #[derive(Debug, PartialEq)]
353//! #[derive(Deserialize)]
354//! struct AnyName {
355//! #[serde(rename = "@field")]
356//! attribute: T,
357//! field: U,
358//! }
359//! # assert_eq!(
360//! # AnyName { attribute: (), field: () },
361//! # quick_xml::de::from_str(r#"
362//! # <any-tag field="...">
363//! # <field>...</field>
364//! # </any-tag>
365//! # "#).unwrap(),
366//! # );
367//! ```
368//! </td>
369//! </tr>
370//! <!-- ======================================================================================= -->
371//! <tr><th colspan="2">
372//!
373//! ## Optional attributes and elements
374//!
375//! </th></tr>
376//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
377//! <!-- 6 ===================================================================================== -->
378//! <tr>
379//! <td>
380//! An optional XML attribute that you want to capture.
381//! The root tag name does not matter:
382//!
383//! ```xml
384//! <any-tag optional="..."/>
385//! ```
386//! ```xml
387//! <any-tag/>
388//! ```
389//! </td>
390//! <td>
391//!
392//! A structure with an optional field, renamed according to the requirements
393//! for attributes:
394//!
395//! ```
396//! # use pretty_assertions::assert_eq;
397//! # use serde::Deserialize;
398//! # type T = ();
399//! # #[derive(Debug, PartialEq)]
400//! #[derive(Deserialize)]
401//! struct AnyName {
402//! #[serde(rename = "@optional")]
403//! optional: Option<T>,
404//! }
405//! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag optional="..."/>"#).unwrap());
406//! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap());
407//! ```
408//! When the XML attribute is present, type `T` will be deserialized from
409//! an attribute value (which is a string). Note, that if `T = String` or other
410//! string type, the empty attribute is mapped to a `Some("")`, whereas `None`
411//! represents the missing attribute:
412//! ```xml
413//! <any-tag optional="..."/><!-- Some("...") -->
414//! <any-tag optional=""/> <!-- Some("") -->
415//! <any-tag/> <!-- None -->
416//! ```
417//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
418//!
419//! NOTE: The behaviour is not symmetric by default. `None` will be serialized as
420//! `optional=""`. This behaviour is consistent across serde crates. You should add
421//! `#[serde(skip_serializing_if = "Option::is_none")]` attribute to the field to
422//! skip `None`s.
423//! </div>
424//! </td>
425//! </tr>
426//! <!-- 7 ===================================================================================== -->
427//! <tr>
428//! <td>
429//! An optional XML element that you want to capture.
430//! The root tag name does not matter:
431//!
432//! ```xml
433//! <any-tag>
434//! <optional>...</optional>
435//! </any-tag>
436//! ```
437//! ```xml
438//! <any-tag>
439//! <optional/>
440//! </any-tag>
441//! ```
442//! ```xml
443//! <any-tag/>
444//! ```
445//! </td>
446//! <td>
447//!
448//! A structure with an optional field:
449//!
450//! ```
451//! # use pretty_assertions::assert_eq;
452//! # use serde::Deserialize;
453//! # type T = ();
454//! # #[derive(Debug, PartialEq)]
455//! #[derive(Deserialize)]
456//! struct AnyName {
457//! optional: Option<T>,
458//! }
459//! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag><optional>...</optional></any-tag>"#).unwrap());
460//! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap());
461//! ```
462//! When the XML element is present, type `T` will be deserialized from an
463//! element (which is a string or a multi-mapping -- i.e. mapping which can have
464//! duplicated keys).
465//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
466//!
467//! NOTE: The behaviour is not symmetric by default. `None` will be serialized as
468//! `<optional/>`. This behaviour is consistent across serde crates. You should add
469//! `#[serde(skip_serializing_if = "Option::is_none")]` attribute to the field to
470//! skip `None`s.
471//!
472//! NOTE: Deserializer will automatically handle a [`xsi:nil`] attribute and set field to `None`.
473//! For more info see [Mapping of `xsi:nil`](#mapping-of-xsinil).
474//! </div>
475//! </td>
476//! </tr>
477//! <!-- ======================================================================================= -->
478//! <tr><th colspan="2">
479//!
480//! ## Choices (`xs:choice` XML Schema type)
481//!
482//! </th></tr>
483//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
484//! <!-- 8 ===================================================================================== -->
485//! <tr>
486//! <td>
487//! An XML with different root tag names, as well as text / CDATA content:
488//!
489//! ```xml
490//! <one field1="...">...</one>
491//! ```
492//! ```xml
493//! <two>
494//! <field2>...</field2>
495//! </two>
496//! ```
497//! ```xml
498//! Text <![CDATA[or (mixed)
499//! CDATA]]> content
500//! ```
501//! </td>
502//! <td>
503//!
504//! An enum where each variant has the name of a possible root tag. The name of
505//! the enum itself does not matter.
506//!
507//! If you need to get the textual content, mark a variant with `#[serde(rename = "$text")]`.
508//!
509//! All these structs can be used to deserialize from any XML on the
510//! left side depending on the amount of information that you want to get:
511//!
512//! ```
513//! # use pretty_assertions::assert_eq;
514//! # use serde::Deserialize;
515//! # type T = ();
516//! # type U = ();
517//! # #[derive(Debug, PartialEq)]
518//! #[derive(Deserialize)]
519//! #[serde(rename_all = "snake_case")]
520//! enum AnyName {
521//! One { #[serde(rename = "@field1")] field1: T },
522//! Two { field2: U },
523//!
524//! /// Use a unit variant if you do not care about the content.
525//! /// You can use a tuple variant if you want to parse
526//! /// textual content as an xs:list.
527//! /// Struct variants will pass a string to the
528//! /// struct enum variant visitor, which typically
529//! /// returns Err(Custom)
530//! #[serde(rename = "$text")]
531//! Text(String),
532//! }
533//! # assert_eq!(AnyName::One { field1: () }, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
534//! # assert_eq!(AnyName::Two { field2: () }, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
535//! # assert_eq!(AnyName::Text("text cdata ".into()), quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
536//! ```
537//! ```
538//! # use pretty_assertions::assert_eq;
539//! # use serde::Deserialize;
540//! # type T = ();
541//! # #[derive(Debug, PartialEq)]
542//! #[derive(Deserialize)]
543//! struct Two {
544//! field2: T,
545//! }
546//! # #[derive(Debug, PartialEq)]
547//! #[derive(Deserialize)]
548//! #[serde(rename_all = "snake_case")]
549//! enum AnyName {
550//! // `field1` content discarded
551//! One,
552//! Two(Two),
553//! #[serde(rename = "$text")]
554//! Text,
555//! }
556//! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
557//! # assert_eq!(AnyName::Two(Two { field2: () }), quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
558//! # assert_eq!(AnyName::Text, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
559//! ```
560//! ```
561//! # use pretty_assertions::assert_eq;
562//! # use serde::Deserialize;
563//! # #[derive(Debug, PartialEq)]
564//! #[derive(Deserialize)]
565//! #[serde(rename_all = "snake_case")]
566//! enum AnyName {
567//! One,
568//! // the <two> and textual content will be mapped to this
569//! #[serde(other)]
570//! Other,
571//! }
572//! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
573//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
574//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
575//! ```
576//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
577//!
578//! NOTE: You should have variants for all possible tag names in your enum
579//! or have an `#[serde(other)]` variant.
580//! <!-- TODO: document an error type if that requirement is violated -->
581//! </div>
582//! </td>
583//! </tr>
584//! <!-- 9 ===================================================================================== -->
585//! <tr>
586//! <td>
587//!
588//! `<xs:choice>` embedded in another element, and at the same time you want
589//! to get access to other attributes that can appear in the same container
590//! (`<any-tag>`). Also this case can be described, as if you want to choose
591//! Rust enum variant based on a tag name:
592//!
593//! ```xml
594//! <any-tag field="...">
595//! <one>...</one>
596//! </any-tag>
597//! ```
598//! ```xml
599//! <any-tag field="...">
600//! <two>...</two>
601//! </any-tag>
602//! ```
603//! ```xml
604//! <any-tag field="...">
605//! Text <![CDATA[or (mixed)
606//! CDATA]]> content
607//! </any-tag>
608//! ```
609//! </td>
610//! <td>
611//!
612//! A structure with a field whose type is an `enum`.
613//!
614//! If you need to get the textual content, mark a variant with `#[serde(rename = "$text")]`.
615//!
616//! Names of the enum, struct, and struct field with `Choice` type do not matter:
617//!
618//! ```
619//! # use pretty_assertions::assert_eq;
620//! # use serde::Deserialize;
621//! # type T = ();
622//! # #[derive(Debug, PartialEq)]
623//! #[derive(Deserialize)]
624//! #[serde(rename_all = "snake_case")]
625//! enum Choice {
626//! One,
627//! Two,
628//!
629//! /// Use a unit variant if you do not care about the content.
630//! /// You can use a tuple variant if you want to parse
631//! /// textual content as an xs:list.
632//! /// Struct variants will pass a string to the
633//! /// struct enum variant visitor, which typically
634//! /// returns Err(Custom)
635//! #[serde(rename = "$text")]
636//! Text(String),
637//! }
638//! # #[derive(Debug, PartialEq)]
639//! #[derive(Deserialize)]
640//! struct AnyName {
641//! #[serde(rename = "@field")]
642//! field: T,
643//!
644//! #[serde(rename = "$value")]
645//! any_name: Choice,
646//! }
647//! # assert_eq!(
648//! # AnyName { field: (), any_name: Choice::One },
649//! # quick_xml::de::from_str(r#"<any-tag field="..."><one>...</one></any-tag>"#).unwrap(),
650//! # );
651//! # assert_eq!(
652//! # AnyName { field: (), any_name: Choice::Two },
653//! # quick_xml::de::from_str(r#"<any-tag field="..."><two>...</two></any-tag>"#).unwrap(),
654//! # );
655//! # assert_eq!(
656//! # AnyName { field: (), any_name: Choice::Text("text cdata ".into()) },
657//! # quick_xml::de::from_str(r#"<any-tag field="...">text <![CDATA[ cdata ]]></any-tag>"#).unwrap(),
658//! # );
659//! ```
660//! </td>
661//! </tr>
662//! <!-- 10 ==================================================================================== -->
663//! <tr>
664//! <td>
665//!
666//! `<xs:choice>` embedded in another element, and at the same time you want
667//! to get access to other elements that can appear in the same container
668//! (`<any-tag>`). Also this case can be described, as if you want to choose
669//! Rust enum variant based on a tag name:
670//!
671//! ```xml
672//! <any-tag>
673//! <field>...</field>
674//! <one>...</one>
675//! </any-tag>
676//! ```
677//! ```xml
678//! <any-tag>
679//! <two>...</two>
680//! <field>...</field>
681//! </any-tag>
682//! ```
683//! </td>
684//! <td>
685//!
686//! A structure with a field whose type is an `enum`.
687//!
688//! Names of the enum, struct, and struct field with `Choice` type do not matter:
689//!
690//! ```
691//! # use pretty_assertions::assert_eq;
692//! # use serde::Deserialize;
693//! # type T = ();
694//! # #[derive(Debug, PartialEq)]
695//! #[derive(Deserialize)]
696//! #[serde(rename_all = "snake_case")]
697//! enum Choice {
698//! One,
699//! Two,
700//! }
701//! # #[derive(Debug, PartialEq)]
702//! #[derive(Deserialize)]
703//! struct AnyName {
704//! field: T,
705//!
706//! #[serde(rename = "$value")]
707//! any_name: Choice,
708//! }
709//! # assert_eq!(
710//! # AnyName { field: (), any_name: Choice::One },
711//! # quick_xml::de::from_str(r#"<any-tag><field>...</field><one>...</one></any-tag>"#).unwrap(),
712//! # );
713//! # assert_eq!(
714//! # AnyName { field: (), any_name: Choice::Two },
715//! # quick_xml::de::from_str(r#"<any-tag><two>...</two><field>...</field></any-tag>"#).unwrap(),
716//! # );
717//! ```
718//!
719//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
720//!
721//! NOTE: if your `Choice` enum would contain an `#[serde(other)]`
722//! variant, element `<field>` will be mapped to the `field` and not to the enum
723//! variant.
724//! </div>
725//!
726//! </td>
727//! </tr>
728//! <!-- 11 ==================================================================================== -->
729//! <tr>
730//! <td>
731//!
732//! `<xs:choice>` encapsulated in another element with a fixed name:
733//!
734//! ```xml
735//! <any-tag field="...">
736//! <choice>
737//! <one>...</one>
738//! </choice>
739//! </any-tag>
740//! ```
741//! ```xml
742//! <any-tag field="...">
743//! <choice>
744//! <two>...</two>
745//! </choice>
746//! </any-tag>
747//! ```
748//! </td>
749//! <td>
750//!
751//! A structure with a field of an intermediate type with one field of `enum` type.
752//! Actually, this example is not necessary, because you can construct it by yourself
753//! using the composition rules that were described above. However the XML construction
754//! described here is very common, so it is shown explicitly.
755//!
756//! Names of the enum and struct do not matter:
757//!
758//! ```
759//! # use pretty_assertions::assert_eq;
760//! # use serde::Deserialize;
761//! # type T = ();
762//! # #[derive(Debug, PartialEq)]
763//! #[derive(Deserialize)]
764//! #[serde(rename_all = "snake_case")]
765//! enum Choice {
766//! One,
767//! Two,
768//! }
769//! # #[derive(Debug, PartialEq)]
770//! #[derive(Deserialize)]
771//! struct Holder {
772//! #[serde(rename = "$value")]
773//! any_name: Choice,
774//! }
775//! # #[derive(Debug, PartialEq)]
776//! #[derive(Deserialize)]
777//! struct AnyName {
778//! #[serde(rename = "@field")]
779//! field: T,
780//!
781//! choice: Holder,
782//! }
783//! # assert_eq!(
784//! # AnyName { field: (), choice: Holder { any_name: Choice::One } },
785//! # quick_xml::de::from_str(r#"<any-tag field="..."><choice><one>...</one></choice></any-tag>"#).unwrap(),
786//! # );
787//! # assert_eq!(
788//! # AnyName { field: (), choice: Holder { any_name: Choice::Two } },
789//! # quick_xml::de::from_str(r#"<any-tag field="..."><choice><two>...</two></choice></any-tag>"#).unwrap(),
790//! # );
791//! ```
792//! </td>
793//! </tr>
794//! <!-- 12 ==================================================================================== -->
795//! <tr>
796//! <td>
797//!
798//! `<xs:choice>` encapsulated in another element with a fixed name:
799//!
800//! ```xml
801//! <any-tag>
802//! <field>...</field>
803//! <choice>
804//! <one>...</one>
805//! </choice>
806//! </any-tag>
807//! ```
808//! ```xml
809//! <any-tag>
810//! <choice>
811//! <two>...</two>
812//! </choice>
813//! <field>...</field>
814//! </any-tag>
815//! ```
816//! </td>
817//! <td>
818//!
819//! A structure with a field of an intermediate type with one field of `enum` type.
820//! Actually, this example is not necessary, because you can construct it by yourself
821//! using the composition rules that were described above. However the XML construction
822//! described here is very common, so it is shown explicitly.
823//!
824//! Names of the enum and struct do not matter:
825//!
826//! ```
827//! # use pretty_assertions::assert_eq;
828//! # use serde::Deserialize;
829//! # type T = ();
830//! # #[derive(Debug, PartialEq)]
831//! #[derive(Deserialize)]
832//! #[serde(rename_all = "snake_case")]
833//! enum Choice {
834//! One,
835//! Two,
836//! }
837//! # #[derive(Debug, PartialEq)]
838//! #[derive(Deserialize)]
839//! struct Holder {
840//! #[serde(rename = "$value")]
841//! any_name: Choice,
842//! }
843//! # #[derive(Debug, PartialEq)]
844//! #[derive(Deserialize)]
845//! struct AnyName {
846//! field: T,
847//!
848//! choice: Holder,
849//! }
850//! # assert_eq!(
851//! # AnyName { field: (), choice: Holder { any_name: Choice::One } },
852//! # quick_xml::de::from_str(r#"<any-tag><field>...</field><choice><one>...</one></choice></any-tag>"#).unwrap(),
853//! # );
854//! # assert_eq!(
855//! # AnyName { field: (), choice: Holder { any_name: Choice::Two } },
856//! # quick_xml::de::from_str(r#"<any-tag><choice><two>...</two></choice><field>...</field></any-tag>"#).unwrap(),
857//! # );
858//! ```
859//! </td>
860//! </tr>
861//! <!-- ======================================================================================== -->
862//! <tr><th colspan="2">
863//!
864//! ## Sequences (`xs:all` and `xs:sequence` XML Schema types)
865//!
866//! </th></tr>
867//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
868//! <!-- 13 ==================================================================================== -->
869//! <tr>
870//! <td>
871//! A sequence inside of a tag without a dedicated name:
872//!
873//! ```xml
874//! <any-tag/>
875//! ```
876//! ```xml
877//! <any-tag>
878//! <item/>
879//! </any-tag>
880//! ```
881//! ```xml
882//! <any-tag>
883//! <item/>
884//! <item/>
885//! <item/>
886//! </any-tag>
887//! ```
888//! </td>
889//! <td>
890//!
891//! A structure with a field which is a sequence type, for example, [`Vec`].
892//! Because XML syntax does not distinguish between empty sequences and missed
893//! elements, we should indicate that on the Rust side, because serde will require
894//! that field `item` exists. You can do that in two possible ways:
895//!
896//! Use the `#[serde(default)]` attribute for a [field] or the entire [struct]:
897//! ```
898//! # use pretty_assertions::assert_eq;
899//! # use serde::Deserialize;
900//! # type Item = ();
901//! # #[derive(Debug, PartialEq)]
902//! #[derive(Deserialize)]
903//! struct AnyName {
904//! #[serde(default)]
905//! item: Vec<Item>,
906//! }
907//! # assert_eq!(
908//! # AnyName { item: vec![] },
909//! # quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(),
910//! # );
911//! # assert_eq!(
912//! # AnyName { item: vec![()] },
913//! # quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(),
914//! # );
915//! # assert_eq!(
916//! # AnyName { item: vec![(), (), ()] },
917//! # quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(),
918//! # );
919//! ```
920//!
921//! Use the [`Option`]. In that case the inner array will always contain at least one
922//! element after deserialization:
923//! ```ignore
924//! # use pretty_assertions::assert_eq;
925//! # use serde::Deserialize;
926//! # type Item = ();
927//! # #[derive(Debug, PartialEq)]
928//! #[derive(Deserialize)]
929//! struct AnyName {
930//! item: Option<Vec<Item>>,
931//! }
932//! # assert_eq!(
933//! # AnyName { item: None },
934//! # quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(),
935//! # );
936//! # assert_eq!(
937//! # AnyName { item: Some(vec![()]) },
938//! # quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(),
939//! # );
940//! # assert_eq!(
941//! # AnyName { item: Some(vec![(), (), ()]) },
942//! # quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(),
943//! # );
944//! ```
945//!
946//! See also [Frequently Used Patterns](#element-lists).
947//!
948//! [field]: https://serde.rs/field-attrs.html#default
949//! [struct]: https://serde.rs/container-attrs.html#default
950//! </td>
951//! </tr>
952//! <!-- 14 ==================================================================================== -->
953//! <tr>
954//! <td>
955//! A sequence with a strict order, probably with mixed content
956//! (text / CDATA and tags):
957//!
958//! ```xml
959//! <one>...</one>
960//! text
961//! <![CDATA[cdata]]>
962//! <two>...</two>
963//! <one>...</one>
964//! ```
965//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
966//!
967//! NOTE: this is just an example for showing mapping. XML does not allow
968//! multiple root tags -- you should wrap the sequence into a tag.
969//! </div>
970//! </td>
971//! <td>
972//!
973//! All elements are mapped to a heterogeneous sequential type: a tuple or a named tuple.
974//! Each element of the tuple should be able to be deserialized from the nested
975//! element content (`...`), except the enum types which would be deserialized
976//! from the full element (`<one>...</one>`), so they could use the element name
977//! to choose the right variant:
978//!
979//! ```
980//! # use pretty_assertions::assert_eq;
981//! # use serde::Deserialize;
982//! # type One = ();
983//! # type Two = ();
984//! # /*
985//! type One = ...;
986//! type Two = ...;
987//! # */
988//! # #[derive(Debug, PartialEq)]
989//! #[derive(Deserialize)]
990//! struct AnyName(One, String, Two, One);
991//! # assert_eq!(
992//! # AnyName((), "text cdata".into(), (), ()),
993//! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
994//! # );
995//! ```
996//! ```
997//! # use pretty_assertions::assert_eq;
998//! # use serde::Deserialize;
999//! # #[derive(Debug, PartialEq)]
1000//! #[derive(Deserialize)]
1001//! #[serde(rename_all = "snake_case")]
1002//! enum Choice {
1003//! One,
1004//! }
1005//! # type Two = ();
1006//! # /*
1007//! type Two = ...;
1008//! # */
1009//! type AnyName = (Choice, String, Two, Choice);
1010//! # assert_eq!(
1011//! # (Choice::One, "text cdata".to_string(), (), Choice::One),
1012//! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1013//! # );
1014//! ```
1015//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1016//!
1017//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1018//! so you cannot have two adjacent string types in your sequence.
1019//!
1020//! NOTE: In the case that the list might contain tags that are overlapped with
1021//! tags that do not correspond to the list you should add the feature [`overlapped-lists`].
1022//! </div>
1023//! </td>
1024//! </tr>
1025//! <!-- 15 ==================================================================================== -->
1026//! <tr>
1027//! <td>
1028//! A sequence with a non-strict order, probably with a mixed content
1029//! (text / CDATA and tags).
1030//!
1031//! ```xml
1032//! <one>...</one>
1033//! text
1034//! <![CDATA[cdata]]>
1035//! <two>...</two>
1036//! <one>...</one>
1037//! ```
1038//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1039//!
1040//! NOTE: this is just an example for showing mapping. XML does not allow
1041//! multiple root tags -- you should wrap the sequence into a tag.
1042//! </div>
1043//! </td>
1044//! <td>
1045//! A homogeneous sequence of elements with a fixed or dynamic size:
1046//!
1047//! ```
1048//! # use pretty_assertions::assert_eq;
1049//! # use serde::Deserialize;
1050//! # #[derive(Debug, PartialEq)]
1051//! #[derive(Deserialize)]
1052//! #[serde(rename_all = "snake_case")]
1053//! enum Choice {
1054//! One,
1055//! Two,
1056//! #[serde(other)]
1057//! Other,
1058//! }
1059//! type AnyName = [Choice; 4];
1060//! # assert_eq!(
1061//! # [Choice::One, Choice::Other, Choice::Two, Choice::One],
1062//! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1063//! # );
1064//! ```
1065//! ```
1066//! # use pretty_assertions::assert_eq;
1067//! # use serde::Deserialize;
1068//! # #[derive(Debug, PartialEq)]
1069//! #[derive(Deserialize)]
1070//! #[serde(rename_all = "snake_case")]
1071//! enum Choice {
1072//! One,
1073//! Two,
1074//! #[serde(rename = "$text")]
1075//! Other(String),
1076//! }
1077//! type AnyName = Vec<Choice>;
1078//! # assert_eq!(
1079//! # vec![
1080//! # Choice::One,
1081//! # Choice::Other("text cdata".into()),
1082//! # Choice::Two,
1083//! # Choice::One,
1084//! # ],
1085//! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1086//! # );
1087//! ```
1088//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1089//!
1090//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1091//! so you cannot have two adjacent string types in your sequence.
1092//! </div>
1093//! </td>
1094//! </tr>
1095//! <!-- 16 ==================================================================================== -->
1096//! <tr>
1097//! <td>
1098//! A sequence with a strict order, probably with a mixed content,
1099//! (text and tags) inside of the other element:
1100//!
1101//! ```xml
1102//! <any-tag attribute="...">
1103//! <one>...</one>
1104//! text
1105//! <![CDATA[cdata]]>
1106//! <two>...</two>
1107//! <one>...</one>
1108//! </any-tag>
1109//! ```
1110//! </td>
1111//! <td>
1112//!
1113//! A structure where all child elements are mapped to one field which has
1114//! a heterogeneous sequential type: a tuple or a named tuple. Each element of the
1115//! tuple should be able to be deserialized from the full element (`<one>...</one>`).
1116//!
1117//! You MUST specify `#[serde(rename = "$value")]` on that field:
1118//!
1119//! ```
1120//! # use pretty_assertions::assert_eq;
1121//! # use serde::Deserialize;
1122//! # type One = ();
1123//! # type Two = ();
1124//! # /*
1125//! type One = ...;
1126//! type Two = ...;
1127//! # */
1128//!
1129//! # #[derive(Debug, PartialEq)]
1130//! #[derive(Deserialize)]
1131//! struct AnyName {
1132//! #[serde(rename = "@attribute")]
1133//! # attribute: (),
1134//! # /*
1135//! attribute: ...,
1136//! # */
1137//! // Not (yet?) supported by serde
1138//! // https://github.com/serde-rs/serde/issues/1905
1139//! // #[serde(flatten)]
1140//! #[serde(rename = "$value")]
1141//! any_name: (One, String, Two, One),
1142//! }
1143//! # assert_eq!(
1144//! # AnyName { attribute: (), any_name: ((), "text cdata".into(), (), ()) },
1145//! # quick_xml::de::from_str("\
1146//! # <any-tag attribute='...'>\
1147//! # <one>...</one>\
1148//! # text \
1149//! # <![CDATA[cdata]]>\
1150//! # <two>...</two>\
1151//! # <one>...</one>\
1152//! # </any-tag>"
1153//! # ).unwrap(),
1154//! # );
1155//! ```
1156//! ```
1157//! # use pretty_assertions::assert_eq;
1158//! # use serde::Deserialize;
1159//! # type One = ();
1160//! # type Two = ();
1161//! # /*
1162//! type One = ...;
1163//! type Two = ...;
1164//! # */
1165//!
1166//! # #[derive(Debug, PartialEq)]
1167//! #[derive(Deserialize)]
1168//! struct NamedTuple(One, String, Two, One);
1169//!
1170//! # #[derive(Debug, PartialEq)]
1171//! #[derive(Deserialize)]
1172//! struct AnyName {
1173//! #[serde(rename = "@attribute")]
1174//! # attribute: (),
1175//! # /*
1176//! attribute: ...,
1177//! # */
1178//! // Not (yet?) supported by serde
1179//! // https://github.com/serde-rs/serde/issues/1905
1180//! // #[serde(flatten)]
1181//! #[serde(rename = "$value")]
1182//! any_name: NamedTuple,
1183//! }
1184//! # assert_eq!(
1185//! # AnyName { attribute: (), any_name: NamedTuple((), "text cdata".into(), (), ()) },
1186//! # quick_xml::de::from_str("\
1187//! # <any-tag attribute='...'>\
1188//! # <one>...</one>\
1189//! # text \
1190//! # <![CDATA[cdata]]>\
1191//! # <two>...</two>\
1192//! # <one>...</one>\
1193//! # </any-tag>"
1194//! # ).unwrap(),
1195//! # );
1196//! ```
1197//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1198//!
1199//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1200//! so you cannot have two adjacent string types in your sequence.
1201//! </div>
1202//! </td>
1203//! </tr>
1204//! <!-- 17 ==================================================================================== -->
1205//! <tr>
1206//! <td>
1207//! A sequence with a non-strict order, probably with a mixed content
1208//! (text / CDATA and tags) inside of the other element:
1209//!
1210//! ```xml
1211//! <any-tag>
1212//! <one>...</one>
1213//! text
1214//! <![CDATA[cdata]]>
1215//! <two>...</two>
1216//! <one>...</one>
1217//! </any-tag>
1218//! ```
1219//! </td>
1220//! <td>
1221//!
1222//! A structure where all child elements are mapped to one field which has
1223//! a homogeneous sequential type: an array-like container. A container type `T`
1224//! should be able to be deserialized from the nested element content (`...`),
1225//! except if it is an enum type which would be deserialized from the full
1226//! element (`<one>...</one>`).
1227//!
1228//! You MUST specify `#[serde(rename = "$value")]` on that field:
1229//!
1230//! ```
1231//! # use pretty_assertions::assert_eq;
1232//! # use serde::Deserialize;
1233//! # #[derive(Debug, PartialEq)]
1234//! #[derive(Deserialize)]
1235//! #[serde(rename_all = "snake_case")]
1236//! enum Choice {
1237//! One,
1238//! Two,
1239//! #[serde(rename = "$text")]
1240//! Other(String),
1241//! }
1242//! # #[derive(Debug, PartialEq)]
1243//! #[derive(Deserialize)]
1244//! struct AnyName {
1245//! #[serde(rename = "@attribute")]
1246//! # attribute: (),
1247//! # /*
1248//! attribute: ...,
1249//! # */
1250//! // Not (yet?) supported by serde
1251//! // https://github.com/serde-rs/serde/issues/1905
1252//! // #[serde(flatten)]
1253//! #[serde(rename = "$value")]
1254//! any_name: [Choice; 4],
1255//! }
1256//! # assert_eq!(
1257//! # AnyName { attribute: (), any_name: [
1258//! # Choice::One,
1259//! # Choice::Other("text cdata".into()),
1260//! # Choice::Two,
1261//! # Choice::One,
1262//! # ] },
1263//! # quick_xml::de::from_str("\
1264//! # <any-tag attribute='...'>\
1265//! # <one>...</one>\
1266//! # text \
1267//! # <![CDATA[cdata]]>\
1268//! # <two>...</two>\
1269//! # <one>...</one>\
1270//! # </any-tag>"
1271//! # ).unwrap(),
1272//! # );
1273//! ```
1274//! ```
1275//! # use pretty_assertions::assert_eq;
1276//! # use serde::Deserialize;
1277//! # #[derive(Debug, PartialEq)]
1278//! #[derive(Deserialize)]
1279//! #[serde(rename_all = "snake_case")]
1280//! enum Choice {
1281//! One,
1282//! Two,
1283//! #[serde(rename = "$text")]
1284//! Other(String),
1285//! }
1286//! # #[derive(Debug, PartialEq)]
1287//! #[derive(Deserialize)]
1288//! struct AnyName {
1289//! #[serde(rename = "@attribute")]
1290//! # attribute: (),
1291//! # /*
1292//! attribute: ...,
1293//! # */
1294//! // Not (yet?) supported by serde
1295//! // https://github.com/serde-rs/serde/issues/1905
1296//! // #[serde(flatten)]
1297//! #[serde(rename = "$value")]
1298//! any_name: Vec<Choice>,
1299//! }
1300//! # assert_eq!(
1301//! # AnyName { attribute: (), any_name: vec![
1302//! # Choice::One,
1303//! # Choice::Other("text cdata".into()),
1304//! # Choice::Two,
1305//! # Choice::One,
1306//! # ] },
1307//! # quick_xml::de::from_str("\
1308//! # <any-tag attribute='...'>\
1309//! # <one>...</one>\
1310//! # text \
1311//! # <![CDATA[cdata]]>\
1312//! # <two>...</two>\
1313//! # <one>...</one>\
1314//! # </any-tag>"
1315//! # ).unwrap(),
1316//! # );
1317//! ```
1318//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1319//!
1320//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1321//! so you cannot have two adjacent string types in your sequence.
1322//! </div>
1323//! </td>
1324//! </tr>
1325//! </tbody>
1326//! </table>
1327//!
1328//!
1329//! Mapping of `xsi:nil`
1330//! ====================
1331//!
1332//! quick-xml supports handling of the special [`xsi:nil`] attribute. When a field of optional
1333//! type is mapped to an XML element which has `xsi:nil="true"` set, or if that attribute
1334//! is placed on the parent XML element, the deserializer will call [`Visitor::visit_none`]
1335//! and skip the XML element corresponding to the field.
1336//!
1337//! Examples:
1338//!
1339//! ```
1340//! # use pretty_assertions::assert_eq;
1341//! # use serde::Deserialize;
1342//! #[derive(Deserialize, Debug, PartialEq)]
1343//! struct TypeWithOptionalField {
1344//! element: Option<String>,
1345//! }
1346//!
1347//! assert_eq!(
1348//! TypeWithOptionalField {
1349//! element: None,
1350//! },
1351//! quick_xml::de::from_str("
1352//! <any-tag xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'>
1353//! <element xsi:nil='true'>Content is skipped because of xsi:nil='true'</element>
1354//! </any-tag>
1355//! ").unwrap(),
1356//! );
1357//! ```
1358//!
1359//! You can capture attributes from the optional type, because `xsi:nil="true"` elements can have
1360//! attributes:
1361//! ```
1362//! # use pretty_assertions::assert_eq;
1363//! # use serde::Deserialize;
1364//! #[derive(Deserialize, Debug, PartialEq)]
1365//! struct TypeWithOptionalField {
1366//! #[serde(rename = "@attribute")]
1367//! attribute: usize,
1368//!
1369//! element: Option<String>,
1370//! non_optional: String,
1371//! }
1372//!
1373//! assert_eq!(
1374//! TypeWithOptionalField {
1375//! attribute: 42,
1376//! element: None,
1377//! non_optional: "Note, that non-optional fields will be deserialized as usual".to_string(),
1378//! },
1379//! quick_xml::de::from_str("
1380//! <any-tag attribute='42' xsi:nil='true' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'>
1381//! <element>Content is skipped because of xsi:nil='true'</element>
1382//! <non_optional>Note, that non-optional fields will be deserialized as usual</non_optional>
1383//! </any-tag>
1384//! ").unwrap(),
1385//! );
1386//! ```
1387//!
1388//! Generate Rust types from XML
1389//! ============================
1390//!
1391//! To speed up the creation of Rust types that represent a given XML file you can
1392//! use the [xml_schema_generator](https://github.com/Thomblin/xml_schema_generator).
1393//! It provides a standalone binary and a Rust library that parses one or more XML files
1394//! and generates a collection of structs that are compatible with quick_xml::de.
1395//!
1396//!
1397//!
1398//! Composition Rules
1399//! =================
1400//!
1401//! The XML format is very different from other formats supported by `serde`.
1402//! One such difference is how data in the serialized form is related to
1403//! the Rust type. Usually each byte in the data can be associated only with
1404//! one field in the data structure. However, XML is an exception.
1405//!
1406//! For example, take this XML:
1407//!
1408//! ```xml
1409//! <any>
1410//! <key attr="value"/>
1411//! </any>
1412//! ```
1413//!
1414//! and try to deserialize it to the struct `AnyName`:
1415//!
1416//! ```no_run
1417//! # use serde::Deserialize;
1418//! #[derive(Deserialize)]
1419//! struct AnyName { // AnyName calls `deserialize_struct` on `<any><key attr="value"/></any>`
1420//! // Used data: ^^^^^^^^^^^^^^^^^^^
1421//! key: Inner, // Inner calls `deserialize_struct` on `<key attr="value"/>`
1422//! // Used data: ^^^^^^^^^^^^
1423//! }
1424//! #[derive(Deserialize)]
1425//! struct Inner {
1426//! #[serde(rename = "@attr")]
1427//! attr: String, // String calls `deserialize_string` on `value`
1428//! // Used data: ^^^^^
1429//! }
1430//! ```
1431//!
1432//! The comments show which methods of a [`Deserializer`] are called by each struct's
1433//! `deserialize` method and which input they see. **Used data** shows what
1434//! content is actually used for deserializing. As you can see, the name of the inner
1435//! `<key>` tag is used both as a map key / outer struct field name and as part
1436//! of the inner struct (although the _value_ of the tag, i.e. `key`, is not used
1437//! by it).
1438//!
1439//!
1440//!
1441//! Enum Representations
1442//! ====================
1443//!
1444//! `quick-xml` represents enums differently in normal fields, `$text` fields and
1445//! `$value` fields. A normal representation is compatible with serde's adjacent
1446//! and internal tags feature -- tags for adjacently and internally tagged enums
1447//! are serialized using [`Serializer::serialize_unit_variant`] and deserialized
1448//! using [`Deserializer::deserialize_enum`].
1449//!
1450//! Use these simple rules to remember how an enum is represented in XML:
1451//! - In a `$value` field the representation is always the same as the top-level representation;
1452//! - In a `$text` field the representation is always the same as in a normal field,
1453//! but the surrounding tags with the field name are removed;
1454//! - In a normal field the representation always contains a tag with the field name.
1455//!
1456//! Normal enum variant
1457//! -------------------
1458//!
1459//! To model an `xs:choice` XML construct use `$value` field.
1460//! To model a top-level `xs:choice` just use the enum type.
1461//!
1462//! |Kind |Top-level and in `$value` field |In normal field |In `$text` field |
1463//! |-------|-----------------------------------------|---------------------|---------------------|
1464//! |Unit |`<Unit/>` |`<field>Unit</field>`|`Unit` |
1465//! |Newtype|`<Newtype>42</Newtype>` |Err(Custom) [^0] |Err(Custom) [^0] |
1466//! |Tuple |`<Tuple>42</Tuple><Tuple>answer</Tuple>` |Err(Custom) [^0] |Err(Custom) [^0] |
1467//! |Struct |`<Struct><q>42</q><a>answer</a></Struct>`|Err(Custom) [^0] |Err(Custom) [^0] |
1468//!
1469//! `$text` enum variant
1470//! --------------------
1471//!
1472//! |Kind |Top-level and in `$value` field |In normal field |In `$text` field |
1473//! |-------|-----------------------------------------|---------------------|---------------------|
1474//! |Unit |_(empty)_ |`<field/>` |_(empty)_ |
1475//! |Newtype|`42` |Err(Custom) [^0] [^1]|Err(Custom) [^0] [^2]|
1476//! |Tuple |`42 answer` |Err(Custom) [^0] [^3]|Err(Custom) [^0] [^4]|
1477//! |Struct |Err(Custom) [^0] |Err(Custom) [^0] |Err(Custom) [^0] |
1478//!
1479//! [^0]: The error is returned by the deserialized type. In the case of a derived implementation a `Custom`
1480//! error will be returned, but a custom deserialize implementation can successfully deserialize
1481//! the value from the string which will be passed to it.
1482//!
1483//! [^1]: If this were serialized as `<field>42</field>`, it would be ambiguous during deserialization,
1484//! because it would clash with the `Unit` representation in a normal field.
1485//!
1486//! [^2]: If this were serialized as `42`, it would be ambiguous during deserialization,
1487//! because it would clash with the `Unit` representation in a `$text` field.
1488//!
1489//! [^3]: If this were serialized as `<field>42 answer</field>`, it would be ambiguous during deserialization,
1490//! because it would clash with the `Unit` representation in a normal field.
1491//!
1492//! [^4]: If this were serialized as `42 answer`, it would be ambiguous during deserialization,
1493//! because it would clash with the `Unit` representation in a `$text` field.
1494//!
1495//!
1496//!
1497//! `$text` and `$value` special names
1498//! ==================================
1499//!
1500//! quick-xml supports two special names for fields -- `$text` and `$value`.
1501//! Although they may seem the same, there is a distinction. Two different
1502//! names are required mostly for serialization, because quick-xml should know
1503//! how you want to serialize certain constructs, which could be represented
1504//! through XML in multiple different ways.
1505//!
1506//! The only difference is in how complex types and sequences are serialized.
1507//! If you doubt which one you should select, begin with [`$value`](#value).
1508//!
1509//! If you have both `$text` and `$value` in your struct, then text events will be
1510//! mapped to the `$text` field:
1511//!
1512//! ```
1513//! # use serde::Deserialize;
1514//! # use quick_xml::de::from_str;
1515//! #[derive(Deserialize, PartialEq, Debug)]
1516//! struct TextAndValue {
1517//! #[serde(rename = "$text")]
1518//! text: Option<String>,
1519//!
1520//! #[serde(rename = "$value")]
1521//! value: Option<String>,
1522//! }
1523//!
1524//! let object: TextAndValue = from_str("<AnyName>text <![CDATA[and CDATA]]></AnyName>").unwrap();
1525//! assert_eq!(object, TextAndValue {
1526//! text: Some("text and CDATA".to_string()),
1527//! value: None,
1528//! });
1529//! ```
1530//!
1531//! ## `$text`
1532//! `$text` is used when you want to write your XML as a text or a CDATA content.
1533//! More formally, field with that name represents simple type definition with
1534//! `{variety} = atomic` or `{variety} = union` whose basic members are all atomic,
1535//! as described in the [specification].
1536//!
1537//! As a result, not all types of such fields can be serialized. Serialization
1538//! of only the following types is supported:
1539//! - all primitive types (strings, numbers, booleans)
1540//! - unit variants of enumerations (serializes to a name of a variant)
1541//! - newtypes (delegates serialization to inner type)
1542//! - [`Option`] of above (`None` serializes to nothing)
1543//! - sequences (including tuples and tuple variants of enumerations) of above,
1544//! excluding `None` and empty string elements (because it will not be possible
1545//! to deserialize them back). The elements are separated by space(s)
1546//! - unit type `()` and unit structs (serializes to nothing)
1547//!
1548//! Complex types, such as structs and maps, are not supported in this field.
1549//! If you want them, you should use `$value`.
1550//!
1551//! Sequences are serialized to a space-delimited string, that is why only certain
1552//! types are allowed in this mode:
1553//!
1554//! ```
1555//! # use serde::{Deserialize, Serialize};
1556//! # use quick_xml::de::from_str;
1557//! # use quick_xml::se::to_string;
1558//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1559//! struct AnyName {
1560//! #[serde(rename = "$text")]
1561//! field: Vec<usize>,
1562//! }
1563//!
1564//! let obj = AnyName { field: vec![1, 2, 3] };
1565//! let xml = to_string(&obj).unwrap();
1566//! assert_eq!(xml, "<AnyName>1 2 3</AnyName>");
1567//!
1568//! let object: AnyName = from_str(&xml).unwrap();
1569//! assert_eq!(object, obj);
1570//! ```
1571//!
1572//! ## `$value`
1573//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1574//!
1575//! NOTE: a name `#content` would better explain the purpose of that field,
1576//! but `$value` is used for compatibility with other XML serde crates, which
1577//! use that name. This will allow you to switch XML crates more smoothly if required.
1578//! </div>
1579//!
1580//! The representation of primitive types in `$value` does not differ from their
1581//! representation in `$text` fields. The difference is how sequences are serialized
1582//! and deserialized. `$value` serializes each sequence item as a separate XML element.
1583//! How the name of the XML element is chosen depends on the field's type. For
1584//! `enum`s, the variant name is used. For `struct`s, the name of the `struct`
1585//! is used.
1586//!
1587//! During deserialization, if the `$value` field is an enum, then the variant's
1588//! name is matched against. That's **not** the case with structs, however, since
1589//! `serde` does not expose type names of nested fields. This does mean that **any**
1590//! type could be deserialized into a `$value` struct-type field, so long as the
1591//! struct's fields have compatible types (or are captured as text by `String`
1592//! or similar-behaving types). This can be handy when using generic types in fields
1593//! where one knows in advance what to expect. If you do not know what to expect,
1594//! however, prefer an enum with all possible variants.
1595//!
1596//! Unit structs and unit type `()` serialize to nothing and can be deserialized
1597//! from any content.
1598//!
1599//! Serialization and deserialization of a `$value` field is performed as usual, except
1600//! that the name for an XML element will be given by the serialized type, instead of
1601//! the field. This allows serializing enumerated types, where the variant is encoded
1602//! as a tag name, and so representing an XSD `xs:choice` schema by a Rust `enum`.
1603//!
1604//! In the example below, `field` will be serialized as `<field>A</field>`, `<field>B</field>`
1605//! or `<field>C</field>`, because elements get their names from the field name. The elements
1606//! `<A/>`, `<B/>` or `<C/>` cannot be deserialized into it, because `AnyName` looks only for `<field>`:
1607//!
1608//! ```
1609//! # use serde::{Deserialize, Serialize};
1610//! # use pretty_assertions::assert_eq;
1611//! # #[derive(PartialEq, Debug)]
1612//! #[derive(Deserialize, Serialize)]
1613//! enum Enum { A, B, C }
1614//!
1615//! # #[derive(PartialEq, Debug)]
1616//! #[derive(Deserialize, Serialize)]
1617//! struct AnyName {
1618//! // <field>A</field>, <field>B</field>, or <field>C</field>
1619//! field: Enum,
1620//! }
1621//! # assert_eq!(
1622//! # quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(),
1623//! # "<AnyName><field>A</field></AnyName>",
1624//! # );
1625//! # assert_eq!(
1626//! # AnyName { field: Enum::B },
1627//! # quick_xml::de::from_str("<root><field>B</field></root>").unwrap(),
1628//! # );
1629//! ```
1630//!
1631//! If you rename the field to `$value`, then `field` would be serialized as `<A/>`,
1632//! `<B/>` or `<C/>`, depending on its content. It is also possible to
1633//! deserialize it from the same elements:
1634//!
1635//! ```
1636//! # use serde::{Deserialize, Serialize};
1637//! # use pretty_assertions::assert_eq;
1638//! # #[derive(Deserialize, Serialize, PartialEq, Debug)]
1639//! # enum Enum { A, B, C }
1640//! #
1641//! # #[derive(PartialEq, Debug)]
1642//! #[derive(Deserialize, Serialize)]
1643//! struct AnyName {
1644//! // <A/>, <B/> or <C/>
1645//! #[serde(rename = "$value")]
1646//! field: Enum,
1647//! }
1648//! # assert_eq!(
1649//! # quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(),
1650//! # "<AnyName><A/></AnyName>",
1651//! # );
1652//! # assert_eq!(
1653//! # AnyName { field: Enum::B },
1654//! # quick_xml::de::from_str("<root><B/></root>").unwrap(),
1655//! # );
1656//! ```
1657//!
1658//! The next example demonstrates how generic types can be used in conjunction
1659//! with `$value`-named fields to allow the reuse of wrapping structs. A common
1660//! example use case for this feature is SOAP messages, which can be commonly
1661//! found wrapped in `<soapenv:Envelope> ... </soapenv:Envelope>`.
1662//!
1663//! ```rust
1664//! # use pretty_assertions::assert_eq;
1665//! # use quick_xml::de::from_str;
1666//! # use quick_xml::se::to_string;
1667//! # use serde::{Deserialize, Serialize};
1668//! #
1669//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1670//! struct Envelope<T> {
1671//! body: Body<T>,
1672//! }
1673//!
1674//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1675//! struct Body<T> {
1676//! #[serde(rename = "$value")]
1677//! inner: T,
1678//! }
1679//!
1680//! #[derive(Serialize, PartialEq, Debug)]
1681//! struct Example {
1682//! a: i32,
1683//! }
1684//!
1685//! assert_eq!(
1686//! to_string(&Envelope { body: Body { inner: Example { a: 42 } } }).unwrap(),
1687//! // Notice how `inner` is not present in the XML
1688//! "<Envelope><body><Example><a>42</a></Example></body></Envelope>",
1689//! );
1690//!
1691//! #[derive(Deserialize, PartialEq, Debug)]
1692//! struct AnotherExample {
1693//! a: i32,
1694//! }
1695//!
1696//! assert_eq!(
1697//! // Notice that the tag name does nothing for a struct in a `$value` field
1698//! Envelope { body: Body { inner: AnotherExample { a: 42 } } },
1699//! from_str("<Envelope><body><Example><a>42</a></Example></body></Envelope>").unwrap(),
1700//! );
1701//! ```
1702//!
1703//! ### Primitives and sequences of primitives
1704//!
1705//! Sequences are serialized to a list of elements. Note that types that do not
1706//! produce their own tag (i. e. primitives) will produce [`SeError::Unsupported`]
1707//! if they contain more than one element, because such a sequence cannot be
1708//! deserialized to the same value:
1709//!
1710//! ```
1711//! # use serde::{Deserialize, Serialize};
1712//! # use pretty_assertions::assert_eq;
1713//! # use quick_xml::de::from_str;
1714//! # use quick_xml::se::to_string;
1715//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1716//! struct AnyName {
1717//! #[serde(rename = "$value")]
1718//! field: Vec<usize>,
1719//! }
1720//!
1721//! let obj = AnyName { field: vec![1, 2, 3] };
1722//! // If this object were serialized, it would be represented as "<AnyName>123</AnyName>"
1723//! to_string(&obj).unwrap_err();
1724//!
1725//! let object: AnyName = from_str("<AnyName>123</AnyName>").unwrap();
1726//! assert_eq!(object, AnyName { field: vec![123] });
1727//!
1728//! // `1 2 3` is mapped to a single `usize` element
1729//! // It is impossible to deserialize list of primitives to such field
1730//! from_str::<AnyName>("<AnyName>1 2 3</AnyName>").unwrap_err();
1731//! ```
1732//!
1733//! A particular case of that example is a string `$value` field, which probably
1734//! would be the most used example of that attribute:
1735//!
1736//! ```
1737//! # use serde::{Deserialize, Serialize};
1738//! # use pretty_assertions::assert_eq;
1739//! # use quick_xml::de::from_str;
1740//! # use quick_xml::se::to_string;
1741//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1742//! struct AnyName {
1743//! #[serde(rename = "$value")]
1744//! field: String,
1745//! }
1746//!
1747//! let obj = AnyName { field: "content".to_string() };
1748//! let xml = to_string(&obj).unwrap();
1749//! assert_eq!(xml, "<AnyName>content</AnyName>");
1750//! ```
1751//!
1752//! ### Structs and sequences of structs
1753//!
1754//! Note that structures do not have a serializable name either (the name of the
1755//! type is never used), so it is impossible to serialize a non-unit struct or
1756//! a sequence of non-unit structs in a `$value` field. (Sequences of) unit structs
1757//! are serialized as an empty string, because units themselves serialize
1758//! to nothing:
1759//!
1760//! ```
1761//! # use serde::{Deserialize, Serialize};
1762//! # use pretty_assertions::assert_eq;
1763//! # use quick_xml::de::from_str;
1764//! # use quick_xml::se::to_string;
1765//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1766//! struct Unit;
1767//!
1768//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1769//! struct AnyName {
1770//! // #[serde(default)] is required for deserialization of empty lists
1771//! // This is a general note, not related to $value
1772//! #[serde(rename = "$value", default)]
1773//! field: Vec<Unit>,
1774//! }
1775//!
1776//! let obj = AnyName { field: vec![Unit, Unit, Unit] };
1777//! let xml = to_string(&obj).unwrap();
1778//! assert_eq!(xml, "<AnyName/>");
1779//!
1780//! let object: AnyName = from_str("<AnyName/>").unwrap();
1781//! assert_eq!(object, AnyName { field: vec![] });
1782//!
1783//! let object: AnyName = from_str("<AnyName></AnyName>").unwrap();
1784//! assert_eq!(object, AnyName { field: vec![] });
1785//!
1786//! let object: AnyName = from_str("<AnyName><A/><B/><C/></AnyName>").unwrap();
1787//! assert_eq!(object, AnyName { field: vec![Unit, Unit, Unit] });
1788//! ```
1789//!
1790//! ### Enums and sequences of enums
1791//!
//! Enumerations use the variant name as an element name:
1793//!
1794//! ```
1795//! # use serde::{Deserialize, Serialize};
1796//! # use pretty_assertions::assert_eq;
1797//! # use quick_xml::de::from_str;
1798//! # use quick_xml::se::to_string;
1799//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1800//! struct AnyName {
1801//! #[serde(rename = "$value")]
1802//! field: Vec<Enum>,
1803//! }
1804//!
1805//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1806//! enum Enum { A, B, C }
1807//!
1808//! let obj = AnyName { field: vec![Enum::A, Enum::B, Enum::C] };
1809//! let xml = to_string(&obj).unwrap();
1810//! assert_eq!(
1811//! xml,
1812//! "<AnyName>\
1813//! <A/>\
1814//! <B/>\
1815//! <C/>\
1816//! </AnyName>"
1817//! );
1818//!
1819//! let object: AnyName = from_str(&xml).unwrap();
1820//! assert_eq!(object, obj);
1821//! ```
1822//!
1823//!
1824//!
1825//! Frequently Used Patterns
1826//! ========================
1827//!
//! Some XML constructs are used so frequently that it is worth documenting the recommended
//! way to represent them in Rust. The sections below describe them.
1830//!
1831//! `<element>` lists
1832//! -----------------
//! Many XML formats wrap lists of elements in an additional container,
//! although this is not required by the XML rules:
1835//!
1836//! ```xml
1837//! <root>
1838//! <field1/>
1839//! <field2/>
1840//! <list><!-- Container -->
1841//! <element/>
1842//! <element/>
1843//! <element/>
1844//! </list>
1845//! <field3/>
1846//! </root>
1847//! ```
1848//! In this case, there is a great desire to describe this XML in this way:
1849//! ```
1850//! /// Represents <element/>
1851//! type Element = ();
1852//!
1853//! /// Represents <root>...</root>
1854//! struct AnyName {
1855//! // Incorrect
1856//! list: Vec<Element>,
1857//! }
1858//! ```
//! This will not work, because the `<list>` element can potentially have attributes
//! and other elements inside. You should define the struct for `<list>`
//! explicitly, as you would do in the XSD for that XML:
1862//! ```
1863//! /// Represents <element/>
1864//! type Element = ();
1865//!
1866//! /// Represents <root>...</root>
1867//! struct AnyName {
1868//! // Correct
1869//! list: List,
1870//! }
1871//! /// Represents <list>...</list>
1872//! struct List {
1873//! element: Vec<Element>,
1874//! }
1875//! ```
1876//!
//! If you want to simplify your API, you could write a simple function for unwrapping
//! the inner list and apply it via [`deserialize_with`]:
1879//!
1880//! ```
1881//! # use pretty_assertions::assert_eq;
1882//! use quick_xml::de::from_str;
1883//! use serde::{Deserialize, Deserializer};
1884//!
1885//! /// Represents <element/>
1886//! type Element = ();
1887//!
1888//! /// Represents <root>...</root>
1889//! #[derive(Deserialize, Debug, PartialEq)]
1890//! struct AnyName {
1891//! #[serde(deserialize_with = "unwrap_list")]
1892//! list: Vec<Element>,
1893//! }
1894//!
1895//! fn unwrap_list<'de, D>(deserializer: D) -> Result<Vec<Element>, D::Error>
1896//! where
1897//! D: Deserializer<'de>,
1898//! {
1899//! /// Represents <list>...</list>
1900//! #[derive(Deserialize)]
1901//! struct List {
1902//! // default allows empty list
1903//! #[serde(default)]
1904//! element: Vec<Element>,
1905//! }
1906//! Ok(List::deserialize(deserializer)?.element)
1907//! }
1908//!
1909//! assert_eq!(
1910//! AnyName { list: vec![(), (), ()] },
1911//! from_str("
1912//! <root>
1913//! <list>
1914//! <element/>
1915//! <element/>
1916//! <element/>
1917//! </list>
1918//! </root>
1919//! ").unwrap(),
1920//! );
1921//! ```
1922//!
1923//! Instead of writing such functions manually, you also could try <https://lib.rs/crates/serde-query>.
1924//!
1925//! Overlapped (Out-of-Order) Elements
1926//! ----------------------------------
1927//! In the case that the list might contain tags that are overlapped with
1928//! tags that do not correspond to the list (this is a usual case in XML
1929//! documents) like this:
1930//! ```xml
1931//! <any-name>
1932//! <item/>
1933//! <another-item/>
1934//! <item/>
1935//! <item/>
1936//! </any-name>
1937//! ```
1938//! you should enable the [`overlapped-lists`] feature to make it possible
1939//! to deserialize this to:
1940//! ```no_run
1941//! # use serde::Deserialize;
1942//! #[derive(Deserialize)]
1943//! #[serde(rename_all = "kebab-case")]
1944//! struct AnyName {
1945//! item: Vec<()>,
1946//! another_item: (),
1947//! }
1948//! ```
1949//!
1950//!
1951//! Internally Tagged Enums
1952//! -----------------------
1953//! [Tagged enums] are currently not supported because of an issue in the Serde
1954//! design (see [serde#1183] and [quick-xml#586]) and missing optimizations in
1955//! Serde which could be useful for XML parsing ([serde#1495]). This can be worked
1956//! around by manually implementing deserialize with `#[serde(deserialize_with = "func")]`
1957//! or implementing [`Deserialize`], but this can get very tedious very fast for
1958//! files with large amounts of tagged enums. To help with this issue quick-xml
1959//! provides a macro [`impl_deserialize_for_internally_tagged_enum!`]. See the
1960//! macro documentation for details.
1961//!
1962//!
1963//! [`overlapped-lists`]: ../index.html#overlapped-lists
1964//! [specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition
1965//! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with
1966//! [`xsi:nil`]: https://www.w3.org/TR/xmlschema-1/#xsi_nil
1967//! [`Serializer::serialize_unit_variant`]: serde::Serializer::serialize_unit_variant
1968//! [`Deserializer::deserialize_enum`]: serde::Deserializer::deserialize_enum
1969//! [`SeError::Unsupported`]: crate::errors::serialize::SeError::Unsupported
1970//! [Tagged enums]: https://serde.rs/enum-representations.html#internally-tagged
1971//! [serde#1183]: https://github.com/serde-rs/serde/issues/1183
1972//! [serde#1495]: https://github.com/serde-rs/serde/issues/1495
1973//! [quick-xml#586]: https://github.com/tafia/quick-xml/issues/586
1974//! [`impl_deserialize_for_internally_tagged_enum!`]: crate::impl_deserialize_for_internally_tagged_enum
1975
/// Generates a `$method` implementation that obtains a string from
/// `self.read_string()` and passes it to the same-named method of
/// [`SimpleTypeDeserializer`].
///
/// The optional second argument is a single token placed in front of `self`
/// in the generated signature (e.g. `mut`).
macro_rules! forward_to_simple_type {
    ($method:ident, $($modifier:tt)?) => {
        #[inline]
        fn $method<V>($($modifier)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            let text = self.read_string()?;
            SimpleTypeDeserializer::from_text(text).$method(visitor)
        }
    };
}
1987
/// Implements deserialization methods for scalar types, such as numbers, strings,
/// byte arrays, booleans and identifiers, together with the methods that simply
/// delegate to another `deserialize_*` method of the same deserializer.
///
/// The optional `$mut` token is passed through to `forward_to_simple_type!`,
/// which places it in front of `self` in the generated signatures.
macro_rules! deserialize_primitives {
    ($($mut:tt)?) => {
        // Signed integers
        forward_to_simple_type!(deserialize_i8, $($mut)?);
        forward_to_simple_type!(deserialize_i16, $($mut)?);
        forward_to_simple_type!(deserialize_i32, $($mut)?);
        forward_to_simple_type!(deserialize_i64, $($mut)?);

        // Unsigned integers
        forward_to_simple_type!(deserialize_u8, $($mut)?);
        forward_to_simple_type!(deserialize_u16, $($mut)?);
        forward_to_simple_type!(deserialize_u32, $($mut)?);
        forward_to_simple_type!(deserialize_u64, $($mut)?);

        // 128-bit integers
        forward_to_simple_type!(deserialize_i128, $($mut)?);
        forward_to_simple_type!(deserialize_u128, $($mut)?);

        // Floating point numbers
        forward_to_simple_type!(deserialize_f32, $($mut)?);
        forward_to_simple_type!(deserialize_f64, $($mut)?);

        forward_to_simple_type!(deserialize_bool, $($mut)?);
        forward_to_simple_type!(deserialize_char, $($mut)?);

        forward_to_simple_type!(deserialize_str, $($mut)?);
        forward_to_simple_type!(deserialize_string, $($mut)?);

        /// Forwards deserialization to the [`deserialize_any`](#method.deserialize_any).
        #[inline]
        fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_any(visitor)
        }

        /// Forwards deserialization to the [`deserialize_bytes`](#method.deserialize_bytes).
        #[inline]
        fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_bytes(visitor)
        }

        /// Named units are represented the same as [unnamed units](#method.deserialize_unit).
        #[inline]
        fn deserialize_unit_struct<V>(
            self,
            _name: &'static str,
            visitor: V,
        ) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_unit(visitor)
        }

        /// Tuples are represented the same as [sequences](#method.deserialize_seq).
        /// The requested length is ignored.
        #[inline]
        fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_seq(visitor)
        }

        /// Named tuples are represented the same as [unnamed tuples](#method.deserialize_tuple).
        #[inline]
        fn deserialize_tuple_struct<V>(
            self,
            _name: &'static str,
            len: usize,
            visitor: V,
        ) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_tuple(len, visitor)
        }

        /// Forwards deserialization to the [`deserialize_struct`](#method.deserialize_struct)
        /// with empty name and fields.
        #[inline]
        fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_struct("", &[], visitor)
        }

        /// Identifiers are represented as [strings](#method.deserialize_str).
        #[inline]
        fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Forwards deserialization to the [`deserialize_unit`](#method.deserialize_unit).
        #[inline]
        fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_unit(visitor)
        }
    };
}
2097
2098mod attributes;
2099mod key;
2100mod map;
2101mod resolver;
2102mod simple_type;
2103mod text;
2104mod var;
2105
2106pub use self::attributes::AttributesDeserializer;
2107pub use self::resolver::{EntityResolver, PredefinedEntityResolver};
2108pub use self::simple_type::SimpleTypeDeserializer;
2109pub use crate::errors::serialize::DeError;
2110use crate::XmlVersion;
2111
2112use crate::{
2113 de::map::ElementMapAccess,
2114 encoding::Decoder,
2115 errors::Error,
2116 escape::{parse_number, EscapeError},
2117 events::{BytesCData, BytesEnd, BytesRef, BytesStart, BytesText, Event},
2118 name::QName,
2119 reader::NsReader,
2120};
2121use serde::de::{
2122 self, Deserialize, DeserializeOwned, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor,
2123};
2124use std::borrow::Cow;
2125#[cfg(feature = "overlapped-lists")]
2126use std::collections::VecDeque;
2127use std::io::BufRead;
2128use std::mem::replace;
2129#[cfg(feature = "overlapped-lists")]
2130use std::num::NonZeroUsize;
2131use std::ops::{Deref, Range};
2132
/// The special name `$text`: data represented by a text node or a CDATA node.
/// XML markup is not expected.
pub(crate) const TEXT_KEY: &str = "$text";
/// The special name `$value`: data represented by any XML markup inside.
pub(crate) const VALUE_KEY: &str = "$value";
2137
/// Returns `true` if the character is **not** a whitespace character
/// (blank, carriage return, new line or tab).
#[inline]
const fn is_non_whitespace(ch: char) -> bool {
    !matches!(ch, ' ' | '\r' | '\n' | '\t')
}

/// Decoded and concatenated content of consecutive [`Text`] and [`CData`]
/// events. _Consecutive_ means that events should follow each other or be
/// delimited only by (any count of) [`Comment`] or [`PI`] events.
///
/// Internally text is stored in `Cow<str>`. Cloning of text is cheap while it
/// is borrowed and makes copies of data when it is owned.
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
/// [`Comment`]: Event::Comment
/// [`PI`]: Event::PI
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Text<'a> {
    /// Untrimmed text after concatenating content of all
    /// [`Text`] and [`CData`] events
    ///
    /// [`Text`]: Event::Text
    /// [`CData`]: Event::CData
    text: Cow<'a, str>,
    /// A byte range into `text` which contains data after trimming.
    /// Both ends always lie on character boundaries.
    content: Range<usize>,
}

impl<'a> Text<'a> {
    /// Wraps `text` and pre-computes the byte range of its trimmed content.
    fn new(text: Cow<'a, str>) -> Self {
        let start = text.find(is_non_whitespace).unwrap_or(0);
        // The exclusive end must be placed *after* the last non-whitespace
        // character. That character may occupy several bytes, so its UTF-8
        // length is added instead of a fixed `1`; otherwise the range could
        // end in the middle of a character and slicing would panic.
        let end = text
            .char_indices()
            .rev()
            .find(|&(_, ch)| is_non_whitespace(ch))
            .map_or(0, |(offset, ch)| offset + ch.len_utf8());

        // Blank (or empty) text has no content at all
        let content = if start >= end { 0..0 } else { start..end };

        Self { text, content }
    }

    /// Returns text without leading and trailing whitespaces as [defined] by XML specification.
    ///
    /// If the text is stored as an owned string, the trimmed part is copied
    /// into a new `String`. If you want to only check if text contains only
    /// whitespaces, use [`is_blank`](Self::is_blank), which will not allocate.
    ///
    /// # Example
    ///
    /// ```
    /// # use quick_xml::de::Text;
    /// # use pretty_assertions::assert_eq;
    /// #
    /// let text = Text::from("");
    /// assert_eq!(text.trimmed(), "");
    ///
    /// let text = Text::from(" \r\n\t ");
    /// assert_eq!(text.trimmed(), "");
    ///
    /// let text = Text::from("  some useful text  ");
    /// assert_eq!(text.trimmed(), "some useful text");
    ///
    /// let text = Text::from("  café  ");
    /// assert_eq!(text.trimmed(), "café");
    /// ```
    ///
    /// [defined]: https://www.w3.org/TR/xml11/#NT-S
    pub fn trimmed(&self) -> Cow<'a, str> {
        match self.text {
            Cow::Borrowed(text) => Cow::Borrowed(&text[self.content.clone()]),
            Cow::Owned(ref text) => Cow::Owned(text[self.content.clone()].to_string()),
        }
    }

    /// Returns `true` if text is empty or contains only whitespaces as [defined] by XML specification.
    ///
    /// # Example
    ///
    /// ```
    /// # use quick_xml::de::Text;
    /// # use pretty_assertions::assert_eq;
    /// #
    /// let text = Text::from("");
    /// assert_eq!(text.is_blank(), true);
    ///
    /// let text = Text::from(" \r\n\t ");
    /// assert_eq!(text.is_blank(), true);
    ///
    /// let text = Text::from("  some useful text  ");
    /// assert_eq!(text.is_blank(), false);
    /// ```
    ///
    /// [defined]: https://www.w3.org/TR/xml11/#NT-S
    pub fn is_blank(&self) -> bool {
        self.content.is_empty()
    }
}
2229
2230impl<'a> Deref for Text<'a> {
2231 type Target = str;
2232
2233 #[inline]
2234 fn deref(&self) -> &Self::Target {
2235 self.text.deref()
2236 }
2237}
2238
2239impl<'a> From<&'a str> for Text<'a> {
2240 #[inline]
2241 fn from(text: &'a str) -> Self {
2242 Self::new(Cow::Borrowed(text))
2243 }
2244}
2245
2246impl<'a> From<String> for Text<'a> {
2247 #[inline]
2248 fn from(text: String) -> Self {
2249 Self::new(Cow::Owned(text))
2250 }
2251}
2252
2253impl<'a> From<Cow<'a, str>> for Text<'a> {
2254 #[inline]
2255 fn from(text: Cow<'a, str>) -> Self {
2256 Self::new(text)
2257 }
2258}
2259
2260////////////////////////////////////////////////////////////////////////////////////////////////////
2261
/// Simplified event that contains only those variants that are used by the deserializer.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DeEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Decoded and concatenated content of consecutive [`Text`] and [`CData`]
    /// events. _Consecutive_ means that events should follow each other or be
    /// delimited only by (any count of) [`Comment`] or [`PI`] events.
    ///
    /// [`Text`]: Event::Text
    /// [`CData`]: Event::CData
    /// [`Comment`]: Event::Comment
    /// [`PI`]: Event::PI
    Text(Text<'a>),
    /// End of XML document.
    Eof,
}
2281
2282////////////////////////////////////////////////////////////////////////////////////////////////////
2283
/// Simplified event that contains only those variants that are used by the deserializer,
/// but with [`Text`] events not yet fully processed.
///
/// [`Text`] events should be trimmed if they are not surrounded by other
/// [`Text`] or [`CData`] events. This event contains the intermediate state of a [`Text`]
/// event, where it is trimmed from the start, but not from the end. To trim
/// trailing spaces we should look ahead by one deserializer event (i. e. skip all
/// comments and processing instructions).
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PayloadEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// Reference `&ref;` in the textual data.
    GeneralRef(BytesRef<'a>),
    /// End of XML document.
    Eof,
}
2312
2313impl<'a> PayloadEvent<'a> {
2314 /// Ensures that all data is owned to extend the object's lifetime if necessary.
2315 #[inline]
2316 fn into_owned(self) -> PayloadEvent<'static> {
2317 match self {
2318 PayloadEvent::Start(e) => PayloadEvent::Start(e.into_owned()),
2319 PayloadEvent::End(e) => PayloadEvent::End(e.into_owned()),
2320 PayloadEvent::Text(e) => PayloadEvent::Text(e.into_owned()),
2321 PayloadEvent::CData(e) => PayloadEvent::CData(e.into_owned()),
2322 PayloadEvent::DocType(e) => PayloadEvent::DocType(e.into_owned()),
2323 PayloadEvent::GeneralRef(e) => PayloadEvent::GeneralRef(e.into_owned()),
2324 PayloadEvent::Eof => PayloadEvent::Eof,
2325 }
2326 }
2327}
2328
/// An intermediate reader that consumes [`PayloadEvent`]s and produces final [`DeEvent`]s.
/// [`PayloadEvent::Text`] events that are followed by any event except
/// [`PayloadEvent::Text`] or [`PayloadEvent::CData`] are trimmed from the end.
struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolver> {
    /// A source of low-level XML events
    reader: R,
    /// Intermediate event that could be returned by the next call to `next()`.
    /// If that is the `Text` event then leading spaces are already trimmed, but
    /// trailing spaces are not. Before the event is returned, trimming of
    /// the spaces could be necessary
    lookahead: Result<PayloadEvent<'i>, DeError>,

    /// Used to resolve unknown entities that would otherwise cause the parser
    /// to return an [`EscapeError::UnrecognizedEntity`] error.
    ///
    /// [`EscapeError::UnrecognizedEntity`]: crate::escape::EscapeError::UnrecognizedEntity
    entity_resolver: E,
}
2347
2348impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2349 fn new(mut reader: R, entity_resolver: E) -> Self {
2350 // Lookahead by one event immediately, so we do not need to check in the
2351 // loop if we need lookahead or not
2352 let lookahead = reader.next();
2353
2354 Self {
2355 reader,
2356 lookahead,
2357 entity_resolver,
2358 }
2359 }
2360
2361 /// Returns `true` if all events was consumed
2362 const fn is_empty(&self) -> bool {
2363 matches!(self.lookahead, Ok(PayloadEvent::Eof))
2364 }
2365
2366 /// Read next event and put it in lookahead, return the current lookahead
2367 #[inline(always)]
2368 fn next_impl(&mut self) -> Result<PayloadEvent<'i>, DeError> {
2369 replace(&mut self.lookahead, self.reader.next())
2370 }
2371
2372 /// Returns `true` when next event is not a text event in any form.
2373 #[inline(always)]
2374 const fn current_event_is_last_text(&self) -> bool {
2375 // If next event is a text or CDATA, we should not trim trailing spaces
2376 !matches!(
2377 self.lookahead,
2378 Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_) | PayloadEvent::GeneralRef(_))
2379 )
2380 }
2381
2382 /// Read all consequent [`Text`] and [`CData`] events until non-text event
2383 /// occurs. Content of all events would be appended to `result` and returned
2384 /// as [`DeEvent::Text`].
2385 ///
2386 /// [`Text`]: PayloadEvent::Text
2387 /// [`CData`]: PayloadEvent::CData
2388 fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result<DeEvent<'i>, DeError> {
2389 loop {
2390 if self.current_event_is_last_text() {
2391 break;
2392 }
2393
2394 match self.next_impl()? {
2395 PayloadEvent::Text(e) => result
2396 .to_mut()
2397 .push_str(&e.xml_content(self.reader.xml_version())?),
2398 PayloadEvent::CData(e) => result
2399 .to_mut()
2400 .push_str(&e.xml_content(self.reader.xml_version())?),
2401 PayloadEvent::GeneralRef(e) => self.resolve_reference(result.to_mut(), e)?,
2402
2403 // SAFETY: current_event_is_last_text checks that event is Text, CData or GeneralRef
2404 _ => unreachable!("Only `Text`, `CData` or `GeneralRef` events can come here"),
2405 }
2406 }
2407 Ok(DeEvent::Text(Text::new(result)))
2408 }
2409
2410 /// Return an input-borrowing event.
2411 fn next(&mut self) -> Result<DeEvent<'i>, DeError> {
2412 loop {
2413 return match self.next_impl()? {
2414 PayloadEvent::Start(e) => Ok(DeEvent::Start(e)),
2415 PayloadEvent::End(e) => Ok(DeEvent::End(e)),
2416 PayloadEvent::Text(e) => self.drain_text(e.xml_content(self.reader.xml_version())?),
2417 PayloadEvent::CData(e) => {
2418 self.drain_text(e.xml_content(self.reader.xml_version())?)
2419 }
2420 PayloadEvent::DocType(e) => {
2421 self.entity_resolver
2422 .capture(e)
2423 .map_err(|err| DeError::Custom(format!("cannot parse DTD: {}", err)))?;
2424 continue;
2425 }
2426 PayloadEvent::GeneralRef(e) => {
2427 let mut text = String::new();
2428 self.resolve_reference(&mut text, e)?;
2429 self.drain_text(text.into())
2430 }
2431 PayloadEvent::Eof => Ok(DeEvent::Eof),
2432 };
2433 }
2434 }
2435
2436 fn resolve_reference(&mut self, result: &mut String, event: BytesRef) -> Result<(), DeError> {
2437 let len = event.len();
2438 let reference = self.decoder().decode(&event)?;
2439
2440 if let Some(num) = reference.strip_prefix('#') {
2441 let codepoint = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
2442 result.push_str(codepoint.encode_utf8(&mut [0u8; 4]));
2443 return Ok(());
2444 }
2445 if let Some(value) = self.entity_resolver.resolve(reference.as_ref()) {
2446 result.push_str(value);
2447 return Ok(());
2448 }
2449 Err(EscapeError::UnrecognizedEntity(0..len, reference.to_string()).into())
2450 }
2451
    /// Delegates to `read_to_end(name)` of the underlying reader, taking into
    /// account the event that was already pre-read into the lookahead, and then
    /// refreshes the lookahead.
    ///
    /// Errors of the underlying reader are reported only after the lookahead
    /// has been refreshed.
    #[inline]
    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
        match self.lookahead {
            // We pre-read a start event with the same name as the one that has
            // to be skipped. The first call of `read_to_end` will end that
            // pre-read event, the second will consume the other events
            Ok(PayloadEvent::Start(ref e)) if e.name() == name => {
                let result1 = self.reader.read_to_end(name);
                let result2 = self.reader.read_to_end(name);

                // In case of error `next_impl` returns `Eof`
                let _ = self.next_impl();
                result1?;
                result2?;
            }
            // We pre-read an end event with the same name as the one that has
            // to be skipped. Because this is the end event, the whole tree is
            // already consumed, so nothing to do, just update the lookahead
            Ok(PayloadEvent::End(ref e)) if e.name() == name => {
                let _ = self.next_impl();
            }
            // Any other pre-read event: a single `read_to_end` call is made
            Ok(_) => {
                let result = self.reader.read_to_end(name);

                // In case of error `next_impl` returns `Eof`
                let _ = self.next_impl();
                result?;
            }
            // Read next lookahead event, unpack error from the current lookahead
            Err(_) => {
                self.next_impl()?;
            }
        }
        Ok(())
    }
2487
    /// Returns the decoder of the underlying reader.
    #[inline]
    fn decoder(&self) -> Decoder {
        self.reader.decoder()
    }
2492}
2493
2494////////////////////////////////////////////////////////////////////////////////////////////////////
2495
2496/// Deserialize an instance of type `T` from a string of XML text.
2497pub fn from_str<'de, T>(s: &'de str) -> Result<T, DeError>
2498where
2499 T: Deserialize<'de>,
2500{
2501 let mut de = Deserializer::from_str(s);
2502 T::deserialize(&mut de)
2503}
2504
2505/// Deserialize from a reader. This method will do internal copies of data
2506/// read from `reader`. If you want have a `&str` input and want to borrow
2507/// as much as possible, use [`from_str`].
2508pub fn from_reader<R, T>(reader: R) -> Result<T, DeError>
2509where
2510 R: BufRead,
2511 T: DeserializeOwned,
2512{
2513 let mut de = Deserializer::from_reader(reader);
2514 T::deserialize(&mut de)
2515}
2516
2517////////////////////////////////////////////////////////////////////////////////////////////////////
2518
/// A structure that deserializes XML into Rust values.
pub struct Deserializer<'de, R, E: EntityResolver = PredefinedEntityResolver>
where
    R: XmlRead<'de>,
{
    /// An XML reader that streams events into this deserializer
    reader: XmlReader<'de, R, E>,

    /// When deserializing sequences sometimes we have to skip unwanted events.
    /// Those events should be stored and then replayed. This is a replay buffer
    /// that streams events while it is not empty. When it is exhausted, events
    /// will be requested from [`Self::reader`].
    #[cfg(feature = "overlapped-lists")]
    read: VecDeque<DeEvent<'de>>,
    /// When deserializing sequences sometimes we have to skip events, because XML
    /// is tolerant to elements order and even if in the XSD order is strictly
    /// specified (using `xs:sequence`) most XML parsers allow order violations.
    /// That means that elements forming a sequence could be overlapped with
    /// other elements that are not related to that sequence.
    ///
    /// In order to support this, the deserializer will scan events and skip unwanted
    /// events, storing them here. After a call to [`Self::start_replay()`] all events
    /// are moved from this buffer to [`Self::read`].
    #[cfg(feature = "overlapped-lists")]
    write: VecDeque<DeEvent<'de>>,
    /// Maximum number of events that can be skipped when processing sequences
    /// that occur out-of-order. This field is used to prevent potential
    /// denial-of-service (DoS) attacks which could cause infinite memory
    /// consumption when parsing a very large amount of XML into a sequence field.
    #[cfg(feature = "overlapped-lists")]
    limit: Option<NonZeroUsize>,

    /// An event that was read from [`Self::reader`] by `peek()`, but has not
    /// been consumed by `next()` yet.
    #[cfg(not(feature = "overlapped-lists"))]
    peek: Option<DeEvent<'de>>,

    /// Buffer to store attribute name as a field name exposed to serde consumers
    key_buf: String,
}
2557
2558impl<'de, R, E> Deserializer<'de, R, E>
2559where
2560 R: XmlRead<'de>,
2561 E: EntityResolver,
2562{
2563 /// Create an XML deserializer from one of the possible quick_xml input sources.
2564 ///
2565 /// Typically it is more convenient to use one of these methods instead:
2566 ///
2567 /// - [`Deserializer::from_str`]
2568 /// - [`Deserializer::from_reader`]
2569 fn new(reader: R, entity_resolver: E) -> Self {
2570 Self {
2571 reader: XmlReader::new(reader, entity_resolver),
2572
2573 #[cfg(feature = "overlapped-lists")]
2574 read: VecDeque::new(),
2575 #[cfg(feature = "overlapped-lists")]
2576 write: VecDeque::new(),
2577 #[cfg(feature = "overlapped-lists")]
2578 limit: None,
2579
2580 #[cfg(not(feature = "overlapped-lists"))]
2581 peek: None,
2582
2583 key_buf: String::new(),
2584 }
2585 }
2586
2587 /// Returns `true` if all events was consumed.
2588 pub fn is_empty(&self) -> bool {
2589 #[cfg(feature = "overlapped-lists")]
2590 let event = self.read.front();
2591
2592 #[cfg(not(feature = "overlapped-lists"))]
2593 let event = self.peek.as_ref();
2594
2595 match event {
2596 None | Some(DeEvent::Eof) => self.reader.is_empty(),
2597 _ => false,
2598 }
2599 }
2600
    /// Returns a reference to the underlying XML reader.
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use serde::Deserialize;
    /// use quick_xml::de::Deserializer;
    /// use quick_xml::NsReader;
    ///
    /// #[derive(Deserialize)]
    /// struct SomeStruct {
    ///     field1: String,
    ///     field2: String,
    /// }
    ///
    /// // Try to deserialize from broken XML
    /// let mut de = Deserializer::from_str(
    ///     "<SomeStruct><field1><field2></SomeStruct>"
    /// //   0                           ^= 28        ^= 41
    /// );
    ///
    /// let err = SomeStruct::deserialize(&mut de);
    /// assert!(err.is_err());
    ///
    /// let reader: &NsReader<_> = de.get_ref().get_ref();
    ///
    /// assert_eq!(reader.error_position(), 28);
    /// assert_eq!(reader.buffer_position(), 41);
    /// ```
    pub const fn get_ref(&self) -> &R {
        &self.reader.reader
    }
2632
    /// Sets the maximum number of events that could be skipped during deserialization
    /// of sequences. `None` means that no limit is set.
    ///
    /// If `<element>` contains more than the specified number of nested elements,
    /// `$text` or CDATA nodes, then [`DeError::TooManyEvents`] will be returned during
    /// deserialization of a sequence field (any type that uses [`deserialize_seq`]
    /// for the deserialization, for example, `Vec<T>`).
    ///
    /// This method can be used to prevent a [DoS] attack and infinite memory
    /// consumption when parsing a very large XML to a sequence field.
    ///
    /// It is strongly recommended to set the limit to some value when you parse data
    /// from untrusted sources. You should choose a value that your typical XMLs
    /// can have _between_ different elements that correspond to the same sequence.
    ///
    /// # Examples
    ///
    /// Let's imagine that we deserialize such a structure:
    /// ```
    /// struct List {
    ///   item: Vec<()>,
    /// }
    /// ```
    ///
    /// The XML that we try to parse looks like this:
    /// ```xml
    /// <any-name>
    ///   <item/>
    ///   <!-- Bufferization starts at this point -->
    ///   <another-item>
    ///     <some-element>with text</some-element>
    ///     <yet-another-element/>
    ///   </another-item>
    ///   <!-- Buffer will be emptied at this point; 7 events were buffered -->
    ///   <item/>
    ///   <!-- There is nothing to buffer, because elements follow each other -->
    ///   <item/>
    /// </any-name>
    /// ```
    ///
    /// There, when we deserialize the `item` field, we need to buffer 7 events
    /// before we can deserialize the second `<item/>`:
    ///
    /// - `<another-item>`
    /// - `<some-element>`
    /// - `$text(with text)`
    /// - `</some-element>`
    /// - `<yet-another-element/>` (virtual start event)
    /// - `<yet-another-element/>` (virtual end event)
    /// - `</another-item>`
    ///
    /// Note that `<yet-another-element/>` is internally represented as 2 events:
    /// one for the start tag and one for the end tag. In the future this can be
    /// eliminated, but for now we use the [auto-expanding feature] of a reader,
    /// because this simplifies the deserializer code.
    ///
    /// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
    /// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
    /// [auto-expanding feature]: crate::reader::Config::expand_empty_elements
    #[cfg(feature = "overlapped-lists")]
    pub fn event_buffer_size(&mut self, limit: Option<NonZeroUsize>) -> &mut Self {
        self.limit = limit;
        self
    }
2697
/// Returns a reference to the next event without consuming it.
///
/// When the replay buffer is empty, one event is pulled from the underlying
/// reader and parked at the front of the buffer, so the following call to
/// [`Self::next()`] will return exactly that event.
#[cfg(feature = "overlapped-lists")]
fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
    if self.read.front().is_none() {
        let event = self.reader.next()?;
        self.read.push_front(event);
    }
    match self.read.front() {
        Some(event) => Ok(event),
        // SAFETY: `self.read` was filled in the code above.
        // NOTE: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
        //       if unsafe code will be allowed
        None => unreachable!(),
    }
}
2711 #[cfg(not(feature = "overlapped-lists"))]
2712 fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
2713 match &mut self.peek {
2714 Some(event) => Ok(event),
2715 empty_peek @ None => Ok(empty_peek.insert(self.reader.next()?)),
2716 }
2717 }
2718
2719 #[inline]
2720 fn last_peeked(&self) -> &DeEvent<'de> {
2721 #[cfg(feature = "overlapped-lists")]
2722 {
2723 self.read
2724 .front()
2725 .expect("`Deserializer::peek()` should be called")
2726 }
2727 #[cfg(not(feature = "overlapped-lists"))]
2728 {
2729 self.peek
2730 .as_ref()
2731 .expect("`Deserializer::peek()` should be called")
2732 }
2733 }
2734
2735 fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
2736 // Replay skipped or peeked events
2737 #[cfg(feature = "overlapped-lists")]
2738 if let Some(event) = self.read.pop_front() {
2739 return Ok(event);
2740 }
2741 #[cfg(not(feature = "overlapped-lists"))]
2742 if let Some(e) = self.peek.take() {
2743 return Ok(e);
2744 }
2745 self.reader.next()
2746 }
2747
2748 fn skip_whitespaces(&mut self) -> Result<(), DeError> {
2749 loop {
2750 match self.peek()? {
2751 DeEvent::Text(e) if e.is_blank() => {
2752 self.next()?;
2753 }
2754 _ => break,
2755 }
2756 }
2757 Ok(())
2758 }
2759
/// Returns the mark after which all events, skipped by [`Self::skip()`] call,
/// should be replayed after calling [`Self::start_replay()`].
///
/// The mark is the current length of the skip buffer, so every event that is
/// buffered after this call is stored at an index not less than the mark.
#[cfg(feature = "overlapped-lists")]
#[inline]
#[must_use = "returned checkpoint should be used in `start_replay`"]
fn skip_checkpoint(&self) -> usize {
    self.write.len()
}
2768
/// Reads the next event and, if it is a start tag, its whole subtree, and
/// stores all those events in the skipped events buffer from which they can
/// be retrieved later. You MUST call [`Self::start_replay()`] after calling
/// this to give access to the skipped events and release internal buffers.
///
/// Buffering of a subtree stops after the end tag that closes the skipped
/// start tag, or after [`DeEvent::Eof`], which is buffered as well.
///
/// # Errors
///
/// Returns an error if an event cannot be read or cannot be buffered
/// (see [`Self::skip_event()`]).
#[cfg(feature = "overlapped-lists")]
fn skip(&mut self) -> Result<(), DeError> {
    let first = self.next()?;
    self.skip_event(first)?;

    // Only a start event has a subtree which should be skipped together with it
    let tag = match self.write.back() {
        Some(DeEvent::Start(start)) => start.name().as_ref().to_owned(),
        _ => return Ok(()),
    };

    // Count of currently opened nested elements with the same name as `tag`
    let mut nested = 0;
    loop {
        let event = self.next()?;
        // Classify the event first, because buffering takes ownership of it
        let opens = matches!(&event, DeEvent::Start(e) if e.name().as_ref() == tag);
        let closes = matches!(&event, DeEvent::End(e) if e.name().as_ref() == tag);
        let eof = matches!(&event, DeEvent::Eof);

        // Every event is buffered, including the closing tag and `Eof`
        self.skip_event(event)?;

        if opens {
            nested += 1;
        } else if closes {
            if nested == 0 {
                break;
            }
            nested -= 1;
        } else if eof {
            break;
        }
    }
    Ok(())
}
2805
/// Appends `event` to the end of the skipped events buffer.
///
/// # Errors
///
/// Returns [`DeError::TooManyEvents`] without buffering the event if a limit
/// is configured and the buffer already holds that many events.
#[cfg(feature = "overlapped-lists")]
#[inline]
fn skip_event(&mut self, event: DeEvent<'de>) -> Result<(), DeError> {
    match self.limit {
        Some(max) if self.write.len() >= max.get() => Err(DeError::TooManyEvents(max)),
        _ => {
            self.write.push_back(event);
            Ok(())
        }
    }
}
2817
/// Moves events that were buffered after the given `checkpoint` from the
/// [`Self::write`] skip buffer to the front of the [`Self::read`] buffer.
///
/// After calling this method, [`Self::peek()`] and [`Self::next()`] start
/// to return events that were skipped previously by calling [`Self::skip()`],
/// and only when all those events are consumed, the deserializer starts
/// to drain events from the underlying reader again.
///
/// This method MUST be called if any number of [`Self::skip()`] was called
/// after [`Self::new()`] or `start_replay()`, or you'll lose events.
#[cfg(feature = "overlapped-lists")]
fn start_replay(&mut self, checkpoint: usize) {
    if checkpoint != 0 {
        // Replay only the tail of the skip buffer; events before the
        // checkpoint stay in `self.write`
        let mut replay = self.write.split_off(checkpoint);
        replay.append(&mut self.read);
        self.read = replay;
        return;
    }

    // The whole skip buffer is replayed: put the pending events after the
    // skipped ones and then exchange the buffers
    self.write.append(&mut self.read);
    std::mem::swap(&mut self.read, &mut self.write);
}
2839
/// Reads a string using [`Self::read_string_impl()`] with `allow_start = true`,
/// i.e. the text may be wrapped into one level of `<tag>...</tag>`.
#[inline]
fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
    self.read_string_impl(true)
}
2844
2845 /// Consumes consequent [`Text`] and [`CData`] (both a referred below as a _text_)
2846 /// events, merge them into one string. If there are no such events, returns
2847 /// an empty string.
2848 ///
2849 /// If `allow_start` is `false`, then only text events are consumed, for other
2850 /// events an error is returned (see table below).
2851 ///
2852 /// If `allow_start` is `true`, then two or three events are expected:
2853 /// - [`DeEvent::Start`];
2854 /// - _(optional)_ [`DeEvent::Text`] which content is returned;
2855 /// - [`DeEvent::End`]. If text event was missed, an empty string is returned.
2856 ///
2857 /// Corresponding events are consumed.
2858 ///
2859 /// # Handling events
2860 ///
2861 /// The table below shows how events is handled by this method:
2862 ///
2863 /// |Event |XML |Handling
2864 /// |------------------|---------------------------|----------------------------------------
2865 /// |[`DeEvent::Start`]|`<tag>...</tag>` |if `allow_start == true`, result determined by the second table, otherwise emits [`UnexpectedStart("tag")`](DeError::UnexpectedStart)
2866 /// |[`DeEvent::End`] |`</any-tag>` |This is impossible situation, the method will panic if it happens
2867 /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged
2868 /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2869 ///
2870 /// Second event, consumed if [`DeEvent::Start`] was received and `allow_start == true`:
2871 ///
2872 /// |Event |XML |Handling
2873 /// |------------------|---------------------------|----------------------------------------------------------------------------------
2874 /// |[`DeEvent::Start`]|`<any-tag>...</any-tag>` |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
2875 /// |[`DeEvent::End`] |`</tag>` |Returns an empty slice. The reader guarantee that tag will match the open one
2876 /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, expects the `</tag>` after that
2877 /// |[`DeEvent::Eof`] | |Emits [`InvalidXml(IllFormed(MissingEndTag))`](DeError::InvalidXml)
2878 ///
2879 /// [`Text`]: Event::Text
2880 /// [`CData`]: Event::CData
2881 fn read_string_impl(&mut self, allow_start: bool) -> Result<Cow<'de, str>, DeError> {
2882 match self.next()? {
2883 // Reached by doc tests only: this file, lines 979 and 996
2884 DeEvent::Text(e) => Ok(e.text),
2885 // allow one nested level
2886 // Reached by trivial::{...}::{field, field_nested, field_tag_after, field_tag_before, nested, tag_after, tag_before, wrapped}
2887 DeEvent::Start(e) if allow_start => self.read_text(e.name()),
2888 // TODO: not reached by any tests
2889 DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
2890 // SAFETY: The reader is guaranteed that we don't have unmatched tags
2891 // If we here, then our deserializer has a bug
2892 DeEvent::End(e) => unreachable!("{:?}", e),
2893 // Reached by trivial::{empty_doc, only_comment}
2894 DeEvent::Eof => Err(DeError::UnexpectedEof),
2895 }
2896 }
2897 /// Consumes one [`DeEvent::Text`] event and ensures that it is followed by the
2898 /// [`DeEvent::End`] event.
2899 ///
2900 /// # Parameters
2901 /// - `name`: name of a tag opened before reading text. The corresponding end tag
2902 /// should present in input just after the text
2903 fn read_text(&mut self, name: QName) -> Result<Cow<'de, str>, DeError> {
2904 match self.next()? {
2905 DeEvent::Text(e) => match self.next()? {
2906 // The matching tag name is guaranteed by the reader
2907 // Reached by trivial::{...}::{field, wrapped}
2908 DeEvent::End(_) => Ok(e.text),
2909 // SAFETY: Cannot be two consequent Text events, they would be merged into one
2910 DeEvent::Text(_) => unreachable!(),
2911 // Reached by trivial::{...}::{field_tag_after, tag_after}
2912 DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
2913 // Reached by struct_::non_closed::elements_child
2914 DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
2915 },
2916 // We can get End event in case of `<tag></tag>` or `<tag/>` input
2917 // Return empty text in that case
2918 // The matching tag name is guaranteed by the reader
2919 // Reached by {...}::xs_list::empty
2920 DeEvent::End(_) => Ok("".into()),
2921 // Reached by trivial::{...}::{field_nested, field_tag_before, nested, tag_before}
2922 DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())),
2923 // Reached by struct_::non_closed::elements_child
2924 DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
2925 }
2926 }
2927
/// Drops all events up to and including the end tag with the
/// [name](BytesEnd::name()) `name` that closes the currently opened element.
/// This method should be called after [`Self::next()`].
///
/// Events that are waiting in the replay buffer are dropped first. If the
/// buffer runs out before the closing tag is found, the rest is skipped
/// directly in the underlying reader.
#[cfg(feature = "overlapped-lists")]
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
    // Count of nested elements named `name` that are opened but not yet closed
    let mut depth = 0;

    while let Some(event) = self.read.pop_front() {
        match event {
            DeEvent::Start(e) if e.name() == name => depth += 1,
            DeEvent::End(e) if e.name() == name => {
                if depth == 0 {
                    return Ok(());
                }
                depth -= 1;
            }
            // Drop all other skipped events
            _ => {}
        }
    }

    // If we do not have skipped events, use effective reading that will
    // not allocate memory for events.
    //
    // We should close all opened tags, because we could buffer
    // Start events, but not the corresponding End events. So we
    // keep reading events until we exit all nested tags.
    // `read_to_end()` will return an error if an Eof was encountered
    // preliminary (in case of malformed XML).
    //
    // <tag><tag></tag></tag>
    // ^^^^^^^^^^             - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2
    //           ^^^^^^       - read by the first call of `self.reader.read_to_end()`
    //                 ^^^^^^ - read by the second call of `self.reader.read_to_end()`
    for _ in 0..=depth {
        self.reader.read_to_end(name)?;
    }
    Ok(())
}
2974 #[cfg(not(feature = "overlapped-lists"))]
2975 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2976 // First one might be in self.peek
2977 match self.next()? {
2978 DeEvent::Start(e) => self.reader.read_to_end(e.name())?,
2979 DeEvent::End(e) if e.name() == name => return Ok(()),
2980 _ => (),
2981 }
2982 self.reader.read_to_end(name)
2983 }
2984
2985 fn skip_next_tree(&mut self) -> Result<(), DeError> {
2986 let DeEvent::Start(start) = self.next()? else {
2987 unreachable!("Only call this if the next event is a start event")
2988 };
2989 let name = start.name();
2990 self.read_to_end(name)
2991 }
2992
/// Method for testing Deserializer implementation. Checks that all events were consumed during
/// deserialization. Panics if the next event is not [`DeEvent::Eof`].
///
/// Blank text events before the end of input are skipped before the check.
///
/// # Panics
///
/// Also panics if whitespace cannot be skipped or the next event cannot be peeked.
#[doc(hidden)]
#[track_caller]
pub fn check_eof_reached(&mut self) {
    // Deserializer may not consume trailing spaces, that is normal
    self.skip_whitespaces().expect("cannot skip whitespaces");
    let event = self.peek().expect("cannot peek event");
    assert_eq!(
        *event,
        DeEvent::Eof,
        "the whole XML document should be consumed, expected `Eof`",
    );
}
3007}
3008
impl<'de> Deserializer<'de, SliceReader<'de>> {
    /// Create a new deserializer that will borrow data from the specified string.
    ///
    /// Deserializer created with this method will not resolve custom entities.
    // This inherent constructor returns `Self` directly, while
    // `std::str::FromStr::from_str` must return a `Result`, so the trait is
    // not implemented and the lint is silenced.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(source: &'de str) -> Self {
        Self::from_str_with_resolver(source, PredefinedEntityResolver)
    }

    /// Create a new deserializer that will borrow data from the specified preconfigured
    /// reader.
    ///
    /// Deserializer created with this method will not resolve custom entities.
    ///
    /// Note, that config option [`Config::expand_empty_elements`] will be set to `true`.
    ///
    /// # Example
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// # use quick_xml::de::Deserializer;
    /// # use quick_xml::NsReader;
    /// # use serde::Deserialize;
    /// #
    /// #[derive(Deserialize, PartialEq, Debug)]
    /// struct Object<'a> {
    ///     tag: &'a str,
    /// }
    ///
    /// let mut reader = NsReader::from_str("<xml><tag>    test    </tag></xml>");
    ///
    /// let mut de = Deserializer::borrowing(reader.clone());
    /// let obj = Object::deserialize(&mut de).unwrap();
    /// assert_eq!(obj, Object { tag: "    test    " });
    ///
    /// reader.config_mut().trim_text(true);
    ///
    /// let mut de = Deserializer::borrowing(reader);
    /// let obj = Object::deserialize(&mut de).unwrap();
    /// assert_eq!(obj, Object { tag: "test" });
    /// ```
    ///
    /// [`Config::expand_empty_elements`]: crate::reader::Config::expand_empty_elements
    #[inline]
    pub fn borrowing(reader: NsReader<&'de [u8]>) -> Self {
        // Same as `borrowing_with_resolver`, but with the predefined entity resolver
        Self::borrowing_with_resolver(reader, PredefinedEntityResolver)
    }
}
3057
3058impl<'de, E> Deserializer<'de, SliceReader<'de>, E>
3059where
3060 E: EntityResolver,
3061{
3062 /// Create a new deserializer that will borrow data from the specified string
3063 /// and use the specified entity resolver.
3064 pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
3065 Self::borrowing_with_resolver(NsReader::from_str(source), entity_resolver)
3066 }
3067
3068 /// Create a new deserializer that will borrow data from the specified preconfigured
3069 /// reader and use the specified entity resolver.
3070 ///
3071 /// Note, that config option [`Config::expand_empty_elements`] will be set to `true`.
3072 ///
3073 /// [`Config::expand_empty_elements`]: crate::reader::Config::expand_empty_elements
3074 pub fn borrowing_with_resolver(mut reader: NsReader<&'de [u8]>, entity_resolver: E) -> Self {
3075 let config = reader.config_mut();
3076 config.expand_empty_elements = true;
3077
3078 Self::new(
3079 SliceReader {
3080 reader,
3081 version: XmlVersion::Implicit1_0,
3082 },
3083 entity_resolver,
3084 )
3085 }
3086}
3087
impl<'de, R> Deserializer<'de, IoReader<R>>
where
    R: BufRead,
{
    /// Create a new deserializer that will copy data from the specified reader
    /// into internal buffer.
    ///
    /// If you already have a string use [`Self::from_str`] instead, because it
    /// will borrow instead of copy. If you have `&[u8]` which is known to represent
    /// UTF-8, you can decode it first before using [`from_str`].
    ///
    /// Deserializer created with this method will not resolve custom entities.
    pub fn from_reader(reader: R) -> Self {
        // Same as `with_resolver`, but with the predefined entity resolver
        Self::with_resolver(reader, PredefinedEntityResolver)
    }

    /// Create a new deserializer that will copy data from the specified preconfigured
    /// reader into internal buffer.
    ///
    /// Deserializer created with this method will not resolve custom entities.
    ///
    /// Note, that config option [`Config::expand_empty_elements`] will be set to `true`.
    ///
    /// # Example
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// # use quick_xml::de::Deserializer;
    /// # use quick_xml::NsReader;
    /// # use serde::Deserialize;
    /// #
    /// #[derive(Deserialize, PartialEq, Debug)]
    /// struct Object {
    ///     tag: String,
    /// }
    ///
    /// let mut reader = NsReader::from_str("<xml><tag>    test    </tag></xml>");
    ///
    /// let mut de = Deserializer::buffering(reader.clone());
    /// let obj = Object::deserialize(&mut de).unwrap();
    /// assert_eq!(obj, Object { tag: "    test    ".to_string() });
    ///
    /// reader.config_mut().trim_text(true);
    ///
    /// let mut de = Deserializer::buffering(reader);
    /// let obj = Object::deserialize(&mut de).unwrap();
    /// assert_eq!(obj, Object { tag: "test".to_string() });
    /// ```
    ///
    /// [`Config::expand_empty_elements`]: crate::reader::Config::expand_empty_elements
    #[inline]
    pub fn buffering(reader: NsReader<R>) -> Self {
        // Same as `buffering_with_resolver`, but with the predefined entity resolver
        Self::buffering_with_resolver(reader, PredefinedEntityResolver)
    }
}
3143
3144impl<'de, R, E> Deserializer<'de, IoReader<R>, E>
3145where
3146 R: BufRead,
3147 E: EntityResolver,
3148{
3149 /// Create a new deserializer that will copy data from the specified reader
3150 /// into internal buffer and use the specified entity resolver.
3151 ///
3152 /// If you already have a string use [`Self::from_str`] instead, because it
3153 /// will borrow instead of copy. If you have `&[u8]` which is known to represent
3154 /// UTF-8, you can decode it first before using [`from_str`].
3155 pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
3156 let mut reader = NsReader::from_reader(reader);
3157 let config = reader.config_mut();
3158 config.expand_empty_elements = true;
3159
3160 Self::new(
3161 IoReader {
3162 reader,
3163 buf: Vec::new(),
3164 version: XmlVersion::Implicit1_0,
3165 },
3166 entity_resolver,
3167 )
3168 }
3169
3170 /// Create new deserializer that will copy data from the specified preconfigured reader
3171 /// into internal buffer and use the specified entity resolver.
3172 ///
3173 /// Note, that config option [`Config::expand_empty_elements`] will be set to `true`.
3174 ///
3175 /// [`Config::expand_empty_elements`]: crate::reader::Config::expand_empty_elements
3176 pub fn buffering_with_resolver(mut reader: NsReader<R>, entity_resolver: E) -> Self {
3177 let config = reader.config_mut();
3178 config.expand_empty_elements = true;
3179
3180 Self::new(
3181 IoReader {
3182 reader,
3183 buf: Vec::new(),
3184 version: XmlVersion::Implicit1_0,
3185 },
3186 entity_resolver,
3187 )
3188 }
3189}
3190
3191impl<'de, R, E> de::Deserializer<'de> for &mut Deserializer<'de, R, E>
3192where
3193 R: XmlRead<'de>,
3194 E: EntityResolver,
3195{
3196 type Error = DeError;
3197
3198 deserialize_primitives!();
3199
3200 fn deserialize_struct<V>(
3201 self,
3202 _name: &'static str,
3203 fields: &'static [&'static str],
3204 visitor: V,
3205 ) -> Result<V::Value, DeError>
3206 where
3207 V: Visitor<'de>,
3208 {
3209 // When document is pretty-printed there could be whitespaces before the root element
3210 self.skip_whitespaces()?;
3211 match self.next()? {
3212 DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self, e, fields)),
3213 // SAFETY: The reader is guaranteed that we don't have unmatched tags
3214 // If we here, then our deserializer has a bug
3215 DeEvent::End(e) => unreachable!("{:?}", e),
3216 // Deserializer methods are only hints, if deserializer could not satisfy
3217 // request, it should return the data that it has. It is responsibility
3218 // of a Visitor to return an error if it does not understand the data
3219 DeEvent::Text(e) => match e.text {
3220 Cow::Borrowed(s) => visitor.visit_borrowed_str(s),
3221 Cow::Owned(s) => visitor.visit_string(s),
3222 },
3223 DeEvent::Eof => Err(DeError::UnexpectedEof),
3224 }
3225 }
3226
3227 /// Unit represented in XML as a `xs:element` or text/CDATA content.
3228 /// Any content inside `xs:element` is ignored and skipped.
3229 ///
3230 /// Produces unit struct from any of following inputs:
3231 /// - any `<tag ...>...</tag>`
3232 /// - any `<tag .../>`
3233 /// - any consequent text / CDATA content (can consist of several parts
3234 /// delimited by comments and processing instructions)
3235 ///
3236 /// # Events handling
3237 ///
3238 /// |Event |XML |Handling
3239 /// |------------------|---------------------------|-------------------------------------------
3240 /// |[`DeEvent::Start`]|`<tag>...</tag>` |Calls `visitor.visit_unit()`, consumes all events up to and including corresponding `End` event
3241 /// |[`DeEvent::End`] |`</tag>` |This is impossible situation, the method will panic if it happens
3242 /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Calls `visitor.visit_unit()`. The content is ignored
3243 /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
3244 fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, DeError>
3245 where
3246 V: Visitor<'de>,
3247 {
3248 match self.next()? {
3249 DeEvent::Start(s) => {
3250 self.read_to_end(s.name())?;
3251 visitor.visit_unit()
3252 }
3253 DeEvent::Text(_) => visitor.visit_unit(),
3254 // SAFETY: The reader is guaranteed that we don't have unmatched tags
3255 // If we here, then our deserializer has a bug
3256 DeEvent::End(e) => unreachable!("{:?}", e),
3257 DeEvent::Eof => Err(DeError::UnexpectedEof),
3258 }
3259 }
3260
3261 /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
3262 /// with the same deserializer.
3263 fn deserialize_newtype_struct<V>(
3264 self,
3265 _name: &'static str,
3266 visitor: V,
3267 ) -> Result<V::Value, DeError>
3268 where
3269 V: Visitor<'de>,
3270 {
3271 visitor.visit_newtype_struct(self)
3272 }
3273
3274 fn deserialize_enum<V>(
3275 self,
3276 _name: &'static str,
3277 _variants: &'static [&'static str],
3278 visitor: V,
3279 ) -> Result<V::Value, DeError>
3280 where
3281 V: Visitor<'de>,
3282 {
3283 // When document is pretty-printed there could be whitespaces before the root element
3284 // which represents the enum variant
3285 // Checked by `top_level::list_of_enum` test in serde-de-seq
3286 self.skip_whitespaces()?;
3287 visitor.visit_enum(var::EnumAccess::new(self))
3288 }
3289
3290 fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
3291 where
3292 V: Visitor<'de>,
3293 {
3294 visitor.visit_seq(self)
3295 }
3296
3297 fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
3298 where
3299 V: Visitor<'de>,
3300 {
3301 // We cannot use result of `peek()` directly because of borrow checker
3302 let _ = self.peek()?;
3303 match self.last_peeked() {
3304 DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
3305 DeEvent::Eof => visitor.visit_none(),
3306 // if the `xsi:nil` attribute is set to true we got a none value
3307 DeEvent::Start(start) if self.reader.reader.has_nil_attr(start) => {
3308 self.skip_next_tree()?;
3309 visitor.visit_none()
3310 }
3311 _ => visitor.visit_some(self),
3312 }
3313 }
3314
3315 fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeError>
3316 where
3317 V: Visitor<'de>,
3318 {
3319 match self.peek()? {
3320 DeEvent::Text(_) => self.deserialize_str(visitor),
3321 _ => self.deserialize_map(visitor),
3322 }
3323 }
3324}
3325
3326/// An accessor to sequence elements forming a value for top-level sequence of XML
3327/// elements.
3328///
3329/// Technically, multiple top-level elements violates XML rule of only one top-level
3330/// element, but we consider this as several concatenated XML documents.
3331impl<'de, R, E> SeqAccess<'de> for &mut Deserializer<'de, R, E>
3332where
3333 R: XmlRead<'de>,
3334 E: EntityResolver,
3335{
3336 type Error = DeError;
3337
3338 fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
3339 where
3340 T: DeserializeSeed<'de>,
3341 {
3342 // When document is pretty-printed there could be whitespaces before, between
3343 // and after root elements. We cannot defer decision if we need to skip spaces
3344 // or not: if we have a sequence of type that does not accept blank text, it
3345 // will need to return something and it can return only error. For example,
3346 // it can be enum without `$text` variant
3347 // Checked by `top_level::list_of_enum` test in serde-de-seq
3348 self.skip_whitespaces()?;
3349 match self.peek()? {
3350 DeEvent::Eof => Ok(None),
3351
3352 // Start(tag), End(tag), Text
3353 _ => seed.deserialize(&mut **self).map(Some),
3354 }
3355 }
3356}
3357
/// A mutable reference to the [`Deserializer`] already is a deserializer, so
/// the conversion returns the reference itself.
impl<'de, R, E> IntoDeserializer<'de, DeError> for &mut Deserializer<'de, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    type Deserializer = Self;

    #[inline]
    fn into_deserializer(self) -> Self {
        self
    }
}
3370
3371////////////////////////////////////////////////////////////////////////////////////////////////////
3372
3373/// Converts raw reader's event into a payload event.
3374/// Returns `None`, if event should be skipped.
3375#[inline(always)]
3376fn skip_uninterested<'a>(event: Event<'a>) -> Option<PayloadEvent<'a>> {
3377 let event = match event {
3378 Event::DocType(e) => PayloadEvent::DocType(e),
3379 Event::Start(e) => PayloadEvent::Start(e),
3380 Event::End(e) => PayloadEvent::End(e),
3381 Event::Eof => PayloadEvent::Eof,
3382
3383 // Do not trim next text event after Text, CDATA or reference event
3384 Event::CData(e) => PayloadEvent::CData(e),
3385 Event::Text(e) => PayloadEvent::Text(e),
3386 Event::GeneralRef(e) => PayloadEvent::GeneralRef(e),
3387
3388 _ => return None,
3389 };
3390 Some(event)
3391}
3392
3393////////////////////////////////////////////////////////////////////////////////////////////////////
3394
/// Trait used by the deserializer for iterating over input. This is manually
/// "specialized" for iterating over `&[u8]`.
///
/// You do not need to implement this trait, it is needed to abstract from
/// [borrowing](SliceReader) and [copying](IoReader) data sources and reuse code in
/// deserializer
pub trait XmlRead<'i> {
    /// Return an input-borrowing event.
    ///
    /// The implementations in this module return only events that are
    /// accepted by `skip_uninterested()`; all other events are read and dropped.
    fn next(&mut self) -> Result<PayloadEvent<'i>, DeError>;

    /// Skips until end element is found. Unlike `next()` it will not allocate
    /// when it cannot satisfy the lifetime.
    ///
    /// `name` is the name of the element whose end tag should be found.
    fn read_to_end(&mut self, name: QName) -> Result<(), DeError>;

    /// Return an XML version of the source.
    ///
    /// The implementations in this module start with [`XmlVersion::Implicit1_0`]
    /// and update the version when an XML declaration is read.
    fn xml_version(&self) -> XmlVersion;

    /// A copy of the reader's decoder used to decode strings.
    fn decoder(&self) -> Decoder;

    /// Checks if the `start` tag has a [`xsi:nil`] attribute. This method ignores
    /// any errors in attributes.
    ///
    /// [`xsi:nil`]: https://www.w3.org/TR/xmlschema-1/#xsi_nil
    fn has_nil_attr(&self, start: &BytesStart) -> bool;
}
3421
/// XML input source that reads from a std::io input stream.
///
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_reader`]
pub struct IoReader<R: BufRead> {
    /// The underlying namespace-aware XML reader
    reader: NsReader<R>,
    /// Buffer the events are read into; cleared before each event is read
    buf: Vec<u8>,
    /// XML version of the source, updated when an XML declaration is read
    version: XmlVersion,
}
3431
impl<R: BufRead> IoReader<R> {
    /// Returns the underlying XML reader.
    ///
    /// It can be used, for example, to find out the position at which an
    /// error occurred:
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use serde::Deserialize;
    /// use std::io::Cursor;
    /// use quick_xml::de::Deserializer;
    /// use quick_xml::NsReader;
    ///
    /// #[derive(Deserialize)]
    /// struct SomeStruct {
    ///     field1: String,
    ///     field2: String,
    /// }
    ///
    /// // Try to deserialize from broken XML
    /// let mut de = Deserializer::from_reader(Cursor::new(
    ///     "<SomeStruct><field1><field2></SomeStruct>"
    ///     // 0                         ^= 28        ^= 41
    /// ));
    ///
    /// let err = SomeStruct::deserialize(&mut de);
    /// assert!(err.is_err());
    ///
    /// let reader: &NsReader<Cursor<&str>> = de.get_ref().get_ref();
    ///
    /// assert_eq!(reader.error_position(), 28);
    /// assert_eq!(reader.buffer_position(), 41);
    /// ```
    pub const fn get_ref(&self) -> &NsReader<R> {
        &self.reader
    }
}
3466
3467impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
3468 fn next(&mut self) -> Result<PayloadEvent<'static>, DeError> {
3469 loop {
3470 self.buf.clear();
3471
3472 let event = self.reader.read_event_into(&mut self.buf)?;
3473 if let Event::Decl(e) = &event {
3474 self.version = e.xml_version()?;
3475 }
3476 if let Some(event) = skip_uninterested(event) {
3477 return Ok(event.into_owned());
3478 }
3479 }
3480 }
3481
3482 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
3483 match self.reader.read_to_end_into(name, &mut self.buf) {
3484 Err(e) => Err(e.into()),
3485 Ok(_) => Ok(()),
3486 }
3487 }
3488
3489 #[inline]
3490 fn xml_version(&self) -> XmlVersion {
3491 self.version
3492 }
3493
3494 #[inline]
3495 fn decoder(&self) -> Decoder {
3496 self.reader.decoder()
3497 }
3498
3499 fn has_nil_attr(&self, start: &BytesStart) -> bool {
3500 start.attributes().has_nil(self.reader.resolver())
3501 }
3502}
3503
/// XML input source that reads from a slice of bytes and can borrow from it.
///
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_str`].
pub struct SliceReader<'de> {
    /// The underlying namespace-aware XML reader over the borrowed input
    reader: NsReader<&'de [u8]>,
    /// XML version of the source, updated when an XML declaration is read
    version: XmlVersion,
}
3512
impl<'de> SliceReader<'de> {
    /// Returns the underlying XML reader.
    ///
    /// It can be used, for example, to find out the position at which an
    /// error occurred:
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use serde::Deserialize;
    /// use quick_xml::de::Deserializer;
    /// use quick_xml::NsReader;
    ///
    /// #[derive(Deserialize)]
    /// struct SomeStruct {
    ///     field1: String,
    ///     field2: String,
    /// }
    ///
    /// // Try to deserialize from broken XML
    /// let mut de = Deserializer::from_str(
    ///     "<SomeStruct><field1><field2></SomeStruct>"
    ///     // 0                         ^= 28        ^= 41
    /// );
    ///
    /// let err = SomeStruct::deserialize(&mut de);
    /// assert!(err.is_err());
    ///
    /// let reader: &NsReader<&[u8]> = de.get_ref().get_ref();
    ///
    /// assert_eq!(reader.error_position(), 28);
    /// assert_eq!(reader.buffer_position(), 41);
    /// ```
    pub const fn get_ref(&self) -> &NsReader<&'de [u8]> {
        &self.reader
    }
}
3546
3547impl<'de> XmlRead<'de> for SliceReader<'de> {
3548 fn next(&mut self) -> Result<PayloadEvent<'de>, DeError> {
3549 loop {
3550 let event = self.reader.read_event()?;
3551 if let Event::Decl(e) = &event {
3552 self.version = e.xml_version()?;
3553 }
3554 if let Some(event) = skip_uninterested(event) {
3555 return Ok(event);
3556 }
3557 }
3558 }
3559
3560 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
3561 match self.reader.read_to_end(name) {
3562 Err(e) => Err(e.into()),
3563 Ok(_) => Ok(()),
3564 }
3565 }
3566
3567 #[inline]
3568 fn xml_version(&self) -> XmlVersion {
3569 self.version
3570 }
3571
3572 #[inline]
3573 fn decoder(&self) -> Decoder {
3574 self.reader.decoder()
3575 }
3576
3577 fn has_nil_attr(&self, start: &BytesStart) -> bool {
3578 start.attributes().has_nil(self.reader.resolver())
3579 }
3580}
3581
3582#[cfg(test)]
3583mod tests {
3584 use super::*;
3585 use crate::errors::IllFormedError;
3586 use pretty_assertions::assert_eq;
3587
/// Creates a deserializer that borrows from `source`.
fn make_de<'de>(source: &'de str) -> Deserializer<'de, SliceReader<'de>> {
    // Prints the XML under test to stderr; presumably kept on purpose so the
    // input is visible in the captured output of a failing test
    dbg!(source);
    Deserializer::from_str(source)
}
3592
3593 #[cfg(feature = "overlapped-lists")]
3594 mod skip {
3595 use super::*;
3596 use crate::de::DeEvent::*;
3597 use crate::events::BytesEnd;
3598 use pretty_assertions::assert_eq;
3599
        /// Checks that `peek()` and `read()` behave correctly after `skip()`.
        ///
        /// After every step the test inspects both internal buffers: `de.write`
        /// (events stored by `skip()`) and `de.read` (events queued for replay).
        #[test]
        fn read_and_peek() {
            let mut de = make_de(
                "\
                <root>\
                    <inner>\
                        text\
                        <inner/>\
                    </inner>\
                    <next/>\
                    <target/>\
                </root>\
                ",
            );

            // Initial conditions - both are empty
            assert_eq!(de.read, vec![]);
            assert_eq!(de.write, vec![]);

            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
            assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("inner")));

            // Mark that start_replay() should begin replay from this point
            let checkpoint = de.skip_checkpoint();
            assert_eq!(checkpoint, 0);

            // Should skip the first <inner> tree
            de.skip().unwrap();
            assert_eq!(de.read, vec![]);
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("inner")),
                    Text("text".into()),
                    Start(BytesStart::new("inner")),
                    End(BytesEnd::new("inner")),
                    End(BytesEnd::new("inner")),
                ]
            );

            // Consume <next/>. Now the unconsumed XML looks like:
            //
            //   <inner>
            //     text
            //     <inner/>
            //   </inner>
            //   <target/>
            // </root>
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("next")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("next")));

            // Writing is finished. The next call to `next()` should start
            // replaying these events:
            //
            //   <inner>
            //     text
            //     <inner/>
            //   </inner>
            //
            // and after that stream these events:
            //
            //   <target/>
            // </root>
            de.start_replay(checkpoint);
            assert_eq!(
                de.read,
                vec![
                    Start(BytesStart::new("inner")),
                    Text("text".into()),
                    Start(BytesStart::new("inner")),
                    End(BytesEnd::new("inner")),
                    End(BytesEnd::new("inner")),
                ]
            );
            assert_eq!(de.write, vec![]);
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));

            // Mark that start_replay() should begin replay from this point
            let checkpoint = de.skip_checkpoint();
            assert_eq!(checkpoint, 0);

            // Skip the `$text` node and consume <inner/> after it
            de.skip().unwrap();
            assert_eq!(
                de.read,
                vec![
                    Start(BytesStart::new("inner")),
                    End(BytesEnd::new("inner")),
                    End(BytesEnd::new("inner")),
                ]
            );
            assert_eq!(
                de.write,
                vec![
                    // This comment is here to keep the same formatting for both
                    // arrays, otherwise rustfmt suggests putting it on one line
                    Text("text".into()),
                ]
            );

            assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));

            // Writing is finished. The next call to `next()` should start
            // replaying these events:
            //
            //     text
            //   </inner>
            //
            // and after that stream these events:
            //
            //   <target/>
            // </root>
            de.start_replay(checkpoint);
            assert_eq!(
                de.read,
                vec![
                    // This comment is here to keep the same formatting as the
                    // others, otherwise rustfmt suggests putting it on one line
                    Text("text".into()),
                    End(BytesEnd::new("inner")),
                ]
            );
            assert_eq!(de.write, vec![]);
            assert_eq!(de.next().unwrap(), Text("text".into()));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("target")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
            assert_eq!(de.next().unwrap(), Eof);
        }
3730
        /// Checks that `read_to_end()` behaves correctly after `skip()`.
        ///
        /// Covers both a `read_to_end()` over freshly read events (the `<target>`
        /// tree) and one over replayed events (the `<skip>` tree).
        #[test]
        fn read_to_end() {
            let mut de = make_de(
                "\
                <root>\
                    <skip>\
                        text\
                        <skip/>\
                    </skip>\
                    <target>\
                        <target/>\
                    </target>\
                </root>\
                ",
            );

            // Initial conditions - both are empty
            assert_eq!(de.read, vec![]);
            assert_eq!(de.write, vec![]);

            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));

            // Mark that start_replay() should begin replay from this point
            let checkpoint = de.skip_checkpoint();
            assert_eq!(checkpoint, 0);

            // Skip the <skip> tree
            de.skip().unwrap();
            assert_eq!(de.read, vec![]);
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("skip")),
                    Text("text".into()),
                    Start(BytesStart::new("skip")),
                    End(BytesEnd::new("skip")),
                    End(BytesEnd::new("skip")),
                ]
            );

            // Drop all events that represent the <target> tree. Now the
            // unconsumed XML looks like:
            //
            //   <skip>
            //     text
            //     <skip/>
            //   </skip>
            // </root>
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
            de.read_to_end(QName(b"target")).unwrap();
            assert_eq!(de.read, vec![]);
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("skip")),
                    Text("text".into()),
                    Start(BytesStart::new("skip")),
                    End(BytesEnd::new("skip")),
                    End(BytesEnd::new("skip")),
                ]
            );

            // Writing is finished. The next call to `next()` should start
            // replaying these events:
            //
            //   <skip>
            //     text
            //     <skip/>
            //   </skip>
            //
            // and after that stream these events:
            //
            // </root>
            de.start_replay(checkpoint);
            assert_eq!(
                de.read,
                vec![
                    Start(BytesStart::new("skip")),
                    Text("text".into()),
                    Start(BytesStart::new("skip")),
                    End(BytesEnd::new("skip")),
                    End(BytesEnd::new("skip")),
                ]
            );
            assert_eq!(de.write, vec![]);

            // This time `read_to_end()` has to consume replayed events
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skip")));
            de.read_to_end(QName(b"skip")).unwrap();

            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
            assert_eq!(de.next().unwrap(), Eof);
        }
3822
        /// Checks that a replay replays only part of the stored events: the ones
        /// recorded after the given checkpoint.
        ///
        /// Test for https://github.com/tafia/quick-xml/issues/435
        #[test]
        fn partial_replay() {
            let mut de = make_de(
                "\
                <root>\
                    <skipped-1/>\
                    <skipped-2/>\
                    <inner>\
                        <skipped-3/>\
                        <skipped-4/>\
                        <target-2/>\
                    </inner>\
                    <target-1/>\
                </root>\
                ",
            );

            // Initial conditions - both are empty
            assert_eq!(de.read, vec![]);
            assert_eq!(de.write, vec![]);

            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));

            // start_replay() should start replay from this point
            let checkpoint1 = de.skip_checkpoint();
            assert_eq!(checkpoint1, 0);

            // Should skip the first and second <skipped-N/> elements
            de.skip().unwrap(); // skipped-1
            de.skip().unwrap(); // skipped-2
            assert_eq!(de.read, vec![]);
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("skipped-1")),
                    End(BytesEnd::new("skipped-1")),
                    Start(BytesStart::new("skipped-2")),
                    End(BytesEnd::new("skipped-2")),
                ]
            );

            ////////////////////////////////////////////////////////////////////////////////////////

            assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
            assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("skipped-3")));
            assert_eq!(
                de.read,
                vec![
                    // This comment is here to keep the same formatting for both
                    // arrays, otherwise rustfmt suggests putting it on one line
                    Start(BytesStart::new("skipped-3")),
                ]
            );
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("skipped-1")),
                    End(BytesEnd::new("skipped-1")),
                    Start(BytesStart::new("skipped-2")),
                    End(BytesEnd::new("skipped-2")),
                ]
            );

            // start_replay() should start replay from this point.
            // The checkpoint equals the number of events already stored in `de.write`
            let checkpoint2 = de.skip_checkpoint();
            assert_eq!(checkpoint2, 4);

            // Should skip the third and fourth <skipped-N/> elements
            de.skip().unwrap(); // skipped-3
            de.skip().unwrap(); // skipped-4
            assert_eq!(de.read, vec![]);
            assert_eq!(
                de.write,
                vec![
                    // checkpoint 1
                    Start(BytesStart::new("skipped-1")),
                    End(BytesEnd::new("skipped-1")),
                    Start(BytesStart::new("skipped-2")),
                    End(BytesEnd::new("skipped-2")),
                    // checkpoint 2
                    Start(BytesStart::new("skipped-3")),
                    End(BytesEnd::new("skipped-3")),
                    Start(BytesStart::new("skipped-4")),
                    End(BytesEnd::new("skipped-4")),
                ]
            );
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-2")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-2")));
            assert_eq!(de.peek().unwrap(), &End(BytesEnd::new("inner")));
            assert_eq!(
                de.read,
                vec![
                    // This comment is here to keep the same formatting for both
                    // arrays, otherwise rustfmt suggests putting it on one line
                    End(BytesEnd::new("inner")),
                ]
            );
            assert_eq!(
                de.write,
                vec![
                    // checkpoint 1
                    Start(BytesStart::new("skipped-1")),
                    End(BytesEnd::new("skipped-1")),
                    Start(BytesStart::new("skipped-2")),
                    End(BytesEnd::new("skipped-2")),
                    // checkpoint 2
                    Start(BytesStart::new("skipped-3")),
                    End(BytesEnd::new("skipped-3")),
                    Start(BytesStart::new("skipped-4")),
                    End(BytesEnd::new("skipped-4")),
                ]
            );

            // Start replaying events from checkpoint 2. The events stored after
            // it are placed in front of the already peeked `</inner>`
            de.start_replay(checkpoint2);
            assert_eq!(
                de.read,
                vec![
                    Start(BytesStart::new("skipped-3")),
                    End(BytesEnd::new("skipped-3")),
                    Start(BytesStart::new("skipped-4")),
                    End(BytesEnd::new("skipped-4")),
                    End(BytesEnd::new("inner")),
                ]
            );
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("skipped-1")),
                    End(BytesEnd::new("skipped-1")),
                    Start(BytesStart::new("skipped-2")),
                    End(BytesEnd::new("skipped-2")),
                ]
            );

            // Replayed events
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-3")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-3")));
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-4")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-4")));

            assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
            assert_eq!(de.read, vec![]);
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("skipped-1")),
                    End(BytesEnd::new("skipped-1")),
                    Start(BytesStart::new("skipped-2")),
                    End(BytesEnd::new("skipped-2")),
                ]
            );

            ////////////////////////////////////////////////////////////////////////////////////////

            // New events
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-1")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-1")));

            assert_eq!(de.read, vec![]);
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("skipped-1")),
                    End(BytesEnd::new("skipped-1")),
                    Start(BytesStart::new("skipped-2")),
                    End(BytesEnd::new("skipped-2")),
                ]
            );

            // Start replaying events from checkpoint 1
            de.start_replay(checkpoint1);
            assert_eq!(
                de.read,
                vec![
                    Start(BytesStart::new("skipped-1")),
                    End(BytesEnd::new("skipped-1")),
                    Start(BytesStart::new("skipped-2")),
                    End(BytesEnd::new("skipped-2")),
                ]
            );
            assert_eq!(de.write, vec![]);

            // Replayed events
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-1")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-1")));
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-2")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-2")));

            assert_eq!(de.read, vec![]);
            assert_eq!(de.write, vec![]);

            // New events
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
            assert_eq!(de.next().unwrap(), Eof);
        }
4021
4022 /// Checks that limiting buffer size works correctly
4023 #[test]
4024 fn limit() {
4025 use serde::Deserialize;
4026
4027 #[derive(Debug, Deserialize)]
4028 #[allow(unused)]
4029 struct List {
4030 item: Vec<()>,
4031 }
4032
4033 let mut de = make_de(
4034 "\
4035 <any-name>\
4036 <item/>\
4037 <another-item>\
4038 <some-element>with text</some-element>\
4039 <yet-another-element/>\
4040 </another-item>\
4041 <item/>\
4042 <item/>\
4043 </any-name>\
4044 ",
4045 );
4046 de.event_buffer_size(NonZeroUsize::new(3));
4047
4048 match List::deserialize(&mut de) {
4049 Err(DeError::TooManyEvents(count)) => assert_eq!(count.get(), 3),
4050 e => panic!("Expected `Err(TooManyEvents(3))`, but got `{:?}`", e),
4051 }
4052 }
4053
4054 /// Without handling Eof in `skip` this test failed with memory allocation
4055 #[test]
4056 fn invalid_xml() {
4057 use crate::de::DeEvent::*;
4058
4059 let mut de = make_de("<root>");
4060
4061 // Cache all events
4062 let checkpoint = de.skip_checkpoint();
4063 de.skip().unwrap();
4064 de.start_replay(checkpoint);
4065 assert_eq!(de.read, vec![Start(BytesStart::new("root")), Eof]);
4066 }
4067 }
4068
4069 mod read_to_end {
4070 use super::*;
4071 use crate::de::DeEvent::*;
4072 use pretty_assertions::assert_eq;
4073
4074 #[test]
4075 fn complex() {
4076 let mut de = make_de(
4077 r#"
4078 <root>
4079 <tag a="1"><tag>text</tag>content</tag>
4080 <tag a="2"><![CDATA[cdata content]]></tag>
4081 <self-closed/>
4082 </root>
4083 "#,
4084 );
4085
4086 assert_eq!(de.next().unwrap(), Text("\n ".into()));
4087 assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
4088
4089 assert_eq!(de.next().unwrap(), Text("\n ".into()));
4090 assert_eq!(
4091 de.next().unwrap(),
4092 Start(BytesStart::from_content(r#"tag a="1""#, 3))
4093 );
4094 assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());
4095
4096 assert_eq!(de.next().unwrap(), Text("\n ".into()));
4097 assert_eq!(
4098 de.next().unwrap(),
4099 Start(BytesStart::from_content(r#"tag a="2""#, 3))
4100 );
4101 assert_eq!(de.next().unwrap(), Text("cdata content".into()));
4102 assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag")));
4103
4104 assert_eq!(de.next().unwrap(), Text("\n ".into()));
4105 assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed")));
4106 assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());
4107
4108 assert_eq!(de.next().unwrap(), Text("\n ".into()));
4109 assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
4110 assert_eq!(de.next().unwrap(), Text("\n ".into()));
4111 assert_eq!(de.next().unwrap(), Eof);
4112 }
4113
4114 #[test]
4115 fn invalid_xml1() {
4116 let mut de = make_de("<tag><tag></tag>");
4117
4118 assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
4119 assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag")));
4120
4121 match de.read_to_end(QName(b"tag")) {
4122 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4123 assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
4124 }
4125 x => panic!(
4126 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4127 x
4128 ),
4129 }
4130 assert_eq!(de.next().unwrap(), Eof);
4131 }
4132
4133 #[test]
4134 fn invalid_xml2() {
4135 let mut de = make_de("<tag><![CDATA[]]><tag></tag>");
4136
4137 assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
4138 assert_eq!(de.peek().unwrap(), &Text("".into()));
4139
4140 match de.read_to_end(QName(b"tag")) {
4141 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4142 assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
4143 }
4144 x => panic!(
4145 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4146 x
4147 ),
4148 }
4149 assert_eq!(de.next().unwrap(), Eof);
4150 }
4151 }
4152
4153 #[test]
4154 fn borrowing_reader_parity() {
4155 let s = r#"
4156 <item name="hello" source="world.rs">Some text</item>
4157 <item2/>
4158 <item3 value="world" />
4159 "#;
4160
4161 let mut reader1 = IoReader {
4162 reader: NsReader::from_reader(s.as_bytes()),
4163 buf: Vec::new(),
4164 version: XmlVersion::Implicit1_0,
4165 };
4166 let mut reader2 = SliceReader {
4167 reader: NsReader::from_str(s),
4168 version: XmlVersion::Implicit1_0,
4169 };
4170
4171 loop {
4172 let event1 = reader1.next().unwrap();
4173 let event2 = reader2.next().unwrap();
4174
4175 if let (PayloadEvent::Eof, PayloadEvent::Eof) = (&event1, &event2) {
4176 break;
4177 }
4178
4179 assert_eq!(event1, event2);
4180 }
4181 }
4182
4183 #[test]
4184 fn borrowing_reader_events() {
4185 let s = r#"
4186 <item name="hello" source="world.rs">Some text</item>
4187 <item2></item2>
4188 <item3/>
4189 <item4 value="world" />
4190 "#;
4191
4192 let mut reader = SliceReader {
4193 reader: NsReader::from_str(s),
4194 version: XmlVersion::Implicit1_0,
4195 };
4196
4197 let config = reader.reader.config_mut();
4198 config.expand_empty_elements = true;
4199
4200 let mut events = Vec::new();
4201
4202 loop {
4203 let event = reader.next().unwrap();
4204 if let PayloadEvent::Eof = event {
4205 break;
4206 }
4207 events.push(event);
4208 }
4209
4210 use crate::de::PayloadEvent::*;
4211
4212 assert_eq!(
4213 events,
4214 vec![
4215 Text(BytesText::from_escaped("\n ")),
4216 Start(BytesStart::from_content(
4217 r#"item name="hello" source="world.rs""#,
4218 4
4219 )),
4220 Text(BytesText::from_escaped("Some text")),
4221 End(BytesEnd::new("item")),
4222 Text(BytesText::from_escaped("\n ")),
4223 Start(BytesStart::from_content("item2", 5)),
4224 End(BytesEnd::new("item2")),
4225 Text(BytesText::from_escaped("\n ")),
4226 Start(BytesStart::from_content("item3", 5)),
4227 End(BytesEnd::new("item3")),
4228 Text(BytesText::from_escaped("\n ")),
4229 Start(BytesStart::from_content(r#"item4 value="world" "#, 5)),
4230 End(BytesEnd::new("item4")),
4231 Text(BytesText::from_escaped("\n ")),
4232 ]
4233 )
4234 }
4235
4236 /// Ensures, that [`Deserializer::read_string()`] never can get an `End` event,
4237 /// because parser reports error early
4238 #[test]
4239 fn read_string() {
4240 match from_str::<String>(r#"</root>"#) {
4241 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4242 assert_eq!(cause, IllFormedError::UnmatchedEndTag("root".into()));
4243 }
4244 x => panic!(
4245 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4246 x
4247 ),
4248 }
4249
4250 let s: String = from_str(r#"<root></root>"#).unwrap();
4251 assert_eq!(s, "");
4252
4253 match from_str::<String>(r#"<root></other>"#) {
4254 Err(DeError::InvalidXml(Error::IllFormed(cause))) => assert_eq!(
4255 cause,
4256 IllFormedError::MismatchedEndTag {
4257 expected: "root".into(),
4258 found: "other".into(),
4259 }
4260 ),
4261 x => panic!("Expected `Err(InvalidXml(IllFormed(_))`, but got `{:?}`", x),
4262 }
4263 }
4264
4265 /// Tests for https://github.com/tafia/quick-xml/issues/474.
4266 ///
4267 /// That tests ensures that comments and processed instructions is ignored
4268 /// and can split one logical string in pieces.
4269 mod merge_text {
4270 use super::*;
4271 use pretty_assertions::assert_eq;
4272
4273 #[test]
4274 fn text() {
4275 let mut de = make_de("text");
4276 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4277 }
4278
4279 #[test]
4280 fn cdata() {
4281 let mut de = make_de("<![CDATA[cdata]]>");
4282 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata".into()));
4283 }
4284
4285 #[test]
4286 fn text_and_cdata() {
4287 let mut de = make_de("text and <![CDATA[cdata]]>");
4288 assert_eq!(de.next().unwrap(), DeEvent::Text("text and cdata".into()));
4289 }
4290
4291 #[test]
4292 fn text_and_empty_cdata() {
4293 let mut de = make_de("text and <![CDATA[]]>");
4294 assert_eq!(de.next().unwrap(), DeEvent::Text("text and ".into()));
4295 }
4296
4297 #[test]
4298 fn cdata_and_text() {
4299 let mut de = make_de("<![CDATA[cdata]]> and text");
4300 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata and text".into()));
4301 }
4302
4303 #[test]
4304 fn empty_cdata_and_text() {
4305 let mut de = make_de("<![CDATA[]]> and text");
4306 assert_eq!(de.next().unwrap(), DeEvent::Text(" and text".into()));
4307 }
4308
4309 #[test]
4310 fn cdata_and_cdata() {
4311 let mut de = make_de(
4312 "\
4313 <![CDATA[cdata]]]]>\
4314 <![CDATA[>cdata]]>\
4315 ",
4316 );
4317 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4318 }
4319
4320 mod comment_between {
4321 use super::*;
4322 use pretty_assertions::assert_eq;
4323
4324 #[test]
4325 fn text() {
4326 let mut de = make_de(
4327 "\
4328 text \
4329 <!--comment 1--><!--comment 2--> \
4330 text\
4331 ",
4332 );
4333 assert_eq!(de.next().unwrap(), DeEvent::Text("text text".into()));
4334 }
4335
4336 #[test]
4337 fn cdata() {
4338 let mut de = make_de(
4339 "\
4340 <![CDATA[cdata]]]]>\
4341 <!--comment 1--><!--comment 2-->\
4342 <![CDATA[>cdata]]>\
4343 ",
4344 );
4345 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4346 }
4347
4348 #[test]
4349 fn text_and_cdata() {
4350 let mut de = make_de(
4351 "\
4352 text \
4353 <!--comment 1--><!--comment 2-->\
4354 <![CDATA[ cdata]]>\
4355 ",
4356 );
4357 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata".into()));
4358 }
4359
4360 #[test]
4361 fn text_and_empty_cdata() {
4362 let mut de = make_de(
4363 "\
4364 text \
4365 <!--comment 1--><!--comment 2-->\
4366 <![CDATA[]]>\
4367 ",
4368 );
4369 assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into()));
4370 }
4371
4372 #[test]
4373 fn cdata_and_text() {
4374 let mut de = make_de(
4375 "\
4376 <![CDATA[cdata ]]>\
4377 <!--comment 1--><!--comment 2--> \
4378 text \
4379 ",
4380 );
4381 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text ".into()));
4382 }
4383
4384 #[test]
4385 fn empty_cdata_and_text() {
4386 let mut de = make_de(
4387 "\
4388 <![CDATA[]]>\
4389 <!--comment 1--><!--comment 2--> \
4390 text \
4391 ",
4392 );
4393 assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
4394 }
4395
4396 #[test]
4397 fn cdata_and_cdata() {
4398 let mut de = make_de(
4399 "\
4400 <![CDATA[cdata]]]>\
4401 <!--comment 1--><!--comment 2-->\
4402 <![CDATA[]>cdata]]>\
4403 ",
4404 );
4405 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4406 }
4407 }
4408
4409 mod pi_between {
4410 use super::*;
4411 use pretty_assertions::assert_eq;
4412
4413 #[test]
4414 fn text() {
4415 let mut de = make_de(
4416 "\
4417 text \
4418 <?pi 1?><?pi 2?> \
4419 text\
4420 ",
4421 );
4422 assert_eq!(de.next().unwrap(), DeEvent::Text("text text".into()));
4423 }
4424
4425 #[test]
4426 fn cdata() {
4427 let mut de = make_de(
4428 "\
4429 <![CDATA[cdata]]]]>\
4430 <?pi 1?><?pi 2?>\
4431 <![CDATA[>cdata]]>\
4432 ",
4433 );
4434 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4435 }
4436
4437 #[test]
4438 fn text_and_cdata() {
4439 let mut de = make_de(
4440 "\
4441 text \
4442 <?pi 1?><?pi 2?>\
4443 <![CDATA[ cdata]]>\
4444 ",
4445 );
4446 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata".into()));
4447 }
4448
4449 #[test]
4450 fn text_and_empty_cdata() {
4451 let mut de = make_de(
4452 "\
4453 text \
4454 <?pi 1?><?pi 2?>\
4455 <![CDATA[]]>\
4456 ",
4457 );
4458 assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into()));
4459 }
4460
4461 #[test]
4462 fn cdata_and_text() {
4463 let mut de = make_de(
4464 "\
4465 <![CDATA[cdata ]]>\
4466 <?pi 1?><?pi 2?> \
4467 text \
4468 ",
4469 );
4470 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text ".into()));
4471 }
4472
4473 #[test]
4474 fn empty_cdata_and_text() {
4475 let mut de = make_de(
4476 "\
4477 <![CDATA[]]>\
4478 <?pi 1?><?pi 2?> \
4479 text \
4480 ",
4481 );
4482 assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
4483 }
4484
4485 #[test]
4486 fn cdata_and_cdata() {
4487 let mut de = make_de(
4488 "\
4489 <![CDATA[cdata]]]>\
4490 <?pi 1?><?pi 2?>\
4491 <![CDATA[]>cdata]]>\
4492 ",
4493 );
4494 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4495 }
4496 }
4497 }
4498
    /// Tests for https://github.com/tafia/quick-xml/issues/474.
    ///
    /// These tests ensure that any combination of payload data is processed
    /// as expected.
4503 mod triples {
4504 use super::*;
4505 use pretty_assertions::assert_eq;
4506
4507 mod start {
4508 use super::*;
4509
4510 /// <tag1><tag2>...
4511 // The same name is intentional
4512 #[allow(clippy::module_inception)]
4513 mod start {
4514 use super::*;
4515 use pretty_assertions::assert_eq;
4516
4517 #[test]
4518 fn start() {
4519 let mut de = make_de("<tag1><tag2><tag3>");
4520 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4521 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4522 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag3")));
4523 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4524 }
4525
4526 /// Not matching end tag will result to error
4527 #[test]
4528 fn end() {
4529 let mut de = make_de("<tag1><tag2></tag2>");
4530 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4531 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4532 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag2")));
4533 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4534 }
4535
4536 #[test]
4537 fn text() {
4538 let mut de = make_de("<tag1><tag2> text ");
4539 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4540 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4541 assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
4542 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4543 }
4544
4545 #[test]
4546 fn cdata() {
4547 let mut de = make_de("<tag1><tag2><![CDATA[ cdata ]]>");
4548 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4549 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4550 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4551 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4552 }
4553
4554 #[test]
4555 fn eof() {
4556 let mut de = make_de("<tag1><tag2>");
4557 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4558 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4559 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4560 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4561 }
4562 }
4563
4564 /// <tag></tag>...
4565 mod end {
4566 use super::*;
4567 use pretty_assertions::assert_eq;
4568
4569 #[test]
4570 fn start() {
4571 let mut de = make_de("<tag></tag><tag2>");
4572 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4573 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4574 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4575 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4576 }
4577
4578 #[test]
4579 fn end() {
4580 let mut de = make_de("<tag></tag></tag2>");
4581 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4582 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4583 match de.next() {
4584 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4585 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag2".into()));
4586 }
4587 x => panic!(
4588 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4589 x
4590 ),
4591 }
4592 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4593 }
4594
4595 #[test]
4596 fn text() {
4597 let mut de = make_de("<tag></tag> text ");
4598 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4599 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4600 assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
4601 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4602 }
4603
4604 #[test]
4605 fn cdata() {
4606 let mut de = make_de("<tag></tag><![CDATA[ cdata ]]>");
4607 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4608 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4609 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4610 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4611 }
4612
4613 #[test]
4614 fn eof() {
4615 let mut de = make_de("<tag></tag>");
4616 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4617 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4618 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4619 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4620 }
4621 }
4622
4623 /// <tag> text ...
4624 mod text {
4625 use super::*;
4626 use pretty_assertions::assert_eq;
4627
4628 #[test]
4629 fn start() {
4630 let mut de = make_de("<tag> text <tag2>");
4631 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4632 assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
4633 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4634 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4635 }
4636
4637 #[test]
4638 fn end() {
4639 let mut de = make_de("<tag> text </tag>");
4640 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4641 assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
4642 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4643 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4644 }
4645
4646 // start::text::text has no difference from start::text
4647
4648 #[test]
4649 fn cdata() {
4650 let mut de = make_de("<tag> text <![CDATA[ cdata ]]>");
4651 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4652 assert_eq!(de.next().unwrap(), DeEvent::Text(" text cdata ".into()));
4653 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4654 }
4655
4656 #[test]
4657 fn eof() {
4658 let mut de = make_de("<tag> text ");
4659 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4660 assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
4661 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4662 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4663 }
4664 }
4665
4666 /// <tag><![CDATA[ cdata ]]>...
4667 mod cdata {
4668 use super::*;
4669 use pretty_assertions::assert_eq;
4670
4671 #[test]
4672 fn start() {
4673 let mut de = make_de("<tag><![CDATA[ cdata ]]><tag2>");
4674 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4675 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4676 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4677 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4678 }
4679
4680 #[test]
4681 fn end() {
4682 let mut de = make_de("<tag><![CDATA[ cdata ]]></tag>");
4683 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4684 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4685 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4686 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4687 }
4688
4689 #[test]
4690 fn text() {
4691 let mut de = make_de("<tag><![CDATA[ cdata ]]> text ");
4692 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4693 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text ".into()));
4694 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4695 }
4696
4697 #[test]
4698 fn cdata() {
4699 let mut de = make_de("<tag><![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4700 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4701 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4702 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4703 }
4704
4705 #[test]
4706 fn eof() {
4707 let mut de = make_de("<tag><![CDATA[ cdata ]]>");
4708 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4709 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4710 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4711 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4712 }
4713 }
4714 }
4715
4716 /// Start from End event will always generate an error
4717 #[test]
4718 fn end() {
4719 let mut de = make_de("</tag>");
4720 match de.next() {
4721 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4722 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4723 }
4724 x => panic!(
4725 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4726 x
4727 ),
4728 }
4729 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4730 }
4731
4732 mod text {
4733 use super::*;
4734 use pretty_assertions::assert_eq;
4735
4736 mod start {
4737 use super::*;
4738 use pretty_assertions::assert_eq;
4739
4740 #[test]
4741 fn start() {
4742 let mut de = make_de(" text <tag1><tag2>");
4743 assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
4744 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4745 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4746 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4747 }
4748
4749 /// Not matching end tag will result in error
4750 #[test]
4751 fn end() {
4752 let mut de = make_de(" text <tag></tag>");
4753 assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
4754 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4755 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4756 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4757 }
4758
4759 #[test]
4760 fn text() {
4761 let mut de = make_de(" text <tag> text2 ");
4762 assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
4763 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4764 assert_eq!(de.next().unwrap(), DeEvent::Text(" text2 ".into()));
4765 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4766 }
4767
4768 #[test]
4769 fn cdata() {
4770 let mut de = make_de(" text <tag><![CDATA[ cdata ]]>");
4771 assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
4772 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4773 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4774 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4775 }
4776
4777 #[test]
4778 fn eof() {
4779 let mut de = make_de(" text <tag>");
4780 assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
4781 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4782 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4783 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4784 }
4785 }
4786
4787 /// End event without corresponding start event will always generate an error
4788 #[test]
4789 fn end() {
4790 let mut de = make_de(" text </tag>");
4791 assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
4792 match de.next() {
4793 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4794 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4795 }
4796 x => panic!(
4797 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4798 x
4799 ),
4800 }
4801 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4802 }
4803
4804 // text::text::something is equivalent to text::something
4805
4806 mod cdata {
4807 use super::*;
4808 use pretty_assertions::assert_eq;
4809
4810 #[test]
4811 fn start() {
4812 let mut de = make_de(" text <![CDATA[ cdata ]]><tag>");
4813 assert_eq!(de.next().unwrap(), DeEvent::Text(" text cdata ".into()));
4814 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4815 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4816 }
4817
4818 #[test]
4819 fn end() {
4820 let mut de = make_de(" text <![CDATA[ cdata ]]></tag>");
4821 assert_eq!(de.next().unwrap(), DeEvent::Text(" text cdata ".into()));
4822 match de.next() {
4823 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4824 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4825 }
4826 x => panic!(
4827 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4828 x
4829 ),
4830 }
4831 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4832 }
4833
4834 #[test]
4835 fn text() {
4836 let mut de = make_de(" text <![CDATA[ cdata ]]> text2 ");
4837 assert_eq!(
4838 de.next().unwrap(),
4839 DeEvent::Text(" text cdata text2 ".into())
4840 );
4841 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4842 }
4843
4844 #[test]
4845 fn cdata() {
4846 let mut de = make_de(" text <![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4847 assert_eq!(
4848 de.next().unwrap(),
4849 DeEvent::Text(" text cdata cdata2 ".into())
4850 );
4851 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4852 }
4853
4854 #[test]
4855 fn eof() {
4856 let mut de = make_de(" text <![CDATA[ cdata ]]>");
4857 assert_eq!(de.next().unwrap(), DeEvent::Text(" text cdata ".into()));
4858 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4859 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4860 }
4861 }
4862 }
4863
4864 mod cdata {
4865 use super::*;
4866 use pretty_assertions::assert_eq;
4867
4868 mod start {
4869 use super::*;
4870 use pretty_assertions::assert_eq;
4871
4872 #[test]
4873 fn start() {
4874 let mut de = make_de("<![CDATA[ cdata ]]><tag1><tag2>");
4875 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4876 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4877 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4878 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4879 }
4880
4881 /// Not matching end tag will result in error
4882 #[test]
4883 fn end() {
4884 let mut de = make_de("<![CDATA[ cdata ]]><tag></tag>");
4885 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4886 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4887 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4888 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4889 }
4890
4891 #[test]
4892 fn text() {
4893 let mut de = make_de("<![CDATA[ cdata ]]><tag> text ");
4894 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4895 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4896 assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
4897 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4898 }
4899
4900 #[test]
4901 fn cdata() {
4902 let mut de = make_de("<![CDATA[ cdata ]]><tag><![CDATA[ cdata2 ]]>");
4903 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4904 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4905 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata2 ".into()));
4906 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4907 }
4908
4909 #[test]
4910 fn eof() {
4911 let mut de = make_de("<![CDATA[ cdata ]]><tag>");
4912 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4913 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4914 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4915 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4916 }
4917 }
4918
4919 /// End event without corresponding start event will always generate an error
4920 #[test]
4921 fn end() {
4922 let mut de = make_de("<![CDATA[ cdata ]]></tag>");
4923 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4924 match de.next() {
4925 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4926 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4927 }
4928 x => panic!(
4929 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4930 x
4931 ),
4932 }
4933 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4934 }
4935
4936 mod text {
4937 use super::*;
4938 use pretty_assertions::assert_eq;
4939
4940 #[test]
4941 fn start() {
4942 let mut de = make_de("<![CDATA[ cdata ]]> text <tag>");
4943 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text ".into()));
4944 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4945 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4946 }
4947
4948 #[test]
4949 fn end() {
4950 let mut de = make_de("<![CDATA[ cdata ]]> text </tag>");
4951 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text ".into()));
4952 match de.next() {
4953 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4954 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4955 }
4956 x => panic!(
4957 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4958 x
4959 ),
4960 }
4961 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4962 }
4963
4964 // cdata::text::text is equivalent to cdata::text
4965
4966 #[test]
4967 fn cdata() {
4968 let mut de = make_de("<![CDATA[ cdata ]]> text <![CDATA[ cdata2 ]]>");
4969 assert_eq!(
4970 de.next().unwrap(),
4971 DeEvent::Text(" cdata text cdata2 ".into())
4972 );
4973 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4974 }
4975
4976 #[test]
4977 fn eof() {
4978 let mut de = make_de("<![CDATA[ cdata ]]> text ");
4979 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text ".into()));
4980 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4981 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4982 }
4983 }
4984
4985 // The same name is intentional
4986 #[allow(clippy::module_inception)]
4987 mod cdata {
4988 use super::*;
4989 use pretty_assertions::assert_eq;
4990
4991 #[test]
4992 fn start() {
4993 let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><tag>");
4994 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4995 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4996 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4997 }
4998
4999 #[test]
5000 fn end() {
5001 let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]></tag>");
5002 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
5003 match de.next() {
5004 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
5005 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
5006 }
5007 x => panic!(
5008 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
5009 x
5010 ),
5011 }
5012 assert_eq!(de.next().unwrap(), DeEvent::Eof);
5013 }
5014
5015 #[test]
5016 fn text() {
5017 let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]> text ");
5018 assert_eq!(
5019 de.next().unwrap(),
5020 DeEvent::Text(" cdata cdata2 text ".into())
5021 );
5022 assert_eq!(de.next().unwrap(), DeEvent::Eof);
5023 }
5024
5025 #[test]
5026 fn cdata() {
5027 let mut de =
5028 make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><![CDATA[ cdata3 ]]>");
5029 assert_eq!(
5030 de.next().unwrap(),
5031 DeEvent::Text(" cdata cdata2 cdata3 ".into())
5032 );
5033 assert_eq!(de.next().unwrap(), DeEvent::Eof);
5034 }
5035
5036 #[test]
5037 fn eof() {
5038 let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
5039 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
5040 assert_eq!(de.next().unwrap(), DeEvent::Eof);
5041 assert_eq!(de.next().unwrap(), DeEvent::Eof);
5042 }
5043 }
5044 }
5045 }
5046}