Skip to main content

quick_xml/de/
key.rs

1use crate::de::simple_type::UnitOnly;
2use crate::encoding::Decoder;
3use crate::errors::serialize::DeError;
4use crate::events::BytesStart;
5use crate::name::QName;
6use crate::utils::CowRef;
7use serde::de::{DeserializeSeed, Deserializer, EnumAccess, Visitor};
8use serde::forward_to_deserialize_any;
9use std::borrow::Cow;
10
/// Generates a `Deserializer` method for one numeric type.
///
/// The generated method tries to parse the name as the number type expected
/// by the `$visit_fn` method of the visitor. If the name is not a valid number
/// of that type, the name is handed to the visitor as a string instead.
macro_rules! deserialize_num {
    ($deserialize_fn:ident, $visit_fn:ident) => {
        fn $deserialize_fn<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
            // The target number type is inferred from the parameter of `$visit_fn`
            if let Ok(value) = self.name.parse() {
                visitor.$visit_fn(value)
            } else {
                // Not a number: fall back to the string representation
                self.name.deserialize_str(visitor)
            }
        }
    };
}
24
25/// Decodes raw bytes using the deserializer encoding.
26/// The method will borrow if encoding is UTF-8 compatible and `name` contains
27/// only UTF-8 compatible characters (usually only ASCII characters).
28#[inline]
29fn decode_name<'n>(name: QName<'n>, decoder: Decoder) -> Result<Cow<'n, str>, DeError> {
30    let local = name.local_name();
31    Ok(decoder.decode(local.into_inner())?)
32}
33
34/// A deserializer for xml names of elements and attributes.
35///
36/// Used for deserializing values from:
37/// - attribute names (`<... name="..." ...>`)
38/// - element names (`<name>...</name>`)
39///
40/// Converts a name to an identifier string using the following rules:
41///
42/// - if it is an [`attribute`] name, put `@` in front of the identifier
43/// - if it is a namespace binding (`xmlns` or `xmlns:xxx`) put the decoded name
44///   to the identifier
45/// - if it is an attribute in the `xml` namespace, put the decoded name
46///   to the identifier
47/// - put the decoded [`local_name()`] of a name to the identifier
48///
49/// The final identifier looks like `[@]local_name`, or `@xmlns`, or `@xmlns:binding` or
50/// `xml:attribute` (where `[]` means optional element).
51///
52/// The deserializer also supports deserializing names as other primitive types:
53/// - numbers
54/// - booleans
55/// - unit (`()`) and unit structs
56/// - unit variants of the enumerations
57///
58/// Because `serde` does not define on which side type conversion should be
59/// performed, and because [`Deserialize`] implementation for that primitives
60/// in serde does not accept strings, the deserializer will perform conversion
61/// by itself.
62///
63/// The deserializer is able to deserialize unit and unit structs, but any name
64/// will be converted to the same unit instance. This is asymmetry with a serializer,
65/// which not able to serialize those types, because empty names are impossible
66/// in XML.
67///
68/// `deserialize_any()` returns the same result as `deserialize_identifier()`.
69///
70/// # Lifetimes
71///
72/// - `'i`: lifetime of the data that the deserializer borrows from the parsed input
73/// - `'d`: lifetime of a deserializer that holds a buffer with content of events
74///
75/// [`attribute`]: Self::from_attr
76/// [`local_name()`]: QName::local_name
77/// [`Deserialize`]: serde::Deserialize
78pub struct QNameDeserializer<'i, 'd> {
79    name: CowRef<'i, 'd, str>,
80}
81
82impl<'i, 'd> QNameDeserializer<'i, 'd> {
83    /// Creates deserializer from name of an attribute
84    pub fn from_attr(
85        name: QName<'d>,
86        decoder: Decoder,
87        key_buf: &'d mut String,
88    ) -> Result<Self, DeError> {
89        // https://github.com/tafia/quick-xml/issues/537
90        // Namespace bindings (xmlns:xxx) map to `@xmlns:xxx` instead of `@xxx`
91        if name.as_namespace_binding().is_some() {
92            decoder.decode_into(name.into_inner(), key_buf)?;
93        } else {
94            // https://github.com/tafia/quick-xml/issues/841
95            // we also want to map to the full name for `xml:xxx`, because `xml:xxx` attributes
96            // can apper only in this literal form, as `xml` prefix cannot be redeclared or unbound
97            let (local, prefix_opt) = name.decompose();
98            if prefix_opt.map_or(false, |prefix| prefix.is_xml()) {
99                decoder.decode_into(name.into_inner(), key_buf)?;
100            } else {
101                decoder.decode_into(local.into_inner(), key_buf)?;
102            }
103        };
104
105        Ok(Self {
106            name: CowRef::Slice(key_buf),
107        })
108    }
109
110    /// Creates deserializer from name of an element
111    pub fn from_elem(start: &'d BytesStart<'i>) -> Result<Self, DeError> {
112        let local = match start.buf {
113            Cow::Borrowed(b) => match decode_name(QName(&b[..start.name_len]), start.decoder())? {
114                Cow::Borrowed(borrowed) => CowRef::Input(borrowed),
115                Cow::Owned(owned) => CowRef::Owned(owned),
116            },
117            Cow::Owned(ref o) => match decode_name(QName(&o[..start.name_len]), start.decoder())? {
118                Cow::Borrowed(borrowed) => CowRef::Slice(borrowed),
119                Cow::Owned(owned) => CowRef::Owned(owned),
120            },
121        };
122
123        Ok(Self { name: local })
124    }
125}
126
127impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'de, 'd> {
128    type Error = DeError;
129
130    forward_to_deserialize_any! {
131        char str string
132        bytes byte_buf
133        seq tuple tuple_struct
134        map struct
135        ignored_any
136    }
137
138    /// According to the <https://www.w3.org/TR/xmlschema11-2/#boolean>,
139    /// valid boolean representations are only `"true"`, `"false"`, `"1"`,
140    /// and `"0"`.
141    fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value, Self::Error>
142    where
143        V: Visitor<'de>,
144    {
145        self.name.deserialize_bool(visitor)
146    }
147
148    deserialize_num!(deserialize_i8, visit_i8);
149    deserialize_num!(deserialize_i16, visit_i16);
150    deserialize_num!(deserialize_i32, visit_i32);
151    deserialize_num!(deserialize_i64, visit_i64);
152
153    deserialize_num!(deserialize_u8, visit_u8);
154    deserialize_num!(deserialize_u16, visit_u16);
155    deserialize_num!(deserialize_u32, visit_u32);
156    deserialize_num!(deserialize_u64, visit_u64);
157
158    deserialize_num!(deserialize_i128, visit_i128);
159    deserialize_num!(deserialize_u128, visit_u128);
160
161    deserialize_num!(deserialize_f32, visit_f32);
162    deserialize_num!(deserialize_f64, visit_f64);
163
164    /// Calls [`Visitor::visit_unit`]
165    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
166    where
167        V: Visitor<'de>,
168    {
169        visitor.visit_unit()
170    }
171
172    /// Forwards deserialization to the [`Self::deserialize_unit`]
173    fn deserialize_unit_struct<V>(
174        self,
175        _name: &'static str,
176        visitor: V,
177    ) -> Result<V::Value, Self::Error>
178    where
179        V: Visitor<'de>,
180    {
181        self.deserialize_unit(visitor)
182    }
183
184    /// Forwards deserialization to the [`Self::deserialize_identifier`]
185    #[inline]
186    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
187    where
188        V: Visitor<'de>,
189    {
190        self.deserialize_identifier(visitor)
191    }
192
193    /// If `name` is an empty string then calls [`Visitor::visit_none`],
194    /// otherwise calls [`Visitor::visit_some`] with itself
195    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
196    where
197        V: Visitor<'de>,
198    {
199        if self.name.is_empty() {
200            visitor.visit_none()
201        } else {
202            visitor.visit_some(self)
203        }
204    }
205
206    fn deserialize_newtype_struct<V>(
207        self,
208        _name: &'static str,
209        visitor: V,
210    ) -> Result<V::Value, Self::Error>
211    where
212        V: Visitor<'de>,
213    {
214        visitor.visit_newtype_struct(self)
215    }
216
217    /// Calls a [`Visitor::visit_str`] if [`name`] contains only UTF-8
218    /// compatible encoded characters and represents an element name and
219    /// a [`Visitor::visit_string`] in all other cases.
220    ///
221    /// [`name`]: Self::name
222    fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, Self::Error>
223    where
224        V: Visitor<'de>,
225    {
226        match self.name {
227            CowRef::Input(name) => visitor.visit_borrowed_str(name),
228            CowRef::Slice(name) => visitor.visit_str(name),
229            CowRef::Owned(name) => visitor.visit_string(name),
230        }
231    }
232
233    fn deserialize_enum<V>(
234        self,
235        _name: &str,
236        _variants: &'static [&'static str],
237        visitor: V,
238    ) -> Result<V::Value, Self::Error>
239    where
240        V: Visitor<'de>,
241    {
242        visitor.visit_enum(self)
243    }
244}
245
246impl<'de, 'd> EnumAccess<'de> for QNameDeserializer<'de, 'd> {
247    type Error = DeError;
248    type Variant = UnitOnly;
249
250    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
251    where
252        V: DeserializeSeed<'de>,
253    {
254        let name = seed.deserialize(self)?;
255        Ok((name, UnitOnly))
256    }
257}
258
259////////////////////////////////////////////////////////////////////////////////////////////////////
260
#[cfg(test)]
mod tests {
    use super::*;
    use crate::se::key::QNameSerializer;
    use crate::utils::{ByteBuf, Bytes};
    use pretty_assertions::assert_eq;
    use serde::de::IgnoredAny;
    use serde::{Deserialize, Serialize};
    use std::collections::HashMap;

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Unit;

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Newtype(String);

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Tuple((), ());

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Struct {
        key: String,
        val: usize,
    }

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    enum Enum {
        Unit,
        #[serde(rename = "@Attr")]
        Attr,
        Newtype(String),
        Tuple(String, usize),
        Struct {
            key: String,
            val: usize,
        },
    }

    #[derive(Debug, Deserialize, PartialEq)]
    #[serde(field_identifier)]
    enum Id {
        Field,
    }

    /// Wrapper around [`IgnoredAny`] that can be used in `assert_eq!`
    #[derive(Debug, Deserialize)]
    #[serde(transparent)]
    struct Any(IgnoredAny);
    impl PartialEq for Any {
        /// All instances are considered equal, because nothing is stored
        fn eq(&self, _: &Self) -> bool {
            true
        }
    }

    /// Generates a test `$name` which checks that `$input` is successfully
    /// deserialized to a `$type` that is equal to `$result`. Serialization
    /// is not checked
    macro_rules! deserialized_to_only {
        ($name:ident: $type:ty = $input:literal => $result:expr) => {
            #[test]
            fn $name() {
                let actual: $type = Deserialize::deserialize(QNameDeserializer {
                    name: CowRef::Input($input),
                })
                .unwrap();

                assert_eq!(actual, $result);
            }
        };
    }

    /// Generates a test `$name` which checks that `$input` is successfully
    /// deserialized to a `$type` that is equal to `$result`, and that the
    /// deserialized value is serialized back to `$input`
    macro_rules! deserialized_to {
        ($name:ident: $type:ty = $input:literal => $result:expr) => {
            #[test]
            fn $name() {
                let actual: $type = Deserialize::deserialize(QNameDeserializer {
                    name: CowRef::Input($input),
                })
                .unwrap();

                assert_eq!(actual, $result);

                // Roundtrip to ensure that serializer corresponds to deserializer
                let serialized = actual
                    .serialize(QNameSerializer {
                        writer: String::new(),
                    })
                    .unwrap();
                assert_eq!(serialized, $input);
            }
        };
    }

    /// Generates a test `$name` which checks that deserialization of `$input`
    /// as a `$type` fails with the error `$kind` that contains `$reason`
    macro_rules! err {
        ($name:ident: $type:ty = $input:literal => $kind:ident($reason:literal)) => {
            #[test]
            fn $name() {
                let de = QNameDeserializer {
                    name: CowRef::Input($input),
                };

                match <$type as Deserialize>::deserialize(de).unwrap_err() {
                    DeError::$kind(e) => assert_eq!(e, $reason),
                    unexpected => panic!(
                        "Expected `Err({}({}))`, but got `{:?}`",
                        stringify!($kind),
                        $reason,
                        unexpected
                    ),
                }
            }
        };
    }

    // Booleans
    deserialized_to!(false_: bool = "false" => false);
    deserialized_to!(true_: bool  = "true" => true);

    // Signed integers
    deserialized_to!(i8_:  i8  = "-2" => -2);
    deserialized_to!(i16_: i16 = "-2" => -2);
    deserialized_to!(i32_: i32 = "-2" => -2);
    deserialized_to!(i64_: i64 = "-2" => -2);

    // Unsigned integers
    deserialized_to!(u8_:  u8  = "3" => 3);
    deserialized_to!(u16_: u16 = "3" => 3);
    deserialized_to!(u32_: u32 = "3" => 3);
    deserialized_to!(u64_: u64 = "3" => 3);

    // 128-bit integers
    deserialized_to!(i128_: i128 = "-2" => -2);
    deserialized_to!(u128_: u128 = "2" => 2);

    // Floating point numbers
    deserialized_to!(f32_: f32 = "1.23" => 1.23);
    deserialized_to!(f64_: f64 = "1.23" => 1.23);

    // Characters: the name is used as is, without unescaping
    deserialized_to!(char_unescaped: char = "h" => 'h');
    err!(char_escaped: char = "&lt;"
        => Custom("invalid value: string \"&lt;\", expected a character"));

    // Strings: the name is used as is, without unescaping
    deserialized_to!(string: String = "&lt;escaped&#x20;string" => "&lt;escaped&#x20;string");
    deserialized_to!(borrowed_str: &str = "name" => "name");

    // Bytes are not supported
    err!(byte_buf: ByteBuf = "&lt;escaped&#x20;string"
        => Custom("invalid type: string \"&lt;escaped&#x20;string\", expected byte data"));
    err!(borrowed_bytes: Bytes = "name"
        => Custom("invalid type: string \"name\", expected borrowed bytes"));

    // Options: only an empty name is `None`
    deserialized_to!(option_none: Option<String> = "" => None);
    deserialized_to!(option_some: Option<String> = "name" => Some("name".into()));

    // Units have no meaningful representation as a name, but they are useful
    // as a placeholder when we want to deserialize _something_
    deserialized_to_only!(unit: () = "anything" => ());
    deserialized_to_only!(unit_struct: Unit = "anything" => Unit);

    deserialized_to!(newtype: Newtype = "&lt;escaped&#x20;string" => Newtype("&lt;escaped&#x20;string".into()));

    // Sequences are not supported
    err!(seq: Vec<()> = "name"
        => Custom("invalid type: string \"name\", expected a sequence"));
    err!(tuple: ((), ()) = "name"
        => Custom("invalid type: string \"name\", expected a tuple of size 2"));
    err!(tuple_struct: Tuple = "name"
        => Custom("invalid type: string \"name\", expected tuple struct Tuple"));

    // Maps are not supported
    err!(map: HashMap<(), ()> = "name"
        => Custom("invalid type: string \"name\", expected a map"));
    err!(struct_: Struct = "name"
        => Custom("invalid type: string \"name\", expected struct Struct"));

    // Enumerations: only unit variants are supported
    deserialized_to!(enum_unit: Enum = "Unit" => Enum::Unit);
    deserialized_to!(enum_unit_for_attr: Enum = "@Attr" => Enum::Attr);
    err!(enum_newtype: Enum = "Newtype"
        => Custom("invalid type: unit value, expected a string"));
    err!(enum_tuple: Enum = "Tuple"
        => Custom("invalid type: unit value, expected tuple variant Enum::Tuple"));
    err!(enum_struct: Enum = "Struct"
        => Custom("invalid type: unit value, expected struct variant Enum::Struct"));

    // Field identifiers cannot be serialized, and IgnoredAny represents _something_
    // that is not concrete, so only deserialization is checked
    deserialized_to_only!(identifier: Id = "Field" => Id::Field);
    deserialized_to_only!(ignored_any: Any = "any-name" => Any(IgnoredAny));
}
439    // Field identifiers cannot be serialized, and IgnoredAny represented _something_
440    // which is not concrete
441    deserialized_to_only!(identifier: Id = "Field" => Id::Field);
442    deserialized_to_only!(ignored_any: Any = "any-name" => Any(IgnoredAny));
443}