quick_xml/de/
key.rs

1use crate::de::simple_type::UnitOnly;
2use crate::encoding::Decoder;
3use crate::errors::serialize::DeError;
4use crate::events::BytesStart;
5use crate::name::QName;
6use crate::utils::CowRef;
7use serde::de::{DeserializeSeed, Deserializer, EnumAccess, Visitor};
8use serde::{forward_to_deserialize_any, serde_if_integer128};
9use std::borrow::Cow;
10
/// Generates a numeric `deserialize_*` method of the [`Deserializer`] trait.
///
/// The generated method tries to parse the name as the number type expected by
/// the `$visit` method of the visitor. If the name is not a valid number of that
/// type, the name is forwarded to its own `deserialize_str` with the same visitor.
macro_rules! deserialize_num {
    ($deserialize:ident, $visit:ident) => {
        fn $deserialize<V>(self, visitor: V) -> Result<V::Value, Self::Error>
        where
            V: Visitor<'de>,
        {
            if let Ok(number) = self.name.parse() {
                visitor.$visit(number)
            } else {
                self.name.deserialize_str(visitor)
            }
        }
    };
}
24
25/// Decodes raw bytes using the deserializer encoding.
26/// The method will borrow if encoding is UTF-8 compatible and `name` contains
27/// only UTF-8 compatible characters (usually only ASCII characters).
28#[inline]
29fn decode_name<'n>(name: QName<'n>, decoder: Decoder) -> Result<Cow<'n, str>, DeError> {
30    let local = name.local_name();
31    Ok(decoder.decode(local.into_inner())?)
32}
33
34/// A deserializer for xml names of elements and attributes.
35///
36/// Used for deserializing values from:
37/// - attribute names (`<... name="..." ...>`)
38/// - element names (`<name>...</name>`)
39///
40/// Converts a name to an identifier string using the following rules:
41///
42/// - if it is an [`attribute`] name, put `@` in front of the identifier
43/// - if it is a namespace binding (`xmlns` or `xmlns:xxx`) put the decoded name
44///   to the identifier
45/// - if it is an attribute in the `xml` namespace, put the decoded name
46///   to the identifier
47/// - put the decoded [`local_name()`] of a name to the identifier
48///
49/// The final identifier looks like `[@]local_name`, or `@xmlns`, or `@xmlns:binding` or
50/// `xml:attribute` (where `[]` means optional element).
51///
52/// The deserializer also supports deserializing names as other primitive types:
53/// - numbers
54/// - booleans
55/// - unit (`()`) and unit structs
56/// - unit variants of the enumerations
57///
58/// Because `serde` does not define on which side type conversion should be
59/// performed, and because [`Deserialize`] implementation for that primitives
60/// in serde does not accept strings, the deserializer will perform conversion
61/// by itself.
62///
63/// The deserializer is able to deserialize unit and unit structs, but any name
64/// will be converted to the same unit instance. This is asymmetry with a serializer,
65/// which not able to serialize those types, because empty names are impossible
66/// in XML.
67///
68/// `deserialize_any()` returns the same result as `deserialize_identifier()`.
69///
70/// # Lifetimes
71///
72/// - `'i`: lifetime of the data that the deserializer borrows from the parsed input
73/// - `'d`: lifetime of a deserializer that holds a buffer with content of events
74///
75/// [`attribute`]: Self::from_attr
76/// [`local_name()`]: QName::local_name
77/// [`Deserialize`]: serde::Deserialize
78pub struct QNameDeserializer<'i, 'd> {
79    name: CowRef<'i, 'd, str>,
80}
81
82impl<'i, 'd> QNameDeserializer<'i, 'd> {
83    /// Creates deserializer from name of an attribute
84    pub fn from_attr(
85        name: QName<'d>,
86        decoder: Decoder,
87        key_buf: &'d mut String,
88    ) -> Result<Self, DeError> {
89        // https://github.com/tafia/quick-xml/issues/537
90        // Namespace bindings (xmlns:xxx) map to `@xmlns:xxx` instead of `@xxx`
91        if name.as_namespace_binding().is_some() {
92            decoder.decode_into(name.into_inner(), key_buf)?;
93        } else {
94            // https://github.com/tafia/quick-xml/issues/841
95            // we also want to map to the full name for `xml:xxx`, because `xml:xxx` attributes
96            // can apper only in this literal form, as `xml` prefix cannot be redeclared or unbound
97            let (local, prefix_opt) = name.decompose();
98            if prefix_opt.map_or(false, |prefix| prefix.is_xml()) {
99                decoder.decode_into(name.into_inner(), key_buf)?;
100            } else {
101                decoder.decode_into(local.into_inner(), key_buf)?;
102            }
103        };
104
105        Ok(Self {
106            name: CowRef::Slice(key_buf),
107        })
108    }
109
110    /// Creates deserializer from name of an element
111    pub fn from_elem(start: &'d BytesStart<'i>) -> Result<Self, DeError> {
112        let local = match start.buf {
113            Cow::Borrowed(b) => match decode_name(QName(&b[..start.name_len]), start.decoder())? {
114                Cow::Borrowed(borrowed) => CowRef::Input(borrowed),
115                Cow::Owned(owned) => CowRef::Owned(owned),
116            },
117            Cow::Owned(ref o) => match decode_name(QName(&o[..start.name_len]), start.decoder())? {
118                Cow::Borrowed(borrowed) => CowRef::Slice(borrowed),
119                Cow::Owned(owned) => CowRef::Owned(owned),
120            },
121        };
122
123        Ok(Self { name: local })
124    }
125}
126
127impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'de, 'd> {
128    type Error = DeError;
129
130    forward_to_deserialize_any! {
131        char str string
132        bytes byte_buf
133        seq tuple tuple_struct
134        map struct
135        ignored_any
136    }
137
138    /// According to the <https://www.w3.org/TR/xmlschema11-2/#boolean>,
139    /// valid boolean representations are only `"true"`, `"false"`, `"1"`,
140    /// and `"0"`.
141    fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value, Self::Error>
142    where
143        V: Visitor<'de>,
144    {
145        self.name.deserialize_bool(visitor)
146    }
147
148    deserialize_num!(deserialize_i8, visit_i8);
149    deserialize_num!(deserialize_i16, visit_i16);
150    deserialize_num!(deserialize_i32, visit_i32);
151    deserialize_num!(deserialize_i64, visit_i64);
152
153    deserialize_num!(deserialize_u8, visit_u8);
154    deserialize_num!(deserialize_u16, visit_u16);
155    deserialize_num!(deserialize_u32, visit_u32);
156    deserialize_num!(deserialize_u64, visit_u64);
157
158    serde_if_integer128! {
159        deserialize_num!(deserialize_i128, visit_i128);
160        deserialize_num!(deserialize_u128, visit_u128);
161    }
162
163    deserialize_num!(deserialize_f32, visit_f32);
164    deserialize_num!(deserialize_f64, visit_f64);
165
166    /// Calls [`Visitor::visit_unit`]
167    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
168    where
169        V: Visitor<'de>,
170    {
171        visitor.visit_unit()
172    }
173
174    /// Forwards deserialization to the [`Self::deserialize_unit`]
175    fn deserialize_unit_struct<V>(
176        self,
177        _name: &'static str,
178        visitor: V,
179    ) -> Result<V::Value, Self::Error>
180    where
181        V: Visitor<'de>,
182    {
183        self.deserialize_unit(visitor)
184    }
185
186    /// Forwards deserialization to the [`Self::deserialize_identifier`]
187    #[inline]
188    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
189    where
190        V: Visitor<'de>,
191    {
192        self.deserialize_identifier(visitor)
193    }
194
195    /// If `name` is an empty string then calls [`Visitor::visit_none`],
196    /// otherwise calls [`Visitor::visit_some`] with itself
197    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
198    where
199        V: Visitor<'de>,
200    {
201        if self.name.is_empty() {
202            visitor.visit_none()
203        } else {
204            visitor.visit_some(self)
205        }
206    }
207
208    fn deserialize_newtype_struct<V>(
209        self,
210        _name: &'static str,
211        visitor: V,
212    ) -> Result<V::Value, Self::Error>
213    where
214        V: Visitor<'de>,
215    {
216        visitor.visit_newtype_struct(self)
217    }
218
219    /// Calls a [`Visitor::visit_str`] if [`name`] contains only UTF-8
220    /// compatible encoded characters and represents an element name and
221    /// a [`Visitor::visit_string`] in all other cases.
222    ///
223    /// [`name`]: Self::name
224    fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, Self::Error>
225    where
226        V: Visitor<'de>,
227    {
228        match self.name {
229            CowRef::Input(name) => visitor.visit_borrowed_str(name),
230            CowRef::Slice(name) => visitor.visit_str(name),
231            CowRef::Owned(name) => visitor.visit_string(name),
232        }
233    }
234
235    fn deserialize_enum<V>(
236        self,
237        _name: &str,
238        _variants: &'static [&'static str],
239        visitor: V,
240    ) -> Result<V::Value, Self::Error>
241    where
242        V: Visitor<'de>,
243    {
244        visitor.visit_enum(self)
245    }
246}
247
248impl<'de, 'd> EnumAccess<'de> for QNameDeserializer<'de, 'd> {
249    type Error = DeError;
250    type Variant = UnitOnly;
251
252    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
253    where
254        V: DeserializeSeed<'de>,
255    {
256        let name = seed.deserialize(self)?;
257        Ok((name, UnitOnly))
258    }
259}
260
261////////////////////////////////////////////////////////////////////////////////////////////////////
262
#[cfg(test)]
mod tests {
    use super::*;
    use crate::se::key::QNameSerializer;
    use crate::utils::{ByteBuf, Bytes};
    use pretty_assertions::assert_eq;
    use serde::de::IgnoredAny;
    use serde::{Deserialize, Serialize};
    use std::collections::HashMap;

    /// A unit struct
    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Unit;

    /// A newtype struct around a string
    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Newtype(String);

    /// A tuple struct
    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Tuple((), ());

    /// A struct with named fields
    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Struct {
        key: String,
        val: usize,
    }

    /// An enumeration with every kind of variant
    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    enum Enum {
        Unit,
        #[serde(rename = "@Attr")]
        Attr,
        Newtype(String),
        Tuple(String, usize),
        Struct {
            key: String,
            val: usize,
        },
    }

    /// A field identifier
    #[derive(Debug, Deserialize, PartialEq)]
    #[serde(field_identifier)]
    enum Id {
        Field,
    }

    /// A comparable wrapper around [`IgnoredAny`]; all instances are equal
    #[derive(Debug, Deserialize)]
    #[serde(transparent)]
    struct Any(IgnoredAny);
    impl PartialEq for Any {
        fn eq(&self, _: &Self) -> bool {
            true
        }
    }

    /// Generates a test which checks that `$input` is successfully
    /// deserialized into `$result`. No serialization roundtrip is performed
    macro_rules! deserialized_to_only {
        ($name:ident: $type:ty = $input:literal => $result:expr) => {
            #[test]
            fn $name() {
                let deserializer = QNameDeserializer {
                    name: CowRef::Input($input),
                };
                let actual = <$type as Deserialize>::deserialize(deserializer).unwrap();

                assert_eq!(actual, $result);
            }
        };
    }

    /// Generates a test which checks that `$input` is successfully
    /// deserialized into `$result` and that `$result` is serialized
    /// back to `$input`
    macro_rules! deserialized_to {
        ($name:ident: $type:ty = $input:literal => $result:expr) => {
            #[test]
            fn $name() {
                let deserializer = QNameDeserializer {
                    name: CowRef::Input($input),
                };
                let actual = <$type as Deserialize>::deserialize(deserializer).unwrap();

                assert_eq!(actual, $result);

                // Roundtrip to ensure that serializer corresponds to deserializer
                let serializer = QNameSerializer {
                    writer: String::new(),
                };
                assert_eq!(actual.serialize(serializer).unwrap(), $input);
            }
        };
    }

    /// Generates a test which checks that an attempt to deserialize `$input`
    /// as a `$type` fails with a deserialization error `$kind` with `$reason`
    macro_rules! err {
        ($name:ident: $type:ty = $input:literal => $kind:ident($reason:literal)) => {
            #[test]
            fn $name() {
                let deserializer = QNameDeserializer {
                    name: CowRef::Input($input),
                };
                let outcome: Result<$type, _> = Deserialize::deserialize(deserializer);

                match outcome.unwrap_err() {
                    DeError::$kind(message) => assert_eq!(message, $reason),
                    other => panic!(
                        "Expected `Err({}({}))`, but got `{:?}`",
                        stringify!($kind),
                        $reason,
                        other
                    ),
                }
            }
        };
    }

    deserialized_to!(false_: bool = "false" => false);
    deserialized_to!(true_: bool  = "true" => true);

    deserialized_to!(i8_:  i8  = "-2" => -2);
    deserialized_to!(i16_: i16 = "-2" => -2);
    deserialized_to!(i32_: i32 = "-2" => -2);
    deserialized_to!(i64_: i64 = "-2" => -2);

    deserialized_to!(u8_:  u8  = "3" => 3);
    deserialized_to!(u16_: u16 = "3" => 3);
    deserialized_to!(u32_: u32 = "3" => 3);
    deserialized_to!(u64_: u64 = "3" => 3);

    serde_if_integer128! {
        deserialized_to!(i128_: i128 = "-2" => -2);
        deserialized_to!(u128_: u128 = "2" => 2);
    }

    deserialized_to!(f32_: f32 = "1.23" => 1.23);
    deserialized_to!(f64_: f64 = "1.23" => 1.23);

    deserialized_to!(char_unescaped: char = "h" => 'h');
    err!(char_escaped: char = "&lt;"
        => Custom("invalid value: string \"&lt;\", expected a character"));

    deserialized_to!(string: String = "&lt;escaped&#x20;string" => "&lt;escaped&#x20;string");
    deserialized_to!(borrowed_str: &str = "name" => "name");

    err!(byte_buf: ByteBuf = "&lt;escaped&#x20;string"
        => Custom("invalid type: string \"&lt;escaped&#x20;string\", expected byte data"));
    err!(borrowed_bytes: Bytes = "name"
        => Custom("invalid type: string \"name\", expected borrowed bytes"));

    deserialized_to!(option_none: Option<String> = "" => None);
    deserialized_to!(option_some: Option<String> = "name" => Some("name".into()));

    // Unit structs cannot be represented in some meaningful way, but it is
    // meaningful to use them as a placeholder when we want to deserialize
    // _something_
    deserialized_to_only!(unit: () = "anything" => ());
    deserialized_to_only!(unit_struct: Unit = "anything" => Unit);

    deserialized_to!(newtype: Newtype = "&lt;escaped&#x20;string" => Newtype("&lt;escaped&#x20;string".into()));

    err!(seq: Vec<()> = "name"
        => Custom("invalid type: string \"name\", expected a sequence"));
    err!(tuple: ((), ()) = "name"
        => Custom("invalid type: string \"name\", expected a tuple of size 2"));
    err!(tuple_struct: Tuple = "name"
        => Custom("invalid type: string \"name\", expected tuple struct Tuple"));

    err!(map: HashMap<(), ()> = "name"
        => Custom("invalid type: string \"name\", expected a map"));
    err!(struct_: Struct = "name"
        => Custom("invalid type: string \"name\", expected struct Struct"));

    deserialized_to!(enum_unit: Enum = "Unit" => Enum::Unit);
    deserialized_to!(enum_unit_for_attr: Enum = "@Attr" => Enum::Attr);
    err!(enum_newtype: Enum = "Newtype"
        => Custom("invalid type: unit value, expected a string"));
    err!(enum_tuple: Enum = "Tuple"
        => Custom("invalid type: unit value, expected tuple variant Enum::Tuple"));
    err!(enum_struct: Enum = "Struct"
        => Custom("invalid type: unit value, expected struct variant Enum::Struct"));

    // Field identifiers cannot be serialized, and IgnoredAny represents
    // _something_ which is not concrete, so only deserialization is checked
    deserialized_to_only!(identifier: Id = "Field" => Id::Field);
    deserialized_to_only!(ignored_any: Any = "any-name" => Any(IgnoredAny));
}