xsd_parser/quick_xml/
deserialize.rs

1use std::borrow::Cow;
2use std::fmt::Debug;
3use std::marker::PhantomData;
4use std::str::{from_utf8, FromStr};
5
6use quick_xml::{
7    events::{attributes::Attribute, BytesStart, Event},
8    name::{Namespace, QName, ResolveResult},
9};
10use thiserror::Error;
11
12use super::{Error, ErrorKind, RawByteStr, XmlReader, XmlReaderSync};
13
/// Trait that defines the [`Deserializer`] for a type.
///
/// Implementing this trait associates a type with the deserializer that is
/// able to construct it from XML events.
pub trait WithDeserializer: Sized {
    /// The deserializer to use for this type.
    ///
    /// It has to implement [`Deserializer`] for this type for every lifetime `'de`.
    type Deserializer: for<'de> Deserializer<'de, Self>;
}
19
/// Blanket implementation: every type that implements [`DeserializeBytes`] and
/// [`Debug`] uses the [`ContentDeserializer`] as its deserializer.
impl<X> WithDeserializer for X
where
    X: DeserializeBytes + Debug,
{
    type Deserializer = ContentDeserializer<X>;
}
26
/// Trait that defines a deserializer that can be used to construct a type from
/// XML [`Event`]s.
///
/// A deserializer is created by [`init`](Self::init), advanced with further
/// events by [`next`](Self::next) and can be forced to produce its value by
/// [`finish`](Self::finish).
pub trait Deserializer<'de, T>: Debug + Sized
where
    T: WithDeserializer<Deserializer = Self>,
{
    /// Initializes a new deserializer from the passed `reader` and the initial `event`.
    ///
    /// # Errors
    ///
    /// Returns an [`struct@Error`] if the initialization of the deserializer failed.
    fn init<R>(reader: &R, event: Event<'de>) -> DeserializerResult<'de, T>
    where
        R: XmlReader;

    /// Processes the next XML [`Event`].
    ///
    /// The deserializer is consumed; if it is not finished yet it is handed
    /// back to the caller as part of the returned [`DeserializerOutput`].
    ///
    /// # Errors
    ///
    /// Returns an [`struct@Error`] if processing the event failed.
    fn next<R>(self, reader: &R, event: Event<'de>) -> DeserializerResult<'de, T>
    where
        R: XmlReader;

    /// Force the deserializer to finish.
    ///
    /// # Errors
    ///
    /// Returns an [`struct@Error`] if the deserializer could not finish.
    fn finish<R>(self, reader: &R) -> Result<T, Error>
    where
        R: XmlReader;
}
60
/// Result type returned by the [`Deserializer`] trait.
///
/// On success it carries a [`DeserializerOutput`], on failure an [`struct@Error`].
pub type DeserializerResult<'a, T> = Result<DeserializerOutput<'a, T>, Error>;
63
/// Controls the flow of the deserializer.
#[derive(Debug)]
pub enum ElementHandlerOutput<'a> {
    /// Continue with the deserialization.
    Continue {
        /// Event to continue the deserialization process with.
        event: Event<'a>,

        /// Whether any element is allowed for the current deserializer.
        allow_any: bool,
    },

    /// Break the deserialization.
    Break {
        /// Instructions how to deal with a maybe unhandled event
        /// returned by the child deserializer.
        event: DeserializerEvent<'a>,

        /// Whether any element is allowed for the current deserializer.
        allow_any: bool,
    },
}
86
87impl<'a> ElementHandlerOutput<'a> {
88    /// Create a [`Continue`](Self::Continue) instance.
89    #[must_use]
90    pub fn continue_(event: Event<'a>, allow_any: bool) -> Self {
91        Self::Continue { event, allow_any }
92    }
93
94    /// Create a [`Break`](Self::Break) instance.
95    #[must_use]
96    pub fn break_(event: DeserializerEvent<'a>, allow_any: bool) -> Self {
97        Self::Break { event, allow_any }
98    }
99
100    /// Create a [`Break`](Self::Break) instance that will return the passed
101    /// `event` to the parent deserializers for further processing.
102    #[must_use]
103    pub fn return_to_parent(event: Event<'a>, allow_any: bool) -> Self {
104        Self::break_(DeserializerEvent::Continue(event), allow_any)
105    }
106
107    /// Create a [`Break`](Self::Break) instance that will return the passed
108    /// `event` to root of the deserialization process.
109    #[must_use]
110    pub fn return_to_root(event: Event<'a>, allow_any: bool) -> Self {
111        Self::break_(DeserializerEvent::Break(event), allow_any)
112    }
113
114    /// Create a [`Continue`](Self::Continue) instance if the passed `event` is
115    /// a `Continue(Start)`, `Continue(Empty)`, or `Continue(End)`,
116    /// a [`Break`](Self::Break) instance otherwise.
117    #[must_use]
118    pub fn from_event(event: DeserializerEvent<'a>, allow_any: bool) -> Self {
119        match event {
120            DeserializerEvent::Continue(
121                event @ (Event::Start(_) | Event::Empty(_) | Event::End(_)),
122            ) => Self::continue_(event, allow_any),
123            event => Self::break_(event, allow_any),
124        }
125    }
126
127    /// Create a [`Continue`](Self::Continue) instance if the passed `event` is
128    /// a `Continue(End)`, a [`Break`](Self::Break) instance otherwise.
129    #[must_use]
130    pub fn from_event_end(event: DeserializerEvent<'a>, allow_any: bool) -> Self {
131        match event {
132            DeserializerEvent::Continue(event @ Event::End(_)) => Self::continue_(event, allow_any),
133            DeserializerEvent::Continue(event) => {
134                Self::break_(DeserializerEvent::Break(event), allow_any)
135            }
136            event => Self::break_(event, allow_any),
137        }
138    }
139}
140
/// Type that is used to bundle the output of a [`Deserializer`] operation.
///
/// It combines the produced artifact, the (possibly unconsumed) event and the
/// information whether unknown elements may be skipped.
#[derive(Debug)]
pub struct DeserializerOutput<'a, T>
where
    T: WithDeserializer,
{
    /// Artifact produced by the deserializer.
    pub artifact: DeserializerArtifact<T>,

    /// Contains the processed event if it was not consumed by the deserializer.
    pub event: DeserializerEvent<'a>,

    /// Whether the deserializer allows other XML elements in the current state or not.
    /// If this is set to `true` and the `event` is not consumed, the event should
    /// be skipped. For [`Event::Start`] this would mean to skip the whole element
    /// until the corresponding [`Event::End`] is received.
    pub allow_any: bool,
}
159
/// Artifact that is returned by a [`Deserializer`].
///
/// This contains either the deserialized data, the deserializer itself, or
/// nothing at all.
#[derive(Debug)]
pub enum DeserializerArtifact<T>
where
    T: WithDeserializer,
{
    /// Is returned if the deserialization process is finished and no data was produced.
    None,

    /// Contains the actual type constructed by the deserializer, once the deserializer has
    /// finished its construction.
    Data(T),

    /// Contains the deserializer after an operation on the deserializer has been executed.
    /// This will be returned if the deserialization of the type is not finished yet.
    Deserializer(T::Deserializer),
}
179
180impl<T> DeserializerArtifact<T>
181where
182    T: WithDeserializer,
183{
184    /// Check if this is a [`DeserializerArtifact::None`].
185    pub fn is_none(&self) -> bool {
186        matches!(self, Self::None)
187    }
188
189    /// Create a new [`DeserializerArtifact`] instance from the passed `data`.
190    ///
191    /// If `data` is `Some` a [`DeserializerArtifact::Data`] is created. If it
192    /// is a `None` a [`DeserializerArtifact::None`] is crated.
193    pub fn from_data(data: Option<T>) -> Self {
194        if let Some(data) = data {
195            Self::Data(data)
196        } else {
197            Self::None
198        }
199    }
200
201    /// Create a new [`DeserializerArtifact`] instance from the passed `deserializer`.
202    ///
203    /// If `data` is `Some` a [`DeserializerArtifact::Deserializer`] is created.
204    /// If it is a `None` a [`DeserializerArtifact::None`] is crated.
205    pub fn from_deserializer(deserializer: Option<T::Deserializer>) -> Self {
206        if let Some(deserializer) = deserializer {
207            Self::Deserializer(deserializer)
208        } else {
209            Self::None
210        }
211    }
212
213    /// Split the deserializer artifact into two options.
214    /// One for the data and one for the deserializer.
215    #[inline]
216    pub fn into_parts(self) -> (Option<T>, Option<T::Deserializer>) {
217        match self {
218            Self::None => (None, None),
219            Self::Data(data) => (Some(data), None),
220            Self::Deserializer(deserializer) => (None, Some(deserializer)),
221        }
222    }
223
224    /// Maps the data or the deserializer to new types using the passed mappers.
225    #[inline]
226    pub fn map<F, G, X>(self, data_mapper: F, deserializer_mapper: G) -> DeserializerArtifact<X>
227    where
228        X: WithDeserializer,
229        F: FnOnce(T) -> X,
230        G: FnOnce(T::Deserializer) -> X::Deserializer,
231    {
232        match self {
233            Self::None => DeserializerArtifact::None,
234            Self::Data(data) => DeserializerArtifact::Data(data_mapper(data)),
235            Self::Deserializer(deserializer) => {
236                DeserializerArtifact::Deserializer(deserializer_mapper(deserializer))
237            }
238        }
239    }
240}
241
/// Indicates what to do with an event returned by a deserializer.
#[derive(Debug)]
pub enum DeserializerEvent<'a> {
    /// The event was consumed by the deserializer, nothing to handle here.
    None,

    /// The event is handled and should be returned to the deserialization root
    /// for additional evaluation.
    Break(Event<'a>),

    /// The event was not consumed by the deserializer and may be processed again
    /// by any of its parents.
    Continue(Event<'a>),
}
256
257impl<'a> DeserializerEvent<'a> {
258    /// Extract the event as `Option`.
259    #[must_use]
260    pub fn into_event(self) -> Option<Event<'a>> {
261        match self {
262            Self::None => None,
263            Self::Break(event) | Self::Continue(event) => Some(event),
264        }
265    }
266}
267
/// Trait that could be implemented by types to support deserialization from XML
/// using the [`quick_xml`] crate.
///
/// `R` is the synchronous reader the XML events are read from.
pub trait DeserializeSync<'de, R>: Sized
where
    R: XmlReaderSync<'de>,
{
    /// Error that is returned by the `deserialize` method.
    type Error;

    /// Deserialize the type from the passed `reader`.
    ///
    /// # Errors
    ///
    /// Will return a suitable error if the operation failed.
    fn deserialize(reader: &mut R) -> Result<Self, Self::Error>;
}
284
285impl<'de, R, X> DeserializeSync<'de, R> for X
286where
287    R: XmlReaderSync<'de>,
288    X: WithDeserializer,
289{
290    type Error = Error;
291
292    fn deserialize(reader: &mut R) -> Result<Self, Self::Error> {
293        DeserializeHelper::new(reader).deserialize_sync()
294    }
295}
296
/// Trait that could be implemented by types to support asynchronous
/// deserialization from XML using the [`quick_xml`] crate.
///
/// Only available if the `async` feature is enabled.
#[cfg(feature = "async")]
pub trait DeserializeAsync<'de, R>: Sized
where
    R: super::XmlReaderAsync<'de>,
{
    /// Future that is returned by the [`deserialize_async`](Self::deserialize_async) method.
    type Future<'x>: std::future::Future<Output = Result<Self, Self::Error>>
    where
        R: 'x,
        'de: 'x;

    /// Error that is returned by the future generated by the
    /// [`deserialize_async`](Self::deserialize_async) method.
    type Error;

    /// Asynchronously deserializes the type from the passed `reader`.
    fn deserialize_async<'x>(reader: &'x mut R) -> Self::Future<'x>
    where
        'de: 'x;
}
318
/// Every type that defines a deserializer via [`WithDeserializer`] can be
/// deserialized asynchronously. The returned future is boxed and pinned.
#[cfg(feature = "async")]
impl<'de, R, X> DeserializeAsync<'de, R> for X
where
    R: super::XmlReaderAsync<'de>,
    X: WithDeserializer,
{
    type Future<'x>
        = std::pin::Pin<Box<dyn std::future::Future<Output = Result<Self, Self::Error>> + 'x>>
    where
        R: 'x,
        'de: 'x;

    type Error = Error;

    fn deserialize_async<'x>(reader: &'x mut R) -> Self::Future<'x>
    where
        'de: 'x,
    {
        Box::pin(async move {
            // The helper drives the deserializer of `Self` with the events of the reader.
            let mut helper = DeserializeHelper::new(reader);

            helper.deserialize_async().await
        })
    }
}
340
/// Trait that could be implemented by types to support deserialization from
/// XML byte streams using the [`quick_xml`] crate.
///
/// This is usually implemented for simple types like numbers, strings or enums.
pub trait DeserializeBytes: Sized {
    /// Try to deserialize the type from bytes.
    ///
    /// This is used to deserialize the type from attributes or raw element
    /// content.
    ///
    /// # Errors
    ///
    /// Returns a suitable [`struct@Error`] if the deserialization was not successful.
    fn deserialize_bytes<R: XmlReader>(reader: &R, bytes: &[u8]) -> Result<Self, Error>;
}
356
/// Error that is raised by the [`DeserializeBytes`] trait if the type implements
/// [`FromStr`], but the conversion from the string has failed.
///
/// `E` is the error type of the failed [`FromStr`] conversion.
#[derive(Debug, Error)]
#[error("Unable to deserialize value from string (value = {value}, error = {error})")]
pub struct DeserializeStrError<E> {
    /// Value that could not be parsed.
    pub value: String,

    /// Error forwarded from [`FromStr`].
    pub error: E,
}
368
369impl<X> DeserializeBytes for X
370where
371    X: FromStr,
372    X::Err: std::error::Error + Send + Sync + 'static,
373{
374    fn deserialize_bytes<R: XmlReader>(reader: &R, bytes: &[u8]) -> Result<Self, Error> {
375        let _reader = reader;
376        let s = from_utf8(bytes).map_err(Error::from)?;
377
378        X::from_str(s).map_err(|error| {
379            Error::custom(DeserializeStrError {
380                value: s.into(),
381                error,
382            })
383        })
384    }
385}
386
/// Implements a [`Deserializer`] for any type that implements [`DeserializeBytes`].
///
/// The deserializer collects the raw text content of an element and converts
/// it to `T` once the element is finished.
#[derive(Debug)]
pub struct ContentDeserializer<T> {
    // Raw bytes of the text content collected so far.
    data: Vec<u8>,
    // Ties the deserializer to the type it produces without storing a value of it.
    marker: PhantomData<T>,
}
393
394impl<'de, T> Deserializer<'de, T> for ContentDeserializer<T>
395where
396    T: DeserializeBytes + Debug,
397{
398    fn init<R>(reader: &R, event: Event<'de>) -> DeserializerResult<'de, T>
399    where
400        R: XmlReader,
401    {
402        match event {
403            Event::Start(_) => Ok(DeserializerOutput {
404                artifact: DeserializerArtifact::Deserializer(Self {
405                    data: Vec::new(),
406                    marker: PhantomData,
407                }),
408                event: DeserializerEvent::None,
409                allow_any: false,
410            }),
411            Event::Empty(_) => {
412                let data = T::deserialize_bytes(reader, &[])?;
413
414                Ok(DeserializerOutput {
415                    artifact: DeserializerArtifact::Data(data),
416                    event: DeserializerEvent::None,
417                    allow_any: false,
418                })
419            }
420            event => Ok(DeserializerOutput {
421                artifact: DeserializerArtifact::None,
422                event: DeserializerEvent::Continue(event),
423                allow_any: false,
424            }),
425        }
426    }
427
428    fn next<R>(mut self, reader: &R, event: Event<'de>) -> DeserializerResult<'de, T>
429    where
430        R: XmlReader,
431    {
432        match event {
433            Event::Text(x) => {
434                self.data.extend_from_slice(&x.into_inner());
435
436                Ok(DeserializerOutput {
437                    artifact: DeserializerArtifact::Deserializer(self),
438                    event: DeserializerEvent::None,
439                    allow_any: false,
440                })
441            }
442            Event::End(_) => {
443                let data = self.finish(reader)?;
444
445                Ok(DeserializerOutput {
446                    artifact: DeserializerArtifact::Data(data),
447                    event: DeserializerEvent::None,
448                    allow_any: false,
449                })
450            }
451            event => Ok(DeserializerOutput {
452                artifact: DeserializerArtifact::Deserializer(self),
453                event: DeserializerEvent::Break(event),
454                allow_any: false,
455            }),
456        }
457    }
458
459    fn finish<R>(self, reader: &R) -> Result<T, Error>
460    where
461        R: XmlReader,
462    {
463        T::deserialize_bytes(reader, self.data[..].trim_ascii())
464    }
465}
466
467/* DeserializeReader */
468
469/// Reader trait with additional helper methods for deserializing.
470pub trait DeserializeReader: XmlReader {
471    /// Helper function to convert and store an attribute from the XML event.
472    ///
473    /// # Errors
474    ///
475    /// Returns an [`struct@Error`] with [`ErrorKind::DuplicateAttribute`] if `store`
476    /// already contained a value.
477    fn read_attrib<T>(
478        &self,
479        store: &mut Option<T>,
480        name: &'static [u8],
481        value: &[u8],
482    ) -> Result<(), Error>
483    where
484        T: DeserializeBytes,
485    {
486        if store.is_some() {
487            self.err(ErrorKind::DuplicateAttribute(RawByteStr::from(name)))?;
488        }
489
490        let value = self.map_result(T::deserialize_bytes(self, value))?;
491        *store = Some(value);
492
493        Ok(())
494    }
495
496    /// Raise the [`UnexpectedAttribute`](ErrorKind::UnexpectedAttribute) error
497    /// for the passed `attrib`.
498    ///
499    /// # Errors
500    ///
501    /// Will always return the [`UnexpectedAttribute`](ErrorKind::UnexpectedAttribute)
502    /// error.
503    fn raise_unexpected_attrib(&self, attrib: Attribute<'_>) -> Result<(), Error> {
504        self.err(ErrorKind::UnexpectedAttribute(RawByteStr::from_slice(
505            attrib.key.into_inner(),
506        )))
507    }
508
509    /// Try to resolve the local name of the passed qname and the expected namespace.
510    ///
511    /// Checks if the passed [`QName`] `name` matches the expected namespace `ns`
512    /// and returns the local name of it. If `name` does not have a namespace prefix
513    /// to resolve, the local name is just returned as is.
514    fn resolve_local_name<'a>(&self, name: QName<'a>, ns: &[u8]) -> Option<&'a [u8]> {
515        match self.resolve(name, true) {
516            (ResolveResult::Unbound, local) => Some(local.into_inner()),
517            (ResolveResult::Bound(x), local) if x.0 == ns => Some(local.into_inner()),
518            (_, _) => None,
519        }
520    }
521
522    /// Try to extract the resolved tag name of either a [`Start`](Event::Start) or a
523    /// [`Empty`](Event::Empty) event.
524    fn check_start_tag_name(&self, event: &Event<'_>, ns: Option<&[u8]>, name: &[u8]) -> bool {
525        let (Event::Start(x) | Event::Empty(x)) = event else {
526            return false;
527        };
528
529        if let Some(ns) = ns {
530            matches!(self.resolve_local_name(x.name(), ns), Some(x) if x == name)
531        } else {
532            x.name().local_name().as_ref() == name
533        }
534    }
535
536    /// Try to extract the type name of a dynamic type from the passed event.
537    ///
538    /// This method will try to extract the name of a dynamic type from
539    /// [`Event::Start`] or [`Event::Empty`] by either using the explicit set name
540    /// in the `type` attribute or by using the name of the xml tag.
541    ///
542    /// # Errors
543    ///
544    /// Raise an error if the attributes of the tag could not be resolved.
545    fn get_dynamic_type_name<'a>(
546        &self,
547        event: &'a Event<'_>,
548    ) -> Result<Option<Cow<'a, [u8]>>, Error> {
549        let (Event::Start(b) | Event::Empty(b)) = &event else {
550            return Ok(None);
551        };
552
553        let attrib = b
554            .attributes()
555            .find(|attrib| {
556                let Ok(attrib) = attrib else { return false };
557                let (resolve, name) = self.resolve(attrib.key, true);
558                matches!(
559                    resolve,
560                    ResolveResult::Unbound
561                        | ResolveResult::Bound(Namespace(
562                            b"http://www.w3.org/2001/XMLSchema-instance"
563                        ))
564                ) && name.as_ref() == b"type"
565            })
566            .transpose()?;
567
568        let name = attrib.map_or_else(|| Cow::Borrowed(b.name().0), |attrib| attrib.value);
569
570        Ok(Some(name))
571    }
572
573    /// Initializes a deserializer from the passed `event`.
574    ///
575    /// If the event is [`Start`](Event::Start) or [`Empty`](Event::Empty), the passed
576    /// function `f` is called with the [`BytesStart`] from the event to initialize the actual
577    /// deserializer.
578    ///
579    /// # Errors
580    ///
581    /// Forwards the errors from raised by `f`.
582    fn init_deserializer_from_start_event<'a, T, F>(
583        &self,
584        event: Event<'a>,
585        f: F,
586    ) -> Result<DeserializerOutput<'a, T>, Error>
587    where
588        T: WithDeserializer,
589        F: FnOnce(&Self, &BytesStart<'a>) -> Result<<T as WithDeserializer>::Deserializer, Error>,
590    {
591        match event {
592            Event::Start(start) => {
593                let deserializer = f(self, &start)?;
594
595                Ok(DeserializerOutput {
596                    artifact: DeserializerArtifact::Deserializer(deserializer),
597                    event: DeserializerEvent::None,
598                    allow_any: false,
599                })
600            }
601            Event::Empty(start) => {
602                let deserializer = f(self, &start)?;
603                let data = deserializer.finish(self)?;
604
605                Ok(DeserializerOutput {
606                    artifact: DeserializerArtifact::Data(data),
607                    event: DeserializerEvent::None,
608                    allow_any: false,
609                })
610            }
611            event => Ok(DeserializerOutput {
612                artifact: DeserializerArtifact::None,
613                event: DeserializerEvent::Continue(event),
614                allow_any: false,
615            }),
616        }
617    }
618}
619
// Every `XmlReader` gets the helper methods of `DeserializeReader` for free.
impl<X> DeserializeReader for X where X: XmlReader {}
621
622/* DeserializeHelper */
623
// Drives the deserializer of `T` with the events read from the reader `R`.
struct DeserializeHelper<'a, 'de, T, R>
where
    T: WithDeserializer,
{
    // Reader the XML events are read from.
    reader: &'a mut R,
    // Deserializer that is currently in progress; `None` until one was
    // initialized or after it has finished.
    deserializer: Option<T::Deserializer>,
    // Nesting depth of the element that is currently skipped; `None` if no
    // element is skipped.
    skip_depth: Option<usize>,
    // Ties the helper to the lifetime `'de` of the events.
    marker: PhantomData<&'de ()>,
}
633
634impl<'a, 'de, T, R> DeserializeHelper<'a, 'de, T, R>
635where
636    T: WithDeserializer,
637    R: XmlReader,
638{
639    fn new(reader: &'a mut R) -> Self {
640        Self {
641            reader,
642            deserializer: None,
643            skip_depth: None,
644            marker: PhantomData,
645        }
646    }
647
648    fn handle_event(&mut self, event: Event<'_>) -> Result<Option<T>, Error> {
649        let ret = match self.deserializer.take() {
650            None => T::Deserializer::init(self.reader, event),
651            Some(b) => b.next(self.reader, event),
652        };
653        let ret = self.reader.map_result(ret);
654
655        let DeserializerOutput {
656            artifact,
657            event,
658            allow_any,
659        } = ret?;
660
661        let (data, deserializer) = artifact.into_parts();
662
663        self.deserializer = deserializer;
664
665        match event.into_event() {
666            None
667            | Some(
668                Event::Decl(_)
669                | Event::Text(_)
670                | Event::Comment(_)
671                | Event::DocType(_)
672                | Event::PI(_),
673            ) => (),
674            Some(event) if allow_any => {
675                if matches!(event, Event::Start(_)) {
676                    self.skip_depth = Some(1);
677                }
678            }
679            Some(event) => return Err(ErrorKind::UnexpectedEvent(event.into_owned()).into()),
680        }
681
682        Ok(data)
683    }
684
685    fn handle_skip(&mut self, event: Event<'de>) -> Option<Event<'de>> {
686        let Some(skip_depth) = self.skip_depth.as_mut() else {
687            return Some(event);
688        };
689
690        match event {
691            Event::Start(_) => *skip_depth += 1,
692            Event::End(_) if *skip_depth == 1 => {
693                self.skip_depth = None;
694
695                return None;
696            }
697            Event::End(_) => *skip_depth -= 1,
698            Event::Eof => return Some(Event::Eof),
699            _ => (),
700        }
701
702        None
703    }
704}
705
706impl<'de, T, R> DeserializeHelper<'_, 'de, T, R>
707where
708    T: WithDeserializer,
709    R: XmlReaderSync<'de>,
710{
711    fn deserialize_sync(&mut self) -> Result<T, Error> {
712        loop {
713            let event = self.reader.read_event()?;
714
715            if let Some(event) = self.handle_skip(event) {
716                if let Some(data) = self
717                    .handle_event(event)
718                    .map_err(|error| self.reader.extend_error(error))?
719                {
720                    return Ok(data);
721                }
722            }
723        }
724    }
725}
#[cfg(feature = "async")]
impl<'de, T, R> DeserializeHelper<'_, 'de, T, R>
where
    T: WithDeserializer,
    R: super::XmlReaderAsync<'de>,
{
    /// Asynchronously reads events from the reader until the deserializer of
    /// `T` has produced its value.
    // NOTE(review): unlike `deserialize_sync`, errors of `handle_event` are not
    // passed through `extend_error` here - confirm that this is intended.
    async fn deserialize_async(&mut self) -> Result<T, Error> {
        loop {
            let event = self.reader.read_event_async().await?;

            // Events of a skipped element never reach the deserializer.
            let Some(event) = self.handle_skip(event) else {
                continue;
            };

            match self.handle_event(event)? {
                Some(data) => return Ok(data),
                None => continue,
            }
        }
    }
}
743}