xsd_parser/parser/
mod.rs

1//! The `parser` module contains the schema [`Parser`] and all related types.
2
3pub mod resolver;
4
5mod error;
6
7use std::collections::{BTreeMap, HashSet, VecDeque};
8use std::fmt::Debug;
9use std::io::BufRead;
10use std::path::Path;
11
12use quick_xml::{
13    events::Event,
14    name::{LocalName, PrefixIter, QName, ResolveResult},
15};
16use resolver::{FileResolver, NoOpResolver, ResolveRequest, Resolver};
17use tracing::instrument;
18use url::Url;
19
20use crate::quick_xml::{
21    DeserializeSync, Error as QuickXmlError, IoReader, SliceReader, XmlReader, XmlReaderSync,
22};
23use crate::schema::xs::{Import, Include, Schema, SchemaContent};
24use crate::schema::{Namespace, NamespacePrefix, Schemas};
25
26pub use self::error::Error;
27
28/// The [`Parser`] is used to load and parse XML schema information from different
29/// sources.
30///
31/// This structure can be used to load XML schemas information from different
32/// sources using so called [`Resolver`]s. After the content of a schema was load
33/// is it parsed and added to the list of schemas, managed by this parser.
34///
35/// The resulting [`Schemas`] instance can then be used by an
36/// [`Interpreter`](crate::interpreter::Interpreter), to generate the more common
37/// [`Types`](crate::types::Types) structure out of it.
38#[must_use]
39#[derive(Default, Debug)]
40pub struct Parser<TResolver = NoOpResolver> {
41    cache: HashSet<Url>,
42    schemas: Schemas,
43    pending: VecDeque<ResolveRequest>,
44
45    resolver: TResolver,
46    resolve_includes: bool,
47}
48
49impl Parser {
50    /// Create a new [`Parser`] instance.
51    pub fn new() -> Self {
52        Self::default()
53    }
54}
55
56impl<TResolver> Parser<TResolver> {
57    /// Set the default resolver for this parser.
58    ///
59    /// The default resolver is just a simple [`FileResolver`].
60    pub fn with_default_resolver(self) -> Parser<FileResolver> {
61        self.with_resolver(FileResolver)
62    }
63
64    /// Set a custom defined resolver for this parser.
65    pub fn with_resolver<XResolver: Resolver + 'static>(
66        self,
67        resolver: XResolver,
68    ) -> Parser<XResolver> {
69        let Self { schemas, .. } = self;
70
71        let cache = HashSet::new();
72        let pending = VecDeque::new();
73
74        Parser {
75            cache,
76            schemas,
77            pending,
78
79            resolver,
80            resolve_includes: true,
81        }
82    }
83
84    /// Enable or disable resolving includes of parsed XML schemas.
85    pub fn resolve_includes(mut self, value: bool) -> Self {
86        self.resolve_includes = value;
87
88        self
89    }
90
91    /// Finish the parsing process by returning the generated [`Schemas`] instance
92    /// containing all parsed schemas.
93    pub fn finish(self) -> Schemas {
94        self.schemas
95    }
96}
97
98impl<TResolver> Parser<TResolver>
99where
100    TResolver: Resolver,
101{
102    /// Add the default namespaces to this parser.
103    ///
104    /// The default namespaces are:
105    /// - [`NamespacePrefix::XS`] [`Namespace::XS`]
106    /// - [`NamespacePrefix::XML`] [`Namespace::XML`]
107    ///
108    /// # Errors
109    ///
110    /// Forwards the errors from [`with_namespace`](Self::with_namespace).
111    pub fn with_default_namespaces(self) -> Self {
112        self.with_namespace(NamespacePrefix::XS, Namespace::XS)
113            .with_namespace(NamespacePrefix::XML, Namespace::XML)
114    }
115
116    /// Add a new namespace to this parser.
117    ///
118    /// This method will add a new namespace to the parser. This can be useful to
119    /// pre-heat the prefixes for known namespace, or to define namespaces for
120    /// custom defined types.
121    ///
122    /// This will not add any schema information. It's just a namespace definition.
123    ///
124    /// # Errors
125    ///
126    /// Will return an error if a problem or mismatch with the already existing
127    /// namespaces was encountered.
128    pub fn with_namespace(mut self, prefix: NamespacePrefix, namespace: Namespace) -> Self {
129        self.schemas
130            .get_or_create_namespace_info_mut(Some(prefix), Some(namespace));
131
132        self
133    }
134}
135
136impl<TResolver> Parser<TResolver>
137where
138    TResolver: Resolver,
139    TResolver::Buffer: BufRead,
140{
141    /// Add a new XML schema from the passed string.
142    ///
143    /// This will parse the XML schema represented by the provided string and add
144    /// all schema information to the resulting [`Schemas`] structure.
145    ///
146    /// # Errors
147    ///
148    /// Will return an suitable error if the parser could not parse the provided
149    /// schema.
150    #[instrument(err, level = "trace", skip(self, schema))]
151    pub fn add_schema_from_str(mut self, schema: &str) -> Result<Self, Error<TResolver::Error>> {
152        let reader = SliceReader::new(schema);
153        let mut reader = SchemaReader::new(reader);
154
155        let schema = Schema::deserialize(&mut reader)?;
156
157        self.add_schema(schema, &reader.namespaces, None);
158        self.resolve_pending()?;
159
160        Ok(self)
161    }
162
163    /// Add a new XML schema from the passed `reader`.
164    ///
165    /// This will parse the XML schema represented by the provided reader and add
166    /// all schema information to the resulting [`Schemas`] structure.
167    ///
168    /// # Errors
169    ///
170    /// Will return an suitable error if the parser could not read the data from
171    /// the reader, or parse the schema provided by the reader.
172    #[instrument(err, level = "trace", skip(self, reader))]
173    pub fn add_schema_from_reader<R: BufRead>(
174        mut self,
175        reader: R,
176    ) -> Result<Self, Error<TResolver::Error>> {
177        let reader = IoReader::new(reader);
178        let mut reader = SchemaReader::new(reader);
179
180        let schema = Schema::deserialize(&mut reader)?;
181
182        self.add_schema(schema, &reader.namespaces, None);
183        self.resolve_pending()?;
184
185        Ok(self)
186    }
187
188    /// Add a new XML schema from the passed file `path`.
189    ///
190    /// This will parse the XML schema represented by the provided filepath and
191    /// add all schema information to the resulting [`Schemas`] structure.
192    ///
193    /// # Errors
194    ///
195    /// Will return an suitable error if the parser could not read the data from
196    /// the file, or parse the schema content.
197    #[instrument(err, level = "trace", skip(self))]
198    pub fn add_schema_from_file<P: AsRef<Path> + Debug>(
199        self,
200        path: P,
201    ) -> Result<Self, Error<TResolver::Error>> {
202        let path = path.as_ref().canonicalize()?;
203        let url = Url::from_file_path(&path).map_err(|()| Error::InvalidFilePath(path))?;
204
205        self.add_schema_from_url(url)
206    }
207
208    /// Add a new XML schema from the passed file `url`.
209    ///
210    /// This will parse the XML schema represented by the provided url and
211    /// add all schema information to the resulting [`Schemas`] structure.
212    ///
213    /// # Errors
214    ///
215    /// Will return an suitable error if the parser could not resolve the URL
216    /// using the provided resolver or the data from the resolver could not be
217    /// parsed.
218    #[instrument(err, level = "trace", skip(self))]
219    pub fn add_schema_from_url(mut self, url: Url) -> Result<Self, Error<TResolver::Error>> {
220        let req = ResolveRequest::new(url);
221
222        self.resolve_location(req)?;
223        self.resolve_pending()?;
224
225        Ok(self)
226    }
227
228    fn add_pending(&mut self, req: ResolveRequest) {
229        tracing::debug!("Add pending resolve request: {req:#?}");
230
231        self.pending.push_back(req);
232    }
233
234    fn resolve_pending(&mut self) -> Result<(), Error<TResolver::Error>> {
235        while let Some(req) = self.pending.pop_front() {
236            self.resolve_location(req)?;
237        }
238
239        Ok(())
240    }
241
242    #[instrument(err, level = "trace", skip(self))]
243    fn resolve_location(&mut self, req: ResolveRequest) -> Result<(), Error<TResolver::Error>> {
244        tracing::debug!("Process resolve request: {req:#?}");
245
246        let Some((location, buffer)) = self.resolver.resolve(&req).map_err(Error::resolver)? else {
247            return Err(Error::UnableToResolve(Box::new(req)));
248        };
249        if self.cache.contains(&location) {
250            return Ok(());
251        }
252
253        let reader = IoReader::new(buffer);
254        let mut reader = SchemaReader::new(reader);
255
256        let schema = Schema::deserialize(&mut reader)?;
257
258        self.add_schema(schema, &reader.namespaces, Some(&location));
259        self.cache.insert(location);
260
261        Ok(())
262    }
263
264    fn add_schema(
265        &mut self,
266        schema: Schema,
267        namespaces: &Namespaces,
268        current_location: Option<&Url>,
269    ) {
270        tracing::debug!(
271            "Process schema (location={:?}, target_namespace={:?}",
272            current_location.as_ref().map(|url| url.as_str()),
273            &schema.target_namespace
274        );
275
276        let target_ns = schema
277            .target_namespace
278            .as_deref()
279            .map(|ns| Namespace::from(ns.as_bytes().to_owned()));
280        let prefix = namespaces.get(&target_ns).cloned().flatten();
281
282        if self.resolve_includes {
283            for content in &schema.content {
284                match content {
285                    SchemaContent::Import(x) => {
286                        if let Some(req) = import_req(x, target_ns.clone(), current_location) {
287                            self.add_pending(req);
288                        }
289                    }
290                    SchemaContent::Include(x) => {
291                        self.add_pending(include_req(x, target_ns.clone(), current_location));
292                    }
293                    _ => (),
294                }
295            }
296        }
297
298        self.schemas.add_schema(prefix, target_ns, schema);
299    }
300}
301
302struct SchemaReader<R> {
303    inner: R,
304    namespaces: Namespaces,
305}
306
307type Namespaces = BTreeMap<Option<Namespace>, Option<NamespacePrefix>>;
308
309impl<R> SchemaReader<R> {
310    fn new(inner: R) -> Self {
311        Self {
312            inner,
313            namespaces: BTreeMap::new(),
314        }
315    }
316}
317
318impl<R> XmlReader for SchemaReader<R>
319where
320    R: XmlReader,
321{
322    fn resolve<'n>(&self, name: QName<'n>, attribute: bool) -> (ResolveResult<'_>, LocalName<'n>) {
323        self.inner.resolve(name, attribute)
324    }
325
326    fn prefixes(&self) -> PrefixIter<'_> {
327        self.inner.prefixes()
328    }
329
330    fn current_position(&self) -> u64 {
331        self.inner.current_position()
332    }
333
334    fn error_position(&self) -> u64 {
335        self.inner.error_position()
336    }
337}
338
339impl<'a, R> XmlReaderSync<'a> for SchemaReader<R>
340where
341    R: XmlReaderSync<'a>,
342{
343    fn read_event(&mut self) -> Result<Event<'a>, QuickXmlError> {
344        let event = self.inner.read_event()?;
345
346        if let Event::Start(x) | Event::Empty(x) = &event {
347            for a in x.attributes() {
348                let a = a?;
349                if matches!(a.key.prefix(), Some(x) if x.as_ref() == b"xmlns") {
350                    let prefix = NamespacePrefix::new(a.key.local_name().as_ref().to_owned());
351                    let namespace = Namespace::new(a.value.into_owned());
352
353                    self.namespaces
354                        .entry(Some(namespace))
355                        .or_insert(Some(prefix));
356                }
357            }
358        }
359
360        Ok(event)
361    }
362}
363
364fn import_req(
365    import: &Import,
366    current_ns: Option<Namespace>,
367    current_location: Option<&Url>,
368) -> Option<ResolveRequest> {
369    let location = import.schema_location.as_ref()?;
370
371    let mut req = ResolveRequest::new(location);
372
373    if let Some(ns) = current_ns {
374        req = req.current_ns(ns);
375    }
376
377    if let Some(ns) = &import.namespace {
378        req = req.requested_ns(Namespace::from(ns.as_bytes().to_owned()));
379    }
380
381    if let Some(current_location) = current_location {
382        req = req.current_location(current_location.clone());
383    }
384
385    Some(req)
386}
387
388fn include_req(
389    include: &Include,
390    current_ns: Option<Namespace>,
391    current_location: Option<&Url>,
392) -> ResolveRequest {
393    let mut req = ResolveRequest::new(&include.schema_location);
394
395    if let Some(ns) = current_ns {
396        req = req.current_ns(ns);
397    }
398
399    if let Some(current_location) = current_location {
400        req = req.current_location(current_location.clone());
401    }
402
403    req
404}