Skip to main content

xsd_parser/pipeline/interpreter/
mod.rs

//! Schema interpretation logic for transforming parsed XML schemas into semantic
//! type definitions.
//!
//! This module defines the [`Interpreter`] type, which processes raw [`Schemas`] loaded
//! by the [`Parser`](crate::Parser) and converts them into semantic [`MetaTypes`].
//! These types represent meaningful, structured representations such as complex types,
//! enums, references, and attributes.
//!
//! The interpreter is capable of:
//! - registering custom or user-defined types
//! - resolving XSD primitive types and typedefs
//! - adding default built-in or XML-specific types (e.g., `xs:string`, `xs:anyType`)
//! - integrating numeric backends (e.g., `num::BigInt`) for large integers
//!
//! The resulting [`MetaTypes`] structure can then be passed to the generator to
//! generate Rust-specific type structures.
//!
//! # Example
//! ```rust,ignore
//! let (meta_types, ident_cache) = Interpreter::new(&schemas)
//!     .with_buildin_types()?
//!     .with_default_typedefs()?
//!     .finish()?;
//! ```
25
26mod error;
27mod state;
28
29use std::fmt::Debug;
30
31use quote::quote;
32use tracing::instrument;
33
34use xsd_parser_types::misc::Namespace;
35
36use crate::models::meta::{
37    AnyAttributeMeta, AnyMeta, AttributeMeta, BuildInMeta, ComplexMeta, CustomMeta, ElementMeta,
38    GroupMeta, MetaType, MetaTypeVariant, MetaTypes, ReferenceMeta, SimpleMeta,
39};
40use crate::models::schema::{
41    xs::{FormChoiceType, ProcessContentsType},
42    MaxOccurs, NamespaceId, Schemas,
43};
44use crate::models::{AttributeIdent, ElementIdent, IdentCache, Name, TypeIdent};
45use crate::pipeline::generator::{
46    Context as GeneratorContext, Error as GeneratorError, ValueGenerator, ValueGeneratorMode,
47};
48use crate::pipeline::renderer::{Context as RendererContext, ValueRendererBox};
49use crate::traits::{NameBuilderExt as _, Naming};
50
51pub use error::Error;
52
53use self::state::State;
54
55/// The `Interpreter` transforms raw parsed XML schema data into semantically
56/// meaningful Rust-compatible type metadata.
57///
58/// It operates on a [`Schemas`] structure produced by the [`Parser`](crate::Parser)
59/// and produces a [`MetaTypes`] structure, which is the central format used for
60/// code generation.
61///
62/// This abstraction allows the intermediate schema format to be reshaped into a form
63/// suitable for deterministic and idiomatic Rust code generation.
64#[must_use]
65#[derive(Debug)]
66pub struct Interpreter<'a> {
67    state: State<'a>,
68}
69
70impl<'a> Interpreter<'a> {
71    /// Create a new [`Interpreter`] instance using the passed `schemas` reference.
72    pub fn new(schemas: &'a Schemas) -> Self {
73        let state = State::new(schemas);
74
75        Self { state }
76    }
77
78    /// Add a custom [`MetaType`] information for the passed `ident`ifier to the
79    /// resulting [`MetaTypes`] structure.
80    ///
81    /// # Errors
82    ///
83    /// Returns a suitable [`Error`] if the operation was not successful.
84    #[instrument(err, level = "trace", skip(self))]
85    pub fn with_type<I, T>(mut self, ident: I, type_: T) -> Result<Self, Error>
86    where
87        I: Into<TypeIdent> + Debug,
88        T: Into<MetaType> + Debug,
89    {
90        self.state.add_type(ident, type_)?;
91
92        Ok(self)
93    }
94
95    /// Add a simple type definition to the resulting [`MetaTypes`] structure using
96    /// `ident` as identifier for the new type and `type_` as target type for the
97    /// type definition.
98    ///
99    /// # Errors
100    ///
101    /// Returns a suitable [`Error`] if the operation was not successful.
102    #[instrument(err, level = "trace", skip(self))]
103    pub fn with_typedef<I, T>(mut self, ident: I, type_: T) -> Result<Self, Error>
104    where
105        I: Into<TypeIdent> + Debug,
106        T: Into<TypeIdent> + Debug,
107    {
108        self.state.add_type(ident, ReferenceMeta::new(type_))?;
109
110        Ok(self)
111    }
112
113    /// Adds the default build-in types to the resulting [`MetaTypes`] structure.
114    ///
115    /// # Errors
116    ///
117    /// Returns a suitable [`Error`] if the operation was not successful.
118    #[instrument(err, level = "trace", skip(self))]
119    pub fn with_buildin_types(mut self) -> Result<Self, Error> {
120        let anonymous = self
121            .state
122            .schemas()
123            .resolve_namespace(&None)
124            .ok_or_else(|| Error::AnonymousNamespaceIsUndefined)?;
125
126        macro_rules! add {
127            ($ident:ident, $type:ident) => {{
128                let ident = TypeIdent::$ident.with_ns(anonymous);
129                let ty = BuildInMeta::$type;
130
131                self.state.add_type(ident, ty)?;
132            }};
133        }
134
135        add!(U8, U8);
136        add!(U16, U16);
137        add!(U32, U32);
138        add!(U64, U64);
139        add!(U128, U128);
140        add!(USIZE, Usize);
141
142        add!(I8, I8);
143        add!(I16, I16);
144        add!(I32, I32);
145        add!(I64, I64);
146        add!(I128, I128);
147        add!(ISIZE, Isize);
148
149        add!(F32, F32);
150        add!(F64, F64);
151
152        add!(BOOL, Bool);
153        add!(STR, Str);
154        add!(STRING, String);
155
156        Ok(self)
157    }
158
159    /// Adds the type definitions for common XML types (like `xs:string` or `xs:int`)
160    /// to the resulting [`MetaTypes`] structure.
161    ///
162    /// # Errors
163    ///
164    /// Returns a suitable [`Error`] if the operation was not successful.
165    #[instrument(err, level = "trace", skip(self))]
166    pub fn with_default_typedefs(mut self) -> Result<Self, Error> {
167        let anonymous = self
168            .state
169            .schemas()
170            .resolve_namespace(&None)
171            .ok_or_else(|| Error::AnonymousNamespaceIsUndefined)?;
172        let xs = self
173            .state
174            .schemas()
175            .resolve_namespace(&Some(Namespace::XS))
176            .ok_or_else(|| Error::UnknownNamespace(Namespace::XS.clone()))?;
177
178        macro_rules! add {
179            ($src:expr, $dst:ident) => {{
180                let src = TypeIdent::type_($src).with_ns(xs);
181                let dst = TypeIdent::$dst.with_ns(anonymous);
182
183                self.state.add_type(src, ReferenceMeta::new(dst))?;
184            }};
185        }
186        macro_rules! add_list {
187            ($src:expr, $dst:ident) => {{
188                let src = TypeIdent::type_($src).with_ns(xs);
189                let dst = TypeIdent::$dst.with_ns(anonymous);
190
191                self.state.add_type(
192                    src,
193                    ReferenceMeta::new(dst)
194                        .min_occurs(0)
195                        .max_occurs(MaxOccurs::Unbounded),
196                )?;
197            }};
198        }
199
200        /* Primitive Types */
201
202        add!("string", STRING);
203        add!("boolean", BOOL);
204        add!("decimal", F64);
205        add!("float", F32);
206        add!("double", F64);
207
208        /* time related types */
209
210        add!("duration", STRING);
211        add!("dateTime", STRING);
212        add!("time", STRING);
213        add!("date", STRING);
214        add!("gYearMonth", STRING);
215        add!("gYear", STRING);
216        add!("gMonthDay", STRING);
217        add!("gMonth", STRING);
218        add!("gDay", STRING);
219
220        /* Data related types */
221
222        add!("hexBinary", STRING);
223        add!("base64Binary", STRING);
224
225        /* URL related types */
226
227        add!("anyURI", STRING);
228        add!("QName", STRING);
229        add!("NOTATION", STRING);
230
231        /* Numeric Types */
232
233        add!("long", I64);
234        add!("int", I32);
235        add!("integer", I32);
236        add!("short", I16);
237        add!("byte", I8);
238        add!("negativeInteger", ISIZE);
239        add!("nonPositiveInteger", ISIZE);
240
241        add!("unsignedLong", U64);
242        add!("unsignedInt", U32);
243        add!("unsignedShort", U16);
244        add!("unsignedByte", U8);
245        add!("positiveInteger", USIZE);
246        add!("nonNegativeInteger", USIZE);
247
248        /* String Types */
249
250        add!("normalizedString", STRING);
251        add!("token", STRING);
252        add!("language", STRING);
253        add!("NMTOKEN", STRING);
254        add!("Name", STRING);
255        add!("NCName", STRING);
256        add!("ID", STRING);
257        add!("IDREF", STRING);
258        add!("ENTITY", STRING);
259
260        add!("anySimpleType", STRING);
261
262        add_list!("NMTOKENS", STRING);
263        add_list!("IDREFS", STRING);
264        add_list!("ENTITIES", STRING);
265
266        Ok(self)
267    }
268
269    /// Adds a default type definition for `xs:anyType`.
270    ///
271    /// # Errors
272    ///
273    /// Returns a suitable [`Error`] if the operation was not successful.
274    #[instrument(err, level = "trace", skip(self))]
275    pub fn with_xs_any_type(mut self) -> Result<Self, Error> {
276        let xs = self
277            .state
278            .schemas()
279            .resolve_namespace(&Some(Namespace::XS))
280            .ok_or_else(|| Error::UnknownNamespace(Namespace::XS.clone()))?;
281
282        /* content type */
283
284        let any_ident = ElementIdent::new(Name::ANY);
285        let mut any = ElementMeta::any(
286            any_ident,
287            AnyMeta {
288                id: None,
289                namespace: None,
290                not_q_name: None,
291                not_namespace: None,
292                process_contents: ProcessContentsType::Lax,
293            },
294        );
295        any.min_occurs = 0;
296        any.max_occurs = MaxOccurs::Unbounded;
297
298        let mut content_sequence = GroupMeta {
299            is_mixed: true,
300            ..GroupMeta::default()
301        };
302        content_sequence.elements.push(any);
303
304        let content_name = self.state.name_builder().shared_name("Content").finish();
305        let content_ident = TypeIdent::new(content_name).with_ns(xs);
306        let content_variant = MetaTypeVariant::Sequence(content_sequence);
307        let content_type = MetaType::new(content_variant);
308
309        self.state.add_type(content_ident.clone(), content_type)?;
310
311        /* xs:anyType */
312
313        let ident = TypeIdent::new(Name::ANY_TYPE).with_ns(xs);
314        let any_attribute_ident = AttributeIdent::new(Name::ANY_ATTRIBUTE);
315        let any_attribute = AttributeMeta::any(
316            any_attribute_ident,
317            AnyAttributeMeta {
318                id: None,
319                namespace: None,
320                not_q_name: None,
321                not_namespace: None,
322                process_contents: ProcessContentsType::Lax,
323            },
324        );
325
326        let mut complex = ComplexMeta {
327            content: Some(content_ident),
328            is_mixed: true,
329            min_occurs: 1,
330            max_occurs: MaxOccurs::Bounded(1),
331            ..Default::default()
332        };
333        complex.attributes.push(any_attribute);
334
335        let variant = MetaTypeVariant::ComplexType(complex);
336        let type_ = MetaType::new(variant);
337
338        self.state.add_type(ident, type_)?;
339
340        Ok(self)
341    }
342
343    /// Adds a default type definition for `xs:anySimpleType`.
344    ///
345    /// # Errors
346    ///
347    /// Returns a suitable [`Error`] if the operation was not successful.
348    #[instrument(err, level = "trace", skip(self))]
349    pub fn with_xs_any_simple_type(mut self) -> Result<Self, Error> {
350        let xs = self
351            .state
352            .schemas()
353            .resolve_namespace(&Some(Namespace::XS))
354            .ok_or_else(|| Error::UnknownNamespace(Namespace::XS.clone()))?;
355        let xsi = self
356            .state
357            .schemas()
358            .resolve_namespace(&Some(Namespace::XSI))
359            .ok_or_else(|| Error::UnknownNamespace(Namespace::XSI.clone()))?;
360
361        /* content type */
362
363        let content_name = self.state.name_builder().shared_name("Content").finish();
364        let content_ident = TypeIdent::new(content_name).with_ns(xs);
365        let content_type = MetaType::new(MetaTypeVariant::SimpleType(SimpleMeta::new(
366            TypeIdent::STRING,
367        )));
368
369        self.state.add_type(content_ident.clone(), content_type)?;
370
371        /* xs:anySimpleType */
372
373        let type_attribute_ident = AttributeIdent::new(Name::TYPE).with_ns(xsi);
374        let type_attribute = AttributeMeta::new(
375            type_attribute_ident,
376            TypeIdent::STRING,
377            FormChoiceType::Qualified,
378        );
379
380        let mut complex = ComplexMeta {
381            content: Some(content_ident),
382            is_mixed: true,
383            min_occurs: 1,
384            max_occurs: MaxOccurs::Bounded(1),
385            ..Default::default()
386        };
387        complex.attributes.push(type_attribute);
388
389        let ident = TypeIdent::new(Name::ANY_SIMPLE_TYPE).with_ns(xs);
390        let variant = MetaTypeVariant::ComplexType(complex);
391        let type_ = MetaType::new(variant);
392
393        self.state.add_type(ident, type_)?;
394
395        Ok(self)
396    }
397
398    /// Add a type definition for `xs:QName` that uses the
399    /// `xsd_parser_types::xml::QName` type.
400    pub fn with_qname_type(self) -> Result<Self, Error> {
401        self.with_qname_type_from("::xsd_parser_types::xml::QName")
402    }
403
404    /// Add a type definition for `xs:QName` that uses the type defined at the passed `path`.
405    pub fn with_qname_type_from(self, path: &str) -> Result<Self, Error> {
406        let xs = self
407            .state
408            .schemas()
409            .resolve_namespace(&Some(Namespace::XS))
410            .ok_or_else(|| Error::UnknownNamespace(Namespace::XS.clone()))?;
411
412        let name = path.rsplit_once("::").map_or(path, |(_, name)| name);
413
414        self.with_type(
415            TypeIdent::type_("QName").with_ns(xs),
416            CustomMeta::new(name)
417                .include_from(path)
418                .with_namespace(xs)
419                .with_default(crate::misc::qname_default),
420        )
421    }
422
423    /// Add type definitions for numeric XML types (like `xs:int`) that
424    /// uses `num::BigInt` and `num::BigUint` instead of build-in integer types.
425    ///
426    /// # Errors
427    ///
428    /// Returns a suitable [`Error`] if the operation was not successful.
429    pub fn with_num_big_int(mut self) -> Result<Self, Error> {
430        let xs = self
431            .state
432            .schemas()
433            .resolve_namespace(&Some(Namespace::XS))
434            .ok_or_else(|| Error::UnknownNamespace(Namespace::XS.clone()))?;
435
436        macro_rules! add {
437            ($src:expr, $dst:expr) => {{
438                self.state
439                    .add_type(TypeIdent::type_($src).with_ns(xs), ReferenceMeta::new($dst))?;
440            }};
441        }
442
443        let big_int = CustomMeta::new("BigInt")
444            .include_from("::num::BigInt")
445            .with_default(make_from_str_value_generator("::num::BigInt"));
446
447        let big_uint = CustomMeta::new("BigUint")
448            .include_from("::num::BigUint")
449            .with_default(make_from_str_value_generator("::num::BigUint"));
450
451        let ident_big_int = TypeIdent::type_("BigInt").with_ns(NamespaceId::ANONYMOUS);
452        let ident_big_uint = TypeIdent::type_("BigUint").with_ns(NamespaceId::ANONYMOUS);
453
454        self.state.add_type(ident_big_int.clone(), big_int)?;
455        self.state.add_type(ident_big_uint.clone(), big_uint)?;
456
457        add!("integer", ident_big_int.clone());
458        add!("negativeInteger", ident_big_int.clone());
459        add!("nonPositiveInteger", ident_big_int);
460
461        add!("positiveInteger", ident_big_uint.clone());
462        add!("nonNegativeInteger", ident_big_uint);
463
464        Ok(self)
465    }
466
467    /// Add type definitions for numeric XML types (like `xs:positiveInteger`) that
468    /// uses `::core::num::NonZeroIsize` and `::core::num::NonZeroUsize` instead
469    /// of the simple integer types.
470    ///
471    /// # Errors
472    ///
473    /// Returns a suitable [`Error`] if the operation was not successful.
474    pub fn with_nonzero_typedefs(mut self) -> Result<Self, Error> {
475        let xs = self
476            .state
477            .schemas()
478            .resolve_namespace(&Some(Namespace::XS))
479            .ok_or_else(|| Error::UnknownNamespace(Namespace::XS.clone()))?;
480
481        macro_rules! add {
482            ($src:expr, $dst:expr) => {{
483                self.state
484                    .add_type(TypeIdent::type_($src).with_ns(xs), ReferenceMeta::new($dst))?;
485            }};
486        }
487
488        let non_zero_usize = CustomMeta::new("NonZeroUsize")
489            .include_from("::core::num::NonZeroUsize")
490            .with_default(make_from_str_value_generator("::core::num::NonZeroUsize"));
491        let non_zero_isize = CustomMeta::new("NonZeroIsize")
492            .include_from("::core::num::NonZeroIsize")
493            .with_default(make_from_str_value_generator("::core::num::NonZeroIsize"));
494
495        let ident_non_zero_usize = TypeIdent::type_("NonZeroUsize").with_ns(NamespaceId::ANONYMOUS);
496        let ident_non_zero_isize = TypeIdent::type_("NonZeroIsize").with_ns(NamespaceId::ANONYMOUS);
497
498        self.state
499            .add_type(ident_non_zero_usize.clone(), non_zero_usize)?;
500        self.state
501            .add_type(ident_non_zero_isize.clone(), non_zero_isize)?;
502
503        add!("positiveInteger", ident_non_zero_usize);
504        add!("negativeInteger", ident_non_zero_isize);
505
506        Ok(self)
507    }
508
509    /// Set the [`Naming`](Naming) trait that is used to generate and format names.
510    ///
511    /// This accepts any type that implements the [`Naming`](Naming) trait.
512    /// If you want to use an already boxed version have a look at
513    /// [`with_naming_boxed`](Self::with_naming_boxed).
514    #[instrument(level = "trace", skip(self))]
515    pub fn with_naming<X>(self, naming: X) -> Self
516    where
517        X: Naming + 'static,
518    {
519        self.with_naming_boxed(Box::new(naming))
520    }
521
522    /// Set the [`Naming`] trait that is used to generate and format names.
523    ///
524    /// This accepts only boxed [`Naming`](Naming) trait. For easier use you can
525    /// use [`with_naming`](Self::with_naming) instead.
526    #[instrument(level = "trace", skip(self))]
527    pub fn with_naming_boxed(mut self, naming: Box<dyn Naming>) -> Self {
528        self.state.set_naming(naming);
529
530        self
531    }
532
533    /// Finishes the interpretation of the [`Schemas`] structure and returns
534    /// the [`MetaTypes`] structure with the generated type information.
535    ///
536    /// # Errors
537    ///
538    /// Returns a suitable [`Error`] if the operation was not successful.
539    #[instrument(err, level = "trace", skip(self))]
540    pub fn finish(self) -> Result<(MetaTypes, IdentCache), Error> {
541        self.state.finish()
542    }
543}
544
545fn make_from_str_value_generator(type_path: &'static str) -> impl ValueGenerator + 'static {
546    move |ctx: &GeneratorContext<'_, '_>,
547          value: &str,
548          mode: ValueGeneratorMode|
549          -> Result<ValueRendererBox, GeneratorError> {
550        if mode != ValueGeneratorMode::Value {
551            return Err(GeneratorError::InvalidDefaultValue {
552                ident: ctx.ident.clone(),
553                value: value.into(),
554                mode,
555            });
556        }
557
558        let s = value.to_string();
559
560        Ok(Box::new(move |ctx: &RendererContext<'_, '_>| {
561            let type_ = ctx.resolve_ident_path(type_path);
562            let from_str = ctx.resolve_ident_path("::core::str::FromStr");
563
564            quote! {
565                <#type_ as #from_str>::from_str(#s).unwrap()
566            }
567        }))
568    }
569}