xsd_parser/pipeline/interpreter/
mod.rs

//! Schema interpretation logic for transforming parsed XML schemas into semantic
//! type definitions.
//!
//! This module defines the [`Interpreter`] type, which processes raw [`Schemas`] loaded
//! by the [`Parser`](crate::Parser) and converts them into semantic [`MetaTypes`].
//! These types represent meaningful, structured representations such as complex types,
//! enums, references, and attributes.
//!
//! The interpreter is capable of:
//! - registering custom or user-defined types
//! - resolving XSD primitive types and typedefs
//! - adding default built-in or XML-specific types (e.g., `xs:string`, `xs:anyType`)
//! - integrating numeric backends (e.g., `num::BigInt`) for large integers
//!
//! The resulting [`MetaTypes`] structure can then be passed to the generator to
//! generate Rust-specific type structures.
//!
//! # Example
//! ```rust,ignore
//! let meta_types = Interpreter::new(&schemas)
//!     .with_buildin_types()?
//!     .with_default_typedefs()?
//!     .finish()?;
//! ```
25
26mod error;
27mod name_builder;
28mod schema;
29mod state;
30mod variant_builder;
31
32use std::fmt::Debug;
33
34use quote::quote;
35use tracing::instrument;
36
37use crate::config::Namespace;
38use crate::models::{
39    meta::{
40        AnyAttributeMeta, AnyMeta, AttributeMeta, BuildInMeta, ComplexMeta, CustomMeta,
41        ElementMeta, GroupMeta, MetaType, MetaTypeVariant, MetaTypes, ModuleMeta, ReferenceMeta,
42        SchemaMeta,
43    },
44    schema::{xs::ProcessContentsType, MaxOccurs, Schemas},
45    Ident, IdentType, Name,
46};
47
48pub use error::Error;
49
50use self::schema::SchemaInterpreter;
51use self::state::{Node, StackEntry, State};
52use self::variant_builder::VariantBuilder;
53
54/// The `Interpreter` transforms raw parsed XML schema data into semantically
55/// meaningful Rust-compatible type metadata.
56///
57/// It operates on a [`Schemas`] structure produced by the [`Parser`](crate::Parser)
58/// and produces a [`MetaTypes`] structure, which is the central format used for
59/// code generation.
60///
61/// This abstraction allows the intermediate schema format to be reshaped into a form
62/// suitable for deterministic and idiomatic Rust code generation.
63#[must_use]
64#[derive(Debug)]
65pub struct Interpreter<'a> {
66    state: State<'a>,
67    schemas: &'a Schemas,
68}
69
70impl<'a> Interpreter<'a> {
71    /// Create a new [`Interpreter`] instance using the passed `schemas` reference.
72    pub fn new(schemas: &'a Schemas) -> Self {
73        let state = State::default();
74
75        Self { state, schemas }
76    }
77
78    /// Add a custom [`MetaType`] information for the passed `ident`ifier to the
79    /// resulting [`MetaTypes`] structure.
80    ///
81    /// # Errors
82    ///
83    /// Returns a suitable [`Error`] if the operation was not successful.
84    #[instrument(err, level = "trace", skip(self))]
85    pub fn with_type<I, T>(mut self, ident: I, type_: T) -> Result<Self, Error>
86    where
87        I: Into<Ident> + Debug,
88        T: Into<MetaType> + Debug,
89    {
90        self.state.add_type(ident, type_, true)?;
91
92        Ok(self)
93    }
94
95    /// Add a simple type definition to the resulting [`MetaTypes`] structure using
96    /// `ident` as identifier for the new type and `type_` as target type for the
97    /// type definition.
98    ///
99    /// # Errors
100    ///
101    /// Returns a suitable [`Error`] if the operation was not successful.
102    #[instrument(err, level = "trace", skip(self))]
103    pub fn with_typedef<I, T>(mut self, ident: I, type_: T) -> Result<Self, Error>
104    where
105        I: Into<Ident> + Debug,
106        T: Into<Ident> + Debug,
107    {
108        self.state
109            .add_type(ident, ReferenceMeta::new(type_), true)?;
110
111        Ok(self)
112    }
113
114    /// Adds the default build-in types to the resulting [`MetaTypes`] structure.
115    ///
116    /// # Errors
117    ///
118    /// Returns a suitable [`Error`] if the operation was not successful.
119    #[instrument(err, level = "trace", skip(self))]
120    pub fn with_buildin_types(mut self) -> Result<Self, Error> {
121        macro_rules! add {
122            ($ident:ident, $type:ident) => {
123                self.state
124                    .add_type(Ident::$ident, BuildInMeta::$type, true)?;
125            };
126        }
127
128        add!(U8, U8);
129        add!(U16, U16);
130        add!(U32, U32);
131        add!(U64, U64);
132        add!(U128, U128);
133        add!(USIZE, Usize);
134
135        add!(I8, I8);
136        add!(I16, I16);
137        add!(I32, I32);
138        add!(I64, I64);
139        add!(I128, I128);
140        add!(ISIZE, Isize);
141
142        add!(F32, F32);
143        add!(F64, F64);
144
145        add!(BOOL, Bool);
146        add!(STRING, String);
147
148        Ok(self)
149    }
150
151    /// Adds the type definitions for common XML types (like `xs:string` or `xs:int`)
152    /// to the resulting [`MetaTypes`] structure.
153    ///
154    /// # Errors
155    ///
156    /// Returns a suitable [`Error`] if the operation was not successful.
157    #[instrument(err, level = "trace", skip(self))]
158    pub fn with_default_typedefs(mut self) -> Result<Self, Error> {
159        let xs = self
160            .schemas
161            .resolve_namespace(&Some(Namespace::XS))
162            .ok_or_else(|| Error::UnknownNamespace(Namespace::XS.clone()))?;
163
164        macro_rules! add {
165            ($ns:ident, $src:expr, $dst:ident) => {
166                self.state.add_type(
167                    Ident::type_($src).with_ns(Some($ns)),
168                    ReferenceMeta::new(Ident::$dst),
169                    true,
170                )?;
171            };
172        }
173        macro_rules! add_list {
174            ($ns:ident, $src:expr, $dst:ident) => {
175                self.state.add_type(
176                    Ident::type_($src).with_ns(Some($ns)),
177                    ReferenceMeta::new(Ident::$dst)
178                        .min_occurs(0)
179                        .max_occurs(MaxOccurs::Unbounded),
180                    true,
181                )?;
182            };
183        }
184
185        /* Primitive Types */
186
187        add!(xs, "string", STRING);
188        add!(xs, "boolean", BOOL);
189        add!(xs, "decimal", F64);
190        add!(xs, "float", F32);
191        add!(xs, "double", F64);
192
193        /* time related types */
194
195        add!(xs, "duration", STRING);
196        add!(xs, "dateTime", STRING);
197        add!(xs, "time", STRING);
198        add!(xs, "date", STRING);
199        add!(xs, "gYearMonth", STRING);
200        add!(xs, "gYear", STRING);
201        add!(xs, "gMonthDay", STRING);
202        add!(xs, "gMonth", STRING);
203        add!(xs, "gDay", STRING);
204
205        /* Data related types */
206
207        add!(xs, "hexBinary", STRING);
208        add!(xs, "base64Binary", STRING);
209
210        /* URL related types */
211
212        add!(xs, "anyURI", STRING);
213        add!(xs, "QName", STRING);
214        add!(xs, "NOTATION", STRING);
215
216        /* Numeric Types */
217
218        add!(xs, "long", I64);
219        add!(xs, "int", I32);
220        add!(xs, "integer", I32);
221        add!(xs, "short", I16);
222        add!(xs, "byte", I8);
223        add!(xs, "negativeInteger", ISIZE);
224        add!(xs, "nonPositiveInteger", ISIZE);
225
226        add!(xs, "unsignedLong", U64);
227        add!(xs, "unsignedInt", U32);
228        add!(xs, "unsignedShort", U16);
229        add!(xs, "unsignedByte", U8);
230        add!(xs, "positiveInteger", USIZE);
231        add!(xs, "nonNegativeInteger", USIZE);
232
233        /* String Types */
234
235        add!(xs, "normalizedString", STRING);
236        add!(xs, "token", STRING);
237        add!(xs, "language", STRING);
238        add!(xs, "NMTOKEN", STRING);
239        add!(xs, "Name", STRING);
240        add!(xs, "NCName", STRING);
241        add!(xs, "ID", STRING);
242        add!(xs, "IDREF", STRING);
243
244        add!(xs, "anySimpleType", STRING);
245
246        add_list!(xs, "NMTOKENS", STRING);
247        add_list!(xs, "IDREFS", STRING);
248        add_list!(xs, "ENTITY", STRING);
249        add_list!(xs, "ENTITIES", STRING);
250
251        Ok(self)
252    }
253
254    /// Adds a default type definition for `xs:anyType`.
255    ///
256    /// # Errors
257    ///
258    /// Returns a suitable [`Error`] if the operation was not successful.
259    #[instrument(err, level = "trace", skip(self))]
260    pub fn with_xs_any_type(mut self) -> Result<Self, Error> {
261        let xs = self
262            .schemas
263            .resolve_namespace(&Some(Namespace::XS))
264            .ok_or_else(|| Error::UnknownNamespace(Namespace::XS.clone()))?;
265
266        /* content type */
267
268        let any_name = Name::named("any");
269        let any_ident = Ident::new(any_name).with_type(IdentType::Element);
270        let mut any = ElementMeta::any(
271            any_ident,
272            AnyMeta {
273                id: None,
274                namespace: None,
275                not_q_name: None,
276                not_namespace: None,
277                process_contents: ProcessContentsType::Lax,
278            },
279        );
280        any.min_occurs = 0;
281        any.max_occurs = MaxOccurs::Unbounded;
282
283        let mut content_sequence = GroupMeta::default();
284        content_sequence.elements.push_any(any);
285
286        let content_name = self.state.name_builder().shared_name("Content").finish();
287        let content_ident = Ident::new(content_name).with_ns(Some(xs));
288        let content_variant = MetaTypeVariant::Sequence(content_sequence);
289        let content_type = MetaType::new(content_variant);
290
291        self.state
292            .add_type(content_ident.clone(), content_type, true)?;
293
294        /* xs:anyType */
295
296        let ident = Ident::type_("anyType").with_ns(Some(xs));
297
298        let any_attribute_name = Name::named("any_attribute");
299        let any_attribute_ident = Ident::new(any_attribute_name).with_type(IdentType::Attribute);
300        let any_attribute = AttributeMeta::any(
301            any_attribute_ident,
302            AnyAttributeMeta {
303                id: None,
304                namespace: None,
305                not_q_name: None,
306                not_namespace: None,
307                process_contents: ProcessContentsType::Lax,
308            },
309        );
310
311        let mut complex = ComplexMeta {
312            content: Some(content_ident),
313            min_occurs: 1,
314            max_occurs: MaxOccurs::Bounded(1),
315            ..Default::default()
316        };
317        complex.attributes.push(any_attribute);
318
319        let variant = MetaTypeVariant::ComplexType(complex);
320        let type_ = MetaType::new(variant);
321
322        self.state.add_type(ident, type_, true)?;
323
324        Ok(self)
325    }
326
327    /// Add type definitions for numeric XML types (like `xs:int`) that
328    /// uses `num::BigInt` and `num::BigUint` instead of build-in integer types.
329    ///
330    /// # Errors
331    ///
332    /// Returns a suitable [`Error`] if the operation was not successful.
333    pub fn with_num_big_int(mut self) -> Result<Self, Error> {
334        let xs = self
335            .schemas
336            .resolve_namespace(&Some(Namespace::XS))
337            .ok_or_else(|| Error::UnknownNamespace(Namespace::XS.clone()))?;
338
339        macro_rules! add {
340            ($ns:ident, $src:expr, $dst:literal) => {{
341                self.state.add_type(
342                    Ident::type_($src).with_ns(Some($ns)),
343                    ReferenceMeta::new(Ident::type_($dst)),
344                    true,
345                )?;
346            }};
347        }
348
349        let big_int = CustomMeta::new("BigInt")
350            .include_from("num::BigInt")
351            .with_default(|s: &str| {
352                let code = quote! {
353                    <num::BigInt as core::str::FromStr>::from_str(#s).unwrap()
354                };
355
356                Some(code)
357            });
358
359        let big_uint = CustomMeta::new("BigUint")
360            .include_from("num::BigUint")
361            .with_default(|s: &str| {
362                let code = quote! {
363                    <num::BigUint as core::str::FromStr>::from_str(#s).unwrap()
364                };
365
366                Some(code)
367            });
368
369        self.state.add_type(Ident::type_("BigInt"), big_int, true)?;
370        self.state
371            .add_type(Ident::type_("BigUint"), big_uint, true)?;
372
373        add!(xs, "integer", "BigInt");
374        add!(xs, "positiveInteger", "BigUint");
375        add!(xs, "nonNegativeInteger", "BigUint");
376        add!(xs, "negativeInteger", "BigInt");
377        add!(xs, "nonPositiveInteger", "BigInt");
378
379        Ok(self)
380    }
381
382    /// Finishes the interpretation of the [`Schemas`] structure and returns
383    /// the [`MetaTypes`] structure with the generated type information.
384    ///
385    /// # Errors
386    ///
387    /// Returns a suitable [`Error`] if the operation was not successful.
388    #[instrument(err, level = "trace", skip(self))]
389    pub fn finish(mut self) -> Result<MetaTypes, Error> {
390        for (id, info) in self.schemas.namespaces() {
391            let prefix = info
392                .prefix
393                .as_ref()
394                .map(ToString::to_string)
395                .map(Name::new_named);
396            let name = info
397                .module_name
398                .clone()
399                .map(Name::new_named)
400                .or_else(|| prefix.clone());
401            let namespace = info.namespace.clone();
402            let schema_count = info.schemas.len();
403
404            let module = ModuleMeta {
405                name,
406                prefix,
407                namespace,
408                schema_count,
409            };
410
411            self.state.types.modules.insert(*id, module);
412        }
413
414        for (id, info) in self.schemas.schemas() {
415            let schema = SchemaMeta {
416                name: info.name.clone().map(Name::new_named),
417                namespace: info.namespace_id(),
418            };
419
420            self.state.types.schemas.insert(*id, schema);
421
422            SchemaInterpreter::process(&mut self.state, *id, &info.schema, self.schemas)?;
423        }
424
425        Ok(self.state.types)
426    }
427}