Skip to main content

xsd_schema/
builder.rs

1//! SchemaSet builder with compile() pattern
2//!
3//! Provides a fluent API for loading multiple schemas before compilation,
4//! similar to .NET's XmlSchemaSet pattern.
5//!
6//! XSD version is set on the builder — the parser derives it automatically.
7//! Use `SchemaSetBuilder::xsd11()` for XSD 1.1, `SchemaSetBuilder::new()` for XSD 1.0.
8//!
9//! # Example
10//!
11//! ```
12//! use xsd_schema::SchemaSetBuilder;
13//!
14//! let compiled = SchemaSetBuilder::new()
15//!     .add_source(r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
16//!         <xs:element name="root" type="xs:string"/>
17//!     </xs:schema>"#, "schema.xsd")
18//!     .expect("parse failed")
19//!     .compile()
20//!     .expect("compile failed");
21//!
22//! println!("Loaded {} documents", compiled.stats.documents_loaded);
23//! ```
24
25use crate::error::{SchemaError, SchemaResult};
26use crate::ids::DocumentId;
27use crate::parser::parse::parse_schema_with_config;
28use crate::parser::resolver::{
29    fixup_composition_edges, resolve_all_directives, ResolverConfig, SchemaLoader, SchemaResolver,
30};
31#[cfg(feature = "async")]
32use crate::parser::resolver::{resolve_all_directives_async, AsyncSchemaLoader};
33use crate::pipeline::process_loaded_schemas;
34use crate::schema::model::{RegexCompat, XsdVersion};
35use crate::schema::SchemaSet;
36use std::path::{Path, PathBuf};
37
38/// Builder for creating compiled schema sets.
39///
40/// Implements the C# XmlSchemaSet pattern where schemas are added first,
41/// then compiled together as a group.
42///
43/// # Example
44///
45/// ```
46/// use xsd_schema::SchemaSetBuilder;
47///
48/// let compiled = SchemaSetBuilder::new()
49///     .add_source(r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
50///         <xs:element name="root" type="xs:string"/>
51///     </xs:schema>"#, "inline.xsd")
52///     .expect("parse failed")
53///     .compile()
54///     .expect("compile failed");
55///
56/// assert_eq!(compiled.stats.documents_loaded, 1);
57/// ```
pub struct SchemaSetBuilder {
    /// Schema set that every added document is parsed into; it is moved into
    /// the resulting [`CompiledSchemaSet`] when compilation succeeds.
    schema_set: SchemaSet,
    /// Resolver used to normalize locations, load schema content, and supply
    /// the parser configuration.
    resolver: SchemaResolver,
    /// Documents added so far whose directives have not been resolved yet;
    /// drained at the start of compilation.
    pending_docs: Vec<DocumentId>,
    /// Errors from mandatory directives (include/redefine/override)
    errors: Vec<SchemaError>,
    /// Errors from xs:import directives
    import_errors: Vec<SchemaError>,
}
67
68impl SchemaSetBuilder {
69    /// Create a new builder with default configuration.
70    ///
71    /// Uses the default loader chain (embedded + filesystem) and
72    /// automatically adds the XML catalog for well-known namespaces.
73    pub fn new() -> Self {
74        let mut resolver = SchemaResolver::new();
75        resolver.catalog_mut().add_xml_catalog();
76
77        Self {
78            schema_set: SchemaSet::new(),
79            resolver,
80            pending_docs: Vec::new(),
81            errors: Vec::new(),
82            import_errors: Vec::new(),
83        }
84    }
85
86    /// Create builder with custom resolver configuration.
87    pub fn with_config(config: ResolverConfig) -> Self {
88        let mut resolver = SchemaResolver::with_config(config);
89        resolver.catalog_mut().add_xml_catalog();
90
91        Self {
92            schema_set: SchemaSet::new(),
93            resolver,
94            pending_docs: Vec::new(),
95            errors: Vec::new(),
96            import_errors: Vec::new(),
97        }
98    }
99
100    /// Create builder with custom loader.
101    pub fn with_loader(loader: Box<dyn SchemaLoader>) -> Self {
102        let mut resolver = SchemaResolver::with_loader(loader);
103        resolver.catalog_mut().add_xml_catalog();
104
105        Self {
106            schema_set: SchemaSet::new(),
107            resolver,
108            pending_docs: Vec::new(),
109            errors: Vec::new(),
110            import_errors: Vec::new(),
111        }
112    }
113
114    /// Create a builder configured for a specific XSD version.
115    pub fn with_version(version: XsdVersion) -> Self {
116        let mut resolver = SchemaResolver::new();
117        resolver.catalog_mut().add_xml_catalog();
118
119        Self {
120            schema_set: SchemaSet::with_version(version),
121            resolver,
122            pending_docs: Vec::new(),
123            errors: Vec::new(),
124            import_errors: Vec::new(),
125        }
126    }
127
128    /// Create a builder configured for XSD 1.1.
129    pub fn xsd11() -> Self {
130        Self::with_version(XsdVersion::V1_1)
131    }
132
133    /// Set the regex compatibility mode on the underlying schema set.
134    ///
135    /// Affects how pattern facets in subsequently added/compiled schemas
136    /// are validated. See [`RegexCompat`] and `doc/INTRODUCTION.md` for
137    /// the closed list of leniencies enabled by `LenientMs`. Default is
138    /// `RegexCompat::Strict`.
139    pub fn set_regex_compatibility(&mut self, compat: RegexCompat) -> &mut Self {
140        self.schema_set.set_regex_compatibility(compat);
141        self
142    }
143
144    /// Create a builder configured for XSD 1.1 with a custom schema loader.
145    pub fn xsd11_with_loader(loader: Box<dyn SchemaLoader>) -> Self {
146        let mut resolver = SchemaResolver::with_loader(loader);
147        resolver.catalog_mut().add_xml_catalog();
148        Self {
149            schema_set: SchemaSet::with_version(XsdVersion::V1_1),
150            resolver,
151            pending_docs: Vec::new(),
152            errors: Vec::new(),
153            import_errors: Vec::new(),
154        }
155    }
156
157    /// Create a builder with a custom async loader for non-blocking I/O.
158    ///
159    /// The async loader is used by [`add_async`](SchemaSetBuilder::add_async)
160    /// and [`compile_async`](SchemaSetBuilder::compile_async).
161    #[cfg(feature = "async")]
162    pub fn with_async_loader(loader: Box<dyn AsyncSchemaLoader>) -> Self {
163        let mut resolver = SchemaResolver::with_async_loader(loader);
164        resolver.catalog_mut().add_xml_catalog();
165
166        Self {
167            schema_set: SchemaSet::new(),
168            resolver,
169            pending_docs: Vec::new(),
170            errors: Vec::new(),
171            import_errors: Vec::new(),
172        }
173    }
174
175    /// Re-load all schemas from an existing compiled schema set.
176    ///
177    /// Iterates the loaded locations in `schema_set` and adds each one to
178    /// this builder. This lets you seed a new builder from a previously
179    /// compiled set without manually tracking the original file paths —
180    /// useful for enriching with `xsi:schemaLocation` hints.
181    ///
182    /// Locations that fail to load (e.g. inline sources without a file
183    /// path) are silently skipped. Already-loaded locations are
184    /// deduplicated.
185    ///
186    /// # Example
187    ///
188    /// ```rust,ignore
189    /// let mut builder = SchemaSetBuilder::new();
190    /// builder.add_from(&original_schema_set);
191    /// load_hints_into_builder(&mut builder, &sl_hints, &nnsl_hints);
192    /// let enriched = builder.compile()?;
193    /// ```
194    pub fn add_from(&mut self, schema_set: &SchemaSet) -> &mut Self {
195        for location in schema_set.loaded_schema_locations() {
196            let _ = self.try_add(location);
197        }
198        self
199    }
200
201    /// Add a schema by namespace and location.
202    ///
203    /// Matches the C# `XmlSchemaSet.Add(namespace, location)` pattern.
204    /// The namespace parameter is for documentation/validation purposes;
205    /// the actual namespace comes from the schema's targetNamespace attribute.
206    ///
207    /// # Arguments
208    ///
209    /// * `_namespace` - Expected namespace (for documentation; not enforced)
210    /// * `location` - File path or URI to load the schema from
211    ///
212    /// # Example
213    ///
214    /// ```
215    /// use xsd_schema::SchemaSetBuilder;
216    ///
217    /// let builder = SchemaSetBuilder::new()
218    ///     .add("urn:books", "examples/books.xsd")
219    ///     .expect("failed to load books.xsd");
220    ///
221    /// assert_eq!(builder.schema_count(), 1);
222    /// ```
223    pub fn add(mut self, _namespace: &str, location: &str) -> SchemaResult<Self> {
224        self.try_add(location)?;
225        Ok(self)
226    }
227
228    /// Add a schema by location without consuming the builder.
229    ///
230    /// Returns `Ok(true)` if the schema was freshly loaded, `Ok(false)` if
231    /// it was already present (dedup). Returns `Err` on load/parse failure.
232    ///
233    /// The location is first normalized via the resolver so that relative
234    /// and absolute forms of the same path are correctly deduplicated.
235    pub fn try_add(&mut self, location: &str) -> SchemaResult<bool> {
236        let normalized = normalize_loaded_location(&self.resolver, location, "");
237        if self.schema_set.is_loaded(&normalized) {
238            return Ok(false);
239        }
240        let content = self.resolver.load_content(&normalized)?;
241        let doc_id = parse_schema_with_config(
242            content.as_bytes(),
243            &normalized,
244            &mut self.schema_set,
245            &self.resolver.config.parser_config,
246        )?;
247        self.pending_docs.push(doc_id);
248        self.schema_set.mark_loaded(normalized, doc_id);
249        Ok(true)
250    }
251
252    /// Add a schema by resolving a relative location against a base URI.
253    ///
254    /// Uses the builder's resolver for URI resolution (handles Windows
255    /// paths, URL normalization, etc.). The resolved absolute URI is used
256    /// for loading and dedup tracking.
257    ///
258    /// Returns `Ok(true)` if freshly loaded, `Ok(false)` if already present.
259    pub fn try_add_relative(&mut self, location: &str, base_uri: &str) -> SchemaResult<bool> {
260        let normalized = normalize_loaded_location(&self.resolver, location, base_uri);
261        self.try_add(&normalized)
262    }
263
264    /// Add a schema from XML source string.
265    ///
266    /// # Arguments
267    ///
268    /// * `xml` - The schema XML content as a string
269    /// * `base_uri` - Base URI for resolving relative references
270    ///
271    /// # Example
272    ///
273    /// ```
274    /// use xsd_schema::SchemaSetBuilder;
275    ///
276    /// let builder = SchemaSetBuilder::new()
277    ///     .add_source(r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
278    ///         <xs:element name="root" type="xs:string"/>
279    ///     </xs:schema>"#, "inline.xsd")
280    ///     .expect("parse failed");
281    ///
282    /// assert_eq!(builder.schema_count(), 1);
283    /// ```
284    pub fn add_source(mut self, xml: &str, base_uri: &str) -> SchemaResult<Self> {
285        let normalized = normalize_loaded_location(&self.resolver, base_uri, "");
286        let doc_id = parse_schema_with_config(
287            xml.as_bytes(),
288            &normalized,
289            &mut self.schema_set,
290            &self.resolver.config.parser_config,
291        )?;
292        self.pending_docs.push(doc_id);
293        self.schema_set.mark_loaded(normalized, doc_id);
294        Ok(self)
295    }
296
297    /// Add a schema from bytes.
298    ///
299    /// # Arguments
300    ///
301    /// * `xml` - The schema XML content as bytes
302    /// * `base_uri` - Base URI for resolving relative references
303    pub fn add_bytes(mut self, xml: &[u8], base_uri: &str) -> SchemaResult<Self> {
304        let normalized = normalize_loaded_location(&self.resolver, base_uri, "");
305        let doc_id = parse_schema_with_config(
306            xml,
307            &normalized,
308            &mut self.schema_set,
309            &self.resolver.config.parser_config,
310        )?;
311        self.pending_docs.push(doc_id);
312        self.schema_set.mark_loaded(normalized, doc_id);
313        Ok(self)
314    }
315
316    /// Get the number of schemas added so far.
317    pub fn schema_count(&self) -> usize {
318        self.pending_docs.len()
319    }
320
321    /// Check if a schema location has already been loaded.
322    pub fn is_loaded(&self, location: &str) -> bool {
323        self.schema_set.is_loaded(location)
324    }
325
326    /// Compile all added schemas.
327    ///
328    /// This performs the following phases:
329    /// 1. **Directive Resolution** - Process include/import/redefine/override directives
330    /// 2. **Redefine/Override Application** - Apply component replacements
331    /// 3. **Inline Type Assembly** - Materialize inline type definitions
332    /// 4. **Reference Resolution** - Resolve QName references to component keys
333    /// 5. **Particle Allocation** - Allocate element declarations for content particles
334    ///
335    /// # Returns
336    ///
337    /// A [`CompiledSchemaSet`] containing the fully processed schema set and
338    /// compilation statistics.
339    ///
340    /// # Errors
341    ///
342    /// Returns an error if any phase fails (invalid schema, missing references, etc.)
343    pub fn compile(mut self) -> SchemaResult<CompiledSchemaSet> {
344        // Phase 1: Resolve directives for all pending documents
345        // Collect into a temp vec to avoid borrow issues
346        let pending: Vec<_> = self.pending_docs.drain(..).collect();
347        for doc_id in pending {
348            self.resolve_directives_recursive(doc_id)?;
349        }
350
351        // Fixup cycle edges now that all documents have been loaded
352        fixup_composition_edges(&mut self.schema_set);
353
354        // Propagate schema-content errors from directive resolution.
355        // Resolution/IO errors (file not found, network denied) are non-fatal
356        // for all directive types — the target may be unavailable.
357        // Schema-content errors (structural, XML parse, namespace) mean the
358        // target was found but is invalid — those are always fatal.
359        if let Some(err) = self
360            .errors
361            .into_iter()
362            .chain(self.import_errors)
363            .find(|e| e.is_schema_content_error())
364        {
365            return Err(err);
366        }
367
368        // Phases 2-5: Delegate to the pipeline's shared processing function
369        // (redefine/override, inline assembly, reference resolution, particle allocation)
370        let (inline_stats, resolution_stats) = process_loaded_schemas(&mut self.schema_set)?;
371
372        let documents_loaded = self.schema_set.documents.len();
373        Ok(CompiledSchemaSet {
374            schema_set: self.schema_set,
375            stats: CompilationStats {
376                documents_loaded,
377                inline_types_assembled: inline_stats.total_inline_types,
378                types_resolved: resolution_stats.types_resolved,
379                elements_resolved: resolution_stats.elements_resolved,
380                attributes_resolved: resolution_stats.attributes_resolved,
381                groups_resolved: resolution_stats.groups_resolved,
382                attribute_groups_resolved: resolution_stats.attribute_groups_resolved,
383            },
384        })
385    }
386
387    /// Resolve directives recursively for a document and any loaded dependencies.
388    fn resolve_directives_recursive(&mut self, doc_id: DocumentId) -> SchemaResult<()> {
389        let result = resolve_all_directives(doc_id, &mut self.resolver, &mut self.schema_set);
390
391        // Recursively process newly loaded documents
392        for loaded_id in result.loaded {
393            self.resolve_directives_recursive(loaded_id)?;
394        }
395
396        // Collect errors (but don't fail immediately - continue processing)
397        self.errors.extend(result.errors);
398        self.import_errors.extend(result.import_errors);
399
400        Ok(())
401    }
402
403    /// Resolve directives recursively using async loading.
404    #[cfg(feature = "async")]
405    async fn resolve_directives_recursive_async(&mut self, doc_id: DocumentId) -> SchemaResult<()> {
406        let result =
407            resolve_all_directives_async(doc_id, &mut self.resolver, &mut self.schema_set).await;
408
409        // Recursively process newly loaded documents
410        for loaded_id in result.loaded {
411            Box::pin(self.resolve_directives_recursive_async(loaded_id)).await?;
412        }
413
414        self.errors.extend(result.errors);
415        self.import_errors.extend(result.import_errors);
416
417        Ok(())
418    }
419
420    /// Add a schema by namespace and location, loading content asynchronously.
421    ///
422    /// Async variant of [`add`](SchemaSetBuilder::add).
423    #[cfg(feature = "async")]
424    pub async fn add_async(mut self, _namespace: &str, location: &str) -> SchemaResult<Self> {
425        let content = self.resolver.load_content_async(location).await?;
426        let doc_id = parse_schema_with_config(
427            content.as_bytes(),
428            location,
429            &mut self.schema_set,
430            &self.resolver.config.parser_config,
431        )?;
432        self.pending_docs.push(doc_id);
433        self.schema_set.mark_loaded(location.to_string(), doc_id);
434        Ok(self)
435    }
436
437    /// Compile all added schemas using async directive resolution.
438    ///
439    /// Async variant of [`compile`](SchemaSetBuilder::compile). Only directive
440    /// resolution (I/O) is async; all computation phases remain synchronous.
441    #[cfg(feature = "async")]
442    pub async fn compile_async(mut self) -> SchemaResult<CompiledSchemaSet> {
443        // Phase 1: Resolve directives asynchronously for all pending documents
444        let pending: Vec<_> = self.pending_docs.drain(..).collect();
445        for doc_id in pending {
446            self.resolve_directives_recursive_async(doc_id).await?;
447        }
448
449        // Fixup cycle edges now that all documents have been loaded
450        fixup_composition_edges(&mut self.schema_set);
451
452        // Propagate schema-content errors from directive resolution
453        if let Some(err) = self
454            .errors
455            .into_iter()
456            .chain(self.import_errors)
457            .find(|e| e.is_schema_content_error())
458        {
459            return Err(err);
460        }
461
462        // Phases 2-5: Delegate to the pipeline's shared processing function (sync)
463        let (inline_stats, resolution_stats) = process_loaded_schemas(&mut self.schema_set)?;
464
465        let documents_loaded = self.schema_set.documents.len();
466        Ok(CompiledSchemaSet {
467            schema_set: self.schema_set,
468            stats: CompilationStats {
469                documents_loaded,
470                inline_types_assembled: inline_stats.total_inline_types,
471                types_resolved: resolution_stats.types_resolved,
472                elements_resolved: resolution_stats.elements_resolved,
473                attributes_resolved: resolution_stats.attributes_resolved,
474                groups_resolved: resolution_stats.groups_resolved,
475                attribute_groups_resolved: resolution_stats.attribute_groups_resolved,
476            },
477        })
478    }
479}
480
481fn normalize_loaded_location(resolver: &SchemaResolver, location: &str, base_uri: &str) -> String {
482    let resolved = resolver
483        .resolve_location(location, base_uri)
484        .unwrap_or_else(|_| location.to_string());
485    if is_absolute_location(&resolved) {
486        return resolved;
487    }
488
489    let cwd = match std::env::current_dir() {
490        Ok(cwd) => cwd,
491        Err(_) => return resolved,
492    };
493    normalize_path(&cwd.join(&resolved))
494        .to_string_lossy()
495        .into_owned()
496}
497
/// Report whether `location` is already absolute, i.e. must not be joined
/// onto the current working directory.
///
/// A location counts as absolute when any of the following holds:
/// * it starts with `http://`, `https://` or `file://`;
/// * the platform's [`Path::is_absolute`] says so;
/// * it starts with a drive specifier: one ASCII letter followed by `:`
///   (for example `C:\schemas\a.xsd`). This is checked on every platform.
///
/// The drive check requires the first byte to be an ASCII letter, so a
/// string such as `"1:a.xsd"` or `".:a"` is not mistaken for a drive path.
fn is_absolute_location(location: &str) -> bool {
    const URL_PREFIXES: [&str; 3] = ["http://", "https://", "file://"];

    if URL_PREFIXES
        .iter()
        .any(|prefix| location.starts_with(prefix))
    {
        return true;
    }
    if Path::new(location).is_absolute() {
        return true;
    }

    // Drive specifier: exactly one ASCII letter followed by ':'.
    match location.as_bytes() {
        [drive, b':', ..] => drive.is_ascii_alphabetic(),
        _ => false,
    }
}
505
/// Lexically clean up `path` without touching the filesystem.
///
/// * `.` components are removed.
/// * `..` removes the preceding normal component when there is one.
/// * `..` directly after a root or prefix is dropped, because the path cannot
///   go above it.
/// * `..` with nothing to cancel (a relative path that climbs above its
///   starting point) is kept, so `"../a"` stays `"../a"` instead of
///   collapsing to `"a"`.
///
/// Symlinks are not resolved.
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut result = PathBuf::new();

    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                // Look at what the path built so far ends with before
                // deciding how to apply the `..`.
                let ends_with_name =
                    matches!(result.components().next_back(), Some(Component::Normal(_)));
                let ends_at_anchor = matches!(
                    result.components().next_back(),
                    Some(Component::RootDir) | Some(Component::Prefix(_))
                );

                if ends_with_name {
                    result.pop();
                } else if !ends_at_anchor {
                    // Empty so far, or already climbing: keep the `..`.
                    result.push("..");
                }
            }
            Component::Prefix(_) | Component::RootDir | Component::Normal(_) => {
                result.push(component)
            }
        }
    }

    result
}
521
522impl Default for SchemaSetBuilder {
523    fn default() -> Self {
524        Self::new()
525    }
526}
527
528/// Compiled schema set ready for validation.
529///
530/// Contains the fully processed [`SchemaSet`] with all references resolved
531/// and inline types assembled.
532pub struct CompiledSchemaSet {
533    /// The compiled schema set
534    pub schema_set: SchemaSet,
535    /// Compilation statistics
536    pub stats: CompilationStats,
537}
538
539impl CompiledSchemaSet {
540    /// Get a reference to the underlying schema set.
541    pub fn schema_set(&self) -> &SchemaSet {
542        &self.schema_set
543    }
544
545    /// Consume self and return the underlying schema set.
546    pub fn into_schema_set(self) -> SchemaSet {
547        self.schema_set
548    }
549}
550
/// Counters gathered while compiling a schema set.
#[derive(Debug, Default, Clone)]
pub struct CompilationStats {
    /// Number of schema documents loaded
    pub documents_loaded: usize,
    /// Number of inline types assembled
    pub inline_types_assembled: usize,
    /// Number of type references resolved
    pub types_resolved: usize,
    /// Number of element references resolved
    pub elements_resolved: usize,
    /// Number of attribute references resolved
    pub attributes_resolved: usize,
    /// Number of group references resolved
    pub groups_resolved: usize,
    /// Number of attribute group references resolved
    pub attribute_groups_resolved: usize,
}

impl CompilationStats {
    /// Sum of the five `*_resolved` counters.
    ///
    /// `documents_loaded` and `inline_types_assembled` are not included.
    pub fn total_references_resolved(&self) -> usize {
        let per_kind = [
            self.types_resolved,
            self.elements_resolved,
            self.attributes_resolved,
            self.groups_resolved,
            self.attribute_groups_resolved,
        ];
        per_kind.iter().sum()
    }
}
580
// Unit tests for the builder's public surface: construction, adding inline
// sources, compiling, and the statistics helper.
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed builder has no schemas queued.
    #[test]
    fn test_builder_new() {
        let builder = SchemaSetBuilder::new();
        assert_eq!(builder.schema_count(), 0);
    }

    /// Adding one inline source queues exactly one schema.
    #[test]
    fn test_builder_add_source() {
        let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
                <xs:element name="root" type="xs:string"/>
            </xs:schema>"#;

        let builder = SchemaSetBuilder::new()
            .add_source(xsd, "test.xsd")
            .expect("Should parse schema");

        assert_eq!(builder.schema_count(), 1);
    }

    /// Compiling a schema with an anonymous complex type reports one loaded
    /// document and at least one assembled inline type.
    #[test]
    fn test_builder_compile() {
        let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
                <xs:element name="person">
                    <xs:complexType>
                        <xs:sequence>
                            <xs:element name="name" type="xs:string"/>
                        </xs:sequence>
                    </xs:complexType>
                </xs:element>
            </xs:schema>"#;

        let compiled = SchemaSetBuilder::new()
            .add_source(xsd, "test.xsd")
            .expect("Should parse schema")
            .compile()
            .expect("Should compile");

        assert_eq!(compiled.stats.documents_loaded, 1);
        assert!(compiled.stats.inline_types_assembled > 0);
    }

    /// Two inline sources with different target namespaces compile together
    /// and are both counted as loaded documents.
    #[test]
    fn test_builder_multiple_schemas() {
        let xsd1 = r#"<?xml version="1.0" encoding="UTF-8"?>
            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
                       targetNamespace="http://example.com/schema1">
                <xs:element name="item1" type="xs:string"/>
            </xs:schema>"#;

        let xsd2 = r#"<?xml version="1.0" encoding="UTF-8"?>
            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
                       targetNamespace="http://example.com/schema2">
                <xs:element name="item2" type="xs:int"/>
            </xs:schema>"#;

        let compiled = SchemaSetBuilder::new()
            .add_source(xsd1, "schema1.xsd")
            .expect("Should parse schema1")
            .add_source(xsd2, "schema2.xsd")
            .expect("Should parse schema2")
            .compile()
            .expect("Should compile");

        assert_eq!(compiled.stats.documents_loaded, 2);
    }

    /// `total_references_resolved` adds up only the five `*_resolved`
    /// counters: 10 + 8 + 3 + 2 + 1 = 24.
    #[test]
    fn test_compilation_stats() {
        let stats = CompilationStats {
            documents_loaded: 2,
            inline_types_assembled: 5,
            types_resolved: 10,
            elements_resolved: 8,
            attributes_resolved: 3,
            groups_resolved: 2,
            attribute_groups_resolved: 1,
        };

        assert_eq!(stats.total_references_resolved(), 24);
    }
}