Skip to main content

xsd_schema/
pipeline.rs

1//! Schema processing pipeline
2//!
3//! This module provides a high-level orchestration function that coordinates
4//! all phases of schema processing:
5//!
6//! 1. **Parse Phase**: Parse the primary XSD document
7//! 2. **Directive Resolution Phase**: Process include/import/redefine/override directives
8//! 3. **Redefine/Override Application Phase**: Apply component replacements
9//! 4. **Inline Type Assembly Phase**: Materialize inline type definitions
10//! 5. **Reference Resolution Phase**: Resolve QName references to component keys
11//!
12//! # Usage
13//!
14//! ```
15//! use xsd_schema::{SchemaSet, load_and_process_schema};
16//!
17//! let mut schema_set = SchemaSet::new();
18//! let xsd = r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
19//!     <xs:element name="root" type="xs:string"/>
20//! </xs:schema>"#;
21//!
22//! // XSD version is derived from schema_set (V1_0 by default).
23//! // Use SchemaSet::xsd11() for XSD 1.1 schemas.
24//! let result = load_and_process_schema(xsd.as_bytes(), "test.xsd", &mut schema_set, None)
25//!     .expect("failed to process schema");
26//! println!("Processed {} inline types", result.inline_stats.unwrap().total_inline_types);
27//! println!("Resolved {} type references", result.resolution_stats.unwrap().types_resolved);
28//! ```
29
30use crate::error::SchemaResult;
31use crate::ids::DocumentId;
32use crate::parser::parse::{parse_schema_with_config, ParserConfig};
33#[cfg(feature = "async")]
34use crate::parser::resolver::resolve_all_directives_async;
35use crate::parser::resolver::{
36    fixup_composition_edges, resolve_all_directives, ResolutionResult, ResolverConfig,
37    SchemaResolver,
38};
39use crate::schema::{
40    allocate_content_particle_elements, allocate_model_group_particle_elements,
41    assemble_inline_types, build_dependency_graph, compile_all_patterns, resolve_all_references,
42    validate_all_derivations, validate_attribute_id_constraints,
43    validate_attribute_value_constraints, validate_element_value_constraints, InlineAssemblyStats,
44    ResolutionStats,
45};
46use crate::SchemaSet;
47
48/// Configuration for the schema processing pipeline
49#[derive(Debug, Clone)]
50pub struct PipelineConfig {
51    /// Parser configuration
52    pub parser: ParserConfig,
53    /// Resolver configuration for include/import handling
54    pub resolver: ResolverConfig,
55    /// Whether to load external schemas via include/import/redefine/override directives.
56    /// When false, no I/O is performed and redefine/override application is deferred
57    /// (callers should use `process_loaded_schemas` after all schemas are parsed).
58    pub resolve_directives: bool,
59    /// Whether to assemble inline types
60    pub assemble_inline_types: bool,
61    /// Whether to resolve QName references
62    pub resolve_references: bool,
63}
64
65impl Default for PipelineConfig {
66    fn default() -> Self {
67        Self {
68            parser: ParserConfig::default(),
69            resolver: ResolverConfig::default(),
70            resolve_directives: true,
71            assemble_inline_types: true,
72            resolve_references: true,
73        }
74    }
75}
76
77impl PipelineConfig {
78    /// Create a minimal configuration that only parses (no directive/type resolution)
79    pub fn parse_only() -> Self {
80        Self {
81            parser: ParserConfig::default(),
82            resolver: ResolverConfig::default(),
83            resolve_directives: false,
84            assemble_inline_types: false,
85            resolve_references: false,
86        }
87    }
88
89    /// Create a configuration for full processing
90    pub fn full() -> Self {
91        Self::default()
92    }
93}
94
95/// Statistics from processing the entire pipeline
96#[derive(Debug, Default)]
97pub struct PipelineStats {
98    /// The primary document ID
99    pub doc_id: DocumentId,
100    /// Document IDs loaded via include/import directives
101    pub loaded_docs: Vec<DocumentId>,
102    /// Directive resolution result
103    pub directive_result: Option<DirectiveStats>,
104    /// Inline type assembly statistics
105    pub inline_stats: Option<InlineAssemblyStats>,
106    /// Reference resolution statistics
107    pub resolution_stats: Option<ResolutionStats>,
108}
109
/// Counters describing the outcome of directive resolution.
#[derive(Default, Debug)]
pub struct DirectiveStats {
    /// How many schemas were loaded successfully.
    pub loaded_count: usize,
    /// How many schemas were skipped (already loaded or circular).
    pub skipped_count: usize,
    /// How many errors were recorded; general directive errors and import
    /// errors are counted together.
    pub error_count: usize,
}
120
121impl From<&ResolutionResult> for DirectiveStats {
122    fn from(result: &ResolutionResult) -> Self {
123        Self {
124            loaded_count: result.loaded.len(),
125            skipped_count: result.skipped.len(),
126            error_count: result.errors.len() + result.import_errors.len(),
127        }
128    }
129}
130
131/// Load and fully process an XSD schema document
132///
133/// This is the main entry point for schema processing. It orchestrates all
134/// phases of schema handling:
135///
136/// 1. **Parse**: Parse the primary XSD document
137/// 2. **Directives**: Load and parse included/imported/redefined/overridden schemas
138/// 3. **Redefine/Override**: Apply component replacements from redefine/override directives
139/// 4. **Inline Assembly**: Allocate inline type definitions in arenas
140/// 5. **Reference Resolution**: Resolve QName references to component keys
141///
142/// # Arguments
143///
144/// * `xml` - Raw XML bytes of the schema document
145/// * `base_uri` - Base URI for this document (for error messages and directive resolution)
146/// * `schema_set` - Schema set to add the parsed document to
147/// * `config` - Optional pipeline configuration (uses defaults if None)
148///
149/// # Returns
150///
151/// Pipeline statistics including document IDs and processing counts, or an error.
152///
153/// # Example
154///
155/// ```
156/// use xsd_schema::{SchemaSet, load_and_process_schema};
157///
158/// let mut schema_set = SchemaSet::new();
159/// let xsd = r#"<?xml version="1.0"?>
160/// <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
161///     <xs:element name="root">
162///         <xs:complexType>
163///             <xs:sequence>
164///                 <xs:element name="child" type="xs:string"/>
165///             </xs:sequence>
166///         </xs:complexType>
167///     </xs:element>
168/// </xs:schema>"#;
169///
170/// let stats = load_and_process_schema(xsd.as_bytes(), "schema.xsd", &mut schema_set, None)
171///     .expect("failed to process schema");
172/// assert!(stats.inline_stats.unwrap().total_inline_types > 0);
173/// ```
174pub fn load_and_process_schema(
175    xml: &[u8],
176    base_uri: &str,
177    schema_set: &mut SchemaSet,
178    config: Option<PipelineConfig>,
179) -> SchemaResult<PipelineStats> {
180    let config = config.unwrap_or_default();
181    let mut stats = PipelineStats::default();
182
183    // Phase 1: Parse the primary schema document
184    let doc_id = parse_schema_with_config(xml, base_uri, schema_set, &config.parser)?;
185    stats.doc_id = doc_id;
186
187    // Phase 2: Resolve directives (include/import/redefine)
188    if config.resolve_directives {
189        let mut resolver = SchemaResolver::with_config(config.resolver.clone());
190
191        // Process directives for the primary document
192        let dir_result = resolve_all_directives(doc_id, &mut resolver, schema_set);
193
194        // Collect loaded document IDs and errors
195        stats.loaded_docs.extend(dir_result.loaded.iter().copied());
196        stats.directive_result = Some(DirectiveStats::from(&dir_result));
197        let mut directive_errors: Vec<crate::error::SchemaError> = dir_result.errors;
198        let mut import_errors: Vec<crate::error::SchemaError> = dir_result.import_errors;
199
200        // Recursively process directives in loaded documents
201        let mut pending_docs = dir_result.loaded.clone();
202        while !pending_docs.is_empty() {
203            let current_batch: Vec<_> = std::mem::take(&mut pending_docs);
204            for loaded_doc_id in current_batch {
205                let nested_result =
206                    resolve_all_directives(loaded_doc_id, &mut resolver, schema_set);
207                stats
208                    .loaded_docs
209                    .extend(nested_result.loaded.iter().copied());
210                pending_docs.extend(nested_result.loaded.iter().copied());
211
212                // Accumulate stats
213                if let Some(ref mut dir_stats) = stats.directive_result {
214                    dir_stats.loaded_count += nested_result.loaded.len();
215                    dir_stats.skipped_count += nested_result.skipped.len();
216                    dir_stats.error_count +=
217                        nested_result.errors.len() + nested_result.import_errors.len();
218                }
219                directive_errors.extend(nested_result.errors);
220                import_errors.extend(nested_result.import_errors);
221            }
222        }
223
224        // Fixup cycle edges now that all documents have been loaded
225        fixup_composition_edges(schema_set);
226
227        // Propagate schema-content errors from directive resolution.
228        // Resolution/IO errors are non-fatal for all directive types.
229        if let Some(err) = directive_errors
230            .into_iter()
231            .chain(import_errors)
232            .find(|e| e.is_schema_content_error())
233        {
234            return Err(err);
235        }
236    }
237
238    // Fail early if parsing collected structural errors (error-recovery mode)
239    if !schema_set.parsing_errors.is_empty() {
240        let errors = std::mem::take(&mut schema_set.parsing_errors);
241        return Err(errors.into_iter().next().unwrap());
242    }
243
244    // Phase 2.5: Apply redefine/override directives (operates on already-parsed
245    // data, no I/O). Skipped in parse-only mode because not all schemas may be
246    // loaded yet; callers use process_loaded_schemas() to apply later.
247    if config.assemble_inline_types || config.resolve_references {
248        crate::schema::apply_redefine_override(schema_set)?;
249    }
250
251    // Phase 3: Assemble inline types (global operation across all documents)
252    if config.assemble_inline_types {
253        let inline_stats = assemble_inline_types(schema_set)?;
254        stats.inline_stats = Some(inline_stats);
255    }
256
257    // Phase 4: Resolve all QName references (global operation across all documents)
258    if config.resolve_references {
259        let resolution_stats = resolve_all_references(schema_set)?;
260        stats.resolution_stats = Some(resolution_stats);
261    }
262
263    // Phase 4.5: Compile all deferred pattern facets
264    if config.resolve_references {
265        compile_all_patterns(schema_set)?;
266    }
267
268    // Phase 4.6 (XSD 1.1): Validate default open content declarations
269    #[cfg(feature = "xsd11")]
270    if config.resolve_references {
271        crate::compiler::validate_all_default_open_content(schema_set)?;
272    }
273
274    // Phase 4.7: Validate type derivation constraints (cos-ct-extends, derivation-ok-restriction, etc.)
275    if config.resolve_references {
276        let (dep_graph, _dep_stats) = build_dependency_graph(schema_set)?;
277        validate_all_derivations(schema_set, &dep_graph)?;
278    }
279
280    // Phase 4.75: Validate cos-attribute-decl (XSD 1.0: ID attrs must not have default/fixed)
281    // and e-props-correct.2 / .4 (element default/fixed values).
282    if config.resolve_references {
283        validate_attribute_id_constraints(schema_set)?;
284        validate_attribute_value_constraints(schema_set)?;
285        validate_element_value_constraints(schema_set)?;
286        #[cfg(feature = "xsd11")]
287        xsd11_pre_resolution_validations(schema_set)?;
288    }
289
290    // Phase 4.76 (XSD 1.0): strict xs:anyURI lexical check on annotation
291    // source attributes. XSD 1.1 explicitly relaxed the rule, so this is
292    // a no-op there.
293    if config.resolve_references {
294        crate::schema::validate_xsd10_annotation_source_anyuri(schema_set)?;
295    }
296
297    // Phase 4.77: ct-props-correct.4 / ag-props-correct.2 — every complex
298    // type's effective attribute uses must be unique by (namespace, name).
299    // Applies to BOTH XSD 1.0 and 1.1.
300    if config.resolve_references {
301        crate::schema::validate_complex_type_attribute_uniqueness(schema_set)?;
302    }
303
304    // Phase 4.8: Validate substitution group membership constraints (e-props-correct.4)
305    if config.resolve_references {
306        crate::compiler::substitution::validate_all_substitution_groups(schema_set)?;
307    }
308
309    // Phase 5: Allocate arena element declarations for local elements in content particles
310    if config.assemble_inline_types && config.resolve_references {
311        allocate_content_particle_elements(schema_set)?;
312        allocate_model_group_particle_elements(schema_set)?;
313        finalize_local_element_pass(schema_set)?;
314    }
315
316    Ok(stats)
317}
318
319/// Load and process a schema with full processing (convenience function)
320///
321/// This is a simplified version of `load_and_process_schema` that uses
322/// default configuration for full processing.
323pub fn load_schema(
324    xml: &[u8],
325    base_uri: &str,
326    schema_set: &mut SchemaSet,
327) -> SchemaResult<PipelineStats> {
328    load_and_process_schema(xml, base_uri, schema_set, Some(PipelineConfig::full()))
329}
330
331/// Parse a schema without processing directives or resolving references
332///
333/// This is useful when you want to manually control the processing phases
334/// or when parsing multiple schemas before batch processing.
335pub fn parse_schema_only(
336    xml: &[u8],
337    base_uri: &str,
338    schema_set: &mut SchemaSet,
339) -> SchemaResult<DocumentId> {
340    let config = PipelineConfig::parse_only();
341    let stats = load_and_process_schema(xml, base_uri, schema_set, Some(config))?;
342    Ok(stats.doc_id)
343}
344
345/// Process inline types and references for schemas already loaded
346///
347/// Call this after manually loading multiple schemas to perform
348/// the redefine/override application, inline assembly, and reference resolution phases.
349///
350/// **Precondition**: All participating schemas — including redefine/override targets —
351/// must have been parsed and loaded into the schema set before calling this function.
352pub fn process_loaded_schemas(
353    schema_set: &mut SchemaSet,
354) -> SchemaResult<(InlineAssemblyStats, ResolutionStats)> {
355    // Fail early if parsing collected structural errors (error-recovery mode)
356    if !schema_set.parsing_errors.is_empty() {
357        let errors = std::mem::take(&mut schema_set.parsing_errors);
358        return Err(errors.into_iter().next().unwrap());
359    }
360
361    // Apply redefine/override directives before assembly
362    crate::schema::apply_redefine_override(schema_set)?;
363
364    let inline_stats = assemble_inline_types(schema_set)?;
365    let resolution_stats = resolve_all_references(schema_set)?;
366
367    // Compile all deferred pattern facets
368    compile_all_patterns(schema_set)?;
369
370    // XSD 1.1: Validate default open content declarations
371    #[cfg(feature = "xsd11")]
372    crate::compiler::validate_all_default_open_content(schema_set)?;
373
374    // Validate type derivation constraints
375    let (dep_graph, _dep_stats) = build_dependency_graph(schema_set)?;
376    validate_all_derivations(schema_set, &dep_graph)?;
377
378    // Validate cos-attribute-decl (XSD 1.0: ID attrs must not have default/fixed)
379    validate_attribute_id_constraints(schema_set)?;
380
381    // a-props-correct.3 — validate attribute default/fixed values are type-valid
382    validate_attribute_value_constraints(schema_set)?;
383
384    // e-props-correct.2 / e-props-correct.4 — validate element default/fixed values
385    validate_element_value_constraints(schema_set)?;
386
387    #[cfg(feature = "xsd11")]
388    xsd11_pre_resolution_validations(schema_set)?;
389
390    // (XSD 1.0): strict xs:anyURI lexical check on annotation source
391    // attributes. XSD 1.1 explicitly relaxed the rule, so no-op there.
392    crate::schema::validate_xsd10_annotation_source_anyuri(schema_set)?;
393
394    // ct-props-correct.4 / ag-props-correct.2 — every complex type's
395    // effective attribute uses must be unique by (namespace, name).
396    crate::schema::validate_complex_type_attribute_uniqueness(schema_set)?;
397
398    // src-element §3.3.3 clause 4.3 / src-attribute §3.2.3 clause 6.3:
399    // a local element/attribute's explicit `targetNamespace` may differ from
400    // the schema's only inside a <complexContent>/<restriction> of a non-
401    // anyType base.
402    crate::schema::validate_local_decl_target_namespace(schema_set)?;
403
404    // §3.2.6.4 (`no-xsi`): user-declared attributes must not live in the
405    // XML Schema instance namespace.
406    crate::schema::validate_no_xsi_attribute_declarations(schema_set)?;
407
408    // Validate substitution group membership constraints (e-props-correct.4)
409    crate::compiler::substitution::validate_all_substitution_groups(schema_set)?;
410
411    allocate_content_particle_elements(schema_set)?;
412    allocate_model_group_particle_elements(schema_set)?;
413
414    // §3.8.6.3 (cos-element-consistent): when a content model contains a
415    // local element with QName Q and an element ref whose substitution-
416    // group expansion includes another declaration of Q, both must agree
417    // on `{type definition}`. Runs after particle-element allocation so
418    // local elements are tracked through their ElementKey.
419    crate::schema::validate_substitution_group_element_consistency(schema_set)?;
420    finalize_local_element_pass(schema_set)?;
421    Ok((inline_stats, resolution_stats))
422}
423
424/// Validation passes that depend on local element declarations having been
425/// allocated by `allocate_content_particle_elements`. Both pipeline entry
426/// points run these in identical order to finalize IC `@ref` resolution,
427/// re-validate default/fixed values, and (under xsd11) re-run CTA analysis
428/// against newly-visible local-element alternatives.
429fn finalize_local_element_pass(schema_set: &mut SchemaSet) -> SchemaResult<()> {
430    // XSD 1.1: assemble inline alternative types attached to local elements
431    // (which only acquired their ElementKey in the allocation pass).
432    #[cfg(feature = "xsd11")]
433    {
434        let new_alt_types = crate::schema::inline::resolve_local_element_alternatives(schema_set)?;
435        if !new_alt_types.is_empty() {
436            resolve_all_references(schema_set)?;
437            allocate_content_particle_elements(schema_set)?;
438        }
439    }
440    // IC `@ref`s on top-level elements whose target IC lives on a local
441    // element couldn't resolve until now (the local IC was registered by the
442    // allocation pass).
443    crate::schema::finalize_pending_ic_refs(schema_set)?;
444    // e-props-correct.2 / .4 against local element default/fixed values
445    // (saxon `s3_3_4si07/08`).
446    validate_element_value_constraints(schema_set)?;
447    #[cfg(feature = "xsd11")]
448    {
449        crate::schema::validate_cta_xpath(schema_set)?;
450        crate::schema::validate_cta_substitutability(schema_set)?;
451    }
452    #[cfg(feature = "xsd11")]
453    xsd11_element_consistency_checks(schema_set)?;
454    validate_all_group_outer_occurs(schema_set)?;
455    validate_all_group_content(schema_set)?;
456    validate_all_group_placement(schema_set)?;
457    validate_all_particle_occurs(schema_set)?;
458    validate_all_upa_constraints(schema_set)?;
459    Ok(())
460}
461
/// XSD 1.1 schema-validity checks scheduled after type derivation validation
/// and before inline alternative-type resolution / content-particle
/// allocation.
///
/// Runs four passes in order: type-alternative structure, CTA XPath analysis,
/// CTA substitutability, and wildcard `notQName` validation. Stops at the
/// first failure.
///
/// NOTE(review): the callees are expected to gate themselves on
/// `schema_set.is_xsd11()`, making this a no-op for XSD 1.0 schema sets —
/// not verifiable from this function alone.
#[cfg(feature = "xsd11")]
fn xsd11_pre_resolution_validations(schema_set: &SchemaSet) -> SchemaResult<()> {
    // src-type-alternative: @test may be omitted only on the final
    // <xs:alternative>.
    crate::schema::validate_element_type_alternatives(schema_set)?;

    // §3.12.4: undefined variables, unbound prefixes, user-defined types in
    // instance-of/cast.
    crate::schema::validate_cta_xpath(schema_set)?;

    // §3.12.6: cos-ct-alternative-substitutable.
    crate::schema::validate_cta_substitutability(schema_set)?;

    // §3.10.6.1 rule 4: every notQName entry has to fall inside the
    // wildcard's namespace constraint.
    crate::schema::validate_wildcard_disallowed_names(schema_set)?;

    Ok(())
}
482
/// XSD 1.1 cos-element-consistent checks.
///
/// Scheduled after content-particle allocation, which fills each complex
/// type's `resolved_content_particle_elements` (the post-resolution
/// alternative type table that the parser-frame copy does not carry). The
/// three passes run in order and the first failure is returned.
#[cfg(feature = "xsd11")]
fn xsd11_element_consistency_checks(schema_set: &SchemaSet) -> SchemaResult<()> {
    // §3.8.6.3: local element declarations sharing a name inside one content
    // model need equivalent type tables.
    crate::schema::validate_local_element_type_table_consistency(schema_set)?;

    // §3.8.6.3 (extended): a local element, a lax/strict wildcard and a
    // global element with the same QName must agree on their type tables.
    crate::schema::validate_wildcard_element_type_table_consistency(schema_set)?;

    // §3.4.6.3: a restriction-derived complex type that re-issues a base
    // local element must keep the type tables equivalent.
    crate::schema::validate_restriction_local_element_type_table_consistency(schema_set)?;

    Ok(())
}
500
501/// Validate outer occurrence constraints on top-level all-groups.
502///
503/// XSD 1.0 (cos-all-limited.2): a particle whose term is an all-group must
504/// have minOccurs in {0, 1} and maxOccurs = 1. This check runs on every
505/// complex type independently of UPA validation.
506fn validate_all_group_outer_occurs(schema_set: &SchemaSet) -> SchemaResult<()> {
507    use crate::compiler::{
508        is_top_level_all_group, resolve_top_level_all_group_ref, validate_outer_all_group_occurs,
509    };
510
511    for (_, type_def) in schema_set.arenas.complex_types.iter() {
512        let Some(particle) = (match &type_def.content {
513            crate::parser::frames::ComplexContentResult::Complex(content) => {
514                content.particle.as_ref()
515            }
516            crate::parser::frames::ComplexContentResult::Empty
517            | crate::parser::frames::ComplexContentResult::Simple(_) => None,
518        }) else {
519            continue;
520        };
521
522        let is_all = is_top_level_all_group(particle).is_some()
523            || resolve_top_level_all_group_ref(particle, schema_set).is_some();
524        if !is_all {
525            continue;
526        }
527
528        validate_outer_all_group_occurs(particle, schema_set.xsd_version).map_err(|error| {
529            let location = error
530                .location()
531                .and_then(|source| schema_set.source_maps.locate(source));
532            crate::error::SchemaError::structural("cos-all-limited", format!("{}", error), location)
533        })?;
534    }
535
536    Ok(())
537}
538
539/// Validate all-group content constraints.
540///
541/// XSD 1.0 §3.8 cos-all-limited: every particle inside an `xs:all` must be
542/// an element declaration whose `minOccurs` is 0 or 1 and whose `maxOccurs`
543/// is 1 — wildcards (`xs:any`) and nested groups are forbidden, and a member
544/// element with `maxOccurs > 1` is illegal even when its sibling has the
545/// default `maxOccurs="1"`. XSD 1.1 relaxes these constraints (any
546/// `maxOccurs`, wildcards, group refs are allowed subject to cos-all-limited
547/// 1.3 / rule 2 enforced at compile time).
548fn validate_all_group_content(schema_set: &SchemaSet) -> SchemaResult<()> {
549    use crate::parser::frames::{ComplexContentResult, Compositor};
550
551    if !schema_set.is_xsd10() {
552        return Ok(());
553    }
554
555    // Check named model groups
556    for (_, mg) in schema_set.arenas.model_groups.iter() {
557        if mg.compositor == Some(Compositor::All) {
558            check_all_group_xsd10_constraints(&mg.particles, schema_set)?;
559        }
560    }
561
562    // Check content particles in complex types
563    for (_, type_def) in schema_set.arenas.complex_types.iter() {
564        if let ComplexContentResult::Complex(content) = &type_def.content {
565            if let Some(particle) = content.particle.as_ref() {
566                check_particle_all_group_constraints(particle, schema_set)?;
567            }
568        }
569    }
570
571    Ok(())
572}
573
574fn check_all_group_xsd10_constraints(
575    particles: &[crate::parser::frames::ParticleResult],
576    schema_set: &SchemaSet,
577) -> SchemaResult<()> {
578    use crate::parser::frames::ParticleTerm;
579
580    for particle in particles {
581        let particle_loc = schema_set.locate(particle.source.as_ref());
582        match &particle.term {
583            ParticleTerm::Any(wc) => {
584                let location = schema_set
585                    .locate(wc.source.as_ref())
586                    .or_else(|| particle_loc.clone());
587                return Err(crate::error::SchemaError::structural(
588                    "cos-all-limited",
589                    "In XSD 1.0, xs:any (wildcard) is not allowed inside an xs:all group"
590                        .to_string(),
591                    location,
592                ));
593            }
594            ParticleTerm::Group(mg) => {
595                let location = schema_set
596                    .locate(mg.source.as_ref())
597                    .or_else(|| particle_loc.clone());
598                return Err(crate::error::SchemaError::structural(
599                    "cos-all-limited",
600                    "In XSD 1.0, a nested model group is not allowed inside an xs:all group"
601                        .to_string(),
602                    location,
603                ));
604            }
605            ParticleTerm::Element(_) => {}
606        }
607
608        if particle.min_occurs > 1 {
609            return Err(crate::error::SchemaError::structural(
610                "cos-all-limited",
611                format!(
612                    "In XSD 1.0, an xs:all member's minOccurs must be 0 or 1, found {}",
613                    particle.min_occurs
614                ),
615                particle_loc,
616            ));
617        }
618
619        match particle.max_occurs {
620            Some(0) | Some(1) => {}
621            Some(n) => {
622                return Err(crate::error::SchemaError::structural(
623                    "cos-all-limited",
624                    format!(
625                        "In XSD 1.0, an xs:all member's maxOccurs must be 0 or 1, found {}",
626                        n
627                    ),
628                    particle_loc,
629                ));
630            }
631            None => {
632                return Err(crate::error::SchemaError::structural(
633                    "cos-all-limited",
634                    "In XSD 1.0, an xs:all member's maxOccurs must be 0 or 1 (unbounded not allowed)"
635                        .to_string(),
636                    particle_loc,
637                ));
638            }
639        }
640    }
641    Ok(())
642}
643
644fn check_particle_all_group_constraints(
645    particle: &crate::parser::frames::ParticleResult,
646    schema_set: &SchemaSet,
647) -> SchemaResult<()> {
648    use crate::parser::frames::{Compositor, ParticleTerm};
649
650    if let ParticleTerm::Group(mg) = &particle.term {
651        if mg.compositor == Some(Compositor::All) {
652            check_all_group_xsd10_constraints(&mg.particles, schema_set)?;
653        }
654        // Recurse into child particles regardless of compositor
655        for child in &mg.particles {
656            check_particle_all_group_constraints(child, schema_set)?;
657        }
658    }
659    Ok(())
660}
661
662/// Validate XSD 1.0 cos-all-limited.1.2 placement: an `xs:all` model group
663/// (whether inline or reached through a named-group reference) may only
664/// appear as the top-level particle of a complex type's content. It must
665/// not appear nested inside `xs:sequence` / `xs:choice`, nor reached via a
666/// chain of group references that pass through a non-all compositor.
667fn validate_all_group_placement(schema_set: &SchemaSet) -> SchemaResult<()> {
668    use crate::compiler::{is_top_level_all_group, resolve_top_level_all_group_ref};
669    use crate::parser::frames::ComplexContentResult;
670
671    if !schema_set.is_xsd10() {
672        return Ok(());
673    }
674
675    for (_, type_def) in schema_set.arenas.complex_types.iter() {
676        let ComplexContentResult::Complex(content) = &type_def.content else {
677            continue;
678        };
679        let Some(particle) = content.particle.as_ref() else {
680            continue;
681        };
682
683        // Per-particle constraints on a top-level all-group are enforced
684        // by validate_all_group_content; only deeper placements need the
685        // recursion guard below.
686        if is_top_level_all_group(particle).is_some()
687            || resolve_top_level_all_group_ref(particle, schema_set).is_some()
688        {
689            continue;
690        }
691
692        let mut visited = std::collections::HashSet::new();
693        check_no_nested_all_group(particle, schema_set, &mut visited)?;
694    }
695    Ok(())
696}
697
698fn check_no_nested_all_group(
699    particle: &crate::parser::frames::ParticleResult,
700    schema_set: &SchemaSet,
701    visited: &mut std::collections::HashSet<crate::ids::ModelGroupKey>,
702) -> SchemaResult<()> {
703    use crate::parser::frames::{Compositor, ParticleTerm, ParticleResult, ModelGroupDefResult};
704
705    fn placement_location(
706        particle: &ParticleResult,
707        group: &ModelGroupDefResult,
708        schema_set: &SchemaSet,
709    ) -> Option<crate::parser::location::SourceLocation> {
710        schema_set
711            .locate(particle.source.as_ref())
712            .or_else(|| schema_set.locate(group.source.as_ref()))
713    }
714
715    let ParticleTerm::Group(group) = &particle.term else {
716        return Ok(());
717    };
718
719    if group.compositor == Some(Compositor::All) && group.ref_name.is_none() {
720        return Err(crate::error::SchemaError::structural(
721            "cos-all-limited",
722            "In XSD 1.0, an xs:all model group may only appear as the top-level \
723             particle of a complex type definition"
724                .to_string(),
725            placement_location(particle, group, schema_set),
726        ));
727    }
728
729    if let Some(ref_name) = group.ref_name.as_ref() {
730        if let Some(group_key) =
731            schema_set.lookup_model_group(ref_name.namespace, ref_name.local_name)
732        {
733            if let Some(group_data) = schema_set.arenas.get_model_group(group_key) {
734                if group_data.compositor == Some(Compositor::All) {
735                    return Err(crate::error::SchemaError::structural(
736                        "cos-all-limited",
737                        "In XSD 1.0, a reference to a named xs:all group may only \
738                         appear as the top-level particle of a complex type definition"
739                            .to_string(),
740                        placement_location(particle, group, schema_set),
741                    ));
742                }
743                if visited.insert(group_key) {
744                    for child in &group_data.particles {
745                        check_no_nested_all_group(child, schema_set, visited)?;
746                    }
747                    visited.remove(&group_key);
748                }
749            }
750        }
751    } else {
752        for child in &group.particles {
753            check_no_nested_all_group(child, schema_set, visited)?;
754        }
755    }
756    Ok(())
757}
758
759/// Validate occurrence constraints (p-props-correct clause 2.1):
760/// minOccurs must not exceed maxOccurs for all particles.
761fn validate_all_particle_occurs(schema_set: &SchemaSet) -> SchemaResult<()> {
762    for (_, type_def) in schema_set.arenas.complex_types.iter() {
763        if let crate::parser::frames::ComplexContentResult::Complex(content) = &type_def.content {
764            if let Some(particle) = content.particle.as_ref() {
765                validate_particle_occurs_recursive(particle, schema_set)?;
766            }
767        }
768    }
769    for (_, mg) in schema_set.arenas.model_groups.iter() {
770        for particle in &mg.particles {
771            validate_particle_occurs_recursive(particle, schema_set)?;
772        }
773    }
774    Ok(())
775}
776
777fn validate_particle_occurs_recursive(
778    particle: &crate::parser::frames::ParticleResult,
779    schema_set: &SchemaSet,
780) -> SchemaResult<()> {
781    if let Some(max) = particle.max_occurs {
782        if particle.min_occurs > max {
783            let location = particle
784                .source
785                .as_ref()
786                .and_then(|s| schema_set.source_maps.locate(s));
787            return Err(crate::error::SchemaError::structural(
788                "p-props-correct",
789                format!(
790                    "minOccurs ({}) exceeds maxOccurs ({})",
791                    particle.min_occurs, max
792                ),
793                location,
794            ));
795        }
796    }
797    if let crate::parser::frames::ParticleTerm::Group(ref mg) = particle.term {
798        for child in &mg.particles {
799            validate_particle_occurs_recursive(child, schema_set)?;
800        }
801    }
802    Ok(())
803}
804
805fn validate_all_upa_constraints(schema_set: &SchemaSet) -> SchemaResult<()> {
806    for (_, type_def) in schema_set.arenas.complex_types.iter() {
807        // Skip built-in types (xs:anyType etc.) — they have no source location
808        // and are valid by construction.
809        if type_def.source.is_none() {
810            continue;
811        }
812
813        let Some(_particle) = (match &type_def.content {
814            crate::parser::frames::ComplexContentResult::Complex(content) => {
815                content.particle.as_ref()
816            }
817            crate::parser::frames::ComplexContentResult::Empty
818            | crate::parser::frames::ComplexContentResult::Simple(_) => None,
819        }) else {
820            continue;
821        };
822
823        // Compile with capped occurrence bounds for UPA checking.
824        // All maxOccurs values are reduced to <=2, producing a counter-free NFA.
825        let matcher = crate::compiler::compile_content_model_for_upa(schema_set, type_def)
826            .map_err(|error| {
827                let location = error
828                    .location()
829                    .and_then(|source| schema_set.source_maps.locate(source));
830                crate::error::SchemaError::structural(
831                    "cos-nonambig",
832                    format!(
833                        "Failed to compile content model for UPA checking: {}",
834                        error
835                    ),
836                    location,
837                )
838            })?;
839
840        match matcher {
841            crate::compiler::ContentModelMatcher::Nfa(nfa)
842            | crate::compiler::ContentModelMatcher::WithOpenContent { nfa, .. } => {
843                crate::compiler::check_upa(&nfa, schema_set, type_def.target_namespace)?;
844            }
845            crate::compiler::ContentModelMatcher::AllGroup(model) => {
846                crate::compiler::check_all_group_upa(
847                    &model,
848                    schema_set,
849                    type_def.target_namespace,
850                )?;
851            }
852            #[cfg(feature = "xsd11")]
853            crate::compiler::ContentModelMatcher::AllGroupExtension {
854                base_model,
855                extension_nfa,
856            } => {
857                crate::compiler::check_all_group_upa(
858                    &base_model,
859                    schema_set,
860                    type_def.target_namespace,
861                )?;
862                crate::compiler::check_upa(&extension_nfa, schema_set, type_def.target_namespace)?;
863            }
864        }
865    }
866
867    Ok(())
868}
869
870// ============================================================================
871// Async Pipeline Functions (feature = "async")
872// ============================================================================
873
/// Load and fully process an XSD schema document asynchronously.
///
/// Async variant of [`load_and_process_schema`]. Only the directive resolution
/// phase (I/O) awaits; parsing, inline type assembly, reference resolution and
/// the validation phases all run synchronously.
///
/// # Arguments
///
/// * `xml` - Raw XML bytes of the schema document
/// * `base_uri` - Base URI for this document
/// * `schema_set` - Schema set to add the parsed document to
/// * `config` - Optional pipeline configuration (uses defaults if None)
///
/// # Errors
///
/// Returns the first error raised by any phase. Of the errors collected during
/// directive resolution, only the first one for which
/// `is_schema_content_error()` holds is returned; the others are reflected in
/// the directive statistics only.
// NOTE(review): `validate_all_particle_occurs` and `validate_all_upa_constraints`
// from this file are not called directly from this function — confirm that is
// intended and matches the synchronous pipeline.
#[cfg(feature = "async")]
pub async fn load_and_process_schema_async(
    xml: &[u8],
    base_uri: &str,
    schema_set: &mut SchemaSet,
    config: Option<PipelineConfig>,
) -> SchemaResult<PipelineStats> {
    let config = config.unwrap_or_default();
    let mut stats = PipelineStats::default();

    // Phase 1: Parse the primary schema document (sync — CPU-bound)
    let doc_id = parse_schema_with_config(xml, base_uri, schema_set, &config.parser)?;
    stats.doc_id = doc_id;

    // Phase 2: Resolve directives asynchronously
    if config.resolve_directives {
        let mut resolver = SchemaResolver::with_config(config.resolver.clone());

        let primary = resolve_all_directives_async(doc_id, &mut resolver, schema_set).await;
        stats.loaded_docs.extend(primary.loaded.iter().copied());
        stats.directive_result = Some(DirectiveStats::from(&primary));

        let mut directive_errors: Vec<crate::error::SchemaError> = primary.errors;
        let mut import_errors: Vec<crate::error::SchemaError> = primary.import_errors;

        // Process directives of every loaded document, first-in first-out, so
        // that newly discovered documents are handled after the ones already
        // waiting.
        let mut queue = std::collections::VecDeque::from(primary.loaded);
        while let Some(next_doc) = queue.pop_front() {
            let nested = resolve_all_directives_async(next_doc, &mut resolver, schema_set).await;

            stats.loaded_docs.extend(nested.loaded.iter().copied());
            queue.extend(nested.loaded.iter().copied());

            if let Some(totals) = stats.directive_result.as_mut() {
                totals.loaded_count += nested.loaded.len();
                totals.skipped_count += nested.skipped.len();
                totals.error_count += nested.errors.len() + nested.import_errors.len();
            }

            directive_errors.extend(nested.errors);
            import_errors.extend(nested.import_errors);
        }

        // Fixup cycle edges now that all documents have been loaded
        fixup_composition_edges(schema_set);

        // Propagate schema-content errors from directive resolution; directive
        // errors are searched before import errors.
        let first_content_error = directive_errors
            .into_iter()
            .chain(import_errors)
            .find(|e| e.is_schema_content_error());
        if let Some(err) = first_content_error {
            return Err(err);
        }
    }

    // Phase 2.5: Apply redefine/override directives (sync)
    if config.assemble_inline_types || config.resolve_references {
        crate::schema::apply_redefine_override(schema_set)?;
    }

    // Phase 3: Assemble inline types (sync)
    if config.assemble_inline_types {
        stats.inline_stats = Some(assemble_inline_types(schema_set)?);
    }

    // Phases 4 through 4.77 all depend on reference resolution being enabled.
    if config.resolve_references {
        // Phase 4: Resolve all QName references (sync)
        stats.resolution_stats = Some(resolve_all_references(schema_set)?);

        // Phase 4.5: Compile all deferred pattern facets (sync)
        compile_all_patterns(schema_set)?;

        // Phase 4.6 (XSD 1.1): Validate default open content declarations
        #[cfg(feature = "xsd11")]
        crate::compiler::validate_all_default_open_content(schema_set)?;

        // Phase 4.7: Validate type derivation constraints. The dependency
        // graph is scoped so it is dropped before the following phases run.
        {
            let (dep_graph, _dep_stats) = build_dependency_graph(schema_set)?;
            validate_all_derivations(schema_set, &dep_graph)?;
        }

        // Phase 4.75: Validate cos-attribute-decl (XSD 1.0: ID attrs must not
        // have default/fixed) and e-props-correct.2 / .4 (element default/fixed
        // values).
        validate_attribute_id_constraints(schema_set)?;
        validate_attribute_value_constraints(schema_set)?;
        validate_element_value_constraints(schema_set)?;
        #[cfg(feature = "xsd11")]
        xsd11_pre_resolution_validations(schema_set)?;

        // Phase 4.76 (XSD 1.0): strict xs:anyURI lexical check on annotation
        // source attributes. XSD 1.1 explicitly relaxed the rule, so this is
        // a no-op there.
        crate::schema::validate_xsd10_annotation_source_anyuri(schema_set)?;

        // Phase 4.77: ct-props-correct.4 / ag-props-correct.2 — every complex
        // type's effective attribute uses must be unique by (namespace, name).
        // Applies to BOTH XSD 1.0 and 1.1.
        crate::schema::validate_complex_type_attribute_uniqueness(schema_set)?;
    }

    // Phase 5: Allocate arena element declarations (sync)
    if config.assemble_inline_types && config.resolve_references {
        allocate_content_particle_elements(schema_set)?;
        allocate_model_group_particle_elements(schema_set)?;
        // XSD 1.1: assemble inline alternative types attached to local
        // elements (which only acquired their ElementKey above).
        #[cfg(feature = "xsd11")]
        {
            let new_alt_types =
                crate::schema::inline::resolve_local_element_alternatives(schema_set)?;
            if !new_alt_types.is_empty() {
                // Newly assembled types bring references and content
                // particles of their own, so those two steps are repeated.
                resolve_all_references(schema_set)?;
                allocate_content_particle_elements(schema_set)?;
            }
        }
        #[cfg(feature = "xsd11")]
        xsd11_element_consistency_checks(schema_set)?;
    }

    Ok(stats)
}
1026
/// Load a schema asynchronously and run the full processing pipeline on it
/// (convenience function).
///
/// Async variant of [`load_schema`]. Equivalent to calling
/// [`load_and_process_schema_async`] with `PipelineConfig::full()`.
#[cfg(feature = "async")]
pub async fn load_schema_async(
    xml: &[u8],
    base_uri: &str,
    schema_set: &mut SchemaSet,
) -> SchemaResult<PipelineStats> {
    let full_config = PipelineConfig::full();
    load_and_process_schema_async(xml, base_uri, schema_set, Some(full_config)).await
}
1038
1039#[cfg(test)]
1040#[path = "pipeline_tests.rs"]
1041mod tests;