Skip to main content

xsd_schema/schema/
model.rs

1//! Schema model - SchemaSet, SchemaDocument, NamespaceTable
2//!
3//! This module contains the core schema organization structures:
4//! - `SchemaSet` - Complete schema collection with all documents and components
5//! - `SchemaDocument` - Individual schema document (root or included/imported)
6//! - `NamespaceTable` - Per-namespace component lookup
7
8use bitflags::bitflags;
9use std::collections::HashMap;
10
11use crate::arenas::SchemaArenas;
12use crate::ids::*;
13use crate::namespace::table::well_known;
14use crate::namespace::NameTable;
15use crate::namespace::QualifiedName;
16use crate::parser::location::{SourceLocation, SourceMapStorage, SourceRef};
17use crate::schema::annotation::Annotation;
18use crate::schema::composition::{
19    ComponentIdentity, ComponentKind, CompositionEdge, DocumentComponentIndex, EffectiveComponent,
20};
21use crate::schema::wildcard::ElementWildcard;
22use crate::types::{BuiltinTypes, XmlTypeCode};
23
/// XSD specification version a schema set operates under.
///
/// `V1_0` is the `Default` variant.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum XsdVersion {
    /// XSD 1.0 (the default).
    #[default]
    V1_0,
    /// XSD 1.1.
    V1_1,
}
31
/// Regex compatibility mode for pattern facets.
///
/// Determines how strictly the pattern facet grammar is enforced:
///
/// - `Strict` (the default) rejects any construct outside the XSD Part 2
///   §F (1.0) / §G (1.1) regex grammar.
/// - `LenientMs` enables a closed list of safely-strippable MS dialect
///   leniencies for schemas authored against .NET's regex engine; the exact
///   construct list is in `doc/INTRODUCTION.md`.
///
/// This is an enum rather than a `bool` so that further modes can be added
/// later without breaking the API.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum RegexCompat {
    /// Strict XSD Part 2 regex grammar. This is the default.
    #[default]
    Strict,
    /// Tolerate a closed list of MS dialect leniencies (anchors at pattern
    /// start/end, `(?#...)` comments). See `doc/INTRODUCTION.md`.
    LenientMs,
}
51
52bitflags! {
53    /// Derivation control flags (for final, block attributes)
54    #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
55    pub struct DerivationSet: u8 {
56        const EXTENSION = 0x01;
57        const RESTRICTION = 0x02;
58        const LIST = 0x04;
59        const UNION = 0x08;
60        const SUBSTITUTION = 0x10;
61
62        /// All derivation methods blocked
63        const ALL = Self::EXTENSION.bits() | Self::RESTRICTION.bits() |
64                   Self::LIST.bits() | Self::UNION.bits() | Self::SUBSTITUTION.bits();
65
66        /// Element-relevant block bits (LIST and UNION are not meaningful for elements)
67        const ELEMENT_BLOCK = Self::EXTENSION.bits() | Self::RESTRICTION.bits() | Self::SUBSTITUTION.bits();
68    }
69}
70
71impl DerivationSet {
72    /// Create a DerivationSet with only EXTENSION
73    pub fn extension() -> Self {
74        Self::EXTENSION
75    }
76
77    /// Create a DerivationSet with only RESTRICTION
78    pub fn restriction() -> Self {
79        Self::RESTRICTION
80    }
81
82    /// Check if extension is blocked/final
83    pub fn contains_extension(&self) -> bool {
84        self.contains(Self::EXTENSION)
85    }
86
87    /// Check if restriction is blocked/final
88    pub fn contains_restriction(&self) -> bool {
89        self.contains(Self::RESTRICTION)
90    }
91
92    /// Check if list derivation is blocked/final
93    pub fn contains_list(&self) -> bool {
94        self.contains(Self::LIST)
95    }
96
97    /// Check if union derivation is blocked/final
98    pub fn contains_union(&self) -> bool {
99        self.contains(Self::UNION)
100    }
101
102    /// Check if substitution is blocked
103    pub fn contains_substitution(&self) -> bool {
104        self.contains(Self::SUBSTITUTION)
105    }
106
107    /// Mask to only element-relevant block bits (extension, restriction, substitution).
108    /// LIST and UNION are simple-type derivation methods and have no meaning in element
109    /// block attributes. Per the spec, element `block="#all"` means `{extension, restriction,
110    /// substitution}` only.
111    pub fn element_block_mask(self) -> Self {
112        self & Self::ELEMENT_BLOCK
113    }
114}
115
/// Qualification choice used for element/attribute form defaults.
///
/// `Unqualified` is the `Default` variant.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum FormChoice {
    /// Unqualified form (the default).
    #[default]
    Unqualified,
    /// Qualified form.
    Qualified,
}
123
/// Complete schema set (possibly from multiple documents)
///
/// This is the main entry point for working with XSD schemas.
/// It owns all schema components and provides namespace-based lookup
/// through the `lookup_*` methods.
#[derive(Debug)]
pub struct SchemaSet {
    /// String interning table for names and namespace URIs
    pub name_table: NameTable,

    /// Centralized source map storage for all documents
    /// (consulted by `locate` to turn a `SourceRef` into a location)
    pub source_maps: SourceMapStorage,

    /// All parsed schema documents
    pub documents: Vec<SchemaDocument>,

    /// Per-namespace component tables (keyed by NameId; None = no namespace)
    pub namespaces: HashMap<Option<NameId>, NamespaceTable>,

    /// XSD version mode (1.0 or 1.1)
    pub xsd_version: XsdVersion,

    /// Regex compatibility mode for pattern facets. Default `Strict`.
    pub regex_compatibility: RegexCompat,

    /// Arena storage for all components
    pub arenas: SchemaArenas,

    /// Loaded schema locations (for cycle detection), mapping each location
    /// string to the document it was loaded as
    pub loaded_locations: HashMap<String, DocumentId>,

    /// Secondary cache for chameleon schema variants loaded under different
    /// target namespaces. Keyed by `(resolved_uri, adopted_namespace)`.
    /// Allows the same no-namespace schema to be loaded separately for
    /// each including namespace per §4.2.3.
    pub chameleon_cache: HashMap<(String, NameId), DocumentId>,

    /// Composition graph edges recorded during directive resolution
    pub composition_edges: Vec<CompositionEdge>,

    /// Effective component map with provenance (populated by composition phase).
    /// Keyed by `ComponentIdentity` so redefine/override *replaces* the entry
    /// instead of appending, producing the final visible component set.
    pub effective_components: HashMap<ComponentIdentity, EffectiveComponent>,

    /// Built-in type registry with well-known type IDs.
    /// Starts as `None` while `with_version` builds the set and is filled in
    /// before the set is returned; read it through `builtin_types()`.
    builtin_types: Option<BuiltinTypes>,

    /// Parsing errors collected during error-recovery mode.
    /// These are structural errors that make the schema invalid but
    /// were deferred so parsing could continue for better diagnostics.
    pub parsing_errors: Vec<crate::error::SchemaError>,
}
176
177impl SchemaSet {
178    /// Create a new empty schema set
179    pub fn new() -> Self {
180        Self::with_version(XsdVersion::V1_0)
181    }
182
    /// Create a new schema set configured for XSD 1.0.
    ///
    /// Shorthand for `with_version(XsdVersion::V1_0)`.
    pub fn xsd10() -> Self {
        Self::with_version(XsdVersion::V1_0)
    }
187
    /// Create a new schema set configured for XSD 1.1.
    ///
    /// Shorthand for `with_version(XsdVersion::V1_1)`.
    pub fn xsd11() -> Self {
        Self::with_version(XsdVersion::V1_1)
    }
192
193    /// Create a new schema set with specified version
194    pub fn with_version(version: XsdVersion) -> Self {
195        let mut set = Self {
196            name_table: NameTable::new(),
197            source_maps: SourceMapStorage::new(),
198            documents: Vec::new(),
199            namespaces: HashMap::new(),
200            xsd_version: version,
201            regex_compatibility: RegexCompat::Strict,
202            arenas: SchemaArenas::new(),
203            loaded_locations: HashMap::new(),
204            chameleon_cache: HashMap::new(),
205            composition_edges: Vec::new(),
206            effective_components: HashMap::new(),
207            builtin_types: None,
208            parsing_errors: Vec::new(),
209        };
210
211        // Initialize built-in types
212        let builtin_types = BuiltinTypes::new(&mut set);
213        set.builtin_types = Some(builtin_types);
214
215        set
216    }
217
218    /// Returns `true` if this schema set is configured for XSD 1.0.
219    pub fn is_xsd10(&self) -> bool {
220        self.xsd_version == XsdVersion::V1_0
221    }
222
223    /// Returns `true` if this schema set is configured for XSD 1.1.
224    pub fn is_xsd11(&self) -> bool {
225        self.xsd_version == XsdVersion::V1_1
226    }
227
    /// Set the regex compatibility mode for this schema set.
    ///
    /// Affects how pattern facets in subsequently compiled schemas are
    /// validated. Has no effect on already-compiled patterns. Default is
    /// `RegexCompat::Strict`.
    ///
    /// This only overwrites the stored mode; nothing is recompiled here.
    pub fn set_regex_compatibility(&mut self, compat: RegexCompat) {
        self.regex_compatibility = compat;
    }
236
    /// Get the regex compatibility mode for this schema set.
    ///
    /// Returns the stored mode by value (`RegexCompat` is `Copy`).
    pub fn regex_compatibility(&self) -> RegexCompat {
        self.regex_compatibility
    }
241
242    /// Resolve an optional `SourceRef` to its line/column location.
243    /// Returns `None` if the source is absent or cannot be located.
244    pub fn locate(&self, source: Option<&SourceRef>) -> Option<SourceLocation> {
245        source.and_then(|s| self.source_maps.locate(s))
246    }
247
    /// Returns `true` if any parsing errors were collected during error-recovery parsing,
    /// i.e. if `parsing_errors` is non-empty.
    pub fn has_parsing_errors(&self) -> bool {
        !self.parsing_errors.is_empty()
    }
252
253    /// Iterate the normalized locations of all loaded schema documents.
254    ///
255    /// Useful for seeding a new [`SchemaSetBuilder`] with the same schemas
256    /// when enriching with `xsi:schemaLocation` hints.
257    pub fn loaded_schema_locations(&self) -> impl Iterator<Item = &str> {
258        self.loaded_locations.keys().map(|s| s.as_str())
259    }
260
    /// Check if a schema location has already been loaded.
    ///
    /// The comparison is an exact key match against `loaded_locations`.
    pub fn is_loaded(&self, location: &str) -> bool {
        self.loaded_locations.contains_key(location)
    }
265
    /// Mark a schema location as loaded, associating it with `doc_id`.
    ///
    /// If the location was already recorded, its document id is replaced.
    pub fn mark_loaded(&mut self, location: String, doc_id: DocumentId) {
        self.loaded_locations.insert(location, doc_id);
    }
270
271    /// Get or create namespace table for a namespace
272    pub fn get_or_create_namespace(&mut self, ns: Option<NameId>) -> &mut NamespaceTable {
273        self.namespaces.entry(ns).or_default()
274    }
275
276    /// Look up a type by namespace and name
277    pub fn lookup_type(&self, ns: Option<NameId>, name: NameId) -> Option<TypeKey> {
278        self.namespaces.get(&ns)?.types.get(&name).copied()
279    }
280
281    /// Look up an element by namespace and name
282    pub fn lookup_element(&self, ns: Option<NameId>, name: NameId) -> Option<ElementKey> {
283        self.namespaces.get(&ns)?.elements.get(&name).copied()
284    }
285
286    /// Look up an attribute by namespace and name
287    pub fn lookup_attribute(&self, ns: Option<NameId>, name: NameId) -> Option<AttributeKey> {
288        self.namespaces.get(&ns)?.attributes.get(&name).copied()
289    }
290
291    /// Look up a model group by namespace and name
292    pub fn lookup_model_group(&self, ns: Option<NameId>, name: NameId) -> Option<ModelGroupKey> {
293        self.namespaces.get(&ns)?.model_groups.get(&name).copied()
294    }
295
296    /// Look up an attribute group by namespace and name
297    pub fn lookup_attribute_group(
298        &self,
299        ns: Option<NameId>,
300        name: NameId,
301    ) -> Option<AttributeGroupKey> {
302        self.namespaces
303            .get(&ns)?
304            .attribute_groups
305            .get(&name)
306            .copied()
307    }
308
309    /// Look up a notation by namespace and name
310    pub fn lookup_notation(&self, ns: Option<NameId>, name: NameId) -> Option<NotationKey> {
311        self.namespaces.get(&ns)?.notations.get(&name).copied()
312    }
313
314    // ========================================================================
315    // Built-in type access
316    // ========================================================================
317
318    /// Get the built-in types registry.
319    ///
320    /// This provides access to well-known type IDs for all 47+ built-in XSD types.
321    pub fn builtin_types(&self) -> &BuiltinTypes {
322        self.builtin_types
323            .as_ref()
324            .expect("BuiltinTypes should always be initialized")
325    }
326
327    /// Get a built-in simple type by QName (namespace + local name).
328    ///
329    /// This only looks up built-in types in the XS namespace.
330    /// For user-defined types, use `lookup_type` instead.
331    ///
332    /// # Arguments
333    /// * `namespace` - The namespace URI (should be XS namespace for built-in types)
334    /// * `local_name` - The local name of the type
335    ///
336    /// # Returns
337    /// The `SimpleTypeKey` for the built-in type, or `None` if not found.
338    pub fn get_built_in_simple_type_by_qname(
339        &self,
340        namespace: Option<NameId>,
341        local_name: NameId,
342    ) -> Option<SimpleTypeKey> {
343        // Built-in types are only in the XS namespace
344        if namespace != Some(well_known::XS_NAMESPACE) {
345            return None;
346        }
347        self.builtin_types().get_by_local_name(local_name)
348    }
349
350    /// Get a built-in type by QName (namespace + local name).
351    ///
352    /// This includes the built-in complex type `xs:anyType` and all built-in simple types.
353    pub fn get_built_in_type_by_qname(
354        &self,
355        namespace: Option<NameId>,
356        local_name: NameId,
357    ) -> Option<TypeKey> {
358        if namespace != Some(well_known::XS_NAMESPACE) {
359            return None;
360        }
361
362        if let Some(any_type_name) = self.name_table.get("anyType") {
363            if local_name == any_type_name {
364                return Some(TypeKey::Complex(self.builtin_types().any_type));
365            }
366        }
367
368        self.get_built_in_simple_type_by_qname(namespace, local_name)
369            .map(TypeKey::Simple)
370    }
371
372    /// Get the built-in `xs:anyType` key.
373    pub fn any_type_key(&self) -> ComplexTypeKey {
374        self.builtin_types().any_type
375    }
376
377    /// Check if the given type key refers to `xs:anyType`.
378    pub fn is_any_type(&self, type_key: TypeKey) -> bool {
379        matches!(type_key, TypeKey::Complex(key) if key == self.builtin_types().any_type)
380    }
381
382    /// Get a built-in simple type by its XmlTypeCode.
383    ///
384    /// # Returns
385    /// The `SimpleTypeKey` for the built-in type, or `None` if not a simple type code.
386    pub fn get_built_in_simple_type_by_code(&self, code: XmlTypeCode) -> Option<SimpleTypeKey> {
387        self.builtin_types().get_by_type_code(code)
388    }
389
390    /// Get the XmlTypeCode for a simple type.
391    ///
392    /// Returns `None` if the type is not a built-in type.
393    pub fn get_type_code(&self, type_id: SimpleTypeKey) -> Option<XmlTypeCode> {
394        self.builtin_types().get_type_code(type_id)
395    }
396
397    /// Check if `derived` derives from `base` (transitively).
398    ///
399    /// For built-in types, this uses the standard XSD derivation hierarchy.
400    /// For user-defined types, this walks the base type chain using resolved references.
401    ///
402    /// # Returns
403    /// - `true` if `derived == base`
404    /// - `true` if `derived` has `base` somewhere in its derivation chain
405    /// - `false` otherwise
406    pub fn derives_from(&self, derived: SimpleTypeKey, base: SimpleTypeKey) -> bool {
407        // Same type derives from itself
408        if derived == base {
409            return true;
410        }
411
412        // First, check if both are built-in types and use the built-in derivation
413        let builtin = self.builtin_types();
414        if builtin.is_builtin(derived) && builtin.is_builtin(base) {
415            return builtin.derives_from(derived, base);
416        }
417
418        // For user-defined types (or mixed), walk the resolved base type chain
419        let mut current = derived;
420        let mut visited = std::collections::HashSet::new();
421
422        while visited.insert(current) {
423            // Get the simple type data
424            if let Some(type_def) = self.arenas.simple_types.get(current) {
425                // Check the resolved base type
426                if let Some(crate::ids::TypeKey::Simple(simple_base)) = type_def.resolved_base_type
427                {
428                    if simple_base == base {
429                        return true;
430                    }
431                    current = simple_base;
432                    continue;
433                }
434            }
435
436            // If no resolved base type, try built-in derivation as fallback
437            if builtin.is_builtin(current) {
438                if let Some(parent) = builtin.get_base_type(current) {
439                    if parent == base {
440                        return true;
441                    }
442                    current = parent;
443                    continue;
444                }
445            }
446
447            // No more base types to traverse
448            break;
449        }
450
451        false
452    }
453
454    // ========================================================================
455    // Type derivation checking (analog of C# XmlSchemaType.IsDerivedFrom)
456    // ========================================================================
457
458    /// Check if `derived` is derived from `base`, optionally filtering by derivation method.
459    ///
460    /// This mirrors C#'s `XmlSchemaType.IsDerivedFrom(derivedType, baseType, method)`.
461    ///
462    /// # Arguments
463    /// * `derived` - The potentially derived type
464    /// * `base` - The potential base type
465    /// * `exclude_methods` - Derivation methods to exclude from the check.
466    ///   Use `DerivationSet::empty()` to allow any method (like C#'s Empty).
467    ///
468    /// # Returns
469    /// - `true` if `derived == base`
470    /// - `true` if `derived` derives from `base` via a non-excluded derivation method
471    /// - `false` otherwise
472    pub fn is_type_derived_from(
473        &self,
474        derived: TypeKey,
475        base: TypeKey,
476        exclude_methods: DerivationSet,
477    ) -> bool {
478        // Same type derives from itself
479        if derived == base {
480            return true;
481        }
482
483        // Everything derives from anyType. With no method exclusions we can
484        // short-circuit, but with a non-empty exclusion mask we must walk the
485        // chain to verify no step uses a blocked method (§3.4.6.5 / §3.16.6).
486        // For Simple→AnyType the chain *always* terminates with the
487        // anySimpleType→anyType restriction step, so an exclusion containing
488        // RESTRICTION (or matching the simple's variety method) blocks it
489        // (cvc-elt.4.3 with `block="restriction"` / `block="#all"` and
490        // declared anyType: elemT026/27/28/29, elemT054/55/56/57).
491        if self.is_any_type(base) {
492            if exclude_methods.is_empty() {
493                return true;
494            }
495            if let TypeKey::Simple(d) = derived {
496                return self.is_simple_chain_to_any_type_ok(d, exclude_methods);
497            }
498            // Complex→AnyType falls through to is_complex_type_derived_from below.
499        }
500
501        match (derived, base) {
502            // Case 1: Both are simple types
503            (TypeKey::Simple(d), TypeKey::Simple(b)) => {
504                self.is_simple_type_derived_from(d, b, exclude_methods)
505            }
506
507            // Case 2: Both are complex types
508            (TypeKey::Complex(d), TypeKey::Complex(b)) => {
509                self.is_complex_type_derived_from(d, b, exclude_methods)
510            }
511
512            // Case 3: Simple derives from Complex
513            // All simple types derive from anyType (via anySimpleType).
514            (TypeKey::Simple(_), TypeKey::Complex(_)) => false,
515
516            // Case 4: Complex derives from Simple
517            // Complex types with simpleContent can derive from simple types
518            (TypeKey::Complex(d), TypeKey::Simple(b)) => {
519                self.is_complex_derived_from_simple(d, b, exclude_methods)
520            }
521        }
522    }
523
524    /// Whether the simple→anyType derivation chain rooted at `derived` avoids
525    /// every method in `exclude_methods`. Used by `is_type_derived_from` when
526    /// `base = anyType` and `exclude_methods` is non-empty (§3.16.6 + the
527    /// implicit anySimpleType→anyType restriction step).
528    fn is_simple_chain_to_any_type_ok(
529        &self,
530        derived: SimpleTypeKey,
531        exclude_methods: DerivationSet,
532    ) -> bool {
533        use crate::parser::frames::SimpleTypeVariety;
534
535        let mut current = derived;
536        let mut visited = std::collections::HashSet::new();
537
538        while visited.insert(current) {
539            if let Some(type_def) = self.arenas.simple_types.get(current) {
540                let method_flag = match type_def.variety {
541                    SimpleTypeVariety::Atomic => DerivationSet::RESTRICTION,
542                    SimpleTypeVariety::List => DerivationSet::LIST,
543                    SimpleTypeVariety::Union => DerivationSet::UNION,
544                };
545                if exclude_methods.contains(method_flag) {
546                    return false;
547                }
548                if let Some(TypeKey::Simple(simple_base)) = type_def.resolved_base_type {
549                    current = simple_base;
550                    continue;
551                }
552            }
553            // Fell off the user-defined simple chain — the next step is the
554            // anySimpleType→anyType restriction in the built-in hierarchy.
555            return !exclude_methods.contains(DerivationSet::RESTRICTION);
556        }
557        // Cycle detected (shouldn't happen for resolved schemas).
558        false
559    }
560
561    /// Check if simple type `derived` is derived from simple type `base` with method filtering.
562    ///
563    /// Implements XSD spec §3.16.6.3 "Type Derivation OK (Simple)":
564    /// - Clause 2.2.1/2.2.2: walks the `resolved_base_type` chain
565    /// - Clause 2.2.4: if `base` is a union, checks transitive member types
566    fn is_simple_type_derived_from(
567        &self,
568        derived: SimpleTypeKey,
569        base: SimpleTypeKey,
570        exclude_methods: DerivationSet,
571    ) -> bool {
572        use crate::parser::frames::SimpleTypeVariety;
573
574        // Clause 1: Same type
575        if derived == base {
576            return true;
577        }
578
579        // Clause 2.2.1/2.2.2: Walk the base type chain
580        let builtin = self.builtin_types();
581        let mut current = derived;
582        let mut visited = std::collections::HashSet::new();
583
584        while visited.insert(current) {
585            // Get type definition
586            if let Some(type_def) = self.arenas.simple_types.get(current) {
587                // Determine derivation method based on variety
588                let method_flag = match type_def.variety {
589                    SimpleTypeVariety::Atomic => DerivationSet::RESTRICTION,
590                    SimpleTypeVariety::List => DerivationSet::LIST,
591                    SimpleTypeVariety::Union => DerivationSet::UNION,
592                };
593
594                // If this derivation method is excluded, stop traversal
595                if exclude_methods.contains(method_flag) {
596                    break;
597                }
598
599                // Check resolved base type
600                if let Some(TypeKey::Simple(simple_base)) = type_def.resolved_base_type {
601                    if simple_base == base {
602                        return true;
603                    }
604                    current = simple_base;
605                    continue;
606                }
607            }
608
609            // Fallback to built-in derivation
610            if builtin.is_builtin(current) {
611                // For built-in types, derivation is always by restriction
612                if exclude_methods.contains(DerivationSet::RESTRICTION) {
613                    break;
614                }
615                if let Some(parent) = builtin.get_base_type(current) {
616                    if parent == base {
617                        return true;
618                    }
619                    current = parent;
620                    continue;
621                }
622            }
623
624            break;
625        }
626
627        // Clause 2.2.4: If base is a union with no facets, check whether
628        // derived is derived from a transitive member type.
629        if let Some(base_def) = self.arenas.simple_types.get(base) {
630            if base_def.variety == SimpleTypeVariety::Union && base_def.facets.is_empty() {
631                for &member_type_key in &base_def.resolved_member_types {
632                    if let TypeKey::Simple(member_key) = member_type_key {
633                        if self.is_simple_type_derived_from(derived, member_key, exclude_methods) {
634                            return true;
635                        }
636                    }
637                }
638            }
639        }
640
641        false
642    }
643
644    /// Check if complex type `derived` is derived from complex type `base` with method filtering.
645    fn is_complex_type_derived_from(
646        &self,
647        derived: ComplexTypeKey,
648        base: ComplexTypeKey,
649        exclude_methods: DerivationSet,
650    ) -> bool {
651        use crate::parser::frames::DerivationMethod;
652
653        if derived == base {
654            return true;
655        }
656
657        let mut current = derived;
658        let mut visited = std::collections::HashSet::new();
659
660        while visited.insert(current) {
661            if let Some(type_def) = self.arenas.complex_types.get(current) {
662                // Determine derivation method flag
663                let method_flag = match type_def.derivation_method {
664                    Some(DerivationMethod::Extension) => DerivationSet::EXTENSION,
665                    Some(DerivationMethod::Restriction) | None => DerivationSet::RESTRICTION,
666                };
667
668                // If this derivation method is excluded, stop traversal
669                if exclude_methods.contains(method_flag) {
670                    return false;
671                }
672
673                // Check resolved base type
674                if let Some(TypeKey::Complex(complex_base)) = type_def.resolved_base_type {
675                    if complex_base == base {
676                        return true;
677                    }
678                    current = complex_base;
679                    continue;
680                }
681
682                // resolved_base_type is None (shorthand complex type) or
683                // Some(TypeKey::Simple(_)) (simpleContent). Both ultimately
684                // derive from xs:anyType — check if that is the target.
685                return base == self.any_type_key();
686            }
687
688            break;
689        }
690
691        false
692    }
693
694    /// Check if complex type `derived` (with simpleContent) derives from simple type `base`.
695    fn is_complex_derived_from_simple(
696        &self,
697        derived: ComplexTypeKey,
698        base: SimpleTypeKey,
699        exclude_methods: DerivationSet,
700    ) -> bool {
701        use crate::parser::frames::DerivationMethod;
702
703        // Walk the complex type chain, stepping through each derivation level.
704        // A complex type with simpleContent can have a chain:
705        //   ct_n (restriction of ct_{n-1}) → … → ct_1 (extension of simple_type)
706        let mut current = derived;
707        let mut visited = std::collections::HashSet::new();
708
709        while visited.insert(current) {
710            let Some(type_def) = self.arenas.complex_types.get(current) else {
711                break;
712            };
713
714            let method_flag = match type_def.derivation_method {
715                Some(DerivationMethod::Extension) => DerivationSet::EXTENSION,
716                Some(DerivationMethod::Restriction) | None => DerivationSet::RESTRICTION,
717            };
718
719            if exclude_methods.contains(method_flag) {
720                return false;
721            }
722
723            match type_def.resolved_base_type {
724                Some(TypeKey::Simple(simple_base)) => {
725                    if simple_base == base {
726                        return true;
727                    }
728                    // Walk further up the simple type chain.
729                    return self.is_simple_type_derived_from(simple_base, base, exclude_methods);
730                }
731                Some(TypeKey::Complex(complex_base)) => {
732                    // Base is another complex type; keep walking.
733                    current = complex_base;
734                }
735                None => break,
736            }
737        }
738
739        false
740    }
741
742    /// Format a provenance note for a component (returns empty string if none/declared).
743    ///
744    /// Used to enrich error messages with information about where a component
745    /// originated (e.g., redefined from another schema document).
746    pub fn format_provenance_note(
747        &self,
748        kind: ComponentKind,
749        namespace: Option<NameId>,
750        name: NameId,
751    ) -> String {
752        use crate::schema::composition::CompositionAction;
753
754        let identity = ComponentIdentity {
755            kind,
756            name,
757            namespace,
758        };
759        match self.effective_components.get(&identity) {
760            Some(eff) => match &eff.action {
761                CompositionAction::Redefined { from_doc, replaced } => {
762                    let target_uri = replaced
763                        .owner_doc
764                        .and_then(|id| self.documents.get(id as usize))
765                        .map(|d| d.base_uri.as_str())
766                        .unwrap_or("unknown");
767                    let from_uri = from_doc
768                        .and_then(|id| self.documents.get(id as usize))
769                        .map(|d| d.base_uri.as_str())
770                        .unwrap_or("unknown");
771                    format!(" (originally in {}, redefined by {})", target_uri, from_uri)
772                }
773                #[cfg(feature = "xsd11")]
774                CompositionAction::Overridden { from_doc, replaced } => {
775                    let target_uri = replaced
776                        .owner_doc
777                        .and_then(|id| self.documents.get(id as usize))
778                        .map(|d| d.base_uri.as_str())
779                        .unwrap_or("unknown");
780                    let from_uri = from_doc
781                        .and_then(|id| self.documents.get(id as usize))
782                        .map(|d| d.base_uri.as_str())
783                        .unwrap_or("unknown");
784                    format!(
785                        " (originally in {}, overridden by {})",
786                        target_uri, from_uri
787                    )
788                }
789                CompositionAction::Included { from_doc } => {
790                    let uri = self
791                        .documents
792                        .get(*from_doc as usize)
793                        .map(|d| d.base_uri.as_str())
794                        .unwrap_or("unknown");
795                    format!(" (included by {})", uri)
796                }
797                CompositionAction::Declared => String::new(),
798            },
799            None => String::new(),
800        }
801    }
802
803    /// Compute the effective namespace for a local element declaration per XSD spec.
804    ///
805    /// Rules: explicit targetNamespace > form attribute > elementFormDefault > Unqualified.
806    /// Qualified → document target namespace; Unqualified → None.
807    pub fn effective_local_element_namespace(
808        &self,
809        elem_target_namespace: Option<NameId>,
810        elem_form: Option<&str>,
811        source: Option<&SourceRef>,
812        fallback_namespace: Option<NameId>,
813    ) -> Option<NameId> {
814        self.effective_local_namespace(
815            elem_target_namespace,
816            elem_form,
817            source,
818            fallback_namespace,
819            |d| d.element_form_default,
820        )
821    }
822
823    /// Compute the effective namespace for a local attribute declaration per XSD spec.
824    ///
825    /// Rules: explicit targetNamespace > form attribute > attributeFormDefault > Unqualified.
826    /// Qualified → document target namespace; Unqualified → None.
827    pub fn effective_local_attribute_namespace(
828        &self,
829        attr_target_namespace: Option<NameId>,
830        attr_form: Option<&str>,
831        source: Option<&SourceRef>,
832        fallback_namespace: Option<NameId>,
833    ) -> Option<NameId> {
834        self.effective_local_namespace(
835            attr_target_namespace,
836            attr_form,
837            source,
838            fallback_namespace,
839            |d| d.attribute_form_default,
840        )
841    }
842
843    fn effective_local_namespace(
844        &self,
845        explicit_target_namespace: Option<NameId>,
846        form: Option<&str>,
847        source: Option<&SourceRef>,
848        fallback_namespace: Option<NameId>,
849        form_default: impl Fn(&SchemaDocument) -> FormChoice,
850    ) -> Option<NameId> {
851        if explicit_target_namespace.is_some() {
852            return explicit_target_namespace;
853        }
854        // Use defaults_doc() so override children read the overridden
855        // document's form defaults per §4.2.5 / F.2 semantics.
856        let doc = source.and_then(|s| self.documents.get(s.defaults_doc() as usize));
857        let default_form = doc.map(&form_default).unwrap_or(FormChoice::Unqualified);
858        let target_namespace = doc
859            .map(|d| d.target_namespace)
860            .unwrap_or(fallback_namespace);
861        let resolved_form = match form {
862            Some("qualified") => FormChoice::Qualified,
863            Some("unqualified") => FormChoice::Unqualified,
864            _ => default_form,
865        };
866        match resolved_form {
867            FormChoice::Qualified => target_namespace,
868            FormChoice::Unqualified => None,
869        }
870    }
871}
872
873impl Default for SchemaSet {
874    fn default() -> Self {
875        Self::new()
876    }
877}
878
/// A single schema document (root or included/imported)
///
/// Represents one XSD file with its components and directives.
/// Two namespace fields are kept side by side: the namespace as written in
/// the document (`declared_target_namespace`) and the one in effect after
/// chameleon adoption (`target_namespace`).
#[derive(Debug)]
pub struct SchemaDocument {
    /// Document ID for source map reference
    pub id: DocumentId,

    /// Base URI (location) of this document
    pub base_uri: String,

    /// The `targetNamespace` as declared in the `<xs:schema>` element.
    /// `None` when the schema document omits `targetNamespace`.
    /// Preserved even after chameleon adoption so the original fact
    /// "this document had no declared namespace" is never lost.
    pub declared_target_namespace: Option<NameId>,

    /// Effective target namespace after chameleon pre-processing (§4.2.3).
    /// Equals `declared_target_namespace` for non-chameleon documents;
    /// set to the includer's namespace for chameleon-adopted documents.
    pub target_namespace: Option<NameId>,

    /// Schema-level attributes
    ///
    /// `version` attribute of `<xs:schema>`, if present.
    pub version: Option<String>,
    /// `elementFormDefault`: form applied to local elements without `form`.
    pub element_form_default: FormChoice,
    /// `attributeFormDefault`: form applied to local attributes without `form`.
    pub attribute_form_default: FormChoice,
    /// `blockDefault` derivation set.
    pub block_default: DerivationSet,
    /// `finalDefault` derivation set.
    pub final_default: DerivationSet,
    /// Presumably the `id` attribute of `<xs:schema>` — TODO confirm against the parser.
    pub schema_id: Option<String>,
    /// Presumably the `xml:lang` attribute of `<xs:schema>` — TODO confirm against the parser.
    pub xml_lang: Option<String>,

    /// XSD 1.1: Default attributes group reference
    pub default_attributes: Option<QualifiedName>,

    /// XSD 1.1: Default namespace for XPath
    pub xpath_default_namespace: Option<NameId>,

    /// Composition directives (in document order)
    ///
    /// `xs:include` directives.
    pub includes: Vec<IncludeDirective>,
    /// `xs:import` directives; consulted by `can_see_namespace`.
    pub imports: Vec<ImportDirective>,
    /// `xs:redefine` directives.
    pub redefines: Vec<RedefineDirective>,
    /// `xs:override` directives.
    pub overrides: Vec<OverrideDirective>, // XSD 1.1

    /// XSD 1.1: Default open content
    pub default_open_content: Option<DefaultOpenContent>,

    /// Schema-level annotations
    pub annotations: Vec<Annotation>,

    /// Per-document index of top-level components declared in this document.
    /// Populated during assembly; used for document-scoped lookup in
    /// `apply_redefine()` and `apply_override()`.
    pub component_index: DocumentComponentIndex,

    /// Source reference for error reporting
    pub source: Option<SourceRef>,
}
936
937impl SchemaDocument {
938    /// Whether this document had no declared `targetNamespace` and adopted
939    /// one via chameleon include pre-processing (§4.2.3 clause 2.3).
940    pub fn is_chameleon(&self) -> bool {
941        self.declared_target_namespace.is_none() && self.target_namespace.is_some()
942    }
943
944    /// Per-document QName visibility per XSD §3.17.6.2 `src-resolve` clause 4.
945    ///
946    /// Returns `true` when a QName whose resolved namespace is `qname_ns`
947    /// may be referenced from this schema document. Resolution is strictly
948    /// per-document and lexical: imports are not transitive.
949    ///
950    /// Reads `declared_target_namespace` for clause 4.1.1 / 4.2.1 so chameleon
951    /// includes (no declared `targetNamespace`) take 4.1.1 for absent-NS QNames
952    /// instead of failing 4.2.1 against the includer's NS. For chameleon docs
953    /// whose QNames were rewritten by §4.2.3 adoption to the includer's NS,
954    /// also accept the post-adoption `target_namespace`.
955    pub fn can_see_namespace(&self, qname_ns: Option<NameId>, name_table: &NameTable) -> bool {
956        if qname_ns.is_none() {
957            // 4.1
958            return self.declared_target_namespace.is_none()
959                || self.imports.iter().any(|i| i.namespace.is_none());
960        }
961        // 4.2.1 (incl. chameleon-adopted target namespace)
962        if qname_ns == self.declared_target_namespace
963            || (self.is_chameleon() && qname_ns == self.target_namespace)
964        {
965            return true;
966        }
967        // 4.2.3 / 4.2.4
968        if qname_ns == Some(well_known::XS_NAMESPACE) || qname_ns == Some(well_known::XSI_NAMESPACE)
969        {
970            return true;
971        }
972        // 4.2.2. `import.namespace` is `Option<String>` (not pre-interned), but every
973        // *used* namespace is interned at parse time, so `get` (read-only) suffices —
974        // a string never interned cannot match the already-interned `qname_ns`.
975        self.imports.iter().any(|i| {
976            i.namespace
977                .as_deref()
978                .and_then(|s| name_table.get(s))
979                .map(|id| Some(id) == qname_ns)
980                .unwrap_or(false)
981        })
982    }
983
984    /// Create a new schema document
985    pub fn new(id: DocumentId, base_uri: String) -> Self {
986        Self {
987            id,
988            base_uri,
989            declared_target_namespace: None,
990            target_namespace: None,
991            version: None,
992            element_form_default: FormChoice::default(),
993            attribute_form_default: FormChoice::default(),
994            block_default: DerivationSet::empty(),
995            final_default: DerivationSet::empty(),
996            schema_id: None,
997            xml_lang: None,
998            default_attributes: None,
999            xpath_default_namespace: None,
1000            includes: Vec::new(),
1001            imports: Vec::new(),
1002            redefines: Vec::new(),
1003            overrides: Vec::new(),
1004            default_open_content: None,
1005            annotations: Vec::new(),
1006            component_index: DocumentComponentIndex::new(),
1007            source: None,
1008        }
1009    }
1010}
1011
/// Per-namespace component lookup tables
///
/// Each namespace has its own table mapping local names to component keys.
/// Uses NameId as keys for fast equality checks.
/// There is one independent map per component kind, so the same local name
/// can be registered once in each map.
#[derive(Debug, Default)]
pub struct NamespaceTable {
    /// Type definitions (simple and complex)
    pub types: HashMap<NameId, TypeKey>,
    /// Element declarations
    pub elements: HashMap<NameId, ElementKey>,
    /// Attribute declarations
    pub attributes: HashMap<NameId, AttributeKey>,
    /// Attribute groups
    pub attribute_groups: HashMap<NameId, AttributeGroupKey>,
    /// Named model groups
    pub model_groups: HashMap<NameId, ModelGroupKey>,
    /// Notations
    pub notations: HashMap<NameId, NotationKey>,
    /// Identity constraints (global, for XSD 1.1 refs)
    pub identity_constraints: HashMap<NameId, IdentityConstraintKey>,
}
1033
1034impl NamespaceTable {
1035    pub fn new() -> Self {
1036        Self::default()
1037    }
1038
1039    /// Register a type in this namespace
1040    pub fn register_type(&mut self, name: NameId, key: TypeKey) -> Option<TypeKey> {
1041        self.types.insert(name, key)
1042    }
1043
1044    /// Register an element in this namespace
1045    pub fn register_element(&mut self, name: NameId, key: ElementKey) -> Option<ElementKey> {
1046        self.elements.insert(name, key)
1047    }
1048
1049    /// Register an attribute in this namespace
1050    pub fn register_attribute(&mut self, name: NameId, key: AttributeKey) -> Option<AttributeKey> {
1051        self.attributes.insert(name, key)
1052    }
1053
1054    /// Register a model group in this namespace
1055    pub fn register_model_group(
1056        &mut self,
1057        name: NameId,
1058        key: ModelGroupKey,
1059    ) -> Option<ModelGroupKey> {
1060        self.model_groups.insert(name, key)
1061    }
1062
1063    /// Register an attribute group in this namespace
1064    pub fn register_attribute_group(
1065        &mut self,
1066        name: NameId,
1067        key: AttributeGroupKey,
1068    ) -> Option<AttributeGroupKey> {
1069        self.attribute_groups.insert(name, key)
1070    }
1071
1072    /// Register a notation in this namespace
1073    pub fn register_notation(&mut self, name: NameId, key: NotationKey) -> Option<NotationKey> {
1074        self.notations.insert(name, key)
1075    }
1076}
1077
1078// Schema composition directives
1079
/// xs:include directive
#[derive(Debug, Clone)]
pub struct IncludeDirective {
    /// Source reference of the directive, when available.
    pub source: Option<SourceRef>,
    /// `schemaLocation` value (always present for this directive).
    pub schema_location: String,
    /// Document this directive resolved to, if any.
    /// NOTE(review): presumably filled in once the target document is loaded — confirm against the loader.
    pub resolved_doc_id: Option<DocumentId>,
}
1087
/// xs:import directive
#[derive(Debug, Clone)]
pub struct ImportDirective {
    /// Source reference of the directive, when available.
    pub source: Option<SourceRef>,
    /// Imported namespace as a raw (non-interned) string; `None` when the
    /// directive has no namespace. `SchemaDocument::can_see_namespace` treats a
    /// `None` here as granting visibility of absent-namespace QNames.
    pub namespace: Option<String>,
    /// `schemaLocation` value; optional for this directive.
    pub schema_location: Option<String>,
    /// Document this directive resolved to, if any.
    /// NOTE(review): presumably filled in once the target document is loaded — confirm against the loader.
    pub resolved_doc_id: Option<DocumentId>,
}
1096
/// xs:redefine directive (deprecated in XSD 1.1)
///
/// Besides the location data it carries four key lists, one per component
/// kind held by the directive.
#[derive(Debug, Clone)]
pub struct RedefineDirective {
    /// Source reference of the directive, when available.
    pub source: Option<SourceRef>,
    /// `schemaLocation` value (always present for this directive).
    pub schema_location: String,
    /// Document this directive resolved to, if any.
    /// NOTE(review): presumably filled in once the target document is loaded — confirm against the loader.
    pub resolved_doc_id: Option<DocumentId>,
    /// Simple type keys attached to this directive.
    /// NOTE(review): presumably the redefining children of `<xs:redefine>`; the
    /// same applies to the three lists below — confirm against the parser.
    pub simple_types: Vec<SimpleTypeKey>,
    /// Complex type keys attached to this directive.
    pub complex_types: Vec<ComplexTypeKey>,
    /// Model group keys attached to this directive.
    pub groups: Vec<ModelGroupKey>,
    /// Attribute group keys attached to this directive.
    pub attribute_groups: Vec<AttributeGroupKey>,
}
1108
/// xs:override directive (XSD 1.1)
#[derive(Debug, Clone)]
pub struct OverrideDirective {
    /// Source reference of the directive, when available.
    pub source: Option<SourceRef>,
    /// `schemaLocation` value (always present for this directive).
    pub schema_location: String,
    /// Document this directive resolved to, if any.
    /// NOTE(review): presumably filled in once the target document is loaded — confirm against the loader.
    pub resolved_doc_id: Option<DocumentId>,
    /// Components carried by this directive, each tagged with its kind.
    pub components: Vec<OverrideComponent>,
}
1117
/// Component that can appear in xs:override
///
/// Each variant wraps the key of one component of the named kind.
#[derive(Debug, Clone)]
pub enum OverrideComponent {
    /// A simple type definition.
    SimpleType(SimpleTypeKey),
    /// A complex type definition.
    ComplexType(ComplexTypeKey),
    /// A named model group.
    Group(ModelGroupKey),
    /// An attribute group.
    AttributeGroup(AttributeGroupKey),
    /// An element declaration.
    Element(ElementKey),
    /// An attribute declaration.
    Attribute(AttributeKey),
    /// A notation declaration.
    Notation(NotationKey),
}
1129
/// Default open content at schema level (XSD 1.1)
#[derive(Debug, Clone)]
pub struct DefaultOpenContent {
    /// Source reference of the declaration, when available.
    pub source: Option<SourceRef>,
    /// NOTE(review): presumably mirrors the `appliesToEmpty` attribute of
    /// `<xs:defaultOpenContent>` — confirm against the parser.
    pub applies_to_empty: bool,
    /// How the open-content wildcard combines with the declared content.
    pub mode: OpenContentMode,
    /// Element wildcard for the open content, if one was given.
    pub wildcard: Option<ElementWildcard>,
}
1138
/// Open content mode (XSD 1.1)
///
/// `Interleave` is the `Default` value. Convertible from the parser-side
/// `crate::parser::frames::OpenContentMode` via `From`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum OpenContentMode {
    /// `mode="none"`.
    None,
    /// `mode="interleave"` (the default).
    #[default]
    Interleave,
    /// `mode="suffix"`.
    Suffix,
}
1147
1148impl From<crate::parser::frames::OpenContentMode> for OpenContentMode {
1149    fn from(m: crate::parser::frames::OpenContentMode) -> Self {
1150        use crate::parser::frames::OpenContentMode as Src;
1151        match m {
1152            Src::None => Self::None,
1153            Src::Interleave => Self::Interleave,
1154            Src::Suffix => Self::Suffix,
1155        }
1156    }
1157}
1158
#[cfg(test)]
mod tests {
    use super::*;
    use crate::arenas::ComplexTypeDefData;
    use crate::parser::frames::ComplexContentResult;

    // ------------------------------------------------------------------
    // Shared helpers
    // ------------------------------------------------------------------

    /// `is_type_derived_from` for two simple types with an explicit set of
    /// excluded derivation methods.
    fn simple_derives_excluding(
        set: &SchemaSet,
        derived: SimpleTypeKey,
        base: SimpleTypeKey,
        excluded: DerivationSet,
    ) -> bool {
        set.is_type_derived_from(TypeKey::Simple(derived), TypeKey::Simple(base), excluded)
    }

    /// `is_type_derived_from` for two simple types with nothing excluded.
    fn simple_derives(set: &SchemaSet, derived: SimpleTypeKey, base: SimpleTypeKey) -> bool {
        simple_derives_excluding(set, derived, base, DerivationSet::empty())
    }

    /// Load `xsd` as `test.xsd` into a fresh default `SchemaSet`.
    fn load_schema(xsd: &str) -> SchemaSet {
        use crate::pipeline::load_and_process_schema;

        let mut set = SchemaSet::new();
        load_and_process_schema(xsd.as_bytes(), "test.xsd", &mut set, None)
            .expect("schema should parse");
        set
    }

    /// Look up a global type by local name in the no-namespace table.
    fn no_namespace_type(set: &mut SchemaSet, local_name: &str) -> TypeKey {
        let name_id = set.name_table.add(local_name);
        set.namespaces
            .get(&None)
            .unwrap()
            .types
            .get(&name_id)
            .copied()
            .unwrap_or_else(|| panic!("{} should exist", local_name))
    }

    /// An anonymous, empty complex type with no base type and no attributes.
    fn blank_complex_type() -> ComplexTypeDefData {
        ComplexTypeDefData {
            name: None,
            target_namespace: None,
            base_type: None,
            derivation_method: None,
            content: ComplexContentResult::Empty,
            open_content: None,
            attributes: Vec::new(),
            attribute_groups: Vec::new(),
            attribute_wildcard: None,
            mixed: false,
            is_abstract: false,
            final_derivation: DerivationSet::empty(),
            block: DerivationSet::empty(),
            default_attributes_apply: true,
            id: None,
            #[cfg(feature = "xsd11")]
            assertions: Vec::new(),
            #[cfg(feature = "xsd11")]
            xpath_default_namespace: None,
            annotation: None,
            source: None,
            resolved_base_type: None,
            resolved_attribute_groups: Vec::new(),
            resolved_attributes: Vec::new(),
            resolved_content_particle_types: Vec::new(),
            resolved_content_particle_elements: Vec::new(),
            resolved_simple_content_type: None,
            redefine_original: None,
        }
    }

    // ------------------------------------------------------------------
    // SchemaSet construction and bookkeeping
    // ------------------------------------------------------------------

    #[test]
    fn test_schema_set_creation() {
        use crate::namespace::table::well_known::XSI_NAMESPACE;

        let set = SchemaSet::new();
        assert_eq!(set.xsd_version, XsdVersion::V1_0);
        assert!(set.documents.is_empty());
        // XSI namespace is pre-populated with built-in attribute declarations
        let xsi = Some(XSI_NAMESPACE);
        assert!(set.namespaces.contains_key(&xsi));
    }

    #[test]
    fn test_schema_set_with_version() {
        let set = SchemaSet::with_version(XsdVersion::V1_1);
        assert_eq!(set.xsd_version, XsdVersion::V1_1);
    }

    #[test]
    fn test_schema_set_xsd10() {
        let set = SchemaSet::xsd10();
        assert_eq!(set.xsd_version, XsdVersion::V1_0);
    }

    #[test]
    fn test_schema_set_xsd11() {
        let set = SchemaSet::xsd11();
        assert_eq!(set.xsd_version, XsdVersion::V1_1);
    }

    #[test]
    fn test_namespace_table_registration() {
        use slotmap::SlotMap;

        // Two distinct dummy keys to register under the same name.
        let mut key_source: SlotMap<SimpleTypeKey, ()> = SlotMap::with_key();
        let first_key = key_source.insert(());
        let second_key = key_source.insert(());

        let mut table = NamespaceTable::new();

        // First registration: nothing to replace.
        let replaced = table.register_type(NameId(1), TypeKey::Simple(first_key));
        assert!(replaced.is_none());

        // Re-registering returns old value
        let replaced = table.register_type(NameId(1), TypeKey::Simple(second_key));
        assert!(replaced.is_some());
    }

    #[test]
    fn test_schema_set_load_tracking() {
        let mut set = SchemaSet::new();

        assert!(!set.is_loaded("test.xsd"));
        set.mark_loaded("test.xsd".to_string(), 0);
        assert!(set.is_loaded("test.xsd"));
    }

    #[test]
    fn test_derivation_set_flags() {
        let mut methods = DerivationSet::empty();
        assert!(methods.is_empty());

        methods |= DerivationSet::EXTENSION;
        assert!(methods.contains(DerivationSet::EXTENSION));
        assert!(!methods.contains(DerivationSet::RESTRICTION));

        let everything = DerivationSet::ALL;
        assert!(everything.contains(DerivationSet::EXTENSION));
        assert!(everything.contains(DerivationSet::RESTRICTION));
    }

    #[test]
    fn test_form_choice_default() {
        assert_eq!(FormChoice::default(), FormChoice::Unqualified);
    }

    // ========================================================================
    // Tests for is_type_derived_from (analog of C# XmlSchemaType.IsDerivedFrom)
    // ========================================================================

    #[test]
    fn test_is_type_derived_from_same_type() {
        let set = SchemaSet::new();
        let string = set.builtin_types().string;

        // Same type derives from itself
        assert!(simple_derives(&set, string, string));
    }

    #[test]
    fn test_is_type_derived_from_direct_derivation() {
        let set = SchemaSet::new();
        let builtin = set.builtin_types();

        // xs:normalizedString derives from xs:string
        assert!(simple_derives(&set, builtin.normalized_string, builtin.string));

        // xs:integer derives from xs:decimal
        assert!(simple_derives(&set, builtin.integer, builtin.decimal));
    }

    #[test]
    fn test_is_type_derived_from_transitive() {
        let set = SchemaSet::new();
        let builtin = set.builtin_types();

        // xs:NCName derives from xs:string (NCName < Name < token < normalizedString < string)
        assert!(simple_derives(&set, builtin.ncname, builtin.string));

        // xs:byte derives from xs:decimal (byte < short < int < long < integer < decimal)
        assert!(simple_derives(&set, builtin.byte, builtin.decimal));

        // xs:ID derives from xs:string (ID < NCName < Name < token < normalizedString < string)
        assert!(simple_derives(&set, builtin.id, builtin.string));
    }

    #[test]
    fn test_is_type_derived_from_not_derived() {
        let set = SchemaSet::new();
        let builtin = set.builtin_types();

        // xs:string does NOT derive from xs:integer
        assert!(!simple_derives(&set, builtin.string, builtin.integer));

        // xs:decimal does NOT derive from xs:integer (reverse direction)
        assert!(!simple_derives(&set, builtin.decimal, builtin.integer));

        // xs:date does NOT derive from xs:duration
        assert!(!simple_derives(&set, builtin.date, builtin.duration));
    }

    #[test]
    fn test_is_type_derived_from_any_simple_type() {
        let set = SchemaSet::new();
        let builtin = set.builtin_types();

        // All simple types derive from xs:anySimpleType
        assert!(simple_derives(&set, builtin.string, builtin.any_simple_type));
        assert!(simple_derives(&set, builtin.integer, builtin.any_simple_type));
        assert!(simple_derives(&set, builtin.byte, builtin.any_simple_type));
    }

    #[test]
    fn test_is_type_derived_from_any_type() {
        let mut set = SchemaSet::new();
        let any_type = TypeKey::Complex(set.builtin_types().any_type);
        let string_type = TypeKey::Simple(set.builtin_types().string);

        // A built-in simple type derives from xs:anyType.
        assert!(set.is_type_derived_from(string_type, any_type, DerivationSet::empty()));

        // So does a freshly allocated complex type with no explicit base.
        let anonymous = set.arenas.alloc_complex_type(blank_complex_type());
        assert!(set.is_type_derived_from(
            TypeKey::Complex(anonymous),
            any_type,
            DerivationSet::empty()
        ));
    }

    #[test]
    fn test_is_type_derived_from_exclude_restriction() {
        let set = SchemaSet::new();
        let builtin = set.builtin_types();

        // With RESTRICTION excluded, xs:normalizedString does NOT derive from xs:string
        assert!(!simple_derives_excluding(
            &set,
            builtin.normalized_string,
            builtin.string,
            DerivationSet::RESTRICTION
        ));

        // Same type still derives from itself even with exclusions
        assert!(simple_derives_excluding(
            &set,
            builtin.string,
            builtin.string,
            DerivationSet::RESTRICTION
        ));
    }

    #[test]
    fn test_is_type_derived_from_list_types() {
        let set = SchemaSet::new();
        let builtin = set.builtin_types();

        // xs:NMTOKENS is a list type that derives from xs:anySimpleType
        assert!(simple_derives(&set, builtin.nmtokens, builtin.any_simple_type));

        // With LIST excluded, xs:NMTOKENS should not derive from xs:anySimpleType
        assert!(!simple_derives_excluding(
            &set,
            builtin.nmtokens,
            builtin.any_simple_type,
            DerivationSet::LIST
        ));
    }

    /// Clause 2.2.4: D is derived from union B if D is derived from a
    /// transitive member of B and B has no facets.
    #[test]
    fn test_union_member_derivation_clause_2_2_4() {
        let mut set = load_schema(
            r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
            <xs:simpleType name="myUnion">
                <xs:union memberTypes="xs:float xs:integer"/>
            </xs:simpleType>
        </xs:schema>"#,
        );

        // Find myUnion by name
        let union_key = no_namespace_type(&mut set, "myUnion");

        let integer_key = TypeKey::Simple(set.builtin_types().integer);
        let float_key = TypeKey::Simple(set.builtin_types().float);
        let string_key = TypeKey::Simple(set.builtin_types().string);

        // xs:integer is a member of myUnion → should be "derived" via 2.2.4
        assert!(
            set.is_type_derived_from(integer_key, union_key, DerivationSet::empty()),
            "xs:integer should be derived from union(float, integer) via clause 2.2.4"
        );

        // xs:float is also a member
        assert!(
            set.is_type_derived_from(float_key, union_key, DerivationSet::empty()),
            "xs:float should be derived from union(float, integer) via clause 2.2.4"
        );

        // xs:string is NOT a member
        assert!(
            !set.is_type_derived_from(string_key, union_key, DerivationSet::empty()),
            "xs:string should NOT be derived from union(float, integer)"
        );
    }

    /// Clause 2.2.4 with nested unions: D derived from a member of
    /// a union that is itself a member of B.
    #[test]
    fn test_union_member_derivation_transitive() {
        let mut set = load_schema(
            r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
            <xs:simpleType name="innerUnion">
                <xs:union memberTypes="xs:boolean xs:date"/>
            </xs:simpleType>
            <xs:simpleType name="outerUnion">
                <xs:union memberTypes="xs:integer innerUnion"/>
            </xs:simpleType>
        </xs:schema>"#,
        );

        let outer_key = no_namespace_type(&mut set, "outerUnion");

        // xs:boolean is in innerUnion which is in outerUnion → transitive
        let bool_key = TypeKey::Simple(set.builtin_types().boolean);
        assert!(
            set.is_type_derived_from(bool_key, outer_key, DerivationSet::empty()),
            "xs:boolean should be transitively derived from outerUnion via innerUnion"
        );

        // xs:integer is a direct member
        let int_key = TypeKey::Simple(set.builtin_types().integer);
        assert!(
            set.is_type_derived_from(int_key, outer_key, DerivationSet::empty()),
            "xs:integer should be derived from outerUnion as direct member"
        );
    }
}