Skip to main content

xsd_schema/parser/
parse.rs

1//! Main XSD parser event loop
2//!
3//! This module provides the main parser that processes XSD documents using
4//! a frame-based state machine. Each XSD element type is handled by a
5//! corresponding frame that validates structure and collects content.
6//!
7//! # Architecture
8//!
9//! The parser uses:
10//! - `TrackedReader` for XML parsing with byte position tracking
11//! - `NamespaceContext` for scoped namespace management
12//! - Frame stack for nested element handling
13//! - `create_frame` factory for frame instantiation
14//!
15//! # Example
16//!
17//! ```
18//! use xsd_schema::parser::parse::parse_schema;
19//! use xsd_schema::SchemaSet;
20//!
21//! let mut schema_set = SchemaSet::new();
22//! let xsd = r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
23//!     <xs:element name="root" type="xs:string"/>
24//! </xs:schema>"#;
25//!
26//! let doc_id = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set)
27//!     .expect("parse failed");
28//! assert_eq!(doc_id, 0);
29//! ```
30
31use std::collections::HashSet;
32
33use quick_xml::events::Event;
34
35use crate::error::{SchemaError, SchemaResult};
36use crate::ids::{DocumentId, NameId};
37use crate::namespace::{is_ncname, NameTable, NamespaceContext, XS_NAMESPACE};
38use crate::parser::assemble::assemble_schema;
39use crate::parser::attrs::{categorize_attributes, parse_attributes, AttributeMap};
40use crate::parser::frames::{
41    create_frame, create_frame_recovering, xsd_names, Frame, FrameResult, SchemaFrameResult,
42    SkipFrame,
43};
44use crate::parser::location::{SourceLocation, SourceMap, SourceRef, SourceSpan};
45use crate::parser::reader::{split_qname, TrackedReader};
46use crate::parser::structure::{
47    validate_attribute_group_structure, validate_attribute_structure,
48    validate_complex_type_structure, validate_element_structure, validate_extension_structure,
49    validate_group_structure, validate_import_structure, validate_include_structure,
50    validate_key_unique_structure, validate_keyref_structure, validate_notation_structure,
51    validate_redefine_structure, validate_schema_structure, validate_simple_type_structure,
52    validate_xsd_version_attribute, validate_xsd_version_element, ValidationContext,
53};
54use crate::schema::annotation::ForeignAttribute;
55use crate::schema::model::XsdVersion;
56use crate::SchemaSet;
57
/// Parser configuration options.
///
/// A baseline value is available through the `Default` implementation; the
/// public fields can then be adjusted before the config is passed to
/// `parse_schema_with_config` or `parse_schema_with_chameleon`.
#[derive(Debug, Clone)]
pub struct ParserConfig {
    /// Whether to recover from errors and continue parsing.
    ///
    /// When `true`, recoverable errors are collected on the parser state and
    /// parsing continues; when `false`, the first such error is returned to
    /// the caller.
    pub error_recovery: bool,
    /// Whether to collect foreign attributes.
    // NOTE(review): no read of this flag is visible in this part of the
    // file — confirm where it is honoured.
    pub collect_foreign_attributes: bool,
    /// Maximum nesting depth (0 = unlimited).
    // NOTE(review): no depth check is visible in this part of the file —
    // confirm where the limit is enforced.
    pub max_depth: usize,
    /// XSD version mode (1.0 or 1.1).
    ///
    /// Whatever is stored here is overwritten with `SchemaSet.xsd_version`
    /// at the start of `parse_schema_with_chameleon` (which
    /// `parse_schema_with_config` delegates to), so the schema set is the
    /// effective source of this value.
    pub(crate) xsd_version: XsdVersion,
}
71
72impl Default for ParserConfig {
73    fn default() -> Self {
74        Self {
75            error_recovery: true,
76            collect_foreign_attributes: true,
77            max_depth: 0,
78            xsd_version: XsdVersion::V1_0,
79        }
80    }
81}
82
/// Mutable state threaded through the event handlers while one schema
/// document is being parsed.
///
/// Lifetimes:
/// * `'a` — the mutable borrow of the name table held by the namespace context
/// * `'b` — the borrowed parser configuration
/// * `'c` — the borrowed source map of the document being parsed
struct ParserState<'a, 'b, 'c> {
    /// Namespace context for prefix resolution; also the access path to the
    /// name table during parsing.
    ns_context: NamespaceContext<'a>,
    /// Stack of parser frames, innermost open element last.
    frame_stack: Vec<Box<dyn Frame>>,
    /// ID of the document currently being parsed.
    doc_id: DocumentId,
    /// Errors collected during parsing (filled in error-recovery mode).
    errors: Vec<SchemaError>,
    /// Parser configuration.
    config: &'b ParserConfig,
    /// XSD namespace ID, cached after the first successful name-table lookup.
    xsd_ns_id: Option<NameId>,
    /// Source map used to turn source references into locations.
    source_map: &'c SourceMap,
    /// Completed root schema result (set when root frame finishes).
    root_schema: Option<SchemaFrameResult>,
    /// Collected xs:ID values for document-level uniqueness checking.
    id_values: HashSet<String>,
    /// True when the root xs:schema element has a vc: version condition that
    /// excludes this document; all children of xs:schema are then skipped.
    vc_schema_excluded: bool,
    /// Chameleon namespace to adopt (§4.2.3 clause 2.3) when this document
    /// is being included by a schema with a target namespace and itself has
    /// neither a `targetNamespace` attribute nor an explicit `xmlns` default.
    /// Applied to the root `<xs:schema>` scope so that unqualified QName
    /// references inside the included document resolve to the includer's
    /// target namespace (matching how top-level definitions are re-namespaced).
    chameleon_namespace: Option<NameId>,
}
114
115impl<'a, 'b, 'c> ParserState<'a, 'b, 'c> {
116    fn new(
117        name_table: &'a mut NameTable,
118        doc_id: DocumentId,
119        config: &'b ParserConfig,
120        source_map: &'c SourceMap,
121        chameleon_namespace: Option<NameId>,
122    ) -> Self {
123        let ns_context = NamespaceContext::new(name_table);
124        Self {
125            ns_context,
126            frame_stack: Vec::new(),
127            doc_id,
128            errors: Vec::new(),
129            config,
130            xsd_ns_id: None,
131            source_map,
132            root_schema: None,
133            id_values: HashSet::new(),
134            vc_schema_excluded: false,
135            chameleon_namespace,
136        }
137    }
138
139    /// Get the XSD namespace ID, caching it for efficiency
140    fn get_xsd_ns_id(&mut self) -> Option<NameId> {
141        if self.xsd_ns_id.is_none() {
142            self.xsd_ns_id = self.ns_context.name_table().get(XS_NAMESPACE);
143        }
144        self.xsd_ns_id
145    }
146
147    /// Check if a namespace URI is the XSD namespace
148    fn is_in_xsd_namespace(&mut self, namespace: Option<NameId>) -> bool {
149        match (namespace, self.get_xsd_ns_id()) {
150            (Some(ns), Some(xsd_ns)) => ns == xsd_ns,
151            (None, _) => false, // Unqualified elements are not XSD elements
152            _ => false,
153        }
154    }
155
156    /// Push a namespace scope
157    fn push_scope(&mut self) {
158        self.ns_context.push_scope();
159    }
160
161    /// Pop a namespace scope
162    fn pop_scope(&mut self) {
163        self.ns_context.pop_scope();
164    }
165
166    /// Get current frame
167    fn current_frame(&self) -> Option<&dyn Frame> {
168        self.frame_stack.last().map(|b| b.as_ref())
169    }
170
171    /// Get current frame mutably
172    fn current_frame_mut(&mut self) -> Option<&mut Box<dyn Frame>> {
173        self.frame_stack.last_mut()
174    }
175
176    /// Add an error
177    fn add_error(&mut self, error: SchemaError) {
178        self.errors.push(error);
179    }
180
181    /// In error-recovery mode, collect the error and continue; otherwise fail.
182    fn recover_or_fail(&mut self, error: SchemaError) -> SchemaResult<()> {
183        if self.config.error_recovery {
184            self.add_error(error);
185            Ok(())
186        } else {
187            Err(error)
188        }
189    }
190
191    /// Create a source reference for the given span
192    fn source_ref(&self, span: SourceSpan) -> SourceRef {
193        SourceRef::new(self.doc_id, span)
194    }
195
196    /// Create validation context for structural checks.
197    /// Elements are top-level if their parent frame reports `children_are_top_level`
198    /// (schema, redefine, and override frames).
199    fn validation_context(&self, source: Option<SourceRef>) -> ValidationContext {
200        let is_top_level = self
201            .frame_stack
202            .last()
203            .map(|f| f.children_are_top_level())
204            .unwrap_or(false);
205        // Walk the frame stack to detect a `<complexType>` lexical ancestor.
206        // Stop walking when we hit a frame whose children are top-level
207        // (schema/redefine/override) — there's nothing more to look at.
208        let inside_complex_type = self
209            .frame_stack
210            .iter()
211            .rev()
212            .any(|f| f.children_inside_complex_type());
213        ValidationContext {
214            xsd_version: self.config.xsd_version,
215            is_top_level,
216            inside_complex_type,
217            source,
218        }
219    }
220}
221
222/// Parse an XSD schema document
223///
224/// This is the main entry point for parsing XSD documents.
225///
226/// # Arguments
227///
228/// * `xml` - Raw XML bytes of the schema document
229/// * `base_uri` - Base URI for this document (for error messages and include resolution)
230/// * `schema_set` - Schema set to add the parsed document to
231///
232/// # Returns
233///
234/// The document ID of the parsed schema, or an error if parsing failed.
235pub fn parse_schema(
236    xml: &[u8],
237    base_uri: &str,
238    schema_set: &mut SchemaSet,
239) -> SchemaResult<DocumentId> {
240    let config = ParserConfig::default();
241    parse_schema_with_config(xml, base_uri, schema_set, &config)
242}
243
244/// Parse an XSD schema document with custom configuration.
245///
246/// The XSD version is always derived from `schema_set.xsd_version`,
247/// regardless of what `config.xsd_version` contains.
248pub fn parse_schema_with_config(
249    xml: &[u8],
250    base_uri: &str,
251    schema_set: &mut SchemaSet,
252    config: &ParserConfig,
253) -> SchemaResult<DocumentId> {
254    parse_schema_with_chameleon(xml, base_uri, schema_set, config, None)
255}
256
257/// Parse an XSD schema document with chameleon namespace support.
258///
259/// If `chameleon_namespace` is `Some` and the parsed document has no
260/// `targetNamespace`, the chameleon namespace is adopted per §4.2.3
261/// clause 2.3 (chameleon include pre-processing).
262pub fn parse_schema_with_chameleon(
263    xml: &[u8],
264    base_uri: &str,
265    schema_set: &mut SchemaSet,
266    config: &ParserConfig,
267    chameleon_namespace: Option<NameId>,
268) -> SchemaResult<DocumentId> {
269    // Override parser version from the single source of truth
270    let mut config = config.clone();
271    config.xsd_version = schema_set.xsd_version;
272
273    // Create source map - keep local reference for location resolution during parsing
274    let source_text = String::from_utf8_lossy(xml).into_owned();
275    let source_map = SourceMap::new(base_uri.to_string(), source_text);
276
277    // Pre-assign document ID (will be used when we add the source map later)
278    let doc_id = schema_set.source_maps.len() as DocumentId;
279
280    // Create parser state with reference to source_map
281    let mut state = ParserState::new(
282        &mut schema_set.name_table,
283        doc_id,
284        &config,
285        &source_map,
286        chameleon_namespace,
287    );
288
289    // Create XML reader
290    let mut reader = TrackedReader::from_bytes(xml);
291    let mut buf = Vec::new();
292
293    // Track if we've seen the root schema element
294    let mut seen_root = false;
295
296    // Main event loop
297    loop {
298        buf.clear();
299        let tracked_event = reader.read_event(&mut buf)?;
300        let span = tracked_event.span;
301
302        match tracked_event.event {
303            Event::Start(ref e) => {
304                handle_start_element(&mut state, e, span, &mut seen_root)?;
305            }
306            Event::Empty(ref e) => {
307                // Empty elements are treated as Start + End
308                handle_start_element(&mut state, e, span, &mut seen_root)?;
309                handle_end_element(&mut state, span)?;
310            }
311            Event::End(_) => {
312                handle_end_element(&mut state, span)?;
313            }
314            Event::Text(ref e) => {
315                handle_text(&mut state, e, span)?;
316            }
317            Event::CData(ref e) => {
318                handle_cdata(&mut state, e, span)?;
319            }
320            Event::Comment(_) => {
321                // Ignore comments
322            }
323            Event::PI(_) => {
324                // Ignore processing instructions
325            }
326            Event::Decl(_) => {
327                // Ignore XML declaration
328            }
329            Event::DocType(_) => {
330                // Ignore DOCTYPE
331            }
332            Event::Eof => break,
333        }
334    }
335
336    // Check for incomplete parsing
337    if !state.frame_stack.is_empty() {
338        return Err(SchemaError::structural(
339            "src-resolve",
340            "Schema document ended with unclosed elements",
341            None,
342        ));
343    }
344
345    // Store any collected parsing errors on the schema set so they can be
346    // surfaced later (e.g. when process_loaded_schemas runs).
347    let parsing_errors = std::mem::take(&mut state.errors);
348
349    let mut root_schema = state
350        .root_schema
351        .take()
352        .ok_or_else(|| SchemaError::internal("No schema result produced during parsing"))?;
353    drop(state);
354
355    schema_set.parsing_errors.extend(parsing_errors);
356
357    // Record the declared targetNamespace before chameleon adoption.
358    let declared_target_namespace = root_schema.target_namespace;
359
360    // src-include §4.2.3 clause 2.1: when included via `<xs:include>` from a
361    // schema with a `targetNamespace`, the included document's declared
362    // `targetNamespace`, when present, must equal the includer's.
363    if let Some(includer_ns) = chameleon_namespace {
364        if let Some(declared) = declared_target_namespace {
365            if declared != includer_ns {
366                return Err(SchemaError::structural(
367                    "src-include",
368                    format!(
369                        "Included schema's targetNamespace '{}' does not match \
370                         including schema's targetNamespace '{}'",
371                        schema_set.name_table.resolve(declared),
372                        schema_set.name_table.resolve(includer_ns),
373                    ),
374                    None,
375                ));
376            }
377        }
378    }
379
380    // Chameleon pre-processing (§4.2.3 clause 2.3): if the parsed document
381    // has no targetNamespace and the includer specifies one, adopt it.
382    if root_schema.target_namespace.is_none() {
383        if let Some(ns) = chameleon_namespace {
384            root_schema.target_namespace = Some(ns);
385        }
386    }
387
388    // Add the source map to storage now that parsing is complete
389    // Note: We ensured doc_id matches the position where this will be added
390    let added_id = schema_set.source_maps.add(source_map);
391    debug_assert_eq!(doc_id, added_id, "Document ID mismatch");
392
393    let mut doc = assemble_schema(schema_set, doc_id, base_uri, root_schema)?;
394    doc.declared_target_namespace = declared_target_namespace;
395    schema_set.documents.push(doc);
396
397    Ok(doc_id)
398}
399
400/// Validate element-specific structural constraints
401///
402/// Dispatches to the appropriate validation function based on element name.
403/// This enforces constraints like name/ref exclusivity, required attributes, etc.
404fn validate_element_attributes(
405    local_name: &str,
406    attrs: &AttributeMap,
407    name_table: &NameTable,
408    ctx: &ValidationContext,
409) -> SchemaResult<()> {
410    match local_name {
411        xsd_names::ELEMENT => validate_element_structure(attrs, name_table, ctx),
412        xsd_names::ATTRIBUTE => validate_attribute_structure(attrs, name_table, ctx),
413        xsd_names::SIMPLE_TYPE => validate_simple_type_structure(attrs, name_table, ctx),
414        xsd_names::COMPLEX_TYPE => validate_complex_type_structure(attrs, name_table, ctx),
415        xsd_names::GROUP => validate_group_structure(attrs, name_table, ctx),
416        xsd_names::ATTRIBUTE_GROUP => validate_attribute_group_structure(attrs, name_table, ctx),
417        xsd_names::NOTATION => validate_notation_structure(attrs, name_table, ctx),
418        xsd_names::INCLUDE => validate_include_structure(attrs, name_table),
419        xsd_names::IMPORT => validate_import_structure(attrs, name_table),
420        xsd_names::REDEFINE => validate_redefine_structure(attrs, name_table),
421        xsd_names::SCHEMA => validate_schema_structure(attrs, name_table),
422        xsd_names::KEY | xsd_names::UNIQUE => validate_key_unique_structure(attrs, name_table),
423        xsd_names::KEYREF => validate_keyref_structure(attrs, name_table),
424        xsd_names::EXTENSION => validate_extension_structure(attrs, name_table),
425        // Note: restriction and list/union validation requires child info (has_inline_type),
426        // so they're validated at frame finish time, not here
427        _ => Ok(()),
428    }
429}
430
431fn intern_attribute_values(local_name: &str, attrs: &AttributeMap, name_table: &mut NameTable) {
432    fn add_if_present(attrs: &AttributeMap, name_table: &mut NameTable, attr: &str) {
433        if let Some(value) = attrs.get_value_by_name(name_table, attr) {
434            name_table.add(value);
435        }
436    }
437
438    match local_name {
439        xsd_names::SCHEMA => {
440            add_if_present(attrs, name_table, "targetNamespace");
441            add_if_present(attrs, name_table, "defaultAttributes");
442        }
443        xsd_names::SIMPLE_TYPE | xsd_names::COMPLEX_TYPE => {
444            add_if_present(attrs, name_table, "name");
445        }
446        xsd_names::ELEMENT | xsd_names::ATTRIBUTE => {
447            add_if_present(attrs, name_table, "name");
448            add_if_present(attrs, name_table, "targetNamespace");
449        }
450        xsd_names::GROUP | xsd_names::ATTRIBUTE_GROUP | xsd_names::NOTATION => {
451            add_if_present(attrs, name_table, "name");
452        }
453        xsd_names::KEY | xsd_names::KEYREF | xsd_names::UNIQUE => {
454            add_if_present(attrs, name_table, "name");
455        }
456        _ => {}
457    }
458}
459
460/// Handle a start element event
461fn handle_start_element(
462    state: &mut ParserState,
463    element: &quick_xml::events::BytesStart,
464    span: SourceSpan,
465    seen_root: &mut bool,
466) -> SchemaResult<()> {
467    // Push namespace scope for this element
468    state.push_scope();
469
470    // Parse element name
471    let name = element.name();
472    let name_bytes = name.as_ref();
473    let (local_name_bytes, prefix_bytes) = split_qname(name_bytes);
474
475    let local_name = std::str::from_utf8(local_name_bytes).map_err(|e| {
476        SchemaError::xml(
477            format!("Invalid UTF-8 in element name: {}", e),
478            Some(state.source_ref(span).to_location(state.source_map)),
479        )
480    })?;
481
482    // First, process namespace declarations from attributes
483    for attr_result in element.attributes() {
484        let attr =
485            attr_result.map_err(|e| SchemaError::xml(format!("Attribute error: {}", e), None))?;
486
487        let attr_name = attr.key.as_ref();
488        let attr_value = attr
489            .unescape_value()
490            .map_err(|e| SchemaError::xml(format!("Attribute value error: {}", e), None))?;
491
492        // Check for xmlns declarations
493        if attr_name == b"xmlns" {
494            // Default namespace
495            state.ns_context.add_namespace("", &attr_value);
496        } else if attr_name.starts_with(b"xmlns:") {
497            // Prefixed namespace
498            let prefix = std::str::from_utf8(&attr_name[6..]).unwrap_or("");
499            state.ns_context.add_namespace(prefix, &attr_value);
500        }
501    }
502
503    // Now resolve the element's namespace
504    let element_ns = if let Some(prefix) = prefix_bytes {
505        let prefix_str = std::str::from_utf8(prefix).unwrap_or("");
506        state.ns_context.lookup_namespace(prefix_str)
507    } else {
508        state.ns_context.default_namespace()
509    };
510
511    // Check if this is the root schema element
512    if !*seen_root {
513        *seen_root = true;
514
515        // Must be xs:schema
516        if local_name != xsd_names::SCHEMA || !state.is_in_xsd_namespace(element_ns) {
517            return Err(SchemaError::structural(
518                "sch-props-correct",
519                format!("Root element must be xs:schema, found '{}'", local_name),
520                None,
521            ));
522        }
523    }
524
525    // Parse and categorize attributes
526    let source_ref = Some(state.source_ref(span));
527    let parsed_attrs = parse_attributes(
528        element.attributes(),
529        &mut state.ns_context,
530        source_ref.clone(),
531    )?;
532    // sch-props-correct: attributes on XSD-namespace elements must be
533    // unqualified — an explicit `xsd:`/`xs:` prefix on an XSD attribute
534    // (e.g. `xsd:targetNamespace`) is not the same lexical attribute as the
535    // unqualified one, and the schema-for-schemas content model only
536    // declares the unqualified attribute. addB070a (test64756.xsd):
537    // `<xsd:schema ... xsd:targetNamespace="http://foobar">` must be rejected.
538    if state.is_in_xsd_namespace(element_ns) {
539        let xsd_ns = state.get_xsd_ns_id();
540        for attr in &parsed_attrs {
541            if attr.prefix.is_some() && attr.namespace == xsd_ns {
542                let attr_name = state.ns_context.name_table().resolve(attr.local_name);
543                let location = attr
544                    .source
545                    .as_ref()
546                    .map(|s| s.to_location(state.source_map));
547                state.recover_or_fail(SchemaError::structural(
548                    "sch-props-correct",
549                    format!(
550                        "XSD attribute '{}' on element '{}' must be unqualified, not in \
551                         the XSD namespace",
552                        attr_name, local_name,
553                    ),
554                    location,
555                ))?;
556            }
557        }
558    }
559    let (xsd_attrs, foreign_attrs) =
560        categorize_attributes(parsed_attrs, state.ns_context.name_table());
561    let attr_map = AttributeMap::new(xsd_attrs);
562
563    // Chameleon include (§4.2.3 clause 2.3): if this is the root `<xs:schema>`
564    // of a document being chameleon-included and the document declares neither
565    // its own `targetNamespace` nor an explicit `xmlns` default, install the
566    // includer's target namespace as the default for QName resolution. Unqualified
567    // type/element/group/attribute references inside the document will then
568    // resolve to the includer's namespace — matching the fact that top-level
569    // definitions in the included schema are re-namespaced into the includer's
570    // target namespace (see `parse_schema_with_chameleon` below).
571    if state.frame_stack.is_empty()
572        && local_name == xsd_names::SCHEMA
573        && state.is_in_xsd_namespace(element_ns)
574    {
575        if let Some(chameleon_ns) = state.chameleon_namespace {
576            let has_own_tns = attr_map
577                .get_value_by_name(state.ns_context.name_table(), "targetNamespace")
578                .is_some();
579            let default_is_null = state.ns_context.default_namespace().is_none();
580            if !has_own_tns && default_is_null {
581                state
582                    .ns_context
583                    .set_default_namespace_id(Some(chameleon_ns));
584            }
585        }
586    }
587
588    // §F (XSD 1.1 Appendix F): conditional inclusion via vc:* attributes.
589    let vc_excluded = if foreign_attrs.is_empty() {
590        false
591    } else {
592        let ns_snapshot = state.ns_context.snapshot();
593        should_skip_for_vc(
594            &foreign_attrs,
595            state.ns_context.name_table(),
596            &ns_snapshot,
597            state.config.xsd_version,
598        )?
599    };
600    if state.frame_stack.is_empty() {
601        if vc_excluded {
602            state.vc_schema_excluded = true;
603        }
604    } else if vc_excluded || state.vc_schema_excluded {
605        push_skip_frame(state, source_ref, foreign_attrs)?;
606        return Ok(());
607    }
608
609    // Check if this is an XSD element (must do before borrowing frame)
610    let is_in_xsd_ns = state.is_in_xsd_namespace(element_ns);
611
612    // Check if current frame allows this child and handle skip frames
613    let (allows_child, has_frame, in_skip_frame, accepts_foreign) = {
614        if let Some(frame) = state.current_frame() {
615            let mut allowed = frame.allows(local_name, state.ns_context.name_table());
616            // Reject duplicate annotations: each XSD element allows at most one annotation
617            if allowed && local_name == xsd_names::ANNOTATION && frame.has_annotation() {
618                allowed = false;
619            }
620            (
621                allowed,
622                true,
623                frame.is_skip_frame(),
624                frame.accepts_foreign_children(),
625            )
626        } else {
627            (true, false, false, false)
628        }
629    };
630
631    if has_frame {
632        // If we're inside a skip frame, absorb all children without creating new frames
633        if in_skip_frame {
634            // Just notify the skip frame (increments depth) and return
635            if let Some(mut frame) = state.frame_stack.pop() {
636                frame.on_child_start(local_name, state.ns_context.name_table());
637                state.frame_stack.push(frame);
638            }
639            return Ok(());
640        }
641
642        // `xs:appinfo` / `xs:documentation` (and any frame opting into
643        // `accepts_foreign_children`) treats its content as opaque XML —
644        // including child elements that happen to be in the XSD namespace.
645        // Without this gate a `<xs:complexType>` literal embedded in
646        // documentation prose (addB194) would be parsed as a real
647        // schema-level complexType and fail src-ct's "inline complexType
648        // cannot have name" check.
649        if accepts_foreign {
650            push_skip_frame(state, source_ref, foreign_attrs)?;
651            return Ok(());
652        }
653
654        if !is_in_xsd_ns {
655            // Non-XSD child element. The schema-for-schemas content model
656            // forbids foreign elements (sch-props-correct). Surface a
657            // structural error and (in error recovery mode) skip the
658            // subtree so subsequent valid content can still be parsed.
659            let location = source_ref.as_ref().map(|s| s.to_location(state.source_map));
660            state.recover_or_fail(SchemaError::structural(
661                "sch-props-correct",
662                format!(
663                    "Foreign-namespace element '{}' is not allowed here",
664                    local_name
665                ),
666                location,
667            ))?;
668            push_skip_frame(state, source_ref, foreign_attrs)?;
669            return Ok(());
670        }
671
672        if !allows_child {
673            if state.config.error_recovery {
674                // Push a skip frame for error recovery
675                state.add_error(SchemaError::structural(
676                    "sch-props-correct",
677                    format!("Unexpected element '{}' in current context", local_name),
678                    None,
679                ));
680                push_skip_frame(state, source_ref, foreign_attrs)?;
681                return Ok(());
682            } else {
683                return Err(SchemaError::structural(
684                    "sch-props-correct",
685                    format!("Unexpected element '{}' in current context", local_name),
686                    None,
687                ));
688            }
689        }
690
691        // Notify current frame about child start
692        // Pop the frame, call method, push it back to avoid borrow issues
693        if let Some(mut frame) = state.frame_stack.pop() {
694            frame.on_child_start(local_name, state.ns_context.name_table());
695            state.frame_stack.push(frame);
696        }
697    }
698
699    // Validate XSD version compatibility
700    let validation_ctx = state.validation_context(source_ref.clone());
701    if let Err(e) = validate_xsd_version_element(local_name, &validation_ctx) {
702        if state.config.error_recovery {
703            state.add_error(e);
704            push_skip_frame(state, source_ref, foreign_attrs)?;
705            return Ok(());
706        } else {
707            return Err(e);
708        }
709    }
710
711    // Perform element-specific structural validation
712    if let Err(e) = validate_element_attributes(
713        local_name,
714        &attr_map,
715        state.ns_context.name_table(),
716        &validation_ctx,
717    ) {
718        state.recover_or_fail(e)?;
719    }
720
721    // §3.13.2 / xs:annotation: xml:lang on documentation/appinfo, when present,
722    // must be a valid xs:language value. Empty / whitespace-only values are
723    // rejected by the language regex.
724    if matches!(local_name, xsd_names::DOCUMENTATION | xsd_names::APPINFO) {
725        let xml_ns = state
726            .ns_context
727            .name_table()
728            .get(crate::namespace::XML_NAMESPACE);
729        let lang_local = state.ns_context.name_table().get("lang");
730        if let (Some(xml_ns), Some(lang_local)) = (xml_ns, lang_local) {
731            for fa in &foreign_attrs {
732                if fa.namespace == Some(xml_ns)
733                    && fa.local_name == lang_local
734                    && !crate::types::validators::is_valid_language(
735                        &crate::types::facets::normalize_whitespace(
736                            &fa.value,
737                            crate::types::facets::WhitespaceMode::Collapse,
738                        ),
739                    )
740                {
741                    state.recover_or_fail(SchemaError::structural(
742                        "s4s-att-invalid-value",
743                        format!(
744                            "'{}' xml:lang value '{}' is not a valid xs:language",
745                            local_name, fa.value
746                        ),
747                        source_ref.as_ref().map(|s| s.to_location(state.source_map)),
748                    ))?;
749                }
750            }
751        }
752    }
753
754    // Validate XSD version for individual attributes
755    if is_in_xsd_ns {
756        for attr_name_id in attr_map.names() {
757            let attr_name = state.ns_context.name_table().resolve(attr_name_id);
758            if let Err(e) = validate_xsd_version_attribute(&attr_name, local_name, &validation_ctx)
759            {
760                state.recover_or_fail(e)?;
761            }
762        }
763    }
764
765    // Validate xs:ID attribute (NCName format + document-level uniqueness).
766    // Skip xs:appinfo and xs:documentation — they don't define `id` in the XSD spec.
767    if !matches!(local_name, xsd_names::APPINFO | xsd_names::DOCUMENTATION) {
768        if let Some(id_val) = attr_map.get_value_by_name(state.ns_context.name_table(), "id") {
769            if !is_ncname(id_val) {
770                state.recover_or_fail(SchemaError::structural(
771                    "s4s-att-invalid-value",
772                    format!(
773                        "'{}' attribute 'id' has invalid value '{}': not a valid xs:ID",
774                        local_name, id_val
775                    ),
776                    source_ref.as_ref().map(|s| s.to_location(state.source_map)),
777                ))?;
778            } else if !state.id_values.insert(id_val.to_string()) {
779                state.recover_or_fail(SchemaError::structural(
780                    "s4s-att-invalid-value",
781                    format!(
782                        "Duplicate xs:ID value '{}' on element '{}'",
783                        id_val, local_name
784                    ),
785                    source_ref.as_ref().map(|s| s.to_location(state.source_map)),
786                ))?;
787            }
788        }
789    }
790
791    // Intern attribute values that are represented as NameId in frame results
792    if is_in_xsd_ns {
793        intern_attribute_values(local_name, &attr_map, state.ns_context.name_table_mut());
794    }
795
796    // Create namespace snapshot for QName resolution during frame construction
797    let ns_snapshot = state.ns_context.snapshot();
798
799    // Create the new frame
800    let frame = if state.config.error_recovery {
801        let mut frame = create_frame_recovering(
802            local_name,
803            &attr_map,
804            state.ns_context.name_table(),
805            source_ref.clone(),
806            &ns_snapshot,
807            &mut state.errors,
808        );
809        frame.set_foreign_attributes(foreign_attrs);
810        // Set namespace context for annotation content frames
811        if matches!(local_name, xsd_names::APPINFO | xsd_names::DOCUMENTATION) {
812            frame.set_namespaces(ns_snapshot.clone());
813        }
814        frame
815    } else {
816        let mut frame = create_frame(
817            local_name,
818            &attr_map,
819            state.ns_context.name_table(),
820            source_ref.clone(),
821            &ns_snapshot,
822        )?;
823        frame.set_foreign_attributes(foreign_attrs);
824        // Set namespace context for annotation content frames
825        if matches!(local_name, xsd_names::APPINFO | xsd_names::DOCUMENTATION) {
826            frame.set_namespaces(ns_snapshot.clone());
827        }
828        frame
829    };
830
831    // Push frame onto stack
832    state.frame_stack.push(frame);
833
834    Ok(())
835}
836
837/// Handle an end element event
838fn handle_end_element(state: &mut ParserState, _span: SourceSpan) -> SchemaResult<()> {
839    // Check if current frame is a skip frame with pending depth
840    {
841        if let Some(mut frame) = state.frame_stack.pop() {
842            if frame.is_skip_frame() {
843                // Call on_child_end to decrement depth
844                // Returns true if this is the final end element for the skipped element
845                if !frame.on_child_end() {
846                    // Still inside nested children, put frame back and just pop scope
847                    state.frame_stack.push(frame);
848                    state.pop_scope();
849                    return Ok(());
850                }
851            }
852            // Put frame back for normal processing
853            state.frame_stack.push(frame);
854        }
855    }
856
857    // Pop the current frame and get its result
858    let frame = match state.frame_stack.pop() {
859        Some(f) => f,
860        None => {
861            return Err(SchemaError::internal("End element with no frame on stack"));
862        }
863    };
864
865    // Save source ref before finish() consumes the frame
866    let source_ref = frame.source().cloned();
867
868    let result = match frame.finish() {
869        Ok(r) => r,
870        Err(e) => {
871            // Add source location to error if available
872            let e = if let Some(ref src) = source_ref {
873                e.with_location(state.source_map.locate(src.span.start))
874            } else {
875                e
876            };
877            return Err(e);
878        }
879    };
880
881    // Pop namespace scope
882    state.pop_scope();
883
884    // Attach result to parent frame. Errors from attach() (e.g. st-props-correct
885    // duplicate facet from apply_facet) are attributed to the child frame's
886    // source location — that is the offending element per §3.16.2.
887    if let Some(parent) = state.current_frame_mut() {
888        if let Err(e) = parent.attach(result) {
889            let e = if let Some(ref src) = source_ref {
890                e.with_location(state.source_map.locate(src.span.start))
891            } else {
892                e
893            };
894            return Err(e);
895        }
896    }
897    // If no parent, store the root schema result
898    else if let FrameResult::Schema(schema_result) = result {
899        state.root_schema = Some(schema_result);
900    } else {
901        return Err(SchemaError::internal(
902            "Root frame did not produce a schema result",
903        ));
904    }
905
906    Ok(())
907}
908
909/// Handle a text event
910fn handle_text(
911    state: &mut ParserState,
912    text: &quick_xml::events::BytesText,
913    span: SourceSpan,
914) -> SchemaResult<()> {
915    let text_content = text
916        .unescape()
917        .map_err(|e| SchemaError::xml(format!("Text content error: {}", e), None))?;
918
919    // Pass text to current frame if it accepts text content; otherwise reject
920    // any non-whitespace text. XSD elements like xs:notation, xs:complexType,
921    // xs:sequence, etc. only allow inter-element whitespace as text content
922    // (sch-props-correct / element-specific content models).
923    if let Some(mut frame) = state.frame_stack.pop() {
924        if frame.accepts_text() {
925            frame.on_text(&text_content);
926        } else if !frame.is_skip_frame() && !text_content.trim().is_empty() {
927            let source_ref = state.source_ref(span);
928            state.frame_stack.push(frame);
929            return state.recover_or_fail(SchemaError::structural(
930                "sch-props-correct",
931                "Non-whitespace text is not allowed here",
932                Some(source_ref.to_location(state.source_map)),
933            ));
934        }
935        state.frame_stack.push(frame);
936    }
937
938    Ok(())
939}
940
941/// Handle a CDATA section
942fn handle_cdata(
943    state: &mut ParserState,
944    cdata: &quick_xml::events::BytesCData,
945    span: SourceSpan,
946) -> SchemaResult<()> {
947    // CDATA is similar to text, typically in annotations
948    if let Some(mut frame) = state.frame_stack.pop() {
949        if frame.accepts_text() {
950            // Convert CDATA to string
951            if let Ok(cdata_str) = std::str::from_utf8(cdata.as_ref()) {
952                frame.on_cdata(cdata_str);
953            }
954        } else if !frame.is_skip_frame() {
955            // CDATA content is significant — even whitespace-only CDATA represents
956            // intentional text. In an XSD element that doesn't allow text content,
957            // this is invalid.
958            let cdata_is_whitespace = std::str::from_utf8(cdata.as_ref())
959                .map(|s| s.trim().is_empty())
960                .unwrap_or(false);
961            if !cdata_is_whitespace {
962                let source_ref = state.source_ref(span);
963                state.frame_stack.push(frame);
964                return state.recover_or_fail(SchemaError::structural(
965                    "sch-props-correct",
966                    "Non-whitespace CDATA is not allowed here",
967                    Some(source_ref.to_location(state.source_map)),
968                ));
969            }
970        }
971        state.frame_stack.push(frame);
972    }
973    Ok(())
974}
975
976/// XSD 1.1 Appendix F — conditional inclusion via vc:* version attributes.
977///
978/// Returns `Ok(true)` if the element should be excluded (skipped), `Ok(false)`
979/// if it should be included. Returns `Err` if any vc:* version attribute has
980/// an invalid decimal value (the schema is then structurally invalid).
981fn should_skip_for_vc(
982    foreign_attrs: &[ForeignAttribute],
983    name_table: &NameTable,
984    ns_snapshot: &crate::namespace::NamespaceContextSnapshot,
985    xsd_version: XsdVersion,
986) -> SchemaResult<bool> {
987    const VC_NAMESPACE: &str = "http://www.w3.org/2007/XMLSchema-versioning";
988    let Some(vc_ns_id) = name_table.get(VC_NAMESPACE) else {
989        return Ok(false);
990    };
991    let current: f64 = match xsd_version {
992        XsdVersion::V1_0 => 1.0,
993        XsdVersion::V1_1 => 1.1,
994    };
995    for attr in foreign_attrs {
996        if attr.namespace != Some(vc_ns_id) {
997            continue;
998        }
999        let local = name_table.resolve_ref(attr.local_name);
1000        let include = match local {
1001            "minVersion" | "maxVersion" | "minVersionExclusive" | "maxVersionExclusive" => {
1002                let bound = match attr.value.trim().parse::<f64>() {
1003                    Ok(v) => v,
1004                    Err(_) => {
1005                        if xsd_version == XsdVersion::V1_1 {
1006                            return Err(err_versioning(format!(
1007                                "Invalid vc:{} value '{}': must be a valid xs:decimal",
1008                                local,
1009                                attr.value.trim()
1010                            )));
1011                        }
1012                        // XSD 1.0: vc: attributes are informational only; ignore invalid values
1013                        continue;
1014                    }
1015                };
1016                match local {
1017                    "minVersion" => current >= bound,
1018                    "maxVersion" => current <= bound,
1019                    "minVersionExclusive" => current > bound,
1020                    _ => current < bound,
1021                }
1022            }
1023            "typeAvailable" | "typeUnavailable" | "facetAvailable" | "facetUnavailable" => {
1024                // The W3C XMLSchema-versioning spec says vc: attributes "SHOULD
1025                // be honored" when recognized. Saxon honors them under both
1026                // XSD 1.0 and 1.1, and the suite tests (e.g. vc014 under
1027                // version="1.0 1.1") expect xs:error / XSD-1.1-only types to
1028                // be considered "unavailable" under XSD 1.0. Evaluate in both
1029                // modes so VC-driven branches are filtered correctly.
1030                let is_available_attr = matches!(local, "typeAvailable" | "facetAvailable");
1031                let is_type_check = matches!(local, "typeAvailable" | "typeUnavailable");
1032                let mut available_count = 0usize;
1033                let mut total_count = 0usize;
1034                for token in attr.value.split_whitespace() {
1035                    total_count += 1;
1036                    if vc_token_available(
1037                        token,
1038                        local,
1039                        is_type_check,
1040                        ns_snapshot,
1041                        name_table,
1042                        xsd_version,
1043                    )? {
1044                        available_count += 1;
1045                    }
1046                }
1047                if total_count == 0 {
1048                    continue;
1049                }
1050                // typeAvailable/facetAvailable: include iff ALL items available.
1051                // typeUnavailable/facetUnavailable: include iff ANY item is unavailable.
1052                if is_available_attr {
1053                    available_count == total_count
1054                } else {
1055                    available_count < total_count
1056                }
1057            }
1058            _ => continue,
1059        };
1060        if !include {
1061            return Ok(true);
1062        }
1063    }
1064    Ok(false)
1065}
1066
1067fn err_versioning(msg: String) -> SchemaError {
1068    SchemaError::structural("src-versioning", msg, None)
1069}
1070
1071/// Evaluate a single QName token in a vc:typeAvailable / facetAvailable list.
1072fn vc_token_available(
1073    token: &str,
1074    local: &str,
1075    is_type_check: bool,
1076    ns_snapshot: &crate::namespace::NamespaceContextSnapshot,
1077    name_table: &NameTable,
1078    xsd_version: XsdVersion,
1079) -> SchemaResult<bool> {
1080    use crate::namespace::is_ncname;
1081    let (prefix_str, local_str) = match token.find(':') {
1082        Some(pos) => (Some(&token[..pos]), &token[pos + 1..]),
1083        None => (None, token),
1084    };
1085    if !is_ncname(local_str) {
1086        return Err(err_versioning(format!(
1087            "Invalid QName '{}' in vc:{}: '{}' is not a valid NCName",
1088            token, local, local_str
1089        )));
1090    }
1091    let ns_id = match prefix_str {
1092        Some(p) => {
1093            if !is_ncname(p) {
1094                return Err(err_versioning(format!(
1095                    "Invalid QName '{}' in vc:{}: '{}' is not a valid NCName prefix",
1096                    token, local, p
1097                )));
1098            }
1099            let p_id = name_table.get(p).ok_or_else(|| {
1100                err_versioning(format!(
1101                    "Undeclared prefix '{}' in vc:{} value '{}'",
1102                    p, local, token
1103                ))
1104            })?;
1105            Some(ns_snapshot.resolve_prefix(p_id).ok_or_else(|| {
1106                err_versioning(format!(
1107                    "Undeclared prefix '{}' in vc:{} value '{}'",
1108                    p, local, token
1109                ))
1110            })?)
1111        }
1112        None => None,
1113    };
1114    if ns_id != Some(crate::namespace::well_known::XS_NAMESPACE) {
1115        return Ok(false);
1116    }
1117    Ok(if is_type_check {
1118        vc_is_xs_type_available(local_str, xsd_version)
1119    } else {
1120        vc_is_xs_facet_available(local_str)
1121    })
1122}
1123
1124fn vc_is_xs_type_available(local_name: &str, xsd_version: XsdVersion) -> bool {
1125    match crate::types::XmlTypeCode::from_local_name(local_name) {
1126        Some(code) => !code.is_xsd11() || xsd_version == XsdVersion::V1_1,
1127        None => false,
1128    }
1129}
1130
/// Report whether `local_name` is one of the facet names this processor knows.
///
/// The comparison is exact (case-sensitive, no prefix). The answer does not
/// depend on the XSD version in effect.
fn vc_is_xs_facet_available(local_name: &str) -> bool {
    const KNOWN_FACETS: [&str; 14] = [
        "minLength",
        "maxLength",
        "length",
        "pattern",
        "enumeration",
        "whiteSpace",
        "totalDigits",
        "fractionDigits",
        "minInclusive",
        "maxInclusive",
        "minExclusive",
        "maxExclusive",
        "assertion",
        "explicitTimezone",
    ];
    KNOWN_FACETS.iter().any(|&facet| facet == local_name)
}
1150
1151/// Push a skip frame for error recovery
1152fn push_skip_frame(
1153    state: &mut ParserState,
1154    source: Option<SourceRef>,
1155    foreign_attrs: Vec<ForeignAttribute>,
1156) -> SchemaResult<()> {
1157    let mut frame: Box<dyn Frame> = Box::new(SkipFrame::new(source));
1158    frame.set_foreign_attributes(foreign_attrs);
1159    state.frame_stack.push(frame);
1160    Ok(())
1161}
1162
/// Helper extension for SourceRef to convert to SourceLocation
trait SourceRefExt {
    /// Convert this reference into a [`SourceLocation`] by looking it up in `source_map`.
    fn to_location(&self, source_map: &SourceMap) -> SourceLocation;
}
1167
1168impl SourceRefExt for SourceRef {
1169    fn to_location(&self, source_map: &SourceMap) -> SourceLocation {
1170        source_map.locate(self.span.start)
1171    }
1172}
1173
1174#[cfg(test)]
1175mod tests {
1176    use super::*;
1177    use crate::ids::TypeKey;
1178    use crate::schema::model::FormChoice;
1179
1180    #[test]
1181    fn test_parse_minimal_schema() {
1182        let mut schema_set = SchemaSet::new();
1183        let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1184            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1185            </xs:schema>"#;
1186
1187        let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1188        assert!(result.is_ok());
1189    }
1190
1191    #[test]
1192    fn test_parse_schema_with_element() {
1193        let mut schema_set = SchemaSet::new();
1194        let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1195            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1196                <xs:element name="root" type="xs:string"/>
1197            </xs:schema>"#;
1198
1199        let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1200        assert!(result.is_ok());
1201    }
1202
1203    #[test]
1204    fn test_parse_schema_with_complex_type() {
1205        let mut schema_set = SchemaSet::new();
1206        let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1207            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1208                <xs:complexType name="PersonType">
1209                    <xs:sequence>
1210                        <xs:element name="name" type="xs:string"/>
1211                        <xs:element name="age" type="xs:int"/>
1212                    </xs:sequence>
1213                </xs:complexType>
1214            </xs:schema>"#;
1215
1216        let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1217        assert!(result.is_ok());
1218    }
1219
1220    #[test]
1221    fn test_parse_schema_with_simple_type() {
1222        let mut schema_set = SchemaSet::new();
1223        let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1224            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1225                <xs:simpleType name="StringList">
1226                    <xs:list itemType="xs:string"/>
1227                </xs:simpleType>
1228            </xs:schema>"#;
1229
1230        let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1231        assert!(result.is_ok());
1232    }
1233
1234    #[test]
1235    fn test_parse_schema_with_target_namespace() {
1236        let mut schema_set = SchemaSet::new();
1237        let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1238            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
1239                       targetNamespace="http://example.com/test">
1240            </xs:schema>"#;
1241
1242        let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1243        assert!(result.is_ok());
1244    }
1245
1246    #[test]
1247    fn test_parse_schema_with_import() {
1248        let mut schema_set = SchemaSet::new();
1249        let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1250            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1251                <xs:import namespace="http://www.w3.org/XML/1998/namespace"/>
1252            </xs:schema>"#;
1253
1254        let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1255        assert!(result.is_ok());
1256    }
1257
1258    #[cfg(feature = "xsd11")]
1259    #[test]
1260    fn test_parse_schema_assembles_arena_fields() {
1261        use crate::parser::frames::TypeFrameResult;
1262        use crate::schema::model::OpenContentMode;
1263        use crate::schema::wildcard::{NamespaceConstraint, ProcessContents};
1264
1265        let mut schema_set = SchemaSet::xsd11();
1266        let xsd = r###"<?xml version="1.0" encoding="UTF-8"?>
1267            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
1268                       defaultAttributes="common">
1269                <xs:defaultOpenContent mode="suffix">
1270                    <xs:any namespace="##other" processContents="lax"/>
1271                </xs:defaultOpenContent>
1272                <xs:attributeGroup name="common">
1273                    <xs:attribute name="lang" type="xs:string"/>
1274                </xs:attributeGroup>
1275                <xs:element name="head1" type="xs:string"/>
1276                <xs:element name="head2" type="xs:string"/>
1277                <xs:element name="root" substitutionGroup="head1 head2">
1278                    <xs:complexType>
1279                        <xs:attribute name="code" type="xs:string"/>
1280                    </xs:complexType>
1281                </xs:element>
1282            </xs:schema>"###;
1283
1284        let doc_id = parse_schema_with_config(
1285            xsd.as_bytes(),
1286            "test.xsd",
1287            &mut schema_set,
1288            &ParserConfig::default(),
1289        )
1290        .unwrap();
1291
1292        let doc = &schema_set.documents[doc_id as usize];
1293        let default_attrs = doc.default_attributes.as_ref().expect("defaultAttributes");
1294        assert_eq!(
1295            schema_set.name_table.resolve(default_attrs.local_name),
1296            "common"
1297        );
1298        assert!(default_attrs.namespace_uri.is_none());
1299
1300        let default_open = doc
1301            .default_open_content
1302            .as_ref()
1303            .expect("defaultOpenContent");
1304        assert_eq!(default_open.mode, OpenContentMode::Suffix);
1305        let wildcard = default_open.wildcard.as_ref().expect("wildcard");
1306        assert!(matches!(
1307            wildcard.namespace_constraint,
1308            NamespaceConstraint::Other
1309        ));
1310        assert_eq!(wildcard.process_contents, ProcessContents::Lax);
1311
1312        let common_id = schema_set.name_table.get("common").unwrap();
1313        let group_key = schema_set
1314            .lookup_attribute_group(None, common_id)
1315            .expect("attributeGroup lookup");
1316        let group = schema_set.arenas.get_attribute_group(group_key).unwrap();
1317        assert_eq!(group.attributes.len(), 1);
1318        let lang_id = group.attributes[0].attribute.name.unwrap();
1319        assert_eq!(schema_set.name_table.resolve(lang_id), "lang");
1320
1321        let root_id = schema_set.name_table.get("root").unwrap();
1322        let root_key = schema_set
1323            .lookup_element(None, root_id)
1324            .expect("element lookup");
1325        let root = schema_set.arenas.get_element(root_key).unwrap();
1326        assert_eq!(root.substitution_group.len(), 2);
1327        assert_eq!(
1328            schema_set
1329                .name_table
1330                .resolve(root.substitution_group[0].local_name),
1331            "head1"
1332        );
1333        assert_eq!(
1334            schema_set
1335                .name_table
1336                .resolve(root.substitution_group[1].local_name),
1337            "head2"
1338        );
1339
1340        let inline = root.inline_type.as_ref().expect("inline type");
1341        match inline.as_ref() {
1342            TypeFrameResult::Complex(ct) => {
1343                assert_eq!(ct.attributes.len(), 1);
1344                let code_id = ct.attributes[0].attribute.name.unwrap();
1345                assert_eq!(schema_set.name_table.resolve(code_id), "code");
1346            }
1347            _ => panic!("expected inline complex type"),
1348        }
1349    }
1350
1351    #[test]
1352    fn test_parse_invalid_root() {
1353        let mut schema_set = SchemaSet::new();
1354        let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1355            <notSchema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1356            </notSchema>"#;
1357
1358        let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1359        assert!(result.is_err());
1360    }
1361
1362    #[test]
1363    fn test_parse_form_choice() {
1364        assert_eq!(
1365            crate::parser::assemble::parse_form_choice(Some("qualified")),
1366            FormChoice::Qualified
1367        );
1368        assert_eq!(
1369            crate::parser::assemble::parse_form_choice(Some("unqualified")),
1370            FormChoice::Unqualified
1371        );
1372        assert_eq!(
1373            crate::parser::assemble::parse_form_choice(None),
1374            FormChoice::Unqualified
1375        );
1376    }
1377
1378    #[test]
1379    fn test_parser_config_default() {
1380        let config = ParserConfig::default();
1381        assert!(config.error_recovery);
1382        assert!(config.collect_foreign_attributes);
1383        assert_eq!(config.max_depth, 0);
1384    }
1385
1386    #[test]
1387    fn test_apply_schema_defaults_to_elements_and_types() {
1388        let mut schema_set = SchemaSet::new();
1389        let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1390            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
1391                       blockDefault="extension"
1392                       finalDefault="restriction">
1393              <xs:element name="head" type="xs:string"/>
1394              <xs:complexType name="Base"/>
1395              <xs:simpleType name="Simple">
1396                <xs:restriction base="xs:string"/>
1397              </xs:simpleType>
1398            </xs:schema>"#;
1399
1400        let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1401        assert!(result.is_ok());
1402
1403        let name_id = schema_set.name_table.get("head").expect("name id for head");
1404        let ns_table = schema_set
1405            .namespaces
1406            .get(&None)
1407            .expect("default namespace table");
1408        let elem_key = ns_table.elements.get(&name_id).expect("element key");
1409        let elem = schema_set
1410            .arenas
1411            .elements
1412            .get(*elem_key)
1413            .expect("element data");
1414        assert!(elem.block.contains_extension());
1415        assert!(elem.final_derivation.contains_restriction());
1416
1417        let base_id = schema_set.name_table.get("Base").expect("name id for Base");
1418        let base_key = ns_table.types.get(&base_id).expect("type key for Base");
1419        match base_key {
1420            TypeKey::Complex(key) => {
1421                let base = schema_set
1422                    .arenas
1423                    .complex_types
1424                    .get(*key)
1425                    .expect("complex type data");
1426                assert!(base.block.contains_extension());
1427                assert!(base.final_derivation.contains_restriction());
1428            }
1429            _ => panic!("expected complex type for Base"),
1430        }
1431
1432        let simple_id = schema_set
1433            .name_table
1434            .get("Simple")
1435            .expect("name id for Simple");
1436        let simple_key = ns_table.types.get(&simple_id).expect("type key for Simple");
1437        match simple_key {
1438            TypeKey::Simple(key) => {
1439                let simple = schema_set
1440                    .arenas
1441                    .simple_types
1442                    .get(*key)
1443                    .expect("simple type data");
1444                assert!(simple.final_derivation.contains_restriction());
1445            }
1446            _ => panic!("expected simple type for Simple"),
1447        }
1448    }
1449
1450    /// §3.3.1.2 / §3.4.1: `final=""` on an element/type is an explicit empty override —
1451    /// it must NOT be replaced by the document-level `finalDefault`. Only absent `final=`
1452    /// inherits `finalDefault`. This test verifies that the assembler correctly distinguishes
1453    /// the two cases (T22b fix).
1454    #[test]
1455    fn test_final_explicit_empty_overrides_final_default() {
1456        let mut schema_set = SchemaSet::new();
1457        let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1458            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
1459                       finalDefault="restriction">
1460              <!-- final="" is explicit override: no derivation blocked, despite finalDefault -->
1461              <xs:element name="unlocked" type="xs:string" final=""/>
1462              <!-- absent final= inherits finalDefault="restriction" -->
1463              <xs:element name="inherited" type="xs:string"/>
1464              <xs:complexType name="UnlockedType" final=""/>
1465              <xs:complexType name="InheritedType"/>
1466              <xs:simpleType name="UnlockedSimple" final="">
1467                <xs:restriction base="xs:string"/>
1468              </xs:simpleType>
1469              <xs:simpleType name="InheritedSimple">
1470                <xs:restriction base="xs:string"/>
1471              </xs:simpleType>
1472            </xs:schema>"#;
1473
1474        let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1475        assert!(result.is_ok());
1476
1477        let ns_table = schema_set.namespaces.get(&None).expect("default namespace");
1478
1479        // Element: final="" → empty (NOT restriction)
1480        let unlocked_id = schema_set.name_table.get("unlocked").expect("unlocked");
1481        let unlocked_key = ns_table.elements.get(&unlocked_id).expect("element key");
1482        let unlocked = schema_set
1483            .arenas
1484            .elements
1485            .get(*unlocked_key)
1486            .expect("element");
1487        assert!(
1488            unlocked.final_derivation.is_empty(),
1489            "final=\"\" must produce empty set, not inherit finalDefault"
1490        );
1491
1492        // Element: absent final → restriction (from finalDefault)
1493        let inherited_id = schema_set.name_table.get("inherited").expect("inherited");
1494        let inherited_key = ns_table.elements.get(&inherited_id).expect("element key");
1495        let inherited = schema_set
1496            .arenas
1497            .elements
1498            .get(*inherited_key)
1499            .expect("element");
1500        assert!(
1501            inherited.final_derivation.contains_restriction(),
1502            "absent final= must inherit finalDefault=restriction"
1503        );
1504
1505        // ComplexType: final="" → empty
1506        let ut_id = schema_set
1507            .name_table
1508            .get("UnlockedType")
1509            .expect("UnlockedType");
1510        let ut_key = ns_table.types.get(&ut_id).expect("type key");
1511        if let crate::ids::TypeKey::Complex(key) = ut_key {
1512            let ct = schema_set
1513                .arenas
1514                .complex_types
1515                .get(*key)
1516                .expect("complex type");
1517            assert!(
1518                ct.final_derivation.is_empty(),
1519                "complexType final=\"\" must not inherit finalDefault"
1520            );
1521        }
1522
1523        // ComplexType: absent final → restriction
1524        let it_id = schema_set
1525            .name_table
1526            .get("InheritedType")
1527            .expect("InheritedType");
1528        let it_key = ns_table.types.get(&it_id).expect("type key");
1529        if let crate::ids::TypeKey::Complex(key) = it_key {
1530            let ct = schema_set
1531                .arenas
1532                .complex_types
1533                .get(*key)
1534                .expect("complex type");
1535            assert!(
1536                ct.final_derivation.contains_restriction(),
1537                "complexType absent final= must inherit finalDefault"
1538            );
1539        }
1540
1541        // SimpleType: final="" → empty
1542        let us_id = schema_set
1543            .name_table
1544            .get("UnlockedSimple")
1545            .expect("UnlockedSimple");
1546        let us_key = ns_table.types.get(&us_id).expect("type key");
1547        if let crate::ids::TypeKey::Simple(key) = us_key {
1548            let st = schema_set
1549                .arenas
1550                .simple_types
1551                .get(*key)
1552                .expect("simple type");
1553            assert!(
1554                st.final_derivation.is_empty(),
1555                "simpleType final=\"\" must not inherit finalDefault"
1556            );
1557        }
1558
1559        // SimpleType: absent final → restriction
1560        let is_id = schema_set
1561            .name_table
1562            .get("InheritedSimple")
1563            .expect("InheritedSimple");
1564        let is_key = ns_table.types.get(&is_id).expect("type key");
1565        if let crate::ids::TypeKey::Simple(key) = is_key {
1566            let st = schema_set
1567                .arenas
1568                .simple_types
1569                .get(*key)
1570                .expect("simple type");
1571            assert!(
1572                st.final_derivation.contains_restriction(),
1573                "simpleType absent final= must inherit finalDefault"
1574            );
1575        }
1576    }
1577
1578    #[test]
1579    fn test_duplicate_id_detected() {
1580        let mut schema_set = SchemaSet::new();
1581        let xsd = r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1582            <xs:element id="foo123" name="a" type="xs:string"/>
1583            <xs:element id="foo123" name="b" type="xs:string"/>
1584        </xs:schema>"#;
1585        let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1586        assert!(result.is_ok());
1587        assert!(schema_set
1588            .parsing_errors
1589            .iter()
1590            .any(|e| { e.to_string().contains("Duplicate xs:ID value 'foo123'") }));
1591    }
1592
1593    #[test]
1594    fn test_unique_ids_valid() {
1595        let mut schema_set = SchemaSet::new();
1596        let xsd = r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1597            <xs:element id="id1" name="a" type="xs:string"/>
1598            <xs:element id="id2" name="b" type="xs:string"/>
1599        </xs:schema>"#;
1600        let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1601        assert!(result.is_ok());
1602        assert!(schema_set.parsing_errors.is_empty());
1603    }
1604
1605    #[test]
1606    fn test_invalid_id_format() {
1607        let mut schema_set = SchemaSet::new();
1608        let xsd = r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1609            <xs:element id="123bad" name="a" type="xs:string"/>
1610        </xs:schema>"#;
1611        let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1612        assert!(result.is_ok());
1613        assert!(schema_set
1614            .parsing_errors
1615            .iter()
1616            .any(|e| { e.to_string().contains("not a valid xs:ID") }));
1617    }
1618}