Skip to main content

xsd_schema/parser/
resolver.rs

//! Schema resolution for include, import, and redefine directives
//!
//! This module handles resolving and loading external schema documents
//! referenced by `xs:include`, `xs:import`, and `xs:redefine` directives.
//!
//! # Resolution Process
//!
//! 1. **Include** - Same target namespace, required schemaLocation
//!    - Loads the referenced schema
//!    - Merges components into the same namespace
//!    - Supports chameleon includes (no targetNamespace)
//!
//! 2. **Import** - Different namespace, optional schemaLocation
//!    - Loads schema for the specified namespace
//!    - Components remain in their declared namespace
//!    - Without schemaLocation, relies on catalog or pre-loaded schemas
//!
//! 3. **Redefine** - Same namespace, extends/restricts existing types
//!    - Deprecated in XSD 1.1 (use override instead)
//!    - Allows redefining types/groups from included schema
//!
//! # Circular Dependencies
//!
//! The resolver tracks loaded schema locations to:
//! - Detect circular includes (allowed, just skip)
//! - Prevent infinite loops
//! - Enable caching of resolved schemas
//!
//! # URI Resolution
//!
//! The resolver supports:
//! - Absolute file paths
//! - Relative paths (resolved against base URI)
//! - HTTP/HTTPS URLs (via async trait)
//! - Catalog-based resolution
//!
//! # Customizable Loading
//!
//! The [`SchemaLoader`] trait allows custom loading strategies:
//! - [`FileSystemLoader`] - Loads from local file system
//! - [`EmbeddedLoader`] - Loads from embedded static assets
//! - [`LoaderChain`] - Combines multiple loaders with priority
43
44use std::collections::HashSet;
45use std::fmt::Debug;
46use std::path::{Path, PathBuf};
47#[cfg(feature = "async")]
48use std::pin::Pin;
49
50use crate::error::{SchemaError, SchemaResult};
51use crate::ids::{DocumentId, NameId};
52use crate::parser::parse::ParserConfig;
53use crate::schema::composition::{CompositionEdge, CompositionEdgeKind};
54use crate::SchemaSet;
55
56/// Result of a single `load_schema` call, distinguishing three outcomes.
57#[derive(Debug)]
58pub enum LoadOutcome {
59    /// Schema was freshly loaded and parsed.
60    Loaded(DocumentId),
61    /// Schema was already in `loaded_locations`.
62    AlreadyLoaded(DocumentId),
63    /// Schema is currently mid-parse (in the `resolving` set). Contains the
64    /// resolved URI so the caller can record a cycle edge and fix it up later.
65    Cycle(String),
66}
67
// ============================================================================
// Encoding-Aware Decoding
// ============================================================================
71
/// Byte-order mark that introduces a UTF-8 stream.
const UTF8_BOM: &[u8] = b"\xEF\xBB\xBF";
/// Byte-order mark that introduces a little-endian UTF-16 stream.
const UTF16_LE_BOM: &[u8] = b"\xFF\xFE";
/// Byte-order mark that introduces a big-endian UTF-16 stream.
const UTF16_BE_BOM: &[u8] = b"\xFE\xFF";
75
/// Byte order of a UTF-16 code-unit stream.
#[derive(Clone, Copy)]
enum Endian {
    /// Little-endian: low byte of each code unit comes first.
    Le,
    /// Big-endian: high byte of each code unit comes first.
    Be,
}
81
82/// Decode raw XML bytes into UTF-8 bytes, sniffing common Unicode encodings
83/// per XML 1.0 §F.1.
84///
85/// Recognizes UTF-8 with/without BOM and UTF-16 LE/BE with/without BOM. The
86/// returned `Vec<u8>` is the input buffer unchanged when it is already UTF-8
87/// with no BOM (zero-copy fast path).
88pub fn decode_xml_to_utf8_bytes(bytes: Vec<u8>) -> SchemaResult<Vec<u8>> {
89    if bytes.starts_with(UTF8_BOM) {
90        return Ok(bytes[UTF8_BOM.len()..].to_vec());
91    }
92    if bytes.starts_with(UTF16_LE_BOM) {
93        return Ok(decode_utf16(&bytes[UTF16_LE_BOM.len()..], Endian::Le)?.into_bytes());
94    }
95    if bytes.starts_with(UTF16_BE_BOM) {
96        return Ok(decode_utf16(&bytes[UTF16_BE_BOM.len()..], Endian::Be)?.into_bytes());
97    }
98    if let Some(endian) = sniff_utf16_no_bom(&bytes) {
99        return Ok(decode_utf16(&bytes, endian)?.into_bytes());
100    }
101    Ok(bytes)
102}
103
104/// Decode raw XML bytes into a UTF-8 `String`, sniffing common Unicode
105/// encodings per XML 1.0 §F.1. See [`decode_xml_to_utf8_bytes`].
106pub fn decode_xml_bytes(bytes: Vec<u8>) -> SchemaResult<String> {
107    let utf8 = decode_xml_to_utf8_bytes(bytes)?;
108    String::from_utf8(utf8)
109        .map_err(|e| SchemaError::resolution(format!("Invalid UTF-8 content: {}", e)))
110}
111
112fn sniff_utf16_no_bom(bytes: &[u8]) -> Option<Endian> {
113    // XML 1.0 §F.1: with no BOM, the first four bytes of '<?' in UTF-16 LE
114    // are `3C 00 ?? 00` and in UTF-16 BE are `00 3C 00 ??`. The non-null
115    // third/fourth byte distinguishes UTF-16 from UTF-32.
116    if bytes.len() < 4 {
117        return None;
118    }
119    match (bytes[0], bytes[1]) {
120        (0x3C, 0x00) if bytes[2] != 0x00 && bytes[3] == 0x00 => Some(Endian::Le),
121        (0x00, 0x3C) if bytes[2] == 0x00 && bytes[3] != 0x00 => Some(Endian::Be),
122        _ => None,
123    }
124}
125
126fn decode_utf16(bytes: &[u8], endian: Endian) -> SchemaResult<String> {
127    if !bytes.len().is_multiple_of(2) {
128        return Err(SchemaError::resolution(
129            "UTF-16 byte stream has an odd number of bytes".to_string(),
130        ));
131    }
132    let units: Vec<u16> = bytes
133        .chunks_exact(2)
134        .map(|c| match endian {
135            Endian::Le => u16::from_le_bytes([c[0], c[1]]),
136            Endian::Be => u16::from_be_bytes([c[0], c[1]]),
137        })
138        .collect();
139    String::from_utf16(&units)
140        .map_err(|e| SchemaError::resolution(format!("Invalid UTF-16 sequence: {}", e)))
141}
142
// ============================================================================
// SchemaLoader Trait and Implementations
// ============================================================================
146
147/// Trait for loading schema content from various sources.
148///
149/// Implementations can support file systems, HTTP, embedded resources, etc.
150/// The loader chain uses priority to determine which loader handles a request.
151pub trait SchemaLoader: Send + Sync + Debug {
152    /// Load schema content from the given location.
153    ///
154    /// Returns the schema content as a string, or an error if loading fails.
155    fn load(&self, location: &str) -> SchemaResult<String>;
156
157    /// Check if this loader can handle the given location.
158    ///
159    /// Used by [`LoaderChain`] to find an appropriate loader.
160    fn can_load(&self, location: &str) -> bool;
161
162    /// Priority for loader chain (higher = checked first).
163    ///
164    /// Default is 0. Embedded loader uses 100 to be checked before file system.
165    fn priority(&self) -> i32 {
166        0
167    }
168}
169
/// Loader that reads schemas from the local file system (the default).
#[derive(Debug, Clone, Default)]
pub struct FileSystemLoader {
    /// Directory intended as the anchor for relative paths. The loader does
    /// not currently consult it directly.
    pub base_dir: Option<PathBuf>,
}

impl FileSystemLoader {
    /// Build a loader with no base directory.
    pub fn new() -> Self {
        Self { base_dir: None }
    }

    /// Build a loader that remembers `base_dir` as its base directory.
    pub fn with_base_dir(base_dir: PathBuf) -> Self {
        let base_dir = Some(base_dir);
        Self { base_dir }
    }
}
192
193impl SchemaLoader for FileSystemLoader {
194    fn load(&self, location: &str) -> SchemaResult<String> {
195        let path = Path::new(location);
196        let bytes = std::fs::read(path).map_err(|e| {
197            SchemaError::resolution(format!("Failed to read file '{}': {}", location, e))
198        })?;
199        decode_xml_bytes(bytes)
200    }
201
202    fn can_load(&self, location: &str) -> bool {
203        !location.starts_with("http://")
204            && !location.starts_with("https://")
205            && !location.starts_with("embedded://")
206    }
207
208    fn priority(&self) -> i32 {
209        0
210    }
211}
212
/// Loader for the schemas that ship inside the binary.
///
/// Serves static assets addressed through the `embedded://` URI scheme.
#[derive(Debug, Clone, Default)]
pub struct EmbeddedLoader;

impl EmbeddedLoader {
    /// Build an embedded loader.
    pub fn new() -> Self {
        EmbeddedLoader
    }
}
226
227impl SchemaLoader for EmbeddedLoader {
228    fn load(&self, location: &str) -> SchemaResult<String> {
229        if let Some(rest) = location.strip_prefix("embedded://") {
230            match rest {
231                "xml.xsd" => {
232                    let bytes = crate::embedded::XML_XSD;
233                    String::from_utf8(bytes.to_vec()).map_err(|e| {
234                        SchemaError::resolution(format!("Invalid UTF-8 in embedded schema: {}", e))
235                    })
236                }
237                "xlink.xsd" => {
238                    let bytes = crate::embedded::XLINK_XSD;
239                    String::from_utf8(bytes.to_vec()).map_err(|e| {
240                        SchemaError::resolution(format!("Invalid UTF-8 in embedded schema: {}", e))
241                    })
242                }
243                _ => Err(SchemaError::resolution(format!(
244                    "Unknown embedded schema: {}",
245                    rest
246                ))),
247            }
248        } else {
249            Err(SchemaError::resolution(format!(
250                "Not an embedded location: {}",
251                location
252            )))
253        }
254    }
255
256    fn can_load(&self, location: &str) -> bool {
257        location.starts_with("embedded://")
258    }
259
260    fn priority(&self) -> i32 {
261        100 // High priority - checked before file system
262    }
263}
264
265/// Composite loader that chains multiple loaders.
266///
267/// Loaders are tried in priority order (highest first) until one can handle
268/// the requested location.
269#[derive(Debug, Default)]
270pub struct LoaderChain {
271    loaders: Vec<Box<dyn SchemaLoader>>,
272}
273
274impl LoaderChain {
275    /// Create a new empty loader chain.
276    pub fn new() -> Self {
277        Self {
278            loaders: Vec::new(),
279        }
280    }
281
282    /// Create a loader chain with default loaders (embedded + filesystem).
283    pub fn with_defaults() -> Self {
284        let mut chain = Self::new();
285        chain.add(Box::new(EmbeddedLoader::new()));
286        chain.add(Box::new(FileSystemLoader::new()));
287        chain
288    }
289
290    /// Add a loader to the chain.
291    ///
292    /// Loaders are automatically sorted by priority (highest first).
293    pub fn add(&mut self, loader: Box<dyn SchemaLoader>) {
294        self.loaders.push(loader);
295        self.loaders
296            .sort_by_key(|b| std::cmp::Reverse(b.priority()));
297    }
298
299    /// Get the number of loaders in the chain.
300    pub fn len(&self) -> usize {
301        self.loaders.len()
302    }
303
304    /// Check if the chain is empty.
305    pub fn is_empty(&self) -> bool {
306        self.loaders.is_empty()
307    }
308}
309
310impl SchemaLoader for LoaderChain {
311    fn load(&self, location: &str) -> SchemaResult<String> {
312        for loader in &self.loaders {
313            if loader.can_load(location) {
314                return loader.load(location);
315            }
316        }
317        Err(SchemaError::resolution(format!(
318            "No loader available for: {}",
319            location
320        )))
321    }
322
323    fn can_load(&self, location: &str) -> bool {
324        self.loaders.iter().any(|l| l.can_load(location))
325    }
326
327    fn priority(&self) -> i32 {
328        // Chain priority is max of all loaders
329        self.loaders.iter().map(|l| l.priority()).max().unwrap_or(0)
330    }
331}
332
// ============================================================================
// Schema Resolver
// ============================================================================
336
337/// Schema resolver for loading external schema documents.
338///
339/// Uses a [`SchemaLoader`] chain to support multiple loading strategies
340/// (file system, embedded assets, HTTP, etc.).
341pub struct SchemaResolver {
342    /// Configuration for resolution
343    pub config: ResolverConfig,
344    /// Set of locations currently being resolved (for cycle detection)
345    resolving: HashSet<String>,
346    /// Catalog for namespace-to-location mapping
347    catalog: SchemaCatalog,
348    /// Schema loader chain
349    loader: Box<dyn SchemaLoader>,
350    /// Optional async loader for non-blocking I/O (HTTP, cloud storage, etc.)
351    ///
352    /// When set, async methods use this loader instead of wrapping the sync
353    /// loader. When `None`, async methods fall back to the sync `loader`.
354    #[cfg(feature = "async")]
355    async_loader: Option<Box<dyn AsyncSchemaLoader>>,
356}
357
358/// Configuration for schema resolution
359#[derive(Debug, Clone)]
360pub struct ResolverConfig {
361    /// Base directory for resolving relative paths
362    pub base_dir: Option<PathBuf>,
363    /// Whether to allow network access for HTTP URLs
364    pub allow_network: bool,
365    /// Maximum depth for nested includes
366    pub max_depth: usize,
367    /// Parser configuration to use for resolved schemas
368    pub parser_config: ParserConfig,
369}
370
371impl Default for ResolverConfig {
372    fn default() -> Self {
373        Self {
374            base_dir: None,
375            allow_network: false,
376            max_depth: 100,
377            parser_config: ParserConfig::default(),
378        }
379    }
380}
381
/// Catalog that maps namespace URIs to schema locations.
#[derive(Debug, Clone, Default)]
pub struct SchemaCatalog {
    /// Registered mappings, in insertion order.
    entries: Vec<CatalogEntry>,
}

/// One namespace-to-location mapping of a [`SchemaCatalog`].
#[derive(Debug, Clone)]
pub struct CatalogEntry {
    /// Namespace URI.
    pub namespace: String,
    /// Schema location (file path or URL).
    pub location: String,
}

impl SchemaCatalog {
    /// Build a catalog with no entries.
    pub fn new() -> Self {
        Self {
            entries: Vec::new(),
        }
    }

    /// Append a mapping from `namespace` to `location`.
    ///
    /// Existing entries are never replaced; see [`SchemaCatalog::lookup`]
    /// for how duplicates are treated.
    pub fn add(&mut self, namespace: impl Into<String>, location: impl Into<String>) {
        let entry = CatalogEntry {
            namespace: namespace.into(),
            location: location.into(),
        };
        self.entries.push(entry);
    }

    /// Find the location registered for `namespace`.
    ///
    /// When the namespace was added more than once, the earliest entry wins.
    pub fn lookup(&self, namespace: &str) -> Option<&str> {
        for entry in &self.entries {
            if entry.namespace == namespace {
                return Some(entry.location.as_str());
            }
        }
        None
    }

    /// Register the well-known XML namespaces.
    ///
    /// The XML and XLink namespaces are mapped to `embedded://` URIs that
    /// the [`EmbeddedLoader`] resolves. The XML Schema instance namespace is
    /// mapped to an HTTP URL (it could be embedded in the future).
    pub fn add_xml_catalog(&mut self) {
        const WELL_KNOWN: [(&str, &str); 3] = [
            // XML namespace (xml:lang, xml:space, xml:base)
            ("http://www.w3.org/XML/1998/namespace", "embedded://xml.xsd"),
            // XLink namespace (xlink:type, xlink:href, etc.)
            ("http://www.w3.org/1999/xlink", "embedded://xlink.xsd"),
            // XML Schema instance namespace (xsi:type, xsi:nil, etc.)
            (
                "http://www.w3.org/2001/XMLSchema-instance",
                "http://www.w3.org/2001/XMLSchema-instance.xsd",
            ),
        ];
        for (namespace, location) in WELL_KNOWN {
            self.add(namespace, location);
        }
    }
}
439
440impl SchemaResolver {
441    /// Create a new resolver with default configuration and loader chain.
442    ///
443    /// Uses [`LoaderChain::with_defaults()`] which includes:
444    /// - [`EmbeddedLoader`] for `embedded://` URIs
445    /// - [`FileSystemLoader`] for file paths
446    pub fn new() -> Self {
447        Self {
448            config: ResolverConfig::default(),
449            resolving: HashSet::new(),
450            catalog: SchemaCatalog::new(),
451            loader: Box::new(LoaderChain::with_defaults()),
452            #[cfg(feature = "async")]
453            async_loader: None,
454        }
455    }
456
457    /// Create a resolver with the specified configuration.
458    ///
459    /// Uses the default loader chain.
460    pub fn with_config(config: ResolverConfig) -> Self {
461        Self {
462            config,
463            resolving: HashSet::new(),
464            catalog: SchemaCatalog::new(),
465            loader: Box::new(LoaderChain::with_defaults()),
466            #[cfg(feature = "async")]
467            async_loader: None,
468        }
469    }
470
471    /// Create a resolver with a custom loader.
472    ///
473    /// # Example
474    /// ```
475    /// use xsd_schema::{SchemaResolver, LoaderChain};
476    ///
477    /// let loader = LoaderChain::with_defaults();
478    /// let resolver = SchemaResolver::with_loader(Box::new(loader));
479    /// ```
480    pub fn with_loader(loader: Box<dyn SchemaLoader>) -> Self {
481        Self {
482            config: ResolverConfig::default(),
483            resolving: HashSet::new(),
484            catalog: SchemaCatalog::new(),
485            loader,
486            #[cfg(feature = "async")]
487            async_loader: None,
488        }
489    }
490
491    /// Create a resolver with custom configuration and loader.
492    pub fn with_config_and_loader(config: ResolverConfig, loader: Box<dyn SchemaLoader>) -> Self {
493        Self {
494            config,
495            resolving: HashSet::new(),
496            catalog: SchemaCatalog::new(),
497            loader,
498            #[cfg(feature = "async")]
499            async_loader: None,
500        }
501    }
502
503    /// Create a resolver with a custom async loader for non-blocking I/O.
504    ///
505    /// The async loader is used by `load_content_async` and `load_schema_async`.
506    /// The default sync loader chain is still used for sync methods.
507    #[cfg(feature = "async")]
508    pub fn with_async_loader(async_loader: Box<dyn AsyncSchemaLoader>) -> Self {
509        Self {
510            config: ResolverConfig::default(),
511            resolving: HashSet::new(),
512            catalog: SchemaCatalog::new(),
513            loader: Box::new(LoaderChain::with_defaults()),
514            async_loader: Some(async_loader),
515        }
516    }
517
518    /// Create a resolver with custom configuration and an async loader.
519    #[cfg(feature = "async")]
520    pub fn with_config_and_async_loader(
521        config: ResolverConfig,
522        async_loader: Box<dyn AsyncSchemaLoader>,
523    ) -> Self {
524        Self {
525            config,
526            resolving: HashSet::new(),
527            catalog: SchemaCatalog::new(),
528            loader: Box::new(LoaderChain::with_defaults()),
529            async_loader: Some(async_loader),
530        }
531    }
532
533    /// Get a mutable reference to the catalog
534    pub fn catalog_mut(&mut self) -> &mut SchemaCatalog {
535        &mut self.catalog
536    }
537
538    /// Resolve a schema location to an absolute path or URL
539    pub fn resolve_location(&self, schema_location: &str, base_uri: &str) -> SchemaResult<String> {
540        // Check if it's already absolute
541        if is_absolute_uri(schema_location) {
542            return Ok(schema_location.to_string());
543        }
544
545        // Try to resolve relative to base URI
546        let resolved = resolve_relative_uri(schema_location, base_uri)?;
547        Ok(resolved)
548    }
549
550    /// Load and parse a schema from a location.
551    ///
552    /// Returns a [`LoadOutcome`] distinguishing freshly loaded, already loaded,
553    /// and cycle-in-progress cases.
554    ///
555    /// If `chameleon_namespace` is `Some` and the loaded schema has no
556    /// `targetNamespace`, the chameleon namespace is adopted per §4.2.3.
557    pub fn load_schema(
558        &mut self,
559        location: &str,
560        base_uri: &str,
561        schema_set: &mut SchemaSet,
562        chameleon_namespace: Option<NameId>,
563    ) -> SchemaResult<LoadOutcome> {
564        // Resolve the location
565        let resolved = self.resolve_location(location, base_uri)?;
566
567        // Check if already loaded (chameleon-aware).
568        if let Some(id) = check_loaded_cache(schema_set, &resolved, chameleon_namespace) {
569            return Ok(LoadOutcome::AlreadyLoaded(id));
570        }
571
572        // Check for circular resolution
573        if self.resolving.contains(&resolved) {
574            // Circular include is allowed, just skip
575            return Ok(LoadOutcome::Cycle(resolved));
576        }
577
578        // Mark as being resolved (cycle detection)
579        self.resolving.insert(resolved.clone());
580
581        // Load the schema content — clean up resolving set on error
582        let content = match self.load_content(&resolved) {
583            Ok(c) => c,
584            Err(e) => {
585                self.resolving.remove(&resolved);
586                return Err(e);
587            }
588        };
589
590        // Parse the schema — clean up resolving set on error.
591        // Apply chameleon namespace adoption if specified.
592        let doc_id = match crate::parser::parse::parse_schema_with_chameleon(
593            content.as_bytes(),
594            &resolved,
595            schema_set,
596            &self.config.parser_config,
597            chameleon_namespace,
598        ) {
599            Ok(id) => id,
600            Err(e) => {
601                self.resolving.remove(&resolved);
602                return Err(e);
603            }
604        };
605
606        // Mark as loaded (chameleon-aware).
607        mark_loaded_chameleon_aware(schema_set, &resolved, doc_id, chameleon_namespace);
608
609        // Remove from resolving set
610        self.resolving.remove(&resolved);
611
612        Ok(LoadOutcome::Loaded(doc_id))
613    }
614
615    /// Load content from a location using the configured loader chain.
616    ///
617    /// Supports embedded://, file paths, and potentially HTTP (if configured).
618    pub fn load_content(&self, location: &str) -> SchemaResult<String> {
619        // Check network access for HTTP URLs
620        if (location.starts_with("http://") || location.starts_with("https://"))
621            && !self.config.allow_network
622        {
623            return Err(SchemaError::resolution(format!(
624                "Network access not allowed for: {}",
625                location
626            )));
627        }
628
629        // Use the loader chain
630        self.loader.load(location)
631    }
632
633    /// Process an include directive.
634    ///
635    /// Passes `target_namespace` as the chameleon namespace: if the included
636    /// schema has no `targetNamespace`, it adopts the includer's (§4.2.3).
637    pub fn process_include(
638        &mut self,
639        schema_location: &str,
640        base_uri: &str,
641        target_namespace: Option<NameId>,
642        schema_set: &mut SchemaSet,
643    ) -> SchemaResult<LoadOutcome> {
644        self.load_schema(schema_location, base_uri, schema_set, target_namespace)
645    }
646
647    /// Process an import directive.
648    ///
649    /// Returns `Ok(None)` only when there is no `schemaLocation` and no
650    /// catalog match (namespace-only import). All other paths return
651    /// `Ok(Some(LoadOutcome))`.
652    ///
653    /// Per src-import (§4.2.6.1) the loaded schema's `targetNamespace` is
654    /// validated against the directive's `namespace` attribute:
655    /// 1.1 (`namespace` absent) — the imported schema must have an absent
656    ///     `targetNamespace`.
657    /// 1.2 (`namespace` present) — the imported schema's `targetNamespace`
658    ///     must equal the directive's `namespace` value.
659    pub fn process_import(
660        &mut self,
661        namespace: Option<&str>,
662        schema_location: Option<&str>,
663        base_uri: &str,
664        schema_set: &mut SchemaSet,
665    ) -> SchemaResult<Option<LoadOutcome>> {
666        // Import does not do chameleon namespace adoption.
667        //
668        // Resolution order:
669        //   1. Explicit `schemaLocation` (when loadable). The user's hint wins
670        //      over a catalog entry whenever the file can actually be read,
671        //      which is what saxon `over030` (xml-namespace overlay) relies
672        //      on.
673        //   2. Catalog redirect (used when no schemaLocation is given OR the
674        //      schemaLocation cannot be loaded — typically because it points
675        //      at a remote URL with network access disabled). This keeps
676        //      embedded `xml`/`xlink` schemas in play for the W3C suite's
677        //      `xsts.xsd` harness, which references `http://...xlink.xsd`.
678        //   3. Namespace-only import (relies on the namespace being loaded
679        //      elsewhere or provided externally).
680        if let Some(location) = schema_location {
681            match self.load_schema(location, base_uri, schema_set, None) {
682                Ok(outcome) => {
683                    validate_import_target_namespace(schema_set, &outcome, namespace)?;
684                    return Ok(Some(outcome));
685                }
686                Err(load_err) => {
687                    if let Some(cat_loc) = namespace.and_then(|ns| self.catalog.lookup(ns)) {
688                        let cat_loc = cat_loc.to_string();
689                        return self.try_catalog_load(&cat_loc, base_uri, namespace, schema_set);
690                    }
691                    return Err(load_err);
692                }
693            }
694        }
695
696        if let Some(cat_loc) = namespace.and_then(|ns| self.catalog.lookup(ns)) {
697            let cat_loc = cat_loc.to_string();
698            return self.try_catalog_load(&cat_loc, base_uri, namespace, schema_set);
699        }
700
701        // Import without schemaLocation and no catalog entry is allowed
702        // (the namespace might already be loaded or provided externally)
703        Ok(None)
704    }
705
706    /// Load a catalog-resolved import target, validating namespace match.
707    /// Returns `Ok(None)` when the catalog entry is already loaded, or when
708    /// the requested namespace is already covered by a previously loaded
709    /// schema document — embedded catalog schemas (xml.xsd, xlink.xsd) are
710    /// only injected when nothing else has supplied that namespace, so a
711    /// user-provided overlay (msData/additional `test264908_*` chain;
712    /// saxon `over030`) does not collide with the embedded copy.
713    fn try_catalog_load(
714        &mut self,
715        catalog_location: &str,
716        base_uri: &str,
717        namespace: Option<&str>,
718        schema_set: &mut SchemaSet,
719    ) -> SchemaResult<Option<LoadOutcome>> {
720        let already_loaded = self
721            .resolve_location(catalog_location, base_uri)
722            .ok()
723            .is_some_and(|r| schema_set.loaded_locations.contains_key(&r));
724        if already_loaded {
725            return Ok(None);
726        }
727        if namespace_already_covered(schema_set, namespace) {
728            return Ok(None);
729        }
730        let outcome = self.load_schema(catalog_location, base_uri, schema_set, None)?;
731        validate_import_target_namespace(schema_set, &outcome, namespace)?;
732        Ok(Some(outcome))
733    }
734
735    /// Process a redefine directive.
736    ///
737    /// Passes `target_namespace` as the chameleon namespace: if the redefined
738    /// schema has no `targetNamespace`, it adopts the redefiner's (§4.2.4).
739    pub fn process_redefine(
740        &mut self,
741        schema_location: &str,
742        base_uri: &str,
743        target_namespace: Option<NameId>,
744        schema_set: &mut SchemaSet,
745    ) -> SchemaResult<LoadOutcome> {
746        self.load_schema(schema_location, base_uri, schema_set, target_namespace)
747    }
748
749    /// Process an override directive (XSD 1.1).
750    ///
751    /// Passes `target_namespace` as the chameleon namespace: if the overridden
752    /// schema has no `targetNamespace`, it adopts the overrider's (§4.2.5).
753    #[cfg(feature = "xsd11")]
754    pub fn process_override(
755        &mut self,
756        schema_location: &str,
757        base_uri: &str,
758        target_namespace: Option<NameId>,
759        schema_set: &mut SchemaSet,
760    ) -> SchemaResult<LoadOutcome> {
761        self.load_schema(schema_location, base_uri, schema_set, target_namespace)
762    }
763}
764
765impl Default for SchemaResolver {
766    fn default() -> Self {
767        Self::new()
768    }
769}
770
771/// Validate that an `<xs:import>` directive's `namespace` attribute matches
772/// the loaded schema's `targetNamespace`, per src-import (§4.2.6.1):
773///
774/// - 1.1: `namespace` absent ⇒ imported schema must have absent
775///   `targetNamespace`.
776/// - 1.2: `namespace` present ⇒ imported schema must have a `targetNamespace`
777///   equal to that value.
778///
779/// Re-checks both freshly `Loaded` and `AlreadyLoaded` outcomes so a stale
780/// duplicate import that disagrees with a previously-loaded document fails
781/// the same way as the first import would.
782/// Whether any already-loaded schema document declares (or chameleon-adopts)
783/// the given namespace URI. Used to suppress a redundant catalog fallback
784/// that would otherwise inject the embedded copy of `xml.xsd` / `xlink.xsd`
785/// on top of a user-supplied schema for the same namespace (msData
786/// `test264908_*` chain; saxon `over030`).
787fn namespace_already_covered(schema_set: &SchemaSet, namespace: Option<&str>) -> bool {
788    let Some(ns_str) = namespace else {
789        return false;
790    };
791    let Some(ns_id) = schema_set.name_table.get(ns_str) else {
792        return false;
793    };
794    schema_set.documents.iter().any(|d| {
795        d.declared_target_namespace == Some(ns_id) || d.target_namespace == Some(ns_id)
796    })
797}
798
799fn validate_import_target_namespace(
800    schema_set: &SchemaSet,
801    outcome: &LoadOutcome,
802    namespace: Option<&str>,
803) -> SchemaResult<()> {
804    let doc_id = match outcome {
805        LoadOutcome::Loaded(id) | LoadOutcome::AlreadyLoaded(id) => *id,
806        LoadOutcome::Cycle(_) => return Ok(()),
807    };
808    let Some(doc) = schema_set.documents.get(doc_id as usize) else {
809        return Ok(());
810    };
811    let imported_tns = doc
812        .target_namespace
813        .map(|n| schema_set.name_table.resolve_ref(n));
814    if namespace == imported_tns {
815        return Ok(());
816    }
817    let msg = match (namespace, imported_tns) {
818        (None, Some(tns)) => format!(
819            "Import directive has no namespace attribute, but imported schema has \
820             targetNamespace='{}' (src-import clause 1.1 requires absent targetNamespace)",
821            tns
822        ),
823        (Some(ns), None) => format!(
824            "Import directive namespace='{}' does not match imported schema's absent \
825             targetNamespace (src-import clause 1.2)",
826            ns
827        ),
828        (Some(ns), Some(tns)) => format!(
829            "Import directive namespace='{}' does not match imported schema's \
830             targetNamespace='{}' (src-import clause 1.2)",
831            ns, tns
832        ),
833        (None, None) => unreachable!("handled by early return above"),
834    };
835    Err(SchemaError::structural("src-import", msg, None))
836}
837
// ============================================================================
// Async Schema Loading (feature = "async")
// ============================================================================
841
/// Asynchronous source of schema document text.
///
/// Implement this for loaders that can fetch content without blocking
/// (HTTP, cloud storage, ...). Hand a `Box<dyn AsyncSchemaLoader>` to
/// [`SchemaResolver::with_async_loader`] to have the async resolver methods
/// use it.
///
/// If no async loader is configured, or the configured one declines a
/// location via [`can_load`](AsyncSchemaLoader::can_load), the async resolver
/// methods fall back to the sync [`SchemaLoader`], which blocks the current
/// task.
///
/// `load_async` returns a boxed, pinned future rather than being an
/// `async fn`, which keeps the trait object-safe so it can be stored as
/// `Box<dyn AsyncSchemaLoader>`.
#[cfg(feature = "async")]
pub trait AsyncSchemaLoader: Debug + Send + Sync {
    /// Report whether this loader is able to serve `location`.
    fn can_load(&self, location: &str) -> bool;

    /// Fetch the schema text stored at `location`.
    ///
    /// The returned future borrows `self` and resolves to the document
    /// content or a [`SchemaError`].
    fn load_async(
        &self,
        location: &str,
    ) -> Pin<Box<dyn std::future::Future<Output = SchemaResult<String>> + Send + '_>>;
}
864
#[cfg(feature = "async")]
impl SchemaResolver {
    /// Fetch the raw text of the schema at `location`.
    ///
    /// HTTP(S) locations are rejected with a resolution error unless
    /// `config.allow_network` is set. Otherwise the configured
    /// [`AsyncSchemaLoader`] is used when it reports it can handle the
    /// location; in every other case the sync [`SchemaLoader`] is called
    /// directly (blocking).
    pub async fn load_content_async(&self, location: &str) -> SchemaResult<String> {
        let is_http = location.starts_with("http://") || location.starts_with("https://");
        if is_http && !self.config.allow_network {
            return Err(SchemaError::resolution(format!(
                "Network access not allowed for: {}",
                location
            )));
        }

        match self.async_loader.as_ref() {
            // Only delegate when the async loader claims the location.
            Some(async_loader) if async_loader.can_load(location) => {
                async_loader.load_async(location).await
            }
            // Sync loader chain (embedded, filesystem, etc.).
            _ => self.loader.load(location),
        }
    }

    /// Resolve `location` against `base_uri`, then load and parse it into
    /// `schema_set` using async content loading.
    ///
    /// * `chameleon_namespace` - namespace to adopt for a no-namespace
    ///   document; also used when consulting and updating the load caches.
    ///
    /// Returns [`LoadOutcome::AlreadyLoaded`] on a cache hit,
    /// [`LoadOutcome::Cycle`] when the resolved location is currently being
    /// resolved, and [`LoadOutcome::Loaded`] for a freshly parsed document.
    /// Load and parse errors are returned after the in-progress marker has
    /// been cleared.
    pub async fn load_schema_async(
        &mut self,
        location: &str,
        base_uri: &str,
        schema_set: &mut SchemaSet,
        chameleon_namespace: Option<NameId>,
    ) -> SchemaResult<LoadOutcome> {
        let resolved = self.resolve_location(location, base_uri)?;

        // Cache lookup (chameleon-aware).
        if let Some(cached) = check_loaded_cache(schema_set, &resolved, chameleon_namespace) {
            return Ok(LoadOutcome::AlreadyLoaded(cached));
        }

        // `insert` reports `false` when the location is already in flight,
        // which is exactly the cycle case.
        if !self.resolving.insert(resolved.clone()) {
            return Ok(LoadOutcome::Cycle(resolved));
        }

        // Load asynchronously, then parse synchronously (CPU-bound), applying
        // chameleon namespace adoption if requested.
        let parsed = match self.load_content_async(&resolved).await {
            Ok(content) => crate::parser::parse::parse_schema_with_chameleon(
                content.as_bytes(),
                &resolved,
                schema_set,
                &self.config.parser_config,
                chameleon_namespace,
            ),
            Err(load_err) => Err(load_err),
        };

        // Single cleanup point: the marker is dropped on success and failure.
        self.resolving.remove(&resolved);

        let doc_id = parsed?;
        mark_loaded_chameleon_aware(schema_set, &resolved, doc_id, chameleon_namespace);
        Ok(LoadOutcome::Loaded(doc_id))
    }
}
954
/// Resolve all directives in a schema document asynchronously.
///
/// Mirrors [`resolve_all_directives`] but loads content through
/// [`SchemaResolver::load_schema_async`]:
///
/// * `result.loaded` receives only freshly loaded documents
///   ([`LoadOutcome::Loaded`]); already-loaded targets are linked via
///   `resolved_doc_id` but not reported again.
/// * `result.skipped` receives the location of every directive whose target
///   was reported as a cycle.
/// * include/redefine/override failures go to `result.errors`, import load
///   failures to `result.import_errors`.
/// * The same `src-include` / `src-import` structural checks as the sync
///   variant are applied.
///
/// For imports, a catalog entry for the namespace takes priority over the
/// directive's `schemaLocation`.
#[cfg(feature = "async")]
pub async fn resolve_all_directives_async(
    doc_id: DocumentId,
    resolver: &mut SchemaResolver,
    schema_set: &mut SchemaSet,
) -> ResolutionResult {
    let mut result = ResolutionResult::default();

    // Get the document
    let doc = match schema_set.documents.get(doc_id as usize) {
        Some(d) => d,
        None => {
            result.errors.push(SchemaError::internal(format!(
                "Document {} not found",
                doc_id
            )));
            return result;
        }
    };

    let base_uri = doc.base_uri.clone();
    let target_namespace = doc.target_namespace;

    // Clone directives to avoid borrow issues
    let includes: Vec<_> = doc.includes.to_vec();
    let imports: Vec<_> = doc.imports.to_vec();
    let redefines: Vec<_> = doc.redefines.to_vec();
    #[cfg(feature = "xsd11")]
    let overrides: Vec<_> = doc.overrides.to_vec();

    // Process includes (pass chameleon namespace)
    for (i, include) in includes.iter().enumerate() {
        match resolver
            .load_schema_async(
                &include.schema_location,
                &base_uri,
                schema_set,
                target_namespace,
            )
            .await
        {
            Ok(ref outcome) => {
                match outcome {
                    LoadOutcome::Loaded(id) | LoadOutcome::AlreadyLoaded(id) => {
                        schema_set.documents[doc_id as usize].includes[i].resolved_doc_id =
                            Some(*id);
                        // Only freshly loaded documents are reported, matching
                        // the sync resolver.
                        if matches!(outcome, LoadOutcome::Loaded(_)) {
                            result.loaded.push(*id);
                        }
                        // src-include §4.2.3 clause 2.2: when the including schema
                        // has no `targetNamespace`, the included schema must also
                        // have absent `targetNamespace`.
                        if target_namespace.is_none() {
                            let included_declared = schema_set
                                .documents
                                .get(*id as usize)
                                .and_then(|d| d.declared_target_namespace);
                            if let Some(declared) = included_declared {
                                let location = include
                                    .source
                                    .as_ref()
                                    .and_then(|s| schema_set.source_maps.locate(s));
                                let declared_str =
                                    schema_set.name_table.resolve(declared).to_string();
                                result.errors.push(SchemaError::structural(
                                    "src-include",
                                    format!(
                                        "Included schema has targetNamespace '{}' \
                                         but the including schema has no \
                                         targetNamespace",
                                        declared_str
                                    ),
                                    location,
                                ));
                            }
                        }
                    }
                    LoadOutcome::Cycle(_) => {
                        result.skipped.push(include.schema_location.clone());
                    }
                }
                record_edge(
                    schema_set,
                    doc_id,
                    outcome,
                    CompositionEdgeKind::Include,
                    include.source.as_ref(),
                    &include.schema_location,
                );
            }
            Err(e) => result.errors.push(e),
        }
    }

    // Process imports — catalog takes priority over schemaLocation
    for (i, import) in imports.iter().enumerate() {
        // src-import §4.2.3 (XSD 1.0): the namespace of an `<xs:import>`
        // must not be the same as the enclosing schema's targetNamespace.
        // XSD 1.1 relaxed this, so the check is version-gated.
        if schema_set.is_xsd10() {
            if let Some(import_ns_str) = import.namespace.as_deref() {
                let tns_str = target_namespace.map(|n| schema_set.name_table.resolve(n));
                if Some(import_ns_str) == tns_str.as_deref() {
                    result.errors.push(SchemaError::structural(
                        "src-import",
                        format!(
                            "xs:import namespace '{}' must not equal the enclosing \
                             schema's targetNamespace in XSD 1.0",
                            import_ns_str
                        ),
                        import
                            .source
                            .as_ref()
                            .and_then(|s| schema_set.source_maps.locate(s)),
                    ));
                    continue;
                }
            }
        }
        // src-import clause 1.2: if the `namespace` attribute is absent, the
        // enclosing <schema> must have a targetNamespace attribute.
        if import.namespace.is_none() && target_namespace.is_none() {
            result.errors.push(SchemaError::structural(
                "src-import",
                "xs:import without 'namespace' requires the enclosing schema to have \
                 a 'targetNamespace' attribute",
                import
                    .source
                    .as_ref()
                    .and_then(|s| schema_set.source_maps.locate(s)),
            ));
            continue;
        }
        // NOTE(review): the sync path delegates to `process_import`, which is
        // not visible here; confirm whether it performs further checks (e.g.
        // `validate_import_target_namespace`) that should be mirrored.

        // Check catalog first (namespace mapping overrides location hints)
        let catalog_location = import
            .namespace
            .as_deref()
            .and_then(|ns| resolver.catalog.lookup(ns).map(|l| l.to_string()));

        let location: String = if let Some(location) = catalog_location {
            let catalog_already_loaded = resolver
                .resolve_location(&location, &base_uri)
                .ok()
                .is_some_and(|r| schema_set.loaded_locations.contains_key(&r));
            if catalog_already_loaded {
                continue;
            }
            location
        } else if let Some(location) = import.schema_location.as_deref() {
            location.to_string()
        } else {
            // No catalog entry and no schemaLocation hint — nothing to load.
            continue;
        };

        match resolver
            .load_schema_async(&location, &base_uri, schema_set, None)
            .await
        {
            Ok(ref outcome) => {
                match outcome {
                    LoadOutcome::Loaded(id) => {
                        result.loaded.push(*id);
                        schema_set.documents[doc_id as usize].imports[i].resolved_doc_id =
                            Some(*id);
                    }
                    LoadOutcome::AlreadyLoaded(id) => {
                        // Already processed — record doc_id but don't add to loaded.
                        schema_set.documents[doc_id as usize].imports[i].resolved_doc_id =
                            Some(*id);
                    }
                    LoadOutcome::Cycle(_) => {
                        result.skipped.push(location.clone());
                    }
                }
                record_edge(
                    schema_set,
                    doc_id,
                    outcome,
                    CompositionEdgeKind::Import,
                    import.source.as_ref(),
                    &location,
                );
            }
            Err(e) => result.import_errors.push(e),
        }
    }

    // Process redefines (pass chameleon namespace)
    for (i, redefine) in redefines.iter().enumerate() {
        match resolver
            .load_schema_async(
                &redefine.schema_location,
                &base_uri,
                schema_set,
                target_namespace,
            )
            .await
        {
            Ok(ref outcome) => {
                match outcome {
                    LoadOutcome::Loaded(id) => {
                        result.loaded.push(*id);
                        schema_set.documents[doc_id as usize].redefines[i].resolved_doc_id =
                            Some(*id);
                    }
                    LoadOutcome::AlreadyLoaded(id) => {
                        schema_set.documents[doc_id as usize].redefines[i].resolved_doc_id =
                            Some(*id);
                    }
                    LoadOutcome::Cycle(_) => {
                        result.skipped.push(redefine.schema_location.clone());
                    }
                }
                record_edge(
                    schema_set,
                    doc_id,
                    outcome,
                    CompositionEdgeKind::Redefine,
                    redefine.source.as_ref(),
                    &redefine.schema_location,
                );
            }
            Err(e) => result.errors.push(e),
        }
    }

    // Process overrides (XSD 1.1, pass chameleon namespace)
    #[cfg(feature = "xsd11")]
    for (i, override_dir) in overrides.iter().enumerate() {
        match resolver
            .load_schema_async(
                &override_dir.schema_location,
                &base_uri,
                schema_set,
                target_namespace,
            )
            .await
        {
            Ok(ref outcome) => {
                match outcome {
                    LoadOutcome::Loaded(id) => {
                        result.loaded.push(*id);
                        schema_set.documents[doc_id as usize].overrides[i].resolved_doc_id =
                            Some(*id);
                    }
                    LoadOutcome::AlreadyLoaded(id) => {
                        schema_set.documents[doc_id as usize].overrides[i].resolved_doc_id =
                            Some(*id);
                    }
                    LoadOutcome::Cycle(_) => {
                        result.skipped.push(override_dir.schema_location.clone());
                    }
                }
                record_edge(
                    schema_set,
                    doc_id,
                    outcome,
                    CompositionEdgeKind::Override,
                    override_dir.source.as_ref(),
                    &override_dir.schema_location,
                );
            }
            Err(e) => result.errors.push(e),
        }
    }

    result
}
1150
/// Check if a URI is absolute.
///
/// A location counts as absolute when it starts with one of the schemes this
/// resolver understands (`http://`, `https://`, `file://`, `embedded://`),
/// starts with `/`, or — on Windows builds only — has `:` as its second byte
/// (a drive-letter path such as `C:\schemas\a.xsd`).
fn is_absolute_uri(uri: &str) -> bool {
    const SCHEMES: [&str; 4] = ["http://", "https://", "file://", "embedded://"];

    SCHEMES.iter().any(|scheme| uri.starts_with(*scheme))
        // Compare bytes rather than slicing the `str`: `&uri[1..2]` panics
        // when byte 1 falls inside a multi-byte character (e.g. "éa.xsd").
        || (cfg!(windows) && uri.as_bytes().get(1) == Some(&b':'))
        || uri.starts_with('/')
}
1161
1162/// Resolve a relative URI against a base URI
1163fn resolve_relative_uri(relative: &str, base: &str) -> SchemaResult<String> {
1164    // Simple implementation for file paths
1165    if base.starts_with("http://") || base.starts_with("https://") {
1166        // URL base
1167        resolve_relative_url(relative, base)
1168    } else {
1169        // File path base
1170        resolve_relative_path(relative, base)
1171    }
1172}
1173
1174/// Resolve a relative URL against a base URL
1175fn resolve_relative_url(relative: &str, base: &str) -> SchemaResult<String> {
1176    // Find the last slash in the base URL (excluding protocol slashes)
1177    let base_without_file = if let Some(pos) = base.rfind('/') {
1178        // Check if this slash is after the protocol
1179        if pos > base.find("://").map_or(0, |p| p + 2) {
1180            &base[..=pos]
1181        } else {
1182            base
1183        }
1184    } else {
1185        base
1186    };
1187
1188    Ok(format!("{}{}", base_without_file, relative))
1189}
1190
1191/// Resolve a relative file path against a base file path
1192fn resolve_relative_path(relative: &str, base: &str) -> SchemaResult<String> {
1193    let base_path = Path::new(base);
1194    let base_dir = base_path.parent().unwrap_or(Path::new("."));
1195    let resolved = base_dir.join(relative);
1196
1197    // Normalize the path
1198    let normalized = normalize_path(&resolved);
1199
1200    Ok(normalized.to_string_lossy().into_owned())
1201}
1202
/// Normalize a path lexically by resolving `.` and `..` components.
///
/// No file-system access is performed (symlinks are not followed).
///
/// * `.` components are dropped.
/// * `..` removes the preceding normal component when there is one.
/// * `..` directly after the root is dropped (the root has no parent).
/// * `..` with nothing to cancel — at the start of a relative path or after
///   another kept `..` — is preserved, so `../a` stays `../a` instead of
///   silently turning into `a`.
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut result = PathBuf::new();

    for component in path.components() {
        match component {
            Component::CurDir => {
                // Skip current dir
            }
            Component::ParentDir => {
                let last = result.components().next_back();
                let cancels_normal = matches!(last, Some(Component::Normal(_)));
                let at_root = matches!(last, Some(Component::RootDir));
                if cancels_normal {
                    result.pop();
                } else if !at_root {
                    // Nothing to cancel: keep the `..` so the path still
                    // points outside its starting directory.
                    result.push(Component::ParentDir);
                }
            }
            other => result.push(other),
        }
    }

    result
}
1223
1224/// Result of resolving all directives in a schema
1225#[derive(Debug, Default)]
1226pub struct ResolutionResult {
1227    /// Document IDs of successfully loaded schemas
1228    pub loaded: Vec<DocumentId>,
1229    /// Errors from include/redefine/override directives
1230    pub errors: Vec<SchemaError>,
1231    /// Errors from xs:import directives
1232    pub import_errors: Vec<SchemaError>,
1233    /// Schemas that were already loaded (circular references)
1234    pub skipped: Vec<String>,
1235}
1236
1237impl ResolutionResult {
1238    /// Check if resolution was fully successful
1239    pub fn is_ok(&self) -> bool {
1240        self.errors.is_empty() && self.import_errors.is_empty()
1241    }
1242
1243    /// Check if any schemas were loaded
1244    pub fn has_loaded(&self) -> bool {
1245        !self.loaded.is_empty()
1246    }
1247}
1248
1249/// Record a composition edge from a [`LoadOutcome`].
1250///
1251/// Edges are always recorded. For `Cycle` outcomes, `target_doc` is `None`
1252/// and will be filled in by [`fixup_composition_edges`] after resolution.
1253fn record_edge(
1254    schema_set: &mut SchemaSet,
1255    source_doc: DocumentId,
1256    outcome: &LoadOutcome,
1257    kind: CompositionEdgeKind,
1258    source: Option<&crate::parser::location::SourceRef>,
1259    schema_location: &str,
1260) {
1261    let (target_doc, resolved_location) = match outcome {
1262        LoadOutcome::Loaded(id) | LoadOutcome::AlreadyLoaded(id) => {
1263            // The resolved URI is stored as the document's base_uri during parsing.
1264            let loc = schema_set.documents[*id as usize].base_uri.clone();
1265            (Some(*id), loc)
1266        }
1267        LoadOutcome::Cycle(resolved) => (None, resolved.clone()),
1268    };
1269    schema_set.composition_edges.push(CompositionEdge {
1270        source_doc,
1271        target_doc,
1272        resolved_location,
1273        kind,
1274        source: source.cloned(),
1275        schema_location: schema_location.to_string(),
1276    });
1277}
1278
1279/// Check the chameleon-aware loaded-location caches for a previously loaded
1280/// document.  Returns `Some(doc_id)` when the cached document is compatible
1281/// with the requested `chameleon_namespace`, `None` otherwise.
1282///
1283/// Shared by both `load_schema` (sync) and `load_schema_async`.
1284fn check_loaded_cache(
1285    schema_set: &SchemaSet,
1286    resolved: &str,
1287    chameleon_namespace: Option<NameId>,
1288) -> Option<DocumentId> {
1289    // Check chameleon-specific cache first.
1290    if let Some(ns) = chameleon_namespace {
1291        if let Some(&id) = schema_set.chameleon_cache.get(&(resolved.to_owned(), ns)) {
1292            return Some(id);
1293        }
1294    }
1295    // Then check primary cache with reusability check.
1296    if let Some(&id) = schema_set.loaded_locations.get(resolved) {
1297        let reusable = schema_set.documents.get(id as usize).is_none_or(|doc| {
1298            if doc.is_chameleon() {
1299                false
1300            } else if doc.target_namespace.is_some() {
1301                true
1302            } else {
1303                // Raw no-namespace document — reusable only when no
1304                // chameleon adoption is requested (§4.2.3).
1305                chameleon_namespace.is_none()
1306            }
1307        });
1308        if reusable {
1309            return Some(id);
1310        }
1311    }
1312    None
1313}
1314
1315/// Record a freshly loaded document in the appropriate caches.
1316///
1317/// Chameleon variants are stored in `chameleon_cache`; the primary
1318/// `loaded_locations` only gets the first entry per URI.
1319///
1320/// Shared by both `load_schema` (sync) and `load_schema_async`.
1321fn mark_loaded_chameleon_aware(
1322    schema_set: &mut SchemaSet,
1323    resolved: &str,
1324    doc_id: DocumentId,
1325    chameleon_namespace: Option<NameId>,
1326) {
1327    let doc_is_chameleon = schema_set
1328        .documents
1329        .get(doc_id as usize)
1330        .is_some_and(|doc| doc.is_chameleon());
1331    if doc_is_chameleon {
1332        if let Some(ns) = chameleon_namespace {
1333            schema_set
1334                .chameleon_cache
1335                .insert((resolved.to_owned(), ns), doc_id);
1336        }
1337    }
1338    if !schema_set.loaded_locations.contains_key(resolved) {
1339        schema_set.mark_loaded(resolved.to_owned(), doc_id);
1340    }
1341}
1342
1343/// Fixup pass: fill in `target_doc` on cycle edges whose target has since
1344/// been loaded. Call after all directive resolution rounds complete.
1345pub fn fixup_composition_edges(schema_set: &mut SchemaSet) {
1346    for edge in &mut schema_set.composition_edges {
1347        if edge.target_doc.is_none() {
1348            edge.target_doc = schema_set
1349                .loaded_locations
1350                .get(&edge.resolved_location)
1351                .copied();
1352        }
1353    }
1354}
1355
1356/// Resolve all directives in a schema document
1357pub fn resolve_all_directives(
1358    doc_id: DocumentId,
1359    resolver: &mut SchemaResolver,
1360    schema_set: &mut SchemaSet,
1361) -> ResolutionResult {
1362    let mut result = ResolutionResult::default();
1363
1364    // Get the document
1365    let doc = match schema_set.documents.get(doc_id as usize) {
1366        Some(d) => d,
1367        None => {
1368            result.errors.push(SchemaError::internal(format!(
1369                "Document {} not found",
1370                doc_id
1371            )));
1372            return result;
1373        }
1374    };
1375
1376    let base_uri = doc.base_uri.clone();
1377    let target_namespace = doc.target_namespace;
1378
1379    // Clone directives to avoid borrow issues
1380    let includes: Vec<_> = doc.includes.to_vec();
1381    let imports: Vec<_> = doc.imports.to_vec();
1382    let redefines: Vec<_> = doc.redefines.to_vec();
1383    #[cfg(feature = "xsd11")]
1384    let overrides: Vec<_> = doc.overrides.to_vec();
1385
1386    // Process includes
1387    for (i, include) in includes.iter().enumerate() {
1388        match resolver.process_include(
1389            &include.schema_location,
1390            &base_uri,
1391            target_namespace,
1392            schema_set,
1393        ) {
1394            Ok(ref outcome) => {
1395                match outcome {
1396                    LoadOutcome::Loaded(id) | LoadOutcome::AlreadyLoaded(id) => {
1397                        schema_set.documents[doc_id as usize].includes[i].resolved_doc_id =
1398                            Some(*id);
1399                        if matches!(outcome, LoadOutcome::Loaded(_)) {
1400                            result.loaded.push(*id);
1401                        }
1402                        // src-include §4.2.3 clause 2.2: when the including schema
1403                        // has no `targetNamespace`, the included schema must also
1404                        // have absent `targetNamespace`.
1405                        if target_namespace.is_none() {
1406                            let included_declared = schema_set
1407                                .documents
1408                                .get(*id as usize)
1409                                .and_then(|d| d.declared_target_namespace);
1410                            if let Some(declared) = included_declared {
1411                                let location = include
1412                                    .source
1413                                    .as_ref()
1414                                    .and_then(|s| schema_set.source_maps.locate(s));
1415                                let declared_str =
1416                                    schema_set.name_table.resolve(declared).to_string();
1417                                result.errors.push(SchemaError::structural(
1418                                    "src-include",
1419                                    format!(
1420                                        "Included schema has targetNamespace '{}' \
1421                                         but the including schema has no \
1422                                         targetNamespace",
1423                                        declared_str
1424                                    ),
1425                                    location,
1426                                ));
1427                            }
1428                        }
1429                    }
1430                    _ => {
1431                        result.skipped.push(include.schema_location.clone());
1432                    }
1433                }
1434                record_edge(
1435                    schema_set,
1436                    doc_id,
1437                    outcome,
1438                    CompositionEdgeKind::Include,
1439                    include.source.as_ref(),
1440                    &include.schema_location,
1441                );
1442            }
1443            Err(e) => result.errors.push(e),
1444        }
1445    }
1446
1447    // Process imports
1448    for (i, import) in imports.iter().enumerate() {
1449        // src-import §4.2.3 (XSD 1.0): the namespace of an `<xs:import>`
1450        // must not be the same as the enclosing schema's targetNamespace.
1451        // XSD 1.1 explicitly relaxed this — own-namespace imports are
1452        // permitted (errata, W3C bug 4126 / cleanup).
1453        if schema_set.is_xsd10() {
1454            if let Some(import_ns_str) = import.namespace.as_deref() {
1455                let tns_str = target_namespace.map(|n| schema_set.name_table.resolve(n));
1456                if Some(import_ns_str) == tns_str.as_deref() {
1457                    result.errors.push(SchemaError::structural(
1458                        "src-import",
1459                        format!(
1460                            "xs:import namespace '{}' must not equal the enclosing \
1461                             schema's targetNamespace in XSD 1.0",
1462                            import_ns_str
1463                        ),
1464                        import
1465                            .source
1466                            .as_ref()
1467                            .and_then(|s| schema_set.source_maps.locate(s)),
1468                    ));
1469                    continue;
1470                }
1471            }
1472        }
1473        // src-import §4.2.3 clause 1.2 (XSD 1.0) / §4.2.6.1 clause 1.2 (XSD
1474        // 1.1): if the `namespace` attribute is absent, the enclosing
1475        // <schema> must have a targetNamespace attribute. addB008 / addB035.
1476        if import.namespace.is_none() && target_namespace.is_none() {
1477            result.errors.push(SchemaError::structural(
1478                "src-import",
1479                "xs:import without 'namespace' requires the enclosing schema to have \
1480                 a 'targetNamespace' attribute",
1481                import
1482                    .source
1483                    .as_ref()
1484                    .and_then(|s| schema_set.source_maps.locate(s)),
1485            ));
1486            continue;
1487        }
1488        match resolver.process_import(
1489            import.namespace.as_deref(),
1490            import.schema_location.as_deref(),
1491            &base_uri,
1492            schema_set,
1493        ) {
1494            Ok(Some(ref outcome)) => {
1495                match outcome {
1496                    LoadOutcome::Loaded(id) => {
1497                        result.loaded.push(*id);
1498                        schema_set.documents[doc_id as usize].imports[i].resolved_doc_id =
1499                            Some(*id);
1500                    }
1501                    LoadOutcome::AlreadyLoaded(id) => {
1502                        // Already processed — record doc_id but don't add to loaded.
1503                        schema_set.documents[doc_id as usize].imports[i].resolved_doc_id =
1504                            Some(*id);
1505                    }
1506                    _ => {
1507                        if let Some(loc) = &import.schema_location {
1508                            result.skipped.push(loc.clone());
1509                        }
1510                    }
1511                }
1512                record_edge(
1513                    schema_set,
1514                    doc_id,
1515                    outcome,
1516                    CompositionEdgeKind::Import,
1517                    import.source.as_ref(),
1518                    import.schema_location.as_deref().unwrap_or_default(),
1519                );
1520            }
1521            Ok(None) => {
1522                // No schemaLocation and no catalog match — no edge to record
1523            }
1524            Err(e) => result.import_errors.push(e),
1525        }
1526    }
1527
1528    // Process redefines
1529    for (i, redefine) in redefines.iter().enumerate() {
1530        match resolver.process_redefine(
1531            &redefine.schema_location,
1532            &base_uri,
1533            target_namespace,
1534            schema_set,
1535        ) {
1536            Ok(ref outcome) => {
1537                match outcome {
1538                    LoadOutcome::Loaded(id) => {
1539                        result.loaded.push(*id);
1540                        schema_set.documents[doc_id as usize].redefines[i].resolved_doc_id =
1541                            Some(*id);
1542                    }
1543                    LoadOutcome::AlreadyLoaded(id) => {
1544                        schema_set.documents[doc_id as usize].redefines[i].resolved_doc_id =
1545                            Some(*id);
1546                    }
1547                    _ => {
1548                        result.skipped.push(redefine.schema_location.clone());
1549                    }
1550                }
1551                record_edge(
1552                    schema_set,
1553                    doc_id,
1554                    outcome,
1555                    CompositionEdgeKind::Redefine,
1556                    redefine.source.as_ref(),
1557                    &redefine.schema_location,
1558                );
1559            }
1560            Err(e) => result.errors.push(e),
1561        }
1562    }
1563
1564    // Process overrides (XSD 1.1)
1565    #[cfg(feature = "xsd11")]
1566    for (i, override_dir) in overrides.iter().enumerate() {
1567        match resolver.process_override(
1568            &override_dir.schema_location,
1569            &base_uri,
1570            target_namespace,
1571            schema_set,
1572        ) {
1573            Ok(ref outcome) => {
1574                match outcome {
1575                    LoadOutcome::Loaded(id) => {
1576                        result.loaded.push(*id);
1577                        schema_set.documents[doc_id as usize].overrides[i].resolved_doc_id =
1578                            Some(*id);
1579                    }
1580                    LoadOutcome::AlreadyLoaded(id) => {
1581                        schema_set.documents[doc_id as usize].overrides[i].resolved_doc_id =
1582                            Some(*id);
1583                    }
1584                    _ => {
1585                        result.skipped.push(override_dir.schema_location.clone());
1586                    }
1587                }
1588                record_edge(
1589                    schema_set,
1590                    doc_id,
1591                    outcome,
1592                    CompositionEdgeKind::Override,
1593                    override_dir.source.as_ref(),
1594                    &override_dir.schema_location,
1595                );
1596            }
1597            Err(e) => result.errors.push(e),
1598        }
1599    }
1600
1601    result
1602}
1603
1604#[cfg(test)]
1605mod tests {
1606    use super::*;
1607
1608    #[test]
1609    fn test_is_absolute_uri() {
1610        assert!(is_absolute_uri("http://example.com/schema.xsd"));
1611        assert!(is_absolute_uri("https://example.com/schema.xsd"));
1612        assert!(is_absolute_uri("/absolute/path/schema.xsd"));
1613        assert!(!is_absolute_uri("relative/path/schema.xsd"));
1614        assert!(!is_absolute_uri("../parent/schema.xsd"));
1615    }
1616
1617    #[test]
1618    fn test_resolve_relative_path() {
1619        let resolved = resolve_relative_path("types.xsd", "/home/user/schema.xsd").unwrap();
1620        assert!(resolved.contains("types.xsd"));
1621    }
1622
1623    #[test]
1624    fn test_resolve_relative_path_parent() {
1625        let resolved =
1626            resolve_relative_path("../common/types.xsd", "/home/user/schemas/main.xsd").unwrap();
1627        // Should resolve to something like /home/user/common/types.xsd
1628        assert!(resolved.contains("common"));
1629        assert!(resolved.contains("types.xsd"));
1630    }
1631
1632    #[test]
1633    fn test_resolve_relative_url() {
1634        let resolved =
1635            resolve_relative_url("types.xsd", "http://example.com/schemas/main.xsd").unwrap();
1636        assert_eq!(resolved, "http://example.com/schemas/types.xsd");
1637    }
1638
1639    #[test]
1640    fn test_catalog_lookup() {
1641        let mut catalog = SchemaCatalog::new();
1642        catalog.add("http://example.com/ns", "/path/to/schema.xsd");
1643
1644        assert_eq!(
1645            catalog.lookup("http://example.com/ns"),
1646            Some("/path/to/schema.xsd")
1647        );
1648        assert_eq!(catalog.lookup("http://other.com/ns"), None);
1649    }
1650
1651    #[test]
1652    fn test_resolver_config_default() {
1653        let config = ResolverConfig::default();
1654        assert!(!config.allow_network);
1655        assert_eq!(config.max_depth, 100);
1656    }
1657
1658    #[test]
1659    fn test_resolver_new() {
1660        let resolver = SchemaResolver::new();
1661        assert!(resolver.resolving.is_empty());
1662    }
1663
1664    #[test]
1665    fn test_normalize_path() {
1666        let path = Path::new("/home/user/../other/./schema.xsd");
1667        let normalized = normalize_path(path);
1668        assert!(!normalized.to_string_lossy().contains(".."));
1669        assert!(!normalized.to_string_lossy().contains("./"));
1670    }
1671
1672    #[test]
1673    fn test_resolution_result_default() {
1674        let result = ResolutionResult::default();
1675        assert!(result.is_ok());
1676        assert!(!result.has_loaded());
1677    }
1678
1679    #[test]
1680    fn test_catalog_xml_namespaces() {
1681        let mut catalog = SchemaCatalog::new();
1682        catalog.add_xml_catalog();
1683
1684        assert_eq!(
1685            catalog.lookup("http://www.w3.org/XML/1998/namespace"),
1686            Some("embedded://xml.xsd")
1687        );
1688        assert!(catalog
1689            .lookup("http://www.w3.org/2001/XMLSchema-instance")
1690            .is_some());
1691    }
1692
1693    #[test]
1694    fn test_embedded_loader() {
1695        let loader = EmbeddedLoader::new();
1696
1697        // Can load embedded URIs
1698        assert!(loader.can_load("embedded://xml.xsd"));
1699        assert!(!loader.can_load("/path/to/file.xsd"));
1700        assert!(!loader.can_load("http://example.com/schema.xsd"));
1701
1702        // Load xml.xsd
1703        let content = loader.load("embedded://xml.xsd").unwrap();
1704        assert!(content.contains("targetNamespace=\"http://www.w3.org/XML/1998/namespace\""));
1705
1706        // Unknown embedded schema
1707        assert!(loader.load("embedded://unknown.xsd").is_err());
1708    }
1709
1710    #[test]
1711    fn test_file_system_loader() {
1712        let loader = FileSystemLoader::new();
1713
1714        // Can load file paths, not embedded or HTTP
1715        assert!(loader.can_load("/path/to/file.xsd"));
1716        assert!(loader.can_load("relative/path.xsd"));
1717        assert!(!loader.can_load("embedded://xml.xsd"));
1718        assert!(!loader.can_load("http://example.com/schema.xsd"));
1719        assert!(!loader.can_load("https://example.com/schema.xsd"));
1720    }
1721
1722    #[test]
1723    fn test_loader_chain() {
1724        let chain = LoaderChain::with_defaults();
1725
1726        // Can load both embedded and file paths
1727        assert!(chain.can_load("embedded://xml.xsd"));
1728        assert!(chain.can_load("/path/to/file.xsd"));
1729
1730        // Load embedded schema through chain
1731        let content = chain.load("embedded://xml.xsd").unwrap();
1732        assert!(content.contains("http://www.w3.org/XML/1998/namespace"));
1733
1734        // Chain has expected number of loaders
1735        assert_eq!(chain.len(), 2);
1736    }
1737
1738    #[test]
1739    fn test_loader_chain_priority() {
1740        let mut chain = LoaderChain::new();
1741        chain.add(Box::new(FileSystemLoader::new())); // priority 0
1742        chain.add(Box::new(EmbeddedLoader::new())); // priority 100
1743
1744        // EmbeddedLoader should be first due to higher priority
1745        assert_eq!(chain.priority(), 100);
1746    }
1747
1748    #[test]
1749    fn test_resolver_with_embedded_loader() {
1750        let resolver = SchemaResolver::new();
1751
1752        // Load embedded xml.xsd
1753        let content = resolver.load_content("embedded://xml.xsd").unwrap();
1754        assert!(content.contains("http://www.w3.org/XML/1998/namespace"));
1755    }
1756
1757    #[test]
1758    fn test_composition_edges_recorded() {
1759        use crate::parser::parse::parse_schema;
1760        use crate::schema::composition::CompositionEdgeKind;
1761        use crate::schema::SchemaSet;
1762
1763        let tmp = std::env::temp_dir().join("xsd_test_composition_edges");
1764        std::fs::create_dir_all(&tmp).unwrap();
1765
1766        // Base schema with a simple type
1767        let base_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1768<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1769    <xs:simpleType name="MyString">
1770        <xs:restriction base="xs:string"/>
1771    </xs:simpleType>
1772</xs:schema>"#;
1773        let base_path = tmp.join("comp_base.xsd");
1774        std::fs::write(&base_path, base_xsd).unwrap();
1775
1776        // Main schema with include + redefine
1777        let main_xsd = format!(
1778            r#"<?xml version="1.0" encoding="UTF-8"?>
1779<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1780    <xs:include schemaLocation="{loc}"/>
1781    <xs:redefine schemaLocation="{loc}">
1782        <xs:simpleType name="MyString">
1783            <xs:restriction base="MyString">
1784                <xs:maxLength value="50"/>
1785            </xs:restriction>
1786        </xs:simpleType>
1787    </xs:redefine>
1788</xs:schema>"#,
1789            loc = base_path.to_string_lossy()
1790        );
1791
1792        let mut schema_set = SchemaSet::new();
1793        let main_path = tmp.join("comp_main.xsd").to_string_lossy().to_string();
1794        let doc_id = parse_schema(main_xsd.as_bytes(), &main_path, &mut schema_set).unwrap();
1795
1796        let mut resolver = SchemaResolver::new();
1797        let result = resolve_all_directives(doc_id, &mut resolver, &mut schema_set);
1798        assert!(result.is_ok(), "Resolution should succeed");
1799
1800        // Should have edges for include and redefine
1801        let edges = &schema_set.composition_edges;
1802        assert!(
1803            edges.len() >= 2,
1804            "Expected at least 2 edges, got {}",
1805            edges.len()
1806        );
1807
1808        let include_edges: Vec<_> = edges
1809            .iter()
1810            .filter(|e| e.kind == CompositionEdgeKind::Include)
1811            .collect();
1812        assert!(!include_edges.is_empty(), "Should have an include edge");
1813        assert_eq!(include_edges[0].source_doc, doc_id);
1814
1815        let redefine_edges: Vec<_> = edges
1816            .iter()
1817            .filter(|e| e.kind == CompositionEdgeKind::Redefine)
1818            .collect();
1819        assert!(!redefine_edges.is_empty(), "Should have a redefine edge");
1820        assert_eq!(redefine_edges[0].source_doc, doc_id);
1821
1822        // Both edges should point to the same target document
1823        assert!(include_edges[0].target_doc.is_some());
1824        assert_eq!(include_edges[0].target_doc, redefine_edges[0].target_doc);
1825
1826        let _ = std::fs::remove_dir_all(&tmp);
1827    }
1828
1829    #[test]
1830    fn test_composition_edges_cycle() {
1831        use crate::parser::parse::parse_schema;
1832        use crate::schema::composition::CompositionEdgeKind;
1833        use crate::schema::SchemaSet;
1834
1835        let tmp = std::env::temp_dir().join("xsd_test_composition_cycle");
1836        std::fs::create_dir_all(&tmp).unwrap();
1837
1838        let a_path = tmp.join("cycle_a.xsd");
1839        let b_path = tmp.join("cycle_b.xsd");
1840
1841        // a.xsd includes b.xsd
1842        let a_xsd = format!(
1843            r#"<?xml version="1.0" encoding="UTF-8"?>
1844<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1845    <xs:include schemaLocation="{}"/>
1846    <xs:element name="A" type="xs:string"/>
1847</xs:schema>"#,
1848            b_path.to_string_lossy()
1849        );
1850
1851        // b.xsd includes a.xsd (creates cycle)
1852        let b_xsd = format!(
1853            r#"<?xml version="1.0" encoding="UTF-8"?>
1854<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1855    <xs:include schemaLocation="{}"/>
1856    <xs:element name="B" type="xs:string"/>
1857</xs:schema>"#,
1858            a_path.to_string_lossy()
1859        );
1860
1861        std::fs::write(&a_path, &a_xsd).unwrap();
1862        std::fs::write(&b_path, &b_xsd).unwrap();
1863
1864        let mut schema_set = SchemaSet::new();
1865        let a_uri = a_path.to_string_lossy().to_string();
1866        let a_doc_id = parse_schema(
1867            std::fs::read_to_string(&a_path).unwrap().as_bytes(),
1868            &a_uri,
1869            &mut schema_set,
1870        )
1871        .unwrap();
1872
1873        // Mark a.xsd as loaded so cycle detection works
1874        schema_set.mark_loaded(a_uri, a_doc_id);
1875
1876        let mut resolver = SchemaResolver::new();
1877
1878        // First resolution: a.xsd's directives (loads b.xsd)
1879        let result_a = resolve_all_directives(a_doc_id, &mut resolver, &mut schema_set);
1880        assert!(result_a.is_ok(), "Resolution of a.xsd should succeed");
1881        assert_eq!(result_a.loaded.len(), 1, "Should have loaded b.xsd");
1882
1883        let b_doc_id = result_a.loaded[0];
1884
1885        // Second resolution: b.xsd's directives (a.xsd already loaded)
1886        let result_b = resolve_all_directives(b_doc_id, &mut resolver, &mut schema_set);
1887        assert!(result_b.is_ok(), "Resolution of b.xsd should succeed");
1888
1889        // Should have edges for both directions
1890        let edges = &schema_set.composition_edges;
1891
1892        // a→b edge (from first resolution, Loaded branch)
1893        let a_to_b: Vec<_> = edges
1894            .iter()
1895            .filter(|e| e.source_doc == a_doc_id && e.target_doc == Some(b_doc_id))
1896            .collect();
1897        assert_eq!(a_to_b.len(), 1, "Should have exactly one a→b edge");
1898        assert_eq!(a_to_b[0].kind, CompositionEdgeKind::Include);
1899
1900        // b→a edge (from second resolution, AlreadyLoaded branch)
1901        let b_to_a: Vec<_> = edges
1902            .iter()
1903            .filter(|e| e.source_doc == b_doc_id && e.target_doc == Some(a_doc_id))
1904            .collect();
1905        assert_eq!(b_to_a.len(), 1, "Should have exactly one b→a edge");
1906        assert_eq!(b_to_a[0].kind, CompositionEdgeKind::Include);
1907
1908        let _ = std::fs::remove_dir_all(&tmp);
1909    }
1910
1911    #[test]
1912    fn test_resolved_doc_id_populated() {
1913        use crate::parser::parse::parse_schema;
1914        use crate::schema::SchemaSet;
1915
1916        let tmp = std::env::temp_dir().join("xsd_test_resolved_doc_id");
1917        std::fs::create_dir_all(&tmp).unwrap();
1918
1919        // Base schema with a simple type
1920        let base_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1921<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1922    <xs:simpleType name="MyString">
1923        <xs:restriction base="xs:string"/>
1924    </xs:simpleType>
1925</xs:schema>"#;
1926        let base_path = tmp.join("base.xsd");
1927        std::fs::write(&base_path, base_xsd).unwrap();
1928
1929        // Main schema that includes and redefines the base
1930        let main_xsd = format!(
1931            r#"<?xml version="1.0" encoding="UTF-8"?>
1932<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1933    <xs:include schemaLocation="{loc}"/>
1934    <xs:redefine schemaLocation="{loc}">
1935        <xs:simpleType name="MyString">
1936            <xs:restriction base="MyString">
1937                <xs:maxLength value="50"/>
1938            </xs:restriction>
1939        </xs:simpleType>
1940    </xs:redefine>
1941</xs:schema>"#,
1942            loc = base_path.to_string_lossy()
1943        );
1944
1945        let mut schema_set = SchemaSet::new();
1946        let main_path = tmp.join("main.xsd").to_string_lossy().to_string();
1947        let doc_id = parse_schema(main_xsd.as_bytes(), &main_path, &mut schema_set).unwrap();
1948
1949        let mut resolver = SchemaResolver::new();
1950        let result = resolve_all_directives(doc_id, &mut resolver, &mut schema_set);
1951        assert!(result.is_ok(), "Resolution should succeed");
1952
1953        let doc = &schema_set.documents[doc_id as usize];
1954        assert!(
1955            doc.includes[0].resolved_doc_id.is_some(),
1956            "Include should have resolved_doc_id"
1957        );
1958        assert!(
1959            doc.redefines[0].resolved_doc_id.is_some(),
1960            "Redefine should have resolved_doc_id"
1961        );
1962
1963        let _ = std::fs::remove_dir_all(&tmp);
1964    }
1965
1966    #[test]
1967    fn test_document_component_index_populated() {
1968        use crate::parser::parse::parse_schema;
1969        use crate::schema::SchemaSet;
1970
1971        let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1972<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1973    <xs:simpleType name="MyString">
1974        <xs:restriction base="xs:string"/>
1975    </xs:simpleType>
1976    <xs:element name="root" type="MyString"/>
1977</xs:schema>"#;
1978
1979        let mut schema_set = SchemaSet::new();
1980        let doc_id = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set).unwrap();
1981
1982        let doc = &schema_set.documents[doc_id as usize];
1983        assert!(
1984            !doc.component_index.is_empty(),
1985            "Component index should be populated"
1986        );
1987
1988        // Should find the simple type
1989        assert!(
1990            doc.component_index
1991                .lookup_type(None, schema_set.name_table.get("MyString").unwrap())
1992                .is_some(),
1993            "Should find MyString type in document component index"
1994        );
1995
1996        // Should find the element
1997        assert!(
1998            doc.component_index
1999                .lookup_element(None, schema_set.name_table.get("root").unwrap())
2000                .is_some(),
2001            "Should find root element in document component index"
2002        );
2003
2004        // Should NOT find a non-existent component
2005        assert!(
2006            doc.component_index
2007                .lookup_type(None, schema_set.name_table.get("root").unwrap())
2008                .is_none(),
2009            "Should not find 'root' as a type"
2010        );
2011    }
2012
2013    #[test]
2014    fn test_redefine_uses_document_scoped_lookup() {
2015        use crate::parser::parse::parse_schema;
2016        use crate::schema::SchemaSet;
2017
2018        let tmp = std::env::temp_dir().join("xsd_test_redefine_doc_scoped");
2019        std::fs::create_dir_all(&tmp).unwrap();
2020
2021        // Base schema with a simple type
2022        let base_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2023<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2024    <xs:simpleType name="MyString">
2025        <xs:restriction base="xs:string"/>
2026    </xs:simpleType>
2027</xs:schema>"#;
2028        let base_path = tmp.join("redef_base.xsd");
2029        std::fs::write(&base_path, base_xsd).unwrap();
2030
2031        // Main schema that redefines the base type
2032        let main_xsd = format!(
2033            r#"<?xml version="1.0" encoding="UTF-8"?>
2034<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2035    <xs:redefine schemaLocation="{loc}">
2036        <xs:simpleType name="MyString">
2037            <xs:restriction base="MyString">
2038                <xs:maxLength value="50"/>
2039            </xs:restriction>
2040        </xs:simpleType>
2041    </xs:redefine>
2042</xs:schema>"#,
2043            loc = base_path.to_string_lossy()
2044        );
2045
2046        let mut schema_set = SchemaSet::new();
2047        let main_path = tmp.join("redef_main.xsd").to_string_lossy().to_string();
2048        let doc_id = parse_schema(main_xsd.as_bytes(), &main_path, &mut schema_set).unwrap();
2049
2050        // Resolve directives (loads base.xsd, populates resolved_doc_id)
2051        let mut resolver = SchemaResolver::new();
2052        let result = resolve_all_directives(doc_id, &mut resolver, &mut schema_set);
2053        assert!(result.is_ok(), "Resolution should succeed");
2054
2055        let main_doc = &schema_set.documents[doc_id as usize];
2056        let target_doc_id = main_doc.redefines[0].resolved_doc_id;
2057        assert!(
2058            target_doc_id.is_some(),
2059            "Redefine should have resolved_doc_id"
2060        );
2061
2062        // Verify the target document's component index has MyString
2063        let target_doc = &schema_set.documents[target_doc_id.unwrap() as usize];
2064        let my_string_name = schema_set.name_table.get("MyString").unwrap();
2065        assert!(
2066            target_doc
2067                .component_index
2068                .lookup_type(None, my_string_name)
2069                .is_some(),
2070            "Target document should have MyString in component index"
2071        );
2072
2073        // Apply redefine — should succeed using document-scoped lookup
2074        crate::schema::apply_redefine_override(&mut schema_set).unwrap();
2075
2076        // Verify the namespace table now has the redefined type
2077        let type_key = schema_set.lookup_type(None, my_string_name);
2078        assert!(
2079            type_key.is_some(),
2080            "MyString should still be in namespace table after redefine"
2081        );
2082
2083        let _ = std::fs::remove_dir_all(&tmp);
2084    }
2085
2086    #[test]
2087    fn test_effective_components_provenance_populated() {
2088        use crate::parser::parse::parse_schema;
2089        use crate::schema::composition::CompositionAction;
2090        use crate::schema::SchemaSet;
2091
2092        let tmp = std::env::temp_dir().join("xsd_test_provenance");
2093        std::fs::create_dir_all(&tmp).unwrap();
2094
2095        let base_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2096<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2097    <xs:simpleType name="MyStr">
2098        <xs:restriction base="xs:string"/>
2099    </xs:simpleType>
2100    <xs:element name="root" type="MyStr"/>
2101</xs:schema>"#;
2102        let base_path = tmp.join("prov_base.xsd");
2103        std::fs::write(&base_path, base_xsd).unwrap();
2104
2105        let main_xsd = format!(
2106            r#"<?xml version="1.0" encoding="UTF-8"?>
2107<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2108    <xs:redefine schemaLocation="{loc}">
2109        <xs:simpleType name="MyStr">
2110            <xs:restriction base="MyStr">
2111                <xs:maxLength value="50"/>
2112            </xs:restriction>
2113        </xs:simpleType>
2114    </xs:redefine>
2115</xs:schema>"#,
2116            loc = base_path.to_string_lossy()
2117        );
2118
2119        let mut schema_set = SchemaSet::new();
2120        let main_path = tmp.join("prov_main.xsd").to_string_lossy().to_string();
2121        let doc_id = parse_schema(main_xsd.as_bytes(), &main_path, &mut schema_set).unwrap();
2122
2123        let mut resolver = SchemaResolver::new();
2124        let result = resolve_all_directives(doc_id, &mut resolver, &mut schema_set);
2125        assert!(result.is_ok());
2126
2127        // Apply composition — builds effective components
2128        crate::schema::apply_redefine_override(&mut schema_set).unwrap();
2129
2130        assert!(
2131            !schema_set.effective_components.is_empty(),
2132            "Effective components should be populated after composition"
2133        );
2134
2135        // The redefined component (MyStr) should have Redefined action,
2136        // NOT a separate Declared entry — redefine replaces the declared entry.
2137        let my_str_name = schema_set.name_table.get("MyStr").unwrap();
2138        let my_str_identity = crate::schema::composition::ComponentIdentity {
2139            kind: crate::schema::composition::ComponentKind::SimpleType,
2140            name: my_str_name,
2141            namespace: None,
2142        };
2143        let my_str_eff = schema_set.effective_components.get(&my_str_identity);
2144        assert!(
2145            my_str_eff.is_some(),
2146            "MyStr should be in effective components"
2147        );
2148        let my_str_eff = my_str_eff.unwrap();
2149        assert!(
2150            matches!(my_str_eff.action, CompositionAction::Redefined { .. }),
2151            "MyStr should have Redefined action, not Declared"
2152        );
2153        // origin should point at the redefining document (main), not the target
2154        assert_eq!(
2155            my_str_eff.origin.owner_doc,
2156            Some(doc_id),
2157            "Redefined component origin should be the redefining document"
2158        );
2159
2160        // The other component (root element) from base.xsd should still be Declared
2161        let declared_count = schema_set
2162            .effective_components
2163            .values()
2164            .filter(|c| matches!(c.action, CompositionAction::Declared))
2165            .count();
2166        assert!(
2167            declared_count > 0,
2168            "Should have declared components for non-redefined items"
2169        );
2170
2171        let _ = std::fs::remove_dir_all(&tmp);
2172    }
2173
2174    /// When resolved_doc_id is Some but the target document does NOT declare
2175    /// the component, redefine must fail — it must not fall back to a
2176    /// same-name component from another document in the global namespace table.
2177    #[test]
2178    fn test_redefine_no_fallback_to_global_when_scoped() {
2179        use crate::parser::parse::parse_schema;
2180        use crate::schema::model::RedefineDirective;
2181        use crate::schema::redefine::apply_redefine;
2182        use crate::schema::SchemaSet;
2183
2184        let tmp = std::env::temp_dir().join("xsd_test_redefine_no_fallback");
2185        std::fs::create_dir_all(&tmp).unwrap();
2186
2187        // doc_a.xsd declares MyType (simple type)
2188        let doc_a_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2189<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2190    <xs:simpleType name="MyType">
2191        <xs:restriction base="xs:string"/>
2192    </xs:simpleType>
2193</xs:schema>"#;
2194        let doc_a_path = tmp.join("no_fallback_a.xsd");
2195        std::fs::write(&doc_a_path, doc_a_xsd).unwrap();
2196
2197        // doc_b.xsd declares a DIFFERENT type (not MyType)
2198        let doc_b_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2199<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2200    <xs:simpleType name="OtherType">
2201        <xs:restriction base="xs:string"/>
2202    </xs:simpleType>
2203</xs:schema>"#;
2204        let doc_b_path = tmp.join("no_fallback_b.xsd");
2205        std::fs::write(&doc_b_path, doc_b_xsd).unwrap();
2206
2207        // Parse both documents
2208        let mut schema_set = SchemaSet::new();
2209        let _doc_a_id = parse_schema(
2210            std::fs::read_to_string(&doc_a_path).unwrap().as_bytes(),
2211            &doc_a_path.to_string_lossy(),
2212            &mut schema_set,
2213        )
2214        .unwrap();
2215        let doc_b_id = parse_schema(
2216            std::fs::read_to_string(&doc_b_path).unwrap().as_bytes(),
2217            &doc_b_path.to_string_lossy(),
2218            &mut schema_set,
2219        )
2220        .unwrap();
2221
2222        // MyType IS in global namespace table (from doc_a)
2223        let my_type_name = schema_set.name_table.get("MyType").unwrap();
2224        assert!(
2225            schema_set.lookup_type(None, my_type_name).is_some(),
2226            "MyType should be in global namespace table from doc_a"
2227        );
2228
2229        // Create a fake redefine that points resolved_doc_id at doc_b
2230        // (which does NOT declare MyType). The redefine's replacement type
2231        // needs to exist in the arena with the right name.
2232        let redef_key = schema_set
2233            .arenas
2234            .alloc_simple_type(crate::arenas::SimpleTypeDefData {
2235                name: Some(my_type_name),
2236                target_namespace: None,
2237                variety: crate::parser::frames::SimpleTypeVariety::Atomic,
2238                base_type: Some(crate::parser::frames::TypeRefResult::QName(
2239                    crate::parser::frames::QNameRef {
2240                        namespace: None,
2241                        local_name: my_type_name,
2242                        prefix: None,
2243                    },
2244                )),
2245                item_type: None,
2246                member_types: Vec::new(),
2247                facets: Default::default(),
2248                final_derivation: crate::schema::model::DerivationSet::empty(),
2249                id: None,
2250                derivation_id: None,
2251                annotation: None,
2252                source: None,
2253                resolved_base_type: None,
2254                resolved_item_type: None,
2255                resolved_member_types: Vec::new(),
2256                redefine_original: None,
2257                deferred_item_type_error: None,
2258            });
2259
2260        let redefine = RedefineDirective {
2261            source: None,
2262            schema_location: doc_b_path.to_string_lossy().to_string(),
2263            resolved_doc_id: Some(doc_b_id), // points at doc_b, which has no MyType
2264            simple_types: vec![redef_key],
2265            complex_types: Vec::new(),
2266            groups: Vec::new(),
2267            attribute_groups: Vec::new(),
2268        };
2269
2270        // This MUST fail: doc_b does not declare MyType, and the lookup
2271        // must not fall back to the global table where doc_a's MyType lives.
2272        let result = apply_redefine(&mut schema_set, &redefine);
2273        assert!(
2274            result.is_err(),
2275            "Redefine should fail when target document lacks the component (no global fallback)"
2276        );
2277
2278        let _ = std::fs::remove_dir_all(&tmp);
2279    }
2280
2281    /// When the target document has a complex type named "Foo" but the
2282    /// redefine is for a simple type named "Foo", it must not match —
2283    /// kind-sensitive lookup must reject the cross-kind match.
2284    #[test]
2285    fn test_redefine_simple_vs_complex_kind_mismatch() {
2286        use crate::parser::parse::parse_schema;
2287        use crate::schema::model::RedefineDirective;
2288        use crate::schema::redefine::apply_redefine;
2289        use crate::schema::SchemaSet;
2290
2291        let tmp = std::env::temp_dir().join("xsd_test_redefine_kind_mismatch");
2292        std::fs::create_dir_all(&tmp).unwrap();
2293
2294        // target.xsd declares Foo as a COMPLEX type
2295        let target_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2296<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2297    <xs:complexType name="Foo">
2298        <xs:sequence>
2299            <xs:element name="bar" type="xs:string"/>
2300        </xs:sequence>
2301    </xs:complexType>
2302</xs:schema>"#;
2303        let target_path = tmp.join("kind_target.xsd");
2304        std::fs::write(&target_path, target_xsd).unwrap();
2305
2306        let mut schema_set = SchemaSet::new();
2307        let target_id = parse_schema(
2308            std::fs::read_to_string(&target_path).unwrap().as_bytes(),
2309            &target_path.to_string_lossy(),
2310            &mut schema_set,
2311        )
2312        .unwrap();
2313
2314        let foo_name = schema_set.name_table.get("Foo").unwrap();
2315
2316        // Verify target doc has Foo as complex type, NOT simple type
2317        let target_doc = &schema_set.documents[target_id as usize];
2318        assert!(
2319            target_doc
2320                .component_index
2321                .lookup_complex_type(None, foo_name)
2322                .is_some(),
2323            "Target should have Foo as complex type"
2324        );
2325        assert!(
2326            target_doc
2327                .component_index
2328                .lookup_simple_type(None, foo_name)
2329                .is_none(),
2330            "Target should NOT have Foo as simple type"
2331        );
2332
2333        // Create a simple type redefine for "Foo" pointing at target doc
2334        let redef_key = schema_set
2335            .arenas
2336            .alloc_simple_type(crate::arenas::SimpleTypeDefData {
2337                name: Some(foo_name),
2338                target_namespace: None,
2339                variety: crate::parser::frames::SimpleTypeVariety::Atomic,
2340                base_type: Some(crate::parser::frames::TypeRefResult::QName(
2341                    crate::parser::frames::QNameRef {
2342                        namespace: None,
2343                        local_name: foo_name,
2344                        prefix: None,
2345                    },
2346                )),
2347                item_type: None,
2348                member_types: Vec::new(),
2349                facets: Default::default(),
2350                final_derivation: crate::schema::model::DerivationSet::empty(),
2351                id: None,
2352                derivation_id: None,
2353                annotation: None,
2354                source: None,
2355                resolved_base_type: None,
2356                resolved_item_type: None,
2357                resolved_member_types: Vec::new(),
2358                redefine_original: None,
2359                deferred_item_type_error: None,
2360            });
2361
2362        let redefine = RedefineDirective {
2363            source: None,
2364            schema_location: target_path.to_string_lossy().to_string(),
2365            resolved_doc_id: Some(target_id),
2366            simple_types: vec![redef_key],
2367            complex_types: Vec::new(),
2368            groups: Vec::new(),
2369            attribute_groups: Vec::new(),
2370        };
2371
2372        // Must fail: target has complex type "Foo", not simple type "Foo"
2373        let result = apply_redefine(&mut schema_set, &redefine);
2374        assert!(
2375            result.is_err(),
2376            "Simple type redefine must not match a same-name complex type in target document"
2377        );
2378
2379        let _ = std::fs::remove_dir_all(&tmp);
2380    }
2381
2382    /// Chameleon include: a no-namespace schema included by a namespace-bearing
2383    /// schema should adopt the includer's targetNamespace (§4.2.3 clause 2.3).
2384    #[test]
2385    fn test_chameleon_include_adopts_namespace() {
2386        use crate::parser::parse::parse_schema;
2387        use crate::schema::SchemaSet;
2388
2389        let tmp = std::env::temp_dir().join("xsd_test_chameleon_include");
2390        std::fs::create_dir_all(&tmp).unwrap();
2391
2392        // chameleon.xsd: no targetNamespace — declares MyType
2393        let chameleon_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2394<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2395    <xs:simpleType name="MyType">
2396        <xs:restriction base="xs:string"/>
2397    </xs:simpleType>
2398</xs:schema>"#;
2399        let chameleon_path = tmp.join("chameleon.xsd");
2400        std::fs::write(&chameleon_path, chameleon_xsd).unwrap();
2401
2402        // main.xsd: has targetNamespace, includes chameleon.xsd
2403        let main_xsd = format!(
2404            r#"<?xml version="1.0" encoding="UTF-8"?>
2405<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
2406           targetNamespace="http://example.com/main">
2407    <xs:include schemaLocation="{}"/>
2408    <xs:element name="root" type="tns:MyType" xmlns:tns="http://example.com/main"/>
2409</xs:schema>"#,
2410            chameleon_path.to_string_lossy()
2411        );
2412
2413        let mut schema_set = SchemaSet::new();
2414        let main_path = tmp.join("main.xsd").to_string_lossy().to_string();
2415        let doc_id = parse_schema(main_xsd.as_bytes(), &main_path, &mut schema_set).unwrap();
2416
2417        // Resolve directives — this triggers chameleon namespace adoption
2418        let mut resolver = SchemaResolver::new();
2419        let result = resolve_all_directives(doc_id, &mut resolver, &mut schema_set);
2420        assert!(result.is_ok(), "Resolution should succeed");
2421        assert!(
2422            !result.loaded.is_empty(),
2423            "Should have loaded chameleon.xsd"
2424        );
2425
2426        // The chameleon document should have adopted the includer's namespace
2427        let chameleon_doc_id = result.loaded[0];
2428        let chameleon_doc = &schema_set.documents[chameleon_doc_id as usize];
2429        let main_ns = schema_set
2430            .name_table
2431            .get("http://example.com/main")
2432            .unwrap();
2433        assert_eq!(
2434            chameleon_doc.target_namespace,
2435            Some(main_ns),
2436            "Chameleon document should adopt includer's targetNamespace"
2437        );
2438
2439        // MyType should be registered in the main namespace, not no-namespace
2440        let my_type_name = schema_set.name_table.get("MyType").unwrap();
2441        assert!(
2442            schema_set
2443                .lookup_type(Some(main_ns), my_type_name)
2444                .is_some(),
2445            "MyType should be in the includer's namespace after chameleon adoption"
2446        );
2447        assert!(
2448            schema_set.lookup_type(None, my_type_name).is_none(),
2449            "MyType should NOT be in no-namespace after chameleon adoption"
2450        );
2451
2452        let _ = std::fs::remove_dir_all(&tmp);
2453    }
2454
2455    /// Chameleon redefine: a no-namespace schema redefined by a namespace-bearing
2456    /// schema should adopt the redefiner's targetNamespace (§4.2.4).
2457    #[test]
2458    fn test_chameleon_redefine_adopts_namespace() {
2459        use crate::parser::parse::parse_schema;
2460        use crate::schema::SchemaSet;
2461
2462        let tmp = std::env::temp_dir().join("xsd_test_chameleon_redefine");
2463        std::fs::create_dir_all(&tmp).unwrap();
2464
2465        // chameleon.xsd: no targetNamespace
2466        let chameleon_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2467<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2468    <xs:simpleType name="MyStr">
2469        <xs:restriction base="xs:string"/>
2470    </xs:simpleType>
2471</xs:schema>"#;
2472        let chameleon_path = tmp.join("cham_redef.xsd");
2473        std::fs::write(&chameleon_path, chameleon_xsd).unwrap();
2474
2475        // main.xsd: has targetNamespace, redefines from chameleon.xsd
2476        let main_xsd = format!(
2477            r#"<?xml version="1.0" encoding="UTF-8"?>
2478<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
2479           targetNamespace="http://example.com/ns">
2480    <xs:redefine schemaLocation="{}">
2481        <xs:simpleType name="MyStr">
2482            <xs:restriction base="MyStr">
2483                <xs:maxLength value="50"/>
2484            </xs:restriction>
2485        </xs:simpleType>
2486    </xs:redefine>
2487</xs:schema>"#,
2488            chameleon_path.to_string_lossy()
2489        );
2490
2491        let mut schema_set = SchemaSet::new();
2492        let main_path = tmp.join("cham_main.xsd").to_string_lossy().to_string();
2493        let doc_id = parse_schema(main_xsd.as_bytes(), &main_path, &mut schema_set).unwrap();
2494
2495        let mut resolver = SchemaResolver::new();
2496        let result = resolve_all_directives(doc_id, &mut resolver, &mut schema_set);
2497        assert!(result.is_ok(), "Resolution should succeed");
2498
2499        // The chameleon document should have adopted the namespace
2500        let chameleon_doc_id = result.loaded[0];
2501        let chameleon_doc = &schema_set.documents[chameleon_doc_id as usize];
2502        let ns = schema_set.name_table.get("http://example.com/ns").unwrap();
2503        assert_eq!(
2504            chameleon_doc.target_namespace,
2505            Some(ns),
2506            "Chameleon redefine target should adopt redefiner's namespace"
2507        );
2508
2509        let _ = std::fs::remove_dir_all(&tmp);
2510    }
2511
2512    /// A no-namespace (chameleon) schema included from two schemas with
2513    /// different target namespaces must produce two separate document views,
2514    /// each adopting the includer's namespace.  Previously the second
2515    /// include returned the first document's ID unchanged (§4.2.3 violation).
2516    #[test]
2517    fn test_chameleon_multi_namespace_creates_separate_views() {
2518        use crate::parser::parse::parse_schema;
2519        use crate::schema::SchemaSet;
2520
2521        let tmp = std::env::temp_dir().join("xsd_test_chameleon_multi_ns");
2522        std::fs::create_dir_all(&tmp).unwrap();
2523
2524        // chameleon.xsd: no targetNamespace
2525        let chameleon_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2526<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2527    <xs:simpleType name="SharedType">
2528        <xs:restriction base="xs:string"/>
2529    </xs:simpleType>
2530</xs:schema>"#;
2531        let chameleon_path = tmp.join("multi_ns_chameleon.xsd");
2532        std::fs::write(&chameleon_path, chameleon_xsd).unwrap();
2533
2534        // ns_a.xsd: targetNamespace="urn:a", includes chameleon
2535        let ns_a_xsd = format!(
2536            r#"<?xml version="1.0" encoding="UTF-8"?>
2537<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
2538           targetNamespace="urn:a">
2539    <xs:include schemaLocation="{}"/>
2540</xs:schema>"#,
2541            chameleon_path.to_string_lossy()
2542        );
2543        let ns_a_path = tmp.join("multi_ns_a.xsd");
2544        std::fs::write(&ns_a_path, &ns_a_xsd).unwrap();
2545
2546        // ns_b.xsd: targetNamespace="urn:b", includes same chameleon
2547        let ns_b_xsd = format!(
2548            r#"<?xml version="1.0" encoding="UTF-8"?>
2549<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
2550           targetNamespace="urn:b">
2551    <xs:include schemaLocation="{}"/>
2552</xs:schema>"#,
2553            chameleon_path.to_string_lossy()
2554        );
2555        let ns_b_path = tmp.join("multi_ns_b.xsd");
2556        std::fs::write(&ns_b_path, &ns_b_xsd).unwrap();
2557
2558        let mut schema_set = SchemaSet::new();
2559
2560        // Parse ns_a and resolve its include (loads chameleon as urn:a)
2561        let ns_a_uri = ns_a_path.to_string_lossy().to_string();
2562        let doc_a = parse_schema(
2563            std::fs::read_to_string(&ns_a_path).unwrap().as_bytes(),
2564            &ns_a_uri,
2565            &mut schema_set,
2566        )
2567        .unwrap();
2568        let mut resolver = SchemaResolver::new();
2569        let res_a = resolve_all_directives(doc_a, &mut resolver, &mut schema_set);
2570        assert!(res_a.is_ok(), "ns_a resolution should succeed");
2571        let chameleon_a_id = res_a.loaded[0];
2572
2573        // Parse ns_b and resolve its include (must re-parse chameleon as urn:b)
2574        let ns_b_uri = ns_b_path.to_string_lossy().to_string();
2575        let doc_b = parse_schema(
2576            std::fs::read_to_string(&ns_b_path).unwrap().as_bytes(),
2577            &ns_b_uri,
2578            &mut schema_set,
2579        )
2580        .unwrap();
2581        let res_b = resolve_all_directives(doc_b, &mut resolver, &mut schema_set);
2582        assert!(res_b.is_ok(), "ns_b resolution should succeed");
2583        let chameleon_b_id = res_b.loaded[0];
2584
2585        // The two chameleon loads must produce DIFFERENT document IDs
2586        assert_ne!(
2587            chameleon_a_id, chameleon_b_id,
2588            "Chameleon schema included from different namespaces must produce separate documents"
2589        );
2590
2591        // Each must adopt its includer's namespace
2592        let ns_a_name = schema_set.name_table.get("urn:a").unwrap();
2593        let ns_b_name = schema_set.name_table.get("urn:b").unwrap();
2594        assert_eq!(
2595            schema_set.documents[chameleon_a_id as usize].target_namespace,
2596            Some(ns_a_name),
2597            "First chameleon copy should have urn:a namespace"
2598        );
2599        assert_eq!(
2600            schema_set.documents[chameleon_b_id as usize].target_namespace,
2601            Some(ns_b_name),
2602            "Second chameleon copy should have urn:b namespace"
2603        );
2604
2605        // Both should be flagged as chameleon
2606        assert!(schema_set.documents[chameleon_a_id as usize].is_chameleon());
2607        assert!(schema_set.documents[chameleon_b_id as usize].is_chameleon());
2608
2609        let _ = std::fs::remove_dir_all(&tmp);
2610    }
2611
2612    /// A no-namespace schema first loaded without chameleon adoption (from a
2613    /// no-namespace context) must NOT be reused when a later include requests
2614    /// chameleon adoption into a namespace.  schema(chameleon(tns,D2)) ≠
2615    /// schema(D2) per §4.2.3.
2616    #[test]
2617    fn test_raw_no_namespace_not_reused_for_chameleon() {
2618        use crate::parser::parse::parse_schema;
2619        use crate::schema::SchemaSet;
2620
2621        let tmp = std::env::temp_dir().join("xsd_test_raw_no_ns_chameleon");
2622        std::fs::create_dir_all(&tmp).unwrap();
2623
2624        // shared.xsd: no targetNamespace
2625        let shared_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2626<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2627    <xs:element name="Shared" type="xs:string"/>
2628</xs:schema>"#;
2629        let shared_path = tmp.join("raw_shared.xsd");
2630        std::fs::write(&shared_path, shared_xsd).unwrap();
2631
2632        // no_ns.xsd: no targetNamespace, includes shared (raw, no adoption)
2633        let no_ns_xsd = format!(
2634            r#"<?xml version="1.0" encoding="UTF-8"?>
2635<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2636    <xs:include schemaLocation="{}"/>
2637</xs:schema>"#,
2638            shared_path.to_string_lossy()
2639        );
2640        let no_ns_path = tmp.join("raw_no_ns.xsd");
2641        std::fs::write(&no_ns_path, &no_ns_xsd).unwrap();
2642
2643        // with_ns.xsd: targetNamespace="urn:test", includes same shared
2644        let with_ns_xsd = format!(
2645            r#"<?xml version="1.0" encoding="UTF-8"?>
2646<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
2647           targetNamespace="urn:test">
2648    <xs:include schemaLocation="{}"/>
2649</xs:schema>"#,
2650            shared_path.to_string_lossy()
2651        );
2652        let with_ns_path = tmp.join("raw_with_ns.xsd");
2653        std::fs::write(&with_ns_path, &with_ns_xsd).unwrap();
2654
2655        let mut schema_set = SchemaSet::new();
2656        let mut resolver = SchemaResolver::new();
2657
2658        // First: load from no-namespace context (raw, no chameleon adoption)
2659        let no_ns_uri = no_ns_path.to_string_lossy().to_string();
2660        let doc_no_ns = parse_schema(
2661            std::fs::read_to_string(&no_ns_path).unwrap().as_bytes(),
2662            &no_ns_uri,
2663            &mut schema_set,
2664        )
2665        .unwrap();
2666        let res1 = resolve_all_directives(doc_no_ns, &mut resolver, &mut schema_set);
2667        assert!(res1.is_ok());
2668        let raw_id = res1.loaded[0];
2669
2670        // The raw load should NOT be chameleon
2671        assert!(!schema_set.documents[raw_id as usize].is_chameleon());
2672        assert!(schema_set.documents[raw_id as usize]
2673            .target_namespace
2674            .is_none());
2675
2676        // Second: load from namespace-bearing context (chameleon adoption)
2677        let with_ns_uri = with_ns_path.to_string_lossy().to_string();
2678        let doc_with_ns = parse_schema(
2679            std::fs::read_to_string(&with_ns_path).unwrap().as_bytes(),
2680            &with_ns_uri,
2681            &mut schema_set,
2682        )
2683        .unwrap();
2684        let res2 = resolve_all_directives(doc_with_ns, &mut resolver, &mut schema_set);
2685        assert!(res2.is_ok());
2686        let chameleon_id = res2.loaded[0];
2687
2688        // Must be a DIFFERENT document — the raw no-namespace copy must not
2689        // leak into the namespace-bearing context.
2690        assert_ne!(
2691            raw_id, chameleon_id,
2692            "Raw no-namespace document must not be reused for chameleon adoption"
2693        );
2694        let ns_name = schema_set.name_table.get("urn:test").unwrap();
2695        assert_eq!(
2696            schema_set.documents[chameleon_id as usize].target_namespace,
2697            Some(ns_name),
2698            "Chameleon copy should adopt urn:test namespace"
2699        );
2700        assert!(schema_set.documents[chameleon_id as usize].is_chameleon());
2701
2702        let _ = std::fs::remove_dir_all(&tmp);
2703    }
2704}