use std::collections::HashSet;
use std::fmt::Debug;
use std::path::{Path, PathBuf};
#[cfg(feature = "async")]
use std::pin::Pin;
use crate::error::{SchemaError, SchemaResult};
use crate::ids::{DocumentId, NameId};
use crate::parser::parse::ParserConfig;
use crate::schema::composition::{CompositionEdge, CompositionEdgeKind};
use crate::SchemaSet;
/// Outcome of a single schema load request made through the resolver.
#[derive(Debug)]
pub enum LoadOutcome {
/// The document was fetched and parsed by this request; carries its id.
Loaded(DocumentId),
/// A previously loaded document was reused; carries its id.
AlreadyLoaded(DocumentId),
/// The resolved location is already being loaded; carries that location.
Cycle(String),
}
/// Byte-order mark that prefixes UTF-8 input.
const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
/// Byte-order mark that prefixes little-endian UTF-16 input.
const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE];
/// Byte-order mark that prefixes big-endian UTF-16 input.
const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF];
/// Byte order used when assembling UTF-16 code units from raw bytes.
#[derive(Copy, Clone)]
enum Endian {
/// Little-endian: least significant byte first.
Le,
/// Big-endian: most significant byte first.
Be,
}
/// Converts raw XML bytes into UTF-8 bytes.
///
/// A UTF-8 BOM is stripped. Input that starts with a UTF-16 BOM, or that looks
/// like BOM-less UTF-16 according to `sniff_utf16_no_bom`, is transcoded to
/// UTF-8. Anything else is returned unchanged and is NOT validated here.
///
/// # Errors
/// Propagates the error from `decode_utf16` when the UTF-16 payload is invalid.
pub fn decode_xml_to_utf8_bytes(bytes: Vec<u8>) -> SchemaResult<Vec<u8>> {
    if let Some(payload) = bytes.strip_prefix(UTF8_BOM) {
        return Ok(payload.to_vec());
    }
    if let Some(payload) = bytes.strip_prefix(UTF16_LE_BOM) {
        return decode_utf16(payload, Endian::Le).map(String::into_bytes);
    }
    if let Some(payload) = bytes.strip_prefix(UTF16_BE_BOM) {
        return decode_utf16(payload, Endian::Be).map(String::into_bytes);
    }
    // No BOM: fall back to sniffing the first bytes for a UTF-16 encoded `<`.
    match sniff_utf16_no_bom(&bytes) {
        Some(endian) => decode_utf16(&bytes, endian).map(String::into_bytes),
        None => Ok(bytes),
    }
}
/// Decodes raw XML bytes into a `String`.
///
/// The bytes are first normalized by `decode_xml_to_utf8_bytes` and then
/// validated as UTF-8.
///
/// # Errors
/// Returns a resolution error when the normalized bytes are not valid UTF-8,
/// and propagates any error from the normalization step.
pub fn decode_xml_bytes(bytes: Vec<u8>) -> SchemaResult<String> {
    let normalized = decode_xml_to_utf8_bytes(bytes)?;
    match String::from_utf8(normalized) {
        Ok(text) => Ok(text),
        Err(e) => Err(SchemaError::resolution(format!(
            "Invalid UTF-8 content: {}",
            e
        ))),
    }
}
/// Guesses the byte order of BOM-less UTF-16 input from its first four bytes.
///
/// The input is taken to be UTF-16 when it opens with `<` (0x3C) encoded as a
/// 16-bit unit and the following unit has a zero high byte and a non-zero low
/// byte. Returns `None` for inputs shorter than four bytes or when neither
/// layout matches.
fn sniff_utf16_no_bom(bytes: &[u8]) -> Option<Endian> {
    match bytes {
        [0x3C, 0x00, low, 0x00, ..] if *low != 0x00 => Some(Endian::Le),
        [0x00, 0x3C, 0x00, low, ..] if *low != 0x00 => Some(Endian::Be),
        _ => None,
    }
}
/// Decodes a UTF-16 byte stream (without BOM) into a `String`.
///
/// # Errors
/// Returns a resolution error when the stream has an odd number of bytes or
/// when the code units do not form valid UTF-16.
fn decode_utf16(bytes: &[u8], endian: Endian) -> SchemaResult<String> {
    if !bytes.len().is_multiple_of(2) {
        let msg = "UTF-16 byte stream has an odd number of bytes".to_string();
        return Err(SchemaError::resolution(msg));
    }
    // Pick the byte-pair assembler once instead of branching per code unit.
    let assemble: fn([u8; 2]) -> u16 = match endian {
        Endian::Le => u16::from_le_bytes,
        Endian::Be => u16::from_be_bytes,
    };
    let mut units: Vec<u16> = Vec::with_capacity(bytes.len() / 2);
    for pair in bytes.chunks_exact(2) {
        units.push(assemble([pair[0], pair[1]]));
    }
    match String::from_utf16(&units) {
        Ok(text) => Ok(text),
        Err(e) => Err(SchemaError::resolution(format!(
            "Invalid UTF-16 sequence: {}",
            e
        ))),
    }
}
/// A source of schema text addressed by a location string.
///
/// Implementors must be `Send + Sync + Debug`.
pub trait SchemaLoader: Send + Sync + Debug {
/// Fetches the schema at `location` and returns its content as a `String`.
fn load(&self, location: &str) -> SchemaResult<String>;
/// Reports whether this loader is willing to handle `location`.
fn can_load(&self, location: &str) -> bool;
/// Ordering hint; `LoaderChain` consults loaders with higher values first.
/// Defaults to `0`.
fn priority(&self) -> i32 {
0
}
}
/// Loader that reads schema documents from the local filesystem.
#[derive(Debug, Clone, Default)]
pub struct FileSystemLoader {
/// Optional base directory; `None` by default, set by `with_base_dir`.
pub base_dir: Option<PathBuf>,
}
impl FileSystemLoader {
pub fn new() -> Self {
Self::default()
}
pub fn with_base_dir(base_dir: PathBuf) -> Self {
Self {
base_dir: Some(base_dir),
}
}
}
impl SchemaLoader for FileSystemLoader {
    /// Reads the file at `location` and decodes it with `decode_xml_bytes`.
    ///
    /// A leading `file://` scheme is stripped before the path is used, and a
    /// relative path is resolved against `base_dir` when one is configured.
    /// With no base directory and a plain path the file is read exactly as
    /// given.
    ///
    /// # Errors
    /// Returns a resolution error when the file cannot be read, and propagates
    /// decoding errors from `decode_xml_bytes`.
    fn load(&self, location: &str) -> SchemaResult<String> {
        // `file://` locations are accepted by `can_load`, so turn them into a
        // plain path instead of handing the scheme to the filesystem.
        // NOTE(review): only the simple `file:///abs/path` form is handled;
        // host components and Windows drive URIs are not translated.
        let raw = location.strip_prefix("file://").unwrap_or(location);
        let requested = Path::new(raw);
        let path = match &self.base_dir {
            Some(base) if requested.is_relative() => base.join(requested),
            _ => requested.to_path_buf(),
        };
        let bytes = std::fs::read(&path).map_err(|e| {
            SchemaError::resolution(format!("Failed to read file '{}': {}", location, e))
        })?;
        decode_xml_bytes(bytes)
    }

    /// Accepts every location that is not `http://`, `https://` or
    /// `embedded://`.
    fn can_load(&self, location: &str) -> bool {
        !location.starts_with("http://")
            && !location.starts_with("https://")
            && !location.starts_with("embedded://")
    }

    /// Filesystem loading has priority `0`.
    fn priority(&self) -> i32 {
        0
    }
}
/// Loader for schemas compiled into the crate, addressed as `embedded://<name>`.
#[derive(Debug, Clone, Default)]
pub struct EmbeddedLoader;
impl EmbeddedLoader {
pub fn new() -> Self {
Self
}
}
impl SchemaLoader for EmbeddedLoader {
    /// Returns the text of the embedded schema named by `location`.
    ///
    /// Recognized names are `embedded://xml.xsd` and `embedded://xlink.xsd`.
    ///
    /// # Errors
    /// Returns a resolution error when `location` lacks the `embedded://`
    /// prefix, names an unknown schema, or the embedded bytes are not UTF-8.
    fn load(&self, location: &str) -> SchemaResult<String> {
        let Some(name) = location.strip_prefix("embedded://") else {
            return Err(SchemaError::resolution(format!(
                "Not an embedded location: {}",
                location
            )));
        };
        // Select the raw bytes first so the UTF-8 conversion is written once.
        let raw = match name {
            "xml.xsd" => crate::embedded::XML_XSD.to_vec(),
            "xlink.xsd" => crate::embedded::XLINK_XSD.to_vec(),
            _ => {
                return Err(SchemaError::resolution(format!(
                    "Unknown embedded schema: {}",
                    name
                )))
            }
        };
        String::from_utf8(raw).map_err(|e| {
            SchemaError::resolution(format!("Invalid UTF-8 in embedded schema: {}", e))
        })
    }

    /// Accepts only locations that start with `embedded://`.
    fn can_load(&self, location: &str) -> bool {
        location.starts_with("embedded://")
    }

    /// Embedded schemas have priority `100`.
    fn priority(&self) -> i32 {
        100
    }
}
/// A collection of loaders that is itself a `SchemaLoader`.
#[derive(Debug, Default)]
pub struct LoaderChain {
// Re-sorted by descending priority every time `add` is called.
loaders: Vec<Box<dyn SchemaLoader>>,
}
impl LoaderChain {
pub fn new() -> Self {
Self {
loaders: Vec::new(),
}
}
pub fn with_defaults() -> Self {
let mut chain = Self::new();
chain.add(Box::new(EmbeddedLoader::new()));
chain.add(Box::new(FileSystemLoader::new()));
chain
}
pub fn add(&mut self, loader: Box<dyn SchemaLoader>) {
self.loaders.push(loader);
self.loaders
.sort_by_key(|b| std::cmp::Reverse(b.priority()));
}
pub fn len(&self) -> usize {
self.loaders.len()
}
pub fn is_empty(&self) -> bool {
self.loaders.is_empty()
}
}
impl SchemaLoader for LoaderChain {
    /// Delegates to the first loader (in priority order) that accepts
    /// `location`; its result is returned as-is, with no fallback to later
    /// loaders.
    ///
    /// # Errors
    /// Returns a resolution error when no loader accepts `location`, or
    /// whatever error the chosen loader produced.
    fn load(&self, location: &str) -> SchemaResult<String> {
        match self.loaders.iter().find(|l| l.can_load(location)) {
            Some(chosen) => chosen.load(location),
            None => Err(SchemaError::resolution(format!(
                "No loader available for: {}",
                location
            ))),
        }
    }

    /// True when at least one loader in the chain accepts `location`.
    fn can_load(&self, location: &str) -> bool {
        for candidate in &self.loaders {
            if candidate.can_load(location) {
                return true;
            }
        }
        false
    }

    /// Highest priority among the contained loaders, or `0` when empty.
    fn priority(&self) -> i32 {
        let mut highest: Option<i32> = None;
        for loader in &self.loaders {
            let p = loader.priority();
            highest = Some(highest.map_or(p, |h| h.max(p)));
        }
        highest.unwrap_or(0)
    }
}
/// Resolves schema locations, loads their content and feeds it to the parser.
pub struct SchemaResolver {
/// Resolver settings (network policy, parser configuration, ...).
pub config: ResolverConfig,
// Resolved locations whose load is currently in progress; used to report cycles.
resolving: HashSet<String>,
// Namespace-to-location mappings consulted when processing imports.
catalog: SchemaCatalog,
// Synchronous content loader.
loader: Box<dyn SchemaLoader>,
// Optional asynchronous loader, tried before `loader` by the async API.
#[cfg(feature = "async")]
async_loader: Option<Box<dyn AsyncSchemaLoader>>,
}
/// Settings for a `SchemaResolver`.
#[derive(Debug, Clone)]
pub struct ResolverConfig {
/// Optional base directory. NOTE(review): not read anywhere in this file — confirm where it is used.
pub base_dir: Option<PathBuf>,
/// When `false`, `http://` and `https://` locations are rejected before loading.
pub allow_network: bool,
/// Maximum nesting depth. NOTE(review): not enforced anywhere in this file — confirm where it is used.
pub max_depth: usize,
/// Configuration handed to the schema parser for every loaded document.
pub parser_config: ParserConfig,
}
impl Default for ResolverConfig {
fn default() -> Self {
Self {
base_dir: None,
allow_network: false,
max_depth: 100,
parser_config: ParserConfig::default(),
}
}
}
/// Ordered list of namespace-to-location mappings.
#[derive(Debug, Clone, Default)]
pub struct SchemaCatalog {
// Kept in insertion order; `lookup` returns the first matching entry.
entries: Vec<CatalogEntry>,
}
/// One catalog mapping from a namespace to a schema location.
#[derive(Debug, Clone)]
pub struct CatalogEntry {
/// Namespace name, compared by exact string equality in `SchemaCatalog::lookup`.
pub namespace: String,
/// Location string returned for that namespace.
pub location: String,
}
impl SchemaCatalog {
pub fn new() -> Self {
Self::default()
}
pub fn add(&mut self, namespace: impl Into<String>, location: impl Into<String>) {
self.entries.push(CatalogEntry {
namespace: namespace.into(),
location: location.into(),
});
}
pub fn lookup(&self, namespace: &str) -> Option<&str> {
self.entries
.iter()
.find(|e| e.namespace == namespace)
.map(|e| e.location.as_str())
}
pub fn add_xml_catalog(&mut self) {
self.add("http://www.w3.org/XML/1998/namespace", "embedded://xml.xsd");
self.add("http://www.w3.org/1999/xlink", "embedded://xlink.xsd");
self.add(
"http://www.w3.org/2001/XMLSchema-instance",
"http://www.w3.org/2001/XMLSchema-instance.xsd",
);
}
}
impl SchemaResolver {
pub fn new() -> Self {
Self {
config: ResolverConfig::default(),
resolving: HashSet::new(),
catalog: SchemaCatalog::new(),
loader: Box::new(LoaderChain::with_defaults()),
#[cfg(feature = "async")]
async_loader: None,
}
}
pub fn with_config(config: ResolverConfig) -> Self {
Self {
config,
resolving: HashSet::new(),
catalog: SchemaCatalog::new(),
loader: Box::new(LoaderChain::with_defaults()),
#[cfg(feature = "async")]
async_loader: None,
}
}
pub fn with_loader(loader: Box<dyn SchemaLoader>) -> Self {
Self {
config: ResolverConfig::default(),
resolving: HashSet::new(),
catalog: SchemaCatalog::new(),
loader,
#[cfg(feature = "async")]
async_loader: None,
}
}
pub fn with_config_and_loader(config: ResolverConfig, loader: Box<dyn SchemaLoader>) -> Self {
Self {
config,
resolving: HashSet::new(),
catalog: SchemaCatalog::new(),
loader,
#[cfg(feature = "async")]
async_loader: None,
}
}
#[cfg(feature = "async")]
pub fn with_async_loader(async_loader: Box<dyn AsyncSchemaLoader>) -> Self {
Self {
config: ResolverConfig::default(),
resolving: HashSet::new(),
catalog: SchemaCatalog::new(),
loader: Box::new(LoaderChain::with_defaults()),
async_loader: Some(async_loader),
}
}
#[cfg(feature = "async")]
pub fn with_config_and_async_loader(
config: ResolverConfig,
async_loader: Box<dyn AsyncSchemaLoader>,
) -> Self {
Self {
config,
resolving: HashSet::new(),
catalog: SchemaCatalog::new(),
loader: Box::new(LoaderChain::with_defaults()),
async_loader: Some(async_loader),
}
}
pub fn catalog_mut(&mut self) -> &mut SchemaCatalog {
&mut self.catalog
}
pub fn resolve_location(&self, schema_location: &str, base_uri: &str) -> SchemaResult<String> {
if is_absolute_uri(schema_location) {
return Ok(schema_location.to_string());
}
let resolved = resolve_relative_uri(schema_location, base_uri)?;
Ok(resolved)
}
pub fn load_schema(
&mut self,
location: &str,
base_uri: &str,
schema_set: &mut SchemaSet,
chameleon_namespace: Option<NameId>,
) -> SchemaResult<LoadOutcome> {
let resolved = self.resolve_location(location, base_uri)?;
if let Some(id) = check_loaded_cache(schema_set, &resolved, chameleon_namespace) {
return Ok(LoadOutcome::AlreadyLoaded(id));
}
if self.resolving.contains(&resolved) {
return Ok(LoadOutcome::Cycle(resolved));
}
self.resolving.insert(resolved.clone());
let content = match self.load_content(&resolved) {
Ok(c) => c,
Err(e) => {
self.resolving.remove(&resolved);
return Err(e);
}
};
let doc_id = match crate::parser::parse::parse_schema_with_chameleon(
content.as_bytes(),
&resolved,
schema_set,
&self.config.parser_config,
chameleon_namespace,
) {
Ok(id) => id,
Err(e) => {
self.resolving.remove(&resolved);
return Err(e);
}
};
mark_loaded_chameleon_aware(schema_set, &resolved, doc_id, chameleon_namespace);
self.resolving.remove(&resolved);
Ok(LoadOutcome::Loaded(doc_id))
}
pub fn load_content(&self, location: &str) -> SchemaResult<String> {
if (location.starts_with("http://") || location.starts_with("https://"))
&& !self.config.allow_network
{
return Err(SchemaError::resolution(format!(
"Network access not allowed for: {}",
location
)));
}
self.loader.load(location)
}
pub fn process_include(
&mut self,
schema_location: &str,
base_uri: &str,
target_namespace: Option<NameId>,
schema_set: &mut SchemaSet,
) -> SchemaResult<LoadOutcome> {
self.load_schema(schema_location, base_uri, schema_set, target_namespace)
}
pub fn process_import(
&mut self,
namespace: Option<&str>,
schema_location: Option<&str>,
base_uri: &str,
schema_set: &mut SchemaSet,
) -> SchemaResult<Option<LoadOutcome>> {
if let Some(location) = schema_location {
match self.load_schema(location, base_uri, schema_set, None) {
Ok(outcome) => {
validate_import_target_namespace(schema_set, &outcome, namespace)?;
return Ok(Some(outcome));
}
Err(load_err) => {
if let Some(cat_loc) = namespace.and_then(|ns| self.catalog.lookup(ns)) {
let cat_loc = cat_loc.to_string();
return self.try_catalog_load(&cat_loc, base_uri, namespace, schema_set);
}
return Err(load_err);
}
}
}
if let Some(cat_loc) = namespace.and_then(|ns| self.catalog.lookup(ns)) {
let cat_loc = cat_loc.to_string();
return self.try_catalog_load(&cat_loc, base_uri, namespace, schema_set);
}
Ok(None)
}
fn try_catalog_load(
&mut self,
catalog_location: &str,
base_uri: &str,
namespace: Option<&str>,
schema_set: &mut SchemaSet,
) -> SchemaResult<Option<LoadOutcome>> {
let already_loaded = self
.resolve_location(catalog_location, base_uri)
.ok()
.is_some_and(|r| schema_set.loaded_locations.contains_key(&r));
if already_loaded {
return Ok(None);
}
if namespace_already_covered(schema_set, namespace) {
return Ok(None);
}
let outcome = self.load_schema(catalog_location, base_uri, schema_set, None)?;
validate_import_target_namespace(schema_set, &outcome, namespace)?;
Ok(Some(outcome))
}
pub fn process_redefine(
&mut self,
schema_location: &str,
base_uri: &str,
target_namespace: Option<NameId>,
schema_set: &mut SchemaSet,
) -> SchemaResult<LoadOutcome> {
self.load_schema(schema_location, base_uri, schema_set, target_namespace)
}
#[cfg(feature = "xsd11")]
pub fn process_override(
&mut self,
schema_location: &str,
base_uri: &str,
target_namespace: Option<NameId>,
schema_set: &mut SchemaSet,
) -> SchemaResult<LoadOutcome> {
self.load_schema(schema_location, base_uri, schema_set, target_namespace)
}
}
impl Default for SchemaResolver {
fn default() -> Self {
Self::new()
}
}
/// Reports whether some document in `schema_set` already has `namespace` as
/// its declared or effective target namespace.
///
/// Returns `false` when `namespace` is absent or is not present in the name
/// table.
fn namespace_already_covered(schema_set: &SchemaSet, namespace: Option<&str>) -> bool {
    let ns_id = match namespace.and_then(|ns| schema_set.name_table.get(ns)) {
        Some(id) => id,
        None => return false,
    };
    let wanted = Some(ns_id);
    for doc in schema_set.documents.iter() {
        if doc.declared_target_namespace == wanted || doc.target_namespace == wanted {
            return true;
        }
    }
    false
}
/// Checks that an imported document's target namespace matches the import
/// directive's `namespace` attribute.
///
/// Cycle outcomes and document ids that are not present in `schema_set` are
/// accepted without checking.
///
/// # Errors
/// Returns a `src-import` structural error describing the mismatch.
fn validate_import_target_namespace(
    schema_set: &SchemaSet,
    outcome: &LoadOutcome,
    namespace: Option<&str>,
) -> SchemaResult<()> {
    let doc_id = match outcome {
        LoadOutcome::Cycle(_) => return Ok(()),
        LoadOutcome::Loaded(id) | LoadOutcome::AlreadyLoaded(id) => *id,
    };
    let imported_tns = match schema_set.documents.get(doc_id as usize) {
        Some(doc) => doc
            .target_namespace
            .map(|n| schema_set.name_table.resolve_ref(n)),
        None => return Ok(()),
    };
    if namespace == imported_tns {
        return Ok(());
    }
    let msg = match (namespace, imported_tns) {
        (Some(ns), Some(tns)) => format!(
            "Import directive namespace='{}' does not match imported schema's \
             targetNamespace='{}' (src-import clause 1.2)",
            ns, tns
        ),
        (Some(ns), None) => format!(
            "Import directive namespace='{}' does not match imported schema's absent \
             targetNamespace (src-import clause 1.2)",
            ns
        ),
        (None, Some(tns)) => format!(
            "Import directive has no namespace attribute, but imported schema has \
             targetNamespace='{}' (src-import clause 1.1 requires absent targetNamespace)",
            tns
        ),
        (None, None) => unreachable!("handled by early return above"),
    };
    Err(SchemaError::structural("src-import", msg, None))
}
/// Asynchronous counterpart of `SchemaLoader`, available with the `async` feature.
#[cfg(feature = "async")]
pub trait AsyncSchemaLoader: Send + Sync + Debug {
/// Starts loading `location`; the returned boxed future yields the schema text.
fn load_async(
&self,
location: &str,
) -> Pin<Box<dyn std::future::Future<Output = SchemaResult<String>> + Send + '_>>;
/// Reports whether this loader is willing to handle `location`.
fn can_load(&self, location: &str) -> bool;
}
#[cfg(feature = "async")]
impl SchemaResolver {
    /// Async variant of `load_content`.
    ///
    /// Applies the same network policy, then uses the async loader when one
    /// is installed and accepts `location`; otherwise falls back to the
    /// synchronous loader.
    pub async fn load_content_async(&self, location: &str) -> SchemaResult<String> {
        let is_remote = ["http://", "https://"]
            .iter()
            .any(|scheme| location.starts_with(*scheme));
        if is_remote && !self.config.allow_network {
            return Err(SchemaError::resolution(format!(
                "Network access not allowed for: {}",
                location
            )));
        }
        match &self.async_loader {
            Some(async_loader) if async_loader.can_load(location) => {
                async_loader.load_async(location).await
            }
            _ => self.loader.load(location),
        }
    }

    /// Async variant of `load_schema`: same cache, cycle and cleanup
    /// behavior, with content fetched through `load_content_async`.
    pub async fn load_schema_async(
        &mut self,
        location: &str,
        base_uri: &str,
        schema_set: &mut SchemaSet,
        chameleon_namespace: Option<NameId>,
    ) -> SchemaResult<LoadOutcome> {
        let resolved = self.resolve_location(location, base_uri)?;
        if let Some(cached) = check_loaded_cache(schema_set, &resolved, chameleon_namespace) {
            return Ok(LoadOutcome::AlreadyLoaded(cached));
        }
        // `insert` returns false when the location is already in progress.
        if !self.resolving.insert(resolved.clone()) {
            return Ok(LoadOutcome::Cycle(resolved));
        }
        let parsed = match self.load_content_async(&resolved).await {
            Ok(content) => crate::parser::parse::parse_schema_with_chameleon(
                content.as_bytes(),
                &resolved,
                schema_set,
                &self.config.parser_config,
                chameleon_namespace,
            ),
            Err(e) => Err(e),
        };
        // Single cleanup point for both the success and the failure paths.
        self.resolving.remove(&resolved);
        let doc_id = parsed?;
        mark_loaded_chameleon_aware(schema_set, &resolved, doc_id, chameleon_namespace);
        Ok(LoadOutcome::Loaded(doc_id))
    }
}
/// Async variant of `resolve_all_directives`: loads every include, import,
/// redefine (and, with `xsd11`, override) target of document `doc_id`.
///
/// For each directive the resolved document id is stored on the directive and
/// a composition edge is recorded. `result.loaded` lists only documents that
/// were newly loaded by this call (matching the synchronous function);
/// documents served from the cache are linked but not reported again. Cycles
/// are listed in `result.skipped`.
///
/// Imports prefer a catalog entry for the namespace over the directive's own
/// `schemaLocation`, and an import whose catalog location is already loaded
/// is skipped entirely.
///
/// NOTE(review): unlike the synchronous function, this path performs no
/// `src-include` / `src-import` checks — confirm whether that is intended.
///
/// Failures on include/redefine/override go to `result.errors`, failures on
/// import go to `result.import_errors`. An unknown `doc_id` yields a single
/// internal error.
#[cfg(feature = "async")]
pub async fn resolve_all_directives_async(
    doc_id: DocumentId,
    resolver: &mut SchemaResolver,
    schema_set: &mut SchemaSet,
) -> ResolutionResult {
    let mut result = ResolutionResult::default();
    let doc = match schema_set.documents.get(doc_id as usize) {
        Some(d) => d,
        None => {
            result.errors.push(SchemaError::internal(format!(
                "Document {} not found",
                doc_id
            )));
            return result;
        }
    };
    // Snapshot the directive lists so `schema_set` can be mutated while loading.
    let base_uri = doc.base_uri.clone();
    let target_namespace = doc.target_namespace;
    let includes: Vec<_> = doc.includes.to_vec();
    let imports: Vec<_> = doc.imports.to_vec();
    let redefines: Vec<_> = doc.redefines.to_vec();
    #[cfg(feature = "xsd11")]
    let overrides: Vec<_> = doc.overrides.to_vec();

    for (i, include) in includes.iter().enumerate() {
        match resolver
            .load_schema_async(
                &include.schema_location,
                &base_uri,
                schema_set,
                target_namespace,
            )
            .await
        {
            Ok(ref outcome) => {
                match outcome {
                    LoadOutcome::Loaded(id) => {
                        result.loaded.push(*id);
                        schema_set.documents[doc_id as usize].includes[i].resolved_doc_id =
                            Some(*id);
                    }
                    LoadOutcome::AlreadyLoaded(id) => {
                        schema_set.documents[doc_id as usize].includes[i].resolved_doc_id =
                            Some(*id);
                    }
                    LoadOutcome::Cycle(_) => {
                        result.skipped.push(include.schema_location.clone());
                    }
                }
                record_edge(
                    schema_set,
                    doc_id,
                    outcome,
                    CompositionEdgeKind::Include,
                    include.source.as_ref(),
                    &include.schema_location,
                );
            }
            Err(e) => result.errors.push(e),
        }
    }

    for (i, import) in imports.iter().enumerate() {
        let catalog_location = import
            .namespace
            .as_deref()
            .and_then(|ns| resolver.catalog.lookup(ns).map(|l| l.to_string()));
        if let Some(location) = catalog_location {
            let catalog_already_loaded = resolver
                .resolve_location(&location, &base_uri)
                .ok()
                .is_some_and(|r| schema_set.loaded_locations.contains_key(&r));
            if catalog_already_loaded {
                continue;
            }
            match resolver
                .load_schema_async(&location, &base_uri, schema_set, None)
                .await
            {
                Ok(ref outcome) => {
                    match outcome {
                        LoadOutcome::Loaded(id) => {
                            result.loaded.push(*id);
                            schema_set.documents[doc_id as usize].imports[i].resolved_doc_id =
                                Some(*id);
                        }
                        LoadOutcome::AlreadyLoaded(id) => {
                            schema_set.documents[doc_id as usize].imports[i].resolved_doc_id =
                                Some(*id);
                        }
                        LoadOutcome::Cycle(_) => {
                            result.skipped.push(location.clone());
                        }
                    }
                    record_edge(
                        schema_set,
                        doc_id,
                        outcome,
                        CompositionEdgeKind::Import,
                        import.source.as_ref(),
                        &location,
                    );
                }
                Err(e) => result.import_errors.push(e),
            }
        } else if let Some(location) = import.schema_location.as_deref() {
            match resolver
                .load_schema_async(location, &base_uri, schema_set, None)
                .await
            {
                Ok(ref outcome) => {
                    match outcome {
                        LoadOutcome::Loaded(id) => {
                            result.loaded.push(*id);
                            schema_set.documents[doc_id as usize].imports[i].resolved_doc_id =
                                Some(*id);
                        }
                        LoadOutcome::AlreadyLoaded(id) => {
                            schema_set.documents[doc_id as usize].imports[i].resolved_doc_id =
                                Some(*id);
                        }
                        LoadOutcome::Cycle(_) => {
                            result.skipped.push(location.to_string());
                        }
                    }
                    record_edge(
                        schema_set,
                        doc_id,
                        outcome,
                        CompositionEdgeKind::Import,
                        import.source.as_ref(),
                        location,
                    );
                }
                Err(e) => result.import_errors.push(e),
            }
        }
    }

    for (i, redefine) in redefines.iter().enumerate() {
        match resolver
            .load_schema_async(
                &redefine.schema_location,
                &base_uri,
                schema_set,
                target_namespace,
            )
            .await
        {
            Ok(ref outcome) => {
                match outcome {
                    LoadOutcome::Loaded(id) => {
                        result.loaded.push(*id);
                        schema_set.documents[doc_id as usize].redefines[i].resolved_doc_id =
                            Some(*id);
                    }
                    LoadOutcome::AlreadyLoaded(id) => {
                        schema_set.documents[doc_id as usize].redefines[i].resolved_doc_id =
                            Some(*id);
                    }
                    LoadOutcome::Cycle(_) => {
                        result.skipped.push(redefine.schema_location.clone());
                    }
                }
                record_edge(
                    schema_set,
                    doc_id,
                    outcome,
                    CompositionEdgeKind::Redefine,
                    redefine.source.as_ref(),
                    &redefine.schema_location,
                );
            }
            Err(e) => result.errors.push(e),
        }
    }

    #[cfg(feature = "xsd11")]
    for (i, override_dir) in overrides.iter().enumerate() {
        match resolver
            .load_schema_async(
                &override_dir.schema_location,
                &base_uri,
                schema_set,
                target_namespace,
            )
            .await
        {
            Ok(ref outcome) => {
                match outcome {
                    LoadOutcome::Loaded(id) => {
                        result.loaded.push(*id);
                        schema_set.documents[doc_id as usize].overrides[i].resolved_doc_id =
                            Some(*id);
                    }
                    LoadOutcome::AlreadyLoaded(id) => {
                        schema_set.documents[doc_id as usize].overrides[i].resolved_doc_id =
                            Some(*id);
                    }
                    LoadOutcome::Cycle(_) => {
                        result.skipped.push(override_dir.schema_location.clone());
                    }
                }
                record_edge(
                    schema_set,
                    doc_id,
                    outcome,
                    CompositionEdgeKind::Override,
                    override_dir.source.as_ref(),
                    &override_dir.schema_location,
                );
            }
            Err(e) => result.errors.push(e),
        }
    }
    result
}
/// Reports whether `uri` should be used as-is rather than resolved against a
/// base.
///
/// A location counts as absolute when it starts with `http://`, `https://`,
/// `file://`, `embedded://` or `/`, or — on Windows builds only — when its
/// second byte is `:` (drive-letter paths such as `C:\schemas\a.xsd`).
fn is_absolute_uri(uri: &str) -> bool {
    const ABSOLUTE_PREFIXES: [&str; 5] = ["http://", "https://", "file://", "embedded://", "/"];
    if ABSOLUTE_PREFIXES
        .iter()
        .any(|prefix| uri.starts_with(*prefix))
    {
        return true;
    }
    // Compare bytes rather than slicing the string: `&uri[1..2]` panics when
    // the first character is multi-byte (index 1 is not a char boundary).
    cfg!(windows) && uri.as_bytes().get(1) == Some(&b':')
}
/// Resolves `relative` against `base`, using URL rules when the base is an
/// `http://`/`https://` URL and filesystem path rules otherwise.
fn resolve_relative_uri(relative: &str, base: &str) -> SchemaResult<String> {
    let base_is_http = ["http://", "https://"]
        .iter()
        .any(|scheme| base.starts_with(*scheme));
    if base_is_http {
        return resolve_relative_url(relative, base);
    }
    resolve_relative_path(relative, base)
}
/// Resolves `relative` against the URL `base`.
///
/// The last path segment of `base` (the "file" part) is replaced by
/// `relative`. A base that has no path at all (e.g. `http://example.com`)
/// gets a `/` inserted so the host and the relative part are not glued
/// together. Leading `./` segments of `relative` are dropped and each leading
/// `../` removes one directory from the base, never climbing above the first
/// `/` after the authority, so equivalent references resolve to the same
/// string.
///
/// Dot segments that appear later inside `relative`, as well as queries and
/// fragments, are passed through untouched.
fn resolve_relative_url(relative: &str, base: &str) -> SchemaResult<String> {
    // Index just past "scheme://"; 0 when the base has no scheme separator.
    let authority_start = base.find("://").map_or(0, |p| p + 3);
    // Directory part of the base, always ending in '/'.
    let mut dir = match base.rfind('/') {
        Some(pos) if pos >= authority_start => base[..=pos].to_string(),
        // No path component (or no slash at all): the directory is the root.
        _ => format!("{}/", base),
    };
    // Length of "scheme://authority/"; `..` must not climb above it.
    let root_len = dir[authority_start..]
        .find('/')
        .map_or(dir.len(), |i| authority_start + i + 1);

    let mut rest = relative;
    loop {
        if let Some(stripped) = rest.strip_prefix("./") {
            rest = stripped;
        } else if let Some(stripped) = rest.strip_prefix("../") {
            rest = stripped;
            if dir.len() > root_len {
                dir.pop(); // trailing '/'
                let cut = dir.rfind('/').map_or(0, |i| i + 1);
                dir.truncate(cut);
            }
        } else {
            break;
        }
    }
    Ok(format!("{}{}", dir, rest))
}
/// Resolves `relative` against the directory that contains the file `base`.
///
/// The joined path is passed through `normalize_path` and returned as a
/// (lossily converted) string. When `base` has no parent, `.` is used.
fn resolve_relative_path(relative: &str, base: &str) -> SchemaResult<String> {
    let parent = match Path::new(base).parent() {
        Some(dir) => dir,
        None => Path::new("."),
    };
    let joined = parent.join(relative);
    Ok(normalize_path(&joined).to_string_lossy().into_owned())
}
/// Lexically normalizes `path` by removing `.` components and folding each
/// `..` into the component before it. The filesystem is not consulted.
///
/// A `..` that has nothing to fold into is kept when the path is relative
/// (`../a` stays `../a`; `a/../../b` becomes `../b`) and dropped when it would
/// climb above a root or prefix (`/../a` becomes `/a`).
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut result = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => match result.components().next_back() {
                // Ordinary directory: `dir/..` cancels out.
                Some(Component::Normal(_)) => {
                    result.pop();
                }
                // Nothing exists above the root / prefix.
                Some(Component::RootDir) | Some(Component::Prefix(_)) => {}
                // Empty so far, or already climbing: keep the `..` so the
                // relative path still points at the same place.
                Some(Component::ParentDir) | Some(Component::CurDir) | None => {
                    result.push("..");
                }
            },
            other => result.push(other),
        }
    }
    result
}
/// Accumulated outcome of resolving the composition directives of one document.
#[derive(Debug, Default)]
pub struct ResolutionResult {
/// Ids of documents reported as loaded while resolving.
pub loaded: Vec<DocumentId>,
/// Errors from include/redefine/override processing and structural checks.
pub errors: Vec<SchemaError>,
/// Errors raised while loading imports; kept separate from `errors`.
pub import_errors: Vec<SchemaError>,
/// Schema locations that were not loaded because a cycle was detected.
pub skipped: Vec<String>,
}
impl ResolutionResult {
    /// True when neither `errors` nor `import_errors` contains anything.
    pub fn is_ok(&self) -> bool {
        let failed = !self.errors.is_empty() || !self.import_errors.is_empty();
        !failed
    }

    /// True when at least one document id was recorded in `loaded`.
    pub fn has_loaded(&self) -> bool {
        self.loaded.first().is_some()
    }
}
/// Appends a composition edge from `source_doc` to the target described by
/// `outcome`.
///
/// For loaded or cached targets the edge stores the target id and that
/// document's `base_uri`; for a cycle the target id is left empty and the
/// resolved location carried by the outcome is stored instead.
///
/// Panics if the outcome carries a document id that is out of range for
/// `schema_set.documents`.
fn record_edge(
    schema_set: &mut SchemaSet,
    source_doc: DocumentId,
    outcome: &LoadOutcome,
    kind: CompositionEdgeKind,
    source: Option<&crate::parser::location::SourceRef>,
    schema_location: &str,
) {
    let target_doc = match outcome {
        LoadOutcome::Cycle(_) => None,
        LoadOutcome::Loaded(id) | LoadOutcome::AlreadyLoaded(id) => Some(*id),
    };
    let resolved_location = match outcome {
        LoadOutcome::Cycle(resolved) => resolved.clone(),
        LoadOutcome::Loaded(id) | LoadOutcome::AlreadyLoaded(id) => {
            schema_set.documents[*id as usize].base_uri.clone()
        }
    };
    let edge = CompositionEdge {
        source_doc,
        target_doc,
        resolved_location,
        kind,
        source: source.cloned(),
        schema_location: schema_location.to_string(),
    };
    schema_set.composition_edges.push(edge);
}
/// Looks for an already-loaded document that can serve a request for
/// `resolved` under `chameleon_namespace`.
///
/// The chameleon cache (keyed by location and namespace) is consulted first.
/// Otherwise the document recorded for the location is reused only when it is
/// not a chameleon and either has a target namespace or no chameleon
/// namespace was requested. A recorded id with no matching document is
/// treated as reusable.
fn check_loaded_cache(
    schema_set: &SchemaSet,
    resolved: &str,
    chameleon_namespace: Option<NameId>,
) -> Option<DocumentId> {
    let chameleon_hit = chameleon_namespace.and_then(|ns| {
        schema_set
            .chameleon_cache
            .get(&(resolved.to_owned(), ns))
            .copied()
    });
    if chameleon_hit.is_some() {
        return chameleon_hit;
    }
    let id = *schema_set.loaded_locations.get(resolved)?;
    let reusable = match schema_set.documents.get(id as usize) {
        None => true,
        Some(doc) => {
            !doc.is_chameleon()
                && (doc.target_namespace.is_some() || chameleon_namespace.is_none())
        }
    };
    reusable.then_some(id)
}
/// Records a freshly parsed document in the caches.
///
/// A chameleon document loaded under a namespace is stored in the chameleon
/// cache under `(resolved, namespace)`. Independently, the location is marked
/// as loaded unless it already has an entry, so the first document recorded
/// for a location is kept.
fn mark_loaded_chameleon_aware(
    schema_set: &mut SchemaSet,
    resolved: &str,
    doc_id: DocumentId,
    chameleon_namespace: Option<NameId>,
) {
    let is_chameleon = match schema_set.documents.get(doc_id as usize) {
        Some(doc) => doc.is_chameleon(),
        None => false,
    };
    if let (true, Some(ns)) = (is_chameleon, chameleon_namespace) {
        let key = (resolved.to_owned(), ns);
        schema_set.chameleon_cache.insert(key, doc_id);
    }
    let already_recorded = schema_set.loaded_locations.contains_key(resolved);
    if !already_recorded {
        schema_set.mark_loaded(resolved.to_owned(), doc_id);
    }
}
/// Fills in the target document of every composition edge that has none,
/// using the document recorded for the edge's resolved location.
///
/// Edges whose location has no recorded document keep an empty target.
pub fn fixup_composition_edges(schema_set: &mut SchemaSet) {
    let locations = &schema_set.loaded_locations;
    let unresolved = schema_set
        .composition_edges
        .iter_mut()
        .filter(|edge| edge.target_doc.is_none());
    for edge in unresolved {
        edge.target_doc = locations.get(&edge.resolved_location).copied();
    }
}
/// Resolves every include, import, redefine (and, with `xsd11`, override)
/// directive of document `doc_id`.
///
/// For each directive that loads or reuses a document, the resolved id is
/// stored on the directive and a composition edge is recorded. Only newly
/// loaded documents are pushed to `result.loaded`; cycles go to
/// `result.skipped`. Failures on include/redefine/override and structural
/// violations go to `result.errors`; failures while loading imports go to
/// `result.import_errors`. An unknown `doc_id` yields a single internal error.
pub fn resolve_all_directives(
doc_id: DocumentId,
resolver: &mut SchemaResolver,
schema_set: &mut SchemaSet,
) -> ResolutionResult {
let mut result = ResolutionResult::default();
let doc = match schema_set.documents.get(doc_id as usize) {
Some(d) => d,
None => {
result.errors.push(SchemaError::internal(format!(
"Document {} not found",
doc_id
)));
return result;
}
};
// Snapshot everything needed from the document so `schema_set` can be
// mutated while the directives are processed.
let base_uri = doc.base_uri.clone();
let target_namespace = doc.target_namespace;
let includes: Vec<_> = doc.includes.to_vec();
let imports: Vec<_> = doc.imports.to_vec();
let redefines: Vec<_> = doc.redefines.to_vec();
#[cfg(feature = "xsd11")]
let overrides: Vec<_> = doc.overrides.to_vec();
// --- includes ---
for (i, include) in includes.iter().enumerate() {
match resolver.process_include(
&include.schema_location,
&base_uri,
target_namespace,
schema_set,
) {
Ok(ref outcome) => {
match outcome {
LoadOutcome::Loaded(id) | LoadOutcome::AlreadyLoaded(id) => {
schema_set.documents[doc_id as usize].includes[i].resolved_doc_id =
Some(*id);
// Only documents loaded by this call are reported as loaded.
if matches!(outcome, LoadOutcome::Loaded(_)) {
result.loaded.push(*id);
}
// src-include: a schema without a targetNamespace may not include
// one that declares a targetNamespace.
if target_namespace.is_none() {
let included_declared = schema_set
.documents
.get(*id as usize)
.and_then(|d| d.declared_target_namespace);
if let Some(declared) = included_declared {
let location = include
.source
.as_ref()
.and_then(|s| schema_set.source_maps.locate(s));
let declared_str =
schema_set.name_table.resolve(declared).to_string();
result.errors.push(SchemaError::structural(
"src-include",
format!(
"Included schema has targetNamespace '{}' \
but the including schema has no \
targetNamespace",
declared_str
),
location,
));
}
}
}
_ => {
// Cycle: nothing was loaded, remember the skipped location.
result.skipped.push(include.schema_location.clone());
}
}
record_edge(
schema_set,
doc_id,
outcome,
CompositionEdgeKind::Include,
include.source.as_ref(),
&include.schema_location,
);
}
Err(e) => result.errors.push(e),
}
}
// --- imports ---
for (i, import) in imports.iter().enumerate() {
// In XSD 1.0 mode an import may not name the enclosing schema's own
// target namespace; such an import is reported and not loaded.
if schema_set.is_xsd10() {
if let Some(import_ns_str) = import.namespace.as_deref() {
let tns_str = target_namespace.map(|n| schema_set.name_table.resolve(n));
if Some(import_ns_str) == tns_str.as_deref() {
result.errors.push(SchemaError::structural(
"src-import",
format!(
"xs:import namespace '{}' must not equal the enclosing \
schema's targetNamespace in XSD 1.0",
import_ns_str
),
import
.source
.as_ref()
.and_then(|s| schema_set.source_maps.locate(s)),
));
continue;
}
}
}
// An import without a namespace requires the enclosing schema to have a
// targetNamespace; otherwise it is reported and not loaded.
if import.namespace.is_none() && target_namespace.is_none() {
result.errors.push(SchemaError::structural(
"src-import",
"xs:import without 'namespace' requires the enclosing schema to have \
a 'targetNamespace' attribute",
import
.source
.as_ref()
.and_then(|s| schema_set.source_maps.locate(s)),
));
continue;
}
match resolver.process_import(
import.namespace.as_deref(),
import.schema_location.as_deref(),
&base_uri,
schema_set,
) {
Ok(Some(ref outcome)) => {
match outcome {
LoadOutcome::Loaded(id) => {
result.loaded.push(*id);
schema_set.documents[doc_id as usize].imports[i].resolved_doc_id =
Some(*id);
}
LoadOutcome::AlreadyLoaded(id) => {
schema_set.documents[doc_id as usize].imports[i].resolved_doc_id =
Some(*id);
}
_ => {
// Cycle: only recorded as skipped when the directive had a location.
if let Some(loc) = &import.schema_location {
result.skipped.push(loc.clone());
}
}
}
// The edge stores the directive's own schemaLocation (empty when absent).
record_edge(
schema_set,
doc_id,
outcome,
CompositionEdgeKind::Import,
import.source.as_ref(),
import.schema_location.as_deref().unwrap_or_default(),
);
}
// Nothing was loaded for this import; no edge is recorded.
Ok(None) => {
}
// Import failures are collected separately from other errors.
Err(e) => result.import_errors.push(e),
}
}
// --- redefines ---
for (i, redefine) in redefines.iter().enumerate() {
match resolver.process_redefine(
&redefine.schema_location,
&base_uri,
target_namespace,
schema_set,
) {
Ok(ref outcome) => {
match outcome {
LoadOutcome::Loaded(id) => {
result.loaded.push(*id);
schema_set.documents[doc_id as usize].redefines[i].resolved_doc_id =
Some(*id);
}
LoadOutcome::AlreadyLoaded(id) => {
schema_set.documents[doc_id as usize].redefines[i].resolved_doc_id =
Some(*id);
}
_ => {
result.skipped.push(redefine.schema_location.clone());
}
}
record_edge(
schema_set,
doc_id,
outcome,
CompositionEdgeKind::Redefine,
redefine.source.as_ref(),
&redefine.schema_location,
);
}
Err(e) => result.errors.push(e),
}
}
// --- overrides (only with the `xsd11` feature) ---
#[cfg(feature = "xsd11")]
for (i, override_dir) in overrides.iter().enumerate() {
match resolver.process_override(
&override_dir.schema_location,
&base_uri,
target_namespace,
schema_set,
) {
Ok(ref outcome) => {
match outcome {
LoadOutcome::Loaded(id) => {
result.loaded.push(*id);
schema_set.documents[doc_id as usize].overrides[i].resolved_doc_id =
Some(*id);
}
LoadOutcome::AlreadyLoaded(id) => {
schema_set.documents[doc_id as usize].overrides[i].resolved_doc_id =
Some(*id);
}
_ => {
result.skipped.push(override_dir.schema_location.clone());
}
}
record_edge(
schema_set,
doc_id,
outcome,
CompositionEdgeKind::Override,
override_dir.source.as_ref(),
&override_dir.schema_location,
);
}
Err(e) => result.errors.push(e),
}
}
result
}
#[cfg(test)]
mod tests {
use super::*;
/// URLs and rooted paths are absolute; plain relative paths are not.
#[test]
fn test_is_absolute_uri() {
    let absolute = [
        "http://example.com/schema.xsd",
        "https://example.com/schema.xsd",
        "/absolute/path/schema.xsd",
    ];
    for uri in absolute {
        assert!(is_absolute_uri(uri));
    }
    let relative = ["relative/path/schema.xsd", "../parent/schema.xsd"];
    for uri in relative {
        assert!(!is_absolute_uri(uri));
    }
}
/// A sibling file name resolves to a path that still names that file.
#[test]
fn test_resolve_relative_path() {
    let outcome = resolve_relative_path("types.xsd", "/home/user/schema.xsd");
    let sibling = outcome.unwrap();
    assert!(sibling.contains("types.xsd"));
}
/// A `../` reference keeps both the target directory and the file name.
#[test]
fn test_resolve_relative_path_parent() {
    let base = "/home/user/schemas/main.xsd";
    let resolved = resolve_relative_path("../common/types.xsd", base).unwrap();
    for fragment in ["common", "types.xsd"] {
        assert!(resolved.contains(fragment));
    }
}
/// A relative file name replaces the last path segment of an HTTP base URL.
#[test]
fn test_resolve_relative_url() {
    let base = "http://example.com/schemas/main.xsd";
    let resolved = resolve_relative_url("types.xsd", base).unwrap();
    assert_eq!(resolved, "http://example.com/schemas/types.xsd");
}
/// Lookup finds a registered namespace and reports `None` for an unknown one.
#[test]
fn test_catalog_lookup() {
    let mut catalog = SchemaCatalog::new();
    catalog.add("http://example.com/ns", "/path/to/schema.xsd");

    let known = catalog.lookup("http://example.com/ns");
    assert_eq!(known, Some("/path/to/schema.xsd"));

    let unknown = catalog.lookup("http://other.com/ns");
    assert_eq!(unknown, None);
}
/// The default configuration disables networking and uses a depth of 100.
#[test]
fn test_resolver_config_default() {
    let ResolverConfig {
        allow_network,
        max_depth,
        ..
    } = ResolverConfig::default();
    assert!(!allow_network);
    assert_eq!(max_depth, 100);
}
/// A new resolver has no loads in progress.
#[test]
fn test_resolver_new() {
    let fresh = SchemaResolver::new();
    assert!(fresh.resolving.is_empty());
}
/// Normalization removes `..` and `./` from an absolute path.
#[test]
fn test_normalize_path() {
    let normalized = normalize_path(Path::new("/home/user/../other/./schema.xsd"));
    let text = normalized.to_string_lossy();
    for dot_segment in ["..", "./"] {
        assert!(!text.contains(dot_segment));
    }
}
/// A default result is successful and reports nothing loaded.
#[test]
fn test_resolution_result_default() {
    let empty = ResolutionResult::default();
    assert!(empty.is_ok());
    assert!(!empty.has_loaded());
}
/// The built-in catalog maps the XML namespace to the embedded schema and
/// knows the XMLSchema-instance namespace.
#[test]
fn test_catalog_xml_namespaces() {
    let mut catalog = SchemaCatalog::new();
    catalog.add_xml_catalog();

    let xml_location = catalog.lookup("http://www.w3.org/XML/1998/namespace");
    assert_eq!(xml_location, Some("embedded://xml.xsd"));

    let xsi_location = catalog.lookup("http://www.w3.org/2001/XMLSchema-instance");
    assert!(xsi_location.is_some());
}
/// The embedded loader accepts only `embedded://` locations, serves
/// `xml.xsd`, and rejects unknown names.
#[test]
fn test_embedded_loader() {
    let loader = EmbeddedLoader::new();

    assert!(loader.can_load("embedded://xml.xsd"));
    for foreign in ["/path/to/file.xsd", "http://example.com/schema.xsd"] {
        assert!(!loader.can_load(foreign));
    }

    let xml_schema = loader.load("embedded://xml.xsd").unwrap();
    assert!(xml_schema.contains("targetNamespace=\"http://www.w3.org/XML/1998/namespace\""));

    let unknown = loader.load("embedded://unknown.xsd");
    assert!(unknown.is_err());
}
/// The filesystem loader accepts plain paths and rejects embedded and HTTP(S)
/// locations.
#[test]
fn test_file_system_loader() {
    let loader = FileSystemLoader::new();
    for accepted in ["/path/to/file.xsd", "relative/path.xsd"] {
        assert!(loader.can_load(accepted));
    }
    let rejected = [
        "embedded://xml.xsd",
        "http://example.com/schema.xsd",
        "https://example.com/schema.xsd",
    ];
    for location in rejected {
        assert!(!loader.can_load(location));
    }
}
/// The default chain has two loaders and serves embedded and file locations.
#[test]
fn test_loader_chain() {
    let chain = LoaderChain::with_defaults();
    for location in ["embedded://xml.xsd", "/path/to/file.xsd"] {
        assert!(chain.can_load(location));
    }
    let xml_schema = chain.load("embedded://xml.xsd").unwrap();
    assert!(xml_schema.contains("http://www.w3.org/XML/1998/namespace"));
    assert_eq!(chain.len(), 2);
}
/// The chain's priority is the highest priority among its loaders,
/// whatever the insertion order.
#[test]
fn test_loader_chain_priority() {
    let mut chain = LoaderChain::new();
    let low_priority: Box<dyn SchemaLoader> = Box::new(FileSystemLoader::new());
    chain.add(low_priority);
    let high_priority: Box<dyn SchemaLoader> = Box::new(EmbeddedLoader::new());
    chain.add(high_priority);
    assert_eq!(chain.priority(), 100);
}
/// A default resolver can load embedded schemas through its loader chain.
#[test]
fn test_resolver_with_embedded_loader() {
    let xml_schema = SchemaResolver::new()
        .load_content("embedded://xml.xsd")
        .unwrap();
    assert!(xml_schema.contains("http://www.w3.org/XML/1998/namespace"));
}
/// Including and redefining the same file records one edge of each kind, both
/// originating from the main document and pointing at the same target.
#[test]
fn test_composition_edges_recorded() {
use crate::parser::parse::parse_schema;
use crate::schema::composition::CompositionEdgeKind;
use crate::schema::SchemaSet;
// Fixture directory under the system temp dir; removed at the end of the test.
let tmp = std::env::temp_dir().join("xsd_test_composition_edges");
std::fs::create_dir_all(&tmp).unwrap();
// Base schema: a single simple type that the main schema includes and redefines.
let base_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:simpleType name="MyString">
<xs:restriction base="xs:string"/>
</xs:simpleType>
</xs:schema>"#;
let base_path = tmp.join("comp_base.xsd");
std::fs::write(&base_path, base_xsd).unwrap();
// Main schema references the base file by its full path, once via include
// and once via redefine.
let main_xsd = format!(
r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:include schemaLocation="{loc}"/>
<xs:redefine schemaLocation="{loc}">
<xs:simpleType name="MyString">
<xs:restriction base="MyString">
<xs:maxLength value="50"/>
</xs:restriction>
</xs:simpleType>
</xs:redefine>
</xs:schema>"#,
loc = base_path.to_string_lossy()
);
let mut schema_set = SchemaSet::new();
// The main schema is parsed from memory; only the base file exists on disk.
let main_path = tmp.join("comp_main.xsd").to_string_lossy().to_string();
let doc_id = parse_schema(main_xsd.as_bytes(), &main_path, &mut schema_set).unwrap();
let mut resolver = SchemaResolver::new();
let result = resolve_all_directives(doc_id, &mut resolver, &mut schema_set);
assert!(result.is_ok(), "Resolution should succeed");
let edges = &schema_set.composition_edges;
assert!(
edges.len() >= 2,
"Expected at least 2 edges, got {}",
edges.len()
);
let include_edges: Vec<_> = edges
.iter()
.filter(|e| e.kind == CompositionEdgeKind::Include)
.collect();
assert!(!include_edges.is_empty(), "Should have an include edge");
assert_eq!(include_edges[0].source_doc, doc_id);
let redefine_edges: Vec<_> = edges
.iter()
.filter(|e| e.kind == CompositionEdgeKind::Redefine)
.collect();
assert!(!redefine_edges.is_empty(), "Should have a redefine edge");
assert_eq!(redefine_edges[0].source_doc, doc_id);
// Both directives name the same file, so they must resolve to the same document.
assert!(include_edges[0].target_doc.is_some());
assert_eq!(include_edges[0].target_doc, redefine_edges[0].target_doc);
// Best-effort cleanup; a failure to remove the directory is ignored.
let _ = std::fs::remove_dir_all(&tmp);
}
#[test]
fn test_composition_edges_cycle() {
    use crate::parser::parse::parse_schema;
    use crate::schema::composition::CompositionEdgeKind;
    use crate::schema::SchemaSet;

    // Two schemas on disk that include each other: cycle_a.xsd <-> cycle_b.xsd.
    let work_dir = std::env::temp_dir().join("xsd_test_composition_cycle");
    std::fs::create_dir_all(&work_dir).unwrap();
    let a_path = work_dir.join("cycle_a.xsd");
    let b_path = work_dir.join("cycle_b.xsd");

    let a_xsd = format!(
        r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:include schemaLocation="{}"/>
<xs:element name="A" type="xs:string"/>
</xs:schema>"#,
        b_path.to_string_lossy()
    );
    let b_xsd = format!(
        r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:include schemaLocation="{}"/>
<xs:element name="B" type="xs:string"/>
</xs:schema>"#,
        a_path.to_string_lossy()
    );
    std::fs::write(&a_path, &a_xsd).unwrap();
    std::fs::write(&b_path, &b_xsd).unwrap();

    // Parse A from disk and register it as loaded under its own path.
    let mut schemas = SchemaSet::new();
    let a_uri = a_path.to_string_lossy().to_string();
    let a_text = std::fs::read_to_string(&a_path).unwrap();
    let doc_a = parse_schema(a_text.as_bytes(), &a_uri, &mut schemas).unwrap();
    schemas.mark_loaded(a_uri, doc_a);

    // Resolving A loads B; resolving B then refers back to A.
    let mut resolver = SchemaResolver::new();
    let from_a = resolve_all_directives(doc_a, &mut resolver, &mut schemas);
    assert!(from_a.is_ok(), "Resolution of a.xsd should succeed");
    assert_eq!(from_a.loaded.len(), 1, "Should have loaded b.xsd");
    let doc_b = from_a.loaded[0];

    let from_b = resolve_all_directives(doc_b, &mut resolver, &mut schemas);
    assert!(from_b.is_ok(), "Resolution of b.xsd should succeed");

    let edges = &schemas.composition_edges;

    let forward: Vec<_> = edges
        .iter()
        .filter(|edge| edge.source_doc == doc_a && edge.target_doc == Some(doc_b))
        .collect();
    assert_eq!(forward.len(), 1, "Should have exactly one a→b edge");
    assert_eq!(forward[0].kind, CompositionEdgeKind::Include);

    let backward: Vec<_> = edges
        .iter()
        .filter(|edge| edge.source_doc == doc_b && edge.target_doc == Some(doc_a))
        .collect();
    assert_eq!(backward.len(), 1, "Should have exactly one b→a edge");
    assert_eq!(backward[0].kind, CompositionEdgeKind::Include);

    let _ = std::fs::remove_dir_all(&work_dir);
}
#[test]
fn test_resolved_doc_id_populated() {
    use crate::parser::parse::parse_schema;
    use crate::schema::SchemaSet;

    let scratch_dir = std::env::temp_dir().join("xsd_test_resolved_doc_id");
    std::fs::create_dir_all(&scratch_dir).unwrap();

    // Base schema referenced by both directives of the main schema.
    let base_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:simpleType name="MyString">
<xs:restriction base="xs:string"/>
</xs:simpleType>
</xs:schema>"#;
    let base_path = scratch_dir.join("base.xsd");
    std::fs::write(&base_path, base_xsd).unwrap();

    let main_xsd = format!(
        r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:include schemaLocation="{loc}"/>
<xs:redefine schemaLocation="{loc}">
<xs:simpleType name="MyString">
<xs:restriction base="MyString">
<xs:maxLength value="50"/>
</xs:restriction>
</xs:simpleType>
</xs:redefine>
</xs:schema>"#,
        loc = base_path.to_string_lossy()
    );

    let mut schemas = SchemaSet::new();
    let main_uri = scratch_dir.join("main.xsd").to_string_lossy().to_string();
    let main_doc = parse_schema(main_xsd.as_bytes(), &main_uri, &mut schemas).unwrap();

    let mut resolver = SchemaResolver::new();
    let outcome = resolve_all_directives(main_doc, &mut resolver, &mut schemas);
    assert!(outcome.is_ok(), "Resolution should succeed");

    // After resolution each directive must carry the id of the document it resolved to.
    let parsed_main = &schemas.documents[main_doc as usize];
    let include_target = parsed_main.includes[0].resolved_doc_id;
    assert!(
        include_target.is_some(),
        "Include should have resolved_doc_id"
    );
    let redefine_target = parsed_main.redefines[0].resolved_doc_id;
    assert!(
        redefine_target.is_some(),
        "Redefine should have resolved_doc_id"
    );

    let _ = std::fs::remove_dir_all(&scratch_dir);
}
#[test]
fn test_document_component_index_populated() {
    use crate::parser::parse::parse_schema;
    use crate::schema::SchemaSet;

    // One simple type and one element, both without a target namespace.
    let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:simpleType name="MyString">
<xs:restriction base="xs:string"/>
</xs:simpleType>
<xs:element name="root" type="MyString"/>
</xs:schema>"#;

    let mut schemas = SchemaSet::new();
    let parsed_id = parse_schema(xsd.as_bytes(), "test.xsd", &mut schemas).unwrap();
    let index = &schemas.documents[parsed_id as usize].component_index;

    assert!(!index.is_empty(), "Component index should be populated");

    let my_string = schemas.name_table.get("MyString").unwrap();
    assert!(
        index.lookup_type(None, my_string).is_some(),
        "Should find MyString type in document component index"
    );

    let root_for_element = schemas.name_table.get("root").unwrap();
    assert!(
        index.lookup_element(None, root_for_element).is_some(),
        "Should find root element in document component index"
    );

    // "root" names an element only; a type lookup under that name must miss.
    let root_for_type = schemas.name_table.get("root").unwrap();
    assert!(
        index.lookup_type(None, root_for_type).is_none(),
        "Should not find 'root' as a type"
    );
}
#[test]
fn test_redefine_uses_document_scoped_lookup() {
    use crate::parser::parse::parse_schema;
    use crate::schema::SchemaSet;

    let scratch_dir = std::env::temp_dir().join("xsd_test_redefine_doc_scoped");
    std::fs::create_dir_all(&scratch_dir).unwrap();

    // Schema being redefined: declares the original MyString.
    let base_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:simpleType name="MyString">
<xs:restriction base="xs:string"/>
</xs:simpleType>
</xs:schema>"#;
    let base_path = scratch_dir.join("redef_base.xsd");
    std::fs::write(&base_path, base_xsd).unwrap();

    // Redefining schema: restricts MyString in terms of itself.
    let main_xsd = format!(
        r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:redefine schemaLocation="{loc}">
<xs:simpleType name="MyString">
<xs:restriction base="MyString">
<xs:maxLength value="50"/>
</xs:restriction>
</xs:simpleType>
</xs:redefine>
</xs:schema>"#,
        loc = base_path.to_string_lossy()
    );

    let mut schemas = SchemaSet::new();
    let main_uri = scratch_dir
        .join("redef_main.xsd")
        .to_string_lossy()
        .to_string();
    let main_doc = parse_schema(main_xsd.as_bytes(), &main_uri, &mut schemas).unwrap();

    let mut resolver = SchemaResolver::new();
    let outcome = resolve_all_directives(main_doc, &mut resolver, &mut schemas);
    assert!(outcome.is_ok(), "Resolution should succeed");

    // The redefine directive must point at the document it resolved to.
    let redefine_target = schemas.documents[main_doc as usize].redefines[0].resolved_doc_id;
    assert!(
        redefine_target.is_some(),
        "Redefine should have resolved_doc_id"
    );

    // That target document must expose MyString through its own component index.
    let redefined_doc = &schemas.documents[redefine_target.unwrap() as usize];
    let my_string = schemas.name_table.get("MyString").unwrap();
    let scoped_hit = redefined_doc.component_index.lookup_type(None, my_string);
    assert!(
        scoped_hit.is_some(),
        "Target document should have MyString in component index"
    );

    // Applying the override must leave MyString reachable from the set-wide lookup.
    crate::schema::apply_redefine_override(&mut schemas).unwrap();
    let global_hit = schemas.lookup_type(None, my_string);
    assert!(
        global_hit.is_some(),
        "MyString should still be in namespace table after redefine"
    );

    let _ = std::fs::remove_dir_all(&scratch_dir);
}
#[test]
fn test_effective_components_provenance_populated() {
    use crate::parser::parse::parse_schema;
    use crate::schema::composition::{ComponentIdentity, ComponentKind, CompositionAction};
    use crate::schema::SchemaSet;

    let scratch_dir = std::env::temp_dir().join("xsd_test_provenance");
    std::fs::create_dir_all(&scratch_dir).unwrap();

    // Base schema: a simple type that gets redefined plus an element that does not.
    let base_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:simpleType name="MyStr">
<xs:restriction base="xs:string"/>
</xs:simpleType>
<xs:element name="root" type="MyStr"/>
</xs:schema>"#;
    let base_path = scratch_dir.join("prov_base.xsd");
    std::fs::write(&base_path, base_xsd).unwrap();

    let main_xsd = format!(
        r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:redefine schemaLocation="{loc}">
<xs:simpleType name="MyStr">
<xs:restriction base="MyStr">
<xs:maxLength value="50"/>
</xs:restriction>
</xs:simpleType>
</xs:redefine>
</xs:schema>"#,
        loc = base_path.to_string_lossy()
    );

    let mut schemas = SchemaSet::new();
    let main_uri = scratch_dir
        .join("prov_main.xsd")
        .to_string_lossy()
        .to_string();
    let main_doc = parse_schema(main_xsd.as_bytes(), &main_uri, &mut schemas).unwrap();

    let mut resolver = SchemaResolver::new();
    let outcome = resolve_all_directives(main_doc, &mut resolver, &mut schemas);
    assert!(outcome.is_ok());
    crate::schema::apply_redefine_override(&mut schemas).unwrap();

    assert!(
        !schemas.effective_components.is_empty(),
        "Effective components should be populated after composition"
    );

    // Look the redefined simple type up by its (kind, name, namespace) identity.
    let my_str = schemas.name_table.get("MyStr").unwrap();
    let identity = ComponentIdentity {
        kind: ComponentKind::SimpleType,
        name: my_str,
        namespace: None,
    };
    let effective = schemas.effective_components.get(&identity);
    assert!(
        effective.is_some(),
        "MyStr should be in effective components"
    );
    let effective = effective.unwrap();

    assert!(
        matches!(effective.action, CompositionAction::Redefined { .. }),
        "MyStr should have Redefined action, not Declared"
    );
    assert_eq!(
        effective.origin.owner_doc,
        Some(main_doc),
        "Redefined component origin should be the redefining document"
    );

    // Components that were not redefined must still be recorded as plain declarations.
    let plain_declarations = schemas
        .effective_components
        .values()
        .filter(|component| matches!(component.action, CompositionAction::Declared))
        .count();
    assert!(
        plain_declarations > 0,
        "Should have declared components for non-redefined items"
    );

    let _ = std::fs::remove_dir_all(&scratch_dir);
}
#[test]
fn test_redefine_no_fallback_to_global_when_scoped() {
    use crate::arenas::SimpleTypeDefData;
    use crate::parser::frames::{QNameRef, SimpleTypeVariety, TypeRefResult};
    use crate::parser::parse::parse_schema;
    use crate::schema::model::{DerivationSet, RedefineDirective};
    use crate::schema::redefine::apply_redefine;
    use crate::schema::SchemaSet;

    let scratch_dir = std::env::temp_dir().join("xsd_test_redefine_no_fallback");
    std::fs::create_dir_all(&scratch_dir).unwrap();

    // Document A declares MyType.
    let doc_a_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:simpleType name="MyType">
<xs:restriction base="xs:string"/>
</xs:simpleType>
</xs:schema>"#;
    let doc_a_path = scratch_dir.join("no_fallback_a.xsd");
    std::fs::write(&doc_a_path, doc_a_xsd).unwrap();

    // Document B declares only OtherType; it is the redefine target below.
    let doc_b_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:simpleType name="OtherType">
<xs:restriction base="xs:string"/>
</xs:simpleType>
</xs:schema>"#;
    let doc_b_path = scratch_dir.join("no_fallback_b.xsd");
    std::fs::write(&doc_b_path, doc_b_xsd).unwrap();

    // Parse both documents into one schema set, reading them back from disk.
    let mut schemas = SchemaSet::new();
    let doc_a_text = std::fs::read_to_string(&doc_a_path).unwrap();
    let _doc_a = parse_schema(
        doc_a_text.as_bytes(),
        &doc_a_path.to_string_lossy(),
        &mut schemas,
    )
    .unwrap();
    let doc_b_text = std::fs::read_to_string(&doc_b_path).unwrap();
    let doc_b = parse_schema(
        doc_b_text.as_bytes(),
        &doc_b_path.to_string_lossy(),
        &mut schemas,
    )
    .unwrap();

    let my_type = schemas.name_table.get("MyType").unwrap();
    assert!(
        schemas.lookup_type(None, my_type).is_some(),
        "MyType should be in global namespace table from doc_a"
    );

    // Hand-built redefinition of MyType whose base is MyType itself.
    let self_reference = QNameRef {
        namespace: None,
        local_name: my_type,
        prefix: None,
    };
    let redefinition = schemas.arenas.alloc_simple_type(SimpleTypeDefData {
        name: Some(my_type),
        target_namespace: None,
        variety: SimpleTypeVariety::Atomic,
        base_type: Some(TypeRefResult::QName(self_reference)),
        item_type: None,
        member_types: Vec::new(),
        facets: Default::default(),
        final_derivation: DerivationSet::empty(),
        id: None,
        derivation_id: None,
        annotation: None,
        source: None,
        resolved_base_type: None,
        resolved_item_type: None,
        resolved_member_types: Vec::new(),
        redefine_original: None,
        deferred_item_type_error: None,
    });

    // The directive is scoped to document B, which has no MyType of its own.
    let directive = RedefineDirective {
        source: None,
        schema_location: doc_b_path.to_string_lossy().to_string(),
        resolved_doc_id: Some(doc_b),
        simple_types: vec![redefinition],
        complex_types: Vec::new(),
        groups: Vec::new(),
        attribute_groups: Vec::new(),
    };

    let applied = apply_redefine(&mut schemas, &directive);
    assert!(
        applied.is_err(),
        "Redefine should fail when target document lacks the component (no global fallback)"
    );

    let _ = std::fs::remove_dir_all(&scratch_dir);
}
#[test]
fn test_redefine_simple_vs_complex_kind_mismatch() {
    use crate::arenas::SimpleTypeDefData;
    use crate::parser::frames::{QNameRef, SimpleTypeVariety, TypeRefResult};
    use crate::parser::parse::parse_schema;
    use crate::schema::model::{DerivationSet, RedefineDirective};
    use crate::schema::redefine::apply_redefine;
    use crate::schema::SchemaSet;

    let scratch_dir = std::env::temp_dir().join("xsd_test_redefine_kind_mismatch");
    std::fs::create_dir_all(&scratch_dir).unwrap();

    // The target declares Foo as a *complex* type.
    let target_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:complexType name="Foo">
<xs:sequence>
<xs:element name="bar" type="xs:string"/>
</xs:sequence>
</xs:complexType>
</xs:schema>"#;
    let target_path = scratch_dir.join("kind_target.xsd");
    std::fs::write(&target_path, target_xsd).unwrap();

    let mut schemas = SchemaSet::new();
    let target_text = std::fs::read_to_string(&target_path).unwrap();
    let target_doc_id = parse_schema(
        target_text.as_bytes(),
        &target_path.to_string_lossy(),
        &mut schemas,
    )
    .unwrap();

    // Sanity-check the per-document index: Foo is complex, not simple.
    let foo = schemas.name_table.get("Foo").unwrap();
    let target_index = &schemas.documents[target_doc_id as usize].component_index;
    assert!(
        target_index.lookup_complex_type(None, foo).is_some(),
        "Target should have Foo as complex type"
    );
    assert!(
        target_index.lookup_simple_type(None, foo).is_none(),
        "Target should NOT have Foo as simple type"
    );

    // Hand-built *simple* type redefinition that reuses the name Foo.
    let self_reference = QNameRef {
        namespace: None,
        local_name: foo,
        prefix: None,
    };
    let redefinition = schemas.arenas.alloc_simple_type(SimpleTypeDefData {
        name: Some(foo),
        target_namespace: None,
        variety: SimpleTypeVariety::Atomic,
        base_type: Some(TypeRefResult::QName(self_reference)),
        item_type: None,
        member_types: Vec::new(),
        facets: Default::default(),
        final_derivation: DerivationSet::empty(),
        id: None,
        derivation_id: None,
        annotation: None,
        source: None,
        resolved_base_type: None,
        resolved_item_type: None,
        resolved_member_types: Vec::new(),
        redefine_original: None,
        deferred_item_type_error: None,
    });

    let directive = RedefineDirective {
        source: None,
        schema_location: target_path.to_string_lossy().to_string(),
        resolved_doc_id: Some(target_doc_id),
        simple_types: vec![redefinition],
        complex_types: Vec::new(),
        groups: Vec::new(),
        attribute_groups: Vec::new(),
    };

    let applied = apply_redefine(&mut schemas, &directive);
    assert!(
        applied.is_err(),
        "Simple type redefine must not match a same-name complex type in target document"
    );

    let _ = std::fs::remove_dir_all(&scratch_dir);
}
#[test]
fn test_chameleon_include_adopts_namespace() {
    use crate::parser::parse::parse_schema;
    use crate::schema::SchemaSet;

    let scratch_dir = std::env::temp_dir().join("xsd_test_chameleon_include");
    std::fs::create_dir_all(&scratch_dir).unwrap();

    // Included schema carries no targetNamespace of its own.
    let chameleon_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:simpleType name="MyType">
<xs:restriction base="xs:string"/>
</xs:simpleType>
</xs:schema>"#;
    let chameleon_path = scratch_dir.join("chameleon.xsd");
    std::fs::write(&chameleon_path, chameleon_xsd).unwrap();

    // Including schema declares http://example.com/main as its targetNamespace.
    let main_xsd = format!(
        r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
targetNamespace="http://example.com/main">
<xs:include schemaLocation="{}"/>
<xs:element name="root" type="tns:MyType" xmlns:tns="http://example.com/main"/>
</xs:schema>"#,
        chameleon_path.to_string_lossy()
    );

    let mut schemas = SchemaSet::new();
    let main_uri = scratch_dir.join("main.xsd").to_string_lossy().to_string();
    let main_doc = parse_schema(main_xsd.as_bytes(), &main_uri, &mut schemas).unwrap();

    let mut resolver = SchemaResolver::new();
    let outcome = resolve_all_directives(main_doc, &mut resolver, &mut schemas);
    assert!(outcome.is_ok(), "Resolution should succeed");
    assert!(
        !outcome.loaded.is_empty(),
        "Should have loaded chameleon.xsd"
    );

    // The loaded document must now report the includer's namespace.
    let included_id = outcome.loaded[0];
    let included_doc = &schemas.documents[included_id as usize];
    let includer_ns = schemas
        .name_table
        .get("http://example.com/main")
        .unwrap();
    assert_eq!(
        included_doc.target_namespace,
        Some(includer_ns),
        "Chameleon document should adopt includer's targetNamespace"
    );

    // MyType must be registered under that namespace only.
    let my_type = schemas.name_table.get("MyType").unwrap();
    let in_includer_ns = schemas.lookup_type(Some(includer_ns), my_type);
    assert!(
        in_includer_ns.is_some(),
        "MyType should be in the includer's namespace after chameleon adoption"
    );
    let in_no_ns = schemas.lookup_type(None, my_type);
    assert!(
        in_no_ns.is_none(),
        "MyType should NOT be in no-namespace after chameleon adoption"
    );

    let _ = std::fs::remove_dir_all(&scratch_dir);
}
#[test]
fn test_chameleon_redefine_adopts_namespace() {
    use crate::parser::parse::parse_schema;
    use crate::schema::SchemaSet;

    // Verifies that a schema without a targetNamespace, pulled in through
    // <xs:redefine>, ends up with the redefining schema's targetNamespace.
    let tmp = std::env::temp_dir().join("xsd_test_chameleon_redefine");
    std::fs::create_dir_all(&tmp).unwrap();

    // Redefined schema: no targetNamespace attribute.
    let chameleon_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:simpleType name="MyStr">
<xs:restriction base="xs:string"/>
</xs:simpleType>
</xs:schema>"#;
    let chameleon_path = tmp.join("cham_redef.xsd");
    std::fs::write(&chameleon_path, chameleon_xsd).unwrap();

    // Redefining schema: targetNamespace http://example.com/ns.
    let main_xsd = format!(
        r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
targetNamespace="http://example.com/ns">
<xs:redefine schemaLocation="{}">
<xs:simpleType name="MyStr">
<xs:restriction base="MyStr">
<xs:maxLength value="50"/>
</xs:restriction>
</xs:simpleType>
</xs:redefine>
</xs:schema>"#,
        chameleon_path.to_string_lossy()
    );

    let mut schema_set = SchemaSet::new();
    let main_path = tmp.join("cham_main.xsd").to_string_lossy().to_string();
    let doc_id = parse_schema(main_xsd.as_bytes(), &main_path, &mut schema_set).unwrap();

    let mut resolver = SchemaResolver::new();
    let result = resolve_all_directives(doc_id, &mut resolver, &mut schema_set);
    assert!(result.is_ok(), "Resolution should succeed");
    // Guard the index below: without this, a regression that loads nothing
    // would surface as an index-out-of-bounds panic instead of a clear message.
    assert!(
        !result.loaded.is_empty(),
        "Should have loaded cham_redef.xsd"
    );

    let chameleon_doc_id = result.loaded[0];
    let chameleon_doc = &schema_set.documents[chameleon_doc_id as usize];
    let ns = schema_set.name_table.get("http://example.com/ns").unwrap();
    assert_eq!(
        chameleon_doc.target_namespace,
        Some(ns),
        "Chameleon redefine target should adopt redefiner's namespace"
    );

    let _ = std::fs::remove_dir_all(&tmp);
}
#[test]
fn test_chameleon_multi_namespace_creates_separate_views() {
    use crate::parser::parse::parse_schema;
    use crate::schema::SchemaSet;

    // Verifies that one namespace-less schema included from two schemas with
    // different targetNamespaces yields two distinct documents, one per namespace.
    let tmp = std::env::temp_dir().join("xsd_test_chameleon_multi_ns");
    std::fs::create_dir_all(&tmp).unwrap();

    // Shared schema without a targetNamespace.
    let chameleon_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:simpleType name="SharedType">
<xs:restriction base="xs:string"/>
</xs:simpleType>
</xs:schema>"#;
    let chameleon_path = tmp.join("multi_ns_chameleon.xsd");
    std::fs::write(&chameleon_path, chameleon_xsd).unwrap();

    // First includer: targetNamespace urn:a.
    let ns_a_xsd = format!(
        r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
targetNamespace="urn:a">
<xs:include schemaLocation="{}"/>
</xs:schema>"#,
        chameleon_path.to_string_lossy()
    );
    let ns_a_path = tmp.join("multi_ns_a.xsd");
    std::fs::write(&ns_a_path, &ns_a_xsd).unwrap();

    // Second includer: targetNamespace urn:b.
    let ns_b_xsd = format!(
        r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
targetNamespace="urn:b">
<xs:include schemaLocation="{}"/>
</xs:schema>"#,
        chameleon_path.to_string_lossy()
    );
    let ns_b_path = tmp.join("multi_ns_b.xsd");
    std::fs::write(&ns_b_path, &ns_b_xsd).unwrap();

    let mut schema_set = SchemaSet::new();

    // Resolve the urn:a includer.
    let ns_a_uri = ns_a_path.to_string_lossy().to_string();
    let doc_a = parse_schema(
        std::fs::read_to_string(&ns_a_path).unwrap().as_bytes(),
        &ns_a_uri,
        &mut schema_set,
    )
    .unwrap();
    let mut resolver = SchemaResolver::new();
    let res_a = resolve_all_directives(doc_a, &mut resolver, &mut schema_set);
    assert!(res_a.is_ok(), "ns_a resolution should succeed");
    // Guard the index below so an empty load list fails with a clear message.
    assert!(
        !res_a.loaded.is_empty(),
        "ns_a resolution should have loaded the chameleon schema"
    );
    let chameleon_a_id = res_a.loaded[0];

    // Resolve the urn:b includer with the same resolver and schema set.
    let ns_b_uri = ns_b_path.to_string_lossy().to_string();
    let doc_b = parse_schema(
        std::fs::read_to_string(&ns_b_path).unwrap().as_bytes(),
        &ns_b_uri,
        &mut schema_set,
    )
    .unwrap();
    let res_b = resolve_all_directives(doc_b, &mut resolver, &mut schema_set);
    assert!(res_b.is_ok(), "ns_b resolution should succeed");
    // Same guard for the second resolution: the shared file was already seen
    // once, so an empty `loaded` here is the likeliest way this test regresses.
    assert!(
        !res_b.loaded.is_empty(),
        "ns_b resolution should have loaded its own chameleon copy"
    );
    let chameleon_b_id = res_b.loaded[0];

    assert_ne!(
        chameleon_a_id, chameleon_b_id,
        "Chameleon schema included from different namespaces must produce separate documents"
    );

    let ns_a_name = schema_set.name_table.get("urn:a").unwrap();
    let ns_b_name = schema_set.name_table.get("urn:b").unwrap();
    assert_eq!(
        schema_set.documents[chameleon_a_id as usize].target_namespace,
        Some(ns_a_name),
        "First chameleon copy should have urn:a namespace"
    );
    assert_eq!(
        schema_set.documents[chameleon_b_id as usize].target_namespace,
        Some(ns_b_name),
        "Second chameleon copy should have urn:b namespace"
    );
    assert!(schema_set.documents[chameleon_a_id as usize].is_chameleon());
    assert!(schema_set.documents[chameleon_b_id as usize].is_chameleon());

    let _ = std::fs::remove_dir_all(&tmp);
}
#[test]
fn test_raw_no_namespace_not_reused_for_chameleon() {
    use crate::parser::parse::parse_schema;
    use crate::schema::SchemaSet;

    // Verifies that a namespace-less schema first loaded by a namespace-less
    // includer is not handed back as-is when a namespaced schema includes the
    // same file: the second include must get its own chameleon document.
    let tmp = std::env::temp_dir().join("xsd_test_raw_no_ns_chameleon");
    std::fs::create_dir_all(&tmp).unwrap();

    // Shared schema without a targetNamespace.
    let shared_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="Shared" type="xs:string"/>
</xs:schema>"#;
    let shared_path = tmp.join("raw_shared.xsd");
    std::fs::write(&shared_path, shared_xsd).unwrap();

    // Includer without a targetNamespace.
    let no_ns_xsd = format!(
        r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:include schemaLocation="{}"/>
</xs:schema>"#,
        shared_path.to_string_lossy()
    );
    let no_ns_path = tmp.join("raw_no_ns.xsd");
    std::fs::write(&no_ns_path, &no_ns_xsd).unwrap();

    // Includer with targetNamespace urn:test.
    let with_ns_xsd = format!(
        r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
targetNamespace="urn:test">
<xs:include schemaLocation="{}"/>
</xs:schema>"#,
        shared_path.to_string_lossy()
    );
    let with_ns_path = tmp.join("raw_with_ns.xsd");
    std::fs::write(&with_ns_path, &with_ns_xsd).unwrap();

    let mut schema_set = SchemaSet::new();
    let mut resolver = SchemaResolver::new();

    // First pass: the namespace-less includer loads the shared file unchanged.
    let no_ns_uri = no_ns_path.to_string_lossy().to_string();
    let doc_no_ns = parse_schema(
        std::fs::read_to_string(&no_ns_path).unwrap().as_bytes(),
        &no_ns_uri,
        &mut schema_set,
    )
    .unwrap();
    let res1 = resolve_all_directives(doc_no_ns, &mut resolver, &mut schema_set);
    assert!(res1.is_ok());
    // Guard the index below so an empty load list fails with a clear message.
    assert!(
        !res1.loaded.is_empty(),
        "No-namespace include should have loaded raw_shared.xsd"
    );
    let raw_id = res1.loaded[0];
    assert!(!schema_set.documents[raw_id as usize].is_chameleon());
    assert!(schema_set.documents[raw_id as usize]
        .target_namespace
        .is_none());

    // Second pass: the namespaced includer references the very same file.
    let with_ns_uri = with_ns_path.to_string_lossy().to_string();
    let doc_with_ns = parse_schema(
        std::fs::read_to_string(&with_ns_path).unwrap().as_bytes(),
        &with_ns_uri,
        &mut schema_set,
    )
    .unwrap();
    let res2 = resolve_all_directives(doc_with_ns, &mut resolver, &mut schema_set);
    assert!(res2.is_ok());
    // The shared file is already known to the set at this point; if the
    // resolver wrongly reported nothing new, fail here rather than on the index.
    assert!(
        !res2.loaded.is_empty(),
        "Namespaced include should have loaded a chameleon copy of raw_shared.xsd"
    );
    let chameleon_id = res2.loaded[0];

    assert_ne!(
        raw_id, chameleon_id,
        "Raw no-namespace document must not be reused for chameleon adoption"
    );
    let ns_name = schema_set.name_table.get("urn:test").unwrap();
    assert_eq!(
        schema_set.documents[chameleon_id as usize].target_namespace,
        Some(ns_name),
        "Chameleon copy should adopt urn:test namespace"
    );
    assert!(schema_set.documents[chameleon_id as usize].is_chameleon());

    let _ = std::fs::remove_dir_all(&tmp);
}
}