1use std::collections::HashSet;
45use std::fmt::Debug;
46use std::path::{Path, PathBuf};
47#[cfg(feature = "async")]
48use std::pin::Pin;
49
50use crate::error::{SchemaError, SchemaResult};
51use crate::ids::{DocumentId, NameId};
52use crate::parser::parse::ParserConfig;
53use crate::schema::composition::{CompositionEdge, CompositionEdgeKind};
54use crate::SchemaSet;
55
/// Result of one schema load attempt made through a `SchemaResolver`.
#[derive(Debug)]
pub enum LoadOutcome {
    /// The document was fetched and parsed by this call; carries its id.
    Loaded(DocumentId),
    /// The load cache already held a reusable document for the location;
    /// carries the cached id.
    AlreadyLoaded(DocumentId),
    /// The resolved location is already in the resolver's in-progress set;
    /// carries the resolved location string.
    Cycle(String),
}
67
/// Byte-order mark that may prefix UTF-8 input.
const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
/// Byte-order mark of little-endian UTF-16 input.
const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE];
/// Byte-order mark of big-endian UTF-16 input.
const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF];
75
/// Byte order of a UTF-16 code-unit stream.
#[derive(Copy, Clone)]
enum Endian {
    /// Least-significant byte first.
    Le,
    /// Most-significant byte first.
    Be,
}
81
82pub fn decode_xml_to_utf8_bytes(bytes: Vec<u8>) -> SchemaResult<Vec<u8>> {
89 if bytes.starts_with(UTF8_BOM) {
90 return Ok(bytes[UTF8_BOM.len()..].to_vec());
91 }
92 if bytes.starts_with(UTF16_LE_BOM) {
93 return Ok(decode_utf16(&bytes[UTF16_LE_BOM.len()..], Endian::Le)?.into_bytes());
94 }
95 if bytes.starts_with(UTF16_BE_BOM) {
96 return Ok(decode_utf16(&bytes[UTF16_BE_BOM.len()..], Endian::Be)?.into_bytes());
97 }
98 if let Some(endian) = sniff_utf16_no_bom(&bytes) {
99 return Ok(decode_utf16(&bytes, endian)?.into_bytes());
100 }
101 Ok(bytes)
102}
103
104pub fn decode_xml_bytes(bytes: Vec<u8>) -> SchemaResult<String> {
107 let utf8 = decode_xml_to_utf8_bytes(bytes)?;
108 String::from_utf8(utf8)
109 .map_err(|e| SchemaError::resolution(format!("Invalid UTF-8 content: {}", e)))
110}
111
112fn sniff_utf16_no_bom(bytes: &[u8]) -> Option<Endian> {
113 if bytes.len() < 4 {
117 return None;
118 }
119 match (bytes[0], bytes[1]) {
120 (0x3C, 0x00) if bytes[2] != 0x00 && bytes[3] == 0x00 => Some(Endian::Le),
121 (0x00, 0x3C) if bytes[2] == 0x00 && bytes[3] != 0x00 => Some(Endian::Be),
122 _ => None,
123 }
124}
125
126fn decode_utf16(bytes: &[u8], endian: Endian) -> SchemaResult<String> {
127 if !bytes.len().is_multiple_of(2) {
128 return Err(SchemaError::resolution(
129 "UTF-16 byte stream has an odd number of bytes".to_string(),
130 ));
131 }
132 let units: Vec<u16> = bytes
133 .chunks_exact(2)
134 .map(|c| match endian {
135 Endian::Le => u16::from_le_bytes([c[0], c[1]]),
136 Endian::Be => u16::from_be_bytes([c[0], c[1]]),
137 })
138 .collect();
139 String::from_utf16(&units)
140 .map_err(|e| SchemaError::resolution(format!("Invalid UTF-16 sequence: {}", e)))
141}
142
/// A source of schema text addressed by a location string.
pub trait SchemaLoader: Send + Sync + Debug {
    /// Fetches the document at `location` and returns its content as text.
    fn load(&self, location: &str) -> SchemaResult<String>;

    /// Reports whether this loader is willing to handle `location`.
    fn can_load(&self, location: &str) -> bool;

    /// Ordering weight; `LoaderChain` keeps its loaders sorted by descending
    /// priority. Defaults to `0`.
    fn priority(&self) -> i32 {
        0
    }
}
169
/// Loader that reads schema documents from the local file system.
#[derive(Debug, Clone, Default)]
pub struct FileSystemLoader {
    /// Optional base directory.
    // NOTE(review): the `SchemaLoader` impl in this file never reads this
    // field — confirm whether relative locations were meant to use it.
    pub base_dir: Option<PathBuf>,
}
178
impl FileSystemLoader {
    /// Creates a loader with no base directory.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a loader that remembers `base_dir`.
    pub fn with_base_dir(base_dir: PathBuf) -> Self {
        Self {
            base_dir: Some(base_dir),
        }
    }
}
192
193impl SchemaLoader for FileSystemLoader {
194 fn load(&self, location: &str) -> SchemaResult<String> {
195 let path = Path::new(location);
196 let bytes = std::fs::read(path).map_err(|e| {
197 SchemaError::resolution(format!("Failed to read file '{}': {}", location, e))
198 })?;
199 decode_xml_bytes(bytes)
200 }
201
202 fn can_load(&self, location: &str) -> bool {
203 !location.starts_with("http://")
204 && !location.starts_with("https://")
205 && !location.starts_with("embedded://")
206 }
207
208 fn priority(&self) -> i32 {
209 0
210 }
211}
212
/// Loader for schemas compiled into the crate and addressed with
/// `embedded://` locations.
#[derive(Debug, Clone, Default)]
pub struct EmbeddedLoader;
219
impl EmbeddedLoader {
    /// Creates the (stateless) embedded loader.
    pub fn new() -> Self {
        Self
    }
}
226
227impl SchemaLoader for EmbeddedLoader {
228 fn load(&self, location: &str) -> SchemaResult<String> {
229 if let Some(rest) = location.strip_prefix("embedded://") {
230 match rest {
231 "xml.xsd" => {
232 let bytes = crate::embedded::XML_XSD;
233 String::from_utf8(bytes.to_vec()).map_err(|e| {
234 SchemaError::resolution(format!("Invalid UTF-8 in embedded schema: {}", e))
235 })
236 }
237 "xlink.xsd" => {
238 let bytes = crate::embedded::XLINK_XSD;
239 String::from_utf8(bytes.to_vec()).map_err(|e| {
240 SchemaError::resolution(format!("Invalid UTF-8 in embedded schema: {}", e))
241 })
242 }
243 _ => Err(SchemaError::resolution(format!(
244 "Unknown embedded schema: {}",
245 rest
246 ))),
247 }
248 } else {
249 Err(SchemaError::resolution(format!(
250 "Not an embedded location: {}",
251 location
252 )))
253 }
254 }
255
256 fn can_load(&self, location: &str) -> bool {
257 location.starts_with("embedded://")
258 }
259
260 fn priority(&self) -> i32 {
261 100 }
263}
264
/// An ordered collection of loaders that itself acts as a `SchemaLoader`.
#[derive(Debug, Default)]
pub struct LoaderChain {
    /// Registered loaders, kept sorted by descending priority by `add`.
    loaders: Vec<Box<dyn SchemaLoader>>,
}
273
274impl LoaderChain {
275 pub fn new() -> Self {
277 Self {
278 loaders: Vec::new(),
279 }
280 }
281
282 pub fn with_defaults() -> Self {
284 let mut chain = Self::new();
285 chain.add(Box::new(EmbeddedLoader::new()));
286 chain.add(Box::new(FileSystemLoader::new()));
287 chain
288 }
289
290 pub fn add(&mut self, loader: Box<dyn SchemaLoader>) {
294 self.loaders.push(loader);
295 self.loaders
296 .sort_by_key(|b| std::cmp::Reverse(b.priority()));
297 }
298
299 pub fn len(&self) -> usize {
301 self.loaders.len()
302 }
303
304 pub fn is_empty(&self) -> bool {
306 self.loaders.is_empty()
307 }
308}
309
310impl SchemaLoader for LoaderChain {
311 fn load(&self, location: &str) -> SchemaResult<String> {
312 for loader in &self.loaders {
313 if loader.can_load(location) {
314 return loader.load(location);
315 }
316 }
317 Err(SchemaError::resolution(format!(
318 "No loader available for: {}",
319 location
320 )))
321 }
322
323 fn can_load(&self, location: &str) -> bool {
324 self.loaders.iter().any(|l| l.can_load(location))
325 }
326
327 fn priority(&self) -> i32 {
328 self.loaders.iter().map(|l| l.priority()).max().unwrap_or(0)
330 }
331}
332
/// Resolves schema locations, loads documents through a `SchemaLoader`, and
/// tracks loads that are in progress.
pub struct SchemaResolver {
    /// Resolver settings (network policy, parser configuration, ...).
    pub config: ResolverConfig,
    /// Resolved locations whose load is currently in progress; a hit here is
    /// reported as `LoadOutcome::Cycle`.
    resolving: HashSet<String>,
    /// Namespace-to-location catalog consulted while processing imports.
    catalog: SchemaCatalog,
    /// Synchronous loader used to fetch document content.
    loader: Box<dyn SchemaLoader>,
    /// Optional asynchronous loader, tried before `loader` by
    /// `load_content_async`.
    #[cfg(feature = "async")]
    async_loader: Option<Box<dyn AsyncSchemaLoader>>,
}
357
/// Settings for a `SchemaResolver`.
#[derive(Debug, Clone)]
pub struct ResolverConfig {
    /// Optional base directory.
    // NOTE(review): not read anywhere in the visible part of this file.
    pub base_dir: Option<PathBuf>,
    /// When `false`, `http://` and `https://` locations are rejected before
    /// any loader is consulted.
    pub allow_network: bool,
    /// Maximum nesting depth.
    // NOTE(review): not enforced anywhere in the visible part of this file —
    // confirm where (or whether) it is checked.
    pub max_depth: usize,
    /// Configuration handed to the schema parser for every loaded document.
    pub parser_config: ParserConfig,
}
370
impl Default for ResolverConfig {
    /// Defaults: no base directory, network access disabled, depth limit of
    /// 100, default parser configuration.
    fn default() -> Self {
        Self {
            base_dir: None,
            allow_network: false,
            max_depth: 100,
            parser_config: ParserConfig::default(),
        }
    }
}
381
/// Maps namespace URIs to schema locations.
#[derive(Debug, Clone, Default)]
pub struct SchemaCatalog {
    /// Entries in insertion order; lookups return the first match.
    entries: Vec<CatalogEntry>,
}
388
/// One namespace-to-location mapping of a `SchemaCatalog`.
#[derive(Debug, Clone)]
pub struct CatalogEntry {
    /// Namespace URI the entry applies to.
    pub namespace: String,
    /// Location to load for that namespace.
    pub location: String,
}
397
398impl SchemaCatalog {
399 pub fn new() -> Self {
401 Self::default()
402 }
403
404 pub fn add(&mut self, namespace: impl Into<String>, location: impl Into<String>) {
406 self.entries.push(CatalogEntry {
407 namespace: namespace.into(),
408 location: location.into(),
409 });
410 }
411
412 pub fn lookup(&self, namespace: &str) -> Option<&str> {
414 self.entries
415 .iter()
416 .find(|e| e.namespace == namespace)
417 .map(|e| e.location.as_str())
418 }
419
420 pub fn add_xml_catalog(&mut self) {
425 self.add("http://www.w3.org/XML/1998/namespace", "embedded://xml.xsd");
427
428 self.add("http://www.w3.org/1999/xlink", "embedded://xlink.xsd");
430
431 self.add(
434 "http://www.w3.org/2001/XMLSchema-instance",
435 "http://www.w3.org/2001/XMLSchema-instance.xsd",
436 );
437 }
438}
439
440impl SchemaResolver {
441 pub fn new() -> Self {
447 Self {
448 config: ResolverConfig::default(),
449 resolving: HashSet::new(),
450 catalog: SchemaCatalog::new(),
451 loader: Box::new(LoaderChain::with_defaults()),
452 #[cfg(feature = "async")]
453 async_loader: None,
454 }
455 }
456
457 pub fn with_config(config: ResolverConfig) -> Self {
461 Self {
462 config,
463 resolving: HashSet::new(),
464 catalog: SchemaCatalog::new(),
465 loader: Box::new(LoaderChain::with_defaults()),
466 #[cfg(feature = "async")]
467 async_loader: None,
468 }
469 }
470
471 pub fn with_loader(loader: Box<dyn SchemaLoader>) -> Self {
481 Self {
482 config: ResolverConfig::default(),
483 resolving: HashSet::new(),
484 catalog: SchemaCatalog::new(),
485 loader,
486 #[cfg(feature = "async")]
487 async_loader: None,
488 }
489 }
490
491 pub fn with_config_and_loader(config: ResolverConfig, loader: Box<dyn SchemaLoader>) -> Self {
493 Self {
494 config,
495 resolving: HashSet::new(),
496 catalog: SchemaCatalog::new(),
497 loader,
498 #[cfg(feature = "async")]
499 async_loader: None,
500 }
501 }
502
503 #[cfg(feature = "async")]
508 pub fn with_async_loader(async_loader: Box<dyn AsyncSchemaLoader>) -> Self {
509 Self {
510 config: ResolverConfig::default(),
511 resolving: HashSet::new(),
512 catalog: SchemaCatalog::new(),
513 loader: Box::new(LoaderChain::with_defaults()),
514 async_loader: Some(async_loader),
515 }
516 }
517
518 #[cfg(feature = "async")]
520 pub fn with_config_and_async_loader(
521 config: ResolverConfig,
522 async_loader: Box<dyn AsyncSchemaLoader>,
523 ) -> Self {
524 Self {
525 config,
526 resolving: HashSet::new(),
527 catalog: SchemaCatalog::new(),
528 loader: Box::new(LoaderChain::with_defaults()),
529 async_loader: Some(async_loader),
530 }
531 }
532
533 pub fn catalog_mut(&mut self) -> &mut SchemaCatalog {
535 &mut self.catalog
536 }
537
538 pub fn resolve_location(&self, schema_location: &str, base_uri: &str) -> SchemaResult<String> {
540 if is_absolute_uri(schema_location) {
542 return Ok(schema_location.to_string());
543 }
544
545 let resolved = resolve_relative_uri(schema_location, base_uri)?;
547 Ok(resolved)
548 }
549
550 pub fn load_schema(
558 &mut self,
559 location: &str,
560 base_uri: &str,
561 schema_set: &mut SchemaSet,
562 chameleon_namespace: Option<NameId>,
563 ) -> SchemaResult<LoadOutcome> {
564 let resolved = self.resolve_location(location, base_uri)?;
566
567 if let Some(id) = check_loaded_cache(schema_set, &resolved, chameleon_namespace) {
569 return Ok(LoadOutcome::AlreadyLoaded(id));
570 }
571
572 if self.resolving.contains(&resolved) {
574 return Ok(LoadOutcome::Cycle(resolved));
576 }
577
578 self.resolving.insert(resolved.clone());
580
581 let content = match self.load_content(&resolved) {
583 Ok(c) => c,
584 Err(e) => {
585 self.resolving.remove(&resolved);
586 return Err(e);
587 }
588 };
589
590 let doc_id = match crate::parser::parse::parse_schema_with_chameleon(
593 content.as_bytes(),
594 &resolved,
595 schema_set,
596 &self.config.parser_config,
597 chameleon_namespace,
598 ) {
599 Ok(id) => id,
600 Err(e) => {
601 self.resolving.remove(&resolved);
602 return Err(e);
603 }
604 };
605
606 mark_loaded_chameleon_aware(schema_set, &resolved, doc_id, chameleon_namespace);
608
609 self.resolving.remove(&resolved);
611
612 Ok(LoadOutcome::Loaded(doc_id))
613 }
614
615 pub fn load_content(&self, location: &str) -> SchemaResult<String> {
619 if (location.starts_with("http://") || location.starts_with("https://"))
621 && !self.config.allow_network
622 {
623 return Err(SchemaError::resolution(format!(
624 "Network access not allowed for: {}",
625 location
626 )));
627 }
628
629 self.loader.load(location)
631 }
632
633 pub fn process_include(
638 &mut self,
639 schema_location: &str,
640 base_uri: &str,
641 target_namespace: Option<NameId>,
642 schema_set: &mut SchemaSet,
643 ) -> SchemaResult<LoadOutcome> {
644 self.load_schema(schema_location, base_uri, schema_set, target_namespace)
645 }
646
647 pub fn process_import(
660 &mut self,
661 namespace: Option<&str>,
662 schema_location: Option<&str>,
663 base_uri: &str,
664 schema_set: &mut SchemaSet,
665 ) -> SchemaResult<Option<LoadOutcome>> {
666 if let Some(location) = schema_location {
681 match self.load_schema(location, base_uri, schema_set, None) {
682 Ok(outcome) => {
683 validate_import_target_namespace(schema_set, &outcome, namespace)?;
684 return Ok(Some(outcome));
685 }
686 Err(load_err) => {
687 if let Some(cat_loc) = namespace.and_then(|ns| self.catalog.lookup(ns)) {
688 let cat_loc = cat_loc.to_string();
689 return self.try_catalog_load(&cat_loc, base_uri, namespace, schema_set);
690 }
691 return Err(load_err);
692 }
693 }
694 }
695
696 if let Some(cat_loc) = namespace.and_then(|ns| self.catalog.lookup(ns)) {
697 let cat_loc = cat_loc.to_string();
698 return self.try_catalog_load(&cat_loc, base_uri, namespace, schema_set);
699 }
700
701 Ok(None)
704 }
705
706 fn try_catalog_load(
714 &mut self,
715 catalog_location: &str,
716 base_uri: &str,
717 namespace: Option<&str>,
718 schema_set: &mut SchemaSet,
719 ) -> SchemaResult<Option<LoadOutcome>> {
720 let already_loaded = self
721 .resolve_location(catalog_location, base_uri)
722 .ok()
723 .is_some_and(|r| schema_set.loaded_locations.contains_key(&r));
724 if already_loaded {
725 return Ok(None);
726 }
727 if namespace_already_covered(schema_set, namespace) {
728 return Ok(None);
729 }
730 let outcome = self.load_schema(catalog_location, base_uri, schema_set, None)?;
731 validate_import_target_namespace(schema_set, &outcome, namespace)?;
732 Ok(Some(outcome))
733 }
734
735 pub fn process_redefine(
740 &mut self,
741 schema_location: &str,
742 base_uri: &str,
743 target_namespace: Option<NameId>,
744 schema_set: &mut SchemaSet,
745 ) -> SchemaResult<LoadOutcome> {
746 self.load_schema(schema_location, base_uri, schema_set, target_namespace)
747 }
748
749 #[cfg(feature = "xsd11")]
754 pub fn process_override(
755 &mut self,
756 schema_location: &str,
757 base_uri: &str,
758 target_namespace: Option<NameId>,
759 schema_set: &mut SchemaSet,
760 ) -> SchemaResult<LoadOutcome> {
761 self.load_schema(schema_location, base_uri, schema_set, target_namespace)
762 }
763}
764
impl Default for SchemaResolver {
    /// Same as `SchemaResolver::new`.
    fn default() -> Self {
        Self::new()
    }
}
770
771fn namespace_already_covered(schema_set: &SchemaSet, namespace: Option<&str>) -> bool {
788 let Some(ns_str) = namespace else {
789 return false;
790 };
791 let Some(ns_id) = schema_set.name_table.get(ns_str) else {
792 return false;
793 };
794 schema_set.documents.iter().any(|d| {
795 d.declared_target_namespace == Some(ns_id) || d.target_namespace == Some(ns_id)
796 })
797}
798
799fn validate_import_target_namespace(
800 schema_set: &SchemaSet,
801 outcome: &LoadOutcome,
802 namespace: Option<&str>,
803) -> SchemaResult<()> {
804 let doc_id = match outcome {
805 LoadOutcome::Loaded(id) | LoadOutcome::AlreadyLoaded(id) => *id,
806 LoadOutcome::Cycle(_) => return Ok(()),
807 };
808 let Some(doc) = schema_set.documents.get(doc_id as usize) else {
809 return Ok(());
810 };
811 let imported_tns = doc
812 .target_namespace
813 .map(|n| schema_set.name_table.resolve_ref(n));
814 if namespace == imported_tns {
815 return Ok(());
816 }
817 let msg = match (namespace, imported_tns) {
818 (None, Some(tns)) => format!(
819 "Import directive has no namespace attribute, but imported schema has \
820 targetNamespace='{}' (src-import clause 1.1 requires absent targetNamespace)",
821 tns
822 ),
823 (Some(ns), None) => format!(
824 "Import directive namespace='{}' does not match imported schema's absent \
825 targetNamespace (src-import clause 1.2)",
826 ns
827 ),
828 (Some(ns), Some(tns)) => format!(
829 "Import directive namespace='{}' does not match imported schema's \
830 targetNamespace='{}' (src-import clause 1.2)",
831 ns, tns
832 ),
833 (None, None) => unreachable!("handled by early return above"),
834 };
835 Err(SchemaError::structural("src-import", msg, None))
836}
837
/// Asynchronous counterpart of `SchemaLoader`.
#[cfg(feature = "async")]
pub trait AsyncSchemaLoader: Send + Sync + Debug {
    /// Starts fetching the document at `location`; the returned future
    /// yields its content as text.
    fn load_async(
        &self,
        location: &str,
    ) -> Pin<Box<dyn std::future::Future<Output = SchemaResult<String>> + Send + '_>>;

    /// Reports whether this loader is willing to handle `location`.
    fn can_load(&self, location: &str) -> bool;
}
864
#[cfg(feature = "async")]
impl SchemaResolver {
    /// Fetches the raw text for an already resolved `location`.
    ///
    /// The asynchronous loader is used when one is installed and accepts the
    /// location; otherwise the synchronous loader is called directly.
    ///
    /// # Errors
    /// Rejects `http://` / `https://` locations when network access is
    /// disabled; otherwise returns whatever the chosen loader reports.
    pub async fn load_content_async(&self, location: &str) -> SchemaResult<String> {
        let is_remote = location.starts_with("http://") || location.starts_with("https://");
        if is_remote && !self.config.allow_network {
            return Err(SchemaError::resolution(format!(
                "Network access not allowed for: {}",
                location
            )));
        }

        match &self.async_loader {
            Some(async_loader) if async_loader.can_load(location) => {
                async_loader.load_async(location).await
            }
            _ => self.loader.load(location),
        }
    }

    /// Asynchronous variant of `load_schema`: same cache check, cycle
    /// detection, parsing and cache update, but content is fetched with
    /// `load_content_async`.
    pub async fn load_schema_async(
        &mut self,
        location: &str,
        base_uri: &str,
        schema_set: &mut SchemaSet,
        chameleon_namespace: Option<NameId>,
    ) -> SchemaResult<LoadOutcome> {
        let resolved = self.resolve_location(location, base_uri)?;

        if let Some(cached) = check_loaded_cache(schema_set, &resolved, chameleon_namespace) {
            return Ok(LoadOutcome::AlreadyLoaded(cached));
        }

        // `insert` reports `false` when the location is already in progress.
        if !self.resolving.insert(resolved.clone()) {
            return Ok(LoadOutcome::Cycle(resolved));
        }

        let parsed = match self.load_content_async(&resolved).await {
            Ok(content) => crate::parser::parse::parse_schema_with_chameleon(
                content.as_bytes(),
                &resolved,
                schema_set,
                &self.config.parser_config,
                chameleon_namespace,
            ),
            Err(e) => Err(e),
        };

        // Single cleanup point for both the success and the failure path.
        self.resolving.remove(&resolved);
        let doc_id = parsed?;

        mark_loaded_chameleon_aware(schema_set, &resolved, doc_id, chameleon_namespace);
        Ok(LoadOutcome::Loaded(doc_id))
    }
}
954
/// Records one load outcome in `result` and returns the document id the
/// directive should point at, if any.
///
/// Only `Loaded` ids are added to `result.loaded`, matching the synchronous
/// `resolve_all_directives`; `Cycle` outcomes add `skipped_label` to
/// `result.skipped`.
#[cfg(feature = "async")]
fn note_async_outcome(
    result: &mut ResolutionResult,
    outcome: &LoadOutcome,
    skipped_label: &str,
) -> Option<DocumentId> {
    match outcome {
        LoadOutcome::Loaded(id) => {
            result.loaded.push(*id);
            Some(*id)
        }
        LoadOutcome::AlreadyLoaded(id) => Some(*id),
        LoadOutcome::Cycle(_) => {
            result.skipped.push(skipped_label.to_string());
            None
        }
    }
}

/// Asynchronously resolves every include, import, redefine and (with the
/// `xsd11` feature) override directive of document `doc_id`.
///
/// For each directive the target is loaded with `load_schema_async`, the
/// directive's `resolved_doc_id` is set, and a composition edge is recorded.
/// Failures are collected in the returned `ResolutionResult` (`errors`, or
/// `import_errors` for imports) instead of aborting.
///
/// For imports a catalog entry for the namespace takes precedence over the
/// directive's `schemaLocation`; an import whose catalog location is already
/// loaded is skipped entirely.
///
/// `result.loaded` lists only documents loaded by this call. Cache hits
/// (`AlreadyLoaded`) still set `resolved_doc_id` but are not listed, so this
/// function and `resolve_all_directives` report the same thing.
// NOTE(review): unlike `resolve_all_directives`, this path performs no
// src-include / src-import namespace checks and prefers the catalog over the
// schemaLocation hint — confirm whether those differences are intended.
#[cfg(feature = "async")]
pub async fn resolve_all_directives_async(
    doc_id: DocumentId,
    resolver: &mut SchemaResolver,
    schema_set: &mut SchemaSet,
) -> ResolutionResult {
    let mut result = ResolutionResult::default();

    let Some(doc) = schema_set.documents.get(doc_id as usize) else {
        result.errors.push(SchemaError::internal(format!(
            "Document {} not found",
            doc_id
        )));
        return result;
    };

    // Snapshot what is needed from the document so `schema_set` can be
    // mutated while the directives are processed.
    let base_uri = doc.base_uri.clone();
    let target_namespace = doc.target_namespace;

    let includes: Vec<_> = doc.includes.to_vec();
    let imports: Vec<_> = doc.imports.to_vec();
    let redefines: Vec<_> = doc.redefines.to_vec();
    #[cfg(feature = "xsd11")]
    let overrides: Vec<_> = doc.overrides.to_vec();

    for (i, include) in includes.iter().enumerate() {
        match resolver
            .load_schema_async(
                &include.schema_location,
                &base_uri,
                schema_set,
                target_namespace,
            )
            .await
        {
            Ok(outcome) => {
                if let Some(id) =
                    note_async_outcome(&mut result, &outcome, &include.schema_location)
                {
                    schema_set.documents[doc_id as usize].includes[i].resolved_doc_id = Some(id);
                }
                record_edge(
                    schema_set,
                    doc_id,
                    &outcome,
                    CompositionEdgeKind::Include,
                    include.source.as_ref(),
                    &include.schema_location,
                );
            }
            Err(e) => result.errors.push(e),
        }
    }

    for (i, import) in imports.iter().enumerate() {
        let catalog_location = import
            .namespace
            .as_deref()
            .and_then(|ns| resolver.catalog.lookup(ns).map(|l| l.to_string()));

        // Pick the location to load: catalog entry first, then the hint.
        let location: String = if let Some(location) = catalog_location {
            let catalog_already_loaded = resolver
                .resolve_location(&location, &base_uri)
                .ok()
                .is_some_and(|r| schema_set.loaded_locations.contains_key(&r));
            if catalog_already_loaded {
                continue;
            }
            location
        } else if let Some(location) = import.schema_location.as_deref() {
            location.to_string()
        } else {
            // Nothing to load for this import.
            continue;
        };

        match resolver
            .load_schema_async(&location, &base_uri, schema_set, None)
            .await
        {
            Ok(outcome) => {
                if let Some(id) = note_async_outcome(&mut result, &outcome, &location) {
                    schema_set.documents[doc_id as usize].imports[i].resolved_doc_id = Some(id);
                }
                record_edge(
                    schema_set,
                    doc_id,
                    &outcome,
                    CompositionEdgeKind::Import,
                    import.source.as_ref(),
                    &location,
                );
            }
            Err(e) => result.import_errors.push(e),
        }
    }

    for (i, redefine) in redefines.iter().enumerate() {
        match resolver
            .load_schema_async(
                &redefine.schema_location,
                &base_uri,
                schema_set,
                target_namespace,
            )
            .await
        {
            Ok(outcome) => {
                if let Some(id) =
                    note_async_outcome(&mut result, &outcome, &redefine.schema_location)
                {
                    schema_set.documents[doc_id as usize].redefines[i].resolved_doc_id = Some(id);
                }
                record_edge(
                    schema_set,
                    doc_id,
                    &outcome,
                    CompositionEdgeKind::Redefine,
                    redefine.source.as_ref(),
                    &redefine.schema_location,
                );
            }
            Err(e) => result.errors.push(e),
        }
    }

    #[cfg(feature = "xsd11")]
    for (i, override_dir) in overrides.iter().enumerate() {
        match resolver
            .load_schema_async(
                &override_dir.schema_location,
                &base_uri,
                schema_set,
                target_namespace,
            )
            .await
        {
            Ok(outcome) => {
                if let Some(id) =
                    note_async_outcome(&mut result, &outcome, &override_dir.schema_location)
                {
                    schema_set.documents[doc_id as usize].overrides[i].resolved_doc_id = Some(id);
                }
                record_edge(
                    schema_set,
                    doc_id,
                    &outcome,
                    CompositionEdgeKind::Override,
                    override_dir.source.as_ref(),
                    &override_dir.schema_location,
                );
            }
            Err(e) => result.errors.push(e),
        }
    }

    result
}
1150
/// Reports whether `uri` is used as-is instead of being resolved against a
/// base.
///
/// That is the case for `http://`, `https://`, `file://` and `embedded://`
/// locations, for paths starting with `/`, and — on Windows only — for
/// locations whose second byte is `:` (drive-letter paths such as `C:\x`).
fn is_absolute_uri(uri: &str) -> bool {
    const ABSOLUTE_SCHEMES: [&str; 4] = ["http://", "https://", "file://", "embedded://"];

    ABSOLUTE_SCHEMES.iter().any(|&scheme| uri.starts_with(scheme))
        // Compare the byte rather than slicing the string: slicing
        // `uri[1..2]` panics when a multi-byte character starts at index 0 or 1.
        || (cfg!(windows) && uri.as_bytes().get(1) == Some(&b':'))
        || uri.starts_with('/')
}
1161
1162fn resolve_relative_uri(relative: &str, base: &str) -> SchemaResult<String> {
1164 if base.starts_with("http://") || base.starts_with("https://") {
1166 resolve_relative_url(relative, base)
1168 } else {
1169 resolve_relative_path(relative, base)
1171 }
1172}
1173
1174fn resolve_relative_url(relative: &str, base: &str) -> SchemaResult<String> {
1176 let base_without_file = if let Some(pos) = base.rfind('/') {
1178 if pos > base.find("://").map_or(0, |p| p + 2) {
1180 &base[..=pos]
1181 } else {
1182 base
1183 }
1184 } else {
1185 base
1186 };
1187
1188 Ok(format!("{}{}", base_without_file, relative))
1189}
1190
1191fn resolve_relative_path(relative: &str, base: &str) -> SchemaResult<String> {
1193 let base_path = Path::new(base);
1194 let base_dir = base_path.parent().unwrap_or(Path::new("."));
1195 let resolved = base_dir.join(relative);
1196
1197 let normalized = normalize_path(&resolved);
1199
1200 Ok(normalized.to_string_lossy().into_owned())
1201}
1202
/// Lexically normalises `path`: drops `.` components and resolves `..`
/// against the preceding component, without touching the file system.
///
/// A `..` that has nothing to cancel is kept, so `../a` stays `../a` and
/// `a/../../b` becomes `../b`. Dropping it would silently point at a
/// different directory. A `..` directly below the root is discarded, since
/// the root has no parent.
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut result = PathBuf::new();

    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => match result.components().next_back() {
                // An ordinary directory name is cancelled by "..".
                Some(Component::Normal(_)) => {
                    result.pop();
                }
                // "/.." is still "/".
                Some(Component::RootDir) => {}
                // Nothing to cancel (empty, another "..", or a bare prefix):
                // the ".." has to be preserved.
                Some(Component::ParentDir | Component::CurDir | Component::Prefix(_)) | None => {
                    result.push("..");
                }
            },
            other => result.push(other),
        }
    }

    result
}
1223
/// Summary of resolving the directives of one document.
#[derive(Debug, Default)]
pub struct ResolutionResult {
    /// Ids of documents recorded as loaded while resolving the directives.
    pub loaded: Vec<DocumentId>,
    /// Errors from include, redefine and override processing, plus
    /// structural errors detected while checking directives.
    pub errors: Vec<SchemaError>,
    /// Errors returned while loading import targets.
    pub import_errors: Vec<SchemaError>,
    /// Locations whose load was not performed because it reported a cycle.
    pub skipped: Vec<String>,
}
1236
impl ResolutionResult {
    /// True when neither `errors` nor `import_errors` contains anything.
    pub fn is_ok(&self) -> bool {
        self.errors.is_empty() && self.import_errors.is_empty()
    }

    /// True when at least one document id was recorded in `loaded`.
    pub fn has_loaded(&self) -> bool {
        !self.loaded.is_empty()
    }
}
1248
1249fn record_edge(
1254 schema_set: &mut SchemaSet,
1255 source_doc: DocumentId,
1256 outcome: &LoadOutcome,
1257 kind: CompositionEdgeKind,
1258 source: Option<&crate::parser::location::SourceRef>,
1259 schema_location: &str,
1260) {
1261 let (target_doc, resolved_location) = match outcome {
1262 LoadOutcome::Loaded(id) | LoadOutcome::AlreadyLoaded(id) => {
1263 let loc = schema_set.documents[*id as usize].base_uri.clone();
1265 (Some(*id), loc)
1266 }
1267 LoadOutcome::Cycle(resolved) => (None, resolved.clone()),
1268 };
1269 schema_set.composition_edges.push(CompositionEdge {
1270 source_doc,
1271 target_doc,
1272 resolved_location,
1273 kind,
1274 source: source.cloned(),
1275 schema_location: schema_location.to_string(),
1276 });
1277}
1278
1279fn check_loaded_cache(
1285 schema_set: &SchemaSet,
1286 resolved: &str,
1287 chameleon_namespace: Option<NameId>,
1288) -> Option<DocumentId> {
1289 if let Some(ns) = chameleon_namespace {
1291 if let Some(&id) = schema_set.chameleon_cache.get(&(resolved.to_owned(), ns)) {
1292 return Some(id);
1293 }
1294 }
1295 if let Some(&id) = schema_set.loaded_locations.get(resolved) {
1297 let reusable = schema_set.documents.get(id as usize).is_none_or(|doc| {
1298 if doc.is_chameleon() {
1299 false
1300 } else if doc.target_namespace.is_some() {
1301 true
1302 } else {
1303 chameleon_namespace.is_none()
1306 }
1307 });
1308 if reusable {
1309 return Some(id);
1310 }
1311 }
1312 None
1313}
1314
1315fn mark_loaded_chameleon_aware(
1322 schema_set: &mut SchemaSet,
1323 resolved: &str,
1324 doc_id: DocumentId,
1325 chameleon_namespace: Option<NameId>,
1326) {
1327 let doc_is_chameleon = schema_set
1328 .documents
1329 .get(doc_id as usize)
1330 .is_some_and(|doc| doc.is_chameleon());
1331 if doc_is_chameleon {
1332 if let Some(ns) = chameleon_namespace {
1333 schema_set
1334 .chameleon_cache
1335 .insert((resolved.to_owned(), ns), doc_id);
1336 }
1337 }
1338 if !schema_set.loaded_locations.contains_key(resolved) {
1339 schema_set.mark_loaded(resolved.to_owned(), doc_id);
1340 }
1341}
1342
1343pub fn fixup_composition_edges(schema_set: &mut SchemaSet) {
1346 for edge in &mut schema_set.composition_edges {
1347 if edge.target_doc.is_none() {
1348 edge.target_doc = schema_set
1349 .loaded_locations
1350 .get(&edge.resolved_location)
1351 .copied();
1352 }
1353 }
1354}
1355
1356pub fn resolve_all_directives(
1358 doc_id: DocumentId,
1359 resolver: &mut SchemaResolver,
1360 schema_set: &mut SchemaSet,
1361) -> ResolutionResult {
1362 let mut result = ResolutionResult::default();
1363
1364 let doc = match schema_set.documents.get(doc_id as usize) {
1366 Some(d) => d,
1367 None => {
1368 result.errors.push(SchemaError::internal(format!(
1369 "Document {} not found",
1370 doc_id
1371 )));
1372 return result;
1373 }
1374 };
1375
1376 let base_uri = doc.base_uri.clone();
1377 let target_namespace = doc.target_namespace;
1378
1379 let includes: Vec<_> = doc.includes.to_vec();
1381 let imports: Vec<_> = doc.imports.to_vec();
1382 let redefines: Vec<_> = doc.redefines.to_vec();
1383 #[cfg(feature = "xsd11")]
1384 let overrides: Vec<_> = doc.overrides.to_vec();
1385
1386 for (i, include) in includes.iter().enumerate() {
1388 match resolver.process_include(
1389 &include.schema_location,
1390 &base_uri,
1391 target_namespace,
1392 schema_set,
1393 ) {
1394 Ok(ref outcome) => {
1395 match outcome {
1396 LoadOutcome::Loaded(id) | LoadOutcome::AlreadyLoaded(id) => {
1397 schema_set.documents[doc_id as usize].includes[i].resolved_doc_id =
1398 Some(*id);
1399 if matches!(outcome, LoadOutcome::Loaded(_)) {
1400 result.loaded.push(*id);
1401 }
1402 if target_namespace.is_none() {
1406 let included_declared = schema_set
1407 .documents
1408 .get(*id as usize)
1409 .and_then(|d| d.declared_target_namespace);
1410 if let Some(declared) = included_declared {
1411 let location = include
1412 .source
1413 .as_ref()
1414 .and_then(|s| schema_set.source_maps.locate(s));
1415 let declared_str =
1416 schema_set.name_table.resolve(declared).to_string();
1417 result.errors.push(SchemaError::structural(
1418 "src-include",
1419 format!(
1420 "Included schema has targetNamespace '{}' \
1421 but the including schema has no \
1422 targetNamespace",
1423 declared_str
1424 ),
1425 location,
1426 ));
1427 }
1428 }
1429 }
1430 _ => {
1431 result.skipped.push(include.schema_location.clone());
1432 }
1433 }
1434 record_edge(
1435 schema_set,
1436 doc_id,
1437 outcome,
1438 CompositionEdgeKind::Include,
1439 include.source.as_ref(),
1440 &include.schema_location,
1441 );
1442 }
1443 Err(e) => result.errors.push(e),
1444 }
1445 }
1446
1447 for (i, import) in imports.iter().enumerate() {
1449 if schema_set.is_xsd10() {
1454 if let Some(import_ns_str) = import.namespace.as_deref() {
1455 let tns_str = target_namespace.map(|n| schema_set.name_table.resolve(n));
1456 if Some(import_ns_str) == tns_str.as_deref() {
1457 result.errors.push(SchemaError::structural(
1458 "src-import",
1459 format!(
1460 "xs:import namespace '{}' must not equal the enclosing \
1461 schema's targetNamespace in XSD 1.0",
1462 import_ns_str
1463 ),
1464 import
1465 .source
1466 .as_ref()
1467 .and_then(|s| schema_set.source_maps.locate(s)),
1468 ));
1469 continue;
1470 }
1471 }
1472 }
1473 if import.namespace.is_none() && target_namespace.is_none() {
1477 result.errors.push(SchemaError::structural(
1478 "src-import",
1479 "xs:import without 'namespace' requires the enclosing schema to have \
1480 a 'targetNamespace' attribute",
1481 import
1482 .source
1483 .as_ref()
1484 .and_then(|s| schema_set.source_maps.locate(s)),
1485 ));
1486 continue;
1487 }
1488 match resolver.process_import(
1489 import.namespace.as_deref(),
1490 import.schema_location.as_deref(),
1491 &base_uri,
1492 schema_set,
1493 ) {
1494 Ok(Some(ref outcome)) => {
1495 match outcome {
1496 LoadOutcome::Loaded(id) => {
1497 result.loaded.push(*id);
1498 schema_set.documents[doc_id as usize].imports[i].resolved_doc_id =
1499 Some(*id);
1500 }
1501 LoadOutcome::AlreadyLoaded(id) => {
1502 schema_set.documents[doc_id as usize].imports[i].resolved_doc_id =
1504 Some(*id);
1505 }
1506 _ => {
1507 if let Some(loc) = &import.schema_location {
1508 result.skipped.push(loc.clone());
1509 }
1510 }
1511 }
1512 record_edge(
1513 schema_set,
1514 doc_id,
1515 outcome,
1516 CompositionEdgeKind::Import,
1517 import.source.as_ref(),
1518 import.schema_location.as_deref().unwrap_or_default(),
1519 );
1520 }
1521 Ok(None) => {
1522 }
1524 Err(e) => result.import_errors.push(e),
1525 }
1526 }
1527
1528 for (i, redefine) in redefines.iter().enumerate() {
1530 match resolver.process_redefine(
1531 &redefine.schema_location,
1532 &base_uri,
1533 target_namespace,
1534 schema_set,
1535 ) {
1536 Ok(ref outcome) => {
1537 match outcome {
1538 LoadOutcome::Loaded(id) => {
1539 result.loaded.push(*id);
1540 schema_set.documents[doc_id as usize].redefines[i].resolved_doc_id =
1541 Some(*id);
1542 }
1543 LoadOutcome::AlreadyLoaded(id) => {
1544 schema_set.documents[doc_id as usize].redefines[i].resolved_doc_id =
1545 Some(*id);
1546 }
1547 _ => {
1548 result.skipped.push(redefine.schema_location.clone());
1549 }
1550 }
1551 record_edge(
1552 schema_set,
1553 doc_id,
1554 outcome,
1555 CompositionEdgeKind::Redefine,
1556 redefine.source.as_ref(),
1557 &redefine.schema_location,
1558 );
1559 }
1560 Err(e) => result.errors.push(e),
1561 }
1562 }
1563
1564 #[cfg(feature = "xsd11")]
1566 for (i, override_dir) in overrides.iter().enumerate() {
1567 match resolver.process_override(
1568 &override_dir.schema_location,
1569 &base_uri,
1570 target_namespace,
1571 schema_set,
1572 ) {
1573 Ok(ref outcome) => {
1574 match outcome {
1575 LoadOutcome::Loaded(id) => {
1576 result.loaded.push(*id);
1577 schema_set.documents[doc_id as usize].overrides[i].resolved_doc_id =
1578 Some(*id);
1579 }
1580 LoadOutcome::AlreadyLoaded(id) => {
1581 schema_set.documents[doc_id as usize].overrides[i].resolved_doc_id =
1582 Some(*id);
1583 }
1584 _ => {
1585 result.skipped.push(override_dir.schema_location.clone());
1586 }
1587 }
1588 record_edge(
1589 schema_set,
1590 doc_id,
1591 outcome,
1592 CompositionEdgeKind::Override,
1593 override_dir.source.as_ref(),
1594 &override_dir.schema_location,
1595 );
1596 }
1597 Err(e) => result.errors.push(e),
1598 }
1599 }
1600
1601 result
1602}
1603
1604#[cfg(test)]
1605mod tests {
1606 use super::*;
1607
1608 #[test]
1609 fn test_is_absolute_uri() {
1610 assert!(is_absolute_uri("http://example.com/schema.xsd"));
1611 assert!(is_absolute_uri("https://example.com/schema.xsd"));
1612 assert!(is_absolute_uri("/absolute/path/schema.xsd"));
1613 assert!(!is_absolute_uri("relative/path/schema.xsd"));
1614 assert!(!is_absolute_uri("../parent/schema.xsd"));
1615 }
1616
1617 #[test]
1618 fn test_resolve_relative_path() {
1619 let resolved = resolve_relative_path("types.xsd", "/home/user/schema.xsd").unwrap();
1620 assert!(resolved.contains("types.xsd"));
1621 }
1622
1623 #[test]
1624 fn test_resolve_relative_path_parent() {
1625 let resolved =
1626 resolve_relative_path("../common/types.xsd", "/home/user/schemas/main.xsd").unwrap();
1627 assert!(resolved.contains("common"));
1629 assert!(resolved.contains("types.xsd"));
1630 }
1631
1632 #[test]
1633 fn test_resolve_relative_url() {
1634 let resolved =
1635 resolve_relative_url("types.xsd", "http://example.com/schemas/main.xsd").unwrap();
1636 assert_eq!(resolved, "http://example.com/schemas/types.xsd");
1637 }
1638
1639 #[test]
1640 fn test_catalog_lookup() {
1641 let mut catalog = SchemaCatalog::new();
1642 catalog.add("http://example.com/ns", "/path/to/schema.xsd");
1643
1644 assert_eq!(
1645 catalog.lookup("http://example.com/ns"),
1646 Some("/path/to/schema.xsd")
1647 );
1648 assert_eq!(catalog.lookup("http://other.com/ns"), None);
1649 }
1650
1651 #[test]
1652 fn test_resolver_config_default() {
1653 let config = ResolverConfig::default();
1654 assert!(!config.allow_network);
1655 assert_eq!(config.max_depth, 100);
1656 }
1657
1658 #[test]
1659 fn test_resolver_new() {
1660 let resolver = SchemaResolver::new();
1661 assert!(resolver.resolving.is_empty());
1662 }
1663
1664 #[test]
1665 fn test_normalize_path() {
1666 let path = Path::new("/home/user/../other/./schema.xsd");
1667 let normalized = normalize_path(path);
1668 assert!(!normalized.to_string_lossy().contains(".."));
1669 assert!(!normalized.to_string_lossy().contains("./"));
1670 }
1671
1672 #[test]
1673 fn test_resolution_result_default() {
1674 let result = ResolutionResult::default();
1675 assert!(result.is_ok());
1676 assert!(!result.has_loaded());
1677 }
1678
1679 #[test]
1680 fn test_catalog_xml_namespaces() {
1681 let mut catalog = SchemaCatalog::new();
1682 catalog.add_xml_catalog();
1683
1684 assert_eq!(
1685 catalog.lookup("http://www.w3.org/XML/1998/namespace"),
1686 Some("embedded://xml.xsd")
1687 );
1688 assert!(catalog
1689 .lookup("http://www.w3.org/2001/XMLSchema-instance")
1690 .is_some());
1691 }
1692
1693 #[test]
1694 fn test_embedded_loader() {
1695 let loader = EmbeddedLoader::new();
1696
1697 assert!(loader.can_load("embedded://xml.xsd"));
1699 assert!(!loader.can_load("/path/to/file.xsd"));
1700 assert!(!loader.can_load("http://example.com/schema.xsd"));
1701
1702 let content = loader.load("embedded://xml.xsd").unwrap();
1704 assert!(content.contains("targetNamespace=\"http://www.w3.org/XML/1998/namespace\""));
1705
1706 assert!(loader.load("embedded://unknown.xsd").is_err());
1708 }
1709
1710 #[test]
1711 fn test_file_system_loader() {
1712 let loader = FileSystemLoader::new();
1713
1714 assert!(loader.can_load("/path/to/file.xsd"));
1716 assert!(loader.can_load("relative/path.xsd"));
1717 assert!(!loader.can_load("embedded://xml.xsd"));
1718 assert!(!loader.can_load("http://example.com/schema.xsd"));
1719 assert!(!loader.can_load("https://example.com/schema.xsd"));
1720 }
1721
1722 #[test]
1723 fn test_loader_chain() {
1724 let chain = LoaderChain::with_defaults();
1725
1726 assert!(chain.can_load("embedded://xml.xsd"));
1728 assert!(chain.can_load("/path/to/file.xsd"));
1729
1730 let content = chain.load("embedded://xml.xsd").unwrap();
1732 assert!(content.contains("http://www.w3.org/XML/1998/namespace"));
1733
1734 assert_eq!(chain.len(), 2);
1736 }
1737
1738 #[test]
1739 fn test_loader_chain_priority() {
1740 let mut chain = LoaderChain::new();
1741 chain.add(Box::new(FileSystemLoader::new())); chain.add(Box::new(EmbeddedLoader::new())); assert_eq!(chain.priority(), 100);
1746 }
1747
1748 #[test]
1749 fn test_resolver_with_embedded_loader() {
1750 let resolver = SchemaResolver::new();
1751
1752 let content = resolver.load_content("embedded://xml.xsd").unwrap();
1754 assert!(content.contains("http://www.w3.org/XML/1998/namespace"));
1755 }
1756
1757 #[test]
1758 fn test_composition_edges_recorded() {
1759 use crate::parser::parse::parse_schema;
1760 use crate::schema::composition::CompositionEdgeKind;
1761 use crate::schema::SchemaSet;
1762
1763 let tmp = std::env::temp_dir().join("xsd_test_composition_edges");
1764 std::fs::create_dir_all(&tmp).unwrap();
1765
1766 let base_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1768<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1769 <xs:simpleType name="MyString">
1770 <xs:restriction base="xs:string"/>
1771 </xs:simpleType>
1772</xs:schema>"#;
1773 let base_path = tmp.join("comp_base.xsd");
1774 std::fs::write(&base_path, base_xsd).unwrap();
1775
1776 let main_xsd = format!(
1778 r#"<?xml version="1.0" encoding="UTF-8"?>
1779<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1780 <xs:include schemaLocation="{loc}"/>
1781 <xs:redefine schemaLocation="{loc}">
1782 <xs:simpleType name="MyString">
1783 <xs:restriction base="MyString">
1784 <xs:maxLength value="50"/>
1785 </xs:restriction>
1786 </xs:simpleType>
1787 </xs:redefine>
1788</xs:schema>"#,
1789 loc = base_path.to_string_lossy()
1790 );
1791
1792 let mut schema_set = SchemaSet::new();
1793 let main_path = tmp.join("comp_main.xsd").to_string_lossy().to_string();
1794 let doc_id = parse_schema(main_xsd.as_bytes(), &main_path, &mut schema_set).unwrap();
1795
1796 let mut resolver = SchemaResolver::new();
1797 let result = resolve_all_directives(doc_id, &mut resolver, &mut schema_set);
1798 assert!(result.is_ok(), "Resolution should succeed");
1799
1800 let edges = &schema_set.composition_edges;
1802 assert!(
1803 edges.len() >= 2,
1804 "Expected at least 2 edges, got {}",
1805 edges.len()
1806 );
1807
1808 let include_edges: Vec<_> = edges
1809 .iter()
1810 .filter(|e| e.kind == CompositionEdgeKind::Include)
1811 .collect();
1812 assert!(!include_edges.is_empty(), "Should have an include edge");
1813 assert_eq!(include_edges[0].source_doc, doc_id);
1814
1815 let redefine_edges: Vec<_> = edges
1816 .iter()
1817 .filter(|e| e.kind == CompositionEdgeKind::Redefine)
1818 .collect();
1819 assert!(!redefine_edges.is_empty(), "Should have a redefine edge");
1820 assert_eq!(redefine_edges[0].source_doc, doc_id);
1821
1822 assert!(include_edges[0].target_doc.is_some());
1824 assert_eq!(include_edges[0].target_doc, redefine_edges[0].target_doc);
1825
1826 let _ = std::fs::remove_dir_all(&tmp);
1827 }
1828
1829 #[test]
1830 fn test_composition_edges_cycle() {
1831 use crate::parser::parse::parse_schema;
1832 use crate::schema::composition::CompositionEdgeKind;
1833 use crate::schema::SchemaSet;
1834
1835 let tmp = std::env::temp_dir().join("xsd_test_composition_cycle");
1836 std::fs::create_dir_all(&tmp).unwrap();
1837
1838 let a_path = tmp.join("cycle_a.xsd");
1839 let b_path = tmp.join("cycle_b.xsd");
1840
1841 let a_xsd = format!(
1843 r#"<?xml version="1.0" encoding="UTF-8"?>
1844<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1845 <xs:include schemaLocation="{}"/>
1846 <xs:element name="A" type="xs:string"/>
1847</xs:schema>"#,
1848 b_path.to_string_lossy()
1849 );
1850
1851 let b_xsd = format!(
1853 r#"<?xml version="1.0" encoding="UTF-8"?>
1854<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1855 <xs:include schemaLocation="{}"/>
1856 <xs:element name="B" type="xs:string"/>
1857</xs:schema>"#,
1858 a_path.to_string_lossy()
1859 );
1860
1861 std::fs::write(&a_path, &a_xsd).unwrap();
1862 std::fs::write(&b_path, &b_xsd).unwrap();
1863
1864 let mut schema_set = SchemaSet::new();
1865 let a_uri = a_path.to_string_lossy().to_string();
1866 let a_doc_id = parse_schema(
1867 std::fs::read_to_string(&a_path).unwrap().as_bytes(),
1868 &a_uri,
1869 &mut schema_set,
1870 )
1871 .unwrap();
1872
1873 schema_set.mark_loaded(a_uri, a_doc_id);
1875
1876 let mut resolver = SchemaResolver::new();
1877
1878 let result_a = resolve_all_directives(a_doc_id, &mut resolver, &mut schema_set);
1880 assert!(result_a.is_ok(), "Resolution of a.xsd should succeed");
1881 assert_eq!(result_a.loaded.len(), 1, "Should have loaded b.xsd");
1882
1883 let b_doc_id = result_a.loaded[0];
1884
1885 let result_b = resolve_all_directives(b_doc_id, &mut resolver, &mut schema_set);
1887 assert!(result_b.is_ok(), "Resolution of b.xsd should succeed");
1888
1889 let edges = &schema_set.composition_edges;
1891
1892 let a_to_b: Vec<_> = edges
1894 .iter()
1895 .filter(|e| e.source_doc == a_doc_id && e.target_doc == Some(b_doc_id))
1896 .collect();
1897 assert_eq!(a_to_b.len(), 1, "Should have exactly one a→b edge");
1898 assert_eq!(a_to_b[0].kind, CompositionEdgeKind::Include);
1899
1900 let b_to_a: Vec<_> = edges
1902 .iter()
1903 .filter(|e| e.source_doc == b_doc_id && e.target_doc == Some(a_doc_id))
1904 .collect();
1905 assert_eq!(b_to_a.len(), 1, "Should have exactly one b→a edge");
1906 assert_eq!(b_to_a[0].kind, CompositionEdgeKind::Include);
1907
1908 let _ = std::fs::remove_dir_all(&tmp);
1909 }
1910
1911 #[test]
1912 fn test_resolved_doc_id_populated() {
1913 use crate::parser::parse::parse_schema;
1914 use crate::schema::SchemaSet;
1915
1916 let tmp = std::env::temp_dir().join("xsd_test_resolved_doc_id");
1917 std::fs::create_dir_all(&tmp).unwrap();
1918
1919 let base_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1921<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1922 <xs:simpleType name="MyString">
1923 <xs:restriction base="xs:string"/>
1924 </xs:simpleType>
1925</xs:schema>"#;
1926 let base_path = tmp.join("base.xsd");
1927 std::fs::write(&base_path, base_xsd).unwrap();
1928
1929 let main_xsd = format!(
1931 r#"<?xml version="1.0" encoding="UTF-8"?>
1932<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1933 <xs:include schemaLocation="{loc}"/>
1934 <xs:redefine schemaLocation="{loc}">
1935 <xs:simpleType name="MyString">
1936 <xs:restriction base="MyString">
1937 <xs:maxLength value="50"/>
1938 </xs:restriction>
1939 </xs:simpleType>
1940 </xs:redefine>
1941</xs:schema>"#,
1942 loc = base_path.to_string_lossy()
1943 );
1944
1945 let mut schema_set = SchemaSet::new();
1946 let main_path = tmp.join("main.xsd").to_string_lossy().to_string();
1947 let doc_id = parse_schema(main_xsd.as_bytes(), &main_path, &mut schema_set).unwrap();
1948
1949 let mut resolver = SchemaResolver::new();
1950 let result = resolve_all_directives(doc_id, &mut resolver, &mut schema_set);
1951 assert!(result.is_ok(), "Resolution should succeed");
1952
1953 let doc = &schema_set.documents[doc_id as usize];
1954 assert!(
1955 doc.includes[0].resolved_doc_id.is_some(),
1956 "Include should have resolved_doc_id"
1957 );
1958 assert!(
1959 doc.redefines[0].resolved_doc_id.is_some(),
1960 "Redefine should have resolved_doc_id"
1961 );
1962
1963 let _ = std::fs::remove_dir_all(&tmp);
1964 }
1965
1966 #[test]
1967 fn test_document_component_index_populated() {
1968 use crate::parser::parse::parse_schema;
1969 use crate::schema::SchemaSet;
1970
1971 let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1972<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1973 <xs:simpleType name="MyString">
1974 <xs:restriction base="xs:string"/>
1975 </xs:simpleType>
1976 <xs:element name="root" type="MyString"/>
1977</xs:schema>"#;
1978
1979 let mut schema_set = SchemaSet::new();
1980 let doc_id = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set).unwrap();
1981
1982 let doc = &schema_set.documents[doc_id as usize];
1983 assert!(
1984 !doc.component_index.is_empty(),
1985 "Component index should be populated"
1986 );
1987
1988 assert!(
1990 doc.component_index
1991 .lookup_type(None, schema_set.name_table.get("MyString").unwrap())
1992 .is_some(),
1993 "Should find MyString type in document component index"
1994 );
1995
1996 assert!(
1998 doc.component_index
1999 .lookup_element(None, schema_set.name_table.get("root").unwrap())
2000 .is_some(),
2001 "Should find root element in document component index"
2002 );
2003
2004 assert!(
2006 doc.component_index
2007 .lookup_type(None, schema_set.name_table.get("root").unwrap())
2008 .is_none(),
2009 "Should not find 'root' as a type"
2010 );
2011 }
2012
2013 #[test]
2014 fn test_redefine_uses_document_scoped_lookup() {
2015 use crate::parser::parse::parse_schema;
2016 use crate::schema::SchemaSet;
2017
2018 let tmp = std::env::temp_dir().join("xsd_test_redefine_doc_scoped");
2019 std::fs::create_dir_all(&tmp).unwrap();
2020
2021 let base_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2023<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2024 <xs:simpleType name="MyString">
2025 <xs:restriction base="xs:string"/>
2026 </xs:simpleType>
2027</xs:schema>"#;
2028 let base_path = tmp.join("redef_base.xsd");
2029 std::fs::write(&base_path, base_xsd).unwrap();
2030
2031 let main_xsd = format!(
2033 r#"<?xml version="1.0" encoding="UTF-8"?>
2034<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2035 <xs:redefine schemaLocation="{loc}">
2036 <xs:simpleType name="MyString">
2037 <xs:restriction base="MyString">
2038 <xs:maxLength value="50"/>
2039 </xs:restriction>
2040 </xs:simpleType>
2041 </xs:redefine>
2042</xs:schema>"#,
2043 loc = base_path.to_string_lossy()
2044 );
2045
2046 let mut schema_set = SchemaSet::new();
2047 let main_path = tmp.join("redef_main.xsd").to_string_lossy().to_string();
2048 let doc_id = parse_schema(main_xsd.as_bytes(), &main_path, &mut schema_set).unwrap();
2049
2050 let mut resolver = SchemaResolver::new();
2052 let result = resolve_all_directives(doc_id, &mut resolver, &mut schema_set);
2053 assert!(result.is_ok(), "Resolution should succeed");
2054
2055 let main_doc = &schema_set.documents[doc_id as usize];
2056 let target_doc_id = main_doc.redefines[0].resolved_doc_id;
2057 assert!(
2058 target_doc_id.is_some(),
2059 "Redefine should have resolved_doc_id"
2060 );
2061
2062 let target_doc = &schema_set.documents[target_doc_id.unwrap() as usize];
2064 let my_string_name = schema_set.name_table.get("MyString").unwrap();
2065 assert!(
2066 target_doc
2067 .component_index
2068 .lookup_type(None, my_string_name)
2069 .is_some(),
2070 "Target document should have MyString in component index"
2071 );
2072
2073 crate::schema::apply_redefine_override(&mut schema_set).unwrap();
2075
2076 let type_key = schema_set.lookup_type(None, my_string_name);
2078 assert!(
2079 type_key.is_some(),
2080 "MyString should still be in namespace table after redefine"
2081 );
2082
2083 let _ = std::fs::remove_dir_all(&tmp);
2084 }
2085
2086 #[test]
2087 fn test_effective_components_provenance_populated() {
2088 use crate::parser::parse::parse_schema;
2089 use crate::schema::composition::CompositionAction;
2090 use crate::schema::SchemaSet;
2091
2092 let tmp = std::env::temp_dir().join("xsd_test_provenance");
2093 std::fs::create_dir_all(&tmp).unwrap();
2094
2095 let base_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2096<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2097 <xs:simpleType name="MyStr">
2098 <xs:restriction base="xs:string"/>
2099 </xs:simpleType>
2100 <xs:element name="root" type="MyStr"/>
2101</xs:schema>"#;
2102 let base_path = tmp.join("prov_base.xsd");
2103 std::fs::write(&base_path, base_xsd).unwrap();
2104
2105 let main_xsd = format!(
2106 r#"<?xml version="1.0" encoding="UTF-8"?>
2107<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2108 <xs:redefine schemaLocation="{loc}">
2109 <xs:simpleType name="MyStr">
2110 <xs:restriction base="MyStr">
2111 <xs:maxLength value="50"/>
2112 </xs:restriction>
2113 </xs:simpleType>
2114 </xs:redefine>
2115</xs:schema>"#,
2116 loc = base_path.to_string_lossy()
2117 );
2118
2119 let mut schema_set = SchemaSet::new();
2120 let main_path = tmp.join("prov_main.xsd").to_string_lossy().to_string();
2121 let doc_id = parse_schema(main_xsd.as_bytes(), &main_path, &mut schema_set).unwrap();
2122
2123 let mut resolver = SchemaResolver::new();
2124 let result = resolve_all_directives(doc_id, &mut resolver, &mut schema_set);
2125 assert!(result.is_ok());
2126
2127 crate::schema::apply_redefine_override(&mut schema_set).unwrap();
2129
2130 assert!(
2131 !schema_set.effective_components.is_empty(),
2132 "Effective components should be populated after composition"
2133 );
2134
2135 let my_str_name = schema_set.name_table.get("MyStr").unwrap();
2138 let my_str_identity = crate::schema::composition::ComponentIdentity {
2139 kind: crate::schema::composition::ComponentKind::SimpleType,
2140 name: my_str_name,
2141 namespace: None,
2142 };
2143 let my_str_eff = schema_set.effective_components.get(&my_str_identity);
2144 assert!(
2145 my_str_eff.is_some(),
2146 "MyStr should be in effective components"
2147 );
2148 let my_str_eff = my_str_eff.unwrap();
2149 assert!(
2150 matches!(my_str_eff.action, CompositionAction::Redefined { .. }),
2151 "MyStr should have Redefined action, not Declared"
2152 );
2153 assert_eq!(
2155 my_str_eff.origin.owner_doc,
2156 Some(doc_id),
2157 "Redefined component origin should be the redefining document"
2158 );
2159
2160 let declared_count = schema_set
2162 .effective_components
2163 .values()
2164 .filter(|c| matches!(c.action, CompositionAction::Declared))
2165 .count();
2166 assert!(
2167 declared_count > 0,
2168 "Should have declared components for non-redefined items"
2169 );
2170
2171 let _ = std::fs::remove_dir_all(&tmp);
2172 }
2173
2174 #[test]
2178 fn test_redefine_no_fallback_to_global_when_scoped() {
2179 use crate::parser::parse::parse_schema;
2180 use crate::schema::model::RedefineDirective;
2181 use crate::schema::redefine::apply_redefine;
2182 use crate::schema::SchemaSet;
2183
2184 let tmp = std::env::temp_dir().join("xsd_test_redefine_no_fallback");
2185 std::fs::create_dir_all(&tmp).unwrap();
2186
2187 let doc_a_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2189<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2190 <xs:simpleType name="MyType">
2191 <xs:restriction base="xs:string"/>
2192 </xs:simpleType>
2193</xs:schema>"#;
2194 let doc_a_path = tmp.join("no_fallback_a.xsd");
2195 std::fs::write(&doc_a_path, doc_a_xsd).unwrap();
2196
2197 let doc_b_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2199<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2200 <xs:simpleType name="OtherType">
2201 <xs:restriction base="xs:string"/>
2202 </xs:simpleType>
2203</xs:schema>"#;
2204 let doc_b_path = tmp.join("no_fallback_b.xsd");
2205 std::fs::write(&doc_b_path, doc_b_xsd).unwrap();
2206
2207 let mut schema_set = SchemaSet::new();
2209 let _doc_a_id = parse_schema(
2210 std::fs::read_to_string(&doc_a_path).unwrap().as_bytes(),
2211 &doc_a_path.to_string_lossy(),
2212 &mut schema_set,
2213 )
2214 .unwrap();
2215 let doc_b_id = parse_schema(
2216 std::fs::read_to_string(&doc_b_path).unwrap().as_bytes(),
2217 &doc_b_path.to_string_lossy(),
2218 &mut schema_set,
2219 )
2220 .unwrap();
2221
2222 let my_type_name = schema_set.name_table.get("MyType").unwrap();
2224 assert!(
2225 schema_set.lookup_type(None, my_type_name).is_some(),
2226 "MyType should be in global namespace table from doc_a"
2227 );
2228
2229 let redef_key = schema_set
2233 .arenas
2234 .alloc_simple_type(crate::arenas::SimpleTypeDefData {
2235 name: Some(my_type_name),
2236 target_namespace: None,
2237 variety: crate::parser::frames::SimpleTypeVariety::Atomic,
2238 base_type: Some(crate::parser::frames::TypeRefResult::QName(
2239 crate::parser::frames::QNameRef {
2240 namespace: None,
2241 local_name: my_type_name,
2242 prefix: None,
2243 },
2244 )),
2245 item_type: None,
2246 member_types: Vec::new(),
2247 facets: Default::default(),
2248 final_derivation: crate::schema::model::DerivationSet::empty(),
2249 id: None,
2250 derivation_id: None,
2251 annotation: None,
2252 source: None,
2253 resolved_base_type: None,
2254 resolved_item_type: None,
2255 resolved_member_types: Vec::new(),
2256 redefine_original: None,
2257 deferred_item_type_error: None,
2258 });
2259
2260 let redefine = RedefineDirective {
2261 source: None,
2262 schema_location: doc_b_path.to_string_lossy().to_string(),
2263 resolved_doc_id: Some(doc_b_id), simple_types: vec![redef_key],
2265 complex_types: Vec::new(),
2266 groups: Vec::new(),
2267 attribute_groups: Vec::new(),
2268 };
2269
2270 let result = apply_redefine(&mut schema_set, &redefine);
2273 assert!(
2274 result.is_err(),
2275 "Redefine should fail when target document lacks the component (no global fallback)"
2276 );
2277
2278 let _ = std::fs::remove_dir_all(&tmp);
2279 }
2280
2281 #[test]
2285 fn test_redefine_simple_vs_complex_kind_mismatch() {
2286 use crate::parser::parse::parse_schema;
2287 use crate::schema::model::RedefineDirective;
2288 use crate::schema::redefine::apply_redefine;
2289 use crate::schema::SchemaSet;
2290
2291 let tmp = std::env::temp_dir().join("xsd_test_redefine_kind_mismatch");
2292 std::fs::create_dir_all(&tmp).unwrap();
2293
2294 let target_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2296<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2297 <xs:complexType name="Foo">
2298 <xs:sequence>
2299 <xs:element name="bar" type="xs:string"/>
2300 </xs:sequence>
2301 </xs:complexType>
2302</xs:schema>"#;
2303 let target_path = tmp.join("kind_target.xsd");
2304 std::fs::write(&target_path, target_xsd).unwrap();
2305
2306 let mut schema_set = SchemaSet::new();
2307 let target_id = parse_schema(
2308 std::fs::read_to_string(&target_path).unwrap().as_bytes(),
2309 &target_path.to_string_lossy(),
2310 &mut schema_set,
2311 )
2312 .unwrap();
2313
2314 let foo_name = schema_set.name_table.get("Foo").unwrap();
2315
2316 let target_doc = &schema_set.documents[target_id as usize];
2318 assert!(
2319 target_doc
2320 .component_index
2321 .lookup_complex_type(None, foo_name)
2322 .is_some(),
2323 "Target should have Foo as complex type"
2324 );
2325 assert!(
2326 target_doc
2327 .component_index
2328 .lookup_simple_type(None, foo_name)
2329 .is_none(),
2330 "Target should NOT have Foo as simple type"
2331 );
2332
2333 let redef_key = schema_set
2335 .arenas
2336 .alloc_simple_type(crate::arenas::SimpleTypeDefData {
2337 name: Some(foo_name),
2338 target_namespace: None,
2339 variety: crate::parser::frames::SimpleTypeVariety::Atomic,
2340 base_type: Some(crate::parser::frames::TypeRefResult::QName(
2341 crate::parser::frames::QNameRef {
2342 namespace: None,
2343 local_name: foo_name,
2344 prefix: None,
2345 },
2346 )),
2347 item_type: None,
2348 member_types: Vec::new(),
2349 facets: Default::default(),
2350 final_derivation: crate::schema::model::DerivationSet::empty(),
2351 id: None,
2352 derivation_id: None,
2353 annotation: None,
2354 source: None,
2355 resolved_base_type: None,
2356 resolved_item_type: None,
2357 resolved_member_types: Vec::new(),
2358 redefine_original: None,
2359 deferred_item_type_error: None,
2360 });
2361
2362 let redefine = RedefineDirective {
2363 source: None,
2364 schema_location: target_path.to_string_lossy().to_string(),
2365 resolved_doc_id: Some(target_id),
2366 simple_types: vec![redef_key],
2367 complex_types: Vec::new(),
2368 groups: Vec::new(),
2369 attribute_groups: Vec::new(),
2370 };
2371
2372 let result = apply_redefine(&mut schema_set, &redefine);
2374 assert!(
2375 result.is_err(),
2376 "Simple type redefine must not match a same-name complex type in target document"
2377 );
2378
2379 let _ = std::fs::remove_dir_all(&tmp);
2380 }
2381
2382 #[test]
2385 fn test_chameleon_include_adopts_namespace() {
2386 use crate::parser::parse::parse_schema;
2387 use crate::schema::SchemaSet;
2388
2389 let tmp = std::env::temp_dir().join("xsd_test_chameleon_include");
2390 std::fs::create_dir_all(&tmp).unwrap();
2391
2392 let chameleon_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2394<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2395 <xs:simpleType name="MyType">
2396 <xs:restriction base="xs:string"/>
2397 </xs:simpleType>
2398</xs:schema>"#;
2399 let chameleon_path = tmp.join("chameleon.xsd");
2400 std::fs::write(&chameleon_path, chameleon_xsd).unwrap();
2401
2402 let main_xsd = format!(
2404 r#"<?xml version="1.0" encoding="UTF-8"?>
2405<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
2406 targetNamespace="http://example.com/main">
2407 <xs:include schemaLocation="{}"/>
2408 <xs:element name="root" type="tns:MyType" xmlns:tns="http://example.com/main"/>
2409</xs:schema>"#,
2410 chameleon_path.to_string_lossy()
2411 );
2412
2413 let mut schema_set = SchemaSet::new();
2414 let main_path = tmp.join("main.xsd").to_string_lossy().to_string();
2415 let doc_id = parse_schema(main_xsd.as_bytes(), &main_path, &mut schema_set).unwrap();
2416
2417 let mut resolver = SchemaResolver::new();
2419 let result = resolve_all_directives(doc_id, &mut resolver, &mut schema_set);
2420 assert!(result.is_ok(), "Resolution should succeed");
2421 assert!(
2422 !result.loaded.is_empty(),
2423 "Should have loaded chameleon.xsd"
2424 );
2425
2426 let chameleon_doc_id = result.loaded[0];
2428 let chameleon_doc = &schema_set.documents[chameleon_doc_id as usize];
2429 let main_ns = schema_set
2430 .name_table
2431 .get("http://example.com/main")
2432 .unwrap();
2433 assert_eq!(
2434 chameleon_doc.target_namespace,
2435 Some(main_ns),
2436 "Chameleon document should adopt includer's targetNamespace"
2437 );
2438
2439 let my_type_name = schema_set.name_table.get("MyType").unwrap();
2441 assert!(
2442 schema_set
2443 .lookup_type(Some(main_ns), my_type_name)
2444 .is_some(),
2445 "MyType should be in the includer's namespace after chameleon adoption"
2446 );
2447 assert!(
2448 schema_set.lookup_type(None, my_type_name).is_none(),
2449 "MyType should NOT be in no-namespace after chameleon adoption"
2450 );
2451
2452 let _ = std::fs::remove_dir_all(&tmp);
2453 }
2454
2455 #[test]
2458 fn test_chameleon_redefine_adopts_namespace() {
2459 use crate::parser::parse::parse_schema;
2460 use crate::schema::SchemaSet;
2461
2462 let tmp = std::env::temp_dir().join("xsd_test_chameleon_redefine");
2463 std::fs::create_dir_all(&tmp).unwrap();
2464
2465 let chameleon_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2467<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2468 <xs:simpleType name="MyStr">
2469 <xs:restriction base="xs:string"/>
2470 </xs:simpleType>
2471</xs:schema>"#;
2472 let chameleon_path = tmp.join("cham_redef.xsd");
2473 std::fs::write(&chameleon_path, chameleon_xsd).unwrap();
2474
2475 let main_xsd = format!(
2477 r#"<?xml version="1.0" encoding="UTF-8"?>
2478<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
2479 targetNamespace="http://example.com/ns">
2480 <xs:redefine schemaLocation="{}">
2481 <xs:simpleType name="MyStr">
2482 <xs:restriction base="MyStr">
2483 <xs:maxLength value="50"/>
2484 </xs:restriction>
2485 </xs:simpleType>
2486 </xs:redefine>
2487</xs:schema>"#,
2488 chameleon_path.to_string_lossy()
2489 );
2490
2491 let mut schema_set = SchemaSet::new();
2492 let main_path = tmp.join("cham_main.xsd").to_string_lossy().to_string();
2493 let doc_id = parse_schema(main_xsd.as_bytes(), &main_path, &mut schema_set).unwrap();
2494
2495 let mut resolver = SchemaResolver::new();
2496 let result = resolve_all_directives(doc_id, &mut resolver, &mut schema_set);
2497 assert!(result.is_ok(), "Resolution should succeed");
2498
2499 let chameleon_doc_id = result.loaded[0];
2501 let chameleon_doc = &schema_set.documents[chameleon_doc_id as usize];
2502 let ns = schema_set.name_table.get("http://example.com/ns").unwrap();
2503 assert_eq!(
2504 chameleon_doc.target_namespace,
2505 Some(ns),
2506 "Chameleon redefine target should adopt redefiner's namespace"
2507 );
2508
2509 let _ = std::fs::remove_dir_all(&tmp);
2510 }
2511
2512 #[test]
2517 fn test_chameleon_multi_namespace_creates_separate_views() {
2518 use crate::parser::parse::parse_schema;
2519 use crate::schema::SchemaSet;
2520
2521 let tmp = std::env::temp_dir().join("xsd_test_chameleon_multi_ns");
2522 std::fs::create_dir_all(&tmp).unwrap();
2523
2524 let chameleon_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2526<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2527 <xs:simpleType name="SharedType">
2528 <xs:restriction base="xs:string"/>
2529 </xs:simpleType>
2530</xs:schema>"#;
2531 let chameleon_path = tmp.join("multi_ns_chameleon.xsd");
2532 std::fs::write(&chameleon_path, chameleon_xsd).unwrap();
2533
2534 let ns_a_xsd = format!(
2536 r#"<?xml version="1.0" encoding="UTF-8"?>
2537<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
2538 targetNamespace="urn:a">
2539 <xs:include schemaLocation="{}"/>
2540</xs:schema>"#,
2541 chameleon_path.to_string_lossy()
2542 );
2543 let ns_a_path = tmp.join("multi_ns_a.xsd");
2544 std::fs::write(&ns_a_path, &ns_a_xsd).unwrap();
2545
2546 let ns_b_xsd = format!(
2548 r#"<?xml version="1.0" encoding="UTF-8"?>
2549<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
2550 targetNamespace="urn:b">
2551 <xs:include schemaLocation="{}"/>
2552</xs:schema>"#,
2553 chameleon_path.to_string_lossy()
2554 );
2555 let ns_b_path = tmp.join("multi_ns_b.xsd");
2556 std::fs::write(&ns_b_path, &ns_b_xsd).unwrap();
2557
2558 let mut schema_set = SchemaSet::new();
2559
2560 let ns_a_uri = ns_a_path.to_string_lossy().to_string();
2562 let doc_a = parse_schema(
2563 std::fs::read_to_string(&ns_a_path).unwrap().as_bytes(),
2564 &ns_a_uri,
2565 &mut schema_set,
2566 )
2567 .unwrap();
2568 let mut resolver = SchemaResolver::new();
2569 let res_a = resolve_all_directives(doc_a, &mut resolver, &mut schema_set);
2570 assert!(res_a.is_ok(), "ns_a resolution should succeed");
2571 let chameleon_a_id = res_a.loaded[0];
2572
2573 let ns_b_uri = ns_b_path.to_string_lossy().to_string();
2575 let doc_b = parse_schema(
2576 std::fs::read_to_string(&ns_b_path).unwrap().as_bytes(),
2577 &ns_b_uri,
2578 &mut schema_set,
2579 )
2580 .unwrap();
2581 let res_b = resolve_all_directives(doc_b, &mut resolver, &mut schema_set);
2582 assert!(res_b.is_ok(), "ns_b resolution should succeed");
2583 let chameleon_b_id = res_b.loaded[0];
2584
2585 assert_ne!(
2587 chameleon_a_id, chameleon_b_id,
2588 "Chameleon schema included from different namespaces must produce separate documents"
2589 );
2590
2591 let ns_a_name = schema_set.name_table.get("urn:a").unwrap();
2593 let ns_b_name = schema_set.name_table.get("urn:b").unwrap();
2594 assert_eq!(
2595 schema_set.documents[chameleon_a_id as usize].target_namespace,
2596 Some(ns_a_name),
2597 "First chameleon copy should have urn:a namespace"
2598 );
2599 assert_eq!(
2600 schema_set.documents[chameleon_b_id as usize].target_namespace,
2601 Some(ns_b_name),
2602 "Second chameleon copy should have urn:b namespace"
2603 );
2604
2605 assert!(schema_set.documents[chameleon_a_id as usize].is_chameleon());
2607 assert!(schema_set.documents[chameleon_b_id as usize].is_chameleon());
2608
2609 let _ = std::fs::remove_dir_all(&tmp);
2610 }
2611
2612 #[test]
2617 fn test_raw_no_namespace_not_reused_for_chameleon() {
2618 use crate::parser::parse::parse_schema;
2619 use crate::schema::SchemaSet;
2620
2621 let tmp = std::env::temp_dir().join("xsd_test_raw_no_ns_chameleon");
2622 std::fs::create_dir_all(&tmp).unwrap();
2623
2624 let shared_xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
2626<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2627 <xs:element name="Shared" type="xs:string"/>
2628</xs:schema>"#;
2629 let shared_path = tmp.join("raw_shared.xsd");
2630 std::fs::write(&shared_path, shared_xsd).unwrap();
2631
2632 let no_ns_xsd = format!(
2634 r#"<?xml version="1.0" encoding="UTF-8"?>
2635<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
2636 <xs:include schemaLocation="{}"/>
2637</xs:schema>"#,
2638 shared_path.to_string_lossy()
2639 );
2640 let no_ns_path = tmp.join("raw_no_ns.xsd");
2641 std::fs::write(&no_ns_path, &no_ns_xsd).unwrap();
2642
2643 let with_ns_xsd = format!(
2645 r#"<?xml version="1.0" encoding="UTF-8"?>
2646<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
2647 targetNamespace="urn:test">
2648 <xs:include schemaLocation="{}"/>
2649</xs:schema>"#,
2650 shared_path.to_string_lossy()
2651 );
2652 let with_ns_path = tmp.join("raw_with_ns.xsd");
2653 std::fs::write(&with_ns_path, &with_ns_xsd).unwrap();
2654
2655 let mut schema_set = SchemaSet::new();
2656 let mut resolver = SchemaResolver::new();
2657
2658 let no_ns_uri = no_ns_path.to_string_lossy().to_string();
2660 let doc_no_ns = parse_schema(
2661 std::fs::read_to_string(&no_ns_path).unwrap().as_bytes(),
2662 &no_ns_uri,
2663 &mut schema_set,
2664 )
2665 .unwrap();
2666 let res1 = resolve_all_directives(doc_no_ns, &mut resolver, &mut schema_set);
2667 assert!(res1.is_ok());
2668 let raw_id = res1.loaded[0];
2669
2670 assert!(!schema_set.documents[raw_id as usize].is_chameleon());
2672 assert!(schema_set.documents[raw_id as usize]
2673 .target_namespace
2674 .is_none());
2675
2676 let with_ns_uri = with_ns_path.to_string_lossy().to_string();
2678 let doc_with_ns = parse_schema(
2679 std::fs::read_to_string(&with_ns_path).unwrap().as_bytes(),
2680 &with_ns_uri,
2681 &mut schema_set,
2682 )
2683 .unwrap();
2684 let res2 = resolve_all_directives(doc_with_ns, &mut resolver, &mut schema_set);
2685 assert!(res2.is_ok());
2686 let chameleon_id = res2.loaded[0];
2687
2688 assert_ne!(
2691 raw_id, chameleon_id,
2692 "Raw no-namespace document must not be reused for chameleon adoption"
2693 );
2694 let ns_name = schema_set.name_table.get("urn:test").unwrap();
2695 assert_eq!(
2696 schema_set.documents[chameleon_id as usize].target_namespace,
2697 Some(ns_name),
2698 "Chameleon copy should adopt urn:test namespace"
2699 );
2700 assert!(schema_set.documents[chameleon_id as usize].is_chameleon());
2701
2702 let _ = std::fs::remove_dir_all(&tmp);
2703 }
2704}