1use std::collections::HashSet;
32
33use quick_xml::events::Event;
34
35use crate::error::{SchemaError, SchemaResult};
36use crate::ids::{DocumentId, NameId};
37use crate::namespace::{is_ncname, NameTable, NamespaceContext, XS_NAMESPACE};
38use crate::parser::assemble::assemble_schema;
39use crate::parser::attrs::{categorize_attributes, parse_attributes, AttributeMap};
40use crate::parser::frames::{
41 create_frame, create_frame_recovering, xsd_names, Frame, FrameResult, SchemaFrameResult,
42 SkipFrame,
43};
44use crate::parser::location::{SourceLocation, SourceMap, SourceRef, SourceSpan};
45use crate::parser::reader::{split_qname, TrackedReader};
46use crate::parser::structure::{
47 validate_attribute_group_structure, validate_attribute_structure,
48 validate_complex_type_structure, validate_element_structure, validate_extension_structure,
49 validate_group_structure, validate_import_structure, validate_include_structure,
50 validate_key_unique_structure, validate_keyref_structure, validate_notation_structure,
51 validate_redefine_structure, validate_schema_structure, validate_simple_type_structure,
52 validate_xsd_version_attribute, validate_xsd_version_element, ValidationContext,
53};
54use crate::schema::annotation::ForeignAttribute;
55use crate::schema::model::XsdVersion;
56use crate::SchemaSet;
57
58#[derive(Debug, Clone)]
60pub struct ParserConfig {
61 pub error_recovery: bool,
63 pub collect_foreign_attributes: bool,
65 pub max_depth: usize,
67 pub(crate) xsd_version: XsdVersion,
70}
71
72impl Default for ParserConfig {
73 fn default() -> Self {
74 Self {
75 error_recovery: true,
76 collect_foreign_attributes: true,
77 max_depth: 0,
78 xsd_version: XsdVersion::V1_0,
79 }
80 }
81}
82
83struct ParserState<'a, 'b, 'c> {
85 ns_context: NamespaceContext<'a>,
87 frame_stack: Vec<Box<dyn Frame>>,
89 doc_id: DocumentId,
91 errors: Vec<SchemaError>,
93 config: &'b ParserConfig,
95 xsd_ns_id: Option<NameId>,
97 source_map: &'c SourceMap,
99 root_schema: Option<SchemaFrameResult>,
101 id_values: HashSet<String>,
103 vc_schema_excluded: bool,
106 chameleon_namespace: Option<NameId>,
113}
114
115impl<'a, 'b, 'c> ParserState<'a, 'b, 'c> {
116 fn new(
117 name_table: &'a mut NameTable,
118 doc_id: DocumentId,
119 config: &'b ParserConfig,
120 source_map: &'c SourceMap,
121 chameleon_namespace: Option<NameId>,
122 ) -> Self {
123 let ns_context = NamespaceContext::new(name_table);
124 Self {
125 ns_context,
126 frame_stack: Vec::new(),
127 doc_id,
128 errors: Vec::new(),
129 config,
130 xsd_ns_id: None,
131 source_map,
132 root_schema: None,
133 id_values: HashSet::new(),
134 vc_schema_excluded: false,
135 chameleon_namespace,
136 }
137 }
138
139 fn get_xsd_ns_id(&mut self) -> Option<NameId> {
141 if self.xsd_ns_id.is_none() {
142 self.xsd_ns_id = self.ns_context.name_table().get(XS_NAMESPACE);
143 }
144 self.xsd_ns_id
145 }
146
147 fn is_in_xsd_namespace(&mut self, namespace: Option<NameId>) -> bool {
149 match (namespace, self.get_xsd_ns_id()) {
150 (Some(ns), Some(xsd_ns)) => ns == xsd_ns,
151 (None, _) => false, _ => false,
153 }
154 }
155
156 fn push_scope(&mut self) {
158 self.ns_context.push_scope();
159 }
160
161 fn pop_scope(&mut self) {
163 self.ns_context.pop_scope();
164 }
165
166 fn current_frame(&self) -> Option<&dyn Frame> {
168 self.frame_stack.last().map(|b| b.as_ref())
169 }
170
171 fn current_frame_mut(&mut self) -> Option<&mut Box<dyn Frame>> {
173 self.frame_stack.last_mut()
174 }
175
176 fn add_error(&mut self, error: SchemaError) {
178 self.errors.push(error);
179 }
180
181 fn recover_or_fail(&mut self, error: SchemaError) -> SchemaResult<()> {
183 if self.config.error_recovery {
184 self.add_error(error);
185 Ok(())
186 } else {
187 Err(error)
188 }
189 }
190
191 fn source_ref(&self, span: SourceSpan) -> SourceRef {
193 SourceRef::new(self.doc_id, span)
194 }
195
196 fn validation_context(&self, source: Option<SourceRef>) -> ValidationContext {
200 let is_top_level = self
201 .frame_stack
202 .last()
203 .map(|f| f.children_are_top_level())
204 .unwrap_or(false);
205 let inside_complex_type = self
209 .frame_stack
210 .iter()
211 .rev()
212 .any(|f| f.children_inside_complex_type());
213 ValidationContext {
214 xsd_version: self.config.xsd_version,
215 is_top_level,
216 inside_complex_type,
217 source,
218 }
219 }
220}
221
222pub fn parse_schema(
236 xml: &[u8],
237 base_uri: &str,
238 schema_set: &mut SchemaSet,
239) -> SchemaResult<DocumentId> {
240 let config = ParserConfig::default();
241 parse_schema_with_config(xml, base_uri, schema_set, &config)
242}
243
244pub fn parse_schema_with_config(
249 xml: &[u8],
250 base_uri: &str,
251 schema_set: &mut SchemaSet,
252 config: &ParserConfig,
253) -> SchemaResult<DocumentId> {
254 parse_schema_with_chameleon(xml, base_uri, schema_set, config, None)
255}
256
257pub fn parse_schema_with_chameleon(
263 xml: &[u8],
264 base_uri: &str,
265 schema_set: &mut SchemaSet,
266 config: &ParserConfig,
267 chameleon_namespace: Option<NameId>,
268) -> SchemaResult<DocumentId> {
269 let mut config = config.clone();
271 config.xsd_version = schema_set.xsd_version;
272
273 let source_text = String::from_utf8_lossy(xml).into_owned();
275 let source_map = SourceMap::new(base_uri.to_string(), source_text);
276
277 let doc_id = schema_set.source_maps.len() as DocumentId;
279
280 let mut state = ParserState::new(
282 &mut schema_set.name_table,
283 doc_id,
284 &config,
285 &source_map,
286 chameleon_namespace,
287 );
288
289 let mut reader = TrackedReader::from_bytes(xml);
291 let mut buf = Vec::new();
292
293 let mut seen_root = false;
295
296 loop {
298 buf.clear();
299 let tracked_event = reader.read_event(&mut buf)?;
300 let span = tracked_event.span;
301
302 match tracked_event.event {
303 Event::Start(ref e) => {
304 handle_start_element(&mut state, e, span, &mut seen_root)?;
305 }
306 Event::Empty(ref e) => {
307 handle_start_element(&mut state, e, span, &mut seen_root)?;
309 handle_end_element(&mut state, span)?;
310 }
311 Event::End(_) => {
312 handle_end_element(&mut state, span)?;
313 }
314 Event::Text(ref e) => {
315 handle_text(&mut state, e, span)?;
316 }
317 Event::CData(ref e) => {
318 handle_cdata(&mut state, e, span)?;
319 }
320 Event::Comment(_) => {
321 }
323 Event::PI(_) => {
324 }
326 Event::Decl(_) => {
327 }
329 Event::DocType(_) => {
330 }
332 Event::Eof => break,
333 }
334 }
335
336 if !state.frame_stack.is_empty() {
338 return Err(SchemaError::structural(
339 "src-resolve",
340 "Schema document ended with unclosed elements",
341 None,
342 ));
343 }
344
345 let parsing_errors = std::mem::take(&mut state.errors);
348
349 let mut root_schema = state
350 .root_schema
351 .take()
352 .ok_or_else(|| SchemaError::internal("No schema result produced during parsing"))?;
353 drop(state);
354
355 schema_set.parsing_errors.extend(parsing_errors);
356
357 let declared_target_namespace = root_schema.target_namespace;
359
360 if let Some(includer_ns) = chameleon_namespace {
364 if let Some(declared) = declared_target_namespace {
365 if declared != includer_ns {
366 return Err(SchemaError::structural(
367 "src-include",
368 format!(
369 "Included schema's targetNamespace '{}' does not match \
370 including schema's targetNamespace '{}'",
371 schema_set.name_table.resolve(declared),
372 schema_set.name_table.resolve(includer_ns),
373 ),
374 None,
375 ));
376 }
377 }
378 }
379
380 if root_schema.target_namespace.is_none() {
383 if let Some(ns) = chameleon_namespace {
384 root_schema.target_namespace = Some(ns);
385 }
386 }
387
388 let added_id = schema_set.source_maps.add(source_map);
391 debug_assert_eq!(doc_id, added_id, "Document ID mismatch");
392
393 let mut doc = assemble_schema(schema_set, doc_id, base_uri, root_schema)?;
394 doc.declared_target_namespace = declared_target_namespace;
395 schema_set.documents.push(doc);
396
397 Ok(doc_id)
398}
399
400fn validate_element_attributes(
405 local_name: &str,
406 attrs: &AttributeMap,
407 name_table: &NameTable,
408 ctx: &ValidationContext,
409) -> SchemaResult<()> {
410 match local_name {
411 xsd_names::ELEMENT => validate_element_structure(attrs, name_table, ctx),
412 xsd_names::ATTRIBUTE => validate_attribute_structure(attrs, name_table, ctx),
413 xsd_names::SIMPLE_TYPE => validate_simple_type_structure(attrs, name_table, ctx),
414 xsd_names::COMPLEX_TYPE => validate_complex_type_structure(attrs, name_table, ctx),
415 xsd_names::GROUP => validate_group_structure(attrs, name_table, ctx),
416 xsd_names::ATTRIBUTE_GROUP => validate_attribute_group_structure(attrs, name_table, ctx),
417 xsd_names::NOTATION => validate_notation_structure(attrs, name_table, ctx),
418 xsd_names::INCLUDE => validate_include_structure(attrs, name_table),
419 xsd_names::IMPORT => validate_import_structure(attrs, name_table),
420 xsd_names::REDEFINE => validate_redefine_structure(attrs, name_table),
421 xsd_names::SCHEMA => validate_schema_structure(attrs, name_table),
422 xsd_names::KEY | xsd_names::UNIQUE => validate_key_unique_structure(attrs, name_table),
423 xsd_names::KEYREF => validate_keyref_structure(attrs, name_table),
424 xsd_names::EXTENSION => validate_extension_structure(attrs, name_table),
425 _ => Ok(()),
428 }
429}
430
431fn intern_attribute_values(local_name: &str, attrs: &AttributeMap, name_table: &mut NameTable) {
432 fn add_if_present(attrs: &AttributeMap, name_table: &mut NameTable, attr: &str) {
433 if let Some(value) = attrs.get_value_by_name(name_table, attr) {
434 name_table.add(value);
435 }
436 }
437
438 match local_name {
439 xsd_names::SCHEMA => {
440 add_if_present(attrs, name_table, "targetNamespace");
441 add_if_present(attrs, name_table, "defaultAttributes");
442 }
443 xsd_names::SIMPLE_TYPE | xsd_names::COMPLEX_TYPE => {
444 add_if_present(attrs, name_table, "name");
445 }
446 xsd_names::ELEMENT | xsd_names::ATTRIBUTE => {
447 add_if_present(attrs, name_table, "name");
448 add_if_present(attrs, name_table, "targetNamespace");
449 }
450 xsd_names::GROUP | xsd_names::ATTRIBUTE_GROUP | xsd_names::NOTATION => {
451 add_if_present(attrs, name_table, "name");
452 }
453 xsd_names::KEY | xsd_names::KEYREF | xsd_names::UNIQUE => {
454 add_if_present(attrs, name_table, "name");
455 }
456 _ => {}
457 }
458}
459
460fn handle_start_element(
462 state: &mut ParserState,
463 element: &quick_xml::events::BytesStart,
464 span: SourceSpan,
465 seen_root: &mut bool,
466) -> SchemaResult<()> {
467 state.push_scope();
469
470 let name = element.name();
472 let name_bytes = name.as_ref();
473 let (local_name_bytes, prefix_bytes) = split_qname(name_bytes);
474
475 let local_name = std::str::from_utf8(local_name_bytes).map_err(|e| {
476 SchemaError::xml(
477 format!("Invalid UTF-8 in element name: {}", e),
478 Some(state.source_ref(span).to_location(state.source_map)),
479 )
480 })?;
481
482 for attr_result in element.attributes() {
484 let attr =
485 attr_result.map_err(|e| SchemaError::xml(format!("Attribute error: {}", e), None))?;
486
487 let attr_name = attr.key.as_ref();
488 let attr_value = attr
489 .unescape_value()
490 .map_err(|e| SchemaError::xml(format!("Attribute value error: {}", e), None))?;
491
492 if attr_name == b"xmlns" {
494 state.ns_context.add_namespace("", &attr_value);
496 } else if attr_name.starts_with(b"xmlns:") {
497 let prefix = std::str::from_utf8(&attr_name[6..]).unwrap_or("");
499 state.ns_context.add_namespace(prefix, &attr_value);
500 }
501 }
502
503 let element_ns = if let Some(prefix) = prefix_bytes {
505 let prefix_str = std::str::from_utf8(prefix).unwrap_or("");
506 state.ns_context.lookup_namespace(prefix_str)
507 } else {
508 state.ns_context.default_namespace()
509 };
510
511 if !*seen_root {
513 *seen_root = true;
514
515 if local_name != xsd_names::SCHEMA || !state.is_in_xsd_namespace(element_ns) {
517 return Err(SchemaError::structural(
518 "sch-props-correct",
519 format!("Root element must be xs:schema, found '{}'", local_name),
520 None,
521 ));
522 }
523 }
524
525 let source_ref = Some(state.source_ref(span));
527 let parsed_attrs = parse_attributes(
528 element.attributes(),
529 &mut state.ns_context,
530 source_ref.clone(),
531 )?;
532 if state.is_in_xsd_namespace(element_ns) {
539 let xsd_ns = state.get_xsd_ns_id();
540 for attr in &parsed_attrs {
541 if attr.prefix.is_some() && attr.namespace == xsd_ns {
542 let attr_name = state.ns_context.name_table().resolve(attr.local_name);
543 let location = attr
544 .source
545 .as_ref()
546 .map(|s| s.to_location(state.source_map));
547 state.recover_or_fail(SchemaError::structural(
548 "sch-props-correct",
549 format!(
550 "XSD attribute '{}' on element '{}' must be unqualified, not in \
551 the XSD namespace",
552 attr_name, local_name,
553 ),
554 location,
555 ))?;
556 }
557 }
558 }
559 let (xsd_attrs, foreign_attrs) =
560 categorize_attributes(parsed_attrs, state.ns_context.name_table());
561 let attr_map = AttributeMap::new(xsd_attrs);
562
563 if state.frame_stack.is_empty()
572 && local_name == xsd_names::SCHEMA
573 && state.is_in_xsd_namespace(element_ns)
574 {
575 if let Some(chameleon_ns) = state.chameleon_namespace {
576 let has_own_tns = attr_map
577 .get_value_by_name(state.ns_context.name_table(), "targetNamespace")
578 .is_some();
579 let default_is_null = state.ns_context.default_namespace().is_none();
580 if !has_own_tns && default_is_null {
581 state
582 .ns_context
583 .set_default_namespace_id(Some(chameleon_ns));
584 }
585 }
586 }
587
588 let vc_excluded = if foreign_attrs.is_empty() {
590 false
591 } else {
592 let ns_snapshot = state.ns_context.snapshot();
593 should_skip_for_vc(
594 &foreign_attrs,
595 state.ns_context.name_table(),
596 &ns_snapshot,
597 state.config.xsd_version,
598 )?
599 };
600 if state.frame_stack.is_empty() {
601 if vc_excluded {
602 state.vc_schema_excluded = true;
603 }
604 } else if vc_excluded || state.vc_schema_excluded {
605 push_skip_frame(state, source_ref, foreign_attrs)?;
606 return Ok(());
607 }
608
609 let is_in_xsd_ns = state.is_in_xsd_namespace(element_ns);
611
612 let (allows_child, has_frame, in_skip_frame, accepts_foreign) = {
614 if let Some(frame) = state.current_frame() {
615 let mut allowed = frame.allows(local_name, state.ns_context.name_table());
616 if allowed && local_name == xsd_names::ANNOTATION && frame.has_annotation() {
618 allowed = false;
619 }
620 (
621 allowed,
622 true,
623 frame.is_skip_frame(),
624 frame.accepts_foreign_children(),
625 )
626 } else {
627 (true, false, false, false)
628 }
629 };
630
631 if has_frame {
632 if in_skip_frame {
634 if let Some(mut frame) = state.frame_stack.pop() {
636 frame.on_child_start(local_name, state.ns_context.name_table());
637 state.frame_stack.push(frame);
638 }
639 return Ok(());
640 }
641
642 if accepts_foreign {
650 push_skip_frame(state, source_ref, foreign_attrs)?;
651 return Ok(());
652 }
653
654 if !is_in_xsd_ns {
655 let location = source_ref.as_ref().map(|s| s.to_location(state.source_map));
660 state.recover_or_fail(SchemaError::structural(
661 "sch-props-correct",
662 format!(
663 "Foreign-namespace element '{}' is not allowed here",
664 local_name
665 ),
666 location,
667 ))?;
668 push_skip_frame(state, source_ref, foreign_attrs)?;
669 return Ok(());
670 }
671
672 if !allows_child {
673 if state.config.error_recovery {
674 state.add_error(SchemaError::structural(
676 "sch-props-correct",
677 format!("Unexpected element '{}' in current context", local_name),
678 None,
679 ));
680 push_skip_frame(state, source_ref, foreign_attrs)?;
681 return Ok(());
682 } else {
683 return Err(SchemaError::structural(
684 "sch-props-correct",
685 format!("Unexpected element '{}' in current context", local_name),
686 None,
687 ));
688 }
689 }
690
691 if let Some(mut frame) = state.frame_stack.pop() {
694 frame.on_child_start(local_name, state.ns_context.name_table());
695 state.frame_stack.push(frame);
696 }
697 }
698
699 let validation_ctx = state.validation_context(source_ref.clone());
701 if let Err(e) = validate_xsd_version_element(local_name, &validation_ctx) {
702 if state.config.error_recovery {
703 state.add_error(e);
704 push_skip_frame(state, source_ref, foreign_attrs)?;
705 return Ok(());
706 } else {
707 return Err(e);
708 }
709 }
710
711 if let Err(e) = validate_element_attributes(
713 local_name,
714 &attr_map,
715 state.ns_context.name_table(),
716 &validation_ctx,
717 ) {
718 state.recover_or_fail(e)?;
719 }
720
721 if matches!(local_name, xsd_names::DOCUMENTATION | xsd_names::APPINFO) {
725 let xml_ns = state
726 .ns_context
727 .name_table()
728 .get(crate::namespace::XML_NAMESPACE);
729 let lang_local = state.ns_context.name_table().get("lang");
730 if let (Some(xml_ns), Some(lang_local)) = (xml_ns, lang_local) {
731 for fa in &foreign_attrs {
732 if fa.namespace == Some(xml_ns)
733 && fa.local_name == lang_local
734 && !crate::types::validators::is_valid_language(
735 &crate::types::facets::normalize_whitespace(
736 &fa.value,
737 crate::types::facets::WhitespaceMode::Collapse,
738 ),
739 )
740 {
741 state.recover_or_fail(SchemaError::structural(
742 "s4s-att-invalid-value",
743 format!(
744 "'{}' xml:lang value '{}' is not a valid xs:language",
745 local_name, fa.value
746 ),
747 source_ref.as_ref().map(|s| s.to_location(state.source_map)),
748 ))?;
749 }
750 }
751 }
752 }
753
754 if is_in_xsd_ns {
756 for attr_name_id in attr_map.names() {
757 let attr_name = state.ns_context.name_table().resolve(attr_name_id);
758 if let Err(e) = validate_xsd_version_attribute(&attr_name, local_name, &validation_ctx)
759 {
760 state.recover_or_fail(e)?;
761 }
762 }
763 }
764
765 if !matches!(local_name, xsd_names::APPINFO | xsd_names::DOCUMENTATION) {
768 if let Some(id_val) = attr_map.get_value_by_name(state.ns_context.name_table(), "id") {
769 if !is_ncname(id_val) {
770 state.recover_or_fail(SchemaError::structural(
771 "s4s-att-invalid-value",
772 format!(
773 "'{}' attribute 'id' has invalid value '{}': not a valid xs:ID",
774 local_name, id_val
775 ),
776 source_ref.as_ref().map(|s| s.to_location(state.source_map)),
777 ))?;
778 } else if !state.id_values.insert(id_val.to_string()) {
779 state.recover_or_fail(SchemaError::structural(
780 "s4s-att-invalid-value",
781 format!(
782 "Duplicate xs:ID value '{}' on element '{}'",
783 id_val, local_name
784 ),
785 source_ref.as_ref().map(|s| s.to_location(state.source_map)),
786 ))?;
787 }
788 }
789 }
790
791 if is_in_xsd_ns {
793 intern_attribute_values(local_name, &attr_map, state.ns_context.name_table_mut());
794 }
795
796 let ns_snapshot = state.ns_context.snapshot();
798
799 let frame = if state.config.error_recovery {
801 let mut frame = create_frame_recovering(
802 local_name,
803 &attr_map,
804 state.ns_context.name_table(),
805 source_ref.clone(),
806 &ns_snapshot,
807 &mut state.errors,
808 );
809 frame.set_foreign_attributes(foreign_attrs);
810 if matches!(local_name, xsd_names::APPINFO | xsd_names::DOCUMENTATION) {
812 frame.set_namespaces(ns_snapshot.clone());
813 }
814 frame
815 } else {
816 let mut frame = create_frame(
817 local_name,
818 &attr_map,
819 state.ns_context.name_table(),
820 source_ref.clone(),
821 &ns_snapshot,
822 )?;
823 frame.set_foreign_attributes(foreign_attrs);
824 if matches!(local_name, xsd_names::APPINFO | xsd_names::DOCUMENTATION) {
826 frame.set_namespaces(ns_snapshot.clone());
827 }
828 frame
829 };
830
831 state.frame_stack.push(frame);
833
834 Ok(())
835}
836
837fn handle_end_element(state: &mut ParserState, _span: SourceSpan) -> SchemaResult<()> {
839 {
841 if let Some(mut frame) = state.frame_stack.pop() {
842 if frame.is_skip_frame() {
843 if !frame.on_child_end() {
846 state.frame_stack.push(frame);
848 state.pop_scope();
849 return Ok(());
850 }
851 }
852 state.frame_stack.push(frame);
854 }
855 }
856
857 let frame = match state.frame_stack.pop() {
859 Some(f) => f,
860 None => {
861 return Err(SchemaError::internal("End element with no frame on stack"));
862 }
863 };
864
865 let source_ref = frame.source().cloned();
867
868 let result = match frame.finish() {
869 Ok(r) => r,
870 Err(e) => {
871 let e = if let Some(ref src) = source_ref {
873 e.with_location(state.source_map.locate(src.span.start))
874 } else {
875 e
876 };
877 return Err(e);
878 }
879 };
880
881 state.pop_scope();
883
884 if let Some(parent) = state.current_frame_mut() {
888 if let Err(e) = parent.attach(result) {
889 let e = if let Some(ref src) = source_ref {
890 e.with_location(state.source_map.locate(src.span.start))
891 } else {
892 e
893 };
894 return Err(e);
895 }
896 }
897 else if let FrameResult::Schema(schema_result) = result {
899 state.root_schema = Some(schema_result);
900 } else {
901 return Err(SchemaError::internal(
902 "Root frame did not produce a schema result",
903 ));
904 }
905
906 Ok(())
907}
908
909fn handle_text(
911 state: &mut ParserState,
912 text: &quick_xml::events::BytesText,
913 span: SourceSpan,
914) -> SchemaResult<()> {
915 let text_content = text
916 .unescape()
917 .map_err(|e| SchemaError::xml(format!("Text content error: {}", e), None))?;
918
919 if let Some(mut frame) = state.frame_stack.pop() {
924 if frame.accepts_text() {
925 frame.on_text(&text_content);
926 } else if !frame.is_skip_frame() && !text_content.trim().is_empty() {
927 let source_ref = state.source_ref(span);
928 state.frame_stack.push(frame);
929 return state.recover_or_fail(SchemaError::structural(
930 "sch-props-correct",
931 "Non-whitespace text is not allowed here",
932 Some(source_ref.to_location(state.source_map)),
933 ));
934 }
935 state.frame_stack.push(frame);
936 }
937
938 Ok(())
939}
940
941fn handle_cdata(
943 state: &mut ParserState,
944 cdata: &quick_xml::events::BytesCData,
945 span: SourceSpan,
946) -> SchemaResult<()> {
947 if let Some(mut frame) = state.frame_stack.pop() {
949 if frame.accepts_text() {
950 if let Ok(cdata_str) = std::str::from_utf8(cdata.as_ref()) {
952 frame.on_cdata(cdata_str);
953 }
954 } else if !frame.is_skip_frame() {
955 let cdata_is_whitespace = std::str::from_utf8(cdata.as_ref())
959 .map(|s| s.trim().is_empty())
960 .unwrap_or(false);
961 if !cdata_is_whitespace {
962 let source_ref = state.source_ref(span);
963 state.frame_stack.push(frame);
964 return state.recover_or_fail(SchemaError::structural(
965 "sch-props-correct",
966 "Non-whitespace CDATA is not allowed here",
967 Some(source_ref.to_location(state.source_map)),
968 ));
969 }
970 }
971 state.frame_stack.push(frame);
972 }
973 Ok(())
974}
975
976fn should_skip_for_vc(
982 foreign_attrs: &[ForeignAttribute],
983 name_table: &NameTable,
984 ns_snapshot: &crate::namespace::NamespaceContextSnapshot,
985 xsd_version: XsdVersion,
986) -> SchemaResult<bool> {
987 const VC_NAMESPACE: &str = "http://www.w3.org/2007/XMLSchema-versioning";
988 let Some(vc_ns_id) = name_table.get(VC_NAMESPACE) else {
989 return Ok(false);
990 };
991 let current: f64 = match xsd_version {
992 XsdVersion::V1_0 => 1.0,
993 XsdVersion::V1_1 => 1.1,
994 };
995 for attr in foreign_attrs {
996 if attr.namespace != Some(vc_ns_id) {
997 continue;
998 }
999 let local = name_table.resolve_ref(attr.local_name);
1000 let include = match local {
1001 "minVersion" | "maxVersion" | "minVersionExclusive" | "maxVersionExclusive" => {
1002 let bound = match attr.value.trim().parse::<f64>() {
1003 Ok(v) => v,
1004 Err(_) => {
1005 if xsd_version == XsdVersion::V1_1 {
1006 return Err(err_versioning(format!(
1007 "Invalid vc:{} value '{}': must be a valid xs:decimal",
1008 local,
1009 attr.value.trim()
1010 )));
1011 }
1012 continue;
1014 }
1015 };
1016 match local {
1017 "minVersion" => current >= bound,
1018 "maxVersion" => current <= bound,
1019 "minVersionExclusive" => current > bound,
1020 _ => current < bound,
1021 }
1022 }
1023 "typeAvailable" | "typeUnavailable" | "facetAvailable" | "facetUnavailable" => {
1024 let is_available_attr = matches!(local, "typeAvailable" | "facetAvailable");
1031 let is_type_check = matches!(local, "typeAvailable" | "typeUnavailable");
1032 let mut available_count = 0usize;
1033 let mut total_count = 0usize;
1034 for token in attr.value.split_whitespace() {
1035 total_count += 1;
1036 if vc_token_available(
1037 token,
1038 local,
1039 is_type_check,
1040 ns_snapshot,
1041 name_table,
1042 xsd_version,
1043 )? {
1044 available_count += 1;
1045 }
1046 }
1047 if total_count == 0 {
1048 continue;
1049 }
1050 if is_available_attr {
1053 available_count == total_count
1054 } else {
1055 available_count < total_count
1056 }
1057 }
1058 _ => continue,
1059 };
1060 if !include {
1061 return Ok(true);
1062 }
1063 }
1064 Ok(false)
1065}
1066
1067fn err_versioning(msg: String) -> SchemaError {
1068 SchemaError::structural("src-versioning", msg, None)
1069}
1070
1071fn vc_token_available(
1073 token: &str,
1074 local: &str,
1075 is_type_check: bool,
1076 ns_snapshot: &crate::namespace::NamespaceContextSnapshot,
1077 name_table: &NameTable,
1078 xsd_version: XsdVersion,
1079) -> SchemaResult<bool> {
1080 use crate::namespace::is_ncname;
1081 let (prefix_str, local_str) = match token.find(':') {
1082 Some(pos) => (Some(&token[..pos]), &token[pos + 1..]),
1083 None => (None, token),
1084 };
1085 if !is_ncname(local_str) {
1086 return Err(err_versioning(format!(
1087 "Invalid QName '{}' in vc:{}: '{}' is not a valid NCName",
1088 token, local, local_str
1089 )));
1090 }
1091 let ns_id = match prefix_str {
1092 Some(p) => {
1093 if !is_ncname(p) {
1094 return Err(err_versioning(format!(
1095 "Invalid QName '{}' in vc:{}: '{}' is not a valid NCName prefix",
1096 token, local, p
1097 )));
1098 }
1099 let p_id = name_table.get(p).ok_or_else(|| {
1100 err_versioning(format!(
1101 "Undeclared prefix '{}' in vc:{} value '{}'",
1102 p, local, token
1103 ))
1104 })?;
1105 Some(ns_snapshot.resolve_prefix(p_id).ok_or_else(|| {
1106 err_versioning(format!(
1107 "Undeclared prefix '{}' in vc:{} value '{}'",
1108 p, local, token
1109 ))
1110 })?)
1111 }
1112 None => None,
1113 };
1114 if ns_id != Some(crate::namespace::well_known::XS_NAMESPACE) {
1115 return Ok(false);
1116 }
1117 Ok(if is_type_check {
1118 vc_is_xs_type_available(local_str, xsd_version)
1119 } else {
1120 vc_is_xs_facet_available(local_str)
1121 })
1122}
1123
1124fn vc_is_xs_type_available(local_name: &str, xsd_version: XsdVersion) -> bool {
1125 match crate::types::XmlTypeCode::from_local_name(local_name) {
1126 Some(code) => !code.is_xsd11() || xsd_version == XsdVersion::V1_1,
1127 None => false,
1128 }
1129}
1130
/// Reports whether `local_name` is one of the facet names this processor
/// treats as available. The comparison is exact and case-sensitive.
fn vc_is_xs_facet_available(local_name: &str) -> bool {
    const KNOWN_FACETS: [&str; 14] = [
        "minLength",
        "maxLength",
        "length",
        "pattern",
        "enumeration",
        "whiteSpace",
        "totalDigits",
        "fractionDigits",
        "minInclusive",
        "maxInclusive",
        "minExclusive",
        "maxExclusive",
        "assertion",
        "explicitTimezone",
    ];
    KNOWN_FACETS.contains(&local_name)
}
1150
1151fn push_skip_frame(
1153 state: &mut ParserState,
1154 source: Option<SourceRef>,
1155 foreign_attrs: Vec<ForeignAttribute>,
1156) -> SchemaResult<()> {
1157 let mut frame: Box<dyn Frame> = Box::new(SkipFrame::new(source));
1158 frame.set_foreign_attributes(foreign_attrs);
1159 state.frame_stack.push(frame);
1160 Ok(())
1161}
1162
1163trait SourceRefExt {
1165 fn to_location(&self, source_map: &SourceMap) -> SourceLocation;
1166}
1167
1168impl SourceRefExt for SourceRef {
1169 fn to_location(&self, source_map: &SourceMap) -> SourceLocation {
1170 source_map.locate(self.span.start)
1171 }
1172}
1173
1174#[cfg(test)]
1175mod tests {
1176 use super::*;
1177 use crate::ids::TypeKey;
1178 use crate::schema::model::FormChoice;
1179
1180 #[test]
1181 fn test_parse_minimal_schema() {
1182 let mut schema_set = SchemaSet::new();
1183 let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1184 <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1185 </xs:schema>"#;
1186
1187 let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1188 assert!(result.is_ok());
1189 }
1190
1191 #[test]
1192 fn test_parse_schema_with_element() {
1193 let mut schema_set = SchemaSet::new();
1194 let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1195 <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1196 <xs:element name="root" type="xs:string"/>
1197 </xs:schema>"#;
1198
1199 let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1200 assert!(result.is_ok());
1201 }
1202
1203 #[test]
1204 fn test_parse_schema_with_complex_type() {
1205 let mut schema_set = SchemaSet::new();
1206 let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1207 <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1208 <xs:complexType name="PersonType">
1209 <xs:sequence>
1210 <xs:element name="name" type="xs:string"/>
1211 <xs:element name="age" type="xs:int"/>
1212 </xs:sequence>
1213 </xs:complexType>
1214 </xs:schema>"#;
1215
1216 let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1217 assert!(result.is_ok());
1218 }
1219
1220 #[test]
1221 fn test_parse_schema_with_simple_type() {
1222 let mut schema_set = SchemaSet::new();
1223 let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1224 <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1225 <xs:simpleType name="StringList">
1226 <xs:list itemType="xs:string"/>
1227 </xs:simpleType>
1228 </xs:schema>"#;
1229
1230 let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1231 assert!(result.is_ok());
1232 }
1233
1234 #[test]
1235 fn test_parse_schema_with_target_namespace() {
1236 let mut schema_set = SchemaSet::new();
1237 let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1238 <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
1239 targetNamespace="http://example.com/test">
1240 </xs:schema>"#;
1241
1242 let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1243 assert!(result.is_ok());
1244 }
1245
1246 #[test]
1247 fn test_parse_schema_with_import() {
1248 let mut schema_set = SchemaSet::new();
1249 let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
1250 <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1251 <xs:import namespace="http://www.w3.org/XML/1998/namespace"/>
1252 </xs:schema>"#;
1253
1254 let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
1255 assert!(result.is_ok());
1256 }
1257
/// Parses an XSD 1.1 schema and inspects what ends up on the document and in
/// the arenas: the `defaultAttributes` reference, the `defaultOpenContent`
/// wildcard, the `common` attribute group, and the `root` element's
/// substitution group and inline complex type.
#[cfg(feature = "xsd11")]
#[test]
fn test_parse_schema_assembles_arena_fields() {
    use crate::parser::frames::TypeFrameResult;
    use crate::schema::model::OpenContentMode;
    use crate::schema::wildcard::{NamespaceConstraint, ProcessContents};

    let mut schema_set = SchemaSet::xsd11();
    let xsd = r###"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
defaultAttributes="common">
<xs:defaultOpenContent mode="suffix">
<xs:any namespace="##other" processContents="lax"/>
</xs:defaultOpenContent>
<xs:attributeGroup name="common">
<xs:attribute name="lang" type="xs:string"/>
</xs:attributeGroup>
<xs:element name="head1" type="xs:string"/>
<xs:element name="head2" type="xs:string"/>
<xs:element name="root" substitutionGroup="head1 head2">
<xs:complexType>
<xs:attribute name="code" type="xs:string"/>
</xs:complexType>
</xs:element>
</xs:schema>"###;

    let config = ParserConfig::default();
    let doc_id =
        parse_schema_with_config(xsd.as_bytes(), "test.xsd", &mut schema_set, &config).unwrap();

    // Document-level settings: defaultAttributes is an unqualified reference
    // to "common".
    let doc = &schema_set.documents[doc_id as usize];
    let default_attrs = doc.default_attributes.as_ref().expect("defaultAttributes");
    let default_attrs_name = schema_set.name_table.resolve(default_attrs.local_name);
    assert_eq!(default_attrs_name, "common");
    assert!(default_attrs.namespace_uri.is_none());

    // Document-level settings: defaultOpenContent mode and its wildcard.
    let default_open = doc
        .default_open_content
        .as_ref()
        .expect("defaultOpenContent");
    assert_eq!(default_open.mode, OpenContentMode::Suffix);
    let wildcard = default_open.wildcard.as_ref().expect("wildcard");
    let is_other_namespace = matches!(
        wildcard.namespace_constraint,
        NamespaceConstraint::Other
    );
    assert!(is_other_namespace);
    assert_eq!(wildcard.process_contents, ProcessContents::Lax);

    // The attribute group "common" holds exactly the "lang" attribute.
    let common_id = schema_set.name_table.get("common").unwrap();
    let group_key = schema_set
        .lookup_attribute_group(None, common_id)
        .expect("attributeGroup lookup");
    let group = schema_set.arenas.get_attribute_group(group_key).unwrap();
    assert_eq!(group.attributes.len(), 1);
    let lang_id = group.attributes[0].attribute.name.unwrap();
    assert_eq!(schema_set.name_table.resolve(lang_id), "lang");

    // The element "root" lists both heads, in document order.
    let root_id = schema_set.name_table.get("root").unwrap();
    let root_key = schema_set
        .lookup_element(None, root_id)
        .expect("element lookup");
    let root = schema_set.arenas.get_element(root_key).unwrap();
    assert_eq!(root.substitution_group.len(), 2);
    for (head, expected) in root.substitution_group.iter().zip(["head1", "head2"]) {
        assert_eq!(schema_set.name_table.resolve(head.local_name), expected);
    }

    // The inline type of "root" is a complex type carrying the "code" attribute.
    let inline = root.inline_type.as_ref().expect("inline type");
    if let TypeFrameResult::Complex(ct) = inline.as_ref() {
        assert_eq!(ct.attributes.len(), 1);
        let code_id = ct.attributes[0].attribute.name.unwrap();
        assert_eq!(schema_set.name_table.resolve(code_id), "code");
    } else {
        panic!("expected inline complex type");
    }
}
1350
/// A document whose root element is `notSchema` must make `parse_schema`
/// return an error.
#[test]
fn test_parse_invalid_root() {
    let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
<notSchema xmlns:xs="http://www.w3.org/2001/XMLSchema">
</notSchema>"#;
    let mut schema_set = SchemaSet::new();

    let outcome = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);

    assert!(outcome.is_err());
}
1361
/// `parse_form_choice` yields `Qualified` for `"qualified"`, and
/// `Unqualified` for both `"unqualified"` and an absent value.
#[test]
fn test_parse_form_choice() {
    use crate::parser::assemble::parse_form_choice;

    // (input, expected) pairs, checked in order.
    let cases = [
        (Some("qualified"), FormChoice::Qualified),
        (Some("unqualified"), FormChoice::Unqualified),
        (None, FormChoice::Unqualified),
    ];

    for (input, expected) in cases {
        assert_eq!(parse_form_choice(input), expected);
    }
}
1377
/// The default `ParserConfig` enables error recovery and foreign-attribute
/// collection, and has `max_depth` set to 0.
#[test]
fn test_parser_config_default() {
    let ParserConfig {
        error_recovery,
        collect_foreign_attributes,
        max_depth,
        ..
    } = ParserConfig::default();

    assert!(error_recovery);
    assert!(collect_foreign_attributes);
    assert_eq!(max_depth, 0);
}
1385
/// With `blockDefault="extension"` and `finalDefault="restriction"` on the
/// schema, components that declare neither attribute must carry those values:
/// `block` and `final` on the element and the complex type, `final` on the
/// simple type.
#[test]
fn test_apply_schema_defaults_to_elements_and_types() {
    let mut schema_set = SchemaSet::new();
    let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
blockDefault="extension"
finalDefault="restriction">
<xs:element name="head" type="xs:string"/>
<xs:complexType name="Base"/>
<xs:simpleType name="Simple">
<xs:restriction base="xs:string"/>
</xs:simpleType>
</xs:schema>"#;

    let outcome = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
    assert!(outcome.is_ok());

    let arenas = &schema_set.arenas;

    // Element "head".
    let head_id = schema_set.name_table.get("head").expect("name id for head");
    let ns_table = schema_set
        .namespaces
        .get(&None)
        .expect("default namespace table");
    let head_key = *ns_table.elements.get(&head_id).expect("element key");
    let head = arenas.elements.get(head_key).expect("element data");
    assert!(head.block.contains_extension());
    assert!(head.final_derivation.contains_restriction());

    // Complex type "Base".
    let base_id = schema_set.name_table.get("Base").expect("name id for Base");
    let base_key = ns_table.types.get(&base_id).expect("type key for Base");
    if let TypeKey::Complex(ct_key) = base_key {
        let base = arenas.complex_types.get(*ct_key).expect("complex type data");
        assert!(base.block.contains_extension());
        assert!(base.final_derivation.contains_restriction());
    } else {
        panic!("expected complex type for Base");
    }

    // Simple type "Simple" (only the final set is checked).
    let simple_id = schema_set
        .name_table
        .get("Simple")
        .expect("name id for Simple");
    let simple_key = ns_table.types.get(&simple_id).expect("type key for Simple");
    if let TypeKey::Simple(st_key) = simple_key {
        let simple = arenas.simple_types.get(*st_key).expect("simple type data");
        assert!(simple.final_derivation.contains_restriction());
    } else {
        panic!("expected simple type for Simple");
    }
}
1449
/// Verifies that an explicit empty `final=""` overrides the schema's
/// `finalDefault="restriction"`, while a component without a `final`
/// attribute inherits it.
///
/// The check is repeated for elements, complex types and simple types. A
/// type registered under an unexpected `TypeKey` variant fails the test
/// rather than silently skipping its assertion.
#[test]
fn test_final_explicit_empty_overrides_final_default() {
    use crate::ids::TypeKey;

    let mut schema_set = SchemaSet::new();
    let xsd = r#"<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
finalDefault="restriction">
<!-- final="" is explicit override: no derivation blocked, despite finalDefault -->
<xs:element name="unlocked" type="xs:string" final=""/>
<!-- absent final= inherits finalDefault="restriction" -->
<xs:element name="inherited" type="xs:string"/>
<xs:complexType name="UnlockedType" final=""/>
<xs:complexType name="InheritedType"/>
<xs:simpleType name="UnlockedSimple" final="">
<xs:restriction base="xs:string"/>
</xs:simpleType>
<xs:simpleType name="InheritedSimple">
<xs:restriction base="xs:string"/>
</xs:simpleType>
</xs:schema>"#;

    let result = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
    assert!(result.is_ok());

    let ns_table = schema_set.namespaces.get(&None).expect("default namespace");

    // Element with final="": the explicit empty set wins over finalDefault.
    let unlocked_id = schema_set.name_table.get("unlocked").expect("unlocked");
    let unlocked_key = ns_table.elements.get(&unlocked_id).expect("element key");
    let unlocked = schema_set
        .arenas
        .elements
        .get(*unlocked_key)
        .expect("element");
    assert!(
        unlocked.final_derivation.is_empty(),
        "final=\"\" must produce empty set, not inherit finalDefault"
    );

    // Element without final=: inherits finalDefault.
    let inherited_id = schema_set.name_table.get("inherited").expect("inherited");
    let inherited_key = ns_table.elements.get(&inherited_id).expect("element key");
    let inherited = schema_set
        .arenas
        .elements
        .get(*inherited_key)
        .expect("element");
    assert!(
        inherited.final_derivation.contains_restriction(),
        "absent final= must inherit finalDefault=restriction"
    );

    // Complex type with final="".
    let ut_id = schema_set
        .name_table
        .get("UnlockedType")
        .expect("UnlockedType");
    let ut_key = ns_table.types.get(&ut_id).expect("type key");
    match ut_key {
        TypeKey::Complex(key) => {
            let ct = schema_set
                .arenas
                .complex_types
                .get(*key)
                .expect("complex type");
            assert!(
                ct.final_derivation.is_empty(),
                "complexType final=\"\" must not inherit finalDefault"
            );
        }
        // A wrong variant must fail loudly; otherwise the assertion above
        // would never run and the test would pass vacuously.
        _ => panic!("expected complex type for UnlockedType"),
    }

    // Complex type without final=.
    let it_id = schema_set
        .name_table
        .get("InheritedType")
        .expect("InheritedType");
    let it_key = ns_table.types.get(&it_id).expect("type key");
    match it_key {
        TypeKey::Complex(key) => {
            let ct = schema_set
                .arenas
                .complex_types
                .get(*key)
                .expect("complex type");
            assert!(
                ct.final_derivation.contains_restriction(),
                "complexType absent final= must inherit finalDefault"
            );
        }
        _ => panic!("expected complex type for InheritedType"),
    }

    // Simple type with final="".
    let us_id = schema_set
        .name_table
        .get("UnlockedSimple")
        .expect("UnlockedSimple");
    let us_key = ns_table.types.get(&us_id).expect("type key");
    match us_key {
        TypeKey::Simple(key) => {
            let st = schema_set
                .arenas
                .simple_types
                .get(*key)
                .expect("simple type");
            assert!(
                st.final_derivation.is_empty(),
                "simpleType final=\"\" must not inherit finalDefault"
            );
        }
        _ => panic!("expected simple type for UnlockedSimple"),
    }

    // Simple type without final=.
    let is_id = schema_set
        .name_table
        .get("InheritedSimple")
        .expect("InheritedSimple");
    let is_key = ns_table.types.get(&is_id).expect("type key");
    match is_key {
        TypeKey::Simple(key) => {
            let st = schema_set
                .arenas
                .simple_types
                .get(*key)
                .expect("simple type");
            assert!(
                st.final_derivation.contains_restriction(),
                "simpleType absent final= must inherit finalDefault"
            );
        }
        _ => panic!("expected simple type for InheritedSimple"),
    }
}
1577
/// Two elements sharing `id="foo123"` still parse successfully, but a
/// duplicate-ID message must appear in `parsing_errors`.
#[test]
fn test_duplicate_id_detected() {
    let xsd = r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element id="foo123" name="a" type="xs:string"/>
<xs:element id="foo123" name="b" type="xs:string"/>
</xs:schema>"#;
    let mut schema_set = SchemaSet::new();

    let outcome = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
    assert!(outcome.is_ok());

    let needle = "Duplicate xs:ID value 'foo123'";
    let reported = schema_set
        .parsing_errors
        .iter()
        .any(|err| err.to_string().contains(needle));
    assert!(reported);
}
1592
/// Elements with distinct `id` values parse successfully and leave
/// `parsing_errors` empty.
#[test]
fn test_unique_ids_valid() {
    let xsd = r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element id="id1" name="a" type="xs:string"/>
<xs:element id="id2" name="b" type="xs:string"/>
</xs:schema>"#;
    let mut schema_set = SchemaSet::new();

    let outcome = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);

    assert!(outcome.is_ok());
    assert!(schema_set.parsing_errors.is_empty());
}
1604
/// An `id` value of `123bad` still parses successfully, but a
/// "not a valid xs:ID" message must appear in `parsing_errors`.
#[test]
fn test_invalid_id_format() {
    let xsd = r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element id="123bad" name="a" type="xs:string"/>
</xs:schema>"#;
    let mut schema_set = SchemaSet::new();

    let outcome = parse_schema(xsd.as_bytes(), "test.xsd", &mut schema_set);
    assert!(outcome.is_ok());

    let needle = "not a valid xs:ID";
    let reported = schema_set
        .parsing_errors
        .iter()
        .any(|err| err.to_string().contains(needle));
    assert!(reported);
}
1618}