use std::collections::HashSet;
use std::path::{Path, PathBuf};
use crate::dom::{Document, NodeId, NodeKind};
use crate::error::{XmlError, XmlResult};
use super::parser::{
parse_attribute_group_def, parse_complex_type, parse_model_group_def, parse_simple_type,
};
use super::types::{
ContentModel, ElementDecl, Particle, ParticleKind, TypeDef, TypeRef, XsdValidator,
};
use super::XS_NAMESPACE;
/// Upper bound on include/import/redefine nesting. `process_schema_composition`
/// returns a validation error once `CompositionState::depth` reaches this value.
pub(super) const MAX_INCLUDE_DEPTH: u8 = 16;
/// Bookkeeping shared across nested include/import/redefine processing.
pub(super) struct CompositionState {
/// Canonicalized paths of schema files that have already been recorded; a
/// location whose canonical path is already present is skipped.
pub(super) visited: HashSet<PathBuf>,
/// Current nesting depth; incremented around each nested schema load and
/// compared against `MAX_INCLUDE_DEPTH`.
pub(super) depth: u8,
}
impl CompositionState {
pub(super) fn new(base_path: Option<&Path>) -> Self {
let mut visited = HashSet::new();
if let Some(p) = base_path {
if let Ok(c) = p.canonicalize() {
visited.insert(c);
}
}
CompositionState { visited, depth: 0 }
}
}
/// Resolves a `schemaLocation` attribute to a file path that should be loaded.
///
/// * `schema_location` - raw attribute value.
/// * `base_dir` - directory the location is joined onto, if any.
/// * `canonical_base` - canonical form of `base_dir`; when present, resolved
///   paths must stay underneath it.
/// * `state` - records canonical paths that have already been handed out.
/// * `kind` - element name (`include`, `import`, `redefine`) used in messages.
///
/// Returns `Ok(Some(path))` when the caller should try to load `path`,
/// `Ok(None)` when the location should be skipped (already visited, or not
/// resolvable below a known base directory), and `Err` when the location
/// escapes the base directory or is an unresolvable absolute URI.
fn resolve_include_path(
    schema_location: &str,
    base_dir: Option<&Path>,
    canonical_base: Option<&Path>,
    state: &mut CompositionState,
    kind: &str,
) -> XmlResult<Option<PathBuf>> {
    let joined = base_dir.map_or_else(
        || PathBuf::from(schema_location),
        |dir| dir.join(schema_location),
    );

    let canonical = match joined.canonicalize() {
        Ok(path) => path,
        Err(_) => {
            // Without a base directory there is nothing to confine the path
            // to, so hand back the un-canonicalized path and let the caller
            // attempt to read it.
            if canonical_base.is_none() {
                return Ok(Some(joined));
            }
            if is_absolute_uri(schema_location) {
                return Err(XmlError::validation(format!(
                    "Cannot resolve {} schemaLocation '{}': absolute URI not supported",
                    kind, schema_location
                )));
            }
            return Ok(None);
        }
    };

    // Confinement check: the canonical target must live under the base directory.
    if let Some(root) = canonical_base {
        if !canonical.starts_with(root) {
            return Err(XmlError::validation(format!(
                "Cannot resolve {} schemaLocation '{}': path escapes the schema's base directory",
                kind, schema_location
            )));
        }
    }

    // `insert` returns false when the path was already recorded.
    if !state.visited.insert(canonical.clone()) {
        return Ok(None);
    }
    Ok(Some(canonical))
}
/// Processes the `include`, `redefine` and `import` children of a schema
/// element and merges the referenced schemas' declarations into `validator`.
///
/// * `schema_doc` / `schema_elem` - the document and its schema root element.
/// * `validator` - receives the merged declarations.
/// * `base_path` - path of the schema file being processed, if known; relative
///   `schemaLocation` values are resolved against its parent directory.
/// * `state` - visited-path set and nesting depth shared with nested loads.
///
/// Children without a `schemaLocation`, locations that resolve to nothing,
/// and files that cannot be read or parsed are skipped silently (best effort).
///
/// # Errors
///
/// Returns a validation error when the nesting depth has reached
/// `MAX_INCLUDE_DEPTH`, when the base directory cannot be canonicalized, when
/// `resolve_include_path` rejects a location, or when building the validator
/// for a referenced schema (or applying a redefine) fails.
pub(super) fn process_schema_composition(
    schema_doc: &Document,
    schema_elem: NodeId,
    validator: &mut XsdValidator,
    base_path: Option<&Path>,
    state: &mut CompositionState,
) -> XmlResult<()> {
    if state.depth >= MAX_INCLUDE_DEPTH {
        return Err(XmlError::validation(format!(
            "Schema include/import/redefine nesting exceeds maximum depth of {}",
            MAX_INCLUDE_DEPTH
        )));
    }

    // `Path::parent` yields `Some("")` for a bare relative file name such as
    // `schema.xsd`, and canonicalizing an empty path fails. Treat that case as
    // the current directory so such schemas are not rejected outright.
    let base_dir = base_path.and_then(|p| p.parent()).map(|dir| {
        if dir.as_os_str().is_empty() {
            Path::new(".")
        } else {
            dir
        }
    });
    let canonical_base = match base_dir {
        Some(b) => Some(b.canonicalize().map_err(|e| {
            XmlError::validation(format!(
                "Failed to canonicalize schema base directory '{}': {}",
                b.display(),
                e
            ))
        })?),
        None => None,
    };

    for child in schema_doc.children(schema_elem) {
        let elem = match schema_doc.node_kind(child) {
            Some(NodeKind::Element(elem)) => elem,
            _ => continue,
        };
        let is_xs = elem.name.namespace_uri.as_deref() == Some(XS_NAMESPACE)
            || elem.name.prefix.as_deref() == Some("xs")
            || elem.name.prefix.as_deref() == Some("xsd");
        if !is_xs {
            continue;
        }

        // All three composition elements share the same load path; they only
        // differ in how the loaded schema is merged afterwards.
        let kind = match elem.name.local_name.as_ref() {
            "include" => "include",
            "redefine" => "redefine",
            "import" => "import",
            _ => continue,
        };
        let schema_location = match elem.get_attribute("schemaLocation") {
            Some(loc) => loc,
            None => continue,
        };
        let canonical_path = match resolve_include_path(
            schema_location,
            base_dir,
            canonical_base.as_deref(),
            state,
            kind,
        )? {
            Some(p) => p,
            None => continue,
        };
        // Unreadable or unparsable referenced files are skipped, not fatal.
        let ext_str = match std::fs::read_to_string(&canonical_path) {
            Ok(s) => s,
            Err(_) => continue,
        };
        let ext_doc = match crate::parse(&ext_str) {
            Ok(d) => d,
            Err(_) => continue,
        };

        // Restore the depth before propagating any error so the counter stays
        // balanced for the caller.
        state.depth += 1;
        let ext_validator_res = XsdValidator::from_schema_with_composition_state(
            &ext_doc,
            Some(&canonical_path),
            state,
        );
        state.depth -= 1;
        let ext_validator = ext_validator_res?;

        // Chameleon handling applies to include/redefine only: a referenced
        // schema without a target namespace adopts the including schema's one.
        // Imports are always merged as-is.
        let chameleon = kind != "import"
            && ext_validator.target_namespace.is_none()
            && validator.target_namespace.is_some();
        merge_external_declarations(validator, &ext_validator, chameleon);

        if kind == "redefine" {
            process_redefine_children(schema_doc, child, validator)?;
        }
    }
    Ok(())
}
/// Copies the declarations of `ext` into `validator`.
///
/// Entries already present in `validator` are kept; only missing keys are
/// added. When `chameleon` is true, keys without a namespace are moved into
/// `validator`'s target namespace, and elements, types and model groups are
/// additionally run through the chameleon fix-up helpers. Global attributes
/// and attribute groups are only re-keyed; their contents are copied as-is.
fn merge_external_declarations(validator: &mut XsdValidator, ext: &XsdValidator, chameleon: bool) {
    let host_ns = validator.target_namespace.clone();
    let adopt_key = |(ns, local): &(Option<String>, String)| -> (Option<String>, String) {
        let ns = if chameleon && ns.is_none() {
            host_ns.clone()
        } else {
            ns.clone()
        };
        (ns, local.clone())
    };

    for (key, decl) in ext.elements.iter() {
        let mut adopted = decl.clone();
        if chameleon {
            // The fix-up also assigns the namespace of a no-namespace element.
            chameleon_fixup_element_decl(&mut adopted, &host_ns);
        }
        validator.elements.entry(adopt_key(key)).or_insert(adopted);
    }

    for (key, type_def) in ext.types.iter() {
        let mut adopted = type_def.clone();
        if chameleon {
            chameleon_fixup_type_def(&mut adopted, &host_ns);
        }
        validator.types.entry(adopt_key(key)).or_insert(adopted);
    }

    for (key, attr) in ext.global_attributes.iter() {
        validator
            .global_attributes
            .entry(adopt_key(key))
            .or_insert_with(|| attr.clone());
    }

    for (key, group) in ext.attribute_groups.iter() {
        validator
            .attribute_groups
            .entry(adopt_key(key))
            .or_insert_with(|| group.clone());
    }

    for (key, group) in ext.model_groups.iter() {
        let mut adopted = group.clone();
        if chameleon {
            chameleon_fixup_content_model(&mut adopted.content, &host_ns);
        }
        validator.model_groups.entry(adopt_key(key)).or_insert(adopted);
    }
}
/// Moves a no-namespace element declaration into `target_ns` and applies the
/// same fix-up to its type reference. A namespace that is already set is kept.
fn chameleon_fixup_element_decl(decl: &mut ElementDecl, target_ns: &Option<String>) {
    decl.namespace = decl.namespace.take().or_else(|| target_ns.clone());
    chameleon_fixup_type_ref(&mut decl.type_ref, target_ns);
}
/// Applies the chameleon namespace fix-up to a type reference.
///
/// A named reference without a namespace is pointed at `target_ns`; an inline
/// type definition is fixed up recursively; every other reference is left
/// untouched.
fn chameleon_fixup_type_ref(type_ref: &mut TypeRef, target_ns: &Option<String>) {
    match type_ref {
        TypeRef::Named(ns, _) if ns.is_none() => *ns = target_ns.clone(),
        TypeRef::Inline(inline_def) => chameleon_fixup_type_def(inline_def, target_ns),
        _ => {}
    }
}
fn chameleon_fixup_type_def(td: &mut TypeDef, target_ns: &Option<String>) {
match td {
TypeDef::Complex(ref mut ct) => {
if let Some((ref mut ns, _)) = ct.base_type {
if ns.is_none() {
*ns = target_ns.clone();
}
}
chameleon_fixup_content_model(&mut ct.content, target_ns);
}
TypeDef::Simple(_) => {
}
}
}
/// Applies the chameleon namespace fix-up to a content model.
///
/// Sequence, choice and all models have their particles fixed up; simple
/// content has its type reference fixed up; other content models are left
/// untouched.
fn chameleon_fixup_content_model(content: &mut ContentModel, target_ns: &Option<String>) {
    match content {
        ContentModel::SimpleContent(type_ref) => chameleon_fixup_type_ref(type_ref, target_ns),
        ContentModel::All(members) => chameleon_fixup_particles(members, target_ns),
        ContentModel::Sequence(members, _, _) | ContentModel::Choice(members, _, _) => {
            chameleon_fixup_particles(members, target_ns)
        }
        _ => {}
    }
}
fn chameleon_fixup_particles(particles: &mut [Particle], target_ns: &Option<String>) {
for particle in particles {
match &mut particle.kind {
ParticleKind::Element(ref mut decl) => {
chameleon_fixup_element_decl(decl, target_ns);
}
ParticleKind::Sequence(ref mut sub) | ParticleKind::Choice(ref mut sub) => {
chameleon_fixup_particles(sub, target_ns);
}
ParticleKind::Any { .. } => {}
}
}
}
/// Applies the component redefinitions found inside a `redefine` element.
///
/// Every XSD-namespace (or `xs`/`xsd`-prefixed) child named `simpleType`,
/// `complexType`, `group` or `attributeGroup` is parsed and, if it carries a
/// name, stored in `validator` under `(target namespace, name)`, replacing any
/// existing entry. Other children are ignored.
///
/// A complex type whose base type is itself (same name, same namespace) keeps
/// access to the definition it replaces: the existing entry, if any, is copied
/// to the key `__redefine_base_<name>` and the redefinition's base type is
/// pointed at that key.
///
/// After all children have been handled, complex types that reference model
/// groups or attribute groups are re-resolved against the updated groups.
///
/// # Errors
///
/// Propagates the first parse error of a child; in that case the re-resolution
/// step is not run.
fn process_redefine_children(
    doc: &Document,
    redefine_node: NodeId,
    validator: &mut XsdValidator,
) -> XmlResult<()> {
    let target_ns = validator.target_namespace.clone();

    for child in doc.children(redefine_node) {
        let child_elem = match doc.node_kind(child) {
            Some(NodeKind::Element(child_elem)) => child_elem,
            _ => continue,
        };
        let is_xs = child_elem.name.namespace_uri.as_deref() == Some(XS_NAMESPACE)
            || child_elem.name.prefix.as_deref() == Some("xs")
            || child_elem.name.prefix.as_deref() == Some("xsd");
        if !is_xs {
            continue;
        }

        match child_elem.name.local_name.as_ref() {
            "simpleType" => {
                let type_def = parse_simple_type(doc, child)?;
                let name = match &type_def {
                    TypeDef::Simple(st) => st.name.clone(),
                    TypeDef::Complex(_) => None,
                };
                if let Some(name) = name {
                    validator.types.insert((target_ns.clone(), name), type_def);
                }
            }
            "complexType" => {
                // The local-element namespace is the target namespace here, so
                // the same reference is passed for each namespace argument.
                let mut type_def = parse_complex_type(
                    doc,
                    child,
                    &target_ns,
                    &target_ns,
                    &target_ns,
                    &validator.attribute_groups,
                    &validator.model_groups,
                    validator.block_default_extension,
                    validator.block_default_restriction,
                )?;
                if let TypeDef::Complex(ref mut ct) = type_def {
                    if let Some(name) = ct.name.clone() {
                        let key = (target_ns.clone(), name.clone());
                        let derives_from_itself = match ct.base_type {
                            Some(ref base) => base.1 == name && base.0 == target_ns,
                            None => false,
                        };
                        if derives_from_itself {
                            // Park the definition being replaced under a
                            // synthetic key and derive from that instead of
                            // from the (about to be overwritten) own name.
                            let old_key =
                                (target_ns.clone(), format!("__redefine_base_{}", name));
                            if let Some(old_td) = validator.types.get(&key).cloned() {
                                validator.types.insert(old_key.clone(), old_td);
                            }
                            ct.base_type = Some(old_key);
                        }
                        validator.types.insert(key, type_def);
                    }
                }
            }
            "group" => {
                if let Some(g_elem) = doc.element(child) {
                    if let Some(name) = g_elem.get_attribute("name") {
                        let mg_def = parse_model_group_def(
                            doc,
                            child,
                            &target_ns,
                            &target_ns,
                            &validator.attribute_groups,
                            &validator.model_groups,
                            validator.block_default_extension,
                            validator.block_default_restriction,
                        )?;
                        let key = (target_ns.clone(), name.to_string());
                        validator.model_groups.insert(key, mg_def);
                    }
                }
            }
            "attributeGroup" => {
                if let Some(ag_elem) = doc.element(child) {
                    if let Some(name) = ag_elem.get_attribute("name") {
                        let ag_def = parse_attribute_group_def(
                            doc,
                            child,
                            &target_ns,
                            &validator.global_attributes,
                            &validator.attribute_groups,
                        )?;
                        let key = (target_ns.clone(), name.to_string());
                        validator.attribute_groups.insert(key, ag_def);
                    }
                }
            }
            _ => {}
        }
    }

    reresolve_types_after_redefine(validator);
    Ok(())
}
fn reresolve_types_after_redefine(validator: &mut XsdValidator) {
let keys_to_update: Vec<(Option<String>, String)> = validator
.types
.iter()
.filter_map(|(key, td)| {
if let TypeDef::Complex(ct) = td {
if ct.group_ref.is_some() || !ct.attribute_group_refs.is_empty() {
return Some(key.clone());
}
}
None
})
.collect();
for key in keys_to_update {
let td = match validator.types.get(&key) {
Some(td) => td.clone(),
None => continue,
};
if let TypeDef::Complex(mut ct) = td {
if let Some(ref mg_key) = ct.group_ref {
if let Some(mg) = validator.model_groups.get(mg_key) {
ct.content = mg.content.clone();
}
}
if !ct.attribute_group_refs.is_empty() {
let mut new_attrs = Vec::new();
let mut new_wildcard = ct.attribute_wildcard.clone();
for ag_key in &ct.attribute_group_refs {
if let Some(ag) = validator.attribute_groups.get(ag_key) {
new_attrs.extend(ag.attributes.iter().cloned());
if let Some(ref ag_wc) = ag.wildcard {
new_wildcard = match new_wildcard {
Some(existing_wc) => existing_wc.intersect(ag_wc),
None => Some(ag_wc.clone()),
};
}
}
}
ct.attributes = new_attrs;
ct.attribute_wildcard = new_wildcard;
}
validator.types.insert(key, TypeDef::Complex(ct));
}
}
}
/// Reports whether `s` starts with a URI scheme followed by `:`.
///
/// A scheme is an ASCII letter followed by any number of ASCII letters,
/// digits, `+`, `-` or `.`. The check stops at the first `:` (true) or at the
/// first byte that cannot be part of a scheme (false). Strings with no `:`
/// after a valid scheme prefix, and the empty string, yield false.
fn is_absolute_uri(s: &str) -> bool {
    let mut rest = s.bytes();
    match rest.next() {
        Some(first) if first.is_ascii_alphabetic() => {}
        _ => return false,
    }
    for byte in rest {
        match byte {
            b':' => return true,
            b'+' | b'-' | b'.' => {}
            other if other.is_ascii_alphanumeric() => {}
            _ => return false,
        }
    }
    false
}