use crate::error::SchemaResult;
use crate::ids::DocumentId;
use crate::parser::parse::{parse_schema_with_config, ParserConfig};
#[cfg(feature = "async")]
use crate::parser::resolver::resolve_all_directives_async;
use crate::parser::resolver::{
fixup_composition_edges, resolve_all_directives, ResolutionResult, ResolverConfig,
SchemaResolver,
};
use crate::schema::{
allocate_content_particle_elements, allocate_model_group_particle_elements,
assemble_inline_types, build_dependency_graph, compile_all_patterns, resolve_all_references,
validate_all_derivations, validate_attribute_id_constraints,
validate_attribute_value_constraints, validate_element_value_constraints, InlineAssemblyStats,
ResolutionStats,
};
use crate::SchemaSet;
/// Configuration selecting which stages of the schema loading pipeline run.
#[derive(Debug, Clone)]
pub struct PipelineConfig {
/// Parser settings handed to `parse_schema_with_config`.
pub parser: ParserConfig,
/// Settings used to construct the `SchemaResolver` that resolves directives.
pub resolver: ResolverConfig,
/// When `true`, directives of the parsed document (and of every document loaded because of
/// them) are resolved.
pub resolve_directives: bool,
/// When `true`, the inline type assembly stage runs.
pub assemble_inline_types: bool,
/// When `true`, reference resolution and the validation stages that follow it run.
pub resolve_references: bool,
}
impl Default for PipelineConfig {
    /// Builds the configuration with default parser/resolver settings and every stage enabled.
    fn default() -> Self {
        let parser = ParserConfig::default();
        let resolver = ResolverConfig::default();
        Self {
            parser,
            resolver,
            resolve_directives: true,
            assemble_inline_types: true,
            resolve_references: true,
        }
    }
}
impl PipelineConfig {
pub fn parse_only() -> Self {
Self {
parser: ParserConfig::default(),
resolver: ResolverConfig::default(),
resolve_directives: false,
assemble_inline_types: false,
resolve_references: false,
}
}
pub fn full() -> Self {
Self::default()
}
}
/// Summary of what a pipeline run did.
#[derive(Debug, Default)]
pub struct PipelineStats {
/// Identifier of the document parsed from the XML handed to the pipeline.
pub doc_id: DocumentId,
/// Identifiers of the documents loaded while resolving directives, nested loads included.
pub loaded_docs: Vec<DocumentId>,
/// Aggregated directive counters; `None` when directive resolution did not run.
pub directive_result: Option<DirectiveStats>,
/// Result of the inline type assembly stage; `None` when that stage did not run.
pub inline_stats: Option<InlineAssemblyStats>,
/// Result of the reference resolution stage; `None` when that stage did not run.
pub resolution_stats: Option<ResolutionStats>,
}
/// Counters describing the outcome of directive resolution.
#[derive(Debug, Default)]
pub struct DirectiveStats {
/// Number of entries reported as loaded.
pub loaded_count: usize,
/// Number of entries reported as skipped.
pub skipped_count: usize,
/// Number of reported errors (directive errors plus import errors).
pub error_count: usize,
}
impl From<&ResolutionResult> for DirectiveStats {
    /// Condenses a resolution result into its counters. Directive errors and import errors are
    /// summed into a single `error_count`.
    fn from(result: &ResolutionResult) -> Self {
        let loaded_count = result.loaded.len();
        let skipped_count = result.skipped.len();
        let error_count = result.errors.len() + result.import_errors.len();
        Self {
            loaded_count,
            skipped_count,
            error_count,
        }
    }
}
pub fn load_and_process_schema(
xml: &[u8],
base_uri: &str,
schema_set: &mut SchemaSet,
config: Option<PipelineConfig>,
) -> SchemaResult<PipelineStats> {
let config = config.unwrap_or_default();
let mut stats = PipelineStats::default();
let doc_id = parse_schema_with_config(xml, base_uri, schema_set, &config.parser)?;
stats.doc_id = doc_id;
if config.resolve_directives {
let mut resolver = SchemaResolver::with_config(config.resolver.clone());
let dir_result = resolve_all_directives(doc_id, &mut resolver, schema_set);
stats.loaded_docs.extend(dir_result.loaded.iter().copied());
stats.directive_result = Some(DirectiveStats::from(&dir_result));
let mut directive_errors: Vec<crate::error::SchemaError> = dir_result.errors;
let mut import_errors: Vec<crate::error::SchemaError> = dir_result.import_errors;
let mut pending_docs = dir_result.loaded.clone();
while !pending_docs.is_empty() {
let current_batch: Vec<_> = std::mem::take(&mut pending_docs);
for loaded_doc_id in current_batch {
let nested_result =
resolve_all_directives(loaded_doc_id, &mut resolver, schema_set);
stats
.loaded_docs
.extend(nested_result.loaded.iter().copied());
pending_docs.extend(nested_result.loaded.iter().copied());
if let Some(ref mut dir_stats) = stats.directive_result {
dir_stats.loaded_count += nested_result.loaded.len();
dir_stats.skipped_count += nested_result.skipped.len();
dir_stats.error_count +=
nested_result.errors.len() + nested_result.import_errors.len();
}
directive_errors.extend(nested_result.errors);
import_errors.extend(nested_result.import_errors);
}
}
fixup_composition_edges(schema_set);
if let Some(err) = directive_errors
.into_iter()
.chain(import_errors)
.find(|e| e.is_schema_content_error())
{
return Err(err);
}
}
if !schema_set.parsing_errors.is_empty() {
let errors = std::mem::take(&mut schema_set.parsing_errors);
return Err(errors.into_iter().next().unwrap());
}
if config.assemble_inline_types || config.resolve_references {
crate::schema::apply_redefine_override(schema_set)?;
}
if config.assemble_inline_types {
let inline_stats = assemble_inline_types(schema_set)?;
stats.inline_stats = Some(inline_stats);
}
if config.resolve_references {
let resolution_stats = resolve_all_references(schema_set)?;
stats.resolution_stats = Some(resolution_stats);
}
if config.resolve_references {
compile_all_patterns(schema_set)?;
}
#[cfg(feature = "xsd11")]
if config.resolve_references {
crate::compiler::validate_all_default_open_content(schema_set)?;
}
if config.resolve_references {
let (dep_graph, _dep_stats) = build_dependency_graph(schema_set)?;
validate_all_derivations(schema_set, &dep_graph)?;
}
if config.resolve_references {
validate_attribute_id_constraints(schema_set)?;
validate_attribute_value_constraints(schema_set)?;
validate_element_value_constraints(schema_set)?;
#[cfg(feature = "xsd11")]
xsd11_pre_resolution_validations(schema_set)?;
}
if config.resolve_references {
crate::schema::validate_xsd10_annotation_source_anyuri(schema_set)?;
}
if config.resolve_references {
crate::schema::validate_complex_type_attribute_uniqueness(schema_set)?;
}
if config.resolve_references {
crate::compiler::substitution::validate_all_substitution_groups(schema_set)?;
}
if config.assemble_inline_types && config.resolve_references {
allocate_content_particle_elements(schema_set)?;
allocate_model_group_particle_elements(schema_set)?;
finalize_local_element_pass(schema_set)?;
}
Ok(stats)
}
/// Parses `xml` and runs the complete pipeline on it.
///
/// Shorthand for [`load_and_process_schema`] with [`PipelineConfig::full`].
pub fn load_schema(
    xml: &[u8],
    base_uri: &str,
    schema_set: &mut SchemaSet,
) -> SchemaResult<PipelineStats> {
    let full_config = PipelineConfig::full();
    load_and_process_schema(xml, base_uri, schema_set, Some(full_config))
}
/// Parses `xml` into `schema_set` with every optional stage disabled and returns the
/// identifier of the parsed document.
///
/// Shorthand for [`load_and_process_schema`] with [`PipelineConfig::parse_only`].
pub fn parse_schema_only(
    xml: &[u8],
    base_uri: &str,
    schema_set: &mut SchemaSet,
) -> SchemaResult<DocumentId> {
    load_and_process_schema(
        xml,
        base_uri,
        schema_set,
        Some(PipelineConfig::parse_only()),
    )
    .map(|stats| stats.doc_id)
}
/// Runs the complete post-parse processing sequence on schemas already present in
/// `schema_set` and returns the inline-assembly and reference-resolution statistics.
///
/// # Errors
/// Returns the first recorded parsing error (the recorded list is drained), or the first
/// error reported by any processing or validation stage.
pub fn process_loaded_schemas(
    schema_set: &mut SchemaSet,
) -> SchemaResult<(InlineAssemblyStats, ResolutionStats)> {
    // Refuse to process a set that recorded parsing errors.
    if let Some(first_error) = std::mem::take(&mut schema_set.parsing_errors)
        .into_iter()
        .next()
    {
        return Err(first_error);
    }

    // Assembly and resolution.
    crate::schema::apply_redefine_override(schema_set)?;
    let inline_stats = assemble_inline_types(schema_set)?;
    let resolution_stats = resolve_all_references(schema_set)?;
    compile_all_patterns(schema_set)?;
    #[cfg(feature = "xsd11")]
    crate::compiler::validate_all_default_open_content(schema_set)?;

    // Derivation checks driven by the dependency graph.
    let (dep_graph, _dep_stats) = build_dependency_graph(schema_set)?;
    validate_all_derivations(schema_set, &dep_graph)?;

    // Declaration-level validations.
    validate_attribute_id_constraints(schema_set)?;
    validate_attribute_value_constraints(schema_set)?;
    validate_element_value_constraints(schema_set)?;
    #[cfg(feature = "xsd11")]
    xsd11_pre_resolution_validations(schema_set)?;
    crate::schema::validate_xsd10_annotation_source_anyuri(schema_set)?;
    crate::schema::validate_complex_type_attribute_uniqueness(schema_set)?;
    crate::schema::validate_local_decl_target_namespace(schema_set)?;
    crate::schema::validate_no_xsi_attribute_declarations(schema_set)?;
    crate::compiler::substitution::validate_all_substitution_groups(schema_set)?;

    // Particle allocation followed by the checks that run after it.
    allocate_content_particle_elements(schema_set)?;
    allocate_model_group_particle_elements(schema_set)?;
    crate::schema::validate_substitution_group_element_consistency(schema_set)?;
    finalize_local_element_pass(schema_set)?;

    Ok((inline_stats, resolution_stats))
}
/// Runs the closing stage shared by `load_and_process_schema` and `process_loaded_schemas`,
/// after particle elements have been allocated.
///
/// The calls are made in a fixed order; the first one that fails aborts the pass and its error
/// is returned.
fn finalize_local_element_pass(schema_set: &mut SchemaSet) -> SchemaResult<()> {
#[cfg(feature = "xsd11")]
{
// When resolving local element alternatives reports new entries, reference resolution and
// content-particle allocation are run again (presumably so they cover the new types).
let new_alt_types = crate::schema::inline::resolve_local_element_alternatives(schema_set)?;
if !new_alt_types.is_empty() {
resolve_all_references(schema_set)?;
allocate_content_particle_elements(schema_set)?;
}
}
crate::schema::finalize_pending_ic_refs(schema_set)?;
// Element value constraints are validated again here, after the steps above.
validate_element_value_constraints(schema_set)?;
#[cfg(feature = "xsd11")]
{
crate::schema::validate_cta_xpath(schema_set)?;
crate::schema::validate_cta_substitutability(schema_set)?;
}
#[cfg(feature = "xsd11")]
xsd11_element_consistency_checks(schema_set)?;
// Structural checks on content models: all-group rules, occurrence ranges, then UPA.
validate_all_group_outer_occurs(schema_set)?;
validate_all_group_content(schema_set)?;
validate_all_group_placement(schema_set)?;
validate_all_particle_occurs(schema_set)?;
validate_all_upa_constraints(schema_set)?;
Ok(())
}
/// Runs the XSD 1.1-only validations grouped with the attribute/element value-constraint
/// checks: element type alternatives, CTA XPath, CTA substitutability and wildcard disallowed
/// names, in that order. Stops at and returns the first error.
///
/// Only compiled when the `xsd11` feature is enabled.
#[cfg(feature = "xsd11")]
fn xsd11_pre_resolution_validations(schema_set: &SchemaSet) -> SchemaResult<()> {
crate::schema::validate_element_type_alternatives(schema_set)?;
crate::schema::validate_cta_xpath(schema_set)?;
crate::schema::validate_cta_substitutability(schema_set)?;
crate::schema::validate_wildcard_disallowed_names(schema_set)?;
Ok(())
}
/// Runs the three XSD 1.1-only type-table consistency validations (local elements, wildcard
/// elements, restriction local elements), in that order. Stops at and returns the first error.
///
/// Only compiled when the `xsd11` feature is enabled.
#[cfg(feature = "xsd11")]
fn xsd11_element_consistency_checks(schema_set: &SchemaSet) -> SchemaResult<()> {
crate::schema::validate_local_element_type_table_consistency(schema_set)?;
crate::schema::validate_wildcard_element_type_table_consistency(schema_set)?;
crate::schema::validate_restriction_local_element_type_table_consistency(schema_set)?;
Ok(())
}
fn validate_all_group_outer_occurs(schema_set: &SchemaSet) -> SchemaResult<()> {
use crate::compiler::{
is_top_level_all_group, resolve_top_level_all_group_ref, validate_outer_all_group_occurs,
};
for (_, type_def) in schema_set.arenas.complex_types.iter() {
let Some(particle) = (match &type_def.content {
crate::parser::frames::ComplexContentResult::Complex(content) => {
content.particle.as_ref()
}
crate::parser::frames::ComplexContentResult::Empty
| crate::parser::frames::ComplexContentResult::Simple(_) => None,
}) else {
continue;
};
let is_all = is_top_level_all_group(particle).is_some()
|| resolve_top_level_all_group_ref(particle, schema_set).is_some();
if !is_all {
continue;
}
validate_outer_all_group_occurs(particle, schema_set.xsd_version).map_err(|error| {
let location = error
.location()
.and_then(|source| schema_set.source_maps.locate(source));
crate::error::SchemaError::structural("cos-all-limited", format!("{}", error), location)
})?;
}
Ok(())
}
/// Applies the XSD 1.0 restrictions on the members of `xs:all` groups, first to the named
/// model groups and then to every group found inside complex type content.
///
/// Does nothing when the schema set is not XSD 1.0.
fn validate_all_group_content(schema_set: &SchemaSet) -> SchemaResult<()> {
    use crate::parser::frames::{ComplexContentResult, Compositor};

    if schema_set.is_xsd10() {
        // Named model group definitions whose compositor is `all`.
        for (_, group_def) in schema_set.arenas.model_groups.iter() {
            if group_def.compositor != Some(Compositor::All) {
                continue;
            }
            check_all_group_xsd10_constraints(&group_def.particles, schema_set)?;
        }

        // Groups nested anywhere in complex type content.
        for (_, type_def) in schema_set.arenas.complex_types.iter() {
            let ComplexContentResult::Complex(content) = &type_def.content else {
                continue;
            };
            let Some(root) = content.particle.as_ref() else {
                continue;
            };
            check_particle_all_group_constraints(root, schema_set)?;
        }
    }
    Ok(())
}
fn check_all_group_xsd10_constraints(
particles: &[crate::parser::frames::ParticleResult],
schema_set: &SchemaSet,
) -> SchemaResult<()> {
use crate::parser::frames::ParticleTerm;
for particle in particles {
let particle_loc = schema_set.locate(particle.source.as_ref());
match &particle.term {
ParticleTerm::Any(wc) => {
let location = schema_set
.locate(wc.source.as_ref())
.or_else(|| particle_loc.clone());
return Err(crate::error::SchemaError::structural(
"cos-all-limited",
"In XSD 1.0, xs:any (wildcard) is not allowed inside an xs:all group"
.to_string(),
location,
));
}
ParticleTerm::Group(mg) => {
let location = schema_set
.locate(mg.source.as_ref())
.or_else(|| particle_loc.clone());
return Err(crate::error::SchemaError::structural(
"cos-all-limited",
"In XSD 1.0, a nested model group is not allowed inside an xs:all group"
.to_string(),
location,
));
}
ParticleTerm::Element(_) => {}
}
if particle.min_occurs > 1 {
return Err(crate::error::SchemaError::structural(
"cos-all-limited",
format!(
"In XSD 1.0, an xs:all member's minOccurs must be 0 or 1, found {}",
particle.min_occurs
),
particle_loc,
));
}
match particle.max_occurs {
Some(0) | Some(1) => {}
Some(n) => {
return Err(crate::error::SchemaError::structural(
"cos-all-limited",
format!(
"In XSD 1.0, an xs:all member's maxOccurs must be 0 or 1, found {}",
n
),
particle_loc,
));
}
None => {
return Err(crate::error::SchemaError::structural(
"cos-all-limited",
"In XSD 1.0, an xs:all member's maxOccurs must be 0 or 1 (unbounded not allowed)"
.to_string(),
particle_loc,
));
}
}
}
Ok(())
}
/// Walks a particle tree and applies the XSD 1.0 `xs:all` member rules to every inline group
/// whose compositor is `all`.
///
/// Particles that are not groups are accepted as they are. A group is checked first (when it
/// is an `all` group) and then each of its children is visited recursively.
fn check_particle_all_group_constraints(
    particle: &crate::parser::frames::ParticleResult,
    schema_set: &SchemaSet,
) -> SchemaResult<()> {
    use crate::parser::frames::{Compositor, ParticleTerm};

    let ParticleTerm::Group(group) = &particle.term else {
        return Ok(());
    };

    if group.compositor == Some(Compositor::All) {
        check_all_group_xsd10_constraints(&group.particles, schema_set)?;
    }
    group
        .particles
        .iter()
        .try_for_each(|child| check_particle_all_group_constraints(child, schema_set))
}
/// Enforces, for XSD 1.0 only, that an `xs:all` group never appears below the top-level
/// particle of a complex type.
///
/// Types whose top-level particle is itself an `all` group (inline or through a reference) are
/// skipped; every other content model is searched for a nested `all` group.
fn validate_all_group_placement(schema_set: &SchemaSet) -> SchemaResult<()> {
    use crate::compiler::{is_top_level_all_group, resolve_top_level_all_group_ref};
    use crate::parser::frames::ComplexContentResult;

    if !schema_set.is_xsd10() {
        return Ok(());
    }

    for (_, type_def) in schema_set.arenas.complex_types.iter() {
        let root = match &type_def.content {
            ComplexContentResult::Complex(content) => match content.particle.as_ref() {
                Some(root) => root,
                None => continue,
            },
            ComplexContentResult::Empty | ComplexContentResult::Simple(_) => continue,
        };

        let all_at_top = is_top_level_all_group(root).is_some()
            || resolve_top_level_all_group_ref(root, schema_set).is_some();
        if !all_at_top {
            // A fresh set per type: it only tracks the group references currently being
            // expanded for this content model.
            let mut visited = std::collections::HashSet::new();
            check_no_nested_all_group(root, schema_set, &mut visited)?;
        }
    }
    Ok(())
}
fn check_no_nested_all_group(
particle: &crate::parser::frames::ParticleResult,
schema_set: &SchemaSet,
visited: &mut std::collections::HashSet<crate::ids::ModelGroupKey>,
) -> SchemaResult<()> {
use crate::parser::frames::{Compositor, ParticleTerm, ParticleResult, ModelGroupDefResult};
fn placement_location(
particle: &ParticleResult,
group: &ModelGroupDefResult,
schema_set: &SchemaSet,
) -> Option<crate::parser::location::SourceLocation> {
schema_set
.locate(particle.source.as_ref())
.or_else(|| schema_set.locate(group.source.as_ref()))
}
let ParticleTerm::Group(group) = &particle.term else {
return Ok(());
};
if group.compositor == Some(Compositor::All) && group.ref_name.is_none() {
return Err(crate::error::SchemaError::structural(
"cos-all-limited",
"In XSD 1.0, an xs:all model group may only appear as the top-level \
particle of a complex type definition"
.to_string(),
placement_location(particle, group, schema_set),
));
}
if let Some(ref_name) = group.ref_name.as_ref() {
if let Some(group_key) =
schema_set.lookup_model_group(ref_name.namespace, ref_name.local_name)
{
if let Some(group_data) = schema_set.arenas.get_model_group(group_key) {
if group_data.compositor == Some(Compositor::All) {
return Err(crate::error::SchemaError::structural(
"cos-all-limited",
"In XSD 1.0, a reference to a named xs:all group may only \
appear as the top-level particle of a complex type definition"
.to_string(),
placement_location(particle, group, schema_set),
));
}
if visited.insert(group_key) {
for child in &group_data.particles {
check_no_nested_all_group(child, schema_set, visited)?;
}
visited.remove(&group_key);
}
}
}
} else {
for child in &group.particles {
check_no_nested_all_group(child, schema_set, visited)?;
}
}
Ok(())
}
/// Checks the occurrence range of every particle in the schema set: first the content models
/// of all complex types, then the members of all named model groups.
fn validate_all_particle_occurs(schema_set: &SchemaSet) -> SchemaResult<()> {
    use crate::parser::frames::ComplexContentResult;

    for (_, type_def) in schema_set.arenas.complex_types.iter() {
        let ComplexContentResult::Complex(content) = &type_def.content else {
            continue;
        };
        let Some(root) = content.particle.as_ref() else {
            continue;
        };
        validate_particle_occurs_recursive(root, schema_set)?;
    }

    for (_, group_def) in schema_set.arenas.model_groups.iter() {
        for member in &group_def.particles {
            validate_particle_occurs_recursive(member, schema_set)?;
        }
    }
    Ok(())
}
/// Verifies that `minOccurs` does not exceed `maxOccurs` on `particle` and, when the particle
/// is an inline group, on each of its descendants.
///
/// A `max_occurs` of `None` is never exceeded. The first violation is returned as a structural
/// `p-props-correct` error located at the offending particle when its source is known.
fn validate_particle_occurs_recursive(
    particle: &crate::parser::frames::ParticleResult,
    schema_set: &SchemaSet,
) -> SchemaResult<()> {
    use crate::parser::frames::ParticleTerm;

    match particle.max_occurs {
        Some(max) if particle.min_occurs > max => {
            let location = particle
                .source
                .as_ref()
                .and_then(|s| schema_set.source_maps.locate(s));
            return Err(crate::error::SchemaError::structural(
                "p-props-correct",
                format!(
                    "minOccurs ({}) exceeds maxOccurs ({})",
                    particle.min_occurs, max
                ),
                location,
            ));
        }
        _ => {}
    }

    let ParticleTerm::Group(group) = &particle.term else {
        return Ok(());
    };
    group
        .particles
        .iter()
        .try_for_each(|child| validate_particle_occurs_recursive(child, schema_set))
}
fn validate_all_upa_constraints(schema_set: &SchemaSet) -> SchemaResult<()> {
for (_, type_def) in schema_set.arenas.complex_types.iter() {
if type_def.source.is_none() {
continue;
}
let Some(_particle) = (match &type_def.content {
crate::parser::frames::ComplexContentResult::Complex(content) => {
content.particle.as_ref()
}
crate::parser::frames::ComplexContentResult::Empty
| crate::parser::frames::ComplexContentResult::Simple(_) => None,
}) else {
continue;
};
let matcher = crate::compiler::compile_content_model_for_upa(schema_set, type_def)
.map_err(|error| {
let location = error
.location()
.and_then(|source| schema_set.source_maps.locate(source));
crate::error::SchemaError::structural(
"cos-nonambig",
format!(
"Failed to compile content model for UPA checking: {}",
error
),
location,
)
})?;
match matcher {
crate::compiler::ContentModelMatcher::Nfa(nfa)
| crate::compiler::ContentModelMatcher::WithOpenContent { nfa, .. } => {
crate::compiler::check_upa(&nfa, schema_set, type_def.target_namespace)?;
}
crate::compiler::ContentModelMatcher::AllGroup(model) => {
crate::compiler::check_all_group_upa(
&model,
schema_set,
type_def.target_namespace,
)?;
}
#[cfg(feature = "xsd11")]
crate::compiler::ContentModelMatcher::AllGroupExtension {
base_model,
extension_nfa,
} => {
crate::compiler::check_all_group_upa(
&base_model,
schema_set,
type_def.target_namespace,
)?;
crate::compiler::check_upa(&extension_nfa, schema_set, type_def.target_namespace)?;
}
}
}
Ok(())
}
/// Asynchronous counterpart of `load_and_process_schema`: directives are resolved with
/// `resolve_all_directives_async`, every other stage is the same.
///
/// Stages run in this order:
/// 1. Parse the document (always).
/// 2. If `resolve_directives`: resolve the directives of the parsed document, then those of
///    every document loaded as a result (level by level), and fix up composition edges.
/// 3. Fail if any parsing error was recorded on `schema_set`.
/// 4. If `assemble_inline_types` or `resolve_references`: apply redefine/override.
/// 5. If `assemble_inline_types`: assemble inline types.
/// 6. If `resolve_references`: resolve references, compile patterns and run the schema-level
///    validations.
/// 7. If both flags are set: allocate particle elements and run the final local-element pass.
///
/// Passing `None` for `config` uses `PipelineConfig::default()`.
///
/// # Errors
/// Returns the parse error, the first directive/import error for which
/// `is_schema_content_error()` holds (other directive errors are only counted in the stats),
/// the first recorded parsing error (the recorded list is drained), or the first error
/// reported by any later stage.
#[cfg(feature = "async")]
pub async fn load_and_process_schema_async(
    xml: &[u8],
    base_uri: &str,
    schema_set: &mut SchemaSet,
    config: Option<PipelineConfig>,
) -> SchemaResult<PipelineStats> {
    let config = config.unwrap_or_default();
    let mut stats = PipelineStats::default();

    let doc_id = parse_schema_with_config(xml, base_uri, schema_set, &config.parser)?;
    stats.doc_id = doc_id;

    if config.resolve_directives {
        let mut resolver = SchemaResolver::with_config(config.resolver.clone());
        let root_result = resolve_all_directives_async(doc_id, &mut resolver, schema_set).await;
        stats.loaded_docs.extend(root_result.loaded.iter().copied());
        let mut dir_stats = DirectiveStats::from(&root_result);

        let mut directive_errors: Vec<crate::error::SchemaError> = root_result.errors;
        let mut import_errors: Vec<crate::error::SchemaError> = root_result.import_errors;
        // The root result is no longer needed, so its list of loaded documents is moved into
        // the work list instead of being cloned.
        let mut pending_docs = root_result.loaded;

        // Process the work list one level at a time: documents loaded by the current batch
        // form the next batch.
        while !pending_docs.is_empty() {
            let current_batch: Vec<_> = std::mem::take(&mut pending_docs);
            for loaded_doc_id in current_batch {
                let nested =
                    resolve_all_directives_async(loaded_doc_id, &mut resolver, schema_set).await;
                stats.loaded_docs.extend(nested.loaded.iter().copied());
                pending_docs.extend(nested.loaded.iter().copied());
                dir_stats.loaded_count += nested.loaded.len();
                dir_stats.skipped_count += nested.skipped.len();
                dir_stats.error_count += nested.errors.len() + nested.import_errors.len();
                directive_errors.extend(nested.errors);
                import_errors.extend(nested.import_errors);
            }
        }
        stats.directive_result = Some(dir_stats);

        fixup_composition_edges(schema_set);

        // Only schema-content errors abort the pipeline; directive errors are checked before
        // import errors.
        if let Some(err) = directive_errors
            .into_iter()
            .chain(import_errors)
            .find(|e| e.is_schema_content_error())
        {
            return Err(err);
        }
    }

    // The synchronous pipeline rejects a schema set that recorded parsing errors; the async
    // pipeline must not silently accept it.
    if let Some(first_error) = std::mem::take(&mut schema_set.parsing_errors)
        .into_iter()
        .next()
    {
        return Err(first_error);
    }

    if config.assemble_inline_types || config.resolve_references {
        crate::schema::apply_redefine_override(schema_set)?;
    }

    if config.assemble_inline_types {
        let inline_stats = assemble_inline_types(schema_set)?;
        stats.inline_stats = Some(inline_stats);
    }

    if config.resolve_references {
        let resolution_stats = resolve_all_references(schema_set)?;
        stats.resolution_stats = Some(resolution_stats);

        compile_all_patterns(schema_set)?;
        #[cfg(feature = "xsd11")]
        crate::compiler::validate_all_default_open_content(schema_set)?;

        let (dep_graph, _dep_stats) = build_dependency_graph(schema_set)?;
        validate_all_derivations(schema_set, &dep_graph)?;

        validate_attribute_id_constraints(schema_set)?;
        validate_attribute_value_constraints(schema_set)?;
        validate_element_value_constraints(schema_set)?;
        #[cfg(feature = "xsd11")]
        xsd11_pre_resolution_validations(schema_set)?;

        crate::schema::validate_xsd10_annotation_source_anyuri(schema_set)?;
        crate::schema::validate_complex_type_attribute_uniqueness(schema_set)?;
        // Previously missing from the async path although the sync path runs it.
        crate::compiler::substitution::validate_all_substitution_groups(schema_set)?;
    }

    if config.assemble_inline_types && config.resolve_references {
        allocate_content_particle_elements(schema_set)?;
        allocate_model_group_particle_elements(schema_set)?;
        // Use the shared final pass instead of a hand-copied subset of it. The copy resolved
        // local element alternatives and ran the type-table checks, but never finalized
        // pending identity-constraint refs nor ran the all-group, occurrence and UPA checks.
        finalize_local_element_pass(schema_set)?;
    }

    Ok(stats)
}
/// Parses `xml` and runs the complete pipeline on it, resolving directives asynchronously.
///
/// Shorthand for [`load_and_process_schema_async`] with [`PipelineConfig::full`].
#[cfg(feature = "async")]
pub async fn load_schema_async(
    xml: &[u8],
    base_uri: &str,
    schema_set: &mut SchemaSet,
) -> SchemaResult<PipelineStats> {
    let full_config = PipelineConfig::full();
    load_and_process_schema_async(xml, base_uri, schema_set, Some(full_config)).await
}
#[cfg(test)]
#[path = "pipeline_tests.rs"]
mod tests;