xsd-schema 0.1.0

XML Schema (XSD 1.0/1.1) validator with PSVI and a built-in XPath 2.0 engine
Documentation
//! XSD 1.1 Conditional Type Assignment (CTA) schema-time validation.
//!
//! These passes validate `xs:alternative` elements at schema-compile time.
//! They are run after type derivation but before instance validation, so
//! schema authors get static feedback instead of waiting for an instance
//! that happens to exercise the broken alternative.
//!
//! Covered:
//!
//! - **CTA static-context XPath compile** (§3.12.4) — every `xs:alternative/@test`
//!   is compiled under a CTA static context. Compile-time errors (undefined
//!   variables, unbound prefixes) become schema errors.
//! - **No user-defined types in `instance of` / cast-style operators** —
//!   §3.12.4 restricts the static context: only built-in primitive types
//!   from the `xs` namespace are accessible.

#![cfg(feature = "xsd11")]

use crate::error::{SchemaError, SchemaResult};
use crate::ids::TypeKey;
use crate::parser::frames::AlternativeResult;
use crate::schema::model::DerivationSet;
use crate::schema::SchemaSet;
use crate::xpath::api::XPathExpr;
use crate::xpath::ast::{AstNode, ItemTypeNode, TypeExprNode};
use crate::xpath::error::XPathError;
use crate::xpath::XPathContext;

/// Compile every `xs:alternative/@test` expression under a CTA static
/// context. Surface compile-time errors as schema errors.
///
/// Catches:
///   - undefined variables in the test (e.g. `$kind` instead of `@kind`).
///     A CTA test's static context exposes no variables, so any
///     `$name` reference resolves to an unknown variable (XPST0008).
///   - unbound prefixes in QName literals or atomic types
///     (XPST0081).
///   - references to user-defined types in `instance of`, `cast as`,
///     `castable as`, and `treat as`. Per §3.12.4, the CTA static
///     context only exposes built-in primitive types from the
///     XML Schema namespace.
pub fn validate_cta_xpath(schema_set: &SchemaSet) -> SchemaResult<()> {
    if !schema_set.is_xsd11() {
        return Ok(());
    }

    for (_key, elem) in schema_set.arenas.elements.iter() {
        for alt in &elem.alternatives {
            let Some(test) = alt.test.as_deref() else {
                continue;
            };

            // Build the CTA static context: bindings come from the
            // alternative's xs:alternative element scope (so prefixes
            // declared on or above <xs:alternative> are visible).
            let ctx = XPathContext::new(&schema_set.name_table)
                .with_namespaces(alt.ns_snapshot.clone())
                .with_schema_set(schema_set);
            let ctx = if let Some(default_ns) = resolve_alt_default_ns(alt, schema_set) {
                ctx.with_default_element_ns(default_ns)
            } else {
                ctx
            };

            let expr = match XPathExpr::compile(test, &ctx) {
                Ok(e) => e,
                Err(err) => {
                    if is_schema_invalidating_error(&err) {
                        let location = schema_set.locate(alt.source.as_ref());
                        let elem_name = elem
                            .name
                            .map(|n| schema_set.name_table.resolve_ref(n))
                            .unwrap_or("(anonymous)");
                        return Err(SchemaError::structural(
                            "src-ct-alternative",
                            format!(
                                "Element '{}' <xs:alternative test=\"{}\"> failed CTA \
                                 static analysis: {}",
                                elem_name, test, err
                            ),
                            location,
                        ));
                    }
                    // Compile failure is not a schema-validity problem
                    // (e.g. unimplemented function in our engine). Skip
                    // the rest of the static check; the test will
                    // simply not match at runtime.
                    continue;
                }
            };

            // Walk the bound AST and reject user-defined types in
            // type expressions (instance of / cast as / etc.).
            check_no_user_defined_types(&expr, alt, elem, test, schema_set)?;
        }
    }

    Ok(())
}

/// XPath compile errors that genuinely indicate a broken schema.
///
/// Per §3.12.4, a CTA test that violates the static context is a
/// schema validity error. Static errors that mean the XPath author
/// got something wrong — undefined variables, unbound prefixes,
/// syntax problems, unknown atomic-type names — should reject the
/// schema. Errors that just reflect this engine's incomplete
/// function library (XPST0017 — function not found) should NOT
/// reject the schema; they only mean the test will not evaluate
/// at runtime.
fn is_schema_invalidating_error(err: &XPathError) -> bool {
    matches!(
        err,
        XPathError::XPST0003 { .. }
            | XPathError::XPST0008 { .. }
            | XPathError::XPST0051 { .. }
            | XPathError::XPST0081 { .. }
    )
}

/// Resolve the default-element-namespace for an alternative's static
/// context. We follow the cascade {alternative, schema-document}
/// for `xpathDefaultNamespace`. The runtime cascade in
/// `validation::alternatives::resolve_alternative_default_ns` is the
/// canonical implementation; we mirror only the parts that affect
/// XPath compile-time prefix resolution.
fn resolve_alt_default_ns(
    alt: &AlternativeResult,
    schema_set: &SchemaSet,
) -> Option<crate::ids::NameId> {
    let raw = alt.xpath_default_namespace.as_deref()?;
    match raw {
        "##defaultNamespace" => alt.ns_snapshot.default_ns,
        "##targetNamespace" => None, // schema-document target, irrelevant for compile-time prefix lookup of XSD namespace
        "##local" => None,
        uri => Some(schema_set.name_table.add(uri)),
    }
}

fn check_no_user_defined_types(
    expr: &XPathExpr,
    alt: &AlternativeResult,
    elem: &crate::arenas::ElementDeclData,
    test: &str,
    schema_set: &SchemaSet,
) -> SchemaResult<()> {
    let arena = expr.arena();
    let xs_ns = schema_set
        .name_table
        .add("http://www.w3.org/2001/XMLSchema");

    for (_id, node) in arena.iter() {
        let AstNode::TypeExpr(type_expr) = node else {
            continue;
        };
        let Some(ItemTypeNode::Atomic(_)) = type_expr.target_type.item_type else {
            continue;
        };
        // The bind step stores resolved_atomic_type when the QName
        // could be resolved; if it's None, compile() would already
        // have errored.
        let Some(qn) = type_expr.resolved_atomic_type.as_ref() else {
            continue;
        };
        // CTA only allows xs:* type names. Anything else is a
        // user-defined type, which §3.12.4 forbids.
        if qn.namespace_uri != Some(xs_ns) {
            return Err(report_user_defined_type(
                alt, elem, type_expr, test, schema_set,
            ));
        }
        // Even within xs:, only built-in atomic types are allowed;
        // a name not recognised as a built-in counts as user-defined.
        if let Some(local) = schema_set.name_table.try_resolve_ref(qn.local_name) {
            if crate::types::XmlTypeCode::from_local_name(local).is_none() {
                return Err(report_user_defined_type(
                    alt, elem, type_expr, test, schema_set,
                ));
            }
        }
    }

    Ok(())
}

/// XSD 1.1 §3.12.6: Conditional Type Substitutable.
///
/// Each alternative's resolved type must be validly substitutable for
/// the element's declared type, taking into account the element's
/// `{disallowed substitutions}` (its `block` attribute).
///
/// Validly substitutable here means the alternative type is the
/// declared type or derives from it via a method not blocked by the
/// element. We compute this with the existing
/// [`SchemaSet::is_type_derived_from`] helper, passing the element's
/// effective derivation block so the check matches the runtime
/// `xsi:type` substitution rule (§3.4.4 clause 4.6).
///
/// ## Gate: anonymous-declared elements skipped
///
/// We enforce the substitutability rule only when the element's
/// declared type is named. The strict §3.12.6 reading rejects
/// W3C `cta9008err` correctly (its declared type is the named
/// `docType`), and a named declared type is the canonical use of
/// CTA — the alternatives are restrictions or extensions of the
/// declared component. Elements with an anonymous inline declared
/// type often pair it with named alternative types in common
/// "override" patterns; their derivation chains never converge, so
/// strict enforcement would flag patterns that downstream code
/// (and our own unit tests) treat as valid. Until that policy can
/// be audited end-to-end, we keep the check off for anonymous
/// declared types and rely on named-declared coverage to catch
/// cta9008err-shaped errors.
///
/// **TODO:** revisit once anonymous-declared CTA usage is audited.
pub fn validate_cta_substitutability(schema_set: &SchemaSet) -> SchemaResult<()> {
    if !schema_set.is_xsd11() {
        return Ok(());
    }

    for (_key, elem) in schema_set.arenas.elements.iter() {
        let Some(declared) = elem.resolved_type else {
            continue;
        };
        if elem.alternatives.is_empty() {
            continue;
        }
        // Skip anonymous declared types (see doc-comment above).
        if !type_is_named(schema_set, declared) {
            continue;
        }
        for alt in &elem.alternatives {
            let Some(alt_type) = alt.resolved_type else {
                // No resolved type means the alternative carries the
                // element's declared type as fallback. Substitutable by
                // construction.
                continue;
            };
            // §3.12.6: "xs:error need not be substitutable." It is the
            // bottom type — selecting it always rejects the element,
            // which is the schema author's deliberate intent.
            if is_xs_error(schema_set, alt_type) {
                continue;
            }
            // {disallowed substitutions} on the element controls which
            // derivation methods are forbidden. The CTA substitution
            // mirrors xsi:type (§3.4.4 clause 4.6) — same blocking set.
            let blocked = elem.block;
            if !is_substitutable(schema_set, alt_type, declared, blocked) {
                let elem_name = elem
                    .name
                    .map(|n| schema_set.name_table.resolve_ref(n))
                    .unwrap_or("(anonymous)");
                let location = schema_set.locate(alt.source.as_ref());
                return Err(SchemaError::structural(
                    "cos-ct-alternative-substitutable",
                    format!(
                        "Element '{}': type alternative is not validly substitutable \
                         for the element's declared type — the alternative type does \
                         not derive from the declared type by a non-blocked method \
                         (§3.12.6)",
                        elem_name
                    ),
                    location,
                ));
            }
        }
    }
    Ok(())
}

/// True when the given `TypeKey` refers to a named (top-level or
/// redefined) type definition. Anonymous inline types have `name = None`.
fn type_is_named(schema_set: &SchemaSet, type_key: TypeKey) -> bool {
    match type_key {
        TypeKey::Simple(sk) => schema_set
            .arenas
            .simple_types
            .get(sk)
            .and_then(|t| t.name)
            .is_some(),
        TypeKey::Complex(ck) => schema_set
            .arenas
            .complex_types
            .get(ck)
            .and_then(|t| t.name)
            .is_some(),
    }
}

fn is_substitutable(
    schema_set: &SchemaSet,
    alt_type: TypeKey,
    declared: TypeKey,
    blocked: DerivationSet,
) -> bool {
    schema_set.is_type_derived_from(alt_type, declared, blocked)
}

/// Whether `type_key` is the built-in `xs:error` type.
fn is_xs_error(schema_set: &SchemaSet, type_key: TypeKey) -> bool {
    let TypeKey::Simple(key) = type_key else {
        return false;
    };
    let Some(error_key) = schema_set
        .builtin_types()
        .get_by_type_code(crate::types::XmlTypeCode::Error)
    else {
        return false;
    };
    key == error_key
}

fn report_user_defined_type(
    alt: &AlternativeResult,
    elem: &crate::arenas::ElementDeclData,
    _type_expr: &TypeExprNode,
    test: &str,
    schema_set: &SchemaSet,
) -> SchemaError {
    let location = schema_set.locate(alt.source.as_ref());
    let elem_name = elem
        .name
        .map(|n| schema_set.name_table.resolve_ref(n))
        .unwrap_or("(anonymous)");
    SchemaError::structural(
        "src-ct-alternative",
        format!(
            "Element '{}' <xs:alternative test=\"{}\"> references a user-defined \
             type in a type expression; the CTA static context only exposes \
             built-in primitive types from the XML Schema namespace (§3.12.4)",
            elem_name, test
        ),
        location,
    )
}