xsd-schema 0.1.0

XML Schema (XSD 1.0/1.1) validator with PSVI and a built-in XPath 2.0 engine
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
//! XSD 1.1 complex-type assertion evaluation.
//!
//! Complex types can carry `xs:assert` elements whose XPath 2.0 expressions
//! are evaluated against the element subtree. This module provides:
//!
//! - [`AssertionBufferFrame`] — per-element bookkeeping for assertion buffering
//! - [`has_inherited_assertions`] — cheap hot-path check for any assertions
//! - [`collect_inherited_assertions`] — full base-first collection with owner keys
//! - [`resolve_ct_assertion_default_ns`] — xpathDefaultNamespace cascade
//! - [`compute_dollar_value`] — `$value` binding derived from the governing type
//! - [`evaluate_complex_type_assertions`] — core XPath evaluation

use crate::document::buffer::BufferDocument;
use crate::document::navigator::BufferDocNavigator;
use crate::ids::{ComplexTypeKey, NameId, TypeKey};
use crate::navigator::{DomNavigator, TypedValue};
use crate::parser::frames::{AssertResult, ComplexContentResult};
use crate::parser::location::SourceLocation;
use crate::schema::SchemaSet;
use crate::validation::errors::{self, ValidationError};
use crate::validation::simple::validate_simple_type;
use crate::xpath::api::XPathExpr;
use crate::xpath::functions::{effective_boolean_value, XPathValue};
use crate::xpath::XPathContext;

use crate::arenas::SchemaArenas;

// ---------------------------------------------------------------------------
// AssertionBufferFrame
// ---------------------------------------------------------------------------

/// Per-element assertion buffer frame.
///
/// Created when a complex type with assertions is encountered during streaming
/// validation. Tracks the node reference in the fragment document and the
/// owning complex type, so assertions can be evaluated at element close.
///
/// The first two fields have the same types as the `element_ref` / `ct_key`
/// parameters of `evaluate_complex_type_assertions`; presumably they are
/// passed straight through to it — NOTE(review): confirm against the caller,
/// which is not part of this module.
pub(crate) struct AssertionBufferFrame {
    /// Node ref of this element in the fragment document.
    pub element_ref: u32,
    /// ComplexType key whose assertions triggered this frame.
    pub complex_type_key: ComplexTypeKey,
    /// Element path at the time this frame's element closed (for error reporting).
    /// Populated when the frame is popped at its own end-element (before deferral).
    pub element_path: String,
    /// Source location at the time this frame's element closed.
    /// `None` presumably means no location was available — TODO confirm.
    pub location: Option<SourceLocation>,
}

// ---------------------------------------------------------------------------
// has_inherited_assertions — cheap hot-path check
// ---------------------------------------------------------------------------

/// Reports whether `ct_key`, or any complex type reachable from it by
/// following `resolved_base_type`, declares at least one assertion.
///
/// The walk begins at `ct_key` itself and ends at the first ancestor whose
/// resolved base is absent or is not a complex type. It short-circuits on the
/// first non-empty `assertions` list and builds no intermediate collection,
/// which is what makes it suitable as the hot-path check in
/// `validate_element_by_id` for deciding whether to start assertion buffering.
pub(crate) fn has_inherited_assertions(ct_key: ComplexTypeKey, arenas: &SchemaArenas) -> bool {
    // Lazily yields `ct_key`, then each complex base type in turn.
    let lineage = std::iter::successors(Some(ct_key), |&key| {
        match arenas.complex_types[key].resolved_base_type {
            Some(TypeKey::Complex(parent)) => Some(parent),
            _ => None,
        }
    });

    let mut lineage = lineage;
    lineage.any(|key| !arenas.complex_types[key].assertions.is_empty())
}

// ---------------------------------------------------------------------------
// collect_inherited_assertions — full collection
// ---------------------------------------------------------------------------

/// Gathers every assertion that applies to `ct_key`: its own plus those of
/// each complex base type reached through `resolved_base_type`.
///
/// The result is ordered **base-first** (outermost ancestor's assertions come
/// first, `ct_key`'s own come last), preserving declaration order within each
/// type. Every entry carries the key of the type that **declared** the
/// assertion, which the xpathDefaultNamespace cascade needs in order to pick
/// up the type-level default of the declaring type rather than the derived one.
///
/// Returns an empty vector when no type in the chain has assertions.
pub(crate) fn collect_inherited_assertions(
    ct_key: ComplexTypeKey,
    arenas: &SchemaArenas,
) -> Vec<(&AssertResult, ComplexTypeKey)> {
    // Derived-to-base lineage, starting with `ct_key` itself.
    let mut lineage: Vec<ComplexTypeKey> = std::iter::successors(Some(ct_key), |&key| {
        match arenas.complex_types[key].resolved_base_type {
            Some(TypeKey::Complex(parent)) => Some(parent),
            _ => None,
        }
    })
    .collect();

    // Flip to base-first before gathering.
    lineage.reverse();

    let mut collected = Vec::new();
    for owner in lineage {
        let declaring_type = &arenas.complex_types[owner];
        collected.extend(
            declaring_type
                .assertions
                .iter()
                .map(|assertion| (assertion, owner)),
        );
    }
    collected
}

// ---------------------------------------------------------------------------
// resolve_ct_assertion_default_ns — xpathDefaultNamespace cascade
// ---------------------------------------------------------------------------

/// Resolves the effective XPath default element namespace for one assertion.
///
/// The raw `xpathDefaultNamespace` value is taken from the first level that
/// specifies one: **assertion > owning complex type > schema document**.
/// `owner_ct_key` must be the type that *declared* the assertion (as paired
/// by `collect_inherited_assertions`), not the derived type being validated,
/// so that inherited assertions see their own type-level default.
///
/// The raw value is then interpreted as follows:
/// - `##defaultNamespace` → the default namespace captured in the
///   assertion's namespace snapshot
/// - `##targetNamespace` → the target namespace of the owning type's schema
///   document (`None` if that document cannot be located)
/// - `##local`, or nothing specified at any level → `None`
/// - anything else → treated as a namespace URI and interned in the name table
fn resolve_ct_assertion_default_ns(
    assertion: &AssertResult,
    owner_ct_key: ComplexTypeKey,
    schema_set: &SchemaSet,
) -> Option<NameId> {
    let owner = &schema_set.arenas.complex_types[owner_ct_key];

    // Schema document in which the owning type was declared, if known.
    let owner_doc = owner
        .source
        .as_ref()
        .and_then(|src| schema_set.documents.get(src.doc_id as usize));

    // Each fallback is only evaluated when the previous level is unset.
    let declared = assertion
        .xpath_default_namespace
        .clone()
        .or_else(|| owner.xpath_default_namespace.clone())
        .or_else(|| {
            owner_doc
                .and_then(|d| d.xpath_default_namespace)
                .map(|id| schema_set.name_table.resolve(id))
        });

    match declared.as_deref() {
        None | Some("##local") => None,
        Some("##defaultNamespace") => assertion.ns_snapshot.default_ns,
        Some("##targetNamespace") => owner_doc.and_then(|d| d.target_namespace),
        Some(uri) => Some(schema_set.name_table.add(uri)),
    }
}

// ---------------------------------------------------------------------------
// compute_dollar_value — XSD 1.1 §3.13.4.1 clause 2.3 binding
// ---------------------------------------------------------------------------

/// Builds the sequence bound to `$value` while evaluating assertions on an
/// element.
///
/// Per §3.13.4.1 clause 2.3 the binding comes from the element's **governing
/// type definition** (the most-derived type), not from whichever type
/// declared a given inherited assertion, so one result serves the whole
/// inheritance chain.
///
/// The result is the empty sequence (clause 2.3.2) when any of these hold:
/// - the governing type's content is not simple content,
/// - the element's typed value is reported as nilled,
/// - simple-type validation of the element's string value fails. The
///   partial-PSVI `[validity]` is not available here, so every validation
///   failure lands in this branch.
///
/// Otherwise the result is the typed value; a list-typed value is expanded
/// to one atomic item per list member (clause 2.3.1.4).
fn compute_dollar_value<'doc>(
    doc: &'doc BufferDocument<'doc>,
    element_ref: u32,
    governing_ct_key: ComplexTypeKey,
    schema_set: &SchemaSet,
) -> XPathValue<BufferDocNavigator<'doc>> {
    use crate::types::value::{XmlValue, XmlValueKind};
    use crate::xpath::iterator::XmlItem;

    let governing = &schema_set.arenas.complex_types[governing_ct_key];
    match governing.content {
        ComplexContentResult::Simple(_) => {}
        _ => return XPathValue::empty(),
    }

    let element = BufferDocNavigator::new(doc, element_ref);
    if let TypedValue::Nilled = element.typed_value() {
        return XPathValue::empty();
    }

    let text = element.value();
    let validated =
        match validate_simple_type(&text, TypeKey::Complex(governing_ct_key), schema_set) {
            Ok(outcome) => outcome,
            Err(_) => return XPathValue::empty(),
        };

    // The simple-type validator represents a list as `XmlValueKind::List`;
    // XPath must instead see a multi-item sequence of atomics, each tagged
    // with the list's item type.
    if let XmlValueKind::List { item_type, items } = &validated.typed_value.value {
        let member_type = *item_type;
        let sequence: Vec<XmlItem<BufferDocNavigator<'doc>>> = items
            .iter()
            .map(|atom| {
                XmlItem::Atomic(XmlValue::new(
                    member_type,
                    XmlValueKind::Atomic(atom.clone()),
                ))
            })
            .collect();
        return XPathValue::from_sequence(sequence);
    }

    XPathValue::from_atomic(validated.typed_value)
}

// ---------------------------------------------------------------------------
// evaluate_complex_type_assertions — core evaluation
// ---------------------------------------------------------------------------

/// Evaluate all assertions (own + inherited) for a complex type against
/// the element subtree in a `BufferDocument`.
///
/// Returns a `Vec` of all `cvc-assertion` errors (does not stop at first failure).
pub(crate) fn evaluate_complex_type_assertions(
    doc: &BufferDocument<'_>,
    element_ref: u32,
    ct_key: ComplexTypeKey,
    schema_set: &SchemaSet,
) -> Vec<ValidationError> {
    let assertions = collect_inherited_assertions(ct_key, &schema_set.arenas);
    let mut errors = Vec::new();

    // §3.13.4.1 clause 2.3 ties `$value` to E's governing type
    // (the parameter `ct_key`), so it is identical across all
    // inherited assertions. Compute once and clone per evaluation.
    let dollar_value = compute_dollar_value(doc, element_ref, ct_key, schema_set);

    for (assertion, owner_key) in assertions {
        if assertion.test.is_empty() {
            continue;
        }

        // Build XPath static context with schema-time namespace snapshot
        let ctx = XPathContext::new(&schema_set.name_table)
            .with_namespaces(assertion.ns_snapshot.clone())
            .with_schema_set(schema_set);

        // Apply xpathDefaultNamespace cascade
        let ctx = if let Some(default_ns) =
            resolve_ct_assertion_default_ns(assertion, owner_key, schema_set)
        {
            ctx.with_default_element_ns(default_ns)
        } else {
            ctx
        };

        // §3.13.4.1 clause 2.2: `$value` is in scope for every assertion.
        // Declared unconditionally so XPath that references it compiles.
        let expr = match XPathExpr::compile_with_vars(&assertion.test, &ctx, &["value"]) {
            Ok(e) => e,
            Err(e) => {
                errors.push(errors::error(
                    "cvc-assertion",
                    format!(
                        "Failed to compile assertion test '{}': {}",
                        assertion.test, e
                    ),
                    None,
                ));
                continue;
            }
        };

        let nav = BufferDocNavigator::new_assertion(doc, element_ref);
        let value_for_eval = dollar_value.clone();

        let result = match expr
            .evaluator(&ctx)
            .run_with_node_and_setup(Some(nav), |eval| {
                eval.set_variable_by_name("value", value_for_eval)
                    .expect("$value declared via compile_with_vars");
            }) {
            Ok(r) => r,
            Err(e) => {
                errors.push(errors::error(
                    "cvc-assertion",
                    format!(
                        "Failed to evaluate assertion test '{}': {}",
                        assertion.test, e
                    ),
                    None,
                ));
                continue;
            }
        };

        // Check effective boolean value
        match effective_boolean_value(&result) {
            Ok(true) => { /* assertion passed */ }
            Ok(false) => {
                errors.push(errors::error(
                    "cvc-assertion",
                    format!("Assertion '{}' failed", assertion.test),
                    None,
                ));
            }
            Err(e) => {
                errors.push(errors::error(
                    "cvc-assertion",
                    format!(
                        "Failed to compute boolean value for assertion '{}': {}",
                        assertion.test, e
                    ),
                    None,
                ));
            }
        }
    }

    errors
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use crate::pipeline::load_and_process_schema;

    /// Loads inline schema text into a fresh XSD 1.1 schema set, panicking
    /// if the schema fails to load.
    fn load_schema(xsd: &str) -> SchemaSet {
        let mut set = SchemaSet::xsd11();
        load_and_process_schema(xsd.as_bytes(), "test.xsd", &mut set, None)
            .expect("failed to load schema");
        set
    }

    /// Returns the key of the first complex type named `name`, panicking
    /// when the schema set contains no such type.
    fn find_ct_key(schema_set: &SchemaSet, name: &str) -> ComplexTypeKey {
        let wanted = Some(schema_set.name_table.add(name));
        schema_set
            .arenas
            .complex_types
            .iter()
            .find(|(_, ct)| ct.name == wanted)
            .map(|(key, _)| key)
            .unwrap_or_else(|| panic!("Complex type '{}' not found", name))
    }

    /// A type without assertions anywhere in its chain reports `false`.
    #[test]
    fn test_has_inherited_assertions_none() {
        let xsd = r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
                <xs:complexType name="plain">
                    <xs:sequence>
                        <xs:element name="x" type="xs:string"/>
                    </xs:sequence>
                </xs:complexType>
            </xs:schema>"#;
        let set = load_schema(xsd);
        let plain = find_ct_key(&set, "plain");
        assert!(!has_inherited_assertions(plain, &set.arenas));
    }

    /// An `xs:assert` declared directly on an attribute-only complex type
    /// is detected.
    #[test]
    fn test_has_inherited_assertions_own() {
        let xsd = r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
                <xs:complexType name="withAssert">
                    <xs:attribute name="val" type="xs:integer"/>
                    <xs:assert test="@val >= 0"/>
                </xs:complexType>
            </xs:schema>"#;
        let set = load_schema(xsd);
        let with_assert = find_ct_key(&set, "withAssert");
        assert!(has_inherited_assertions(with_assert, &set.arenas));
    }

    /// A derived type with no assertions of its own still reports `true`
    /// when its base declares one.
    #[test]
    fn test_has_inherited_assertions_from_base() {
        let xsd = r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
                <xs:complexType name="base">
                    <xs:attribute name="val" type="xs:integer"/>
                    <xs:assert test="@val >= 0"/>
                </xs:complexType>
                <xs:complexType name="derived">
                    <xs:complexContent>
                        <xs:restriction base="base">
                            <xs:attribute name="val" type="xs:integer"/>
                        </xs:restriction>
                    </xs:complexContent>
                </xs:complexType>
            </xs:schema>"#;
        let set = load_schema(xsd);
        let derived = find_ct_key(&set, "derived");
        assert!(has_inherited_assertions(derived, &set.arenas));
    }

    /// Collected assertions are ordered base-first and tagged with the key
    /// of the type that declared them.
    #[test]
    fn test_collect_inherited_assertions_ordering() {
        let xsd = r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
                <xs:complexType name="base">
                    <xs:attribute name="val" type="xs:integer"/>
                    <xs:assert test="@val >= 0"/>
                </xs:complexType>
                <xs:complexType name="derived">
                    <xs:complexContent>
                        <xs:restriction base="base">
                            <xs:attribute name="val" type="xs:integer"/>
                            <xs:assert test="@val &lt; 100"/>
                        </xs:restriction>
                    </xs:complexContent>
                </xs:complexType>
            </xs:schema>"#;
        let set = load_schema(xsd);
        let derived_key = find_ct_key(&set, "derived");
        let base_key = find_ct_key(&set, "base");
        let collected = collect_inherited_assertions(derived_key, &set.arenas);

        // Exactly one assertion per type, base's before derived's.
        assert_eq!(collected.len(), 2);
        let (first, first_owner) = collected[0];
        let (second, second_owner) = collected[1];
        assert_eq!(
            first_owner, base_key,
            "first assertion should be from base"
        );
        assert_eq!(
            second_owner, derived_key,
            "second assertion should be from derived"
        );
        assert!(first.test.contains(">= 0"));
        assert!(second.test.contains("< 100"));
    }

    /// A chain without assertions yields an empty collection.
    #[test]
    fn test_collect_inherited_assertions_no_assertions() {
        let xsd = r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
                <xs:complexType name="plain">
                    <xs:sequence>
                        <xs:element name="x" type="xs:string"/>
                    </xs:sequence>
                </xs:complexType>
            </xs:schema>"#;
        let set = load_schema(xsd);
        let plain = find_ct_key(&set, "plain");
        let collected = collect_inherited_assertions(plain, &set.arenas);
        assert!(collected.is_empty());
    }
}