Skip to main content

xsd_schema/compiler/
substitution.rs

1//! Substitution group helpers for validation and UPA checks.
2
3use std::collections::{HashMap, HashSet};
4
5use crate::ids::{ElementKey, NameId, TypeKey};
6use crate::schema::model::{DerivationSet, SchemaSet};
7
8/// Map from substitution group head to all substitutable element names.
9pub type SubstitutionGroupMap = HashMap<ElementKey, HashSet<(NameId, Option<NameId>)>>;
10
11/// Build a substitution group membership map for the schema set.
12pub fn build_substitution_group_map(schema_set: &SchemaSet) -> SubstitutionGroupMap {
13    build_substitution_group_map_inner(schema_set, false)
14}
15
16/// Build a substitution group membership map that includes abstract members.
17///
18/// Per XSD 1.1 (W3C Bugzilla 4337), abstract elements participate in the
19/// substitution group for schema-time UPA / cos-element-consistent (EDC)
20/// constraints, even though they cannot appear in instances. This variant is
21/// used by UPA/EDC schema-time validation under XSD 1.1.
22pub fn build_substitution_group_map_with_abstract(schema_set: &SchemaSet) -> SubstitutionGroupMap {
23    build_substitution_group_map_inner(schema_set, true)
24}
25
26fn build_substitution_group_map_inner(
27    schema_set: &SchemaSet,
28    include_abstract: bool,
29) -> SubstitutionGroupMap {
30    let mut member_map: HashMap<ElementKey, Vec<ElementKey>> = HashMap::new();
31    for (member_key, elem) in schema_set.arenas.elements.iter() {
32        for head_key in &elem.resolved_substitution_groups {
33            member_map.entry(*head_key).or_default().push(member_key);
34        }
35    }
36
37    let mut result = HashMap::new();
38    let mut seen_heads = HashSet::new();
39    for (head_key, _) in schema_set.arenas.elements.iter() {
40        let head_key = resolve_element_key(schema_set, head_key);
41        if !seen_heads.insert(head_key) {
42            continue;
43        }
44
45        let head_elem = match schema_set.arenas.elements.get(head_key) {
46            Some(elem) => elem,
47            None => continue,
48        };
49        let mut names = HashSet::new();
50        if let Some(name) = head_elem.name {
51            if !head_elem.is_abstract || include_abstract {
52                names.insert((name, head_elem.target_namespace));
53            }
54        }
55
56        let (effective_block, effective_final) =
57            effective_element_constraints(schema_set, head_elem);
58        if !effective_block.contains_substitution() {
59            let head_type = head_elem.resolved_type;
60            let exclude = derivation_exclusions(effective_block, effective_final);
61
62            let mut stack = member_map.get(&head_key).cloned().unwrap_or_default();
63            let mut visited = HashSet::new();
64            while let Some(member_key) = stack.pop() {
65                if !visited.insert(member_key) {
66                    continue;
67                }
68                if let Some(member) = resolved_element(schema_set, member_key) {
69                    if let Some(name) = member.name {
70                        if (!member.is_abstract || include_abstract)
71                            && is_substitutable(
72                                schema_set,
73                                head_type,
74                                exclude,
75                                member.resolved_type,
76                            )
77                        {
78                            names.insert((name, member.target_namespace));
79                        }
80                    }
81                }
82                if let Some(nested) = member_map.get(&member_key) {
83                    for &next in nested {
84                        if !visited.contains(&next) {
85                            stack.push(next);
86                        }
87                    }
88                }
89            }
90        }
91
92        result.insert(head_key, names);
93    }
94
95    result
96}
97
98fn resolve_element_key(schema_set: &SchemaSet, key: ElementKey) -> ElementKey {
99    schema_set
100        .arenas
101        .elements
102        .get(key)
103        .and_then(|elem| elem.resolved_ref)
104        .unwrap_or(key)
105}
106
107fn resolved_element(
108    schema_set: &SchemaSet,
109    key: ElementKey,
110) -> Option<&crate::arenas::ElementDeclData> {
111    let key = resolve_element_key(schema_set, key);
112    schema_set.arenas.elements.get(key)
113}
114
115pub(crate) fn derivation_exclusions(
116    effective_block: DerivationSet,
117    effective_final: DerivationSet,
118) -> DerivationSet {
119    // Per §3.3.6.3 / §3.9.6 the exclusion set is built solely from the
120    // head *element's* {substitution group exclusions} (element `final`)
121    // and, for instance-time checks, its `block` attribute.  The head
122    // *type's* {final} is intentionally excluded here: is_type_derived_from
123    // walks the full derivation chain and the type's own finality is a
124    // property of the type hierarchy, not the element declaration.
125    (effective_block | effective_final) & derivation_mask()
126}
127
128fn derivation_mask() -> DerivationSet {
129    DerivationSet::EXTENSION
130        | DerivationSet::RESTRICTION
131        | DerivationSet::LIST
132        | DerivationSet::UNION
133}
134
135pub(crate) fn is_substitutable(
136    schema_set: &SchemaSet,
137    head_type: Option<TypeKey>,
138    exclude: DerivationSet,
139    member_type: Option<TypeKey>,
140) -> bool {
141    let any_type = TypeKey::Complex(schema_set.any_type_key());
142    let head_type = head_type.unwrap_or(any_type);
143    let member_type = member_type.unwrap_or(any_type);
144    schema_set.is_type_derived_from(member_type, head_type, exclude)
145}
146
147pub(crate) fn effective_element_constraints(
148    _schema_set: &SchemaSet,
149    element: &crate::arenas::ElementDeclData,
150) -> (DerivationSet, DerivationSet) {
151    // Both block and final_derivation are resolved at assembly time
152    // (the assembler applies blockDefault/finalDefault to empty/absent entries).
153    (element.block, element.final_derivation)
154}
155
156/// Check if `candidate_key` is validly substitutable for `head_key`
157/// per XSD §3.3.6.3 / §3.9.6 NameAndTypeOK.
158pub(crate) fn is_element_substitutable_for(
159    schema_set: &SchemaSet,
160    head_key: ElementKey,
161    candidate_key: ElementKey,
162) -> bool {
163    let Some(head_elem) = schema_set.arenas.elements.get(head_key) else {
164        return false;
165    };
166    let Some(candidate_elem) = schema_set.arenas.elements.get(candidate_key) else {
167        return false;
168    };
169
170    // Check declared substitution group membership (direct or transitive).
171    // Walk candidate's declared heads to find head_key.
172    let mut visited = HashSet::new();
173    let mut stack: Vec<ElementKey> = candidate_elem.resolved_substitution_groups.clone();
174    let mut is_member = false;
175    while let Some(sg_head) = stack.pop() {
176        if !visited.insert(sg_head) {
177            continue;
178        }
179        if sg_head == head_key {
180            is_member = true;
181            break;
182        }
183        if let Some(sg_elem) = schema_set.arenas.elements.get(sg_head) {
184            stack.extend_from_slice(&sg_elem.resolved_substitution_groups);
185        }
186    }
187    if !is_member {
188        return false;
189    }
190
191    // Check block constraints on the head element
192    let (effective_block, effective_final) = effective_element_constraints(schema_set, head_elem);
193    if effective_block.contains_substitution() {
194        return false;
195    }
196
197    // Check type derivation with exclusion mask
198    let exclude = derivation_exclusions(effective_block, effective_final);
199    is_substitutable(
200        schema_set,
201        head_elem.resolved_type,
202        exclude,
203        candidate_elem.resolved_type,
204    )
205}
206
207/// Check e-props-correct.4: member type must be validly substitutable for
208/// head type subject to the head's `{substitution group exclusions}` (= `final`).
209///
210/// Unlike `is_element_substitutable_for`, this does NOT check `block` because
211/// `block` controls instance-time substitution, not affiliation legality.
212fn check_substitution_group_affiliation(
213    schema_set: &SchemaSet,
214    head_key: ElementKey,
215    member_key: ElementKey,
216) -> bool {
217    let Some(head_elem) = schema_set.arenas.elements.get(head_key) else {
218        return false;
219    };
220    let Some(member_elem) = schema_set.arenas.elements.get(member_key) else {
221        return false;
222    };
223    let (_, effective_final) = effective_element_constraints(schema_set, head_elem);
224    let exclude = derivation_exclusions(DerivationSet::empty(), effective_final);
225    is_substitutable(
226        schema_set,
227        head_elem.resolved_type,
228        exclude,
229        member_elem.resolved_type,
230    )
231}
232
233/// Validate all declared substitution group memberships.
234///
235/// Reports `e-props-correct.4` if a member element's type is not validly
236/// substitutable for its head element's type (respecting `final` constraints).
237///
238/// Note: This uses only the head's `{substitution group exclusions}` (= `final`),
239/// NOT the head's `block` attribute. The `block` attribute controls instance-time
240/// substitution, not schema-level affiliation legality.
241pub fn validate_all_substitution_groups(schema_set: &SchemaSet) -> crate::SchemaResult<()> {
242    for (member_key, elem) in schema_set.arenas.elements.iter() {
243        for &head_key in &elem.resolved_substitution_groups {
244            if !check_substitution_group_affiliation(schema_set, head_key, member_key) {
245                let member_name = elem
246                    .name
247                    .map(|n| schema_set.name_table.resolve(n).to_string())
248                    .unwrap_or_else(|| "<anonymous>".to_string());
249                let head_name = schema_set
250                    .arenas
251                    .elements
252                    .get(head_key)
253                    .and_then(|h| h.name)
254                    .map(|n| schema_set.name_table.resolve(n).to_string())
255                    .unwrap_or_else(|| "<anonymous>".to_string());
256                let location = elem
257                    .source
258                    .as_ref()
259                    .and_then(|s| schema_set.source_maps.locate(s));
260                return Err(crate::error::SchemaError::structural(
261                    "e-props-correct.4",
262                    format!(
263                        "Element '{}' is not a valid member of the substitution group \
264                         headed by '{}': type derivation is blocked by 'final' constraint",
265                        member_name, head_name
266                    ),
267                    location,
268                ));
269            }
270        }
271    }
272
273    // §3.3.6.1.5: substitution group affiliation must be acyclic. Walk each
274    // element's resolved substitution-group chain and reject any back-edge.
275    for (start_key, _) in schema_set.arenas.elements.iter() {
276        let mut visited = HashSet::new();
277        let mut stack = vec![start_key];
278        while let Some(current) = stack.pop() {
279            if !visited.insert(current) {
280                continue;
281            }
282            let Some(decl) = schema_set.arenas.elements.get(current) else {
283                continue;
284            };
285            for &head in &decl.resolved_substitution_groups {
286                if head == start_key {
287                    let elem = &schema_set.arenas.elements[start_key];
288                    let elem_name = elem
289                        .name
290                        .map(|n| schema_set.name_table.resolve(n).to_string())
291                        .unwrap_or_else(|| "<anonymous>".to_string());
292                    let location = elem
293                        .source
294                        .as_ref()
295                        .and_then(|s| schema_set.source_maps.locate(s));
296                    return Err(crate::error::SchemaError::structural(
297                        "e-props-correct",
298                        format!(
299                            "Substitution group cycle detected involving element '{}' \
300                             (§3.3.6.1.5)",
301                            elem_name
302                        ),
303                        location,
304                    ));
305                }
306                stack.push(head);
307            }
308        }
309    }
310    Ok(())
311}
312
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::location::SourceRef;

    /// Build a plain, named, non-abstract element declaration of the given
    /// type with no block/final constraints and no substitution group.
    fn element_data(
        name: NameId,
        type_key: TypeKey,
        source: Option<SourceRef>,
    ) -> crate::arenas::ElementDeclData {
        crate::arenas::ElementDeclData {
            name: Some(name),
            target_namespace: None,
            ref_name: None,
            type_ref: None,
            inline_type: None,
            substitution_group: Vec::new(),
            default_value: None,
            fixed_value: None,
            nillable: false,
            is_abstract: false,
            min_occurs: 1,
            max_occurs: Some(1),
            block: DerivationSet::empty(),
            final_derivation: DerivationSet::empty(),
            form: None,
            id: None,
            alternatives: Vec::new(),
            identity_constraints: Vec::new(),
            pending_ic_refs: vec![],
            annotation: None,
            source,
            resolved_type: Some(type_key),
            resolved_ref: None,
            resolved_substitution_groups: Vec::new(),
            deferred_type_error: None,
        }
    }

    /// A schema set holding a `head` element of type `decimal` and a
    /// `member` element of type `int` affiliated with that head.
    struct Fixture {
        schema_set: SchemaSet,
        head_key: ElementKey,
        head_name: NameId,
        member_name: NameId,
    }

    impl Fixture {
        /// Create the fixture. `configure_head` runs just before the head is
        /// allocated, so it can adjust the head declaration or the schema set.
        fn new(
            configure_head: impl FnOnce(&mut SchemaSet, &mut crate::arenas::ElementDeclData),
        ) -> Self {
            let mut schema_set = SchemaSet::new();
            let head_name = schema_set.name_table.add("head");
            let member_name = schema_set.name_table.add("member");
            let head_type = TypeKey::Simple(schema_set.builtin_types().decimal);
            let member_type = TypeKey::Simple(schema_set.builtin_types().int);

            let mut head = element_data(head_name, head_type, None);
            configure_head(&mut schema_set, &mut head);
            let head_key = schema_set.arenas.alloc_element(head);

            let mut member = element_data(member_name, member_type, None);
            member.resolved_substitution_groups.push(head_key);
            schema_set.arenas.alloc_element(member);

            Fixture {
                schema_set,
                head_key,
                head_name,
                member_name,
            }
        }

        /// Whether the (non-abstract) map lists `name` under the head.
        fn group_contains(&self, name: NameId) -> bool {
            let map = build_substitution_group_map(&self.schema_set);
            map.get(&self.head_key).unwrap().contains(&(name, None))
        }
    }

    #[test]
    fn test_substitution_group_type_derivation_allows_member() {
        let fixture = Fixture::new(|_, _| {});
        assert!(fixture.group_contains(fixture.member_name));
    }

    #[test]
    fn test_substitution_group_element_final_blocks_member() {
        let fixture = Fixture::new(|_, head| head.final_derivation = DerivationSet::RESTRICTION);
        assert!(!fixture.group_contains(fixture.member_name));
    }

    // Per §3.3.4 the {substitution group exclusions} come solely from the
    // *element* declaration's `final` attribute. The head *type's* {final}
    // does not gate substitution group membership, so marking only the type
    // as final must leave the member in the group.
    #[test]
    fn test_substitution_group_type_final_does_not_block_member() {
        let fixture = Fixture::new(|schema_set, _| {
            // Mark the head *type* as final for restriction.
            let decimal = schema_set.builtin_types().decimal;
            if let Some(type_def) = schema_set.arenas.simple_types.get_mut(decimal) {
                type_def.final_derivation = DerivationSet::RESTRICTION;
            }
        });
        // Only the element's final gates membership, not the type's.
        assert!(fixture.group_contains(fixture.member_name));
    }

    #[test]
    fn test_substitution_group_block_substitution_keeps_head_only() {
        let fixture = Fixture::new(|_, head| head.block = DerivationSet::SUBSTITUTION);
        assert!(fixture.group_contains(fixture.head_name));
        assert!(!fixture.group_contains(fixture.member_name));
    }

    #[test]
    fn test_substitution_group_block_default_blocks_member() {
        // Assembly would apply blockDefault to elements without an explicit
        // block; simulate the outcome by setting block = SUBSTITUTION.
        let fixture = Fixture::new(|_, head| head.block = DerivationSet::SUBSTITUTION);
        assert!(fixture.group_contains(fixture.head_name));
        assert!(!fixture.group_contains(fixture.member_name));
    }

    #[test]
    fn test_substitution_group_final_default_blocks_member() {
        // Assembly would apply finalDefault to elements without an explicit
        // final; simulate the outcome by setting final = RESTRICTION.
        let fixture = Fixture::new(|_, head| head.final_derivation = DerivationSet::RESTRICTION);
        assert!(fixture.group_contains(fixture.head_name));
        assert!(!fixture.group_contains(fixture.member_name));
    }
}