Skip to main content

xsd_schema/compiler/
all_group.rs

//! All-group content model validation
//!
//! This module implements the `xs:all` content model, which allows particles
//! to appear in any order. Unlike sequence/choice groups, all-groups are not
//! compiled to NFAs due to the exponential state explosion that would result
//! from permutation expansion.
//!
//! # XSD Version Differences
//!
//! | Feature | XSD 1.0 | XSD 1.1 |
//! |---------|---------|---------|
//! | Element particles | Yes | Yes |
//! | Wildcard particles | No | Yes |
//! | Group references | No | Yes |
//! | minOccurs | 0 or 1 | Any value |
//! | maxOccurs | 0 or 1 | Any value |
17
18use crate::ids::NameId;
19use crate::parser::frames::{ParticleResult, ParticleTerm};
20use crate::parser::location::SourceRef;
21use crate::schema::model::XsdVersion;
22use crate::types::complex::{not_qnames_exclude, NamespaceConstraint, ProcessContents};
23
24use super::error::{NfaCompileError, NfaCompileResult};
25use super::nfa::NfaTerm;
26use super::particle::MaxOccurs;
27use super::substitution::SubstitutionGroupMap;
28
29/// Compiled all-group content model
30///
31/// Represents an `xs:all` group compiled for validation. All-groups allow
32/// their particles to appear in any order, with each particle subject to
33/// its occurrence constraints.
34#[derive(Debug, Clone)]
35pub struct AllGroupModel {
36    /// Particles in the all-group
37    pub particles: Vec<AllParticle>,
38    /// Open content wildcard (XSD 1.1 only)
39    pub open_content: Option<OpenContentWildcard>,
40    /// Whether the outer particle has minOccurs=0, making the entire group optional.
41    /// When true, the content model is satisfied even if no children are consumed.
42    pub outer_optional: bool,
43}
44
45/// A particle within an all-group
46#[derive(Debug, Clone)]
47pub struct AllParticle {
48    /// The term that must be matched
49    pub term: NfaTerm,
50    /// Minimum required occurrences
51    pub min_occurs: u32,
52    /// Maximum allowed occurrences
53    pub max_occurs: MaxOccurs,
54    /// Source location for error reporting
55    pub source: Option<SourceRef>,
56}
57
58/// Open content wildcard for XSD 1.1
59#[derive(Debug, Clone)]
60pub struct OpenContentWildcard {
61    /// Namespace constraint for allowed namespaces
62    pub namespace_constraint: NamespaceConstraint,
63    /// How to process matched content
64    pub process_contents: ProcessContents,
65    /// Open content mode
66    pub mode: OpenContentMode,
67    /// Pre-expanded concrete QName exclusions (XSD 1.1 notQName)
68    pub not_qnames: Vec<(Option<NameId>, NameId)>,
69}
70
/// Open content mode for XSD 1.1
///
/// The default is [`OpenContentMode::None`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum OpenContentMode {
    /// No open content
    #[default]
    None,
    /// Open content can be interleaved
    Interleave,
    /// Open content only at the end
    Suffix,
}
82
/// Mutable state during all-group validation
///
/// Tracks how many times each particle has been matched (consumed count).
/// Entries are positional: slot `i` belongs to the model particle at index `i`.
#[derive(Debug, Clone)]
pub struct AllGroupState {
    /// Number of times each particle has been matched (by index)
    consumed: Vec<u32>,
}
91
92impl AllGroupModel {
93    /// Create a new all-group model
94    pub fn new(particles: Vec<AllParticle>) -> Self {
95        Self {
96            particles,
97            open_content: None,
98            outer_optional: false,
99        }
100    }
101
102    /// Create an all-group model with open content
103    pub fn with_open_content(
104        particles: Vec<AllParticle>,
105        open_content: OpenContentWildcard,
106    ) -> Self {
107        Self {
108            particles,
109            open_content: Some(open_content),
110            outer_optional: false,
111        }
112    }
113
114    /// Check if the all-group is empty
115    pub fn is_empty(&self) -> bool {
116        self.particles.is_empty()
117    }
118
119    /// Get the number of particles
120    pub fn particle_count(&self) -> usize {
121        self.particles.len()
122    }
123
124    /// Check if all particles are optional (can match empty sequence)
125    pub fn is_optional(&self) -> bool {
126        self.particles.iter().all(|p| p.is_optional())
127    }
128
129    /// Create a validation state for this model
130    pub fn create_state(&self) -> AllGroupState {
131        AllGroupState::new(self)
132    }
133}
134
135impl AllParticle {
136    /// Create a new all-particle
137    pub fn new(
138        term: NfaTerm,
139        min_occurs: u32,
140        max_occurs: MaxOccurs,
141        source: Option<SourceRef>,
142    ) -> Self {
143        Self {
144            term,
145            min_occurs,
146            max_occurs,
147            source,
148        }
149    }
150
151    /// Check if this particle is optional (minOccurs = 0)
152    pub fn is_optional(&self) -> bool {
153        self.min_occurs == 0
154    }
155
156    /// Check if the given occurrence count satisfies minOccurs
157    pub fn is_satisfied(&self, consumed: u32) -> bool {
158        consumed >= self.min_occurs
159    }
160}
161
162impl AllGroupState {
163    /// Create a new validation state for an all-group
164    pub fn new(model: &AllGroupModel) -> Self {
165        Self {
166            consumed: vec![0; model.particles.len()],
167        }
168    }
169
170    /// Reset the state for a new validation run
171    pub fn reset(&mut self, model: &AllGroupModel) {
172        self.consumed.clear();
173        self.consumed.resize(model.particles.len(), 0);
174    }
175
176    /// Check if a particle can still accept matches
177    pub fn can_accept(&self, model: &AllGroupModel, index: usize) -> bool {
178        if let (Some(&count), Some(particle)) =
179            (self.consumed.get(index), model.particles.get(index))
180        {
181            match particle.max_occurs {
182                MaxOccurs::Unbounded => true,
183                MaxOccurs::Bounded(max) => count < max,
184            }
185        } else {
186            false
187        }
188    }
189
190    /// Accept a match for the particle at the given index
191    ///
192    /// Returns true if the match was accepted, false if the particle
193    /// cannot accept any more matches.
194    pub fn accept(&mut self, model: &AllGroupModel, index: usize) -> bool {
195        if self.can_accept(model, index) {
196            self.consumed[index] += 1;
197            true
198        } else {
199            false
200        }
201    }
202
203    /// Get how many times a particle has been matched
204    pub fn consumed(&self, index: usize) -> u32 {
205        self.consumed.get(index).copied().unwrap_or(0)
206    }
207
208    /// Check if all particles have satisfied their minOccurs constraints
209    pub fn is_satisfied(&self, model: &AllGroupModel) -> bool {
210        for (i, particle) in model.particles.iter().enumerate() {
211            if !particle.is_satisfied(self.consumed(i)) {
212                return false;
213            }
214        }
215        true
216    }
217
218    /// Check if any particle has been consumed at all
219    pub fn has_any_consumed(&self) -> bool {
220        self.consumed.iter().any(|&c| c > 0)
221    }
222
223    /// Get indices of particles that have not satisfied their minOccurs
224    pub fn unsatisfied_indices(&self, model: &AllGroupModel) -> Vec<usize> {
225        let mut result = Vec::new();
226        for (i, particle) in model.particles.iter().enumerate() {
227            if !particle.is_satisfied(self.consumed(i)) {
228                result.push(i);
229            }
230        }
231        result
232    }
233}
234
235/// Validate all-group constraints based on XSD version
236///
237/// XSD 1.0 has strict constraints on what can appear in an all-group:
238/// - Only element particles (no wildcards, no group references)
239/// - minOccurs must be 0 or 1
240/// - maxOccurs must be 0 or 1 (`maxOccurs="0"` is the standard idiom for
241///   forbidding an element via restriction; see W3C XSD 1.0 ยง3.8.6
242///   cos-all-limited and the conformance test mgA015)
243///
244/// XSD 1.1 relaxes these constraints to allow wildcards and arbitrary
245/// occurrence values. Group references are allowed but must satisfy
246/// cos-all-limited constraints:
247/// - **Rule 1.3**: minOccurs = maxOccurs = 1
248/// - **Rule 2**: referenced group must have compositor = all
249///   (compositor check requires schema resolution, so only the occurrence
250///   constraint is validated here; compositor is checked during compilation)
251/// - Must be a group reference (`ref_name` set), not an inline group
252pub fn validate_all_group_constraints(
253    particles: &[ParticleResult],
254    xsd_version: XsdVersion,
255    source: Option<SourceRef>,
256) -> NfaCompileResult<()> {
257    match xsd_version {
258        XsdVersion::V1_0 => validate_all_group_xsd10(particles, source),
259        #[cfg(feature = "xsd11")]
260        XsdVersion::V1_1 => validate_all_group_xsd11(particles, source),
261        #[cfg(not(feature = "xsd11"))]
262        XsdVersion::V1_1 => validate_all_group_xsd10(particles, source),
263    }
264}
265
/// Validate XSD 1.1 all-group constraints (cos-all-limited)
///
/// Only group particles are inspected; element and wildcard particles pass
/// unchecked. For each group particle the occurrence rule is checked before
/// the "must be a reference" rule, and the first violation is returned.
///
/// # Errors
///
/// - `InvalidAllGroupOccurs` if a group particle does not have
///   minOccurs = maxOccurs = 1.
/// - `InvalidAllGroupContent` if a group particle is inline (no `ref_name`).
#[cfg(feature = "xsd11")]
fn validate_all_group_xsd11(
    particles: &[ParticleResult],
    source: Option<SourceRef>,
) -> NfaCompileResult<()> {
    for particle in particles {
        if let ParticleTerm::Group(group) = &particle.term {
            // Prefer the particle's own location; clone the group-level
            // fallback only when it is actually needed.
            let location = || particle.source.clone().or_else(|| source.clone());

            // cos-all-limited 1.3: group ref must have minOccurs = maxOccurs = 1
            if particle.min_occurs != 1 || particle.max_occurs != Some(1) {
                return Err(NfaCompileError::InvalidAllGroupOccurs {
                    reason: "cos-all-limited.1.3: group reference inside xs:all \
                             must have minOccurs = maxOccurs = 1"
                        .into(),
                    location: location(),
                });
            }
            // Must be a group reference, not an inline group
            if group.ref_name.is_none() {
                return Err(NfaCompileError::InvalidAllGroupContent {
                    location: location(),
                });
            }
        }
    }
    Ok(())
}
293
294/// Validate XSD 1.0 all-group constraints
295fn validate_all_group_xsd10(
296    particles: &[ParticleResult],
297    source: Option<SourceRef>,
298) -> NfaCompileResult<()> {
299    for particle in particles {
300        // XSD 1.0: Only element particles allowed
301        if !matches!(particle.term, ParticleTerm::Element(_)) {
302            return Err(NfaCompileError::InvalidAllGroupContent {
303                location: particle.source.clone().or(source.clone()),
304            });
305        }
306
307        // XSD 1.0: minOccurs must be 0 or 1
308        if particle.min_occurs > 1 {
309            return Err(NfaCompileError::InvalidAllGroupOccurs {
310                reason: format!(
311                    "minOccurs must be 0 or 1 in XSD 1.0 all-group, found {}",
312                    particle.min_occurs
313                ),
314                location: particle.source.clone().or(source.clone()),
315            });
316        }
317
318        // XSD 1.0: maxOccurs must be 0 or 1
319        match particle.max_occurs {
320            Some(0) | Some(1) => {} // OK
321            Some(n) => {
322                return Err(NfaCompileError::InvalidAllGroupOccurs {
323                    reason: format!("maxOccurs must be 0 or 1 in XSD 1.0 all-group, found {}", n),
324                    location: particle.source.clone().or(source.clone()),
325                });
326            }
327            None => {
328                return Err(NfaCompileError::InvalidAllGroupOccurs {
329                    reason: "maxOccurs='unbounded' not allowed in XSD 1.0 all-group".to_string(),
330                    location: particle.source.clone().or(source.clone()),
331                });
332            }
333        }
334    }
335
336    Ok(())
337}
338
/// Term matching result
///
/// Returned by [`term_matches`] and [`term_matches_with_substitution`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TermMatchResult {
    /// Term matched
    Match,
    /// Term did not match
    NoMatch,
}
347
348/// Match an element name against an NfaTerm
349pub fn term_matches(
350    term: &NfaTerm,
351    element_name: NameId,
352    element_namespace: Option<NameId>,
353    target_namespace: Option<NameId>,
354    xsd_version: XsdVersion,
355) -> TermMatchResult {
356    term_matches_with_substitution(
357        term,
358        element_name,
359        element_namespace,
360        target_namespace,
361        None,
362        xsd_version,
363    )
364}
365
366/// Match an element name against an NfaTerm with optional substitution groups.
367pub fn term_matches_with_substitution(
368    term: &NfaTerm,
369    element_name: NameId,
370    element_namespace: Option<NameId>,
371    target_namespace: Option<NameId>,
372    substitution_groups: Option<&SubstitutionGroupMap>,
373    xsd_version: XsdVersion,
374) -> TermMatchResult {
375    match term {
376        NfaTerm::Element {
377            name,
378            namespace,
379            element_key,
380            ..
381        } => {
382            if let (Some(map), Some(key)) = (substitution_groups, element_key) {
383                if let Some(names) = map.get(key) {
384                    return if names.contains(&(element_name, element_namespace)) {
385                        TermMatchResult::Match
386                    } else {
387                        TermMatchResult::NoMatch
388                    };
389                }
390            }
391
392            if *name == element_name && *namespace == element_namespace {
393                TermMatchResult::Match
394            } else {
395                TermMatchResult::NoMatch
396            }
397        }
398        NfaTerm::Wildcard {
399            namespace_constraint,
400            not_qnames,
401            ..
402        } => {
403            if !namespace_constraint.matches(element_namespace, target_namespace, xsd_version) {
404                return TermMatchResult::NoMatch;
405            }
406            if not_qnames_exclude(not_qnames, element_namespace, element_name) {
407                return TermMatchResult::NoMatch;
408            }
409            TermMatchResult::Match
410        }
411    }
412}
413
#[cfg(test)]
mod tests {
    use super::*;
    use crate::compiler::build_substitution_group_map;
    use crate::ids::NameId;
    use crate::schema::model::{DerivationSet, SchemaSet};

    /// Build a minimal element declaration with the given name and target
    /// namespace; every other field is left empty or at a neutral value.
    fn element_data(
        name: NameId,
        target_namespace: Option<NameId>,
    ) -> crate::arenas::ElementDeclData {
        crate::arenas::ElementDeclData {
            name: Some(name),
            target_namespace,
            ref_name: None,
            type_ref: None,
            inline_type: None,
            substitution_group: Vec::new(),
            default_value: None,
            fixed_value: None,
            nillable: false,
            is_abstract: false,
            min_occurs: 1,
            max_occurs: Some(1),
            block: DerivationSet::empty(),
            final_derivation: DerivationSet::empty(),
            form: None,
            id: None,
            alternatives: Vec::new(),
            identity_constraints: Vec::new(),
            pending_ic_refs: vec![],
            annotation: None,
            source: None,
            resolved_type: None,
            resolved_ref: None,
            resolved_substitution_groups: Vec::new(),
            deferred_type_error: None,
        }
    }

    /// Element term with no namespace and no element key.
    fn make_element_term(name: u32) -> NfaTerm {
        NfaTerm::element(NameId(name), None, None)
    }

    /// All-group particle over `make_element_term(name)` with no source location.
    fn make_particle(name: u32, min: u32, max: MaxOccurs) -> AllParticle {
        AllParticle::new(make_element_term(name), min, max, None)
    }

    #[test]
    fn test_all_group_model_new() {
        let particles = vec![
            make_particle(1, 1, MaxOccurs::Bounded(1)),
            make_particle(2, 0, MaxOccurs::Bounded(1)),
        ];
        let model = AllGroupModel::new(particles);

        assert_eq!(model.particle_count(), 2);
        assert!(!model.is_empty());
        assert!(!model.is_optional()); // First particle is required
    }

    #[test]
    fn test_all_group_model_optional() {
        let particles = vec![
            make_particle(1, 0, MaxOccurs::Bounded(1)),
            make_particle(2, 0, MaxOccurs::Bounded(1)),
        ];
        let model = AllGroupModel::new(particles);

        assert!(model.is_optional()); // All particles are optional
    }

    #[test]
    fn test_all_particle_is_optional() {
        let required = make_particle(1, 1, MaxOccurs::Bounded(1));
        let optional = make_particle(2, 0, MaxOccurs::Bounded(1));

        assert!(!required.is_optional());
        assert!(optional.is_optional());
    }

    #[test]
    fn test_all_particle_is_satisfied() {
        let particle = make_particle(1, 2, MaxOccurs::Bounded(5));

        assert!(!particle.is_satisfied(0));
        assert!(!particle.is_satisfied(1));
        assert!(particle.is_satisfied(2));
        assert!(particle.is_satisfied(3));
    }

    #[test]
    fn test_all_group_state_new() {
        let particles = vec![
            make_particle(1, 1, MaxOccurs::Bounded(2)),
            make_particle(2, 0, MaxOccurs::Bounded(1)),
        ];
        let model = AllGroupModel::new(particles);
        let state = model.create_state();

        assert!(state.can_accept(&model, 0));
        assert!(state.can_accept(&model, 1));
    }

    #[test]
    fn test_all_group_state_accept() {
        let particles = vec![make_particle(1, 1, MaxOccurs::Bounded(2))];
        let model = AllGroupModel::new(particles);
        let mut state = model.create_state();

        assert!(state.can_accept(&model, 0));
        assert!(state.accept(&model, 0));
        assert!(state.can_accept(&model, 0)); // Still has 1 remaining
        assert!(state.accept(&model, 0));
        assert!(!state.can_accept(&model, 0)); // No more remaining
        assert!(!state.accept(&model, 0)); // Should return false
    }

    #[test]
    fn test_all_group_state_accept_unbounded() {
        let particles = vec![make_particle(1, 1, MaxOccurs::Unbounded)];
        let model = AllGroupModel::new(particles);
        let mut state = model.create_state();

        for _ in 0..1000 {
            assert!(state.can_accept(&model, 0));
            assert!(state.accept(&model, 0));
        }
        assert!(state.can_accept(&model, 0)); // Still accepting
    }

    #[test]
    fn test_all_group_state_is_satisfied() {
        let particles = vec![
            make_particle(1, 1, MaxOccurs::Bounded(2)), // Required
            make_particle(2, 0, MaxOccurs::Bounded(1)), // Optional
        ];
        let model = AllGroupModel::new(particles);
        let mut state = model.create_state();

        assert!(!state.is_satisfied(&model)); // First particle not satisfied

        state.accept(&model, 0); // Match first particle once
        assert!(state.is_satisfied(&model)); // Now satisfied
    }

    #[test]
    fn test_all_group_state_unsatisfied_indices() {
        let particles = vec![
            make_particle(1, 1, MaxOccurs::Bounded(1)),
            make_particle(2, 1, MaxOccurs::Bounded(1)),
            make_particle(3, 0, MaxOccurs::Bounded(1)),
        ];
        let model = AllGroupModel::new(particles);
        let mut state = model.create_state();

        let unsatisfied = state.unsatisfied_indices(&model);
        assert_eq!(unsatisfied, vec![0, 1]); // Particles 0 and 1 require matching

        state.accept(&model, 0);
        let unsatisfied = state.unsatisfied_indices(&model);
        assert_eq!(unsatisfied, vec![1]); // Only particle 1 unsatisfied now
    }

    #[test]
    fn test_term_matches_element() {
        let term = NfaTerm::element(NameId(1), Some(NameId(100)), None);

        assert_eq!(
            term_matches(&term, NameId(1), Some(NameId(100)), None, XsdVersion::V1_0),
            TermMatchResult::Match
        );
        assert_eq!(
            term_matches(&term, NameId(2), Some(NameId(100)), None, XsdVersion::V1_0),
            TermMatchResult::NoMatch
        );
        assert_eq!(
            term_matches(&term, NameId(1), Some(NameId(200)), None, XsdVersion::V1_0),
            TermMatchResult::NoMatch
        );
    }

    #[test]
    fn test_term_matches_wildcard_any() {
        let term = NfaTerm::wildcard(NamespaceConstraint::Any, ProcessContents::Lax);

        assert_eq!(
            term_matches(&term, NameId(1), Some(NameId(100)), None, XsdVersion::V1_0),
            TermMatchResult::Match
        );
        assert_eq!(
            term_matches(&term, NameId(999), None, None, XsdVersion::V1_0),
            TermMatchResult::Match
        );
    }

    #[test]
    fn test_term_matches_wildcard_other() {
        let term = NfaTerm::wildcard(NamespaceConstraint::Other, ProcessContents::Lax);
        let target_ns = Some(NameId(100));

        assert_eq!(
            term_matches(
                &term,
                NameId(1),
                Some(NameId(200)),
                target_ns,
                XsdVersion::V1_0
            ),
            TermMatchResult::Match
        );
        assert_eq!(
            term_matches(&term, NameId(1), target_ns, target_ns, XsdVersion::V1_0),
            TermMatchResult::NoMatch
        );
    }

    #[test]
    fn test_term_matches_substitution_group_member() {
        let mut schema_set = SchemaSet::new();
        let head_name = schema_set.name_table.add("head");
        let member_name = schema_set.name_table.add("member");

        let head_key = schema_set
            .arenas
            .alloc_element(element_data(head_name, None));
        let member_key = schema_set
            .arenas
            .alloc_element(element_data(member_name, None));

        // Register `member` as substitutable for `head`.
        schema_set
            .arenas
            .elements
            .get_mut(member_key)
            .unwrap()
            .resolved_substitution_groups
            .push(head_key);

        let map = build_substitution_group_map(&schema_set);
        let term = NfaTerm::element(head_name, None, Some(head_key));

        assert_eq!(
            term_matches_with_substitution(
                &term,
                member_name,
                None,
                None,
                Some(&map),
                XsdVersion::V1_0
            ),
            TermMatchResult::Match
        );
    }

    #[test]
    fn test_term_matches_substitution_group_abstract_head() {
        let mut schema_set = SchemaSet::new();
        let head_name = schema_set.name_table.add("head");
        let member_name = schema_set.name_table.add("member");

        let mut head = element_data(head_name, None);
        head.is_abstract = true;
        let head_key = schema_set.arenas.alloc_element(head);
        let member_key = schema_set
            .arenas
            .alloc_element(element_data(member_name, None));
        schema_set
            .arenas
            .elements
            .get_mut(member_key)
            .unwrap()
            .resolved_substitution_groups
            .push(head_key);

        let map = build_substitution_group_map(&schema_set);
        let term = NfaTerm::element(head_name, None, Some(head_key));

        // The abstract head itself must not match; only its member does.
        assert_eq!(
            term_matches_with_substitution(
                &term,
                head_name,
                None,
                None,
                Some(&map),
                XsdVersion::V1_0
            ),
            TermMatchResult::NoMatch
        );
        assert_eq!(
            term_matches_with_substitution(
                &term,
                member_name,
                None,
                None,
                Some(&map),
                XsdVersion::V1_0
            ),
            TermMatchResult::Match
        );
    }
}
714}