Skip to main content

xsd_schema/compiler/
particle.rs

1//! Particle occurrence handling with threshold optimization
2//!
3//! This module implements occurrence constraint handling for XSD particles
4//! (minOccurs/maxOccurs) with optimization for large maxOccurs values.
5//!
6//! For small maxOccurs (≤ COUNTED_THRESHOLD), NFA fragments are unrolled
7//! (cloned). For larger values (up to MAX_COUNTED_OCCURS), counted NFA
8//! transitions are used for O(1) state overhead. Values above
9//! MAX_COUNTED_OCCURS are treated as unbounded.
10
11use super::fragment::NfaFragment;
12
13/// Threshold above which counted NFA is used instead of unrolling.
14///
15/// Values ≤ this threshold are unrolled (cloned fragments).
16/// Values above use counted transitions with O(1) extra states.
17pub const COUNTED_THRESHOLD: u32 = 16;
18
19/// Maximum maxOccurs value before treating as unbounded.
20///
21/// Values above this cap fall back to unbounded treatment to bound the
22/// O(maxOccurs) cost of epsilon closure for nullable loop bodies.
23/// Raises the correctness ceiling from the old limit of 100 to 10000.
24pub const MAX_COUNTED_OCCURS: u32 = 10_000;
25
26/// MaxOccurs value representation
27///
28/// Represents the maxOccurs constraint from XSD, which can be either
29/// a bounded positive integer or unbounded.
30#[derive(Debug, Clone, Copy, PartialEq, Eq)]
31pub enum MaxOccurs {
32    /// Unbounded (no maximum limit)
33    Unbounded,
34    /// Bounded to a specific maximum value
35    Bounded(u32),
36}
37
38impl MaxOccurs {
39    /// Create from an Option, where None means unbounded
40    pub fn from_option(max: Option<u32>) -> Self {
41        match max {
42            Some(n) => MaxOccurs::Bounded(n),
43            None => MaxOccurs::Unbounded,
44        }
45    }
46
47    /// Convert to Option for compatibility with fragment methods
48    pub fn to_option(&self) -> Option<u32> {
49        match self {
50            MaxOccurs::Unbounded => None,
51            MaxOccurs::Bounded(n) => Some(*n),
52        }
53    }
54
55    /// Check if this value is effectively unbounded.
56    ///
57    /// Returns true if:
58    /// - The value is explicitly Unbounded, or
59    /// - The bounded value exceeds MAX_COUNTED_OCCURS
60    ///
61    /// Values up to MAX_COUNTED_OCCURS are handled exactly by counted NFA.
62    /// Values above fall back to unbounded to bound runtime cost.
63    pub fn is_effectively_unbounded(&self) -> bool {
64        match self {
65            MaxOccurs::Unbounded => true,
66            MaxOccurs::Bounded(n) => *n > MAX_COUNTED_OCCURS,
67        }
68    }
69
70    /// Check if this is explicitly unbounded
71    pub fn is_unbounded(&self) -> bool {
72        matches!(self, MaxOccurs::Unbounded)
73    }
74}
75
76impl Default for MaxOccurs {
77    fn default() -> Self {
78        MaxOccurs::Bounded(1)
79    }
80}
81
82/// Apply occurrence constraints with threshold-based dispatch.
83///
84/// - Small bounded (≤ COUNTED_THRESHOLD): unroll via repeat_range
85/// - Large bounded (≤ MAX_COUNTED_OCCURS): counted NFA via repeat_counted
86/// - Unbounded or > MAX_COUNTED_OCCURS: Kleene star via repeat_range
87pub fn apply_occurs(frag: NfaFragment, min: u32, max: MaxOccurs) -> NfaFragment {
88    let effective_max = if max.is_effectively_unbounded() {
89        None // Treat as unbounded
90    } else {
91        max.to_option()
92    };
93
94    match effective_max {
95        // Unbounded with large min → counted exact prefix + star tail
96        None if min > COUNTED_THRESHOLD => frag
97            .clone()
98            .repeat_counted(min, min)
99            .concat(frag.repeat_star()),
100        // Unbounded with small min → existing unroll via repeat_range (star/plus)
101        None => frag.repeat_range(min, None),
102        // Small bounded → existing unroll via repeat_range
103        Some(m) if m <= COUNTED_THRESHOLD => frag.repeat_range(min, Some(m)),
104        // Large bounded → counted construction
105        Some(m) if min == 0 => frag.repeat_counted(0, m),
106        Some(m) if min <= COUNTED_THRESHOLD => frag
107            .clone()
108            .repeat_exact(min)
109            .concat(frag.repeat_counted(0, m - min)),
110        Some(m) => frag.repeat_counted(min, m),
111    }
112}
113
114#[cfg(test)]
115mod tests {
116    use super::*;
117
118    #[test]
119    fn test_max_occurs_from_option() {
120        assert_eq!(MaxOccurs::from_option(Some(5)), MaxOccurs::Bounded(5));
121        assert_eq!(MaxOccurs::from_option(None), MaxOccurs::Unbounded);
122    }
123
124    #[test]
125    fn test_max_occurs_to_option() {
126        assert_eq!(MaxOccurs::Bounded(5).to_option(), Some(5));
127        assert_eq!(MaxOccurs::Unbounded.to_option(), None);
128    }
129
130    #[test]
131    fn test_max_occurs_effectively_unbounded() {
132        // Below MAX_COUNTED_OCCURS - NOT effectively unbounded
133        assert!(!MaxOccurs::Bounded(50).is_effectively_unbounded());
134        assert!(!MaxOccurs::Bounded(100).is_effectively_unbounded());
135        assert!(!MaxOccurs::Bounded(1000).is_effectively_unbounded());
136        assert!(!MaxOccurs::Bounded(MAX_COUNTED_OCCURS).is_effectively_unbounded());
137
138        // Above MAX_COUNTED_OCCURS - effectively unbounded
139        assert!(MaxOccurs::Bounded(MAX_COUNTED_OCCURS + 1).is_effectively_unbounded());
140
141        // Explicitly unbounded
142        assert!(MaxOccurs::Unbounded.is_effectively_unbounded());
143    }
144
145    #[test]
146    fn test_max_occurs_default() {
147        assert_eq!(MaxOccurs::default(), MaxOccurs::Bounded(1));
148    }
149}