xsd_schema/compiler/particle.rs
1//! Particle occurrence handling with threshold optimization
2//!
3//! This module implements occurrence constraint handling for XSD particles
4//! (minOccurs/maxOccurs) with optimization for large maxOccurs values.
5//!
6//! For small maxOccurs (≤ COUNTED_THRESHOLD), NFA fragments are unrolled
7//! (cloned). For larger values (up to MAX_COUNTED_OCCURS), counted NFA
8//! transitions are used for O(1) state overhead. Values above
9//! MAX_COUNTED_OCCURS are treated as unbounded.
10
11use super::fragment::NfaFragment;
12
13/// Threshold above which counted NFA is used instead of unrolling.
14///
15/// Values ≤ this threshold are unrolled (cloned fragments).
16/// Values above use counted transitions with O(1) extra states.
17pub const COUNTED_THRESHOLD: u32 = 16;
18
19/// Maximum maxOccurs value before treating as unbounded.
20///
21/// Values above this cap fall back to unbounded treatment to bound the
22/// O(maxOccurs) cost of epsilon closure for nullable loop bodies.
23/// Raises the correctness ceiling from the old limit of 100 to 10000.
24pub const MAX_COUNTED_OCCURS: u32 = 10_000;
25
26/// MaxOccurs value representation
27///
28/// Represents the maxOccurs constraint from XSD, which can be either
29/// a bounded positive integer or unbounded.
30#[derive(Debug, Clone, Copy, PartialEq, Eq)]
31pub enum MaxOccurs {
32 /// Unbounded (no maximum limit)
33 Unbounded,
34 /// Bounded to a specific maximum value
35 Bounded(u32),
36}
37
38impl MaxOccurs {
39 /// Create from an Option, where None means unbounded
40 pub fn from_option(max: Option<u32>) -> Self {
41 match max {
42 Some(n) => MaxOccurs::Bounded(n),
43 None => MaxOccurs::Unbounded,
44 }
45 }
46
47 /// Convert to Option for compatibility with fragment methods
48 pub fn to_option(&self) -> Option<u32> {
49 match self {
50 MaxOccurs::Unbounded => None,
51 MaxOccurs::Bounded(n) => Some(*n),
52 }
53 }
54
55 /// Check if this value is effectively unbounded.
56 ///
57 /// Returns true if:
58 /// - The value is explicitly Unbounded, or
59 /// - The bounded value exceeds MAX_COUNTED_OCCURS
60 ///
61 /// Values up to MAX_COUNTED_OCCURS are handled exactly by counted NFA.
62 /// Values above fall back to unbounded to bound runtime cost.
63 pub fn is_effectively_unbounded(&self) -> bool {
64 match self {
65 MaxOccurs::Unbounded => true,
66 MaxOccurs::Bounded(n) => *n > MAX_COUNTED_OCCURS,
67 }
68 }
69
70 /// Check if this is explicitly unbounded
71 pub fn is_unbounded(&self) -> bool {
72 matches!(self, MaxOccurs::Unbounded)
73 }
74}
75
76impl Default for MaxOccurs {
77 fn default() -> Self {
78 MaxOccurs::Bounded(1)
79 }
80}
81
82/// Apply occurrence constraints with threshold-based dispatch.
83///
84/// - Small bounded (≤ COUNTED_THRESHOLD): unroll via repeat_range
85/// - Large bounded (≤ MAX_COUNTED_OCCURS): counted NFA via repeat_counted
86/// - Unbounded or > MAX_COUNTED_OCCURS: Kleene star via repeat_range
87pub fn apply_occurs(frag: NfaFragment, min: u32, max: MaxOccurs) -> NfaFragment {
88 let effective_max = if max.is_effectively_unbounded() {
89 None // Treat as unbounded
90 } else {
91 max.to_option()
92 };
93
94 match effective_max {
95 // Unbounded with large min → counted exact prefix + star tail
96 None if min > COUNTED_THRESHOLD => frag
97 .clone()
98 .repeat_counted(min, min)
99 .concat(frag.repeat_star()),
100 // Unbounded with small min → existing unroll via repeat_range (star/plus)
101 None => frag.repeat_range(min, None),
102 // Small bounded → existing unroll via repeat_range
103 Some(m) if m <= COUNTED_THRESHOLD => frag.repeat_range(min, Some(m)),
104 // Large bounded → counted construction
105 Some(m) if min == 0 => frag.repeat_counted(0, m),
106 Some(m) if min <= COUNTED_THRESHOLD => frag
107 .clone()
108 .repeat_exact(min)
109 .concat(frag.repeat_counted(0, m - min)),
110 Some(m) => frag.repeat_counted(min, m),
111 }
112}
113
114#[cfg(test)]
115mod tests {
116 use super::*;
117
118 #[test]
119 fn test_max_occurs_from_option() {
120 assert_eq!(MaxOccurs::from_option(Some(5)), MaxOccurs::Bounded(5));
121 assert_eq!(MaxOccurs::from_option(None), MaxOccurs::Unbounded);
122 }
123
124 #[test]
125 fn test_max_occurs_to_option() {
126 assert_eq!(MaxOccurs::Bounded(5).to_option(), Some(5));
127 assert_eq!(MaxOccurs::Unbounded.to_option(), None);
128 }
129
130 #[test]
131 fn test_max_occurs_effectively_unbounded() {
132 // Below MAX_COUNTED_OCCURS - NOT effectively unbounded
133 assert!(!MaxOccurs::Bounded(50).is_effectively_unbounded());
134 assert!(!MaxOccurs::Bounded(100).is_effectively_unbounded());
135 assert!(!MaxOccurs::Bounded(1000).is_effectively_unbounded());
136 assert!(!MaxOccurs::Bounded(MAX_COUNTED_OCCURS).is_effectively_unbounded());
137
138 // Above MAX_COUNTED_OCCURS - effectively unbounded
139 assert!(MaxOccurs::Bounded(MAX_COUNTED_OCCURS + 1).is_effectively_unbounded());
140
141 // Explicitly unbounded
142 assert!(MaxOccurs::Unbounded.is_effectively_unbounded());
143 }
144
145 #[test]
146 fn test_max_occurs_default() {
147 assert_eq!(MaxOccurs::default(), MaxOccurs::Bounded(1));
148 }
149}