1use std::collections::{HashMap, HashSet, VecDeque};
7
8use super::substitution::SubstitutionGroupMap;
9use crate::ids::{ElementKey, NameId, TypeKey};
10use crate::parser::location::SourceRef;
11use crate::schema::model::XsdVersion;
12use crate::types::complex::{not_qnames_exclude, NamespaceConstraint, ProcessContents};
13
14pub type StateId = u32;
16
17pub type CounterId = u16;
22
23#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
29pub struct CounterRange {
30 pub lo: u32,
31 pub hi: u32,
32}
33
34impl CounterRange {
35 pub fn single(v: u32) -> Self {
37 Self { lo: v, hi: v }
38 }
39
40 pub fn new(lo: u32, hi: u32) -> Self {
42 debug_assert!(lo <= hi, "CounterRange::new({lo}, {hi}): lo > hi");
43 Self { lo, hi }
44 }
45
46 pub fn is_empty(self) -> bool {
48 self.lo > self.hi
49 }
50
51 pub fn subsumes(self, other: Self) -> bool {
53 self.lo <= other.lo && self.hi >= other.hi
54 }
55
56 pub fn union(self, other: Self) -> Self {
59 let result = Self {
60 lo: self.lo.min(other.lo),
61 hi: self.hi.max(other.hi),
62 };
63 debug_assert!(result.lo <= result.hi);
64 result
65 }
66
67 pub fn intersect_below(self, max_exclusive: u32) -> Self {
70 if max_exclusive == 0 || self.lo >= max_exclusive {
71 return Self { lo: 1, hi: 0 };
73 }
74 Self {
75 lo: self.lo,
76 hi: self.hi.min(max_exclusive - 1),
77 }
78 }
79
80 pub fn intersect_above(self, min_inclusive: u32) -> Self {
83 if self.hi < min_inclusive {
84 return Self { lo: 1, hi: 0 };
85 }
86 Self {
87 lo: self.lo.max(min_inclusive),
88 hi: self.hi,
89 }
90 }
91}
92
93#[derive(Debug, Clone, Copy, PartialEq, Eq)]
99pub struct CounterDef {
100 pub min: u32,
102 pub max: u32,
104 pub body_nullable: bool,
108}
109
110#[derive(Debug, Clone)]
115pub struct NfaTable {
116 pub states: Vec<NfaState>,
118 pub start_state: StateId,
120 pub accept_state: StateId,
122 pub counter_defs: Vec<CounterDef>,
124}
125
126impl NfaTable {
127 pub fn new(states: Vec<NfaState>, start_state: StateId, accept_state: StateId) -> Self {
129 Self {
130 states,
131 start_state,
132 accept_state,
133 counter_defs: Vec::new(),
134 }
135 }
136
137 pub fn with_counters(
139 states: Vec<NfaState>,
140 start_state: StateId,
141 accept_state: StateId,
142 counter_defs: Vec<CounterDef>,
143 ) -> Self {
144 Self {
145 states,
146 start_state,
147 accept_state,
148 counter_defs,
149 }
150 }
151
152 pub fn has_counters(&self) -> bool {
154 !self.counter_defs.is_empty()
155 }
156
157 pub fn get_state(&self, id: StateId) -> Option<&NfaState> {
159 self.states.get(id as usize)
160 }
161
162 pub fn get_state_mut(&mut self, id: StateId) -> Option<&mut NfaState> {
164 self.states.get_mut(id as usize)
165 }
166
167 pub fn state_count(&self) -> usize {
169 self.states.len()
170 }
171
172 pub fn is_accept(&self, state_id: StateId) -> bool {
174 state_id == self.accept_state
175 }
176
177 pub fn concat(mut self, mut other: NfaTable) -> NfaTable {
181 let state_offset = self.states.len() as StateId;
182 let counter_offset = self.counter_defs.len() as CounterId;
183 for state in &mut other.states {
184 state.id += state_offset;
185 for trans in &mut state.transitions {
186 trans.target += state_offset;
187 trans.kind = trans.kind.offset_counter(counter_offset);
188 }
189 }
190 let other_start = other.start_state + state_offset;
191 self.states[self.accept_state as usize].add_epsilon(other_start);
192 let new_accept = other.accept_state + state_offset;
193 self.states.extend(other.states);
194 self.counter_defs.extend(other.counter_defs);
195 NfaTable::with_counters(self.states, self.start_state, new_accept, self.counter_defs)
196 }
197
198 pub fn transitions_from(&self, state_id: StateId) -> &[NfaTransition] {
200 self.get_state(state_id)
201 .map(|s| s.transitions.as_slice())
202 .unwrap_or(&[])
203 }
204}
205
206#[derive(Debug, Clone)]
212pub struct NfaState {
213 pub id: StateId,
215 pub term: Option<NfaTerm>,
218 pub transitions: Vec<NfaTransition>,
220 pub origin: Option<SourceRef>,
222}
223
224impl NfaState {
225 pub fn epsilon(id: StateId, origin: Option<SourceRef>) -> Self {
227 Self {
228 id,
229 term: None,
230 transitions: Vec::new(),
231 origin,
232 }
233 }
234
235 pub fn with_term(id: StateId, term: NfaTerm, origin: Option<SourceRef>) -> Self {
237 Self {
238 id,
239 term: Some(term),
240 transitions: Vec::new(),
241 origin,
242 }
243 }
244
245 pub fn add_transition(&mut self, target: StateId, kind: TransitionKind) {
247 self.transitions.push(NfaTransition { target, kind });
248 }
249
250 pub fn add_epsilon(&mut self, target: StateId) {
252 self.add_transition(target, TransitionKind::Epsilon);
253 }
254
255 pub fn add_consume(&mut self, target: StateId) {
257 self.add_transition(target, TransitionKind::Consume);
258 }
259
260 pub fn is_epsilon(&self) -> bool {
262 self.term.is_none()
263 }
264
265 pub fn epsilon_transitions(&self) -> impl Iterator<Item = StateId> + '_ {
267 self.transitions
268 .iter()
269 .filter(|t| t.kind == TransitionKind::Epsilon)
270 .map(|t| t.target)
271 }
272
273 pub fn consuming_transitions(&self) -> impl Iterator<Item = StateId> + '_ {
275 self.transitions
276 .iter()
277 .filter(|t| t.kind == TransitionKind::Consume)
278 .map(|t| t.target)
279 }
280}
281
282#[derive(Debug, Clone)]
287pub enum NfaTerm {
288 Element {
290 name: NameId,
292 namespace: Option<NameId>,
294 element_key: Option<ElementKey>,
296 resolved_type: Option<TypeKey>,
298 },
299 Wildcard {
301 namespace_constraint: NamespaceConstraint,
303 process_contents: ProcessContents,
305 not_qnames: Vec<(Option<NameId>, NameId)>,
307 },
308}
309
310impl NfaTerm {
311 pub fn element(
313 name: NameId,
314 namespace: Option<NameId>,
315 element_key: Option<ElementKey>,
316 ) -> Self {
317 NfaTerm::Element {
318 name,
319 namespace,
320 element_key,
321 resolved_type: None,
322 }
323 }
324
325 pub fn element_with_type(
327 name: NameId,
328 namespace: Option<NameId>,
329 element_key: Option<ElementKey>,
330 resolved_type: Option<TypeKey>,
331 ) -> Self {
332 NfaTerm::Element {
333 name,
334 namespace,
335 element_key,
336 resolved_type,
337 }
338 }
339
340 pub fn wildcard(
342 namespace_constraint: NamespaceConstraint,
343 process_contents: ProcessContents,
344 ) -> Self {
345 NfaTerm::Wildcard {
346 namespace_constraint,
347 process_contents,
348 not_qnames: Vec::new(),
349 }
350 }
351
352 pub fn wildcard_with_not_qnames(
354 namespace_constraint: NamespaceConstraint,
355 process_contents: ProcessContents,
356 not_qnames: Vec<(Option<NameId>, NameId)>,
357 ) -> Self {
358 NfaTerm::Wildcard {
359 namespace_constraint,
360 process_contents,
361 not_qnames,
362 }
363 }
364
365 pub fn is_element(&self) -> bool {
367 matches!(self, NfaTerm::Element { .. })
368 }
369
370 pub fn is_wildcard(&self) -> bool {
372 matches!(self, NfaTerm::Wildcard { .. })
373 }
374}
375
376#[derive(Debug, Clone, Copy, PartialEq, Eq)]
378pub struct NfaTransition {
379 pub target: StateId,
381 pub kind: TransitionKind,
383}
384
385impl NfaTransition {
386 pub fn new(target: StateId, kind: TransitionKind) -> Self {
388 Self { target, kind }
389 }
390
391 pub fn epsilon(target: StateId) -> Self {
393 Self::new(target, TransitionKind::Epsilon)
394 }
395
396 pub fn consume(target: StateId) -> Self {
398 Self::new(target, TransitionKind::Consume)
399 }
400}
401
402#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
404pub enum TransitionKind {
405 Epsilon,
407 Consume,
409 CounterReset(CounterId),
411 CounterIncrement(CounterId),
413 CounterMaxGuard(CounterId),
415 CounterMinGuard(CounterId),
417}
418
419impl TransitionKind {
420 pub fn is_epsilon_like(&self) -> bool {
423 !matches!(self, TransitionKind::Consume)
424 }
425
426 pub fn offset_counter(self, offset: CounterId) -> Self {
429 if offset == 0 {
430 return self;
431 }
432 match self {
433 TransitionKind::CounterReset(c) => TransitionKind::CounterReset(c + offset),
434 TransitionKind::CounterIncrement(c) => TransitionKind::CounterIncrement(c + offset),
435 TransitionKind::CounterMaxGuard(c) => TransitionKind::CounterMaxGuard(c + offset),
436 TransitionKind::CounterMinGuard(c) => TransitionKind::CounterMinGuard(c + offset),
437 other => other,
438 }
439 }
440}
441
442pub fn epsilon_closure(
448 nfa: &NfaTable,
449 start_states: impl IntoIterator<Item = StateId>,
450) -> HashSet<StateId> {
451 debug_assert!(
452 !nfa.has_counters(),
453 "epsilon_closure called on counted NFA; use ActiveStates instead"
454 );
455 let mut closure = HashSet::new();
456 let mut stack: Vec<StateId> = start_states.into_iter().collect();
457
458 while let Some(state_id) = stack.pop() {
459 if !closure.insert(state_id) {
460 continue;
461 }
462 if let Some(state) = nfa.get_state(state_id) {
463 for target in state.epsilon_transitions() {
464 if !closure.contains(&target) {
465 stack.push(target);
466 }
467 }
468 }
469 }
470
471 closure
472}
473
474pub fn term_matches(
476 term: &NfaTerm,
477 element_name: NameId,
478 element_namespace: Option<NameId>,
479 target_namespace: Option<NameId>,
480 substitution_groups: Option<&SubstitutionGroupMap>,
481 xsd_version: XsdVersion,
482) -> bool {
483 match term {
484 NfaTerm::Element {
485 name,
486 namespace,
487 element_key,
488 ..
489 } => {
490 if let (Some(map), Some(key)) = (substitution_groups, element_key) {
491 if let Some(names) = map.get(key) {
492 return names.contains(&(element_name, element_namespace));
493 }
494 }
495 *name == element_name && *namespace == element_namespace
496 }
497 NfaTerm::Wildcard {
498 namespace_constraint,
499 not_qnames,
500 ..
501 } => {
502 if !namespace_constraint.matches(element_namespace, target_namespace, xsd_version) {
503 return false;
504 }
505 !not_qnames_exclude(not_qnames, element_namespace, element_name)
506 }
507 }
508}
509
510pub fn advance_states(
515 nfa: &NfaTable,
516 start_states: impl IntoIterator<Item = StateId>,
517 element_name: NameId,
518 element_namespace: Option<NameId>,
519 target_namespace: Option<NameId>,
520 substitution_groups: Option<&SubstitutionGroupMap>,
521 xsd_version: XsdVersion,
522) -> HashSet<StateId> {
523 let closure = epsilon_closure(nfa, start_states);
524 let mut next = HashSet::new();
525
526 for state_id in closure {
527 let state = match nfa.get_state(state_id) {
528 Some(state) => state,
529 None => continue,
530 };
531 let term = match state.term.as_ref() {
532 Some(term) => term,
533 None => continue,
534 };
535
536 if term_matches(
537 term,
538 element_name,
539 element_namespace,
540 target_namespace,
541 substitution_groups,
542 xsd_version,
543 ) {
544 for target in state.consuming_transitions() {
545 next.insert(target);
546 }
547 }
548 }
549
550 epsilon_closure(nfa, next)
551}
552
553pub fn advance_with_priority(
558 nfa: &NfaTable,
559 start_states: impl IntoIterator<Item = StateId>,
560 element_name: NameId,
561 element_namespace: Option<NameId>,
562 target_namespace: Option<NameId>,
563 substitution_groups: Option<&SubstitutionGroupMap>,
564 xsd_version: XsdVersion,
565) -> HashSet<StateId> {
566 let closure = epsilon_closure(nfa, start_states);
567 let mut element_targets = HashSet::new();
568 let mut wildcard_targets = HashSet::new();
569
570 for state_id in closure {
571 let state = match nfa.get_state(state_id) {
572 Some(state) => state,
573 None => continue,
574 };
575 let term = match state.term.as_ref() {
576 Some(term) => term,
577 None => continue,
578 };
579
580 if !term_matches(
581 term,
582 element_name,
583 element_namespace,
584 target_namespace,
585 substitution_groups,
586 xsd_version,
587 ) {
588 continue;
589 }
590
591 match term {
592 NfaTerm::Element { .. } => {
593 for target in state.consuming_transitions() {
594 element_targets.insert(target);
595 }
596 }
597 NfaTerm::Wildcard { .. } => {
598 for target in state.consuming_transitions() {
599 wildcard_targets.insert(target);
600 }
601 }
602 }
603 }
604
605 let next = if !element_targets.is_empty() {
606 element_targets
607 } else {
608 wildcard_targets
609 };
610
611 epsilon_closure(nfa, next)
612}
613
614#[derive(Debug, Clone, Hash, Eq, PartialEq)]
624pub struct ActiveConfig {
625 pub state_id: StateId,
626 pub counters: Box<[u32]>,
627}
628
629impl ActiveConfig {
630 pub fn initial(state_id: StateId, num_counters: usize) -> Self {
632 Self {
633 state_id,
634 counters: vec![0; num_counters].into_boxed_slice(),
635 }
636 }
637
638 fn with_state(&self, new_state: StateId) -> Self {
640 Self {
641 state_id: new_state,
642 counters: self.counters.clone(),
643 }
644 }
645
646 fn with_counter_set(&self, new_state: StateId, counter_id: CounterId, value: u32) -> Self {
648 let mut new = self.with_state(new_state);
649 new.counters[counter_id as usize] = value;
650 new
651 }
652}
653
654#[derive(Debug, Clone, Hash, Eq, PartialEq)]
660pub struct HybridKey {
661 state_id: StateId,
662 counters: Box<[u32]>,
663}
664
665impl HybridKey {
666 fn initial(state_id: StateId, num_counters: usize) -> Self {
668 Self {
669 state_id,
670 counters: vec![0; num_counters].into_boxed_slice(),
671 }
672 }
673
674 fn with_state(&self, new_state: StateId) -> Self {
676 Self {
677 state_id: new_state,
678 counters: self.counters.clone(),
679 }
680 }
681
682 fn with_scalar_counter(
686 &self,
687 new_state: StateId,
688 counter_id: CounterId,
689 value: u32,
690 ranged_counter_idx: usize,
691 ) -> Self {
692 debug_assert!(
693 counter_id as usize != ranged_counter_idx,
694 "with_scalar_counter called on ranged counter {counter_id}"
695 );
696 let mut new = self.with_state(new_state);
697 new.counters[counter_id as usize] = value;
698 new
699 }
700
701 fn counter(&self, counter_id: CounterId) -> u32 {
703 self.counters[counter_id as usize]
704 }
705
706 fn repacked(&self, old_ranged_idx: usize, new_ranged_idx: usize) -> (Self, u32) {
713 let scalar_val = self.counters[new_ranged_idx];
714 let mut new_counters = self.counters.clone();
715 new_counters[old_ranged_idx] = 0;
716 new_counters[new_ranged_idx] = 0; (
718 Self {
719 state_id: self.state_id,
720 counters: new_counters,
721 },
722 scalar_val,
723 )
724 }
725}
726
727fn ranged_single_epsilon_closure(
741 nfa: &NfaTable,
742 seeds: HashMap<StateId, CounterRange>,
743 counter_def: CounterDef,
744) -> ActiveStates {
745 let mut map: HashMap<StateId, CounterRange> = HashMap::new();
746 let mut worklist: VecDeque<(StateId, CounterRange)> = seeds.into_iter().collect();
747
748 while let Some((state_id, range)) = worklist.pop_front() {
749 if let Some(&existing) = map.get(&state_id) {
751 if existing.subsumes(range) {
752 continue;
753 }
754 let merged = existing.union(range);
756 debug_assert!(
757 merged.lo <= merged.hi,
758 "contiguity invariant violated at state {state_id}"
759 );
760 map.insert(state_id, merged);
761 } else {
762 map.insert(state_id, range);
763 }
764
765 if let Some(state) = nfa.get_state(state_id) {
767 for trans in &state.transitions {
768 let next = match trans.kind {
769 TransitionKind::Epsilon => Some(range),
770 TransitionKind::CounterReset(_) => Some(CounterRange::single(0)),
771 TransitionKind::CounterIncrement(_) => {
772 Some(CounterRange::new(range.lo + 1, counter_def.max))
775 }
776 TransitionKind::CounterMaxGuard(_) => {
777 let clamped = range.intersect_below(counter_def.max);
778 if clamped.is_empty() {
779 None
780 } else {
781 Some(clamped)
782 }
783 }
784 TransitionKind::CounterMinGuard(_) => {
785 let passed = range.intersect_above(counter_def.min);
786 if passed.is_empty() {
787 None
788 } else {
789 Some(CounterRange::single(0))
791 }
792 }
793 TransitionKind::Consume => None,
794 };
795 if let Some(next_range) = next {
796 let dominated = map
798 .get(&trans.target)
799 .is_some_and(|r| r.subsumes(next_range));
800 if !dominated {
801 worklist.push_back((trans.target, next_range));
802 }
803 }
804 }
805 }
806 }
807
808 ActiveStates::RangedSingle {
809 state_ranges: map,
810 counter_def,
811 }
812}
813
814fn hybrid_epsilon_closure(
824 nfa: &NfaTable,
825 seeds: HashMap<HybridKey, CounterRange>,
826 ranged_counter_idx: usize,
827 num_counters: usize,
828) -> ActiveStates {
829 let ranged_id = ranged_counter_idx as CounterId;
830 let ranged_def = nfa.counter_defs[ranged_counter_idx];
831
832 let mut map: HashMap<HybridKey, CounterRange> = HashMap::new();
833 let mut worklist: VecDeque<(HybridKey, CounterRange)> = seeds.into_iter().collect();
834
835 while let Some((key, range)) = worklist.pop_front() {
836 if let Some(&existing) = map.get(&key) {
838 if existing.subsumes(range) {
839 continue;
840 }
841 let merged = existing.union(range);
842 debug_assert!(
843 merged.lo <= merged.hi,
844 "contiguity invariant violated at state {}",
845 key.state_id
846 );
847 map.insert(key.clone(), merged);
848 } else {
849 map.insert(key.clone(), range);
850 }
851
852 if let Some(state) = nfa.get_state(key.state_id) {
853 for trans in &state.transitions {
854 let next: Option<(HybridKey, CounterRange)> = match trans.kind {
855 TransitionKind::Epsilon => Some((key.with_state(trans.target), range)),
856
857 TransitionKind::CounterReset(c) if c == ranged_id => {
859 Some((key.with_state(trans.target), CounterRange::single(0)))
860 }
861 TransitionKind::CounterIncrement(c) if c == ranged_id => {
862 Some((
864 key.with_state(trans.target),
865 CounterRange::new(range.lo + 1, ranged_def.max),
866 ))
867 }
868 TransitionKind::CounterMaxGuard(c) if c == ranged_id => {
869 let clamped = range.intersect_below(ranged_def.max);
870 if clamped.is_empty() {
871 None
872 } else {
873 Some((key.with_state(trans.target), clamped))
874 }
875 }
876 TransitionKind::CounterMinGuard(c) if c == ranged_id => {
877 let passed = range.intersect_above(ranged_def.min);
878 if passed.is_empty() {
879 None
880 } else {
881 Some((key.with_state(trans.target), CounterRange::single(0)))
883 }
884 }
885
886 TransitionKind::CounterReset(c) => Some((
888 key.with_scalar_counter(trans.target, c, 0, ranged_counter_idx),
889 range,
890 )),
891 TransitionKind::CounterIncrement(c) => {
892 let val = key.counter(c) + 1;
893 Some((
894 key.with_scalar_counter(trans.target, c, val, ranged_counter_idx),
895 range,
896 ))
897 }
898 TransitionKind::CounterMaxGuard(c) => {
899 if key.counter(c) < nfa.counter_defs[c as usize].max {
900 Some((key.with_state(trans.target), range))
901 } else {
902 None
903 }
904 }
905 TransitionKind::CounterMinGuard(c) => {
906 if key.counter(c) >= nfa.counter_defs[c as usize].min {
907 Some((
909 key.with_scalar_counter(trans.target, c, 0, ranged_counter_idx),
910 range,
911 ))
912 } else {
913 None
914 }
915 }
916
917 TransitionKind::Consume => None,
918 };
919
920 if let Some((next_key, next_range)) = next {
921 let dominated = map.get(&next_key).is_some_and(|r| r.subsumes(next_range));
922 if !dominated {
923 worklist.push_back((next_key, next_range));
924 }
925 }
926 }
927 }
928 }
929
930 let result = ActiveStates::Hybrid {
931 configs: map,
932 ranged_counter_idx,
933 num_counters,
934 };
935 result.maybe_switch_ranged_counter(nfa)
936}
937
938#[derive(Debug, Clone)]
950pub enum ActiveStates {
951 Simple(HashSet<StateId>),
953 Counted {
955 configs: HashSet<ActiveConfig>,
956 num_counters: usize,
957 },
958 RangedSingle {
961 state_ranges: HashMap<StateId, CounterRange>,
962 counter_def: CounterDef,
963 },
964 Hybrid {
968 configs: HashMap<HybridKey, CounterRange>,
969 ranged_counter_idx: usize,
970 num_counters: usize,
971 },
972}
973
974impl ActiveStates {
975 pub fn from_nfa(nfa: &NfaTable) -> Self {
977 use super::particle::COUNTED_THRESHOLD;
978
979 if nfa.has_counters() {
980 if nfa.counter_defs.len() == 1 && nfa.counter_defs[0].body_nullable {
982 let mut state_ranges = HashMap::new();
983 state_ranges.insert(nfa.start_state, CounterRange::single(0));
984 let result = ActiveStates::RangedSingle {
985 state_ranges,
986 counter_def: nfa.counter_defs[0],
987 };
988 return result.epsilon_closure(nfa);
989 }
990
991 if nfa.counter_defs.len() > 1 {
998 let best_nullable = nfa
999 .counter_defs
1000 .iter()
1001 .enumerate()
1002 .filter(|(_, def)| def.body_nullable)
1003 .max_by_key(|(idx, def)| (def.max, std::cmp::Reverse(*idx)));
1004
1005 if let Some((ranged_idx, def)) = best_nullable {
1006 if def.max > COUNTED_THRESHOLD {
1007 let num_counters = nfa.counter_defs.len();
1008 let initial_key = HybridKey::initial(nfa.start_state, num_counters);
1009 let mut configs = HashMap::new();
1010 configs.insert(initial_key, CounterRange::single(0));
1011 let result = ActiveStates::Hybrid {
1012 configs,
1013 ranged_counter_idx: ranged_idx,
1014 num_counters,
1015 };
1016 return result.epsilon_closure(nfa);
1017 }
1018 }
1019 }
1020
1021 let initial = ActiveConfig::initial(nfa.start_state, nfa.counter_defs.len());
1023 let mut configs = HashSet::new();
1024 configs.insert(initial);
1025 let result = ActiveStates::Counted {
1026 configs,
1027 num_counters: nfa.counter_defs.len(),
1028 };
1029 result.epsilon_closure(nfa)
1030 } else {
1031 let simple = epsilon_closure(nfa, std::iter::once(nfa.start_state));
1032 ActiveStates::Simple(simple)
1033 }
1034 }
1035
1036 pub fn is_empty(&self) -> bool {
1038 match self {
1039 ActiveStates::Simple(s) => s.is_empty(),
1040 ActiveStates::Counted { configs, .. } => configs.is_empty(),
1041 ActiveStates::RangedSingle { state_ranges, .. } => state_ranges.is_empty(),
1042 ActiveStates::Hybrid { configs, .. } => configs.is_empty(),
1043 }
1044 }
1045
1046 pub fn contains_accept(&self, nfa: &NfaTable) -> bool {
1048 match self {
1049 ActiveStates::Simple(s) => s.iter().any(|&id| nfa.is_accept(id)),
1050 ActiveStates::Counted { configs, .. } => {
1051 configs.iter().any(|c| nfa.is_accept(c.state_id))
1052 }
1053 ActiveStates::RangedSingle { state_ranges, .. } => {
1054 state_ranges.contains_key(&nfa.accept_state)
1055 }
1056 ActiveStates::Hybrid { configs, .. } => {
1057 configs.keys().any(|k| nfa.is_accept(k.state_id))
1058 }
1059 }
1060 }
1061
1062 pub fn epsilon_closure(self, nfa: &NfaTable) -> Self {
1064 match self {
1065 ActiveStates::Simple(states) => ActiveStates::Simple(epsilon_closure(nfa, states)),
1066 ActiveStates::Counted {
1067 configs,
1068 num_counters,
1069 } => {
1070 let mut result: HashSet<ActiveConfig> = HashSet::new();
1071 let mut stack: Vec<ActiveConfig> = configs.into_iter().collect();
1072
1073 while let Some(config) = stack.pop() {
1074 if !result.insert(config.clone()) {
1075 continue;
1076 }
1077 if let Some(state) = nfa.get_state(config.state_id) {
1078 for trans in &state.transitions {
1079 let next = match trans.kind {
1080 TransitionKind::Epsilon => Some(config.with_state(trans.target)),
1081 TransitionKind::CounterReset(c) => {
1082 Some(config.with_counter_set(trans.target, c, 0))
1083 }
1084 TransitionKind::CounterIncrement(c) => {
1085 let val = config.counters[c as usize] + 1;
1086 Some(config.with_counter_set(trans.target, c, val))
1087 }
1088 TransitionKind::CounterMaxGuard(c) => {
1089 if config.counters[c as usize]
1090 < nfa.counter_defs[c as usize].max
1091 {
1092 Some(config.with_state(trans.target))
1093 } else {
1094 None
1095 }
1096 }
1097 TransitionKind::CounterMinGuard(c) => {
1098 if config.counters[c as usize]
1099 >= nfa.counter_defs[c as usize].min
1100 {
1101 Some(config.with_counter_set(trans.target, c, 0))
1104 } else {
1105 None
1106 }
1107 }
1108 TransitionKind::Consume => None,
1109 };
1110 if let Some(next_config) = next {
1111 if !result.contains(&next_config) {
1112 stack.push(next_config);
1113 }
1114 }
1115 }
1116 }
1117 }
1118 ActiveStates::Counted {
1119 configs: result,
1120 num_counters,
1121 }
1122 }
1123 ActiveStates::RangedSingle {
1124 state_ranges,
1125 counter_def,
1126 } => ranged_single_epsilon_closure(nfa, state_ranges, counter_def),
1127 ActiveStates::Hybrid {
1128 configs,
1129 ranged_counter_idx,
1130 num_counters,
1131 } => hybrid_epsilon_closure(nfa, configs, ranged_counter_idx, num_counters),
1132 }
1133 }
1134
1135 fn maybe_switch_ranged_counter(self, nfa: &NfaTable) -> Self {
1143 use super::particle::COUNTED_THRESHOLD;
1144
1145 let ActiveStates::Hybrid {
1146 configs,
1147 ranged_counter_idx,
1148 num_counters,
1149 } = self
1150 else {
1151 return self;
1152 };
1153
1154 let dead = CounterRange::single(0);
1155 if configs.is_empty() || !configs.values().all(|r| *r == dead) {
1156 return ActiveStates::Hybrid {
1157 configs,
1158 ranged_counter_idx,
1159 num_counters,
1160 };
1161 }
1162
1163 let nc = nfa.counter_defs.len();
1165 let mut min_vals = vec![u32::MAX; nc];
1166 let mut max_vals = vec![0u32; nc];
1167 for key in configs.keys() {
1168 for idx in 0..nc {
1169 let v = key.counter(idx as CounterId);
1170 min_vals[idx] = min_vals[idx].min(v);
1171 max_vals[idx] = max_vals[idx].max(v);
1172 }
1173 }
1174
1175 let mut best: Option<(usize, u32, u32)> = None; for (idx, def) in nfa.counter_defs.iter().enumerate() {
1180 if idx == ranged_counter_idx || !def.body_nullable || def.max <= COUNTED_THRESHOLD {
1181 continue;
1182 }
1183 if min_vals[idx] == max_vals[idx] {
1184 continue;
1185 }
1186 let spread = max_vals[idx] - min_vals[idx] + 1;
1187 let dominated =
1188 best.is_some_and(|(_, bs, bm)| spread < bs || (spread == bs && def.max <= bm));
1189 if !dominated {
1190 best = Some((idx, spread, def.max));
1191 }
1192 }
1193
1194 let Some((new_ranged_idx, _, _)) = best else {
1195 return ActiveStates::Hybrid {
1196 configs,
1197 ranged_counter_idx,
1198 num_counters,
1199 };
1200 };
1201
1202 let mut new_configs: HashMap<HybridKey, (CounterRange, u32)> =
1205 HashMap::with_capacity(configs.len());
1206 for old_key in configs.keys() {
1207 let (new_key, scalar_val) = old_key.repacked(ranged_counter_idx, new_ranged_idx);
1208 let new_range = CounterRange::single(scalar_val);
1209 new_configs
1210 .entry(new_key)
1211 .and_modify(|(r, count)| {
1212 *r = r.union(new_range);
1213 *count += 1;
1214 })
1215 .or_insert((new_range, 1));
1216 }
1217
1218 for (range, count) in new_configs.values() {
1219 if range.hi - range.lo + 1 != *count {
1220 return ActiveStates::Hybrid {
1221 configs,
1222 ranged_counter_idx,
1223 num_counters,
1224 };
1225 }
1226 }
1227
1228 let repacked = new_configs.into_iter().map(|(k, (r, _))| (k, r)).collect();
1229
1230 ActiveStates::Hybrid {
1231 configs: repacked,
1232 ranged_counter_idx: new_ranged_idx,
1233 num_counters,
1234 }
1235 }
1236
1237 pub fn advance(
1239 self,
1240 nfa: &NfaTable,
1241 element_name: NameId,
1242 element_namespace: Option<NameId>,
1243 target_namespace: Option<NameId>,
1244 substitution_groups: Option<&SubstitutionGroupMap>,
1245 xsd_version: XsdVersion,
1246 ) -> Self {
1247 match self {
1248 ActiveStates::Simple(states) => ActiveStates::Simple(advance_states(
1249 nfa,
1250 states,
1251 element_name,
1252 element_namespace,
1253 target_namespace,
1254 substitution_groups,
1255 xsd_version,
1256 )),
1257 ActiveStates::Counted {
1258 configs,
1259 num_counters,
1260 } => {
1261 let mut next_configs = HashSet::new();
1264 for config in &configs {
1265 if let Some(state) = nfa.get_state(config.state_id) {
1266 if let Some(ref term_val) = state.term {
1267 if term_matches(
1268 term_val,
1269 element_name,
1270 element_namespace,
1271 target_namespace,
1272 substitution_groups,
1273 xsd_version,
1274 ) {
1275 for trans in &state.transitions {
1276 if trans.kind == TransitionKind::Consume {
1277 next_configs.insert(config.with_state(trans.target));
1278 }
1279 }
1280 }
1281 }
1282 }
1283 }
1284 let result = ActiveStates::Counted {
1285 configs: next_configs,
1286 num_counters,
1287 };
1288 result.epsilon_closure(nfa)
1289 }
1290 ActiveStates::RangedSingle {
1291 state_ranges,
1292 counter_def,
1293 } => {
1294 let mut next_seeds: HashMap<StateId, CounterRange> = HashMap::new();
1295 for (&state_id, &range) in &state_ranges {
1296 if let Some(state) = nfa.get_state(state_id) {
1297 if let Some(ref term_val) = state.term {
1298 if term_matches(
1299 term_val,
1300 element_name,
1301 element_namespace,
1302 target_namespace,
1303 substitution_groups,
1304 xsd_version,
1305 ) {
1306 for trans in &state.transitions {
1307 if trans.kind == TransitionKind::Consume {
1308 next_seeds
1309 .entry(trans.target)
1310 .and_modify(|r| *r = r.union(range))
1311 .or_insert(range);
1312 }
1313 }
1314 }
1315 }
1316 }
1317 }
1318 let result = ActiveStates::RangedSingle {
1319 state_ranges: next_seeds,
1320 counter_def,
1321 };
1322 result.epsilon_closure(nfa)
1323 }
1324 ActiveStates::Hybrid {
1325 configs,
1326 ranged_counter_idx,
1327 num_counters,
1328 } => {
1329 let mut next_configs: HashMap<HybridKey, CounterRange> = HashMap::new();
1330 for (key, &range) in &configs {
1331 if let Some(state) = nfa.get_state(key.state_id) {
1332 if let Some(ref term_val) = state.term {
1333 if term_matches(
1334 term_val,
1335 element_name,
1336 element_namespace,
1337 target_namespace,
1338 substitution_groups,
1339 xsd_version,
1340 ) {
1341 for trans in &state.transitions {
1342 if trans.kind == TransitionKind::Consume {
1343 let new_key = key.with_state(trans.target);
1344 next_configs
1345 .entry(new_key)
1346 .and_modify(|r| *r = r.union(range))
1347 .or_insert(range);
1348 }
1349 }
1350 }
1351 }
1352 }
1353 }
1354 let result = ActiveStates::Hybrid {
1355 configs: next_configs,
1356 ranged_counter_idx,
1357 num_counters,
1358 };
1359 result.epsilon_closure(nfa)
1360 }
1361 }
1362 }
1363
1364 pub fn advance_with_priority(
1366 self,
1367 nfa: &NfaTable,
1368 element_name: NameId,
1369 element_namespace: Option<NameId>,
1370 target_namespace: Option<NameId>,
1371 substitution_groups: Option<&SubstitutionGroupMap>,
1372 xsd_version: XsdVersion,
1373 ) -> Self {
1374 match self {
1375 ActiveStates::Simple(states) => ActiveStates::Simple(advance_with_priority(
1376 nfa,
1377 states,
1378 element_name,
1379 element_namespace,
1380 target_namespace,
1381 substitution_groups,
1382 xsd_version,
1383 )),
1384 ActiveStates::Counted {
1385 configs,
1386 num_counters,
1387 } => {
1388 let mut element_configs = HashSet::new();
1389 let mut wildcard_configs = HashSet::new();
1390
1391 for config in &configs {
1392 if let Some(state) = nfa.get_state(config.state_id) {
1393 if let Some(ref term_val) = state.term {
1394 if term_matches(
1395 term_val,
1396 element_name,
1397 element_namespace,
1398 target_namespace,
1399 substitution_groups,
1400 xsd_version,
1401 ) {
1402 let target_set = match term_val {
1403 NfaTerm::Element { .. } => &mut element_configs,
1404 NfaTerm::Wildcard { .. } => &mut wildcard_configs,
1405 };
1406 for trans in &state.transitions {
1407 if trans.kind == TransitionKind::Consume {
1408 target_set.insert(config.with_state(trans.target));
1409 }
1410 }
1411 }
1412 }
1413 }
1414 }
1415
1416 let next = if !element_configs.is_empty() {
1417 element_configs
1418 } else {
1419 wildcard_configs
1420 };
1421
1422 let result = ActiveStates::Counted {
1423 configs: next,
1424 num_counters,
1425 };
1426 result.epsilon_closure(nfa)
1427 }
1428 ActiveStates::RangedSingle {
1429 state_ranges,
1430 counter_def,
1431 } => {
1432 let mut element_seeds: HashMap<StateId, CounterRange> = HashMap::new();
1433 let mut wildcard_seeds: HashMap<StateId, CounterRange> = HashMap::new();
1434
1435 for (&state_id, &range) in &state_ranges {
1436 if let Some(state) = nfa.get_state(state_id) {
1437 if let Some(ref term_val) = state.term {
1438 if term_matches(
1439 term_val,
1440 element_name,
1441 element_namespace,
1442 target_namespace,
1443 substitution_groups,
1444 xsd_version,
1445 ) {
1446 let target_map = match term_val {
1447 NfaTerm::Element { .. } => &mut element_seeds,
1448 NfaTerm::Wildcard { .. } => &mut wildcard_seeds,
1449 };
1450 for trans in &state.transitions {
1451 if trans.kind == TransitionKind::Consume {
1452 target_map
1453 .entry(trans.target)
1454 .and_modify(|r| *r = r.union(range))
1455 .or_insert(range);
1456 }
1457 }
1458 }
1459 }
1460 }
1461 }
1462
1463 let next = if !element_seeds.is_empty() {
1464 element_seeds
1465 } else {
1466 wildcard_seeds
1467 };
1468
1469 let result = ActiveStates::RangedSingle {
1470 state_ranges: next,
1471 counter_def,
1472 };
1473 result.epsilon_closure(nfa)
1474 }
1475 ActiveStates::Hybrid {
1476 configs,
1477 ranged_counter_idx,
1478 num_counters,
1479 } => {
1480 let mut element_configs: HashMap<HybridKey, CounterRange> = HashMap::new();
1481 let mut wildcard_configs: HashMap<HybridKey, CounterRange> = HashMap::new();
1482
1483 for (key, &range) in &configs {
1484 if let Some(state) = nfa.get_state(key.state_id) {
1485 if let Some(ref term_val) = state.term {
1486 if term_matches(
1487 term_val,
1488 element_name,
1489 element_namespace,
1490 target_namespace,
1491 substitution_groups,
1492 xsd_version,
1493 ) {
1494 let target_map = match term_val {
1495 NfaTerm::Element { .. } => &mut element_configs,
1496 NfaTerm::Wildcard { .. } => &mut wildcard_configs,
1497 };
1498 for trans in &state.transitions {
1499 if trans.kind == TransitionKind::Consume {
1500 let new_key = key.with_state(trans.target);
1501 target_map
1502 .entry(new_key)
1503 .and_modify(|r| *r = r.union(range))
1504 .or_insert(range);
1505 }
1506 }
1507 }
1508 }
1509 }
1510 }
1511
1512 let next = if !element_configs.is_empty() {
1513 element_configs
1514 } else {
1515 wildcard_configs
1516 };
1517
1518 let result = ActiveStates::Hybrid {
1519 configs: next,
1520 ranged_counter_idx,
1521 num_counters,
1522 };
1523 result.epsilon_closure(nfa)
1524 }
1525 }
1526 }
1527
1528 pub fn find_match_info(
1534 &self,
1535 nfa: &NfaTable,
1536 name: NameId,
1537 namespace: Option<NameId>,
1538 target_ns: Option<NameId>,
1539 subst_groups: Option<&SubstitutionGroupMap>,
1540 xsd_version: XsdVersion,
1541 ) -> MatchInfo {
1542 match self {
1543 ActiveStates::Simple(states) => {
1544 let closure = epsilon_closure(nfa, states.iter().copied());
1547 find_match_info_in_states(
1548 nfa,
1549 closure.iter().copied(),
1550 name,
1551 namespace,
1552 target_ns,
1553 subst_groups,
1554 xsd_version,
1555 )
1556 }
1557 ActiveStates::Counted { configs, .. } => {
1558 find_match_info_in_states(
1560 nfa,
1561 configs.iter().map(|c| c.state_id),
1562 name,
1563 namespace,
1564 target_ns,
1565 subst_groups,
1566 xsd_version,
1567 )
1568 }
1569 ActiveStates::RangedSingle { state_ranges, .. } => find_match_info_in_states(
1570 nfa,
1571 state_ranges.keys().copied(),
1572 name,
1573 namespace,
1574 target_ns,
1575 subst_groups,
1576 xsd_version,
1577 ),
1578 ActiveStates::Hybrid { configs, .. } => find_match_info_in_states(
1579 nfa,
1580 configs.keys().map(|k| k.state_id),
1581 name,
1582 namespace,
1583 target_ns,
1584 subst_groups,
1585 xsd_version,
1586 ),
1587 }
1588 }
1589
1590 pub fn expected_element_terms(
1594 &self,
1595 nfa: &NfaTable,
1596 ) -> Vec<(NameId, Option<NameId>, Option<ElementKey>)> {
1597 let mut result = Vec::new();
1598 match self {
1599 ActiveStates::Simple(states) => {
1600 let closure = epsilon_closure(nfa, states.iter().copied());
1601 for state_id in closure {
1602 if let Some(state) = nfa.get_state(state_id) {
1603 if let Some(NfaTerm::Element {
1604 name,
1605 namespace,
1606 element_key,
1607 ..
1608 }) = &state.term
1609 {
1610 result.push((*name, *namespace, *element_key));
1611 }
1612 }
1613 }
1614 }
1615 ActiveStates::Counted { configs, .. } => {
1616 let mut seen = HashSet::new();
1617 for config in configs {
1618 if !seen.insert(config.state_id) {
1619 continue; }
1621 if let Some(state) = nfa.get_state(config.state_id) {
1622 if let Some(NfaTerm::Element {
1623 name,
1624 namespace,
1625 element_key,
1626 ..
1627 }) = &state.term
1628 {
1629 result.push((*name, *namespace, *element_key));
1630 }
1631 }
1632 }
1633 }
1634 ActiveStates::RangedSingle { state_ranges, .. } => {
1635 for &state_id in state_ranges.keys() {
1636 if let Some(state) = nfa.get_state(state_id) {
1637 if let Some(NfaTerm::Element {
1638 name,
1639 namespace,
1640 element_key,
1641 ..
1642 }) = &state.term
1643 {
1644 result.push((*name, *namespace, *element_key));
1645 }
1646 }
1647 }
1648 }
1649 ActiveStates::Hybrid { configs, .. } => {
1650 let mut seen = HashSet::new();
1651 for key in configs.keys() {
1652 if !seen.insert(key.state_id) {
1653 continue;
1654 }
1655 if let Some(state) = nfa.get_state(key.state_id) {
1656 if let Some(NfaTerm::Element {
1657 name,
1658 namespace,
1659 element_key,
1660 ..
1661 }) = &state.term
1662 {
1663 result.push((*name, *namespace, *element_key));
1664 }
1665 }
1666 }
1667 }
1668 }
1669 result
1670 }
1671}
1672
1673#[derive(Debug, Clone, Copy, Default)]
1675pub struct MatchInfo {
1676 pub element_key: Option<ElementKey>,
1677 pub resolved_type: Option<TypeKey>,
1678 pub process_contents: Option<ProcessContents>,
1679}
1680
1681fn find_match_info_in_states(
1688 nfa: &NfaTable,
1689 state_ids: impl Iterator<Item = StateId>,
1690 name: NameId,
1691 namespace: Option<NameId>,
1692 target_ns: Option<NameId>,
1693 subst_groups: Option<&SubstitutionGroupMap>,
1694 xsd_version: XsdVersion,
1695) -> MatchInfo {
1696 let mut wildcard_info: Option<MatchInfo> = None;
1697
1698 for state_id in state_ids {
1699 if let Some(state) = nfa.get_state(state_id) {
1700 if let Some(ref term) = state.term {
1701 if term_matches(term, name, namespace, target_ns, subst_groups, xsd_version) {
1702 match term {
1703 NfaTerm::Element {
1704 name: term_name,
1705 namespace: term_ns,
1706 element_key,
1707 resolved_type,
1708 } => {
1709 return if *term_name == name && *term_ns == namespace {
1711 MatchInfo {
1713 element_key: *element_key,
1714 resolved_type: *resolved_type,
1715 process_contents: None,
1716 }
1717 } else {
1718 MatchInfo {
1720 element_key: None,
1721 resolved_type: None,
1722 process_contents: None,
1723 }
1724 };
1725 }
1726 NfaTerm::Wildcard {
1727 process_contents, ..
1728 } => {
1729 if wildcard_info.is_none() {
1731 wildcard_info = Some(MatchInfo {
1732 element_key: None,
1733 resolved_type: None,
1734 process_contents: Some(*process_contents),
1735 });
1736 }
1737 }
1738 }
1739 }
1740 }
1741 }
1742 }
1743
1744 wildcard_info.unwrap_or_default()
1745}
1746
1747#[cfg(test)]
1748mod tests {
1749 use super::*;
1750 use std::collections::HashSet;
1751
1752 use crate::compiler::build_substitution_group_map;
1753 use crate::schema::model::{DerivationSet, SchemaSet};
1754
1755 fn element_data(name: NameId) -> crate::arenas::ElementDeclData {
1756 crate::arenas::ElementDeclData {
1757 name: Some(name),
1758 target_namespace: None,
1759 ref_name: None,
1760 type_ref: None,
1761 inline_type: None,
1762 substitution_group: Vec::new(),
1763 default_value: None,
1764 fixed_value: None,
1765 nillable: false,
1766 is_abstract: false,
1767 min_occurs: 1,
1768 max_occurs: Some(1),
1769 block: DerivationSet::empty(),
1770 final_derivation: DerivationSet::empty(),
1771 form: None,
1772 id: None,
1773 alternatives: Vec::new(),
1774 identity_constraints: Vec::new(),
1775 pending_ic_refs: vec![],
1776 annotation: None,
1777 source: None,
1778 resolved_type: None,
1779 resolved_ref: None,
1780 resolved_substitution_groups: Vec::new(),
1781 deferred_type_error: None,
1782 }
1783 }
1784
1785 fn make_set(ids: &[StateId]) -> HashSet<StateId> {
1786 ids.iter().copied().collect()
1787 }
1788
1789 #[test]
1790 fn test_nfa_state_creation() {
1791 let state = NfaState::epsilon(0, None);
1792 assert_eq!(state.id, 0);
1793 assert!(state.is_epsilon());
1794 assert!(state.transitions.is_empty());
1795 }
1796
1797 #[test]
1798 fn test_nfa_state_with_term() {
1799 let term = NfaTerm::element(NameId(1), None, None);
1800 let state = NfaState::with_term(1, term, None);
1801 assert_eq!(state.id, 1);
1802 assert!(!state.is_epsilon());
1803 }
1804
1805 #[test]
1806 fn test_nfa_state_transitions() {
1807 let mut state = NfaState::epsilon(0, None);
1808 state.add_epsilon(1);
1809 state.add_consume(2);
1810
1811 let epsilons: Vec<_> = state.epsilon_transitions().collect();
1812 assert_eq!(epsilons, vec![1]);
1813
1814 let consuming: Vec<_> = state.consuming_transitions().collect();
1815 assert_eq!(consuming, vec![2]);
1816 }
1817
1818 #[test]
1819 fn test_nfa_table() {
1820 let states = vec![
1821 NfaState::epsilon(0, None),
1822 NfaState::with_term(1, NfaTerm::element(NameId(1), None, None), None),
1823 NfaState::epsilon(2, None),
1824 ];
1825 let table = NfaTable::new(states, 0, 2);
1826
1827 assert_eq!(table.state_count(), 3);
1828 assert_eq!(table.start_state, 0);
1829 assert_eq!(table.accept_state, 2);
1830 assert!(table.is_accept(2));
1831 assert!(!table.is_accept(0));
1832 }
1833
1834 #[test]
1835 fn test_nfa_term() {
1836 let elem = NfaTerm::element(NameId(1), Some(NameId(2)), None);
1837 assert!(elem.is_element());
1838 assert!(!elem.is_wildcard());
1839
1840 let wild = NfaTerm::wildcard(NamespaceConstraint::Any, ProcessContents::Lax);
1841 assert!(!wild.is_element());
1842 assert!(wild.is_wildcard());
1843 }
1844
1845 #[test]
1846 fn test_transition_kinds() {
1847 let eps = NfaTransition::epsilon(1);
1848 assert_eq!(eps.kind, TransitionKind::Epsilon);
1849
1850 let cons = NfaTransition::consume(2);
1851 assert_eq!(cons.kind, TransitionKind::Consume);
1852 }
1853
1854 #[test]
1855 fn test_epsilon_closure_basic() {
1856 let mut s0 = NfaState::epsilon(0, None);
1857 s0.add_epsilon(1);
1858 let mut s1 = NfaState::epsilon(1, None);
1859 s1.add_epsilon(2);
1860 let s2 = NfaState::with_term(2, NfaTerm::element(NameId(1), None, None), None);
1861
1862 let nfa = NfaTable::new(vec![s0, s1, s2], 0, 2);
1863 let closure = epsilon_closure(&nfa, [0]);
1864
1865 assert_eq!(closure, make_set(&[0, 1, 2]));
1866 }
1867
1868 fn make_priority_nfa() -> NfaTable {
1869 let mut start = NfaState::epsilon(0, None);
1870 start.add_epsilon(1);
1871 start.add_epsilon(2);
1872
1873 let mut elem_state = NfaState::with_term(1, NfaTerm::element(NameId(1), None, None), None);
1874 let mut wild_state = NfaState::with_term(
1875 2,
1876 NfaTerm::wildcard(NamespaceConstraint::Any, ProcessContents::Lax),
1877 None,
1878 );
1879
1880 elem_state.add_consume(3);
1881 wild_state.add_consume(4);
1882
1883 let exit_elem = NfaState::epsilon(3, None);
1884 let exit_wild = NfaState::epsilon(4, None);
1885
1886 NfaTable::new(
1887 vec![start, elem_state, wild_state, exit_elem, exit_wild],
1888 0,
1889 3,
1890 )
1891 }
1892
1893 #[test]
1894 fn test_advance_states_matches_element_and_wildcard() {
1895 let nfa = make_priority_nfa();
1896 let next = advance_states(&nfa, [0], NameId(1), None, None, None, XsdVersion::V1_0);
1897 assert_eq!(next, make_set(&[3, 4]));
1898
1899 let next = advance_states(&nfa, [0], NameId(2), None, None, None, XsdVersion::V1_0);
1900 assert_eq!(next, make_set(&[4]));
1901 }
1902
1903 #[test]
1904 fn test_advance_with_priority_prefers_element() {
1905 let nfa = make_priority_nfa();
1906 let next = advance_with_priority(&nfa, [0], NameId(1), None, None, None, XsdVersion::V1_1);
1907 assert_eq!(next, make_set(&[3]));
1908
1909 let next = advance_with_priority(&nfa, [0], NameId(2), None, None, None, XsdVersion::V1_1);
1910 assert_eq!(next, make_set(&[4]));
1911 }
1912
1913 #[test]
1914 fn test_find_match_info_prefers_element_over_wildcard() {
1915 let nfa = make_priority_nfa();
1919 let active = ActiveStates::Simple(epsilon_closure(&nfa, [0]));
1920
1921 let mi = active.find_match_info(&nfa, NameId(1), None, None, None, XsdVersion::V1_1);
1923 assert!(
1924 mi.process_contents.is_none(),
1925 "element match should not have process_contents, got {:?}",
1926 mi.process_contents,
1927 );
1928
1929 let mi2 = active.find_match_info(&nfa, NameId(2), None, None, None, XsdVersion::V1_1);
1931 assert!(
1932 mi2.process_contents.is_some(),
1933 "wildcard-only match should have process_contents",
1934 );
1935 }
1936
1937 #[test]
1938 fn test_term_matches_substitution_groups() {
1939 let mut schema_set = SchemaSet::new();
1940 let head_name = schema_set.name_table.add("head");
1941 let member_name = schema_set.name_table.add("member");
1942
1943 let head_key = schema_set.arenas.alloc_element(element_data(head_name));
1944 let member_key = schema_set.arenas.alloc_element(element_data(member_name));
1945
1946 schema_set
1947 .arenas
1948 .elements
1949 .get_mut(member_key)
1950 .unwrap()
1951 .resolved_substitution_groups
1952 .push(head_key);
1953
1954 let map = build_substitution_group_map(&schema_set);
1955 let term = NfaTerm::element(head_name, None, Some(head_key));
1956
1957 assert!(term_matches(
1958 &term,
1959 member_name,
1960 None,
1961 None,
1962 Some(&map),
1963 XsdVersion::V1_0,
1964 ));
1965 }
1966
1967 #[test]
1968 fn test_find_match_info_substitution_returns_none_for_head_key() {
1969 let mut schema_set = SchemaSet::new();
1973 let head_name = schema_set.name_table.add("head");
1974 let member_name = schema_set.name_table.add("member");
1975
1976 let mut head_data = element_data(head_name);
1977 head_data.is_abstract = true;
1978 let head_key = schema_set.arenas.alloc_element(head_data);
1979 let member_key = schema_set.arenas.alloc_element(element_data(member_name));
1980
1981 schema_set
1982 .arenas
1983 .elements
1984 .get_mut(member_key)
1985 .unwrap()
1986 .resolved_substitution_groups
1987 .push(head_key);
1988
1989 let map = build_substitution_group_map(&schema_set);
1990
1991 let builder = crate::compiler::fragment::FragmentBuilder::new();
1993 let frag = builder.single_term(NfaTerm::element(head_name, None, Some(head_key)), None);
1994 let nfa = crate::compiler::fragment::fragment_to_table(frag);
1995 let active = ActiveStates::from_nfa(&nfa);
1996
1997 let mi =
1999 active.find_match_info(&nfa, member_name, None, None, Some(&map), XsdVersion::V1_0);
2000 assert!(
2001 mi.element_key.is_none(),
2002 "substitution match should not return head's element_key"
2003 );
2004 assert!(mi.resolved_type.is_none());
2005
2006 let mi_head =
2009 active.find_match_info(&nfa, head_name, None, None, Some(&map), XsdVersion::V1_0);
2010 assert!(
2011 mi_head.element_key.is_none(),
2012 "abstract head should not match its own name via subst map"
2013 );
2014 }
2015
2016 #[test]
2017 fn test_find_match_info_direct_match_returns_element_key() {
2018 let mut schema_set = SchemaSet::new();
2021 let head_name = schema_set.name_table.add("head");
2022 let member_name = schema_set.name_table.add("member");
2023
2024 let head_key = schema_set.arenas.alloc_element(element_data(head_name));
2026 let member_key = schema_set.arenas.alloc_element(element_data(member_name));
2027
2028 schema_set
2029 .arenas
2030 .elements
2031 .get_mut(member_key)
2032 .unwrap()
2033 .resolved_substitution_groups
2034 .push(head_key);
2035
2036 let map = build_substitution_group_map(&schema_set);
2037
2038 let builder = crate::compiler::fragment::FragmentBuilder::new();
2039 let frag = builder.single_term(NfaTerm::element(head_name, None, Some(head_key)), None);
2040 let nfa = crate::compiler::fragment::fragment_to_table(frag);
2041 let active = ActiveStates::from_nfa(&nfa);
2042
2043 let mi = active.find_match_info(&nfa, head_name, None, None, Some(&map), XsdVersion::V1_0);
2045 assert_eq!(
2046 mi.element_key,
2047 Some(head_key),
2048 "direct match should return the term's element_key"
2049 );
2050
2051 let mi_member =
2053 active.find_match_info(&nfa, member_name, None, None, Some(&map), XsdVersion::V1_0);
2054 assert!(
2055 mi_member.element_key.is_none(),
2056 "substitution match should not return head's element_key"
2057 );
2058 }
2059
2060 use crate::compiler::fragment::{fragment_to_table, FragmentBuilder};
2065
2066 fn make_counted_element_nfa(min: u32, max: u32) -> (NfaTable, NameId) {
2068 let name = NameId(100);
2069 let builder = FragmentBuilder::new();
2070 let frag = builder.single_term(NfaTerm::element(name, None, None), None);
2071 let counted = frag.repeat_counted(min, max);
2072 let nfa = fragment_to_table(counted);
2073 (nfa, name)
2074 }
2075
2076 fn advance_n(active: ActiveStates, nfa: &NfaTable, name: NameId, n: usize) -> ActiveStates {
2077 let mut state = active;
2078 for _ in 0..n {
2079 state = state.advance(nfa, name, None, None, None, XsdVersion::V1_0);
2080 }
2081 state
2082 }
2083
2084 #[test]
2085 fn test_counted_nfa_has_counters() {
2086 let (nfa, _) = make_counted_element_nfa(2, 5);
2087 assert!(nfa.has_counters());
2088 assert_eq!(nfa.counter_defs.len(), 1);
2089 assert_eq!(nfa.counter_defs[0].min, 2);
2090 assert_eq!(nfa.counter_defs[0].max, 5);
2091 assert_eq!(nfa.state_count(), 5);
2093 }
2094
2095 #[test]
2096 fn test_counted_nfa_compact_state_count() {
2097 let (nfa, _) = make_counted_element_nfa(0, 1000);
2099 assert_eq!(nfa.state_count(), 5); }
2101
2102 #[test]
2103 fn test_counted_active_states_is_counted_variant() {
2104 let (nfa, _) = make_counted_element_nfa(2, 5);
2105 let active = ActiveStates::from_nfa(&nfa);
2106 assert!(matches!(active, ActiveStates::Counted { .. }));
2107 }
2108
2109 #[test]
2110 fn test_counted_element_a_3_5() {
2111 let (nfa, a) = make_counted_element_nfa(3, 5);
2112
2113 let s2 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 2);
2115 assert!(!s2.is_empty());
2116 assert!(!s2.contains_accept(&nfa));
2117
2118 let s3 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 3);
2120 assert!(s3.contains_accept(&nfa));
2121
2122 let s4 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 4);
2124 assert!(s4.contains_accept(&nfa));
2125
2126 let s5 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 5);
2128 assert!(s5.contains_accept(&nfa));
2129
2130 let s6 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 6);
2132 assert!(s6.is_empty());
2133 }
2134
2135 #[test]
2136 fn test_counted_element_a_0_100() {
2137 let (nfa, a) = make_counted_element_nfa(0, 100);
2138
2139 let s0 = ActiveStates::from_nfa(&nfa);
2141 assert!(s0.contains_accept(&nfa));
2142
2143 let s100 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 100);
2145 assert!(s100.contains_accept(&nfa));
2146
2147 let s101 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 101);
2149 assert!(s101.is_empty());
2150 }
2151
2152 #[test]
2153 fn test_counted_element_exact_17() {
2154 let (nfa, a) = make_counted_element_nfa(17, 17);
2155
2156 let s16 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 16);
2158 assert!(!s16.contains_accept(&nfa));
2159
2160 let s17 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 17);
2162 assert!(s17.contains_accept(&nfa));
2163
2164 let s18 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 18);
2166 assert!(s18.is_empty());
2167 }
2168
2169 #[test]
2170 fn test_counted_sequence_a_b() {
2171 let a = NameId(100);
2173 let b = NameId(200);
2174 let builder = FragmentBuilder::new();
2175
2176 let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2177 let counted_a = frag_a.repeat_counted(2, 50);
2178
2179 let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2180 let seq = counted_a.concat(frag_b);
2181 let nfa = fragment_to_table(seq);
2182
2183 assert!(nfa.has_counters());
2184
2185 let s = ActiveStates::from_nfa(&nfa);
2187 let s = s.advance(&nfa, a, None, None, None, XsdVersion::V1_0); let s = s.advance(&nfa, b, None, None, None, XsdVersion::V1_0); assert!(!s.contains_accept(&nfa)); let s = ActiveStates::from_nfa(&nfa);
2193 let s = advance_n(s, &nfa, a, 2);
2194 let s = s.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
2195 assert!(s.contains_accept(&nfa));
2196
2197 let s = ActiveStates::from_nfa(&nfa);
2199 let s = advance_n(s, &nfa, a, 50);
2200 let s = s.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
2201 assert!(s.contains_accept(&nfa));
2202
2203 let s = ActiveStates::from_nfa(&nfa);
2205 let s = advance_n(s, &nfa, a, 51);
2206 assert!(s.is_empty());
2207 }
2208
2209 #[test]
2210 fn test_dead_counter_collapse() {
2211 let a = NameId(100);
2214 let b = NameId(200);
2215 let builder = FragmentBuilder::new();
2216
2217 let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2219 let opt_a = frag_a.optional();
2220
2221 let counted = opt_a.repeat_counted(0, 200);
2223
2224 let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2226 let seq = counted.concat(frag_b);
2227 let nfa = fragment_to_table(seq);
2228
2229 let initial = ActiveStates::from_nfa(&nfa);
2231 assert!(
2232 matches!(&initial, ActiveStates::RangedSingle { .. }),
2233 "Expected RangedSingle for nullable body counted NFA"
2234 );
2235
2236 let after_b = initial
2239 .clone()
2240 .advance(&nfa, b, None, None, None, XsdVersion::V1_0);
2241 assert!(after_b.contains_accept(&nfa));
2242
2243 if let ActiveStates::RangedSingle { state_ranges, .. } = &after_b {
2245 assert!(
2246 state_ranges.len() <= 5,
2247 "Expected O(1) ranged entries after loop exit, got {}",
2248 state_ranges.len()
2249 );
2250 }
2251 }
2252
2253 #[test]
2254 fn test_counted_exact_prefix_plus_star() {
2255 use crate::compiler::particle::{apply_occurs, MaxOccurs};
2258
2259 let a = NameId(100);
2260 let builder = FragmentBuilder::new();
2261 let frag = builder.single_term(NfaTerm::element(a, None, None), None);
2262 let result = apply_occurs(frag, 17, MaxOccurs::Unbounded);
2263 let nfa = fragment_to_table(result);
2264
2265 assert!(nfa.has_counters());
2267 assert!(
2269 nfa.state_count() < 15,
2270 "expected compact NFA, got {} states",
2271 nfa.state_count()
2272 );
2273
2274 let s16 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 16);
2276 assert!(!s16.contains_accept(&nfa));
2277
2278 let s17 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 17);
2280 assert!(s17.contains_accept(&nfa));
2281
2282 let s100 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 100);
2284 assert!(s100.contains_accept(&nfa));
2285 }
2286
2287 #[test]
2288 fn test_is_epsilon_like() {
2289 assert!(TransitionKind::Epsilon.is_epsilon_like());
2290 assert!(!TransitionKind::Consume.is_epsilon_like());
2291 assert!(TransitionKind::CounterReset(0).is_epsilon_like());
2292 assert!(TransitionKind::CounterIncrement(0).is_epsilon_like());
2293 assert!(TransitionKind::CounterMaxGuard(0).is_epsilon_like());
2294 assert!(TransitionKind::CounterMinGuard(0).is_epsilon_like());
2295 }
2296
2297 #[test]
2298 fn test_offset_counter() {
2299 assert_eq!(
2300 TransitionKind::CounterReset(0).offset_counter(3),
2301 TransitionKind::CounterReset(3)
2302 );
2303 assert_eq!(
2304 TransitionKind::CounterIncrement(2).offset_counter(5),
2305 TransitionKind::CounterIncrement(7)
2306 );
2307 assert_eq!(
2308 TransitionKind::Epsilon.offset_counter(10),
2309 TransitionKind::Epsilon
2310 );
2311 assert_eq!(
2312 TransitionKind::Consume.offset_counter(10),
2313 TransitionKind::Consume
2314 );
2315 }
2316
2317 #[test]
2322 fn test_counter_range_operations() {
2323 let r = CounterRange::single(5);
2324 assert_eq!(r.lo, 5);
2325 assert_eq!(r.hi, 5);
2326 assert!(!r.is_empty());
2327 assert!(r.subsumes(r));
2328
2329 let r2 = CounterRange::new(3, 7);
2330 assert!(r2.subsumes(r));
2331 assert!(!r.subsumes(r2));
2332
2333 let u = CounterRange::single(2).union(CounterRange::new(3, 5));
2335 assert_eq!(u, CounterRange::new(2, 5));
2336
2337 assert_eq!(
2339 CounterRange::new(1, 5).intersect_below(4),
2340 CounterRange::new(1, 3)
2341 );
2342 assert!(CounterRange::new(5, 8).intersect_below(5).is_empty());
2343 assert!(CounterRange::new(0, 10).intersect_below(0).is_empty());
2344
2345 assert_eq!(
2347 CounterRange::new(1, 5).intersect_above(3),
2348 CounterRange::new(3, 5)
2349 );
2350 assert!(CounterRange::new(1, 3).intersect_above(5).is_empty());
2351 }
2352
2353 #[test]
2358 fn test_ranged_closure_convergence() {
2359 let a = NameId(100);
2361 let builder = FragmentBuilder::new();
2362 let frag = builder.single_term(NfaTerm::element(a, None, None), None);
2363 let opt = frag.optional();
2364 let counted = opt.repeat_counted(0, 10_000);
2365 let nfa = fragment_to_table(counted);
2366
2367 assert!(nfa.has_counters());
2368 assert!(nfa.counter_defs[0].body_nullable);
2369
2370 let initial = ActiveStates::from_nfa(&nfa);
2371 assert!(matches!(&initial, ActiveStates::RangedSingle { .. }));
2372
2373 if let ActiveStates::RangedSingle { state_ranges, .. } = &initial {
2375 assert!(
2376 state_ranges.len() <= 10,
2377 "Expected O(states) entries, got {}",
2378 state_ranges.len()
2379 );
2380 }
2381
2382 assert!(initial.contains_accept(&nfa));
2384
2385 let s5 = advance_n(initial, &nfa, a, 5);
2387 assert!(s5.contains_accept(&nfa));
2388
2389 assert!(matches!(&s5, ActiveStates::RangedSingle { .. }));
2391 }
2392
2393 #[test]
2394 fn test_nullable_body_accepts_range() {
2395 let a = NameId(100);
2397 let builder = FragmentBuilder::new();
2398 let frag = builder.single_term(NfaTerm::element(a, None, None), None);
2399 let opt = frag.optional();
2400 let counted = opt.repeat_counted(3, 5);
2401 let nfa = fragment_to_table(counted);
2402
2403 assert!(nfa.counter_defs[0].body_nullable);
2404 let initial = ActiveStates::from_nfa(&nfa);
2405 assert!(matches!(&initial, ActiveStates::RangedSingle { .. }));
2406
2407 assert!(initial.contains_accept(&nfa));
2409
2410 for n in 1..=5 {
2412 let s = advance_n(initial.clone(), &nfa, a, n);
2413 assert!(s.contains_accept(&nfa), "Expected accepting after {n} a's");
2414 }
2415
2416 let s6 = advance_n(initial, &nfa, a, 6);
2418 assert!(s6.is_empty(), "Expected rejection after 6 a's");
2419 }
2420
2421 #[test]
2422 fn test_choice_body_nullable() {
2423 let a = NameId(100);
2425 let b = NameId(200);
2426 let builder = FragmentBuilder::new();
2427 let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2428 let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2429 let choice = frag_a.alternate(frag_b.optional());
2430 let counted = choice.repeat_counted(0, 1000);
2431 let nfa = fragment_to_table(counted);
2432
2433 assert!(nfa.counter_defs[0].body_nullable);
2434 let initial = ActiveStates::from_nfa(&nfa);
2435 assert!(matches!(&initial, ActiveStates::RangedSingle { .. }));
2436
2437 if let ActiveStates::RangedSingle { state_ranges, .. } = &initial {
2438 assert!(
2439 state_ranges.len() <= 15,
2440 "Expected O(states) entries, got {}",
2441 state_ranges.len()
2442 );
2443 }
2444
2445 assert!(initial.contains_accept(&nfa));
2447 let s_a = initial
2448 .clone()
2449 .advance(&nfa, a, None, None, None, XsdVersion::V1_0);
2450 assert!(s_a.contains_accept(&nfa));
2451 let s_b = initial
2452 .clone()
2453 .advance(&nfa, b, None, None, None, XsdVersion::V1_0);
2454 assert!(s_b.contains_accept(&nfa));
2455 }
2456
2457 #[test]
2458 fn test_non_nullable_body_stays_counted() {
2459 let (nfa, a) = make_counted_element_nfa(0, 5);
2461 assert!(!nfa.counter_defs[0].body_nullable);
2462
2463 let initial = ActiveStates::from_nfa(&nfa);
2464 assert!(matches!(&initial, ActiveStates::Counted { .. }));
2465
2466 assert!(initial.contains_accept(&nfa)); for n in 1..=5 {
2469 let s = advance_n(initial.clone(), &nfa, a, n);
2470 assert!(s.contains_accept(&nfa), "Expected accepting after {n} a's");
2471 }
2472 let s6 = advance_n(initial, &nfa, a, 6);
2473 assert!(s6.is_empty());
2474 }
2475
2476 #[test]
2477 fn test_multi_counter_stays_counted() {
2478 let a = NameId(100);
2480 let b = NameId(200);
2481 let builder = FragmentBuilder::new();
2482 let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2483 let counted_a = frag_a.repeat_counted(0, 100);
2484 let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2485 let counted_b = frag_b.repeat_counted(0, 100);
2486 let seq = counted_a.concat(counted_b);
2487 let nfa = fragment_to_table(seq);
2488
2489 assert_eq!(nfa.counter_defs.len(), 2);
2490 let initial = ActiveStates::from_nfa(&nfa);
2491 assert!(
2492 matches!(&initial, ActiveStates::Counted { .. }),
2493 "Multi-counter NFA should use Counted, not RangedSingle"
2494 );
2495 }
2496
2497 #[test]
2498 fn test_nested_nullable_uses_hybrid() {
2499 let a = NameId(100);
2501 let builder = FragmentBuilder::new();
2502 let frag = builder.single_term(NfaTerm::element(a, None, None), None);
2503 let inner = frag.optional().repeat_counted(0, 50);
2504 let outer = inner.repeat_counted(0, 50);
2505 let nfa = fragment_to_table(outer);
2506
2507 assert_eq!(nfa.counter_defs.len(), 2);
2508 let initial = ActiveStates::from_nfa(&nfa);
2509 assert!(
2510 matches!(&initial, ActiveStates::Hybrid { .. }),
2511 "Nested nullable counted NFA should use Hybrid, got {:?}",
2512 std::mem::discriminant(&initial)
2513 );
2514 }
2515
2516 #[test]
2521 fn test_nested_nullable_hybrid_correctness() {
2522 let a = NameId(100);
2524 let builder = FragmentBuilder::new();
2525 let frag = builder.single_term(NfaTerm::element(a, None, None), None);
2526 let inner = frag.optional().repeat_counted(0, 100);
2527 let outer = inner.repeat_counted(0, 50);
2528 let nfa = fragment_to_table(outer);
2529
2530 assert_eq!(nfa.counter_defs.len(), 2);
2531 let initial = ActiveStates::from_nfa(&nfa);
2532 assert!(matches!(&initial, ActiveStates::Hybrid { .. }));
2533
2534 assert!(initial.contains_accept(&nfa));
2536
2537 let s100 = advance_n(initial.clone(), &nfa, a, 100);
2539 assert!(s100.contains_accept(&nfa));
2540
2541 let s5000 = advance_n(initial.clone(), &nfa, a, 5000);
2543 assert!(s5000.contains_accept(&nfa));
2544
2545 let s5001 = advance_n(initial, &nfa, a, 5001);
2547 assert!(s5001.is_empty(), "Expected rejection after 5001 a's");
2548 }
2549
2550 #[test]
2551 fn test_hybrid_convergence() {
2552 let a = NameId(100);
2554 let builder = FragmentBuilder::new();
2555 let frag = builder.single_term(NfaTerm::element(a, None, None), None);
2556 let inner = frag.optional().repeat_counted(0, 100);
2557 let outer = inner.repeat_counted(0, 50);
2558 let nfa = fragment_to_table(outer);
2559
2560 let initial = ActiveStates::from_nfa(&nfa);
2561 if let ActiveStates::Hybrid { configs, .. } = &initial {
2562 let max_expected = nfa.state_count() * 51; assert!(
2566 configs.len() <= max_expected,
2567 "Expected at most {} entries, got {} (state_count={})",
2568 max_expected,
2569 configs.len(),
2570 nfa.state_count()
2571 );
2572 } else {
2573 panic!("Expected Hybrid variant");
2574 }
2575 }
2576
2577 #[test]
2578 fn test_sequential_nullable_hybrid() {
2579 let a = NameId(100);
2581 let b = NameId(200);
2582 let builder = FragmentBuilder::new();
2583 let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2584 let counted_a = frag_a.optional().repeat_counted(0, 1000);
2585 let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2586 let counted_b = frag_b.optional().repeat_counted(0, 1000);
2587 let seq = counted_a.concat(counted_b);
2588 let nfa = fragment_to_table(seq);
2589
2590 assert_eq!(nfa.counter_defs.len(), 2);
2591 assert!(nfa.counter_defs[0].body_nullable);
2592 assert!(nfa.counter_defs[1].body_nullable);
2593
2594 let initial = ActiveStates::from_nfa(&nfa);
2595 assert!(matches!(&initial, ActiveStates::Hybrid { .. }));
2596
2597 assert!(initial.contains_accept(&nfa));
2599
2600 let s = initial
2602 .clone()
2603 .advance(&nfa, a, None, None, None, XsdVersion::V1_0);
2604 assert!(s.contains_accept(&nfa));
2605
2606 let s = initial
2608 .clone()
2609 .advance(&nfa, b, None, None, None, XsdVersion::V1_0);
2610 assert!(s.contains_accept(&nfa));
2611
2612 let s = advance_n(initial.clone(), &nfa, a, 1000);
2614 assert!(s.contains_accept(&nfa));
2615 let s = advance_n(s, &nfa, b, 1000);
2616 assert!(s.contains_accept(&nfa));
2617
2618 let s = advance_n(initial, &nfa, a, 1001);
2620 assert!(s.is_empty(), "Expected rejection after 1001 a's");
2621 }
2622
2623 #[test]
2624 fn test_mixed_nullable_nonnullable_hybrid() {
2625 let a = NameId(100);
2627 let b = NameId(200);
2628 let builder = FragmentBuilder::new();
2629 let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2630 let counted_a = frag_a.optional().repeat_counted(0, 500);
2631 let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2632 let counted_b = frag_b.repeat_counted(0, 500);
2633 let seq = counted_a.concat(counted_b);
2634 let nfa = fragment_to_table(seq);
2635
2636 assert_eq!(nfa.counter_defs.len(), 2);
2637 assert!(nfa.counter_defs[0].body_nullable);
2638 assert!(!nfa.counter_defs[1].body_nullable);
2639
2640 let initial = ActiveStates::from_nfa(&nfa);
2641 assert!(matches!(&initial, ActiveStates::Hybrid { .. }));
2642
2643 if let ActiveStates::Hybrid {
2645 ranged_counter_idx, ..
2646 } = &initial
2647 {
2648 assert_eq!(
2649 *ranged_counter_idx, 0,
2650 "Should range the nullable counter (index 0)"
2651 );
2652 }
2653
2654 assert!(initial.contains_accept(&nfa));
2656
2657 let s = advance_n(initial.clone(), &nfa, a, 500);
2659 let s = advance_n(s, &nfa, b, 500);
2660 assert!(s.contains_accept(&nfa));
2661
2662 let s = advance_n(initial, &nfa, b, 501);
2664 assert!(s.is_empty());
2665 }
2666
2667 #[test]
2668 fn test_hybrid_forced_outer_ranged() {
2669 let a = NameId(100);
2673 let builder = FragmentBuilder::new();
2674 let frag = builder.single_term(NfaTerm::element(a, None, None), None);
2675 let inner = frag.optional().repeat_counted(0, 100);
2676 let outer = inner.repeat_counted(0, 50);
2677 let nfa = fragment_to_table(outer);
2678
2679 let initial = ActiveStates::from_nfa(&nfa);
2680
2681 let s = advance_n(initial.clone(), &nfa, a, 5000);
2683 assert!(
2684 s.contains_accept(&nfa),
2685 "5000 a's should be accepted (50*100)"
2686 );
2687
2688 let s = advance_n(initial.clone(), &nfa, a, 101);
2690 assert!(
2691 s.contains_accept(&nfa),
2692 "101 a's should be accepted (needs 2 outer iterations)"
2693 );
2694
2695 assert!(initial.contains_accept(&nfa), "0 a's should be accepted");
2697
2698 let s = advance_n(initial, &nfa, a, 5001);
2700 assert!(s.is_empty(), "5001 a's should be rejected");
2701 }
2702
2703 #[test]
2704 fn test_hybrid_selection_different_maxima() {
2705 let a = NameId(100);
2707 let b = NameId(200);
2708 let builder = FragmentBuilder::new();
2709 let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2710 let counted_a = frag_a.optional().repeat_counted(0, 10);
2711 let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2712 let counted_b = frag_b.optional().repeat_counted(0, 1000);
2713 let seq = counted_a.concat(counted_b);
2714 let nfa = fragment_to_table(seq);
2715
2716 assert_eq!(nfa.counter_defs.len(), 2);
2717 let initial = ActiveStates::from_nfa(&nfa);
2718 assert!(
2720 matches!(&initial, ActiveStates::Hybrid { .. }),
2721 "Expected Hybrid, got {:?}",
2722 std::mem::discriminant(&initial)
2723 );
2724
2725 if let ActiveStates::Hybrid {
2726 ranged_counter_idx, ..
2727 } = &initial
2728 {
2729 assert_eq!(
2730 *ranged_counter_idx, 1,
2731 "Should range counter 1 (max=1000), not counter 0 (max=10)"
2732 );
2733 }
2734
2735 let s = advance_n(initial.clone(), &nfa, a, 10);
2737 let s = advance_n(s, &nfa, b, 1000);
2738 assert!(s.contains_accept(&nfa));
2739
2740 let s = advance_n(initial, &nfa, a, 11);
2742 assert!(s.is_empty());
2743 }
2744
2745 #[test]
2746 fn test_hybrid_small_max_stays_counted() {
2747 let a = NameId(100);
2749 let b = NameId(200);
2750 let builder = FragmentBuilder::new();
2751 let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2752 let counted_a = frag_a.optional().repeat_counted(0, 5);
2753 let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2754 let counted_b = frag_b.optional().repeat_counted(0, 5);
2755 let seq = counted_a.concat(counted_b);
2756 let nfa = fragment_to_table(seq);
2757
2758 assert_eq!(nfa.counter_defs.len(), 2);
2759 let initial = ActiveStates::from_nfa(&nfa);
2760 assert!(
2761 matches!(&initial, ActiveStates::Counted { .. }),
2762 "Small-max nullable counters should stay Counted, got {:?}",
2763 std::mem::discriminant(&initial)
2764 );
2765 }
2766
2767 #[test]
2768 fn test_xsd11_priority_ranged() {
2769 use crate::types::complex::NamespaceConstraint;
2772 let a = NameId(100);
2773 let b = NameId(200);
2774 let builder = FragmentBuilder::new();
2775
2776 let elem = builder.single_term(NfaTerm::element(a, None, None), None);
2778 let wc = builder.single_term(
2780 NfaTerm::Wildcard {
2781 namespace_constraint: NamespaceConstraint::Any,
2782 process_contents: ProcessContents::Lax,
2783 not_qnames: Vec::new(),
2784 },
2785 None,
2786 );
2787 let choice = elem.alternate(wc.optional());
2789 let counted = choice.repeat_counted(0, 100);
2790 let nfa = fragment_to_table(counted);
2791
2792 assert!(nfa.counter_defs[0].body_nullable);
2793 let initial = ActiveStates::from_nfa(&nfa);
2794 assert!(matches!(&initial, ActiveStates::RangedSingle { .. }));
2795
2796 let next =
2799 initial
2800 .clone()
2801 .advance_with_priority(&nfa, a, None, None, None, XsdVersion::V1_1);
2802 assert!(!next.is_empty());
2803 assert!(next.contains_accept(&nfa));
2804
2805 let next_b = initial.advance_with_priority(&nfa, b, None, None, None, XsdVersion::V1_1);
2807 assert!(!next_b.is_empty());
2808 assert!(next_b.contains_accept(&nfa));
2809 }
2810
2811 #[test]
2816 fn test_initial_tiebreak_prefers_first() {
2817 let a = NameId(100);
2819 let b = NameId(200);
2820 let builder = FragmentBuilder::new();
2821 let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2822 let counted_a = frag_a.optional().repeat_counted(0, 1000);
2823 let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2824 let counted_b = frag_b.optional().repeat_counted(0, 1000);
2825 let seq = counted_a.concat(counted_b);
2826 let nfa = fragment_to_table(seq);
2827
2828 let initial = ActiveStates::from_nfa(&nfa);
2829 if let ActiveStates::Hybrid {
2830 ranged_counter_idx, ..
2831 } = &initial
2832 {
2833 assert_eq!(
2834 *ranged_counter_idx, 0,
2835 "Equal max: should prefer first counter (index 0)"
2836 );
2837 } else {
2838 panic!("Expected Hybrid variant");
2839 }
2840 }
2841
2842 #[test]
2843 fn test_dynamic_switch_sequential_equal_max() {
2844 let a = NameId(100);
2847 let b = NameId(200);
2848 let builder = FragmentBuilder::new();
2849 let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2850 let counted_a = frag_a.optional().repeat_counted(0, 1000);
2851 let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2852 let counted_b = frag_b.optional().repeat_counted(0, 1000);
2853 let seq = counted_a.concat(counted_b);
2854 let nfa = fragment_to_table(seq);
2855
2856 let initial = ActiveStates::from_nfa(&nfa);
2857 let after_a = advance_n(initial, &nfa, a, 1000);
2859 let after_b = after_a.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
2860
2861 if let ActiveStates::Hybrid {
2862 ranged_counter_idx,
2863 configs,
2864 ..
2865 } = &after_b
2866 {
2867 assert_eq!(
2868 *ranged_counter_idx, 1,
2869 "Should have dynamically switched to counter 1"
2870 );
2871 let max_expected = nfa.state_count() * 2;
2873 assert!(
2874 configs.len() <= max_expected,
2875 "Expected <= {} configs after switch, got {}",
2876 max_expected,
2877 configs.len()
2878 );
2879 } else {
2880 panic!("Expected Hybrid variant");
2881 }
2882
2883 let s = advance_n(after_b.clone(), &nfa, b, 999);
2885 assert!(s.contains_accept(&nfa));
2886
2887 let s = advance_n(after_b, &nfa, b, 1000);
2889 assert!(s.is_empty());
2890 }
2891
2892 #[test]
2893 fn test_dynamic_switch_three_counters() {
2894 let a = NameId(100);
2896 let b = NameId(200);
2897 let c = NameId(300);
2898 let builder = FragmentBuilder::new();
2899 let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2900 let counted_a = frag_a.optional().repeat_counted(0, 500);
2901 let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2902 let counted_b = frag_b.optional().repeat_counted(0, 500);
2903 let frag_c = builder.single_term(NfaTerm::element(c, None, None), None);
2904 let counted_c = frag_c.optional().repeat_counted(0, 500);
2905 let seq = counted_a.concat(counted_b).concat(counted_c);
2906 let nfa = fragment_to_table(seq);
2907
2908 assert_eq!(nfa.counter_defs.len(), 3);
2909 let initial = ActiveStates::from_nfa(&nfa);
2910
2911 let after_a = advance_n(initial, &nfa, a, 500);
2913 assert!(after_a.contains_accept(&nfa));
2914 let after_b1 = after_a.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
2915 if let ActiveStates::Hybrid {
2916 ranged_counter_idx, ..
2917 } = &after_b1
2918 {
2919 assert_eq!(
2920 *ranged_counter_idx, 1,
2921 "Should switch to counter 1 after first loop exits"
2922 );
2923 }
2924
2925 let after_b = advance_n(after_b1, &nfa, b, 499);
2927 assert!(after_b.contains_accept(&nfa));
2928 let after_c1 = after_b.advance(&nfa, c, None, None, None, XsdVersion::V1_0);
2929 if let ActiveStates::Hybrid {
2930 ranged_counter_idx, ..
2931 } = &after_c1
2932 {
2933 assert_eq!(
2934 *ranged_counter_idx, 2,
2935 "Should switch to counter 2 after second loop exits"
2936 );
2937 }
2938
2939 let after_c = advance_n(after_c1.clone(), &nfa, c, 499);
2941 assert!(after_c.contains_accept(&nfa));
2942 let over = advance_n(after_c1, &nfa, c, 500);
2943 assert!(over.is_empty());
2944 }
2945
2946 #[test]
2947 fn test_no_switch_when_ranged_still_active() {
2948 let a = NameId(100);
2951 let b = NameId(200);
2952 let builder = FragmentBuilder::new();
2953 let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2954 let counted_a = frag_a.optional().repeat_counted(0, 1000);
2955 let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2956 let counted_b = frag_b.optional().repeat_counted(0, 1000);
2957 let seq = counted_a.concat(counted_b);
2958 let nfa = fragment_to_table(seq);
2959
2960 let initial = ActiveStates::from_nfa(&nfa);
2961 let after_5a = advance_n(initial, &nfa, a, 5);
2962 if let ActiveStates::Hybrid {
2963 ranged_counter_idx, ..
2964 } = &after_5a
2965 {
2966 assert_eq!(
2967 *ranged_counter_idx, 0,
2968 "Should NOT switch while first loop is still active"
2969 );
2970 }
2971 }
2972
2973 #[test]
2974 fn test_no_switch_non_nullable_candidate() {
2975 let a = NameId(100);
2977 let b = NameId(200);
2978 let builder = FragmentBuilder::new();
2979 let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2980 let counted_a = frag_a.optional().repeat_counted(0, 1000);
2981 let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2982 let counted_b = frag_b.repeat_counted(0, 1000); let seq = counted_a.concat(counted_b);
2984 let nfa = fragment_to_table(seq);
2985
2986 let initial = ActiveStates::from_nfa(&nfa);
2987 let after_b = advance_n(initial, &nfa, b, 100);
2989 if let ActiveStates::Hybrid {
2990 ranged_counter_idx, ..
2991 } = &after_b
2992 {
2993 assert_eq!(
2994 *ranged_counter_idx, 0,
2995 "Should NOT switch: counter 1 is not nullable"
2996 );
2997 }
2998
2999 let s = advance_n(after_b.clone(), &nfa, b, 900);
3001 assert!(s.contains_accept(&nfa));
3002 }
3003
3004 #[test]
3005 fn test_no_switch_nested_inner_active() {
3006 let a = NameId(100);
3009 let builder = FragmentBuilder::new();
3010 let frag = builder.single_term(NfaTerm::element(a, None, None), None);
3011 let inner = frag.optional().repeat_counted(0, 100);
3012 let outer = inner.repeat_counted(0, 50);
3013 let nfa = fragment_to_table(outer);
3014
3015 let initial = ActiveStates::from_nfa(&nfa);
3017 let after_a = advance_n(initial.clone(), &nfa, a, 50);
3018 if let ActiveStates::Hybrid {
3019 ranged_counter_idx, ..
3020 } = &after_a
3021 {
3022 let initial_idx = if let ActiveStates::Hybrid {
3024 ranged_counter_idx, ..
3025 } = &initial
3026 {
3027 *ranged_counter_idx
3028 } else {
3029 unreachable!()
3030 };
3031 assert_eq!(
3032 *ranged_counter_idx, initial_idx,
3033 "Should NOT switch while processing nested loops"
3034 );
3035 }
3036 }
3037
3038 #[test]
3039 fn test_switch_config_count_drops() {
3040 let a = NameId(100);
3043 let b = NameId(200);
3044 let builder = FragmentBuilder::new();
3045 let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
3046 let counted_a = frag_a.optional().repeat_counted(0, 1000);
3047 let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
3048 let counted_b = frag_b.optional().repeat_counted(0, 1000);
3049 let seq = counted_a.concat(counted_b);
3050 let nfa = fragment_to_table(seq);
3051
3052 let initial = ActiveStates::from_nfa(&nfa);
3053 let after_b = initial.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
3055 if let ActiveStates::Hybrid {
3056 configs,
3057 ranged_counter_idx,
3058 ..
3059 } = &after_b
3060 {
3061 assert_eq!(
3062 *ranged_counter_idx, 1,
3063 "Should switch to counter 1 after 'b'"
3064 );
3065 assert!(
3067 configs.len() < nfa.state_count() * 2,
3068 "Config count {} should be << state_count {} after switch",
3069 configs.len(),
3070 nfa.state_count()
3071 );
3072 }
3073 }
3074
3075 #[test]
3076 fn test_differential_counted_vs_hybrid_switch() {
3077 let a = NameId(100);
3081 let b = NameId(200);
3082 let builder = FragmentBuilder::new();
3083 let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
3084 let counted_a = frag_a.optional().repeat_counted(0, 100);
3085 let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
3086 let counted_b = frag_b.optional().repeat_counted(0, 100);
3087 let seq = counted_a.concat(counted_b);
3088 let nfa = fragment_to_table(seq);
3089
3090 let hybrid = ActiveStates::from_nfa(&nfa);
3092
3093 let initial_config = ActiveConfig::initial(nfa.start_state, nfa.counter_defs.len());
3095 let mut counted_configs = HashSet::new();
3096 counted_configs.insert(initial_config);
3097 let counted = ActiveStates::Counted {
3098 configs: counted_configs,
3099 num_counters: nfa.counter_defs.len(),
3100 }
3101 .epsilon_closure(&nfa);
3102
3103 let mut h = hybrid;
3105 let mut c = counted;
3106 for _ in 0..50 {
3107 h = h.advance(&nfa, a, None, None, None, XsdVersion::V1_0);
3108 c = c.advance(&nfa, a, None, None, None, XsdVersion::V1_0);
3109 assert_eq!(
3110 h.contains_accept(&nfa),
3111 c.contains_accept(&nfa),
3112 "Hybrid/Counted disagree on accept during a-feeding"
3113 );
3114 assert_eq!(
3115 h.is_empty(),
3116 c.is_empty(),
3117 "Hybrid/Counted disagree on empty during a-feeding"
3118 );
3119 }
3120 for _ in 0..50 {
3121 h = h.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
3122 c = c.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
3123 assert_eq!(
3124 h.contains_accept(&nfa),
3125 c.contains_accept(&nfa),
3126 "Hybrid/Counted disagree on accept during b-feeding"
3127 );
3128 assert_eq!(
3129 h.is_empty(),
3130 c.is_empty(),
3131 "Hybrid/Counted disagree on empty during b-feeding"
3132 );
3133 }
3134 assert!(h.contains_accept(&nfa));
3136 assert!(c.contains_accept(&nfa));
3137
3138 h = h.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
3140 c = c.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
3141 assert_eq!(h.contains_accept(&nfa), c.contains_accept(&nfa));
3142 }
3143}