Skip to main content

xsd_schema/compiler/
nfa.rs

1//! NFA data structures for content model validation
2//!
3//! This module defines the core NFA (Nondeterministic Finite Automaton) structures
4//! used to represent compiled XSD content models.
5
6use std::collections::{HashMap, HashSet, VecDeque};
7
8use super::substitution::SubstitutionGroupMap;
9use crate::ids::{ElementKey, NameId, TypeKey};
10use crate::parser::location::SourceRef;
11use crate::schema::model::XsdVersion;
12use crate::types::complex::{not_qnames_exclude, NamespaceConstraint, ProcessContents};
13
14/// Unique identifier for NFA states within a table
15pub type StateId = u32;
16
17/// Unique identifier for a counter within an NFA.
18///
19/// u16 is intentional: keeps `TransitionKind` at 4 bytes (vs 8 with u32),
20/// halving transition growth. 65K counters is far beyond any real schema.
21pub type CounterId = u16;
22
23/// An inclusive range of counter values `[lo, hi]`.
24///
25/// Used by the `RangedSingle` optimized path to represent a set of
26/// possible counter values at a single NFA state, avoiding the O(N)
27/// enumeration that scalar counters require for nullable loop bodies.
28#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
29pub struct CounterRange {
30    pub lo: u32,
31    pub hi: u32,
32}
33
34impl CounterRange {
35    /// Create a range containing a single value.
36    pub fn single(v: u32) -> Self {
37        Self { lo: v, hi: v }
38    }
39
40    /// Create a range `[lo, hi]`. Panics in debug if `lo > hi`.
41    pub fn new(lo: u32, hi: u32) -> Self {
42        debug_assert!(lo <= hi, "CounterRange::new({lo}, {hi}): lo > hi");
43        Self { lo, hi }
44    }
45
46    /// True if this range contains no values (`lo > hi`).
47    pub fn is_empty(self) -> bool {
48        self.lo > self.hi
49    }
50
51    /// True if `self` fully contains `other`.
52    pub fn subsumes(self, other: Self) -> bool {
53        self.lo <= other.lo && self.hi >= other.hi
54    }
55
56    /// Widen to include all values in both ranges.
57    /// The caller must ensure the ranges are contiguous (adjacent or overlapping).
58    pub fn union(self, other: Self) -> Self {
59        let result = Self {
60            lo: self.lo.min(other.lo),
61            hi: self.hi.max(other.hi),
62        };
63        debug_assert!(result.lo <= result.hi);
64        result
65    }
66
67    /// Intersect with `[0, max_exclusive - 1]`.  Returns an empty range if
68    /// `lo >= max_exclusive`.
69    pub fn intersect_below(self, max_exclusive: u32) -> Self {
70        if max_exclusive == 0 || self.lo >= max_exclusive {
71            // Use lo > hi to signal empty
72            return Self { lo: 1, hi: 0 };
73        }
74        Self {
75            lo: self.lo,
76            hi: self.hi.min(max_exclusive - 1),
77        }
78    }
79
80    /// Intersect with `[min_inclusive, u32::MAX]`.  Returns an empty range if
81    /// `hi < min_inclusive`.
82    pub fn intersect_above(self, min_inclusive: u32) -> Self {
83        if self.hi < min_inclusive {
84            return Self { lo: 1, hi: 0 };
85        }
86        Self {
87            lo: self.lo.max(min_inclusive),
88            hi: self.hi,
89        }
90    }
91}
92
93/// Definition of a counter used by counted NFA loops.
94///
95/// Each counter tracks how many times a loop body has been completed.
96/// `min` is the minimum iterations for the exit guard; `max` is the
97/// maximum iterations for the loop guard.
98#[derive(Debug, Clone, Copy, PartialEq, Eq)]
99pub struct CounterDef {
100    /// Minimum completed iterations required to exit
101    pub min: u32,
102    /// Maximum iterations allowed before loop must exit
103    pub max: u32,
104    /// Whether the loop body can be traversed without consuming any input.
105    /// When true and `num_counters == 1`, the `RangedSingle` fast-forward
106    /// path is used in epsilon closure.
107    pub body_nullable: bool,
108}
109
110/// Complete NFA table for a content model
111///
112/// Represents a compiled content model as a state machine. The NFA can be used
113/// for validation by tracking active states and matching input elements.
114#[derive(Debug, Clone)]
115pub struct NfaTable {
116    /// All states in the automaton
117    pub states: Vec<NfaState>,
118    /// Initial state ID
119    pub start_state: StateId,
120    /// Accepting state ID (single accept state per Thompson's construction)
121    pub accept_state: StateId,
122    /// Counter definitions for counted loops (empty if no counted repeats)
123    pub counter_defs: Vec<CounterDef>,
124}
125
126impl NfaTable {
127    /// Create a new NFA table with the given states (no counters)
128    pub fn new(states: Vec<NfaState>, start_state: StateId, accept_state: StateId) -> Self {
129        Self {
130            states,
131            start_state,
132            accept_state,
133            counter_defs: Vec::new(),
134        }
135    }
136
137    /// Create a new NFA table with counters
138    pub fn with_counters(
139        states: Vec<NfaState>,
140        start_state: StateId,
141        accept_state: StateId,
142        counter_defs: Vec<CounterDef>,
143    ) -> Self {
144        Self {
145            states,
146            start_state,
147            accept_state,
148            counter_defs,
149        }
150    }
151
152    /// Check if this NFA has any counted loops
153    pub fn has_counters(&self) -> bool {
154        !self.counter_defs.is_empty()
155    }
156
157    /// Get a state by ID
158    pub fn get_state(&self, id: StateId) -> Option<&NfaState> {
159        self.states.get(id as usize)
160    }
161
162    /// Get a mutable reference to a state by ID
163    pub fn get_state_mut(&mut self, id: StateId) -> Option<&mut NfaState> {
164        self.states.get_mut(id as usize)
165    }
166
167    /// Get the number of states
168    pub fn state_count(&self) -> usize {
169        self.states.len()
170    }
171
172    /// Check if a state is the accept state
173    pub fn is_accept(&self, state_id: StateId) -> bool {
174        state_id == self.accept_state
175    }
176
177    /// Concatenate two NFA tables: self followed by other.
178    /// Creates an epsilon transition from self's accept state to other's start state.
179    /// Offsets both state IDs and counter IDs in the second table.
180    pub fn concat(mut self, mut other: NfaTable) -> NfaTable {
181        let state_offset = self.states.len() as StateId;
182        let counter_offset = self.counter_defs.len() as CounterId;
183        for state in &mut other.states {
184            state.id += state_offset;
185            for trans in &mut state.transitions {
186                trans.target += state_offset;
187                trans.kind = trans.kind.offset_counter(counter_offset);
188            }
189        }
190        let other_start = other.start_state + state_offset;
191        self.states[self.accept_state as usize].add_epsilon(other_start);
192        let new_accept = other.accept_state + state_offset;
193        self.states.extend(other.states);
194        self.counter_defs.extend(other.counter_defs);
195        NfaTable::with_counters(self.states, self.start_state, new_accept, self.counter_defs)
196    }
197
198    /// Get all transitions from a given state
199    pub fn transitions_from(&self, state_id: StateId) -> &[NfaTransition] {
200        self.get_state(state_id)
201            .map(|s| s.transitions.as_slice())
202            .unwrap_or(&[])
203    }
204}
205
206/// A single state in the NFA
207///
208/// Each state can optionally have a term that must be matched to consume input
209/// when transitioning through this state. States without terms (epsilon states)
210/// are used for branching logic.
211#[derive(Debug, Clone)]
212pub struct NfaState {
213    /// Unique identifier for this state
214    pub id: StateId,
215    /// The term that must be matched to enter this state via a consuming transition.
216    /// None for epsilon states (branching/merging points).
217    pub term: Option<NfaTerm>,
218    /// Outgoing transitions from this state
219    pub transitions: Vec<NfaTransition>,
220    /// Source location in the schema for error reporting
221    pub origin: Option<SourceRef>,
222}
223
224impl NfaState {
225    /// Create a new state with no term (epsilon state)
226    pub fn epsilon(id: StateId, origin: Option<SourceRef>) -> Self {
227        Self {
228            id,
229            term: None,
230            transitions: Vec::new(),
231            origin,
232        }
233    }
234
235    /// Create a new state with a term
236    pub fn with_term(id: StateId, term: NfaTerm, origin: Option<SourceRef>) -> Self {
237        Self {
238            id,
239            term: Some(term),
240            transitions: Vec::new(),
241            origin,
242        }
243    }
244
245    /// Add a transition to this state
246    pub fn add_transition(&mut self, target: StateId, kind: TransitionKind) {
247        self.transitions.push(NfaTransition { target, kind });
248    }
249
250    /// Add an epsilon transition
251    pub fn add_epsilon(&mut self, target: StateId) {
252        self.add_transition(target, TransitionKind::Epsilon);
253    }
254
255    /// Add a consuming transition
256    pub fn add_consume(&mut self, target: StateId) {
257        self.add_transition(target, TransitionKind::Consume);
258    }
259
260    /// Check if this is an epsilon state (no term)
261    pub fn is_epsilon(&self) -> bool {
262        self.term.is_none()
263    }
264
265    /// Get epsilon transitions from this state
266    pub fn epsilon_transitions(&self) -> impl Iterator<Item = StateId> + '_ {
267        self.transitions
268            .iter()
269            .filter(|t| t.kind == TransitionKind::Epsilon)
270            .map(|t| t.target)
271    }
272
273    /// Get consuming transitions from this state
274    pub fn consuming_transitions(&self) -> impl Iterator<Item = StateId> + '_ {
275        self.transitions
276            .iter()
277            .filter(|t| t.kind == TransitionKind::Consume)
278            .map(|t| t.target)
279    }
280}
281
282/// A term that can be matched in the NFA
283///
284/// Terms represent the actual content that must be matched during validation.
285/// Each term corresponds to either a specific element or a wildcard pattern.
286#[derive(Debug, Clone)]
287pub enum NfaTerm {
288    /// Match a specific element
289    Element {
290        /// Element local name (interned)
291        name: NameId,
292        /// Element namespace (None = no namespace)
293        namespace: Option<NameId>,
294        /// Resolved element key (for type lookup during validation)
295        element_key: Option<ElementKey>,
296        /// Resolved type for local elements (without element key)
297        resolved_type: Option<TypeKey>,
298    },
299    /// Match any element satisfying wildcard constraints
300    Wildcard {
301        /// Namespace constraint for allowed namespaces
302        namespace_constraint: NamespaceConstraint,
303        /// How to process matched content
304        process_contents: ProcessContents,
305        /// Pre-expanded concrete QName exclusions (XSD 1.1 notQName)
306        not_qnames: Vec<(Option<NameId>, NameId)>,
307    },
308}
309
310impl NfaTerm {
311    /// Create an element term
312    pub fn element(
313        name: NameId,
314        namespace: Option<NameId>,
315        element_key: Option<ElementKey>,
316    ) -> Self {
317        NfaTerm::Element {
318            name,
319            namespace,
320            element_key,
321            resolved_type: None,
322        }
323    }
324
325    /// Create an element term with a resolved type
326    pub fn element_with_type(
327        name: NameId,
328        namespace: Option<NameId>,
329        element_key: Option<ElementKey>,
330        resolved_type: Option<TypeKey>,
331    ) -> Self {
332        NfaTerm::Element {
333            name,
334            namespace,
335            element_key,
336            resolved_type,
337        }
338    }
339
340    /// Create a wildcard term
341    pub fn wildcard(
342        namespace_constraint: NamespaceConstraint,
343        process_contents: ProcessContents,
344    ) -> Self {
345        NfaTerm::Wildcard {
346            namespace_constraint,
347            process_contents,
348            not_qnames: Vec::new(),
349        }
350    }
351
352    /// Create a wildcard term with QName exclusions (XSD 1.1)
353    pub fn wildcard_with_not_qnames(
354        namespace_constraint: NamespaceConstraint,
355        process_contents: ProcessContents,
356        not_qnames: Vec<(Option<NameId>, NameId)>,
357    ) -> Self {
358        NfaTerm::Wildcard {
359            namespace_constraint,
360            process_contents,
361            not_qnames,
362        }
363    }
364
365    /// Check if this term is an element
366    pub fn is_element(&self) -> bool {
367        matches!(self, NfaTerm::Element { .. })
368    }
369
370    /// Check if this term is a wildcard
371    pub fn is_wildcard(&self) -> bool {
372        matches!(self, NfaTerm::Wildcard { .. })
373    }
374}
375
376/// A transition between NFA states
377#[derive(Debug, Clone, Copy, PartialEq, Eq)]
378pub struct NfaTransition {
379    /// Target state ID
380    pub target: StateId,
381    /// Transition type (epsilon or consuming)
382    pub kind: TransitionKind,
383}
384
385impl NfaTransition {
386    /// Create a new transition
387    pub fn new(target: StateId, kind: TransitionKind) -> Self {
388        Self { target, kind }
389    }
390
391    /// Create an epsilon transition
392    pub fn epsilon(target: StateId) -> Self {
393        Self::new(target, TransitionKind::Epsilon)
394    }
395
396    /// Create a consuming transition
397    pub fn consume(target: StateId) -> Self {
398        Self::new(target, TransitionKind::Consume)
399    }
400}
401
402/// Type of transition between NFA states
403#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
404pub enum TransitionKind {
405    /// Epsilon transition (no input consumed, always available)
406    Epsilon,
407    /// Consuming transition (requires matching the target state's term)
408    Consume,
409    /// Reset a counter to 0 (entering a counted region)
410    CounterReset(CounterId),
411    /// Increment a counter by 1 (completed one loop iteration)
412    CounterIncrement(CounterId),
413    /// Guard: pass only if counter < def.max (loop back)
414    CounterMaxGuard(CounterId),
415    /// Guard: pass only if counter >= def.min (exit loop)
416    CounterMinGuard(CounterId),
417}
418
419impl TransitionKind {
420    /// Check if this transition is epsilon-like (does not consume input).
421    /// All counter transitions are epsilon-like; only `Consume` is not.
422    pub fn is_epsilon_like(&self) -> bool {
423        !matches!(self, TransitionKind::Consume)
424    }
425
426    /// Offset counter IDs by the given amount (for fragment/table composition).
427    /// Returns self unchanged for non-counter transitions.
428    pub fn offset_counter(self, offset: CounterId) -> Self {
429        if offset == 0 {
430            return self;
431        }
432        match self {
433            TransitionKind::CounterReset(c) => TransitionKind::CounterReset(c + offset),
434            TransitionKind::CounterIncrement(c) => TransitionKind::CounterIncrement(c + offset),
435            TransitionKind::CounterMaxGuard(c) => TransitionKind::CounterMaxGuard(c + offset),
436            TransitionKind::CounterMinGuard(c) => TransitionKind::CounterMinGuard(c + offset),
437            other => other,
438        }
439    }
440}
441
442/// Compute the epsilon closure for a set of start states.
443///
444/// Only follows `TransitionKind::Epsilon` transitions. For NFAs with counter
445/// transitions, use `ActiveStates` methods instead — this function will miss
446/// states reachable via counter transitions.
447pub fn epsilon_closure(
448    nfa: &NfaTable,
449    start_states: impl IntoIterator<Item = StateId>,
450) -> HashSet<StateId> {
451    debug_assert!(
452        !nfa.has_counters(),
453        "epsilon_closure called on counted NFA; use ActiveStates instead"
454    );
455    let mut closure = HashSet::new();
456    let mut stack: Vec<StateId> = start_states.into_iter().collect();
457
458    while let Some(state_id) = stack.pop() {
459        if !closure.insert(state_id) {
460            continue;
461        }
462        if let Some(state) = nfa.get_state(state_id) {
463            for target in state.epsilon_transitions() {
464                if !closure.contains(&target) {
465                    stack.push(target);
466                }
467            }
468        }
469    }
470
471    closure
472}
473
474/// Match an element name against an NfaTerm with optional substitution groups.
475pub fn term_matches(
476    term: &NfaTerm,
477    element_name: NameId,
478    element_namespace: Option<NameId>,
479    target_namespace: Option<NameId>,
480    substitution_groups: Option<&SubstitutionGroupMap>,
481    xsd_version: XsdVersion,
482) -> bool {
483    match term {
484        NfaTerm::Element {
485            name,
486            namespace,
487            element_key,
488            ..
489        } => {
490            if let (Some(map), Some(key)) = (substitution_groups, element_key) {
491                if let Some(names) = map.get(key) {
492                    return names.contains(&(element_name, element_namespace));
493                }
494            }
495            *name == element_name && *namespace == element_namespace
496        }
497        NfaTerm::Wildcard {
498            namespace_constraint,
499            not_qnames,
500            ..
501        } => {
502            if !namespace_constraint.matches(element_namespace, target_namespace, xsd_version) {
503                return false;
504            }
505            !not_qnames_exclude(not_qnames, element_namespace, element_name)
506        }
507    }
508}
509
510/// Advance NFA states by matching an element and applying epsilon closure.
511///
512/// Only handles plain epsilon transitions. For NFAs with counter transitions,
513/// use `ActiveStates::advance` instead.
514pub fn advance_states(
515    nfa: &NfaTable,
516    start_states: impl IntoIterator<Item = StateId>,
517    element_name: NameId,
518    element_namespace: Option<NameId>,
519    target_namespace: Option<NameId>,
520    substitution_groups: Option<&SubstitutionGroupMap>,
521    xsd_version: XsdVersion,
522) -> HashSet<StateId> {
523    let closure = epsilon_closure(nfa, start_states);
524    let mut next = HashSet::new();
525
526    for state_id in closure {
527        let state = match nfa.get_state(state_id) {
528            Some(state) => state,
529            None => continue,
530        };
531        let term = match state.term.as_ref() {
532            Some(term) => term,
533            None => continue,
534        };
535
536        if term_matches(
537            term,
538            element_name,
539            element_namespace,
540            target_namespace,
541            substitution_groups,
542            xsd_version,
543        ) {
544            for target in state.consuming_transitions() {
545                next.insert(target);
546            }
547        }
548    }
549
550    epsilon_closure(nfa, next)
551}
552
553/// Advance NFA states with element-over-wildcard priority (XSD 1.1).
554///
555/// Only handles plain epsilon transitions. For NFAs with counter transitions,
556/// use `ActiveStates::advance_with_priority` instead.
557pub fn advance_with_priority(
558    nfa: &NfaTable,
559    start_states: impl IntoIterator<Item = StateId>,
560    element_name: NameId,
561    element_namespace: Option<NameId>,
562    target_namespace: Option<NameId>,
563    substitution_groups: Option<&SubstitutionGroupMap>,
564    xsd_version: XsdVersion,
565) -> HashSet<StateId> {
566    let closure = epsilon_closure(nfa, start_states);
567    let mut element_targets = HashSet::new();
568    let mut wildcard_targets = HashSet::new();
569
570    for state_id in closure {
571        let state = match nfa.get_state(state_id) {
572            Some(state) => state,
573            None => continue,
574        };
575        let term = match state.term.as_ref() {
576            Some(term) => term,
577            None => continue,
578        };
579
580        if !term_matches(
581            term,
582            element_name,
583            element_namespace,
584            target_namespace,
585            substitution_groups,
586            xsd_version,
587        ) {
588            continue;
589        }
590
591        match term {
592            NfaTerm::Element { .. } => {
593                for target in state.consuming_transitions() {
594                    element_targets.insert(target);
595                }
596            }
597            NfaTerm::Wildcard { .. } => {
598                for target in state.consuming_transitions() {
599                    wildcard_targets.insert(target);
600                }
601            }
602        }
603    }
604
605    let next = if !element_targets.is_empty() {
606        element_targets
607    } else {
608        wildcard_targets
609    };
610
611    epsilon_closure(nfa, next)
612}
613
614// ---------------------------------------------------------------------------
615// Counter-aware runtime types
616// ---------------------------------------------------------------------------
617
618/// A single NFA configuration with counter values.
619///
620/// Represents one "thread" in the NFA simulation where each counter
621/// has a specific value. Two configs at the same state but different
622/// counter values are distinct.
623#[derive(Debug, Clone, Hash, Eq, PartialEq)]
624pub struct ActiveConfig {
625    pub state_id: StateId,
626    pub counters: Box<[u32]>,
627}
628
629impl ActiveConfig {
630    /// Create an initial config at the given state with all counters at 0.
631    pub fn initial(state_id: StateId, num_counters: usize) -> Self {
632        Self {
633            state_id,
634            counters: vec![0; num_counters].into_boxed_slice(),
635        }
636    }
637
638    /// Clone this config with a different state ID.
639    fn with_state(&self, new_state: StateId) -> Self {
640        Self {
641            state_id: new_state,
642            counters: self.counters.clone(),
643        }
644    }
645
646    /// Clone this config with a different state ID and a modified counter.
647    fn with_counter_set(&self, new_state: StateId, counter_id: CounterId, value: u32) -> Self {
648        let mut new = self.with_state(new_state);
649        new.counters[counter_id as usize] = value;
650        new
651    }
652}
653
654/// Key for the `Hybrid` active-state variant.
655///
656/// Structurally identical to `ActiveConfig` but the slot at `ranged_counter_idx`
657/// is **always 0** (sentinel).  All constructors and setters enforce this invariant,
658/// so derived `Hash`/`Eq` work correctly without a custom impl.
659#[derive(Debug, Clone, Hash, Eq, PartialEq)]
660pub struct HybridKey {
661    state_id: StateId,
662    counters: Box<[u32]>,
663}
664
665impl HybridKey {
666    /// Create a key with all counters at 0 (including the ranged slot).
667    fn initial(state_id: StateId, num_counters: usize) -> Self {
668        Self {
669            state_id,
670            counters: vec![0; num_counters].into_boxed_slice(),
671        }
672    }
673
674    /// Clone with a different state, same counter values.
675    fn with_state(&self, new_state: StateId) -> Self {
676        Self {
677            state_id: new_state,
678            counters: self.counters.clone(),
679        }
680    }
681
682    /// Clone with a modified **scalar** counter.
683    ///
684    /// Panics (debug) if `counter_id` is the ranged counter.
685    fn with_scalar_counter(
686        &self,
687        new_state: StateId,
688        counter_id: CounterId,
689        value: u32,
690        ranged_counter_idx: usize,
691    ) -> Self {
692        debug_assert!(
693            counter_id as usize != ranged_counter_idx,
694            "with_scalar_counter called on ranged counter {counter_id}"
695        );
696        let mut new = self.with_state(new_state);
697        new.counters[counter_id as usize] = value;
698        new
699    }
700
701    /// Read a counter slot value.
702    fn counter(&self, counter_id: CounterId) -> u32 {
703        self.counters[counter_id as usize]
704    }
705
706    /// Repack for dynamic counter switching.
707    ///
708    /// Precondition: old ranged counter is dead (`[0,0]` in all configs).
709    /// Writes 0 into the old ranged slot, zeros the new ranged slot (sentinel).
710    /// Returns `(new_key, extracted_scalar_value)` where `extracted_scalar_value`
711    /// is the value that was in the new ranged slot (to become a `CounterRange`).
712    fn repacked(&self, old_ranged_idx: usize, new_ranged_idx: usize) -> (Self, u32) {
713        let scalar_val = self.counters[new_ranged_idx];
714        let mut new_counters = self.counters.clone();
715        new_counters[old_ranged_idx] = 0;
716        new_counters[new_ranged_idx] = 0; // new sentinel
717        (
718            Self {
719                state_id: self.state_id,
720                counters: new_counters,
721            },
722            scalar_val,
723        )
724    }
725}
726
727/// Ranged epsilon closure for single-counter NFAs with nullable body.
728///
729/// Uses a `HashMap<StateId, CounterRange>` merge-map where each state has
730/// exactly one contiguous counter range.  On `CounterIncrement`, the fast-
731/// forward sets `hi = counter_def.max`, so the closure converges in O(states)
732/// worklist iterations instead of O(max).
733///
734/// **Contiguity invariant**: every counter range at a state is a single
735/// contiguous interval.  Reset→[0,0], increment shifts +1, guards clip one
736/// side, fast-forward extends hi — all preserve contiguity.  Ranges arriving
737/// at the same state from different paths are always adjacent or overlapping
738/// (because the counter increments by exactly 1 per iteration), so union
739/// preserves contiguity.
740fn ranged_single_epsilon_closure(
741    nfa: &NfaTable,
742    seeds: HashMap<StateId, CounterRange>,
743    counter_def: CounterDef,
744) -> ActiveStates {
745    let mut map: HashMap<StateId, CounterRange> = HashMap::new();
746    let mut worklist: VecDeque<(StateId, CounterRange)> = seeds.into_iter().collect();
747
748    while let Some((state_id, range)) = worklist.pop_front() {
749        // Check subsumption: if existing range already covers this one, skip.
750        if let Some(&existing) = map.get(&state_id) {
751            if existing.subsumes(range) {
752                continue;
753            }
754            // Merge (union) — contiguity invariant guarantees this is safe.
755            let merged = existing.union(range);
756            debug_assert!(
757                merged.lo <= merged.hi,
758                "contiguity invariant violated at state {state_id}"
759            );
760            map.insert(state_id, merged);
761        } else {
762            map.insert(state_id, range);
763        }
764
765        // Process transitions from this state.
766        if let Some(state) = nfa.get_state(state_id) {
767            for trans in &state.transitions {
768                let next = match trans.kind {
769                    TransitionKind::Epsilon => Some(range),
770                    TransitionKind::CounterReset(_) => Some(CounterRange::single(0)),
771                    TransitionKind::CounterIncrement(_) => {
772                        // Fast-forward: body is nullable, so the counter can
773                        // reach max via repeated empty body traversals.
774                        Some(CounterRange::new(range.lo + 1, counter_def.max))
775                    }
776                    TransitionKind::CounterMaxGuard(_) => {
777                        let clamped = range.intersect_below(counter_def.max);
778                        if clamped.is_empty() {
779                            None
780                        } else {
781                            Some(clamped)
782                        }
783                    }
784                    TransitionKind::CounterMinGuard(_) => {
785                        let passed = range.intersect_above(counter_def.min);
786                        if passed.is_empty() {
787                            None
788                        } else {
789                            // Canonicalize: counter is dead after exit.
790                            Some(CounterRange::single(0))
791                        }
792                    }
793                    TransitionKind::Consume => None,
794                };
795                if let Some(next_range) = next {
796                    // Only push if not already subsumed by existing map entry.
797                    let dominated = map
798                        .get(&trans.target)
799                        .is_some_and(|r| r.subsumes(next_range));
800                    if !dominated {
801                        worklist.push_back((trans.target, next_range));
802                    }
803                }
804            }
805        }
806    }
807
808    ActiveStates::RangedSingle {
809        state_ranges: map,
810        counter_def,
811    }
812}
813
814/// Hybrid epsilon closure for multi-counter NFAs with one ranged counter.
815///
816/// Combines the `ranged_single_epsilon_closure` fast-forward for the ranged
817/// counter with scalar operations for all other counters.  The `HybridKey`
818/// captures (state, scalar counter values) while the ranged counter is stored
819/// as a `CounterRange` map value.
820///
821/// **Contiguity invariant** holds per-`HybridKey`: for a fixed set of scalar
822/// counter values, the ranged counter progresses identically to `RangedSingle`.
823fn hybrid_epsilon_closure(
824    nfa: &NfaTable,
825    seeds: HashMap<HybridKey, CounterRange>,
826    ranged_counter_idx: usize,
827    num_counters: usize,
828) -> ActiveStates {
829    let ranged_id = ranged_counter_idx as CounterId;
830    let ranged_def = nfa.counter_defs[ranged_counter_idx];
831
832    let mut map: HashMap<HybridKey, CounterRange> = HashMap::new();
833    let mut worklist: VecDeque<(HybridKey, CounterRange)> = seeds.into_iter().collect();
834
835    while let Some((key, range)) = worklist.pop_front() {
836        // Subsumption check: if existing range already covers this one, skip.
837        if let Some(&existing) = map.get(&key) {
838            if existing.subsumes(range) {
839                continue;
840            }
841            let merged = existing.union(range);
842            debug_assert!(
843                merged.lo <= merged.hi,
844                "contiguity invariant violated at state {}",
845                key.state_id
846            );
847            map.insert(key.clone(), merged);
848        } else {
849            map.insert(key.clone(), range);
850        }
851
852        if let Some(state) = nfa.get_state(key.state_id) {
853            for trans in &state.transitions {
854                let next: Option<(HybridKey, CounterRange)> = match trans.kind {
855                    TransitionKind::Epsilon => Some((key.with_state(trans.target), range)),
856
857                    // --- Ranged counter operations (range arithmetic) ---
858                    TransitionKind::CounterReset(c) if c == ranged_id => {
859                        Some((key.with_state(trans.target), CounterRange::single(0)))
860                    }
861                    TransitionKind::CounterIncrement(c) if c == ranged_id => {
862                        // Fast-forward: body is nullable, so counter can reach max.
863                        Some((
864                            key.with_state(trans.target),
865                            CounterRange::new(range.lo + 1, ranged_def.max),
866                        ))
867                    }
868                    TransitionKind::CounterMaxGuard(c) if c == ranged_id => {
869                        let clamped = range.intersect_below(ranged_def.max);
870                        if clamped.is_empty() {
871                            None
872                        } else {
873                            Some((key.with_state(trans.target), clamped))
874                        }
875                    }
876                    TransitionKind::CounterMinGuard(c) if c == ranged_id => {
877                        let passed = range.intersect_above(ranged_def.min);
878                        if passed.is_empty() {
879                            None
880                        } else {
881                            // Canonicalize: ranged counter is dead after exit.
882                            Some((key.with_state(trans.target), CounterRange::single(0)))
883                        }
884                    }
885
886                    // --- Scalar counter operations ---
887                    TransitionKind::CounterReset(c) => Some((
888                        key.with_scalar_counter(trans.target, c, 0, ranged_counter_idx),
889                        range,
890                    )),
891                    TransitionKind::CounterIncrement(c) => {
892                        let val = key.counter(c) + 1;
893                        Some((
894                            key.with_scalar_counter(trans.target, c, val, ranged_counter_idx),
895                            range,
896                        ))
897                    }
898                    TransitionKind::CounterMaxGuard(c) => {
899                        if key.counter(c) < nfa.counter_defs[c as usize].max {
900                            Some((key.with_state(trans.target), range))
901                        } else {
902                            None
903                        }
904                    }
905                    TransitionKind::CounterMinGuard(c) => {
906                        if key.counter(c) >= nfa.counter_defs[c as usize].min {
907                            // Canonicalize scalar counter on exit.
908                            Some((
909                                key.with_scalar_counter(trans.target, c, 0, ranged_counter_idx),
910                                range,
911                            ))
912                        } else {
913                            None
914                        }
915                    }
916
917                    TransitionKind::Consume => None,
918                };
919
920                if let Some((next_key, next_range)) = next {
921                    let dominated = map.get(&next_key).is_some_and(|r| r.subsumes(next_range));
922                    if !dominated {
923                        worklist.push_back((next_key, next_range));
924                    }
925                }
926            }
927        }
928    }
929
930    let result = ActiveStates::Hybrid {
931        configs: map,
932        ranged_counter_idx,
933        num_counters,
934    };
935    result.maybe_switch_ranged_counter(nfa)
936}
937
938/// Quad-path active state set for NFA simulation.
939///
940/// **Invariant**: values are always closure-closed (epsilon closure has been
941/// applied). All constructors and advance methods enforce this.
942///
943/// - `Simple`: counter-free NFA — delegates to existing `HashSet<StateId>` functions.
944/// - `Counted`: NFA with multiple counters — tracks `HashSet<ActiveConfig>` (scalar).
945/// - `RangedSingle`: single counter with nullable body — stores one `CounterRange`
946///   per reachable state, converging epsilon closure in O(states) instead of O(N).
947/// - `Hybrid`: multiple counters, one ranged (fast-forward) + others scalar.
948///   Collapses the ranged counter's dimension; cost proportional to remaining scalar dims.
949#[derive(Debug, Clone)]
950pub enum ActiveStates {
951    /// Fast path: no counters in this NFA (bit-identical to old code)
952    Simple(HashSet<StateId>),
953    /// Scalar counted path: configurations carry individual counter values
954    Counted {
955        configs: HashSet<ActiveConfig>,
956        num_counters: usize,
957    },
958    /// Optimized path for single-counter NFAs with nullable loop body.
959    /// Each reachable state maps to one `CounterRange` (contiguity invariant).
960    RangedSingle {
961        state_ranges: HashMap<StateId, CounterRange>,
962        counter_def: CounterDef,
963    },
964    /// Hybrid path: one counter ranged (fast-forward), others scalar.
965    /// The ranged counter's slot in every `HybridKey` is always 0 (sentinel);
966    /// the actual ranged value lives in the `CounterRange` map value.
967    Hybrid {
968        configs: HashMap<HybridKey, CounterRange>,
969        ranged_counter_idx: usize,
970        num_counters: usize,
971    },
972}
973
974impl ActiveStates {
975    /// Create initial active states from an NFA, picking the right variant.
976    pub fn from_nfa(nfa: &NfaTable) -> Self {
977        use super::particle::COUNTED_THRESHOLD;
978
979        if nfa.has_counters() {
980            // Single counter with nullable body → optimized ranged path
981            if nfa.counter_defs.len() == 1 && nfa.counter_defs[0].body_nullable {
982                let mut state_ranges = HashMap::new();
983                state_ranges.insert(nfa.start_state, CounterRange::single(0));
984                let result = ActiveStates::RangedSingle {
985                    state_ranges,
986                    counter_def: nfa.counter_defs[0],
987                };
988                return result.epsilon_closure(nfa);
989            }
990
991            // Multiple counters: check for a nullable counter worth ranging.
992            // Pick the nullable counter with the largest max — that collapses
993            // the most configurations.  Only use Hybrid if max > COUNTED_THRESHOLD
994            // (below that, Counted is cheap enough and we avoid map overhead).
995            // On equal max, prefer the lowest index — for sequential loops this
996            // ranges the first loop's counter, keeping later counters at scalar 0.
997            if nfa.counter_defs.len() > 1 {
998                let best_nullable = nfa
999                    .counter_defs
1000                    .iter()
1001                    .enumerate()
1002                    .filter(|(_, def)| def.body_nullable)
1003                    .max_by_key(|(idx, def)| (def.max, std::cmp::Reverse(*idx)));
1004
1005                if let Some((ranged_idx, def)) = best_nullable {
1006                    if def.max > COUNTED_THRESHOLD {
1007                        let num_counters = nfa.counter_defs.len();
1008                        let initial_key = HybridKey::initial(nfa.start_state, num_counters);
1009                        let mut configs = HashMap::new();
1010                        configs.insert(initial_key, CounterRange::single(0));
1011                        let result = ActiveStates::Hybrid {
1012                            configs,
1013                            ranged_counter_idx: ranged_idx,
1014                            num_counters,
1015                        };
1016                        return result.epsilon_closure(nfa);
1017                    }
1018                }
1019            }
1020
1021            // Fall through to scalar Counted path
1022            let initial = ActiveConfig::initial(nfa.start_state, nfa.counter_defs.len());
1023            let mut configs = HashSet::new();
1024            configs.insert(initial);
1025            let result = ActiveStates::Counted {
1026                configs,
1027                num_counters: nfa.counter_defs.len(),
1028            };
1029            result.epsilon_closure(nfa)
1030        } else {
1031            let simple = epsilon_closure(nfa, std::iter::once(nfa.start_state));
1032            ActiveStates::Simple(simple)
1033        }
1034    }
1035
1036    /// Check if the active set is empty (no reachable states).
1037    pub fn is_empty(&self) -> bool {
1038        match self {
1039            ActiveStates::Simple(s) => s.is_empty(),
1040            ActiveStates::Counted { configs, .. } => configs.is_empty(),
1041            ActiveStates::RangedSingle { state_ranges, .. } => state_ranges.is_empty(),
1042            ActiveStates::Hybrid { configs, .. } => configs.is_empty(),
1043        }
1044    }
1045
1046    /// Check if any active state is the NFA accept state.
1047    pub fn contains_accept(&self, nfa: &NfaTable) -> bool {
1048        match self {
1049            ActiveStates::Simple(s) => s.iter().any(|&id| nfa.is_accept(id)),
1050            ActiveStates::Counted { configs, .. } => {
1051                configs.iter().any(|c| nfa.is_accept(c.state_id))
1052            }
1053            ActiveStates::RangedSingle { state_ranges, .. } => {
1054                state_ranges.contains_key(&nfa.accept_state)
1055            }
1056            ActiveStates::Hybrid { configs, .. } => {
1057                configs.keys().any(|k| nfa.is_accept(k.state_id))
1058            }
1059        }
1060    }
1061
1062    /// Compute epsilon closure (including counter transitions for Counted path).
1063    pub fn epsilon_closure(self, nfa: &NfaTable) -> Self {
1064        match self {
1065            ActiveStates::Simple(states) => ActiveStates::Simple(epsilon_closure(nfa, states)),
1066            ActiveStates::Counted {
1067                configs,
1068                num_counters,
1069            } => {
1070                let mut result: HashSet<ActiveConfig> = HashSet::new();
1071                let mut stack: Vec<ActiveConfig> = configs.into_iter().collect();
1072
1073                while let Some(config) = stack.pop() {
1074                    if !result.insert(config.clone()) {
1075                        continue;
1076                    }
1077                    if let Some(state) = nfa.get_state(config.state_id) {
1078                        for trans in &state.transitions {
1079                            let next = match trans.kind {
1080                                TransitionKind::Epsilon => Some(config.with_state(trans.target)),
1081                                TransitionKind::CounterReset(c) => {
1082                                    Some(config.with_counter_set(trans.target, c, 0))
1083                                }
1084                                TransitionKind::CounterIncrement(c) => {
1085                                    let val = config.counters[c as usize] + 1;
1086                                    Some(config.with_counter_set(trans.target, c, val))
1087                                }
1088                                TransitionKind::CounterMaxGuard(c) => {
1089                                    if config.counters[c as usize]
1090                                        < nfa.counter_defs[c as usize].max
1091                                    {
1092                                        Some(config.with_state(trans.target))
1093                                    } else {
1094                                        None
1095                                    }
1096                                }
1097                                TransitionKind::CounterMinGuard(c) => {
1098                                    if config.counters[c as usize]
1099                                        >= nfa.counter_defs[c as usize].min
1100                                    {
1101                                        // Canonicalize: zero out counter on exit to collapse
1102                                        // configs that left the loop at different counter values.
1103                                        Some(config.with_counter_set(trans.target, c, 0))
1104                                    } else {
1105                                        None
1106                                    }
1107                                }
1108                                TransitionKind::Consume => None,
1109                            };
1110                            if let Some(next_config) = next {
1111                                if !result.contains(&next_config) {
1112                                    stack.push(next_config);
1113                                }
1114                            }
1115                        }
1116                    }
1117                }
1118                ActiveStates::Counted {
1119                    configs: result,
1120                    num_counters,
1121                }
1122            }
1123            ActiveStates::RangedSingle {
1124                state_ranges,
1125                counter_def,
1126            } => ranged_single_epsilon_closure(nfa, state_ranges, counter_def),
1127            ActiveStates::Hybrid {
1128                configs,
1129                ranged_counter_idx,
1130                num_counters,
1131            } => hybrid_epsilon_closure(nfa, configs, ranged_counter_idx, num_counters),
1132        }
1133    }
1134
1135    /// After epsilon closure, check if the current ranged counter is dead
1136    /// (`[0,0]` in ALL configs — the canonical exit value from `CounterMinGuard`).
1137    /// If a better nullable counter exists and the repack produces contiguous
1138    /// ranges, switch the ranged counter.
1139    ///
1140    /// The repacked map is already epsilon-closed — no re-closure needed, since
1141    /// the same set of (state, counter-values) is preserved, just re-encoded.
1142    fn maybe_switch_ranged_counter(self, nfa: &NfaTable) -> Self {
1143        use super::particle::COUNTED_THRESHOLD;
1144
1145        let ActiveStates::Hybrid {
1146            configs,
1147            ranged_counter_idx,
1148            num_counters,
1149        } = self
1150        else {
1151            return self;
1152        };
1153
1154        let dead = CounterRange::single(0);
1155        if configs.is_empty() || !configs.values().all(|r| *r == dead) {
1156            return ActiveStates::Hybrid {
1157                configs,
1158                ranged_counter_idx,
1159                num_counters,
1160            };
1161        }
1162
1163        // Single pass: collect per-counter min/max across all configs.
1164        let nc = nfa.counter_defs.len();
1165        let mut min_vals = vec![u32::MAX; nc];
1166        let mut max_vals = vec![0u32; nc];
1167        for key in configs.keys() {
1168            for idx in 0..nc {
1169                let v = key.counter(idx as CounterId);
1170                min_vals[idx] = min_vals[idx].min(v);
1171                max_vals[idx] = max_vals[idx].max(v);
1172            }
1173        }
1174
1175        // Pick the best candidate: nullable, large max, widest spread.
1176        // Spread (max - min + 1) is an upper bound on distinct values;
1177        // the contiguity guard below rejects non-contiguous cases.
1178        let mut best: Option<(usize, u32, u32)> = None; // (idx, spread, max)
1179        for (idx, def) in nfa.counter_defs.iter().enumerate() {
1180            if idx == ranged_counter_idx || !def.body_nullable || def.max <= COUNTED_THRESHOLD {
1181                continue;
1182            }
1183            if min_vals[idx] == max_vals[idx] {
1184                continue;
1185            }
1186            let spread = max_vals[idx] - min_vals[idx] + 1;
1187            let dominated =
1188                best.is_some_and(|(_, bs, bm)| spread < bs || (spread == bs && def.max <= bm));
1189            if !dominated {
1190                best = Some((idx, spread, def.max));
1191            }
1192        }
1193
1194        let Some((new_ranged_idx, _, _)) = best else {
1195            return ActiveStates::Hybrid {
1196                configs,
1197                ranged_counter_idx,
1198                num_counters,
1199            };
1200        };
1201
1202        // Guarded repack: track (range, count) per key, then verify that
1203        // range width == count (no phantom values in the interval).
1204        let mut new_configs: HashMap<HybridKey, (CounterRange, u32)> =
1205            HashMap::with_capacity(configs.len());
1206        for old_key in configs.keys() {
1207            let (new_key, scalar_val) = old_key.repacked(ranged_counter_idx, new_ranged_idx);
1208            let new_range = CounterRange::single(scalar_val);
1209            new_configs
1210                .entry(new_key)
1211                .and_modify(|(r, count)| {
1212                    *r = r.union(new_range);
1213                    *count += 1;
1214                })
1215                .or_insert((new_range, 1));
1216        }
1217
1218        for (range, count) in new_configs.values() {
1219            if range.hi - range.lo + 1 != *count {
1220                return ActiveStates::Hybrid {
1221                    configs,
1222                    ranged_counter_idx,
1223                    num_counters,
1224                };
1225            }
1226        }
1227
1228        let repacked = new_configs.into_iter().map(|(k, (r, _))| (k, r)).collect();
1229
1230        ActiveStates::Hybrid {
1231            configs: repacked,
1232            ranged_counter_idx: new_ranged_idx,
1233            num_counters,
1234        }
1235    }
1236
1237    /// Advance NFA states by matching an element (XSD 1.0 — no priority).
1238    pub fn advance(
1239        self,
1240        nfa: &NfaTable,
1241        element_name: NameId,
1242        element_namespace: Option<NameId>,
1243        target_namespace: Option<NameId>,
1244        substitution_groups: Option<&SubstitutionGroupMap>,
1245        xsd_version: XsdVersion,
1246    ) -> Self {
1247        match self {
1248            ActiveStates::Simple(states) => ActiveStates::Simple(advance_states(
1249                nfa,
1250                states,
1251                element_name,
1252                element_namespace,
1253                target_namespace,
1254                substitution_groups,
1255                xsd_version,
1256            )),
1257            ActiveStates::Counted {
1258                configs,
1259                num_counters,
1260            } => {
1261                // Configs are already closure-closed (invariant).
1262                // Find matching terms and follow Consume transitions.
1263                let mut next_configs = HashSet::new();
1264                for config in &configs {
1265                    if let Some(state) = nfa.get_state(config.state_id) {
1266                        if let Some(ref term_val) = state.term {
1267                            if term_matches(
1268                                term_val,
1269                                element_name,
1270                                element_namespace,
1271                                target_namespace,
1272                                substitution_groups,
1273                                xsd_version,
1274                            ) {
1275                                for trans in &state.transitions {
1276                                    if trans.kind == TransitionKind::Consume {
1277                                        next_configs.insert(config.with_state(trans.target));
1278                                    }
1279                                }
1280                            }
1281                        }
1282                    }
1283                }
1284                let result = ActiveStates::Counted {
1285                    configs: next_configs,
1286                    num_counters,
1287                };
1288                result.epsilon_closure(nfa)
1289            }
1290            ActiveStates::RangedSingle {
1291                state_ranges,
1292                counter_def,
1293            } => {
1294                let mut next_seeds: HashMap<StateId, CounterRange> = HashMap::new();
1295                for (&state_id, &range) in &state_ranges {
1296                    if let Some(state) = nfa.get_state(state_id) {
1297                        if let Some(ref term_val) = state.term {
1298                            if term_matches(
1299                                term_val,
1300                                element_name,
1301                                element_namespace,
1302                                target_namespace,
1303                                substitution_groups,
1304                                xsd_version,
1305                            ) {
1306                                for trans in &state.transitions {
1307                                    if trans.kind == TransitionKind::Consume {
1308                                        next_seeds
1309                                            .entry(trans.target)
1310                                            .and_modify(|r| *r = r.union(range))
1311                                            .or_insert(range);
1312                                    }
1313                                }
1314                            }
1315                        }
1316                    }
1317                }
1318                let result = ActiveStates::RangedSingle {
1319                    state_ranges: next_seeds,
1320                    counter_def,
1321                };
1322                result.epsilon_closure(nfa)
1323            }
1324            ActiveStates::Hybrid {
1325                configs,
1326                ranged_counter_idx,
1327                num_counters,
1328            } => {
1329                let mut next_configs: HashMap<HybridKey, CounterRange> = HashMap::new();
1330                for (key, &range) in &configs {
1331                    if let Some(state) = nfa.get_state(key.state_id) {
1332                        if let Some(ref term_val) = state.term {
1333                            if term_matches(
1334                                term_val,
1335                                element_name,
1336                                element_namespace,
1337                                target_namespace,
1338                                substitution_groups,
1339                                xsd_version,
1340                            ) {
1341                                for trans in &state.transitions {
1342                                    if trans.kind == TransitionKind::Consume {
1343                                        let new_key = key.with_state(trans.target);
1344                                        next_configs
1345                                            .entry(new_key)
1346                                            .and_modify(|r| *r = r.union(range))
1347                                            .or_insert(range);
1348                                    }
1349                                }
1350                            }
1351                        }
1352                    }
1353                }
1354                let result = ActiveStates::Hybrid {
1355                    configs: next_configs,
1356                    ranged_counter_idx,
1357                    num_counters,
1358                };
1359                result.epsilon_closure(nfa)
1360            }
1361        }
1362    }
1363
1364    /// Advance NFA states with element-over-wildcard priority (XSD 1.1).
1365    pub fn advance_with_priority(
1366        self,
1367        nfa: &NfaTable,
1368        element_name: NameId,
1369        element_namespace: Option<NameId>,
1370        target_namespace: Option<NameId>,
1371        substitution_groups: Option<&SubstitutionGroupMap>,
1372        xsd_version: XsdVersion,
1373    ) -> Self {
1374        match self {
1375            ActiveStates::Simple(states) => ActiveStates::Simple(advance_with_priority(
1376                nfa,
1377                states,
1378                element_name,
1379                element_namespace,
1380                target_namespace,
1381                substitution_groups,
1382                xsd_version,
1383            )),
1384            ActiveStates::Counted {
1385                configs,
1386                num_counters,
1387            } => {
1388                let mut element_configs = HashSet::new();
1389                let mut wildcard_configs = HashSet::new();
1390
1391                for config in &configs {
1392                    if let Some(state) = nfa.get_state(config.state_id) {
1393                        if let Some(ref term_val) = state.term {
1394                            if term_matches(
1395                                term_val,
1396                                element_name,
1397                                element_namespace,
1398                                target_namespace,
1399                                substitution_groups,
1400                                xsd_version,
1401                            ) {
1402                                let target_set = match term_val {
1403                                    NfaTerm::Element { .. } => &mut element_configs,
1404                                    NfaTerm::Wildcard { .. } => &mut wildcard_configs,
1405                                };
1406                                for trans in &state.transitions {
1407                                    if trans.kind == TransitionKind::Consume {
1408                                        target_set.insert(config.with_state(trans.target));
1409                                    }
1410                                }
1411                            }
1412                        }
1413                    }
1414                }
1415
1416                let next = if !element_configs.is_empty() {
1417                    element_configs
1418                } else {
1419                    wildcard_configs
1420                };
1421
1422                let result = ActiveStates::Counted {
1423                    configs: next,
1424                    num_counters,
1425                };
1426                result.epsilon_closure(nfa)
1427            }
1428            ActiveStates::RangedSingle {
1429                state_ranges,
1430                counter_def,
1431            } => {
1432                let mut element_seeds: HashMap<StateId, CounterRange> = HashMap::new();
1433                let mut wildcard_seeds: HashMap<StateId, CounterRange> = HashMap::new();
1434
1435                for (&state_id, &range) in &state_ranges {
1436                    if let Some(state) = nfa.get_state(state_id) {
1437                        if let Some(ref term_val) = state.term {
1438                            if term_matches(
1439                                term_val,
1440                                element_name,
1441                                element_namespace,
1442                                target_namespace,
1443                                substitution_groups,
1444                                xsd_version,
1445                            ) {
1446                                let target_map = match term_val {
1447                                    NfaTerm::Element { .. } => &mut element_seeds,
1448                                    NfaTerm::Wildcard { .. } => &mut wildcard_seeds,
1449                                };
1450                                for trans in &state.transitions {
1451                                    if trans.kind == TransitionKind::Consume {
1452                                        target_map
1453                                            .entry(trans.target)
1454                                            .and_modify(|r| *r = r.union(range))
1455                                            .or_insert(range);
1456                                    }
1457                                }
1458                            }
1459                        }
1460                    }
1461                }
1462
1463                let next = if !element_seeds.is_empty() {
1464                    element_seeds
1465                } else {
1466                    wildcard_seeds
1467                };
1468
1469                let result = ActiveStates::RangedSingle {
1470                    state_ranges: next,
1471                    counter_def,
1472                };
1473                result.epsilon_closure(nfa)
1474            }
1475            ActiveStates::Hybrid {
1476                configs,
1477                ranged_counter_idx,
1478                num_counters,
1479            } => {
1480                let mut element_configs: HashMap<HybridKey, CounterRange> = HashMap::new();
1481                let mut wildcard_configs: HashMap<HybridKey, CounterRange> = HashMap::new();
1482
1483                for (key, &range) in &configs {
1484                    if let Some(state) = nfa.get_state(key.state_id) {
1485                        if let Some(ref term_val) = state.term {
1486                            if term_matches(
1487                                term_val,
1488                                element_name,
1489                                element_namespace,
1490                                target_namespace,
1491                                substitution_groups,
1492                                xsd_version,
1493                            ) {
1494                                let target_map = match term_val {
1495                                    NfaTerm::Element { .. } => &mut element_configs,
1496                                    NfaTerm::Wildcard { .. } => &mut wildcard_configs,
1497                                };
1498                                for trans in &state.transitions {
1499                                    if trans.kind == TransitionKind::Consume {
1500                                        let new_key = key.with_state(trans.target);
1501                                        target_map
1502                                            .entry(new_key)
1503                                            .and_modify(|r| *r = r.union(range))
1504                                            .or_insert(range);
1505                                    }
1506                                }
1507                            }
1508                        }
1509                    }
1510                }
1511
1512                let next = if !element_configs.is_empty() {
1513                    element_configs
1514                } else {
1515                    wildcard_configs
1516                };
1517
1518                let result = ActiveStates::Hybrid {
1519                    configs: next,
1520                    ranged_counter_idx,
1521                    num_counters,
1522                };
1523                result.epsilon_closure(nfa)
1524            }
1525        }
1526    }
1527
1528    /// Find the matching term info from active states (for content validation).
1529    ///
1530    /// Returns the element key and resolved type of the first matching term.
1531    /// Since ActiveStates is closure-closed, all reachable term states are
1532    /// already present — no additional epsilon closure needed for Counted path.
1533    pub fn find_match_info(
1534        &self,
1535        nfa: &NfaTable,
1536        name: NameId,
1537        namespace: Option<NameId>,
1538        target_ns: Option<NameId>,
1539        subst_groups: Option<&SubstitutionGroupMap>,
1540        xsd_version: XsdVersion,
1541    ) -> MatchInfo {
1542        match self {
1543            ActiveStates::Simple(states) => {
1544                // Delegate to existing function (it does epsilon_closure internally,
1545                // which is redundant but harmless since states are already closed)
1546                let closure = epsilon_closure(nfa, states.iter().copied());
1547                find_match_info_in_states(
1548                    nfa,
1549                    closure.iter().copied(),
1550                    name,
1551                    namespace,
1552                    target_ns,
1553                    subst_groups,
1554                    xsd_version,
1555                )
1556            }
1557            ActiveStates::Counted { configs, .. } => {
1558                // Configs are closure-closed — iterate directly
1559                find_match_info_in_states(
1560                    nfa,
1561                    configs.iter().map(|c| c.state_id),
1562                    name,
1563                    namespace,
1564                    target_ns,
1565                    subst_groups,
1566                    xsd_version,
1567                )
1568            }
1569            ActiveStates::RangedSingle { state_ranges, .. } => find_match_info_in_states(
1570                nfa,
1571                state_ranges.keys().copied(),
1572                name,
1573                namespace,
1574                target_ns,
1575                subst_groups,
1576                xsd_version,
1577            ),
1578            ActiveStates::Hybrid { configs, .. } => find_match_info_in_states(
1579                nfa,
1580                configs.keys().map(|k| k.state_id),
1581                name,
1582                namespace,
1583                target_ns,
1584                subst_groups,
1585                xsd_version,
1586            ),
1587        }
1588    }
1589
1590    /// Collect expected element terms from reachable states (for error messages).
1591    ///
1592    /// Returns (local_name, namespace, element_key) for each reachable Element term.
1593    pub fn expected_element_terms(
1594        &self,
1595        nfa: &NfaTable,
1596    ) -> Vec<(NameId, Option<NameId>, Option<ElementKey>)> {
1597        let mut result = Vec::new();
1598        match self {
1599            ActiveStates::Simple(states) => {
1600                let closure = epsilon_closure(nfa, states.iter().copied());
1601                for state_id in closure {
1602                    if let Some(state) = nfa.get_state(state_id) {
1603                        if let Some(NfaTerm::Element {
1604                            name,
1605                            namespace,
1606                            element_key,
1607                            ..
1608                        }) = &state.term
1609                        {
1610                            result.push((*name, *namespace, *element_key));
1611                        }
1612                    }
1613                }
1614            }
1615            ActiveStates::Counted { configs, .. } => {
1616                let mut seen = HashSet::new();
1617                for config in configs {
1618                    if !seen.insert(config.state_id) {
1619                        continue; // Skip duplicate state IDs
1620                    }
1621                    if let Some(state) = nfa.get_state(config.state_id) {
1622                        if let Some(NfaTerm::Element {
1623                            name,
1624                            namespace,
1625                            element_key,
1626                            ..
1627                        }) = &state.term
1628                        {
1629                            result.push((*name, *namespace, *element_key));
1630                        }
1631                    }
1632                }
1633            }
1634            ActiveStates::RangedSingle { state_ranges, .. } => {
1635                for &state_id in state_ranges.keys() {
1636                    if let Some(state) = nfa.get_state(state_id) {
1637                        if let Some(NfaTerm::Element {
1638                            name,
1639                            namespace,
1640                            element_key,
1641                            ..
1642                        }) = &state.term
1643                        {
1644                            result.push((*name, *namespace, *element_key));
1645                        }
1646                    }
1647                }
1648            }
1649            ActiveStates::Hybrid { configs, .. } => {
1650                let mut seen = HashSet::new();
1651                for key in configs.keys() {
1652                    if !seen.insert(key.state_id) {
1653                        continue;
1654                    }
1655                    if let Some(state) = nfa.get_state(key.state_id) {
1656                        if let Some(NfaTerm::Element {
1657                            name,
1658                            namespace,
1659                            element_key,
1660                            ..
1661                        }) = &state.term
1662                        {
1663                            result.push((*name, *namespace, *element_key));
1664                        }
1665                    }
1666                }
1667            }
1668        }
1669        result
1670    }
1671}
1672
1673/// Match info returned from term lookup in active states.
1674#[derive(Debug, Clone, Copy, Default)]
1675pub struct MatchInfo {
1676    pub element_key: Option<ElementKey>,
1677    pub resolved_type: Option<TypeKey>,
1678    pub process_contents: Option<ProcessContents>,
1679}
1680
1681/// Find match info from an iterator of state IDs.
1682///
1683/// Uses element-over-wildcard priority: if any Element term matches, its info
1684/// is returned immediately; a Wildcard match is kept as a fallback and only
1685/// returned when no Element matches.  This mirrors the priority rule in
1686/// `advance_with_priority`.
1687fn find_match_info_in_states(
1688    nfa: &NfaTable,
1689    state_ids: impl Iterator<Item = StateId>,
1690    name: NameId,
1691    namespace: Option<NameId>,
1692    target_ns: Option<NameId>,
1693    subst_groups: Option<&SubstitutionGroupMap>,
1694    xsd_version: XsdVersion,
1695) -> MatchInfo {
1696    let mut wildcard_info: Option<MatchInfo> = None;
1697
1698    for state_id in state_ids {
1699        if let Some(state) = nfa.get_state(state_id) {
1700            if let Some(ref term) = state.term {
1701                if term_matches(term, name, namespace, target_ns, subst_groups, xsd_version) {
1702                    match term {
1703                        NfaTerm::Element {
1704                            name: term_name,
1705                            namespace: term_ns,
1706                            element_key,
1707                            resolved_type,
1708                        } => {
1709                            // Element match — highest priority, return immediately
1710                            return if *term_name == name && *term_ns == namespace {
1711                                // Direct match — return term's declaration info
1712                                MatchInfo {
1713                                    element_key: *element_key,
1714                                    resolved_type: *resolved_type,
1715                                    process_contents: None,
1716                                }
1717                            } else {
1718                                // Substitution match — let runtime resolve the actual member
1719                                MatchInfo {
1720                                    element_key: None,
1721                                    resolved_type: None,
1722                                    process_contents: None,
1723                                }
1724                            };
1725                        }
1726                        NfaTerm::Wildcard {
1727                            process_contents, ..
1728                        } => {
1729                            // Keep first wildcard as fallback
1730                            if wildcard_info.is_none() {
1731                                wildcard_info = Some(MatchInfo {
1732                                    element_key: None,
1733                                    resolved_type: None,
1734                                    process_contents: Some(*process_contents),
1735                                });
1736                            }
1737                        }
1738                    }
1739                }
1740            }
1741        }
1742    }
1743
1744    wildcard_info.unwrap_or_default()
1745}
1746
1747#[cfg(test)]
1748mod tests {
1749    use super::*;
1750    use std::collections::HashSet;
1751
1752    use crate::compiler::build_substitution_group_map;
1753    use crate::schema::model::{DerivationSet, SchemaSet};
1754
1755    fn element_data(name: NameId) -> crate::arenas::ElementDeclData {
1756        crate::arenas::ElementDeclData {
1757            name: Some(name),
1758            target_namespace: None,
1759            ref_name: None,
1760            type_ref: None,
1761            inline_type: None,
1762            substitution_group: Vec::new(),
1763            default_value: None,
1764            fixed_value: None,
1765            nillable: false,
1766            is_abstract: false,
1767            min_occurs: 1,
1768            max_occurs: Some(1),
1769            block: DerivationSet::empty(),
1770            final_derivation: DerivationSet::empty(),
1771            form: None,
1772            id: None,
1773            alternatives: Vec::new(),
1774            identity_constraints: Vec::new(),
1775            pending_ic_refs: vec![],
1776            annotation: None,
1777            source: None,
1778            resolved_type: None,
1779            resolved_ref: None,
1780            resolved_substitution_groups: Vec::new(),
1781            deferred_type_error: None,
1782        }
1783    }
1784
1785    fn make_set(ids: &[StateId]) -> HashSet<StateId> {
1786        ids.iter().copied().collect()
1787    }
1788
1789    #[test]
1790    fn test_nfa_state_creation() {
1791        let state = NfaState::epsilon(0, None);
1792        assert_eq!(state.id, 0);
1793        assert!(state.is_epsilon());
1794        assert!(state.transitions.is_empty());
1795    }
1796
1797    #[test]
1798    fn test_nfa_state_with_term() {
1799        let term = NfaTerm::element(NameId(1), None, None);
1800        let state = NfaState::with_term(1, term, None);
1801        assert_eq!(state.id, 1);
1802        assert!(!state.is_epsilon());
1803    }
1804
1805    #[test]
1806    fn test_nfa_state_transitions() {
1807        let mut state = NfaState::epsilon(0, None);
1808        state.add_epsilon(1);
1809        state.add_consume(2);
1810
1811        let epsilons: Vec<_> = state.epsilon_transitions().collect();
1812        assert_eq!(epsilons, vec![1]);
1813
1814        let consuming: Vec<_> = state.consuming_transitions().collect();
1815        assert_eq!(consuming, vec![2]);
1816    }
1817
1818    #[test]
1819    fn test_nfa_table() {
1820        let states = vec![
1821            NfaState::epsilon(0, None),
1822            NfaState::with_term(1, NfaTerm::element(NameId(1), None, None), None),
1823            NfaState::epsilon(2, None),
1824        ];
1825        let table = NfaTable::new(states, 0, 2);
1826
1827        assert_eq!(table.state_count(), 3);
1828        assert_eq!(table.start_state, 0);
1829        assert_eq!(table.accept_state, 2);
1830        assert!(table.is_accept(2));
1831        assert!(!table.is_accept(0));
1832    }
1833
1834    #[test]
1835    fn test_nfa_term() {
1836        let elem = NfaTerm::element(NameId(1), Some(NameId(2)), None);
1837        assert!(elem.is_element());
1838        assert!(!elem.is_wildcard());
1839
1840        let wild = NfaTerm::wildcard(NamespaceConstraint::Any, ProcessContents::Lax);
1841        assert!(!wild.is_element());
1842        assert!(wild.is_wildcard());
1843    }
1844
1845    #[test]
1846    fn test_transition_kinds() {
1847        let eps = NfaTransition::epsilon(1);
1848        assert_eq!(eps.kind, TransitionKind::Epsilon);
1849
1850        let cons = NfaTransition::consume(2);
1851        assert_eq!(cons.kind, TransitionKind::Consume);
1852    }
1853
1854    #[test]
1855    fn test_epsilon_closure_basic() {
1856        let mut s0 = NfaState::epsilon(0, None);
1857        s0.add_epsilon(1);
1858        let mut s1 = NfaState::epsilon(1, None);
1859        s1.add_epsilon(2);
1860        let s2 = NfaState::with_term(2, NfaTerm::element(NameId(1), None, None), None);
1861
1862        let nfa = NfaTable::new(vec![s0, s1, s2], 0, 2);
1863        let closure = epsilon_closure(&nfa, [0]);
1864
1865        assert_eq!(closure, make_set(&[0, 1, 2]));
1866    }
1867
1868    fn make_priority_nfa() -> NfaTable {
1869        let mut start = NfaState::epsilon(0, None);
1870        start.add_epsilon(1);
1871        start.add_epsilon(2);
1872
1873        let mut elem_state = NfaState::with_term(1, NfaTerm::element(NameId(1), None, None), None);
1874        let mut wild_state = NfaState::with_term(
1875            2,
1876            NfaTerm::wildcard(NamespaceConstraint::Any, ProcessContents::Lax),
1877            None,
1878        );
1879
1880        elem_state.add_consume(3);
1881        wild_state.add_consume(4);
1882
1883        let exit_elem = NfaState::epsilon(3, None);
1884        let exit_wild = NfaState::epsilon(4, None);
1885
1886        NfaTable::new(
1887            vec![start, elem_state, wild_state, exit_elem, exit_wild],
1888            0,
1889            3,
1890        )
1891    }
1892
1893    #[test]
1894    fn test_advance_states_matches_element_and_wildcard() {
1895        let nfa = make_priority_nfa();
1896        let next = advance_states(&nfa, [0], NameId(1), None, None, None, XsdVersion::V1_0);
1897        assert_eq!(next, make_set(&[3, 4]));
1898
1899        let next = advance_states(&nfa, [0], NameId(2), None, None, None, XsdVersion::V1_0);
1900        assert_eq!(next, make_set(&[4]));
1901    }
1902
1903    #[test]
1904    fn test_advance_with_priority_prefers_element() {
1905        let nfa = make_priority_nfa();
1906        let next = advance_with_priority(&nfa, [0], NameId(1), None, None, None, XsdVersion::V1_1);
1907        assert_eq!(next, make_set(&[3]));
1908
1909        let next = advance_with_priority(&nfa, [0], NameId(2), None, None, None, XsdVersion::V1_1);
1910        assert_eq!(next, make_set(&[4]));
1911    }
1912
1913    #[test]
1914    fn test_find_match_info_prefers_element_over_wildcard() {
1915        // make_priority_nfa has element(NameId(1)) at state 1 and wildcard at state 2,
1916        // both reachable from state 0.  Regardless of HashSet iteration order,
1917        // find_match_info should prefer the element match.
1918        let nfa = make_priority_nfa();
1919        let active = ActiveStates::Simple(epsilon_closure(&nfa, [0]));
1920
1921        // NameId(1) matches both element and wildcard — element should win
1922        let mi = active.find_match_info(&nfa, NameId(1), None, None, None, XsdVersion::V1_1);
1923        assert!(
1924            mi.process_contents.is_none(),
1925            "element match should not have process_contents, got {:?}",
1926            mi.process_contents,
1927        );
1928
1929        // NameId(2) matches only the wildcard
1930        let mi2 = active.find_match_info(&nfa, NameId(2), None, None, None, XsdVersion::V1_1);
1931        assert!(
1932            mi2.process_contents.is_some(),
1933            "wildcard-only match should have process_contents",
1934        );
1935    }
1936
1937    #[test]
1938    fn test_term_matches_substitution_groups() {
1939        let mut schema_set = SchemaSet::new();
1940        let head_name = schema_set.name_table.add("head");
1941        let member_name = schema_set.name_table.add("member");
1942
1943        let head_key = schema_set.arenas.alloc_element(element_data(head_name));
1944        let member_key = schema_set.arenas.alloc_element(element_data(member_name));
1945
1946        schema_set
1947            .arenas
1948            .elements
1949            .get_mut(member_key)
1950            .unwrap()
1951            .resolved_substitution_groups
1952            .push(head_key);
1953
1954        let map = build_substitution_group_map(&schema_set);
1955        let term = NfaTerm::element(head_name, None, Some(head_key));
1956
1957        assert!(term_matches(
1958            &term,
1959            member_name,
1960            None,
1961            None,
1962            Some(&map),
1963            XsdVersion::V1_0,
1964        ));
1965    }
1966
1967    #[test]
1968    fn test_find_match_info_substitution_returns_none_for_head_key() {
1969        // When a member matches via substitution group, find_match_info should
1970        // return element_key: None (not the head's key) so the runtime resolves
1971        // the actual member declaration via lookup_element.
1972        let mut schema_set = SchemaSet::new();
1973        let head_name = schema_set.name_table.add("head");
1974        let member_name = schema_set.name_table.add("member");
1975
1976        let mut head_data = element_data(head_name);
1977        head_data.is_abstract = true;
1978        let head_key = schema_set.arenas.alloc_element(head_data);
1979        let member_key = schema_set.arenas.alloc_element(element_data(member_name));
1980
1981        schema_set
1982            .arenas
1983            .elements
1984            .get_mut(member_key)
1985            .unwrap()
1986            .resolved_substitution_groups
1987            .push(head_key);
1988
1989        let map = build_substitution_group_map(&schema_set);
1990
1991        // Build a simple NFA: start --[head]--> accept
1992        let builder = crate::compiler::fragment::FragmentBuilder::new();
1993        let frag = builder.single_term(NfaTerm::element(head_name, None, Some(head_key)), None);
1994        let nfa = crate::compiler::fragment::fragment_to_table(frag);
1995        let active = ActiveStates::from_nfa(&nfa);
1996
1997        // Match with member name — should return element_key: None
1998        let mi =
1999            active.find_match_info(&nfa, member_name, None, None, Some(&map), XsdVersion::V1_0);
2000        assert!(
2001            mi.element_key.is_none(),
2002            "substitution match should not return head's element_key"
2003        );
2004        assert!(mi.resolved_type.is_none());
2005
2006        // Abstract head's own name doesn't match (excluded from subst map,
2007        // and subst map lookup short-circuits before direct name comparison)
2008        let mi_head =
2009            active.find_match_info(&nfa, head_name, None, None, Some(&map), XsdVersion::V1_0);
2010        assert!(
2011            mi_head.element_key.is_none(),
2012            "abstract head should not match its own name via subst map"
2013        );
2014    }
2015
2016    #[test]
2017    fn test_find_match_info_direct_match_returns_element_key() {
2018        // When a non-abstract head matches directly, find_match_info should
2019        // return the term's element_key.
2020        let mut schema_set = SchemaSet::new();
2021        let head_name = schema_set.name_table.add("head");
2022        let member_name = schema_set.name_table.add("member");
2023
2024        // Non-abstract head
2025        let head_key = schema_set.arenas.alloc_element(element_data(head_name));
2026        let member_key = schema_set.arenas.alloc_element(element_data(member_name));
2027
2028        schema_set
2029            .arenas
2030            .elements
2031            .get_mut(member_key)
2032            .unwrap()
2033            .resolved_substitution_groups
2034            .push(head_key);
2035
2036        let map = build_substitution_group_map(&schema_set);
2037
2038        let builder = crate::compiler::fragment::FragmentBuilder::new();
2039        let frag = builder.single_term(NfaTerm::element(head_name, None, Some(head_key)), None);
2040        let nfa = crate::compiler::fragment::fragment_to_table(frag);
2041        let active = ActiveStates::from_nfa(&nfa);
2042
2043        // Direct match with head name — should return head's element_key
2044        let mi = active.find_match_info(&nfa, head_name, None, None, Some(&map), XsdVersion::V1_0);
2045        assert_eq!(
2046            mi.element_key,
2047            Some(head_key),
2048            "direct match should return the term's element_key"
2049        );
2050
2051        // Substitution match with member name — should return None
2052        let mi_member =
2053            active.find_match_info(&nfa, member_name, None, None, Some(&map), XsdVersion::V1_0);
2054        assert!(
2055            mi_member.element_key.is_none(),
2056            "substitution match should not return head's element_key"
2057        );
2058    }
2059
2060    // -----------------------------------------------------------------------
2061    // Counted NFA tests
2062    // -----------------------------------------------------------------------
2063
2064    use crate::compiler::fragment::{fragment_to_table, FragmentBuilder};
2065
2066    /// Build a counted NFA for element `a{min,max}` and return (nfa, name_id).
2067    fn make_counted_element_nfa(min: u32, max: u32) -> (NfaTable, NameId) {
2068        let name = NameId(100);
2069        let builder = FragmentBuilder::new();
2070        let frag = builder.single_term(NfaTerm::element(name, None, None), None);
2071        let counted = frag.repeat_counted(min, max);
2072        let nfa = fragment_to_table(counted);
2073        (nfa, name)
2074    }
2075
2076    fn advance_n(active: ActiveStates, nfa: &NfaTable, name: NameId, n: usize) -> ActiveStates {
2077        let mut state = active;
2078        for _ in 0..n {
2079            state = state.advance(nfa, name, None, None, None, XsdVersion::V1_0);
2080        }
2081        state
2082    }
2083
2084    #[test]
2085    fn test_counted_nfa_has_counters() {
2086        let (nfa, _) = make_counted_element_nfa(2, 5);
2087        assert!(nfa.has_counters());
2088        assert_eq!(nfa.counter_defs.len(), 1);
2089        assert_eq!(nfa.counter_defs[0].min, 2);
2090        assert_eq!(nfa.counter_defs[0].max, 5);
2091        // Counted NFA: body(2 states) + entry + guard + exit = 5 states
2092        assert_eq!(nfa.state_count(), 5);
2093    }
2094
2095    #[test]
2096    fn test_counted_nfa_compact_state_count() {
2097        // Large maxOccurs should NOT create many states
2098        let (nfa, _) = make_counted_element_nfa(0, 1000);
2099        assert_eq!(nfa.state_count(), 5); // Still just 5 states
2100    }
2101
2102    #[test]
2103    fn test_counted_active_states_is_counted_variant() {
2104        let (nfa, _) = make_counted_element_nfa(2, 5);
2105        let active = ActiveStates::from_nfa(&nfa);
2106        assert!(matches!(active, ActiveStates::Counted { .. }));
2107    }
2108
2109    #[test]
2110    fn test_counted_element_a_3_5() {
2111        let (nfa, a) = make_counted_element_nfa(3, 5);
2112
2113        // 2 occurrences: not complete, would accept more
2114        let s2 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 2);
2115        assert!(!s2.is_empty());
2116        assert!(!s2.contains_accept(&nfa));
2117
2118        // 3 occurrences: complete (min satisfied)
2119        let s3 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 3);
2120        assert!(s3.contains_accept(&nfa));
2121
2122        // 4 occurrences: still complete
2123        let s4 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 4);
2124        assert!(s4.contains_accept(&nfa));
2125
2126        // 5 occurrences: complete (max)
2127        let s5 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 5);
2128        assert!(s5.contains_accept(&nfa));
2129
2130        // 6 occurrences: rejected (past max)
2131        let s6 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 6);
2132        assert!(s6.is_empty());
2133    }
2134
2135    #[test]
2136    fn test_counted_element_a_0_100() {
2137        let (nfa, a) = make_counted_element_nfa(0, 100);
2138
2139        // 0 occurrences: complete (min=0)
2140        let s0 = ActiveStates::from_nfa(&nfa);
2141        assert!(s0.contains_accept(&nfa));
2142
2143        // 100 occurrences: complete (max)
2144        let s100 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 100);
2145        assert!(s100.contains_accept(&nfa));
2146
2147        // 101 occurrences: rejected
2148        let s101 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 101);
2149        assert!(s101.is_empty());
2150    }
2151
2152    #[test]
2153    fn test_counted_element_exact_17() {
2154        let (nfa, a) = make_counted_element_nfa(17, 17);
2155
2156        // 16: not complete
2157        let s16 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 16);
2158        assert!(!s16.contains_accept(&nfa));
2159
2160        // 17: complete
2161        let s17 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 17);
2162        assert!(s17.contains_accept(&nfa));
2163
2164        // 18: rejected
2165        let s18 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 18);
2166        assert!(s18.is_empty());
2167    }
2168
2169    #[test]
2170    fn test_counted_sequence_a_b() {
2171        // Build a{2,50} followed by b
2172        let a = NameId(100);
2173        let b = NameId(200);
2174        let builder = FragmentBuilder::new();
2175
2176        let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2177        let counted_a = frag_a.repeat_counted(2, 50);
2178
2179        let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2180        let seq = counted_a.concat(frag_b);
2181        let nfa = fragment_to_table(seq);
2182
2183        assert!(nfa.has_counters());
2184
2185        // 1 a + b: should fail (min=2 not satisfied)
2186        let s = ActiveStates::from_nfa(&nfa);
2187        let s = s.advance(&nfa, a, None, None, None, XsdVersion::V1_0); // 1 a
2188        let s = s.advance(&nfa, b, None, None, None, XsdVersion::V1_0); // b
2189        assert!(!s.contains_accept(&nfa)); // min not satisfied
2190
2191        // 2 a + b: should succeed
2192        let s = ActiveStates::from_nfa(&nfa);
2193        let s = advance_n(s, &nfa, a, 2);
2194        let s = s.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
2195        assert!(s.contains_accept(&nfa));
2196
2197        // 50 a + b: should succeed
2198        let s = ActiveStates::from_nfa(&nfa);
2199        let s = advance_n(s, &nfa, a, 50);
2200        let s = s.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
2201        assert!(s.contains_accept(&nfa));
2202
2203        // 51 a: should be rejected
2204        let s = ActiveStates::from_nfa(&nfa);
2205        let s = advance_n(s, &nfa, a, 51);
2206        assert!(s.is_empty());
2207    }
2208
2209    #[test]
2210    fn test_dead_counter_collapse() {
2211        // (a?){0,200} followed by b
2212        // After the nullable loop exits, all configs should collapse at b.
2213        let a = NameId(100);
2214        let b = NameId(200);
2215        let builder = FragmentBuilder::new();
2216
2217        // Build a? (optional element)
2218        let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2219        let opt_a = frag_a.optional();
2220
2221        // Counted loop: (a?){0,200}
2222        let counted = opt_a.repeat_counted(0, 200);
2223
2224        // Followed by b
2225        let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2226        let seq = counted.concat(frag_b);
2227        let nfa = fragment_to_table(seq);
2228
2229        // Single counter + nullable body → should use RangedSingle
2230        let initial = ActiveStates::from_nfa(&nfa);
2231        assert!(
2232            matches!(&initial, ActiveStates::RangedSingle { .. }),
2233            "Expected RangedSingle for nullable body counted NFA"
2234        );
2235
2236        // Initial state should be complete (min=0 loop can be skipped)
2237        // After advancing with b (skipping the loop entirely), should accept
2238        let after_b = initial
2239            .clone()
2240            .advance(&nfa, b, None, None, None, XsdVersion::V1_0);
2241        assert!(after_b.contains_accept(&nfa));
2242
2243        // With RangedSingle, the state map has O(states) entries, not O(200).
2244        if let ActiveStates::RangedSingle { state_ranges, .. } = &after_b {
2245            assert!(
2246                state_ranges.len() <= 5,
2247                "Expected O(1) ranged entries after loop exit, got {}",
2248                state_ranges.len()
2249            );
2250        }
2251    }
2252
2253    #[test]
2254    fn test_counted_exact_prefix_plus_star() {
2255        // a{17,unbounded}: counted exact(17) + star
2256        // Built via apply_occurs to test the dispatch
2257        use crate::compiler::particle::{apply_occurs, MaxOccurs};
2258
2259        let a = NameId(100);
2260        let builder = FragmentBuilder::new();
2261        let frag = builder.single_term(NfaTerm::element(a, None, None), None);
2262        let result = apply_occurs(frag, 17, MaxOccurs::Unbounded);
2263        let nfa = fragment_to_table(result);
2264
2265        // Should use counted path (has counters for the prefix)
2266        assert!(nfa.has_counters());
2267        // Should be compact — NOT 17*2 = 34 states from unrolling
2268        assert!(
2269            nfa.state_count() < 15,
2270            "expected compact NFA, got {} states",
2271            nfa.state_count()
2272        );
2273
2274        // 16 occurrences: not complete (min=17)
2275        let s16 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 16);
2276        assert!(!s16.contains_accept(&nfa));
2277
2278        // 17 occurrences: complete
2279        let s17 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 17);
2280        assert!(s17.contains_accept(&nfa));
2281
2282        // 100 occurrences: still complete (unbounded)
2283        let s100 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 100);
2284        assert!(s100.contains_accept(&nfa));
2285    }
2286
2287    #[test]
2288    fn test_is_epsilon_like() {
2289        assert!(TransitionKind::Epsilon.is_epsilon_like());
2290        assert!(!TransitionKind::Consume.is_epsilon_like());
2291        assert!(TransitionKind::CounterReset(0).is_epsilon_like());
2292        assert!(TransitionKind::CounterIncrement(0).is_epsilon_like());
2293        assert!(TransitionKind::CounterMaxGuard(0).is_epsilon_like());
2294        assert!(TransitionKind::CounterMinGuard(0).is_epsilon_like());
2295    }
2296
2297    #[test]
2298    fn test_offset_counter() {
2299        assert_eq!(
2300            TransitionKind::CounterReset(0).offset_counter(3),
2301            TransitionKind::CounterReset(3)
2302        );
2303        assert_eq!(
2304            TransitionKind::CounterIncrement(2).offset_counter(5),
2305            TransitionKind::CounterIncrement(7)
2306        );
2307        assert_eq!(
2308            TransitionKind::Epsilon.offset_counter(10),
2309            TransitionKind::Epsilon
2310        );
2311        assert_eq!(
2312            TransitionKind::Consume.offset_counter(10),
2313            TransitionKind::Consume
2314        );
2315    }
2316
2317    // -----------------------------------------------------------------------
2318    // CounterRange unit tests
2319    // -----------------------------------------------------------------------
2320
2321    #[test]
2322    fn test_counter_range_operations() {
2323        let r = CounterRange::single(5);
2324        assert_eq!(r.lo, 5);
2325        assert_eq!(r.hi, 5);
2326        assert!(!r.is_empty());
2327        assert!(r.subsumes(r));
2328
2329        let r2 = CounterRange::new(3, 7);
2330        assert!(r2.subsumes(r));
2331        assert!(!r.subsumes(r2));
2332
2333        // Union
2334        let u = CounterRange::single(2).union(CounterRange::new(3, 5));
2335        assert_eq!(u, CounterRange::new(2, 5));
2336
2337        // intersect_below
2338        assert_eq!(
2339            CounterRange::new(1, 5).intersect_below(4),
2340            CounterRange::new(1, 3)
2341        );
2342        assert!(CounterRange::new(5, 8).intersect_below(5).is_empty());
2343        assert!(CounterRange::new(0, 10).intersect_below(0).is_empty());
2344
2345        // intersect_above
2346        assert_eq!(
2347            CounterRange::new(1, 5).intersect_above(3),
2348            CounterRange::new(3, 5)
2349        );
2350        assert!(CounterRange::new(1, 3).intersect_above(5).is_empty());
2351    }
2352
2353    // -----------------------------------------------------------------------
2354    // RangedSingle tests
2355    // -----------------------------------------------------------------------
2356
2357    #[test]
2358    fn test_ranged_closure_convergence() {
2359        // (a?){0,10000} — must use RangedSingle and converge in O(states)
2360        let a = NameId(100);
2361        let builder = FragmentBuilder::new();
2362        let frag = builder.single_term(NfaTerm::element(a, None, None), None);
2363        let opt = frag.optional();
2364        let counted = opt.repeat_counted(0, 10_000);
2365        let nfa = fragment_to_table(counted);
2366
2367        assert!(nfa.has_counters());
2368        assert!(nfa.counter_defs[0].body_nullable);
2369
2370        let initial = ActiveStates::from_nfa(&nfa);
2371        assert!(matches!(&initial, ActiveStates::RangedSingle { .. }));
2372
2373        // Should have O(states) entries, not O(10000)
2374        if let ActiveStates::RangedSingle { state_ranges, .. } = &initial {
2375            assert!(
2376                state_ranges.len() <= 10,
2377                "Expected O(states) entries, got {}",
2378                state_ranges.len()
2379            );
2380        }
2381
2382        // Should be accepting (min=0)
2383        assert!(initial.contains_accept(&nfa));
2384
2385        // Advance with 'a' 5 times — should still be accepting
2386        let s5 = advance_n(initial, &nfa, a, 5);
2387        assert!(s5.contains_accept(&nfa));
2388
2389        // Still RangedSingle throughout
2390        assert!(matches!(&s5, ActiveStates::RangedSingle { .. }));
2391    }
2392
2393    #[test]
2394    fn test_nullable_body_accepts_range() {
2395        // (a?){3,5}: accepts "", "a", "aa", "aaa", "aaaa", "aaaaa", rejects "aaaaaa"
2396        let a = NameId(100);
2397        let builder = FragmentBuilder::new();
2398        let frag = builder.single_term(NfaTerm::element(a, None, None), None);
2399        let opt = frag.optional();
2400        let counted = opt.repeat_counted(3, 5);
2401        let nfa = fragment_to_table(counted);
2402
2403        assert!(nfa.counter_defs[0].body_nullable);
2404        let initial = ActiveStates::from_nfa(&nfa);
2405        assert!(matches!(&initial, ActiveStates::RangedSingle { .. }));
2406
2407        // "" — accepted (body nullable, 3 empty iterations satisfy min)
2408        assert!(initial.contains_accept(&nfa));
2409
2410        // "a" through "aaaaa" — all accepted
2411        for n in 1..=5 {
2412            let s = advance_n(initial.clone(), &nfa, a, n);
2413            assert!(s.contains_accept(&nfa), "Expected accepting after {n} a's");
2414        }
2415
2416        // "aaaaaa" — rejected (max=5 iterations, each consuming at most 1 a)
2417        let s6 = advance_n(initial, &nfa, a, 6);
2418        assert!(s6.is_empty(), "Expected rejection after 6 a's");
2419    }
2420
2421    #[test]
2422    fn test_choice_body_nullable() {
2423        // (a|b?){0,1000} — body is nullable (b? branch), should use RangedSingle
2424        let a = NameId(100);
2425        let b = NameId(200);
2426        let builder = FragmentBuilder::new();
2427        let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2428        let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2429        let choice = frag_a.alternate(frag_b.optional());
2430        let counted = choice.repeat_counted(0, 1000);
2431        let nfa = fragment_to_table(counted);
2432
2433        assert!(nfa.counter_defs[0].body_nullable);
2434        let initial = ActiveStates::from_nfa(&nfa);
2435        assert!(matches!(&initial, ActiveStates::RangedSingle { .. }));
2436
2437        if let ActiveStates::RangedSingle { state_ranges, .. } = &initial {
2438            assert!(
2439                state_ranges.len() <= 15,
2440                "Expected O(states) entries, got {}",
2441                state_ranges.len()
2442            );
2443        }
2444
2445        // Accepts "a", "b", "ab", ""
2446        assert!(initial.contains_accept(&nfa));
2447        let s_a = initial
2448            .clone()
2449            .advance(&nfa, a, None, None, None, XsdVersion::V1_0);
2450        assert!(s_a.contains_accept(&nfa));
2451        let s_b = initial
2452            .clone()
2453            .advance(&nfa, b, None, None, None, XsdVersion::V1_0);
2454        assert!(s_b.contains_accept(&nfa));
2455    }
2456
2457    #[test]
2458    fn test_non_nullable_body_stays_counted() {
2459        // (a){0,5} — body is NOT nullable, should use Counted (not RangedSingle)
2460        let (nfa, a) = make_counted_element_nfa(0, 5);
2461        assert!(!nfa.counter_defs[0].body_nullable);
2462
2463        let initial = ActiveStates::from_nfa(&nfa);
2464        assert!(matches!(&initial, ActiveStates::Counted { .. }));
2465
2466        // Still correct: accepts 0..5 a's
2467        assert!(initial.contains_accept(&nfa)); // min=0
2468        for n in 1..=5 {
2469            let s = advance_n(initial.clone(), &nfa, a, n);
2470            assert!(s.contains_accept(&nfa), "Expected accepting after {n} a's");
2471        }
2472        let s6 = advance_n(initial, &nfa, a, 6);
2473        assert!(s6.is_empty());
2474    }
2475
2476    #[test]
2477    fn test_multi_counter_stays_counted() {
2478        // a{0,100} followed by b{0,100} — 2 counters, should use Counted
2479        let a = NameId(100);
2480        let b = NameId(200);
2481        let builder = FragmentBuilder::new();
2482        let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2483        let counted_a = frag_a.repeat_counted(0, 100);
2484        let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2485        let counted_b = frag_b.repeat_counted(0, 100);
2486        let seq = counted_a.concat(counted_b);
2487        let nfa = fragment_to_table(seq);
2488
2489        assert_eq!(nfa.counter_defs.len(), 2);
2490        let initial = ActiveStates::from_nfa(&nfa);
2491        assert!(
2492            matches!(&initial, ActiveStates::Counted { .. }),
2493            "Multi-counter NFA should use Counted, not RangedSingle"
2494        );
2495    }
2496
2497    #[test]
2498    fn test_nested_nullable_uses_hybrid() {
2499        // ((a?){0,50}){0,50} — 2 nullable counters → Hybrid
2500        let a = NameId(100);
2501        let builder = FragmentBuilder::new();
2502        let frag = builder.single_term(NfaTerm::element(a, None, None), None);
2503        let inner = frag.optional().repeat_counted(0, 50);
2504        let outer = inner.repeat_counted(0, 50);
2505        let nfa = fragment_to_table(outer);
2506
2507        assert_eq!(nfa.counter_defs.len(), 2);
2508        let initial = ActiveStates::from_nfa(&nfa);
2509        assert!(
2510            matches!(&initial, ActiveStates::Hybrid { .. }),
2511            "Nested nullable counted NFA should use Hybrid, got {:?}",
2512            std::mem::discriminant(&initial)
2513        );
2514    }
2515
2516    // -----------------------------------------------------------------------
2517    // Hybrid variant tests
2518    // -----------------------------------------------------------------------
2519
2520    #[test]
2521    fn test_nested_nullable_hybrid_correctness() {
2522        // ((a?){0,100}){0,50}: inner up to 100 per outer iteration, outer up to 50
2523        let a = NameId(100);
2524        let builder = FragmentBuilder::new();
2525        let frag = builder.single_term(NfaTerm::element(a, None, None), None);
2526        let inner = frag.optional().repeat_counted(0, 100);
2527        let outer = inner.repeat_counted(0, 50);
2528        let nfa = fragment_to_table(outer);
2529
2530        assert_eq!(nfa.counter_defs.len(), 2);
2531        let initial = ActiveStates::from_nfa(&nfa);
2532        assert!(matches!(&initial, ActiveStates::Hybrid { .. }));
2533
2534        // Empty accepted (min=0 on both loops)
2535        assert!(initial.contains_accept(&nfa));
2536
2537        // "a"*100: accepted (1 outer iteration, 100 inner = max inner)
2538        let s100 = advance_n(initial.clone(), &nfa, a, 100);
2539        assert!(s100.contains_accept(&nfa));
2540
2541        // "a"*5000: accepted (50 outer * 100 inner)
2542        let s5000 = advance_n(initial.clone(), &nfa, a, 5000);
2543        assert!(s5000.contains_accept(&nfa));
2544
2545        // "a"*5001: rejected (exceeds 50*100)
2546        let s5001 = advance_n(initial, &nfa, a, 5001);
2547        assert!(s5001.is_empty(), "Expected rejection after 5001 a's");
2548    }
2549
2550    #[test]
2551    fn test_hybrid_convergence() {
2552        // ((a?){0,100}){0,50}: verify configs are O(states * outer_max), not O(M*N*states)
2553        let a = NameId(100);
2554        let builder = FragmentBuilder::new();
2555        let frag = builder.single_term(NfaTerm::element(a, None, None), None);
2556        let inner = frag.optional().repeat_counted(0, 100);
2557        let outer = inner.repeat_counted(0, 50);
2558        let nfa = fragment_to_table(outer);
2559
2560        let initial = ActiveStates::from_nfa(&nfa);
2561        if let ActiveStates::Hybrid { configs, .. } = &initial {
2562            // With Hybrid, inner counter is ranged → O(states * outer_max) entries.
2563            // Without Hybrid (Counted), would be O(states * 100 * 50) = O(5000 * states).
2564            let max_expected = nfa.state_count() * 51; // outer goes 0..50
2565            assert!(
2566                configs.len() <= max_expected,
2567                "Expected at most {} entries, got {} (state_count={})",
2568                max_expected,
2569                configs.len(),
2570                nfa.state_count()
2571            );
2572        } else {
2573            panic!("Expected Hybrid variant");
2574        }
2575    }
2576
2577    #[test]
2578    fn test_sequential_nullable_hybrid() {
2579        // (a?){0,1000} followed by (b?){0,1000}
2580        let a = NameId(100);
2581        let b = NameId(200);
2582        let builder = FragmentBuilder::new();
2583        let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2584        let counted_a = frag_a.optional().repeat_counted(0, 1000);
2585        let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2586        let counted_b = frag_b.optional().repeat_counted(0, 1000);
2587        let seq = counted_a.concat(counted_b);
2588        let nfa = fragment_to_table(seq);
2589
2590        assert_eq!(nfa.counter_defs.len(), 2);
2591        assert!(nfa.counter_defs[0].body_nullable);
2592        assert!(nfa.counter_defs[1].body_nullable);
2593
2594        let initial = ActiveStates::from_nfa(&nfa);
2595        assert!(matches!(&initial, ActiveStates::Hybrid { .. }));
2596
2597        // Empty: accepted (both min=0)
2598        assert!(initial.contains_accept(&nfa));
2599
2600        // "a": accepted
2601        let s = initial
2602            .clone()
2603            .advance(&nfa, a, None, None, None, XsdVersion::V1_0);
2604        assert!(s.contains_accept(&nfa));
2605
2606        // "b": accepted
2607        let s = initial
2608            .clone()
2609            .advance(&nfa, b, None, None, None, XsdVersion::V1_0);
2610        assert!(s.contains_accept(&nfa));
2611
2612        // "a"*1000 + "b"*1000: accepted
2613        let s = advance_n(initial.clone(), &nfa, a, 1000);
2614        assert!(s.contains_accept(&nfa));
2615        let s = advance_n(s, &nfa, b, 1000);
2616        assert!(s.contains_accept(&nfa));
2617
2618        // "a"*1001: rejected (exceeds first loop's max)
2619        let s = advance_n(initial, &nfa, a, 1001);
2620        assert!(s.is_empty(), "Expected rejection after 1001 a's");
2621    }
2622
2623    #[test]
2624    fn test_mixed_nullable_nonnullable_hybrid() {
2625        // (a?){0,500} followed by b{0,500}: one nullable, one not
2626        let a = NameId(100);
2627        let b = NameId(200);
2628        let builder = FragmentBuilder::new();
2629        let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2630        let counted_a = frag_a.optional().repeat_counted(0, 500);
2631        let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2632        let counted_b = frag_b.repeat_counted(0, 500);
2633        let seq = counted_a.concat(counted_b);
2634        let nfa = fragment_to_table(seq);
2635
2636        assert_eq!(nfa.counter_defs.len(), 2);
2637        assert!(nfa.counter_defs[0].body_nullable);
2638        assert!(!nfa.counter_defs[1].body_nullable);
2639
2640        let initial = ActiveStates::from_nfa(&nfa);
2641        assert!(matches!(&initial, ActiveStates::Hybrid { .. }));
2642
2643        // Verify ranged counter is counter 0 (the nullable one)
2644        if let ActiveStates::Hybrid {
2645            ranged_counter_idx, ..
2646        } = &initial
2647        {
2648            assert_eq!(
2649                *ranged_counter_idx, 0,
2650                "Should range the nullable counter (index 0)"
2651            );
2652        }
2653
2654        // Empty: accepted (both min=0)
2655        assert!(initial.contains_accept(&nfa));
2656
2657        // "a"*500 + "b"*500: accepted
2658        let s = advance_n(initial.clone(), &nfa, a, 500);
2659        let s = advance_n(s, &nfa, b, 500);
2660        assert!(s.contains_accept(&nfa));
2661
2662        // "b"*501: rejected
2663        let s = advance_n(initial, &nfa, b, 501);
2664        assert!(s.is_empty());
2665    }
2666
2667    #[test]
2668    fn test_hybrid_forced_outer_ranged() {
2669        // ((a?){0,100}){0,50}: manually verify correctness with the algorithm
2670        // choosing whichever counter it picks, by testing boundary conditions
2671        // that exercise both counters.
2672        let a = NameId(100);
2673        let builder = FragmentBuilder::new();
2674        let frag = builder.single_term(NfaTerm::element(a, None, None), None);
2675        let inner = frag.optional().repeat_counted(0, 100);
2676        let outer = inner.repeat_counted(0, 50);
2677        let nfa = fragment_to_table(outer);
2678
2679        let initial = ActiveStates::from_nfa(&nfa);
2680
2681        // Boundary: exactly 100 a's per iteration, 50 iterations = 5000 total
2682        let s = advance_n(initial.clone(), &nfa, a, 5000);
2683        assert!(
2684            s.contains_accept(&nfa),
2685            "5000 a's should be accepted (50*100)"
2686        );
2687
2688        // Boundary: 101 a's — requires second outer iteration (first handles 100, second 1)
2689        let s = advance_n(initial.clone(), &nfa, a, 101);
2690        assert!(
2691            s.contains_accept(&nfa),
2692            "101 a's should be accepted (needs 2 outer iterations)"
2693        );
2694
2695        // Boundary: 0 a's
2696        assert!(initial.contains_accept(&nfa), "0 a's should be accepted");
2697
2698        // Boundary: 5001 — exceeds total capacity
2699        let s = advance_n(initial, &nfa, a, 5001);
2700        assert!(s.is_empty(), "5001 a's should be rejected");
2701    }
2702
2703    #[test]
2704    fn test_hybrid_selection_different_maxima() {
2705        // (a?){0,10} followed by (b?){0,1000}: should range counter 1 (max=1000)
2706        let a = NameId(100);
2707        let b = NameId(200);
2708        let builder = FragmentBuilder::new();
2709        let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2710        let counted_a = frag_a.optional().repeat_counted(0, 10);
2711        let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2712        let counted_b = frag_b.optional().repeat_counted(0, 1000);
2713        let seq = counted_a.concat(counted_b);
2714        let nfa = fragment_to_table(seq);
2715
2716        assert_eq!(nfa.counter_defs.len(), 2);
2717        let initial = ActiveStates::from_nfa(&nfa);
2718        // counter 0 max=10 (below COUNTED_THRESHOLD=16 — but counter 1 max=1000 qualifies)
2719        assert!(
2720            matches!(&initial, ActiveStates::Hybrid { .. }),
2721            "Expected Hybrid, got {:?}",
2722            std::mem::discriminant(&initial)
2723        );
2724
2725        if let ActiveStates::Hybrid {
2726            ranged_counter_idx, ..
2727        } = &initial
2728        {
2729            assert_eq!(
2730                *ranged_counter_idx, 1,
2731                "Should range counter 1 (max=1000), not counter 0 (max=10)"
2732            );
2733        }
2734
2735        // Correctness: "a"*10 + "b"*1000 accepted
2736        let s = advance_n(initial.clone(), &nfa, a, 10);
2737        let s = advance_n(s, &nfa, b, 1000);
2738        assert!(s.contains_accept(&nfa));
2739
2740        // "a"*11 rejected
2741        let s = advance_n(initial, &nfa, a, 11);
2742        assert!(s.is_empty());
2743    }
2744
2745    #[test]
2746    fn test_hybrid_small_max_stays_counted() {
2747        // (a?){0,5} followed by (b?){0,5}: both nullable but max ≤ COUNTED_THRESHOLD → Counted
2748        let a = NameId(100);
2749        let b = NameId(200);
2750        let builder = FragmentBuilder::new();
2751        let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2752        let counted_a = frag_a.optional().repeat_counted(0, 5);
2753        let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2754        let counted_b = frag_b.optional().repeat_counted(0, 5);
2755        let seq = counted_a.concat(counted_b);
2756        let nfa = fragment_to_table(seq);
2757
2758        assert_eq!(nfa.counter_defs.len(), 2);
2759        let initial = ActiveStates::from_nfa(&nfa);
2760        assert!(
2761            matches!(&initial, ActiveStates::Counted { .. }),
2762            "Small-max nullable counters should stay Counted, got {:?}",
2763            std::mem::discriminant(&initial)
2764        );
2765    }
2766
2767    #[test]
2768    fn test_xsd11_priority_ranged() {
2769        // Nullable counted body containing element 'a' + wildcard *.
2770        // advance_with_priority should prefer element over wildcard.
2771        use crate::types::complex::NamespaceConstraint;
2772        let a = NameId(100);
2773        let b = NameId(200);
2774        let builder = FragmentBuilder::new();
2775
2776        // Element term for 'a' (mandatory)
2777        let elem = builder.single_term(NfaTerm::element(a, None, None), None);
2778        // Wildcard term matching anything (optional → makes body nullable)
2779        let wc = builder.single_term(
2780            NfaTerm::Wildcard {
2781                namespace_constraint: NamespaceConstraint::Any,
2782                process_contents: ProcessContents::Lax,
2783                not_qnames: Vec::new(),
2784            },
2785            None,
2786        );
2787        // (a | *?){0,100} — nullable body via the *? branch
2788        let choice = elem.alternate(wc.optional());
2789        let counted = choice.repeat_counted(0, 100);
2790        let nfa = fragment_to_table(counted);
2791
2792        assert!(nfa.counter_defs[0].body_nullable);
2793        let initial = ActiveStates::from_nfa(&nfa);
2794        assert!(matches!(&initial, ActiveStates::RangedSingle { .. }));
2795
2796        // advance_with_priority for 'a': element branch exists, so wildcard
2797        // should not be chosen.  The result should be non-empty.
2798        let next =
2799            initial
2800                .clone()
2801                .advance_with_priority(&nfa, a, None, None, None, XsdVersion::V1_1);
2802        assert!(!next.is_empty());
2803        assert!(next.contains_accept(&nfa));
2804
2805        // advance_with_priority for 'b': only wildcard matches, should work
2806        let next_b = initial.advance_with_priority(&nfa, b, None, None, None, XsdVersion::V1_1);
2807        assert!(!next_b.is_empty());
2808        assert!(next_b.contains_accept(&nfa));
2809    }
2810
2811    // -----------------------------------------------------------------------
2812    // Dynamic ranged-counter switching tests
2813    // -----------------------------------------------------------------------
2814
2815    #[test]
2816    fn test_initial_tiebreak_prefers_first() {
2817        // (a?){0,1000} followed by (b?){0,1000}: equal max → should range counter 0
2818        let a = NameId(100);
2819        let b = NameId(200);
2820        let builder = FragmentBuilder::new();
2821        let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2822        let counted_a = frag_a.optional().repeat_counted(0, 1000);
2823        let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2824        let counted_b = frag_b.optional().repeat_counted(0, 1000);
2825        let seq = counted_a.concat(counted_b);
2826        let nfa = fragment_to_table(seq);
2827
2828        let initial = ActiveStates::from_nfa(&nfa);
2829        if let ActiveStates::Hybrid {
2830            ranged_counter_idx, ..
2831        } = &initial
2832        {
2833            assert_eq!(
2834                *ranged_counter_idx, 0,
2835                "Equal max: should prefer first counter (index 0)"
2836            );
2837        } else {
2838            panic!("Expected Hybrid variant");
2839        }
2840    }
2841
2842    #[test]
2843    fn test_dynamic_switch_sequential_equal_max() {
2844        // (a?){0,1000} followed by (b?){0,1000}
2845        // After feeding a's and then b, the ranged counter should switch.
2846        let a = NameId(100);
2847        let b = NameId(200);
2848        let builder = FragmentBuilder::new();
2849        let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2850        let counted_a = frag_a.optional().repeat_counted(0, 1000);
2851        let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2852        let counted_b = frag_b.optional().repeat_counted(0, 1000);
2853        let seq = counted_a.concat(counted_b);
2854        let nfa = fragment_to_table(seq);
2855
2856        let initial = ActiveStates::from_nfa(&nfa);
2857        // Feed 1000 a's then 1 b to move fully into second loop
2858        let after_a = advance_n(initial, &nfa, a, 1000);
2859        let after_b = after_a.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
2860
2861        if let ActiveStates::Hybrid {
2862            ranged_counter_idx,
2863            configs,
2864            ..
2865        } = &after_b
2866        {
2867            assert_eq!(
2868                *ranged_counter_idx, 1,
2869                "Should have dynamically switched to counter 1"
2870            );
2871            // Config count should be O(states), not O(1000 * states)
2872            let max_expected = nfa.state_count() * 2;
2873            assert!(
2874                configs.len() <= max_expected,
2875                "Expected <= {} configs after switch, got {}",
2876                max_expected,
2877                configs.len()
2878            );
2879        } else {
2880            panic!("Expected Hybrid variant");
2881        }
2882
2883        // Correctness: b*999 more (total 1000 b) accepted
2884        let s = advance_n(after_b.clone(), &nfa, b, 999);
2885        assert!(s.contains_accept(&nfa));
2886
2887        // b*1001 total rejected
2888        let s = advance_n(after_b, &nfa, b, 1000);
2889        assert!(s.is_empty());
2890    }
2891
2892    #[test]
2893    fn test_dynamic_switch_three_counters() {
2894        // (a?){0,500} (b?){0,500} (c?){0,500}: cascading switches 0→1→2
2895        let a = NameId(100);
2896        let b = NameId(200);
2897        let c = NameId(300);
2898        let builder = FragmentBuilder::new();
2899        let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2900        let counted_a = frag_a.optional().repeat_counted(0, 500);
2901        let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2902        let counted_b = frag_b.optional().repeat_counted(0, 500);
2903        let frag_c = builder.single_term(NfaTerm::element(c, None, None), None);
2904        let counted_c = frag_c.optional().repeat_counted(0, 500);
2905        let seq = counted_a.concat(counted_b).concat(counted_c);
2906        let nfa = fragment_to_table(seq);
2907
2908        assert_eq!(nfa.counter_defs.len(), 3);
2909        let initial = ActiveStates::from_nfa(&nfa);
2910
2911        // After 500 a's + 1 b → switch to counter 1
2912        let after_a = advance_n(initial, &nfa, a, 500);
2913        assert!(after_a.contains_accept(&nfa));
2914        let after_b1 = after_a.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
2915        if let ActiveStates::Hybrid {
2916            ranged_counter_idx, ..
2917        } = &after_b1
2918        {
2919            assert_eq!(
2920                *ranged_counter_idx, 1,
2921                "Should switch to counter 1 after first loop exits"
2922            );
2923        }
2924
2925        // After 500 b's + 1 c → switch to counter 2
2926        let after_b = advance_n(after_b1, &nfa, b, 499);
2927        assert!(after_b.contains_accept(&nfa));
2928        let after_c1 = after_b.advance(&nfa, c, None, None, None, XsdVersion::V1_0);
2929        if let ActiveStates::Hybrid {
2930            ranged_counter_idx, ..
2931        } = &after_c1
2932        {
2933            assert_eq!(
2934                *ranged_counter_idx, 2,
2935                "Should switch to counter 2 after second loop exits"
2936            );
2937        }
2938
2939        // 500 c's accepted, 501 rejected
2940        let after_c = advance_n(after_c1.clone(), &nfa, c, 499);
2941        assert!(after_c.contains_accept(&nfa));
2942        let over = advance_n(after_c1, &nfa, c, 500);
2943        assert!(over.is_empty());
2944    }
2945
2946    #[test]
2947    fn test_no_switch_when_ranged_still_active() {
2948        // (a?){0,1000} followed by (b?){0,1000}
2949        // After 5 a's, ranged counter is still active — no switch.
2950        let a = NameId(100);
2951        let b = NameId(200);
2952        let builder = FragmentBuilder::new();
2953        let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2954        let counted_a = frag_a.optional().repeat_counted(0, 1000);
2955        let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2956        let counted_b = frag_b.optional().repeat_counted(0, 1000);
2957        let seq = counted_a.concat(counted_b);
2958        let nfa = fragment_to_table(seq);
2959
2960        let initial = ActiveStates::from_nfa(&nfa);
2961        let after_5a = advance_n(initial, &nfa, a, 5);
2962        if let ActiveStates::Hybrid {
2963            ranged_counter_idx, ..
2964        } = &after_5a
2965        {
2966            assert_eq!(
2967                *ranged_counter_idx, 0,
2968                "Should NOT switch while first loop is still active"
2969            );
2970        }
2971    }
2972
2973    #[test]
2974    fn test_no_switch_non_nullable_candidate() {
2975        // (a?){0,1000} followed by b{0,1000}: second counter is NOT nullable
2976        let a = NameId(100);
2977        let b = NameId(200);
2978        let builder = FragmentBuilder::new();
2979        let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
2980        let counted_a = frag_a.optional().repeat_counted(0, 1000);
2981        let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
2982        let counted_b = frag_b.repeat_counted(0, 1000); // NOT nullable
2983        let seq = counted_a.concat(counted_b);
2984        let nfa = fragment_to_table(seq);
2985
2986        let initial = ActiveStates::from_nfa(&nfa);
2987        // Feed b's to exit first loop (counter 0 dead) and enter second
2988        let after_b = advance_n(initial, &nfa, b, 100);
2989        if let ActiveStates::Hybrid {
2990            ranged_counter_idx, ..
2991        } = &after_b
2992        {
2993            assert_eq!(
2994                *ranged_counter_idx, 0,
2995                "Should NOT switch: counter 1 is not nullable"
2996            );
2997        }
2998
2999        // Correctness: b*1000 accepted
3000        let s = advance_n(after_b.clone(), &nfa, b, 900);
3001        assert!(s.contains_accept(&nfa));
3002    }
3003
3004    #[test]
3005    fn test_no_switch_nested_inner_active() {
3006        // ((a?){0,100}){0,50}: inner counter should not cause a switch
3007        // while it still has active ranges.
3008        let a = NameId(100);
3009        let builder = FragmentBuilder::new();
3010        let frag = builder.single_term(NfaTerm::element(a, None, None), None);
3011        let inner = frag.optional().repeat_counted(0, 100);
3012        let outer = inner.repeat_counted(0, 50);
3013        let nfa = fragment_to_table(outer);
3014
3015        // During active processing, the ranged counter should not switch
3016        let initial = ActiveStates::from_nfa(&nfa);
3017        let after_a = advance_n(initial.clone(), &nfa, a, 50);
3018        if let ActiveStates::Hybrid {
3019            ranged_counter_idx, ..
3020        } = &after_a
3021        {
3022            // The initial ranged counter (inner, max=100) should stay
3023            let initial_idx = if let ActiveStates::Hybrid {
3024                ranged_counter_idx, ..
3025            } = &initial
3026            {
3027                *ranged_counter_idx
3028            } else {
3029                unreachable!()
3030            };
3031            assert_eq!(
3032                *ranged_counter_idx, initial_idx,
3033                "Should NOT switch while processing nested loops"
3034            );
3035        }
3036    }
3037
3038    #[test]
3039    fn test_switch_config_count_drops() {
3040        // (a?){0,1000} followed by (b?){0,1000}
3041        // After first 'b', config count should be small.
3042        let a = NameId(100);
3043        let b = NameId(200);
3044        let builder = FragmentBuilder::new();
3045        let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
3046        let counted_a = frag_a.optional().repeat_counted(0, 1000);
3047        let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
3048        let counted_b = frag_b.optional().repeat_counted(0, 1000);
3049        let seq = counted_a.concat(counted_b);
3050        let nfa = fragment_to_table(seq);
3051
3052        let initial = ActiveStates::from_nfa(&nfa);
3053        // Feed one 'b': exits first loop, enters second loop
3054        let after_b = initial.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
3055        if let ActiveStates::Hybrid {
3056            configs,
3057            ranged_counter_idx,
3058            ..
3059        } = &after_b
3060        {
3061            assert_eq!(
3062                *ranged_counter_idx, 1,
3063                "Should switch to counter 1 after 'b'"
3064            );
3065            // After switch, configs should be compact: O(states) not O(1000*states)
3066            assert!(
3067                configs.len() < nfa.state_count() * 2,
3068                "Config count {} should be << state_count {} after switch",
3069                configs.len(),
3070                nfa.state_count()
3071            );
3072        }
3073    }
3074
3075    #[test]
3076    fn test_differential_counted_vs_hybrid_switch() {
3077        // (a?){0,100} followed by (b?){0,100}
3078        // Build the same NFA, force one to Counted and the other to Hybrid.
3079        // Compare accept/reject decisions at every step.
3080        let a = NameId(100);
3081        let b = NameId(200);
3082        let builder = FragmentBuilder::new();
3083        let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
3084        let counted_a = frag_a.optional().repeat_counted(0, 100);
3085        let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
3086        let counted_b = frag_b.optional().repeat_counted(0, 100);
3087        let seq = counted_a.concat(counted_b);
3088        let nfa = fragment_to_table(seq);
3089
3090        // Hybrid path (normal)
3091        let hybrid = ActiveStates::from_nfa(&nfa);
3092
3093        // Force Counted path
3094        let initial_config = ActiveConfig::initial(nfa.start_state, nfa.counter_defs.len());
3095        let mut counted_configs = HashSet::new();
3096        counted_configs.insert(initial_config);
3097        let counted = ActiveStates::Counted {
3098            configs: counted_configs,
3099            num_counters: nfa.counter_defs.len(),
3100        }
3101        .epsilon_closure(&nfa);
3102
3103        // Compare: feed a*50, then b*50, checking at each step
3104        let mut h = hybrid;
3105        let mut c = counted;
3106        for _ in 0..50 {
3107            h = h.advance(&nfa, a, None, None, None, XsdVersion::V1_0);
3108            c = c.advance(&nfa, a, None, None, None, XsdVersion::V1_0);
3109            assert_eq!(
3110                h.contains_accept(&nfa),
3111                c.contains_accept(&nfa),
3112                "Hybrid/Counted disagree on accept during a-feeding"
3113            );
3114            assert_eq!(
3115                h.is_empty(),
3116                c.is_empty(),
3117                "Hybrid/Counted disagree on empty during a-feeding"
3118            );
3119        }
3120        for _ in 0..50 {
3121            h = h.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
3122            c = c.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
3123            assert_eq!(
3124                h.contains_accept(&nfa),
3125                c.contains_accept(&nfa),
3126                "Hybrid/Counted disagree on accept during b-feeding"
3127            );
3128            assert_eq!(
3129                h.is_empty(),
3130                c.is_empty(),
3131                "Hybrid/Counted disagree on empty during b-feeding"
3132            );
3133        }
3134        // Both should accept after a*50 + b*50
3135        assert!(h.contains_accept(&nfa));
3136        assert!(c.contains_accept(&nfa));
3137
3138        // Feed one more b — still accepted (total b=51 ≤ 100)
3139        h = h.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
3140        c = c.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
3141        assert_eq!(h.contains_accept(&nfa), c.contains_accept(&nfa));
3142    }
3143}