use std::collections::{HashMap, HashSet, VecDeque};
use super::substitution::SubstitutionGroupMap;
use crate::ids::{ElementKey, NameId, TypeKey};
use crate::parser::location::SourceRef;
use crate::schema::model::XsdVersion;
use crate::types::complex::{not_qnames_exclude, NamespaceConstraint, ProcessContents};
pub type StateId = u32;
pub type CounterId = u16;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct CounterRange {
pub lo: u32,
pub hi: u32,
}
impl CounterRange {
pub fn single(v: u32) -> Self {
Self { lo: v, hi: v }
}
pub fn new(lo: u32, hi: u32) -> Self {
debug_assert!(lo <= hi, "CounterRange::new({lo}, {hi}): lo > hi");
Self { lo, hi }
}
pub fn is_empty(self) -> bool {
self.lo > self.hi
}
pub fn subsumes(self, other: Self) -> bool {
self.lo <= other.lo && self.hi >= other.hi
}
pub fn union(self, other: Self) -> Self {
let result = Self {
lo: self.lo.min(other.lo),
hi: self.hi.max(other.hi),
};
debug_assert!(result.lo <= result.hi);
result
}
pub fn intersect_below(self, max_exclusive: u32) -> Self {
if max_exclusive == 0 || self.lo >= max_exclusive {
return Self { lo: 1, hi: 0 };
}
Self {
lo: self.lo,
hi: self.hi.min(max_exclusive - 1),
}
}
pub fn intersect_above(self, min_inclusive: u32) -> Self {
if self.hi < min_inclusive {
return Self { lo: 1, hi: 0 };
}
Self {
lo: self.lo.max(min_inclusive),
hi: self.hi,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CounterDef {
pub min: u32,
pub max: u32,
pub body_nullable: bool,
}
#[derive(Debug, Clone)]
pub struct NfaTable {
pub states: Vec<NfaState>,
pub start_state: StateId,
pub accept_state: StateId,
pub counter_defs: Vec<CounterDef>,
}
impl NfaTable {
pub fn new(states: Vec<NfaState>, start_state: StateId, accept_state: StateId) -> Self {
Self {
states,
start_state,
accept_state,
counter_defs: Vec::new(),
}
}
pub fn with_counters(
states: Vec<NfaState>,
start_state: StateId,
accept_state: StateId,
counter_defs: Vec<CounterDef>,
) -> Self {
Self {
states,
start_state,
accept_state,
counter_defs,
}
}
pub fn has_counters(&self) -> bool {
!self.counter_defs.is_empty()
}
pub fn get_state(&self, id: StateId) -> Option<&NfaState> {
self.states.get(id as usize)
}
pub fn get_state_mut(&mut self, id: StateId) -> Option<&mut NfaState> {
self.states.get_mut(id as usize)
}
pub fn state_count(&self) -> usize {
self.states.len()
}
pub fn is_accept(&self, state_id: StateId) -> bool {
state_id == self.accept_state
}
pub fn concat(mut self, mut other: NfaTable) -> NfaTable {
let state_offset = self.states.len() as StateId;
let counter_offset = self.counter_defs.len() as CounterId;
for state in &mut other.states {
state.id += state_offset;
for trans in &mut state.transitions {
trans.target += state_offset;
trans.kind = trans.kind.offset_counter(counter_offset);
}
}
let other_start = other.start_state + state_offset;
self.states[self.accept_state as usize].add_epsilon(other_start);
let new_accept = other.accept_state + state_offset;
self.states.extend(other.states);
self.counter_defs.extend(other.counter_defs);
NfaTable::with_counters(self.states, self.start_state, new_accept, self.counter_defs)
}
pub fn transitions_from(&self, state_id: StateId) -> &[NfaTransition] {
self.get_state(state_id)
.map(|s| s.transitions.as_slice())
.unwrap_or(&[])
}
}
#[derive(Debug, Clone)]
pub struct NfaState {
pub id: StateId,
pub term: Option<NfaTerm>,
pub transitions: Vec<NfaTransition>,
pub origin: Option<SourceRef>,
}
impl NfaState {
pub fn epsilon(id: StateId, origin: Option<SourceRef>) -> Self {
Self {
id,
term: None,
transitions: Vec::new(),
origin,
}
}
pub fn with_term(id: StateId, term: NfaTerm, origin: Option<SourceRef>) -> Self {
Self {
id,
term: Some(term),
transitions: Vec::new(),
origin,
}
}
pub fn add_transition(&mut self, target: StateId, kind: TransitionKind) {
self.transitions.push(NfaTransition { target, kind });
}
pub fn add_epsilon(&mut self, target: StateId) {
self.add_transition(target, TransitionKind::Epsilon);
}
pub fn add_consume(&mut self, target: StateId) {
self.add_transition(target, TransitionKind::Consume);
}
pub fn is_epsilon(&self) -> bool {
self.term.is_none()
}
pub fn epsilon_transitions(&self) -> impl Iterator<Item = StateId> + '_ {
self.transitions
.iter()
.filter(|t| t.kind == TransitionKind::Epsilon)
.map(|t| t.target)
}
pub fn consuming_transitions(&self) -> impl Iterator<Item = StateId> + '_ {
self.transitions
.iter()
.filter(|t| t.kind == TransitionKind::Consume)
.map(|t| t.target)
}
}
#[derive(Debug, Clone)]
pub enum NfaTerm {
Element {
name: NameId,
namespace: Option<NameId>,
element_key: Option<ElementKey>,
resolved_type: Option<TypeKey>,
},
Wildcard {
namespace_constraint: NamespaceConstraint,
process_contents: ProcessContents,
not_qnames: Vec<(Option<NameId>, NameId)>,
},
}
impl NfaTerm {
pub fn element(
name: NameId,
namespace: Option<NameId>,
element_key: Option<ElementKey>,
) -> Self {
NfaTerm::Element {
name,
namespace,
element_key,
resolved_type: None,
}
}
pub fn element_with_type(
name: NameId,
namespace: Option<NameId>,
element_key: Option<ElementKey>,
resolved_type: Option<TypeKey>,
) -> Self {
NfaTerm::Element {
name,
namespace,
element_key,
resolved_type,
}
}
pub fn wildcard(
namespace_constraint: NamespaceConstraint,
process_contents: ProcessContents,
) -> Self {
NfaTerm::Wildcard {
namespace_constraint,
process_contents,
not_qnames: Vec::new(),
}
}
pub fn wildcard_with_not_qnames(
namespace_constraint: NamespaceConstraint,
process_contents: ProcessContents,
not_qnames: Vec<(Option<NameId>, NameId)>,
) -> Self {
NfaTerm::Wildcard {
namespace_constraint,
process_contents,
not_qnames,
}
}
pub fn is_element(&self) -> bool {
matches!(self, NfaTerm::Element { .. })
}
pub fn is_wildcard(&self) -> bool {
matches!(self, NfaTerm::Wildcard { .. })
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NfaTransition {
pub target: StateId,
pub kind: TransitionKind,
}
impl NfaTransition {
pub fn new(target: StateId, kind: TransitionKind) -> Self {
Self { target, kind }
}
pub fn epsilon(target: StateId) -> Self {
Self::new(target, TransitionKind::Epsilon)
}
pub fn consume(target: StateId) -> Self {
Self::new(target, TransitionKind::Consume)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TransitionKind {
Epsilon,
Consume,
CounterReset(CounterId),
CounterIncrement(CounterId),
CounterMaxGuard(CounterId),
CounterMinGuard(CounterId),
}
impl TransitionKind {
pub fn is_epsilon_like(&self) -> bool {
!matches!(self, TransitionKind::Consume)
}
pub fn offset_counter(self, offset: CounterId) -> Self {
if offset == 0 {
return self;
}
match self {
TransitionKind::CounterReset(c) => TransitionKind::CounterReset(c + offset),
TransitionKind::CounterIncrement(c) => TransitionKind::CounterIncrement(c + offset),
TransitionKind::CounterMaxGuard(c) => TransitionKind::CounterMaxGuard(c + offset),
TransitionKind::CounterMinGuard(c) => TransitionKind::CounterMinGuard(c + offset),
other => other,
}
}
}
pub fn epsilon_closure(
nfa: &NfaTable,
start_states: impl IntoIterator<Item = StateId>,
) -> HashSet<StateId> {
debug_assert!(
!nfa.has_counters(),
"epsilon_closure called on counted NFA; use ActiveStates instead"
);
let mut closure = HashSet::new();
let mut stack: Vec<StateId> = start_states.into_iter().collect();
while let Some(state_id) = stack.pop() {
if !closure.insert(state_id) {
continue;
}
if let Some(state) = nfa.get_state(state_id) {
for target in state.epsilon_transitions() {
if !closure.contains(&target) {
stack.push(target);
}
}
}
}
closure
}
pub fn term_matches(
term: &NfaTerm,
element_name: NameId,
element_namespace: Option<NameId>,
target_namespace: Option<NameId>,
substitution_groups: Option<&SubstitutionGroupMap>,
xsd_version: XsdVersion,
) -> bool {
match term {
NfaTerm::Element {
name,
namespace,
element_key,
..
} => {
if let (Some(map), Some(key)) = (substitution_groups, element_key) {
if let Some(names) = map.get(key) {
return names.contains(&(element_name, element_namespace));
}
}
*name == element_name && *namespace == element_namespace
}
NfaTerm::Wildcard {
namespace_constraint,
not_qnames,
..
} => {
if !namespace_constraint.matches(element_namespace, target_namespace, xsd_version) {
return false;
}
!not_qnames_exclude(not_qnames, element_namespace, element_name)
}
}
}
pub fn advance_states(
nfa: &NfaTable,
start_states: impl IntoIterator<Item = StateId>,
element_name: NameId,
element_namespace: Option<NameId>,
target_namespace: Option<NameId>,
substitution_groups: Option<&SubstitutionGroupMap>,
xsd_version: XsdVersion,
) -> HashSet<StateId> {
let closure = epsilon_closure(nfa, start_states);
let mut next = HashSet::new();
for state_id in closure {
let state = match nfa.get_state(state_id) {
Some(state) => state,
None => continue,
};
let term = match state.term.as_ref() {
Some(term) => term,
None => continue,
};
if term_matches(
term,
element_name,
element_namespace,
target_namespace,
substitution_groups,
xsd_version,
) {
for target in state.consuming_transitions() {
next.insert(target);
}
}
}
epsilon_closure(nfa, next)
}
pub fn advance_with_priority(
nfa: &NfaTable,
start_states: impl IntoIterator<Item = StateId>,
element_name: NameId,
element_namespace: Option<NameId>,
target_namespace: Option<NameId>,
substitution_groups: Option<&SubstitutionGroupMap>,
xsd_version: XsdVersion,
) -> HashSet<StateId> {
let closure = epsilon_closure(nfa, start_states);
let mut element_targets = HashSet::new();
let mut wildcard_targets = HashSet::new();
for state_id in closure {
let state = match nfa.get_state(state_id) {
Some(state) => state,
None => continue,
};
let term = match state.term.as_ref() {
Some(term) => term,
None => continue,
};
if !term_matches(
term,
element_name,
element_namespace,
target_namespace,
substitution_groups,
xsd_version,
) {
continue;
}
match term {
NfaTerm::Element { .. } => {
for target in state.consuming_transitions() {
element_targets.insert(target);
}
}
NfaTerm::Wildcard { .. } => {
for target in state.consuming_transitions() {
wildcard_targets.insert(target);
}
}
}
}
let next = if !element_targets.is_empty() {
element_targets
} else {
wildcard_targets
};
epsilon_closure(nfa, next)
}
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct ActiveConfig {
pub state_id: StateId,
pub counters: Box<[u32]>,
}
impl ActiveConfig {
pub fn initial(state_id: StateId, num_counters: usize) -> Self {
Self {
state_id,
counters: vec![0; num_counters].into_boxed_slice(),
}
}
fn with_state(&self, new_state: StateId) -> Self {
Self {
state_id: new_state,
counters: self.counters.clone(),
}
}
fn with_counter_set(&self, new_state: StateId, counter_id: CounterId, value: u32) -> Self {
let mut new = self.with_state(new_state);
new.counters[counter_id as usize] = value;
new
}
}
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct HybridKey {
state_id: StateId,
counters: Box<[u32]>,
}
impl HybridKey {
fn initial(state_id: StateId, num_counters: usize) -> Self {
Self {
state_id,
counters: vec![0; num_counters].into_boxed_slice(),
}
}
fn with_state(&self, new_state: StateId) -> Self {
Self {
state_id: new_state,
counters: self.counters.clone(),
}
}
fn with_scalar_counter(
&self,
new_state: StateId,
counter_id: CounterId,
value: u32,
ranged_counter_idx: usize,
) -> Self {
debug_assert!(
counter_id as usize != ranged_counter_idx,
"with_scalar_counter called on ranged counter {counter_id}"
);
let mut new = self.with_state(new_state);
new.counters[counter_id as usize] = value;
new
}
fn counter(&self, counter_id: CounterId) -> u32 {
self.counters[counter_id as usize]
}
fn repacked(&self, old_ranged_idx: usize, new_ranged_idx: usize) -> (Self, u32) {
let scalar_val = self.counters[new_ranged_idx];
let mut new_counters = self.counters.clone();
new_counters[old_ranged_idx] = 0;
new_counters[new_ranged_idx] = 0; (
Self {
state_id: self.state_id,
counters: new_counters,
},
scalar_val,
)
}
}
fn ranged_single_epsilon_closure(
nfa: &NfaTable,
seeds: HashMap<StateId, CounterRange>,
counter_def: CounterDef,
) -> ActiveStates {
let mut map: HashMap<StateId, CounterRange> = HashMap::new();
let mut worklist: VecDeque<(StateId, CounterRange)> = seeds.into_iter().collect();
while let Some((state_id, range)) = worklist.pop_front() {
if let Some(&existing) = map.get(&state_id) {
if existing.subsumes(range) {
continue;
}
let merged = existing.union(range);
debug_assert!(
merged.lo <= merged.hi,
"contiguity invariant violated at state {state_id}"
);
map.insert(state_id, merged);
} else {
map.insert(state_id, range);
}
if let Some(state) = nfa.get_state(state_id) {
for trans in &state.transitions {
let next = match trans.kind {
TransitionKind::Epsilon => Some(range),
TransitionKind::CounterReset(_) => Some(CounterRange::single(0)),
TransitionKind::CounterIncrement(_) => {
Some(CounterRange::new(range.lo + 1, counter_def.max))
}
TransitionKind::CounterMaxGuard(_) => {
let clamped = range.intersect_below(counter_def.max);
if clamped.is_empty() {
None
} else {
Some(clamped)
}
}
TransitionKind::CounterMinGuard(_) => {
let passed = range.intersect_above(counter_def.min);
if passed.is_empty() {
None
} else {
Some(CounterRange::single(0))
}
}
TransitionKind::Consume => None,
};
if let Some(next_range) = next {
let dominated = map
.get(&trans.target)
.is_some_and(|r| r.subsumes(next_range));
if !dominated {
worklist.push_back((trans.target, next_range));
}
}
}
}
}
ActiveStates::RangedSingle {
state_ranges: map,
counter_def,
}
}
fn hybrid_epsilon_closure(
nfa: &NfaTable,
seeds: HashMap<HybridKey, CounterRange>,
ranged_counter_idx: usize,
num_counters: usize,
) -> ActiveStates {
let ranged_id = ranged_counter_idx as CounterId;
let ranged_def = nfa.counter_defs[ranged_counter_idx];
let mut map: HashMap<HybridKey, CounterRange> = HashMap::new();
let mut worklist: VecDeque<(HybridKey, CounterRange)> = seeds.into_iter().collect();
while let Some((key, range)) = worklist.pop_front() {
if let Some(&existing) = map.get(&key) {
if existing.subsumes(range) {
continue;
}
let merged = existing.union(range);
debug_assert!(
merged.lo <= merged.hi,
"contiguity invariant violated at state {}",
key.state_id
);
map.insert(key.clone(), merged);
} else {
map.insert(key.clone(), range);
}
if let Some(state) = nfa.get_state(key.state_id) {
for trans in &state.transitions {
let next: Option<(HybridKey, CounterRange)> = match trans.kind {
TransitionKind::Epsilon => Some((key.with_state(trans.target), range)),
TransitionKind::CounterReset(c) if c == ranged_id => {
Some((key.with_state(trans.target), CounterRange::single(0)))
}
TransitionKind::CounterIncrement(c) if c == ranged_id => {
Some((
key.with_state(trans.target),
CounterRange::new(range.lo + 1, ranged_def.max),
))
}
TransitionKind::CounterMaxGuard(c) if c == ranged_id => {
let clamped = range.intersect_below(ranged_def.max);
if clamped.is_empty() {
None
} else {
Some((key.with_state(trans.target), clamped))
}
}
TransitionKind::CounterMinGuard(c) if c == ranged_id => {
let passed = range.intersect_above(ranged_def.min);
if passed.is_empty() {
None
} else {
Some((key.with_state(trans.target), CounterRange::single(0)))
}
}
TransitionKind::CounterReset(c) => Some((
key.with_scalar_counter(trans.target, c, 0, ranged_counter_idx),
range,
)),
TransitionKind::CounterIncrement(c) => {
let val = key.counter(c) + 1;
Some((
key.with_scalar_counter(trans.target, c, val, ranged_counter_idx),
range,
))
}
TransitionKind::CounterMaxGuard(c) => {
if key.counter(c) < nfa.counter_defs[c as usize].max {
Some((key.with_state(trans.target), range))
} else {
None
}
}
TransitionKind::CounterMinGuard(c) => {
if key.counter(c) >= nfa.counter_defs[c as usize].min {
Some((
key.with_scalar_counter(trans.target, c, 0, ranged_counter_idx),
range,
))
} else {
None
}
}
TransitionKind::Consume => None,
};
if let Some((next_key, next_range)) = next {
let dominated = map.get(&next_key).is_some_and(|r| r.subsumes(next_range));
if !dominated {
worklist.push_back((next_key, next_range));
}
}
}
}
}
let result = ActiveStates::Hybrid {
configs: map,
ranged_counter_idx,
num_counters,
};
result.maybe_switch_ranged_counter(nfa)
}
#[derive(Debug, Clone)]
pub enum ActiveStates {
Simple(HashSet<StateId>),
Counted {
configs: HashSet<ActiveConfig>,
num_counters: usize,
},
RangedSingle {
state_ranges: HashMap<StateId, CounterRange>,
counter_def: CounterDef,
},
Hybrid {
configs: HashMap<HybridKey, CounterRange>,
ranged_counter_idx: usize,
num_counters: usize,
},
}
impl ActiveStates {
pub fn from_nfa(nfa: &NfaTable) -> Self {
use super::particle::COUNTED_THRESHOLD;
if nfa.has_counters() {
if nfa.counter_defs.len() == 1 && nfa.counter_defs[0].body_nullable {
let mut state_ranges = HashMap::new();
state_ranges.insert(nfa.start_state, CounterRange::single(0));
let result = ActiveStates::RangedSingle {
state_ranges,
counter_def: nfa.counter_defs[0],
};
return result.epsilon_closure(nfa);
}
if nfa.counter_defs.len() > 1 {
let best_nullable = nfa
.counter_defs
.iter()
.enumerate()
.filter(|(_, def)| def.body_nullable)
.max_by_key(|(idx, def)| (def.max, std::cmp::Reverse(*idx)));
if let Some((ranged_idx, def)) = best_nullable {
if def.max > COUNTED_THRESHOLD {
let num_counters = nfa.counter_defs.len();
let initial_key = HybridKey::initial(nfa.start_state, num_counters);
let mut configs = HashMap::new();
configs.insert(initial_key, CounterRange::single(0));
let result = ActiveStates::Hybrid {
configs,
ranged_counter_idx: ranged_idx,
num_counters,
};
return result.epsilon_closure(nfa);
}
}
}
let initial = ActiveConfig::initial(nfa.start_state, nfa.counter_defs.len());
let mut configs = HashSet::new();
configs.insert(initial);
let result = ActiveStates::Counted {
configs,
num_counters: nfa.counter_defs.len(),
};
result.epsilon_closure(nfa)
} else {
let simple = epsilon_closure(nfa, std::iter::once(nfa.start_state));
ActiveStates::Simple(simple)
}
}
pub fn is_empty(&self) -> bool {
match self {
ActiveStates::Simple(s) => s.is_empty(),
ActiveStates::Counted { configs, .. } => configs.is_empty(),
ActiveStates::RangedSingle { state_ranges, .. } => state_ranges.is_empty(),
ActiveStates::Hybrid { configs, .. } => configs.is_empty(),
}
}
pub fn contains_accept(&self, nfa: &NfaTable) -> bool {
match self {
ActiveStates::Simple(s) => s.iter().any(|&id| nfa.is_accept(id)),
ActiveStates::Counted { configs, .. } => {
configs.iter().any(|c| nfa.is_accept(c.state_id))
}
ActiveStates::RangedSingle { state_ranges, .. } => {
state_ranges.contains_key(&nfa.accept_state)
}
ActiveStates::Hybrid { configs, .. } => {
configs.keys().any(|k| nfa.is_accept(k.state_id))
}
}
}
pub fn epsilon_closure(self, nfa: &NfaTable) -> Self {
match self {
ActiveStates::Simple(states) => ActiveStates::Simple(epsilon_closure(nfa, states)),
ActiveStates::Counted {
configs,
num_counters,
} => {
let mut result: HashSet<ActiveConfig> = HashSet::new();
let mut stack: Vec<ActiveConfig> = configs.into_iter().collect();
while let Some(config) = stack.pop() {
if !result.insert(config.clone()) {
continue;
}
if let Some(state) = nfa.get_state(config.state_id) {
for trans in &state.transitions {
let next = match trans.kind {
TransitionKind::Epsilon => Some(config.with_state(trans.target)),
TransitionKind::CounterReset(c) => {
Some(config.with_counter_set(trans.target, c, 0))
}
TransitionKind::CounterIncrement(c) => {
let val = config.counters[c as usize] + 1;
Some(config.with_counter_set(trans.target, c, val))
}
TransitionKind::CounterMaxGuard(c) => {
if config.counters[c as usize]
< nfa.counter_defs[c as usize].max
{
Some(config.with_state(trans.target))
} else {
None
}
}
TransitionKind::CounterMinGuard(c) => {
if config.counters[c as usize]
>= nfa.counter_defs[c as usize].min
{
Some(config.with_counter_set(trans.target, c, 0))
} else {
None
}
}
TransitionKind::Consume => None,
};
if let Some(next_config) = next {
if !result.contains(&next_config) {
stack.push(next_config);
}
}
}
}
}
ActiveStates::Counted {
configs: result,
num_counters,
}
}
ActiveStates::RangedSingle {
state_ranges,
counter_def,
} => ranged_single_epsilon_closure(nfa, state_ranges, counter_def),
ActiveStates::Hybrid {
configs,
ranged_counter_idx,
num_counters,
} => hybrid_epsilon_closure(nfa, configs, ranged_counter_idx, num_counters),
}
}
fn maybe_switch_ranged_counter(self, nfa: &NfaTable) -> Self {
use super::particle::COUNTED_THRESHOLD;
let ActiveStates::Hybrid {
configs,
ranged_counter_idx,
num_counters,
} = self
else {
return self;
};
let dead = CounterRange::single(0);
if configs.is_empty() || !configs.values().all(|r| *r == dead) {
return ActiveStates::Hybrid {
configs,
ranged_counter_idx,
num_counters,
};
}
let nc = nfa.counter_defs.len();
let mut min_vals = vec![u32::MAX; nc];
let mut max_vals = vec![0u32; nc];
for key in configs.keys() {
for idx in 0..nc {
let v = key.counter(idx as CounterId);
min_vals[idx] = min_vals[idx].min(v);
max_vals[idx] = max_vals[idx].max(v);
}
}
let mut best: Option<(usize, u32, u32)> = None; for (idx, def) in nfa.counter_defs.iter().enumerate() {
if idx == ranged_counter_idx || !def.body_nullable || def.max <= COUNTED_THRESHOLD {
continue;
}
if min_vals[idx] == max_vals[idx] {
continue;
}
let spread = max_vals[idx] - min_vals[idx] + 1;
let dominated =
best.is_some_and(|(_, bs, bm)| spread < bs || (spread == bs && def.max <= bm));
if !dominated {
best = Some((idx, spread, def.max));
}
}
let Some((new_ranged_idx, _, _)) = best else {
return ActiveStates::Hybrid {
configs,
ranged_counter_idx,
num_counters,
};
};
let mut new_configs: HashMap<HybridKey, (CounterRange, u32)> =
HashMap::with_capacity(configs.len());
for old_key in configs.keys() {
let (new_key, scalar_val) = old_key.repacked(ranged_counter_idx, new_ranged_idx);
let new_range = CounterRange::single(scalar_val);
new_configs
.entry(new_key)
.and_modify(|(r, count)| {
*r = r.union(new_range);
*count += 1;
})
.or_insert((new_range, 1));
}
for (range, count) in new_configs.values() {
if range.hi - range.lo + 1 != *count {
return ActiveStates::Hybrid {
configs,
ranged_counter_idx,
num_counters,
};
}
}
let repacked = new_configs.into_iter().map(|(k, (r, _))| (k, r)).collect();
ActiveStates::Hybrid {
configs: repacked,
ranged_counter_idx: new_ranged_idx,
num_counters,
}
}
pub fn advance(
self,
nfa: &NfaTable,
element_name: NameId,
element_namespace: Option<NameId>,
target_namespace: Option<NameId>,
substitution_groups: Option<&SubstitutionGroupMap>,
xsd_version: XsdVersion,
) -> Self {
match self {
ActiveStates::Simple(states) => ActiveStates::Simple(advance_states(
nfa,
states,
element_name,
element_namespace,
target_namespace,
substitution_groups,
xsd_version,
)),
ActiveStates::Counted {
configs,
num_counters,
} => {
let mut next_configs = HashSet::new();
for config in &configs {
if let Some(state) = nfa.get_state(config.state_id) {
if let Some(ref term_val) = state.term {
if term_matches(
term_val,
element_name,
element_namespace,
target_namespace,
substitution_groups,
xsd_version,
) {
for trans in &state.transitions {
if trans.kind == TransitionKind::Consume {
next_configs.insert(config.with_state(trans.target));
}
}
}
}
}
}
let result = ActiveStates::Counted {
configs: next_configs,
num_counters,
};
result.epsilon_closure(nfa)
}
ActiveStates::RangedSingle {
state_ranges,
counter_def,
} => {
let mut next_seeds: HashMap<StateId, CounterRange> = HashMap::new();
for (&state_id, &range) in &state_ranges {
if let Some(state) = nfa.get_state(state_id) {
if let Some(ref term_val) = state.term {
if term_matches(
term_val,
element_name,
element_namespace,
target_namespace,
substitution_groups,
xsd_version,
) {
for trans in &state.transitions {
if trans.kind == TransitionKind::Consume {
next_seeds
.entry(trans.target)
.and_modify(|r| *r = r.union(range))
.or_insert(range);
}
}
}
}
}
}
let result = ActiveStates::RangedSingle {
state_ranges: next_seeds,
counter_def,
};
result.epsilon_closure(nfa)
}
ActiveStates::Hybrid {
configs,
ranged_counter_idx,
num_counters,
} => {
let mut next_configs: HashMap<HybridKey, CounterRange> = HashMap::new();
for (key, &range) in &configs {
if let Some(state) = nfa.get_state(key.state_id) {
if let Some(ref term_val) = state.term {
if term_matches(
term_val,
element_name,
element_namespace,
target_namespace,
substitution_groups,
xsd_version,
) {
for trans in &state.transitions {
if trans.kind == TransitionKind::Consume {
let new_key = key.with_state(trans.target);
next_configs
.entry(new_key)
.and_modify(|r| *r = r.union(range))
.or_insert(range);
}
}
}
}
}
}
let result = ActiveStates::Hybrid {
configs: next_configs,
ranged_counter_idx,
num_counters,
};
result.epsilon_closure(nfa)
}
}
}
pub fn advance_with_priority(
self,
nfa: &NfaTable,
element_name: NameId,
element_namespace: Option<NameId>,
target_namespace: Option<NameId>,
substitution_groups: Option<&SubstitutionGroupMap>,
xsd_version: XsdVersion,
) -> Self {
match self {
ActiveStates::Simple(states) => ActiveStates::Simple(advance_with_priority(
nfa,
states,
element_name,
element_namespace,
target_namespace,
substitution_groups,
xsd_version,
)),
ActiveStates::Counted {
configs,
num_counters,
} => {
let mut element_configs = HashSet::new();
let mut wildcard_configs = HashSet::new();
for config in &configs {
if let Some(state) = nfa.get_state(config.state_id) {
if let Some(ref term_val) = state.term {
if term_matches(
term_val,
element_name,
element_namespace,
target_namespace,
substitution_groups,
xsd_version,
) {
let target_set = match term_val {
NfaTerm::Element { .. } => &mut element_configs,
NfaTerm::Wildcard { .. } => &mut wildcard_configs,
};
for trans in &state.transitions {
if trans.kind == TransitionKind::Consume {
target_set.insert(config.with_state(trans.target));
}
}
}
}
}
}
let next = if !element_configs.is_empty() {
element_configs
} else {
wildcard_configs
};
let result = ActiveStates::Counted {
configs: next,
num_counters,
};
result.epsilon_closure(nfa)
}
ActiveStates::RangedSingle {
state_ranges,
counter_def,
} => {
let mut element_seeds: HashMap<StateId, CounterRange> = HashMap::new();
let mut wildcard_seeds: HashMap<StateId, CounterRange> = HashMap::new();
for (&state_id, &range) in &state_ranges {
if let Some(state) = nfa.get_state(state_id) {
if let Some(ref term_val) = state.term {
if term_matches(
term_val,
element_name,
element_namespace,
target_namespace,
substitution_groups,
xsd_version,
) {
let target_map = match term_val {
NfaTerm::Element { .. } => &mut element_seeds,
NfaTerm::Wildcard { .. } => &mut wildcard_seeds,
};
for trans in &state.transitions {
if trans.kind == TransitionKind::Consume {
target_map
.entry(trans.target)
.and_modify(|r| *r = r.union(range))
.or_insert(range);
}
}
}
}
}
}
let next = if !element_seeds.is_empty() {
element_seeds
} else {
wildcard_seeds
};
let result = ActiveStates::RangedSingle {
state_ranges: next,
counter_def,
};
result.epsilon_closure(nfa)
}
ActiveStates::Hybrid {
configs,
ranged_counter_idx,
num_counters,
} => {
let mut element_configs: HashMap<HybridKey, CounterRange> = HashMap::new();
let mut wildcard_configs: HashMap<HybridKey, CounterRange> = HashMap::new();
for (key, &range) in &configs {
if let Some(state) = nfa.get_state(key.state_id) {
if let Some(ref term_val) = state.term {
if term_matches(
term_val,
element_name,
element_namespace,
target_namespace,
substitution_groups,
xsd_version,
) {
let target_map = match term_val {
NfaTerm::Element { .. } => &mut element_configs,
NfaTerm::Wildcard { .. } => &mut wildcard_configs,
};
for trans in &state.transitions {
if trans.kind == TransitionKind::Consume {
let new_key = key.with_state(trans.target);
target_map
.entry(new_key)
.and_modify(|r| *r = r.union(range))
.or_insert(range);
}
}
}
}
}
}
let next = if !element_configs.is_empty() {
element_configs
} else {
wildcard_configs
};
let result = ActiveStates::Hybrid {
configs: next,
ranged_counter_idx,
num_counters,
};
result.epsilon_closure(nfa)
}
}
}
pub fn find_match_info(
&self,
nfa: &NfaTable,
name: NameId,
namespace: Option<NameId>,
target_ns: Option<NameId>,
subst_groups: Option<&SubstitutionGroupMap>,
xsd_version: XsdVersion,
) -> MatchInfo {
match self {
ActiveStates::Simple(states) => {
let closure = epsilon_closure(nfa, states.iter().copied());
find_match_info_in_states(
nfa,
closure.iter().copied(),
name,
namespace,
target_ns,
subst_groups,
xsd_version,
)
}
ActiveStates::Counted { configs, .. } => {
find_match_info_in_states(
nfa,
configs.iter().map(|c| c.state_id),
name,
namespace,
target_ns,
subst_groups,
xsd_version,
)
}
ActiveStates::RangedSingle { state_ranges, .. } => find_match_info_in_states(
nfa,
state_ranges.keys().copied(),
name,
namespace,
target_ns,
subst_groups,
xsd_version,
),
ActiveStates::Hybrid { configs, .. } => find_match_info_in_states(
nfa,
configs.keys().map(|k| k.state_id),
name,
namespace,
target_ns,
subst_groups,
xsd_version,
),
}
}
pub fn expected_element_terms(
&self,
nfa: &NfaTable,
) -> Vec<(NameId, Option<NameId>, Option<ElementKey>)> {
let mut result = Vec::new();
match self {
ActiveStates::Simple(states) => {
let closure = epsilon_closure(nfa, states.iter().copied());
for state_id in closure {
if let Some(state) = nfa.get_state(state_id) {
if let Some(NfaTerm::Element {
name,
namespace,
element_key,
..
}) = &state.term
{
result.push((*name, *namespace, *element_key));
}
}
}
}
ActiveStates::Counted { configs, .. } => {
let mut seen = HashSet::new();
for config in configs {
if !seen.insert(config.state_id) {
continue; }
if let Some(state) = nfa.get_state(config.state_id) {
if let Some(NfaTerm::Element {
name,
namespace,
element_key,
..
}) = &state.term
{
result.push((*name, *namespace, *element_key));
}
}
}
}
ActiveStates::RangedSingle { state_ranges, .. } => {
for &state_id in state_ranges.keys() {
if let Some(state) = nfa.get_state(state_id) {
if let Some(NfaTerm::Element {
name,
namespace,
element_key,
..
}) = &state.term
{
result.push((*name, *namespace, *element_key));
}
}
}
}
ActiveStates::Hybrid { configs, .. } => {
let mut seen = HashSet::new();
for key in configs.keys() {
if !seen.insert(key.state_id) {
continue;
}
if let Some(state) = nfa.get_state(key.state_id) {
if let Some(NfaTerm::Element {
name,
namespace,
element_key,
..
}) = &state.term
{
result.push((*name, *namespace, *element_key));
}
}
}
}
}
result
}
}
#[derive(Debug, Clone, Copy, Default)]
pub struct MatchInfo {
pub element_key: Option<ElementKey>,
pub resolved_type: Option<TypeKey>,
pub process_contents: Option<ProcessContents>,
}
fn find_match_info_in_states(
nfa: &NfaTable,
state_ids: impl Iterator<Item = StateId>,
name: NameId,
namespace: Option<NameId>,
target_ns: Option<NameId>,
subst_groups: Option<&SubstitutionGroupMap>,
xsd_version: XsdVersion,
) -> MatchInfo {
let mut wildcard_info: Option<MatchInfo> = None;
for state_id in state_ids {
if let Some(state) = nfa.get_state(state_id) {
if let Some(ref term) = state.term {
if term_matches(term, name, namespace, target_ns, subst_groups, xsd_version) {
match term {
NfaTerm::Element {
name: term_name,
namespace: term_ns,
element_key,
resolved_type,
} => {
return if *term_name == name && *term_ns == namespace {
MatchInfo {
element_key: *element_key,
resolved_type: *resolved_type,
process_contents: None,
}
} else {
MatchInfo {
element_key: None,
resolved_type: None,
process_contents: None,
}
};
}
NfaTerm::Wildcard {
process_contents, ..
} => {
if wildcard_info.is_none() {
wildcard_info = Some(MatchInfo {
element_key: None,
resolved_type: None,
process_contents: Some(*process_contents),
});
}
}
}
}
}
}
}
wildcard_info.unwrap_or_default()
}
#[cfg(test)]
mod tests {
use super::*;
use std::collections::HashSet;
use crate::compiler::build_substitution_group_map;
use crate::schema::model::{DerivationSet, SchemaSet};
fn element_data(name: NameId) -> crate::arenas::ElementDeclData {
crate::arenas::ElementDeclData {
name: Some(name),
target_namespace: None,
ref_name: None,
type_ref: None,
inline_type: None,
substitution_group: Vec::new(),
default_value: None,
fixed_value: None,
nillable: false,
is_abstract: false,
min_occurs: 1,
max_occurs: Some(1),
block: DerivationSet::empty(),
final_derivation: DerivationSet::empty(),
form: None,
id: None,
alternatives: Vec::new(),
identity_constraints: Vec::new(),
pending_ic_refs: vec![],
annotation: None,
source: None,
resolved_type: None,
resolved_ref: None,
resolved_substitution_groups: Vec::new(),
deferred_type_error: None,
}
}
fn make_set(ids: &[StateId]) -> HashSet<StateId> {
ids.iter().copied().collect()
}
#[test]
fn test_nfa_state_creation() {
let state = NfaState::epsilon(0, None);
assert_eq!(state.id, 0);
assert!(state.is_epsilon());
assert!(state.transitions.is_empty());
}
#[test]
fn test_nfa_state_with_term() {
let term = NfaTerm::element(NameId(1), None, None);
let state = NfaState::with_term(1, term, None);
assert_eq!(state.id, 1);
assert!(!state.is_epsilon());
}
#[test]
fn test_nfa_state_transitions() {
let mut state = NfaState::epsilon(0, None);
state.add_epsilon(1);
state.add_consume(2);
let epsilons: Vec<_> = state.epsilon_transitions().collect();
assert_eq!(epsilons, vec![1]);
let consuming: Vec<_> = state.consuming_transitions().collect();
assert_eq!(consuming, vec![2]);
}
#[test]
fn test_nfa_table() {
let states = vec![
NfaState::epsilon(0, None),
NfaState::with_term(1, NfaTerm::element(NameId(1), None, None), None),
NfaState::epsilon(2, None),
];
let table = NfaTable::new(states, 0, 2);
assert_eq!(table.state_count(), 3);
assert_eq!(table.start_state, 0);
assert_eq!(table.accept_state, 2);
assert!(table.is_accept(2));
assert!(!table.is_accept(0));
}
#[test]
fn test_nfa_term() {
let elem = NfaTerm::element(NameId(1), Some(NameId(2)), None);
assert!(elem.is_element());
assert!(!elem.is_wildcard());
let wild = NfaTerm::wildcard(NamespaceConstraint::Any, ProcessContents::Lax);
assert!(!wild.is_element());
assert!(wild.is_wildcard());
}
#[test]
fn test_transition_kinds() {
let eps = NfaTransition::epsilon(1);
assert_eq!(eps.kind, TransitionKind::Epsilon);
let cons = NfaTransition::consume(2);
assert_eq!(cons.kind, TransitionKind::Consume);
}
#[test]
fn test_epsilon_closure_basic() {
let mut s0 = NfaState::epsilon(0, None);
s0.add_epsilon(1);
let mut s1 = NfaState::epsilon(1, None);
s1.add_epsilon(2);
let s2 = NfaState::with_term(2, NfaTerm::element(NameId(1), None, None), None);
let nfa = NfaTable::new(vec![s0, s1, s2], 0, 2);
let closure = epsilon_closure(&nfa, [0]);
assert_eq!(closure, make_set(&[0, 1, 2]));
}
fn make_priority_nfa() -> NfaTable {
let mut start = NfaState::epsilon(0, None);
start.add_epsilon(1);
start.add_epsilon(2);
let mut elem_state = NfaState::with_term(1, NfaTerm::element(NameId(1), None, None), None);
let mut wild_state = NfaState::with_term(
2,
NfaTerm::wildcard(NamespaceConstraint::Any, ProcessContents::Lax),
None,
);
elem_state.add_consume(3);
wild_state.add_consume(4);
let exit_elem = NfaState::epsilon(3, None);
let exit_wild = NfaState::epsilon(4, None);
NfaTable::new(
vec![start, elem_state, wild_state, exit_elem, exit_wild],
0,
3,
)
}
#[test]
fn test_advance_states_matches_element_and_wildcard() {
let nfa = make_priority_nfa();
let next = advance_states(&nfa, [0], NameId(1), None, None, None, XsdVersion::V1_0);
assert_eq!(next, make_set(&[3, 4]));
let next = advance_states(&nfa, [0], NameId(2), None, None, None, XsdVersion::V1_0);
assert_eq!(next, make_set(&[4]));
}
#[test]
fn test_advance_with_priority_prefers_element() {
let nfa = make_priority_nfa();
let next = advance_with_priority(&nfa, [0], NameId(1), None, None, None, XsdVersion::V1_1);
assert_eq!(next, make_set(&[3]));
let next = advance_with_priority(&nfa, [0], NameId(2), None, None, None, XsdVersion::V1_1);
assert_eq!(next, make_set(&[4]));
}
#[test]
fn test_find_match_info_prefers_element_over_wildcard() {
let nfa = make_priority_nfa();
let active = ActiveStates::Simple(epsilon_closure(&nfa, [0]));
let mi = active.find_match_info(&nfa, NameId(1), None, None, None, XsdVersion::V1_1);
assert!(
mi.process_contents.is_none(),
"element match should not have process_contents, got {:?}",
mi.process_contents,
);
let mi2 = active.find_match_info(&nfa, NameId(2), None, None, None, XsdVersion::V1_1);
assert!(
mi2.process_contents.is_some(),
"wildcard-only match should have process_contents",
);
}
#[test]
fn test_term_matches_substitution_groups() {
let mut schema_set = SchemaSet::new();
let head_name = schema_set.name_table.add("head");
let member_name = schema_set.name_table.add("member");
let head_key = schema_set.arenas.alloc_element(element_data(head_name));
let member_key = schema_set.arenas.alloc_element(element_data(member_name));
schema_set
.arenas
.elements
.get_mut(member_key)
.unwrap()
.resolved_substitution_groups
.push(head_key);
let map = build_substitution_group_map(&schema_set);
let term = NfaTerm::element(head_name, None, Some(head_key));
assert!(term_matches(
&term,
member_name,
None,
None,
Some(&map),
XsdVersion::V1_0,
));
}
#[test]
fn test_find_match_info_substitution_returns_none_for_head_key() {
let mut schema_set = SchemaSet::new();
let head_name = schema_set.name_table.add("head");
let member_name = schema_set.name_table.add("member");
let mut head_data = element_data(head_name);
head_data.is_abstract = true;
let head_key = schema_set.arenas.alloc_element(head_data);
let member_key = schema_set.arenas.alloc_element(element_data(member_name));
schema_set
.arenas
.elements
.get_mut(member_key)
.unwrap()
.resolved_substitution_groups
.push(head_key);
let map = build_substitution_group_map(&schema_set);
let builder = crate::compiler::fragment::FragmentBuilder::new();
let frag = builder.single_term(NfaTerm::element(head_name, None, Some(head_key)), None);
let nfa = crate::compiler::fragment::fragment_to_table(frag);
let active = ActiveStates::from_nfa(&nfa);
let mi =
active.find_match_info(&nfa, member_name, None, None, Some(&map), XsdVersion::V1_0);
assert!(
mi.element_key.is_none(),
"substitution match should not return head's element_key"
);
assert!(mi.resolved_type.is_none());
let mi_head =
active.find_match_info(&nfa, head_name, None, None, Some(&map), XsdVersion::V1_0);
assert!(
mi_head.element_key.is_none(),
"abstract head should not match its own name via subst map"
);
}
#[test]
fn test_find_match_info_direct_match_returns_element_key() {
let mut schema_set = SchemaSet::new();
let head_name = schema_set.name_table.add("head");
let member_name = schema_set.name_table.add("member");
let head_key = schema_set.arenas.alloc_element(element_data(head_name));
let member_key = schema_set.arenas.alloc_element(element_data(member_name));
schema_set
.arenas
.elements
.get_mut(member_key)
.unwrap()
.resolved_substitution_groups
.push(head_key);
let map = build_substitution_group_map(&schema_set);
let builder = crate::compiler::fragment::FragmentBuilder::new();
let frag = builder.single_term(NfaTerm::element(head_name, None, Some(head_key)), None);
let nfa = crate::compiler::fragment::fragment_to_table(frag);
let active = ActiveStates::from_nfa(&nfa);
let mi = active.find_match_info(&nfa, head_name, None, None, Some(&map), XsdVersion::V1_0);
assert_eq!(
mi.element_key,
Some(head_key),
"direct match should return the term's element_key"
);
let mi_member =
active.find_match_info(&nfa, member_name, None, None, Some(&map), XsdVersion::V1_0);
assert!(
mi_member.element_key.is_none(),
"substitution match should not return head's element_key"
);
}
use crate::compiler::fragment::{fragment_to_table, FragmentBuilder};
fn make_counted_element_nfa(min: u32, max: u32) -> (NfaTable, NameId) {
let name = NameId(100);
let builder = FragmentBuilder::new();
let frag = builder.single_term(NfaTerm::element(name, None, None), None);
let counted = frag.repeat_counted(min, max);
let nfa = fragment_to_table(counted);
(nfa, name)
}
fn advance_n(active: ActiveStates, nfa: &NfaTable, name: NameId, n: usize) -> ActiveStates {
let mut state = active;
for _ in 0..n {
state = state.advance(nfa, name, None, None, None, XsdVersion::V1_0);
}
state
}
#[test]
fn test_counted_nfa_has_counters() {
let (nfa, _) = make_counted_element_nfa(2, 5);
assert!(nfa.has_counters());
assert_eq!(nfa.counter_defs.len(), 1);
assert_eq!(nfa.counter_defs[0].min, 2);
assert_eq!(nfa.counter_defs[0].max, 5);
assert_eq!(nfa.state_count(), 5);
}
#[test]
fn test_counted_nfa_compact_state_count() {
let (nfa, _) = make_counted_element_nfa(0, 1000);
assert_eq!(nfa.state_count(), 5); }
#[test]
fn test_counted_active_states_is_counted_variant() {
let (nfa, _) = make_counted_element_nfa(2, 5);
let active = ActiveStates::from_nfa(&nfa);
assert!(matches!(active, ActiveStates::Counted { .. }));
}
#[test]
fn test_counted_element_a_3_5() {
let (nfa, a) = make_counted_element_nfa(3, 5);
let s2 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 2);
assert!(!s2.is_empty());
assert!(!s2.contains_accept(&nfa));
let s3 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 3);
assert!(s3.contains_accept(&nfa));
let s4 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 4);
assert!(s4.contains_accept(&nfa));
let s5 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 5);
assert!(s5.contains_accept(&nfa));
let s6 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 6);
assert!(s6.is_empty());
}
#[test]
fn test_counted_element_a_0_100() {
let (nfa, a) = make_counted_element_nfa(0, 100);
let s0 = ActiveStates::from_nfa(&nfa);
assert!(s0.contains_accept(&nfa));
let s100 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 100);
assert!(s100.contains_accept(&nfa));
let s101 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 101);
assert!(s101.is_empty());
}
#[test]
fn test_counted_element_exact_17() {
let (nfa, a) = make_counted_element_nfa(17, 17);
let s16 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 16);
assert!(!s16.contains_accept(&nfa));
let s17 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 17);
assert!(s17.contains_accept(&nfa));
let s18 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 18);
assert!(s18.is_empty());
}
#[test]
fn test_counted_sequence_a_b() {
let a = NameId(100);
let b = NameId(200);
let builder = FragmentBuilder::new();
let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
let counted_a = frag_a.repeat_counted(2, 50);
let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
let seq = counted_a.concat(frag_b);
let nfa = fragment_to_table(seq);
assert!(nfa.has_counters());
let s = ActiveStates::from_nfa(&nfa);
let s = s.advance(&nfa, a, None, None, None, XsdVersion::V1_0); let s = s.advance(&nfa, b, None, None, None, XsdVersion::V1_0); assert!(!s.contains_accept(&nfa));
let s = ActiveStates::from_nfa(&nfa);
let s = advance_n(s, &nfa, a, 2);
let s = s.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
assert!(s.contains_accept(&nfa));
let s = ActiveStates::from_nfa(&nfa);
let s = advance_n(s, &nfa, a, 50);
let s = s.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
assert!(s.contains_accept(&nfa));
let s = ActiveStates::from_nfa(&nfa);
let s = advance_n(s, &nfa, a, 51);
assert!(s.is_empty());
}
#[test]
fn test_dead_counter_collapse() {
let a = NameId(100);
let b = NameId(200);
let builder = FragmentBuilder::new();
let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
let opt_a = frag_a.optional();
let counted = opt_a.repeat_counted(0, 200);
let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
let seq = counted.concat(frag_b);
let nfa = fragment_to_table(seq);
let initial = ActiveStates::from_nfa(&nfa);
assert!(
matches!(&initial, ActiveStates::RangedSingle { .. }),
"Expected RangedSingle for nullable body counted NFA"
);
let after_b = initial
.clone()
.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
assert!(after_b.contains_accept(&nfa));
if let ActiveStates::RangedSingle { state_ranges, .. } = &after_b {
assert!(
state_ranges.len() <= 5,
"Expected O(1) ranged entries after loop exit, got {}",
state_ranges.len()
);
}
}
#[test]
fn test_counted_exact_prefix_plus_star() {
use crate::compiler::particle::{apply_occurs, MaxOccurs};
let a = NameId(100);
let builder = FragmentBuilder::new();
let frag = builder.single_term(NfaTerm::element(a, None, None), None);
let result = apply_occurs(frag, 17, MaxOccurs::Unbounded);
let nfa = fragment_to_table(result);
assert!(nfa.has_counters());
assert!(
nfa.state_count() < 15,
"expected compact NFA, got {} states",
nfa.state_count()
);
let s16 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 16);
assert!(!s16.contains_accept(&nfa));
let s17 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 17);
assert!(s17.contains_accept(&nfa));
let s100 = advance_n(ActiveStates::from_nfa(&nfa), &nfa, a, 100);
assert!(s100.contains_accept(&nfa));
}
#[test]
fn test_is_epsilon_like() {
assert!(TransitionKind::Epsilon.is_epsilon_like());
assert!(!TransitionKind::Consume.is_epsilon_like());
assert!(TransitionKind::CounterReset(0).is_epsilon_like());
assert!(TransitionKind::CounterIncrement(0).is_epsilon_like());
assert!(TransitionKind::CounterMaxGuard(0).is_epsilon_like());
assert!(TransitionKind::CounterMinGuard(0).is_epsilon_like());
}
#[test]
fn test_offset_counter() {
assert_eq!(
TransitionKind::CounterReset(0).offset_counter(3),
TransitionKind::CounterReset(3)
);
assert_eq!(
TransitionKind::CounterIncrement(2).offset_counter(5),
TransitionKind::CounterIncrement(7)
);
assert_eq!(
TransitionKind::Epsilon.offset_counter(10),
TransitionKind::Epsilon
);
assert_eq!(
TransitionKind::Consume.offset_counter(10),
TransitionKind::Consume
);
}
#[test]
fn test_counter_range_operations() {
let r = CounterRange::single(5);
assert_eq!(r.lo, 5);
assert_eq!(r.hi, 5);
assert!(!r.is_empty());
assert!(r.subsumes(r));
let r2 = CounterRange::new(3, 7);
assert!(r2.subsumes(r));
assert!(!r.subsumes(r2));
let u = CounterRange::single(2).union(CounterRange::new(3, 5));
assert_eq!(u, CounterRange::new(2, 5));
assert_eq!(
CounterRange::new(1, 5).intersect_below(4),
CounterRange::new(1, 3)
);
assert!(CounterRange::new(5, 8).intersect_below(5).is_empty());
assert!(CounterRange::new(0, 10).intersect_below(0).is_empty());
assert_eq!(
CounterRange::new(1, 5).intersect_above(3),
CounterRange::new(3, 5)
);
assert!(CounterRange::new(1, 3).intersect_above(5).is_empty());
}
#[test]
fn test_ranged_closure_convergence() {
let a = NameId(100);
let builder = FragmentBuilder::new();
let frag = builder.single_term(NfaTerm::element(a, None, None), None);
let opt = frag.optional();
let counted = opt.repeat_counted(0, 10_000);
let nfa = fragment_to_table(counted);
assert!(nfa.has_counters());
assert!(nfa.counter_defs[0].body_nullable);
let initial = ActiveStates::from_nfa(&nfa);
assert!(matches!(&initial, ActiveStates::RangedSingle { .. }));
if let ActiveStates::RangedSingle { state_ranges, .. } = &initial {
assert!(
state_ranges.len() <= 10,
"Expected O(states) entries, got {}",
state_ranges.len()
);
}
assert!(initial.contains_accept(&nfa));
let s5 = advance_n(initial, &nfa, a, 5);
assert!(s5.contains_accept(&nfa));
assert!(matches!(&s5, ActiveStates::RangedSingle { .. }));
}
#[test]
fn test_nullable_body_accepts_range() {
let a = NameId(100);
let builder = FragmentBuilder::new();
let frag = builder.single_term(NfaTerm::element(a, None, None), None);
let opt = frag.optional();
let counted = opt.repeat_counted(3, 5);
let nfa = fragment_to_table(counted);
assert!(nfa.counter_defs[0].body_nullable);
let initial = ActiveStates::from_nfa(&nfa);
assert!(matches!(&initial, ActiveStates::RangedSingle { .. }));
assert!(initial.contains_accept(&nfa));
for n in 1..=5 {
let s = advance_n(initial.clone(), &nfa, a, n);
assert!(s.contains_accept(&nfa), "Expected accepting after {n} a's");
}
let s6 = advance_n(initial, &nfa, a, 6);
assert!(s6.is_empty(), "Expected rejection after 6 a's");
}
#[test]
fn test_choice_body_nullable() {
let a = NameId(100);
let b = NameId(200);
let builder = FragmentBuilder::new();
let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
let choice = frag_a.alternate(frag_b.optional());
let counted = choice.repeat_counted(0, 1000);
let nfa = fragment_to_table(counted);
assert!(nfa.counter_defs[0].body_nullable);
let initial = ActiveStates::from_nfa(&nfa);
assert!(matches!(&initial, ActiveStates::RangedSingle { .. }));
if let ActiveStates::RangedSingle { state_ranges, .. } = &initial {
assert!(
state_ranges.len() <= 15,
"Expected O(states) entries, got {}",
state_ranges.len()
);
}
assert!(initial.contains_accept(&nfa));
let s_a = initial
.clone()
.advance(&nfa, a, None, None, None, XsdVersion::V1_0);
assert!(s_a.contains_accept(&nfa));
let s_b = initial
.clone()
.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
assert!(s_b.contains_accept(&nfa));
}
#[test]
fn test_non_nullable_body_stays_counted() {
let (nfa, a) = make_counted_element_nfa(0, 5);
assert!(!nfa.counter_defs[0].body_nullable);
let initial = ActiveStates::from_nfa(&nfa);
assert!(matches!(&initial, ActiveStates::Counted { .. }));
assert!(initial.contains_accept(&nfa)); for n in 1..=5 {
let s = advance_n(initial.clone(), &nfa, a, n);
assert!(s.contains_accept(&nfa), "Expected accepting after {n} a's");
}
let s6 = advance_n(initial, &nfa, a, 6);
assert!(s6.is_empty());
}
#[test]
fn test_multi_counter_stays_counted() {
let a = NameId(100);
let b = NameId(200);
let builder = FragmentBuilder::new();
let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
let counted_a = frag_a.repeat_counted(0, 100);
let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
let counted_b = frag_b.repeat_counted(0, 100);
let seq = counted_a.concat(counted_b);
let nfa = fragment_to_table(seq);
assert_eq!(nfa.counter_defs.len(), 2);
let initial = ActiveStates::from_nfa(&nfa);
assert!(
matches!(&initial, ActiveStates::Counted { .. }),
"Multi-counter NFA should use Counted, not RangedSingle"
);
}
#[test]
fn test_nested_nullable_uses_hybrid() {
let a = NameId(100);
let builder = FragmentBuilder::new();
let frag = builder.single_term(NfaTerm::element(a, None, None), None);
let inner = frag.optional().repeat_counted(0, 50);
let outer = inner.repeat_counted(0, 50);
let nfa = fragment_to_table(outer);
assert_eq!(nfa.counter_defs.len(), 2);
let initial = ActiveStates::from_nfa(&nfa);
assert!(
matches!(&initial, ActiveStates::Hybrid { .. }),
"Nested nullable counted NFA should use Hybrid, got {:?}",
std::mem::discriminant(&initial)
);
}
#[test]
fn test_nested_nullable_hybrid_correctness() {
let a = NameId(100);
let builder = FragmentBuilder::new();
let frag = builder.single_term(NfaTerm::element(a, None, None), None);
let inner = frag.optional().repeat_counted(0, 100);
let outer = inner.repeat_counted(0, 50);
let nfa = fragment_to_table(outer);
assert_eq!(nfa.counter_defs.len(), 2);
let initial = ActiveStates::from_nfa(&nfa);
assert!(matches!(&initial, ActiveStates::Hybrid { .. }));
assert!(initial.contains_accept(&nfa));
let s100 = advance_n(initial.clone(), &nfa, a, 100);
assert!(s100.contains_accept(&nfa));
let s5000 = advance_n(initial.clone(), &nfa, a, 5000);
assert!(s5000.contains_accept(&nfa));
let s5001 = advance_n(initial, &nfa, a, 5001);
assert!(s5001.is_empty(), "Expected rejection after 5001 a's");
}
#[test]
fn test_hybrid_convergence() {
let a = NameId(100);
let builder = FragmentBuilder::new();
let frag = builder.single_term(NfaTerm::element(a, None, None), None);
let inner = frag.optional().repeat_counted(0, 100);
let outer = inner.repeat_counted(0, 50);
let nfa = fragment_to_table(outer);
let initial = ActiveStates::from_nfa(&nfa);
if let ActiveStates::Hybrid { configs, .. } = &initial {
let max_expected = nfa.state_count() * 51; assert!(
configs.len() <= max_expected,
"Expected at most {} entries, got {} (state_count={})",
max_expected,
configs.len(),
nfa.state_count()
);
} else {
panic!("Expected Hybrid variant");
}
}
#[test]
fn test_sequential_nullable_hybrid() {
let a = NameId(100);
let b = NameId(200);
let builder = FragmentBuilder::new();
let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
let counted_a = frag_a.optional().repeat_counted(0, 1000);
let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
let counted_b = frag_b.optional().repeat_counted(0, 1000);
let seq = counted_a.concat(counted_b);
let nfa = fragment_to_table(seq);
assert_eq!(nfa.counter_defs.len(), 2);
assert!(nfa.counter_defs[0].body_nullable);
assert!(nfa.counter_defs[1].body_nullable);
let initial = ActiveStates::from_nfa(&nfa);
assert!(matches!(&initial, ActiveStates::Hybrid { .. }));
assert!(initial.contains_accept(&nfa));
let s = initial
.clone()
.advance(&nfa, a, None, None, None, XsdVersion::V1_0);
assert!(s.contains_accept(&nfa));
let s = initial
.clone()
.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
assert!(s.contains_accept(&nfa));
let s = advance_n(initial.clone(), &nfa, a, 1000);
assert!(s.contains_accept(&nfa));
let s = advance_n(s, &nfa, b, 1000);
assert!(s.contains_accept(&nfa));
let s = advance_n(initial, &nfa, a, 1001);
assert!(s.is_empty(), "Expected rejection after 1001 a's");
}
#[test]
fn test_mixed_nullable_nonnullable_hybrid() {
let a = NameId(100);
let b = NameId(200);
let builder = FragmentBuilder::new();
let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
let counted_a = frag_a.optional().repeat_counted(0, 500);
let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
let counted_b = frag_b.repeat_counted(0, 500);
let seq = counted_a.concat(counted_b);
let nfa = fragment_to_table(seq);
assert_eq!(nfa.counter_defs.len(), 2);
assert!(nfa.counter_defs[0].body_nullable);
assert!(!nfa.counter_defs[1].body_nullable);
let initial = ActiveStates::from_nfa(&nfa);
assert!(matches!(&initial, ActiveStates::Hybrid { .. }));
if let ActiveStates::Hybrid {
ranged_counter_idx, ..
} = &initial
{
assert_eq!(
*ranged_counter_idx, 0,
"Should range the nullable counter (index 0)"
);
}
assert!(initial.contains_accept(&nfa));
let s = advance_n(initial.clone(), &nfa, a, 500);
let s = advance_n(s, &nfa, b, 500);
assert!(s.contains_accept(&nfa));
let s = advance_n(initial, &nfa, b, 501);
assert!(s.is_empty());
}
#[test]
fn test_hybrid_forced_outer_ranged() {
let a = NameId(100);
let builder = FragmentBuilder::new();
let frag = builder.single_term(NfaTerm::element(a, None, None), None);
let inner = frag.optional().repeat_counted(0, 100);
let outer = inner.repeat_counted(0, 50);
let nfa = fragment_to_table(outer);
let initial = ActiveStates::from_nfa(&nfa);
let s = advance_n(initial.clone(), &nfa, a, 5000);
assert!(
s.contains_accept(&nfa),
"5000 a's should be accepted (50*100)"
);
let s = advance_n(initial.clone(), &nfa, a, 101);
assert!(
s.contains_accept(&nfa),
"101 a's should be accepted (needs 2 outer iterations)"
);
assert!(initial.contains_accept(&nfa), "0 a's should be accepted");
let s = advance_n(initial, &nfa, a, 5001);
assert!(s.is_empty(), "5001 a's should be rejected");
}
#[test]
fn test_hybrid_selection_different_maxima() {
let a = NameId(100);
let b = NameId(200);
let builder = FragmentBuilder::new();
let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
let counted_a = frag_a.optional().repeat_counted(0, 10);
let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
let counted_b = frag_b.optional().repeat_counted(0, 1000);
let seq = counted_a.concat(counted_b);
let nfa = fragment_to_table(seq);
assert_eq!(nfa.counter_defs.len(), 2);
let initial = ActiveStates::from_nfa(&nfa);
assert!(
matches!(&initial, ActiveStates::Hybrid { .. }),
"Expected Hybrid, got {:?}",
std::mem::discriminant(&initial)
);
if let ActiveStates::Hybrid {
ranged_counter_idx, ..
} = &initial
{
assert_eq!(
*ranged_counter_idx, 1,
"Should range counter 1 (max=1000), not counter 0 (max=10)"
);
}
let s = advance_n(initial.clone(), &nfa, a, 10);
let s = advance_n(s, &nfa, b, 1000);
assert!(s.contains_accept(&nfa));
let s = advance_n(initial, &nfa, a, 11);
assert!(s.is_empty());
}
#[test]
fn test_hybrid_small_max_stays_counted() {
let a = NameId(100);
let b = NameId(200);
let builder = FragmentBuilder::new();
let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
let counted_a = frag_a.optional().repeat_counted(0, 5);
let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
let counted_b = frag_b.optional().repeat_counted(0, 5);
let seq = counted_a.concat(counted_b);
let nfa = fragment_to_table(seq);
assert_eq!(nfa.counter_defs.len(), 2);
let initial = ActiveStates::from_nfa(&nfa);
assert!(
matches!(&initial, ActiveStates::Counted { .. }),
"Small-max nullable counters should stay Counted, got {:?}",
std::mem::discriminant(&initial)
);
}
#[test]
fn test_xsd11_priority_ranged() {
use crate::types::complex::NamespaceConstraint;
let a = NameId(100);
let b = NameId(200);
let builder = FragmentBuilder::new();
let elem = builder.single_term(NfaTerm::element(a, None, None), None);
let wc = builder.single_term(
NfaTerm::Wildcard {
namespace_constraint: NamespaceConstraint::Any,
process_contents: ProcessContents::Lax,
not_qnames: Vec::new(),
},
None,
);
let choice = elem.alternate(wc.optional());
let counted = choice.repeat_counted(0, 100);
let nfa = fragment_to_table(counted);
assert!(nfa.counter_defs[0].body_nullable);
let initial = ActiveStates::from_nfa(&nfa);
assert!(matches!(&initial, ActiveStates::RangedSingle { .. }));
let next =
initial
.clone()
.advance_with_priority(&nfa, a, None, None, None, XsdVersion::V1_1);
assert!(!next.is_empty());
assert!(next.contains_accept(&nfa));
let next_b = initial.advance_with_priority(&nfa, b, None, None, None, XsdVersion::V1_1);
assert!(!next_b.is_empty());
assert!(next_b.contains_accept(&nfa));
}
#[test]
fn test_initial_tiebreak_prefers_first() {
let a = NameId(100);
let b = NameId(200);
let builder = FragmentBuilder::new();
let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
let counted_a = frag_a.optional().repeat_counted(0, 1000);
let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
let counted_b = frag_b.optional().repeat_counted(0, 1000);
let seq = counted_a.concat(counted_b);
let nfa = fragment_to_table(seq);
let initial = ActiveStates::from_nfa(&nfa);
if let ActiveStates::Hybrid {
ranged_counter_idx, ..
} = &initial
{
assert_eq!(
*ranged_counter_idx, 0,
"Equal max: should prefer first counter (index 0)"
);
} else {
panic!("Expected Hybrid variant");
}
}
#[test]
fn test_dynamic_switch_sequential_equal_max() {
let a = NameId(100);
let b = NameId(200);
let builder = FragmentBuilder::new();
let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
let counted_a = frag_a.optional().repeat_counted(0, 1000);
let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
let counted_b = frag_b.optional().repeat_counted(0, 1000);
let seq = counted_a.concat(counted_b);
let nfa = fragment_to_table(seq);
let initial = ActiveStates::from_nfa(&nfa);
let after_a = advance_n(initial, &nfa, a, 1000);
let after_b = after_a.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
if let ActiveStates::Hybrid {
ranged_counter_idx,
configs,
..
} = &after_b
{
assert_eq!(
*ranged_counter_idx, 1,
"Should have dynamically switched to counter 1"
);
let max_expected = nfa.state_count() * 2;
assert!(
configs.len() <= max_expected,
"Expected <= {} configs after switch, got {}",
max_expected,
configs.len()
);
} else {
panic!("Expected Hybrid variant");
}
let s = advance_n(after_b.clone(), &nfa, b, 999);
assert!(s.contains_accept(&nfa));
let s = advance_n(after_b, &nfa, b, 1000);
assert!(s.is_empty());
}
#[test]
fn test_dynamic_switch_three_counters() {
let a = NameId(100);
let b = NameId(200);
let c = NameId(300);
let builder = FragmentBuilder::new();
let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
let counted_a = frag_a.optional().repeat_counted(0, 500);
let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
let counted_b = frag_b.optional().repeat_counted(0, 500);
let frag_c = builder.single_term(NfaTerm::element(c, None, None), None);
let counted_c = frag_c.optional().repeat_counted(0, 500);
let seq = counted_a.concat(counted_b).concat(counted_c);
let nfa = fragment_to_table(seq);
assert_eq!(nfa.counter_defs.len(), 3);
let initial = ActiveStates::from_nfa(&nfa);
let after_a = advance_n(initial, &nfa, a, 500);
assert!(after_a.contains_accept(&nfa));
let after_b1 = after_a.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
if let ActiveStates::Hybrid {
ranged_counter_idx, ..
} = &after_b1
{
assert_eq!(
*ranged_counter_idx, 1,
"Should switch to counter 1 after first loop exits"
);
}
let after_b = advance_n(after_b1, &nfa, b, 499);
assert!(after_b.contains_accept(&nfa));
let after_c1 = after_b.advance(&nfa, c, None, None, None, XsdVersion::V1_0);
if let ActiveStates::Hybrid {
ranged_counter_idx, ..
} = &after_c1
{
assert_eq!(
*ranged_counter_idx, 2,
"Should switch to counter 2 after second loop exits"
);
}
let after_c = advance_n(after_c1.clone(), &nfa, c, 499);
assert!(after_c.contains_accept(&nfa));
let over = advance_n(after_c1, &nfa, c, 500);
assert!(over.is_empty());
}
#[test]
fn test_no_switch_when_ranged_still_active() {
let a = NameId(100);
let b = NameId(200);
let builder = FragmentBuilder::new();
let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
let counted_a = frag_a.optional().repeat_counted(0, 1000);
let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
let counted_b = frag_b.optional().repeat_counted(0, 1000);
let seq = counted_a.concat(counted_b);
let nfa = fragment_to_table(seq);
let initial = ActiveStates::from_nfa(&nfa);
let after_5a = advance_n(initial, &nfa, a, 5);
if let ActiveStates::Hybrid {
ranged_counter_idx, ..
} = &after_5a
{
assert_eq!(
*ranged_counter_idx, 0,
"Should NOT switch while first loop is still active"
);
}
}
#[test]
fn test_no_switch_non_nullable_candidate() {
let a = NameId(100);
let b = NameId(200);
let builder = FragmentBuilder::new();
let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
let counted_a = frag_a.optional().repeat_counted(0, 1000);
let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
let counted_b = frag_b.repeat_counted(0, 1000); let seq = counted_a.concat(counted_b);
let nfa = fragment_to_table(seq);
let initial = ActiveStates::from_nfa(&nfa);
let after_b = advance_n(initial, &nfa, b, 100);
if let ActiveStates::Hybrid {
ranged_counter_idx, ..
} = &after_b
{
assert_eq!(
*ranged_counter_idx, 0,
"Should NOT switch: counter 1 is not nullable"
);
}
let s = advance_n(after_b.clone(), &nfa, b, 900);
assert!(s.contains_accept(&nfa));
}
#[test]
fn test_no_switch_nested_inner_active() {
let a = NameId(100);
let builder = FragmentBuilder::new();
let frag = builder.single_term(NfaTerm::element(a, None, None), None);
let inner = frag.optional().repeat_counted(0, 100);
let outer = inner.repeat_counted(0, 50);
let nfa = fragment_to_table(outer);
let initial = ActiveStates::from_nfa(&nfa);
let after_a = advance_n(initial.clone(), &nfa, a, 50);
if let ActiveStates::Hybrid {
ranged_counter_idx, ..
} = &after_a
{
let initial_idx = if let ActiveStates::Hybrid {
ranged_counter_idx, ..
} = &initial
{
*ranged_counter_idx
} else {
unreachable!()
};
assert_eq!(
*ranged_counter_idx, initial_idx,
"Should NOT switch while processing nested loops"
);
}
}
#[test]
fn test_switch_config_count_drops() {
let a = NameId(100);
let b = NameId(200);
let builder = FragmentBuilder::new();
let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
let counted_a = frag_a.optional().repeat_counted(0, 1000);
let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
let counted_b = frag_b.optional().repeat_counted(0, 1000);
let seq = counted_a.concat(counted_b);
let nfa = fragment_to_table(seq);
let initial = ActiveStates::from_nfa(&nfa);
let after_b = initial.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
if let ActiveStates::Hybrid {
configs,
ranged_counter_idx,
..
} = &after_b
{
assert_eq!(
*ranged_counter_idx, 1,
"Should switch to counter 1 after 'b'"
);
assert!(
configs.len() < nfa.state_count() * 2,
"Config count {} should be << state_count {} after switch",
configs.len(),
nfa.state_count()
);
}
}
#[test]
fn test_differential_counted_vs_hybrid_switch() {
let a = NameId(100);
let b = NameId(200);
let builder = FragmentBuilder::new();
let frag_a = builder.single_term(NfaTerm::element(a, None, None), None);
let counted_a = frag_a.optional().repeat_counted(0, 100);
let frag_b = builder.single_term(NfaTerm::element(b, None, None), None);
let counted_b = frag_b.optional().repeat_counted(0, 100);
let seq = counted_a.concat(counted_b);
let nfa = fragment_to_table(seq);
let hybrid = ActiveStates::from_nfa(&nfa);
let initial_config = ActiveConfig::initial(nfa.start_state, nfa.counter_defs.len());
let mut counted_configs = HashSet::new();
counted_configs.insert(initial_config);
let counted = ActiveStates::Counted {
configs: counted_configs,
num_counters: nfa.counter_defs.len(),
}
.epsilon_closure(&nfa);
let mut h = hybrid;
let mut c = counted;
for _ in 0..50 {
h = h.advance(&nfa, a, None, None, None, XsdVersion::V1_0);
c = c.advance(&nfa, a, None, None, None, XsdVersion::V1_0);
assert_eq!(
h.contains_accept(&nfa),
c.contains_accept(&nfa),
"Hybrid/Counted disagree on accept during a-feeding"
);
assert_eq!(
h.is_empty(),
c.is_empty(),
"Hybrid/Counted disagree on empty during a-feeding"
);
}
for _ in 0..50 {
h = h.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
c = c.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
assert_eq!(
h.contains_accept(&nfa),
c.contains_accept(&nfa),
"Hybrid/Counted disagree on accept during b-feeding"
);
assert_eq!(
h.is_empty(),
c.is_empty(),
"Hybrid/Counted disagree on empty during b-feeding"
);
}
assert!(h.contains_accept(&nfa));
assert!(c.contains_accept(&nfa));
h = h.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
c = c.advance(&nfa, b, None, None, None, XsdVersion::V1_0);
assert_eq!(h.contains_accept(&nfa), c.contains_accept(&nfa));
}
}