use crate::parser::location::SourceRef;
use super::nfa::{CounterDef, CounterId, NfaState, NfaTable, NfaTerm, StateId, TransitionKind};
#[derive(Debug, Clone)]
pub struct NfaFragment {
pub states: Vec<NfaState>,
pub start: usize,
pub end: usize,
pub counter_defs: Vec<CounterDef>,
pub nullable: bool,
}
impl NfaFragment {
pub fn new(states: Vec<NfaState>, start: usize, end: usize) -> Self {
debug_assert!(start < states.len(), "start index out of bounds");
debug_assert!(end < states.len(), "end index out of bounds");
Self {
states,
start,
end,
counter_defs: Vec::new(),
nullable: false,
}
}
pub fn with_counters(
states: Vec<NfaState>,
start: usize,
end: usize,
counter_defs: Vec<CounterDef>,
nullable: bool,
) -> Self {
debug_assert!(start < states.len(), "start index out of bounds");
debug_assert!(end < states.len(), "end index out of bounds");
Self {
states,
start,
end,
counter_defs,
nullable,
}
}
fn assert_ids_normalized(&self) {
for (pos, state) in self.states.iter().enumerate() {
debug_assert_eq!(
state.id, pos as StateId,
"Fragment ID invariant violated: state at position {} has id {}",
pos, state.id
);
}
}
pub fn start_state(&self) -> &NfaState {
&self.states[self.start]
}
pub fn end_state(&self) -> &NfaState {
&self.states[self.end]
}
pub fn get_state_mut(&mut self, index: usize) -> Option<&mut NfaState> {
self.states.get_mut(index)
}
pub fn concat(mut self, mut other: NfaFragment) -> NfaFragment {
let nullable = self.nullable && other.nullable;
self.assert_ids_normalized();
other.assert_ids_normalized();
let state_offset = self.states.len();
let counter_offset = self.counter_defs.len() as CounterId;
for state in &mut other.states {
state.id += state_offset as StateId;
for trans in &mut state.transitions {
trans.target += state_offset as StateId;
trans.kind = trans.kind.offset_counter(counter_offset);
}
}
let other_start = other.start + state_offset;
self.states[self.end].add_epsilon(other_start as StateId);
let new_end = other.end + state_offset;
self.states.extend(other.states);
self.counter_defs.extend(other.counter_defs);
NfaFragment::with_counters(
self.states,
self.start,
new_end,
self.counter_defs,
nullable,
)
}
pub fn alternate(mut self, mut other: NfaFragment) -> NfaFragment {
let nullable = self.nullable || other.nullable;
self.assert_ids_normalized();
other.assert_ids_normalized();
let new_start_id = (self.states.len() + other.states.len()) as StateId;
let new_end_id = new_start_id + 1;
let mut new_start = NfaState::epsilon(new_start_id, None);
let new_end = NfaState::epsilon(new_end_id, None);
let other_state_offset = self.states.len();
let counter_offset = self.counter_defs.len() as CounterId;
for state in &mut other.states {
state.id += other_state_offset as StateId;
for trans in &mut state.transitions {
trans.target += other_state_offset as StateId;
trans.kind = trans.kind.offset_counter(counter_offset);
}
}
new_start.add_epsilon(self.start as StateId);
new_start.add_epsilon((other.start + other_state_offset) as StateId);
self.states[self.end].add_epsilon(new_end_id);
other.states[other.end].add_epsilon(new_end_id);
let mut states = self.states;
states.extend(other.states);
states.push(new_start);
states.push(new_end);
let mut counter_defs = self.counter_defs;
counter_defs.extend(other.counter_defs);
NfaFragment::with_counters(
states,
new_start_id as usize,
new_end_id as usize,
counter_defs,
nullable,
)
}
pub fn optional(mut self) -> NfaFragment {
self.assert_ids_normalized();
let end_id = self.end as StateId;
self.states[self.start].add_epsilon(end_id);
self.nullable = true;
self
}
pub fn repeat_star(mut self) -> NfaFragment {
self.assert_ids_normalized();
let start_id = self.start as StateId;
self.states[self.end].add_epsilon(start_id);
let end_id = self.end as StateId;
self.states[self.start].add_epsilon(end_id);
self.nullable = true;
self
}
pub fn repeat_plus(mut self) -> NfaFragment {
self.assert_ids_normalized();
let start_id = self.start as StateId;
self.states[self.end].add_epsilon(start_id);
self
}
pub fn repeat_exact(self, n: u32) -> NfaFragment {
if n == 0 {
return FragmentBuilder::new().epsilon_fragment();
}
let mut result = self.clone();
for _ in 1..n {
result = result.concat(self.clone());
}
result
}
pub fn repeat_counted(mut self, min: u32, max: u32) -> NfaFragment {
debug_assert!(min <= max, "repeat_counted: min ({min}) > max ({max})");
let body_nullable = self.nullable;
self.assert_ids_normalized();
let counter_id = self.counter_defs.len() as CounterId;
self.counter_defs.push(CounterDef {
min,
max,
body_nullable,
});
let entry_idx = self.states.len();
let guard_idx = entry_idx + 1;
let exit_idx = entry_idx + 2;
let entry_id = entry_idx as StateId;
let guard_id = guard_idx as StateId;
let exit_id = exit_idx as StateId;
let body_start_id = self.start as StateId;
let mut entry = NfaState::epsilon(entry_id, None);
entry.add_transition(body_start_id, TransitionKind::CounterReset(counter_id));
if min == 0 {
entry.add_epsilon(exit_id);
}
self.states[self.end]
.add_transition(guard_id, TransitionKind::CounterIncrement(counter_id));
let mut guard = NfaState::epsilon(guard_id, None);
guard.add_transition(body_start_id, TransitionKind::CounterMaxGuard(counter_id));
guard.add_transition(exit_id, TransitionKind::CounterMinGuard(counter_id));
let exit = NfaState::epsilon(exit_id, None);
self.states.push(entry);
self.states.push(guard);
self.states.push(exit);
let nullable = min == 0 || body_nullable;
NfaFragment::with_counters(
self.states,
entry_idx,
exit_idx,
self.counter_defs,
nullable,
)
}
pub fn repeat_range(self, min: u32, max: Option<u32>) -> NfaFragment {
match (min, max) {
(0, Some(0)) => FragmentBuilder::new().epsilon_fragment(),
(0, Some(1)) => self.optional(),
(0, None) => self.repeat_star(),
(1, Some(1)) => self,
(1, None) => self.repeat_plus(),
(n, Some(m)) if n == m => self.repeat_exact(n),
(n, Some(m)) => {
let mut result = self.clone().repeat_exact(n);
for _ in n..m {
result = result.concat(self.clone().optional());
}
result
}
(n, None) => {
let mandatory = self.clone().repeat_exact(n);
mandatory.concat(self.repeat_star())
}
}
}
}
#[derive(Debug)]
pub struct FragmentBuilder;
impl FragmentBuilder {
pub fn new() -> Self {
Self
}
pub fn single_term(&self, term: NfaTerm, origin: Option<SourceRef>) -> NfaFragment {
let mut term_state = NfaState::with_term(0, term, origin);
let exit_state = NfaState::epsilon(1, None);
term_state.add_consume(1);
NfaFragment::new(vec![term_state, exit_state], 0, 1)
}
pub fn epsilon_fragment(&self) -> NfaFragment {
let state = NfaState::epsilon(0, None);
let mut frag = NfaFragment::new(vec![state], 0, 0);
frag.nullable = true;
frag
}
}
impl Default for FragmentBuilder {
fn default() -> Self {
Self::new()
}
}
pub fn fragment_to_table(fragment: NfaFragment) -> NfaTable {
fragment.assert_ids_normalized();
let start_state = fragment.start as StateId;
let accept_state = fragment.end as StateId;
NfaTable::with_counters(
fragment.states,
start_state,
accept_state,
fragment.counter_defs,
)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ids::NameId;
fn make_element_term(name: u32) -> NfaTerm {
NfaTerm::element(NameId(name), None, None)
}
#[test]
fn test_single_term_fragment() {
let builder = FragmentBuilder::new();
let frag = builder.single_term(make_element_term(1), None);
assert_eq!(frag.states.len(), 2);
assert_eq!(frag.start, 0);
assert_eq!(frag.end, 1);
assert!(frag.states[0].term.is_some());
assert!(frag.states[1].term.is_none()); }
#[test]
fn test_epsilon_fragment() {
let builder = FragmentBuilder::new();
let frag = builder.epsilon_fragment();
assert_eq!(frag.states.len(), 1);
assert_eq!(frag.start, 0);
assert_eq!(frag.end, 0); assert!(frag.states[0].term.is_none());
}
#[test]
fn test_concat() {
let builder = FragmentBuilder::new();
let a = builder.single_term(make_element_term(1), None);
let b = builder.single_term(make_element_term(2), None);
let concat = a.concat(b);
assert_eq!(concat.states.len(), 4);
assert_eq!(concat.start, 0); assert_eq!(concat.end, 3);
let a_end = &concat.states[1];
assert!(a_end.epsilon_transitions().any(|t| t == 2));
}
#[test]
fn test_alternate() {
let builder = FragmentBuilder::new();
let a = builder.single_term(make_element_term(1), None);
let b = builder.single_term(make_element_term(2), None);
let alt = a.alternate(b);
assert_eq!(alt.states.len(), 6);
let new_start = &alt.states[alt.start];
let eps: Vec<_> = new_start.epsilon_transitions().collect();
assert_eq!(eps.len(), 2);
assert!(eps.contains(&0)); assert!(eps.contains(&2)); }
#[test]
fn test_optional() {
let builder = FragmentBuilder::new();
let frag = builder.single_term(make_element_term(1), None);
let opt = frag.optional();
let start = &opt.states[opt.start];
assert!(start.epsilon_transitions().any(|t| t == opt.end as StateId));
}
#[test]
fn test_repeat_star() {
let builder = FragmentBuilder::new();
let frag = builder.single_term(make_element_term(1), None);
let star = frag.repeat_star();
let end = &star.states[star.end];
assert!(end
.epsilon_transitions()
.any(|t| t == star.start as StateId));
let start = &star.states[star.start];
assert!(start
.epsilon_transitions()
.any(|t| t == star.end as StateId));
}
#[test]
fn test_repeat_plus() {
let builder = FragmentBuilder::new();
let frag = builder.single_term(make_element_term(1), None);
let plus = frag.repeat_plus();
let end = &plus.states[plus.end];
assert!(end
.epsilon_transitions()
.any(|t| t == plus.start as StateId));
let start = &plus.states[plus.start];
assert!(!start
.epsilon_transitions()
.any(|t| t == plus.end as StateId));
}
#[test]
fn test_repeat_exact() {
let builder = FragmentBuilder::new();
let frag = builder.single_term(make_element_term(1), None);
let exact = frag.repeat_exact(3);
assert_eq!(exact.states.len(), 6);
}
#[test]
fn test_repeat_range() {
let builder = FragmentBuilder::new();
let frag1 = builder.single_term(make_element_term(1), None);
let opt = frag1.repeat_range(0, Some(1));
let start = &opt.states[opt.start];
assert!(start.epsilon_transitions().any(|t| t == opt.end as StateId));
let frag2 = builder.single_term(make_element_term(2), None);
let range = frag2.repeat_range(2, Some(4));
assert_eq!(range.states.len(), 8);
}
#[test]
fn test_fragment_to_table() {
let builder = FragmentBuilder::new();
let frag = builder.single_term(make_element_term(1), None);
let table = fragment_to_table(frag);
assert_eq!(table.start_state, 0);
assert_eq!(table.accept_state, 1);
assert_eq!(table.state_count(), 2);
}
}