use crate::compile::{Ast, AstNode, AstNodeId, AstNodeKind, ErrorKind};
use std::collections::HashSet;
use std::mem;
use std::ops::Index;
#[derive(Debug)]
pub struct Nfa {
start_states: Vec<StateId>,
match_state: StateId,
states: Vec<State>,
}
impl Nfa {
pub fn build(ast: Ast) -> Result<Self, ErrorKind> {
enum Instruction<'ast> {
Visit { node: &'ast AstNode<'ast>, out: StateId },
PhantomLink { state: StateId },
CollectAlternate { alts_count: usize },
DoConcat { remaining: &'ast [AstNodeId] },
}
let mut states = vec![State::Match];
let match_state = StateId(0);
let mut instrs_stack = vec![Instruction::Visit { node: ast.root(), out: match_state }];
let mut state_stack = Vec::new();
while let Some(instr) = instrs_stack.pop() {
use AstNodeKind::{Alternate, Concat, Group, Literal};
use Instruction::{CollectAlternate, DoConcat, PhantomLink, Visit};
match instr {
Visit { node, mut out } => match &node.kind {
Literal(s) => {
for c in s.chars().rev() {
let id = StateId(states.len());
states.push(State::Trans(c, out));
out = id;
}
state_stack.push(out);
}
Group(None) => state_stack.push(out),
Group(Some(id)) => instrs_stack.push(Visit { node: &ast[id], out }),
Alternate(ids) => {
debug_assert!(ids.len() >= 2);
instrs_stack.push(CollectAlternate { alts_count: ids.len() });
for opt_id in ids {
match opt_id {
Some(id) => instrs_stack.push(Visit { node: &ast[id], out }),
None => instrs_stack.push(PhantomLink { state: out }),
}
}
}
Concat(ids) => {
debug_assert!(ids.len() >= 2);
state_stack.push(out);
instrs_stack.push(DoConcat { remaining: ids });
}
},
PhantomLink { state } => state_stack.push(state),
CollectAlternate { alts_count } => {
let mut out = state_stack.pop().unwrap();
for _ in 1..alts_count {
let next = state_stack.pop().unwrap();
let id = StateId(states.len());
states.push(State::Split(next, out));
out = id;
}
state_stack.push(out);
}
DoConcat { remaining: [] } => (),
DoConcat { remaining: [remaining @ .., last] } => {
let out = state_stack.pop().unwrap();
instrs_stack.push(DoConcat { remaining });
instrs_stack.push(Visit { node: &ast[last], out });
}
}
}
assert_eq!(state_stack.len(), 1);
let initial_start_state = state_stack.pop().unwrap();
let mut start_states = HashSet::new();
let mut visit_stack = vec![initial_start_state];
while let Some(id) = visit_stack.pop() {
if start_states.insert(id) {
if let State::Split(x, y) = states[id.0] {
visit_stack.push(x);
visit_stack.push(y);
}
}
}
let start_states = start_states
.into_iter()
.filter(|id| match states[id.0] {
State::Split(_, _) => false,
_ => true,
})
.collect();
Ok(Nfa { start_states, match_state, states })
}
pub fn matcher<R: AsRef<Self>>(this: R) -> Matcher<R> {
let states = this.as_ref().start_states.clone();
let last_used = vec![0; this.as_ref().states.len()];
Matcher { nfa: this, seen: 0, states, last_used }
}
}
pub struct Matcher<N> {
pub nfa: N,
seen: usize,
states: Vec<StateId>,
last_used: Vec<usize>,
}
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct MatcherState {
seen: usize,
states: Vec<StateId>,
last_used: Vec<usize>,
}
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
struct StateId(usize);
#[derive(Debug, Copy, Clone)]
enum State {
Trans(char, StateId),
Split(StateId, StateId),
Match,
}
impl<N: AsRef<Nfa>> Matcher<N> {
pub fn get_state(&self) -> MatcherState {
MatcherState {
seen: self.seen,
states: self.states.clone(),
last_used: self.last_used.clone(),
}
}
pub fn from_state(nfa: N, state: MatcherState) -> Self {
Matcher {
nfa,
seen: state.seen,
states: state.states,
last_used: state.last_used,
}
}
pub fn feed(&mut self, string: &str) {
for c_in in string.chars() {
self.seen += 1;
let mut stack = Vec::new();
for s in mem::replace(&mut self.states, Vec::new()) {
match self.nfa.as_ref().states.as_slice()[s] {
State::Trans(c, new) => {
if c == c_in {
stack.push(new);
self.add_states(&mut stack);
}
}
State::Split(_, _) => unreachable!(),
State::Match => (),
}
}
}
}
fn add_states(&mut self, stack: &mut Vec<StateId>) {
while let Some(s) = stack.pop() {
if mem::replace(&mut self.last_used[s.0], self.seen) == self.seen {
continue;
}
match self.nfa.as_ref().states.as_slice()[s] {
State::Trans(_, _) | State::Match => self.states.push(s),
State::Split(x, y) => {
stack.push(x);
stack.push(y);
}
}
}
}
pub fn is_match(&self) -> bool {
self.states.contains(&self.nfa.as_ref().match_state)
}
}
impl Index<StateId> for [State] {
type Output = State;
fn index(&self, idx: StateId) -> &State {
&self[idx.0]
}
}