use std::ops::Range;
use std::option::IntoIter;
use fsa::nfa;
use fsa::nfa::{No, One, Two, More};
use unicode;
pub use self::RegexNode::{Or, Cat, Maybe, Closure, Var, Literal, Bind};
pub use self::Const::{Class, NotClass, Char, Any};
#[derive(Clone)]
pub struct CharSet<T>(pub Vec<Range<T>>);
impl<T> CharSet<T> {
pub fn new() -> CharSet<T> {
CharSet(Vec::new())
}
pub fn push(&mut self, range: Range<T>) {
let CharSet(ref mut vec) = *self;
vec.push(range);
}
pub fn contains(&self, item: T) -> bool where T: PartialOrd {
let CharSet(ref vec) = *self;
vec.iter().any(|x| x.start <= item && item <= x.end)
}
}
#[derive(Clone)]
pub enum Const {
Class(CharSet<char>),
NotClass(CharSet<char>),
Char(char),
Any,
}
pub type Regex = Box<RegexNode>;
#[derive(Clone)]
pub enum RegexNode {
Or(Regex, Regex),
Cat(Regex, Regex),
Maybe(Regex),
Closure(Regex),
Var(usize),
Literal(Const),
Bind(::syntax::ast::Ident, Regex)
}
pub fn string(string: &str) -> Option<Regex> {
let mut it = string.chars();
let mut reg = Box::new(Literal(Char(match it.next() {
Some(ch) => ch,
None => return None
})));
for ch in it {
reg = Box::new(Cat(reg, Box::new(Literal(Char(ch)))));
}
Some(reg)
}
#[derive(Clone, Copy, Eq, Hash, PartialEq, PartialOrd)]
pub struct Action(pub usize);
impl nfa::StateData for Action {
fn no_data() -> Action {
Action(0)
}
fn combine(a: Action, b: Action) -> Action {
if a >= b { a } else { b }
}
fn is_final(&self) -> bool {
*self != Action(0)
}
}
pub enum Label {
Byte(u8),
Class(CharSet<u8>),
NotClass(CharSet<u8>)
}
pub struct State {
etrans: nfa::Etrans,
pub trans: Option<(Label, usize)>,
action: Action
}
impl nfa::State for State {
type Data = Action;
type Iter = IntoIter<usize>;
fn new() -> State {
State {
trans: None,
etrans: No,
action: Action(0)
}
}
fn etransition<'a>(&'a self) -> &'a nfa::Etrans {
&self.etrans
}
fn transition(&self, c: u8) -> IntoIter<usize> {
match self.trans {
Some((Label::Class(ref set), dst)) if set.contains(c) => Some(dst),
Some((Label::NotClass(ref set), dst)) if !set.contains(c) => Some(dst),
Some((Label::Byte(ch), dst)) if ch == c => Some(dst),
_ => None
}.into_iter()
}
fn data(&self) -> Action {
self.action
}
}
pub type Automaton = nfa::Automaton<State>;
pub fn build_nfa<Encoding>(regexs: &[(Regex, Action)], defs: &[Regex])
-> Automaton where Encoding: unicode::Encode {
let mut ret = Automaton {
states: Vec::new(),
initial: 0usize
};
let ini = ret.create_state();
let mut etrans = Vec::new();
for &(ref reg, act) in regexs.iter() {
let (init, f1nal) = reg.to_automaton::<Encoding>(&mut ret, defs);
etrans.push(init);
ret.states[f1nal].action = act;
}
ret.states[ini].etrans = More(etrans);
ret.initial = ini;
ret
}
impl RegexNode {
fn to_automaton<Encoding>(&self, auto: &mut Automaton, defs: &[Regex])
-> (usize, usize) where Encoding: unicode::Encode {
match *self {
Or(ref left, ref right) => {
let (linit, lf1nal) = left.to_automaton::<Encoding>(auto, defs);
let (rinit, rf1nal) = right.to_automaton::<Encoding>(auto, defs);
let new_f1nal = auto.create_state();
let new_init = auto.create_state();
auto.states[new_init].etrans = Two(linit, rinit);
auto.states[lf1nal].etrans = One(new_f1nal);
auto.states[rf1nal].etrans = One(new_f1nal);
(new_init, new_f1nal)
}
Cat(ref fst, ref snd) => {
let ( _ , sf1nal) = snd.to_automaton::<Encoding>(auto, defs);
let State {
etrans, trans, ..
} = auto.states.pop().unwrap();
let (finit, ff1nal) = fst.to_automaton::<Encoding>(auto, defs);
auto.states[ff1nal].etrans = etrans;
auto.states[ff1nal].trans = trans;
(finit, sf1nal)
}
Maybe(ref reg) => {
let (init, f1nal) = reg.to_automaton::<Encoding>(auto, defs);
let new_f1nal = auto.create_state();
let new_init = auto.create_state();
auto.states[new_init].etrans = Two(new_f1nal, init);
auto.states[f1nal].etrans = One(new_f1nal);
(new_init, new_f1nal)
}
Closure(ref reg) => {
let (init, f1nal) = reg.to_automaton::<Encoding>(auto, defs);
let new_f1nal = auto.create_state();
let new_init = auto.create_state();
auto.states[new_init].etrans = Two(new_f1nal, init);
auto.states[f1nal].etrans = Two(new_f1nal, init);
(new_init, new_f1nal)
}
Literal(Class(ref set)) => {
Encoding::class_to_automaton(set, auto)
}
Literal(NotClass(ref set)) => {
Encoding::not_class_to_automaton(set, auto)
}
Var(idx) => {
defs[idx].to_automaton::<Encoding>(auto, defs)
}
Literal(Char(ch)) => {
Encoding::char_to_automaton(ch, auto)
}
Literal(Any) => {
Encoding::any_to_automaton(auto)
}
Bind(_, ref expr) => {
expr.to_automaton::<Encoding>(auto, defs)
}
}
}
#[allow(dead_code)]
pub fn show(&self, span: &str, defs: &[Regex]) {
match self {
&Or(ref l, ref r) => {
println!("{} Or of: ", span);
l.show(&format!(" {}", span), defs);
r.show(&format!(" {}", span), defs);
}
&Cat(ref l, ref r) => {
println!("{} Cat of: ", span);
l.show(&format!(" {}", span), defs);
r.show(&format!(" {}", span), defs);
}
&Maybe(ref reg) => {
println!("{} Optionnally the regex:", span);
reg.show(span, defs);
}
&Closure(ref reg) => {
println!("{} The eclosure of", span);
reg.show(&format!(" {}", span), defs)
}
&Var(idx) => {
defs[idx].show(span, defs);
}
&Literal(Char(ref c)) => println!("{} The char {}", span, *c as char),
&Literal(Any) => println!("Anything"),
_ => ()
}
}
}