1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
use std::fmt;
use regex_syntax;
use {Automaton, Result};
pub use regex::error::Error;
mod compile;
mod dfa;
mod error;
mod sparse;
/// A regular expression together with the DFA built from it.
///
/// Instances are produced by `Regex::new` or `Regex::with_size_limit`.
pub struct Regex {
    /// The pattern text exactly as it was given to the constructor; used
    /// when rendering the `Debug` representation.
    original: String,
    /// The automaton built from the pattern; all `Automaton` queries are
    /// forwarded to it.
    dfa: dfa::Dfa,
}
/// A single instruction of a regex program.
///
/// NOTE(review): presumably these are what `compile::Compiler` emits and
/// `dfa::DfaBuilder` consumes -- confirm against those modules.
#[derive(PartialEq, Eq)]
pub enum Inst {
    /// Signals a match.
    Match,
    /// Carries a single instruction index (presumably the jump target).
    Jump(usize),
    /// Carries two instruction indices (presumably the two alternative
    /// branches to explore).
    Split(usize, usize),
    /// Carries a pair of bytes, rendered as start and end of a range by the
    /// `Debug` impl. NOTE(review): whether the end is inclusive is not
    /// visible here -- confirm in the DFA builder.
    Range(u8, u8),
}
impl Regex {
    /// Builds a `Regex` from the pattern `re` using the default size limit
    /// of `10 * (1 << 20)` (10,485,760).
    ///
    /// # Errors
    ///
    /// Fails under the same conditions as `Regex::with_size_limit`.
    pub fn new(re: &str) -> Result<Regex> {
        let default_limit = 10 * (1 << 20);
        Regex::with_size_limit(default_limit, re)
    }

    /// Builds a `Regex` from the pattern `re`, handing `size` to the
    /// compiler as its limit.
    ///
    /// NOTE(review): the unit of `size` is defined by `compile::Compiler`
    /// (presumably bytes of compiled program) -- confirm there.
    ///
    /// The pattern goes through three stages: it is parsed by
    /// `regex_syntax`, compiled into instructions, and those instructions
    /// are turned into a DFA.
    ///
    /// # Errors
    ///
    /// Returns the (converted) error of whichever stage fails first:
    /// parsing, compilation, or DFA construction.
    pub fn with_size_limit(size: usize, re: &str) -> Result<Regex> {
        let parsed = try!(regex_syntax::Expr::parse(re));
        let program = try!(compile::Compiler::new(size).compile(&parsed));
        let automaton = try!(dfa::DfaBuilder::new(program).build());
        Ok(Regex {
            original: re.to_owned(),
            dfa: automaton,
        })
    }
}
/// Lets a `Regex` be driven as an `Automaton` by forwarding every query to
/// the DFA it owns.
impl Automaton for Regex {
    /// States are plain `usize` values passed straight through to the DFA.
    type State = usize;

    /// The automaton always begins in state `0`.
    fn start(&self) -> usize {
        0
    }

    /// Reports whether `state` is a match state, as decided by the DFA.
    fn is_match(&self, state: usize) -> bool {
        let automaton = &self.dfa;
        automaton.is_match(state)
    }

    /// Asks the DFA for the state reached from `state` on input `byte`.
    ///
    /// The DFA's answer is returned unchanged. NOTE(review): `None`
    /// presumably means there is no transition -- confirm in `dfa`.
    fn accept(&self, state: usize, byte: u8) -> Option<usize> {
        let automaton = &self.dfa;
        automaton.accept(state, byte)
    }
}
impl fmt::Debug for Regex {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
try!(writeln!(f, "Regex({:?})", self.original));
self.dfa.fmt(f)
}
}
impl fmt::Debug for Inst {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Inst::*;
match *self {
Match => write!(f, "Match"),
Jump(ip) => write!(f, "JUMP {}", ip),
Split(ip1, ip2) => write!(f, "SPLIT {}, {}", ip1, ip2),
Range(s, e) => write!(f, "RANGE {:X}-{:X}", s, e),
}
}
}