1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
use std::fmt;

use regex_syntax;

use {Automaton, Result};

pub use regex::error::Error;

mod compile;
mod dfa;
mod error;
mod sparse;

/// A regular expression compiled to a DFA.
///
/// Instances are created with `Regex::new` or `Regex::with_size_limit` and
/// are driven through the `Automaton` trait, using `usize` state identifiers.
pub struct Regex {
    /// The pattern text exactly as it was passed to the constructor; it is
    /// echoed by the `Debug` implementation.
    original: String,
    /// The automaton produced by `dfa::DfaBuilder`. The `is_match` and
    /// `accept` methods of the `Automaton` impl delegate to it.
    dfa: dfa::Dfa,
}

/// A single instruction, with one variant per operation kind.
///
/// NOTE(review): presumably the element type of the program that
/// `compile::Compiler` emits and `dfa::DfaBuilder` consumes — confirm in
/// those modules, which are not visible from this file.
#[derive(Eq, PartialEq)]
pub enum Inst {
    /// Rendered as `Match` by the `Debug` impl.
    /// Presumably marks an accepting point in the program — confirm.
    Match,
    /// Carries one `usize`, which the `Debug` impl names `ip`.
    /// Presumably the index of the instruction to continue at — confirm.
    Jump(usize),
    /// Carries two `usize` values, which the `Debug` impl names `ip1` and
    /// `ip2`. Presumably two alternative instruction indices — confirm.
    Split(usize, usize),
    /// Carries two bytes, printed in hexadecimal as `s-e` by the `Debug`
    /// impl. Presumably the start and end of a byte range; whether the end
    /// is inclusive is not visible here — confirm.
    Range(u8, u8),
}

impl Regex {
    /// Builds a `Regex` from the pattern `re` using the default size limit
    /// of `10 * (1 << 20)` (10,485,760).
    ///
    /// This is shorthand for `Regex::with_size_limit` with that limit, and
    /// fails in exactly the same situations.
    pub fn new(re: &str) -> Result<Regex> {
        let default_limit = 10 * (1 << 20);
        Regex::with_size_limit(default_limit, re)
    }

    /// Builds a `Regex` from the pattern `re`, handing `size` to the
    /// instruction compiler.
    ///
    /// The pattern goes through three stages: it is parsed by
    /// `regex_syntax`, compiled into instructions, and finally turned into a
    /// DFA. An error from any stage is converted and returned to the caller.
    ///
    /// NOTE(review): `size` is forwarded verbatim to `compile::Compiler::new`;
    /// presumably it caps the compiled program size in bytes — confirm
    /// against the `compile` module.
    pub fn with_size_limit(size: usize, re: &str) -> Result<Regex> {
        let ast = try!(regex_syntax::Expr::parse(re));
        let program = try!(compile::Compiler::new(size).compile(&ast));
        let automaton = try!(dfa::DfaBuilder::new(program).build());
        // Keep a copy of the source text around for `Debug` output.
        let pattern = re.to_owned();
        Ok(Regex { original: pattern, dfa: automaton })
    }
}

impl Automaton for Regex {
    /// States are the plain `usize` values exchanged with the inner DFA.
    type State = usize;

    /// The start state is always `0`.
    fn start(&self) -> usize {
        0
    }

    /// Reports whether `state` is a match state, as decided by the inner DFA.
    fn is_match(&self, state: usize) -> bool {
        let dfa = &self.dfa;
        dfa.is_match(state)
    }

    /// Asks the inner DFA for the state reached from `state` on `byte`.
    ///
    /// The DFA's answer is returned unchanged. Presumably `None` means there
    /// is no transition for that byte — confirm against `dfa::Dfa::accept`.
    fn accept(&self, state: usize, byte: u8) -> Option<usize> {
        let dfa = &self.dfa;
        dfa.accept(state, byte)
    }
}

impl fmt::Debug for Regex {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        try!(writeln!(f, "Regex({:?})", self.original));
        self.dfa.fmt(f)
    }
}

impl fmt::Debug for Inst {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::Inst::*;
        match *self {
            Match => write!(f, "Match"),
            Jump(ip) => write!(f, "JUMP {}", ip),
            Split(ip1, ip2) => write!(f, "SPLIT {}, {}", ip1, ip2),
            Range(s, e) => write!(f, "RANGE {:X}-{:X}", s, e),
        }
    }
}

// #[cfg(test)]
// mod tests {
    // use regex::Regex;
//
    // #[test]
    // fn scratch() {
        // // let re = Regex::new("[\u{0}-\u{10FFFF}]").unwrap();
        // let re = Regex::new(r"[a-z0-9]").unwrap();
        // println!("{:?}", re.dfa);
    // }
// }