1use std::collections::{HashMap, HashSet};
2use std::fmt;
3
4use crate::sparse::SparseSet;
5use crate::{Error, Inst};
6
7const STATE_LIMIT: usize = 1_000; pub struct DfaBuilder {
10 dfa: Dfa,
11 cache: HashMap<Vec<usize>, usize>,
12}
13
14pub struct Dfa {
15 insts: Vec<Inst>,
16 states: Vec<State>,
17}
18
19struct State {
20 insts: Vec<usize>,
21 next: [Option<usize>; 256],
22 is_match: bool,
23}
24
25impl DfaBuilder {
26 pub fn new(insts: Vec<Inst>) -> Self {
27 DfaBuilder {
28 dfa: Dfa { insts: insts, states: Vec::with_capacity(16) },
29 cache: HashMap::with_capacity(1024),
30 }
31 }
32
33 pub fn build(mut self) -> Result<Dfa, Error> {
34 let mut cur = SparseSet::new(self.dfa.insts.len());
35 let mut next = SparseSet::new(self.dfa.insts.len());
36
37 self.dfa.add(&mut cur, 0);
38 let mut states = vec![self.cached_state(&cur).unwrap()];
39 let mut seen = HashSet::new();
40 while let Some(s) = states.pop() {
41 for b in 0..256 {
42 let ns = self.run_state(&mut cur, &mut next, s, b as u8);
43 if let Some(ns) = ns {
44 if !seen.contains(&ns) {
45 seen.insert(ns);
46 states.push(ns);
47 }
48 }
49 if self.dfa.states.len() > STATE_LIMIT {
50 return Err(Error::TooManyStates(STATE_LIMIT).into());
51 }
52 }
53 }
54 Ok(self.dfa)
55 }
56
57 fn run_state(
58 &mut self,
59 cur: &mut SparseSet,
60 next: &mut SparseSet,
61 state: usize,
62 byte: u8,
63 ) -> Option<usize> {
64 cur.clear();
65 for &ip in &self.dfa.states[state].insts {
66 cur.add(ip);
67 }
68 self.dfa.run(cur, next, byte);
69 let next_state = self.cached_state(next);
70 self.dfa.states[state].next[byte as usize] = next_state;
71 next_state
72 }
73
74 fn cached_state(&mut self, set: &SparseSet) -> Option<usize> {
75 use std::collections::hash_map::Entry;
76 use crate::Inst::*;
77
78 let mut insts = vec![];
81 let mut is_match = false;
82 for i in 0..set.len() {
83 let ip = set.get(i);
84 match self.dfa.insts[ip] {
85 Jump(_) | Split(_, _) => {}
86 Range(_, _) => insts.push(ip),
87 Match => {
88 is_match = true;
89 insts.push(ip);
90 }
91 }
92 }
93 if insts.len() == 0 {
94 return None;
95 }
96 Some(match self.cache.entry(insts.clone()) {
97 Entry::Occupied(v) => *v.get(),
98 Entry::Vacant(v) => {
99 self.dfa.states.push(State {
100 insts: insts,
101 next: [None; 256],
102 is_match: is_match,
103 });
104 *v.insert(self.dfa.states.len() - 1)
105 }
106 })
107 }
108}
109
110impl Dfa {
111 pub fn is_match(&self, si: usize) -> bool {
112 self.states[si].is_match
113 }
114
115 pub fn accept(&self, si: usize, byte: u8) -> Option<usize> {
116 self.states[si].next[byte as usize]
117 }
118
119 fn add(&self, set: &mut SparseSet, ip: usize) {
120 use crate::Inst::*;
121
122 if set.contains(ip) {
123 return;
124 }
125 set.add(ip);
126 match self.insts[ip] {
127 Match | Range(_, _) => {}
128 Jump(ip) => self.add(set, ip),
129 Split(ip1, ip2) => {
130 self.add(set, ip1);
131 self.add(set, ip2);
132 }
133 }
134 }
135
136 fn run(&self, from: &SparseSet, to: &mut SparseSet, byte: u8) -> bool {
137 use crate::Inst::*;
138 to.clear();
139 let mut is_match = false;
140 for i in 0..from.len() {
141 let ip = from.get(i);
142 match self.insts[ip] {
143 Jump(_) | Split(_, _) => {}
144 Match => is_match = true,
145 Range(s, e) => {
146 if s <= byte && byte <= e {
147 self.add(to, ip + 1);
148 }
149 }
150 }
151 }
152 is_match
153 }
154}
155
156impl fmt::Debug for Dfa {
157 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
158 for (i, inst) in self.insts.iter().enumerate() {
159 writeln!(f, "{:03} {:?}", i, inst)?;
160 }
161 writeln!(f, "------------")?;
162 for (i, state) in self.states.iter().enumerate() {
163 if state.is_match {
164 writeln!(f, "{:03}* {:?}", i, state.insts)?;
165 } else {
166 writeln!(f, "{:03} {:?}", i, state.insts)?;
167 }
168 for j in 0..256 {
169 if let Some(si) = state.next[j] {
170 writeln!(f, "{:03} {:X} => {}", i, j, si)?;
171 }
172 }
173 }
174 Ok(())
175 }
176}