1use crate::nfa::{NFA};
2pub(crate) const NULL: usize = usize::MAX;
3pub struct DFA {
4 pub ncount: usize,
5 pub jumps: Vec<[usize; u8::MAX as usize]>,
6 pub accepts: Vec<usize>,
7 pub labels: Vec<String>,
8 pub dead: usize
9}
10
11#[allow(dead_code)]
12impl DFA {
13 pub fn new() -> Self {
14 return DFA {
15 ncount: 0,
16 jumps: Vec::new(),
17 accepts: Vec::new(),
18 labels: Vec::new(),
19 dead: 0
20 };
21 }
22
23 pub fn compress(dfa: DFA) -> Self {
26 let mut table = vec![vec![false; dfa.ncount]; dfa.ncount];
27 for i in 0..table.len() {
28 for j in 0..=i {
29 table[i][j] = dfa.accepts[i] != dfa.accepts[j];
30 }
31 }
32
33 let mut changed = true;
34 while changed {
35 changed = false;
36 for idx in 0..u8::MAX as usize {
37 for i in 0..table.len() {
38 for j in 0..=i {
39 if table[i][j] { continue; }
40 let mut a = dfa.jumps[i][idx];
41 let mut b = dfa.jumps[j][idx];
42 if b > a { std::mem::swap(&mut a, &mut b); }
43 if table[a][b] {
44 table[i][j] = true;
45 changed = true;
46 }
47 }
48 }
49 }
50 }
51 let mut reps: Vec<usize> = Vec::new();
52 let mut id = vec![NULL; dfa.ncount];
53 for i in 0..table.len() {
54 for j in 0..i {
55 if !table[i][j] {
56 id[i] = j;
57 break;
58 }
59 }
60 if id[i] != NULL { continue; }
61 id[i] = reps.len();
62 reps.push(i);
63 }
64
65 let mut accepts = vec![0; reps.len()];
66 let mut jumps = vec![[NULL; u8::MAX as usize]; reps.len()];
67 for rep in &reps {
68 for i in 0..u8::MAX as usize {
69 jumps[id[*rep]][i] = id[dfa.jumps[*rep][i]];
70 }
71 accepts[id[*rep]] = dfa.accepts[*rep];
72 }
73 return Self {
74 ncount: reps.len(),
75 jumps,
76 accepts,
77 labels: dfa.labels.clone(),
78 dead: id[dfa.dead]
79 };
80 }
81
82 pub fn subset_construction(nfa: NFA) -> Self {
83 let mut ncount: usize = 1;
84 let mut jumps = vec![[NULL; u8::MAX as usize]; 1];
85 let mut accepts: Vec<usize> = vec![0; 1];
86 let mut unmarked = vec![0usize; 1];
87 let mut d_states: Vec<Vec<usize>> = vec![
88 DFA::eps_closure(&nfa, vec![0; 1]); 1
89 ];
90 let mut dead = NULL;
91
92 while let Some(index) = unmarked.pop() {
93 for c in 0..u8::MAX {
95 let mut has = vec![false; nfa.ncount];
97 let mut nxt: Vec<usize> = Vec::new();
98
99 for d in &d_states[index] {
100 let mv = nfa.jumps[*d][c as usize];
101 if mv == NULL || has[mv] { continue; }
102 nxt.push(mv);
103 has[mv] = true;
104 }
105
106 let state = DFA::eps_closure(&nfa,nxt);
108
109 let mut u = d_states.len();
111 for i in 0..d_states.len() {
112 if d_states[i] == state {
113 u = i;
114 break;
115 }
116 }
117 if u == d_states.len() {
119 if state == Vec::new() { dead = u; }
120 d_states.push(state.clone());
121 jumps.push([NULL; u8::MAX as usize]);
122 accepts.push(DFA::is_accept(&nfa, state));
123 unmarked.push(u);
124 ncount += 1;
125 }
126 jumps[index][c as usize] = u;
127 }
128 }
129 assert!(dead != NULL, "Dead state must exist!");
130 return Self {
131 ncount,
132 jumps,
133 accepts,
134 labels: nfa.labels.clone(),
135 dead
136 }
137 }
138
139 fn eps_closure(nfa: &NFA, set: Vec<usize>) -> Vec<usize> {
140 let mut has = vec![false; nfa.ncount];
141 let mut closure: Vec<usize> = set;
142 let mut stack: Vec<usize> = Vec::new();
143 for s in &closure {
144 has[*s] = true;
145 stack.push(*s);
146 }
147 while let Some(s) = stack.pop() {
148 for nbr in &nfa.eps[s] {
149 if has[*nbr] { continue; }
150 has[*nbr] = true;
151 closure.push(*nbr);
152 stack.push(*nbr);
153 }
154 }
155 return closure;
156 }
157
158 fn is_accept(nfa: &NFA, set: Vec<usize>) -> usize {
159 for s in set {
160 let acc = nfa.accepts[s];
161 if acc != 0 { return acc; }
162 }
163 return 0;
164 }
165
166 #[cfg(debug_assertions)]
167 #[allow(dead_code)]
168 pub fn print_dot(&self) {
169 println!("digraph DFA {{");
170 for state in 0..self.ncount {
171 let mut ind = 0;
172 while ind < u8::MAX {
173 let nbr = self.jumps[state][ind as usize];
174 if nbr == NULL { ind += 1; continue };
175
176 let start = ind;
177 while ind + 1 < u8::MAX &&
178 self.jumps[state][(ind + 1) as usize] == nbr {
179 ind += 1;
180 }
181
182 if start == ind {
183 println!("\t{} -> {} [label=\"{}\"];",
184 state, nbr, (ind as char).escape_debug()
185 );
186 } else {
187 println!("\t{} -> {} [label=\"{}\"];",
188 state, nbr,
189 format!("{}-{}", (start as char).escape_debug(),
190 (ind as char).escape_debug()
191 )
192 );
193 }
194 ind += 1;
195 }
196 }
197 println!("}}");
198 }
199}
200
201#[cfg(test)]
202mod tests {
203 use std::{path::Path, fs::File, io::{BufReader, BufRead}};
204 use crate::{lexer::Lexer, parser::Parser, nfa::NFA};
205 use super::*;
206
207 impl DFA {
208 fn accepts(&self, s: &str) -> bool {
209 let mut state = 0;
210 let chars = s.chars();
211 for c in chars {
212 let nxt = self.jumps[state][c as usize];
213 if nxt == NULL { return false; }
214 state = nxt;
215 }
216 return self.accepts[state] != 0;
217 }
218 }
219
220 #[test]
221 fn test_matches_uncompressed() {
222 let path = "tests/data/regex/input";
223 let mut i = 0;
224 while Path::new(&format!("{path}/match-{i}.txt")).exists() {
225 println!("{}", format!("{path}/match-{i}.txt"));
226 let lexer = Lexer::new(&format!("{path}/match-{i}.txt")).expect("Invalid Path");
227 let mut parser = Parser::new(lexer).expect("File should be non-empty!");
228 let nfa = NFA::build_from_matches(&parser.parse().expect("Invalid parse"));
229 let dfa = DFA::subset_construction(nfa);
230
231 for id in ["right", "wrong"] {
233 let file = File::open(&format!("{path}/{id}-words-{i}.txt"))
234 .expect("Should be valid...");
235 let reader = BufReader::new(file);
236 for line in reader.lines() {
237 if let Ok(word) = line {
238 assert!(dfa.accepts(&word) == (id == "right"));
239 }
240 }
241 }
242 i += 1;
243 }
244 }
245
246 #[test]
247 fn test_matches_compressed() {
248 let path = "tests/data/regex/input";
249 let mut i = 0;
250 while Path::new(&format!("{path}/match-{i}.txt")).exists() {
251 println!("{}", format!("{path}/match-{i}.txt"));
252 let lexer = Lexer::new(&format!("{path}/match-{i}.txt")).expect("Invalid Path");
253 let mut parser = Parser::new(lexer).expect("File should be non-empty!");
254 let nfa = NFA::build_from_matches(&parser.parse().expect("Invalid parse"));
255 let dfa = DFA::compress(DFA::subset_construction(nfa));
256 for id in ["right", "wrong"] {
257 let file = File::open(&format!("{path}/{id}-words-{i}.txt"))
258 .expect("Should be valid...");
259 let reader = BufReader::new(file);
260 for line in reader.lines() {
261 if let Ok(word) = line {
262 assert!(dfa.accepts(&word) == (id == "right"));
263 }
264 }
265 }
266 i += 1;
267 }
268 }
269}