rflex_lib/
dfa.rs

1use crate::nfa::{NFA};
2pub(crate) const NULL: usize = usize::MAX;
3pub struct DFA {
4    pub ncount:  usize,
5    pub jumps:   Vec<[usize; u8::MAX as usize]>,
6    pub accepts: Vec<usize>,
7    pub labels:  Vec<String>,
8    pub dead:    usize
9}
10
11#[allow(dead_code)]
12impl DFA {
13    pub fn new() -> Self {
14        return DFA {
15            ncount:  0,
16            jumps:   Vec::new(),
17            accepts: Vec::new(),
18            labels:  Vec::new(),
19            dead:    0
20        };
21    }
22
23    // TODO: Hycroft algorithm (guaranteed nlog(n))
24    // https://www.cs.cornell.edu/courses/cs2800/2013fa/Handouts/minimization.pdf
25    pub fn compress(dfa: DFA) -> Self {
26        let mut table = vec![vec![false; dfa.ncount]; dfa.ncount];
27        for i in 0..table.len() {
28            for j in 0..=i {
29                table[i][j] = dfa.accepts[i] != dfa.accepts[j];
30            }
31        }
32
33        let mut changed = true;
34        while changed {
35            changed = false;
36            for idx in 0..u8::MAX as usize {
37                for i in 0..table.len() {
38                    for j in 0..=i {
39                        if table[i][j] { continue; }
40                        let mut a = dfa.jumps[i][idx];
41                        let mut b = dfa.jumps[j][idx];
42                        if b > a { std::mem::swap(&mut a, &mut b); }
43                        if table[a][b] { 
44                            table[i][j] = true; 
45                            changed = true;
46                        }
47                    }
48                }
49            }
50        }
51        let mut reps: Vec<usize> = Vec::new();
52        let mut id = vec![NULL; dfa.ncount];
53        for i in 0..table.len() {
54            for j in 0..i {
55                if !table[i][j] {
56                    id[i] = j;
57                    break;
58                }
59            }
60            if id[i] != NULL { continue; }
61            id[i] = reps.len();
62            reps.push(i);
63        }
64
65        let mut accepts = vec![0; reps.len()];
66        let mut jumps = vec![[NULL; u8::MAX as usize]; reps.len()];
67        for rep in &reps {
68            for i in 0..u8::MAX as usize {
69                jumps[id[*rep]][i] = id[dfa.jumps[*rep][i]];
70            }
71            accepts[id[*rep]] = dfa.accepts[*rep];
72        }
73        return Self {
74            ncount: reps.len(),
75            jumps,
76            accepts,
77            labels: dfa.labels.clone(),
78            dead: id[dfa.dead]
79        };
80    }
81
82    pub fn subset_construction(nfa: NFA) -> Self {
83        let mut ncount:  usize = 1;
84        let mut jumps = vec![[NULL; u8::MAX as usize]; 1];
85        let mut accepts: Vec<usize> = vec![0; 1];
86        let mut unmarked = vec![0usize; 1];
87        let mut d_states: Vec<Vec<usize>> = vec![
88            DFA::eps_closure(&nfa, vec![0; 1]); 1
89        ];
90        let mut dead = NULL;
91
92        while let Some(index) = unmarked.pop() {
93            //println!("index: {}", index);
94            for c in 0..u8::MAX {
95                // MOVE
96                let mut has = vec![false; nfa.ncount];
97                let mut nxt: Vec<usize> = Vec::new();
98                
99                for d in &d_states[index] {
100                    let mv = nfa.jumps[*d][c as usize];
101                    if mv == NULL || has[mv] { continue; }
102                    nxt.push(mv);
103                    has[mv] = true;
104                }
105
106                //if index == 6 { println!("Len: {}", nxt.len()); }
107                let state = DFA::eps_closure(&nfa,nxt);
108
109                // Seen Before?
110                let mut u = d_states.len();
111                for i in 0..d_states.len() {
112                    if d_states[i] == state {
113                        u = i;
114                        break;
115                    }
116                }
117                //println!("u: {}", u);
118                if u == d_states.len() {
119                    if state == Vec::new() { dead = u; }
120                    d_states.push(state.clone());
121                    jumps.push([NULL; u8::MAX as usize]);
122                    accepts.push(DFA::is_accept(&nfa, state));
123                    unmarked.push(u);
124                    ncount += 1;
125                }
126                jumps[index][c as usize] = u;
127            }
128        }
129        assert!(dead != NULL, "Dead state must exist!");
130        return Self {
131            ncount,
132            jumps,
133            accepts,
134            labels: nfa.labels.clone(),
135            dead
136        }
137    }
138
139    fn eps_closure(nfa: &NFA, set: Vec<usize>) -> Vec<usize> {
140        let mut has = vec![false; nfa.ncount];
141        let mut closure: Vec<usize> = set;
142        let mut stack: Vec<usize> = Vec::new();
143        for s in &closure { 
144            has[*s] = true;
145            stack.push(*s); 
146        }
147        while let Some(s) = stack.pop() {
148            for nbr in &nfa.eps[s] {
149                if has[*nbr] { continue; }
150                has[*nbr] = true;
151                closure.push(*nbr);
152                stack.push(*nbr);
153            }
154        } 
155        return closure;
156    }
157
158    fn is_accept(nfa: &NFA, set: Vec<usize>) -> usize {
159        for s in set {
160            let acc = nfa.accepts[s];
161            if acc != 0 { return acc; }
162        }
163        return 0;
164    }
165
166    #[cfg(debug_assertions)]
167    #[allow(dead_code)]
168    pub fn print_dot(&self) {
169        println!("digraph DFA {{");
170        for state in 0..self.ncount {
171            let mut ind = 0;
172            while ind < u8::MAX {
173                let nbr = self.jumps[state][ind as usize];
174                if nbr == NULL { ind += 1; continue };
175
176                let start = ind;
177                while ind + 1 < u8::MAX &&
178                    self.jumps[state][(ind + 1) as usize] == nbr {
179                    ind += 1;
180                }
181
182                if start == ind {
183                    println!("\t{} -> {} [label=\"{}\"];",
184                        state, nbr, (ind as char).escape_debug()
185                    );
186                } else {
187                    println!("\t{} -> {} [label=\"{}\"];",
188                        state, nbr,
189                        format!("{}-{}", (start as char).escape_debug(), 
190                            (ind as char).escape_debug()
191                        )
192                    );
193                }
194                ind += 1;
195            }
196        }
197        println!("}}");
198    }
199}
200
201#[cfg(test)]
202mod tests {
203    use std::{path::Path, fs::File, io::{BufReader, BufRead}};
204    use crate::{lexer::Lexer, parser::Parser, nfa::NFA};
205    use super::*;
206
207    impl DFA {
208        fn accepts(&self, s: &str) -> bool {
209            let mut state = 0;
210            let chars = s.chars();
211            for c in chars {
212                let nxt = self.jumps[state][c as usize];
213                if nxt == NULL { return false; }
214                state = nxt;
215            }
216            return self.accepts[state] != 0;
217        }
218    }
219
220    #[test]
221    fn test_matches_uncompressed() {
222        let path = "tests/data/regex/input";
223        let mut i = 0;
224        while Path::new(&format!("{path}/match-{i}.txt")).exists() {
225            println!("{}", format!("{path}/match-{i}.txt"));
226            let lexer = Lexer::new(&format!("{path}/match-{i}.txt")).expect("Invalid Path");
227            let mut parser = Parser::new(lexer).expect("File should be non-empty!");
228            let nfa = NFA::build_from_matches(&parser.parse().expect("Invalid parse"));
229            let dfa = DFA::subset_construction(nfa);
230
231            // dfa.print_dot();
232            for id in ["right", "wrong"] {
233                let file = File::open(&format!("{path}/{id}-words-{i}.txt"))
234                    .expect("Should be valid...");
235                let reader = BufReader::new(file);
236                for line in reader.lines() {
237                    if let Ok(word) = line {
238                        assert!(dfa.accepts(&word) == (id == "right"));
239                    }
240                }
241            }
242            i += 1;
243        }
244    }
245
246    #[test]
247    fn test_matches_compressed() {
248        let path = "tests/data/regex/input";
249        let mut i = 0;
250        while Path::new(&format!("{path}/match-{i}.txt")).exists() {
251            println!("{}", format!("{path}/match-{i}.txt"));
252            let lexer = Lexer::new(&format!("{path}/match-{i}.txt")).expect("Invalid Path");
253            let mut parser = Parser::new(lexer).expect("File should be non-empty!");
254            let nfa = NFA::build_from_matches(&parser.parse().expect("Invalid parse"));
255            let dfa = DFA::compress(DFA::subset_construction(nfa));
256            for id in ["right", "wrong"] {
257                let file = File::open(&format!("{path}/{id}-words-{i}.txt"))
258                    .expect("Should be valid...");
259                let reader = BufReader::new(file);
260                for line in reader.lines() {
261                    if let Ok(word) = line {
262                        assert!(dfa.accepts(&word) == (id == "right"));
263                    }
264                }
265            }
266            i += 1;
267        }
268    }
269}