rflex_lib/
nfa.rs

1use crate::{ast::{self, Match}, lexer};
2
3const NULL: usize = usize::MAX;
4pub struct NFA { 
5    pub ncount:  usize,
6    pub jumps:   Vec<[usize; u8::MAX as usize]>,
7    pub eps:     Vec<Vec<usize>>,
8    pub accepts: Vec<usize>,
9    pub labels:  Vec<String>
10}
11impl NFA {
12    pub fn new() -> Self {
13        return NFA { 
14            ncount:  0,
15            jumps:   Vec::new(),
16            eps:     Vec::new(),
17            accepts: Vec::new(),
18            labels:  Vec::new()
19        };
20    }
21
22    pub fn build_from_matches(matches: &Vec<ast::Match>) -> Self {
23        let mut nfa = NFA::new();
24        let root = nfa.make_node();
25        for m in matches {
26            if m.name.len() == 0 {
27                nfa.handle_whitespace(&m.root);
28            } else {
29                let node = NFA::build_ast(&mut nfa, m);
30                nfa.add_eps(root,node);
31            }
32        }
33        return nfa;
34    }
35
36    fn handle_whitespace(&mut self, ast: &ast::Node) {
37        match ast {
38            ast::Node::BinaryExpr(_) => {
39                let mut stk: Vec<&ast::Node> = vec![ast; 1];
40                while let Some(state) = stk.pop() {
41                    match state {
42                        ast::Node::BinaryExpr(node) => {
43                            if let lexer::Op::BAR = node.op {
44                                stk.push(&node.left);
45                                stk.push(&node.right);
46                            } else {
47                                panic!("You used something besides or in whitespace!");
48                            }
49                        },
50                        ast::Node::Char(c) => self.add(0, 0, *c),
51                        _ => panic!("Neither Char no Or in whitespace!")
52                    }
53                }
54            },
55            ast::Node::Char(c) => self.add(0, 0, *c),
56            _ => panic!("White Space should be single or-seperated tokens!")
57        }
58    }
59
60    fn build_ast(nfa: &mut NFA, m: &Match) -> usize {
61        let (start, end) = nfa.build(&m.root);
62        nfa.label(end, m.name.clone());
63        return start;
64    }
65
66
67    fn build(&mut self, ast: &ast::Node) -> (usize, usize) {
68        return match ast {
69            ast::Node::BinaryExpr(node) => {
70                let left = self.build(&node.left);
71                let right = self.build(&node.right);
72                match node.op {
73                    lexer::Op::BAR  => self.handle_bar(left, right),
74                    lexer::Op::DASH => self.handle_dash(node.left.char(), node.right.char()),
75                    lexer::Op::AND  => self.handle_add(left, right),
76                    _ => panic!("Expected Binary Op but got {:?}", node.op)
77                }
78            },
79            ast::Node::UnaryExpr(node) => {
80                let child = self.build(&node.child);
81                match node.op {
82                    lexer::Op::STAR     => self.handle_star(child),
83                    lexer::Op::PLUS     => self.handle_plus(child),
84                    lexer::Op::QUESTION => self.handle_question(child),
85                    _ => panic!("Expected Unary Op but got {:?}", node.op)
86                }
87            },
88            ast::Node::Char(c) => self.handle_char(*c)
89        }
90    }
91
92    fn handle_bar(&mut self, left: (usize, usize),
93        right: (usize, usize)) -> (usize, usize) {
94        let i = self.make_node();
95        let f = self.make_node();
96        self.add_eps(i, left.0);
97        self.add_eps(i, right.0);
98        self.add_eps(left.1, f);
99        self.add_eps(right.1, f);
100        return (i, f);
101    }
102
103    fn handle_dash(&mut self, start: char, end: char) -> (usize, usize) {
104        let i = self.make_node();
105        let f = self.make_node();
106        for c in start..=end { 
107            self.add(i, f, c);
108        }
109        return (i, f);
110    }
111
112    fn handle_add(&mut self, left: (usize, usize), right: (usize, usize)) 
113        -> (usize, usize) {
114        let (_, lf) = left;
115        let (ri, _) = right;
116        self.swap(lf, ri);
117        return (left.0, right.1);
118    }
119
120    fn handle_question(&mut self, child: (usize, usize)) -> (usize, usize) {
121        let (start, end) = child;
122        let i = self.make_node();
123        let f = self.make_node();
124        self.add_eps(i, start);
125        self.add_eps(i, f);
126        self.add_eps(end, f);
127        return (i, f);
128    }
129
130    fn handle_plus(&mut self, child: (usize, usize)) -> (usize, usize) {
131        let (start, end) = child;
132        let i = self.make_node();
133        let f = self.make_node();
134        self.add_eps(i, start);
135        self.add_eps(end, start);
136        self.add_eps(end, f);
137        return (i, f);
138    }
139
140    fn handle_star(&mut self, child: (usize, usize)) -> (usize, usize) {
141        let (start, end) = child;
142        let i = self.make_node();
143        let f = self.make_node();
144        self.add_eps(i, start);
145        self.add_eps(i, f);
146        self.add_eps(end, start);
147        self.add_eps(end, f);
148        return (i, f);
149    }
150
151    fn handle_char(&mut self, c: char) -> (usize, usize) {
152        let i = self.make_node();
153        let f = self.make_node();
154        self.add(i, f, c);
155        return (i, f);
156    }
157
158    fn label(&mut self, i: usize, label: String) {
159        self.labels.push(label);
160        self.accepts[i] = self.labels.len();
161    }
162
163    fn swap(&mut self, i: usize, j: usize) {
164        self.jumps.swap(i, j);
165        self.eps.swap(i, j);
166    }
167
168    fn add_eps(&mut self, i: usize, f: usize) {
169        self.eps[i].push(f);
170    }
171
172    fn add(&mut self, i: usize, f: usize, c: char) {
173        self.jumps[i][c as usize] = f;
174    }
175
176    fn make_node(&mut self) -> usize {
177        self.ncount += 1;
178        self.jumps.push([NULL; u8::MAX as usize]);
179        self.eps.push(Vec::new());
180        self.accepts.push(0);
181        return self.ncount - 1;
182    }
183
184    // #[cfg(test)]
185    #[cfg(debug_assertions)]
186    #[allow(dead_code)]
187    pub fn print_dot(&self) {
188        println!("digraph NFA {{");
189        for state in 0..self.ncount {
190            let mut ind = 0;
191            while ind < u8::MAX {
192                let nbr = self.jumps[state][ind as usize];
193                if nbr == NULL { ind += 1; continue };
194
195                let start = ind;
196                while ind + 1 < u8::MAX &&
197                    self.jumps[state][(ind + 1) as usize] == nbr {
198                    ind += 1;
199                }
200
201                if start == ind {
202                    println!("\t{} -> {} [label=\"{}\"];",
203                        state, nbr, (ind as char).escape_debug()
204                    );
205                } else {
206                    println!("\t{} -> {} [label=\"{}\"];",
207                        state, nbr,
208                        format!("{}-{}", (start as char).escape_debug(),
209                            (ind as char).escape_debug()
210                        )
211                    );
212                }
213                ind += 1;
214            }
215            for nbr in &self.eps[state] {
216                println!("\t{} -> {} [label=\"eps\"];",
217                    state, *nbr
218                );
219            }
220        }
221        println!("}}");
222    }
223}
224
225#[cfg(test)]
226mod tests {
227    use std::{path::Path, fs::File, io::{BufReader, BufRead}};
228    use crate::{lexer::Lexer, parser::Parser};
229
230    use super::*;
231    impl NFA {
232        fn accepts(&self, s: &str) -> bool {
233            let mut states: Vec<usize> = self.eps_closure(vec![0; 1]);
234            for c in s.chars() {
235                let mut has = vec![false; self.ncount];
236                let mut mv: Vec<usize> = Vec::new();
237                for s in &states {
238                    let nxt = self.jumps[*s][c as usize];
239                    if nxt != NULL && !has[nxt] { 
240                        has[nxt] = true;
241                        mv.push(nxt); 
242                    } 
243                }
244                states = self.eps_closure(mv);
245            }
246
247            for state in states {
248                if self.accepts[state] != 0 { 
249                    return true; 
250                }
251            }
252            return false;
253        }
254
255        fn eps_closure(&self, set: Vec<usize>) -> Vec<usize> {
256            let mut has = vec![false; self.ncount];
257            let mut closure: Vec<usize> = set;
258            let mut stack: Vec<usize> = Vec::new();
259            for s in &closure { 
260                stack.push(*s); 
261                has[*s] = true;
262            }
263            while let Some(s) = stack.pop() {
264                for nbr in &self.eps[s] {
265                    if has[*nbr] { continue; }
266                    has[*nbr] = true;
267                    closure.push(*nbr);
268                    stack.push(*nbr);
269                }
270            }
271            return closure;
272        }
273    }
274
275    #[test]
276    fn test_matches() {
277        let path = "tests/data/regex/input";
278        let mut i = 0;
279        while Path::new(&format!("{path}/match-{i}.txt")).exists() {
280            println!("{}", format!("{path}/match-{i}.txt"));
281            let lexer = Lexer::new(&format!("{path}/match-{i}.txt")).expect("Invalid Path");
282            let mut parser = Parser::new(lexer).expect("File should be non-empty!");
283            let nfa = NFA::build_from_matches(&parser.parse().expect("Invalid parse"));
284            // nfa.print_dot();
285            for id in ["right", "wrong"] {
286                let file = File::open(&format!("{path}/{id}-words-{i}.txt"))
287                    .expect("Should be valid...");
288                let reader = BufReader::new(file);
289                for line in reader.lines() {
290                    if let Ok(word) = line {
291                        assert!(nfa.accepts(&word) == (id == "right"));
292                    }
293                }
294            }
295            i += 1;
296        }
297    }
298}