1use crate::{ast::{self, Match}, lexer};
2
3const NULL: usize = usize::MAX;
4pub struct NFA {
5 pub ncount: usize,
6 pub jumps: Vec<[usize; u8::MAX as usize]>,
7 pub eps: Vec<Vec<usize>>,
8 pub accepts: Vec<usize>,
9 pub labels: Vec<String>
10}
11impl NFA {
12 pub fn new() -> Self {
13 return NFA {
14 ncount: 0,
15 jumps: Vec::new(),
16 eps: Vec::new(),
17 accepts: Vec::new(),
18 labels: Vec::new()
19 };
20 }
21
22 pub fn build_from_matches(matches: &Vec<ast::Match>) -> Self {
23 let mut nfa = NFA::new();
24 let root = nfa.make_node();
25 for m in matches {
26 if m.name.len() == 0 {
27 nfa.handle_whitespace(&m.root);
28 } else {
29 let node = NFA::build_ast(&mut nfa, m);
30 nfa.add_eps(root,node);
31 }
32 }
33 return nfa;
34 }
35
36 fn handle_whitespace(&mut self, ast: &ast::Node) {
37 match ast {
38 ast::Node::BinaryExpr(_) => {
39 let mut stk: Vec<&ast::Node> = vec![ast; 1];
40 while let Some(state) = stk.pop() {
41 match state {
42 ast::Node::BinaryExpr(node) => {
43 if let lexer::Op::BAR = node.op {
44 stk.push(&node.left);
45 stk.push(&node.right);
46 } else {
47 panic!("You used something besides or in whitespace!");
48 }
49 },
50 ast::Node::Char(c) => self.add(0, 0, *c),
51 _ => panic!("Neither Char no Or in whitespace!")
52 }
53 }
54 },
55 ast::Node::Char(c) => self.add(0, 0, *c),
56 _ => panic!("White Space should be single or-seperated tokens!")
57 }
58 }
59
60 fn build_ast(nfa: &mut NFA, m: &Match) -> usize {
61 let (start, end) = nfa.build(&m.root);
62 nfa.label(end, m.name.clone());
63 return start;
64 }
65
66
67 fn build(&mut self, ast: &ast::Node) -> (usize, usize) {
68 return match ast {
69 ast::Node::BinaryExpr(node) => {
70 let left = self.build(&node.left);
71 let right = self.build(&node.right);
72 match node.op {
73 lexer::Op::BAR => self.handle_bar(left, right),
74 lexer::Op::DASH => self.handle_dash(node.left.char(), node.right.char()),
75 lexer::Op::AND => self.handle_add(left, right),
76 _ => panic!("Expected Binary Op but got {:?}", node.op)
77 }
78 },
79 ast::Node::UnaryExpr(node) => {
80 let child = self.build(&node.child);
81 match node.op {
82 lexer::Op::STAR => self.handle_star(child),
83 lexer::Op::PLUS => self.handle_plus(child),
84 lexer::Op::QUESTION => self.handle_question(child),
85 _ => panic!("Expected Unary Op but got {:?}", node.op)
86 }
87 },
88 ast::Node::Char(c) => self.handle_char(*c)
89 }
90 }
91
92 fn handle_bar(&mut self, left: (usize, usize),
93 right: (usize, usize)) -> (usize, usize) {
94 let i = self.make_node();
95 let f = self.make_node();
96 self.add_eps(i, left.0);
97 self.add_eps(i, right.0);
98 self.add_eps(left.1, f);
99 self.add_eps(right.1, f);
100 return (i, f);
101 }
102
103 fn handle_dash(&mut self, start: char, end: char) -> (usize, usize) {
104 let i = self.make_node();
105 let f = self.make_node();
106 for c in start..=end {
107 self.add(i, f, c);
108 }
109 return (i, f);
110 }
111
112 fn handle_add(&mut self, left: (usize, usize), right: (usize, usize))
113 -> (usize, usize) {
114 let (_, lf) = left;
115 let (ri, _) = right;
116 self.swap(lf, ri);
117 return (left.0, right.1);
118 }
119
120 fn handle_question(&mut self, child: (usize, usize)) -> (usize, usize) {
121 let (start, end) = child;
122 let i = self.make_node();
123 let f = self.make_node();
124 self.add_eps(i, start);
125 self.add_eps(i, f);
126 self.add_eps(end, f);
127 return (i, f);
128 }
129
130 fn handle_plus(&mut self, child: (usize, usize)) -> (usize, usize) {
131 let (start, end) = child;
132 let i = self.make_node();
133 let f = self.make_node();
134 self.add_eps(i, start);
135 self.add_eps(end, start);
136 self.add_eps(end, f);
137 return (i, f);
138 }
139
140 fn handle_star(&mut self, child: (usize, usize)) -> (usize, usize) {
141 let (start, end) = child;
142 let i = self.make_node();
143 let f = self.make_node();
144 self.add_eps(i, start);
145 self.add_eps(i, f);
146 self.add_eps(end, start);
147 self.add_eps(end, f);
148 return (i, f);
149 }
150
151 fn handle_char(&mut self, c: char) -> (usize, usize) {
152 let i = self.make_node();
153 let f = self.make_node();
154 self.add(i, f, c);
155 return (i, f);
156 }
157
158 fn label(&mut self, i: usize, label: String) {
159 self.labels.push(label);
160 self.accepts[i] = self.labels.len();
161 }
162
163 fn swap(&mut self, i: usize, j: usize) {
164 self.jumps.swap(i, j);
165 self.eps.swap(i, j);
166 }
167
168 fn add_eps(&mut self, i: usize, f: usize) {
169 self.eps[i].push(f);
170 }
171
172 fn add(&mut self, i: usize, f: usize, c: char) {
173 self.jumps[i][c as usize] = f;
174 }
175
176 fn make_node(&mut self) -> usize {
177 self.ncount += 1;
178 self.jumps.push([NULL; u8::MAX as usize]);
179 self.eps.push(Vec::new());
180 self.accepts.push(0);
181 return self.ncount - 1;
182 }
183
184 #[cfg(debug_assertions)]
186 #[allow(dead_code)]
187 pub fn print_dot(&self) {
188 println!("digraph NFA {{");
189 for state in 0..self.ncount {
190 let mut ind = 0;
191 while ind < u8::MAX {
192 let nbr = self.jumps[state][ind as usize];
193 if nbr == NULL { ind += 1; continue };
194
195 let start = ind;
196 while ind + 1 < u8::MAX &&
197 self.jumps[state][(ind + 1) as usize] == nbr {
198 ind += 1;
199 }
200
201 if start == ind {
202 println!("\t{} -> {} [label=\"{}\"];",
203 state, nbr, (ind as char).escape_debug()
204 );
205 } else {
206 println!("\t{} -> {} [label=\"{}\"];",
207 state, nbr,
208 format!("{}-{}", (start as char).escape_debug(),
209 (ind as char).escape_debug()
210 )
211 );
212 }
213 ind += 1;
214 }
215 for nbr in &self.eps[state] {
216 println!("\t{} -> {} [label=\"eps\"];",
217 state, *nbr
218 );
219 }
220 }
221 println!("}}");
222 }
223}
224
225#[cfg(test)]
226mod tests {
227 use std::{path::Path, fs::File, io::{BufReader, BufRead}};
228 use crate::{lexer::Lexer, parser::Parser};
229
230 use super::*;
231 impl NFA {
232 fn accepts(&self, s: &str) -> bool {
233 let mut states: Vec<usize> = self.eps_closure(vec![0; 1]);
234 for c in s.chars() {
235 let mut has = vec![false; self.ncount];
236 let mut mv: Vec<usize> = Vec::new();
237 for s in &states {
238 let nxt = self.jumps[*s][c as usize];
239 if nxt != NULL && !has[nxt] {
240 has[nxt] = true;
241 mv.push(nxt);
242 }
243 }
244 states = self.eps_closure(mv);
245 }
246
247 for state in states {
248 if self.accepts[state] != 0 {
249 return true;
250 }
251 }
252 return false;
253 }
254
255 fn eps_closure(&self, set: Vec<usize>) -> Vec<usize> {
256 let mut has = vec![false; self.ncount];
257 let mut closure: Vec<usize> = set;
258 let mut stack: Vec<usize> = Vec::new();
259 for s in &closure {
260 stack.push(*s);
261 has[*s] = true;
262 }
263 while let Some(s) = stack.pop() {
264 for nbr in &self.eps[s] {
265 if has[*nbr] { continue; }
266 has[*nbr] = true;
267 closure.push(*nbr);
268 stack.push(*nbr);
269 }
270 }
271 return closure;
272 }
273 }
274
275 #[test]
276 fn test_matches() {
277 let path = "tests/data/regex/input";
278 let mut i = 0;
279 while Path::new(&format!("{path}/match-{i}.txt")).exists() {
280 println!("{}", format!("{path}/match-{i}.txt"));
281 let lexer = Lexer::new(&format!("{path}/match-{i}.txt")).expect("Invalid Path");
282 let mut parser = Parser::new(lexer).expect("File should be non-empty!");
283 let nfa = NFA::build_from_matches(&parser.parse().expect("Invalid parse"));
284 for id in ["right", "wrong"] {
286 let file = File::open(&format!("{path}/{id}-words-{i}.txt"))
287 .expect("Should be valid...");
288 let reader = BufReader::new(file);
289 for line in reader.lines() {
290 if let Ok(word) = line {
291 assert!(nfa.accepts(&word) == (id == "right"));
292 }
293 }
294 }
295 i += 1;
296 }
297 }
298}