1use quote::{quote, ToTokens};
2
3use crate::parse_tree::Atom;
7use crate::simplified_tree::SimplifiedTreeNode;
8
9#[derive(Debug, Clone, Copy, PartialEq, Eq)]
10pub enum EpsilonType {
11 None,
12 StartAnchor,
13 EndAnchor,
14 StartCapture(usize),
15 EndCapture(usize),
16}
17impl ToTokens for EpsilonType {
18 fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
19 match self {
20 EpsilonType::None => {
21 tokens.extend(quote! { ::ere_core::working_nfa::EpsilonType::None })
22 }
23 EpsilonType::StartAnchor => {
24 tokens.extend(quote! { ::ere_core::working_nfa::EpsilonType::StartAnchor })
25 }
26 EpsilonType::EndAnchor => {
27 tokens.extend(quote! { ::ere_core::working_nfa::EpsilonType::EndAnchor })
28 }
29 EpsilonType::StartCapture(group_num) => tokens.extend(quote! {
30 ::ere_core::working_nfa::EpsilonType::StartCapture(#group_num)
31 }),
32 EpsilonType::EndCapture(group_num) => tokens.extend(quote! {
33 ::ere_core::working_nfa::EpsilonType::EndCapture(#group_num)
34 }),
35 };
36 }
37}
38
39#[derive(Debug, Clone, Copy)]
41pub struct EpsilonTransition {
42 pub(crate) from: usize,
43 pub(crate) to: usize,
44 pub(crate) special: EpsilonType,
45}
46impl EpsilonTransition {
47 pub(crate) const fn new(from: usize, to: usize) -> EpsilonTransition {
48 return EpsilonTransition {
49 from,
50 to,
51 special: EpsilonType::None,
52 };
53 }
54 pub(crate) const fn with_offset(self, offset: usize) -> EpsilonTransition {
55 return EpsilonTransition {
56 from: self.from + offset,
57 to: self.to + offset,
58 special: self.special,
59 };
60 }
61 pub(crate) const fn add_offset(&self, offset: usize) -> EpsilonTransition {
62 return EpsilonTransition {
63 from: self.from + offset,
64 to: self.to + offset,
65 special: self.special,
66 };
67 }
68 pub const fn __load(from: usize, to: usize, special: EpsilonType) -> EpsilonTransition {
70 return EpsilonTransition { from, to, special };
71 }
72}
73impl std::fmt::Display for EpsilonTransition {
74 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
75 return write!(f, "{} -> {}", self.from, self.to);
76 }
77}
78impl ToTokens for EpsilonTransition {
79 fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
80 let EpsilonTransition { from, to, special } = self;
81 tokens.extend(quote! {
82 ere_core::working_nfa::EpsilonTransition::__load(
83 #from,
84 #to,
85 #special,
86 )
87 });
88 }
89}
90
91#[derive(Debug, Clone)]
92pub(crate) struct WorkingTransition {
93 pub(crate) from: usize,
94 pub(crate) to: usize,
95 pub(crate) symbol: Atom,
96}
97impl WorkingTransition {
98 pub fn new(from: usize, to: usize, symbol: Atom) -> WorkingTransition {
99 return WorkingTransition { from, to, symbol };
100 }
101 pub fn with_offset(self, offset: usize) -> WorkingTransition {
102 return WorkingTransition {
103 from: self.from + offset,
104 to: self.to + offset,
105 symbol: self.symbol,
106 };
107 }
108 pub fn add_offset(&self, offset: usize) -> WorkingTransition {
109 return WorkingTransition {
110 from: self.from + offset,
111 to: self.to + offset,
112 symbol: self.symbol.clone(),
113 };
114 }
115}
116impl std::fmt::Display for WorkingTransition {
117 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
118 return write!(f, "{} -({})> {}", self.from, self.symbol, self.to);
119 }
120}
121
122#[derive(Debug)]
124pub struct WorkingNFA {
125 pub(crate) transitions: Vec<WorkingTransition>,
126 pub(crate) epsilons: Vec<EpsilonTransition>,
127 pub(crate) states: usize,
128}
129impl WorkingNFA {
130 const fn build_empty() -> WorkingNFA {
131 return WorkingNFA {
132 transitions: Vec::new(),
133 epsilons: Vec::new(),
134 states: 1,
135 };
136 }
137 fn build_symbol(c: &Atom) -> WorkingNFA {
138 return WorkingNFA {
139 transitions: vec![WorkingTransition::new(0, 1, c.clone())],
140 epsilons: Vec::new(),
141 states: 2,
142 };
143 }
144 fn build_union(nodes: &[SimplifiedTreeNode]) -> WorkingNFA {
145 let sub_nfas: Vec<WorkingNFA> = nodes.iter().map(WorkingNFA::build).collect();
146 let states = 2 + sub_nfas.iter().map(|n| n.states).sum::<usize>();
147 let mut transitions = Vec::new();
148 let mut epsilons = Vec::new();
149 let mut used_states = 1usize;
150 for nfa in sub_nfas {
151 epsilons.push(EpsilonTransition::new(0, used_states));
153 transitions.extend(
155 nfa.transitions
156 .into_iter()
157 .map(|t| t.with_offset(used_states)),
158 );
159 epsilons.extend(nfa.epsilons.into_iter().map(|t| t.with_offset(used_states)));
160 epsilons.push(EpsilonTransition::new(
162 used_states + nfa.states - 1,
163 states - 1,
164 ));
165 used_states += nfa.states;
166 }
167 assert_eq!(used_states + 1, states);
168
169 return WorkingNFA {
170 transitions,
171 epsilons,
172 states,
173 };
174 }
175 fn build_capture(tree: &SimplifiedTreeNode, group_num: usize) -> WorkingNFA {
176 let nfa = WorkingNFA::build(tree);
177 let states = nfa.states + 2;
178 let mut epsilons: Vec<_> = nfa.epsilons.into_iter().map(|t| t.with_offset(1)).collect();
179 let transitions: Vec<_> = nfa
180 .transitions
181 .into_iter()
182 .map(|t| t.with_offset(1))
183 .collect();
184 epsilons.push(EpsilonTransition {
185 from: 0,
186 to: 1,
187 special: EpsilonType::StartCapture(group_num),
188 });
189 epsilons.push(EpsilonTransition {
190 from: states - 2,
191 to: states - 1,
192 special: EpsilonType::EndCapture(group_num),
193 });
194 return WorkingNFA {
195 transitions,
196 epsilons,
197 states,
198 };
199 }
200 fn build_concat(nodes: &[SimplifiedTreeNode]) -> WorkingNFA {
201 if nodes.is_empty() {
202 return WorkingNFA {
203 transitions: Vec::new(),
204 epsilons: Vec::new(),
205 states: 1,
206 };
207 }
208
209 let mut states = 1usize;
210 let mut epsilons = vec![EpsilonTransition::new(0, 1)];
211 let mut transitions = vec![];
212
213 for sub in nodes {
214 let nfa = WorkingNFA::build(sub);
215 transitions.extend(nfa.transitions.into_iter().map(|t| t.with_offset(states)));
216 epsilons.extend(nfa.epsilons.into_iter().map(|t| t.with_offset(states)));
217 states += nfa.states;
218 epsilons.push(EpsilonTransition::new(states - 1, states));
219 }
220
221 return WorkingNFA {
222 transitions,
223 epsilons,
224 states: states + 1,
225 };
226 }
227 fn build_repeat(tree: &SimplifiedTreeNode, times: usize) -> WorkingNFA {
228 let mut states = 1usize;
229 let mut epsilons = vec![EpsilonTransition::new(0, 1)];
230 let mut transitions = vec![];
231 let nfa = WorkingNFA::build(tree);
232
233 for _ in 0..times {
234 transitions.extend(nfa.transitions.iter().map(|t| t.add_offset(states)));
235 epsilons.extend(nfa.epsilons.iter().map(|t| t.add_offset(states)));
236 states += nfa.states;
237 epsilons.push(EpsilonTransition::new(states - 1, states));
238 }
239
240 return WorkingNFA {
241 transitions,
242 epsilons,
243 states: states + 1,
244 };
245 }
246 fn build_upto(tree: &SimplifiedTreeNode, times: usize) -> WorkingNFA {
247 let nfa = WorkingNFA::build(tree);
248 let states = 2 + nfa.states * times;
249 let mut used_states = 1usize;
250 let mut transitions = Vec::new();
251 let mut epsilons = vec![
252 EpsilonTransition::new(0, 1),
253 EpsilonTransition::new(0, states - 1),
254 ];
255
256 for _ in 0..times {
257 transitions.extend(nfa.transitions.iter().map(|t| t.add_offset(states)));
258 epsilons.extend(nfa.epsilons.iter().map(|t| t.add_offset(states)));
259 used_states += nfa.states;
260 epsilons.push(EpsilonTransition::new(used_states - 1, used_states));
261 if used_states != states - 1 {
262 epsilons.push(EpsilonTransition::new(used_states - 1, states - 1));
263 }
264 }
265
266 return WorkingNFA {
267 transitions,
268 epsilons,
269 states,
270 };
271 }
272 fn build_star(tree: &SimplifiedTreeNode) -> WorkingNFA {
273 let nfa = WorkingNFA::build(tree);
274 let states = 2 + nfa.states;
275 let transitions: Vec<_> = nfa
276 .transitions
277 .into_iter()
278 .map(|t| t.with_offset(1))
279 .collect();
280 let mut epsilons: Vec<_> = nfa.epsilons.into_iter().map(|t| t.with_offset(1)).collect();
281 epsilons.push(EpsilonTransition::new(0, 1));
282 epsilons.push(EpsilonTransition::new(states - 2, 0));
283 epsilons.push(EpsilonTransition::new(0, states - 1));
284 return WorkingNFA {
285 transitions,
286 epsilons,
287 states,
288 };
289 }
290 fn build_start() -> WorkingNFA {
291 return WorkingNFA {
292 transitions: Vec::new(),
293 epsilons: vec![EpsilonTransition {
294 from: 0,
295 to: 1,
296 special: EpsilonType::StartAnchor,
297 }],
298 states: 2,
299 };
300 }
301 fn build_end() -> WorkingNFA {
302 return WorkingNFA {
303 transitions: Vec::new(),
304 epsilons: vec![EpsilonTransition {
305 from: 0,
306 to: 1,
307 special: EpsilonType::EndAnchor,
308 }],
309 states: 2,
310 };
311 }
312 const fn build_never() -> WorkingNFA {
313 return WorkingNFA {
314 transitions: Vec::new(),
315 epsilons: Vec::new(),
316 states: 2,
317 };
318 }
319 fn build(tree: &SimplifiedTreeNode) -> WorkingNFA {
323 return match tree {
324 SimplifiedTreeNode::Empty => WorkingNFA::build_empty(),
325 SimplifiedTreeNode::Symbol(c) => WorkingNFA::build_symbol(c),
326 SimplifiedTreeNode::Union(nodes) => WorkingNFA::build_union(nodes),
327 SimplifiedTreeNode::Capture(tree, group_num) => {
328 WorkingNFA::build_capture(&tree, *group_num)
329 }
330 SimplifiedTreeNode::Concat(nodes) => WorkingNFA::build_concat(nodes),
331 SimplifiedTreeNode::Repeat(tree, times) => WorkingNFA::build_repeat(tree, *times),
332 SimplifiedTreeNode::UpTo(tree, times) => WorkingNFA::build_upto(tree, *times),
333 SimplifiedTreeNode::Star(tree) => WorkingNFA::build_star(tree),
334 SimplifiedTreeNode::Start => WorkingNFA::build_start(),
335 SimplifiedTreeNode::End => WorkingNFA::build_end(),
336 SimplifiedTreeNode::Never => WorkingNFA::build_never(),
337 };
338 }
339 pub fn new(tree: &SimplifiedTreeNode) -> WorkingNFA {
340 let mut nfa = WorkingNFA::build(tree);
341
342 nfa.clean_start_anchors();
343 nfa.clean_end_anchors();
344
345 nfa.transitions = nfa.transitions.iter().map(|t| t.add_offset(1)).collect();
347 nfa.epsilons = nfa.epsilons.iter().map(|e| e.add_offset(1)).collect();
348 nfa.states += 2;
349 nfa.epsilons.push(EpsilonTransition::new(0, 1));
350 nfa.transitions.push(WorkingTransition::new(
351 0,
352 0,
353 Atom::NonmatchingList(Vec::new()),
354 ));
355 nfa.epsilons
356 .push(EpsilonTransition::new(nfa.states - 2, nfa.states - 1));
357 nfa.transitions.push(WorkingTransition::new(
358 nfa.states - 1,
359 nfa.states - 1,
360 Atom::NonmatchingList(Vec::new()),
361 ));
362
363 let zero_symbol_states: Vec<bool> =
366 std::iter::zip(nfa.nodes_after_end(), nfa.nodes_before_start())
367 .map(|(a, b)| a || b)
368 .collect();
369 nfa.transitions.retain(|t| !zero_symbol_states[t.from]);
370
371 while nfa.optimize_pass() {}
373 nfa.remove_unreachable();
374 return nfa;
375 }
376
377 fn clean_start_anchors(&mut self) {
380 let mut zero_len_reachable = vec![false; self.states];
381 zero_len_reachable[0] = true;
382 let mut changed = false;
383 loop {
384 for e in self.epsilons.iter() {
385 if zero_len_reachable[e.from] && !zero_len_reachable[e.to] {
386 zero_len_reachable[e.to] = true;
387 changed = true;
388 }
389 }
390 if !changed {
391 break;
392 }
393 changed = false;
394 }
395 self.epsilons = self
396 .epsilons
397 .iter()
398 .filter(|t| t.special != EpsilonType::StartAnchor || zero_len_reachable[t.from])
399 .cloned()
400 .collect();
401 }
402
403 fn clean_end_anchors(&mut self) {
406 let mut zero_len_reachable = vec![false; self.states];
407 zero_len_reachable[self.states - 1] = true;
408 let mut changed = false;
409 loop {
410 for e in self.epsilons.iter() {
411 if !zero_len_reachable[e.from] && zero_len_reachable[e.to] {
412 zero_len_reachable[e.from] = true;
413 changed = true;
414 }
415 }
416 if !changed {
417 break;
418 }
419 changed = false;
420 }
421 self.epsilons = self
422 .epsilons
423 .iter()
424 .filter(|t| t.special != EpsilonType::EndAnchor || zero_len_reachable[t.to])
425 .cloned()
426 .collect();
427 }
428 fn nodes_after_end(&self) -> Vec<bool> {
430 let mut nodes = vec![true; self.states];
431 nodes[0] = false;
432 let mut changed = false;
433 loop {
434 for e in self.epsilons.iter() {
435 if !nodes[e.from] && nodes[e.to] && e.special != EpsilonType::EndAnchor {
436 nodes[e.to] = false;
437 changed = true;
438 }
439 }
440 for t in self.transitions.iter() {
441 if !nodes[t.from] && nodes[t.to] {
442 nodes[t.to] = false;
443 changed = true;
444 }
445 }
446 if !changed {
447 break;
448 }
449 changed = false;
450 }
451 return nodes;
452 }
453 fn nodes_before_start(&self) -> Vec<bool> {
455 let mut nodes = vec![true; self.states];
456 nodes[self.states - 1] = false;
457 let mut changed = false;
458 loop {
459 for e in self.epsilons.iter() {
460 if nodes[e.from] && !nodes[e.to] && e.special != EpsilonType::StartAnchor {
461 nodes[e.from] = false;
462 changed = true;
463 }
464 }
465 for t in self.transitions.iter() {
466 if nodes[t.from] && !nodes[t.to] {
467 nodes[t.from] = false;
468 changed = true;
469 }
470 }
471 if !changed {
472 break;
473 }
474 changed = false;
475 }
476 return nodes;
477 }
478
479 fn optimize_pass(&mut self) -> bool {
483 let mut changed = false;
484
485 let mut dead_states = vec![false; self.states];
486
487 for state in 1..self.states - 1 {
490 let incoming: Vec<usize> = self
491 .transitions
492 .iter()
493 .enumerate()
494 .filter(|(_, t)| t.to == state)
495 .map(|(i, _)| i)
496 .collect();
497 let outgoing: Vec<usize> = self
498 .transitions
499 .iter()
500 .enumerate()
501 .filter(|(_, t)| t.from == state)
502 .map(|(i, _)| i)
503 .collect();
504 let incoming_eps: Vec<usize> = self
505 .epsilons
506 .iter()
507 .enumerate()
508 .filter(|(_, t)| t.to == state)
509 .map(|(i, _)| i)
510 .collect();
511 let outgoing_eps: Vec<usize> = self
512 .epsilons
513 .iter()
514 .enumerate()
515 .filter(|(_, t)| t.from == state)
516 .map(|(i, _)| i)
517 .collect();
518
519 match (
520 incoming.as_slice(),
521 incoming_eps.as_slice(),
522 outgoing.as_slice(),
523 outgoing_eps.as_slice(),
524 ) {
525 (incoming, incoming_eps, &[], &[outgoing_eps])
527 if self.epsilons[outgoing_eps].special == EpsilonType::None =>
528 {
529 for idx in incoming {
530 self.transitions[*idx].to = self.epsilons[outgoing_eps].to;
531 }
532 for idx in incoming_eps {
533 self.epsilons[*idx].to = self.epsilons[outgoing_eps].to;
534 }
535 dead_states[state] = true;
536 self.epsilons.swap_remove(outgoing_eps);
537 changed = true;
538 continue;
539 }
540 (&[], &[incoming_eps], outgoing, outgoing_eps)
542 if self.epsilons[incoming_eps].special == EpsilonType::None =>
543 {
544 for idx in outgoing {
545 self.transitions[*idx].from = self.epsilons[incoming_eps].from;
546 }
547 for idx in outgoing_eps {
548 self.epsilons[*idx].from = self.epsilons[incoming_eps].from;
549 }
550 dead_states[state] = true;
551 self.epsilons.swap_remove(incoming_eps);
552 changed = true;
553 continue;
554 }
555 _ => {}
556 }
557
558 }
564 if !changed {
565 return changed;
566 }
567
568 let mut new_states = 0;
569 let state_map: Vec<usize> = dead_states
570 .into_iter()
571 .scan(0, |s, dead| {
572 if dead {
573 return Some(usize::MAX);
574 } else {
575 let out = *s;
576 *s += 1;
577 new_states = *s;
578 return Some(out);
579 }
580 })
581 .collect();
582 self.states = new_states;
583
584 for t in &mut self.transitions {
585 t.from = state_map[t.from];
586 t.to = state_map[t.to];
587 }
588 for t in &mut self.epsilons {
589 t.from = state_map[t.from];
590 t.to = state_map[t.to];
591 }
592
593 return changed;
594 }
595
596 fn remove_unreachable(&mut self) {
600 let mut reach_start = vec![false; self.states];
601 reach_start[0] = true;
602 let mut reach_end = vec![false; self.states];
603 reach_end[self.states - 1] = true;
604
605 let mut changed = false;
606 loop {
607 for e in self.epsilons.iter() {
608 if reach_start[e.from] && !reach_start[e.to] {
609 reach_start[e.to] = true;
610 changed = true;
611 }
612 if !reach_end[e.from] && reach_end[e.to] {
613 reach_end[e.from] = true;
614 changed = true;
615 }
616 }
617 for t in self.transitions.iter() {
618 if reach_start[t.from] && !reach_start[t.to] {
619 reach_start[t.to] = true;
620 changed = true;
621 }
622 if !reach_end[t.from] && reach_end[t.to] {
623 reach_end[t.from] = true;
624 changed = true;
625 }
626 }
627 if !changed {
628 break;
629 }
630 changed = false;
631 }
632
633 self.transitions = self
635 .transitions
636 .iter()
637 .filter(|t| reach_start[t.from] && reach_end[t.to])
638 .cloned()
639 .collect();
640 self.epsilons = self
641 .epsilons
642 .iter()
643 .filter(|e| reach_start[e.from] && reach_end[e.to])
644 .cloned()
645 .collect();
646
647 let mut new_states = 0;
649 let state_map: Vec<usize> = std::iter::zip(reach_start.into_iter(), reach_end.into_iter())
650 .map(|(a, b)| !a || !b)
651 .scan(0, |s, dead| {
652 if dead {
653 return Some(usize::MAX);
654 } else {
655 let out = *s;
656 *s += 1;
657 new_states = *s;
658 return Some(out);
659 }
660 })
661 .collect();
662 self.states = new_states;
663
664 for t in &mut self.transitions {
665 t.from = state_map[t.from];
666 t.to = state_map[t.to];
667 }
668 for t in &mut self.epsilons {
669 t.from = state_map[t.from];
670 t.to = state_map[t.to];
671 }
672 }
673
674 pub fn to_tikz(&self, include_doc: bool) -> String {
679 fn escape_latex(text: String) -> String {
681 return text
682 .chars()
683 .map(|c| match c {
684 '\\' => r"{\textbackslash}".to_string(),
685 '&' => r"\&".to_string(),
686 '%' => r"\%".to_string(),
687 '$' => r"\$".to_string(),
688 '#' => r"\#".to_string(),
689 '_' => r"\_".to_string(),
690 '{' => r"\{".to_string(),
691 '}' => r"\}".to_string(),
692 '~' => r"{\textasciitilde}".to_string(),
693 '^' => r"{\textasciicircum}".to_string(),
694 c => c.to_string(),
695 })
696 .collect();
697 }
698
699 let mut text_parts: Vec<String> = Vec::new();
700 if include_doc {
701 text_parts.push(
702 "\\documentclass{standalone}\n\\usepackage{tikz}\n\\usetikzlibrary{automata, positioning}\n\\begin{document}\n"
703 .into(),
704 );
705 }
706 text_parts.push("\\begin{tikzpicture}[node distance=2cm, auto]\n".into());
707
708 text_parts.push("\\node[state, initial](q0){$q_0$};\n".into());
709 for i in 1..self.states - 1 {
710 text_parts.push(format!(
711 "\\node[state, right of=q{}](q{}){{$q_{{{}}}$}};\n",
712 i - 1,
713 i,
714 i,
715 ));
716 }
717 text_parts.push(format!(
718 "\\node[state, accepting, right of=q{}](q{}){{$q_{{{}}}$}};\n",
719 self.states - 2,
720 self.states - 1,
721 self.states - 1,
722 ));
723
724 for WorkingTransition { from, to, symbol } in &self.transitions {
725 let bend = match to.cmp(from) {
726 std::cmp::Ordering::Less => "[bend left] ",
727 std::cmp::Ordering::Equal => "[loop below]",
728 std::cmp::Ordering::Greater => "[bend left] ",
729 };
730 text_parts.push(format!(
731 "\\path[->] (q{from}) edge {bend} node {{{}}} (q{to});\n",
732 escape_latex(symbol.to_string()),
733 ));
734 }
735 for EpsilonTransition { from, to, special } in &self.epsilons {
736 let bend = match to.cmp(from) {
737 std::cmp::Ordering::Less => "[bend left] ",
738 std::cmp::Ordering::Equal => "[loop below]",
739 std::cmp::Ordering::Greater => "[bend left] ",
740 };
741 let label = match special {
742 EpsilonType::None => r"$\epsilon$".to_string(),
743 EpsilonType::StartAnchor => r"{\textasciicircum}".to_string(),
744 EpsilonType::EndAnchor => r"\$".to_string(),
745 EpsilonType::StartCapture(group) => format!("{group}("),
746 EpsilonType::EndCapture(group) => format!("){group}"),
747 };
748 text_parts.push(format!(
749 "\\path[->] (q{from}) edge {bend} node {{{label}}} (q{to});\n"
750 ));
751 }
752
753 text_parts.push("\\end{tikzpicture}\n".into());
754 if include_doc {
755 text_parts.push("\\end{document}\n".into());
756 }
757 return text_parts.into_iter().collect();
758 }
759
760 pub fn test(&self, text: &str) -> bool {
762 let mut list = vec![false; self.states];
763 let mut new_list = vec![false; self.states];
764 list[0] = true;
765
766 let propogate_epsilon = |list: &mut Vec<bool>, idx: usize| loop {
768 let mut has_new = false;
769 for EpsilonTransition { from, to, special } in &self.epsilons {
770 if list[*from]
771 && !list[*to]
772 && (match special {
773 EpsilonType::StartAnchor => idx == 0,
774 EpsilonType::EndAnchor => idx == text.len(),
775 _ => true,
776 })
777 {
778 list[*to] = true;
779 has_new = true;
780 }
781 }
782 if !has_new {
783 break;
784 }
785 };
786
787 for (i, c) in text.char_indices() {
788 propogate_epsilon(&mut list, i);
789 for WorkingTransition { from, to, symbol } in &self.transitions {
790 if list[*from] && symbol.check(c) {
791 new_list[*to] = true;
792 }
793 }
794 let tmp = list;
795 list = new_list;
796 new_list = tmp;
797 new_list.fill(false);
798 }
799 propogate_epsilon(&mut list, text.len());
800 return *list.last().unwrap_or(&false);
801 }
802}
803impl std::fmt::Display for WorkingNFA {
804 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
805 writeln!(f, "{} states:", self.states)?;
806 for t in &self.transitions {
807 writeln!(f, " {t}")?;
808 }
809 return Ok(());
810 }
811}
812
813#[cfg(test)]
814mod tests {
815 use super::*;
816 use crate::parse_tree::ERE;
817
818 #[test]
819 fn abbc_raw() {
820 let nfa = WorkingNFA {
821 transitions: vec![
822 WorkingTransition::new(0, 1, 'a'.into()),
823 WorkingTransition::new(1, 2, 'b'.into()),
824 WorkingTransition::new(2, 3, 'c'.into()),
825 ],
826 epsilons: vec![EpsilonTransition::new(2, 1)],
827 states: 4,
828 };
829 assert!(nfa.test("abc"));
832 assert!(nfa.test("abbc"));
833 assert!(nfa.test("abbbc"));
834 assert!(nfa.test("abbbbc"));
835
836 assert!(!nfa.test("ac"));
837 assert!(!nfa.test("abcc"));
838 assert!(!nfa.test("bac"));
839 assert!(!nfa.test("acb"));
840 }
841
842 #[test]
843 fn phone_number() {
844 let ere = ERE::parse_str(r"^(\+1 )?[0-9]{3}-[0-9]{3}-[0-9]{4}$").unwrap();
845 let (tree, capture_groups) = SimplifiedTreeNode::from_ere(&ere);
846 assert_eq!(capture_groups, 2);
847 let nfa = WorkingNFA::new(&tree);
848 assert!(nfa.test("012-345-6789"));
851 assert!(nfa.test("987-654-3210"));
852 assert!(nfa.test("+1 555-555-5555"));
853 assert!(nfa.test("123-555-9876"));
854
855 assert!(!nfa.test("abcd"));
856 assert!(!nfa.test("0123456789"));
857 assert!(!nfa.test("012--345-6789"));
858 assert!(!nfa.test("(555) 555-5555"));
859 assert!(!nfa.test("1 555-555-5555"));
860 }
861
862 #[test]
863 fn double_loop() {
864 let ere = ERE::parse_str(r"^.*(.*)*$").unwrap();
865 let (tree, capture_groups) = SimplifiedTreeNode::from_ere(&ere);
866 assert_eq!(capture_groups, 2);
867 let nfa = WorkingNFA::new(&tree);
868 assert!(nfa.test(""));
871 assert!(nfa.test("asdf"));
872 assert!(nfa.test("1234567"));
873 assert!(nfa.test("0"));
874
875 assert!(!nfa.test("\0"));
876 }
877
878 #[test]
879 fn good_anchored_start() {
880 let ere = ERE::parse_str(r"^a|b*^c|d^|n").unwrap();
881 let (tree, capture_groups) = SimplifiedTreeNode::from_ere(&ere);
882 assert_eq!(capture_groups, 1);
883 let nfa = WorkingNFA::new(&tree);
884 assert!(nfa.test("a"));
887 assert!(nfa.test("c"));
888 assert!(nfa.test("cq"));
889 assert!(nfa.test("wwwnwww"));
890
891 assert!(!nfa.test(""));
892 assert!(!nfa.test("qb"));
893 assert!(!nfa.test("qc"));
894 assert!(!nfa.test("b"));
895 assert!(!nfa.test("bc"));
896 assert!(!nfa.test("bbbbbbc"));
897 assert!(!nfa.test("d"));
898 }
899
900 #[test]
901 fn good_anchored_end() {
902 let ere = ERE::parse_str(r"a$|b$c*|$d|n").unwrap();
903 let (tree, capture_groups) = SimplifiedTreeNode::from_ere(&ere);
904 assert_eq!(capture_groups, 1);
905 let nfa = WorkingNFA::new(&tree);
906 println!("{}", nfa.to_tikz(true));
907
908 assert!(nfa.test("a"));
909 assert!(nfa.test("b"));
910 assert!(nfa.test("qb"));
911 assert!(nfa.test("wwwnwww"));
912
913 assert!(!nfa.test(""));
914 assert!(!nfa.test("bq"));
915 assert!(!nfa.test("qc"));
916 assert!(!nfa.test("c"));
917 assert!(!nfa.test("bc"));
918 assert!(!nfa.test("bcccccc"));
919 assert!(!nfa.test("d"));
920 }
921}