1use crate::parse_tree::Atom;
4use crate::simplified_tree::SimplifiedTreeNode;
5use quote::{quote, ToTokens};
6
7#[derive(Debug, Clone, Copy, PartialEq, Eq)]
8pub enum EpsilonType {
9 None,
10 StartAnchor,
11 EndAnchor,
12 StartCapture(usize),
13 EndCapture(usize),
14}
15impl ToTokens for EpsilonType {
16 fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
17 match self {
18 EpsilonType::None => {
19 tokens.extend(quote! { ::ere_core::working_nfa::EpsilonType::None })
20 }
21 EpsilonType::StartAnchor => {
22 tokens.extend(quote! { ::ere_core::working_nfa::EpsilonType::StartAnchor })
23 }
24 EpsilonType::EndAnchor => {
25 tokens.extend(quote! { ::ere_core::working_nfa::EpsilonType::EndAnchor })
26 }
27 EpsilonType::StartCapture(group_num) => tokens.extend(quote! {
28 ::ere_core::working_nfa::EpsilonType::StartCapture(#group_num)
29 }),
30 EpsilonType::EndCapture(group_num) => tokens.extend(quote! {
31 ::ere_core::working_nfa::EpsilonType::EndCapture(#group_num)
32 }),
33 };
34 }
35}
36
37#[derive(Debug, Clone, Copy)]
39pub struct EpsilonTransition {
40 pub(crate) to: usize,
41 pub(crate) special: EpsilonType,
42}
43impl EpsilonTransition {
44 pub(crate) const fn new(to: usize) -> EpsilonTransition {
45 return EpsilonTransition {
46 to,
47 special: EpsilonType::None,
48 };
49 }
50 pub(crate) const fn with_offset(self, offset: usize) -> EpsilonTransition {
51 return EpsilonTransition {
52 to: self.to + offset,
53 special: self.special,
54 };
55 }
56 pub(crate) fn inplace_offset(&mut self, offset: usize) {
57 self.to += offset;
58 }
59 pub(crate) const fn add_offset(&self, offset: usize) -> EpsilonTransition {
60 return EpsilonTransition {
61 to: self.to + offset,
62 special: self.special,
63 };
64 }
65 pub const fn __load(to: usize, special: EpsilonType) -> EpsilonTransition {
67 return EpsilonTransition { to, special };
68 }
69}
70impl std::fmt::Display for EpsilonTransition {
71 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72 return write!(f, "-> {}", self.to);
73 }
74}
75impl ToTokens for EpsilonTransition {
76 fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
77 let EpsilonTransition { to, special } = self;
78 tokens.extend(quote! {
79 ere_core::working_nfa::EpsilonTransition::__load(
80 #to,
81 #special,
82 )
83 });
84 }
85}
86
87#[derive(Debug, Clone)]
88pub(crate) struct WorkingTransition {
89 pub(crate) to: usize,
90 pub(crate) symbol: Atom,
91}
92impl WorkingTransition {
93 pub fn new(to: usize, symbol: Atom) -> WorkingTransition {
94 return WorkingTransition { to, symbol };
95 }
96 pub fn with_offset(mut self, offset: usize) -> WorkingTransition {
97 self.inplace_offset(offset);
98 return self;
99 }
100 pub fn inplace_offset(&mut self, offset: usize) {
101 self.to += offset;
102 }
103 pub fn add_offset(&self, offset: usize) -> WorkingTransition {
104 return WorkingTransition {
105 to: self.to + offset,
106 symbol: self.symbol.clone(),
107 };
108 }
109}
110impl std::fmt::Display for WorkingTransition {
111 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112 return write!(f, "-({})> {}", self.symbol, self.to);
113 }
114}
115
116#[derive(Debug, Clone)]
117pub struct WorkingState {
118 pub(crate) transitions: Vec<WorkingTransition>,
119 pub(crate) epsilons: Vec<EpsilonTransition>,
120}
121impl WorkingState {
122 pub const fn new() -> WorkingState {
123 return WorkingState {
124 transitions: Vec::new(),
125 epsilons: Vec::new(),
126 };
127 }
128 pub fn with_transition(mut self, to: usize, symbol: Atom) -> WorkingState {
129 self.transitions.push(WorkingTransition::new(to, symbol));
130 return self;
131 }
132 pub fn with_epsilon(mut self, to: usize) -> WorkingState {
133 self.epsilons.push(EpsilonTransition::new(to));
134 return self;
135 }
136 pub fn with_epsilon_special(mut self, to: usize, special: EpsilonType) -> WorkingState {
137 self.epsilons.push(EpsilonTransition { to, special });
138 return self;
139 }
140 pub fn with_offset(mut self, offset: usize) -> WorkingState {
141 self.inplace_offset(offset);
142 return self;
143 }
144 pub fn inplace_offset(&mut self, offset: usize) {
145 for t in &mut self.transitions {
146 t.inplace_offset(offset);
147 }
148 for e in &mut self.epsilons {
149 e.inplace_offset(offset);
150 }
151 }
152 pub fn add_offset(&self, offset: usize) -> WorkingState {
153 return WorkingState {
154 transitions: self
155 .transitions
156 .iter()
157 .map(|t| t.add_offset(offset))
158 .collect(),
159 epsilons: self.epsilons.iter().map(|e| e.add_offset(offset)).collect(),
160 };
161 }
162}
163impl std::fmt::Display for WorkingState {
164 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
165 for t in &self.transitions {
166 writeln!(f, " {t}")?;
167 }
168 for e in &self.epsilons {
169 writeln!(f, " {e}")?;
170 }
171 return Ok(());
172 }
173}
174
175#[derive(Debug, Clone)]
177pub struct WorkingNFA {
178 pub(crate) states: Vec<WorkingState>,
179}
180impl WorkingNFA {
181 fn nfa_empty() -> WorkingNFA {
183 let states = vec![WorkingState::new()];
184 return WorkingNFA { states };
185 }
186 fn nfa_symbol(c: &Atom) -> WorkingNFA {
188 let states = vec![
189 WorkingState::new().with_transition(1, c.clone()),
190 WorkingState::new(),
191 ];
192 return WorkingNFA { states };
193 }
194 fn nfa_union(nodes: &[WorkingNFA]) -> WorkingNFA {
196 let states_count = 2 + nodes.iter().map(|n| n.states.len()).sum::<usize>();
197 let mut states = vec![WorkingState::new()];
198 for nfa in nodes {
199 let sub_nfa_start = states.len();
200 states[0]
201 .epsilons
202 .push(EpsilonTransition::new(sub_nfa_start));
203 states.extend(
204 nfa.states
205 .iter()
206 .map(|state| state.add_offset(sub_nfa_start)),
207 );
208 states
209 .last_mut()
210 .unwrap()
211 .epsilons
212 .push(EpsilonTransition::new(states_count - 1));
213 }
214 states.push(WorkingState::new());
215 assert_eq!(states_count, states.len());
216
217 return WorkingNFA { states };
218 }
219 fn build_union(nodes: &[SimplifiedTreeNode]) -> WorkingNFA {
220 let sub_nfas: Vec<WorkingNFA> = nodes.iter().map(WorkingNFA::build).collect();
221 return WorkingNFA::nfa_union(&sub_nfas);
222 }
223 fn nfa_capture(nfa: &WorkingNFA, group_num: usize) -> WorkingNFA {
225 let states_count = 2 + nfa.states.len();
226 let mut states: Vec<WorkingState> = std::iter::once(
227 WorkingState::new().with_epsilon_special(1, EpsilonType::StartCapture(group_num)),
228 )
229 .chain(nfa.states.iter().map(|state| state.add_offset(1)))
230 .chain(std::iter::once(WorkingState::new()))
231 .collect();
232 assert_eq!(states_count, states.len());
233 states[states_count - 2].epsilons.push(EpsilonTransition {
234 to: states_count - 1,
235 special: EpsilonType::EndCapture(group_num),
236 });
237
238 return WorkingNFA { states };
239 }
240 fn build_capture(tree: &SimplifiedTreeNode, group_num: usize) -> WorkingNFA {
241 let nfa = WorkingNFA::build(tree);
242 return WorkingNFA::nfa_capture(&nfa, group_num);
243 }
244 fn nfa_concat<T: IntoIterator<Item = WorkingNFA>>(nodes: T) -> WorkingNFA {
246 let mut states = vec![WorkingState::new().with_epsilon(1)];
247
248 for nfa in nodes {
249 let states_count = states.len();
250 states.extend(
251 nfa.states
252 .into_iter()
253 .map(|state| state.with_offset(states_count)),
254 );
255 let states_count = states.len();
256 states
257 .last_mut()
258 .unwrap()
259 .epsilons
260 .push(EpsilonTransition::new(states_count));
261 }
262
263 states.push(WorkingState::new());
264 return WorkingNFA { states };
265 }
266 fn build_concat<'a, T: IntoIterator<Item = &'a SimplifiedTreeNode>>(nodes: T) -> WorkingNFA {
267 return WorkingNFA::nfa_concat(nodes.into_iter().map(WorkingNFA::build));
268 }
269 fn nfa_repeat(nfa: &WorkingNFA, times: usize) -> WorkingNFA {
271 return WorkingNFA::nfa_concat(std::iter::repeat(nfa).cloned().take(times));
272 }
273 fn build_repeat(tree: &SimplifiedTreeNode, times: usize) -> WorkingNFA {
274 let nfa = WorkingNFA::build(tree);
275 return WorkingNFA::nfa_repeat(&nfa, times);
276 }
277 fn nfa_upto(nfa: &WorkingNFA, times: usize, longest: bool) -> WorkingNFA {
279 let end_state_idx = 1 + (nfa.states.len() + 1) * times;
280
281 let mut states = vec![WorkingState::new()
282 .with_epsilon(1)
283 .with_epsilon(end_state_idx - 1)];
284 for i in 0..times {
285 let states_count = states.len();
286 states.extend(
287 nfa.states
288 .iter()
289 .map(|state| state.add_offset(states_count)),
290 );
291 let transition_state_idx = states.len();
292 states
293 .last_mut()
294 .unwrap()
295 .epsilons
296 .push(EpsilonTransition::new(transition_state_idx));
297 let mut transition_state = WorkingState::new();
298 if i + 1 != times {
299 if longest {
300 transition_state
301 .epsilons
302 .push(EpsilonTransition::new(states.len() + 1));
303 }
304
305 transition_state
306 .epsilons
307 .push(EpsilonTransition::new(end_state_idx - 1));
308 if !longest {
309 transition_state
310 .epsilons
311 .push(EpsilonTransition::new(states.len() + 1));
312 }
313 }
314 states.push(transition_state);
315 }
316
317 return WorkingNFA { states };
318 }
319 fn build_upto(tree: &SimplifiedTreeNode, times: usize, longest: bool) -> WorkingNFA {
320 let nfa = WorkingNFA::build(tree);
321 return WorkingNFA::nfa_upto(&nfa, times, longest);
322 }
323 fn nfa_star(nfa: WorkingNFA, longest: bool) -> WorkingNFA {
325 let end_state_idx = 1 + nfa.states.len();
326 let mut start_state = WorkingState::new();
327 if !longest {
328 start_state
329 .epsilons
330 .push(EpsilonTransition::new(end_state_idx));
331 }
332 start_state.epsilons.push(EpsilonTransition::new(1));
333 if longest {
334 start_state
335 .epsilons
336 .push(EpsilonTransition::new(end_state_idx));
337 }
338 let mut states: Vec<WorkingState> = std::iter::once(start_state)
339 .chain(nfa.states.into_iter().map(|state| state.with_offset(1)))
340 .chain(std::iter::once(WorkingState::new()))
341 .collect();
342 states[end_state_idx - 1]
343 .epsilons
344 .push(EpsilonTransition::new(0));
345 return WorkingNFA { states };
346 }
347 fn build_star(tree: &SimplifiedTreeNode, longest: bool) -> WorkingNFA {
348 let nfa = WorkingNFA::build(tree);
349 return WorkingNFA::nfa_star(nfa, longest);
350 }
351 fn nfa_start() -> WorkingNFA {
353 let states = vec![
354 WorkingState::new().with_epsilon_special(1, EpsilonType::StartAnchor),
355 WorkingState::new(),
356 ];
357 return WorkingNFA { states };
358 }
359 fn nfa_end() -> WorkingNFA {
361 let states = vec![
362 WorkingState::new().with_epsilon_special(1, EpsilonType::EndAnchor),
363 WorkingState::new(),
364 ];
365 return WorkingNFA { states };
366 }
367 fn nfa_never() -> WorkingNFA {
369 let states = vec![WorkingState::new(), WorkingState::new()];
370 return WorkingNFA { states };
371 }
372 fn build(tree: &SimplifiedTreeNode) -> WorkingNFA {
376 return match tree {
377 SimplifiedTreeNode::Empty => WorkingNFA::nfa_empty(),
378 SimplifiedTreeNode::Symbol(c) => WorkingNFA::nfa_symbol(c),
379 SimplifiedTreeNode::Union(nodes) => WorkingNFA::build_union(nodes),
380 SimplifiedTreeNode::Capture(tree, group_num) => {
381 WorkingNFA::build_capture(&tree, *group_num)
382 }
383 SimplifiedTreeNode::Concat(nodes) => WorkingNFA::build_concat(nodes),
384 SimplifiedTreeNode::Repeat(tree, times) => WorkingNFA::build_repeat(tree, *times),
385 SimplifiedTreeNode::UpTo(tree, times, longest) => {
386 WorkingNFA::build_upto(tree, *times, *longest)
387 }
388 SimplifiedTreeNode::Star(tree, longest) => WorkingNFA::build_star(tree, *longest),
389 SimplifiedTreeNode::Start => WorkingNFA::nfa_start(),
390 SimplifiedTreeNode::End => WorkingNFA::nfa_end(),
391 SimplifiedTreeNode::Never => WorkingNFA::nfa_never(),
392 };
393 }
394 pub fn new(tree: &SimplifiedTreeNode) -> WorkingNFA {
395 let mut nfa = WorkingNFA::build(tree);
396
397 nfa.clean_start_anchors();
398 nfa.clean_end_anchors();
399
400 nfa = WorkingNFA::nfa_concat([
402 WorkingNFA::nfa_star(
403 WorkingNFA::nfa_symbol(&Atom::NonmatchingList(Vec::new())),
404 false,
405 ),
406 nfa,
407 WorkingNFA::nfa_star(
408 WorkingNFA::nfa_symbol(&Atom::NonmatchingList(Vec::new())),
409 false,
410 ),
411 ]);
412
413 let zero_symbol_states: Vec<bool> =
416 std::iter::zip(nfa.nodes_after_end(), nfa.nodes_before_start())
417 .map(|(a, b)| a || b)
418 .collect();
419 for (from, state) in nfa.states.iter_mut().enumerate() {
420 if zero_symbol_states[from] {
421 state.transitions = Vec::new();
422 }
423 }
424
425 while nfa.optimize_pass() {}
427 nfa.remove_unreachable();
428 return nfa;
429 }
430
431 fn clean_start_anchors(&mut self) {
434 let mut zero_len_reachable = vec![false; self.states.len()];
435 zero_len_reachable[0] = true;
436 let mut stack = vec![0];
437 while let Some(state) = stack.pop() {
438 for e in &self.states[state].epsilons {
439 if !zero_len_reachable[e.to] {
440 stack.push(e.to);
441 }
442 zero_len_reachable[e.to] = true;
443 }
444 }
445
446 for (i, state) in self.states.iter_mut().enumerate() {
447 state
448 .epsilons
449 .retain(|e| e.special != EpsilonType::StartAnchor || zero_len_reachable[i]);
450 }
451 }
452
453 fn clean_end_anchors(&mut self) {
456 let mut zero_len_reachable = vec![false; self.states.len()];
457 zero_len_reachable[self.states.len() - 1] = true;
458
459 let mut reverse_epsilons = vec![Vec::new(); self.states.len()];
460 for (i, state) in self.states.iter().enumerate() {
461 for e in &state.epsilons {
462 reverse_epsilons[e.to].push(i);
463 }
464 }
465
466 let mut stack = vec![self.states.len() - 1];
467 while let Some(state) = stack.pop() {
468 for src in &reverse_epsilons[state] {
469 if !zero_len_reachable[*src] {
470 stack.push(*src);
471 }
472 zero_len_reachable[*src] = true;
473 }
474 }
475
476 for state in self.states.iter_mut() {
477 state
478 .epsilons
479 .retain(|e| e.special != EpsilonType::EndAnchor || zero_len_reachable[e.to]);
480 }
481 }
482 fn nodes_after_end(&self) -> Vec<bool> {
484 let mut nodes = vec![true; self.states.len()];
485 nodes[0] = false;
486
487 let mut stack = vec![0];
488 while let Some(from) = stack.pop() {
489 for e in self.states[from].epsilons.iter() {
490 if nodes[e.to] && e.special != EpsilonType::EndAnchor {
491 nodes[e.to] = false;
492 stack.push(e.to);
493 }
494 }
495 for t in self.states[from].transitions.iter() {
496 if nodes[t.to] {
497 nodes[t.to] = false;
498 stack.push(t.to);
499 }
500 }
501 }
502 return nodes;
503 }
504 fn nodes_before_start(&self) -> Vec<bool> {
506 let mut reverse = vec![Vec::new(); self.states.len()];
507 for (i, state) in self.states.iter().enumerate() {
508 for e in &state.epsilons {
509 if e.special != EpsilonType::StartAnchor {
510 reverse[e.to].push(i);
511 }
512 }
513 for t in &state.transitions {
514 reverse[t.to].push(i);
515 }
516 }
517
518 let mut nodes = vec![true; self.states.len()];
519 nodes[self.states.len() - 1] = false;
520
521 let mut stack = vec![self.states.len() - 1];
522 while let Some(to) = stack.pop() {
523 for from in &reverse[to] {
524 if nodes[*from] {
525 nodes[*from] = false;
526 stack.push(*from);
527 }
528 }
529 }
530 return nodes;
531 }
532
533 fn remove_dead_states<T: IntoIterator<Item = bool>>(&mut self, dead_states: T) {
537 let state_map: Vec<usize> = dead_states
538 .into_iter()
539 .scan(0, |s, dead| {
540 if dead {
541 return Some(usize::MAX);
542 } else {
543 let out = *s;
544 *s += 1;
545 return Some(out);
546 }
547 })
548 .collect();
549 self.states = self
550 .states
551 .iter()
552 .enumerate()
553 .filter(|(i, _)| state_map[*i] != usize::MAX)
554 .map(|(_, state)| state)
555 .cloned()
556 .collect();
557
558 for state in &mut self.states {
559 for t in &mut state.transitions {
560 t.to = state_map[t.to];
561 }
562 for t in &mut state.epsilons {
563 t.to = state_map[t.to];
564 }
565 }
566 }
567
568 fn optimize_pass(&mut self) -> bool {
572 let mut changed = false;
573 let state_count = self.states.len();
574
575 let mut dead_states = vec![false; self.states.len()];
576
577 for state_idx in 1..state_count - 1 {
580 let incoming: Vec<(usize, usize)> = self
581 .states
582 .iter()
583 .enumerate()
584 .flat_map(|(s_i, s)| s.transitions.iter().enumerate().map(move |(t, _)| (s_i, t)))
585 .filter(|(s, t)| self.states[*s].transitions[*t].to == state_idx)
586 .collect();
587 let incoming_eps: Vec<(usize, usize)> = self
588 .states
589 .iter()
590 .enumerate()
591 .flat_map(|(s_i, s)| s.epsilons.iter().enumerate().map(move |(e, _)| (s_i, e)))
592 .filter(|(s, e)| self.states[*s].epsilons[*e].to == state_idx)
593 .collect();
594
595 match (
596 incoming.as_slice(),
597 incoming_eps.as_slice(),
598 self.states[state_idx].transitions.len(),
599 self.states[state_idx].epsilons.len(),
600 ) {
601 (incoming, incoming_eps, 0, 1)
603 if self.states[state_idx].epsilons[0].special == EpsilonType::None =>
604 {
605 let to = self.states[state_idx].epsilons[0].to;
606 for (s, t) in incoming {
607 self.states[*s].transitions[*t].to = to;
608 }
609 for (s, e) in incoming_eps {
610 self.states[*s].epsilons[*e].to = to;
611 }
612 dead_states[state_idx] = true;
613 self.states[state_idx].epsilons = Vec::new();
614 changed = true;
615 continue;
616 }
617 (&[], &[(incoming_state, incoming_eps)], 0, _)
619 if self.states[incoming_state].epsilons[incoming_eps].special
620 == EpsilonType::None =>
621 {
622 let outgoing_eps = std::mem::take(&mut self.states[state_idx].epsilons);
623 let after = self.states[incoming_state]
624 .epsilons
625 .split_off(incoming_eps + 1);
626 self.states[incoming_state].epsilons.pop();
627 self.states[incoming_state]
628 .epsilons
629 .extend_from_slice(&outgoing_eps);
630 self.states[incoming_state]
631 .epsilons
632 .extend_from_slice(&after);
633
634 dead_states[state_idx] = true;
635 changed = true;
636 continue;
637 }
638 _ => {}
639 }
640
641 }
648 if !changed {
649 return changed;
650 }
651
652 self.remove_dead_states(dead_states);
653
654 return changed;
655 }
656
657 fn states_reachable_start(&self) -> Vec<bool> {
659 let mut reachable = vec![false; self.states.len()];
660 reachable[0] = true;
661 let mut stack = vec![0];
662
663 while let Some(state) = stack.pop() {
664 for src in &self.states[state].epsilons {
665 if !reachable[src.to] {
666 stack.push(src.to);
667 }
668 reachable[src.to] = true;
669 }
670 for src in &self.states[state].transitions {
671 if !reachable[src.to] {
672 stack.push(src.to);
673 }
674 reachable[src.to] = true;
675 }
676 }
677
678 return reachable;
679 }
680 fn states_reachable_end(&self) -> Vec<bool> {
682 let mut reverse = vec![Vec::new(); self.states.len()];
683 for (i, state) in self.states.iter().enumerate() {
684 for e in &state.epsilons {
685 reverse[e.to].push(i);
686 }
687 for t in &state.transitions {
688 reverse[t.to].push(i);
689 }
690 }
691
692 let mut reachable = vec![false; self.states.len()];
693 reachable[self.states.len() - 1] = true;
694 let mut stack = vec![self.states.len() - 1];
695
696 while let Some(state) = stack.pop() {
697 for src in &reverse[state] {
698 if !reachable[*src] {
699 stack.push(*src);
700 }
701 reachable[*src] = true;
702 }
703 }
704
705 return reachable;
706 }
707
708 fn remove_unreachable(&mut self) {
712 let reach_start = self.states_reachable_start();
713 let reach_end = self.states_reachable_end();
714
715 for state in &mut self.states {
717 state
718 .epsilons
719 .retain(|e| reach_start[e.to] && reach_end[e.to]);
720 state
721 .transitions
722 .retain(|t| reach_start[t.to] && reach_end[t.to]);
723 }
724
725 self.remove_dead_states(
727 std::iter::zip(reach_start.into_iter(), reach_end.into_iter()).map(|(a, b)| !a || !b),
728 );
729 }
730
731 pub fn num_capture_groups(&self) -> usize {
733 return self
734 .states
735 .iter()
736 .flat_map(|state| &state.epsilons)
737 .map(|eps| match eps.special {
738 EpsilonType::StartCapture(n) => n,
739 _ => 0,
740 })
741 .max()
742 .unwrap_or(0)
743 + 1;
744 }
745
746 pub fn capture_group_is_optional(&self, group_num: usize) -> bool {
748 let mut reached_states = vec![false; self.states.len()];
749 reached_states[0] = true;
750 let mut stack = vec![0];
751 while let Some(idx) = stack.pop() {
752 for t in &self.states[idx].transitions {
753 if !reached_states[t.to] {
754 reached_states[t.to] = true;
755 stack.push(t.to);
756 }
757 }
758 for e in &self.states[idx].epsilons {
759 if !reached_states[e.to] && e.special != EpsilonType::StartCapture(group_num) {
760 debug_assert_ne!(e.special, EpsilonType::EndCapture(group_num));
763 reached_states[e.to] = true;
764 stack.push(e.to);
765 }
766 }
767 }
768
769 return *reached_states.last().unwrap();
770 }
771
772 pub fn to_tikz(&self, include_doc: bool) -> String {
777 let map_state =
778 |(i, state): (usize, &WorkingState)| -> crate::visualization::LatexGraphState {
779 let transitions =
780 state
781 .transitions
782 .iter()
783 .map(|t| crate::visualization::LatexGraphTransition {
784 label: crate::visualization::escape_latex(t.symbol.to_string()),
785 to: t.to,
786 });
787 let epsilons = state.epsilons.iter().map(|e| {
788 let label = match e.special {
789 EpsilonType::None => r"$\epsilon$".to_string(),
790 EpsilonType::StartAnchor => r"{\textasciicircum}".to_string(),
791 EpsilonType::EndAnchor => r"\$".to_string(),
792 EpsilonType::StartCapture(group) => format!("{group}("),
793 EpsilonType::EndCapture(group) => format!("){group}"),
794 };
795 return crate::visualization::LatexGraphTransition { label, to: e.to };
796 });
797 let transitions = transitions.chain(epsilons).collect();
798 return crate::visualization::LatexGraphState {
799 label: format!("q{i}"),
800 transitions,
801 initial: i == 0,
802 accept: i + 1 == self.states.len(),
803 };
804 };
805
806 let graph = crate::visualization::LatexGraph {
807 states: self.states.iter().enumerate().map(map_state).collect(),
808 };
809 return graph.to_tikz(include_doc);
810 }
811
812 pub fn test(&self, text: &str) -> bool {
814 let mut list = vec![false; self.states.len()];
815 let mut new_list = vec![false; self.states.len()];
816 list[0] = true;
817
818 let propogate_epsilon = |list: &mut Vec<bool>, idx: usize| {
820 let mut stack: Vec<usize> = list
821 .iter()
822 .enumerate()
823 .filter_map(|(i, set)| set.then_some(i))
824 .collect();
825
826 while let Some(from) = stack.pop() {
827 for EpsilonTransition { to, special } in &self.states[from].epsilons {
828 if list[from]
829 && !list[*to]
830 && (match special {
831 EpsilonType::StartAnchor => idx == 0,
832 EpsilonType::EndAnchor => idx == text.len(),
833 _ => true,
834 })
835 {
836 stack.push(*to);
837 list[*to] = true;
838 }
839 }
840 }
841 };
842
843 for (i, c) in text.char_indices() {
844 propogate_epsilon(&mut list, i);
845 for (from, state) in self.states.iter().enumerate() {
846 if !list[from] {
847 continue;
848 }
849
850 for WorkingTransition { to, symbol } in &state.transitions {
851 if symbol.check(c) {
852 new_list[*to] = true;
853 }
854 }
855 }
856 let tmp = list;
857 list = new_list;
858 new_list = tmp;
859 new_list.fill(false);
860 }
861 propogate_epsilon(&mut list, text.len());
862 return *list.last().unwrap_or(&false);
863 }
864}
865impl std::fmt::Display for WorkingNFA {
866 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
867 for (i, state) in self.states.iter().enumerate() {
868 writeln!(f, "State {i}:")?;
869 for e in &state.epsilons {
870 writeln!(f, " {e}")?;
871 }
872 for t in &state.transitions {
873 writeln!(f, " {t}")?;
874 }
875 }
876 return Ok(());
877 }
878}
879
880#[cfg(test)]
881mod tests {
882 use super::*;
883 use crate::{config::Config, parse_tree::ERE};
884
885 #[test]
886 fn abbc_raw() {
887 let nfa = WorkingNFA {
888 states: vec![
889 WorkingState::new().with_transition(1, 'a'.into()),
890 WorkingState::new().with_transition(2, 'b'.into()),
891 WorkingState::new()
892 .with_transition(3, 'c'.into())
893 .with_epsilon(1),
894 WorkingState::new(),
895 ],
896 };
897 println!("{}", nfa.to_tikz(true));
898
899 assert!(nfa.test("abc"));
900 assert!(nfa.test("abbc"));
901 assert!(nfa.test("abbbc"));
902 assert!(nfa.test("abbbbc"));
903
904 assert!(!nfa.test("ac"));
905 assert!(!nfa.test("abcc"));
906 assert!(!nfa.test("bac"));
907 assert!(!nfa.test("acb"));
908 }
909
910 #[test]
911 fn phone_number() {
912 let ere = ERE::parse_str(r"^(\+1 )?[0-9]{3}-[0-9]{3}-[0-9]{4}$").unwrap();
913 let (tree, capture_groups) = SimplifiedTreeNode::from_ere(&ere, &Config::default());
914 assert_eq!(capture_groups, 2);
915 let nfa = WorkingNFA::new(&tree);
916 println!("{}", nfa.to_tikz(true));
917
918 assert!(nfa.test("012-345-6789"));
919 assert!(nfa.test("987-654-3210"));
920 assert!(nfa.test("+1 555-555-5555"));
921 assert!(nfa.test("123-555-9876"));
922
923 assert!(!nfa.test("abcd"));
924 assert!(!nfa.test("0123456789"));
925 assert!(!nfa.test("012--345-6789"));
926 assert!(!nfa.test("(555) 555-5555"));
927 assert!(!nfa.test("1 555-555-5555"));
928 }
929
930 #[test]
931 fn double_loop() {
932 let ere = ERE::parse_str(r"^.*(.*)*$").unwrap();
933 let (tree, capture_groups) = SimplifiedTreeNode::from_ere(&ere, &Config::default());
934 assert_eq!(capture_groups, 2);
935 let nfa = WorkingNFA::new(&tree);
936 assert!(nfa.test(""));
939 assert!(nfa.test("asdf"));
940 assert!(nfa.test("1234567"));
941 assert!(nfa.test("0"));
942
943 assert!(!nfa.test("\0"));
944 }
945
946 #[test]
947 fn good_anchored_start() {
948 let ere = ERE::parse_str(r"^a|b*^c|d^|n").unwrap();
949 let (tree, capture_groups) = SimplifiedTreeNode::from_ere(&ere, &Config::default());
950 assert_eq!(capture_groups, 1);
951 let nfa = WorkingNFA::new(&tree);
952 assert!(nfa.test("a"));
955 assert!(nfa.test("c"));
956 assert!(nfa.test("cq"));
957 assert!(nfa.test("wwwnwww"));
958
959 assert!(!nfa.test(""));
960 assert!(!nfa.test("qb"));
961 assert!(!nfa.test("qc"));
962 assert!(!nfa.test("b"));
963 assert!(!nfa.test("bc"));
964 assert!(!nfa.test("bbbbbbc"));
965 assert!(!nfa.test("d"));
966 }
967
968 #[test]
969 fn good_anchored_end() {
970 let ere = ERE::parse_str(r"a$|b$c*|$d|n").unwrap();
971 let (tree, capture_groups) = SimplifiedTreeNode::from_ere(&ere, &Config::default());
972 assert_eq!(capture_groups, 1);
973 let nfa = WorkingNFA::new(&tree);
974 println!("{}", nfa.to_tikz(true));
975
976 assert!(nfa.test("a"));
977 assert!(nfa.test("b"));
978 assert!(nfa.test("qb"));
979 assert!(nfa.test("wwwnwww"));
980
981 assert!(!nfa.test(""));
982 assert!(!nfa.test("bq"));
983 assert!(!nfa.test("qc"));
984 assert!(!nfa.test("c"));
985 assert!(!nfa.test("bc"));
986 assert!(!nfa.test("bcccccc"));
987 assert!(!nfa.test("d"));
988 }
989
990 #[test]
991 fn range_digit() {
992 let ere = ERE::parse_str(r"^[[:digit:].]$").unwrap();
993 let (tree, capture_groups) = SimplifiedTreeNode::from_ere(&ere, &Config::default());
994 assert_eq!(capture_groups, 1);
995 let nfa = WorkingNFA::new(&tree);
996 assert!(nfa.test("0"));
999 assert!(nfa.test("1"));
1000 assert!(nfa.test("9"));
1001 assert!(nfa.test("."));
1002
1003 assert!(!nfa.test(""));
1004 assert!(!nfa.test("a"));
1005 assert!(!nfa.test("11"));
1006 assert!(!nfa.test("1."));
1007 assert!(!nfa.test(".2"));
1008 assert!(!nfa.test("09"));
1009 assert!(!nfa.test("d"));
1010 }
1011}