1use btree_range_map::{AnyRange, RangeMap, RangeSet};
2use educe::Educe;
3use range_traits::{Enum, Measure};
4use std::{
5 collections::{BTreeMap, BTreeSet, HashSet},
6 hash::Hash,
7 ops::Bound,
8};
9
10use crate::{Automaton, Class, DFA, Map, Token, dfa::DetTransitions};
11
12use super::token_set_intersection;
13
14mod tags;
15pub use tags::{TaggedNFA, Tags};
16
17#[derive(Debug)]
18pub struct TooManyStates;
19
20pub trait StateBuilder<T, Q, C = ()> {
22 type Error;
23
24 fn next_state(&mut self, nfa: &mut NFA<Q, T>, class: C) -> Result<Q, Self::Error>;
25
26 fn class_of(&self, q: &Q) -> Option<&C>;
27}
28
29impl<T, Q, C, S: StateBuilder<T, Q, C>> StateBuilder<T, Q, C> for &mut S {
30 type Error = S::Error;
31
32 fn next_state(&mut self, nfa: &mut NFA<Q, T>, class: C) -> Result<Q, Self::Error> {
33 S::next_state(*self, nfa, class)
34 }
35
36 fn class_of(&self, q: &Q) -> Option<&C> {
37 S::class_of(*self, q)
38 }
39}
40
41pub struct U32StateBuilder<C> {
42 states: Vec<C>,
43 limit: u32,
44}
45
46impl<C> U32StateBuilder<C> {
47 pub fn new() -> Self {
48 Self::default()
49 }
50}
51
52impl<C> Default for U32StateBuilder<C> {
53 fn default() -> Self {
54 U32StateBuilder {
55 states: Vec::new(),
56 limit: u32::MAX,
57 }
58 }
59}
60
61impl<T, C> StateBuilder<T, u32, C> for U32StateBuilder<C> {
62 type Error = TooManyStates;
63
64 fn next_state(&mut self, nfa: &mut NFA<u32, T>, class: C) -> Result<u32, Self::Error> {
65 let q = self.states.len() as u32;
66 self.states.push(class);
67 if self.states.len() as u32 > self.limit {
68 Err(TooManyStates)
69 } else {
70 nfa.add_state(q);
71 Ok(q)
72 }
73 }
74
75 fn class_of(&self, q: &u32) -> Option<&C> {
76 self.states.get(*q as usize)
77 }
78}
79
80pub trait BuildNFA<T = char, Q = u32, C = (), G = ()>
81where
82 T: Clone,
83 Q: Ord,
84 C: Class<T>,
85{
86 fn build_nfa<S: StateBuilder<T, Q, C>>(
87 &self,
88 mut state_builder: S,
89 class: C,
90 ) -> Result<TaggedNFA<Q, T, G>, S::Error> {
91 let mut nfa = NFA::new();
92 let mut tags = Tags::new();
93 let (a, bs) = self.build_nfa_from(&mut state_builder, &mut nfa, &mut tags, &class)?;
94 nfa.add_initial_state(a);
95 for (_, b) in bs.into_entries() {
96 nfa.add_final_state(b);
97 }
98
99 Ok(TaggedNFA::new(nfa, tags))
100 }
101
102 fn build_nfa_from<S: StateBuilder<T, Q, C>>(
103 &self,
104 state_builder: &mut S,
105 nfa: &mut NFA<Q, T>,
106 tags: &mut Tags<Q, G>,
107 class: &C,
108 ) -> Result<(Q, C::Map<Q>), S::Error>;
109}
110
111pub type Transitions<T, Q> = BTreeMap<Option<RangeSet<T>>, BTreeSet<Q>>;
113
114#[derive(Debug, Clone, Educe)]
116#[educe(PartialEq(bound(Q: PartialEq, T: Measure + Enum)), Eq, PartialOrd, Ord, Hash)]
117#[cfg_attr(feature = "serde", derive(serde::Serialize))]
118pub struct NFA<Q = u32, T = char> {
119 transitions: BTreeMap<Q, Transitions<T, Q>>,
120 initial_states: BTreeSet<Q>,
121 final_states: BTreeSet<Q>,
122}
123
124impl<T, Q> Default for NFA<Q, T> {
125 fn default() -> Self {
126 Self {
127 transitions: BTreeMap::new(),
128 initial_states: BTreeSet::new(),
129 final_states: BTreeSet::new(),
130 }
131 }
132}
133
134impl<T, Q> NFA<Q, T> {
135 pub fn new() -> Self {
137 Self::default()
138 }
139
140 pub fn initial_states(&self) -> &BTreeSet<Q> {
142 &self.initial_states
143 }
144
145 pub fn final_states(&self) -> &BTreeSet<Q> {
147 &self.final_states
148 }
149
150 pub fn state_count(&self) -> usize {
151 self.transitions.len()
152 }
153
154 pub fn states(&self) -> impl Iterator<Item = &Q> {
156 self.transitions.keys()
157 }
158
159 pub fn transitions(&self) -> std::collections::btree_map::Iter<'_, Q, Transitions<T, Q>> {
161 self.transitions.iter()
162 }
163}
164
165impl<T, Q: Ord> NFA<Q, T> {
166 pub fn successors(&self, q: &Q) -> Successors<'_, T, Q> {
168 Successors::new(self.transitions.get(q))
169 }
170
171 pub fn add_state(&mut self, q: Q) {
174 self.transitions.entry(q).or_default();
175 }
176
177 pub fn is_initial_state(&self, q: &Q) -> bool {
179 self.initial_states.contains(q)
180 }
181
182 pub fn add_initial_state(&mut self, q: Q) -> bool {
184 self.initial_states.insert(q)
185 }
186
187 pub fn is_final_state(&self, q: &Q) -> bool {
189 self.final_states.contains(q)
190 }
191
192 pub fn add_final_state(&mut self, q: Q) -> bool {
194 self.final_states.insert(q)
195 }
196}
197
198impl<T: Token, Q: Ord> NFA<Q, T> {
199 pub fn singleton(
200 list: impl IntoIterator<Item = T>,
201 mut next_state: impl FnMut(Option<usize>) -> Q,
202 ) -> Self
203 where
204 Q: Clone,
205 {
206 let mut result = Self::new();
207
208 let mut q = next_state(None);
209 result.add_initial_state(q.clone());
210
211 for (i, item) in list.into_iter().enumerate() {
212 let r = next_state(Some(i));
213 let mut label = RangeSet::new();
214 label.insert(AnyRange::new(Bound::Included(item), Bound::Included(item)));
215 result.add(q, Some(label), r.clone());
216 q = r;
217 }
218
219 result.add_final_state(q);
220
221 result
222 }
223
224 pub fn simple_loop(state: Q, label: RangeSet<T>) -> Self
225 where
226 Q: Clone,
227 {
228 let mut result = Self::new();
229 result.add(state.clone(), Some(label), state.clone());
230 result.add_initial_state(state.clone());
231 result.add_final_state(state);
232 result
233 }
234
235 pub fn add(&mut self, source: Q, label: Option<RangeSet<T>>, target: Q)
237 where
238 Q: Clone,
239 {
240 self.add_state(target.clone());
241 self.transitions
242 .entry(source)
243 .or_default()
244 .entry(label)
245 .or_default()
246 .insert(target);
247 }
248
249 pub fn recognizes_empty(&self) -> bool {
251 let mut stack: Vec<_> = self.initial_states.iter().collect();
252 let mut visited = BTreeSet::new();
253
254 while let Some(q) = stack.pop() {
255 if visited.insert(q) {
256 if self.is_final_state(q) {
257 return true;
258 }
259
260 if let Some(transitions) = self.transitions.get(q) {
261 if let Some(successors) = transitions.get(&None) {
262 stack.extend(successors)
263 }
264 }
265 }
266 }
267
268 false
269 }
270
271 pub fn is_singleton(&self) -> bool
273 where
274 Q: Hash,
275 {
276 let Some(mut q) = Automaton::initial_state(self) else {
277 return false;
278 };
279
280 loop {
281 if Automaton::is_final_state(self, &q) {
282 for label in q.labels(self) {
283 for range in label {
284 if range.first().is_some() {
285 return false;
286 }
287 }
288 }
289
290 break true;
291 } else {
292 let mut token = None;
293
294 for label in q.labels(self) {
295 for range in label {
296 if let Some(t) = range.first() {
297 let last = range.last().unwrap();
298 if t != last {
299 return false;
300 }
301
302 if let Some(u) = token.replace(t) {
303 if u != t {
304 return false;
305 }
306 }
307 }
308 }
309 }
310
311 match token {
312 Some(token) => {
313 let Some(r) = Automaton::next_state(self, q, token) else {
314 return false;
315 };
316
317 q = r;
318 }
319 None => break false,
320 }
321 }
322 }
323 }
324
325 pub fn to_singleton(&self) -> Option<Vec<T>>
331 where
332 Q: Hash,
333 {
334 let mut q = Automaton::initial_state(self)?;
335
336 let mut result = Vec::new();
337 loop {
338 if Automaton::is_final_state(self, &q) {
339 for label in q.labels(self) {
340 for range in label {
341 if range.first().is_some() {
342 return None;
343 }
344 }
345 }
346
347 break Some(result);
348 } else {
349 let mut token = None;
350
351 for label in q.labels(self) {
352 for range in label {
353 if let Some(t) = range.first() {
354 let last = range.last().unwrap();
355 if t != last {
356 return None;
357 }
358
359 if let Some(u) = token.replace(t) {
360 if u != t {
361 return None;
362 }
363 }
364 }
365 }
366 }
367
368 match token {
369 Some(token) => {
370 q = Automaton::next_state(self, q, token)?;
371 result.push(token)
372 }
373 None => break None,
374 }
375 }
376 }
377 }
378
379 pub fn is_finite(&self) -> bool {
381 let mut stack: Vec<&Q> = self.initial_states.iter().collect();
382
383 let mut visited = BTreeSet::new();
384 while let Some(q) = stack.pop() {
385 if !visited.insert(q) {
386 return false;
387 }
388
389 stack.extend(self.successors(q).flat_map(|(_, r)| r));
390 }
391
392 true
393 }
394
395 pub fn is_infinite(&self) -> bool {
397 !self.is_finite()
398 }
399
400 pub fn is_always(&self, predicate: impl Fn(&Q) -> bool) -> bool {
403 let mut stack: Vec<&Q> = self.initial_states.iter().collect();
404
405 let mut visited = BTreeSet::new();
406 while let Some(q) = stack.pop() {
407 if visited.insert(q) {
408 if !predicate(q) {
409 return false;
410 }
411
412 stack.extend(self.successors(q).flat_map(|(_, r)| r));
413 }
414 }
415
416 true
417 }
418
419 pub fn is_always_concurrently(&self, predicate: impl Fn(&BTreeSet<&Q>) -> bool) -> bool {
422 let mut stack: Vec<BTreeSet<&Q>> = vec![self.initial_states.iter().collect()];
423
424 let mut visited = BTreeSet::new();
425 while let Some(state_set) = stack.pop() {
426 if visited.insert(state_set.clone()) {
427 if !predicate(&state_set) {
428 return false;
429 }
430
431 let successors = state_set.into_iter().flat_map(|q| {
432 self.successors(q)
433 .filter_map(|(label, r)| if label.is_some() { Some(r) } else { None })
434 .flatten()
435 });
436
437 stack.push(self.modulo_epsilon_state(successors));
438 }
439 }
440
441 true
442 }
443
444 pub fn is_universal(&self, alphabet: RangeSet<T>) -> bool {
447 self.is_always_concurrently(|states| {
448 let mut set = RangeSet::new();
449
450 for q in states {
451 for (l, _) in self.successors(q) {
452 if let Some(l) = l {
453 for &range in l {
454 set.insert(range);
455 }
456 }
457 }
458 }
459
460 set == alphabet
461 })
462 }
463
464 pub fn is_eventually(&self, predicate: impl Fn(&Q) -> bool) -> bool {
465 !self.is_always(|q| !predicate(q))
466 }
467
468 fn modulo_epsilon_state<'a>(&'a self, qs: impl IntoIterator<Item = &'a Q>) -> BTreeSet<&'a Q> {
469 let mut states = BTreeSet::new();
470 let mut stack: Vec<_> = qs.into_iter().collect();
471
472 while let Some(q) = stack.pop() {
473 if states.insert(q) {
474 if let Some(transitions) = self.transitions.get(q) {
476 if let Some(epsilon_qs) = transitions.get(&None) {
477 for t in epsilon_qs {
478 stack.push(t)
479 }
480 }
481 }
482 }
483 }
484
485 states
486 }
487
488 fn determinize_transitions_for(
489 &self,
490 states: &BTreeSet<&Q>,
491 ) -> BTreeMap<RangeSet<T>, BTreeSet<&Q>> {
492 let mut map = RangeMap::new();
493
494 for q in states {
495 if let Some(transitions) = self.transitions.get(q) {
496 for (label, targets) in transitions {
497 if let Some(label) = label {
498 for range in label.iter() {
499 debug_assert!(!range.is_empty());
500
501 map.update(
502 *range,
503 |current_target_states_opt: Option<&BTreeSet<&Q>>| {
504 let mut current_target_states = match current_target_states_opt
505 {
506 Some(current_target_states) => {
507 current_target_states.clone()
508 }
509 None => BTreeSet::new(),
510 };
511
512 for q in targets {
513 current_target_states
514 .extend(self.modulo_epsilon_state(Some(q)));
515 }
516
517 Some(current_target_states)
518 },
519 );
520
521 assert!(map.get(range.first().unwrap()).is_some());
522 }
523 }
524 }
525 }
526 }
527
528 let mut simplified_map = BTreeMap::new();
529
530 for (range, set) in map {
531 debug_assert!(!range.is_empty());
532 let mut label = RangeSet::new();
533 label.insert(range);
534 simplified_map.insert(label, set);
535 }
536
537 simplified_map
538 }
539
540 pub fn determinize<'a, R>(
547 &'a self,
548 mut f: impl FnMut(&BTreeSet<&'a Q>) -> R,
549 ) -> DFA<R, RangeSet<T>>
550 where
551 R: Clone + Ord + Hash,
552 {
553 let mut transitions = BTreeMap::new();
554
555 let initial_state = self.modulo_epsilon_state(&self.initial_states);
557 let mut final_states = BTreeSet::new();
558
559 let mut visited_states = HashSet::new();
560 let mut stack = vec![initial_state.clone()];
561 while let Some(det_q) = stack.pop() {
562 let r = f(&det_q);
563 if visited_states.insert(r.clone()) {
564 if det_q.iter().any(|q| self.final_states.contains(q)) {
565 final_states.insert(r.clone());
566 }
567
568 let map = self.determinize_transitions_for(&det_q);
569
570 let mut r_map = BTreeMap::new();
571 for (label, next_det_q) in map {
572 r_map.insert(label, f(&next_det_q));
573 stack.push(next_det_q)
574 }
575
576 transitions.insert(r, r_map);
577 }
578 }
579
580 DFA::from_parts(
581 f(&initial_state),
582 final_states,
583 DetTransitions::from(transitions),
584 )
585 }
586
587 pub fn mapped_union<R>(&mut self, other: NFA<R, T>, f: impl Fn(R) -> Q) {
594 for (q, transitions) in other.transitions {
595 let this_transitions = self.transitions.entry(f(q)).or_default();
596 for (label, targets) in transitions {
597 this_transitions
598 .entry(label)
599 .or_default()
600 .extend(targets.into_iter().map(&f));
601 }
602 }
603
604 self.initial_states
605 .extend(other.initial_states.into_iter().map(&f));
606 self.final_states
607 .extend(other.final_states.into_iter().map(f));
608 }
609
610 pub fn union<R>(&mut self, other: NFA<Q, T>) {
612 self.mapped_union(other, |q| q)
613 }
614
615 pub fn product<'a, 'b, R, S>(
621 &'a self,
622 other: &'b NFA<R, T>,
623 mut f: impl FnMut(&'a Q, &'b R) -> S,
624 ) -> NFA<S, T>
625 where
626 R: Ord,
627 S: Clone + Ord + Hash,
628 {
629 let mut result = NFA::new();
630
631 let mut stack = Vec::with_capacity(self.initial_states.len() * other.initial_states.len());
632 for a in &self.initial_states {
633 for b in &other.initial_states {
634 let q = f(a, b);
635 stack.push((q.clone(), a, b));
636 result.add_initial_state(q);
637 }
638 }
639
640 let mut visited = HashSet::new();
641 while let Some((q, a, b)) = stack.pop() {
642 if visited.insert(q.clone()) {
643 if self.is_final_state(a) && other.is_final_state(b) {
644 result.add_final_state(q.clone());
645 }
646
647 let transitions = result.transitions.entry(q).or_default();
648
649 for (a_label, a_successors) in self.successors(a) {
650 match a_label {
651 Some(a_label) => {
652 for (b_label, b_successors) in other.successors(b) {
653 if let Some(b_label) = b_label {
654 let label = token_set_intersection(a_label, b_label);
655 if !label.is_empty() {
656 let successors =
657 transitions.entry(Some(label)).or_default();
658
659 for sa in a_successors {
660 for sb in b_successors {
661 let s = f(sa, sb);
662 stack.push((s.clone(), sa, sb));
663 successors.insert(s);
664 }
665 }
666 }
667 }
668 }
669 }
670 None => {
671 if let Some(b_successors) =
672 other.transitions.get(b).and_then(|s| s.get(&None))
673 {
674 let successors = transitions.entry(None).or_default();
675
676 for sa in a_successors {
677 for sb in b_successors {
678 let s = f(sa, sb);
679 stack.push((s.clone(), sa, sb));
680 successors.insert(s);
681 }
682 }
683 }
684 }
685 }
686 }
687 }
688 }
689
690 result
691 }
692}
693
694#[cfg(feature = "serde")]
695impl<'de, Q, T> serde::Deserialize<'de> for NFA<Q, T>
696where
697 Q: Clone + Ord + serde::Deserialize<'de>,
698 T: Clone + Ord + Enum + Measure + serde::Deserialize<'de>,
699{
700 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
701 where
702 D: serde::Deserializer<'de>,
703 {
704 #[derive(serde::Deserialize)]
705 #[serde(
706 bound = "Q: serde::Deserialize<'de> + Ord, T: serde::Deserialize<'de> + Ord + Enum + Measure + Clone"
707 )]
708 pub struct Inner<Q, T> {
709 transitions: BTreeMap<Q, Transitions<T, Q>>,
710 initial_states: BTreeSet<Q>,
711 final_states: BTreeSet<Q>,
712 }
713
714 let mut inner: Inner<Q, T> = Inner::deserialize(deserializer)?;
715
716 for q in &inner.initial_states {
717 inner.transitions.entry(q.clone()).or_default();
718 }
719
720 for q in &inner.final_states {
721 inner.transitions.entry(q.clone()).or_default();
722 }
723
724 Ok(Self {
725 transitions: inner.transitions,
726 initial_states: inner.initial_states,
727 final_states: inner.final_states,
728 })
729 }
730}
731
732pub struct Successors<'a, T, Q> {
734 inner: Option<std::collections::btree_map::Iter<'a, Option<RangeSet<T>>, BTreeSet<Q>>>,
735}
736
737impl<'a, T, Q> Successors<'a, T, Q> {
738 pub fn new(map: Option<&'a BTreeMap<Option<RangeSet<T>>, BTreeSet<Q>>>) -> Self {
739 Self {
740 inner: map.map(|map| map.iter()),
741 }
742 }
743}
744
745impl<'a, T, Q> Iterator for Successors<'a, T, Q> {
746 type Item = (&'a Option<RangeSet<T>>, &'a BTreeSet<Q>);
747
748 fn next(&mut self) -> Option<Self::Item> {
749 self.inner.as_mut().and_then(|inner| inner.next())
750 }
751}
752
753impl<T: Token, Q: Ord + Hash> Automaton<T> for NFA<Q, T> {
754 type State<'a>
755 = VisitingState<'a, Q>
756 where
757 Self: 'a;
758
759 fn initial_state(&self) -> Option<Self::State<'_>> {
760 let mut stack = Vec::new();
761 let mut states = HashSet::new();
762
763 for r in &self.initial_states {
764 states.insert(r);
765 stack.push(r);
766 }
767
768 while let Some(q) = stack.pop() {
770 if let Some(q_transitions) = self.transitions.get(q) {
771 if let Some(targets) = q_transitions.get(&None) {
772 for r in targets {
773 if states.insert(r) {
774 stack.push(r);
775 }
776 }
777 }
778 }
779 }
780
781 if states.is_empty() {
782 None
783 } else {
784 Some(VisitingState {
785 states,
786 next_states: HashSet::new(),
787 stack,
788 })
789 }
790 }
791
792 fn next_state<'a>(
793 &'a self,
794 VisitingState {
795 mut states,
796 mut next_states,
797 mut stack,
798 }: Self::State<'a>,
799 token: T,
800 ) -> Option<Self::State<'a>> {
801 for &q in &states {
802 if let Some(q_transitions) = self.transitions.get(q) {
803 for (label, targets) in q_transitions {
804 if let Some(label) = label {
805 if label.contains(token) {
806 for r in targets {
807 if next_states.insert(r) {
808 stack.push(r);
809 }
810 }
811 }
812 }
813 }
814 }
815 }
816
817 while let Some(q) = stack.pop() {
819 if let Some(q_transitions) = self.transitions.get(q) {
820 if let Some(targets) = q_transitions.get(&None) {
821 for r in targets {
822 if next_states.insert(r) {
823 stack.push(r);
824 }
825 }
826 }
827 }
828 }
829
830 if next_states.is_empty() {
831 None
832 } else {
833 states.clear();
834 Some(VisitingState {
835 states: next_states,
836 next_states: states,
837 stack,
838 })
839 }
840 }
841
842 fn is_final_state<'a>(&'a self, VisitingState { states, .. }: &Self::State<'a>) -> bool {
843 for &q in states {
844 if self.final_states.contains(q) {
845 return true;
846 }
847 }
848
849 false
850 }
851}
852
853pub struct VisitingState<'a, Q> {
854 states: HashSet<&'a Q>,
855 next_states: HashSet<&'a Q>,
856 stack: Vec<&'a Q>,
857}
858
859impl<'a, Q: Ord> VisitingState<'a, Q> {
860 pub fn labels<'b, T>(
861 &'b self,
862 aut: &'b NFA<Q, T>,
863 ) -> impl 'b + Iterator<Item = &'b RangeSet<T>> {
864 self.states.iter().flat_map(|q| {
865 aut.transitions
866 .get(*q)
867 .map(|q_transitions| q_transitions.keys().filter_map(Option::as_ref))
868 .into_iter()
869 .flatten()
870 })
871 }
872}
873
874#[cfg(test)]
875mod tests {
876 use btree_range_map::generic::RangeSet;
877
878 use super::NFA;
879 use crate::any_char;
880
881 #[test]
882 fn is_finite() {
883 let aut = NFA::singleton("foo".chars(), |q| q);
884 assert!(aut.is_finite())
885 }
886
887 #[test]
888 fn is_infinite() {
889 let aut = NFA::simple_loop(0, any_char());
890 assert!(aut.is_infinite())
891 }
892
893 #[test]
894 fn is_universal() {
895 let aut1 = NFA::simple_loop(0, any_char());
896 assert!(aut1.is_universal(any_char()));
897
898 let mut label = RangeSet::new();
899 label.insert('a');
900 assert!(!aut1.is_universal(label));
901
902 let aut2 = NFA::singleton("foo".chars(), |q| q);
903 assert!(!aut2.is_universal(any_char()))
904 }
905}