iregex_automata/
lib.rs

1//! This library provides an implementation of Nondeterministic Finite Automata
2//! (NFA) and Deterministic Finite Automata (DFA) for Unicode scalar values
3//! (the [`char`] type). It is used by the [`iregex`] crate to represent compiled
4//! regular expressions.
5//!
6//! [`iregex`]: <https://github.com/timothee-haudebourg/iregex-rs>
7use btree_range_map::RangePartialOrd;
8pub use btree_range_map::{AnyRange, RangeSet};
9use mown::Mown;
10use range_traits::{Bounded, Measure, PartialEnum};
11
12pub mod dfa;
13pub mod nfa;
14pub mod util;
15
16pub use dfa::DFA;
17pub use nfa::NFA;
18
19#[cfg(feature = "dot")]
20pub mod dot;
21
22pub fn any_char() -> RangeSet<char> {
23	let mut set = RangeSet::new();
24	set.insert('\u{0}'..='\u{d7ff}');
25	set.insert('\u{e000}'..='\u{10ffff}');
26	set
27}
28
29/// Computes the intersection of two character sets.
30pub fn token_set_intersection<T>(a: &RangeSet<T>, b: &RangeSet<T>) -> RangeSet<T>
31where
32	T: Clone + Measure + PartialEnum,
33{
34	let mut result = a.clone();
35
36	for r in b.gaps() {
37		result.remove(r.cloned());
38	}
39
40	result
41}
42
43pub trait MapSource: Sized {
44	type Map<U>: Map<Self, U>;
45}
46
47#[allow(clippy::len_without_is_empty)]
48pub trait Token: Copy + Ord + Measure + PartialEnum + RangePartialOrd + Bounded {
49	fn all() -> RangeSet<Self>;
50
51	/// Returns the (byte) length of the token.
52	fn len(&self) -> usize;
53
54	fn is_one(len: Self::Len) -> bool;
55}
56
57impl Token for u8 {
58	fn all() -> RangeSet<Self> {
59		let mut set = RangeSet::new();
60		set.insert(u8::MIN..=u8::MAX);
61		set
62	}
63
64	fn len(&self) -> usize {
65		1
66	}
67
68	fn is_one(len: Self::Len) -> bool {
69		len == 1
70	}
71}
72
73impl Token for char {
74	fn all() -> RangeSet<Self> {
75		any_char()
76	}
77
78	fn len(&self) -> usize {
79		self.len_utf8()
80	}
81
82	fn is_one(len: Self::Len) -> bool {
83		len == 1
84	}
85}
86
87/// Token class.
88pub trait Class<T = char>: MapSource {
89	/// Classify the given token set.
90	///
91	/// The output is a partition of the input set, where each member of the
92	/// partition is associated to a different class.
93	fn classify<'a>(&self, set: &'a RangeSet<T>) -> Self::Map<Mown<'a, RangeSet<T>>>;
94
95	fn next_class(&self, token: &T) -> Self;
96}
97
98pub trait Map<C, T>: Default + FromIterator<(C, T)> {
99	type Iter<'a>: Iterator<Item = (&'a C, &'a T)>
100	where
101		C: 'a,
102		T: 'a,
103		Self: 'a;
104	type IntoEntries: Iterator<Item = (C, T)>;
105
106	fn singleton(class: C, value: T) -> Self {
107		let mut result = Self::default();
108		result.set(class, value);
109		result
110	}
111
112	fn get(&self, class: &C) -> Option<&T>;
113
114	fn get_mut(&mut self, class: &C) -> Option<&mut T>;
115
116	fn contains(&self, class: &C) -> bool {
117		self.get(class).is_some()
118	}
119
120	fn set(&mut self, class: C, value: T);
121
122	fn get_mut_or_insert_with(&mut self, class: &C, f: impl FnOnce() -> T) -> &mut T
123	where
124		C: Clone,
125	{
126		if !self.contains(class) {
127			let t = f();
128			self.set(class.clone(), t);
129		}
130
131		self.get_mut(class).unwrap()
132	}
133
134	fn get_or_try_insert_with<E>(
135		&mut self,
136		class: &C,
137		f: impl FnOnce() -> Result<T, E>,
138	) -> Result<&T, E>
139	where
140		C: Clone,
141	{
142		if !self.contains(class) {
143			let t = f()?;
144			self.set(class.clone(), t);
145		}
146
147		Ok(self.get(class).unwrap())
148	}
149
150	fn iter(&self) -> Self::Iter<'_>;
151
152	fn into_entries(self) -> Self::IntoEntries;
153}
154
155impl MapSource for () {
156	type Map<U> = Unmapped<U>;
157}
158
159impl<T> Class<T> for () {
160	fn classify<'a>(&self, set: &'a RangeSet<T>) -> Self::Map<Mown<'a, RangeSet<T>>> {
161		Unmapped(Some(Mown::Borrowed(set)))
162	}
163
164	fn next_class(&self, _token: &T) -> Self {}
165}
166
167pub struct Unmapped<T>(Option<T>);
168
169impl<T> Unmapped<T> {
170	pub fn unwrap(self) -> Option<T> {
171		self.0
172	}
173}
174
175impl<T> Default for Unmapped<T> {
176	fn default() -> Self {
177		Self(None)
178	}
179}
180
181impl<T> Map<(), T> for Unmapped<T> {
182	type Iter<'a>
183		= OptionClassIter<'a, T>
184	where
185		T: 'a;
186	type IntoEntries = OptionClassIntoIter<T>;
187
188	fn get(&self, _class: &()) -> Option<&T> {
189		self.0.as_ref()
190	}
191
192	fn get_mut(&mut self, _class: &()) -> Option<&mut T> {
193		self.0.as_mut()
194	}
195
196	fn set(&mut self, _class: (), value: T) {
197		self.0 = Some(value)
198	}
199
200	fn iter(&self) -> Self::Iter<'_> {
201		OptionClassIter(self.0.as_ref())
202	}
203
204	fn into_entries(self) -> Self::IntoEntries {
205		OptionClassIntoIter(self.0)
206	}
207}
208
209impl<T> FromIterator<((), T)> for Unmapped<T> {
210	fn from_iter<I: IntoIterator<Item = ((), T)>>(iter: I) -> Self {
211		let mut result = Self::default();
212		for ((), t) in iter {
213			result.set((), t);
214		}
215		result
216	}
217}
218
219pub struct OptionClassIter<'a, T>(Option<&'a T>);
220
221impl<'a, T> Iterator for OptionClassIter<'a, T> {
222	type Item = (&'a (), &'a T);
223
224	fn next(&mut self) -> Option<Self::Item> {
225		self.0.take().map(|t| (&(), t))
226	}
227}
228
229pub struct OptionClassIntoIter<T>(Option<T>);
230
231impl<T> Iterator for OptionClassIntoIter<T> {
232	type Item = ((), T);
233
234	fn next(&mut self) -> Option<Self::Item> {
235		self.0.take().map(|t| ((), t))
236	}
237}
238
239/// Deterministic or non-deterministic automaton.
240pub trait Automaton<T> {
241	type State<'a>
242	where
243		Self: 'a;
244
245	fn initial_state(&self) -> Option<Self::State<'_>>;
246
247	fn next_state<'a>(
248		&'a self,
249		current_state: Self::State<'a>,
250		token: T,
251	) -> Option<Self::State<'a>>;
252
253	fn is_final_state<'a>(&'a self, state: &Self::State<'a>) -> bool;
254
255	fn contains(&self, tokens: impl IntoIterator<Item = T>) -> bool {
256		match self.initial_state() {
257			Some(mut q) => {
258				for token in tokens {
259					match self.next_state(q, token) {
260						Some(r) => q = r,
261						None => return false,
262					}
263				}
264
265				self.is_final_state(&q)
266			}
267			None => false,
268		}
269	}
270}
271
272/// Deterministic or non-deterministic automaton.
273pub trait TaggedAutomaton<T, G>: Automaton<T> {
274	fn get_tag(&self, state: &G) -> Option<usize>;
275}