1use std::borrow::Borrow;
18use std::borrow::Cow;
19use std::collections::HashMap;
20use std::collections::HashSet;
21use std::collections::VecDeque;
22
23use crate::builder::ApproximatelySimilarCanonical;
24use crate::builder::Regex;
25use crate::derivation::Symbols;
26use crate::Alphabet;
27
28#[derive(Clone)]
30pub struct FiniteAutomaton<S: Alphabet> {
31 states: Vec<State<S>>,
32}
33
34#[derive(Clone)]
35struct State<S: Alphabet> {
36 regex: Regex<ApproximatelySimilarCanonical<S>>,
37 transitions: HashMap<S, u16>,
38 default_transition: u16,
39 acceptance: Acceptance,
40}
41
42#[derive(Clone, Copy, Debug, Eq, PartialEq)]
44pub enum Acceptance {
45 Reject,
47 MayAccept,
49 Accept,
51}
52
53impl Acceptance {
54 fn from_nullable(is_nullable: bool) -> Self {
55 if is_nullable {
56 Self::Accept
57 } else {
58 Self::Reject
59 }
60 }
61}
62
63impl<S: Alphabet> Regex<ApproximatelySimilarCanonical<S>> {
64 pub fn to_automaton(&self) -> FiniteAutomaton<S> {
66 let mut symbols = HashSet::new();
67 self.collect_symbols(&mut symbols);
68 let default_symbols = Symbols::Exclude(symbols.clone());
69
70 let mut regexes: HashMap<Self, u16> = HashMap::new();
71 let mut states = Vec::new();
72 let mut reverse_transitions = Vec::new();
73
74 let mut queue = VecDeque::new();
75 fn get_or_insert<S: Alphabet>(
76 regex: Regex<ApproximatelySimilarCanonical<S>>,
77 queue: &mut VecDeque<Regex<ApproximatelySimilarCanonical<S>>>,
78 regexes: &mut HashMap<Regex<ApproximatelySimilarCanonical<S>>, u16>,
79 reverse_transitions: &mut Vec<Vec<u16>>,
80 ) -> u16 {
81 if let Some(idx) = regexes.get(®ex) {
82 *idx
83 } else {
84 let idx = regexes.len() as u16;
85 regexes.insert(regex.clone(), idx);
86 queue.push_back(regex);
87 reverse_transitions.push(Vec::new());
88 idx
89 }
90 }
91
92 get_or_insert(
93 self.clone(),
94 &mut queue,
95 &mut regexes,
96 &mut reverse_transitions,
97 );
98 let mut reverse_queue = VecDeque::new();
99 while let Some(regex) = queue.pop_front() {
100 let current_idx = states.len() as u16;
101 let accepting = Acceptance::from_nullable(regex.is_nullable());
102 if accepting == Acceptance::Accept {
103 reverse_queue.push_front(states.len() as u16);
104 }
105 let mut transitions = HashMap::default();
106 for symbol in &symbols {
107 let next = regex.derive_symbols(&Symbols::include([symbol.clone()]));
108 let next_idx =
109 get_or_insert(next, &mut queue, &mut regexes, &mut reverse_transitions);
110 reverse_transitions[next_idx as usize].push(current_idx);
111 transitions.insert(symbol.clone(), next_idx);
112 }
113 let default_transition = {
114 let next = regex.derive_symbols(&default_symbols);
115 let next_idx =
116 get_or_insert(next, &mut queue, &mut regexes, &mut reverse_transitions);
117 reverse_transitions[next_idx as usize].push(current_idx);
118 next_idx
119 };
120 states.push(State {
121 regex,
122 transitions,
123 default_transition,
124 acceptance: accepting,
125 });
126 }
127
128 while let Some(next_idx) = reverse_queue.pop_front() {
129 for prev_idx in &reverse_transitions[next_idx as usize] {
130 let prev = &mut states[*prev_idx as usize];
131 if prev.acceptance == Acceptance::Reject {
132 prev.acceptance = Acceptance::MayAccept;
133 reverse_queue.push_front(*prev_idx);
134 }
135 }
136 }
137
138 FiniteAutomaton { states }
139 }
140
141 fn collect_symbols(&self, symbols: &mut HashSet<S>) {
142 match self {
143 Regex::EmptySet => {}
144 Regex::EmptyString => {}
145 Regex::Symbol(symbol) => {
146 symbols.insert(symbol.clone());
147 }
148 Regex::Concat(left, right) => {
149 left.collect_symbols(symbols);
150 right.collect_symbols(symbols);
151 }
152 Regex::Closure(inner) => inner.collect_symbols(symbols),
153 Regex::Or(left, right) => {
154 left.collect_symbols(symbols);
155 right.collect_symbols(symbols);
156 }
157 Regex::And(left, right) => {
158 left.collect_symbols(symbols);
159 right.collect_symbols(symbols);
160 }
161 Regex::Complement(inner) => inner.collect_symbols(symbols),
162 }
163 }
164}
165
166impl<S: Alphabet> FiniteAutomaton<S> {
167 pub fn to_matcher(&self) -> Matcher<'_, S> {
168 Matcher {
169 fa: Cow::Borrowed(self),
170 state: 0,
171 }
172 }
173
174 pub fn into_matcher(self) -> Matcher<'static, S> {
175 Matcher {
176 fa: Cow::Owned(self),
177 state: 0,
178 }
179 }
180
181 pub fn matches<I>(&self, symbols: impl IntoIterator<Item = I>) -> bool
182 where
183 I: Borrow<S>,
184 {
185 self.to_matcher().next_iter(symbols) == Acceptance::Accept
186 }
187
188 #[inline]
189 fn state(&self, idx: u16) -> &State<S> {
190 &self.states[idx as usize]
191 }
192}
193
194impl<S: Alphabet> State<S> {
195 fn next(&self, symbol: &S) -> u16 {
196 self.transitions
197 .get(symbol)
198 .copied()
199 .unwrap_or(self.default_transition)
200 }
201}
202
203pub struct Matcher<'a, S: Alphabet> {
205 fa: Cow<'a, FiniteAutomaton<S>>,
206 state: u16,
207}
208
209impl<'a, S: Alphabet> Matcher<'a, S> {
210 pub fn next(&mut self, symbol: &S) -> Acceptance {
211 self.state = self.state().next(symbol);
212 self.fa.state(self.state).acceptance
213 }
214
215 pub fn next_iter<I>(&mut self, symbols: impl IntoIterator<Item = I>) -> Acceptance
216 where
217 I: Borrow<S>,
218 {
219 for symbol in symbols {
220 self.state = self.state().next(symbol.borrow());
221 }
222 self.state().acceptance
223 }
224
225 #[inline]
226 pub fn regex(&self) -> &Regex<ApproximatelySimilarCanonical<S>> {
227 &self.state().regex
228 }
229
230 #[inline]
231 fn state(&self) -> &State<S> {
232 self.fa.state(self.state)
233 }
234}
235
236#[cfg(test)]
237mod tests {
238 use itertools::Itertools;
239
240 use crate::builder::ApproximatelySimilarCanonical;
241 use crate::builder::Regex;
242 use crate::ops::*;
243
244 use super::*;
245
246 #[test]
247 fn test_matches() {
248 let tests: Vec<(
249 Regex<ApproximatelySimilarCanonical<usize>>,
250 Vec<_>,
251 Acceptance,
252 )> = vec![
253 ((().r()), vec![], Acceptance::Reject),
254 (([].r()), vec![], Acceptance::Accept),
255 (42.s(), vec![42], Acceptance::Accept),
256 (42.s(), vec![42, 42], Acceptance::Reject), (42.s(), vec![11], Acceptance::Reject), ((![42.s()].r()), vec![], Acceptance::Accept),
259 (([42.s()].r()), vec![42], Acceptance::Accept),
260 (
261 ([42.s(), (11.s() | 7.s())].r()),
262 vec![42],
263 Acceptance::MayAccept,
264 ),
265 (
266 ([42.s(), (11.s() | 7.s())].r()),
267 vec![42, 11],
268 Acceptance::Accept,
269 ),
270 (
271 ([42.s(), (11.s() | 7.s())].r()),
272 vec![42, 7],
273 Acceptance::Accept,
274 ),
275 ((42.s().c()), vec![], Acceptance::Accept),
276 ((42.s().c()), vec![42], Acceptance::Accept),
277 ((42.s().c()), vec![42, 42, 42], Acceptance::Accept),
278 ((42.s().c()), vec![42, 11], Acceptance::Reject),
279 ((42.s() & [].r()), vec![42], Acceptance::Reject),
280 ((42.s() & 42.s().c()), vec![42], Acceptance::Accept),
281 ((42.s() & 42.s().c()), vec![42, 42], Acceptance::Reject),
282 ((42.s() | 11.s()), vec![42], Acceptance::Accept),
283 ((42.s() | 11.s()), vec![11], Acceptance::Accept),
284 ((42.s() | 11.s()), vec![11, 42], Acceptance::Reject),
285 ((42.s() | 11.s().c()), vec![42], Acceptance::Accept),
286 ((42.s() | 11.s().c()), vec![11], Acceptance::Accept),
287 ((42.s() | 11.s().c()), vec![11, 11], Acceptance::Accept),
288 ((42.s() | 11.s().c()), vec![42, 11], Acceptance::Reject),
289 ((!().r()), vec![11], Acceptance::Accept),
290 ((!11.s()), vec![42], Acceptance::Accept),
291 ((!11.s()), vec![11], Acceptance::MayAccept),
292 ];
293 for test in tests {
294 assert_eq!(
295 test.2,
296 test.0.to_automaton().into_matcher().next_iter(&test.1),
297 "expected {:?} matching {} with [{}]",
298 test.2,
299 test.0,
300 test.1.iter().join(", ")
301 );
302 }
303 }
304}