1#![allow(clippy::result_large_err)]
2
3use regex_automata::{
4 dfa::{dense, sparse, Automaton},
5 util::primitives::StateID,
6 Input,
7};
8
9pub type DenseDfaIter<T> = DfaIter<dense::DFA<T>>;
11pub type SparseDfaIter<T> = DfaIter<sparse::DFA<T>>;
13
14pub struct DfaIter<A> {
31 pub(crate) regex: A,
33 start: StateID,
35 depth: usize,
37 max_depth: usize,
39 stack: Vec<(StateID, u8, usize)>,
41 str: Vec<u8>,
43}
44
45impl<A: Automaton> From<A> for DfaIter<A> {
46 fn from(dfa: A) -> Self {
47 let start = dfa
50 .start_state_forward(&Input::new("").anchored(regex_automata::Anchored::Yes))
51 .unwrap();
52
53 Self {
54 regex: dfa,
55 start,
56 depth: 0,
57 max_depth: 0,
58 stack: vec![(start, 0, 0)],
59 str: vec![],
60 }
61 }
62}
63
64impl DenseDfaIter<Vec<u32>> {
65 pub fn new(pattern: &str) -> Result<Self, dense::BuildError> {
73 dense::DFA::builder()
74 .configure(dense::Config::new().accelerate(false))
75 .build(pattern)
76 .map(Self::from)
77 }
78
79 pub fn new_many<P: AsRef<str>>(patterns: &[P]) -> Result<Self, dense::BuildError> {
87 dense::DFA::builder()
88 .configure(dense::Config::new().accelerate(false))
89 .build_many(patterns)
90 .map(Self::from)
91 }
92}
93
94impl SparseDfaIter<Vec<u8>> {
95 pub fn new(pattern: &str) -> Result<Self, dense::BuildError> {
104 dense::DFA::builder()
105 .configure(dense::Config::new().accelerate(false))
106 .build(pattern)
107 .and_then(|dense| dense.to_sparse())
108 .map(Self::from)
109 }
110
111 pub fn new_many<P: AsRef<str>>(patterns: &[P]) -> Result<Self, dense::BuildError> {
120 dense::DFA::builder()
121 .configure(dense::Config::new().accelerate(false))
122 .build_many(patterns)
123 .and_then(|dense| dense.to_sparse())
124 .map(Self::from)
125 }
126}
127
128impl<A: Automaton> DfaIter<A> {
129 pub fn borrow_next(&mut self) -> Option<&[u8]> {
131 loop {
132 let Some((current, b, depth)) = self.stack.pop() else {
133 if self.max_depth < self.depth {
135 break None;
136 }
137
138 self.depth += 1;
139 self.stack.clear();
140 self.stack.push((self.start, 0, 0));
141 continue;
142 };
143
144 self.max_depth = usize::max(self.max_depth, depth);
146 self.str.truncate(depth);
147 self.str.push(b);
148
149 if depth < self.depth {
151 for b in (0..=255).rev() {
152 let next_state = self.regex.next_state(current, b);
153 if !self.regex.is_dead_state(next_state) {
155 self.stack.push((next_state, b, depth + 1));
156 }
157 }
158 } else {
159 let eoi_state = self.regex.next_eoi_state(current);
161 if self.regex.is_match_state(eoi_state) {
162 break Some(&self.str[1..]);
163 }
164 }
165 }
166 }
167}
168
169impl<A: Automaton> Iterator for DfaIter<A> {
170 type Item = Vec<u8>;
171
172 fn next(&mut self) -> Option<Self::Item> {
173 self.borrow_next().map(ToOwned::to_owned)
174 }
175}
176
177#[cfg(test)]
178mod tests {
179 use std::collections::HashSet;
180
181 use regex_automata::dfa::dense::DFA;
182
183 use super::*;
184
185 #[test]
186 fn set() {
187 let iter = DenseDfaIter::new(r"^b|(a)?|cc").unwrap();
188
189 let x: Vec<Vec<u8>> = iter.collect();
190 assert_eq!(
191 x,
192 [
193 b"".to_vec(),
194 b"a".to_vec(),
195 b"b".to_vec(),
196 ]
199 );
200 }
201
202 #[test]
203 fn finite() {
204 let dfa = DFA::new(r"[0-1]{4}-[0-1]{2}-[0-1]{2}").unwrap();
205
206 let x: HashSet<Vec<u8>> = DfaIter::from(&dfa).collect();
209 assert_eq!(x.len(), 256);
210 for y in x {
211 assert_eq!(y.len(), 10);
212 }
213 }
214
215 #[test]
216 fn repeated() {
217 let dfa = DFA::new(r"a+(0|1)").unwrap();
218
219 let x: Vec<Vec<u8>> = DfaIter::from(&dfa).take(20).collect();
221 let y = [
222 b"a0".to_vec(),
223 b"a1".to_vec(),
224 b"aa0".to_vec(),
225 b"aa1".to_vec(),
226 b"aaa0".to_vec(),
227 b"aaa1".to_vec(),
228 b"aaaa0".to_vec(),
229 b"aaaa1".to_vec(),
230 b"aaaaa0".to_vec(),
231 b"aaaaa1".to_vec(),
232 b"aaaaaa0".to_vec(),
233 b"aaaaaa1".to_vec(),
234 b"aaaaaaa0".to_vec(),
235 b"aaaaaaa1".to_vec(),
236 b"aaaaaaaa0".to_vec(),
237 b"aaaaaaaa1".to_vec(),
238 b"aaaaaaaaa0".to_vec(),
239 b"aaaaaaaaa1".to_vec(),
240 b"aaaaaaaaaa0".to_vec(),
241 b"aaaaaaaaaa1".to_vec(),
242 ];
243 assert_eq!(x, y);
244 }
245
246 #[test]
247 fn complex() {
248 let dfa = DFA::new(r"(a+|b+)*").unwrap();
249
250 let x: Vec<Vec<u8>> = DfaIter::from(&dfa).take(8).collect();
252 let y = [
253 b"".to_vec(),
254 b"a".to_vec(),
255 b"b".to_vec(),
256 b"aa".to_vec(),
257 b"ab".to_vec(),
258 b"ba".to_vec(),
259 b"bb".to_vec(),
260 b"aaa".to_vec(),
261 ];
262 assert_eq!(x, y);
263 }
264
265 #[test]
266 fn email() {
267 let dfa = DFA::new(r"[a-zA-Z0-9.!#$%&’*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*")
268 .unwrap();
269 let mut search = DfaIter::from(&dfa);
270
271 for _ in 0..100_000 {
273 search.borrow_next();
274 }
275
276 let x = search.borrow_next().unwrap();
277 assert_eq!(String::from_utf8_lossy(x), "0@hI");
278 }
279
280 #[test]
281 fn many() {
282 let search = SparseDfaIter::new_many(&["[0-1]+", "^[a-b]+"]).unwrap();
283 let x: Vec<Vec<u8>> = search.take(12).collect();
284 let y = [
285 b"0".to_vec(),
286 b"1".to_vec(),
287 b"a".to_vec(),
288 b"b".to_vec(),
289 b"00".to_vec(),
290 b"01".to_vec(),
291 b"10".to_vec(),
292 b"11".to_vec(),
293 b"aa".to_vec(),
294 b"ab".to_vec(),
295 b"ba".to_vec(),
296 b"bb".to_vec(),
297 ];
298 assert_eq!(x, y);
299 }
300}