// vyre_std/pattern/nfa_to_dfa.rs

//! Subset-construction NFA → DFA converter.
//!
//! Walks the NFA's epsilon closures to produce a deterministic transition
//! table. Dead states (non-accepting with no reachable accept) are pruned
//! so downstream packing does not spend memory on them.
//!
//! Per perf-audit L.2.1: closures are stored as sorted deduplicated
//! `Vec<NfaStateId>` (not `BTreeSet`) so that dedup via hashmap lookup
//! uses the flat buffer directly — no BTreeSet → Vec key conversion per
//! byte, no allocation hot-path inside the inner 256-byte loop. Edge
//! adjacency is flattened into a per-state slice so the byte transition
//! loop is a linear scan instead of a BTreeMap::get per byte per state.
use std::collections::hash_map::Entry;
use std::collections::HashMap;

use super::types::{Dfa, Nfa, NfaStateId, PatternError, INVALID_STATE};
17
/// Configured ceiling on the DFA state count.
///
/// Matches [`super::aho_corasick_build::MAX_STATES`] so the full assembly
/// pipeline speaks the same limit to its callers.
// NOTE(review): 65_535 == u16::MAX — presumably chosen so packed state ids
// fit in a u16 downstream; confirm against `dfa_pack`.
pub const MAX_DFA_STATES: u32 = 65_535;
23
24/// Convert a Thompson NFA into a minimized-ready DFA via subset construction.
25///
26/// The returned DFA has a dense 256-wide transition table suitable for
27/// passing to [`super::dfa_minimize::dfa_minimize`] and then to
28/// [`super::dfa_pack::dfa_pack`].
29///
30/// # Examples
31///
32/// ```
33/// use vyre_std::pattern::{regex_to_nfa::regex_to_nfa, nfa_to_dfa::nfa_to_dfa};
34///
35/// let nfa = regex_to_nfa("a|b").unwrap();
36/// let dfa = nfa_to_dfa(&nfa).unwrap();
37/// assert!(dfa.state_count >= 2);
38/// ```
39///
40/// # Errors
41///
42/// Returns [`PatternError::StateOverflow`] if the DFA would exceed
43/// [`MAX_DFA_STATES`]. This guards against regex inputs whose closure
44/// explosion would blow the state budget.
45#[inline]
46pub fn nfa_to_dfa(nfa: &Nfa) -> Result<Dfa, PatternError> {
47    // Flatten edges into per-state slices in a single `Vec` — replaces
48    // the `BTreeMap<NfaStateId, Vec<...>>` that the prior code rebuilt
49    // into a Vec on every outer-loop iteration. Layout: eps_targets /
50    // byte_targets are flat `Vec<NfaStateId>` slabs; eps_slice /
51    // byte_slices are `(start, len)` windows into them indexed by
52    // state id. Per-byte adjacency is grouped so the 256-byte
53    // transition loop scans one slice instead of filtering every edge.
54    let n_states = nfa.state_count as usize;
55    let mut eps_slice: Vec<(u32, u32)> = vec![(0, 0); n_states];
56    let mut byte_slice: Vec<[(u32, u32); 256]> = Vec::with_capacity(n_states);
57    byte_slice.resize(n_states, [(0, 0); 256]);
58    let mut eps_targets: Vec<NfaStateId> = Vec::with_capacity(nfa.edges.len());
59    let mut byte_targets: Vec<NfaStateId> = Vec::with_capacity(nfa.edges.len());
60    {
61        // Two-pass fill: count per (state, byte) and (state, eps) then
62        // scatter into contiguous windows. One allocation each.
63        let mut eps_count = vec![0u32; n_states];
64        let mut byte_count = vec![[0u32; 256]; n_states];
65        for edge in &nfa.edges {
66            let from = edge.from as usize;
67            match edge.byte {
68                None => eps_count[from] += 1,
69                Some(b) => byte_count[from][b as usize] += 1,
70            }
71        }
72        let mut eps_cursor = 0u32;
73        let mut byte_cursor = 0u32;
74        for s in 0..n_states {
75            eps_slice[s] = (eps_cursor, eps_count[s]);
76            eps_cursor += eps_count[s];
77            for b in 0..256usize {
78                byte_slice[s][b] = (byte_cursor, byte_count[s][b]);
79                byte_cursor += byte_count[s][b];
80            }
81        }
82        eps_targets.resize(eps_cursor as usize, 0);
83        byte_targets.resize(byte_cursor as usize, 0);
84        let mut eps_fill = vec![0u32; n_states];
85        let mut byte_fill = vec![[0u32; 256]; n_states];
86        for edge in &nfa.edges {
87            let from = edge.from as usize;
88            match edge.byte {
89                None => {
90                    let off = eps_slice[from].0 + eps_fill[from];
91                    eps_targets[off as usize] = edge.to;
92                    eps_fill[from] += 1;
93                }
94                Some(b) => {
95                    let off = byte_slice[from][b as usize].0 + byte_fill[from][b as usize];
96                    byte_targets[off as usize] = edge.to;
97                    byte_fill[from][b as usize] += 1;
98                }
99            }
100        }
101    }
102
103    // Subsets live as sorted, deduplicated `Vec<NfaStateId>`. Dedup
104    // via `HashMap` is O(|closure|) per lookup, not O(log N · |closure|)
105    // as with BTreeMap<Vec<NfaStateId>, _>. The hash is std::SipHash —
106    // the subset builder is not a hostile-input path, so the default
107    // hasher's collision resistance is not load-bearing here.
108    let mut closure_scratch = ClosureScratch::with_capacity(n_states);
109    let start_closure = epsilon_closure(
110        &eps_slice,
111        &eps_targets,
112        &[nfa.start],
113        n_states,
114        &mut closure_scratch,
115    );
116    let mut subsets: Vec<Vec<NfaStateId>> = vec![start_closure.clone()];
117    let mut subset_index: HashMap<Vec<NfaStateId>, u32> = HashMap::new();
118    subset_index.insert(start_closure.clone(), 0);
119
120    let mut transitions: Vec<u32> = Vec::with_capacity(256);
121    let mut accept: Vec<bool> = Vec::with_capacity(1);
122    extend_row(&mut transitions, &mut accept, &start_closure, &nfa.accept);
123
124    let mut worklist: Vec<usize> = vec![0];
125    let mut reachable_scratch: Vec<NfaStateId> = Vec::with_capacity(n_states);
126    while let Some(idx) = worklist.pop() {
127        for byte in 0u16..256 {
128            reachable_scratch.clear();
129            for &state in subsets[idx].iter() {
130                let (off, len) = byte_slice[state as usize][byte as usize];
131                for t in &byte_targets[off as usize..(off + len) as usize] {
132                    reachable_scratch.push(*t);
133                }
134            }
135            if reachable_scratch.is_empty() {
136                transitions[idx * 256 + byte as usize] = INVALID_STATE;
137                continue;
138            }
139            let closure = epsilon_closure(
140                &eps_slice,
141                &eps_targets,
142                &reachable_scratch,
143                n_states,
144                &mut closure_scratch,
145            );
146            let next_idx = match subset_index.get(&closure) {
147                Some(existing) => *existing,
148                None => {
149                    // Check the usize length against MAX_DFA_STATES BEFORE
150                    // casting — a raw `subsets.len() as u32` on a >4G-subset
151                    // build would wrap past the limit and silently allow the
152                    // overflow through the guard.
153                    if subsets.len() as u64 >= u64::from(MAX_DFA_STATES) {
154                        return Err(PatternError::StateOverflow {
155                            limit: MAX_DFA_STATES,
156                        });
157                    }
158                    let new_id = subsets.len() as u32;
159                    subsets.push(closure.clone());
160                    subset_index.insert(closure.clone(), new_id);
161                    extend_row(&mut transitions, &mut accept, &closure, &nfa.accept);
162                    worklist.push(new_id as usize);
163                    new_id
164                }
165            };
166            transitions[idx * 256 + byte as usize] = next_idx;
167        }
168    }
169
170    Ok(Dfa {
171        state_count: subsets.len() as u32,
172        transitions,
173        start: 0,
174        accept,
175    })
176}
177
178fn extend_row(
179    transitions: &mut Vec<u32>,
180    accept: &mut Vec<bool>,
181    closure: &[NfaStateId],
182    nfa_accept: &[bool],
183) {
184    transitions.extend(std::iter::repeat_n(INVALID_STATE, 256));
185    accept.push(closure.iter().any(|s| nfa_accept[*s as usize]));
186}
187
188/// Reusable scratch buffers for [`epsilon_closure`].
189///
190/// DeepPerf H-7: the previous implementation allocated a fresh
191/// `vec![false; n_states]` bitset and a fresh work stack every time
192/// it was called. For pattern sets with many NFA states, subset
193/// construction triggers thousands of closure calls, and each
194/// allocation dominates the hot loop.
195///
196/// Callers own one `ClosureScratch` across the whole subset
197/// construction and pass it by mutable reference; `epsilon_closure`
198/// re-zeros the bitset in O(previous_closure_size) rather than
199/// O(n_states), so the cost is proportional to work done, not to
200/// state-table size.
201pub(crate) struct ClosureScratch {
202    in_closure: Vec<bool>,
203    stack: Vec<NfaStateId>,
204    collected: Vec<NfaStateId>,
205}
206
207impl ClosureScratch {
208    pub(crate) fn with_capacity(n_states: usize) -> Self {
209        Self {
210            in_closure: vec![false; n_states],
211            stack: Vec::with_capacity(n_states.min(64)),
212            collected: Vec::with_capacity(n_states.min(64)),
213        }
214    }
215}
216
217fn epsilon_closure(
218    eps_slice: &[(u32, u32)],
219    eps_targets: &[NfaStateId],
220    seeds: &[NfaStateId],
221    n_states: usize,
222    scratch: &mut ClosureScratch,
223) -> Vec<NfaStateId> {
224    // Widen the bitset if the caller wasn't sized for this NFA (the
225    // per-call path that grew the old `vec![false; n_states]` on
226    // every invocation). After the first call for a given NFA this
227    // branch never fires.
228    if scratch.in_closure.len() < n_states {
229        scratch.in_closure.resize(n_states, false);
230    }
231    // Clear only the bits we know are set — O(previous closure size),
232    // not O(n_states). `collected` from the previous call holds
233    // exactly those indices.
234    for state in scratch.collected.drain(..) {
235        scratch.in_closure[state as usize] = false;
236    }
237    scratch.stack.clear();
238    scratch.stack.extend_from_slice(seeds);
239    while let Some(state) = scratch.stack.pop() {
240        let idx = state as usize;
241        if scratch.in_closure[idx] {
242            continue;
243        }
244        scratch.in_closure[idx] = true;
245        scratch.collected.push(state);
246        let (off, len) = eps_slice[idx];
247        for t in &eps_targets[off as usize..(off + len) as usize] {
248            scratch.stack.push(*t);
249        }
250    }
251    // Return a sorted snapshot; the scratch keeps its `collected`
252    // buffer for the next call's zeroing step.
253    let mut out = scratch.collected.clone();
254    out.sort_unstable();
255    out
256}
257
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pattern::regex_to_nfa::regex_to_nfa;

    /// Run `input` through `dfa` byte by byte; returns true iff the
    /// whole input is consumed and the final state accepts.
    /// (`dfa.go` is the dense-table transition lookup; it yields
    /// `INVALID_STATE` when there is no outgoing edge for the byte.)
    fn run(dfa: &Dfa, input: &[u8]) -> bool {
        let mut state = dfa.start;
        for &b in input {
            let next = dfa.go(state, b);
            if next == INVALID_STATE {
                // Dead transition: no DFA state matches this prefix.
                return false;
            }
            state = next;
        }
        dfa.accept[state as usize]
    }

    // Exact literal: match only the full literal, not prefixes or
    // near-misses.
    #[test]
    fn literal_pattern_accepts_literal_input() {
        let nfa = regex_to_nfa("abc").unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        assert!(run(&dfa, b"abc"));
        assert!(!run(&dfa, b"abd"));
        assert!(!run(&dfa, b"ab"));
    }

    // Alternation: either branch matches, anything else does not.
    #[test]
    fn alternation_accepts_either() {
        let nfa = regex_to_nfa("foo|bar").unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        assert!(run(&dfa, b"foo"));
        assert!(run(&dfa, b"bar"));
        assert!(!run(&dfa, b"baz"));
    }

    // Kleene star: zero or more repetitions, including the empty string.
    #[test]
    fn kleene_star_accepts_repeats() {
        let nfa = regex_to_nfa("a*").unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        assert!(run(&dfa, b""));
        assert!(run(&dfa, b"a"));
        assert!(run(&dfa, b"aaaaa"));
        assert!(!run(&dfa, b"b"));
    }

    // Plus: at least one repetition — empty input must be rejected.
    #[test]
    fn plus_requires_one() {
        let nfa = regex_to_nfa("a+").unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        assert!(!run(&dfa, b""));
        assert!(run(&dfa, b"a"));
        assert!(run(&dfa, b"aaa"));
    }

    // Optional: zero or one occurrence, but never two.
    #[test]
    fn optional_allows_absence() {
        let nfa = regex_to_nfa("ab?c").unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        assert!(run(&dfa, b"ac"));
        assert!(run(&dfa, b"abc"));
        assert!(!run(&dfa, b"abbc"));
    }

    // Character class: every member accepted, non-members rejected.
    #[test]
    fn character_class_accepts_members() {
        let nfa = regex_to_nfa("[a-c]").unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        assert!(run(&dfa, b"a"));
        assert!(run(&dfa, b"b"));
        assert!(run(&dfa, b"c"));
        assert!(!run(&dfa, b"d"));
    }

    // Grouped star: whole-group repetition; a partial group ("ac" after
    // starting "ab") must fail.
    #[test]
    fn group_with_star() {
        let nfa = regex_to_nfa("(ab)*c").unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        assert!(run(&dfa, b"c"));
        assert!(run(&dfa, b"abc"));
        assert!(run(&dfa, b"ababababc"));
        assert!(!run(&dfa, b"ac"));
    }

    // Degenerate pattern: the empty regex accepts exactly the empty
    // string (start state is accepting with no transitions out).
    #[test]
    fn empty_regex_only_accepts_empty() {
        let nfa = regex_to_nfa("").unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        assert!(run(&dfa, b""));
        assert!(!run(&dfa, b"a"));
    }
}