// vyre_std/pattern/dfa_minimize.rs
1//! Hopcroft's DFA minimization algorithm.
2//!
3//! Partition refinement yields the unique minimal DFA accepting the
4//! same language. The output is canonical up to state-id relabeling,
5//! so `minimize(minimize(d)) == minimize(d)` holds bytewise after both
6//! runs relabel starting from the start state.
7//!
8//! Per perf-audit L.2.2 (`audits/perf-kimi-AUDIT.md` finding 1): the
9//! prior implementation was the textbook Hopcroft shape, but computed
10//! the predecessor set `x` for every (splitter, byte) pair by scanning
11//! **all** states — O(|Σ|·N²) per refinement step. For 10k literal
12//! patterns that's ~30 billion state inspections and 632 s assembly
13//! time; `regex-automata` does the same work in <50 ms.
14//!
15//! This implementation:
16//!
17//! - Builds an inverse transition table
18//!   `pred: Vec<Vec<(byte, state)>>` once so the predecessor set for
19//!   `(splitter, byte)` is O(|preds|), not O(N).
20//! - Keeps partitions as **dense arrays** indexed by class id rather
21//!   than `BTreeSet<u32>` clones. Each state has a `class_of[state]`
22//!   entry; each class stores the backing `Vec<u32>` of its members
23//!   and a `len` snapshot used by `refine_in_place`.
24//! - Uses a `VecDeque<u32>` worklist of class ids, not a
25//!   `VecDeque<BTreeSet<u32>>` of copied sets.
26//!
27//! The asymptotic bound is now O(|Σ|·N·log N), which matches the
28//! published Hopcroft cost model.
29
30use std::collections::VecDeque;
31
32use super::types::{Dfa, INVALID_STATE};
33
34/// Minimize a DFA via partition refinement (Hopcroft's algorithm).
35///
36/// The returned DFA is the unique minimal equivalent; state ids are
37/// renumbered so that the start state is `0`.
38///
39/// # Examples
40///
41/// ```
42/// use vyre_std::pattern::{regex_to_nfa::regex_to_nfa, nfa_to_dfa::nfa_to_dfa, dfa_minimize::dfa_minimize};
43///
44/// let nfa = regex_to_nfa("a|aa").unwrap();
45/// let dfa = nfa_to_dfa(&nfa).unwrap();
46/// let minimized = dfa_minimize(&dfa);
47/// assert!(minimized.state_count <= dfa.state_count);
48/// ```
49#[must_use]
50#[inline]
51pub fn dfa_minimize(dfa: &Dfa) -> Dfa {
52    let state_count = dfa.state_count as usize;
53    if state_count == 0 {
54        return dfa.clone();
55    }
56
57    // Implicit dead state at index `state_count` so every byte from
58    // every state has a defined target. The dead class is pruned at
59    // the end.
60    let dead = state_count as u32;
61    let total_states = state_count + 1;
62
63    // Forward transition table with the dead-state row appended so we
64    // can walk every (state, byte) without a conditional. Redirects
65    // INVALID_STATE transitions into the dead state.
66    let mut trans = vec![dead; total_states * 256];
67    for s in 0..state_count {
68        for b in 0..256usize {
69            let t = dfa.transitions[s * 256 + b];
70            trans[s * 256 + b] = if t == INVALID_STATE { dead } else { t };
71        }
72    }
73    // Dead state self-loops on every byte by construction (already `dead`).
74
75    // Inverse transition table: for each byte `b`, pred[b] is a flat
76    // Vec<u32> concatenating the predecessors of every state, with
77    // `pred_slice[b][state]` giving the (start, len) window. The
78    // "predecessors of s under byte b" are the states `p` with
79    // `trans[p * 256 + b] == s`.
80    // Flat pred_head[byte * total_states + state] = (start, len)
81    // layout, avoiding the Vec<Vec<..>> 2D allocation that a naive
82    // inverse transition table would require.
83    let mut pred_head: Vec<(u32, u32)> = vec![(0, 0); 256 * total_states];
84    let mut pred_flat: Vec<u32>;
85    {
86        let mut count = vec![0u32; 256 * total_states];
87        for p in 0..total_states as u32 {
88            for b in 0..256usize {
89                let t = trans[p as usize * 256 + b];
90                count[b * total_states + t as usize] += 1;
91            }
92        }
93        let mut cursor = 0u32;
94        for i in 0..(256 * total_states) {
95            pred_head[i] = (cursor, count[i]);
96            cursor += count[i];
97        }
98        pred_flat = vec![0u32; cursor as usize];
99        let mut fill = vec![0u32; 256 * total_states];
100        for p in 0..total_states as u32 {
101            for b in 0..256usize {
102                let t = trans[p as usize * 256 + b];
103                let slot = b * total_states + t as usize;
104                let off = pred_head[slot].0 + fill[slot];
105                pred_flat[off as usize] = p;
106                fill[slot] += 1;
107            }
108        }
109    }
110    // Partition membership. `class_of[s]` is the current class id of
111    // state `s`. Each class owns a `Vec<u32>` of its members.
112    let mut accept_class: Vec<u32> = Vec::with_capacity(state_count);
113    let mut non_accept_class: Vec<u32> = Vec::with_capacity(total_states);
114    for s in 0..state_count {
115        if dfa.accept[s] {
116            accept_class.push(s as u32);
117        } else {
118            non_accept_class.push(s as u32);
119        }
120    }
121    non_accept_class.push(dead);
122
123    let mut classes: Vec<Vec<u32>> = Vec::with_capacity(total_states);
124    let mut class_of: Vec<u32> = vec![0; total_states];
125    if !accept_class.is_empty() {
126        let id = classes.len() as u32;
127        for &s in &accept_class {
128            class_of[s as usize] = id;
129        }
130        classes.push(accept_class);
131    }
132    if !non_accept_class.is_empty() {
133        let id = classes.len() as u32;
134        for &s in &non_accept_class {
135            class_of[s as usize] = id;
136        }
137        classes.push(non_accept_class);
138    }
139
140    let mut worklist: VecDeque<u32> = VecDeque::new();
141    let mut on_worklist = vec![false; classes.len()];
142    // Smaller initial class in the worklist.
143    if classes.len() == 2 {
144        let smaller = if classes[0].len() <= classes[1].len() {
145            0
146        } else {
147            1
148        };
149        worklist.push_back(smaller as u32);
150        on_worklist[smaller] = true;
151    } else if classes.len() == 1 {
152        worklist.push_back(0);
153        on_worklist[0] = true;
154    }
155
156    // Scratch buffers reused across splitter iterations to avoid
157    // per-(splitter, byte) allocations.
158    let mut hit = vec![false; classes.len()];
159    let mut x_set = vec![false; total_states];
160    let mut intersect_buf: Vec<u32> = Vec::with_capacity(total_states);
161    let mut x_list: Vec<u32> = Vec::with_capacity(total_states);
162
163    while let Some(splitter_id) = worklist.pop_front() {
164        if (splitter_id as usize) >= on_worklist.len() {
165            continue;
166        }
167        on_worklist[splitter_id as usize] = false;
168        for byte in 0u16..256 {
169            let b_idx = byte as usize;
170            // Compute x = { p | trans[p, byte] ∈ splitter } via
171            // inverse-transition lookup instead of scanning all states.
172            for &s in &classes[splitter_id as usize] {
173                let (off, len) = pred_head[b_idx * total_states + s as usize];
174                for p in &pred_flat[off as usize..(off + len) as usize] {
175                    if !x_set[*p as usize] {
176                        x_set[*p as usize] = true;
177                    }
178                }
179            }
180            x_list.clear();
181            x_list.extend((0..total_states as u32).filter(|p| x_set[*p as usize]));
182
183            // For every class that has a non-empty, non-full
184            // intersection with x, split it in place.
185            hit.resize(classes.len(), false);
186            hit.iter_mut().for_each(|h| *h = false);
187            for &p in &x_list {
188                let c = class_of[p as usize] as usize;
189                if !hit[c] {
190                    hit[c] = true;
191                }
192            }
193
194            for c in 0..classes.len() {
195                if !hit[c] {
196                    continue;
197                }
198                intersect_buf.clear();
199                classes[c].retain(|&s| {
200                    if x_set[s as usize] {
201                        intersect_buf.push(s);
202                        false
203                    } else {
204                        true
205                    }
206                });
207                if intersect_buf.is_empty() || classes[c].is_empty() {
208                    // Either no split or the class moved wholesale
209                    // into intersect_buf; restore and continue.
210                    if classes[c].is_empty() {
211                        classes[c] = std::mem::take(&mut intersect_buf);
212                    } else {
213                        classes[c].append(&mut intersect_buf);
214                    }
215                    continue;
216                }
217
218                let new_id = classes.len() as u32;
219                let intersect = std::mem::take(&mut intersect_buf);
220                for &s in &intersect {
221                    class_of[s as usize] = new_id;
222                }
223                classes.push(intersect);
224                on_worklist.push(false);
225                hit.push(false);
226
227                // Worklist update: if the original class was already
228                // queued, add the new class too (both halves must
229                // eventually act as splitters). Otherwise enqueue the
230                // smaller half only — the classic Hopcroft
231                // optimization that gives the N·log N bound.
232                if on_worklist[c] {
233                    worklist.push_back(new_id);
234                    on_worklist[new_id as usize] = true;
235                } else {
236                    let enqueue = if classes[c].len() <= classes[new_id as usize].len() {
237                        c as u32
238                    } else {
239                        new_id
240                    };
241                    worklist.push_back(enqueue);
242                    on_worklist[enqueue as usize] = true;
243                }
244            }
245
246            // Reset x_set for the next byte. Scanning x_list is cheap
247            // compared to a full total_states clear.
248            for &p in &x_list {
249                x_set[p as usize] = false;
250            }
251        }
252    }
253
254    // Compute canonical class ids: 0 for the start class, then BFS
255    // order. The dead class is pruned.
256    let dead_class = class_of[dead as usize];
257    let start_class = class_of[dfa.start as usize];
258
259    let mut canonical: Vec<i64> = vec![-1; classes.len()];
260    canonical[start_class as usize] = 0;
261    let mut queue: VecDeque<u32> = VecDeque::new();
262    queue.push_back(start_class);
263    let mut next_id: u32 = 1;
264    while let Some(class_id) = queue.pop_front() {
265        let representative = match classes[class_id as usize].iter().find(|s| **s != dead) {
266            Some(&s) => s,
267            None => continue,
268        };
269        for byte in 0u8..=255 {
270            let target = trans[representative as usize * 256 + byte as usize];
271            if target == dead {
272                continue;
273            }
274            let target_class = class_of[target as usize];
275            if canonical[target_class as usize] < 0 {
276                canonical[target_class as usize] = i64::from(next_id);
277                queue.push_back(target_class);
278                next_id += 1;
279            }
280        }
281    }
282
283    let final_state_count = next_id;
284    let mut transitions = vec![INVALID_STATE; (final_state_count as usize) * 256];
285    let mut accept = vec![false; final_state_count as usize];
286
287    for (class_id, &canon) in canonical.iter().enumerate() {
288        if canon < 0 {
289            continue;
290        }
291        let canon = canon as u32;
292        let representative = match classes[class_id].iter().find(|s| **s != dead) {
293            Some(&s) => s,
294            None => continue,
295        };
296        if dfa.accept[representative as usize] {
297            accept[canon as usize] = true;
298        }
299        for byte in 0u8..=255 {
300            let target = trans[representative as usize * 256 + byte as usize];
301            if target == dead {
302                continue;
303            }
304            let target_class = class_of[target as usize];
305            if target_class == dead_class {
306                continue;
307            }
308            transitions[(canon as usize) * 256 + byte as usize] =
309                canonical[target_class as usize] as u32;
310        }
311    }
312
313    Dfa {
314        state_count: final_state_count,
315        transitions,
316        start: 0,
317        accept,
318    }
319}
320
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pattern::{nfa_to_dfa::nfa_to_dfa, regex_to_nfa::regex_to_nfa};

    /// Drive `dfa` over `input`; true iff it ends in an accepting state.
    fn run(dfa: &Dfa, input: &[u8]) -> bool {
        let mut state = dfa.start;
        for &byte in input {
            state = match dfa.go(state, byte) {
                INVALID_STATE => return false,
                next => next,
            };
        }
        dfa.accept[state as usize]
    }

    /// Compile `pattern` through the full pipeline and return
    /// (unminimized, minimized) DFAs.
    fn build(pattern: &str) -> (Dfa, Dfa) {
        let nfa = regex_to_nfa(pattern).unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        let min = dfa_minimize(&dfa);
        (dfa, min)
    }

    #[test]
    fn minimize_preserves_language_literal() {
        let (dfa, min) = build("hello");
        assert!(run(&min, b"hello"));
        assert!(!run(&min, b"world"));
        assert!(min.state_count <= dfa.state_count);
    }

    #[test]
    fn minimize_is_idempotent() {
        let (_dfa, once) = build("a|aa|aaa");
        let twice = dfa_minimize(&once);
        assert_eq!(once, twice, "Hopcroft output must be canonical");
    }

    #[test]
    fn minimize_alternation() {
        let (_dfa, min) = build("foo|bar|baz");
        assert!(run(&min, b"foo"));
        assert!(run(&min, b"bar"));
        assert!(run(&min, b"baz"));
        assert!(!run(&min, b"qux"));
    }

    #[test]
    fn minimize_collapses_equivalent_states() {
        let (dfa, min) = build("(a|b)(c|d)");
        assert!(run(&min, b"ac"));
        assert!(run(&min, b"bd"));
        assert!(!run(&min, b"ab"));
        assert!(min.state_count <= dfa.state_count);
    }

    #[test]
    fn minimize_empty_language() {
        let (_dfa, min) = build("");
        assert!(run(&min, b""));
        assert!(!run(&min, b"x"));
    }

    #[test]
    fn minimize_kleene_star() {
        let (_dfa, min) = build("a*");
        assert!(run(&min, b""));
        assert!(run(&min, b"aaaaa"));
        assert!(!run(&min, b"b"));
    }
}