vyre-std 0.1.0

Vyre standard library: GPU DFA assembly pipeline, Aho-Corasick construction, and compositional arithmetic helpers
Documentation
//! Subset-construction NFA → DFA converter.
//!
//! Walks the NFA's epsilon closures to produce a deterministic transition
//! table. Dead states (non-accepting with no reachable accept) are pruned
//! so downstream packing does not spend memory on them.
//!
//! Per perf-audit L.2.1: closures are stored as sorted deduplicated
//! `Vec<NfaStateId>` (not `BTreeSet`) so that dedup via hashmap lookup
//! uses the flat buffer directly — no BTreeSet → Vec key conversion per
//! byte, no allocation hot-path inside the inner 256-byte loop. Edge
//! adjacency is flattened into a per-state slice so the byte transition
//! loop is a linear scan instead of a BTreeMap::get per byte per state.

use std::collections::HashMap;

use super::types::{Dfa, Nfa, NfaStateId, PatternError, INVALID_STATE};

/// Configured ceiling on the DFA state count.
///
/// Matches [`super::aho_corasick_build::MAX_STATES`] so the full assembly
/// pipeline speaks the same limit to its callers.
pub const MAX_DFA_STATES: u32 = 65_535;

/// Convert a Thompson NFA into a minimized-ready DFA via subset construction.
///
/// The returned DFA has a dense 256-wide transition table suitable for
/// passing to [`super::dfa_minimize::dfa_minimize`] and then to
/// [`super::dfa_pack::dfa_pack`].
///
/// # Examples
///
/// ```
/// use vyre_std::pattern::{regex_to_nfa::regex_to_nfa, nfa_to_dfa::nfa_to_dfa};
///
/// let nfa = regex_to_nfa("a|b").unwrap();
/// let dfa = nfa_to_dfa(&nfa).unwrap();
/// assert!(dfa.state_count >= 2);
/// ```
///
/// # Errors
///
/// Returns [`PatternError::StateOverflow`] if the DFA would exceed
/// [`MAX_DFA_STATES`]. This guards against regex inputs whose closure
/// explosion would blow the state budget.
#[inline]
pub fn nfa_to_dfa(nfa: &Nfa) -> Result<Dfa, PatternError> {
    // Flatten edges into per-state slices in a single `Vec` — replaces
    // the `BTreeMap<NfaStateId, Vec<...>>` that the prior code rebuilt
    // into a Vec on every outer-loop iteration. Layout: eps_targets /
    // byte_targets are flat `Vec<NfaStateId>` slabs; eps_slice /
    // byte_slices are `(start, len)` windows into them indexed by
    // state id. Per-byte adjacency is grouped so the 256-byte
    // transition loop scans one slice instead of filtering every edge.
    let n_states = nfa.state_count as usize;
    let mut eps_slice: Vec<(u32, u32)> = vec![(0, 0); n_states];
    let mut byte_slice: Vec<[(u32, u32); 256]> = Vec::with_capacity(n_states);
    byte_slice.resize(n_states, [(0, 0); 256]);
    let mut eps_targets: Vec<NfaStateId> = Vec::with_capacity(nfa.edges.len());
    let mut byte_targets: Vec<NfaStateId> = Vec::with_capacity(nfa.edges.len());
    {
        // Two-pass fill: count per (state, byte) and (state, eps) then
        // scatter into contiguous windows. One allocation each.
        let mut eps_count = vec![0u32; n_states];
        let mut byte_count = vec![[0u32; 256]; n_states];
        for edge in &nfa.edges {
            let from = edge.from as usize;
            match edge.byte {
                None => eps_count[from] += 1,
                Some(b) => byte_count[from][b as usize] += 1,
            }
        }
        let mut eps_cursor = 0u32;
        let mut byte_cursor = 0u32;
        for s in 0..n_states {
            eps_slice[s] = (eps_cursor, eps_count[s]);
            eps_cursor += eps_count[s];
            for b in 0..256usize {
                byte_slice[s][b] = (byte_cursor, byte_count[s][b]);
                byte_cursor += byte_count[s][b];
            }
        }
        eps_targets.resize(eps_cursor as usize, 0);
        byte_targets.resize(byte_cursor as usize, 0);
        let mut eps_fill = vec![0u32; n_states];
        let mut byte_fill = vec![[0u32; 256]; n_states];
        for edge in &nfa.edges {
            let from = edge.from as usize;
            match edge.byte {
                None => {
                    let off = eps_slice[from].0 + eps_fill[from];
                    eps_targets[off as usize] = edge.to;
                    eps_fill[from] += 1;
                }
                Some(b) => {
                    let off = byte_slice[from][b as usize].0 + byte_fill[from][b as usize];
                    byte_targets[off as usize] = edge.to;
                    byte_fill[from][b as usize] += 1;
                }
            }
        }
    }

    // Subsets live as sorted, deduplicated `Vec<NfaStateId>`. Dedup
    // via `HashMap` is O(|closure|) per lookup, not O(log N · |closure|)
    // as with BTreeMap<Vec<NfaStateId>, _>. The hash is std::SipHash —
    // the subset builder is not a hostile-input path, so the default
    // hasher's collision resistance is not load-bearing here.
    let mut closure_scratch = ClosureScratch::with_capacity(n_states);
    let start_closure = epsilon_closure(
        &eps_slice,
        &eps_targets,
        &[nfa.start],
        n_states,
        &mut closure_scratch,
    );
    let mut subsets: Vec<Vec<NfaStateId>> = vec![start_closure.clone()];
    let mut subset_index: HashMap<Vec<NfaStateId>, u32> = HashMap::new();
    subset_index.insert(start_closure.clone(), 0);

    let mut transitions: Vec<u32> = Vec::with_capacity(256);
    let mut accept: Vec<bool> = Vec::with_capacity(1);
    extend_row(&mut transitions, &mut accept, &start_closure, &nfa.accept);

    let mut worklist: Vec<usize> = vec![0];
    let mut reachable_scratch: Vec<NfaStateId> = Vec::with_capacity(n_states);
    while let Some(idx) = worklist.pop() {
        for byte in 0u16..256 {
            reachable_scratch.clear();
            for &state in subsets[idx].iter() {
                let (off, len) = byte_slice[state as usize][byte as usize];
                for t in &byte_targets[off as usize..(off + len) as usize] {
                    reachable_scratch.push(*t);
                }
            }
            if reachable_scratch.is_empty() {
                transitions[idx * 256 + byte as usize] = INVALID_STATE;
                continue;
            }
            let closure = epsilon_closure(
                &eps_slice,
                &eps_targets,
                &reachable_scratch,
                n_states,
                &mut closure_scratch,
            );
            let next_idx = match subset_index.get(&closure) {
                Some(existing) => *existing,
                None => {
                    // Check the usize length against MAX_DFA_STATES BEFORE
                    // casting — a raw `subsets.len() as u32` on a >4G-subset
                    // build would wrap past the limit and silently allow the
                    // overflow through the guard.
                    if subsets.len() as u64 >= u64::from(MAX_DFA_STATES) {
                        return Err(PatternError::StateOverflow {
                            limit: MAX_DFA_STATES,
                        });
                    }
                    let new_id = subsets.len() as u32;
                    subsets.push(closure.clone());
                    subset_index.insert(closure.clone(), new_id);
                    extend_row(&mut transitions, &mut accept, &closure, &nfa.accept);
                    worklist.push(new_id as usize);
                    new_id
                }
            };
            transitions[idx * 256 + byte as usize] = next_idx;
        }
    }

    Ok(Dfa {
        state_count: subsets.len() as u32,
        transitions,
        start: 0,
        accept,
    })
}

fn extend_row(
    transitions: &mut Vec<u32>,
    accept: &mut Vec<bool>,
    closure: &[NfaStateId],
    nfa_accept: &[bool],
) {
    transitions.extend(std::iter::repeat_n(INVALID_STATE, 256));
    accept.push(closure.iter().any(|s| nfa_accept[*s as usize]));
}

/// Reusable scratch buffers for [`epsilon_closure`].
///
/// DeepPerf H-7: the previous implementation allocated a fresh
/// `vec![false; n_states]` bitset and a fresh work stack every time
/// it was called. For pattern sets with many NFA states, subset
/// construction triggers thousands of closure calls, and each
/// allocation dominates the hot loop.
///
/// Callers own one `ClosureScratch` across the whole subset
/// construction and pass it by mutable reference; `epsilon_closure`
/// re-zeros the bitset in O(previous_closure_size) rather than
/// O(n_states), so the cost is proportional to work done, not to
/// state-table size.
pub(crate) struct ClosureScratch {
    in_closure: Vec<bool>,
    stack: Vec<NfaStateId>,
    collected: Vec<NfaStateId>,
}

impl ClosureScratch {
    pub(crate) fn with_capacity(n_states: usize) -> Self {
        Self {
            in_closure: vec![false; n_states],
            stack: Vec::with_capacity(n_states.min(64)),
            collected: Vec::with_capacity(n_states.min(64)),
        }
    }
}

fn epsilon_closure(
    eps_slice: &[(u32, u32)],
    eps_targets: &[NfaStateId],
    seeds: &[NfaStateId],
    n_states: usize,
    scratch: &mut ClosureScratch,
) -> Vec<NfaStateId> {
    // Widen the bitset if the caller wasn't sized for this NFA (the
    // per-call path that grew the old `vec![false; n_states]` on
    // every invocation). After the first call for a given NFA this
    // branch never fires.
    if scratch.in_closure.len() < n_states {
        scratch.in_closure.resize(n_states, false);
    }
    // Clear only the bits we know are set — O(previous closure size),
    // not O(n_states). `collected` from the previous call holds
    // exactly those indices.
    for state in scratch.collected.drain(..) {
        scratch.in_closure[state as usize] = false;
    }
    scratch.stack.clear();
    scratch.stack.extend_from_slice(seeds);
    while let Some(state) = scratch.stack.pop() {
        let idx = state as usize;
        if scratch.in_closure[idx] {
            continue;
        }
        scratch.in_closure[idx] = true;
        scratch.collected.push(state);
        let (off, len) = eps_slice[idx];
        for t in &eps_targets[off as usize..(off + len) as usize] {
            scratch.stack.push(*t);
        }
    }
    // Return a sorted snapshot; the scratch keeps its `collected`
    // buffer for the next call's zeroing step.
    let mut out = scratch.collected.clone();
    out.sort_unstable();
    out
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::pattern::regex_to_nfa::regex_to_nfa;

    fn run(dfa: &Dfa, input: &[u8]) -> bool {
        let mut state = dfa.start;
        for &b in input {
            let next = dfa.go(state, b);
            if next == INVALID_STATE {
                return false;
            }
            state = next;
        }
        dfa.accept[state as usize]
    }

    #[test]
    fn literal_pattern_accepts_literal_input() {
        let nfa = regex_to_nfa("abc").unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        assert!(run(&dfa, b"abc"));
        assert!(!run(&dfa, b"abd"));
        assert!(!run(&dfa, b"ab"));
    }

    #[test]
    fn alternation_accepts_either() {
        let nfa = regex_to_nfa("foo|bar").unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        assert!(run(&dfa, b"foo"));
        assert!(run(&dfa, b"bar"));
        assert!(!run(&dfa, b"baz"));
    }

    #[test]
    fn kleene_star_accepts_repeats() {
        let nfa = regex_to_nfa("a*").unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        assert!(run(&dfa, b""));
        assert!(run(&dfa, b"a"));
        assert!(run(&dfa, b"aaaaa"));
        assert!(!run(&dfa, b"b"));
    }

    #[test]
    fn plus_requires_one() {
        let nfa = regex_to_nfa("a+").unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        assert!(!run(&dfa, b""));
        assert!(run(&dfa, b"a"));
        assert!(run(&dfa, b"aaa"));
    }

    #[test]
    fn optional_allows_absence() {
        let nfa = regex_to_nfa("ab?c").unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        assert!(run(&dfa, b"ac"));
        assert!(run(&dfa, b"abc"));
        assert!(!run(&dfa, b"abbc"));
    }

    #[test]
    fn character_class_accepts_members() {
        let nfa = regex_to_nfa("[a-c]").unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        assert!(run(&dfa, b"a"));
        assert!(run(&dfa, b"b"));
        assert!(run(&dfa, b"c"));
        assert!(!run(&dfa, b"d"));
    }

    #[test]
    fn group_with_star() {
        let nfa = regex_to_nfa("(ab)*c").unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        assert!(run(&dfa, b"c"));
        assert!(run(&dfa, b"abc"));
        assert!(run(&dfa, b"ababababc"));
        assert!(!run(&dfa, b"ac"));
    }

    #[test]
    fn empty_regex_only_accepts_empty() {
        let nfa = regex_to_nfa("").unwrap();
        let dfa = nfa_to_dfa(&nfa).unwrap();
        assert!(run(&dfa, b""));
        assert!(!run(&dfa, b"a"));
    }
}