vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
use super::{match_from, parse_program, MAX_NFA_STATES_VISITED};
use crate::ops::string_matching::search_contract::{
    to_u32_offset, validate_haystack, validate_needle, Match, MatchError,
};

/// Run a compact bytecode NFA program over `input` and collect every match.
///
/// The program is a flat byte stream of opcodes:
///
/// - `1, byte` — a literal byte
/// - `2` — any byte
/// - `3, byte` — zero or more of one literal byte
/// - `4` — zero or more of any byte
/// - `255` — accept
///
/// A match is attempted anchored at each start offset from `0` through
/// `input.len()` inclusive; every reported match carries pattern id `0`.
///
/// # Errors
///
/// Returns a `Fix: ...` message when the program or input exceeds the
/// documented T47 cap, when the search-state budget
/// `(input.len() + 1) * max(nfa_program.len(), 1)` overflows or is larger than
/// `MAX_NFA_STATES_VISITED`, or when the bytecode is malformed. Errors from
/// converting a match offset with `to_u32_offset` are propagated as well.
pub fn nfa_scan(nfa_program: &[u8], input: &[u8]) -> Result<Vec<Match>, MatchError> {
    validate_needle(nfa_program)?;
    validate_haystack(input)?;

    // An empty program still counts as width one, so the budget scales with
    // the number of start positions even in that case.
    let program_width = nfa_program.len().max(1);
    let states = match input
        .len()
        .checked_add(1)
        .and_then(|positions| positions.checked_mul(program_width))
    {
        Some(total) => total,
        None => {
            return Err(
                "Fix: reduce nfa_program or input length; NFA state budget overflowed".to_string(),
            );
        }
    };
    if states > MAX_NFA_STATES_VISITED {
        return Err(format!(
            "Fix: reduce nfa_program*input search states to <= {MAX_NFA_STATES_VISITED}, got {states}"
        ));
    }

    let tokens = parse_program(nfa_program)?;

    // The chain is lazy and collecting into `Result` stops at the first
    // failed offset conversion, so no later start offset is probed after an
    // error.
    (0..=input.len())
        .filter_map(|start| match_from(&tokens, input, start).map(|end| (start, end)))
        .map(|(start, end)| -> Result<Match, MatchError> {
            Ok(Match {
                pattern_id: 0,
                start: to_u32_offset(start)?,
                end: to_u32_offset(end)?,
            })
        })
        .collect()
}