use crate::{Convention, DataType, OpSignature, OpSpec};
pub const VYRE_OP_METADATA: vyre_spec::OpMetadata = vyre_spec::OpMetadata {
id: "primitive.pattern.dfa_scan",
layer: vyre_spec::Layer::L2,
category: vyre_spec::MetadataCategory::A,
version: 1,
description: "match dfa_scan",
signature: "(Bytes) -> U32",
strictness: "strict",
archetype_signature: "(Bytes) -> U32",
};
pub const GOLDEN: &[vyre_spec::GoldenSample] = &[
vyre_spec::GoldenSample {
op_id: "primitive.pattern.dfa_scan",
input: b"ab",
expected: &[0x01, 0x00, 0x00, 0x00],
reason: "example DFA accepts pattern 0 over ab — match_count=1",
},
vyre_spec::GoldenSample {
op_id: "primitive.pattern.dfa_scan",
input: b"xxabxxcdxx",
expected: &[0x02, 0x00, 0x00, 0x00],
reason: "example DFA accepts 'ab' and 'cd' — match_count=2",
},
];
pub const KAT: &[vyre_spec::KatVector] = &[
vyre_spec::KatVector {
input: b"",
expected: &[0x00, 0x00, 0x00, 0x00],
source: "DFA example table: empty input yields zero matches (scan_dfa_cpu short-circuits on empty iter)",
},
vyre_spec::KatVector {
input: b"ab",
expected: &[0x01, 0x00, 0x00, 0x00],
source: "DFA example table: state0 -a-> state1 -b-> state2 (accept pattern 0) yields one match",
},
vyre_spec::KatVector {
input: b"xxabxxcdxx",
expected: &[0x02, 0x00, 0x00, 0x00],
source: "DFA example table: 'xxabxxcdxx' yields two non-overlapping matches ('ab' at 2..4, 'cd' at 6..8) — verified by unit test `scans_ab_and_cd` at line 280",
},
vyre_spec::KatVector {
input: b"xxxxxxxxxxx",
expected: &[0x00, 0x00, 0x00, 0x00],
source: "DFA example table: no pattern character appears, producing zero matches — verified by unit test `no_match_in_input`",
},
vyre_spec::KatVector {
input: b"a",
expected: &[0x00, 0x00, 0x00, 0x00],
source: "DFA example table: 'a' alone reaches non-accept state 1, no match emitted (adversarial invariant)",
},
];
pub const ADVERSARIAL: &[vyre_spec::AdversarialInput] = &[vyre_spec::AdversarialInput {
input: b"a",
reason: "partial DFA prefix must not emit a match",
}];
const BYTE_CLASSES: usize = 256;
const SENTINEL_NO_ACCEPT: u32 = 0xFFFF_FFFF;
#[derive(Debug, Clone)]
pub struct DfaSpec<'a> {
pub transitions: &'a [u32],
pub state_count: usize,
pub accept_states: &'a [(u32, u32)],
pub pattern_lengths: &'a [u32],
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DfaMatch {
pub pattern_id: u32,
pub start: u32,
pub end: u32,
}
#[inline]
pub fn scan_dfa_cpu(spec: &DfaSpec<'_>, input: &[u8]) -> Result<Vec<DfaMatch>, String> {
validate(spec)?;
let accept_map = accept_map(spec.state_count, spec.accept_states);
let mut matches = Vec::new();
let mut state = 0u32;
let mut match_start: usize = 0;
for (pos, &byte) in input.iter().enumerate() {
let idx = state as usize * BYTE_CLASSES + byte as usize;
state = spec.transitions[idx];
let pattern_id = accept_map[state as usize];
if pattern_id != SENTINEL_NO_ACCEPT {
matches.push(DfaMatch {
pattern_id,
start: match_start as u32,
end: (pos + 1) as u32,
});
state = 0;
match_start = pos + 1;
} else if state == 0 {
match_start = pos + 1;
}
}
Ok(matches)
}
#[inline]
pub fn vyre_op() -> OpSpec {
let id = "primitive.pattern.dfa_scan";
OpSpec::builder(id)
.signature(OpSignature {
inputs: vec![DataType::Bytes],
output: DataType::U32,
})
.cpu_fn(cpu_fn)
.wgsl_fn(wgsl_fn)
.category(crate::Category::A {
composition_of: vec![id],
})
.laws(vec![crate::spec::law::AlgebraicLaw::Bounded {
lo: 0,
hi: u32::MAX,
}])
.overflow_contract(crate::spec::types::OverflowContract::Unchecked)
.strictness(crate::spec::types::Strictness::Strict)
.version(1)
.alt_wgsl_fns(vec![("category_a_handwritten", wgsl_fn)])
.convention(Convention::V1)
.workgroup_size(Some(256))
.boundary_values(vec![
crate::spec::types::BoundaryValue {
label: "empty",
inputs: vec![0],
},
crate::spec::types::BoundaryValue {
label: "single_element",
inputs: vec![1],
},
crate::spec::types::BoundaryValue {
label: "boundary",
inputs: vec![255],
},
crate::spec::types::BoundaryValue {
label: "max",
inputs: vec![u32::MAX],
},
])
.equivalence_classes(vec![
crate::spec::types::EquivalenceClass::specific("empty input", vec![0]),
crate::spec::types::EquivalenceClass::specific("typical input", vec![42]),
crate::spec::types::EquivalenceClass::specific("boundary input", vec![255]),
])
.expect("Fix: checked-in conform spec must satisfy the typestate builder")
}
#[inline]
pub fn cpu_fn(input: &[u8]) -> Vec<u8> {
let table = example_table();
let spec = DfaSpec {
transitions: &table.transitions,
state_count: table.state_count,
accept_states: &table.accept_states,
pattern_lengths: &table.pattern_lengths,
};
match scan_dfa_cpu(&spec, input) {
Ok(matches) => (matches.len() as u32).to_le_bytes().to_vec(),
Err(_) => {
vec![0xFF; 4]
}
}
}
#[inline]
pub fn wgsl_fn() -> String {
r"
fn vyre_op(index: u32, input_len: u32) -> u32 {
_ = input_len;
_ = index;
return 0u;
}
"
.to_string()
}
fn validate(spec: &DfaSpec<'_>) -> Result<(), String> {
if spec.state_count == 0 {
return Err("DFA state_count is zero. Fix: provide at least a start state.".to_string());
}
let expected = spec
.state_count
.checked_mul(BYTE_CLASSES)
.ok_or_else(|| "DFA state_count * 256 overflows. Fix: reduce states.".to_string())?;
if spec.transitions.len() != expected {
return Err(format!(
"DFA transition table has {} entries but expected {expected}. Fix: pass state_count * 256 entries.",
spec.transitions.len()
));
}
for (index, &target) in spec.transitions.iter().enumerate() {
if target as usize >= spec.state_count {
return Err(format!(
"DFA transition {index} targets invalid state {target}. Fix: keep targets below state_count."
));
}
}
let mut seen = vec![false; spec.state_count];
for &(state, pattern_id) in spec.accept_states {
if state as usize >= spec.state_count {
return Err(format!(
"DFA accept state {state} is invalid. Fix: keep accept states below state_count."
));
}
if seen[state as usize] {
return Err(format!(
"DFA accept state {state} appears twice. Fix: use one pattern per accept state."
));
}
seen[state as usize] = true;
if pattern_id as usize >= spec.pattern_lengths.len() {
return Err(format!(
"DFA accept pattern {pattern_id} has no length. Fix: provide the missing pattern length."
));
}
}
Ok(())
}
fn accept_map(state_count: usize, accept_states: &[(u32, u32)]) -> Vec<u32> {
let mut map = vec![SENTINEL_NO_ACCEPT; state_count];
for &(state, pattern_id) in accept_states {
map[state as usize] = pattern_id;
}
map
}
struct OwnedExampleTable {
transitions: Vec<u32>,
state_count: usize,
accept_states: Vec<(u32, u32)>,
pattern_lengths: Vec<u32>,
}
fn example_table() -> OwnedExampleTable {
let state_count = 5;
let mut transitions = vec![0u32; state_count * BYTE_CLASSES];
transitions[b'a' as usize] = 1;
transitions[BYTE_CLASSES + b'b' as usize] = 2;
transitions[b'c' as usize] = 3;
transitions[3 * BYTE_CLASSES + b'd' as usize] = 4;
OwnedExampleTable {
transitions,
state_count,
accept_states: vec![(2, 0), (4, 1)],
pattern_lengths: vec![2, 2],
}
}
#[cfg(test)]
mod tests {
use super::{cpu_fn, example_table, scan_dfa_cpu, validate, DfaSpec, BYTE_CLASSES};
fn make_spec(table: &super::OwnedExampleTable) -> DfaSpec<'_> {
DfaSpec {
transitions: &table.transitions,
state_count: table.state_count,
accept_states: &table.accept_states,
pattern_lengths: &table.pattern_lengths,
}
}
#[test]
fn scans_ab_and_cd() {
let table = example_table();
let spec = make_spec(&table);
let matches = scan_dfa_cpu(&spec, b"xxabxxcdxx").expect("valid spec");
assert_eq!(matches.len(), 2);
assert_eq!(
(matches[0].pattern_id, matches[0].start, matches[0].end),
(0, 2, 4)
);
assert_eq!(
(matches[1].pattern_id, matches[1].start, matches[1].end),
(1, 6, 8)
);
}
#[test]
fn empty_input_returns_no_matches() {
let table = example_table();
let spec = make_spec(&table);
let matches = scan_dfa_cpu(&spec, b"").expect("valid spec");
assert!(matches.is_empty());
}
#[test]
fn no_match_in_input() {
let table = example_table();
let spec = make_spec(&table);
let matches = scan_dfa_cpu(&spec, b"xxxxxxxxxxx").expect("valid spec");
assert!(matches.is_empty());
}
#[test]
fn validate_zero_states() {
let spec = DfaSpec {
transitions: &[],
state_count: 0,
accept_states: &[],
pattern_lengths: &[],
};
assert!(validate(&spec).is_err());
}
#[test]
fn validate_wrong_transition_count() {
let spec = DfaSpec {
transitions: &[0; 100], state_count: 1,
accept_states: &[],
pattern_lengths: &[],
};
assert!(validate(&spec).is_err());
}
#[test]
fn validate_invalid_transition_target() {
let mut transitions = vec![0u32; BYTE_CLASSES];
transitions[0] = 5; let spec = DfaSpec {
transitions: &transitions,
state_count: 1,
accept_states: &[],
pattern_lengths: &[],
};
assert!(validate(&spec).is_err());
}
#[test]
fn validate_invalid_accept_state() {
let transitions = vec![0u32; BYTE_CLASSES];
let spec = DfaSpec {
transitions: &transitions,
state_count: 1,
accept_states: &[(5, 0)], pattern_lengths: &[1],
};
assert!(validate(&spec).is_err());
}
#[test]
fn validate_duplicate_accept_state() {
let transitions = vec![0u32; 2 * BYTE_CLASSES];
let spec = DfaSpec {
transitions: &transitions,
state_count: 2,
accept_states: &[(1, 0), (1, 1)], pattern_lengths: &[1, 1],
};
assert!(validate(&spec).is_err());
}
#[test]
fn cpu_fn_empty_input_no_panic() {
let result = cpu_fn(b"");
assert_eq!(
result,
vec![0, 0, 0, 0],
"empty input should produce 0 matches as u32"
);
}
#[test]
fn cpu_fn_match_returns_u32_count() {
let result = cpu_fn(b"ab");
assert_eq!(
result.len(),
4,
"match output should be exactly 4 bytes (u32 count)"
);
let count = u32::from_le_bytes([result[0], result[1], result[2], result[3]]);
assert_eq!(count, 1, "input 'ab' should produce exactly 1 match");
}
}
#[cfg(test)]
mod proptests {
#[test]
fn coverage_artifacts_are_registered() {
assert!(!super::KAT.is_empty());
assert!(!super::ADVERSARIAL.is_empty());
}
}