// Portable GPU kernel for string_matching.kmp_find.
// This parallel verifier preserves KMP's first-offset contract.
@group(0) @binding(1) var<storage, read> haystack_words: array<u32>;
@group(0) @binding(2) var<storage, read> needle_words: array<u32>;
@group(0) @binding(3) var<storage, read_write> result: atomic<u32>;
fn packed_byte(words: ptr<storage, array<u32>, read>, index: u32) -> u32 {
return ((*words)[index >> 2u] >> ((index & 3u) << 3u)) & 0xffu;
}
fn matches_at(start: u32) -> bool {
var i = 0u;
loop {
if (i >= params.len_b) { break; }
if (packed_byte(&haystack_words, start + i) != packed_byte(&needle_words, i)) { return false; }
i = i + 1u;
}
return true;
}
@compute @workgroup_size(256, 1, 1)
fn string_matching_kmp_find(@builtin(global_invocation_id) id: vec3<u32>) {
if (params.len_b == 0u) { atomicMin(&result, 0u); return; }
if (params.len_b > params.len_a) { return; }
let start = id.x;
if (start <= params.len_a - params.len_b && start < atomicLoad(&result) && matches_at(start)) {
atomicMin(&result, start);
}
}