// Portable GPU kernel for string_similarity.ngram_extract.
//
// Bindings:
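// 0 params: len_a (input length in bytes), len_b (n-gram length in bytes),
//           param_c (output stride in u32 words per n-gram), param_d (not read by this kernel)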
// 1 input_words: little-endian packed bytes
// 2 output_words: each n-gram copied into output_stride_words packed u32 words
// NOTE(review): the kernel below reads `params` (binding 0 per the header), but
// no declaration for it appears in this part of the file - confirm it is
// declared elsewhere or prepended by the host.

// Read-only source text: bytes packed four per u32, lowest-order byte first.
@group(0) @binding(1) var<storage, read> input_words: array<u32>;
// Destination buffer: extracted n-gram bytes are packed into these words with
// the same four-bytes-per-u32 layout.
@group(0) @binding(2) var<storage, read_write> output_words: array<u32>;
// Returns the byte stored at byte position `index` of `input_words`.
// Bytes are packed four per word, with byte 0 in the low 8 bits.
fn byte_at(index: u32) -> u32 {
    let packed = input_words[index / 4u];
    // Position of the wanted byte inside the word, in bits (0, 8, 16 or 24).
    let bit_offset = (index % 4u) * 8u;
    return (packed >> bit_offset) & 255u;
}
// Writes the low 8 bits of `byte` into `output_words` at byte position
// `offset`, counted from the start of word `base_word`. The other three bytes
// of the affected word keep their previous contents.
// This is a plain (non-atomic) read-modify-write of one word.
fn store_byte(base_word: u32, offset: u32, byte: u32) {
    let slot = base_word + offset / 4u;
    let bit_offset = (offset % 4u) * 8u;
    // Clear the target byte lane, then merge the new value into it.
    let cleared = output_words[slot] & ~(0xffu << bit_offset);
    output_words[slot] = cleared | ((byte & 0xffu) << bit_offset);
}
// Entry point: one invocation per n-gram. Invocation `id.x` copies the
// `params.len_b` input bytes starting at byte `id.x` into the output slot that
// begins at word `id.x * params.param_c`.
//
// The invocation writes nothing when:
//   - the n-gram length or the output stride is zero, or the n-gram is longer
//     than the input;
//   - the stride is too small to hold one n-gram (neighbouring invocations
//     would otherwise read-modify-write the same words without any
//     synchronisation);
//   - `id.x` is past the last n-gram (index len_a - len_b);
//   - the n-gram's words would not fit inside the bound output buffer.
// Bytes of the slot beyond `len_b` are left as they were.
@compute @workgroup_size(256, 1, 1)
fn string_similarity_ngram_extract(@builtin(global_invocation_id) id: vec3<u32>) {
    if (params.len_b == 0u || params.len_b > params.len_a || params.param_c == 0u) {
        return;
    }

    // Words needed for one n-gram, i.e. ceil(len_b / 4). Written with a
    // subtraction first so it cannot overflow; len_b != 0 is guaranteed above.
    let words_per_gram = ((params.len_b - 1u) >> 2u) + 1u;
    if (words_per_gram > params.param_c) {
        // Slots would overlap and adjacent invocations would race on shared words.
        return;
    }

    let gram_index = id.x;
    if (gram_index > params.len_a - params.len_b) {
        return;
    }

    // Reject slots that do not fit in the bound buffer. Dividing instead of
    // multiplying keeps the comparison free of u32 wrap-around, and it also
    // guarantees that `gram_index * param_c` below cannot overflow.
    let capacity = arrayLength(&output_words);
    if (capacity < words_per_gram) {
        return;
    }
    if (gram_index > (capacity - words_per_gram) / params.param_c) {
        return;
    }

    let base = gram_index * params.param_c;
    for (var offset = 0u; offset < params.len_b; offset = offset + 1u) {
        store_byte(base, offset, byte_at(gram_index + offset));
    }
}