vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
// Portable GPU kernel for string_similarity.hamming.
// Bindings: 0 params(len_a, len_b), 1 a words, 2 b words, 3 atomic distance, 4 status.
// NOTE(review): the entry point reads `params.len_a` / `params.len_b`, but no
// declaration for `params` (binding 0 per the header comment) is visible in this
// text — presumably it is supplied elsewhere; confirm before compiling standalone.

// First input string: bytes packed four per u32 word, byte 0 in the low 8 bits.
@group(0) @binding(1) var<storage, read> a_words: array<u32>;
// Second input string, packed the same way as `a_words`.
@group(0) @binding(2) var<storage, read> b_words: array<u32>;
// Running count of mismatching byte positions; only ever incremented atomically here.
@group(0) @binding(3) var<storage, read_write> distance: atomic<u32>;
// status[0] is set to 1u by the kernel when the two lengths differ.
@group(0) @binding(4) var<storage, read_write> status: array<u32>;
// Extracts one byte from a packed 32-bit word.
//
// `word`  - the u32 that contains the byte (i.e. element `index >> 2u` of the
//           packed array; the caller performs that load).
// `index` - the byte index within the whole string; only its low two bits are
//           used to select the lane, with lane 0 being the least-significant byte.
//
// The word is taken by value rather than through a `ptr<storage, ...>` parameter:
// storage-address-space pointer parameters on user functions require the WGSL
// `unrestricted_pointer_parameters` language feature, which not every
// implementation provides.
fn packed_byte(word: u32, index: u32) -> u32 {
    // (index & 3u) << 3u turns the lane number 0..3 into a bit shift of 0/8/16/24.
    return (word >> ((index & 3u) << 3u)) & 0xffu;
}

// Hamming distance between two equal-length packed byte strings.
//
// Each invocation handles the byte at position `id.x`. When the bytes of
// `a_words` and `b_words` at that position differ, `distance` is atomically
// incremented by one.
//
// If `params.len_a != params.len_b`, no comparison is performed: invocation 0
// writes 1u to `status[0]` and every invocation returns.
//
// NOTE(review): `distance` and `status[0]` are never cleared here; presumably the
// host initialises them before dispatch — confirm against the caller.
@compute @workgroup_size(256, 1, 1)
fn string_similarity_hamming(@builtin(global_invocation_id) id: vec3<u32>) {
    // Length mismatch: report once (from invocation 0) and bail out everywhere.
    if (params.len_a != params.len_b) {
        if (id.x == 0u) { status[0] = 1u; }
        return;
    }

    let index = id.x;
    // Invocations past the end of the string (dispatch is rounded up to 256) do nothing.
    if (index >= params.len_a) { return; }

    // Four bytes are packed per word, so byte `index` lives in word `index >> 2u`.
    let word_index = index >> 2u;
    let byte_a = packed_byte(a_words[word_index], index);
    let byte_b = packed_byte(b_words[word_index], index);

    if (byte_a != byte_b) {
        atomicAdd(&distance, 1u);
    }
}