// vyre 0.4.0
//
// GPU compute intermediate representation with a standard operation library.
// Documentation
// Portable GPU kernel for string_matching.glob_match.
//
// Bindings:
// 0 params: len_a, len_b, param_c, param_d
// 1 pattern_words: little-endian packed bytes
// 2 input_words: little-endian packed bytes
// 3 result: u32, written as 1 for match and 0 for mismatch

// NOTE(review): `params` (listed as binding 0 in the header above) is read by
// matches() but is not declared in this part of the file — presumably it is
// supplied by a shared prelude; confirm before using this shader standalone.

// Binding 1: glob pattern bytes, four per u32 word; read-only.
@group(0) @binding(1) var<storage, read> pattern_words: array<u32>;
// Binding 2: input bytes to test against the pattern, four per u32 word; read-only.
@group(0) @binding(2) var<storage, read> input_words: array<u32>;
// Binding 3: output buffer; the entry point stores the verdict in element 0.
@group(0) @binding(3) var<storage, read_write> result: array<u32>;

// Extracts one byte from `word`, which holds four bytes packed little-endian
// (byte 0 in the least significant 8 bits). Only the low two bits of `index`
// are used to select the byte, so callers may pass the full byte index.
//
// The word is taken by value instead of through a pointer into the storage
// buffer: core WGSL restricts pointer parameters of user-declared functions
// to the `function` and `private` address spaces, and `ptr<storage, ...>`
// parameters are only accepted where the `unrestricted_pointer_parameters`
// language extension is available.
fn packed_byte(word: u32, index: u32) -> u32 {
    let shift = (index & 3u) << 3u;
    return (word >> shift) & 0xffu;
}

// Returns byte `index` of the packed pattern buffer, as a value in 0..255.
fn pattern_byte(index: u32) -> u32 {
    // index >> 2 selects the u32 word that contains the requested byte.
    return packed_byte(pattern_words[index >> 2u], index);
}

// Returns byte `index` of the packed input buffer, as a value in 0..255.
fn input_byte(index: u32) -> u32 {
    // index >> 2 selects the u32 word that contains the requested byte.
    return packed_byte(input_words[index >> 2u], index);
}

// Glob-matches the packed input against the packed pattern.
//
// Pattern syntax: `*` (byte 42) matches any run of input bytes, including an
// empty run; `?` (byte 63) matches exactly one input byte; every other byte
// matches only itself. The pattern length is `params.len_a` and the input
// length is `params.len_b`.
//
// Returns true when the entire input is matched by the entire pattern.
fn matches() -> bool {
    var p = 0u;                 // index of the next pattern byte
    var i = 0u;                 // index of the next input byte
    var star = 0xffffffffu;     // position of the most recent `*`; all-ones = none seen
    var retry_input = 0u;       // input index where the most recent `*` started matching

    loop {
        if (i >= params.len_b) {
            break;
        }
        if (p < params.len_a) {
            let pb = pattern_byte(p);
            // `*` must be recognised before the literal comparison. Otherwise
            // a `*` in the pattern that happens to face a literal `*` byte in
            // the input is consumed as a one-byte literal match, no backtrack
            // point is recorded, and patterns such as `*a` wrongly reject
            // inputs such as `*ba`.
            if (pb == 42u) {
                star = p;
                p = p + 1u;
                retry_input = i;
                continue;
            }
            let ib = input_byte(i);
            if (pb == 63u || pb == ib) {
                p = p + 1u;
                i = i + 1u;
                continue;
            }
        }
        // Mismatch (or pattern exhausted with input left): let the most recent
        // `*` absorb one more input byte and retry from just after it.
        if (star != 0xffffffffu) {
            p = star + 1u;
            retry_input = retry_input + 1u;
            i = retry_input;
            continue;
        }
        return false;
    }

    // Input is exhausted; any remaining pattern bytes must all be `*`, since
    // only `*` can match the empty remainder.
    loop {
        if (p >= params.len_a || pattern_byte(p) != 42u) {
            break;
        }
        p = p + 1u;
    }
    return p == params.len_a;
}

// Compute entry point: the invocation whose global x id is 0 runs the glob
// match and stores the verdict in result[0] (1 = match, 0 = mismatch).
// Invocations with any other x id do nothing.
@compute @workgroup_size(1, 1, 1)
fn string_matching_glob_match(@builtin(global_invocation_id) id: vec3<u32>) {
    if (id.x == 0u) {
        var verdict = 0u;
        if (matches()) {
            verdict = 1u;
        }
        result[0] = verdict;
    }
}