vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
// Uniform parameter block for the `buffer_memcpy` kernel.
// Four u32 fields, 16 bytes in total.
struct MemcpyParams {
    // Length of the copy in bytes; the kernel rounds it up to whole 32-bit
    // words and masks the final word.
    input_len: u32,
    // Not read by the kernel in this file.
    // NOTE(review): presumably padding to a 16-byte uniform block — confirm
    // against the host-side struct.
    reserved0: u32,
    reserved1: u32,
    reserved2: u32,
}

// Bind group 0 resources used by `buffer_memcpy`.
// binding 0: uniform parameter block (copy length).
@group(0) @binding(0) var<uniform> params: MemcpyParams;
// binding 1: source data, read-only storage viewed as 32-bit words.
@group(0) @binding(1) var<storage, read> input_words: array<u32>;
// binding 2: destination data, read-write storage viewed as 32-bit words.
@group(0) @binding(2) var<storage, read_write> output_words: array<u32>;

// Copies `params.input_len` bytes from `input_words` to `output_words`,
// one 32-bit word per invocation.
//
// Each invocation handles the word at index `id.x`. Invocations whose index
// is at or past the word count return without writing. The last word is
// ANDed with `vyre_low_byte_mask(params.input_len)` before it is stored.
// NOTE(review): `vyre_low_byte_mask` is defined outside this chunk;
// presumably it keeps only the bytes that fall inside `input_len` in the
// tail word — confirm, including the case where `input_len` is a multiple
// of 4.
@compute @workgroup_size(64, 1, 1)
fn buffer_memcpy(@builtin(global_invocation_id) id: vec3<u32>) {
    let word_index = id.x;
    let byte_len = params.input_len;
    // Ceiling division by 4 without forming `byte_len + 3u`: u32 addition
    // wraps, so lengths above 0xFFFFFFFC would otherwise produce a word
    // count of zero and silently copy nothing.
    let word_count = (byte_len >> 2u) + select(0u, 1u, (byte_len & 3u) != 0u);
    if (word_index >= word_count) {
        return;
    }
    var word = input_words[word_index];
    // Only the final word can be partially covered by the byte length.
    // `word_index < word_count` here, so `word_index + 1u` cannot wrap.
    if (word_index + 1u == word_count) {
        word = word & vyre_low_byte_mask(params.input_len);
    }
    output_words[word_index] = word;
}