vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
// Source buffer: bind group 0, binding 0. Read-only storage, viewed as a
// runtime-sized array of 32-bit words.
@group(0) @binding(0) var<storage, read> input_words: array<u32>;
// Destination buffer: bind group 0, binding 1. Read-write storage, viewed as a
// runtime-sized array of 32-bit words.
@group(0) @binding(1) var<storage, read_write> output_words: array<u32>;

// Reverses the byte order of a 32-bit word in a single function:
// byte 0 <-> byte 3 and byte 1 <-> byte 2.
//
// value: the word to swap.
// Returns the word with its four bytes in reversed order.
fn buffer_byte_swap_u32_value(value: u32) -> u32 {
    // Lowest byte moves to the top; the shift itself discards the other bytes.
    let to_byte3 = value << 24u;
    // Byte 1 moves up into byte 2.
    let to_byte2 = (value << 8u) & 0x00ff0000u;
    // Byte 2 moves down into byte 1.
    let to_byte1 = (value & 0x00ff0000u) >> 8u;
    // Highest byte moves to the bottom; the logical shift zero-fills above it.
    let to_byte0 = value >> 24u;
    return to_byte3 | to_byte2 | to_byte1 | to_byte0;
}

// Extracts the most significant byte of `value` and places it in the least
// significant byte position (bits 0..7); all other result bits are zero.
fn buffer_byte_swap_u32_lane0(value: u32) -> u32 {
    let top_byte = value & 0xff000000u;
    return top_byte >> 24u;
}

// Moves byte 2 of `value` (bits 16..23) down into byte 1 (bits 8..15);
// all other result bits are zero.
fn buffer_byte_swap_u32_lane1(value: u32) -> u32 {
    let byte2 = value & 0x00ff0000u;
    return byte2 >> 8u;
}

// Moves byte 1 of `value` (bits 8..15) up into byte 2 (bits 16..23);
// all other result bits are zero.
fn buffer_byte_swap_u32_lane2(value: u32) -> u32 {
    let byte1 = value & 0x0000ff00u;
    return byte1 << 8u;
}

// Moves the least significant byte of `value` (bits 0..7) into the most
// significant byte position (bits 24..31); all other result bits are zero.
fn buffer_byte_swap_u32_lane3(value: u32) -> u32 {
    let low_byte = value & 0x000000ffu;
    return low_byte << 24u;
}

// Builds the byte-reversed form of `value` by OR-ing together the results of
// the four per-lane helpers, each of which relocates one source byte.
//
// value: the word to swap.
// Returns the combined result of lanes 0 through 3.
fn buffer_byte_swap_u32_checked(value: u32) -> u32 {
    // The lane helpers are pure and their results are combined with OR,
    // so the accumulation order has no effect on the result.
    var swapped = buffer_byte_swap_u32_lane3(value);
    swapped |= buffer_byte_swap_u32_lane2(value);
    swapped |= buffer_byte_swap_u32_lane1(value);
    swapped |= buffer_byte_swap_u32_lane0(value);
    return swapped;
}

// Compute entry point: each invocation byte-swaps one 32-bit word, reading
// input_words[id.x] and writing the result to output_words[id.x].
//
// Workgroups contain 64 invocations along x, so a dispatch covers a multiple
// of 64 indices and may include invocations past the end of the buffers.
@compute @workgroup_size(64, 1, 1)
fn buffer_byte_swap_u32(@builtin(global_invocation_id) id: vec3<u32>) {
    let index = id.x;
    // Number of words that exist in BOTH buffers.
    let word_count = min(arrayLength(&input_words), arrayLength(&output_words));
    // Surplus invocations must do nothing: WGSL allows an out-of-bounds
    // storage access to be redirected to some in-bounds element, so an
    // unguarded write here could overwrite a valid output word.
    if (index >= word_count) {
        return;
    }
    output_words[index] = buffer_byte_swap_u32_checked(input_words[index]);
}