vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
// Uniform parameters for the sliding-window entropy kernel.
// Four u32 fields, 16 bytes in total; the field order must match whatever
// the host writes into the uniform buffer.
struct SlidingEntropyParams {
    // Upper bound, in bytes, that a window may reach: the kernel rejects any
    // window whose end would exceed this value.
    byte_len: u32,
    // Number of bytes tallied per window; also the divisor used to turn
    // byte counts into probabilities. A value of 0 makes the kernel a no-op.
    window: u32,
    // Invocations whose x index is >= this value write nothing.
    output_count: u32,
    // Not read by the shader code visible here.
    // NOTE(review): presumably padding / reserved for future use - confirm
    // against the host-side struct.
    reserved0: u32,
}

// Kernel parameters (see SlidingEntropyParams), bound as a uniform buffer.
@group(0) @binding(0) var<uniform> params: SlidingEntropyParams;
// Read-only input, declared as an array of u32 words. The kernel never
// indexes it directly; it hands a pointer to `vyre_packed_byte` together
// with an index.
// NOTE(review): presumably bytes are packed four per word - confirm against
// the definition of `vyre_packed_byte`.
@group(0) @binding(1) var<storage, read> input_words: array<u32>;
// Output buffer. Element i receives the f32 bit pattern (via bitcast<u32>)
// of the entropy computed by the invocation whose x index is i.
@group(0) @binding(2) var<storage, read_write> output_bits: array<u32>;

// 256-entry scratch array of u32 in the workgroup address space.
var<workgroup> window_tile: array<u32, 256>;

// Computes the Shannon entropy, in bits, of one window of the input and
// stores its f32 bit pattern in `output_bits`.
//
// Each invocation handles the window that starts at index `id.x` and spans
// `params.window` consecutive bytes. It builds a 256-bucket histogram of the
// values returned by `vyre_packed_byte`, then evaluates
// -sum(p * log2(p)) over the non-empty buckets with p = count / window.
//
// Nothing is written when:
//   * `params.window` is 0 or larger than `params.byte_len`,
//   * `id.x >= params.output_count`, or
//   * the window would extend past `params.byte_len`.
//
// Bytes are read straight from `input_words` rather than being staged
// through workgroup memory: with a workgroup size of 1 staging shares
// nothing, and the barriers it needs would sit after returns that depend on
// the invocation id, i.e. in non-uniform control flow.
@compute @workgroup_size(1, 1, 1)
fn stats_sliding_entropy(@builtin(global_invocation_id) id: vec3<u32>) {
    let window_start = id.x;

    if (params.window == 0u || params.window > params.byte_len) {
        return;
    }
    if (window_start >= params.output_count) {
        return;
    }
    // `window <= byte_len` was established above, so the subtraction cannot
    // underflow. The equivalent `window_start + window > byte_len` could wrap
    // around in u32 and wrongly accept an out-of-range window.
    if (window_start > params.byte_len - params.window) {
        return;
    }

    // Function-scope variables start at their zero value, so every bucket
    // begins at 0 without an explicit clearing loop.
    var counts: array<u32, 256>;

    // Histogram of the byte values inside the window.
    // NOTE(review): assumes `vyre_packed_byte` always returns a value in
    // 0..255 so that it is a valid bucket index - confirm against its
    // definition.
    for (var offset = 0u; offset < params.window; offset = offset + 1u) {
        let value = vyre_packed_byte(&input_words, window_start + offset);
        counts[value] = counts[value] + 1u;
    }

    // Hoisted: the divisor is the same for every bucket.
    let window_size = f32(params.window);

    var entropy = 0.0f;
    for (var bucket = 0u; bucket < 256u; bucket = bucket + 1u) {
        let count = counts[bucket];
        // Empty buckets contribute nothing; skipping them also avoids log2(0).
        if (count != 0u) {
            let p = f32(count) / window_size;
            entropy = entropy - (p * log2(p));
        }
    }

    // The output buffer is typed as u32, so store the raw f32 bits.
    output_bits[window_start] = bitcast<u32>(entropy);
}