struct ByteHistogramParams {
byte_len: u32,
reserved0: u32,
reserved1: u32,
reserved2: u32,
}
@group(0) @binding(0) var<uniform> params: ByteHistogramParams;
@group(0) @binding(1) var<storage, read> input_words: array<u32>;
@group(0) @binding(2) var<storage, read_write> histogram_counts: array<u32>;
var<workgroup> tile_bytes: array<u32, 256>;
var<workgroup> tile_histogram: array<atomic<u32>, 256>;
@compute @workgroup_size(256, 1, 1)
fn stats_byte_histogram(
@builtin(global_invocation_id) id: vec3<u32>,
@builtin(local_invocation_id) local_id: vec3<u32>,
) {
let lane = local_id.x;
atomicStore(&tile_histogram[lane], 0u);
workgroupBarrier();
var tile_start = 0u;
loop {
if (tile_start >= params.byte_len) {
break;
}
let input_index = tile_start + lane;
if (input_index < params.byte_len) {
tile_bytes[lane] = vyre_packed_byte(&input_words, input_index);
} else {
tile_bytes[lane] = 0u;
}
workgroupBarrier();
if (input_index < params.byte_len) {
atomicAdd(&tile_histogram[tile_bytes[lane]], 1u);
}
workgroupBarrier();
tile_start = tile_start + 256u;
}
histogram_counts[id.x] = atomicLoad(&tile_histogram[lane]);
}