oximedia-gpu 0.1.8

GPU compute pipeline using WGPU for OxiMedia - cross-platform acceleration
Documentation
// GPU Gaussian pyramid downsample pass for motion estimation.
//
// Reads luma samples from `input_buf` (u32 values, range 0–255 packed as u32),
// box-averages each 2×2 neighbourhood, and writes the result to `output_buf`.
// All coordinates are clamped so fractional-size frames are handled correctly.

struct PyramidUniforms {
    in_width:  u32,
    in_height: u32,
    out_width: u32,
    out_height: u32,
}

@group(0) @binding(0) var<uniform>           u:          PyramidUniforms;
@group(0) @binding(1) var<storage, read>     input_buf:  array<u32>;
@group(0) @binding(2) var<storage, read_write> output_buf: array<u32>;

fn load_pixel(x: u32, y: u32) -> u32 {
    let cx = min(x, u.in_width  - 1u);
    let cy = min(y, u.in_height - 1u);
    return input_buf[cy * u.in_width + cx];
}

@compute @workgroup_size(8, 8)
fn downsample_r8(@builtin(global_invocation_id) gid: vec3<u32>) {
    if gid.x >= u.out_width || gid.y >= u.out_height { return; }

    let sx = gid.x * 2u;
    let sy = gid.y * 2u;

    // Box-average of the 2×2 source neighbourhood.
    let v00 = load_pixel(sx,      sy);
    let v10 = load_pixel(sx + 1u, sy);
    let v01 = load_pixel(sx,      sy + 1u);
    let v11 = load_pixel(sx + 1u, sy + 1u);

    let avg = (v00 + v10 + v01 + v11 + 2u) / 4u;   // +2 for rounding
    output_buf[gid.y * u.out_width + gid.x] = avg;
}