hanzo-ml 0.10.2

Minimalist ML framework.
Documentation
#version 450
// Row-wise max reduction over the last dim of a row-major [rows, cols] f32 buffer.
// One invocation per row: out[row] = max_{c} in[row*cols + c]. Output length = rows.
//
// Workgroups are 64 invocations wide along x and main() maps
// gl_GlobalInvocationID.x directly to a row index, so the dispatch must supply
// at least `rows` invocations along x for every row to be processed.
layout(local_size_x = 64) in;

// Input values (read-only). main() reads element (row, c) at inp[row * cols + c].
layout(set = 0, binding = 0) readonly  buffer In  { float inp[]; };
// Output values (write-only). main() writes one float per row, at o[row].
layout(set = 0, binding = 1) writeonly buffer Out { float o[]; };
// Push constants: `rows` is the number of rows to reduce (and the bound checked
// against the invocation index); `cols` is the length of the reduced axis.
layout(push_constant) uniform Pc { uint rows; uint cols; };

// Entry point: each invocation reduces one row of `inp` to its maximum value
// and stores the result in o[row].
//
// Reads the push constants `rows` and `cols`; writes exactly one element of `o`
// per in-range invocation and nothing otherwise.
void main() {
    uint row = gl_GlobalInvocationID.x;

    // Invocations whose index lies past the last row have nothing to do.
    if (row >= rows) {
        return;
    }

    // An empty reduction axis has no first element to seed the accumulator
    // with; reading inp[row * cols] here would fetch an element that belongs
    // to no row (and may lie past the end of the buffer). Emit -infinity, the
    // identity of max, instead.
    if (cols == 0u) {
        o[row] = uintBitsToFloat(0xFF800000u); // IEEE-754 binary32 -infinity
        return;
    }

    // NOTE: index math is 32-bit unsigned and wraps if row * cols + c exceeds
    // 2^32 - 1; callers are presumably expected to stay below that limit.
    uint base = row * cols;

    // Seed with the row's first element (rather than -infinity) so results for
    // cols >= 1 are exactly what a plain running max over the row produces.
    float acc = inp[base];
    for (uint c = 1u; c < cols; c++) {
        acc = max(acc, inp[base + c]);
    }
    o[row] = acc;
}