// hanzo-ml 0.10.2
//
// Minimalist ML framework.
// Documentation
#version 450
// scatter add along `dim`. Same indexing as scatter_set but dst[idx] += src[g], done with an
// atomicCompSwap loop on a uint view of dst (Dozen/D3D12 lacks the float-atomic extension).
// Workgroup size: 64 invocations along x. main() derives its element index from
// gl_GlobalInvocationID.x only.
layout(local_size_x = 64) in;
// Destination buffer. Declared as uint so main() can run atomicCompSwap on it; the stored bits
// are reinterpreted as float via uintBitsToFloat / floatBitsToUint.
layout(set = 0, binding = 0) buffer Dst { uint dst[]; };
// Values to accumulate; main() reads src[g] as the addend for invocation g.
layout(set = 0, binding = 1) readonly buffer Src { float src[]; };
// Per-element index along the scattered dimension; ids[g] pairs with src[g].
layout(set = 0, binding = 2) readonly buffer Ids { uint ids[]; };
// Push constants consumed by main():
//   n       - invocations with g >= n return without doing any work.
//   right   - stride of the scattered dimension; g % right is the innermost coordinate.
//   dim_src - with `right`, divides g to obtain the outer coordinate (g / (right * dim_src)).
//   dim_dst - with `right`, the outer stride of the destination (outer * (dim_dst * right)).
layout(push_constant) uniform Pc { uint n; uint right; uint dim_src; uint dim_dst; };
// Entry point. Invocation g adds src[g] into the dst element selected by ids[g] along the
// scattered dimension; the addition is performed atomically with a compare-and-swap loop.
void main() {
    uint g = gl_GlobalInvocationID.x;
    // Invocations at or beyond n have no element to process.
    if (g >= n) { return; }
    uint id = ids[g];
    // idx below is only inside this element's outer slab when id < dim_dst. A larger id would
    // address a different slab (or run past the end of dst), so skip it instead of writing there.
    if (id >= dim_dst) { return; }
    // Split the flat source index: `inner` is the coordinate after the scattered dimension,
    // `outer` the coordinate before it.
    uint inner = g % right;
    uint outer = g / (right * dim_src);
    // Same outer/inner coordinates, with the scattered coordinate replaced by id and the
    // destination extent dim_dst used for the outer stride.
    uint idx = outer * (dim_dst * right) + id * right + inner;
    float add = src[g];
    // Emulated float atomic add: read the current bits, compute the sum, and try to swap it in.
    // atomicCompSwap returns the value that was actually in memory; if it differs from what we
    // assumed, another invocation updated dst[idx] in between, so retry from the returned value.
    // The comparison is on raw bits, not on float values.
    uint old = dst[idx];
    uint assumed;
    do {
        assumed = old;
        float nv = uintBitsToFloat(assumed) + add;
        old = atomicCompSwap(dst[idx], assumed, floatBitsToUint(nv));
    } while (old != assumed);
}