hanzo-ml 0.10.2

Minimalist ML framework.
Documentation
#version 450
// Unary GELU (tanh approximation, matching hanzo-ml's CPU Gelu):
//   0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
// Workgroup size: 64 invocations along x.
layout(local_size_x = 64) in;

// Storage buffers in descriptor set 0: binding 0 is only read (input values),
// binding 1 is only written (results). main() uses the same index for both.
layout(set = 0, binding = 0) readonly  buffer In  { float inp[]; };
layout(set = 0, binding = 1) writeonly buffer Out { float o[]; };
// Push constant n: exclusive upper bound on the indices main() processes.
layout(push_constant) uniform Pc { uint n; };

// sqrt(2/pi): the scale applied to the cubic polynomial inside tanh.
const float SQRT_TWO_OVER_PI = 0.7978845608028654;

// Computes the tanh-approximated GELU of one element per invocation:
//   o[i] = 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
// where x = inp[i] and i is the invocation's global x index.
// Invocations with i >= n read and write nothing.
void main() {
    // For |t| >= 10 a float32 tanh(t) is already exactly +/-1, so clamping the
    // argument to this range does not change the result. It does keep the
    // argument small enough that a tanh built from exp() (which the Vulkan
    // precision rules permit) cannot evaluate to inf/inf = NaN.
    const float TANH_ARG_LIMIT = 10.0;

    uint i = gl_GlobalInvocationID.x;
    if (i < n) {
        float x = inp[i];
        // The cubic term makes `inner` grow quickly: |x| of roughly 13 is
        // already enough to push exp(inner) beyond the float32 range.
        float inner = SQRT_TWO_OVER_PI * (x + 0.044715 * x * x * x);
        float t = tanh(clamp(inner, -TANH_ARG_LIMIT, TANH_ARG_LIMIT));
        // A NaN input still yields NaN here through the `0.5 * x` factor,
        // regardless of what clamp() returns for a NaN argument.
        o[i] = 0.5 * x * (1.0 + t);
    }
}