llama-cpp-sys-4 0.3.2

Low Level Bindings to llama.cpp
Documentation
#extension GL_EXT_shader_16bit_storage : require


layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

layout (binding = 0) readonly buffer A {A_TYPE data_a[];};
layout (binding = 1) readonly buffer B {A_TYPE data_b[];};
layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};

layout (push_constant) uniform parameter
{
    uint N;
    uint ne00;
    uint ne20;
    uint mode;
    float alpha;
    float limit;
    uint nb00;
    uint nb01;
    uint nb02;
    uint nb03;
    uint nb10;
    uint nb11;
    uint nb12;
    uint nb13;
    uint nb20;
    uint nb21;
    uint nb22;
    uint nb23;
    uint ne21;
    uint ne22;
    uint misalign_offsets;
    uint ne2_012mp; uint ne2_012L;
    uint ne2_01mp;  uint ne2_01L;
    uint ne2_0mp;   uint ne2_0L;
} p;

uint get_aoffset() { return p.misalign_offsets >> 16; }
uint get_boffset() { return (p.misalign_offsets >> 8) & 0xFF; }
uint get_doffset() { return p.misalign_offsets & 0xFF; }

// see init_fastdiv_values in ggml-vulkan.cpp
uint fastdiv(uint n, uint mp, uint L) {
    uint msbs, lsbs;
    umulExtended(n, mp, msbs, lsbs);
    return (msbs + n) >> L;
}