llama-gguf 0.14.0

A high-performance Rust implementation of llama.cpp — an LLM inference engine with full GGUF support.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#version 450

// Pass 3: normalize each element by multiplying with the precomputed
// reciprocal of the sum (equivalent to dividing by the sum). Presumably
// the final pass of a softmax — earlier passes compute the exponentials
// and their sum; TODO confirm against the dispatching host code.
layout(local_size_x = 256) in;

// In-place working buffer: each invocation scales exactly one element.
layout(set = 0, binding = 0) buffer Data { float data[]; };

layout(push_constant) uniform Params {
    int n;          // number of valid elements in `data`
    float inv_sum;  // 1.0 / sum, computed on the host or by a prior pass
};

void main() {
    uint idx = gl_GlobalInvocationID.x;
    // Explicit cast to uint: avoids relying on GLSL's implicit int->uint
    // conversion in the comparison and silences sign-mismatch warnings on
    // strict compilers. Out-of-range invocations (last workgroup padding)
    // simply do nothing.
    if (idx < uint(n)) {
        data[idx] *= inv_sum;
    }
}