llama-gguf 0.14.0

A high-performance Rust implementation of llama.cpp - LLM inference engine with full GGUF support
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// SiLU activation: result[i] = x[i] / (1.0 + exp(-x[i]))

cbuffer Params : register(b0) {
    int n;
};

RWStructuredBuffer<float> x : register(u0);
RWStructuredBuffer<float> result : register(u1);

[numthreads(256, 1, 1)]
void main(uint3 dtid : SV_DispatchThreadID) {
    uint idx = dtid.x;
    if (idx < (uint)n) {
        float val = x[idx];
        result[idx] = val / (1.0 + exp(-val));
    }
}