rullama 0.5.0

Browser-resident Gemma 4 inference: pure Rust → WebAssembly + WebGPU. Loads Ollama's on-disk GGUF blobs and runs the forward pass on the local GPU via hand-written WGSL.
Documentation
// Logit softcap: y[i] = cap * tanh(x[i] / cap), elementwise.
//
// One thread per element. Used at the very end of the forward pass to clamp logits
// (Gemma 4 cap = 30.0).

struct Params {
    n:    u32,
    cap:  f32,
    _pad0: u32,
    _pad1: u32,
}

@group(0) @binding(0) var<uniform>             params: Params;
@group(0) @binding(1) var<storage, read>       x: array<f32>;
@group(0) @binding(2) var<storage, read_write> y: array<f32>;

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
    let i = gid.x;
    if (i >= params.n) { return; }
    let cap = params.cap;
    if (cap <= 0.0) {
        y[i] = x[i];
        return;
    }
    y[i] = cap * tanh(x[i] / cap);
}