rullama 0.3.0

Browser-resident Gemma 4 inference: pure Rust → WebAssembly + WebGPU. Loads Ollama's on-disk GGUF blobs and runs the forward pass on the local GPU via hand-written WGSL.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
//! wgpu backend: device + queue + pipeline cache + buffer allocator.
//!
//! The declarations below wire up the backend's submodules and re-export a
//! few of their items at this level:
//!
//! - `context` and `spike` are declared without `pub`, so they are private to
//!   this module. From here, only `WgpuCtx` and `compute_spike` are re-exported
//!   out of them.
//! - `dispatch`, `elementwise`, `matmul`, `pipelines` and `weight_cache` are
//!   public modules. `Pipelines` and `WeightCache` are additionally re-exported
//!   so they can be named directly from this module as well as through their
//!   submodule paths.

// Private module; `WgpuCtx` is the only item re-exported from it below.
mod context;
pub mod dispatch;
pub mod elementwise;
pub mod matmul;
pub mod pipelines;
// Private module; `compute_spike` is the only item re-exported from it below.
mod spike;
pub mod weight_cache;

// Re-exports: give callers a single flat path for these items.
// `WgpuCtx` and `compute_spike` come from private modules, so these `pub use`
// lines are what make them nameable through this module at all.
pub use context::WgpuCtx;
pub use pipelines::Pipelines;
pub use spike::compute_spike;
pub use weight_cache::WeightCache;