rullama 0.4.0

Browser-resident Gemma 4 inference: pure Rust → WebAssembly + WebGPU. Loads Ollama's on-disk GGUF blobs and runs the forward pass on the local GPU via hand-written WGSL.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
//! wgpu backend: device + queue + pipeline cache + buffer allocator.
//!
//! Declares the backend's submodules and re-exports selected items from them
//! so they can be named directly at this module's level.

// Submodule declarations. `context` and `spike` are private, so from outside
// this module their contents are reachable only through the re-exports at the
// bottom of this file. The remaining modules are `pub` and can also be
// addressed by their full path.
pub mod bind_cache;
mod context;
pub mod dispatch;
pub mod elementwise;
pub mod gpu_mem;
pub mod matmul;
pub mod pipelines;
mod spike;
pub mod weight_cache;

// Re-exports: lift these items to this module's namespace so callers do not
// need to spell out the submodule path.
pub use bind_cache::{BindGroupCache, CacheKey, CachedDispatch, buf_id};
// `context` is private; `WgpuCtx` is the only item this file exposes from it.
pub use context::WgpuCtx;
pub use pipelines::Pipelines;
// `spike` is private; `compute_spike` is the only item this file exposes from it.
pub use spike::compute_spike;
pub use weight_cache::WeightCache;