rullama 0.3.0

Browser-resident Gemma 4 inference: pure Rust → WebAssembly + WebGPU. Loads Ollama's on-disk GGUF blobs and runs the forward pass on the local GPU via hand-written WGSL.
Documentation
//! GGUF v3 parser.
//!
//! Browser-friendly: takes `&[u8]` (a `Uint8Array` slice on wasm32), no mmap, no I/O.
//! Hand-rolled rather than depending on a crate so we own the wasm story end-to-end and
//! the dep tree stays small.
//!
//! Spec reference: <https://github.com/ggml-org/ggml/blob/master/docs/gguf.md>

// Private implementation modules. Selected items from each are surfaced
// through the `pub use` re-exports further down.
mod dtype;
mod reader;
mod value;

// Modules that are reachable by path from outside this module.
pub mod fetcher;
pub mod quant;
pub mod tensor;

// Flat re-exports so callers can name these items directly from this module.
pub use dtype::GgmlDtype;
pub use fetcher::{InMemoryFetcher, TensorFetcher};
// These two fetchers are re-exported only when compiling for `wasm32`.
#[cfg(target_arch = "wasm32")]
pub use fetcher::{HttpRangeFetcher, OpfsFetcher};
pub use reader::{GgufReader, TensorDesc};
pub use tensor::{
    dequant_row_to_f32, dequant_row_to_f32_async, dequant_tensor_to_f32,
    dequant_tensor_to_f32_async,
};
pub use value::{GgufValue, GgufValueType};