rullama 0.2.0

Browser-resident Gemma 4 inference: pure Rust → WebAssembly + WebGPU. Loads Ollama's on-disk GGUF blobs and runs the forward pass on the local GPU via hand-written WGSL.
Documentation
//! Quick token-id → string lookup using the vocab embedded in the GGUF. No tokenizer
//! mechanics — just array indexing.

use std::env;
use std::fs;
use std::process::ExitCode;

use rullama::gguf::GgufReader;

/// Entry point: `decode_ids <gguf> <id1> <id2> ...`.
///
/// Reads the GGUF file named by the first argument, fetches the
/// `tokenizer.ggml.tokens` string array from it, and prints one line per
/// requested id: the id followed by the debug-formatted token, or
/// `<id> OUT OF RANGE` when the id is not a valid index into the array.
///
/// Exit status:
/// - `2` when the path argument is missing or an id is not a valid `usize`;
/// - `ExitCode::FAILURE` when the file cannot be read;
/// - `ExitCode::SUCCESS` otherwise (out-of-range ids are reported, not fatal).
///
/// # Panics
///
/// Panics if the file cannot be parsed as GGUF, or if the
/// `tokenizer.ggml.tokens` entry is missing or is not a string array. Those
/// results come from project-defined types not visible here, so they are
/// still unwrapped with `expect`.
fn main() -> ExitCode {
    let mut args = env::args().skip(1);
    let path = match args.next() {
        Some(p) => p,
        None => {
            eprintln!("usage: decode_ids <gguf> <id1> <id2> ...");
            return ExitCode::from(2);
        }
    };

    // Validate every id before touching the file: a typo on the command line
    // should fail immediately instead of after loading and parsing the blob.
    let mut ids = Vec::new();
    for arg in args {
        match arg.parse::<usize>() {
            Ok(id) => ids.push(id),
            Err(e) => {
                eprintln!("decode_ids: invalid token id {arg:?}: {e}");
                return ExitCode::from(2);
            }
        }
    }

    // An unreadable path is an ordinary user-facing error, not a bug, so it
    // gets a diagnostic and a failure status rather than a panic.
    let bytes = match fs::read(&path) {
        Ok(b) => b,
        Err(e) => {
            eprintln!("decode_ids: cannot read {path}: {e}");
            return ExitCode::FAILURE;
        }
    };

    let r = GgufReader::new(bytes).expect("parse");
    let tokens = r.get("tokenizer.ggml.tokens").expect("vocab").as_string_array().expect("strs");

    for id in ids {
        if id < tokens.len() {
            println!("{:>7}  {:?}", id, tokens[id]);
        } else {
            println!("{id} OUT OF RANGE");
        }
    }
    ExitCode::SUCCESS
}