rullama 0.2.0

Browser-resident Gemma 4 inference: pure Rust → WebAssembly + WebGPU. Loads Ollama's on-disk GGUF blobs and runs the forward pass on the local GPU via hand-written WGSL.
Documentation
//! Smoke-test the BPE tokenizer against Ollama's golden vectors from
//! `model/models/gemma4/tokenizer_compare_test.go`.

use std::env;
use std::fs;
use std::process::ExitCode;

use rullama::gguf::GgufReader;
use rullama::tokenizer::BpeTokenizer;

fn main() -> ExitCode {
    let path = match env::args().nth(1) {
        Some(p) => p,
        None => { eprintln!("usage: encode_check <gguf>"); return ExitCode::from(2); }
    };
    let bytes = fs::read(&path).expect("read");
    let r = GgufReader::new(bytes).expect("parse");
    let tok = match BpeTokenizer::from_gguf(&r) {
        Ok(t) => t,
        Err(e) => { eprintln!("tokenizer build error: {e}"); return ExitCode::from(1); }
    };
    println!("vocab_size = {}", tok.vocab_size());

    let cases: &[(&str, &[u32])] = &[
        ("Hello, world!", &[9259, 236764, 1902, 236888]),
        ("<|turn>user\nWhat is 2+2?<turn|>\n<|turn>model\n",
         &[105, 2364, 107, 3689, 563, 236743, 236778, 236862, 236778, 236881, 106, 107, 105, 4368, 107]),
    ];

    let mut all_pass = true;
    for (input, expected) in cases {
        let got = tok.encode(input);
        let ok = got == *expected;
        if !ok { all_pass = false; }
        println!(
            "{} {:?}\n  got      = {:?}\n  expected = {:?}",
            if ok { "PASS" } else { "FAIL" },
            input, got, expected
        );
    }
    if all_pass { ExitCode::SUCCESS } else { ExitCode::from(1) }
}