use realizar::gguf::{MappedGGUFModel, OwnedQuantizedModel};
/// GGUF checkpoint probed when no path is supplied on the command line.
const DEFAULT_MODEL_PATH: &str = "/home/noah/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct-GGUF/snapshots/198f08841147e5196a6a69bd0053690fb1fd3857/qwen2-0_5b-instruct-q4_0.gguf";

/// Number of highest-scoring tokens printed for each prompt.
const TOP_K: usize = 10;

/// Returns the `k` highest-scoring `(token_id, logit)` pairs, best first.
///
/// Ordering uses [`f32::total_cmp`], so NaN logits cannot cause a panic: a
/// positive NaN ranks above every finite value and therefore shows up at the
/// top of the report instead of aborting it. The sort is stable, so equal
/// logits keep ascending token-id order.
fn top_k(logits: &[f32], k: usize) -> Vec<(usize, f32)> {
    let mut ranked: Vec<(usize, f32)> = logits.iter().copied().enumerate().collect();
    ranked.sort_by(|a, b| b.1.total_cmp(&a.1));
    ranked.truncate(k);
    ranked
}

/// Runs two hand-tokenized prompts through a quantized GGUF model and prints
/// the top next-token predictions for each.
///
/// The model path is taken from the first command-line argument; when absent,
/// [`DEFAULT_MODEL_PATH`] is used.
///
/// # Errors
///
/// Propagates any error returned while mapping the file, building the model,
/// or running a forward pass.
///
/// # Panics
///
/// Panics if the mapped model exposes no vocabulary.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let path = std::env::args()
        .nth(1)
        .unwrap_or_else(|| DEFAULT_MODEL_PATH.to_owned());
    let mapped = MappedGGUFModel::from_path(path.as_str())?;
    let model = OwnedQuantizedModel::from_mapped(&mapped)?;
    let vocab = mapped
        .model
        .vocabulary()
        .expect("GGUF metadata should contain a tokenizer vocabulary");

    // Hand-picked token ids used to assemble the prompts below.
    // Not needed by either prompt, kept for reference: `system` = 9125, `=` = 28.
    let im_start = 151644u32;
    let im_end = 151645u32;
    let assistant = 77091u32;
    let newline = 198u32;
    let user = 872u32;
    let what = 3838u32;
    let is_ = 374u32;
    let space = 220u32;
    let two = 17u32;
    let plus = 10u32;
    let qmark = 30u32;

    // Shared report: rank the logits and print each id with its vocabulary
    // entry, falling back to "?" for ids the vocabulary does not cover.
    let report_top = |logits: &[f32]| {
        println!("\nTop {} predictions:", TOP_K);
        for (tok, logit) in top_k(logits, TOP_K) {
            let tok_str = vocab.get(tok).map_or("?", |s| s.as_str());
            println!(" Token {} ({:?}): logit={:.4}", tok, tok_str, logit);
        }
    };

    // Prompt 1: a bare assistant header.
    let tokens = vec![im_start, assistant, newline];
    println!(
        "Testing with: <|im_start|>assistant\\n (tokens: {:?})",
        tokens
    );
    let logits = model.forward(&tokens)?;
    report_top(&logits);

    // Prompt 2: a full user turn followed by the assistant header.
    let chat_tokens = vec![
        im_start, user, newline, what, is_, space, two, plus, two, qmark, im_end, newline,
        im_start, assistant, newline,
    ];
    println!("\n\nTesting chat format: <|im_start|>user\\nWhat is 2+2?<|im_end|>\\n<|im_start|>assistant\\n");
    println!("Tokens: {:?}", chat_tokens);
    let logits = model.forward(&chat_tokens)?;
    report_top(&logits);

    // Spot-check two specific ids; checked access so a short logits vector
    // produces a diagnostic line rather than an index panic.
    println!("\nSpecific tokens of interest:");
    for (tok, label) in [(19usize, "4"), (0usize, "!")] {
        match logits.get(tok) {
            Some(logit) => println!(" Token {} ({:?}): logit={:.4}", tok, label, logit),
            None => println!(
                " Token {} ({:?}): not present (logits length {})",
                tok,
                label,
                logits.len()
            ),
        }
    }
    Ok(())
}