use llama_gguf::Backend;
use llama_gguf::backend::cpu::CpuBackend;
use llama_gguf::gguf::GgufFile;
use llama_gguf::tensor::{DType, Tensor};

fn main() -> Result<(), Box<dyn std::error::Error>> {
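    // The CPU backend supplies the dequantize kernel used further down.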
    let backend = CpuBackend::new();
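
    // Open the model file (the path is machine-specific) and look up the
    // token-embedding tensor by its GGUF name.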
    let gguf = GgufFile::open("/home/joseph/Models/qwen2.5-0.5b-instruct-q4_k_m.gguf")?;
    let emb_info = gguf
        .data
        .get_tensor("token_embd.weight")
        .expect("token_embd.weight not found in metadata");
    let emb_data = gguf
        .tensor_data("token_embd.weight")
        .expect("token_embd.weight has no tensor data");
    println!(
        "Embedding tensor: dims={:?}, dtype={:?}",
        emb_info.dims, emb_info.dtype
    );
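
    // GGUF stores dimensions fastest-varying first: dims[0] is the row
    // length (hidden size) and dims[1] the number of rows (vocab size).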
    let hidden_size = emb_info.dims[0] as usize;
    let vocab_size = emb_info.dims[1] as usize;
    println!("hidden_size={}, vocab_size={}", hidden_size, vocab_size);
    let emb_tensor = Tensor::new(
        emb_data.to_vec(),
        vec![hidden_size, vocab_size],
        DType::from(emb_info.dtype),
    )?;
    let mut emb_f32 = Tensor::zeros(vec![hidden_size, vocab_size], DType::F32);
    backend.dequantize(&emb_tensor, &mut emb_f32)?;
    let emb_values = emb_f32.as_f32()?;
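
    // Row i of the table is the embedding for token ID i: `hidden_size`
    // contiguous floats starting at offset i * hidden_size.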
    let token = 16;
    let start = token * hidden_size;
    let end = start + hidden_size;
    let token_emb = &emb_values[start..end];
    println!("\nEmbedding for token {} (first 10):", token);
    for (i, v) in token_emb.iter().take(10).enumerate() {
        println!(" [{}] = {:.6}", i, v);
    }

    let token2 = 17;
    let start2 = token2 * hidden_size;
    let end2 = start2 + hidden_size;
    let token_emb2 = &emb_values[start2..end2];
    println!("\nEmbedding for token {} (first 10):", token2);
    for (i, v) in token_emb2.iter().take(10).enumerate() {
        println!(" [{}] = {:.6}", i, v);
    }
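
    // L1 distance between the two rows: adjacent token IDs should have
    // clearly different embeddings if dequantization produced real data.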
    let diff: f32 = token_emb
        .iter()
        .zip(token_emb2.iter())
        .map(|(a, b)| (a - b).abs())
        .sum();
    println!("\nSum of absolute differences: {}", diff);

    Ok(())
}