use realizar::gguf::GGUFModel;
use std::fs;
fn main() {
let data = fs::read("/home/noah/src/aprender/tinyllama-1.1b-chat-v1.0.Q4_0.gguf").unwrap();
let model = GGUFModel::from_bytes(&data).unwrap();
let tensor = model
.tensors
.iter()
.find(|t| t.name == "token_embd.weight")
.unwrap();
println!("token_embd.weight:");
println!(" dims: {:?}", tensor.dims);
println!(
" qtype: {} ({})",
tensor.qtype,
if tensor.qtype == 0 { "F32" } else { "other" }
);
println!(" offset: {}", tensor.offset);
let embed = model.get_tensor_f32("token_embd.weight", &data).unwrap();
println!(" loaded len: {}", embed.len());
let hidden_dim = 2048;
let _vocab_size = 32000;
println!("\nToken 0 (UNK) embedding (first 10 values):");
for (i, &v) in embed[0..10].iter().enumerate() {
println!(" [{}] = {:.6}", i, v);
}
let bos_start = hidden_dim;
println!("\nToken 1 (BOS) embedding (first 10 values):");
for (i, &v) in embed[bos_start..bos_start + 10].iter().enumerate() {
println!(" [{}] = {:.6}", i, v);
}
let paris_start = 3681 * hidden_dim;
println!("\nToken 3681 (▁Paris) embedding (first 10 values):");
for (i, &v) in embed[paris_start..paris_start + 10].iter().enumerate() {
println!(" [{}] = {:.6}", i, v);
}
let bos_emb = &embed[bos_start..bos_start + hidden_dim];
let paris_emb = &embed[paris_start..paris_start + hidden_dim];
let diff: f32 = bos_emb
.iter()
.zip(paris_emb.iter())
.map(|(a, b)| (a - b).abs())
.sum();
println!(
"\nL1 distance between BOS and Paris embeddings: {:.4}",
diff
);
let min = embed.iter().cloned().fold(f32::INFINITY, f32::min);
let max = embed.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
let mean: f32 = embed.iter().sum::<f32>() / embed.len() as f32;
println!("\nEmbedding stats:");
println!(" min: {:.6}", min);
println!(" max: {:.6}", max);
println!(" mean: {:.6}", mean);
let non_zero = embed.iter().filter(|&&x| x != 0.0).count();
println!(
" non-zero: {}/{} ({:.1}%)",
non_zero,
embed.len(),
100.0 * non_zero as f64 / embed.len() as f64
);
}