use realizar::gguf::{MappedGGUFModel, OwnedQuantizedModel};
/// PAR-001f debug harness.
///
/// Dumps the leading bytes and f16 scale fields of the layer-0 attention
/// Q/K/V weights two ways — through `OwnedQuantizedModel` and straight from
/// the memory-mapped GGUF file — so the two views can be diffed to check
/// whether the Q and V tensors were swapped during loading.
fn main() {
    // Hard-coded bench fixture; this is a one-off diagnostic, not a CLI.
    let path = "/tmp/parity-bench/tinyllama-1.1b-q4_k_m.gguf";
    println!("=== PAR-001f: V Weight Data Comparison ===\n");

    let mapped = MappedGGUFModel::from_path(path).expect("Failed to load model");
    let model = OwnedQuantizedModel::from_mapped(&mapped).expect("test");

    // ---- View 1: tensors as re-packed by OwnedQuantizedModel -------------
    let layer = &model.layers()[0];
    match &layer.qkv_weight {
        realizar::gguf::OwnedQKVWeights::Separate { q, k, v } => {
            println!("From OwnedQuantizedModel:");
            println!(
                " V in_dim={}, out_dim={}, qtype={}",
                v.in_dim, v.out_dim, v.qtype
            );
            println!(" V data.len()={}", v.data.len());
            println!(" V first 32 bytes: {:02x?}", head(&v.data, 0, 32));
            // Offset 208 is presumably the f16 `d` scale of the first Q6_K
            // super-block (210-byte blocks) — TODO confirm against v.qtype.
            if let Some(d) = f16_le_at(&v.data, 208) {
                println!(
                    " V d at offset 208: {:.6} (bytes: {:02x?})",
                    d,
                    &v.data[208..210]
                );
            }

            println!(
                "\n Q in_dim={}, out_dim={}, qtype={}",
                q.in_dim, q.out_dim, q.qtype
            );
            println!(" Q data.len()={}", q.data.len());
            println!(" Q first 32 bytes: {:02x?}", head(&q.data, 0, 32));
            // Q4_K super-blocks open with two f16 scales: d, then dmin.
            if let (Some(d), Some(dmin)) = (f16_le_at(&q.data, 0), f16_le_at(&q.data, 2)) {
                println!(
                    " Q d={:.6}, dmin={:.6} (bytes: {:02x?} {:02x?})",
                    d,
                    dmin,
                    &q.data[0..2],
                    &q.data[2..4]
                );
            }

            println!(
                "\n K in_dim={}, out_dim={}, qtype={}",
                k.in_dim, k.out_dim, k.qtype
            );
            println!(" K first 32 bytes: {:02x?}", head(&k.data, 0, 32));
        }
        _ => println!("QKV is fused"),
    }

    // ---- View 2: raw bytes straight out of the memory map ----------------
    println!("\n\nFrom MappedGGUFModel (direct):");
    if let Some(v_tensor) = mapped
        .model
        .tensors
        .iter()
        .find(|t| t.name == "blk.0.attn_v.weight")
    {
        let data = mapped.data();
        let absolute_offset = mapped.model.tensor_data_start + v_tensor.offset as usize;
        println!(" tensor offset: {}", v_tensor.offset);
        println!(" absolute offset: {}", absolute_offset);
        println!(" dims: {:?}", v_tensor.dims);
        println!(" qtype: {}", v_tensor.qtype);
        println!(
            " Direct first 32 bytes: {:02x?}",
            head(data, absolute_offset, 32)
        );
        if let Some(d) = f16_le_at(data, absolute_offset + 208) {
            println!(
                " Direct d at offset 208: {:.6} (bytes: {:02x?})",
                d,
                &data[absolute_offset + 208..absolute_offset + 210]
            );
        }
    }

    // ---- Cross-check: dump both raw tensors side by side -----------------
    // One loop replaces the previous copy-pasted Q/V lookups; the clamped
    // `head` slice also fixes a potential out-of-bounds panic the old
    // unchecked `&data[off..off + 32]` indexing had on short files.
    println!("\n\n=== Cross-check: Did Q and V get swapped? ===");
    for (label, name) in [("Q", "blk.0.attn_q.weight"), ("V", "blk.0.attn_v.weight")] {
        if let Some(tensor) = mapped.model.tensors.iter().find(|t| t.name == name) {
            let data = mapped.data();
            let offset = mapped.model.tensor_data_start + tensor.offset as usize;
            println!(
                "{} tensor at offset {}: first 32 = {:02x?}",
                label,
                tensor.offset,
                head(data, offset, 32)
            );
        }
    }

    // ---- Reinterpret V's header as if it were Q4_K -----------------------
    println!("\n\n=== Try interpreting V as Q4_K ===");
    if let realizar::gguf::OwnedQKVWeights::Separate { v, .. } = &model.layers()[0].qkv_weight {
        if let (Some(d), Some(dmin)) = (f16_le_at(&v.data, 0), f16_le_at(&v.data, 2)) {
            println!(" If Q4_K: d={:.6}, dmin={:.6}", d, dmin);
        }
        // 0x28,0xeb is a byte pair observed in earlier runs; decode it once
        // so the expected f16 value is printed for reference.
        let d = half::f16::from_bits(u16::from_le_bytes([0x28, 0xeb])).to_f32();
        println!(" V first 2 bytes as f16: {:.6}", d);
    }

    println!("\n=== Analysis complete ===");
}

/// Borrow up to `len` bytes of `data` starting at `offset`, clamped so the
/// slice can never go out of bounds (empty slice when `offset` is past the
/// end of `data`).
fn head(data: &[u8], offset: usize, len: usize) -> &[u8] {
    let start = offset.min(data.len());
    let end = offset.saturating_add(len).min(data.len());
    &data[start..end]
}

/// Read two bytes at `offset` as a little-endian f16 and widen to f32.
/// Returns `None` when fewer than two bytes are available at that offset.
fn f16_le_at(data: &[u8], offset: usize) -> Option<f32> {
    let b = data.get(offset..offset.checked_add(2)?)?;
    Some(half::f16::from_bits(u16::from_le_bytes([b[0], b[1]])).to_f32())
}