#![allow(clippy::needless_range_loop)]
use std::path::Path;
use realizar::apr_transformer::AprTransformer;
use realizar::gguf::{MappedGGUFModel, OwnedQuantizedModel};
use realizar::quantize::fused_q4_0_q8_0_parallel_matvec;
fn main() -> Result<(), Box<dyn std::error::Error>> {
let gguf_path =
"/home/noah/src/HF-Advanced-Fine-Tuning/corpus/models/qwen2-0.5b-instruct-q4_0.gguf";
let apr_path = "/tmp/qwen2-test6.apr";
if !Path::new(gguf_path).exists() || !Path::new(apr_path).exists() {
eprintln!("Model files not found");
return Ok(());
}
let mapped = MappedGGUFModel::from_path(gguf_path)?;
let gguf_model = OwnedQuantizedModel::from_mapped(&mapped)?;
let apr_model = AprTransformer::from_apr_file(apr_path)?;
let hidden_dim = apr_model.config.hidden_dim;
let bos: u32 = 151643;
let embed = apr_model.embed(&[bos]);
let eps = apr_model.config.eps;
let sum_sq: f32 = embed.iter().map(|x| x * x).sum();
let rms = (sum_sq / hidden_dim as f32 + eps).sqrt();
let apr_layer = &apr_model.layers[0];
let normed: Vec<f32> = embed
.iter()
.enumerate()
.map(|(i, &x)| (x / rms) * apr_layer.attn_norm_weight[i])
.collect();
println!("Normed first 5: {:?}", &normed[..5]);
let qkv_out_dim = apr_layer.qkv_weight.len() / hidden_dim;
let mut apr_qkv = vec![0.0f32; qkv_out_dim];
for o in 0..qkv_out_dim {
let w_start = o * hidden_dim;
let sum: f32 = (0..hidden_dim)
.map(|i| normed[i] * apr_layer.qkv_weight[w_start + i])
.sum();
apr_qkv[o] = sum;
}
println!("\nAPR QKV output (first 10): {:?}", &apr_qkv[..10]);
println!("APR Q[0..5]: {:?}", &apr_qkv[..5]);
println!(
"APR K (at {}..{}): {:?}",
hidden_dim,
hidden_dim + 5,
&apr_qkv[hidden_dim..hidden_dim + 5]
);
let gguf_layer = &gguf_model.layers()[0];
match &gguf_layer.qkv_weight {
realizar::gguf::OwnedQKVWeights::Separate { q, k, v } => {
println!("\n=== GGUF QKV dimensions ===");
println!(
"Q: in_dim={}, out_dim={}, data_len={}",
q.in_dim,
q.out_dim,
q.data.len()
);
println!(
"K: in_dim={}, out_dim={}, data_len={}",
k.in_dim,
k.out_dim,
k.data.len()
);
println!(
"V: in_dim={}, out_dim={}, data_len={}",
v.in_dim,
v.out_dim,
v.data.len()
);
let gguf_q = fused_q4_0_q8_0_parallel_matvec(&q.data, &normed, q.in_dim, q.out_dim)?;
println!("\nGGUF Q output (first 5): {:?}", &gguf_q[..5]);
let q_diff: f32 = apr_qkv[..hidden_dim]
.iter()
.zip(gguf_q.iter())
.map(|(a, b)| (a - b).abs())
.fold(0.0f32, f32::max);
println!("\nQ max diff: {:.10}", q_diff);
if q_diff > 0.01 {
println!("MISMATCH! Finding first significant diff...");
for i in 0..hidden_dim.min(20) {
let diff = (apr_qkv[i] - gguf_q[i]).abs();
println!(
" Q[{}]: APR={:.6}, GGUF={:.6}, diff={:.6}",
i, apr_qkv[i], gguf_q[i], diff
);
}
} else {
println!("✓ Q outputs match!");
}
let gguf_k = fused_q4_0_q8_0_parallel_matvec(&k.data, &normed, k.in_dim, k.out_dim)?;
println!("\nGGUF K output (first 5): {:?}", &gguf_k[..5]);
println!(
"APR K output (first 5): {:?}",
&apr_qkv[hidden_dim..hidden_dim + 5]
);
},
realizar::gguf::OwnedQKVWeights::Fused(qkv) => {
println!("GGUF uses fused QKV");
let gguf_qkv =
fused_q4_0_q8_0_parallel_matvec(&qkv.data, &normed, qkv.in_dim, qkv.out_dim)?;
println!("GGUF QKV output first 10: {:?}", &gguf_qkv[..10]);
},
}
Ok(())
}