use realizar::apr_transformer::AprTransformer;
use realizar::gguf::{MappedGGUFModel, OwnedQuantizedModel};
/// Pearson correlation coefficient between two `f32` slices, accumulated in `f64`.
///
/// Both inputs are truncated to the length of the shorter slice before any
/// statistic is computed. Returns `0.0` when either slice is empty or when
/// either (truncated) input has zero variance, where correlation is undefined.
///
/// Fix vs. previous version: the means were computed over the *full* slices
/// but divided by the truncated length `n`, giving wrong results whenever
/// `a.len() != b.len()`. Means now use only the first `n` elements.
fn correlation(a: &[f32], b: &[f32]) -> f64 {
    let n = a.len().min(b.len());
    if n == 0 {
        return 0.0;
    }
    // Truncate once so every statistic below sees exactly n elements.
    let (a, b) = (&a[..n], &b[..n]);
    let a_mean: f64 = a.iter().map(|&x| x as f64).sum::<f64>() / n as f64;
    let b_mean: f64 = b.iter().map(|&x| x as f64).sum::<f64>() / n as f64;
    let mut cov = 0.0f64;
    let mut a_var = 0.0f64;
    let mut b_var = 0.0f64;
    for (&x, &y) in a.iter().zip(b.iter()) {
        let a_d = x as f64 - a_mean;
        let b_d = y as f64 - b_mean;
        cov += a_d * b_d;
        a_var += a_d * a_d;
        b_var += b_d * b_d;
    }
    if a_var > 0.0 && b_var > 0.0 {
        cov / (a_var.sqrt() * b_var.sqrt())
    } else {
        0.0
    }
}
/// Debug harness: loads the same Qwen2.5-Coder model in APR and GGUF form
/// and compares them stage by stage — token embedding, layer-0 RMSNorm,
/// layer-0 QKV projection, and final logits — printing Pearson correlations
/// so a divergence can be localized to the first stage where they disagree.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Local model paths for this debugging session.
    let apr_path = "/home/noah/models/qwen2.5-coder-1.5b-q4k.apr";
    let gguf_path =
        "/home/noah/src/single-shot-eval/models/raw/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf";

    println!("Loading APR model...");
    let apr_model = AprTransformer::from_apr_file(apr_path)?;
    println!("Loading GGUF model...");
    let mapped = MappedGGUFModel::from_path(gguf_path)?;
    let gguf_model = OwnedQuantizedModel::from_mapped(&mapped)?;

    let hidden_dim = apr_model.config.hidden_dim;
    println!("hidden_dim: {}", hidden_dim);

    // Stage 1: embedding of the BOS token.
    let bos: u32 = 151643;
    let apr_embed = apr_model.embed(&[bos]);
    let gguf_embed = gguf_model.embed(&[bos]);
    println!("\n=== Embedding ===");
    println!("APR embed first 5: {:?}", &apr_embed[..5]);
    println!("GGUF embed first 5: {:?}", &gguf_embed[..5]);
    println!(
        "Embed correlation: {:.6}",
        correlation(&apr_embed, &gguf_embed)
    );

    // Stage 2: layer-0 attention RMSNorm, applied identically to both
    // embeddings via a shared closure (same math as before, deduplicated).
    println!("\n=== RMSNorm ===");
    let eps = apr_model.config.eps;
    let rmsnorm = |hidden: &[f32], weight: &[f32]| -> Vec<f32> {
        let energy: f32 = hidden.iter().map(|x| x * x).sum();
        let scale = (energy / hidden_dim as f32 + eps).sqrt();
        hidden
            .iter()
            .zip(weight.iter())
            .map(|(h, w)| h / scale * w)
            .collect()
    };
    let apr_normed = rmsnorm(&apr_embed, &apr_model.layers[0].attn_norm_weight);
    let gguf_normed = rmsnorm(&gguf_embed, &gguf_model.layers()[0].attn_norm_weight);
    println!("APR normed first 5: {:?}", &apr_normed[..5]);
    println!("GGUF normed first 5: {:?}", &gguf_normed[..5]);
    println!(
        "Normed correlation: {:.6}",
        correlation(&apr_normed, &gguf_normed)
    );

    // Stage 3: layer-0 QKV projection as a plain F32 matvec over the
    // row-major weight (one dot product per output row, sequential sum
    // order preserved so float results match the previous index loop).
    println!("\n=== QKV Projection (F32 matmul) ===");
    let apr_qkv_weight = &apr_model.layers[0].qkv_weight;
    let qkv_dim = apr_qkv_weight.len() / hidden_dim;
    println!(
        "APR qkv_weight size: {} ({} x {})",
        apr_qkv_weight.len(),
        qkv_dim,
        hidden_dim
    );
    let apr_qkv: Vec<f32> = (0..qkv_dim)
        .map(|row| {
            apr_qkv_weight[row * hidden_dim..(row + 1) * hidden_dim]
                .iter()
                .zip(apr_normed.iter())
                .map(|(w, h)| w * h)
                .sum::<f32>()
        })
        .collect();
    let _gguf_layer = &gguf_model.layers()[0];
    println!("APR qkv first 10: {:?}", &apr_qkv[..10]);

    // Stage 4: full forward pass, compare final logits.
    let gguf_logits = gguf_model.forward(&[bos])?;
    let apr_logits = apr_model.forward(&[bos])?;
    println!("\n=== Final Logits ===");
    println!("APR first 10: {:?}", &apr_logits[..10]);
    println!("GGUF first 10: {:?}", &gguf_logits[..10]);
    println!("Correlation: {:.6}", correlation(&apr_logits, &gguf_logits));

    // Stage 5: sanity-check the stored Q4_K attn_output tensor size.
    // Q4_K packs 256 weights into a 144-byte super-block, hence the
    // (elements / 256) * 144 expected byte count.
    if let Some(ref q4k_layers) = apr_model.q4k_layers {
        if let Some(ref q4k_attn_out) = q4k_layers[0].attn_output_weight {
            println!("\n=== attn_output Q4K vs F32 ===");
            println!("Q4K bytes: {}", q4k_attn_out.len());
            let expected_f32_size = hidden_dim * hidden_dim;
            let expected_q4k_bytes = (expected_f32_size / 256) * 144;
            println!(
                "Expected Q4K bytes for {}x{}: {}",
                hidden_dim, hidden_dim, expected_q4k_bytes
            );
            if q4k_attn_out.len() == expected_q4k_bytes {
                println!("Q4K size matches expected!");
            } else {
                println!(
                    "Q4K size MISMATCH: got {} expected {}",
                    q4k_attn_out.len(),
                    expected_q4k_bytes
                );
            }
        }
    }
    Ok(())
}