use realizar::gguf::{MappedGGUFModel, OwnedQuantizedModel};
/// Prints a diagnostic trace of a single-token forward pass through a quantized GGUF model.
///
/// The report covers the model configuration, statistics of the BOS token embedding, a
/// comparison against the token-0 embedding, the logits returned by `forward` for the
/// single-token input `[bos]`, and the LM head weight's quantization type and dimensions.
///
/// The model path is taken from the first command-line argument when one is given;
/// otherwise the built-in default path is used.
///
/// # Errors
///
/// Returns an error if the model cannot be mapped or loaded, if the forward pass fails,
/// or if the embedding table or the logits are too short for the indices inspected here.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    const DEFAULT_MODEL_PATH: &str = "/home/noah/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct-GGUF/snapshots/198f08841147e5196a6a69bd0053690fb1fd3857/qwen2-0_5b-instruct-q4_0.gguf";
    // Number of leading embedding components echoed in the report.
    const PREVIEW_LEN: usize = 8;
    // Highest logit index read directly below (labelled "4" in the report).
    const PROBE_TOKEN: usize = 19;

    // An explicit CLI argument wins so the trace is usable outside the original machine.
    let path = std::env::args()
        .nth(1)
        .unwrap_or_else(|| DEFAULT_MODEL_PATH.to_string());
    let mapped = MappedGGUFModel::from_path(path.as_str())?;
    let model = OwnedQuantizedModel::from_mapped(&mapped)?;
    let config = model.config();
    let hidden_dim = config.hidden_dim;
    let bos = 151643u32;

    println!("=== Forward Pass Trace ===\n");
    println!("Config:");
    println!(" hidden_dim: {}", config.hidden_dim);
    println!(" num_heads: {}", config.num_heads);
    println!(" num_kv_heads: {}", config.num_kv_heads);
    println!(" eps: {:.1e}", config.eps);
    println!(" rope_type: {}", config.rope_type);

    // The embedding table is indexed as consecutive rows of `hidden_dim` values per token.
    let embedding_table = model.token_embedding();
    let emb_start = bos as usize * hidden_dim;
    // Checked slicing: a table smaller than expected becomes an error, not a panic.
    let emb = embedding_table
        .get(emb_start..emb_start + hidden_dim)
        .ok_or("token embedding table has no row for the BOS token")?;
    let emb_norm: f32 = emb.iter().map(|x| x * x).sum::<f32>().sqrt();
    let emb_sum: f32 = emb.iter().sum();
    let emb_max: f32 = emb.iter().copied().fold(f32::NEG_INFINITY, f32::max);
    let emb_min: f32 = emb.iter().copied().fold(f32::INFINITY, f32::min);
    println!("\n1. Initial Embedding (BOS token):");
    println!(" norm: {:.4}", emb_norm);
    println!(" sum: {:.4}", emb_sum);
    println!(" max: {:.4}, min: {:.4}", emb_max, emb_min);
    println!(" first 8: {:?}", &emb[..emb.len().min(PREVIEW_LEN)]);

    let tok0_emb = embedding_table
        .get(..hidden_dim)
        .ok_or("token embedding table has no row for token 0")?;
    let tok0_norm: f32 = tok0_emb.iter().map(|x| x * x).sum::<f32>().sqrt();
    println!("\n2. Token 0 (\"!\") Embedding:");
    println!(" norm: {:.4}", tok0_norm);
    println!(" first 8: {:?}", &tok0_emb[..tok0_emb.len().min(PREVIEW_LEN)]);

    let dot: f32 = emb.iter().zip(tok0_emb.iter()).map(|(a, b)| a * b).sum();
    // If either norm is zero this is NaN/inf; it is printed as-is since this is a diagnostic.
    let cos_sim = dot / (emb_norm * tok0_norm);
    println!("\n Cosine sim (BOS, token_0): {:.4}", cos_sim);

    let logits = model.forward(&[bos])?;
    // Validate once so the direct indexing below cannot panic.
    if logits.len() <= PROBE_TOKEN {
        return Err(format!(
            "forward pass returned {} logits; need at least {}",
            logits.len(),
            PROBE_TOKEN + 1
        )
        .into());
    }
    println!("\n3. Logits Analysis:");
    println!(" Token 0 logit: {:.4}", logits[0]);
    println!(" Token 19 (\"4\") logit: {:.4}", logits[PROBE_TOKEN]);

    let mut indexed: Vec<_> = logits.iter().enumerate().collect();
    // Descending by logit. `total_cmp` is a total order, so NaN logits (plausible when
    // debugging a broken forward pass) no longer panic; positive NaNs sort to the top.
    indexed.sort_by(|a, b| b.1.total_cmp(a.1));
    println!("\n Top 5 predictions:");
    for (tok, logit) in indexed.iter().take(5) {
        println!(" Token {}: {:.4}", tok, logit);
    }

    // Baseline: the dot product of the BOS embedding with the token-0 embedding.
    // NOTE(review): treating this as "logit[0]" presumably assumes the LM head row for
    // token 0 matches the token-0 embedding (tied weights) — confirm for this model.
    let expected_identity_logit = dot;
    println!("\n4. If forward were identity (hidden = BOS_emb):");
    println!(" Expected logit[0] = {:.4}", expected_identity_logit);
    println!(" Actual logit[0] = {:.4}", logits[0]);
    println!(
        " Ratio actual/expected = {:.2}x",
        logits[0] / expected_identity_logit
    );

    // If the hidden state were perfectly aligned with the token-0 embedding, then
    // logit[0] = ||hidden|| * ||tok0||, so dividing by ||tok0|| estimates ||hidden||.
    let estimated_hidden_norm = logits[0] / tok0_norm;
    println!("\n5. Estimated hidden state (if aligned with token 0):");
    println!(" ||hidden|| ≈ {:.4}", estimated_hidden_norm);
    println!(" Original BOS emb norm: {:.4}", emb_norm);
    println!(" Amplification: {:.2}x", estimated_hidden_norm / emb_norm);

    let lm_head = model.lm_head_weight();
    println!("\n6. LM Head Weight:");
    println!(" qtype: {}", lm_head.qtype);
    println!(
        " in_dim: {} (should be hidden_dim={})",
        lm_head.in_dim, hidden_dim
    );
    println!(" out_dim: {} (should be vocab_size)", lm_head.out_dim);

    Ok(())
}