#![allow(clippy::disallowed_methods)]
/// Runs the architecture contract checks.
///
/// Derives a few ratios and a KV-cache size from the hard-coded model
/// dimensions, prints each figure, and panics through the assertions if any
/// of the stated invariants does not hold.
fn main() {
    // Fixed model dimensions that every check below reads.
    const HIDDEN_DIM: usize = 3584;
    const NUM_LAYERS: usize = 28;
    const NUM_HEADS: usize = 28;
    const NUM_KV_HEADS: usize = 4;
    const INTERMEDIATE_DIM: usize = 18944;
    const HEAD_DIM: usize = 128;
    const BYTES_PER_MB: f64 = 1024.0 * 1024.0;

    // Kept as `let` bindings: the format strings below capture them by name.
    let rope_theta: f64 = 1_000_000.0;
    let ctx_len: usize = 4096;

    // Grouped-query attention: KV heads as a fraction of query heads, and the
    // complementary share that is saved.
    let gqa_ratio = NUM_KV_HEADS as f64 / NUM_HEADS as f64;
    let cache_savings = 1.0 - gqa_ratio;
    println!(
        "GQA ratio: {gqa_ratio:.2} -> {:.0}% KV cache reduction",
        cache_savings * 100.0
    );
    assert!(cache_savings > 0.5, "GQA must reduce KV cache by >50%");

    // Feed-forward width relative to the hidden width.
    let ffn_ratio = INTERMEDIATE_DIM as f64 / HIDDEN_DIM as f64;
    println!("SwiGLU expansion: {ffn_ratio:.2}x (compensates for gating)");
    assert!(ffn_ratio > 2.0, "SwiGLU expansion must exceed 2x");

    // The hidden width must split evenly into `NUM_HEADS` heads of `HEAD_DIM`.
    assert_eq!(
        HIDDEN_DIM,
        NUM_HEADS * HEAD_DIM,
        "hidden_dim = num_heads * head_dim"
    );

    println!("RoPE theta: {rope_theta:.0} (higher = longer context)");
    assert!(rope_theta > 0.0, "RoPE theta must be positive");

    // KV-cache footprint at `ctx_len` tokens.
    // NOTE(review): presumably the leading 2 covers keys and values and the
    // trailing 2 is bytes per element -- confirm.
    let cache_bytes: usize = [2, NUM_LAYERS, NUM_KV_HEADS, HEAD_DIM, ctx_len, 2]
        .iter()
        .product();
    let kv_mb = cache_bytes as f64 / BYTES_PER_MB;
    println!("KV cache at {ctx_len} context: {kv_mb:.0} MB");

    println!("Normalization: RMSNorm (no mean subtraction, faster)");
    println!("Chapter 8 contracts: PASSED");
}