use realizar::cuda::{CudaKernels, KernelType};
/// Generate the multi-head attention PTX kernel for phi-2, print it to
/// stdout, and save a copy under /tmp for inspection.
///
/// Returns an `io::Error` if writing the PTX file fails (previously this
/// panicked via `unwrap()`).
fn main() -> std::io::Result<()> {
    // Single source of truth for the output location — used by both the
    // write below and the confirmation message, so they cannot drift apart.
    let out_path = "/tmp/attention_phi2.ptx";

    let kernels = CudaKernels::new();
    // phi-2 attention geometry: 32 heads of dim 80 over a 128-token
    // window, with a causal (autoregressive) mask.
    let kernel = KernelType::MultiHeadAttention {
        seq_len: 128,
        head_dim: 80,
        n_heads: 32,
        causal: true,
    };

    let ptx = kernels.generate_ptx(&kernel);

    println!("=== Multi-Head Attention PTX for phi-2 ===");
    println!("seq_len=128, head_dim=80, n_heads=32, causal=true");
    println!("PTX size: {} bytes\n", ptx.len());
    println!("{}", ptx);

    // Propagate the I/O error with `?` instead of panicking on failure
    // (e.g. a read-only or missing /tmp).
    std::fs::write(out_path, &ptx)?;
    println!("\nPTX saved to {}", out_path);
    Ok(())
}