#[cfg(test)]
mod tests {
    use crate::apr::*;

    /// Falsification test for the GQA (grouped-query attention) head
    /// configuration seen through the CUDA model wrapper.
    ///
    /// Loads the APR fixture from the user's Hugging Face cache, checks that
    /// the loaded metadata reports 12 query heads and 2 KV heads, wraps the
    /// model in `AprV2ModelCuda` on device 0 and asserts that the head ratio
    /// read back through `inner()` is 6.
    ///
    /// The test is environment dependent and returns early (counting as a
    /// pass) when `HOME` is unset, the fixture file is missing, or the CUDA
    /// model cannot be constructed.
    #[test]
    #[cfg(feature = "cuda")]
    fn test_falsification_apr_cuda_gqa_dimensions() {
        use crate::apr::AprV2ModelCuda;

        // A missing HOME is an environment limitation, not a test failure:
        // skip like the other "not available" cases below. `var_os` also
        // accepts home directories that are not valid UTF-8.
        let home = match std::env::var_os("HOME") {
            Some(home) => home,
            None => {
                println!("⚠️ HOME env var not set, skipping");
                return;
            },
        };
        let apr_path = std::path::Path::new(&home)
            .join(".cache/huggingface/models/qwen2.5-coder-1.5b-apr/qwen2.5-coder-1.5b-q4k.apr");
        if !apr_path.exists() {
            println!("⚠️ Test model not available at {:?}, skipping", apr_path);
            return;
        }

        // Check the metadata straight after loading, before the model is
        // moved into the CUDA wrapper.
        let model = AprV2Model::load(apr_path.as_path()).expect("Should load APR file");
        assert_eq!(model.metadata.num_heads, Some(12), "num_heads should be 12");
        assert_eq!(
            model.metadata.num_kv_heads,
            Some(2),
            "num_kv_heads should be 2 (GQA)"
        );

        let cuda_model = match AprV2ModelCuda::new(model, 0) {
            Ok(m) => m,
            Err(e) => {
                eprintln!("⚠️ CUDA not available: {e}");
                return;
            },
        };

        let metadata = &cuda_model.inner().metadata;
        println!("=== CUDA EXECUTOR GQA CONFIG ===");
        println!("  model.metadata.num_heads: {:?}", metadata.num_heads);
        println!("  model.metadata.num_kv_heads: {:?}", metadata.num_kv_heads);

        // Same fallbacks as before: a missing head count defaults to 1 and a
        // missing KV head count defaults to the query head count.
        let num_heads = metadata.num_heads.unwrap_or(1);
        let num_kv_heads = metadata.num_kv_heads.unwrap_or(num_heads);

        // Fail with a readable message instead of a bare divide-by-zero panic.
        assert_ne!(
            num_kv_heads, 0,
            "FALSIFICATION FAILED: num_kv_heads is 0, GQA ratio is undefined"
        );
        let gqa_ratio = num_heads / num_kv_heads;
        assert_eq!(
            gqa_ratio, 6,
            "FALSIFICATION FAILED: GQA ratio wrong!\n\
             Expected: 6 (12 Q heads / 2 KV heads), Got: {} ({} / {})",
            gqa_ratio, num_heads, num_kv_heads
        );
        println!(
            "✅ CUDA model has correct GQA ratio: {} ({}:{} heads:kv_heads)",
            gqa_ratio, num_heads, num_kv_heads
        );
    }

    // The files below are textually included, so their contents are compiled
    // as part of this module and share its `use crate::apr::*` import.
    // NOTE(review): `transform.rs` does not follow the `tests_*.rs` naming of
    // the other includes — confirm it is meant to be part of this test module.
    include!("tests_header_parsing.rs");
    include!("tests_apr_flags_metadata.rs");
    include!("tests_f16.rs");
    include!("tests_apr_header.rs");
    include!("tests_byte.rs");
    include!("tests_simd_dot.rs");
    include!("tests_tensor_entry_dequant.rs");
    include!("tests_q8_f16_and_trait_impls.rs");
    include!("transform.rs");
    include!("tests_apr.rs");
    include!("tests_bpe_tokenizer_apr.rs");
    include!("tests_mapped_model.rs");
    include!("tests_tensor_entry.rs");
}