#[test]
fn test_cov001_weight_quant_type_detection() {
    // GGML type IDs map to the supported quantization formats:
    // Q4_0 = 2, Q4_1 = 3, Q5_0 = 6, Q8_0 = 8, Q4_K = 12, Q5_K = 13, Q6_K = 14.
    assert!(matches!(
        WeightQuantType::from_ggml_type(12),
        Some(WeightQuantType::Q4K)
    ));
    assert!(matches!(
        WeightQuantType::from_ggml_type(13),
        Some(WeightQuantType::Q5K)
    ));
    assert!(matches!(
        WeightQuantType::from_ggml_type(14),
        Some(WeightQuantType::Q6K)
    ));
    assert!(matches!(
        WeightQuantType::from_ggml_type(8),
        Some(WeightQuantType::Q8_0)
    ));
    assert!(matches!(
        WeightQuantType::from_ggml_type(6),
        Some(WeightQuantType::Q5_0)
    ));
    assert!(matches!(
        WeightQuantType::from_ggml_type(2),
        Some(WeightQuantType::Q4_0)
    ));
    assert!(matches!(
        WeightQuantType::from_ggml_type(3),
        Some(WeightQuantType::Q4_1)
    ));
    // Unknown type IDs are rejected.
    assert!(WeightQuantType::from_ggml_type(999).is_none());
    // K-quant superblocks cover 256 weights; legacy Q8_0/Q4_0 blocks cover 32
    // (the byte arithmetic is sketched in the test below).
    assert_eq!(WeightQuantType::Q4K.bytes_per_superblock(), 144);
    assert_eq!(WeightQuantType::Q5K.bytes_per_superblock(), 176);
    assert_eq!(WeightQuantType::Q6K.bytes_per_superblock(), 210);
    assert_eq!(WeightQuantType::Q8_0.bytes_per_block(), 34);
    assert_eq!(WeightQuantType::Q4_0.bytes_per_block(), 18);
    // A 1x256 Q4_K tensor is exactly one 144-byte superblock.
    let q4k = WeightQuantType::Q4K;
    assert!(q4k.matches_size(144, 1, 256));
}
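
// A minimal sketch of the size arithmetic that `matches_size` presumably
// performs: K-quants pack 256 weights per superblock, Q8_0/Q4_0 pack 32
// weights per block, so the expected byte count follows directly from the
// per-group sizes asserted above. `expected_bytes` is a hypothetical helper
// for illustration, not part of the crate API.
#[test]
fn test_cov001_weight_quant_size_arithmetic_sketch() {
    fn expected_bytes(weights: usize, group: usize, bytes_per_group: usize) -> usize {
        (weights / group) * bytes_per_group
    }
    // One 1x256 Q4_K row is one superblock: 144 bytes, matching the
    // `matches_size(144, 1, 256)` assertion above.
    assert_eq!(expected_bytes(256, 256, 144), 144);
    // A 1x4096 Q6_K row is sixteen 210-byte superblocks.
    assert_eq!(expected_bytes(4096, 256, 210), 16 * 210);
    // A 1x4096 Q8_0 row is 128 34-byte blocks.
    assert_eq!(expected_bytes(4096, 32, 34), 128 * 34);
}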

#[test]
fn test_cov001_ptx_optimization_hints() {
    // max_throughput favors wide (width-4, i.e. 128-bit) vectorized loads
    // and pads shared-memory rows by one element to dodge bank conflicts.
    let max_throughput = PtxOptimizationHints::max_throughput();
    assert!(max_throughput.uses_vectorized_loads());
    assert_eq!(max_throughput.vector_width(), 4);
    assert_eq!(max_throughput.shared_mem_padding(), 1);
    // low_latency sticks to scalar loads with no padding.
    let low_latency = PtxOptimizationHints::low_latency();
    assert!(!low_latency.uses_vectorized_loads());
    assert_eq!(low_latency.vector_width(), 1);
    assert_eq!(low_latency.shared_mem_padding(), 0);
    // balanced sits in between with width-2 vectorized loads.
    let balanced = PtxOptimizationHints::balanced();
    assert!(balanced.uses_vectorized_loads());
    assert_eq!(balanced.vector_width(), 2);
}
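
// A hedged sketch of what the vector widths above mean in global-memory
// terms, assuming f32 elements: width 4 corresponds to a 128-bit
// transaction (PTX `ld.global.v4.f32`), width 2 to 64 bits, width 1 to a
// scalar 32-bit load. `bytes_per_load` is a hypothetical helper.
#[test]
fn test_cov001_vector_width_bytes_sketch() {
    fn bytes_per_load(vector_width: usize) -> usize {
        vector_width * std::mem::size_of::<f32>()
    }
    assert_eq!(bytes_per_load(4), 16); // max_throughput: 128-bit loads
    assert_eq!(bytes_per_load(2), 8); // balanced: 64-bit loads
    assert_eq!(bytes_per_load(1), 4); // low_latency: scalar loads
}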

#[test]
fn test_cov001_ptx_optimizer() {
    let hints = PtxOptimizationHints::max_throughput();
    let optimizer = PtxOptimizer::new(hints);
    let summary = optimizer.summary();
    assert!(summary.contains("PtxOptimizer"));
    // With max_throughput's padding of 1, a 32-wide row becomes 33
    // (see the bank-index sketch below for why that helps).
    assert_eq!(optimizer.padded_shared_mem_row(32), 33);
    let regs = optimizer.estimated_registers();
    assert!(regs > 0);
    // Exercised for coverage; the verdict depends on the hint configuration.
    let _high_pressure = optimizer.is_high_register_pressure();
}
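
// Sketch of why `padded_shared_mem_row(32)` returning 33 matters: shared
// memory has 32 banks, so with an unpadded 32-float row every element of a
// column maps to the same bank. Padding the stride to 33 advances the bank
// index by one per row. This assumes 4-byte banks; the helper below is
// illustrative, not the crate's implementation.
#[test]
fn test_cov001_bank_padding_sketch() {
    const BANKS: usize = 32;
    let bank_of = |row: usize, col: usize, stride: usize| (row * stride + col) % BANKS;
    // Unpadded stride 32: column 0 of rows 0 and 1 collide in bank 0.
    assert_eq!(bank_of(0, 0, 32), bank_of(1, 0, 32));
    // Padded stride 33: the same accesses land in different banks.
    assert_ne!(bank_of(0, 0, 33), bank_of(1, 0, 33));
}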

#[test]
fn test_cov001_register_tiling() {
    // Register budget grows with the per-thread tile size (one possible
    // tile-size interpretation of these counts is sketched below).
    let large = RegisterTiling::large();
    assert_eq!(large.registers_needed(), 64);
    let medium = RegisterTiling::medium();
    assert_eq!(medium.registers_needed(), 16);
    let small = RegisterTiling::small();
    assert_eq!(small.registers_needed(), 4);
}
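
// The 64/16/4 register counts above are consistent with square per-thread
// accumulator tiles of 8x8, 4x4, and 2x2 C elements, one register each.
// That tiling interpretation is an assumption, not confirmed by the crate;
// the arithmetic below only shows the correspondence.
#[test]
fn test_cov001_register_tiling_arithmetic_sketch() {
    let accumulators = |tile_dim: usize| tile_dim * tile_dim;
    assert_eq!(accumulators(8), 64); // RegisterTiling::large()
    assert_eq!(accumulators(4), 16); // RegisterTiling::medium()
    assert_eq!(accumulators(2), 4); // RegisterTiling::small()
}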

#[test]
fn test_cov001_memory_pattern() {
    // Access patterns correspond to scalar, 2-wide, and 4-wide loads and
    // must be pairwise distinct.
    let scalar = MemoryPattern::Scalar;
    let vec2 = MemoryPattern::Vector2;
    let vec4 = MemoryPattern::Vector4;
    assert_ne!(scalar, vec2);
    assert_ne!(vec2, vec4);
    assert_ne!(scalar, vec4);
}

#[test]
fn test_cov001_bank_conflict_strategy() {
    // Three ways to handle shared-memory bank conflicts: do nothing,
    // pad each row, or XOR-swizzle the addresses; all pairwise distinct.
    let none = BankConflictStrategy::None;
    let padding = BankConflictStrategy::Padding;
    let xor = BankConflictStrategy::Xor;
    assert_ne!(none, padding);
    assert_ne!(padding, xor);
    assert_ne!(none, xor);
}
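
// Sketch of the two conflict-avoidance techniques named above: `Padding`
// widens each row (see the padding sketch earlier), while `Xor` swizzles
// the column index with the row index so a column walk spreads across
// banks without wasting shared memory. The swizzle below is a common
// formulation and an assumption about what this strategy actually emits.
#[test]
fn test_cov001_xor_swizzle_sketch() {
    const BANKS: usize = 32;
    let swizzled_bank = |row: usize, col: usize| (col ^ row) % BANKS;
    // Walking column 0 down rows 0..32 hits 32 distinct banks.
    let mut banks: Vec<usize> = (0..32).map(|r| swizzled_bank(r, 0)).collect();
    banks.sort_unstable();
    banks.dedup();
    assert_eq!(banks.len(), 32);
}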

#[test]
fn test_cov001_presets_coverage() {
    // Exercise every preset constructor; results are intentionally
    // discarded since this test only guards against panics. ffn_gemm and
    // the q4k presets use Llama-7B-style GEMM shapes (4096 hidden,
    // 11008 FFN intermediate).
    let _llama_attn = presets::llama_attention(2048, 64);
    let _ffn = presets::ffn_gemm(1, 4096, 11008);
    let _q4k = presets::q4k_inference(1, 4096, 4096);
    let _q4k_ggml = presets::q4k_ggml_inference(1, 4096, 4096);
    let _rmsnorm = presets::rmsnorm(4096);
    let _mha = presets::multi_head_attention(2048, 64, 32);
    let _phi2_mha = presets::phi2_multi_head_attention(2048);
    let _tc_attn = presets::tensor_core_attention(2048, 64, 32);
    let _llama_tc = presets::llama_tensor_core_attention(2048);
}
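
// Rough sketch of why the Q4_K presets above matter: a 4096x4096 weight
// matrix (the q4k_inference shape) holds 16_777_216 weights, i.e. 65_536
// superblocks of 144 bytes = 9 MiB, versus 64 MiB in f32. That is Q4_K's
// 4.5 bits-per-weight footprint. Pure arithmetic, no crate API involved.
#[test]
fn test_cov001_q4k_footprint_sketch() {
    let (k, n) = (4096usize, 4096usize);
    let weights = k * n;
    let q4k_bytes = (weights / 256) * 144;
    let f32_bytes = weights * 4;
    assert_eq!(q4k_bytes, 9_437_184); // 9 MiB
    assert_eq!(f32_bytes, 67_108_864); // 64 MiB
    // Scaled by 16 to stay integral: 9 half-bits = 4.5 bits per weight.
    assert_eq!(q4k_bytes * 16 / weights, 9);
}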

#[test]
fn test_cov001_kernel_type_kernel_names() {
    let kernels = CudaKernels::new();
    // Every kernel variant must resolve to a non-empty kernel name.
    let types = [
        KernelType::GemmNaive { m: 1, n: 1, k: 1 },
        KernelType::GemmTiled { m: 1, n: 1, k: 1, tile_size: 32 },
        KernelType::Softmax { dim: 128 },
        KernelType::LayerNorm { hidden_size: 256, epsilon: 1e-5, affine: true },
        KernelType::Attention { seq_len: 16, head_dim: 64, causal: true },
        KernelType::QuantizedGemm { m: 1, n: 256, k: 256 },
        KernelType::QuantizedGemmGgml { m: 1, n: 256, k: 256 },
        KernelType::Q4KGemv { k: 256, n: 256 },
        KernelType::Q5KGemv { k: 256, n: 256 },
        KernelType::Q6KGemv { k: 256, n: 256 },
    ];
    for kt in types {
        let name = kernels.kernel_name(&kt);
        assert!(!name.is_empty(), "Kernel name should not be empty");
    }
}