use super::*;
/// Attempts to build a [`CudaExecutor`] via `CudaExecutor::new(0)`.
///
/// Any construction error is discarded and reported as `None`, which lets
/// callers return early instead of failing.
fn create_executor() -> Option<CudaExecutor> {
    let attempt = CudaExecutor::new(0);
    attempt.ok()
}
/// Builds a [`ValidatedLayerWeights`] fixture in which every pointer and
/// length field is `0` and every quantization tag is `WeightQuantType::Q4K`.
///
/// The wrapper is produced with `ValidatedLayerWeights::new_unchecked`.
/// NOTE(review): presumably this bypasses validation so that all-null
/// weights can be handed to the executor in tests — confirm.
fn test_zeroed_layer_weights() -> ValidatedLayerWeights {
    let zeroed = IndexedLayerWeights {
        // Attention projections (pointer, length, quantization tag).
        attn_q_ptr: 0,
        attn_q_len: 0,
        attn_q_qtype: WeightQuantType::Q4K,
        attn_k_ptr: 0,
        attn_k_len: 0,
        attn_k_qtype: WeightQuantType::Q4K,
        attn_v_ptr: 0,
        attn_v_len: 0,
        attn_v_qtype: WeightQuantType::Q4K,
        attn_output_ptr: 0,
        attn_output_len: 0,
        attn_output_qtype: WeightQuantType::Q4K,

        // Feed-forward projections (pointer, length, quantization tag).
        ffn_gate_ptr: 0,
        ffn_gate_len: 0,
        ffn_gate_qtype: WeightQuantType::Q4K,
        ffn_up_ptr: 0,
        ffn_up_len: 0,
        ffn_up_qtype: WeightQuantType::Q4K,
        ffn_down_ptr: 0,
        ffn_down_len: 0,
        ffn_down_qtype: WeightQuantType::Q4K,

        // Norm weights (pointer, length only).
        attn_norm_ptr: 0,
        attn_norm_len: 0,
        ffn_norm_ptr: 0,
        ffn_norm_len: 0,

        // Attention biases.
        attn_q_bias_ptr: 0,
        attn_q_bias_len: 0,
        attn_k_bias_ptr: 0,
        attn_k_bias_len: 0,
        attn_v_bias_ptr: 0,
        attn_v_bias_len: 0,

        // Per-projection Q/K norm weights.
        attn_q_norm_ptr: 0,
        attn_q_norm_len: 0,
        attn_k_norm_ptr: 0,
        attn_k_norm_len: 0,
    };
    ValidatedLayerWeights::new_unchecked(zeroed)
}
/// Calls `transformer_layer_indexed` on a freshly created executor with the
/// all-zero weight fixture and expects an error.
///
/// Returns early (passing vacuously) when `create_executor` yields `None`.
/// NOTE(review): per the test name the expected failure is a missing KV
/// cache; the assertion itself only checks that *some* error is returned.
#[test]
fn test_transformer_layer_indexed_missing_kv_cache() {
    let mut exec = match create_executor() {
        Some(executor) => executor,
        None => return,
    };

    let host_input: Vec<f32> = vec![0.1; 256];
    let device_input = GpuBuffer::from_host(&exec.context, &host_input).unwrap();
    let zero_weights = test_zeroed_layer_weights();

    // Layer 0, hidden dim 256, intermediate dim 1024, epsilon 1e-5.
    let result = exec.transformer_layer_indexed(&device_input, 0, &zero_weights, 256, 1024, 1e-5);
    assert!(result.is_err());
}
/// Sanity-checks the fixture returned by `test_zeroed_layer_weights`:
/// a sample of pointer/length fields is zero and the V projection is
/// tagged `WeightQuantType::Q4K`.
#[test]
fn test_indexed_layer_weights_zeroed() {
    let weights = test_zeroed_layer_weights();

    // Attention-norm pointer and length are checked as a pair.
    assert_eq!((weights.attn_norm_ptr, weights.attn_norm_len), (0, 0));
    assert_eq!(weights.attn_q_ptr, 0);

    assert!(matches!(weights.attn_v_qtype, WeightQuantType::Q4K));
}
/// Confirms that the `Q6K`, `Q4K` and `Q5K` variants of `WeightQuantType`
/// exist and each matches its own pattern.
#[test]
fn test_weight_quant_type_variants() {
    let q6k = WeightQuantType::Q6K;
    let q4k = WeightQuantType::Q4K;
    let q5k = WeightQuantType::Q5K;

    assert!(matches!(q6k, WeightQuantType::Q6K));
    assert!(matches!(q4k, WeightQuantType::Q4K));
    assert!(matches!(q5k, WeightQuantType::Q5K));
}
/// Runs `transformer_layer_indexed` for layer 0 on an executor prepared by
/// `setup_executor_harness` with the default `HarnessConfig`, and requires
/// the call to succeed.
///
/// Unlike the skip-on-missing-GPU tests in this file, executor creation and
/// harness setup are both hard requirements here (`expect`).
#[test]
fn test_transformer_layer_indexed_with_harness() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let mut exec = CudaExecutor::new(0).expect("CUDA executor - RTX 4090 MUST be available");
    let config = HarnessConfig::default();
    setup_executor_harness(&mut exec, &config).expect("Harness setup MUST succeed");

    let hidden_dim = config.hidden_dim;
    let host_input = vec![0.1f32; hidden_dim];
    let input = GpuBuffer::from_host(&exec.context, &host_input).unwrap();

    // Clone the weights so `exec` can be borrowed mutably for the call.
    let layer_weights = exec.indexed_layer_weights[0].clone();
    let result = exec.transformer_layer_indexed(
        &input,
        0,
        &layer_weights,
        hidden_dim as u32,
        config.intermediate_dim as u32,
        1e-5,
    );

    assert!(
        result.is_ok(),
        "transformer_layer_indexed MUST succeed with valid harness: {:?}",
        result.err()
    );
}
/// Expects `transformer_layer_indexed` to return an error when given the
/// all-zero weight fixture on an executor that only had `init_workspace`
/// called on it.
///
/// Executor creation is a hard requirement (`expect`).
#[test]
fn test_indexed_rejects_null_weight_pointer() {
    let mut exec = CudaExecutor::new(0).expect("CUDA executor");
    // The outcome of workspace initialisation is discarded.
    let _ = exec.init_workspace(256, 1024);

    let mut null_weights = test_zeroed_layer_weights();
    // The fixture already holds 0 here; the explicit write names the pointer
    // this test is about.
    null_weights.inner_mut().attn_norm_ptr = 0;

    let host_input = vec![0.1f32; 256];
    let input = GpuBuffer::from_host(&exec.context, &host_input).unwrap();

    let outcome = exec.transformer_layer_indexed(&input, 0, &null_weights, 256, 1024, 1e-5);
    assert!(outcome.is_err(), "Null weight pointer MUST be rejected");
}
/// Feeds `transformer_layer_indexed` an input buffer twice as long as the
/// `hidden_dim` passed alongside it.
///
/// The returned `Result` is discarded, so this only verifies that the call
/// completes without panicking.
/// NOTE(review): the name says "rejects" but no error is asserted — confirm
/// whether an `is_err()` check is intended.
#[test]
fn test_indexed_rejects_mismatched_dimensions() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let mut exec = CudaExecutor::new(0).expect("CUDA executor");
    let config = HarnessConfig::default();
    setup_executor_harness(&mut exec, &config).expect("Harness setup");

    // Deliberately oversized host data: 2x the configured hidden dimension.
    let wrong_dim = config.hidden_dim * 2;
    let oversized_host = vec![0.1f32; wrong_dim];
    let input = GpuBuffer::from_host(&exec.context, &oversized_host).unwrap();

    let layer_weights = exec.indexed_layer_weights[0].clone();
    let _ = exec.transformer_layer_indexed(
        &input,
        0,
        &layer_weights,
        config.hidden_dim as u32,
        config.intermediate_dim as u32,
        1e-5,
    );
}
/// Calls `transformer_layer_indexed` with a layer index 100 past the
/// configured `num_layers`.
///
/// The returned `Result` is discarded, so this only verifies that the call
/// completes without panicking.
/// NOTE(review): the name says "rejects" but no error is asserted — confirm
/// whether an `is_err()` check is intended.
#[test]
fn test_indexed_rejects_invalid_layer_index() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let mut exec = CudaExecutor::new(0).expect("CUDA executor");
    let config = HarnessConfig::default();
    setup_executor_harness(&mut exec, &config).expect("Harness setup");

    let host_input = vec![0.1f32; config.hidden_dim];
    let input = GpuBuffer::from_host(&exec.context, &host_input).unwrap();
    // Weights come from a valid layer; only the index argument is out of range.
    let layer_weights = exec.indexed_layer_weights[0].clone();
    let invalid_layer_idx = config.num_layers + 100;

    let _ = exec.transformer_layer_indexed(
        &input,
        invalid_layer_idx,
        &layer_weights,
        config.hidden_dim as u32,
        config.intermediate_dim as u32,
        1e-5,
    );
}
/// Invokes `transformer_layer_indexed` once per layer on a harness
/// configured with four layers.
///
/// Skips (returns early) when no executor can be created or harness setup
/// fails. Each call's `Result` is discarded, so the test only checks that
/// the calls complete without panicking.
#[test]
fn test_transformer_layer_indexed_multiple_layers() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let mut config = HarnessConfig::default();
    config.num_layers = 4;
    let Ok(_) = setup_executor_harness(&mut exec, &config) else {
        return;
    };

    let hidden_dim = config.hidden_dim;
    for layer_idx in 0..config.num_layers {
        // A fresh device buffer and weight copy are made for every layer.
        let host_input = vec![0.1f32; hidden_dim];
        let input = GpuBuffer::from_host(&exec.context, &host_input).unwrap();
        let layer_weights = exec.indexed_layer_weights[layer_idx].clone();

        let _ = exec.transformer_layer_indexed(
            &input,
            layer_idx,
            &layer_weights,
            hidden_dim as u32,
            config.intermediate_dim as u32,
            1e-5,
        );
    }
}
/// Runs `transformer_layer_indexed` with layer 0's weights after retagging
/// the V projection as `WeightQuantType::Q6K`.
///
/// Skips when no executor can be created or harness setup fails. The
/// returned `Result` is discarded; only "does not panic" is checked.
#[test]
fn test_transformer_layer_indexed_q6k_v_weight() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let config = HarnessConfig::default();
    let Ok(_) = setup_executor_harness(&mut exec, &config) else {
        return;
    };

    // Work on a copy so the executor's stored weights stay untouched.
    let mut layer_weights = exec.indexed_layer_weights[0].clone();
    layer_weights.inner_mut().attn_v_qtype = WeightQuantType::Q6K;

    let hidden_dim = config.hidden_dim;
    let host_input = vec![0.1f32; hidden_dim];
    let input = GpuBuffer::from_host(&exec.context, &host_input).unwrap();

    let _ = exec.transformer_layer_indexed(
        &input,
        0,
        &layer_weights,
        hidden_dim as u32,
        config.intermediate_dim as u32,
        1e-5,
    );
}
/// Calls `transformer_layer_indexed` for layer 0 with three epsilon values
/// (`1e-5`, `1e-6`, `1e-4`).
///
/// Skips when no executor can be created or harness setup fails. Each
/// call's `Result` is discarded; only "does not panic" is checked.
#[test]
fn test_transformer_layer_indexed_different_epsilon() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    const EPSILONS: [f32; 3] = [1e-5, 1e-6, 1e-4];

    let Some(mut exec) = create_executor() else {
        return;
    };
    let config = HarnessConfig::default();
    let Ok(_) = setup_executor_harness(&mut exec, &config) else {
        return;
    };

    let hidden_dim = config.hidden_dim;
    for epsilon in EPSILONS {
        // Buffer and weight copy are rebuilt for every epsilon.
        let host_input = vec![0.1f32; hidden_dim];
        let input = GpuBuffer::from_host(&exec.context, &host_input).unwrap();
        let layer_weights = exec.indexed_layer_weights[0].clone();

        let _ = exec.transformer_layer_indexed(
            &input,
            0,
            &layer_weights,
            hidden_dim as u32,
            config.intermediate_dim as u32,
            epsilon,
        );
    }
}
/// Runs `transformer_layer_indexed` on a harness configured with 32 heads
/// and 8 KV heads.
///
/// Skips when no executor can be created or harness setup fails. The
/// returned `Result` is discarded; only "does not panic" is checked.
#[test]
fn test_transformer_layer_indexed_gqa_configuration() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };

    // Four query heads per KV head (32 / 8).
    let mut config = HarnessConfig::default();
    config.num_heads = 32;
    config.num_kv_heads = 8;
    let Ok(_) = setup_executor_harness(&mut exec, &config) else {
        return;
    };

    let hidden_dim = config.hidden_dim;
    let host_input = vec![0.1f32; hidden_dim];
    let input = GpuBuffer::from_host(&exec.context, &host_input).unwrap();
    let layer_weights = exec.indexed_layer_weights[0].clone();

    let _ = exec.transformer_layer_indexed(
        &input,
        0,
        &layer_weights,
        hidden_dim as u32,
        config.intermediate_dim as u32,
        1e-5,
    );
}
/// After harness setup, checks that layer 0's `attn_norm_ptr`, `attn_q_ptr`
/// and `ffn_gate_ptr` are all non-zero.
///
/// Skips when no executor can be created or harness setup fails.
#[test]
fn test_indexed_layer_weights_pointers_valid() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let config = HarnessConfig::default();
    let Ok(_) = setup_executor_harness(&mut exec, &config) else {
        return;
    };

    let first_layer = &exec.indexed_layer_weights[0];
    let norm_ptr = first_layer.attn_norm_ptr;
    let q_ptr = first_layer.attn_q_ptr;
    let gate_ptr = first_layer.ffn_gate_ptr;

    assert!(norm_ptr != 0, "attn_norm_ptr should be set");
    assert!(q_ptr != 0, "attn_q_ptr should be set");
    assert!(gate_ptr != 0, "ffn_gate_ptr should be set");
}
/// Verifies that, after harness setup with `num_layers = 6`, the executor
/// holds exactly one `indexed_layer_weights` entry per configured layer.
///
/// Skips when no executor can be created or harness setup fails.
#[test]
fn test_indexed_weights_count_matches_layers() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let mut config = HarnessConfig::default();
    config.num_layers = 6;
    let Ok(_) = setup_executor_harness(&mut exec, &config) else {
        return;
    };

    let stored_layers = exec.indexed_layer_weights.len();
    let configured_layers = config.num_layers;
    assert_eq!(stored_layers, configured_layers);
}
/// Calls `q4k_gemv_indexed_async` with layer 0's `attn_q_ptr`, passing
/// `hidden_dim` for both dimension arguments.
///
/// Skips when no executor can be created or harness setup fails. The
/// returned `Result` is discarded; only "does not panic" is checked.
#[test]
fn test_q4k_gemv_indexed_async_with_harness() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let config = HarnessConfig::default();
    let Ok(_) = setup_executor_harness(&mut exec, &config) else {
        return;
    };

    // Copy the pointer out so no borrow of `exec` is held across the call.
    let q_weight_ptr = exec.indexed_layer_weights[0].attn_q_ptr;

    let hidden_dim = config.hidden_dim;
    let host_input = vec![0.1f32; hidden_dim];
    let input = GpuBuffer::from_host(&exec.context, &host_input).unwrap();

    let _ = exec.q4k_gemv_indexed_async(
        q_weight_ptr,
        &input,
        hidden_dim as u32,
        hidden_dim as u32,
    );
}
/// Calls `q6k_gemv_indexed_async` with layer 0's `attn_v_ptr`, passing
/// `hidden_dim` for both dimension arguments.
///
/// Skips when no executor can be created or harness setup fails. The
/// returned `Result` is discarded; only "does not panic" is checked.
#[test]
fn test_q6k_gemv_indexed_async_with_harness() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let config = HarnessConfig::default();
    let Ok(_) = setup_executor_harness(&mut exec, &config) else {
        return;
    };

    // Copy the pointer out so no borrow of `exec` is held across the call.
    let v_weight_ptr = exec.indexed_layer_weights[0].attn_v_ptr;

    let hidden_dim = config.hidden_dim;
    let host_input = vec![0.1f32; hidden_dim];
    let input = GpuBuffer::from_host(&exec.context, &host_input).unwrap();

    let _ = exec.q6k_gemv_indexed_async(
        v_weight_ptr,
        &input,
        hidden_dim as u32,
        hidden_dim as u32,
    );
}
/// Calls `rmsnorm_gpu_ptr` on an all-ones input using layer 0's
/// `attn_norm_ptr` / `attn_norm_len`, with `hidden_dim` and epsilon `1e-5`.
///
/// Skips when no executor can be created or harness setup fails. The
/// returned `Result` is discarded; only "does not panic" is checked.
#[test]
fn test_rmsnorm_gpu_ptr_with_harness() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let config = HarnessConfig::default();
    let Ok(_) = setup_executor_harness(&mut exec, &config) else {
        return;
    };

    // Copy pointer and length out so no borrow of `exec` is held across the call.
    let first_layer = &exec.indexed_layer_weights[0];
    let norm_ptr = first_layer.attn_norm_ptr;
    let norm_len = first_layer.attn_norm_len;

    let hidden_dim = config.hidden_dim;
    let host_input = vec![1.0f32; hidden_dim];
    let input = GpuBuffer::from_host(&exec.context, &host_input).unwrap();

    let _ = exec.rmsnorm_gpu_ptr(&input, norm_ptr, norm_len, hidden_dim as u32, 1e-5);
}
/// Calls `transformer_layer_workspace_for_capture` for layer 0 on a
/// default-configured harness.
///
/// Skips when no executor can be created or harness setup fails. The
/// returned `Result` is discarded; only "does not panic" is checked.
#[test]
fn test_transformer_layer_workspace_for_capture() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let config = HarnessConfig::default();
    let Ok(_) = setup_executor_harness(&mut exec, &config) else {
        return;
    };

    let hidden_dim = config.hidden_dim;
    let host_input = vec![0.1f32; hidden_dim];
    let input = GpuBuffer::from_host(&exec.context, &host_input).unwrap();
    let layer_weights = exec.indexed_layer_weights[0].clone();

    let _ = exec.transformer_layer_workspace_for_capture(
        &input,
        0,
        &layer_weights,
        hidden_dim as u32,
        config.intermediate_dim as u32,
        1e-5,
        // NOTE(review): meaning of this trailing `0` is not visible here
        // (possibly a sequence position) — confirm against the callee.
        0,
    );
}
include!("indexed_tests_transformer_layer.rs");