pub fn setup_executor_harness(
exec: &mut crate::cuda::executor::CudaExecutor,
config: &HarnessConfig,
) -> Result<(), crate::cuda::executor::GpuError> {
exec.kv_num_heads = config.num_heads;
exec.kv_num_kv_heads = config.num_kv_heads;
exec.kv_head_dim = config.head_dim;
exec.init_workspace(config.hidden_dim, config.intermediate_dim)?;
exec.init_kv_cache_gpu(
config.num_layers,
config.num_heads,
config.num_kv_heads,
config.head_dim,
config.max_seq_len,
)?;
let gamma: Vec<f32> = vec![1.0; config.hidden_dim];
for layer_idx in 0..config.num_layers {
exec.cache_rmsnorm_gamma(&format!("blk.{layer_idx}.attn_norm.gamma"), &gamma)?;
exec.cache_rmsnorm_gamma(&format!("blk.{layer_idx}.ffn_norm.gamma"), &gamma)?;
}
exec.cache_rmsnorm_gamma("output_norm.gamma", &gamma)?;
for layer_idx in 0..config.num_layers {
let prefix = format!("blk.{layer_idx}");
load_layer_attn_weights(exec, &prefix, config)?;
load_layer_ffn_weights(exec, &prefix, config)?;
}
let arch = crate::gguf::ArchConstraints::from_architecture("llama");
load_zero_weights(exec, "output.weight", config.vocab_size, config.hidden_dim)?;
exec.build_indexed_weights(config.num_layers, |i| format!("blk.{}", i), &arch)?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_q4_0_weight_generation() {
let weights = generate_q4_0_weights(4);
assert_eq!(weights.len(), 72);
let scale_bytes = [weights[0], weights[1]];
let scale = f16::from_le_bytes(scale_bytes);
assert!((scale.to_f32() - 0.1).abs() < 0.01);
}
#[test]
fn test_q5_0_weight_generation() {
let weights = generate_q5_0_weights(4);
assert_eq!(weights.len(), 88);
let qh = u32::from_le_bytes([weights[2], weights[3], weights[4], weights[5]]);
assert_eq!(qh, 0xAAAA_5555);
}
#[test]
fn test_gqa_config_dimensions() {
let config = GqaConfig::QWEN_0_5B;
assert_eq!(config.q_dim(), 896); assert_eq!(config.kv_dim(), 128); assert_eq!(config.gqa_group_size(), 7); assert!(config.is_gqa());
let mha_config = GqaConfig::LLAMA_7B_MHA;
assert!(!mha_config.is_gqa());
}
#[test]
fn test_parity_helpers() {
assert!(relative_diff(1.0, 1.01) < 0.02);
assert!(relative_diff(100.0, 101.0) < 0.02);
let a = vec![1.0, 2.0, 3.0];
let b = vec![1.01, 2.01, 3.01];
assert!(max_element_diff(&a, &b) < 0.02);
assert!(vectors_match(&a, &b, 0.02));
}
#[test]
fn test_harness_config_default() {
let config = HarnessConfig::default();
assert_eq!(config.hidden_dim, 256);
assert_eq!(config.intermediate_dim, 512);
assert_eq!(config.num_layers, 2);
assert_eq!(config.num_heads, 4);
assert_eq!(config.num_kv_heads, 2);
}
#[test]
fn test_harness_config_tiny() {
let config = HarnessConfig::tiny();
assert_eq!(config.hidden_dim, 64);
assert_eq!(config.num_layers, 1);
assert!(config.num_heads >= config.num_kv_heads);
}
#[test]
fn test_harness_config_qwen_like() {
let config = HarnessConfig::qwen_like();
assert!(config.num_heads > config.num_kv_heads);
assert_eq!(config.num_heads % config.num_kv_heads, 0);
}
#[test]
fn test_setup_executor_harness_tiny() {
use crate::cuda::executor::CudaExecutor;
let Some(mut exec) = CudaExecutor::new(0).ok() else {
return;
};
let config = HarnessConfig::tiny();
let result = setup_executor_harness(&mut exec, &config);
if result.is_ok() {
assert!(exec.has_workspace());
assert!(exec.has_indexed_weights());
}
}
#[test]
fn test_setup_executor_harness_default() {
use crate::cuda::executor::CudaExecutor;
let Some(mut exec) = CudaExecutor::new(0).ok() else {
return;
};
let config = HarnessConfig::default();
let result = setup_executor_harness(&mut exec, &config);
if result.is_ok() {
assert!(exec.has_workspace());
assert!(exec.has_indexed_weights());
for i in 0..config.num_layers {
let attn_name = format!("blk.{}.attn_norm.gamma", i);
assert!(exec.rmsnorm_cache.contains_key(&attn_name));
}
}
}
#[test]
fn test_harness_forward_all_layers() {
use crate::cuda::executor::CudaExecutor;
let Some(mut exec) = CudaExecutor::new(0).ok() else {
return;
};
let config = HarnessConfig::default();
if setup_executor_harness(&mut exec, &config).is_err() {
return;
}
let input = vec![0.1f32; config.hidden_dim];
let mut output = vec![0.0f32; config.hidden_dim];
let result = exec.forward_all_layers_gpu(
&input,
&mut output,
0, config.num_layers,
config.hidden_dim as u32,
config.intermediate_dim as u32,
1e-5,
);
let _ = result;
}
#[test]
fn test_harness_forward_to_logits() {
use crate::cuda::executor::CudaExecutor;
let Some(mut exec) = CudaExecutor::new(0).ok() else {
return;
};
let config = HarnessConfig::default();
if setup_executor_harness(&mut exec, &config).is_err() {
return;
}
let input = vec![0.1f32; config.hidden_dim];
let mut logits = vec![0.0f32; config.vocab_size];
let result = exec.forward_all_layers_gpu_to_logits(
&input,
&mut logits,
0, config.num_layers,
config.hidden_dim as u32,
config.intermediate_dim as u32,
config.vocab_size as u32,
1e-5,
);
let _ = result;
}
}