#[cfg(test)]
#[cfg(feature = "cuda")]
mod tests {
use super::*;
use crate::cuda::executor::test_fixtures::generate_q4_0_weights;
/// Try to open a CUDA executor on device 0.
///
/// Returns `None` when no usable CUDA device is available so tests can
/// skip silently on CPU-only machines.
fn create_executor() -> Option<CudaExecutor> {
    match CudaExecutor::new(0) {
        Ok(executor) => Some(executor),
        Err(_) => None,
    }
}
/// Upload all-zero quantized FFN weights under the fixed tensor names
/// "ffn_gate", "ffn_up" and "ffn_down".
///
/// Sizes follow a super-block quantization layout of 256 values per block
/// and 144 bytes per block (this matches the Q4_K block size — NOTE(review):
/// confirm the kernels under test expect Q4_K here).
///
/// Gate and up projections share the `[intermediate, hidden]` shape; the
/// down projection is `[hidden, intermediate]`.
///
/// NOTE(review): assumes `hidden_dim` and `intermediate_dim` are multiples
/// of 256; a non-multiple silently truncates the block count — TODO confirm
/// callers uphold this.
fn setup_ffn_weights(
    exec: &mut CudaExecutor,
    hidden_dim: u32,
    intermediate_dim: u32,
) -> Result<(), GpuError> {
    // Super-block quantization parameters (values per block / bytes per block).
    const VALUES_PER_BLOCK: usize = 256;
    const BYTES_PER_BLOCK: usize = 144;

    let hidden = hidden_dim as usize;
    let intermediate = intermediate_dim as usize;

    // Gate and up projections have identical shapes, hence identical sizes.
    let proj_bytes = intermediate * (hidden / VALUES_PER_BLOCK) * BYTES_PER_BLOCK;
    exec.load_quantized_weights("ffn_gate", &vec![0u8; proj_bytes])?;
    exec.load_quantized_weights("ffn_up", &vec![0u8; proj_bytes])?;

    // Down projection transposes the dimensions.
    let down_bytes = hidden * (intermediate / VALUES_PER_BLOCK) * BYTES_PER_BLOCK;
    exec.load_quantized_weights("ffn_down", &vec![0u8; down_bytes])?;
    Ok(())
}
/// Smoke test for the fused SwiGLU FFN GPU path with 256-aligned
/// dimensions; skips silently when no CUDA device is present or the
/// weight upload fails.
#[test]
fn test_fused_ffn_swiglu_gpu_path_selection() {
    let mut exec = match create_executor() {
        Some(e) => e,
        None => return,
    };
    let hidden_dim = 512u32;
    let intermediate_dim = 1024u32;
    if setup_ffn_weights(&mut exec, hidden_dim, intermediate_dim).is_err() {
        return;
    }
    // Small linear ramp so input values stay well-conditioned.
    let host_input: Vec<f32> = (0..hidden_dim as usize)
        .map(|i| i as f32 * 0.001)
        .collect();
    let input_buf = GpuBuffer::from_host(&exec.context, &host_input).expect("input_buf");
    // Result is deliberately discarded: the test only checks that the call
    // path does not panic on available hardware.
    let _ = fused_ffn_swiglu_gpu(
        &mut exec,
        &input_buf,
        "ffn_gate",
        "ffn_up",
        "ffn_down",
        hidden_dim,
        intermediate_dim,
    );
}
/// Exercise the fused FFN with generated 32-value-per-block (Q4_0) weights
/// and an intermediate dimension that is not a multiple of 256, under names
/// distinct from the aligned fixtures.
#[test]
fn test_fused_ffn_swiglu_gpu_unaligned() {
    let mut exec = match create_executor() {
        Some(e) => e,
        None => return,
    };
    let hidden_dim = 256u32;
    let intermediate_dim = 768u32;
    // Q4_0 packs 32 values per block; gate and up share the same shape.
    let proj_blocks = (intermediate_dim as usize) * (hidden_dim as usize / 32);
    let _ = exec.load_quantized_weights("ffn_gate_unaligned", &generate_q4_0_weights(proj_blocks));
    let _ = exec.load_quantized_weights("ffn_up_unaligned", &generate_q4_0_weights(proj_blocks));
    let down_blocks = (hidden_dim as usize) * (intermediate_dim as usize / 32);
    let _ = exec.load_quantized_weights("ffn_down_unaligned", &generate_q4_0_weights(down_blocks));
    let host_input = vec![0.1f32; hidden_dim as usize];
    let input_buf = GpuBuffer::from_host(&exec.context, &host_input).expect("input_buf");
    // Result discarded: this is a does-not-panic smoke test.
    let _ = fused_ffn_swiglu_gpu(
        &mut exec,
        &input_buf,
        "ffn_gate_unaligned",
        "ffn_up_unaligned",
        "ffn_down_unaligned",
        hidden_dim,
        intermediate_dim,
    );
}
/// Run the fused FFN with the smallest aligned dimensions used in this
/// module, exercising whatever chunking threshold the implementation picks
/// for small workloads.
#[test]
fn test_fused_ffn_chunk_threshold() {
    let mut exec = match create_executor() {
        Some(e) => e,
        None => return,
    };
    let hidden_dim = 256u32;
    let intermediate_dim = 512u32;
    if setup_ffn_weights(&mut exec, hidden_dim, intermediate_dim).is_err() {
        return;
    }
    let host_input: Vec<f32> = std::iter::repeat(0.1f32)
        .take(hidden_dim as usize)
        .collect();
    let input_buf = GpuBuffer::from_host(&exec.context, &host_input).expect("input_buf");
    // Outcome intentionally ignored; the test guards against panics only.
    let _ = fused_ffn_swiglu_gpu(
        &mut exec,
        &input_buf,
        "ffn_gate",
        "ffn_up",
        "ffn_down",
        hidden_dim,
        intermediate_dim,
    );
}
/// Smoke test for the DP4A variant of the fused SwiGLU FFN; skips when no
/// CUDA device is present or the fixture weights fail to upload.
#[test]
fn test_fused_ffn_swiglu_gpu_true_dp4a() {
    let mut exec = match create_executor() {
        Some(e) => e,
        None => return,
    };
    let hidden_dim = 256u32;
    let intermediate_dim = 512u32;
    if setup_ffn_weights(&mut exec, hidden_dim, intermediate_dim).is_err() {
        return;
    }
    // Small linear ramp input.
    let host_input: Vec<f32> = (0..hidden_dim as usize)
        .map(|i| i as f32 * 0.001)
        .collect();
    let input_buf = GpuBuffer::from_host(&exec.context, &host_input).expect("input_buf");
    // Result discarded: does-not-panic smoke test.
    let _ = fused_ffn_swiglu_gpu_true_dp4a(
        &mut exec,
        &input_buf,
        "ffn_gate",
        "ffn_up",
        "ffn_down",
        hidden_dim,
        intermediate_dim,
    );
}
/// Run the fused FFN against the shared executor harness fixtures
/// ("blk.0.*" tensor names) with the default harness configuration.
#[test]
fn test_ffn_with_harness() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};
    let mut exec = match create_executor() {
        Some(e) => e,
        None => return,
    };
    let config = HarnessConfig::default();
    if setup_executor_harness(&mut exec, &config).is_err() {
        return;
    }
    let host_input: Vec<f32> = std::iter::repeat(0.1f32)
        .take(config.hidden_dim)
        .collect();
    let input_buf = GpuBuffer::from_host(&exec.context, &host_input).expect("input_buf");
    // Outcome intentionally ignored; smoke test only.
    let _ = fused_ffn_swiglu_gpu(
        &mut exec,
        &input_buf,
        "blk.0.ffn_gate",
        "blk.0.ffn_up",
        "blk.0.ffn_down",
        config.hidden_dim as u32,
        config.intermediate_dim as u32,
    );
}
/// Run the fused FFN once per layer of a 4-layer harness, verifying that
/// per-layer tensor names ("blk.{i}.ffn_*") all resolve without panicking.
#[test]
fn test_ffn_different_layers() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};
    let Some(mut exec) = create_executor() else {
        return;
    };
    // Struct-update syntax instead of default-then-reassign
    // (clippy: field_reassign_with_default).
    let config = HarnessConfig {
        num_layers: 4,
        ..HarnessConfig::default()
    };
    if setup_executor_harness(&mut exec, &config).is_err() {
        return;
    }
    // The host input is identical for every layer — build it once.
    let input: Vec<f32> = vec![0.1f32; config.hidden_dim];
    for layer_idx in 0..config.num_layers {
        let input_buf = GpuBuffer::from_host(&exec.context, &input).expect("input_buf");
        // Result discarded: does-not-panic smoke test per layer.
        let _ = fused_ffn_swiglu_gpu(
            &mut exec,
            &input_buf,
            &format!("blk.{}.ffn_gate", layer_idx),
            &format!("blk.{}.ffn_up", layer_idx),
            &format!("blk.{}.ffn_down", layer_idx),
            config.hidden_dim as u32,
            config.intermediate_dim as u32,
        );
    }
}
/// Feed the fused FFN three qualitatively different inputs (all-zero,
/// all-one, and a sinusoid) against the default harness fixtures.
#[test]
fn test_ffn_varying_inputs() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};
    let mut exec = match create_executor() {
        Some(e) => e,
        None => return,
    };
    let config = HarnessConfig::default();
    if setup_executor_harness(&mut exec, &config).is_err() {
        return;
    }
    let zeros = vec![0.0f32; config.hidden_dim];
    let ones = vec![1.0f32; config.hidden_dim];
    let sine: Vec<f32> = (0..config.hidden_dim)
        .map(|i| (i as f32 / 1000.0).sin())
        .collect();
    for host_input in [zeros, ones, sine] {
        let input_buf = GpuBuffer::from_host(&exec.context, &host_input).expect("input_buf");
        // Result discarded; per-input does-not-panic check.
        let _ = fused_ffn_swiglu_gpu(
            &mut exec,
            &input_buf,
            "blk.0.ffn_gate",
            "blk.0.ffn_up",
            "blk.0.ffn_down",
            config.hidden_dim as u32,
            config.intermediate_dim as u32,
        );
    }
}
/// Smoke test for the DP4A fused FFN against the shared harness fixtures
/// ("blk.0.*" tensor names).
#[test]
fn test_ffn_true_dp4a_with_harness() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};
    let mut exec = match create_executor() {
        Some(e) => e,
        None => return,
    };
    let config = HarnessConfig::default();
    if setup_executor_harness(&mut exec, &config).is_err() {
        return;
    }
    // Small linear ramp input.
    let host_input: Vec<f32> = (0..config.hidden_dim)
        .map(|i| i as f32 * 0.001)
        .collect();
    let input_buf = GpuBuffer::from_host(&exec.context, &host_input).expect("input_buf");
    // Result discarded: does-not-panic smoke test.
    let _ = fused_ffn_swiglu_gpu_true_dp4a(
        &mut exec,
        &input_buf,
        "blk.0.ffn_gate",
        "blk.0.ffn_up",
        "blk.0.ffn_down",
        config.hidden_dim as u32,
        config.intermediate_dim as u32,
    );
}
/// Run the fused FFN with an enlarged intermediate dimension (2048) to
/// exercise bigger-workload code paths.
#[test]
fn test_ffn_larger_intermediate_dim() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};
    let Some(mut exec) = create_executor() else {
        return;
    };
    // Struct-update syntax instead of default-then-reassign
    // (clippy: field_reassign_with_default).
    let config = HarnessConfig {
        intermediate_dim: 2048,
        ..HarnessConfig::default()
    };
    if setup_executor_harness(&mut exec, &config).is_err() {
        return;
    }
    let input: Vec<f32> = vec![0.1f32; config.hidden_dim];
    let input_buf = GpuBuffer::from_host(&exec.context, &input).expect("input_buf");
    // Result discarded: does-not-panic smoke test.
    let _ = fused_ffn_swiglu_gpu(
        &mut exec,
        &input_buf,
        "blk.0.ffn_gate",
        "blk.0.ffn_up",
        "blk.0.ffn_down",
        config.hidden_dim as u32,
        config.intermediate_dim as u32,
    );
}
/// Verify the fused FFN produces a hidden_dim-sized, well-formed output
/// when it succeeds on available hardware.
#[test]
fn test_ffn_output_dimensions() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};
    let Some(mut exec) = create_executor() else {
        return;
    };
    let config = HarnessConfig::default();
    if setup_executor_harness(&mut exec, &config).is_err() {
        return;
    }
    let input: Vec<f32> = vec![0.1f32; config.hidden_dim];
    let input_buf = GpuBuffer::from_host(&exec.context, &input).expect("input_buf");
    let result = fused_ffn_swiglu_gpu(
        &mut exec,
        &input_buf,
        "blk.0.ffn_gate",
        "blk.0.ffn_up",
        "blk.0.ffn_down",
        config.hidden_dim as u32,
        config.intermediate_dim as u32,
    );
    if let Ok(output_buf) = result {
        let mut output = vec![0.0f32; config.hidden_dim];
        // If the device buffer were smaller than hidden_dim this copy fails,
        // which is the real dimension check — `output.len()` alone would be
        // vacuous since the Vec is preallocated to hidden_dim above.
        output_buf.copy_to_host(&mut output).expect("copy");
        assert_eq!(
            output.len(),
            config.hidden_dim,
            "FFN output should match hidden_dim"
        );
        // The copied values should at least be well-formed floats.
        assert!(
            output.iter().all(|v| v.is_finite()),
            "FFN output should contain only finite values"
        );
    }
}
}