// fused_gate_up_into: a 64-dim input against two 64x128 projection matrices, written into a 128-element output.
#[test]
#[serial]
fn test_cov028_fused_gate_up_into_basic() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
let hidden_size = 64u32;
let intermediate_size = 128u32;
let x_data = vec![0.1f32; hidden_size as usize];
let x = GpuBuffer::from_host(executor.context(), &x_data).expect("x");
let w_gate_data = vec![0.01f32; (hidden_size * intermediate_size) as usize];
let w_up_data = vec![0.01f32; (hidden_size * intermediate_size) as usize];
let w_gate = GpuBuffer::from_host(executor.context(), &w_gate_data).expect("w_gate");
let w_up = GpuBuffer::from_host(executor.context(), &w_up_data).expect("w_up");
let output = GpuBuffer::new(executor.context(), intermediate_size as usize).expect("output");
let result =
executor.fused_gate_up_into(&x, &w_gate, &w_up, &output, hidden_size, intermediate_size);
assert!(
result.is_ok(),
"fused_gate_up_into should succeed: {:?}",
result.err()
);
}
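// rope_into: rotary position embedding over 4 heads x 32 dims at a single token position.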
#[test]
#[serial]
fn test_cov028_rope_into_basic() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
let num_heads = 4u32;
let head_dim = 32u32;
let position = 5u32;
let theta = 10000.0f32;
let input_size = (num_heads * head_dim) as usize;
let input_data = vec![0.5f32; input_size];
let input = GpuBuffer::from_host(executor.context(), &input_data).expect("input");
let output = GpuBuffer::new(executor.context(), input_size).expect("output");
let result = executor.rope_into(&input, &output, position, num_heads, head_dim, theta);
assert!(
result.is_ok(),
"rope_into should succeed: {:?}",
result.err()
);
}
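// rope_into should accept a range of token positions, including position 0.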
#[test]
#[serial]
fn test_cov028_rope_into_varying_positions() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
let num_heads = 8u32;
let head_dim = 64u32;
let theta = 10000.0f32;
let input_size = (num_heads * head_dim) as usize;
let input_data = vec![1.0f32; input_size];
let input = GpuBuffer::from_host(executor.context(), &input_data).expect("input");
let output = GpuBuffer::new(executor.context(), input_size).expect("output");
for position in [0, 1, 10, 100, 1000] {
let result = executor.rope_into(&input, &output, position, num_heads, head_dim, theta);
assert!(
result.is_ok(),
"rope_into at position {} should succeed: {:?}",
position,
result.err()
);
}
}
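// Batched Q4_K GEMV: M=4 activation rows against zero-initialized quantized weights.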
#[test]
#[serial]
fn test_cov028_batched_q4k_gemv_into_basic() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
let m = 4u32;
let n = 32u32;
let k = 256u32;
// Q4_K packs 256 weights into a 144-byte super-block, so one block per output row at k = 256.
let weight_bytes = (n as usize) * 144;
let weights = vec![0u8; weight_bytes];
executor
.load_quantized_weights("test_batched_q4k", &weights)
.expect("load weights");
let weight_ptr = executor
.get_quantized_weight_ptr("test_batched_q4k")
.expect("get ptr");
let input_data = vec![0.1f32; (m * k) as usize];
let input = GpuBuffer::from_host(executor.context(), &input_data).expect("input");
let output = GpuBuffer::new(executor.context(), (m * n) as usize).expect("output");
let result = executor.batched_q4k_gemv_into(weight_ptr, &input, &output, m, n, k);
assert!(
result.is_ok(),
"batched_q4k_gemv_into should succeed: {:?}",
result.err()
);
}
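// Same batched Q4_K GEMV path with a larger batch size (M=16).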
#[test]
#[serial]
fn test_cov028_batched_q4k_gemv_into_m16() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
let m = 16u32;
let n = 32u32;
let k = 256u32;
let weight_bytes = (n as usize) * 144;
let weights = vec![0u8; weight_bytes];
executor
.load_quantized_weights("test_batched_q4k_m16", &weights)
.expect("load");
let weight_ptr = executor
.get_quantized_weight_ptr("test_batched_q4k_m16")
.expect("get ptr");
let input_data = vec![0.1f32; (m * k) as usize];
let input = GpuBuffer::from_host(executor.context(), &input_data).expect("input");
let output = GpuBuffer::new(executor.context(), (m * n) as usize).expect("output");
let result = executor.batched_q4k_gemv_into(weight_ptr, &input, &output, m, n, k);
assert!(
result.is_ok(),
"batched_q4k_gemv_into M=16 should succeed: {:?}",
result.err()
);
}
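// Batched GEMV over Q6_K-quantized weights, loaded with an explicit quantization type id.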
#[test]
#[serial]
fn test_cov028_batched_q6k_gemv_into_basic() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
let m = 4u32;
let n = 32u32;
let k = 256u32;
// Q6_K packs 256 weights into a 210-byte super-block, so one block per output row at k = 256.
let weight_bytes = (n as usize) * 210;
let weights = vec![0u8; weight_bytes];
executor
.load_quantized_weights_with_type("test_batched_q6k", &weights, 14) // type id 14, presumably Q6_K (GGML_TYPE_Q6_K)
.expect("load");
let weight_ptr = executor
.get_quantized_weight_ptr("test_batched_q6k")
.expect("get ptr");
let input_data = vec![0.1f32; (m * k) as usize];
let input = GpuBuffer::from_host(executor.context(), &input_data).expect("input");
let output = GpuBuffer::new(executor.context(), (m * n) as usize).expect("output");
let result = executor.batched_q6k_gemv_into(weight_ptr, &input, &output, m, n, k);
assert!(
result.is_ok(),
"batched_q6k_gemv_into should succeed: {:?}",
result.err()
);
}
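// LayerNorm over a single 64-element row with unit gamma and zero beta.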
#[test]
#[serial]
fn test_cov028_layer_norm_gpu_basic() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
let hidden_size = 64u32;
let batch_size = 1u32;
let epsilon = 1e-5f32;
let input_data = vec![0.5f32; hidden_size as usize];
let gamma_data = vec![1.0f32; hidden_size as usize];
let beta_data = vec![0.0f32; hidden_size as usize];
let input = GpuBuffer::from_host(executor.context(), &input_data).expect("input");
let output = GpuBuffer::new(executor.context(), hidden_size as usize).expect("output");
let gamma = GpuBuffer::from_host(executor.context(), &gamma_data).expect("gamma");
let beta = GpuBuffer::from_host(executor.context(), &beta_data).expect("beta");
let result = executor.layer_norm_gpu(
&input,
&output,
&gamma,
&beta,
hidden_size,
batch_size,
epsilon,
);
assert!(
result.is_ok(),
"layer_norm_gpu should succeed: {:?}",
result.err()
);
}
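// LayerNorm over a batch of 4 rows sharing a single gamma/beta pair.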
#[test]
#[serial]
fn test_cov028_layer_norm_gpu_batched() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
let hidden_size = 128u32;
let batch_size = 4u32;
let epsilon = 1e-6f32;
let input_data = vec![0.5f32; (hidden_size * batch_size) as usize];
let gamma_data = vec![1.0f32; hidden_size as usize];
let beta_data = vec![0.1f32; hidden_size as usize];
let input = GpuBuffer::from_host(executor.context(), &input_data).expect("input");
let output =
GpuBuffer::new(executor.context(), (hidden_size * batch_size) as usize).expect("output");
let gamma = GpuBuffer::from_host(executor.context(), &gamma_data).expect("gamma");
let beta = GpuBuffer::from_host(executor.context(), &beta_data).expect("beta");
let result = executor.layer_norm_gpu(
&input,
&output,
&gamma,
&beta,
hidden_size,
batch_size,
epsilon,
);
assert!(
result.is_ok(),
"layer_norm_gpu batched should succeed: {:?}",
result.err()
);
}
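// compute_stream should hand back a usable stream reference.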
#[test]
#[serial]
fn test_cov028_compute_stream() {
if !CudaExecutor::is_available() {
return;
}
let executor = CudaExecutor::new(0).expect("CUDA executor");
let stream = executor.compute_stream();
assert!(
!std::ptr::from_ref(stream).is_null(),
"stream should be valid"
);
}
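// load_weights reports the number of bytes uploaded (4 bytes per f32).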
#[test]
#[serial]
fn test_cov029_load_weights_basic() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
let weights = vec![0.1f32; 256];
let result = executor.load_weights("test_weight", &weights);
assert!(
result.is_ok(),
"load_weights should succeed: {:?}",
result.err()
);
let bytes = result.unwrap();
assert_eq!(bytes, 256 * 4, "Should load 256 f32 values (1024 bytes)");
}
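// has_weights reflects exactly which named tensors have been loaded.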
#[test]
#[serial]
fn test_cov029_load_weights_and_has() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
assert!(
!executor.has_weights("my_weight"),
"Should not have weight initially"
);
let weights = vec![1.0f32; 128];
executor.load_weights("my_weight", &weights).expect("load");
assert!(
executor.has_weights("my_weight"),
"Should have weight after load"
);
assert!(
!executor.has_weights("other_weight"),
"Should not have unloaded weight"
);
}
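// cached_weight_count tracks the number of distinct cached weight tensors.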
#[test]
#[serial]
fn test_cov029_cached_weight_count() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
assert_eq!(
executor.cached_weight_count(),
0,
"Initial count should be 0"
);
executor.load_weights("w1", &[1.0f32; 64]).expect("load w1");
assert_eq!(executor.cached_weight_count(), 1, "Count should be 1");
executor.load_weights("w2", &[1.0f32; 64]).expect("load w2");
assert_eq!(executor.cached_weight_count(), 2, "Count should be 2");
}
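// cached_weight_bytes accumulates the byte size of every cached tensor.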
#[test]
#[serial]
fn test_cov029_cached_weight_bytes() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
assert_eq!(
executor.cached_weight_bytes(),
0,
"Initial bytes should be 0"
);
executor
.load_weights("w1", &[1.0f32; 100])
.expect("load w1");
assert_eq!(
executor.cached_weight_bytes(),
400,
"Should be 400 bytes (100 * 4)"
);
executor.load_weights("w2", &[1.0f32; 50]).expect("load w2");
assert_eq!(
executor.cached_weight_bytes(),
600,
"Should be 600 bytes total"
);
}
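// A freshly constructed executor starts with no indexed weights.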
#[test]
#[serial]
fn test_cov029_has_indexed_weights() {
if !CudaExecutor::is_available() {
return;
}
let executor = CudaExecutor::new(0).expect("CUDA executor");
assert!(
!executor.has_indexed_weights(),
"Should not have indexed weights initially"
);
}
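// Returning a staging buffer should make it available in the pool's free list.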
#[test]
#[serial]
fn test_cov029_return_staging_buffer() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
let buf = executor.get_staging_buffer(256);
executor.return_staging_buffer(buf);
let stats = executor.staging_pool_stats();
assert!(
stats.free_buffers >= 1,
"Pool should have at least 1 buffer after return"
);
}