#[test]
#[serial]
fn test_cov029_workspace_batch_size() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
assert_eq!(
executor.workspace_batch_size(),
0,
"Initial batch size should be 0"
);
executor.init_workspace(512, 256).expect("init workspace");
assert_eq!(
executor.workspace_batch_size(),
1,
"Batch size should be 1 after init"
);
}
#[test]
#[serial]
fn test_cov029_workspace_batch_size_batched() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
executor.init_workspace(512, 256).expect("init workspace");
executor
.init_batched_workspace(512, 256, 4)
.expect("init batched");
assert_eq!(executor.workspace_batch_size(), 4, "Batch size should be 4");
}
#[test]
#[serial]
fn test_cov029_profiler_mut() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
let profiler = executor.profiler_mut();
assert!(
std::ptr::from_mut(profiler) as usize != 0,
"profiler should be valid"
);
}
#[test]
#[serial]
fn test_cov029_execution_graph_ascii() {
if !CudaExecutor::is_available() {
return;
}
let executor = CudaExecutor::new(0).expect("CUDA executor");
let ascii = executor.execution_graph_ascii();
let _ = ascii.len();
}
#[test]
#[serial]
fn test_cov029_tile_stats() {
if !CudaExecutor::is_available() {
return;
}
let executor = CudaExecutor::new(0).expect("CUDA executor");
let _macro_stats = executor.tile_stats(trueno::TileLevel::Macro);
let _midi_stats = executor.tile_stats(trueno::TileLevel::Midi);
}
#[test]
#[serial]
fn test_cov030_workspace_output_before_init() {
if !CudaExecutor::is_available() {
return;
}
let executor = CudaExecutor::new(0).expect("CUDA executor");
assert!(
executor.workspace_output().is_none(),
"workspace_output should be None before init"
);
}
#[test]
#[serial]
fn test_cov030_workspace_output_after_init() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
executor.init_workspace(512, 256).expect("init workspace");
assert!(
executor.workspace_output().is_some(),
"workspace_output should be Some after init"
);
}
#[test]
#[serial]
fn test_cov030_has_rmsnorm_weights() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
assert!(
!executor.has_rmsnorm_weights(0),
"Should not have weights for layer 0 initially"
);
assert!(
!executor.has_rmsnorm_weights(1),
"Should not have weights for layer 1 initially"
);
let attn_norm_0 = vec![1.0f32; 512];
let attn_norm_1 = vec![1.0f32; 512];
let ffn_norm_0 = vec![1.0f32; 512];
let ffn_norm_1 = vec![1.0f32; 512];
let attn_norms: &[&[f32]] = &[&attn_norm_0, &attn_norm_1];
let ffn_norms: &[&[f32]] = &[&ffn_norm_0, &ffn_norm_1];
executor
.preload_rmsnorm_weights(2, attn_norms, ffn_norms)
.expect("preload");
assert!(
executor.has_rmsnorm_weights(0),
"Should have weights for layer 0 after preload"
);
assert!(
executor.has_rmsnorm_weights(1),
"Should have weights for layer 1 after preload"
);
assert!(
!executor.has_rmsnorm_weights(2),
"Should not have weights for layer 2 (not preloaded)"
);
}
#[test]
#[serial]
fn test_cov030_has_output_norm() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
assert!(
!executor.has_output_norm(),
"Should not have output norm initially"
);
let gamma = vec![1.0f32; 512];
executor
.preload_output_norm(&gamma)
.expect("preload output norm");
assert!(
executor.has_output_norm(),
"Should have output norm after preload"
);
}
#[test]
#[serial]
fn test_cov030_has_qkv_bias() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
assert!(
!executor.has_qkv_bias(0),
"Should not have QKV bias for layer 0 initially"
);
let q_bias_0 = vec![0.0f32; 512];
let k_bias_0 = vec![0.0f32; 64];
let v_bias_0 = vec![0.0f32; 64];
let q_biases: &[Option<&[f32]>] = &[Some(&q_bias_0)];
let k_biases: &[Option<&[f32]>] = &[Some(&k_bias_0)];
let v_biases: &[Option<&[f32]>] = &[Some(&v_bias_0)];
executor
.preload_qkv_bias(1, q_biases, k_biases, v_biases)
.expect("preload");
assert!(
executor.has_qkv_bias(0),
"Should have QKV bias for layer 0 after preload"
);
assert!(
!executor.has_qkv_bias(1),
"Should not have QKV bias for layer 1 (not preloaded)"
);
}
#[test]
#[serial]
fn test_cov030_has_lm_head_bias() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
assert!(
!executor.has_lm_head_bias(),
"Should not have LM head bias initially"
);
let bias = vec![0.0f32; 32000];
executor
.preload_lm_head_bias(Some(&bias))
.expect("preload lm head bias");
assert!(
executor.has_lm_head_bias(),
"Should have LM head bias after preload"
);
}
#[test]
#[serial]
fn test_cov030_output_rmsnorm_gpu() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
let hidden_dim = 512u32;
let epsilon = 1e-5f32;
let input = vec![1.0f32; hidden_dim as usize];
let mut output = vec![0.0f32; hidden_dim as usize];
let gamma = vec![1.0f32; hidden_dim as usize];
let result = executor.output_rmsnorm_gpu(&input, &mut output, &gamma, hidden_dim, epsilon);
assert!(
result.is_ok(),
"output_rmsnorm_gpu should succeed: {:?}",
result.err()
);
let sum: f32 = output.iter().sum();
assert!(sum.abs() > 0.0, "Output should be non-zero after RMSNorm");
}
#[test]
#[serial]
fn test_cov030_gpu_argmax_basic() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
let vocab_size = 1024u32;
let mut logits = vec![0.0f32; vocab_size as usize];
logits[42] = 100.0;
let logits_gpu = GpuBuffer::from_host(executor.context(), &logits).expect("upload logits");
let logits_ptr = logits_gpu.as_ptr();
let result = executor.gpu_argmax(logits_ptr, vocab_size);
assert!(
result.is_ok(),
"gpu_argmax should succeed: {:?}",
result.err()
);
assert_eq!(result.unwrap(), 42, "gpu_argmax should return index 42");
}
#[test]
#[serial]
fn test_cov030_gpu_argmax_large_vocab() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
let vocab_size = 32000u32;
let mut logits = vec![-10.0f32; vocab_size as usize];
logits[31999] = 50.0;
let logits_gpu = GpuBuffer::from_host(executor.context(), &logits).expect("upload logits");
let logits_ptr = logits_gpu.as_ptr();
let result = executor.gpu_argmax(logits_ptr, vocab_size);
assert!(result.is_ok(), "gpu_argmax with large vocab should succeed");
assert_eq!(result.unwrap(), 31999, "Should find max at last index");
}
#[test]
#[serial]
fn test_cov030_read_hidden_state_error_before_init() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
let result = executor.read_hidden_state_to_cpu();
assert!(
result.is_err(),
"read_hidden_state_to_cpu should error before workspace init"
);
}
#[test]
#[serial]
fn test_cov030_transformer_layer_batched_validation() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
let input_data = vec![0.1f32; 512 * 4];
let input = GpuBuffer::from_host(executor.context(), &input_data).expect("input");
let layer_weights = test_zeroed_layer_weights();
let result = executor.transformer_layer_batched(
&input,
0,
&layer_weights,
4,
&[0, 1, 2, 3],
512,
256,
1e-5,
);
assert!(
result.is_err(),
"transformer_layer_batched should error without workspace"
);
}
#[test]
#[serial]
fn test_cov030_transformer_layer_batched_mismatch() {
if !CudaExecutor::is_available() {
return;
}
let mut executor = CudaExecutor::new(0).expect("CUDA executor");
executor.init_workspace(512, 256).expect("init workspace");
executor
.init_batched_workspace(512, 256, 2)
.expect("init batched");
let input_data = vec![0.1f32; 512 * 4];
let input = GpuBuffer::from_host(executor.context(), &input_data).expect("input");
let layer_weights = test_zeroed_layer_weights();
let result = executor.transformer_layer_batched(
&input,
0,
&layer_weights,
4, &[0, 1, 2, 3],
512,
256,
1e-5,
);
assert!(result.is_err(), "Should error on batch size mismatch");
}