/// Checks that `init_batched_workspace` succeeds for a batch of 4 and leaves
/// `workspace.hidden_buf1` populated.
///
/// Returns early (test passes trivially) when `create_executor()` yields `None`
/// or the executor harness cannot be set up.
#[test]
fn test_batched_workspace_hidden_buffer() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let config = HarnessConfig::default();
    let Ok(_) = setup_executor_harness(&mut exec, &config) else {
        return;
    };

    let init_outcome = exec.init_batched_workspace(config.hidden_dim, config.intermediate_dim, 4);
    assert!(init_outcome.is_ok());

    // A successful init must have allocated the first hidden buffer.
    assert!(exec.workspace.hidden_buf1.is_some());
}
/// Smoke test: invokes `rmsnorm_gpu_ptr` on a `hidden_dim`-length buffer of ones,
/// using the attention-norm pointer and length of the first indexed layer.
///
/// The call's result is deliberately discarded; the test only exercises the
/// code path. Returns early when `create_executor()` yields `None`, when the
/// harness cannot be set up, or when no indexed layer weights are present.
#[test]
fn test_rmsnorm_gpu_ptr_batched() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let config = HarnessConfig::default();
    if setup_executor_harness(&mut exec, &config).is_err() {
        return;
    }

    // Skip rather than panic on an empty layer list, matching the guard used by
    // the other layer-weight tests in this file.
    if exec.indexed_layer_weights.is_empty() {
        return;
    }

    // Copy the two fields out so no borrow of `exec` is held across the call below.
    let (norm_ptr, norm_len) = {
        let layer_weights = &exec.indexed_layer_weights[0];
        (layer_weights.attn_norm_ptr, layer_weights.attn_norm_len)
    };

    let host_input = vec![1.0f32; config.hidden_dim];
    let input = GpuBuffer::from_host(&exec.context, &host_input).expect("input");

    // Best-effort: success or failure of the kernel call is not asserted.
    let _ = exec.rmsnorm_gpu_ptr(&input, norm_ptr, norm_len, config.hidden_dim as u32, 1e-5);
}
/// Smoke test: runs `forward_batched_to_token_ids` for four sequences whose
/// positions are non-contiguous (0, 5, 10, 15).
///
/// Neither the workspace initialisation nor the forward pass result is
/// asserted; the test only exercises the code path. Returns early when
/// `create_executor()` yields `None` or harness setup fails.
#[test]
fn test_forward_batched_different_positions() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let config = HarnessConfig::default();
    if setup_executor_harness(&mut exec, &config).is_err() {
        return;
    }

    // Best-effort workspace setup for a batch of 4; failure is ignored.
    let _ = exec.init_batched_workspace(config.hidden_dim, config.intermediate_dim, 4);

    // One `hidden_dim`-sized row of 0.1 per position.
    let inputs: Vec<f32> = vec![0.1; 4 * config.hidden_dim];
    let positions: Vec<u32> = Vec::from([0, 5, 10, 15]);

    let _outcome = exec.forward_batched_to_token_ids(
        &inputs,
        &positions,
        config.num_layers,
        config.hidden_dim as u32,
        config.intermediate_dim as u32,
        config.vocab_size as u32,
        1e-5,
    );
}
/// Smoke test: calls `transformer_layer_batched` once per layer index on a
/// harness configured with `num_layers = 4`, using a batch of 4 at positions
/// 0..=3.
///
/// Iteration stops at the first layer index that has no entry in
/// `indexed_layer_weights`. Results of the individual layer calls are discarded.
/// Returns early when `create_executor()` yields `None` or harness setup fails.
#[test]
fn test_transformer_layer_batched_different_layers() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let mut config = HarnessConfig::default();
    config.num_layers = 4;
    if setup_executor_harness(&mut exec, &config).is_err() {
        return;
    }

    // Best-effort workspace setup; failure is ignored.
    let _ = exec.init_batched_workspace(config.hidden_dim, config.intermediate_dim, 4);

    let host_inputs: Vec<f32> = vec![0.1; 4 * config.hidden_dim];
    let input_buf = GpuBuffer::from_host(&exec.context, &host_inputs).expect("input_buf");
    let positions: [u32; 4] = [0, 1, 2, 3];

    for layer_idx in 0..config.num_layers {
        // Clone the weights so `exec` is free to be borrowed by the call below;
        // stop as soon as the executor has no weights for this index.
        let Some(layer_weights) = exec.indexed_layer_weights.get(layer_idx).cloned() else {
            break;
        };
        let _ = exec.transformer_layer_batched(
            &input_buf,
            layer_idx,
            &layer_weights,
            4,
            &positions,
            config.hidden_dim as u32,
            config.intermediate_dim as u32,
            1e-5,
        );
    }
}
/// Smoke test: runs `forward_batched_to_token_ids` with a batch of exactly one
/// sequence at position 0.
///
/// The workspace is initialised for a batch size of 1. Neither that
/// initialisation nor the forward result is asserted. Returns early when
/// `create_executor()` yields `None` or harness setup fails.
#[test]
fn test_batched_forward_m1() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let config = HarnessConfig::default();
    let Ok(_) = setup_executor_harness(&mut exec, &config) else {
        return;
    };

    // Best-effort workspace setup; failure is ignored.
    let _ = exec.init_batched_workspace(config.hidden_dim, config.intermediate_dim, 1);

    // A single `hidden_dim`-sized row and its single position.
    let inputs: Vec<f32> = vec![0.1; config.hidden_dim];
    let positions: Vec<u32> = vec![0];

    let _ = exec.forward_batched_to_token_ids(
        &inputs,
        &positions,
        config.num_layers,
        config.hidden_dim as u32,
        config.intermediate_dim as u32,
        config.vocab_size as u32,
        1e-5,
    );
}
/// Smoke test: calls `init_batched_kv_cache_gpu` repeatedly on the same executor
/// with batch sizes 1, 4, 8 and 16.
///
/// Each call's result is discarded; the test only exercises re-initialisation
/// with different sizes. Returns early when `create_executor()` yields `None`
/// or harness setup fails.
#[test]
fn test_batched_kv_cache_multiple_batch_sizes() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let config = HarnessConfig::default();
    if setup_executor_harness(&mut exec, &config).is_err() {
        return;
    }

    for batch_size in [1, 4, 8, 16] {
        // Best-effort: success or failure for a given size is not asserted.
        let _ = exec.init_batched_kv_cache_gpu(config.num_layers, batch_size);
    }
}
/// Smoke test: runs `transformer_layer_batched` for layer 0 with a copy of the
/// layer weights whose `attn_v_qtype` has been overridden to
/// `WeightQuantType::Q6K`.
///
/// The executor's own stored weights are left untouched (the override is made
/// on a clone). The layer call's result is discarded. Returns early when
/// `create_executor()` yields `None`, harness setup fails, or there are no
/// indexed layer weights.
#[test]
fn test_transformer_layer_batched_qtype_q6k() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let config = HarnessConfig::default();
    if setup_executor_harness(&mut exec, &config).is_err() {
        return;
    }

    // Best-effort workspace setup for a batch of 4; failure is ignored.
    let _ = exec.init_batched_workspace(config.hidden_dim, config.intermediate_dim, 4);

    let host_inputs: Vec<f32> = vec![0.1; 4 * config.hidden_dim];
    let input_buf = GpuBuffer::from_host(&exec.context, &host_inputs).expect("input_buf");

    // Nothing to run against if the harness produced no layers.
    if exec.indexed_layer_weights.is_empty() {
        return;
    }

    // Work on a clone so only this call sees the Q6K override.
    let mut q6k_weights = exec.indexed_layer_weights[0].clone();
    q6k_weights.inner_mut().attn_v_qtype = WeightQuantType::Q6K;

    let positions: [u32; 4] = [0, 1, 2, 3];
    let _outcome = exec.transformer_layer_batched(
        &input_buf,
        0,
        &q6k_weights,
        4,
        &positions,
        config.hidden_dim as u32,
        config.intermediate_dim as u32,
        1e-5,
    );
}
/// Checks that `decode_token_count` does not decrease across a call to
/// `forward_batched_to_token_ids_graphed` with a batch of 4.
///
/// The graphed forward call's own result is discarded, so the assertion holds
/// whether or not that call succeeds. Returns early when `create_executor()`
/// yields `None` or harness setup fails.
#[test]
fn test_batched_graph_token_count() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let config = HarnessConfig::default();
    if setup_executor_harness(&mut exec, &config).is_err() {
        return;
    }

    // Snapshot the counter before any batched work is attempted.
    let count_before = exec.decode_token_count;

    // Best-effort workspace setup; failure is ignored.
    let _ = exec.init_batched_workspace(config.hidden_dim, config.intermediate_dim, 4);

    let inputs: Vec<f32> = vec![0.1; 4 * config.hidden_dim];
    let positions: Vec<u32> = vec![0, 1, 2, 3];
    let _ = exec.forward_batched_to_token_ids_graphed(
        &inputs,
        &positions,
        config.num_layers,
        config.hidden_dim as u32,
        config.intermediate_dim as u32,
        config.vocab_size as u32,
        1e-5,
    );

    // NOTE(review): `>=` also passes when the counter is unchanged, so this only
    // guards against the counter going backwards.
    assert!(exec.decode_token_count >= count_before);
}
/// Smoke test: runs `forward_batched_to_token_ids` for a batch of 4 with
/// non-uniform inputs, where element `i` is `sin(i / 1000)`.
///
/// Neither the workspace initialisation nor the forward result is asserted.
/// Returns early when `create_executor()` yields `None` or harness setup fails.
#[test]
fn test_batched_forward_varying_input_values() {
    use crate::cuda::executor::test_fixtures::{setup_executor_harness, HarnessConfig};

    let Some(mut exec) = create_executor() else {
        return;
    };
    let config = HarnessConfig::default();
    let Ok(_) = setup_executor_harness(&mut exec, &config) else {
        return;
    };

    // Best-effort workspace setup; failure is ignored.
    let _ = exec.init_batched_workspace(config.hidden_dim, config.intermediate_dim, 4);

    let positions: Vec<u32> = vec![0, 1, 2, 3];

    // Four `hidden_dim`-sized rows filled with a slowly varying sine wave.
    let element_count = 4 * config.hidden_dim;
    let inputs: Vec<f32> = (0..element_count)
        .map(|idx| (idx as f32 / 1000.0).sin())
        .collect();

    let _outcome = exec.forward_batched_to_token_ids(
        &inputs,
        &positions,
        config.num_layers,
        config.hidden_dim as u32,
        config.intermediate_dim as u32,
        config.vocab_size as u32,
        1e-5,
    );
}