/// Serialize one tensor-index entry in the APR v2 on-disk layout:
/// `[name_len: u16 LE][name bytes][dtype: u8][ndims: u8][dims: u64 LE each]`
/// `[offset: u64 LE][size: u64 LE]`.
fn write_tensor_entry_binary(
    name: &str,
    dtype: u8,
    shape: &[usize],
    offset: u64,
    size: u64,
) -> Vec<u8> {
    let name_bytes = name.as_bytes();
    // 2 (name len) + name + 1 (dtype) + 1 (ndims) + 8 per dim + 8 (offset) + 8 (size)
    let mut entry = Vec::with_capacity(2 + name_bytes.len() + 2 + shape.len() * 8 + 16);
    entry.extend_from_slice(&(name_bytes.len() as u16).to_le_bytes());
    entry.extend_from_slice(name_bytes);
    entry.push(dtype);
    entry.push(shape.len() as u8);
    entry.extend(shape.iter().flat_map(|&dim| (dim as u64).to_le_bytes()));
    entry.extend_from_slice(&offset.to_le_bytes());
    entry.extend_from_slice(&size.to_le_bytes());
    entry
}
/// Build a tiny in-memory APR v2 model containing a single FP32 embedding
/// tensor (`model.embed_tokens.weight`, shape [100, 32]) without touching
/// disk, for exercising the CUDA wrapper in the tests below.
///
/// File layout written by hand: [header][metadata JSON][tensor index][data],
/// with the index and data sections each rounded up to a 64-byte boundary.
fn create_minimal_apr_model() -> AprV2Model {
    // Minimal JSON config; values are arbitrary but consistent with the
    // single 100x32 tensor written below.
    let metadata = r#"{
"model_type": "test",
"name": "test-cuda-model",
"hidden_dim": 32,
"num_layers": 1,
"num_heads": 1,
"vocab_size": 100
}"#;
    // One index entry: dtype 0, data offset 0 within the data section,
    // size = 100 * 32 elements * 4 bytes each.
    let tensor_index_binary = write_tensor_entry_binary(
        "model.embed_tokens.weight", 0, &[100, 32], 0, 100 * 32 * 4, );
    // Tensor payload: 100*32 f32 values serialized little-endian.
    let tensor_data: Vec<f32> = vec![0.1; 100 * 32];
    let tensor_bytes: Vec<u8> = tensor_data.iter().flat_map(|f| f.to_le_bytes()).collect();
    // Metadata sits directly after the fixed-size header; subsequent sections
    // start at the next multiple of 64 bytes.
    let metadata_offset = HEADER_SIZE as u64;
    let metadata_size = metadata.len() as u32;
    let tensor_index_offset =
        ((metadata_offset as usize + metadata.len()).div_ceil(64) * 64) as u64;
    let data_offset =
        ((tensor_index_offset as usize + tensor_index_binary.len()).div_ceil(64) * 64) as u64;
    let total_size = data_offset as usize + tensor_bytes.len();
    // Zero-filled buffer: the alignment padding between sections stays zero.
    let mut data = vec![0u8; total_size];
    data[0..4].copy_from_slice(&MAGIC);
    // Header fields at fixed byte positions. Bytes 4/5 look like a format
    // version (v2.0) and bytes 6..8 like a flags word — presumably; confirm
    // against the APR header spec. Bytes 8..12 carry the tensor count (1).
    data[4] = 2; data[5] = 0; data[6..8].copy_from_slice(&0u16.to_le_bytes()); data[8..12].copy_from_slice(&1u32.to_le_bytes()); data[12..20].copy_from_slice(&metadata_offset.to_le_bytes());
    data[20..24].copy_from_slice(&metadata_size.to_le_bytes());
    data[24..32].copy_from_slice(&tensor_index_offset.to_le_bytes());
    data[32..40].copy_from_slice(&data_offset.to_le_bytes());
    // Copy each section into its slot.
    data[metadata_offset as usize..metadata_offset as usize + metadata.len()]
        .copy_from_slice(metadata.as_bytes());
    data[tensor_index_offset as usize
        ..tensor_index_offset as usize + tensor_index_binary.len()]
        .copy_from_slice(&tensor_index_binary);
    data[data_offset as usize..data_offset as usize + tensor_bytes.len()]
        .copy_from_slice(&tensor_bytes);
    AprV2Model::from_bytes(data).expect("should create minimal APR model")
}
#[test]
fn test_minimal_model_has_embedding_tensor() {
    let model = create_minimal_apr_model();
    // Dump the tensor table to aid debugging on failure.
    model
        .tensors
        .iter()
        .for_each(|tensor| println!("Tensor: '{}' shape={:?}", tensor.name, tensor.shape));
    assert!(
        model.get_tensor("model.embed_tokens.weight").is_some(),
        "Should have model.embed_tokens.weight tensor"
    );
}
#[test]
fn test_apr_cuda_new_succeeds_with_gpu() {
    // Construction may fail on machines without a CUDA device; both
    // outcomes are acceptable here — just report what happened.
    match AprV2ModelCuda::new(create_minimal_apr_model(), 0) {
        Ok(cuda_model) if !cuda_model.device_name().is_empty() => {
            println!("GPU: {}", cuda_model.device_name());
        }
        Ok(_) => {}
        Err(e) => println!("CUDA init result: {:?}", e),
    }
}
#[test]
fn test_apr_cuda_with_max_seq_len() {
    let model = create_minimal_apr_model();
    // Either outcome is fine without guaranteed GPU hardware; just log it.
    match AprV2ModelCuda::with_max_seq_len(model, 0, 512) {
        Ok(_) => println!("CUDA model created with max_seq_len=512"),
        Err(e) => println!("CUDA init result: {:?}", e),
    }
}
#[test]
fn test_apr_cuda_invalid_device() {
    // An out-of-range device ordinal must be rejected even when CUDA exists.
    let outcome = AprV2ModelCuda::new(create_minimal_apr_model(), 999);
    assert!(outcome.is_err(), "Device 999 should not exist");
}
#[test]
fn test_apr_cuda_device_name() {
    // Skip silently when no CUDA device is available.
    let Ok(cuda_model) = AprV2ModelCuda::new(create_minimal_apr_model(), 0) else {
        return;
    };
    let name = cuda_model.device_name();
    assert!(!name.is_empty(), "Device name should not be empty");
    println!("GPU device: {}", name);
}
#[test]
fn test_apr_cuda_memory_info() {
    let Ok(cuda_model) = AprV2ModelCuda::new(create_minimal_apr_model(), 0) else {
        return; // no GPU — nothing to check
    };
    let (free, total) = cuda_model.memory_info();
    assert!(total > 0, "Total GPU memory should be > 0");
    assert!(free <= total, "Free memory should not exceed total");
    println!("GPU memory: {}/{} bytes free", free, total);
}
#[test]
fn test_apr_cuda_vram_mb() {
    let Ok(gpu) = AprV2ModelCuda::new(create_minimal_apr_model(), 0) else {
        return; // no GPU — skip
    };
    let vram = gpu.vram_mb();
    assert!(vram > 0, "VRAM should be > 0 MB");
    println!("GPU VRAM: {} MB", vram);
}
#[test]
fn test_apr_cuda_inner_model() {
    let model = create_minimal_apr_model();
    let Ok(gpu) = AprV2ModelCuda::new(model, 0) else {
        return;
    };
    // The wrapped CPU-side model must still expose its tensors.
    let inner = gpu.inner();
    assert!(inner.tensor_count() > 0, "Model should have tensors");
}
#[test]
fn test_apr_cuda_is_available() {
    // Purely informational: report whether a CUDA runtime was detected.
    println!("CUDA available: {}", AprV2ModelCuda::is_available());
}
#[test]
fn test_apr_cuda_num_devices() {
    // Purely informational: report the visible device count.
    println!("CUDA devices: {}", AprV2ModelCuda::num_devices());
}
#[test]
fn test_apr_cuda_profiling() {
    let Ok(mut gpu) = AprV2ModelCuda::new(create_minimal_apr_model(), 0) else {
        return; // no GPU — skip
    };
    // Profiling starts disabled, toggles on, then back off.
    assert!(!gpu.is_profiling_enabled());
    gpu.enable_profiling();
    assert!(gpu.is_profiling_enabled());
    gpu.disable_profiling();
    assert!(!gpu.is_profiling_enabled());
}
#[test]
fn test_apr_cuda_profiler_access() {
    // Merely exercise the profiler accessor when a GPU is present.
    if let Ok(gpu) = AprV2ModelCuda::new(create_minimal_apr_model(), 0) {
        let _profiler = gpu.profiler();
    }
}
#[test]
fn test_apr_cuda_reset_profiler() {
    // reset_profiler should be callable after enabling profiling.
    if let Ok(mut gpu) = AprV2ModelCuda::new(create_minimal_apr_model(), 0) {
        gpu.enable_profiling();
        gpu.reset_profiler();
    }
}
#[test]
fn test_apr_cuda_reset_kv_cache() {
    // Resetting the KV cache on a freshly constructed model must not panic.
    if let Ok(mut gpu) = AprV2ModelCuda::new(create_minimal_apr_model(), 0) {
        gpu.reset_kv_cache();
    }
}
#[test]
fn test_apr_cuda_weights_cached() {
    let Ok(gpu) = AprV2ModelCuda::new(create_minimal_apr_model(), 0) else {
        return; // no GPU — skip
    };
    // Informational only: report whether weights were cached on device.
    let cached = gpu.weights_cached();
    println!("Weights cached: {}", cached);
}
#[test]
fn test_apr_cuda_cached_weight_mb() {
    let Ok(gpu) = AprV2ModelCuda::new(create_minimal_apr_model(), 0) else {
        return; // no GPU — skip
    };
    // Informational only: report the cached-weight footprint.
    let mb = gpu.cached_weight_mb();
    println!("Cached weight: {} MB", mb);
}
#[test]
fn test_apr_cuda_with_test_executor() {
    use crate::gpu::executor::MockExecutor;
    let Ok(mut gpu) = AprV2ModelCuda::new(create_minimal_apr_model(), 0) else {
        return; // no GPU — skip
    };
    // Installing a mock executor flips the has_test_executor flag.
    assert!(!gpu.has_test_executor());
    gpu.with_test_executor(Box::new(MockExecutor::new("apr_test")));
    assert!(gpu.has_test_executor());
}
#[test]
fn test_apr_cuda_test_executor_bypasses_fast_path() {
    use crate::gpu::executor::MockExecutor;
    let Ok(mut gpu) = AprV2ModelCuda::new(create_minimal_apr_model(), 0) else {
        return; // no GPU — skip
    };
    gpu.with_test_executor(Box::new(MockExecutor::new("bypass_test")));
    // Having a test executor must imply the cached-weight fast path is off.
    assert!(!gpu.has_test_executor() || !gpu.weights_cached());
    assert!(gpu.has_test_executor());
}
#[test]
fn test_apr_cuda_forward_with_test_executor() {
    use crate::gpu::executor::CpuExecutor;
    let model = create_transformer_model();
    let Ok(mut gpu) = AprV2ModelCuda::new(model, 0) else {
        return; // no GPU — skip
    };
    // Route the forward pass through the CPU executor and report the outcome.
    gpu.with_test_executor(Box::new(CpuExecutor::new()));
    let result = gpu.forward_cuda(&[1]);
    println!("Forward with test executor result: {:?}", result.is_ok());
}
#[test]
fn test_apr_cuda_forward_with_mock_executor() {
    use crate::gpu::executor::MockExecutor;
    let model = create_transformer_model();
    let Ok(mut gpu) = AprV2ModelCuda::new(model, 0) else {
        return; // no GPU — skip
    };
    // Route the forward pass through a mock executor and report the outcome.
    gpu.with_test_executor(Box::new(MockExecutor::new("forward_mock")));
    let result = gpu.forward_cuda(&[1]);
    println!("Forward with mock executor: {:?}", result.is_ok());
}
#[test]
fn test_apr_cuda_forward_mock_error_handling() {
    use crate::gpu::executor::MockExecutor;
    let model = create_transformer_model();
    let Ok(mut gpu) = AprV2ModelCuda::new(model, 0) else {
        return; // no GPU — skip
    };
    // A mock wired to fail on matmul should surface as a forward error.
    gpu.with_test_executor(Box::new(MockExecutor::new("fail_mock").with_matmul_failure()));
    let result = gpu.forward_cuda(&[1]);
    println!("Forward with failing mock: {:?}", result.is_err());
}
#[test]
fn test_apr_cuda_generate_with_test_executor() {
    use crate::gpu::executor::CpuExecutor;
    let model = create_transformer_model();
    let Ok(mut gpu) = AprV2ModelCuda::new(model, 0) else {
        return; // no GPU — skip
    };
    // Drive a short generation through the CPU executor and report the outcome.
    gpu.with_test_executor(Box::new(CpuExecutor::new()));
    let result = gpu.generate_cuda(&[1], 1, 2);
    println!("Generate with test executor: {:?}", result.is_ok());
}