#![cfg(all(test, feature = "cuda"))]
use super::context::{cuda_available, device_count, get_driver, CudaContext};
use super::graph::{CaptureMode, CudaGraph};
use super::memory::GpuBuffer;
use super::module::CudaModule;
use super::stream::CudaStream;
use super::types::LaunchConfig;
use std::ffi::c_void;
/// The CUDA driver handle must be obtainable on a machine with CUDA installed.
#[test]
fn test_cuda_driver_initialization() {
    // Successfully obtaining the driver IS the assertion; the handle itself is
    // opaque. The original `assert!(!std::ptr::null::<()>().is_null() || true)`
    // was a tautology (always true) and has been removed.
    let _driver = get_driver().expect("CUDA driver MUST be available");
}
/// On a machine with an RTX 4090, CUDA availability detection must report true.
#[test]
fn test_cuda_available_with_hardware() {
    let available = cuda_available();
    assert!(available, "CUDA MUST be available on RTX 4090");
}
/// The driver must report at least one usable CUDA device.
#[test]
fn test_device_count_with_hardware() {
    let n = device_count().expect("device_count MUST succeed");
    assert!(n >= 1, "At least one CUDA device MUST be present");
}
/// Creating a context on device 0 yields a non-null handle bound to that device.
#[test]
fn test_context_creation_device_0() {
    let context = CudaContext::new(0).expect("CudaContext::new(0) MUST succeed");
    assert!(!context.raw().is_null());
    assert_eq!(context.device(), 0);
}
/// memory_info must report sane free/total VRAM figures for an RTX 4090.
#[test]
fn test_context_memory_info() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let (free, total) = context.memory_info().expect("memory_info MUST succeed");
    assert!(free <= total, "Free memory cannot exceed total");
    assert!(free > 0, "Some VRAM MUST be free");
    assert!(total > 20_000_000_000, "RTX 4090 should have >20GB VRAM");
}
/// A freshly created context can be made current on the calling thread.
#[test]
fn test_context_make_current() {
    CudaContext::new(0)
        .expect("Context creation MUST succeed")
        .make_current()
        .expect("make_current MUST succeed");
}
/// Synchronizing an idle context completes without error.
#[test]
fn test_context_synchronize() {
    CudaContext::new(0)
        .expect("Context creation MUST succeed")
        .synchronize()
        .expect("synchronize MUST succeed");
}
/// An out-of-range device ordinal is rejected rather than silently accepted.
#[test]
fn test_context_invalid_device() {
    assert!(
        CudaContext::new(999).is_err(),
        "Invalid device index MUST fail"
    );
}
/// Contexts created and dropped in a loop must clean up without error.
#[test]
fn test_context_drop_cleanup() {
    for _iteration in 0..5 {
        let context = CudaContext::new(0).expect("Context creation MUST succeed");
        let _ = context.memory_info();
        // `context` is dropped at the end of each iteration.
    }
}
/// device_name returns a non-empty, reasonably sized string.
#[test]
fn test_context_device_name() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let name = context.device_name().expect("device_name MUST succeed");
    assert!(name.len() < 256, "Device name should be reasonable length");
    assert!(!name.is_empty(), "Device name should not be empty");
}
/// total_memory reports the full VRAM size expected of an RTX 4090.
#[test]
fn test_context_total_memory() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let bytes = context.total_memory().expect("total_memory MUST succeed");
    assert!(bytes > 20_000_000_000, "RTX 4090 should have >20GB VRAM");
}
/// A negative ordinal must be rejected at context creation.
#[test]
fn test_context_negative_device_ordinal() {
    assert!(
        CudaContext::new(-1).is_err(),
        "Negative device ordinal MUST fail"
    );
}
/// Stream creation yields a non-null raw handle.
#[test]
fn test_stream_creation() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let new_stream = CudaStream::new(&context).expect("Stream creation MUST succeed");
    assert!(!new_stream.raw().is_null());
}
/// Synchronizing an idle stream completes without error.
#[test]
fn test_stream_synchronize() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    CudaStream::new(&context)
        .expect("Stream creation MUST succeed")
        .synchronize()
        .expect("Stream sync MUST succeed");
}
/// Several streams can coexist on one context and each can synchronize.
#[test]
fn test_multiple_streams() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let streams: Vec<_> = (0..4)
        .map(|_| CudaStream::new(&context).expect("Stream creation MUST succeed"))
        .collect();
    for each in &streams {
        each.synchronize().expect("Stream sync MUST succeed");
    }
}
/// A sized device allocation reports its length and a non-zero device pointer.
#[test]
fn test_gpu_buffer_new() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let buf: GpuBuffer<f32> = GpuBuffer::new(&context, 1024).expect("Buffer new MUST succeed");
    assert!(buf.as_ptr() != 0);
    assert_eq!(buf.len(), 1024);
}
/// from_host allocates a device buffer sized to the host slice.
#[test]
fn test_gpu_buffer_from_host() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let host: Vec<f32> = (1..=8).map(|i| i as f32).collect();
    let buf = GpuBuffer::from_host(&context, &host).expect("Buffer from_host MUST succeed");
    assert_eq!(buf.len(), 8);
}
/// Data uploaded via from_host must read back unchanged.
#[test]
fn test_gpu_buffer_round_trip() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let uploaded = vec![1.0f32, 2.0, 3.0, 4.0];
    let buf = GpuBuffer::from_host(&context, &uploaded).expect("Buffer creation MUST succeed");
    let mut readback = vec![0.0f32; 4];
    buf.copy_to_host(&mut readback).expect("copy_to_host MUST succeed");
    assert_eq!(readback, uploaded, "Round-trip data MUST match");
}
/// A 256M-element f32 allocation (1 GiB) must succeed on a 24 GB card.
#[test]
fn test_gpu_buffer_large_allocation() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let elements = 256 * 1024 * 1024;
    let buf: GpuBuffer<f32> =
        GpuBuffer::new(&context, elements).expect("Large buffer new MUST succeed");
    assert_eq!(buf.len(), elements);
}
/// copy_from_host fills the whole buffer; spot-check both ends on readback.
#[test]
fn test_gpu_buffer_copy_from_host() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let mut buf: GpuBuffer<f32> = GpuBuffer::new(&context, 1024).expect("Buffer new MUST succeed");
    let host = vec![42.0f32; 1024];
    buf.copy_from_host(&host).expect("copy_from_host MUST succeed");
    let mut readback = vec![0.0f32; 1024];
    buf.copy_to_host(&mut readback).expect("copy_to_host MUST succeed");
    assert_eq!(readback[0], 42.0);
    assert_eq!(readback[1023], 42.0);
}
/// size_bytes is element count times the element size.
#[test]
fn test_gpu_buffer_size_bytes() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let buf: GpuBuffer<f32> = GpuBuffer::new(&context, 256).expect("Buffer new MUST succeed");
    let expected = 256 * std::mem::size_of::<f32>();
    assert_eq!(buf.size_bytes(), expected);
}
/// A one-element buffer is not empty.
#[test]
fn test_gpu_buffer_is_empty() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let buf: GpuBuffer<f32> = GpuBuffer::new(&context, 1).expect("Buffer new MUST succeed");
    assert!(!buf.is_empty());
}
/// clone produces an independent buffer with identical length and contents.
#[test]
fn test_gpu_buffer_clone() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let host = vec![1.0f32, 2.0, 3.0, 4.0];
    let source = GpuBuffer::from_host(&context, &host).expect("Buffer creation MUST succeed");
    let copy = source.clone(&context).expect("Buffer clone MUST succeed");
    assert_eq!(copy.len(), source.len());
    let mut readback = vec![0.0f32; 4];
    copy.copy_to_host(&mut readback).expect("copy_to_host MUST succeed");
    assert_eq!(readback, host);
}
/// A bare graph object carries a non-null raw handle.
#[test]
fn test_cuda_graph_new() {
    let _context = CudaContext::new(0).expect("Context creation MUST succeed");
    let g = CudaGraph::new().expect("Graph creation MUST succeed");
    assert!(!g.raw().is_null());
}
/// Default-constructed graphs are also backed by a non-null handle.
#[test]
fn test_cuda_graph_default() {
    let _context = CudaContext::new(0).expect("Context creation MUST succeed");
    let g = CudaGraph::default();
    assert!(!g.raw().is_null());
}
/// Even an empty graph can be instantiated into an executable graph.
#[test]
fn test_cuda_graph_instantiate_empty() {
    let _context = CudaContext::new(0).expect("Context creation MUST succeed");
    let executable = CudaGraph::new()
        .expect("Graph creation MUST succeed")
        .instantiate()
        .expect("Graph instantiate MUST succeed");
    assert!(!executable.raw().is_null());
}
/// An empty capture yields a graph whose executable can be replayed repeatedly.
#[test]
fn test_cuda_graph_capture_and_replay() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let stream = CudaStream::new(&context).expect("Stream creation MUST succeed");
    stream
        .begin_capture(CaptureMode::Global)
        .expect("Begin capture MUST succeed");
    let captured = stream.end_capture().expect("End capture MUST succeed");
    assert!(!captured.raw().is_null());
    let executable = captured.instantiate().expect("Instantiate MUST succeed");
    assert!(!executable.raw().is_null());
    // Replay the captured (empty) workload ten times on the same stream.
    for _replay in 0..10 {
        stream.launch_graph(&executable).expect("Graph launch MUST succeed");
    }
    stream.synchronize().expect("Final sync MUST succeed");
}
/// Every capture mode supports a capture -> instantiate -> launch round trip.
#[test]
fn test_cuda_graph_capture_modes() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let stream = CudaStream::new(&context).expect("Stream creation MUST succeed");
    let all_modes = [
        CaptureMode::Global,
        CaptureMode::ThreadLocal,
        CaptureMode::Relaxed,
    ];
    for mode in all_modes {
        stream.begin_capture(mode).expect("Begin capture MUST succeed");
        let captured = stream.end_capture().expect("End capture MUST succeed");
        let executable = captured.instantiate().expect("Instantiate MUST succeed");
        stream.launch_graph(&executable).expect("Launch MUST succeed");
        stream.synchronize().expect("Sync MUST succeed");
    }
}
/// End-to-end CUDA graph test: capture a real kernel launch into a graph,
/// instantiate it, and replay the executable 100 times.
///
/// The embedded `add_one` PTX kernel adds 1.0f to each of the first `n` floats
/// at `ptr`, one thread per element with 256 threads per block. The buffer is
/// initialized to 1.0 and the graph is replayed 100 times, so every element is
/// expected to end at 101.0 (stream capture records the launch without
/// executing it — confirmed by the 101.0 expectation below).
#[test]
fn test_cuda_graph_with_kernel() {
let ctx = CudaContext::new(0).expect("Context creation MUST succeed");
let stream = CudaStream::new(&ctx).expect("Stream creation MUST succeed");
let ptx = r#".version 8.0
.target sm_80
.address_size 64
.visible .entry add_one(
.param .u64 ptr,
.param .u32 n
) {
.reg .pred %p<2>;
.reg .u32 %r<4>;
.reg .u64 %rd<4>;
.reg .f32 %f<2>;
ld.param.u64 %rd1, [ptr];
ld.param.u32 %r1, [n];
mov.u32 %r2, %tid.x;
mov.u32 %r3, %ctaid.x;
mad.lo.u32 %r2, %r3, 256, %r2;
setp.ge.u32 %p1, %r2, %r1;
@%p1 bra $done;
cvt.u64.u32 %rd2, %r2;
shl.b64 %rd3, %rd2, 2;
add.u64 %rd3, %rd1, %rd3;
ld.global.f32 %f1, [%rd3];
add.f32 %f1, %f1, 1.0;
st.global.f32 [%rd3], %f1;
$done:
ret;
}
"#;
let mut module = CudaModule::from_ptx(&ctx, ptx).expect("Module load MUST succeed");
let data = vec![1.0f32; 256];
let mut buffer = GpuBuffer::from_host(&ctx, &data).expect("Buffer MUST succeed");
let n: u32 = 256;
// One block of 256 threads covers all 256 elements.
let config = LaunchConfig::linear(256, 256);
stream
.begin_capture(CaptureMode::Global)
.expect("Begin capture MUST succeed");
// CUDA kernel arguments are passed as an array of pointers to the argument
// VALUES. `ptr_arg`/`n_arg` hold the values; `args` holds pointers to them.
// All three locals must stay alive until the launch call returns.
let mut ptr_arg = buffer.as_ptr() as *mut c_void;
let mut n_arg = (&n as *const u32) as *mut c_void;
let mut args = [
&mut ptr_arg as *mut *mut c_void as *mut c_void,
&mut n_arg as *mut *mut c_void as *mut c_void,
];
// SAFETY(review): the arg array matches the kernel's (u64 ptr, u32 n)
// signature and the pointed-to locals outlive the call.
unsafe {
stream
.launch_kernel(&mut module, "add_one", &config, &mut args)
.expect("Kernel launch MUST succeed");
}
let graph = stream.end_capture().expect("End capture MUST succeed");
let exec = graph.instantiate().expect("Instantiate MUST succeed");
// Replay the captured launch 100 times; work only runs here, not at capture.
for _ in 0..100 {
stream
.launch_graph(&exec)
.expect("Graph launch MUST succeed");
}
stream.synchronize().expect("Sync MUST succeed");
let mut result = vec![0.0f32; 256];
buffer
.copy_to_host(&mut result)
.expect("copy_to_host MUST succeed");
// 1.0 initial + 100 replays of +1.0 => 101.0 per element.
for (i, &val) in result.iter().enumerate() {
assert!(
(val - 101.0).abs() < 0.01,
"Element {} should be 101.0, got {}",
i,
val
);
}
}
/// Graphs and their executables dropped in a loop must release cleanly.
#[test]
fn test_cuda_graph_drop_cleanup() {
    let _context = CudaContext::new(0).expect("Context creation MUST succeed");
    for _iteration in 0..10 {
        let g = CudaGraph::new().expect("Graph creation MUST succeed");
        let _executable = g.instantiate().expect("Instantiate MUST succeed");
        // Both `g` and `_executable` drop here.
    }
}
/// A minimal no-op PTX module loads and exposes a non-null raw handle.
#[test]
fn test_module_from_ptx() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let ptx = r#".version 8.0
.target sm_80
.address_size 64
.visible .entry noop() {
ret;
}
"#;
    let loaded = CudaModule::from_ptx(&context, ptx).expect("Module from_ptx MUST succeed");
    assert!(!loaded.raw().is_null());
}
/// get_function resolves an entry point defined in the PTX to a non-null handle.
#[test]
fn test_module_get_function() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let ptx = r#".version 8.0
.target sm_80
.address_size 64
.visible .entry test_func() {
ret;
}
"#;
    let mut loaded = CudaModule::from_ptx(&context, ptx).expect("Module MUST succeed");
    let handle = loaded
        .get_function("test_func")
        .expect("get_function MUST succeed");
    assert!(!handle.is_null());
}
/// has_function reports presence for defined kernels and absence otherwise.
#[test]
fn test_module_has_function() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let ptx = r#".version 8.0
.target sm_80
.address_size 64
.visible .entry existing_func() {
ret;
}
"#;
    let mut loaded = CudaModule::from_ptx(&context, ptx).expect("Module MUST succeed");
    assert!(!loaded.has_function("nonexistent_func"));
    assert!(loaded.has_function("existing_func"));
}
/// A no-op kernel launches with an empty argument list and the stream drains.
#[test]
fn test_module_launch_noop_kernel() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let stream = CudaStream::new(&context).expect("Stream creation MUST succeed");
    let ptx = r#".version 8.0
.target sm_80
.address_size 64
.visible .entry noop() {
ret;
}
"#;
    let mut loaded = CudaModule::from_ptx(&context, ptx).expect("Module MUST succeed");
    let launch = LaunchConfig::linear(1, 1);
    let mut no_args: [*mut c_void; 0] = [];
    // SAFETY: `noop` takes no parameters, so an empty argument array is valid.
    unsafe {
        stream
            .launch_kernel(&mut loaded, "noop", &launch, &mut no_args)
            .expect("Kernel launch MUST succeed");
    }
    stream.synchronize().expect("Sync MUST succeed");
}
/// Repeatedly creating and dropping 100 contexts must neither fail nor leak.
#[test]
fn test_cuda_stress_100_contexts() {
    for i in 0..100 {
        // `unwrap_or_else` defers building the panic message to the failure
        // path; the original `expect(&format!(..))` allocated the string on
        // every iteration even on success (clippy::expect_fun_call).
        let ctx = CudaContext::new(0)
            .unwrap_or_else(|e| panic!("Context {} MUST succeed: {:?}", i, e));
        let _ = ctx.memory_info();
    }
}
/// Thirty-two streams on one context can all be created and synchronized.
#[test]
fn test_cuda_stress_concurrent_streams() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let streams: Vec<_> = (0..32)
        .map(|_| CudaStream::new(&context).expect("Stream MUST succeed"))
        .collect();
    for each in &streams {
        each.synchronize().expect("Sync MUST succeed");
    }
}
/// Allocate 256 MB chunks until 4 GB total or exhaustion; at least 2 GB must
/// fit, and dropping everything must make room for one more chunk.
#[test]
fn test_cuda_stress_memory_pressure() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let chunk_size = 64 * 1024 * 1024; // f32 elements => 256 MB per chunk
    let mut held: Vec<GpuBuffer<f32>> = Vec::new();
    for i in 0..16 {
        match GpuBuffer::<f32>::new(&context, chunk_size) {
            Ok(chunk) => held.push(chunk),
            Err(_) => {
                eprintln!("Memory exhausted after {} chunks ({}MB)", i, i * 256);
                break;
            }
        }
    }
    assert!(
        held.len() >= 8,
        "RTX 4090 should handle at least 2GB allocation"
    );
    drop(held);
    let _post_cleanup: GpuBuffer<f32> =
        GpuBuffer::new(&context, chunk_size).expect("Post-cleanup allocation MUST succeed");
}
/// CaptureMode variants map onto the CUDA numeric capture-mode codes in order.
#[test]
fn test_capture_mode_to_cuda_mode() {
    let expected = [
        (CaptureMode::Global, 0),
        (CaptureMode::ThreadLocal, 1),
        (CaptureMode::Relaxed, 2),
    ];
    for (mode, code) in expected {
        assert_eq!(mode.to_cuda_mode(), code);
    }
}
/// Global is the default capture mode.
#[test]
fn test_capture_mode_default() {
    let default_mode = CaptureMode::default();
    assert_eq!(default_mode, CaptureMode::Global);
}
/// CaptureMode supports Clone (copies compare equal) and Debug formatting.
#[test]
fn test_capture_mode_debug_and_clone() {
    let original = CaptureMode::Global;
    let duplicate = original.clone();
    assert_eq!(original, duplicate);
    let rendered = format!("{:?}", CaptureMode::ThreadLocal);
    assert!(rendered.contains("ThreadLocal"));
}
/// linear(1024, 256) yields 4 blocks of 256 threads.
#[test]
fn test_launch_config_linear() {
    let cfg = LaunchConfig::linear(1024, 256);
    assert_eq!(cfg.block, (256, 1, 1));
    assert_eq!(cfg.grid, (4, 1, 1));
}
/// grid_2d fills the x/y dimensions and leaves z at 1.
#[test]
fn test_launch_config_grid_2d() {
    let cfg = LaunchConfig::grid_2d(32, 32, 16, 16);
    assert_eq!(cfg.block, (16, 16, 1));
    assert_eq!(cfg.grid, (32, 32, 1));
}
/// with_shared_mem records the requested shared-memory byte count.
#[test]
fn test_launch_config_with_shared_mem() {
    let cfg = LaunchConfig::linear(256, 256).with_shared_mem(4096);
    assert_eq!(cfg.shared_mem, 4096);
}
/// A linear config must cover at least the requested element count.
#[test]
fn test_launch_config_total_threads() {
    let cfg = LaunchConfig::linear(1000, 256);
    let (gx, gy, gz) = cfg.grid;
    let (bx, by, bz) = cfg.block;
    let total = gx * gy * gz * bx * by * bz;
    assert!(total >= 1000);
}
/// Async device-to-device copy into an offset region touches exactly [16, 48).
#[test]
fn test_gpu_buffer_copy_from_buffer_at_async_raw() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let stream = CudaStream::new(&context).expect("Stream creation MUST succeed");
    let handle = stream.raw();
    let source_data: Vec<f32> = (0..32).map(|i| i as f32).collect();
    let source = GpuBuffer::from_host(&context, &source_data).expect("src buffer MUST succeed");
    let mut destination: GpuBuffer<f32> =
        GpuBuffer::new(&context, 64).expect("dst buffer MUST succeed");
    let cleared = vec![0.0f32; 64];
    destination
        .copy_from_host(&cleared)
        .expect("copy_from_host MUST succeed");
    // SAFETY: ranges are in bounds by construction (dst 16+32 <= 64,
    // src 0+32 <= 32) and the raw stream handle stays live through the sync.
    unsafe {
        destination
            .copy_from_buffer_at_async_raw(&source, 16, 0, 32, handle)
            .expect("copy_from_buffer_at_async_raw MUST succeed");
    }
    stream.synchronize().expect("Sync MUST succeed");
    let mut readback = vec![0.0f32; 64];
    destination
        .copy_to_host(&mut readback)
        .expect("copy_to_host MUST succeed");
    assert_eq!(readback[15], 0.0, "Before copy region should be 0");
    assert_eq!(readback[16], 0.0, "First copied element should be 0.0");
    assert_eq!(readback[47], 31.0, "Last copied element should be 31.0");
    assert_eq!(readback[48], 0.0, "After copy region should be 0");
}
/// The raw async copy validates both source and destination ranges.
#[test]
fn test_gpu_buffer_copy_from_buffer_at_async_raw_bounds_check() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let stream = CudaStream::new(&context).expect("Stream creation MUST succeed");
    let handle = stream.raw();
    let source: GpuBuffer<f32> = GpuBuffer::new(&context, 10).expect("src buffer MUST succeed");
    let mut destination: GpuBuffer<f32> =
        GpuBuffer::new(&context, 20).expect("dst buffer MUST succeed");
    // Destination range 15..25 exceeds the 20-element buffer.
    let attempt = unsafe { destination.copy_from_buffer_at_async_raw(&source, 15, 0, 10, handle) };
    assert!(attempt.is_err(), "dst out of bounds MUST fail");
    // Source range 5..15 exceeds the 10-element buffer.
    let attempt = unsafe { destination.copy_from_buffer_at_async_raw(&source, 0, 5, 10, handle) };
    assert!(attempt.is_err(), "src out of bounds MUST fail");
    // A zero-length copy is always in range.
    let attempt = unsafe { destination.copy_from_buffer_at_async_raw(&source, 0, 0, 0, handle) };
    assert!(attempt.is_ok(), "Zero count copy MUST succeed");
}
/// An async host-to-device upload becomes visible after stream synchronization.
#[test]
fn test_gpu_buffer_async_host_to_device() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let stream = CudaStream::new(&context).expect("Stream creation MUST succeed");
    let mut device: GpuBuffer<f32> = GpuBuffer::new(&context, 256).expect("Buffer MUST succeed");
    let host: Vec<f32> = (0..256).map(|i| i as f32).collect();
    // SAFETY: `host` outlives the synchronize() call below, so the async copy
    // never reads freed memory.
    unsafe {
        device
            .copy_from_host_async(&host, &stream)
            .expect("copy_from_host_async MUST succeed");
    }
    stream.synchronize().expect("Sync MUST succeed");
    let mut readback = vec![0.0f32; 256];
    device
        .copy_to_host(&mut readback)
        .expect("copy_to_host MUST succeed");
    assert_eq!(readback, host);
}
/// An async device-to-host download is complete once the stream synchronizes.
#[test]
fn test_gpu_buffer_async_device_to_host() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let stream = CudaStream::new(&context).expect("Stream creation MUST succeed");
    let host: Vec<f32> = (0..128).map(|i| i as f32).collect();
    let device = GpuBuffer::from_host(&context, &host).expect("Buffer MUST succeed");
    let mut readback = vec![0.0f32; 128];
    // SAFETY: `readback` outlives the synchronize() call below.
    unsafe {
        device
            .copy_to_host_async(&mut readback, &stream)
            .expect("copy_to_host_async MUST succeed");
    }
    stream.synchronize().expect("Sync MUST succeed");
    assert_eq!(readback, host);
}
/// Host-to-device async copy rejects a host slice larger than the buffer.
#[test]
fn test_gpu_buffer_async_copy_size_mismatch_h2d() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let stream = CudaStream::new(&context).expect("Stream creation MUST succeed");
    let mut device: GpuBuffer<f32> = GpuBuffer::new(&context, 100).expect("Buffer MUST succeed");
    let oversized = vec![1.0f32; 200];
    let attempt = unsafe { device.copy_from_host_async(&oversized, &stream) };
    assert!(attempt.is_err(), "Size mismatch MUST fail");
}
/// Device-to-host async copy rejects an undersized destination slice.
#[test]
fn test_gpu_buffer_async_copy_size_mismatch_d2h() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let stream = CudaStream::new(&context).expect("Stream creation MUST succeed");
    let device: GpuBuffer<f32> = GpuBuffer::new(&context, 100).expect("Buffer MUST succeed");
    let mut undersized: Vec<f32> = vec![0.0f32; 50];
    let attempt = unsafe { device.copy_to_host_async(&mut undersized, &stream) };
    assert!(attempt.is_err(), "Size mismatch MUST fail");
}
/// A zero-length async host-to-device copy is a no-op that succeeds.
#[test]
fn test_gpu_buffer_async_copy_empty_h2d() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let stream = CudaStream::new(&context).expect("Stream creation MUST succeed");
    let mut device: GpuBuffer<f32> = GpuBuffer::new(&context, 0).expect("Buffer MUST succeed");
    let nothing: Vec<f32> = Vec::new();
    let attempt = unsafe { device.copy_from_host_async(&nothing, &stream) };
    assert!(attempt.is_ok(), "Empty copy MUST succeed");
}
/// A zero-length async device-to-host copy is a no-op that succeeds.
#[test]
fn test_gpu_buffer_async_copy_empty_d2h() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let stream = CudaStream::new(&context).expect("Stream creation MUST succeed");
    let device: GpuBuffer<f32> = GpuBuffer::new(&context, 0).expect("Buffer MUST succeed");
    let mut nothing: Vec<f32> = Vec::new();
    let attempt = unsafe { device.copy_to_host_async(&mut nothing, &stream) };
    assert!(attempt.is_ok(), "Empty copy MUST succeed");
}
/// copy_from_host_at enforces offset + len <= buffer len; empty copies pass.
#[test]
fn test_gpu_buffer_copy_from_host_at_bounds_check() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let mut device: GpuBuffer<f32> = GpuBuffer::new(&context, 100).expect("Buffer MUST succeed");
    let fifty = vec![1.0f32; 50];
    // 60 + 50 > 100: must be rejected.
    let attempt = device.copy_from_host_at(&fifty, 60);
    assert!(attempt.is_err(), "Out of bounds MUST fail");
    // Zero elements at the very end of the buffer: allowed.
    let nothing: Vec<f32> = Vec::new();
    let attempt = device.copy_from_host_at(&nothing, 50);
    assert!(attempt.is_ok(), "Empty copy MUST succeed");
}
/// copy_to_host_at enforces offset + len <= buffer len; empty copies pass.
#[test]
fn test_gpu_buffer_copy_to_host_at_bounds_check() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let device: GpuBuffer<f32> = GpuBuffer::new(&context, 100).expect("Buffer MUST succeed");
    let mut fifty = vec![0.0f32; 50];
    // 60 + 50 > 100: must be rejected.
    let attempt = device.copy_to_host_at(&mut fifty, 60);
    assert!(attempt.is_err(), "Out of bounds MUST fail");
    // Zero elements at the very end of the buffer: allowed.
    let mut nothing: Vec<f32> = Vec::new();
    let attempt = device.copy_to_host_at(&mut nothing, 50);
    assert!(attempt.is_ok(), "Empty copy MUST succeed");
}
/// Whole-buffer device-to-device copy requires matching lengths.
#[test]
fn test_gpu_buffer_copy_from_buffer_size_mismatch() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let source: GpuBuffer<f32> = GpuBuffer::new(&context, 100).expect("src MUST succeed");
    let mut destination: GpuBuffer<f32> = GpuBuffer::new(&context, 50).expect("dst MUST succeed");
    assert!(
        destination.copy_from_buffer(&source).is_err(),
        "Size mismatch MUST fail"
    );
}
/// Copying between two zero-length buffers succeeds trivially.
#[test]
fn test_gpu_buffer_copy_from_buffer_empty() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let source: GpuBuffer<f32> = GpuBuffer::new(&context, 0).expect("src MUST succeed");
    let mut destination: GpuBuffer<f32> = GpuBuffer::new(&context, 0).expect("dst MUST succeed");
    assert!(
        destination.copy_from_buffer(&source).is_ok(),
        "Empty copy MUST succeed"
    );
}
/// copy_from_buffer_at rejects a destination range past the end.
#[test]
fn test_gpu_buffer_copy_from_buffer_at_bounds_check_dst() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let source: GpuBuffer<f32> = GpuBuffer::new(&context, 20).expect("src MUST succeed");
    let mut destination: GpuBuffer<f32> = GpuBuffer::new(&context, 50).expect("dst MUST succeed");
    // Destination range 40..60 exceeds the 50-element buffer.
    let attempt = destination.copy_from_buffer_at(&source, 40, 0, 20);
    assert!(attempt.is_err(), "dst out of bounds MUST fail");
}
/// copy_from_buffer_at rejects a source range past the end.
#[test]
fn test_gpu_buffer_copy_from_buffer_at_bounds_check_src() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let source: GpuBuffer<f32> = GpuBuffer::new(&context, 20).expect("src MUST succeed");
    let mut destination: GpuBuffer<f32> = GpuBuffer::new(&context, 50).expect("dst MUST succeed");
    // Source range 15..35 exceeds the 20-element buffer.
    let attempt = destination.copy_from_buffer_at(&source, 0, 15, 20);
    assert!(attempt.is_err(), "src out of bounds MUST fail");
}
/// A zero-count ranged copy succeeds regardless of offsets.
#[test]
fn test_gpu_buffer_copy_from_buffer_at_zero_count() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let source: GpuBuffer<f32> = GpuBuffer::new(&context, 20).expect("src MUST succeed");
    let mut destination: GpuBuffer<f32> = GpuBuffer::new(&context, 50).expect("dst MUST succeed");
    let attempt = destination.copy_from_buffer_at(&source, 0, 0, 0);
    assert!(attempt.is_ok(), "Zero count copy MUST succeed");
}
/// clone_metadata produces a view sharing the pointer and geometry.
#[test]
fn test_gpu_buffer_view_operations() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let owner: GpuBuffer<f32> = GpuBuffer::new(&context, 128).expect("Buffer MUST succeed");
    let view = owner.clone_metadata();
    assert_eq!(view.len(), 128);
    assert_eq!(view.as_ptr(), owner.as_ptr());
    assert_eq!(view.size_bytes(), 128 * 4);
    assert!(!view.is_empty());
}
/// A view over a zero-length buffer is empty.
#[test]
fn test_gpu_buffer_empty_view() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let owner: GpuBuffer<f32> = GpuBuffer::new(&context, 0).expect("Buffer MUST succeed");
    let view = owner.clone_metadata();
    assert_eq!(view.len(), 0);
    assert_eq!(view.size_bytes(), 0);
    assert!(view.is_empty());
}
/// as_kernel_arg points at the buffer's stored (non-zero) device pointer.
#[test]
fn test_gpu_buffer_kernel_arg() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let buf: GpuBuffer<f32> = GpuBuffer::new(&context, 64).expect("Buffer MUST succeed");
    let arg = buf.as_kernel_arg();
    assert!(!arg.is_null());
    // SAFETY: the kernel-arg pointer addresses the buffer's u64 device
    // pointer, which lives as long as `buf`.
    let device_ptr = unsafe { *(arg as *const u64) };
    assert!(device_ptr != 0);
}
/// Looked-up kernels are cached and enumerable via cached_functions.
#[test]
fn test_module_cached_functions() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let ptx = r#".version 8.0
.target sm_80
.address_size 64
.visible .entry func_a() {
ret;
}
.visible .entry func_b() {
ret;
}
"#;
    let mut loaded = CudaModule::from_ptx(&context, ptx).expect("Module MUST succeed");
    loaded.get_function("func_a").expect("func_a MUST exist");
    loaded.get_function("func_b").expect("func_b MUST exist");
    let cached = loaded.cached_functions();
    assert_eq!(cached.len(), 2);
    assert!(cached.contains(&"func_b"));
    assert!(cached.contains(&"func_a"));
}
/// Repeated lookups of the same kernel return the identical cached handle.
#[test]
fn test_module_get_function_cached() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let ptx = r#".version 8.0
.target sm_80
.address_size 64
.visible .entry cached_test() {
ret;
}
"#;
    let mut loaded = CudaModule::from_ptx(&context, ptx).expect("Module MUST succeed");
    let first = loaded
        .get_function("cached_test")
        .expect("First lookup MUST succeed");
    let second = loaded
        .get_function("cached_test")
        .expect("Second lookup MUST succeed");
    assert_eq!(first, second);
}
/// Garbage PTX source must be rejected at module load time.
#[test]
fn test_module_invalid_ptx_error() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let bad_ptx = "this is not valid PTX";
    assert!(CudaModule::from_ptx(&context, bad_ptx).is_err());
}
/// Looking up a kernel the module does not define returns an error.
#[test]
fn test_module_function_not_found() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let ptx = r#".version 8.0
.target sm_80
.address_size 64
.visible .entry existing() {
ret;
}
"#;
    let mut loaded = CudaModule::from_ptx(&context, ptx).expect("Module MUST succeed");
    assert!(loaded.get_function("nonexistent_function").is_err());
}
/// The same PTX can be loaded and dropped repeatedly without error.
#[test]
fn test_module_drop_multiple() {
    let context = CudaContext::new(0).expect("Context creation MUST succeed");
    let ptx = r#".version 8.0
.target sm_80
.address_size 64
.visible .entry drop_test() {
ret;
}
"#;
    for _iteration in 0..5 {
        let loaded = CudaModule::from_ptx(&context, ptx).expect("Module MUST succeed");
        assert!(!loaded.raw().is_null());
        // `loaded` drops here, unloading the module before the next iteration.
    }
}