#[cfg(test)]
mod property_tests {
use cuda_rust_wasm::{CudaParser, CudaRust};
use cuda_rust_wasm::runtime::{
Grid, Block, Dim3, LaunchConfig, KernelFunction, ThreadContext, launch_kernel,
};
use cuda_rust_wasm::memory::MemoryPool;
#[test]
fn prop_valid_kernels_transpile() {
let kernels = vec![
r#"__global__ void add(float* a, float* b, float* c) { int i = threadIdx.x; c[i] = a[i] + b[i]; }"#,
r#"__global__ void scale(float* data, float s, int n) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < n) data[i] *= s; }"#,
r#"__global__ void identity(float* in, float* out, int n) { int i = threadIdx.x; if (i < n) out[i] = in[i]; }"#,
r#"__global__ void saxpy(float a, float* x, float* y, int n) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < n) y[i] = a * x[i] + y[i]; }"#,
];
let transpiler = CudaRust::new();
for (idx, kernel) in kernels.iter().enumerate() {
let result = transpiler.transpile(kernel);
assert!(result.is_ok(), "Kernel {} failed to transpile: {:?}", idx, result.err());
let code = result.unwrap();
assert!(!code.is_empty(), "Kernel {} produced empty output", idx);
}
}
#[test]
fn prop_parser_handles_types() {
let parser = CudaParser::new();
let typed_kernels = vec![
r#"__global__ void k1(int* a) { int i = threadIdx.x; a[i] = i; }"#,
r#"__global__ void k2(float* a) { int i = threadIdx.x; a[i] = 1.0f; }"#,
r#"__global__ void k3(double* a) { int i = threadIdx.x; a[i] = 1.0; }"#,
r#"__global__ void k4(unsigned int* a) { int i = threadIdx.x; a[i] = i; }"#,
];
for (idx, kernel) in typed_kernels.iter().enumerate() {
let result = parser.parse(kernel);
assert!(result.is_ok(), "Type kernel {} failed: {:?}", idx, result.err());
}
}
struct VectorAddKernel {
a: Vec<f32>,
b: Vec<f32>,
c: std::sync::Arc<std::sync::Mutex<Vec<f32>>>,
}
impl KernelFunction<()> for VectorAddKernel {
fn execute(&self, _args: (), ctx: ThreadContext) {
let idx = ctx.global_thread_id();
if idx < self.a.len() {
let mut c = self.c.lock().unwrap();
c[idx] = self.a[idx] + self.b[idx];
}
}
fn name(&self) -> &str { "vector_add" }
}
#[test]
fn prop_vector_add_commutative() {
let n = 256;
let a: Vec<f32> = (0..n).map(|i| i as f32 * 1.5).collect();
let b: Vec<f32> = (0..n).map(|i| (n - i) as f32 * 0.7).collect();
let c1 = std::sync::Arc::new(std::sync::Mutex::new(vec![0.0f32; n]));
let kernel1 = VectorAddKernel { a: a.clone(), b: b.clone(), c: c1.clone() };
let config = LaunchConfig::new(Grid::new(1u32), Block::new(n as u32));
launch_kernel(kernel1, config, ()).unwrap();
let c2 = std::sync::Arc::new(std::sync::Mutex::new(vec![0.0f32; n]));
let kernel2 = VectorAddKernel { a: b.clone(), b: a.clone(), c: c2.clone() };
let config = LaunchConfig::new(Grid::new(1u32), Block::new(n as u32));
launch_kernel(kernel2, config, ()).unwrap();
let r1 = c1.lock().unwrap();
let r2 = c2.lock().unwrap();
for i in 0..n {
assert!((r1[i] - r2[i]).abs() < 1e-5, "Mismatch at {}: {} vs {}", i, r1[i], r2[i]);
}
}
struct IdentityKernel {
input: Vec<f32>,
output: std::sync::Arc<std::sync::Mutex<Vec<f32>>>,
}
impl KernelFunction<()> for IdentityKernel {
fn execute(&self, _args: (), ctx: ThreadContext) {
let idx = ctx.global_thread_id();
if idx < self.input.len() {
let mut out = self.output.lock().unwrap();
out[idx] = self.input[idx];
}
}
fn name(&self) -> &str { "identity" }
}
#[test]
fn prop_identity_preserves_data() {
let data: Vec<f32> = (0..512).map(|i| (i as f32).sin()).collect();
let output = std::sync::Arc::new(std::sync::Mutex::new(vec![0.0f32; 512]));
let kernel = IdentityKernel { input: data.clone(), output: output.clone() };
let config = LaunchConfig::new(Grid::new(2u32), Block::new(256u32));
launch_kernel(kernel, config, ()).unwrap();
let result = output.lock().unwrap();
for i in 0..data.len() {
assert_eq!(result[i], data[i], "Identity failed at index {}", i);
}
}
struct CountKernel {
output: std::sync::Arc<std::sync::Mutex<Vec<u32>>>,
}
impl KernelFunction<()> for CountKernel {
fn execute(&self, _args: (), ctx: ThreadContext) {
let idx = ctx.global_thread_id();
let mut out = self.output.lock().unwrap();
if idx < out.len() {
out[idx] = idx as u32;
}
}
fn name(&self) -> &str { "count" }
}
#[test]
fn prop_multi_block_coverage() {
let n = 1024usize;
let output = std::sync::Arc::new(std::sync::Mutex::new(vec![u32::MAX; n]));
let kernel = CountKernel { output: output.clone() };
let blocks = ((n + 255) / 256) as u32;
let config = LaunchConfig::new(Grid::new(blocks), Block::new(256u32));
launch_kernel(kernel, config, ()).unwrap();
let result = output.lock().unwrap();
for i in 0..n {
assert_eq!(result[i], i as u32, "Thread {} not reached", i);
}
}
struct ScalarMulKernel {
data: Vec<f32>,
scalar: f32,
output: std::sync::Arc<std::sync::Mutex<Vec<f32>>>,
}
impl KernelFunction<()> for ScalarMulKernel {
fn execute(&self, _args: (), ctx: ThreadContext) {
let idx = ctx.global_thread_id();
if idx < self.data.len() {
let mut out = self.output.lock().unwrap();
out[idx] = self.data[idx] * self.scalar;
}
}
fn name(&self) -> &str { "scalar_mul" }
}
#[test]
fn prop_scalar_mult_correct() {
let n = 256;
let data: Vec<f32> = (0..n).map(|i| i as f32).collect();
let scalar = 3.14f32;
let output = std::sync::Arc::new(std::sync::Mutex::new(vec![0.0f32; n]));
let kernel = ScalarMulKernel { data: data.clone(), scalar, output: output.clone() };
let config = LaunchConfig::new(Grid::new(1u32), Block::new(n as u32));
launch_kernel(kernel, config, ()).unwrap();
let result = output.lock().unwrap();
for i in 0..n {
assert!((result[i] - data[i] * scalar).abs() < 1e-4,
"Scalar mult mismatch at {}: expected {}, got {}", i, data[i] * scalar, result[i]);
}
}
#[test]
fn prop_memory_pool_consistency() {
let pool = MemoryPool::new();
let sizes = [64usize, 128, 256, 512, 1024, 4096];
for &size in &sizes {
let buffer = pool.allocate(size);
assert_eq!(buffer.len(), size, "Buffer size mismatch for allocation of {} bytes", size);
pool.deallocate(buffer);
}
let stats = pool.stats();
assert!(stats.total_allocations >= sizes.len() as u64,
"Expected at least {} allocations, got {}", sizes.len(), stats.total_allocations);
}
#[test]
fn prop_memory_pool_reuse() {
let pool = MemoryPool::new();
let buffer = pool.allocate(2048);
assert_eq!(buffer.len(), 2048);
pool.deallocate(buffer);
let _buffer2 = pool.allocate(2048);
let stats = pool.stats();
assert!(stats.cache_hits > 0, "Expected cache hits after reuse, got {}", stats.cache_hits);
}
#[test]
fn prop_grid_block_dimensions() {
for x in [1u32, 2, 4, 8, 16, 32, 64, 128, 256] {
let grid = Grid::new(x);
assert_eq!(grid.num_blocks(), x);
let block = Block::new(x);
assert_eq!(block.num_threads(), x);
}
let grid_2d = Grid::new((4u32, 4u32));
assert_eq!(grid_2d.num_blocks(), 16);
let grid_3d = Grid::new((2u32, 3u32, 4u32));
assert_eq!(grid_3d.num_blocks(), 24);
}
#[test]
fn prop_block_validation() {
assert!(Block::new(1u32).validate().is_ok());
assert!(Block::new(256u32).validate().is_ok());
assert!(Block::new(1024u32).validate().is_ok());
assert!(Block::new(2048u32).validate().is_err());
}
#[test]
fn prop_thread_context_global_ids() {
let ctx = ThreadContext {
thread_idx: Dim3 { x: 5, y: 0, z: 0 },
block_idx: Dim3 { x: 2, y: 0, z: 0 },
block_dim: Dim3 { x: 256, y: 1, z: 1 },
grid_dim: Dim3 { x: 4, y: 1, z: 1 },
};
assert_eq!(ctx.global_thread_id(), 2 * 256 + 5);
let ctx_2d = ThreadContext {
thread_idx: Dim3 { x: 3, y: 7, z: 0 },
block_idx: Dim3 { x: 1, y: 2, z: 0 },
block_dim: Dim3 { x: 16, y: 16, z: 1 },
grid_dim: Dim3 { x: 4, y: 4, z: 1 },
};
let (gx, gy) = ctx_2d.global_thread_id_2d();
assert_eq!(gx, 1 * 16 + 3);
assert_eq!(gy, 2 * 16 + 7);
}
#[test]
fn prop_transpile_deterministic() {
let kernel = r#"__global__ void add(float* a, float* b, float* c) {
int i = threadIdx.x;
c[i] = a[i] + b[i];
}"#;
let transpiler = CudaRust::new();
let result1 = transpiler.transpile(kernel).unwrap();
let result2 = transpiler.transpile(kernel).unwrap();
assert_eq!(result1, result2, "Transpiler should produce deterministic output");
}
#[test]
fn prop_invalid_kernels_handled() {
let transpiler = CudaRust::new();
let result = transpiler.transpile("");
let _ = result;
let result = transpiler.transpile("this is not valid CUDA code at all!!!");
let _ = result;
}
}