use crate::driver::{CudaContext, CudaStream};
use crate::memory::resident::{clear_kernel_cache, reset_transfer_counters, GpuResidentTensor};
/// Builds a linear ramp of `size` values for use as deterministic test input.
///
/// The element at index `i` is `i as f32 * scale + offset`, so the first
/// element equals `offset` and consecutive elements are `scale` apart
/// (up to `f32` rounding). A `size` of zero yields an empty vector.
fn ramp_f32(size: usize, scale: f32, offset: f32) -> Vec<f32> {
    let mut ramp = Vec::with_capacity(size);
    for index in 0..size {
        // Multiply then add as two separate operations; do not fuse them,
        // so every value is rounded exactly once per step.
        let value = index as f32 * scale + offset;
        ramp.push(value);
    }
    ramp
}
/// Creates a [`GpuResidentTensor<f32>`] from the host slice `data` using
/// `GpuResidentTensor::from_host` on the given context.
///
/// # Panics
///
/// Panics with the message `"GPU upload failed in test"` (followed by the
/// error's `Debug` output) if `from_host` returns an error. This helper is
/// meant for tests, where such a failure should abort the test immediately.
fn upload(ctx: &CudaContext, data: &[f32]) -> GpuResidentTensor<f32> {
    let uploaded = GpuResidentTensor::from_host(ctx, data);
    uploaded.expect("GPU upload failed in test")
}
/// Evaluates to a CUDA context obtained after clearing the kernel cache.
///
/// Expands to a block that first calls `clear_kernel_cache()` and then
/// evaluates `cuda_ctx!()`, whose value becomes the value of the block.
/// Both names are resolved at the call site, so they must be in scope
/// wherever this macro is invoked.
///
/// Any early `return` performed by `cuda_ctx!()` happens in the calling
/// function, after the cache has already been cleared.
macro_rules! fresh_ctx {
    () => {{
        // Clear the cache *before* acquiring the context.
        clear_kernel_cache();
        cuda_ctx!()
    }};
}
/// Evaluates to the value produced by `CudaContext::new(0)` on success, or
/// skips the calling test when that call fails.
///
/// On `Err`, the error is printed to stderr as
/// `Skipping CUDA test: <error Debug output>` and the macro executes a bare
/// `return;` in the *calling* function. It can therefore only be used inside
/// functions that return `()`, such as ordinary `#[test]` functions.
///
/// `CudaContext` is resolved at the call site, so it must be in scope
/// wherever this macro is invoked.
macro_rules! cuda_ctx {
    () => {
        match CudaContext::new(0) {
            Err(reason) => {
                // Treat a failed context creation as "skip", not "fail".
                eprintln!("Skipping CUDA test: {:?}", reason);
                return;
            }
            Ok(context) => context,
        }
    };
}
mod layernorm_gelu_bias;
mod pmat018_extended;
mod softmax_add_scale;