use std::println;
use alloc::string::{String, ToString};
use crate::{self as cubecl, as_bytes};
use cubecl::prelude::*;
use cubecl_runtime::server::{ResourceLimitError, ServerError};
/// Launch argument that pairs an `f32` array with a string tag.
///
/// `kernel_with_comptime_tag` compares `tag` against `"zero"` inside a
/// `comptime!` block to pick the constant it stores in `array[0]`.
#[derive(CubeLaunch, CubeType)]
pub struct ComptimeTag {
// Buffer the kernel writes its single result into.
array: Array<f32>,
// Marked `#[cube(comptime)]`; host code passes it as a plain `String`
// through `ComptimeTagLaunch::new`.
#[cube(comptime)]
tag: String,
}
/// Kernel that stores `0.0` or `1.0` in `output.array[0]` depending on `output.tag`.
///
/// Only the unit with `UNIT_POS == 0` writes. The tag comparison is wrapped in
/// `comptime!`, so it is presumably resolved while the kernel is expanded
/// rather than on the device (TODO confirm against the macro docs).
#[cube(launch)]
pub fn kernel_with_comptime_tag(output: &mut ComptimeTag) {
if UNIT_POS == 0 {
// The tag `"zero"` selects 0.0; every other tag selects 1.0.
if comptime![&output.tag == "zero"] {
output.array[0] = f32::new(0.0);
} else {
output.array[0] = f32::new(1.0);
}
}
}
/// Kernel generic over the float type `F` that stores `F::new(5.0)` in `output[0]`.
///
/// Only the unit with `UNIT_POS == 0` writes; other elements of `output` are
/// left untouched.
#[cube(launch)]
pub fn kernel_with_generics<F: Float>(output: &mut Array<F>) {
if UNIT_POS == 0 {
output[0] = F::new(5.0);
}
}
/// Non-generic `f32` kernel that stores `5.0` in `output[0]`.
///
/// Only the unit with `UNIT_POS == 0` writes; other elements of `output` are
/// left untouched.
#[cube(launch)]
pub fn kernel_without_generics(output: &mut Array<f32>) {
if UNIT_POS == 0 {
output[0] = 5.0;
}
}
/// Same body as `kernel_without_generics`, declared with `address_type = "dynamic"`.
///
/// With this attribute the generated `launch` function is called with an extra
/// `AddressType` argument after the cube dimensions (see
/// `test_kernel_dynamic_addressing`).
#[cube(launch, address_type = "dynamic")]
pub fn kernel_dynamic_addressing(output: &mut Array<f32>) {
if UNIT_POS == 0 {
output[0] = 5.0;
}
}
/// Kernel that allocates two `SharedMemory<u32>` buffers of comptime sizes and
/// routes the first eight elements of `output` through their tail ends.
///
/// Units with `UNIT_POS < 8` each store one value at the end of each buffer,
/// call `sync_cube()`, then read two values back and write their sum to
/// `output[UNIT_POS]`. Units with `UNIT_POS >= 8` only take part in the
/// `sync_cube()` call.
///
/// * `shared_size_1` - element count of the first shared buffer.
/// * `shared_size_2` - element count of the second shared buffer.
#[cube(launch)]
pub fn kernel_with_max_shared(
output: &mut Array<u32>,
#[comptime] shared_size_1: usize,
#[comptime] shared_size_2: usize,
) {
let mut shared_1 = SharedMemory::<u32>::new(shared_size_1);
let mut shared_2 = SharedMemory::<u32>::new(shared_size_2);
if UNIT_POS < 8 {
// Unit `i` fills the slot `i + 1` positions from the end of each buffer.
shared_1[shared_size_1 - UNIT_POS as usize - 1] = output[UNIT_POS as usize];
// NOTE(review): for `UNIT_POS == 0` this reads `output[8]`; the test in this
// file passes an 8-element array, so that index is one past the end.
shared_2[shared_size_2 - UNIT_POS as usize - 1] = output[8 - UNIT_POS as usize];
}
sync_cube();
if UNIT_POS < 8 {
// Offset `- 2` addresses the slot written above by unit `UNIT_POS + 1`.
// NOTE(review): for `UNIT_POS == 7` no unit writes that slot, because
// units >= 8 skip the write block.
let a = shared_1[shared_size_1 - UNIT_POS as usize - 2];
// Offset `- 1` addresses this unit's own slot in the second buffer.
let b = shared_2[shared_size_2 - UNIT_POS as usize - 1];
output[UNIT_POS as usize] = a + b;
}
}
/// Minimal kernel used by the resource-limit tests in this file.
///
/// Allocates a `SharedMemory<u32>` buffer of `shared_size` elements, writes `0`
/// to its first slot, calls `sync_cube()` and copies that slot to `output[0]`.
/// The tests vary `shared_size` and the cube dimensions to exceed hardware
/// limits.
#[cube(launch)]
pub fn kernel_resource_errors(output: &mut Array<u32>, #[comptime] shared_size: usize) {
let mut shared = SharedMemory::<u32>::new(shared_size);
shared[0] = 0;
sync_cube();
output[0] = shared[0];
}
/// Launches `kernel_with_comptime_tag` once per tag and checks the stored value.
///
/// The tag `"zero"` must produce `0.0` in the first output element and the tag
/// `"not_zero"` must produce `1.0`. Every run starts from a fresh one-element
/// buffer holding `5.0`, so a kernel that writes nothing is detected.
pub fn test_kernel_with_comptime_tag<R: Runtime>(client: ComputeClient<R>) {
    let cases = [("zero", f32::new(0.0)), ("not_zero", f32::new(1.0))];

    for (tag, expected) in cases {
        let buffer = client.create_from_slice(f32::as_bytes(&[5.0]));
        // SAFETY: the length 1 matches the single element uploaded just above.
        let array_arg = unsafe { ArrayArg::from_raw_parts(buffer.clone(), 1) };
        let launch_arg = ComptimeTagLaunch::new(array_arg, tag.to_string());

        kernel_with_comptime_tag::launch(
            &client,
            CubeCount::Static(1, 1, 1),
            CubeDim::new_1d(1),
            launch_arg,
        );

        let raw = client.read_one_unchecked(buffer);
        let values = f32::from_bytes(&raw);
        assert_eq!(values[0], expected);
    }
}
/// Launches `kernel_with_generics` for float type `F` and checks that the first
/// output element was overwritten with `5.0`.
pub fn test_kernel_with_generics<R: Runtime, F: Float + CubeElement>(client: ComputeClient<R>) {
    let buffer = client.create_from_slice(as_bytes![F: 0.0, 1.0]);
    // SAFETY: the length 2 matches the two elements uploaded just above.
    let output_arg = unsafe { ArrayArg::from_raw_parts(buffer.clone(), 2) };
    let cube_count = CubeCount::Static(1, 1, 1);
    let cube_dim = CubeDim::new_1d(1);

    kernel_with_generics::launch::<F, R>(&client, cube_count, cube_dim, output_arg);

    let raw = client.read_one_unchecked(buffer);
    let values = F::from_bytes(&raw);
    assert_eq!(values[0], F::new(5.0));
}
/// Launches `kernel_without_generics` on a two-element `f32` buffer and checks
/// that the first element was overwritten with `5.0`.
pub fn test_kernel_without_generics<R: Runtime>(client: ComputeClient<R>) {
    let buffer = client.create_from_slice(f32::as_bytes(&[0.0, 1.0]));
    // SAFETY: the length 2 matches the two elements uploaded just above.
    let output_arg = unsafe { ArrayArg::from_raw_parts(buffer.clone(), 2) };
    let cube_count = CubeCount::Static(1, 1, 1);
    let cube_dim = CubeDim::new_1d(1);

    kernel_without_generics::launch(&client, cube_count, cube_dim, output_arg);

    let raw = client.read_one_unchecked(buffer);
    let values = f32::from_bytes(&raw);
    assert_eq!(values[0], 5.0);
}
/// Launches `kernel_with_max_shared` with two shared buffers that together use
/// the whole shared-memory budget reported by the client.
///
/// The first buffer takes a fixed 24576 bytes and the second takes whatever
/// remains of `max_shared_memory_size`. The kernel is launched with eight
/// units, because it only does work in units `0..8` and the assertion expects
/// all eight output elements to have been rewritten.
///
/// NOTE(review): the expected `1` at both ends of the result only holds if the
/// kernel's read of `output[8]` and its read of the never-written shared slot
/// both yield `0` — confirm this is guaranteed on every runtime.
pub fn test_kernel_max_shared<R: Runtime>(client: ComputeClient<R>) {
    let total_shared_size = client.properties().hardware.max_shared_memory_size;

    let handle = client.create_from_slice(u32::as_bytes(&[0, 1, 2, 3, 4, 5, 6, 7]));

    // Sizes are element counts, hence the division by the element width.
    let shared_size_1 = 24576 / size_of::<u32>();
    let shared_size_2 = (total_shared_size - 24576) / size_of::<u32>();

    kernel_with_max_shared::launch(
        &client,
        CubeCount::Static(1, 1, 1),
        // One unit per output element: with a single unit only `output[0]`
        // would be written and the assertion below could never hold.
        CubeDim::new_1d(8),
        // SAFETY: the length 8 matches the eight elements uploaded above.
        unsafe { ArrayArg::from_raw_parts(handle.clone(), 8) },
        shared_size_1,
        shared_size_2,
    );

    let actual = client.read_one_unchecked(handle);
    let actual = u32::from_bytes(&actual);

    // Interior units sum `(i + 1) + (8 - i) = 9`; see the note above for the ends.
    assert_eq!(actual, &[1, 9, 9, 9, 9, 9, 9, 1]);
}
/// Requests more shared memory than the hardware reports and checks the
/// resulting resource-limit error.
///
/// Returns immediately when the client reports a CPU core count. When `flush`
/// does not return `ServerError::ServerUnhealthy`, nothing is asserted and the
/// function returns normally.
///
/// # Panics
///
/// Panics if the first reported error is not a shared-memory resource-limit
/// error, or if its `requested`/`max` fields differ from the values computed
/// here.
pub fn test_shared_memory_error<R: Runtime>(client: ComputeClient<R>) {
    if client.properties().hardware.num_cpu_cores.is_some() {
        return;
    }

    let max_shared_size = client.properties().hardware.max_shared_memory_size;
    // Smallest whole number of `u32` elements whose byte size exceeds the limit.
    let shared_size = (max_shared_size + 1).div_ceil(size_of::<u32>());
    let requested_bytes = shared_size * size_of::<u32>();

    let output = client.create_from_slice(u32::as_bytes(&[0]));
    // SAFETY: the length 1 matches the single element uploaded just above.
    let output_arg = unsafe { ArrayArg::from_raw_parts(output.clone(), 1) };

    kernel_resource_errors::launch(
        &client,
        CubeCount::Static(1, 1, 1),
        CubeDim::new_1d(1),
        output_arg,
        shared_size,
    );

    let Err(ServerError::ServerUnhealthy { mut errors, .. }) = client.flush() else {
        return;
    };

    match errors.remove(0) {
        ServerError::Launch(LaunchError::TooManyResources(
            ResourceLimitError::SharedMemory { requested, max, .. },
        )) => {
            assert_eq!(
                requested_bytes, requested,
                "Requested should be equal to requested size"
            );
            assert_eq!(
                max_shared_size, max,
                "Max should be equal to max shared size"
            );
        }
        ServerError::Launch(LaunchError::TooManyResources(other)) => {
            panic!("Should be shared memory resource error, is {other:?}")
        }
        other => panic!("Should be resource error, is {other:?}"),
    }
}
/// Launches with a z cube dimension one larger than the hardware maximum and
/// checks the resulting resource-limit error.
///
/// Two error kinds are accepted: a `CubeDim` error, or a `Units` error when the
/// oversized z dimension alone already exceeds `max_units_per_cube`.
///
/// Returns immediately when the z limit is `u32::MAX`. When `flush` does not
/// return `ServerError::ServerUnhealthy`, nothing is asserted and the function
/// returns normally.
///
/// # Panics
///
/// Panics if the first reported error is neither of the accepted kinds, or if
/// its `requested`/`max` fields differ from the values computed here.
pub fn test_cube_dim_error<R: Runtime>(client: ComputeClient<R>) {
    let max_cube_dim = client.properties().hardware.max_cube_dim;
    let max_units = client.properties().hardware.max_units_per_cube;

    // `max_cube_dim.2 + 1` below would overflow, so there is no limit to exceed.
    if max_cube_dim.2 == u32::MAX {
        return;
    }

    let handle = client.create_from_slice(u32::as_bytes(&[0]));

    kernel_resource_errors::launch(
        &client,
        CubeCount::Static(1, 1, 1),
        CubeDim::new_3d(1, 1, max_cube_dim.2 + 1),
        // SAFETY: the length 1 matches the single element uploaded just above.
        unsafe { ArrayArg::from_raw_parts(handle.clone(), 1) },
        1,
    );

    let result = client.flush();
    if let Err(ServerError::ServerUnhealthy { mut errors, .. }) = result {
        let error = errors.remove(0);
        match error {
            ServerError::Launch(LaunchError::TooManyResources(inner)) => match inner {
                ResourceLimitError::CubeDim { requested, max, .. } => {
                    assert_eq!((1, 1, max_cube_dim.2 + 1), requested);
                    assert_eq!(max_cube_dim, max);
                }
                // The total unit count is `max_cube_dim.2 + 1`, which exceeds
                // `max_units` exactly when `max_cube_dim.2 >= max_units`.
                ResourceLimitError::Units { requested, max, .. } if max_cube_dim.2 >= max_units => {
                    assert_eq!(max_cube_dim.2 + 1, requested);
                    assert_eq!(max_units, max);
                }
                other => panic!("Should be cube dim or units resource error, is {other:?}"),
            },
            other => panic!("Should be resource error, is {other:?}"),
        }
    }
}
/// Launches with every cube dimension at its hardware maximum and checks that
/// the total unit count is rejected with a `Units` resource-limit error.
///
/// Returns immediately when the z limit is `u32::MAX`. When `flush` does not
/// return `ServerError::ServerUnhealthy`, nothing is asserted and the function
/// returns normally.
///
/// # Panics
///
/// Panics if the first reported error is not a `Units` resource-limit error, or
/// if its `requested`/`max` fields differ from the values computed here.
pub fn test_max_units_error<R: Runtime>(client: ComputeClient<R>) {
    let max_cube_dim = client.properties().hardware.max_cube_dim;
    if max_cube_dim.2 == u32::MAX {
        return;
    }
    let max_units = client.properties().hardware.max_units_per_cube;

    // Use the per-axis maxima as the cube shape; the expected `requested`
    // value is that shape's total element count.
    let cube_dim: CubeDim = max_cube_dim.into();
    let requested_units = cube_dim.num_elems();

    let handle = client.create_from_slice(u32::as_bytes(&[0]));

    kernel_resource_errors::launch(
        &client,
        CubeCount::Static(1, 1, 1),
        cube_dim,
        // SAFETY: the length 1 matches the single element uploaded just above.
        unsafe { ArrayArg::from_raw_parts(handle.clone(), 1) },
        1,
    );

    let result = client.flush();
    if let Err(ServerError::ServerUnhealthy { mut errors, .. }) = result {
        let error = errors.remove(0);
        match error {
            ServerError::Launch(LaunchError::TooManyResources(inner)) => match inner {
                ResourceLimitError::Units { requested, max, .. } => {
                    assert_eq!(requested_units, requested);
                    assert_eq!(max_units, max);
                }
                other => panic!("Should be units resource error, is {other:?}"),
            },
            other => panic!("Should be resource error, is {other:?}"),
        }
    }
}
/// Launches `kernel_dynamic_addressing` with the given `address_type` and
/// checks that the first output element was overwritten with `5.0`.
///
/// Prints a message and returns without launching when the client reports no
/// support for `address_type`.
pub fn test_kernel_dynamic_addressing<R: Runtime>(
    client: ComputeClient<R>,
    address_type: AddressType,
) {
    let buffer = client.create_from_slice(f32::as_bytes(&[0.0, 1.0]));

    let supported = client.properties().supports_address(address_type);
    if !supported {
        println!("Skipping dynamic addressing kernel, no type support");
        return;
    }

    // SAFETY: the length 2 matches the two elements uploaded just above.
    let output_arg = unsafe { ArrayArg::from_raw_parts(buffer.clone(), 2) };

    kernel_dynamic_addressing::launch(
        &client,
        CubeCount::Static(1, 1, 1),
        CubeDim::new_1d(1),
        address_type,
        output_arg,
    );

    let raw = client.read_one_unchecked(buffer);
    let values = f32::from_bytes(&raw);
    assert_eq!(values[0], 5.0);
}
// Generates the typed launch tests: with generics, without generics, with a
// comptime tag, and (ignored) with maximum shared memory.
//
// The expansion does `use super::*;` and refers to `TestRuntime` and
// `FloatType`, so both names must be reachable from the parent of the module
// in which the macro is invoked.
#[allow(missing_docs)]
#[macro_export]
macro_rules! testgen_launch {
() => {
use super::*;
#[$crate::runtime_tests::test_log::test]
fn test_launch_with_generics() {
let client = TestRuntime::client(&Default::default());
cubecl_core::runtime_tests::launch::test_kernel_with_generics::<TestRuntime, FloatType>(
client,
);
}
#[$crate::runtime_tests::test_log::test]
fn test_launch_without_generics() {
let client = TestRuntime::client(&Default::default());
cubecl_core::runtime_tests::launch::test_kernel_without_generics::<TestRuntime>(client);
}
#[$crate::runtime_tests::test_log::test]
fn test_launch_with_comptime_tag() {
let client = TestRuntime::client(&Default::default());
cubecl_core::runtime_tests::launch::test_kernel_with_comptime_tag::<TestRuntime>(
client,
);
}
// Not run by default; the ignore reason below is the only rationale given.
#[ignore = "Seemingly flaky with CPU emulation"]
#[$crate::runtime_tests::test_log::test]
fn test_launch_with_max_shared() {
let client = TestRuntime::client(&Default::default());
cubecl_core::runtime_tests::launch::test_kernel_max_shared::<TestRuntime>(client);
}
};
}
/// Generates the launch tests that do not depend on a float type: dynamic
/// addressing with 32-bit and 64-bit address types, plus the three
/// resource-limit error tests (currently ignored).
///
/// The expansion refers to `TestRuntime` and `AddressType` without importing
/// them, so both names must be in scope where the macro is invoked.
#[macro_export]
macro_rules! testgen_launch_untyped {
    () => {
        #[test]
        fn test_launch_dynamic_addressing_32() {
            let client = TestRuntime::client(&Default::default());
            cubecl_core::runtime_tests::launch::test_kernel_dynamic_addressing::<TestRuntime>(
                client,
                AddressType::U32,
            );
        }

        #[test]
        fn test_launch_dynamic_addressing_64() {
            let client = TestRuntime::client(&Default::default());
            cubecl_core::runtime_tests::launch::test_kernel_dynamic_addressing::<TestRuntime>(
                client,
                AddressType::U64,
            );
        }

        #[test]
        #[ignore = "Broken by channel refactor"]
        fn test_launch_shared_memory_error() {
            let client = TestRuntime::client(&Default::default());
            cubecl_core::runtime_tests::launch::test_shared_memory_error::<TestRuntime>(client);
        }

        #[test]
        #[ignore = "Broken by channel refactor"]
        fn test_launch_cube_dim_error() {
            let client = TestRuntime::client(&Default::default());
            cubecl_core::runtime_tests::launch::test_cube_dim_error::<TestRuntime>(client);
        }

        #[test]
        #[ignore = "Broken by channel refactor"]
        fn test_launch_units_error() {
            let client = TestRuntime::client(&Default::default());
            cubecl_core::runtime_tests::launch::test_max_units_error::<TestRuntime>(client);
        }
    };
}