use alloc::vec::Vec;
use core::ffi::c_void;
use core::sync::atomic::{AtomicBool, Ordering};
use spin::Mutex;
use crate::compress::gpu_compress::GpuComputeProvider;
// Raw type aliases mirroring the CUDA driver API's C typedefs.
// Context/stream/module/function handles are opaque pointers owned by the driver.
pub type CuResult = i32;
pub type CuDevice = i32;
pub type CuContext = *mut c_void;
pub type CuStream = *mut c_void;
// Device memory address (CUdeviceptr); 64 bits wide on all supported platforms.
pub type CuDevicePtr = u64;
pub type CuModule = *mut c_void;
pub type CuFunction = *mut c_void;
// Subset of CUresult error codes this module recognizes explicitly;
// anything else is surfaced as CudaError::Unknown.
pub const CUDA_SUCCESS: CuResult = 0;
pub const CUDA_ERROR_NOT_INITIALIZED: CuResult = 3;
pub const CUDA_ERROR_DEINITIALIZED: CuResult = 4;
pub const CUDA_ERROR_NO_DEVICE: CuResult = 100;
pub const CUDA_ERROR_INVALID_DEVICE: CuResult = 101;
pub const CUDA_ERROR_INVALID_CONTEXT: CuResult = 201;
pub const CUDA_ERROR_CONTEXT_ALREADY_CURRENT: CuResult = 202;
pub const CUDA_ERROR_OUT_OF_MEMORY: CuResult = 2;
pub const CUDA_ERROR_INVALID_VALUE: CuResult = 1;
pub const CUDA_ERROR_INVALID_HANDLE: CuResult = 400;
pub const CUDA_ERROR_NOT_READY: CuResult = 600;
// CUdevice_attribute ordinals passed to cuDeviceGetAttribute.
pub const CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: i32 = 16;
pub const CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH: i32 = 13;
pub const CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE: i32 = 36;
pub const CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR: i32 = 75;
pub const CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR: i32 = 76;
pub const CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: i32 = 8;
pub const CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: i32 = 9;
// Scheduling flags accepted by cuCtxCreate.
pub const CU_CTX_SCHED_AUTO: u32 = 0x00;
pub const CU_CTX_SCHED_SPIN: u32 = 0x01;
pub const CU_CTX_SCHED_YIELD: u32 = 0x02;
pub const CU_CTX_SCHED_BLOCKING_SYNC: u32 = 0x04;
pub const CU_CTX_MAP_HOST: u32 = 0x08;
// CUmemorytype values.
pub const CU_MEMORYTYPE_HOST: u32 = 0x01;
pub const CU_MEMORYTYPE_DEVICE: u32 = 0x02;
pub const CU_MEMORYTYPE_ARRAY: u32 = 0x03;
pub const CU_MEMORYTYPE_UNIFIED: u32 = 0x04;
// FFI declarations for the CUDA driver API (libcuda). The `_v2` suffixes
// select the current ABI revisions of the corresponding entry points.
unsafe extern "C" {
    // Driver bootstrap and device discovery.
    pub fn cuInit(flags: u32) -> CuResult;
    pub fn cuDeviceGetCount(count: *mut i32) -> CuResult;
    pub fn cuDeviceGet(device: *mut CuDevice, ordinal: i32) -> CuResult;
    // Writes a NUL-terminated device name into `name` (at most `len` bytes).
    pub fn cuDeviceGetName(name: *mut u8, len: i32, device: CuDevice) -> CuResult;
    pub fn cuDeviceTotalMem_v2(bytes: *mut usize, device: CuDevice) -> CuResult;
    pub fn cuDeviceGetAttribute(value: *mut i32, attrib: i32, device: CuDevice) -> CuResult;
    // Context lifecycle and current-context management.
    pub fn cuCtxCreate_v2(ctx: *mut CuContext, flags: u32, device: CuDevice) -> CuResult;
    pub fn cuCtxDestroy_v2(ctx: CuContext) -> CuResult;
    pub fn cuCtxPushCurrent_v2(ctx: CuContext) -> CuResult;
    pub fn cuCtxPopCurrent_v2(ctx: *mut CuContext) -> CuResult;
    pub fn cuCtxSetCurrent(ctx: CuContext) -> CuResult;
    pub fn cuCtxSynchronize() -> CuResult;
    // Device memory management and synchronous host<->device copies.
    pub fn cuMemAlloc_v2(dptr: *mut CuDevicePtr, bytesize: usize) -> CuResult;
    pub fn cuMemFree_v2(dptr: CuDevicePtr) -> CuResult;
    pub fn cuMemcpyHtoD_v2(dst: CuDevicePtr, src: *const c_void, bytecount: usize) -> CuResult;
    pub fn cuMemcpyDtoH_v2(dst: *mut c_void, src: CuDevicePtr, bytecount: usize) -> CuResult;
    // Streams.
    pub fn cuStreamCreate(stream: *mut CuStream, flags: u32) -> CuResult;
    pub fn cuStreamDestroy_v2(stream: CuStream) -> CuResult;
    pub fn cuStreamSynchronize(stream: CuStream) -> CuResult;
    // Module loading and kernel launch (not currently used by the provider
    // below, but part of the binding surface).
    pub fn cuModuleLoad(module: *mut CuModule, fname: *const u8) -> CuResult;
    pub fn cuModuleLoadData(module: *mut CuModule, image: *const c_void) -> CuResult;
    pub fn cuModuleUnload(module: CuModule) -> CuResult;
    pub fn cuModuleGetFunction(
        hfunc: *mut CuFunction,
        module: CuModule,
        name: *const u8,
    ) -> CuResult;
    pub fn cuLaunchKernel(
        f: CuFunction,
        grid_dim_x: u32,
        grid_dim_y: u32,
        grid_dim_z: u32,
        block_dim_x: u32,
        block_dim_y: u32,
        block_dim_z: u32,
        shared_mem_bytes: u32,
        stream: CuStream,
        kernel_params: *mut *mut c_void,
        extra: *mut *mut c_void,
    ) -> CuResult;
}
// Status code returned by every nvCOMP batched-API entry point.
pub type NvcompStatus = i32;
pub const NVCOMP_SUCCESS: NvcompStatus = 0;
pub const NVCOMP_ERROR_INVALID_VALUE: NvcompStatus = 1;
pub const NVCOMP_ERROR_NOT_SUPPORTED: NvcompStatus = 2;
pub const NVCOMP_ERROR_CUDA_ERROR: NvcompStatus = 3;
pub const NVCOMP_ERROR_INTERNAL: NvcompStatus = 4;
// Compression-algorithm selector values.
pub type NvcompType = i32;
pub const NVCOMP_TYPE_LZ4: NvcompType = 0;
pub const NVCOMP_TYPE_ZSTD: NvcompType = 5;
/// Format options passed by value to the batched LZ4 FFI entry points.
/// Layout must stay in sync with the C struct of the linked nvCOMP build —
/// NOTE(review): verify against that version's `nvcompBatchedLZ4Opts_t`.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct NvcompBatchedLz4Opts {
    // 0 selects the default (byte) data type.
    pub data_type: i32,
    pub chunk_size: usize,
}
impl Default for NvcompBatchedLz4Opts {
    fn default() -> Self {
        Self {
            data_type: 0,
            // 64 KiB chunks — the default used throughout this module.
            chunk_size: 65536,
        }
    }
}
/// Format options for the batched Zstd compressor. The C-side struct carries
/// no tunables here, so a single reserved field keeps the ABI non-empty.
#[repr(C)]
#[derive(Debug, Clone, Copy, Default)]
pub struct NvcompBatchedZstdOpts {
    pub reserved: i32,
}
// FFI declarations for nvCOMP's batched LZ4/Zstd compression API.
// The *Async entry points enqueue work on the given stream; their results
// (compressed sizes) are valid only after the stream is synchronized.
unsafe extern "C" {
    /// Queries the scratch ("temp") buffer size required to compress a batch.
    pub fn nvcompBatchedLZ4CompressGetTempSize(
        batch_size: usize,
        max_uncompressed_chunk_bytes: usize,
        format_opts: NvcompBatchedLz4Opts,
        temp_bytes: *mut usize,
    ) -> NvcompStatus;
    /// Worst-case compressed size of a single chunk; output buffers must be
    /// at least this large.
    pub fn nvcompBatchedLZ4CompressGetMaxOutputChunkSize(
        max_uncompressed_chunk_bytes: usize,
        format_opts: NvcompBatchedLz4Opts,
        max_compressed_bytes: *mut usize,
    ) -> NvcompStatus;
    /// Enqueues batched LZ4 compression on `stream`.
    pub fn nvcompBatchedLZ4CompressAsync(
        device_uncompressed_ptrs: *const CuDevicePtr,
        device_uncompressed_bytes: *const usize,
        max_uncompressed_chunk_bytes: usize,
        batch_size: usize,
        device_temp_ptr: CuDevicePtr,
        temp_bytes: usize,
        device_compressed_ptrs: *const CuDevicePtr,
        device_compressed_bytes: *mut usize,
        format_opts: NvcompBatchedLz4Opts,
        stream: CuStream,
    ) -> NvcompStatus;
    /// Zstd analogue of the LZ4 temp-size query.
    pub fn nvcompBatchedZstdCompressGetTempSize(
        batch_size: usize,
        max_uncompressed_chunk_bytes: usize,
        format_opts: NvcompBatchedZstdOpts,
        temp_bytes: *mut usize,
    ) -> NvcompStatus;
    /// Zstd analogue of the LZ4 max-output-size query.
    pub fn nvcompBatchedZstdCompressGetMaxOutputChunkSize(
        max_uncompressed_chunk_bytes: usize,
        format_opts: NvcompBatchedZstdOpts,
        max_compressed_bytes: *mut usize,
    ) -> NvcompStatus;
    /// Enqueues batched Zstd compression on `stream`.
    pub fn nvcompBatchedZstdCompressAsync(
        device_uncompressed_ptrs: *const CuDevicePtr,
        device_uncompressed_bytes: *const usize,
        max_uncompressed_chunk_bytes: usize,
        batch_size: usize,
        device_temp_ptr: CuDevicePtr,
        temp_bytes: usize,
        device_compressed_ptrs: *const CuDevicePtr,
        device_compressed_bytes: *mut usize,
        format_opts: NvcompBatchedZstdOpts,
        stream: CuStream,
    ) -> NvcompStatus;
}
/// Typed view of the CUDA driver result codes this module cares about.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CudaError {
    NotInitialized,
    Deinitialized,
    NoDevice,
    InvalidDevice,
    InvalidContext,
    OutOfMemory,
    InvalidValue,
    InvalidHandle,
    NotReady,
    // Preserves the raw CUresult for codes without a dedicated variant.
    Unknown(CuResult),
}
impl CudaError {
pub fn from_result(result: CuResult) -> Option<Self> {
if result == CUDA_SUCCESS {
return None;
}
Some(match result {
CUDA_ERROR_NOT_INITIALIZED => CudaError::NotInitialized,
CUDA_ERROR_DEINITIALIZED => CudaError::Deinitialized,
CUDA_ERROR_NO_DEVICE => CudaError::NoDevice,
CUDA_ERROR_INVALID_DEVICE => CudaError::InvalidDevice,
CUDA_ERROR_INVALID_CONTEXT => CudaError::InvalidContext,
CUDA_ERROR_OUT_OF_MEMORY => CudaError::OutOfMemory,
CUDA_ERROR_INVALID_VALUE => CudaError::InvalidValue,
CUDA_ERROR_INVALID_HANDLE => CudaError::InvalidHandle,
CUDA_ERROR_NOT_READY => CudaError::NotReady,
code => CudaError::Unknown(code),
})
}
pub fn description(&self) -> &'static str {
match self {
CudaError::NotInitialized => "CUDA driver not initialized",
CudaError::Deinitialized => "CUDA driver deinitialized",
CudaError::NoDevice => "No CUDA-capable device found",
CudaError::InvalidDevice => "Invalid device ordinal",
CudaError::InvalidContext => "Invalid CUDA context",
CudaError::OutOfMemory => "Out of device memory",
CudaError::InvalidValue => "Invalid value or parameter",
CudaError::InvalidHandle => "Invalid handle",
CudaError::NotReady => "Operation not ready",
CudaError::Unknown(_) => "Unknown CUDA error",
}
}
}
/// Typed view of nvCOMP status codes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NvcompError {
    InvalidValue,
    NotSupported,
    CudaError,
    Internal,
    // Preserves the raw status for codes without a dedicated variant.
    Unknown(NvcompStatus),
}
impl NvcompError {
pub fn from_status(status: NvcompStatus) -> Option<Self> {
if status == NVCOMP_SUCCESS {
return None;
}
Some(match status {
NVCOMP_ERROR_INVALID_VALUE => NvcompError::InvalidValue,
NVCOMP_ERROR_NOT_SUPPORTED => NvcompError::NotSupported,
NVCOMP_ERROR_CUDA_ERROR => NvcompError::CudaError,
NVCOMP_ERROR_INTERNAL => NvcompError::Internal,
code => NvcompError::Unknown(code),
})
}
pub fn description(&self) -> &'static str {
match self {
NvcompError::InvalidValue => "Invalid parameter",
NvcompError::NotSupported => "Operation not supported",
NvcompError::CudaError => "CUDA error occurred",
NvcompError::Internal => "Internal nvCOMP error",
NvcompError::Unknown(_) => "Unknown nvCOMP error",
}
}
}
/// Thin wrapper around the raw context pointer so it can be stored in the
/// provider's `Mutex`-guarded state.
#[derive(Clone, Copy)]
struct CudaContextHandle(CuContext);
// SAFETY: the pointer is an opaque driver handle that this module never
// dereferences; all uses go through the provider's mutex, which serializes
// access. NOTE(review): this also assumes the CUDA driver tolerates the
// handle being used from different threads — confirm for the target driver.
unsafe impl Send for CudaContextHandle {}
unsafe impl Sync for CudaContextHandle {}
impl CudaContextHandle {
    /// Wraps a raw handle. Caller must pass either null or a pointer obtained
    /// from a successful `cuCtxCreate_v2`.
    const unsafe fn from_raw(ctx: CuContext) -> Self {
        Self(ctx)
    }
    /// Returns the raw pointer for passing back to the driver.
    fn as_raw(self) -> CuContext {
        self.0
    }
    /// True when no context is held.
    fn is_null(self) -> bool {
        self.0.is_null()
    }
}
/// Thin wrapper around the raw stream pointer, mirroring `CudaContextHandle`.
#[derive(Clone, Copy)]
struct CudaStreamHandle(CuStream);
// SAFETY: opaque driver handle, never dereferenced here; access is serialized
// by the provider's mutex. NOTE(review): same thread-use assumption as the
// context handle — confirm for the target driver.
unsafe impl Send for CudaStreamHandle {}
unsafe impl Sync for CudaStreamHandle {}
impl CudaStreamHandle {
    /// Wraps a raw handle. Caller must pass either null or a pointer obtained
    /// from a successful `cuStreamCreate`.
    const unsafe fn from_raw(stream: CuStream) -> Self {
        Self(stream)
    }
    /// Returns the raw pointer for passing back to the driver.
    fn as_raw(self) -> CuStream {
        self.0
    }
    /// True when no stream is held.
    fn is_null(self) -> bool {
        self.0.is_null()
    }
}
/// Static properties of a CUDA device, captured once at query time.
#[derive(Debug, Clone)]
pub struct CudaDeviceInfo {
    pub ordinal: i32,
    pub name: [u8; 256],
    pub total_memory: usize,
    pub sm_count: i32,
    pub compute_major: i32,
    pub compute_minor: i32,
    pub memory_clock_khz: i32,
    pub memory_bus_width: i32,
}
impl CudaDeviceInfo {
    /// Device name as a `&str`, cut at the first NUL byte.
    /// Falls back to "Unknown" if the bytes are not valid UTF-8.
    pub fn name_str(&self) -> &str {
        let terminator = self.name.iter().position(|&b| b == 0);
        let raw = &self.name[..terminator.unwrap_or(self.name.len())];
        core::str::from_utf8(raw).unwrap_or("Unknown")
    }
    /// Theoretical peak memory bandwidth in GB/s, assuming double-data-rate
    /// memory (hence the factor of 2).
    pub fn memory_bandwidth_gbps(&self) -> f32 {
        let clock_hz = f64::from(self.memory_clock_khz) * 1000.0;
        let bus_bytes = f64::from(self.memory_bus_width) / 8.0;
        ((2.0 * clock_hz * bus_bytes) / 1e9) as f32
    }
}
/// Mutable provider state guarded by the `spin::Mutex` below: the owned
/// context, its stream, and the cached device properties.
struct CudaProviderState {
    context: CudaContextHandle,
    stream: CudaStreamHandle,
    device_info: CudaDeviceInfo,
}
/// GPU compute provider backed by the CUDA driver API and nvCOMP.
pub struct CudaComputeProvider {
    // Every driver call goes through this lock, serializing GPU access.
    state: Mutex<CudaProviderState>,
    // Set on successful construction, cleared in `drop`.
    initialized: AtomicBool,
}
// Process-wide flag recording whether cuInit has been attempted successfully.
static CUDA_INITIALIZED: AtomicBool = AtomicBool::new(false);
impl CudaComputeProvider {
pub fn new(device_ordinal: i32) -> Result<Self, CudaError> {
if !CUDA_INITIALIZED.swap(true, Ordering::SeqCst) {
let result = unsafe { cuInit(0) };
if let Some(err) = CudaError::from_result(result) {
CUDA_INITIALIZED.store(false, Ordering::SeqCst);
return Err(err);
}
}
let mut device_count: i32 = 0;
let result = unsafe { cuDeviceGetCount(&mut device_count) };
if let Some(err) = CudaError::from_result(result) {
return Err(err);
}
if device_count == 0 {
return Err(CudaError::NoDevice);
}
if device_ordinal >= device_count {
return Err(CudaError::InvalidDevice);
}
let mut device: CuDevice = 0;
let result = unsafe { cuDeviceGet(&mut device, device_ordinal) };
if let Some(err) = CudaError::from_result(result) {
return Err(err);
}
let device_info = Self::query_device_info(device, device_ordinal)?;
let mut context: CuContext = core::ptr::null_mut();
let result = unsafe { cuCtxCreate_v2(&mut context, CU_CTX_SCHED_AUTO, device) };
if let Some(err) = CudaError::from_result(result) {
return Err(err);
}
let mut stream: CuStream = core::ptr::null_mut();
let result = unsafe { cuStreamCreate(&mut stream, 0) };
if let Some(err) = CudaError::from_result(result) {
unsafe { cuCtxDestroy_v2(context) };
return Err(err);
}
let context_handle = unsafe { CudaContextHandle::from_raw(context) };
let stream_handle = unsafe { CudaStreamHandle::from_raw(stream) };
Ok(Self {
state: Mutex::new(CudaProviderState {
context: context_handle,
stream: stream_handle,
device_info,
}),
initialized: AtomicBool::new(true),
})
}
fn query_device_info(device: CuDevice, ordinal: i32) -> Result<CudaDeviceInfo, CudaError> {
let mut info = CudaDeviceInfo {
ordinal,
name: [0u8; 256],
total_memory: 0,
sm_count: 0,
compute_major: 0,
compute_minor: 0,
memory_clock_khz: 0,
memory_bus_width: 0,
};
let result = unsafe { cuDeviceGetName(info.name.as_mut_ptr(), 256, device) };
if let Some(err) = CudaError::from_result(result) {
return Err(err);
}
let result = unsafe { cuDeviceTotalMem_v2(&mut info.total_memory, device) };
if let Some(err) = CudaError::from_result(result) {
return Err(err);
}
let result = unsafe {
cuDeviceGetAttribute(
&mut info.sm_count,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
device,
)
};
if let Some(err) = CudaError::from_result(result) {
return Err(err);
}
let result = unsafe {
cuDeviceGetAttribute(
&mut info.compute_major,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
device,
)
};
if let Some(err) = CudaError::from_result(result) {
return Err(err);
}
let result = unsafe {
cuDeviceGetAttribute(
&mut info.compute_minor,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
device,
)
};
if let Some(err) = CudaError::from_result(result) {
return Err(err);
}
let result = unsafe {
cuDeviceGetAttribute(
&mut info.memory_clock_khz,
CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE,
device,
)
};
if let Some(err) = CudaError::from_result(result) {
return Err(err);
}
let result = unsafe {
cuDeviceGetAttribute(
&mut info.memory_bus_width,
CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH,
device,
)
};
if let Some(err) = CudaError::from_result(result) {
return Err(err);
}
Ok(info)
}
pub fn device_info(&self) -> CudaDeviceInfo {
let state = self.state.lock();
state.device_info.clone()
}
pub fn is_initialized(&self) -> bool {
self.initialized.load(Ordering::SeqCst)
}
fn ensure_context_current(state: &CudaProviderState) -> bool {
if state.context.is_null() {
return false;
}
let result = unsafe { cuCtxSetCurrent(state.context.as_raw()) };
CudaError::from_result(result).is_none()
}
}
impl Drop for CudaComputeProvider {
fn drop(&mut self) {
let state = self.state.lock();
if !state.stream.is_null() {
unsafe { cuStreamDestroy_v2(state.stream.as_raw()) };
}
if !state.context.is_null() {
unsafe { cuCtxDestroy_v2(state.context.as_raw()) };
}
self.initialized.store(false, Ordering::SeqCst);
}
}
impl GpuComputeProvider for CudaComputeProvider {
fn allocate(&self, size: usize) -> Option<u64> {
if size == 0 {
return None;
}
let state = self.state.lock();
if !Self::ensure_context_current(&state) {
return None;
}
let mut device_ptr: CuDevicePtr = 0;
let result = unsafe { cuMemAlloc_v2(&mut device_ptr, size) };
if CudaError::from_result(result).is_some() {
return None;
}
Some(device_ptr)
}
fn free(&self, device_addr: u64) {
if device_addr == 0 {
return;
}
let state = self.state.lock();
if !Self::ensure_context_current(&state) {
return;
}
unsafe { cuMemFree_v2(device_addr) };
}
fn copy_to_device(&self, device_addr: u64, data: &[u8]) -> bool {
if device_addr == 0 || data.is_empty() {
return false;
}
let state = self.state.lock();
if !Self::ensure_context_current(&state) {
return false;
}
let result =
unsafe { cuMemcpyHtoD_v2(device_addr, data.as_ptr() as *const c_void, data.len()) };
CudaError::from_result(result).is_none()
}
fn copy_from_device(&self, device_addr: u64, data: &mut [u8]) -> bool {
if device_addr == 0 || data.is_empty() {
return false;
}
let state = self.state.lock();
if !Self::ensure_context_current(&state) {
return false;
}
let result =
unsafe { cuMemcpyDtoH_v2(data.as_mut_ptr() as *mut c_void, device_addr, data.len()) };
CudaError::from_result(result).is_none()
}
fn compress_lz4(
&self,
input_addr: u64,
input_size: usize,
output_addr: u64,
output_size: usize,
) -> usize {
if input_addr == 0 || output_addr == 0 || input_size == 0 || output_size == 0 {
return 0;
}
let state = self.state.lock();
if !Self::ensure_context_current(&state) {
return 0;
}
let opts = NvcompBatchedLz4Opts::default();
let mut temp_bytes: usize = 0;
let status =
unsafe { nvcompBatchedLZ4CompressGetTempSize(1, input_size, opts, &mut temp_bytes) };
if NvcompError::from_status(status).is_some() {
return 0;
}
let mut temp_ptr: CuDevicePtr = 0;
if temp_bytes > 0 {
let result = unsafe { cuMemAlloc_v2(&mut temp_ptr, temp_bytes) };
if CudaError::from_result(result).is_some() {
return 0;
}
}
let input_ptrs = [input_addr];
let input_sizes = [input_size];
let output_ptrs = [output_addr];
let mut output_sizes = [0usize];
let status = unsafe {
nvcompBatchedLZ4CompressAsync(
input_ptrs.as_ptr(),
input_sizes.as_ptr(),
input_size,
1, temp_ptr,
temp_bytes,
output_ptrs.as_ptr(),
output_sizes.as_mut_ptr(),
opts,
state.stream.as_raw(),
)
};
unsafe { cuStreamSynchronize(state.stream.as_raw()) };
if temp_ptr != 0 {
unsafe { cuMemFree_v2(temp_ptr) };
}
if NvcompError::from_status(status).is_some() {
return 0;
}
output_sizes[0].min(output_size)
}
fn compress_zstd(
&self,
input_addr: u64,
input_size: usize,
output_addr: u64,
output_size: usize,
_level: u8,
) -> usize {
if input_addr == 0 || output_addr == 0 || input_size == 0 || output_size == 0 {
return 0;
}
let state = self.state.lock();
if !Self::ensure_context_current(&state) {
return 0;
}
let opts = NvcompBatchedZstdOpts::default();
let mut temp_bytes: usize = 0;
let status =
unsafe { nvcompBatchedZstdCompressGetTempSize(1, input_size, opts, &mut temp_bytes) };
if NvcompError::from_status(status).is_some() {
return 0;
}
let mut temp_ptr: CuDevicePtr = 0;
if temp_bytes > 0 {
let result = unsafe { cuMemAlloc_v2(&mut temp_ptr, temp_bytes) };
if CudaError::from_result(result).is_some() {
return 0;
}
}
let input_ptrs = [input_addr];
let input_sizes = [input_size];
let output_ptrs = [output_addr];
let mut output_sizes = [0usize];
let status = unsafe {
nvcompBatchedZstdCompressAsync(
input_ptrs.as_ptr(),
input_sizes.as_ptr(),
input_size,
1, temp_ptr,
temp_bytes,
output_ptrs.as_ptr(),
output_sizes.as_mut_ptr(),
opts,
state.stream.as_raw(),
)
};
unsafe { cuStreamSynchronize(state.stream.as_raw()) };
if temp_ptr != 0 {
unsafe { cuMemFree_v2(temp_ptr) };
}
if NvcompError::from_status(status).is_some() {
return 0;
}
output_sizes[0].min(output_size)
}
fn synchronize(&self) {
let state = self.state.lock();
if !Self::ensure_context_current(&state) {
return;
}
unsafe { cuStreamSynchronize(state.stream.as_raw()) };
}
}
/// Returns the number of CUDA-capable devices visible to the driver.
///
/// `cuInit` is documented as idempotent, so it is called unconditionally
/// instead of being gated on `CUDA_INITIALIZED`: the old swap-based guard let
/// a second thread race past the flag before the first thread's `cuInit` had
/// actually completed.
///
/// # Errors
/// Propagates any driver error from `cuInit` or `cuDeviceGetCount`.
pub fn get_device_count() -> Result<i32, CudaError> {
    let result = unsafe { cuInit(0) };
    if let Some(err) = CudaError::from_result(result) {
        CUDA_INITIALIZED.store(false, Ordering::SeqCst);
        return Err(err);
    }
    CUDA_INITIALIZED.store(true, Ordering::SeqCst);
    let mut count: i32 = 0;
    let result = unsafe { cuDeviceGetCount(&mut count) };
    if let Some(err) = CudaError::from_result(result) {
        return Err(err);
    }
    Ok(count)
}
/// Queries the static properties of the device at `ordinal`.
///
/// `cuInit` is called unconditionally (it is documented as idempotent) to
/// avoid the initialization race described on `get_device_count`.
///
/// # Errors
/// Propagates any driver error from `cuInit`, `cuDeviceGet`, or the
/// individual property queries.
pub fn get_device_info(ordinal: i32) -> Result<CudaDeviceInfo, CudaError> {
    let result = unsafe { cuInit(0) };
    if let Some(err) = CudaError::from_result(result) {
        CUDA_INITIALIZED.store(false, Ordering::SeqCst);
        return Err(err);
    }
    CUDA_INITIALIZED.store(true, Ordering::SeqCst);
    let mut device: CuDevice = 0;
    let result = unsafe { cuDeviceGet(&mut device, ordinal) };
    if let Some(err) = CudaError::from_result(result) {
        return Err(err);
    }
    CudaComputeProvider::query_device_info(device, ordinal)
}
/// Enumerates every visible CUDA device, silently skipping devices whose
/// property queries fail. Returns an empty list when the driver itself
/// cannot be initialized or counted.
pub fn list_devices() -> Vec<CudaDeviceInfo> {
    match get_device_count() {
        Ok(count) => (0..count).filter_map(|i| get_device_info(i).ok()).collect(),
        Err(_) => Vec::new(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // Result-code mapping: success yields None, known codes map to variants.
    #[test]
    fn test_cuda_error_codes() {
        assert!(CudaError::from_result(CUDA_SUCCESS).is_none());
        assert_eq!(
            CudaError::from_result(CUDA_ERROR_NOT_INITIALIZED),
            Some(CudaError::NotInitialized)
        );
        assert_eq!(
            CudaError::from_result(CUDA_ERROR_OUT_OF_MEMORY),
            Some(CudaError::OutOfMemory)
        );
        assert_eq!(
            CudaError::from_result(CUDA_ERROR_NO_DEVICE),
            Some(CudaError::NoDevice)
        );
    }
    #[test]
    fn test_cuda_error_descriptions() {
        assert_eq!(
            CudaError::NotInitialized.description(),
            "CUDA driver not initialized"
        );
        assert_eq!(CudaError::OutOfMemory.description(), "Out of device memory");
        assert_eq!(
            CudaError::NoDevice.description(),
            "No CUDA-capable device found"
        );
    }
    // Same mapping checks for the nvCOMP status codes.
    #[test]
    fn test_nvcomp_error_codes() {
        assert!(NvcompError::from_status(NVCOMP_SUCCESS).is_none());
        assert_eq!(
            NvcompError::from_status(NVCOMP_ERROR_INVALID_VALUE),
            Some(NvcompError::InvalidValue)
        );
        assert_eq!(
            NvcompError::from_status(NVCOMP_ERROR_NOT_SUPPORTED),
            Some(NvcompError::NotSupported)
        );
    }
    #[test]
    fn test_nvcomp_error_descriptions() {
        assert_eq!(NvcompError::InvalidValue.description(), "Invalid parameter");
        assert_eq!(
            NvcompError::NotSupported.description(),
            "Operation not supported"
        );
        assert_eq!(NvcompError::CudaError.description(), "CUDA error occurred");
    }
    // Default options: byte data type, 64 KiB chunks.
    #[test]
    fn test_lz4_opts_default() {
        let opts = NvcompBatchedLz4Opts::default();
        assert_eq!(opts.data_type, 0);
        assert_eq!(opts.chunk_size, 65536);
    }
    #[test]
    fn test_zstd_opts_default() {
        let opts = NvcompBatchedZstdOpts::default();
        assert_eq!(opts.reserved, 0);
    }
    // Null handles must report as null without touching the driver.
    #[test]
    fn test_context_handle_null_check() {
        let handle = unsafe { CudaContextHandle::from_raw(core::ptr::null_mut()) };
        assert!(handle.is_null());
    }
    #[test]
    fn test_stream_handle_null_check() {
        let handle = unsafe { CudaStreamHandle::from_raw(core::ptr::null_mut()) };
        assert!(handle.is_null());
    }
    // RTX-4090-like figures: 10.501 GHz effective/2 clock, 384-bit bus
    // => roughly 1008 GB/s peak bandwidth.
    #[test]
    fn test_device_info_bandwidth_calculation() {
        let info = CudaDeviceInfo {
            ordinal: 0,
            name: [0u8; 256],
            total_memory: 24 * 1024 * 1024 * 1024,
            sm_count: 128,
            compute_major: 8,
            compute_minor: 9,
            memory_clock_khz: 10501000,
            memory_bus_width: 384,
        };
        let bandwidth = info.memory_bandwidth_gbps();
        assert!(bandwidth > 900.0 && bandwidth < 1100.0);
    }
    // name_str must stop at the first NUL terminator.
    #[test]
    fn test_device_info_name_str() {
        let mut info = CudaDeviceInfo {
            ordinal: 0,
            name: [0u8; 256],
            total_memory: 0,
            sm_count: 0,
            compute_major: 0,
            compute_minor: 0,
            memory_clock_khz: 0,
            memory_bus_width: 0,
        };
        let name = b"Test GPU";
        info.name[..name.len()].copy_from_slice(name);
        assert_eq!(info.name_str(), "Test GPU");
    }
}