Struct Device

Source

pub struct Device { /* private fields */ }

Expand description

Represents a CUDA-capable GPU device.

Wraps a CUdevice handle obtained from the driver API. Devices are identified by a zero-based ordinal index. The handle is a lightweight integer that can be freely copied.

§Examples

use oxicuda_driver::device::Device;

oxicuda_driver::init()?;
let device = Device::get(0)?;
println!("GPU: {}", device.name()?);
println!("Memory: {} MB", device.total_memory()? / (1024 * 1024));
let (major, minor) = device.compute_capability()?;
println!("Compute: {major}.{minor}");

Implementations§

Source §

impl Device

Source

pub fn get(ordinal: i32) -> CudaResult<Self>

Get a device handle by ordinal (0-indexed).

§Errors

Returns CudaError::InvalidDevice if the ordinal is out of range, or CudaError::NotInitialized if the driver has not been loaded.

Source

pub fn count() -> CudaResult<i32>

Get the number of CUDA-capable devices in the system.

§Errors

Returns an error if the driver cannot enumerate devices.

Source

pub fn name(&self) -> CudaResult<String>

Get the device name (e.g., "NVIDIA A100-SXM4-80GB").

The returned string is an ASCII identifier provided by the driver.

§Errors

Returns an error if the driver call fails.

Source

pub fn total_memory(&self) -> CudaResult<usize>

Get total device memory in bytes.

§Errors

Returns an error if the driver call fails.

Source

pub fn attribute(&self, attr: CUdevice_attribute) -> CudaResult<i32>

Query an arbitrary device attribute.

This is the low-level building block for all the convenience methods below. Callers can use any CUdevice_attribute variant directly.

§Errors

Returns an error if the attribute is not supported or the driver call fails.

Source

pub fn compute_capability(&self) -> CudaResult<(i32, i32)>

Get compute capability as (major, minor).

For example, an A100 returns (8, 0) and an RTX 4090 returns (8, 9).

§Errors

Returns an error if the driver call fails.

Source

pub fn max_threads_per_block(&self) -> CudaResult<i32>

Get the maximum number of threads per block.

Source

pub fn max_block_dim(&self) -> CudaResult<(i32, i32, i32)>

Get the maximum block dimensions as (x, y, z).

Source

pub fn max_grid_dim(&self) -> CudaResult<(i32, i32, i32)>

Get the maximum grid dimensions as (x, y, z).

Source

pub fn max_threads_per_multiprocessor(&self) -> CudaResult<i32>

Get the maximum number of threads per multiprocessor.

Source

pub fn max_blocks_per_multiprocessor(&self) -> CudaResult<i32>

Get the maximum number of blocks per multiprocessor.

Source

pub fn multiprocessor_count(&self) -> CudaResult<i32>

Get the number of streaming multiprocessors (SMs) on the device.

Source

pub fn warp_size(&self) -> CudaResult<i32>

Get the warp size in threads (32 on all current NVIDIA GPUs).

Source

pub fn max_shared_memory_per_block(&self) -> CudaResult<i32>

Get the maximum shared memory per block in bytes.

Source

pub fn max_shared_memory_per_multiprocessor(&self) -> CudaResult<i32>

Get the maximum shared memory per multiprocessor in bytes.

Source

pub fn max_shared_memory_per_block_optin(&self) -> CudaResult<i32>

Get the maximum opt-in shared memory per block in bytes.

This is the upper bound achievable via cuFuncSetAttribute(CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES).

Source

pub fn max_registers_per_block(&self) -> CudaResult<i32>

Get the maximum number of 32-bit registers per block.

Source

pub fn max_registers_per_multiprocessor(&self) -> CudaResult<i32>

Get the maximum number of 32-bit registers per multiprocessor.

Source

pub fn l2_cache_size(&self) -> CudaResult<i32>

Get the L2 cache size in bytes.

Source

pub fn total_constant_memory(&self) -> CudaResult<i32>

Get the total constant memory on the device in bytes.

Source

pub fn clock_rate_khz(&self) -> CudaResult<i32>

Get the core clock rate in kHz.

Source

pub fn memory_clock_rate_khz(&self) -> CudaResult<i32>

Get the memory clock rate in kHz.

Source

pub fn memory_bus_width(&self) -> CudaResult<i32>

Get the global memory bus width in bits.

Source

pub fn pci_bus_id(&self) -> CudaResult<i32>

Get the PCI bus ID of the device.

Source

pub fn pci_device_id(&self) -> CudaResult<i32>

Get the PCI device ID.

Source

pub fn pci_domain_id(&self) -> CudaResult<i32>

Get the PCI domain ID.

Source

pub fn supports_managed_memory(&self) -> CudaResult<bool>

Check if the device supports managed (unified) memory.

Source

pub fn supports_concurrent_managed_access(&self) -> CudaResult<bool>

Check if the device supports concurrent managed memory access.

Source

pub fn supports_concurrent_kernels(&self) -> CudaResult<bool>

Check if the device supports concurrent kernel execution.

Source

pub fn supports_cooperative_launch(&self) -> CudaResult<bool>

Check if the device supports cooperative kernel launches.

Source

pub fn ecc_enabled(&self) -> CudaResult<bool>

Check if ECC memory is enabled on the device.

Source

pub fn is_integrated(&self) -> CudaResult<bool>

Check if the device is integrated (shares memory with the host).

Source

pub fn can_map_host_memory(&self) -> CudaResult<bool>

Check if the device can map host memory into its address space.

Source

pub fn supports_unified_addressing(&self) -> CudaResult<bool>

Check if the device uses a unified address space with the host.

Source

pub fn supports_stream_priorities(&self) -> CudaResult<bool>

Check if the device supports stream priorities.

Source

pub fn supports_compute_preemption(&self) -> CudaResult<bool>

Check if the device supports compute preemption.

Source

pub fn async_engine_count(&self) -> CudaResult<i32>

Get the number of asynchronous engines (copy engines).

Source

pub fn is_multi_gpu_board(&self) -> CudaResult<bool>

Check if the device is on a multi-GPU board.

Source

pub fn has_kernel_exec_timeout(&self) -> CudaResult<bool>

Check if there is a kernel execution timeout enforced by the OS.

Source

pub fn compute_mode(&self) -> CudaResult<i32>

Get the compute mode (0=default, 1=exclusive-thread, 2=prohibited, 3=exclusive-process).

Source

pub fn tcc_driver(&self) -> CudaResult<bool>

Check if the device uses the TCC (Tesla Compute Cluster) driver model.

TCC mode disables the display driver, giving full GPU resources to compute workloads.

Source

pub fn multi_gpu_board_group_id(&self) -> CudaResult<i32>

Get the multi-GPU board group identifier.

Devices on the same board share the same group ID.

Source

pub fn max_persisting_l2_cache_size(&self) -> CudaResult<i32>

Get the maximum persisting L2 cache size in bytes (Ampere+).

Source

pub fn supports_generic_compression(&self) -> CudaResult<bool>

Check if the device supports generic memory compression.

Source

pub fn supports_pageable_memory_access(&self) -> CudaResult<bool>

Check if the device supports pageable memory access.

Source

pub fn pageable_memory_uses_host_page_tables(&self) -> CudaResult<bool>

Check if pageable memory access uses host page tables.

Source

pub fn supports_direct_managed_mem_from_host(&self) -> CudaResult<bool>

Check if the device supports direct managed memory access from the host.

Source

pub fn memory_pool_supported_handle_types(&self) -> CudaResult<i32>

Get memory pool supported handle types as a bitmask.

Source

pub fn supports_host_native_atomics(&self) -> CudaResult<bool>

Check if the link between the device and the host supports native atomic operations.

Source

pub fn single_to_double_perf_ratio(&self) -> CudaResult<i32>

Get the ratio of single-precision to double-precision performance.

A higher value means the GPU is relatively faster at FP32 than FP64.

Source

pub fn supports_cooperative_multi_device_launch(&self) -> CudaResult<bool>

Check if the device supports cooperative multi-device kernel launches.

Source

pub fn supports_flush_remote_writes(&self) -> CudaResult<bool>

Check if the device supports flushing outstanding remote writes.

Source

pub fn supports_host_register(&self) -> CudaResult<bool>

Check if the device supports registering host memory (e.g. via cuMemHostRegister).

Source

pub fn can_use_host_pointer_for_registered_mem(&self) -> CudaResult<bool>

Check if the device can use host pointers for registered memory.

Source

pub fn supports_gpu_direct_rdma(&self) -> CudaResult<bool>

Check if the device supports GPU Direct RDMA.

Source

pub fn supports_tensor_map_access(&self) -> CudaResult<bool>

Check if the device supports tensor-map access (Hopper+).

Source

pub fn supports_multicast(&self) -> CudaResult<bool>

Check if the device supports multicast operations.

Source

pub fn mps_enabled(&self) -> CudaResult<bool>

Check if Multi-Process Service (MPS) is enabled on the device.

Source

pub fn max_texture_1d_width(&self) -> CudaResult<i32>

Get the maximum 1D texture width.

Source

pub fn max_texture_2d_dims(&self) -> CudaResult<(i32, i32)>

Get the maximum 2D texture dimensions as (width, height).

Source

pub fn max_texture_3d_dims(&self) -> CudaResult<(i32, i32, i32)>

Get the maximum 3D texture dimensions as (width, height, depth).

Source

pub fn gpu_overlap(&self) -> CudaResult<bool>

Check if the device can copy memory and execute a kernel concurrently.

Source

pub fn max_pitch(&self) -> CudaResult<i32>

Get the maximum pitch for memory copies in bytes.

Source

pub fn texture_alignment(&self) -> CudaResult<i32>

Get the texture alignment requirement in bytes.

Source

pub fn surface_alignment(&self) -> CudaResult<i32>

Get the surface alignment requirement in bytes.

Source

pub fn supports_deferred_mapping(&self) -> CudaResult<bool>

Check if the device supports deferred mapping of CUDA arrays.

Source

pub fn supports_memory_pools(&self) -> CudaResult<bool>

Check if the device supports memory pools (stream-ordered allocation via cuMemAllocAsync / cudaMallocAsync).

Source

pub fn supports_cluster_launch(&self) -> CudaResult<bool>

Check if the device supports cluster launch (Hopper+).

Source

pub fn supports_virtual_memory_management(&self) -> CudaResult<bool>

Check if the device supports virtual memory management APIs.

Source

pub fn supports_handle_type_posix_fd(&self) -> CudaResult<bool>

Check if the device supports POSIX file descriptor handles for IPC.

Source

pub fn supports_handle_type_win32(&self) -> CudaResult<bool>

Check if the device supports Win32 handles for IPC.

Source

pub fn supports_handle_type_win32_kmt(&self) -> CudaResult<bool>

Check if the device supports Win32 KMT handles for IPC.

Source

pub fn supports_gpu_direct_rdma_vmm(&self) -> CudaResult<bool>

Check if the device supports GPU Direct RDMA with CUDA VMM.

Source

pub fn gpu_direct_rdma_flush_writes_options(&self) -> CudaResult<i32>

Get the GPU Direct RDMA flush-writes options bitmask.

Source

pub fn gpu_direct_rdma_writes_ordering(&self) -> CudaResult<i32>

Get the GPU Direct RDMA writes ordering.

Source

pub fn max_access_policy_window_size(&self) -> CudaResult<i32>

Get the maximum access-policy window size for the L2 cache, in bytes.

Source

pub fn reserved_shared_memory_per_block(&self) -> CudaResult<i32>

Get the reserved shared memory per block in bytes.

Source

pub fn supports_timeline_semaphore_interop(&self) -> CudaResult<bool>

Check if timeline semaphore interop is supported.

Source

pub fn supports_mem_sync_domain(&self) -> CudaResult<bool>

Check if memory sync domain operations are supported.

Source

pub fn mem_sync_domain_count(&self) -> CudaResult<i32>

Get the number of memory sync domains.

Source

pub fn supports_gpu_direct_rdma_fabric(&self) -> CudaResult<bool>

Check if GPU-Direct Fabric (RDMA) is supported.

Source

pub fn supports_unified_function_pointers(&self) -> CudaResult<bool>

Check if unified function pointers are supported.

Source

pub fn supports_ipc_events(&self) -> CudaResult<bool>

Check if IPC event handles are supported.

Source

pub fn numa_config(&self) -> CudaResult<i32>

Get the NUMA configuration of the device.

Source

pub fn numa_id(&self) -> CudaResult<i32>

Get the NUMA ID of the device.

Source

pub fn host_numa_id(&self) -> CudaResult<i32>

Get the host NUMA ID of the device.

Source

pub fn texture_pitch_alignment(&self) -> CudaResult<i32>

Get the texture pitch alignment requirement in bytes.

Source

pub fn info(&self) -> CudaResult<DeviceInfo>

Gather comprehensive device information in a single call.

Returns a DeviceInfo with all key properties. Individual attribute query failures are silently replaced with default values (0 / false) so that the call succeeds even on older drivers that lack some attributes.

§Errors

Returns an error only if the device name or total memory cannot be queried (fundamental properties).

Source

pub fn raw(&self) -> CUdevice

Get the raw CUdevice handle for use with FFI calls.

Source

pub fn ordinal(&self) -> i32

Get the device ordinal that was used to obtain this handle.

Source §

impl Device

Source

pub fn occupancy_info(&self) -> CudaResult<DeviceOccupancyInfo>

Gather all occupancy-relevant hardware attributes into a DeviceOccupancyInfo struct.

On macOS (where no NVIDIA driver is available) this returns synthetic values for a typical SM 8.6 (Ampere) GPU so that CPU-side occupancy analysis can still run.