// cuda_oxide/device.rs

1use crate::*;
2use num_enum::TryFromPrimitive;
3
/// Handle identifying a single CUDA-capable device.
///
/// Values are produced by `Cuda::list_devices`; the struct itself only stores
/// the raw handle and performs no driver calls on construction.
pub struct Device {
    /// Raw device handle, filled in by `cuDeviceGet` when devices are enumerated
    pub(crate) handle: i32,
}
8
9/// Type of native array format
10#[derive(Clone, Copy, Debug, TryFromPrimitive)]
11#[repr(u32)]
12pub enum CudaArrayFormat {
13    UnsignedInt8 = 0x01,
14    UnsignedInt16 = 0x02,
15    UnsignedInt32 = 0x03,
16    SignedInt8 = 0x08,
17    SignedInt16 = 0x09,
18    SignedInt32 = 0x0a,
19    Half = 0x10,
20    Float = 0x20,
21    Nv12 = 0xb0,
22}
23
24impl Device {
25    /// Fetches a human-readable name from the device
26    pub fn name(&self) -> CudaResult<String> {
27        let mut buf = [0u8; 256];
28        cuda_error(unsafe { sys::cuDeviceGetName(buf.as_mut_ptr() as *mut i8, 256, self.handle) })?;
29        Ok(
30            String::from_utf8_lossy(&buf[..buf.iter().position(|x| *x == 0).unwrap_or(0)])
31                .into_owned(),
32        )
33    }
34
35    /// Gets a UUID from the device
36    pub fn uuid(&self) -> CudaResult<u128> {
37        let mut out = 0u128;
38        cuda_error(unsafe { sys::cuDeviceGetUuid(&mut out as *mut u128 as *mut _, self.handle) })?;
39        Ok(out)
40    }
41
42    /// Gets the total available memory size of the device, in bytes
43    pub fn memory_size(&self) -> CudaResult<usize> {
44        let mut memory_size = 0usize;
45        cuda_error(unsafe {
46            sys::cuDeviceTotalMem_v2(&mut memory_size as *mut usize as *mut _, self.handle)
47        })?;
48        Ok(memory_size)
49    }
50
51    /// Gets a current attribute value for the device
52    pub fn get_attribute(&self, attribute: DeviceAttribute) -> CudaResult<i32> {
53        let mut out = 0i32;
54        cuda_error(unsafe {
55            sys::cuDeviceGetAttribute(&mut out as *mut i32, attribute as u32, self.handle)
56        })?;
57        Ok(out)
58    }
59
60    /// Gets the compute capability of the device
61    pub fn compute_capability(&self) -> CudaResult<CudaVersion> {
62        Ok(CudaVersion {
63            major: self.get_attribute(DeviceAttribute::ComputeCapabilityMajor)? as u32,
64            minor: self.get_attribute(DeviceAttribute::ComputeCapabilityMinor)? as u32,
65        })
66    }
67
68    /// Calculates the linear max width of 1D textures for a given native array format
69    pub fn get_texture_1d_linear_max_width(
70        &self,
71        format: CudaArrayFormat,
72        channels: u32,
73    ) -> CudaResult<usize> {
74        let mut out = 0usize;
75        cuda_error(unsafe {
76            sys::cuDeviceGetTexture1DLinearMaxWidth(
77                &mut out as *mut usize as *mut _,
78                format as u32,
79                channels,
80                self.handle,
81            )
82        })?;
83        Ok(out)
84    }
85}
86
87impl Cuda {
88    /// List all CUDA-enabled devices on the host
89    pub fn list_devices() -> CudaResult<Vec<Device>> {
90        let mut count = 0i32;
91        cuda_error(unsafe { sys::cuDeviceGetCount(&mut count as *mut i32) })?;
92        let mut out = Vec::with_capacity(count as usize);
93        for i in 0..count {
94            let mut device = Device { handle: 0 };
95            cuda_error(unsafe { sys::cuDeviceGet(&mut device.handle as *mut i32, i) })?;
96            out.push(device);
97        }
98        Ok(out)
99    }
100}
101
102/// A [`Device`]-specific attribute type
103#[derive(Clone, Copy, Debug, TryFromPrimitive)]
104#[repr(u32)]
105pub enum DeviceAttribute {
106    MaxThreadsPerBlock = 1,
107    MaxBlockDimX = 2,
108    MaxBlockDimY = 3,
109    MaxBlockDimZ = 4,
110    MaxGridDimX = 5,
111    MaxGridDimY = 6,
112    MaxGridDimZ = 7,
113    SharedMemoryPerBlock = 8,
114    TotalConstantMemory = 9,
115    WarpSize = 10,
116    MaxPitch = 11,
117    RegistersPerBlock = 12,
118    ClockRate = 13,
119    TextureAlignment = 14,
120    GpuOverlap = 15,
121    MultiprocessorCount = 16,
122    KernelExecTimeout = 17,
123    Integrated = 18,
124    CanMapHostMemory = 19,
125    ComputeMode = 20,
126    MaximumTexture1dWidth = 21,
127    MaximumTexture2dWidth = 22,
128    MaximumTexture2dHeight = 23,
129    MaximumTexture3dWidth = 24,
130    MaximumTexture3dHeight = 25,
131    MaximumTexture3dDepth = 26,
132    MaximumTexture2dArrayWidth = 27,
133    MaximumTexture2dArrayHeight = 28,
134    MaximumTexture2dArrayNumslices = 29,
135    SurfaceAlignment = 30,
136    ConcurrentKernels = 31,
137    EccEnabled = 32,
138    PciBusId = 33,
139    PciDeviceId = 34,
140    TccDriver = 35,
141    MemoryClockRate = 36,
142    GlobalMemoryBusWidth = 37,
143    L2CacheSize = 38,
144    MaxThreadsPerMultiprocessor = 39,
145    AsyncEngineCount = 40,
146    UnifiedAddressing = 41,
147    MaximumTexture1dLayeredWidth = 42,
148    MaximumTexture1dLayeredLayers = 43,
149    CanTex2dGather = 44,
150    MaximumTexture2dGatherWidth45,
151    MaximumTexture2dGatherHeight = 46,
152    MaximumTexture3dWidthAlternate = 47,
153    MaximumTexture3dHeightAlternate = 48,
154    MaximumTexture3dDepthAlternate = 49,
155    PciDomainId = 50,
156    TexturePitchAlignment = 51,
157    MaximumTexturecubemapWidth = 52,
158    MaximumTexturecubemapLayeredWidth = 53,
159    MaximumTexturecubemapLayeredLayers = 54,
160    MaximumSurface1dWidth = 55,
161    MaximumSurface2dWidth = 56,
162    MaximumSurface2dHeight = 57,
163    MaximumSurface3dWidth = 58,
164    MaximumSurface3dHeight = 59,
165    MaximumSurface3dDepth = 60,
166    MaximumSurface1dLayeredWidth = 61,
167    MaximumSurface1dLayeredLayers = 62,
168    MaximumSurface2dLayeredWidth = 63,
169    MaximumSurface2dLayeredHeight = 64,
170    MaximumSurface2dLayeredLayers = 65,
171    MaximumSurfacecubemapWidth = 66,
172    MaximumSurfacecubemapLayeredWidth = 67,
173    MaximumSurfacecubemapLayeredLayers = 68,
174    MaximumTexture1dLinearWidth = 69,
175    MaximumTexture2dLinearWidth = 70,
176    MaximumTexture2dLinearHeight = 71,
177    MaximumTexture2dLinearPitch = 72,
178    MaximumTexture2dMipmappedWidth = 73,
179    MaximumTexture2dMipmappedHeight = 74,
180    ComputeCapabilityMajor = 75,
181    ComputeCapabilityMinor = 76,
182    MaximumTexture1dMipmappedWidth = 77,
183    StreamPrioritiesSupported = 78,
184    GlobalL1CacheSupported = 79,
185    LocalL1CacheSupported = 80,
186    MaxSharedMemoryPerMultiprocessor = 81,
187    MaxRegistersPerMultiprocessor = 82,
188    ManagedMemory = 83,
189    MultiGpuBoard = 84,
190    MultiGpuBoardGroupId = 85,
191    HostNativeAtomicSupported = 86,
192    SingleToDoublePrecisionPerfRatio = 87,
193    PageableMemoryAccess = 88,
194    ConcurrentManagedAccess = 89,
195    ComputePreemptionSupported = 90,
196    CanUseHostPointerForRegisteredMem = 91,
197    CanUseStreamMemOps = 92,
198    CanUse64BitStreamMemOps = 93,
199    CanUseStreamWaitValueNor = 94,
200    CooperativeLaunch = 95,
201    CooperativeMultiDeviceLaunch = 96,
202    MaxSharedMemoryPerBlockOptin = 97,
203    CanFlushRemoteWrites = 98,
204    HostRegisterSupported = 99,
205    PageableMemoryAccessUsesHostPageTables = 100,
206    DirectManagedMemAccessFromHost = 101,
207    VirtualMemoryManagementSupported = 102,
208    HandleTypePosixFileDescriptorSupported = 103,
209    HandleTypeWin32HandleSupported = 104,
210    HandleTypeWin32KmtHandleSupported = 105,
211    MaxBlocksPerMultiprocessor = 106,
212    GenericCompressionSupported = 107,
213    MaxPersistingL2CacheSize = 108,
214    MaxAccessPolicyWindowSize = 109,
215    GpuDirectRdmaWithCudaVmmSupported = 110,
216    ReservedSharedMemoryPerBlock = 111,
217    SparseCudaArraySupported = 112,
218    ReadOnlyHostRegisterSupported = 113,
219    TimelineSemaphoreInteropSupported = 114,
220    MemoryPoolsSupported = 115,
221    GpuDirectRdmaSupported = 116,
222    GpuDirectRdmaFlushWritesOptions = 117,
223    GpuDirectRdmaWritesOrdering = 118,
224    MempoolSupportedHandleTypes = 119,
225}