1use crate::*;
2use num_enum::TryFromPrimitive;
3
/// A handle to a single CUDA device.
///
/// Instances are produced by [`Cuda::list_devices`]; the wrapped value is the
/// raw device handle that is passed back to the driver on every query.
#[derive(Clone, Copy, Debug)]
pub struct Device {
    /// Raw driver handle, filled in by `cuDeviceGet`.
    pub(crate) handle: i32,
}
8
9#[derive(Clone, Copy, Debug, TryFromPrimitive)]
11#[repr(u32)]
12pub enum CudaArrayFormat {
13 UnsignedInt8 = 0x01,
14 UnsignedInt16 = 0x02,
15 UnsignedInt32 = 0x03,
16 SignedInt8 = 0x08,
17 SignedInt16 = 0x09,
18 SignedInt32 = 0x0a,
19 Half = 0x10,
20 Float = 0x20,
21 Nv12 = 0xb0,
22}
23
24impl Device {
25 pub fn name(&self) -> CudaResult<String> {
27 let mut buf = [0u8; 256];
28 cuda_error(unsafe { sys::cuDeviceGetName(buf.as_mut_ptr() as *mut i8, 256, self.handle) })?;
29 Ok(
30 String::from_utf8_lossy(&buf[..buf.iter().position(|x| *x == 0).unwrap_or(0)])
31 .into_owned(),
32 )
33 }
34
35 pub fn uuid(&self) -> CudaResult<u128> {
37 let mut out = 0u128;
38 cuda_error(unsafe { sys::cuDeviceGetUuid(&mut out as *mut u128 as *mut _, self.handle) })?;
39 Ok(out)
40 }
41
42 pub fn memory_size(&self) -> CudaResult<usize> {
44 let mut memory_size = 0usize;
45 cuda_error(unsafe {
46 sys::cuDeviceTotalMem_v2(&mut memory_size as *mut usize as *mut _, self.handle)
47 })?;
48 Ok(memory_size)
49 }
50
51 pub fn get_attribute(&self, attribute: DeviceAttribute) -> CudaResult<i32> {
53 let mut out = 0i32;
54 cuda_error(unsafe {
55 sys::cuDeviceGetAttribute(&mut out as *mut i32, attribute as u32, self.handle)
56 })?;
57 Ok(out)
58 }
59
60 pub fn compute_capability(&self) -> CudaResult<CudaVersion> {
62 Ok(CudaVersion {
63 major: self.get_attribute(DeviceAttribute::ComputeCapabilityMajor)? as u32,
64 minor: self.get_attribute(DeviceAttribute::ComputeCapabilityMinor)? as u32,
65 })
66 }
67
68 pub fn get_texture_1d_linear_max_width(
70 &self,
71 format: CudaArrayFormat,
72 channels: u32,
73 ) -> CudaResult<usize> {
74 let mut out = 0usize;
75 cuda_error(unsafe {
76 sys::cuDeviceGetTexture1DLinearMaxWidth(
77 &mut out as *mut usize as *mut _,
78 format as u32,
79 channels,
80 self.handle,
81 )
82 })?;
83 Ok(out)
84 }
85}
86
87impl Cuda {
88 pub fn list_devices() -> CudaResult<Vec<Device>> {
90 let mut count = 0i32;
91 cuda_error(unsafe { sys::cuDeviceGetCount(&mut count as *mut i32) })?;
92 let mut out = Vec::with_capacity(count as usize);
93 for i in 0..count {
94 let mut device = Device { handle: 0 };
95 cuda_error(unsafe { sys::cuDeviceGet(&mut device.handle as *mut i32, i) })?;
96 out.push(device);
97 }
98 Ok(out)
99 }
100}
101
102#[derive(Clone, Copy, Debug, TryFromPrimitive)]
104#[repr(u32)]
105pub enum DeviceAttribute {
106 MaxThreadsPerBlock = 1,
107 MaxBlockDimX = 2,
108 MaxBlockDimY = 3,
109 MaxBlockDimZ = 4,
110 MaxGridDimX = 5,
111 MaxGridDimY = 6,
112 MaxGridDimZ = 7,
113 SharedMemoryPerBlock = 8,
114 TotalConstantMemory = 9,
115 WarpSize = 10,
116 MaxPitch = 11,
117 RegistersPerBlock = 12,
118 ClockRate = 13,
119 TextureAlignment = 14,
120 GpuOverlap = 15,
121 MultiprocessorCount = 16,
122 KernelExecTimeout = 17,
123 Integrated = 18,
124 CanMapHostMemory = 19,
125 ComputeMode = 20,
126 MaximumTexture1dWidth = 21,
127 MaximumTexture2dWidth = 22,
128 MaximumTexture2dHeight = 23,
129 MaximumTexture3dWidth = 24,
130 MaximumTexture3dHeight = 25,
131 MaximumTexture3dDepth = 26,
132 MaximumTexture2dArrayWidth = 27,
133 MaximumTexture2dArrayHeight = 28,
134 MaximumTexture2dArrayNumslices = 29,
135 SurfaceAlignment = 30,
136 ConcurrentKernels = 31,
137 EccEnabled = 32,
138 PciBusId = 33,
139 PciDeviceId = 34,
140 TccDriver = 35,
141 MemoryClockRate = 36,
142 GlobalMemoryBusWidth = 37,
143 L2CacheSize = 38,
144 MaxThreadsPerMultiprocessor = 39,
145 AsyncEngineCount = 40,
146 UnifiedAddressing = 41,
147 MaximumTexture1dLayeredWidth = 42,
148 MaximumTexture1dLayeredLayers = 43,
149 CanTex2dGather = 44,
150 MaximumTexture2dGatherWidth45,
151 MaximumTexture2dGatherHeight = 46,
152 MaximumTexture3dWidthAlternate = 47,
153 MaximumTexture3dHeightAlternate = 48,
154 MaximumTexture3dDepthAlternate = 49,
155 PciDomainId = 50,
156 TexturePitchAlignment = 51,
157 MaximumTexturecubemapWidth = 52,
158 MaximumTexturecubemapLayeredWidth = 53,
159 MaximumTexturecubemapLayeredLayers = 54,
160 MaximumSurface1dWidth = 55,
161 MaximumSurface2dWidth = 56,
162 MaximumSurface2dHeight = 57,
163 MaximumSurface3dWidth = 58,
164 MaximumSurface3dHeight = 59,
165 MaximumSurface3dDepth = 60,
166 MaximumSurface1dLayeredWidth = 61,
167 MaximumSurface1dLayeredLayers = 62,
168 MaximumSurface2dLayeredWidth = 63,
169 MaximumSurface2dLayeredHeight = 64,
170 MaximumSurface2dLayeredLayers = 65,
171 MaximumSurfacecubemapWidth = 66,
172 MaximumSurfacecubemapLayeredWidth = 67,
173 MaximumSurfacecubemapLayeredLayers = 68,
174 MaximumTexture1dLinearWidth = 69,
175 MaximumTexture2dLinearWidth = 70,
176 MaximumTexture2dLinearHeight = 71,
177 MaximumTexture2dLinearPitch = 72,
178 MaximumTexture2dMipmappedWidth = 73,
179 MaximumTexture2dMipmappedHeight = 74,
180 ComputeCapabilityMajor = 75,
181 ComputeCapabilityMinor = 76,
182 MaximumTexture1dMipmappedWidth = 77,
183 StreamPrioritiesSupported = 78,
184 GlobalL1CacheSupported = 79,
185 LocalL1CacheSupported = 80,
186 MaxSharedMemoryPerMultiprocessor = 81,
187 MaxRegistersPerMultiprocessor = 82,
188 ManagedMemory = 83,
189 MultiGpuBoard = 84,
190 MultiGpuBoardGroupId = 85,
191 HostNativeAtomicSupported = 86,
192 SingleToDoublePrecisionPerfRatio = 87,
193 PageableMemoryAccess = 88,
194 ConcurrentManagedAccess = 89,
195 ComputePreemptionSupported = 90,
196 CanUseHostPointerForRegisteredMem = 91,
197 CanUseStreamMemOps = 92,
198 CanUse64BitStreamMemOps = 93,
199 CanUseStreamWaitValueNor = 94,
200 CooperativeLaunch = 95,
201 CooperativeMultiDeviceLaunch = 96,
202 MaxSharedMemoryPerBlockOptin = 97,
203 CanFlushRemoteWrites = 98,
204 HostRegisterSupported = 99,
205 PageableMemoryAccessUsesHostPageTables = 100,
206 DirectManagedMemAccessFromHost = 101,
207 VirtualMemoryManagementSupported = 102,
208 HandleTypePosixFileDescriptorSupported = 103,
209 HandleTypeWin32HandleSupported = 104,
210 HandleTypeWin32KmtHandleSupported = 105,
211 MaxBlocksPerMultiprocessor = 106,
212 GenericCompressionSupported = 107,
213 MaxPersistingL2CacheSize = 108,
214 MaxAccessPolicyWindowSize = 109,
215 GpuDirectRdmaWithCudaVmmSupported = 110,
216 ReservedSharedMemoryPerBlock = 111,
217 SparseCudaArraySupported = 112,
218 ReadOnlyHostRegisterSupported = 113,
219 TimelineSemaphoreInteropSupported = 114,
220 MemoryPoolsSupported = 115,
221 GpuDirectRdmaSupported = 116,
222 GpuDirectRdmaFlushWritesOptions = 117,
223 GpuDirectRdmaWritesOrdering = 118,
224 MempoolSupportedHandleTypes = 119,
225}