fil_rustacuda/device.rs
1//! Functions and types for enumerating CUDA devices and retrieving information about them.
2
3use crate::error::{CudaResult, ToResult};
4use cuda_driver_sys::*;
5use std::ffi::CStr;
6use std::ops::Range;
7
/// All supported device attributes for [Device::get_attribute](struct.Device.html#method.get_attribute)
///
/// The enum is `#[repr(u32)]` and every discriminant is the numeric value of the corresponding
/// CUDA driver attribute, which is what allows `Device::get_attribute` to hand a variant
/// straight to the driver. Values 15 and 44 are deliberately absent because the attributes they
/// used to identify are deprecated.
#[repr(u32)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum DeviceAttribute {
    /// Maximum number of threads per block
    MaxThreadsPerBlock = 1,
    /// Maximum x-dimension of a block
    MaxBlockDimX = 2,
    /// Maximum y-dimension of a block
    MaxBlockDimY = 3,
    /// Maximum z-dimension of a block
    MaxBlockDimZ = 4,
    /// Maximum x-dimension of a grid
    MaxGridDimX = 5,
    /// Maximum y-dimension of a grid
    MaxGridDimY = 6,
    /// Maximum z-dimension of a grid
    MaxGridDimZ = 7,
    /// Maximum amount of shared memory available to a thread block in bytes
    MaxSharedMemoryPerBlock = 8,
    /// Memory available on device for constant variables in a kernel in bytes
    TotalConstantMemory = 9,
    /// Warp size in threads
    WarpSize = 10,
    /// Maximum pitch in bytes allowed by the memory copy functions that involve memory regions
    /// allocated through cuMemAllocPitch()
    MaxPitch = 11,
    /// Maximum number of 32-bit registers available to a thread block
    MaxRegistersPerBlock = 12,
    /// Typical clock frequency in kilohertz
    ClockRate = 13,
    /// Alignment requirement for textures
    TextureAlignment = 14,
    // 15 (GpuOverlap) is deprecated and intentionally has no variant.
    /// Number of multiprocessors on device.
    MultiprocessorCount = 16,
    /// Specifies whether there is a run time limit on kernels
    KernelExecTimeout = 17,
    /// Device is integrated with host memory
    Integrated = 18,
    /// Device can map host memory into CUDA address space
    CanMapHostMemory = 19,
    /// Compute Mode
    ComputeMode = 20,
    /// Maximum 1D texture width
    MaximumTexture1DWidth = 21,
    /// Maximum 2D texture width
    MaximumTexture2DWidth = 22,
    /// Maximum 2D texture height
    MaximumTexture2DHeight = 23,
    /// Maximum 3D texture width
    MaximumTexture3DWidth = 24,
    /// Maximum 3D texture height
    MaximumTexture3DHeight = 25,
    /// Maximum 3D texture depth
    MaximumTexture3DDepth = 26,
    /// Maximum 2D layered texture width
    MaximumTexture2DLayeredWidth = 27,
    /// Maximum 2D layered texture height
    MaximumTexture2DLayeredHeight = 28,
    /// Maximum layers in a 2D layered texture
    MaximumTexture2DLayeredLayers = 29,
    /// Alignment requirement for surfaces
    SurfaceAlignment = 30,
    /// Device can possibly execute multiple kernels concurrently
    ConcurrentKernels = 31,
    /// Device has ECC support enabled
    EccEnabled = 32,
    /// PCI bus ID of the device
    PciBusId = 33,
    /// PCI device ID of the device
    PciDeviceId = 34,
    /// Device is using TCC driver model
    TccDriver = 35,
    /// Peak memory clock frequency in kilohertz
    MemoryClockRate = 36,
    /// Global memory bus width in bits
    GlobalMemoryBusWidth = 37,
    /// Size of L2 cache in bytes.
    L2CacheSize = 38,
    /// Maximum resident threads per multiprocessor
    MaxThreadsPerMultiprocessor = 39,
    /// Number of asynchronous engines
    AsyncEngineCount = 40,
    /// Device shares a unified address space with the host
    UnifiedAddressing = 41,
    /// Maximum 1D layered texture width
    MaximumTexture1DLayeredWidth = 42,
    /// Maximum layers in a 1D layered texture
    MaximumTexture1DLayeredLayers = 43,
    // 44 (CanTex2DGather) is deprecated and intentionally has no variant.
    /// Maximum 2D texture width if CUDA_ARRAY3D_TEXTURE_GATHER is set
    MaximumTexture2DGatherWidth = 45,
    /// Maximum 2D texture height if CUDA_ARRAY3D_TEXTURE_GATHER is set
    MaximumTexture2DGatherHeight = 46,
    /// Alternate maximum 3D texture width
    MaximumTexture3DWidthAlternate = 47,
    /// Alternate maximum 3D texture height
    MaximumTexture3DHeightAlternate = 48,
    /// Alternate maximum 3D texture depth
    MaximumTexture3DDepthAlternate = 49,
    /// PCI domain ID of the device
    PciDomainId = 50,
    /// Pitch alignment requirement for textures
    TexturePitchAlignment = 51,
    /// Maximum cubemap texture width/height
    MaximumTextureCubemapWidth = 52,
    /// Maximum cubemap layered texture width/height
    MaximumTextureCubemapLayeredWidth = 53,
    /// Maximum layers in a cubemap layered texture
    MaximumTextureCubemapLayeredLayers = 54,
    /// Maximum 1D surface width
    MaximumSurface1DWidth = 55,
    /// Maximum 2D surface width
    MaximumSurface2DWidth = 56,
    /// Maximum 2D surface height
    MaximumSurface2DHeight = 57,
    /// Maximum 3D surface width
    MaximumSurface3DWidth = 58,
    /// Maximum 3D surface height
    MaximumSurface3DHeight = 59,
    /// Maximum 3D surface depth
    MaximumSurface3DDepth = 60,
    /// Maximum 1D layered surface width
    MaximumSurface1DLayeredWidth = 61,
    /// Maximum layers in a 1D layered surface
    MaximumSurface1DLayeredLayers = 62,
    /// Maximum 2D layered surface width
    MaximumSurface2DLayeredWidth = 63,
    /// Maximum 2D layered surface height
    MaximumSurface2DLayeredHeight = 64,
    /// Maximum layers in a 2D layered surface
    MaximumSurface2DLayeredLayers = 65,
    /// Maximum cubemap surface width
    MaximumSurfacecubemapWidth = 66,
    /// Maximum cubemap layered surface width
    MaximumSurfacecubemapLayeredWidth = 67,
    /// Maximum layers in a cubemap layered surface
    MaximumSurfacecubemapLayeredLayers = 68,
    /// Maximum 1D linear texture width
    MaximumTexture1DLinearWidth = 69,
    /// Maximum 2D linear texture width
    MaximumTexture2DLinearWidth = 70,
    /// Maximum 2D linear texture height
    MaximumTexture2DLinearHeight = 71,
    /// Maximum 2D linear texture pitch in bytes
    MaximumTexture2DLinearPitch = 72,
    /// Maximum mipmapped 2D texture width
    MaximumTexture2DMipmappedWidth = 73,
    /// Maximum mipmapped 2D texture height
    MaximumTexture2DMipmappedHeight = 74,
    /// Major compute capability version number
    ComputeCapabilityMajor = 75,
    /// Minor compute capability version number
    ComputeCapabilityMinor = 76,
    /// Maximum mipmapped 1D texture width
    MaximumTexture1DMipmappedWidth = 77,
    /// Device supports stream priorities
    StreamPrioritiesSupported = 78,
    /// Device supports caching globals in L1
    GlobalL1CacheSupported = 79,
    /// Device supports caching locals in L1
    LocalL1CacheSupported = 80,
    /// Maximum shared memory available per multiprocessor in bytes
    MaxSharedMemoryPerMultiprocessor = 81,
    /// Maximum number of 32-bit registers available per multiprocessor
    MaxRegistersPerMultiprocessor = 82,
    /// Device can allocate managed memory on this system
    ManagedMemory = 83,
    /// Device is on a multi-GPU board
    MultiGpuBoard = 84,
    /// Unique ID for a group of devices on the same multi-GPU board
    MultiGpuBoardGroupId = 85,
    /// Link between the device and the host supports native atomic operations (this is a
    /// placeholder attribute and is not supported on any current hardware)
    HostNativeAtomicSupported = 86,
    /// Ratio of single precision performance (in floating-point operations per second) to double
    /// precision performance
    SingleToDoublePrecisionPerfRatio = 87,
    /// Device supports coherently accessing pageable memory without calling cudaHostRegister on it.
    PageableMemoryAccess = 88,
    /// Device can coherently access managed memory concurrently with the CPU
    ConcurrentManagedAccess = 89,
    /// Device supports compute preemption
    ComputePreemptionSupported = 90,
    /// Device can access host registered memory at the same virtual address as the CPU
    CanUseHostPointerForRegisteredMem = 91,
    /// Stream memory operations are supported.
    CanUseStreamMemOps = 92,
    /// 64-bit stream memory operations are supported.
    CanUse64BitStreamMemOps = 93,
    /// Wait value NOR is supported
    CanUseStreamWaitValueNor = 94,
    /// Supports launching cooperative kernels
    CooperativeLaunch = 95,
    /// Supports launching cooperative kernels on multiple devices.
    CooperativeMultiDeviceLaunch = 96,
    /// Maximum opt-in shared memory per block.
    MaxSharedMemoryPerBlockOptin = 97,
    /// Stream memory operations can wait for flush.
    CanFlushRemoteWrites = 98,
    /// Device supports host memory registration
    HostRegisterSupported = 99,
    /// Device accesses pageable memory via the host page tables
    PageableMemoryAccessUsesHostPageTable = 100,
    /// Host can directly access managed memory on the device without migration
    DirectManagedMemAccessFromhost = 101,
    /// Device supports virtual memory management APIs
    VirtualMemoryManagementSupported = 102,
    /// Device supports exporting memory to a posix file descriptor
    HandleTypePosixFileDescriptorSupported = 103,
    /// Device supports exporting memory to a Win32 NT handle
    HandleTypeWin32HandleSupported = 104,
    /// Device supports exporting memory to a Win32 KMT handle
    HandleTypeWin32KmtHandleSupported = 105,

    // Sentinel one past the last real attribute; hidden from the docs and not meant to be
    // passed to the driver by users.
    #[doc(hidden)]
    __NonExhaustive = 106,
}
227
228/// Opaque handle to a CUDA device.
229#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
230pub struct Device {
231 pub(crate) device: CUdevice,
232}
233impl Device {
234 /// Get the number of CUDA-capable devices.
235 ///
236 /// Returns the number of devices with compute-capability 2.0 or greater which are available
237 /// for execution.
238 ///
239 /// # Example
240 /// ```
241 /// # use rustacuda::*;
242 /// # use std::error::Error;
243 /// # fn main() -> Result<(), Box<dyn Error>> {
244 /// # init(CudaFlags::empty())?;
245 /// use rustacuda::device::Device;
246 /// let num_devices = Device::num_devices()?;
247 /// println!("Number of devices: {}", num_devices);
248 /// # Ok(())
249 /// # }
250 /// ```
251 pub fn num_devices() -> CudaResult<u32> {
252 unsafe {
253 let mut num_devices = 0i32;
254 cuDeviceGetCount(&mut num_devices as *mut i32).to_result()?;
255 Ok(num_devices as u32)
256 }
257 }
258
259 /// Get a handle to the `ordinal`'th CUDA device.
260 ///
261 /// Ordinal must be in the range `0..num_devices()`. If not, an error will be returned.
262 ///
263 /// # Example
264 /// ```
265 /// # use rustacuda::*;
266 /// # use std::error::Error;
267 /// # fn main() -> Result<(), Box<dyn Error>> {
268 /// # init(CudaFlags::empty())?;
269 /// use rustacuda::device::Device;
270 /// let device = Device::get_device(0)?;
271 /// println!("Device Name: {}", device.name()?);
272 /// # Ok(())
273 /// # }
274 /// ```
275 pub fn get_device(ordinal: u32) -> CudaResult<Device> {
276 unsafe {
277 let mut device = Device { device: 0 };
278 cuDeviceGet(&mut device.device as *mut CUdevice, ordinal as i32).to_result()?;
279 Ok(device)
280 }
281 }
282
283 /// Return an iterator over all CUDA devices.
284 ///
285 /// # Example
286 /// ```
287 /// # use rustacuda::*;
288 /// # use std::error::Error;
289 /// # fn main() -> Result<(), Box<dyn Error>> {
290 /// # init(CudaFlags::empty())?;
291 /// use rustacuda::device::Device;
292 /// for device in Device::devices()? {
293 /// let device = device?;
294 /// println!("Device Name: {}", device.name()?);
295 /// }
296 /// # Ok(())
297 /// # }
298 /// ```
299 pub fn devices() -> CudaResult<Devices> {
300 Device::num_devices().map(|num_devices| Devices {
301 range: 0..num_devices,
302 })
303 }
304
305 /// Returns the total amount of memory available on the device in bytes.
306 ///
307 /// # Example
308 /// ```
309 /// # use rustacuda::*;
310 /// # use std::error::Error;
311 /// # fn main() -> Result<(), Box<dyn Error>> {
312 /// # init(CudaFlags::empty())?;
313 /// use rustacuda::device::Device;
314 /// let device = Device::get_device(0)?;
315 /// println!("Device Memory: {}", device.total_memory()?);
316 /// # Ok(())
317 /// # }
318 /// ```
319 pub fn total_memory(self) -> CudaResult<usize> {
320 unsafe {
321 let mut memory = 0;
322 cuDeviceTotalMem_v2(&mut memory as *mut usize, self.device).to_result()?;
323 Ok(memory)
324 }
325 }
326
327 /// Returns the name of this device.
328 ///
329 /// # Example
330 /// ```
331 /// # use rustacuda::*;
332 /// # use std::error::Error;
333 /// # fn main() -> Result<(), Box<dyn Error>> {
334 /// # init(CudaFlags::empty())?;
335 /// use rustacuda::device::Device;
336 /// let device = Device::get_device(0)?;
337 /// println!("Device Name: {}", device.name()?);
338 /// # Ok(())
339 /// # }
340 /// ```
341 pub fn name(self) -> CudaResult<String> {
342 unsafe {
343 let mut name = [0u8; 128]; // Hopefully this is big enough...
344 cuDeviceGetName(
345 &mut name[0] as *mut u8 as *mut ::std::os::raw::c_char,
346 128,
347 self.device,
348 )
349 .to_result()?;
350 let nul_index = name
351 .iter()
352 .cloned()
353 .position(|byte| byte == 0)
354 .expect("Expected device name to fit in 128 bytes and be nul-terminated.");
355 let cstr = CStr::from_bytes_with_nul_unchecked(&name[0..=nul_index]);
356 Ok(cstr.to_string_lossy().into_owned())
357 }
358 }
359
360 /// Returns the UUID of this device.
361 ///
362 /// # Example
363 /// ```
364 /// # use rustacuda::*;
365 /// # use std::error::Error;
366 /// # fn main() -> Result<(), Box<dyn Error>> {
367 /// # init(CudaFlags::empty())?;
368 /// use rustacuda::device::Device;
369 /// let device = Device::get_device(0)?;
370 /// println!("Device UUID: {:?}", device.uuid()?);
371 /// # Ok(())
372 /// # }
373 /// ```
374 pub fn uuid(self) -> CudaResult<[u8; 16]> {
375 unsafe {
376 let mut cu_uuid = CUuuid { bytes: [0; 16] };
377 cuDeviceGetUuid(&mut cu_uuid, self.device).to_result()?;
378 let uuid: [u8; 16] = ::std::mem::transmute(cu_uuid.bytes);
379 Ok(uuid)
380 }
381 }
382
383 /// Returns information about this device.
384 ///
385 /// # Example
386 /// ```
387 /// # use rustacuda::*;
388 /// # use std::error::Error;
389 /// # fn main() -> Result<(), Box<dyn Error>> {
390 /// # init(CudaFlags::empty())?;
391 /// use rustacuda::device::{Device, DeviceAttribute};
392 /// let device = Device::get_device(0)?;
393 /// println!("Max Threads Per Block: {}",
394 /// device.get_attribute(DeviceAttribute::MaxThreadsPerBlock).unwrap());
395 /// # Ok(())
396 /// # }
397 /// ```
398 pub fn get_attribute(self, attr: DeviceAttribute) -> CudaResult<i32> {
399 unsafe {
400 let mut val = 0i32;
401 cuDeviceGetAttribute(
402 &mut val as *mut i32,
403 // This should be safe, as the repr and values of DeviceAttribute should match.
404 ::std::mem::transmute(attr),
405 self.device,
406 )
407 .to_result()?;
408 Ok(val)
409 }
410 }
411
412 pub(crate) fn into_inner(self) -> CUdevice {
413 self.device
414 }
415}
416
/// Iterator that yields a handle for each available CUDA device, one ordinal at a time.
///
/// Values of this type are produced by
/// [the Device::devices function](./struct.Device.html#method.devices); see it for more
/// information.
#[derive(Clone, Debug)]
pub struct Devices {
    range: Range<u32>,
}
423impl Iterator for Devices {
424 type Item = CudaResult<Device>;
425
426 fn next(&mut self) -> Option<CudaResult<Device>> {
427 self.range.next().map(Device::get_device)
428 }
429}
430
#[cfg(test)]
mod test {
    use super::*;
    use std::error::Error;

    /// Initializes the CUDA driver API; every test that talks to the driver calls this first.
    fn test_init() -> Result<(), Box<dyn Error>> {
        crate::init(crate::CudaFlags::empty())?;
        Ok(())
    }

    #[test]
    fn test_num_devices() -> Result<(), Box<dyn Error>> {
        test_init()?;
        let num_devices = Device::num_devices()?;
        assert!(num_devices > 0);
        Ok(())
    }

    #[test]
    fn test_devices() -> Result<(), Box<dyn Error>> {
        test_init()?;
        let num_devices = Device::num_devices()?;
        let all_devices: CudaResult<Vec<_>> = Device::devices()?.collect();
        let all_devices = all_devices?;
        assert_eq!(num_devices as usize, all_devices.len());
        Ok(())
    }

    #[test]
    fn test_get_name() -> Result<(), Box<dyn Error>> {
        test_init()?;
        let device_name = Device::get_device(0)?.name()?;
        println!("{}", device_name);
        // `Device::name` reads into a 128-byte buffer that also holds the nul terminator, so
        // a valid name may be up to (and including) 127 bytes long.
        assert!(device_name.len() <= 127);
        Ok(())
    }

    #[test]
    fn test_get_memory() -> Result<(), Box<dyn Error>> {
        test_init()?;
        let memory = Device::get_device(0)?.total_memory()?;
        println!("{}", memory);
        // A usable device is expected to report a non-zero amount of memory.
        assert!(memory > 0);
        Ok(())
    }

    // Ensure that the two enums always stay aligned.
    #[test]
    fn test_enums_align() {
        assert_eq!(
            DeviceAttribute::__NonExhaustive as u32,
            CUdevice_attribute_enum::CU_DEVICE_ATTRIBUTE_MAX as u32
        );
    }

    // Smoke test: only checks that the UUID query succeeds.
    #[test]
    fn test_uuid() -> Result<(), Box<dyn Error>> {
        test_init()?;
        let uuid = Device::get_device(0)?.uuid()?;
        println!("{:?}", uuid);
        Ok(())
    }
}