fil_rustacuda/
device.rs

1//! Functions and types for enumerating CUDA devices and retrieving information about them.
2
3use crate::error::{CudaResult, ToResult};
4use cuda_driver_sys::*;
5use std::ffi::CStr;
6use std::ops::Range;
7
/// Every device attribute that can be queried through
/// [Device::get_attribute](struct.Device.html#method.get_attribute).
///
/// The enum is `repr(u32)` and spells out each discriminant because `Device::get_attribute`
/// transmutes the value into the driver's own attribute enum; the numbers must not be changed.
#[repr(u32)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum DeviceAttribute {
    /// Largest number of threads a single block may contain
    MaxThreadsPerBlock = 1,
    /// Upper bound on the x-dimension of a block
    MaxBlockDimX = 2,
    /// Upper bound on the y-dimension of a block
    MaxBlockDimY = 3,
    /// Upper bound on the z-dimension of a block
    MaxBlockDimZ = 4,
    /// Upper bound on the x-dimension of a grid
    MaxGridDimX = 5,
    /// Upper bound on the y-dimension of a grid
    MaxGridDimY = 6,
    /// Upper bound on the z-dimension of a grid
    MaxGridDimZ = 7,
    /// Maximum shared memory, in bytes, available to one thread block
    MaxSharedMemoryPerBlock = 8,
    /// Device memory, in bytes, available for constant variables in a kernel
    TotalConstantMemory = 9,
    /// Number of threads in a warp
    WarpSize = 10,
    /// Largest pitch, in bytes, accepted by the memory copy functions for memory regions
    /// allocated through cuMemAllocPitch()
    MaxPitch = 11,
    /// Maximum number of 32-bit registers available to one thread block
    MaxRegistersPerBlock = 12,
    /// Typical clock frequency, in kilohertz
    ClockRate = 13,
    /// Alignment required for textures
    TextureAlignment = 14,
    // 15 (GpuOverlap) is deprecated and intentionally left out.
    /// Number of multiprocessors on the device
    MultiprocessorCount = 16,
    /// Whether kernels are subject to a run time limit
    KernelExecTimeout = 17,
    /// Whether the device is integrated with host memory
    Integrated = 18,
    /// Whether the device can map host memory into the CUDA address space
    CanMapHostMemory = 19,
    /// Compute mode of the device
    ComputeMode = 20,
    /// Maximum width of a 1D texture
    MaximumTexture1DWidth = 21,
    /// Maximum width of a 2D texture
    MaximumTexture2DWidth = 22,
    /// Maximum height of a 2D texture
    MaximumTexture2DHeight = 23,
    /// Maximum width of a 3D texture
    MaximumTexture3DWidth = 24,
    /// Maximum height of a 3D texture
    MaximumTexture3DHeight = 25,
    /// Maximum depth of a 3D texture
    MaximumTexture3DDepth = 26,
    /// Maximum width of a 2D layered texture
    MaximumTexture2DLayeredWidth = 27,
    /// Maximum height of a 2D layered texture
    MaximumTexture2DLayeredHeight = 28,
    /// Maximum number of layers in a 2D layered texture
    MaximumTexture2DLayeredLayers = 29,
    /// Alignment required for surfaces
    SurfaceAlignment = 30,
    /// Whether the device can possibly execute multiple kernels concurrently
    ConcurrentKernels = 31,
    /// Whether ECC support is enabled on the device
    EccEnabled = 32,
    /// PCI bus ID of the device
    PciBusId = 33,
    /// PCI device ID of the device
    PciDeviceId = 34,
    /// Whether the device is using the TCC driver model
    TccDriver = 35,
    /// Peak memory clock frequency, in kilohertz
    MemoryClockRate = 36,
    /// Width of the global memory bus, in bits
    GlobalMemoryBusWidth = 37,
    /// Size of the L2 cache, in bytes
    L2CacheSize = 38,
    /// Maximum number of resident threads per multiprocessor
    MaxThreadsPerMultiprocessor = 39,
    /// Number of asynchronous engines
    AsyncEngineCount = 40,
    /// Whether the device shares a unified address space with the host
    UnifiedAddressing = 41,
    /// Maximum width of a 1D layered texture
    MaximumTexture1DLayeredWidth = 42,
    /// Maximum number of layers in a 1D layered texture
    MaximumTexture1DLayeredLayers = 43,
    // 44 (CanTex2DGather) is deprecated and intentionally left out.
    /// Maximum 2D texture width when CUDA_ARRAY3D_TEXTURE_GATHER is set
    MaximumTexture2DGatherWidth = 45,
    /// Maximum 2D texture height when CUDA_ARRAY3D_TEXTURE_GATHER is set
    MaximumTexture2DGatherHeight = 46,
    /// Alternate maximum width of a 3D texture
    MaximumTexture3DWidthAlternate = 47,
    /// Alternate maximum height of a 3D texture
    MaximumTexture3DHeightAlternate = 48,
    /// Alternate maximum depth of a 3D texture
    MaximumTexture3DDepthAlternate = 49,
    /// PCI domain ID of the device
    PciDomainId = 50,
    /// Pitch alignment required for textures
    TexturePitchAlignment = 51,
    /// Maximum width/height of a cubemap texture
    MaximumTextureCubemapWidth = 52,
    /// Maximum width/height of a cubemap layered texture
    MaximumTextureCubemapLayeredWidth = 53,
    /// Maximum number of layers in a cubemap layered texture
    MaximumTextureCubemapLayeredLayers = 54,
    /// Maximum width of a 1D surface
    MaximumSurface1DWidth = 55,
    /// Maximum width of a 2D surface
    MaximumSurface2DWidth = 56,
    /// Maximum height of a 2D surface
    MaximumSurface2DHeight = 57,
    /// Maximum width of a 3D surface
    MaximumSurface3DWidth = 58,
    /// Maximum height of a 3D surface
    MaximumSurface3DHeight = 59,
    /// Maximum depth of a 3D surface
    MaximumSurface3DDepth = 60,
    /// Maximum width of a 1D layered surface
    MaximumSurface1DLayeredWidth = 61,
    /// Maximum number of layers in a 1D layered surface
    MaximumSurface1DLayeredLayers = 62,
    /// Maximum width of a 2D layered surface
    MaximumSurface2DLayeredWidth = 63,
    /// Maximum height of a 2D layered surface
    MaximumSurface2DLayeredHeight = 64,
    /// Maximum number of layers in a 2D layered surface
    MaximumSurface2DLayeredLayers = 65,
    /// Maximum width of a cubemap surface
    MaximumSurfacecubemapWidth = 66,
    /// Maximum width of a cubemap layered surface
    MaximumSurfacecubemapLayeredWidth = 67,
    /// Maximum number of layers in a cubemap layered surface
    MaximumSurfacecubemapLayeredLayers = 68,
    /// Maximum width of a 1D linear texture
    MaximumTexture1DLinearWidth = 69,
    /// Maximum width of a 2D linear texture
    MaximumTexture2DLinearWidth = 70,
    /// Maximum height of a 2D linear texture
    MaximumTexture2DLinearHeight = 71,
    /// Maximum pitch, in bytes, of a 2D linear texture
    MaximumTexture2DLinearPitch = 72,
    /// Maximum width of a mipmapped 2D texture
    MaximumTexture2DMipmappedWidth = 73,
    /// Maximum height of a mipmapped 2D texture
    MaximumTexture2DMipmappedHeight = 74,
    /// Major version number of the compute capability
    ComputeCapabilityMajor = 75,
    /// Minor version number of the compute capability
    ComputeCapabilityMinor = 76,
    /// Maximum width of a mipmapped 1D texture
    MaximumTexture1DMipmappedWidth = 77,
    /// Whether the device supports stream priorities
    StreamPrioritiesSupported = 78,
    /// Whether the device supports caching globals in L1
    GlobalL1CacheSupported = 79,
    /// Whether the device supports caching locals in L1
    LocalL1CacheSupported = 80,
    /// Maximum shared memory, in bytes, available per multiprocessor
    MaxSharedMemoryPerMultiprocessor = 81,
    /// Maximum number of 32-bit registers available per multiprocessor
    MaxRegistersPerMultiprocessor = 82,
    /// Whether the device can allocate managed memory on this system
    ManagedMemory = 83,
    /// Whether the device sits on a multi-GPU board
    MultiGpuBoard = 84,
    /// Unique ID shared by the group of devices on the same multi-GPU board
    MultiGpuBoardGroupId = 85,
    /// Whether the link between the device and the host supports native atomic operations (this
    /// is a placeholder attribute and is not supported on any current hardware)
    HostNativeAtomicSupported = 86,
    /// Ratio of single precision performance (in floating-point operations per second) to
    /// double precision performance
    SingleToDoublePrecisionPerfRatio = 87,
    /// Whether the device supports coherently accessing pageable memory without calling
    /// cudaHostRegister on it
    PageableMemoryAccess = 88,
    /// Whether the device can coherently access managed memory concurrently with the CPU
    ConcurrentManagedAccess = 89,
    /// Whether the device supports compute preemption
    ComputePreemptionSupported = 90,
    /// Whether the device can access host registered memory at the same virtual address as the
    /// CPU
    CanUseHostPointerForRegisteredMem = 91,
    /// Whether stream memory operations are supported
    CanUseStreamMemOps = 92,
    /// Whether 64-bit stream memory operations are supported
    CanUse64BitStreamMemOps = 93,
    /// Whether wait value NOR is supported
    CanUseStreamWaitValueNor = 94,
    /// Whether launching cooperative kernels is supported
    CooperativeLaunch = 95,
    /// Whether launching cooperative kernels on multiple devices is supported
    CooperativeMultiDeviceLaunch = 96,
    /// Maximum opt-in shared memory per block
    MaxSharedMemoryPerBlockOptin = 97,
    /// Whether stream memory operations can wait for flush
    CanFlushRemoteWrites = 98,
    /// Whether the device supports host memory registration
    HostRegisterSupported = 99,
    /// Whether the device accesses pageable memory via the host page tables
    PageableMemoryAccessUsesHostPageTable = 100,
    /// Whether the device supports direct access to device memory without migration
    DirectManagedMemAccessFromhost = 101,
    /// Whether the device supports the virtual memory management APIs
    VirtualMemoryManagementSupported = 102,
    /// Whether the device supports exporting memory to a posix file descriptor
    HandleTypePosixFileDescriptorSupported = 103,
    /// Whether the device supports exporting memory to a Win32 NT handle
    HandleTypeWin32HandleSupported = 104,
    /// Whether the device supports exporting memory to a Win32 KMT handle
    HandleTypeWin32KmtHandleSupported = 105,

    // Sentinel one past the last real attribute. The unit tests in this file compare it with
    // the driver's `CU_DEVICE_ATTRIBUTE_MAX` so the two enums cannot silently drift apart.
    #[doc(hidden)]
    __NonExhaustive = 106,
}
227
228/// Opaque handle to a CUDA device.
229#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
230pub struct Device {
231    pub(crate) device: CUdevice,
232}
233impl Device {
234    /// Get the number of CUDA-capable devices.
235    ///
236    /// Returns the number of devices with compute-capability 2.0 or greater which are available
237    /// for execution.
238    ///
239    /// # Example
240    /// ```
241    /// # use rustacuda::*;
242    /// # use std::error::Error;
243    /// # fn main() -> Result<(), Box<dyn Error>> {
244    /// # init(CudaFlags::empty())?;
245    /// use rustacuda::device::Device;
246    /// let num_devices = Device::num_devices()?;
247    /// println!("Number of devices: {}", num_devices);
248    /// # Ok(())
249    /// # }
250    /// ```
251    pub fn num_devices() -> CudaResult<u32> {
252        unsafe {
253            let mut num_devices = 0i32;
254            cuDeviceGetCount(&mut num_devices as *mut i32).to_result()?;
255            Ok(num_devices as u32)
256        }
257    }
258
259    /// Get a handle to the `ordinal`'th CUDA device.
260    ///
261    /// Ordinal must be in the range `0..num_devices()`. If not, an error will be returned.
262    ///
263    /// # Example
264    /// ```
265    /// # use rustacuda::*;
266    /// # use std::error::Error;
267    /// # fn main() -> Result<(), Box<dyn Error>> {
268    /// # init(CudaFlags::empty())?;
269    /// use rustacuda::device::Device;
270    /// let device = Device::get_device(0)?;
271    /// println!("Device Name: {}", device.name()?);
272    /// # Ok(())
273    /// # }
274    /// ```
275    pub fn get_device(ordinal: u32) -> CudaResult<Device> {
276        unsafe {
277            let mut device = Device { device: 0 };
278            cuDeviceGet(&mut device.device as *mut CUdevice, ordinal as i32).to_result()?;
279            Ok(device)
280        }
281    }
282
283    /// Return an iterator over all CUDA devices.
284    ///
285    /// # Example
286    /// ```
287    /// # use rustacuda::*;
288    /// # use std::error::Error;
289    /// # fn main() -> Result<(), Box<dyn Error>> {
290    /// # init(CudaFlags::empty())?;
291    /// use rustacuda::device::Device;
292    /// for device in Device::devices()? {
293    ///     let device = device?;
294    ///     println!("Device Name: {}", device.name()?);
295    /// }
296    /// # Ok(())
297    /// # }
298    /// ```
299    pub fn devices() -> CudaResult<Devices> {
300        Device::num_devices().map(|num_devices| Devices {
301            range: 0..num_devices,
302        })
303    }
304
305    /// Returns the total amount of memory available on the device in bytes.
306    ///
307    /// # Example
308    /// ```
309    /// # use rustacuda::*;
310    /// # use std::error::Error;
311    /// # fn main() -> Result<(), Box<dyn Error>> {
312    /// # init(CudaFlags::empty())?;
313    /// use rustacuda::device::Device;
314    /// let device = Device::get_device(0)?;
315    /// println!("Device Memory: {}", device.total_memory()?);
316    /// # Ok(())
317    /// # }
318    /// ```
319    pub fn total_memory(self) -> CudaResult<usize> {
320        unsafe {
321            let mut memory = 0;
322            cuDeviceTotalMem_v2(&mut memory as *mut usize, self.device).to_result()?;
323            Ok(memory)
324        }
325    }
326
327    /// Returns the name of this device.
328    ///
329    /// # Example
330    /// ```
331    /// # use rustacuda::*;
332    /// # use std::error::Error;
333    /// # fn main() -> Result<(), Box<dyn Error>> {
334    /// # init(CudaFlags::empty())?;
335    /// use rustacuda::device::Device;
336    /// let device = Device::get_device(0)?;
337    /// println!("Device Name: {}", device.name()?);
338    /// # Ok(())
339    /// # }
340    /// ```
341    pub fn name(self) -> CudaResult<String> {
342        unsafe {
343            let mut name = [0u8; 128]; // Hopefully this is big enough...
344            cuDeviceGetName(
345                &mut name[0] as *mut u8 as *mut ::std::os::raw::c_char,
346                128,
347                self.device,
348            )
349            .to_result()?;
350            let nul_index = name
351                .iter()
352                .cloned()
353                .position(|byte| byte == 0)
354                .expect("Expected device name to fit in 128 bytes and be nul-terminated.");
355            let cstr = CStr::from_bytes_with_nul_unchecked(&name[0..=nul_index]);
356            Ok(cstr.to_string_lossy().into_owned())
357        }
358    }
359
360    /// Returns the UUID of this device.
361    ///
362    /// # Example
363    /// ```
364    /// # use rustacuda::*;
365    /// # use std::error::Error;
366    /// # fn main() -> Result<(), Box<dyn Error>> {
367    /// # init(CudaFlags::empty())?;
368    /// use rustacuda::device::Device;
369    /// let device = Device::get_device(0)?;
370    /// println!("Device UUID: {:?}", device.uuid()?);
371    /// # Ok(())
372    /// # }
373    /// ```
374    pub fn uuid(self) -> CudaResult<[u8; 16]> {
375        unsafe {
376            let mut cu_uuid = CUuuid { bytes: [0; 16] };
377            cuDeviceGetUuid(&mut cu_uuid, self.device).to_result()?;
378            let uuid: [u8; 16] = ::std::mem::transmute(cu_uuid.bytes);
379            Ok(uuid)
380        }
381    }
382
383    /// Returns information about this device.
384    ///
385    /// # Example
386    /// ```
387    /// # use rustacuda::*;
388    /// # use std::error::Error;
389    /// # fn main() -> Result<(), Box<dyn Error>> {
390    /// # init(CudaFlags::empty())?;
391    /// use rustacuda::device::{Device, DeviceAttribute};
392    /// let device = Device::get_device(0)?;
393    /// println!("Max Threads Per Block: {}",
394    ///     device.get_attribute(DeviceAttribute::MaxThreadsPerBlock).unwrap());
395    /// # Ok(())
396    /// # }
397    /// ```
398    pub fn get_attribute(self, attr: DeviceAttribute) -> CudaResult<i32> {
399        unsafe {
400            let mut val = 0i32;
401            cuDeviceGetAttribute(
402                &mut val as *mut i32,
403                // This should be safe, as the repr and values of DeviceAttribute should match.
404                ::std::mem::transmute(attr),
405                self.device,
406            )
407            .to_result()?;
408            Ok(val)
409        }
410    }
411
412    pub(crate) fn into_inner(self) -> CUdevice {
413        self.device
414    }
415}
416
/// Iterator that walks every available CUDA device. It is created by
/// [the Device::devices function](./struct.Device.html#method.devices); see there for details.
#[derive(Clone, Debug)]
pub struct Devices {
    // Device ordinals that have not been visited yet.
    range: Range<u32>,
}
423impl Iterator for Devices {
424    type Item = CudaResult<Device>;
425
426    fn next(&mut self) -> Option<CudaResult<Device>> {
427        self.range.next().map(Device::get_device)
428    }
429}
430
#[cfg(test)]
mod test {
    use super::*;
    use std::error::Error;

    /// Initializes the CUDA driver API; every test that talks to a device calls this first.
    fn test_init() -> Result<(), Box<dyn Error>> {
        let flags = crate::CudaFlags::empty();
        crate::init(flags)?;
        Ok(())
    }

    /// At least one device must be visible.
    #[test]
    fn test_num_devices() -> Result<(), Box<dyn Error>> {
        test_init()?;
        assert!(Device::num_devices()? > 0);
        Ok(())
    }

    /// The iterator must yield exactly as many devices as `num_devices` reports.
    #[test]
    fn test_devices() -> Result<(), Box<dyn Error>> {
        test_init()?;
        let expected = Device::num_devices()? as usize;
        let enumerated = Device::devices()?.collect::<CudaResult<Vec<_>>>()?;
        assert_eq!(expected, enumerated.len());
        Ok(())
    }

    /// The name of the first device must be shorter than 127 bytes.
    #[test]
    fn test_get_name() -> Result<(), Box<dyn Error>> {
        test_init()?;
        let first = Device::get_device(0)?;
        let name = first.name()?;
        println!("{}", name);
        assert!(name.len() < 127);
        Ok(())
    }

    /// Querying the total memory of the first device must succeed.
    #[test]
    fn test_get_memory() -> Result<(), Box<dyn Error>> {
        test_init()?;
        let first = Device::get_device(0)?;
        let bytes = first.total_memory()?;
        println!("{}", bytes);
        Ok(())
    }

    // Ensure that the two enums always stay aligned.
    #[test]
    fn test_enums_align() {
        let ours = DeviceAttribute::__NonExhaustive as u32;
        let theirs = CUdevice_attribute_enum::CU_DEVICE_ATTRIBUTE_MAX as u32;
        assert_eq!(ours, theirs);
    }

    /// Querying the UUID of the first device must succeed.
    #[test]
    fn test_uuid() -> Result<(), Box<dyn Error>> {
        test_init()?;
        let first = Device::get_device(0)?;
        let id = first.uuid()?;
        println!("{:?}", id);
        Ok(())
    }
}