singe_cuda/device.rs
1#![allow(deprecated)]
2
3use std::{
4 ffi::CString,
5 mem::{self, MaybeUninit},
6};
7
8use num_enum::{IntoPrimitive, TryFromPrimitive};
9use singe_core::{impl_enum_conversion, impl_enum_display, string_from_c_chars};
10use singe_cuda_sys::driver;
11use singe_cuda_sys::runtime;
12
13use crate::{
14 context::ContextFlags,
15 error::{Error, Result},
16 try_ffi,
17 types::FunctionCache,
18};
19
20/// CUDA Limits.
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, TryFromPrimitive, IntoPrimitive)]
22#[repr(u32)]
23#[non_exhaustive]
24pub enum Limit {
25 /// GPU thread stack size.
26 StackSize = runtime::cudaLimit::cudaLimitStackSize as _,
27 /// GPU printf FIFO size.
28 PrintfFifoSize = runtime::cudaLimit::cudaLimitPrintfFifoSize as _,
29 /// GPU malloc heap size.
30 MallocHeapSize = runtime::cudaLimit::cudaLimitMallocHeapSize as _,
31 /// GPU device runtime synchronize depth.
32 DevRuntimeSyncDepth = runtime::cudaLimit::cudaLimitDevRuntimeSyncDepth as _,
33 /// GPU device runtime pending launch count.
34 DevRuntimePendingLaunchCount = runtime::cudaLimit::cudaLimitDevRuntimePendingLaunchCount as _,
35 /// A value between 0 and 128 that indicates the maximum fetch granularity of L2, in bytes.
36 /// The value is a hint.
37 MaxL2FetchGranularity = runtime::cudaLimit::cudaLimitMaxL2FetchGranularity as _,
38 /// A size in bytes for L2 persisting lines cache size.
39 PersistingL2CacheSize = runtime::cudaLimit::cudaLimitPersistingL2CacheSize as _,
40}
41
42impl_enum_conversion!(runtime::cudaLimit, Limit);
43
44impl_enum_display!(Limit, {
45 Self::StackSize => "cudaLimitStackSize",
46 Self::PrintfFifoSize => "cudaLimitPrintfFifoSize",
47 Self::MallocHeapSize => "cudaLimitMallocHeapSize",
48 Self::DevRuntimeSyncDepth => "cudaLimitDevRuntimeSyncDepth",
49 Self::DevRuntimePendingLaunchCount => "cudaLimitDevRuntimePendingLaunchCount",
50 Self::MaxL2FetchGranularity => "cudaLimitMaxL2FetchGranularity",
51 Self::PersistingL2CacheSize => "cudaLimitPersistingL2CacheSize",
52});
53
54/// CUDA device compute modes.
55#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, TryFromPrimitive, IntoPrimitive)]
56#[repr(u32)]
57#[non_exhaustive]
58pub enum ComputeMode {
59 /// Default compute mode; multiple threads can use [`Device::set_current`] with this device.
60 Default = runtime::cudaComputeMode::cudaComputeModeDefault as _,
61 /// Compute-exclusive-thread mode; only one thread in one process can use [`Device::set_current`] with this device.
62 Exclusive = runtime::cudaComputeMode::cudaComputeModeExclusive as _,
63 /// Compute-prohibited mode; no threads can use [`Device::set_current`] with this device.
64 Prohibited = runtime::cudaComputeMode::cudaComputeModeProhibited as _,
65 /// Compute-exclusive-process mode; many threads in one process can use [`Device::set_current`] with this device.
66 ExclusiveProcess = runtime::cudaComputeMode::cudaComputeModeExclusiveProcess as _,
67}
68
69impl_enum_conversion!(runtime::cudaComputeMode, ComputeMode);
70
71bitflags::bitflags! {
72 /// Flags for [`Device::enable_peer_access`].
73 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
74 pub struct PeerAccessFlags: u32 {
75 /// Default peer-access behavior.
76 const DEFAULT = runtime::cudaPeerAccessDefault;
77 }
78}
79
80/// Attributes queryable between two devices using [`Device::p2p_attribute`].
81#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, TryFromPrimitive, IntoPrimitive)]
82#[repr(u32)]
83#[non_exhaustive]
84pub enum PeerToPeerAttribute {
85 PerformanceRank = runtime::cudaDeviceP2PAttr::CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK as _,
86 AccessSupported = runtime::cudaDeviceP2PAttr::CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED as _,
87 NativeAtomicSupported =
88 runtime::cudaDeviceP2PAttr::CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED as _,
89 #[deprecated]
90 CudaArrayAccessSupported =
91 runtime::cudaDeviceP2PAttr::CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED as _,
92}
93
94impl_enum_conversion!(runtime::cudaDeviceP2PAttr, PeerToPeerAttribute);
95
96impl_enum_display!(PeerToPeerAttribute, {
97 Self::PerformanceRank => "cudaDevP2PAttrPerformanceRank",
98 Self::AccessSupported => "cudaDevP2PAttrAccessSupported",
99 Self::NativeAtomicSupported => "cudaDevP2PAttrNativeAtomicSupported",
100 Self::CudaArrayAccessSupported => "cudaDevP2PAttrCudaArrayAccessSupported",
101});
102
103#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
104pub struct StreamPriorityRange {
105 pub least: i32,
106 pub greatest: i32,
107}
108
109// TODO: use a crate
110#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
111pub struct Uuid {
112 pub bytes: [u8; 16],
113}
114
115impl From<driver::CUuuid> for Uuid {
116 fn from(value: driver::CUuuid) -> Self {
117 Self {
118 bytes: value.bytes.map(|byte| byte as u8),
119 }
120 }
121}
122
123impl From<Uuid> for driver::CUuuid {
124 fn from(value: Uuid) -> Self {
125 driver::CUuuid {
126 bytes: value.bytes.map(|byte| byte as _),
127 }
128 }
129}
130
131/// Rust representation of CUDA device properties.
132#[derive(Debug, Clone)]
133pub struct DeviceProperties {
134 /// ASCII string identifying device.
135 pub name: String,
136 /// 16-byte unique identifier.
137 pub uuid: Uuid,
138 /// 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms.
139 pub luid: [u8; 8],
140 /// LUID device node mask. Value is undefined on TCC and non-Windows platforms.
141 pub luid_device_node_mask: u32,
142 /// Global memory available on device in bytes.
143 pub total_global_mem: usize,
144 /// Shared memory available per block in bytes.
145 pub shared_mem_per_block: usize,
146 /// 32-bit registers available per block.
147 pub regs_per_block: i32,
148 /// Warp size in threads.
149 pub warp_size: i32,
150 /// Maximum pitch in bytes allowed by memory copies.
151 pub mem_pitch: usize,
152 /// Maximum number of threads per block.
153 pub max_threads_per_block: i32,
154 /// Maximum size of each dimension of a block.
155 pub max_threads_dim: [i32; 3],
156 /// Maximum size of each dimension of a grid.
157 pub max_grid_size: [i32; 3],
158 /// Constant memory available on device in bytes.
159 pub total_const_mem: usize,
160 /// Major compute capability.
161 pub major: i32,
162 /// Minor compute capability.
163 pub minor: i32,
164 /// Alignment requirement for textures.
165 pub texture_alignment: usize,
166 /// Pitch alignment requirement for texture references bound to pitched memory.
167 pub texture_pitch_alignment: usize,
168 /// Number of multiprocessors on device.
169 pub multi_processor_count: i32,
170 /// Device is integrated as opposed to discrete.
171 pub integrated: bool,
172 /// Device can map host memory into CUDA address space.
173 pub can_map_host_memory: bool,
174 /// Maximum 1D texture size.
175 pub max_texture1d: i32,
176 /// Maximum 1D mipmapped texture size.
177 pub max_texture1d_mipmap: i32,
178 /// Maximum 2D texture dimensions.
179 pub max_texture2d: [i32; 2],
180 /// Maximum 2D mipmapped texture dimensions.
181 pub max_texture2d_mipmap: [i32; 2],
182 /// Maximum dimensions (width, height, pitch) for 2D textures bound to linear memory.
183 pub max_texture2d_linear: [i32; 3],
184 /// Maximum 2D texture dimensions for texture gather operations.
185 pub max_texture2d_gather: [i32; 2],
186 /// Maximum 3D texture dimensions.
187 pub max_texture3d: [i32; 3],
188 /// Maximum alternate 3D texture dimensions.
189 pub max_texture3d_alt: [i32; 3],
190 /// Maximum Cubemap texture dimensions.
191 pub max_texture_cubemap: i32,
192 /// Maximum 1D layered texture dimensions.
193 pub max_texture1d_layered: [i32; 2],
194 /// Maximum 2D layered texture dimensions.
195 pub max_texture2d_layered: [i32; 3],
196 /// Maximum Cubemap layered texture dimensions.
197 pub max_texture_cubemap_layered: [i32; 2],
198 /// Maximum 1D surface size.
199 pub max_surface1d: i32,
200 /// Maximum 2D surface dimensions.
201 pub max_surface2d: [i32; 2],
202 /// Maximum 3D surface dimensions.
203 pub max_surface3d: [i32; 3],
204 /// Maximum 1D layered surface dimensions.
205 pub max_surface1d_layered: [i32; 2],
206 /// Maximum 2D layered surface dimensions.
207 pub max_surface2d_layered: [i32; 3],
208 /// Maximum Cubemap surface dimensions.
209 pub max_surface_cubemap: i32,
210 /// Maximum Cubemap layered surface dimensions.
211 pub max_surface_cubemap_layered: [i32; 2],
212 /// Alignment requirements for surfaces.
213 pub surface_alignment: usize,
214 /// Device can possibly execute multiple kernels concurrently.
215 pub concurrent_kernels: bool,
216 /// Device has ECC support enabled.
217 pub ecc_enabled: bool,
218 /// PCI bus ID of the device.
219 pub pci_bus_id: i32,
220 /// PCI device ID of the device.
221 pub pci_device_id: i32,
222 /// PCI domain ID of the device.
223 pub pci_domain_id: i32,
224 /// 1 if device is a Tesla device using TCC driver, 0 otherwise.
225 pub tcc_driver: bool,
226 /// Number of asynchronous engines.
227 pub async_engine_count: i32,
228 /// Device shares a unified address space with the host.
229 pub unified_addressing: bool,
230 /// Global memory bus width in bits.
231 pub memory_bus_width: i32,
232 /// Size of L2 cache in bytes.
233 pub l2_cache_size: i32,
234 /// Device's maximum l2 persisting lines capacity setting in bytes.
235 pub persisting_l2_cache_max_size: i32,
236 /// Maximum resident threads per multiprocessor.
237 pub max_threads_per_multi_processor: i32,
238 /// Device supports stream priorities.
239 pub stream_priorities_supported: bool,
240 /// Device supports caching globals in L1.
241 pub global_l1_cache_supported: bool,
242 /// Device supports caching locals in L1.
243 pub local_l1_cache_supported: bool,
244 /// Shared memory available per multiprocessor in bytes.
245 pub shared_mem_per_multiprocessor: usize,
246 /// 32-bit registers available per multiprocessor.
247 pub regs_per_multiprocessor: i32,
248 /// Device supports allocating managed memory on this system.
249 pub managed_memory: bool,
250 /// Device is on a multi-GPU board.
251 pub is_multi_gpu_board: bool,
252 /// Unique identifier for a group of devices on the same multi-GPU board.
253 pub multi_gpu_board_group_id: i32,
254 /// Link between the device and the host supports native atomic operations.
255 pub host_native_atomic_supported: bool,
256 /// Device supports coherently accessing pageable memory without calling [`DeviceMemory::register_host`](crate::memory::DeviceMemory::register_host) on it.
257 pub pageable_memory_access: bool,
258 /// Device can coherently access managed memory concurrently with the CPU.
259 pub concurrent_managed_access: bool,
260 /// Device supports Compute Preemption.
261 pub compute_preemption_supported: bool,
262 /// Device can access host registered memory at the same virtual address as the CPU.
263 pub can_use_host_pointer_for_registered_mem: bool,
264 /// Device supports cooperative kernel launches.
265 pub cooperative_launch: bool,
266 /// Per device maximum shared memory per block usable by special opt-in.
267 pub shared_mem_per_block_optin: usize,
268 /// Device accesses pageable memory via the host's page tables.
269 pub pageable_memory_access_uses_host_page_tables: bool,
270 /// Host can directly access managed memory on the device without migration.
271 pub direct_managed_mem_access_from_host: bool,
272 /// Maximum number of resident blocks per multiprocessor.
273 pub max_blocks_per_multi_processor: i32,
274 /// Maximum value of the CUDA access-policy window `num_bytes` field.
275 pub access_policy_max_window_size: i32,
276 /// Shared memory reserved by CUDA driver per block in bytes.
277 pub reserved_shared_mem_per_block: usize,
278 /// Device supports host memory registration via [`DeviceMemory::register_host`](crate::memory::DeviceMemory::register_host).
279 pub host_register_supported: bool,
280 /// Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays.
281 pub sparse_cuda_array_supported: bool,
282 /// Device supports [`HostRegisterFlags::READ_ONLY`](crate::memory::HostRegisterFlags::READ_ONLY) for host registrations mapped as read-only to the GPU.
283 pub host_register_read_only_supported: bool,
284 /// External timeline semaphore interop is supported.
285 pub timeline_semaphore_interop_supported: bool,
286 /// Device supports CUDA memory pools.
287 pub memory_pools_supported: bool,
288 /// Device supports GPUDirect RDMA APIs.
289 pub gpu_direct_rdma_supported: bool,
290 /// The returned flags may be used as subset of the supported write ordering MASTs supplied with GPUDirect RDMA writes.
291 pub gpu_direct_rdma_flush_writes_options: u32,
292 /// GPUDirect RDMA writes are guaranteed to be ordered with respect to other GPUDirect RDMA writes from the same GPU.
293 pub gpu_direct_rdma_writes_ordering: i32,
294 /// Handle types supported with mempool based IPC.
295 pub memory_pool_supported_handle_types: u32,
296 /// Indicates device supports deferred mapping CUDA arrays and mapping hints.
297 pub deferred_mapping_cuda_array_supported: bool,
298 /// Device supports IPC Events.
299 pub ipc_event_supported: bool,
300 /// Device supports Cluster Launch.
301 pub cluster_launch: bool,
302 /// Device supports unified function pointers.
303 pub unified_function_pointers: bool,
304}
305
306impl TryFrom<runtime::cudaDeviceProp> for DeviceProperties {
307 type Error = Error;
308
309 fn try_from(value: runtime::cudaDeviceProp) -> Result<Self> {
310 let end = value
311 .name
312 .iter()
313 .position(|&c| c == 0)
314 .unwrap_or(value.name.len());
315 let name_bytes: Vec<u8> = value.name[..end].iter().map(|&byte| byte as u8).collect();
316 let name = String::from_utf8_lossy(&name_bytes).into_owned();
317
318 let prop = Self {
319 name,
320 uuid: value.uuid.into(),
321 luid: value.luid.map(|byte| byte as u8),
322 luid_device_node_mask: value.luidDeviceNodeMask,
323 total_global_mem: value.totalGlobalMem as usize,
324 shared_mem_per_block: value.sharedMemPerBlock as usize,
325 regs_per_block: value.regsPerBlock,
326 warp_size: value.warpSize,
327 mem_pitch: value.memPitch as usize,
328 max_threads_per_block: value.maxThreadsPerBlock,
329 max_threads_dim: value.maxThreadsDim,
330 max_grid_size: value.maxGridSize,
331 total_const_mem: value.totalConstMem as usize,
332 major: value.major,
333 minor: value.minor,
334 texture_alignment: value.textureAlignment as usize,
335 texture_pitch_alignment: value.texturePitchAlignment as usize,
336 multi_processor_count: value.multiProcessorCount,
337 integrated: value.integrated != 0,
338 can_map_host_memory: value.canMapHostMemory != 0,
339 max_texture1d: value.maxTexture1D,
340 max_texture1d_mipmap: value.maxTexture1DMipmap,
341 max_texture2d: value.maxTexture2D,
342 max_texture2d_mipmap: value.maxTexture2DMipmap,
343 max_texture2d_linear: value.maxTexture2DLinear,
344 max_texture2d_gather: value.maxTexture2DGather,
345 max_texture3d: value.maxTexture3D,
346 max_texture3d_alt: value.maxTexture3DAlt,
347 max_texture_cubemap: value.maxTextureCubemap,
348 max_texture1d_layered: value.maxTexture1DLayered,
349 max_texture2d_layered: value.maxTexture2DLayered,
350 max_texture_cubemap_layered: value.maxTextureCubemapLayered,
351 max_surface1d: value.maxSurface1D,
352 max_surface2d: value.maxSurface2D,
353 max_surface3d: value.maxSurface3D,
354 max_surface1d_layered: value.maxSurface1DLayered,
355 max_surface2d_layered: value.maxSurface2DLayered,
356 max_surface_cubemap: value.maxSurfaceCubemap,
357 max_surface_cubemap_layered: value.maxSurfaceCubemapLayered,
358 surface_alignment: value.surfaceAlignment as usize,
359 concurrent_kernels: value.concurrentKernels != 0,
360 ecc_enabled: value.ECCEnabled != 0,
361 pci_bus_id: value.pciBusID,
362 pci_device_id: value.pciDeviceID,
363 pci_domain_id: value.pciDomainID,
364 tcc_driver: value.tccDriver != 0,
365 async_engine_count: value.asyncEngineCount,
366 unified_addressing: value.unifiedAddressing != 0,
367 memory_bus_width: value.memoryBusWidth,
368 l2_cache_size: value.l2CacheSize,
369 persisting_l2_cache_max_size: value.persistingL2CacheMaxSize,
370 max_threads_per_multi_processor: value.maxThreadsPerMultiProcessor,
371 stream_priorities_supported: value.streamPrioritiesSupported != 0,
372 global_l1_cache_supported: value.globalL1CacheSupported != 0,
373 local_l1_cache_supported: value.localL1CacheSupported != 0,
374 shared_mem_per_multiprocessor: value.sharedMemPerMultiprocessor as usize,
375 regs_per_multiprocessor: value.regsPerMultiprocessor,
376 managed_memory: value.managedMemory != 0,
377 is_multi_gpu_board: value.isMultiGpuBoard != 0,
378 multi_gpu_board_group_id: value.multiGpuBoardGroupID,
379 host_native_atomic_supported: value.hostNativeAtomicSupported != 0,
380 pageable_memory_access: value.pageableMemoryAccess != 0,
381 concurrent_managed_access: value.concurrentManagedAccess != 0,
382 compute_preemption_supported: value.computePreemptionSupported != 0,
383 can_use_host_pointer_for_registered_mem: value.canUseHostPointerForRegisteredMem != 0,
384 cooperative_launch: value.cooperativeLaunch != 0,
385 shared_mem_per_block_optin: value.sharedMemPerBlockOptin as usize,
386 pageable_memory_access_uses_host_page_tables: value
387 .pageableMemoryAccessUsesHostPageTables
388 != 0,
389 direct_managed_mem_access_from_host: value.directManagedMemAccessFromHost != 0,
390 max_blocks_per_multi_processor: value.maxBlocksPerMultiProcessor,
391 access_policy_max_window_size: value.accessPolicyMaxWindowSize,
392 reserved_shared_mem_per_block: value.reservedSharedMemPerBlock as usize,
393 host_register_supported: value.hostRegisterSupported != 0,
394 sparse_cuda_array_supported: value.sparseCudaArraySupported != 0,
395 host_register_read_only_supported: value.hostRegisterReadOnlySupported != 0,
396 timeline_semaphore_interop_supported: value.timelineSemaphoreInteropSupported != 0,
397 memory_pools_supported: value.memoryPoolsSupported != 0,
398 gpu_direct_rdma_supported: value.gpuDirectRDMASupported != 0,
399 gpu_direct_rdma_flush_writes_options: value.gpuDirectRDMAFlushWritesOptions,
400 gpu_direct_rdma_writes_ordering: value.gpuDirectRDMAWritesOrdering,
401 memory_pool_supported_handle_types: value.memoryPoolSupportedHandleTypes,
402 deferred_mapping_cuda_array_supported: value.deferredMappingCudaArraySupported != 0,
403 ipc_event_supported: value.ipcEventSupported != 0,
404 cluster_launch: value.clusterLaunch != 0,
405 unified_function_pointers: value.unifiedFunctionPointers != 0,
406 };
407
408 Ok(prop)
409 }
410}
411
412#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
413pub struct Device(DeviceId);
414
415pub type DeviceId = i32;
416
417impl Device {
418 pub const fn new(id: DeviceId) -> Self {
419 Self(id)
420 }
421
422 /// Returns the number of devices with compute capability greater or equal to 2.0 that are available for execution.
423 ///
424 /// # Errors
425 ///
426 /// Returns an error if CUDA cannot query the device count, a previous
427 /// asynchronous launch reports an error, or CUDA reports runtime
428 /// initialization diagnostics such as [`crate::error::Status::NotInitialized`],
429 /// [`crate::error::Status::CallRequiresNewerDriver`], or [`crate::error::Status::NoDevice`].
430 pub fn count() -> Result<i32> {
431 let mut count: i32 = 0;
432 unsafe {
433 try_ffi!(runtime::cudaGetDeviceCount(&raw mut count))?;
434 }
435 Ok(count)
436 }
437
438 /// Returns the current device for the calling host thread.
439 ///
440 /// # Errors
441 ///
442 /// Returns an error if CUDA cannot query the current device, a previous
443 /// asynchronous launch reports an error, or CUDA reports runtime
444 /// initialization diagnostics.
445 pub fn current() -> Result<Self> {
446 let mut device_id: i32 = 0;
447 unsafe {
448 try_ffi!(runtime::cudaGetDevice(&raw mut device_id))?;
449 }
450 Ok(Self(device_id))
451 }
452
453 /// Blocks until the device has completed all preceding requested tasks.
454 /// [`Device::synchronize`] returns an error if one of the preceding tasks has failed.
455 /// If [`ContextFlags::SCHEDULE_BLOCKING_SYNC`] was set for this device, the
456 /// host thread blocks until the device has finished its work.
457 ///
458 /// * Use of [`Device::synchronize`] in device code was deprecated in CUDA 11.6 and removed for compute_90+ compilation.
459 /// For compute capability < 9.0, compile-time opt-in with `-D CUDA_FORCE_CDP1_IF_SUPPORTED` is required to continue using [`Device::synchronize`] in device code for now.
460 /// This is different from host-side [`Device::synchronize`], which is still supported.
461 ///
462 /// # Errors
463 ///
464 /// Returns an error if synchronization fails, a previous asynchronous
465 /// launch reports an error, or CUDA reports runtime initialization
466 /// diagnostics.
467 pub fn synchronize() -> Result<()> {
468 unsafe {
469 try_ffi!(runtime::cudaDeviceSynchronize())?;
470 }
471 Ok(())
472 }
473
474 /// Explicitly destroys and cleans up all resources associated with the current device in the current process.
475 /// Accessing these resources or passing them to subsequent API calls after
476 /// reset results in undefined behavior.
477 /// These resources include streams, events, arrays, mipmapped arrays, pitched allocations, texture and surface objects, external memory and semaphore objects, and graphics resources owned by the current device state.
478 /// These resources also include memory allocations by [`DeviceMemory::alloc`](crate::memory::DeviceMemory::alloc), [`DeviceMemory::alloc_host`](crate::memory::DeviceMemory::alloc_host), [`DeviceMemory::alloc_managed`](crate::memory::DeviceMemory::alloc_managed) and [`sys::cudaMallocPitch`](singe_cuda_sys::runtime::cudaMallocPitch).
479 /// Any subsequent call to this device reinitializes it.
480 ///
481 /// This call resets the device immediately.
482 /// Ensure that no other host threads in the process are accessing the device when this is called.
483 ///
484 /// * [`Device::reset`] does not destroy memory allocated by [`DeviceMemory::alloc_async`](crate::memory::DeviceMemory::alloc_async) or [`sys::cudaMallocFromPoolAsync`](singe_cuda_sys::runtime::cudaMallocFromPoolAsync).
485 /// These memory allocations must be destroyed explicitly.
486 /// * If a non-primary CUDA context is current to the thread, [`Device::reset`] will destroy only the internal CUDA runtime state for that context.
487 ///
488 /// # Errors
489 ///
490 /// Returns an error if device reset fails, a previous asynchronous launch
491 /// reports an error, or CUDA reports runtime initialization diagnostics.
492 pub fn reset() -> Result<()> {
493 unsafe {
494 try_ffi!(runtime::cudaDeviceReset())?;
495 }
496 Ok(())
497 }
498
499 /// Returns the current size of limit.
500 /// The following [`Limit`] values are supported.
501 ///
502 /// * [`Limit::StackSize`] is the stack size in bytes of each GPU thread.
503 /// * [`Limit::PrintfFifoSize`] is the size in bytes of the shared FIFO used by the `printf()` device system call.
504 /// * [`Limit::MallocHeapSize`] is the size in bytes of the heap used by the `malloc()` and `free()` device system calls.
505 /// * [`Limit::DevRuntimeSyncDepth`] is the maximum grid depth at which a thread can issue the device runtime call [`Device::synchronize`] to wait on child grid launches to complete.
506 /// This feature is removed for devices of compute capability >= 9.0, so such devices return [`crate::error::Status::UnsupportedLimit`].
507 /// * [`Limit::DevRuntimePendingLaunchCount`] is the maximum number of outstanding device runtime launches.
508 /// * [`Limit::MaxL2FetchGranularity`] is the L2 cache fetch granularity.
509 /// * [`Limit::PersistingL2CacheSize`] is the persisting L2 cache size in bytes.
510 ///
511 /// # Errors
512 ///
513 /// Returns an error if `limit` is unsupported, CUDA cannot query the limit,
514 /// a previous asynchronous launch reports an error, or CUDA reports runtime
515 /// initialization diagnostics.
516 pub fn limit(limit: Limit) -> Result<usize> {
517 let mut value = 0;
518 unsafe {
519 try_ffi!(runtime::cudaDeviceGetLimit(&raw mut value, limit.into(),))?;
520 }
521 Ok(value as _)
522 }
523
524 /// Setting limit to value is a request by the application to update the current limit maintained by the device.
525 /// The driver may modify the requested value to meet hardware requirements, such as clamping to minimum or maximum values or rounding up to the nearest element size.
526 /// Use [`Device::limit`] to query the effective value.
527 ///
528 /// Setting each [`Limit`] has its own specific restrictions, so each is discussed here.
529 ///
530 /// * [`Limit::StackSize`] controls the stack size in bytes of each GPU thread.
531 ///
532 /// * [`Limit::PrintfFifoSize`] controls the size in bytes of the shared FIFO used by the `printf()` device system call.
533 /// Setting [`Limit::PrintfFifoSize`] must not be performed after launching any kernel that uses the `printf()` device system call; otherwise [`crate::error::Status::InvalidValue`] is returned.
534 ///
535 /// * [`Limit::MallocHeapSize`] controls the size in bytes of the heap used by the `malloc()` and `free()` device system calls.
536 /// Setting [`Limit::MallocHeapSize`] must not be performed after launching any kernel that uses the `malloc()` or `free()` device system calls; otherwise [`crate::error::Status::InvalidValue`] is returned.
537 ///
538 /// * [`Limit::DevRuntimeSyncDepth`] controls the maximum nesting depth of a grid at which a thread can safely call [`Device::synchronize`].
539 /// Setting this limit must be performed before any launch of a kernel that uses the device runtime and calls [`Device::synchronize`] above the default sync depth, two levels of grids.
540 /// Calls to [`Device::synchronize`] fail if this limit is violated.
541 /// This limit can be set smaller than the default or up to the maximum launch depth of 24.
542 /// Additional sync-depth levels require the runtime to reserve large amounts of device memory that can no longer be used for application allocations.
543 /// If these device-memory reservations fail, [`Device::set_limit`] returns an error, and the limit can be reset to a lower value.
544 /// This limit is only applicable to devices of compute capability < 9.0.
545 /// Setting this limit on devices with other compute capabilities returns [`crate::error::Status::UnsupportedLimit`].
546 ///
547 /// * [`Limit::DevRuntimePendingLaunchCount`] controls the maximum number of outstanding device runtime launches that can be made from the current device.
548 /// A grid is outstanding from launch until it is known to have completed.
549 /// Device runtime launches that violate this limit fail.
550 /// If a module using the device runtime needs more pending launches than the default 2048 launches, this limit can be increased.
551 /// Sustaining additional pending launches requires the runtime to reserve larger amounts of device memory up front, which can no longer be used for allocations.
552 /// If these reservations fail, [`Device::set_limit`] returns an error, and the limit can be reset to a lower value.
553 /// This limit is only applicable to devices of compute capability 3.5 and higher.
554 /// Setting this limit on devices with compute capability less than 3.5 returns [`crate::error::Status::UnsupportedLimit`].
555 ///
556 /// * [`Limit::MaxL2FetchGranularity`] controls the L2 cache fetch granularity.
557 /// Values can range from 0B to 128B.
558 /// Performance hint that can be ignored or clamped depending on the platform.
559 ///
560 /// * [`Limit::PersistingL2CacheSize`] controls size in bytes available for persisting L2 cache.
561 /// Performance hint that can be ignored or clamped depending on the platform.
562 ///
563 /// # Errors
564 ///
565 /// Returns an error if `limit` is unsupported, `value` is invalid for that
566 /// limit, CUDA cannot set the limit, a previous asynchronous launch reports
567 /// an error, or CUDA reports runtime initialization diagnostics.
568 pub fn set_limit(limit: Limit, value: usize) -> Result<()> {
569 unsafe {
570 try_ffi!(runtime::cudaDeviceSetLimit(limit.into(), value as _))?;
571 }
572 Ok(())
573 }
574
575 /// Records flags as the flags for the current device.
576 /// If the current device has been set and that device has already been initialized, the previous flags are overwritten.
577 /// If the current device has not been initialized, it is initialized with the provided flags.
578 /// If no device has been made current to the calling thread, a default device is selected and initialized with the provided flags.
579 ///
580 /// The three least significant bits of `flags` control how the CPU thread interacts with the OS scheduler while waiting for device results.
581 ///
582 /// * [`ContextFlags::SCHEDULE_AUTO`]: The default value if `flags` is zero.
583 /// Uses a heuristic based on the number of active CUDA contexts in the process (`C`) and the number of logical processors in the system (`P`).
584 /// If `C > P`, CUDA yields to other OS threads when waiting for the device; otherwise, CUDA actively spins while waiting for results.
585 /// Additionally, on Tegra devices, [`ContextFlags::SCHEDULE_AUTO`] uses a heuristic based on the power profile of the platform and may choose [`ContextFlags::SCHEDULE_BLOCKING_SYNC`] for low-powered devices.
586 /// * [`ContextFlags::SCHEDULE_SPIN`]: Instruct CUDA to actively spin when waiting for results from the device.
587 /// This can decrease latency when waiting for the
588 /// device, but may lower the performance of CPU threads if they are performing work in parallel with the CUDA thread.
589 /// * [`ContextFlags::SCHEDULE_YIELD`]: Instruct CUDA to yield its thread when waiting for results from the device.
590 /// This can increase latency when waiting for the
591 /// device, but can increase the performance of CPU threads performing work in parallel with the device.
592 /// * [`ContextFlags::SCHEDULE_BLOCKING_SYNC`]: Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the device to finish work.
593 ///
594 /// This matches the deprecated CUDA runtime blocking-sync behavior now represented by [`ContextFlags::SCHEDULE_BLOCKING_SYNC`].
595 /// * [`ContextFlags::MAP_HOST`]: This flag enables allocating pinned host memory that is accessible to the device.
596 /// It is implicit for the runtime but may
597 /// be absent if a context is created using the driver API.
598 /// If this flag is not set, [`sys::cudaHostGetDevicePointer`](singe_cuda_sys::runtime::cudaHostGetDevicePointer) always returns a failure code.
599 /// * [`ContextFlags::LOCAL_MEMORY_RESIZE_TO_MAX`]: Instruct CUDA to not reduce local memory after resizing local memory for a kernel.
600 /// This can prevent thrashing by local memory
601 /// allocations when launching many kernels with high local memory usage at the cost of potentially increased memory usage.
602 ///
603 /// Deprecated: this behavior is now the default and cannot be disabled.
604 /// * [`ContextFlags::SYNC_MEMORY_OPERATIONS`]: Ensures that synchronous memory operations initiated on this context always synchronize.
605 /// See further documentation
606 /// in the section titled "API Synchronization behavior" to learn more about cases when synchronous memory operations can exhibit
607 /// asynchronous behavior.
608 ///
609 /// # Errors
610 ///
611 /// Returns an error if CUDA cannot set the device flags, a previous
612 /// asynchronous launch reports an error, or CUDA reports runtime
613 /// initialization diagnostics.
614 pub fn set_flags(flags: ContextFlags) -> Result<()> {
615 unsafe { try_ffi!(runtime::cudaSetDeviceFlags(flags.bits())) }
616 }
617
618 /// Returns the flags for the current device.
619 /// If there is a current device for the calling thread, the flags for the device are returned.
620 /// If there is no current device, the flags for the first device are returned, which may be the default flags.
621 /// Compare to the behavior of [`Device::set_flags`].
622 ///
623 /// Typically, the returned flags match the behavior seen if the calling
624 /// thread uses a device after this call, assuming this thread or another
625 /// thread does not change the flags or current device in between.
626 /// If the device is not initialized, another thread can change the flags for the current device before it is initialized.
627 /// Additionally, when using exclusive mode, if this thread has not requested a specific device, it may use a device other than the first device, contrary to the assumption made by this query.
628 ///
629 /// If a context has been created via the driver API and is current to the calling thread, the flags for that context are always returned.
630 ///
631 /// Returned flags may specifically include [`ContextFlags::MAP_HOST`] even though it is not accepted by [`Device::set_flags`] because it is implicit in runtime API flags.
632 /// The reason for this is that the current context may have been created via the driver API in which case the flag is not implicit and may be unset.
633 ///
634 /// # Errors
635 ///
636 /// Returns an error if CUDA cannot query the device flags, a previous
637 /// asynchronous launch reports an error, or CUDA reports runtime
638 /// initialization diagnostics.
639 pub fn flags() -> Result<ContextFlags> {
640 let mut flags_raw: u32 = 0;
641 unsafe {
642 try_ffi!(runtime::cudaGetDeviceFlags(&raw mut flags_raw))?;
643 }
644 Ok(ContextFlags::from_bits_retain(flags_raw))
645 }
646
647 /// Returns the device which has properties that best match the given prop.
648 ///
649 /// # Errors
650 ///
651 /// Returns an error if CUDA cannot choose a matching device, the selected
652 /// device cannot be represented by this wrapper, a previous asynchronous
653 /// launch reports an error, or CUDA reports runtime initialization
654 /// diagnostics.
655 pub fn choose(prop: &DeviceProperties) -> Result<Self> {
656 // This function is tricky because cudaChooseDevice takes a *template* prop,
657 // and we have a fully filled DeviceProp. We need to construct a template
658 // cudaDeviceProp FFI struct based on the criteria we care about.
659 // Often, only major/minor compute capability is used.
660 // For simplicity here, let's assume we want an exact match on major/minor.
661 // A more robust implementation would allow specifying which fields matter.
662
663 let mut ffi_prop: runtime::cudaDeviceProp = unsafe { mem::zeroed() };
664 ffi_prop.major = prop.major;
665 ffi_prop.minor = prop.minor;
666 // Maybe add other critical fields like managedMemory support if needed?
667 ffi_prop.managedMemory = i32::from(prop.managed_memory);
668
669 let mut device: i32 = -1;
670 unsafe {
671 try_ffi!(runtime::cudaChooseDevice(
672 (&raw mut device).cast(),
673 &raw const ffi_prop
674 ))?;
675 }
676 if device == -1 {
677 Err(Error::DeviceNotFound)
678 } else {
679 Ok(Self(device))
680 }
681 }
682
683 /// Returns a device ordinal given a PCI bus ID string.
684 ///
685 /// # Errors
686 ///
687 /// Returns an error if `pci_bus_id` contains an interior NUL byte, CUDA
688 /// cannot resolve the bus ID, a previous asynchronous launch reports an
689 /// error, or CUDA reports runtime initialization diagnostics.
690 pub fn by_pci_bus_id(pci_bus_id: &str) -> Result<Self> {
691 let c_pci_bus_id = CString::new(pci_bus_id)?;
692 let mut device: i32 = -1;
693 unsafe {
694 try_ffi!(runtime::cudaDeviceGetByPCIBusId(
695 (&raw mut device).cast(),
696 c_pci_bus_id.as_ptr(),
697 ))?;
698 }
699 if device == -1 {
700 Err(Error::DeviceNotFound)
701 } else {
702 Ok(Self(device))
703 }
704 }
705
706 /// Sets device as the current device for the calling host thread.
707 /// Valid device id's are 0 to ([`Device::count`] - 1).
708 ///
709 /// Device memory subsequently allocated from this host thread is physically
710 /// resident on `device`.
711 /// Host memory allocated or registered from this host thread has its
712 /// lifetime associated with `device`.
713 /// Streams and events created from this host thread are associated with
714 /// `device`.
715 /// Kernels launched from this host thread execute on `device`.
716 ///
717 /// This may be called from any host thread, for any device, at any time.
718 /// This performs no synchronization with the previous or new device, and usually only takes significant time when it initializes the runtime's context state.
719 /// This binds the primary context of the specified device to the calling
720 /// thread; subsequent memory allocations, stream and event creations, and
721 /// kernel launches are associated with that primary context.
722 /// This also immediately initializes the runtime state on the primary context, and the context is current on the device immediately.
723 /// It returns an error if the device is in [`ComputeMode::ExclusiveProcess`] and is occupied by another process, or if it is in [`ComputeMode::Prohibited`].
724 ///
725 /// It is not required to call [`sys::cudaInitDevice`](singe_cuda_sys::runtime::cudaInitDevice) before using this method.
726 ///
727 /// # Errors
728 ///
729 /// Returns an error if CUDA cannot set the current device, the device is
730 /// unavailable due to compute mode restrictions, a previous asynchronous
731 /// launch reports an error, or CUDA reports runtime initialization
732 /// diagnostics.
733 pub fn set_current(self) -> Result<()> {
734 unsafe {
735 try_ffi!(runtime::cudaSetDevice(self.0))?;
736 }
737 Ok(())
738 }
739
740 /// Returns this device's properties.
741 ///
742 /// # Errors
743 ///
744 /// Returns an error if CUDA cannot query the device properties, the
745 /// returned properties cannot be converted into the safe wrapper type, a
746 /// previous asynchronous launch reports an error, or CUDA reports runtime
747 /// initialization diagnostics.
748 pub fn properties(self) -> Result<DeviceProperties> {
749 unsafe {
750 let mut prop = MaybeUninit::<runtime::cudaDeviceProp>::uninit();
751 try_ffi!(runtime::cudaGetDeviceProperties(prop.as_mut_ptr(), self.0))?;
752 prop.assume_init().try_into()
753 }
754 }
755
756 /// Returns the PCI bus ID string identifying the device.
757 ///
758 /// # Errors
759 ///
760 /// Returns an error if CUDA cannot query the PCI bus ID, a previous
761 /// asynchronous launch reports an error, or CUDA reports runtime
762 /// initialization diagnostics.
763 pub fn pci_bus_id(self) -> Result<String> {
764 const LEN: usize = 16; // Sufficient for typical PCI IDs like 0000:01:00.0
765 let mut pci_bus_id_buf = [0i8; LEN];
766 unsafe {
767 try_ffi!(runtime::cudaDeviceGetPCIBusId(
768 pci_bus_id_buf.as_mut_ptr().cast(),
769 LEN as _,
770 self.0,
771 ))?;
772 Ok(string_from_c_chars(&pci_bus_id_buf))
773 }
774 }
775
776 /// On success, all allocations from the peer device are immediately accessible by the current device.
777 /// They remain accessible until access is explicitly disabled using [`Device::disable_peer_access`] or either device is reset using [`Device::reset`].
778 ///
779 /// Access granted by this call is unidirectional; accessing memory on the current device from the peer device requires a separate symmetric call to [`Device::enable_peer_access`].
780 ///
781 /// There are both device-wide and system-wide limitations per system configuration, as noted in the CUDA Programming Guide under the section "Peer-to-Peer Memory Access".
782 ///
783 /// Returns [`crate::error::Status::InvalidDevice`] if [`Device::can_access_peer`] indicates that the current device cannot directly access memory from the peer device.
784 ///
785 /// Returns [`crate::error::Status::PeerAccessAlreadyEnabled`] if direct access to the peer device from the current device has already been enabled.
786 ///
787 /// Returns [`crate::error::Status::InvalidValue`] if flags is not 0.
788 ///
789 /// # Errors
790 ///
791 /// Returns an error if peer access cannot be enabled, `flags` is not
792 /// [`PeerAccessFlags::DEFAULT`], the peer is invalid or already enabled, a
793 /// previous asynchronous launch reports an error, or CUDA reports runtime
794 /// initialization diagnostics.
795 pub fn enable_peer_access(self, flags: PeerAccessFlags) -> Result<()> {
796 if flags != PeerAccessFlags::DEFAULT {
797 return Err(Error::InvalidValue);
798 }
799 unsafe { try_ffi!(runtime::cudaDeviceEnablePeerAccess(self.0, flags.bits(),)) }
800 }
801
802 /// Returns [`crate::error::Status::PeerAccessNotEnabled`] if direct access to memory on the peer device has not yet been enabled from the current device.
803 ///
804 /// # Errors
805 ///
806 /// Returns an error if peer access was not enabled, CUDA cannot disable
807 /// peer access, a previous asynchronous launch reports an error, or CUDA
808 /// reports runtime initialization diagnostics.
809 pub fn disable_peer_access(self) -> Result<()> {
810 unsafe { try_ffi!(runtime::cudaDeviceDisablePeerAccess(self.0)) }
811 }
812
813 /// Returns true if this device can directly access memory from `other`.
814 /// If direct access from this device to `other` is possible, access may be enabled by calling [`Device::enable_peer_access`].
815 ///
816 /// # Errors
817 ///
818 /// Returns an error if CUDA cannot query peer accessibility, either device
819 /// is invalid, a previous asynchronous launch reports an error, or CUDA
820 /// reports runtime initialization diagnostics.
821 pub fn can_access_peer(self, other: Self) -> Result<bool> {
822 let mut can_access_peer: i32 = 0;
823 unsafe {
824 try_ffi!(runtime::cudaDeviceCanAccessPeer(
825 (&raw mut can_access_peer).cast(),
826 self.0,
827 other.0,
828 ))?;
829 }
830 Ok(can_access_peer != 0)
831 }
832
833 /// Returns the value of the requested attribute of the link between devices.
834 /// Supported attributes are represented by [`PeerToPeerAttribute`]:
835 ///
836 /// * [`PeerToPeerAttribute::PerformanceRank`]: relative performance of the link between the two devices. Lower values are better.
837 /// * [`PeerToPeerAttribute::AccessSupported`]: whether peer access is enabled.
838 /// * [`PeerToPeerAttribute::NativeAtomicSupported`]: whether native atomic operations over the link are supported.
839 /// * [`PeerToPeerAttribute::CudaArrayAccessSupported`]: whether CUDA arrays are accessible over the link.
840 ///
841 /// Returns [`crate::error::Status::InvalidDevice`] if either device is invalid or if they represent the same device.
842 ///
843 /// Returns [`crate::error::Status::InvalidValue`] if `attrib` is not valid.
844 ///
845 /// # Errors
846 ///
847 /// Returns an error if either device is invalid, `attr` is not accepted by
848 /// CUDA, CUDA cannot query the attribute, a previous asynchronous launch
849 /// reports an error, or CUDA reports runtime initialization diagnostics.
850 pub fn p2p_attribute(self, attr: PeerToPeerAttribute, other: Self) -> Result<i32> {
851 let mut value: i32 = 0;
852 unsafe {
853 try_ffi!(runtime::cudaDeviceGetP2PAttribute(
854 (&raw mut value).cast(),
855 attr.into(),
856 self.0,
857 other.0,
858 ))?;
859 }
860 Ok(value)
861 }
862
863 /// On devices where the L1 cache and shared memory use the same hardware resources, this returns the preferred cache configuration for the current device.
864 /// This setting is only a preference.
865 /// The runtime uses the requested configuration if possible, but it may choose a different configuration if required to execute functions.
866 ///
867 /// This returns [`FunctionCache::PreferNone`] on devices where the size of the L1 cache and shared memory are fixed.
868 ///
869 /// The supported cache configurations are:
870 ///
871 /// * [`FunctionCache::PreferNone`]: no preference for shared memory or L1 (default)
872 /// * [`FunctionCache::PreferShared`]: prefer larger shared memory and smaller L1 cache
873 /// * [`FunctionCache::PreferL1`]: prefer larger L1 cache and smaller shared memory
874 /// * [`FunctionCache::PreferEqual`]: prefer equal size L1 cache and shared memory
875 ///
876 /// # Errors
877 ///
878 /// Returns an error if CUDA cannot query the cache configuration, a
879 /// previous asynchronous launch reports an error, or CUDA reports runtime
880 /// initialization diagnostics.
881 pub fn cache_config() -> Result<FunctionCache> {
882 let mut config = runtime::cudaFuncCache::CU_FUNC_CACHE_PREFER_NONE;
883 unsafe {
884 try_ffi!(runtime::cudaDeviceGetCacheConfig(&raw mut config))?;
885 }
886 Ok(config.into())
887 }
888
889 /// On devices where the L1 cache and shared memory use the same hardware resources, this sets through cacheConfig the preferred cache configuration for the current device.
890 /// This setting is only a preference.
891 /// The runtime uses the requested configuration if possible, but it is free to choose a different configuration if required to execute the kernel.
892 /// Any per-kernel cache preference set through the CUDA API takes precedence over this device-wide setting.
893 /// Setting the device-wide cache configuration to [`FunctionCache::PreferNone`] causes subsequent kernel launches to prefer not changing the cache configuration unless required to launch the kernel.
894 ///
895 /// This setting does nothing on devices where the size of the L1 cache and shared memory are fixed.
896 ///
897 /// Launching a kernel with a different preference than the most recent preference setting may insert a device-side synchronization point.
898 ///
899 /// The supported cache configurations are:
900 ///
901 /// * [`FunctionCache::PreferNone`]: no preference for shared memory or L1 (default)
902 /// * [`FunctionCache::PreferShared`]: prefer larger shared memory and smaller L1 cache
903 /// * [`FunctionCache::PreferL1`]: prefer larger L1 cache and smaller shared memory
904 /// * [`FunctionCache::PreferEqual`]: prefer equal size L1 cache and shared memory
905 ///
906 /// # Errors
907 ///
908 /// Returns an error if CUDA cannot set the cache configuration, a previous
909 /// asynchronous launch reports an error, or CUDA reports runtime
910 /// initialization diagnostics.
911 pub fn set_cache_config(config: FunctionCache) -> Result<()> {
912 unsafe {
913 try_ffi!(runtime::cudaDeviceSetCacheConfig(config.into()))?;
914 }
915 Ok(())
916 }
917
918 /// Returns the least and greatest stream priority numerical values.
919 /// Stream priorities follow a convention where lower numbers represent greater priorities.
920 /// The range of meaningful stream priorities is given by \[greatest, least\].
921 /// If a stream is created with a priority value outside this range, the priority is automatically clamped.
922 /// See [`Context::create_stream_with_priority`](crate::context::Context::create_stream_with_priority) for details on creating a priority stream.
923 ///
924 /// Returns 0 for both values if the current context's device does not support stream priorities.
925 ///
926 /// # Errors
927 ///
928 /// Returns an error if CUDA cannot query the stream-priority range, a
929 /// previous asynchronous launch reports an error, or CUDA reports runtime
930 /// initialization diagnostics.
931 pub fn stream_priority_range() -> Result<StreamPriorityRange> {
932 let mut least = 0;
933 let mut greatest = 0;
934 unsafe {
935 try_ffi!(runtime::cudaDeviceGetStreamPriorityRange(
936 &raw mut least,
937 &raw mut greatest,
938 ))?;
939 }
940 Ok(StreamPriorityRange { least, greatest })
941 }
942
943 pub const fn id(self) -> DeviceId {
944 self.0
945 }
946}
947
948#[cfg(all(test, feature = "testing"))]
949mod tests {
950 use super::*;
951
952 #[test]
953 fn it_works() {
954 match Device::count() {
955 Ok(count) => {
956 println!("Found {} CUDA devices.", count);
957 if count > 0 {
958 match Device::new(0).properties() {
959 Ok(props) => println!("Device 0: {}", props.name),
960 Err(e) => eprintln!("error getting properties for device 0: {:?}", e),
961 }
962 }
963 }
964 Err(e) => eprintln!("error getting device count: {:?}", e),
965 }
966 }
967}