1use std::ffi::{c_char, c_int};
26
27use crate::error::CudaResult;
28use crate::ffi::{CUdevice, CUdevice_attribute};
29use crate::loader::try_driver;
30
/// A handle to a single CUDA device, identified by its zero-based ordinal.
///
/// The type is `Copy`: it wraps only the raw driver handle plus the ordinal
/// it was looked up with, and owns no driver-side resources.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Device {
    // Raw `CUdevice` value filled in by the driver's `cu_device_get` entry point.
    raw: CUdevice,
    // Zero-based device index this handle was obtained with.
    ordinal: i32,
}
61
62impl Device {
    /// Looks up the device at the given zero-based `ordinal`.
    ///
    /// # Errors
    /// Fails if the driver library cannot be loaded, or if the underlying
    /// `cu_device_get` call reports an error (presumably for an out-of-range
    /// ordinal — confirm against the driver bindings).
    pub fn get(ordinal: i32) -> CudaResult<Self> {
        let driver = try_driver()?;
        let mut raw: CUdevice = 0;
        crate::error::check(unsafe { (driver.cu_device_get)(&mut raw, ordinal) })?;
        Ok(Self { raw, ordinal })
    }
77
78 pub fn count() -> CudaResult<i32> {
84 let driver = try_driver()?;
85 let mut count: std::ffi::c_int = 0;
86 crate::error::check(unsafe { (driver.cu_device_get_count)(&mut count) })?;
87 Ok(count)
88 }
89
90 pub fn name(&self) -> CudaResult<String> {
100 let driver = try_driver()?;
101 let mut buf = [0u8; 256];
102 crate::error::check(unsafe {
103 (driver.cu_device_get_name)(buf.as_mut_ptr() as *mut c_char, 256, self.raw)
104 })?;
105 let len = buf.iter().position(|&b| b == 0).unwrap_or(buf.len());
106 Ok(String::from_utf8_lossy(&buf[..len]).into_owned())
107 }
108
    /// Returns the total amount of device memory, in bytes.
    ///
    /// # Errors
    /// Fails if the driver library cannot be loaded or the underlying
    /// `cu_device_total_mem_v2` call reports an error.
    pub fn total_memory(&self) -> CudaResult<usize> {
        let driver = try_driver()?;
        let mut bytes: usize = 0;
        crate::error::check(unsafe { (driver.cu_device_total_mem_v2)(&mut bytes, self.raw) })?;
        Ok(bytes)
    }
120
121 pub fn attribute(&self, attr: CUdevice_attribute) -> CudaResult<i32> {
133 let driver = try_driver()?;
134 let mut value: std::ffi::c_int = 0;
135 crate::error::check(unsafe {
136 (driver.cu_device_get_attribute)(&mut value, attr, self.raw)
137 })?;
138 Ok(value)
139 }
140
141 pub fn compute_capability(&self) -> CudaResult<(i32, i32)> {
151 let major = self.attribute(CUdevice_attribute::ComputeCapabilityMajor)?;
152 let minor = self.attribute(CUdevice_attribute::ComputeCapabilityMinor)?;
153 Ok((major, minor))
154 }
155
    // --- Execution-geometry limits ---

    /// Maximum number of threads allowed in a single block.
    pub fn max_threads_per_block(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MaxThreadsPerBlock)
    }

    /// Maximum block dimensions as `(x, y, z)`.
    pub fn max_block_dim(&self) -> CudaResult<(i32, i32, i32)> {
        Ok((
            self.attribute(CUdevice_attribute::MaxBlockDimX)?,
            self.attribute(CUdevice_attribute::MaxBlockDimY)?,
            self.attribute(CUdevice_attribute::MaxBlockDimZ)?,
        ))
    }

    /// Maximum grid dimensions as `(x, y, z)`.
    pub fn max_grid_dim(&self) -> CudaResult<(i32, i32, i32)> {
        Ok((
            self.attribute(CUdevice_attribute::MaxGridDimX)?,
            self.attribute(CUdevice_attribute::MaxGridDimY)?,
            self.attribute(CUdevice_attribute::MaxGridDimZ)?,
        ))
    }

    /// Maximum resident threads per multiprocessor (SM).
    pub fn max_threads_per_multiprocessor(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MaxThreadsPerMultiprocessor)
    }

    /// Maximum resident blocks per multiprocessor (SM).
    pub fn max_blocks_per_multiprocessor(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MaxBlocksPerMultiprocessor)
    }

    /// Number of multiprocessors (SMs) on the device.
    pub fn multiprocessor_count(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MultiprocessorCount)
    }

    /// Warp size in threads.
    pub fn warp_size(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::WarpSize)
    }

    // --- Memory limits ---

    /// Maximum shared memory per block, in bytes.
    pub fn max_shared_memory_per_block(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MaxSharedMemoryPerBlock)
    }

    /// Maximum shared memory per multiprocessor, in bytes.
    pub fn max_shared_memory_per_multiprocessor(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MaxSharedMemoryPerMultiprocessor)
    }

    /// Maximum opt-in shared memory per block, in bytes.
    pub fn max_shared_memory_per_block_optin(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MaxSharedMemoryPerBlockOptin)
    }

    /// Maximum 32-bit registers per block.
    pub fn max_registers_per_block(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MaxRegistersPerBlock)
    }

    /// Maximum 32-bit registers per multiprocessor.
    pub fn max_registers_per_multiprocessor(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MaxRegistersPerMultiprocessor)
    }

    /// L2 cache size (bytes, per the `L2CacheSize` attribute).
    pub fn l2_cache_size(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::L2CacheSize)
    }

    /// Total constant memory available on the device.
    pub fn total_constant_memory(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::TotalConstantMemory)
    }

    // --- Clocks and memory bus ---

    /// Core clock rate in kHz.
    pub fn clock_rate_khz(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::ClockRate)
    }

    /// Memory clock rate in kHz.
    pub fn memory_clock_rate_khz(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MemoryClockRate)
    }

    /// Global memory bus width in bits.
    pub fn memory_bus_width(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::GlobalMemoryBusWidth)
    }

    // --- PCI topology ---

    /// PCI bus identifier of the device.
    pub fn pci_bus_id(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::PciBusId)
    }

    /// PCI device identifier of the device.
    pub fn pci_device_id(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::PciDeviceId)
    }

    /// PCI domain identifier of the device.
    pub fn pci_domain_id(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::PciDomainId)
    }

    // --- Capability flags (non-zero attribute => supported) ---

    /// Whether managed (unified) memory allocation is supported.
    pub fn supports_managed_memory(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::ManagedMemory)? != 0)
    }

    /// Whether concurrent CPU/GPU access to managed memory is supported.
    pub fn supports_concurrent_managed_access(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::ConcurrentManagedAccess)? != 0)
    }

    /// Whether multiple kernels can execute concurrently.
    pub fn supports_concurrent_kernels(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::ConcurrentKernels)? != 0)
    }

    /// Whether cooperative kernel launch is supported.
    pub fn supports_cooperative_launch(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::CooperativeLaunch)? != 0)
    }

    /// Whether ECC is enabled on the device.
    pub fn ecc_enabled(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::EccEnabled)? != 0)
    }

    /// Whether the GPU is integrated with the host (shares host memory).
    pub fn is_integrated(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::Integrated)? != 0)
    }

    /// Whether host memory can be mapped into the device address space.
    pub fn can_map_host_memory(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::CanMapHostMemory)? != 0)
    }

    /// Whether the device shares a unified address space with the host.
    pub fn supports_unified_addressing(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::UnifiedAddressing)? != 0)
    }

    /// Whether stream priorities are supported.
    pub fn supports_stream_priorities(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::StreamPrioritiesSupported)? != 0)
    }

    /// Whether compute preemption is supported.
    pub fn supports_compute_preemption(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::ComputePreemptionSupported)? != 0)
    }

    /// Number of asynchronous copy engines.
    pub fn async_engine_count(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::AsyncEngineCount)
    }

    /// Whether the device sits on a multi-GPU board.
    pub fn is_multi_gpu_board(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::IsMultiGpuBoard)? != 0)
    }

    /// Whether a kernel execution timeout (watchdog) is in effect.
    pub fn has_kernel_exec_timeout(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::KernelExecTimeout)? != 0)
    }

    /// Raw compute-mode attribute value (exclusivity mode of the device).
    pub fn compute_mode(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::ComputeMode)
    }

    /// Whether the device is using the TCC driver (Windows).
    pub fn tcc_driver(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::TccDriver)? != 0)
    }

    /// Group identifier shared by GPUs on the same multi-GPU board.
    pub fn multi_gpu_board_group_id(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MultiGpuBoardGroupId)
    }

    /// Maximum L2 cache capacity reservable for persisting accesses.
    pub fn max_persisting_l2_cache_size(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MaxPersistingL2CacheSize)
    }

    /// Whether generic memory compression is supported.
    pub fn supports_generic_compression(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::GenericCompressionSupported)? != 0)
    }

    /// Whether the device can access pageable host memory directly.
    pub fn supports_pageable_memory_access(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::PageableMemoryAccess)? != 0)
    }

    /// Whether pageable memory access goes through the host's page tables.
    pub fn pageable_memory_uses_host_page_tables(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::PageableMemoryAccessUsesHostPageTables)? != 0)
    }

    /// Whether the host can directly access managed memory on the device.
    pub fn supports_direct_managed_mem_from_host(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::DirectManagedMemAccessFromHost)? != 0)
    }

    /// Bitmask of handle types supported by memory-pool export/import.
    pub fn memory_pool_supported_handle_types(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MemoryPoolSupportedHandleTypes)
    }
398
    /// Whether native atomic operations between host and device are supported.
    pub fn supports_host_native_atomics(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::HostNativeAtomicSupported)? != 0)
    }

    /// Ratio of single- to double-precision performance.
    pub fn single_to_double_perf_ratio(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::SingleToDoublePrecisionPerfRatio)
    }

    /// Whether cooperative launch across multiple devices is supported.
    pub fn supports_cooperative_multi_device_launch(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::CooperativeMultiDeviceLaunch)? != 0)
    }

    /// Whether remote writes can be flushed to this device.
    pub fn supports_flush_remote_writes(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::CanFlushRemoteWrites)? != 0)
    }

    /// Whether host memory registration is supported.
    pub fn supports_host_register(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::HostRegisterSupported)? != 0)
    }

    /// Whether the host pointer of registered memory can be used directly.
    pub fn can_use_host_pointer_for_registered_mem(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::CanUseHostPointerForRegisteredMem)? != 0)
    }

    /// Whether GPUDirect RDMA is supported.
    pub fn supports_gpu_direct_rdma(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::GpuDirectRdmaSupported)? != 0)
    }

    /// Whether tensor-map (TMA) access is supported.
    pub fn supports_tensor_map_access(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::TensorMapAccessSupported)? != 0)
    }

    /// Whether multicast object support is available.
    pub fn supports_multicast(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::MulticastSupported)? != 0)
    }

    /// Whether the Multi-Process Service (MPS) is enabled.
    pub fn mps_enabled(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::MpsEnabled)? != 0)
    }

    // --- Texture and surface limits ---

    /// Maximum 1D texture width.
    pub fn max_texture_1d_width(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MaxTexture1DWidth)
    }

    /// Maximum 2D texture dimensions as `(width, height)`.
    pub fn max_texture_2d_dims(&self) -> CudaResult<(i32, i32)> {
        Ok((
            self.attribute(CUdevice_attribute::MaxTexture2DWidth)?,
            self.attribute(CUdevice_attribute::MaxTexture2DHeight)?,
        ))
    }

    /// Maximum 3D texture dimensions as `(width, height, depth)`.
    pub fn max_texture_3d_dims(&self) -> CudaResult<(i32, i32, i32)> {
        Ok((
            self.attribute(CUdevice_attribute::MaxTexture3DWidth)?,
            self.attribute(CUdevice_attribute::MaxTexture3DHeight)?,
            self.attribute(CUdevice_attribute::MaxTexture3DDepth)?,
        ))
    }

    /// Whether the device can overlap copies with kernel execution
    /// (legacy `GpuOverlap` attribute).
    pub fn gpu_overlap(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::GpuOverlap)? != 0)
    }

    /// Maximum pitch allowed for pitched memory copies.
    pub fn max_pitch(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MaxPitch)
    }

    /// Required alignment for texture base addresses.
    pub fn texture_alignment(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::TextureAlignment)
    }

    /// Required alignment for surface base addresses.
    pub fn surface_alignment(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::SurfaceAlignment)
    }

    /// Whether deferred mapping of CUDA arrays is supported.
    pub fn supports_deferred_mapping(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::DeferredMappingCudaArraySupported)? != 0)
    }

    /// Whether stream-ordered memory pools are supported.
    pub fn supports_memory_pools(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::MemoryPoolsSupported)? != 0)
    }

    /// Whether thread-block cluster launch is supported.
    pub fn supports_cluster_launch(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::ClusterLaunch)? != 0)
    }

    /// Whether the virtual memory management APIs are supported.
    pub fn supports_virtual_memory_management(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::VirtualMemoryManagementSupported)? != 0)
    }

    // --- Exportable-handle support ---

    /// Whether POSIX file-descriptor handles can be exported.
    pub fn supports_handle_type_posix_fd(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::HandleTypePosixFileDescriptorSupported)? != 0)
    }

    /// Whether Win32 NT handles can be exported.
    pub fn supports_handle_type_win32(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::HandleTypeWin32HandleSupported)? != 0)
    }

    /// Whether Win32 KMT handles can be exported.
    pub fn supports_handle_type_win32_kmt(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::HandleTypeWin32KmtHandleSupported)? != 0)
    }

    /// Whether GPUDirect RDMA works with memory from the CUDA VMM APIs.
    pub fn supports_gpu_direct_rdma_vmm(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::GpuDirectRdmaWithCudaVmmSupported)? != 0)
    }

    /// Bitmask of supported GPUDirect RDMA flush-writes options.
    pub fn gpu_direct_rdma_flush_writes_options(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::GpuDirectRdmaFlushWritesOptions)
    }

    /// Raw GPUDirect RDMA writes-ordering attribute value.
    pub fn gpu_direct_rdma_writes_ordering(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::GpuDirectRdmaWritesOrdering)
    }

    /// Maximum size of an access-policy window, in bytes.
    pub fn max_access_policy_window_size(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MaxAccessPolicyWindowSize)
    }

    /// Shared memory per block reserved by the driver/runtime.
    pub fn reserved_shared_memory_per_block(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::ReservedSharedMemoryPerBlock)
    }

    /// Whether timeline-semaphore interop with external APIs is supported.
    pub fn supports_timeline_semaphore_interop(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::TimelineSemaphoreInteropSupported)? != 0)
    }

    /// Whether memory synchronization domains are supported.
    pub fn supports_mem_sync_domain(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::MemSyncDomainSupported)? != 0)
    }

    /// Number of memory synchronization domains.
    pub fn mem_sync_domain_count(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::MemSyncDomainCount)
    }

    /// Whether GPUDirect RDMA over fabric handles is supported.
    pub fn supports_gpu_direct_rdma_fabric(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::GpuDirectRdmaFabricSupported)? != 0)
    }

    /// Whether unified function pointers are supported.
    pub fn supports_unified_function_pointers(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::UnifiedFunctionPointers)? != 0)
    }

    /// Whether IPC event handles are supported.
    pub fn supports_ipc_events(&self) -> CudaResult<bool> {
        Ok(self.attribute(CUdevice_attribute::IpcEventSupported)? != 0)
    }

    // --- NUMA topology ---

    /// Raw NUMA configuration attribute value.
    pub fn numa_config(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::NumaConfig)
    }

    /// NUMA node identifier of the device.
    pub fn numa_id(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::NumaId)
    }

    /// NUMA node identifier of the host closest to the device.
    pub fn host_numa_id(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::HostNumaId)
    }

    /// Required pitch alignment for textures bound to pitched memory.
    pub fn texture_pitch_alignment(&self) -> CudaResult<i32> {
        self.attribute(CUdevice_attribute::TexturePitchAlignment)
    }
608
    /// Gathers a broad snapshot of device properties into a [`DeviceInfo`].
    ///
    /// `name` and `total_memory` are treated as mandatory and propagate their
    /// errors; every other property is best-effort and silently falls back to
    /// `0` / `false` (or `(0, 0)` for the compute capability) if its query
    /// fails — useful on drivers that lack newer attributes.
    pub fn info(&self) -> CudaResult<DeviceInfo> {
        let name = self.name()?;
        let total_memory_bytes = self.total_memory()?;
        let (cc_major, cc_minor) = self.compute_capability().unwrap_or((0, 0));

        // Everything below is best-effort: a failed query becomes a default.
        Ok(DeviceInfo {
            name,
            ordinal: self.ordinal,
            compute_capability: (cc_major, cc_minor),
            total_memory_bytes,
            multiprocessor_count: self.multiprocessor_count().unwrap_or(0),
            max_threads_per_block: self.max_threads_per_block().unwrap_or(0),
            max_threads_per_sm: self.max_threads_per_multiprocessor().unwrap_or(0),
            warp_size: self.warp_size().unwrap_or(0),
            // Attributes report kHz; DeviceInfo stores MHz.
            clock_rate_mhz: self.clock_rate_khz().unwrap_or(0) as f64 / 1000.0,
            memory_clock_rate_mhz: self.memory_clock_rate_khz().unwrap_or(0) as f64 / 1000.0,
            memory_bus_width_bits: self.memory_bus_width().unwrap_or(0),
            l2_cache_bytes: self.l2_cache_size().unwrap_or(0),
            max_shared_memory_per_block: self.max_shared_memory_per_block().unwrap_or(0),
            max_shared_memory_per_sm: self.max_shared_memory_per_multiprocessor().unwrap_or(0),
            max_registers_per_block: self.max_registers_per_block().unwrap_or(0),
            ecc_enabled: self.ecc_enabled().unwrap_or(false),
            tcc_driver: self.tcc_driver().unwrap_or(false),
            compute_mode: self.compute_mode().unwrap_or(0),
            supports_cooperative_launch: self.supports_cooperative_launch().unwrap_or(false),
            supports_managed_memory: self.supports_managed_memory().unwrap_or(false),
            max_persisting_l2_cache_bytes: self.max_persisting_l2_cache_size().unwrap_or(0),
            async_engine_count: self.async_engine_count().unwrap_or(0),
            supports_memory_pools: self.supports_memory_pools().unwrap_or(false),
            supports_gpu_direct_rdma: self.supports_gpu_direct_rdma().unwrap_or(false),
            supports_cluster_launch: self.supports_cluster_launch().unwrap_or(false),
            supports_concurrent_kernels: self.supports_concurrent_kernels().unwrap_or(false),
            supports_unified_addressing: self.supports_unified_addressing().unwrap_or(false),
            max_blocks_per_sm: self.max_blocks_per_multiprocessor().unwrap_or(0),
            single_to_double_perf_ratio: self.single_to_double_perf_ratio().unwrap_or(0),
        })
    }
658
    /// Returns the raw `CUdevice` handle, for use in direct FFI calls.
    #[inline]
    pub fn raw(&self) -> CUdevice {
        self.raw
    }

    /// Returns the zero-based ordinal this device was looked up with.
    #[inline]
    pub fn ordinal(&self) -> i32 {
        self.ordinal
    }
672}
673
674impl std::fmt::Display for Device {
675 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
676 write!(f, "Device({})", self.ordinal)
677 }
678}
679
/// A plain-data snapshot of device properties, as collected by
/// [`Device::info`].
#[derive(Debug, Clone)]
pub struct DeviceInfo {
    /// Device name string.
    pub name: String,
    /// Zero-based device ordinal.
    pub ordinal: i32,
    /// Compute capability as `(major, minor)`.
    pub compute_capability: (i32, i32),
    /// Total device memory, in bytes.
    pub total_memory_bytes: usize,
    /// Number of multiprocessors (SMs).
    pub multiprocessor_count: i32,
    /// Maximum threads per block.
    pub max_threads_per_block: i32,
    /// Maximum resident threads per SM.
    pub max_threads_per_sm: i32,
    /// Warp size in threads.
    pub warp_size: i32,
    /// Core clock in MHz (converted from the kHz attribute).
    pub clock_rate_mhz: f64,
    /// Memory clock in MHz (converted from the kHz attribute).
    pub memory_clock_rate_mhz: f64,
    /// Global memory bus width, in bits.
    pub memory_bus_width_bits: i32,
    /// L2 cache size, in bytes.
    pub l2_cache_bytes: i32,
    /// Maximum shared memory per block, in bytes.
    pub max_shared_memory_per_block: i32,
    /// Maximum shared memory per SM, in bytes.
    pub max_shared_memory_per_sm: i32,
    /// Maximum registers per block.
    pub max_registers_per_block: i32,
    /// Whether ECC is enabled.
    pub ecc_enabled: bool,
    /// Whether the TCC driver is in use.
    pub tcc_driver: bool,
    /// Raw compute-mode attribute value.
    pub compute_mode: i32,
    /// Whether cooperative kernel launch is supported.
    pub supports_cooperative_launch: bool,
    /// Whether managed (unified) memory is supported.
    pub supports_managed_memory: bool,
    /// Maximum persisting L2 cache size, in bytes.
    pub max_persisting_l2_cache_bytes: i32,
    /// Number of asynchronous copy engines.
    pub async_engine_count: i32,
    /// Whether stream-ordered memory pools are supported.
    pub supports_memory_pools: bool,
    /// Whether GPUDirect RDMA is supported.
    pub supports_gpu_direct_rdma: bool,
    /// Whether thread-block cluster launch is supported.
    pub supports_cluster_launch: bool,
    /// Whether concurrent kernel execution is supported.
    pub supports_concurrent_kernels: bool,
    /// Whether unified addressing is supported.
    pub supports_unified_addressing: bool,
    /// Maximum resident blocks per SM.
    pub max_blocks_per_sm: i32,
    /// Ratio of single- to double-precision performance.
    pub single_to_double_perf_ratio: i32,
}
749
impl std::fmt::Display for DeviceInfo {
    // Multi-line, human-readable report. The labels are hand-padded with
    // spaces so the value column lines up; keep the padding if editing.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mem_mb = self.total_memory_bytes / (1024 * 1024);
        let (major, minor) = self.compute_capability;
        writeln!(f, "Device {}: {}", self.ordinal, self.name)?;
        writeln!(f, "  Compute capability : {major}.{minor}")?;
        writeln!(f, "  Total memory       : {mem_mb} MB")?;
        writeln!(f, "  SMs                : {}", self.multiprocessor_count)?;
        writeln!(f, "  Max threads/block  : {}", self.max_threads_per_block)?;
        writeln!(f, "  Max threads/SM     : {}", self.max_threads_per_sm)?;
        writeln!(f, "  Warp size          : {}", self.warp_size)?;
        writeln!(f, "  Core clock         : {:.1} MHz", self.clock_rate_mhz)?;
        writeln!(
            f,
            "  Memory clock       : {:.1} MHz",
            self.memory_clock_rate_mhz
        )?;
        writeln!(
            f,
            "  Memory bus         : {} bits",
            self.memory_bus_width_bits
        )?;
        writeln!(
            f,
            "  L2 cache           : {} KB",
            self.l2_cache_bytes / 1024
        )?;
        writeln!(
            f,
            "  Shared mem/block   : {} KB",
            self.max_shared_memory_per_block / 1024
        )?;
        writeln!(
            f,
            "  Shared mem/SM      : {} KB",
            self.max_shared_memory_per_sm / 1024
        )?;
        writeln!(f, "  Registers/block    : {}", self.max_registers_per_block)?;
        writeln!(f, "  ECC                : {}", self.ecc_enabled)?;
        writeln!(f, "  TCC driver         : {}", self.tcc_driver)?;
        writeln!(f, "  Compute mode       : {}", self.compute_mode)?;
        writeln!(
            f,
            "  Cooperative launch : {}",
            self.supports_cooperative_launch
        )?;
        writeln!(f, "  Managed memory     : {}", self.supports_managed_memory)?;
        writeln!(
            f,
            "  Persist L2 cache   : {} KB",
            self.max_persisting_l2_cache_bytes / 1024
        )?;
        writeln!(f, "  Async engines      : {}", self.async_engine_count)?;
        writeln!(f, "  Memory pools       : {}", self.supports_memory_pools)?;
        writeln!(
            f,
            "  GPU Direct RDMA    : {}",
            self.supports_gpu_direct_rdma
        )?;
        writeln!(f, "  Cluster launch     : {}", self.supports_cluster_launch)?;
        writeln!(
            f,
            "  Concurrent kernels : {}",
            self.supports_concurrent_kernels
        )?;
        writeln!(
            f,
            "  Unified addressing : {}",
            self.supports_unified_addressing
        )?;
        writeln!(f, "  Max blocks/SM      : {}", self.max_blocks_per_sm)?;
        // Last line deliberately uses `write!` so the report carries no
        // trailing newline.
        write!(
            f,
            "  FP32/FP64 ratio    : {}",
            self.single_to_double_perf_ratio
        )
    }
}
828
829pub fn list_devices() -> CudaResult<Vec<Device>> {
854 let count = Device::count()?;
855 let mut devices = Vec::with_capacity(count as usize);
856 for i in 0..count {
857 devices.push(Device::get(i)?);
858 }
859 Ok(devices)
860}
861
/// Returns the installed driver's version number.
///
/// NOTE(review): the value is presumably CUDA's packed encoding
/// (1000 * major + 10 * minor) — confirm against the loader bindings.
///
/// # Errors
/// Fails if the driver library cannot be loaded or the underlying
/// `cu_driver_get_version` call reports an error.
pub fn driver_version() -> CudaResult<i32> {
    let driver = try_driver()?;
    let mut version: c_int = 0;
    crate::error::check(unsafe { (driver.cu_driver_get_version)(&mut version) })?;
    Ok(version)
}
876
/// Reports whether `device` can directly access memory allocated on `peer`.
///
/// # Errors
/// Fails if the driver library cannot be loaded or the underlying
/// `cu_device_can_access_peer` call reports an error.
pub fn can_access_peer(device: &Device, peer: &Device) -> CudaResult<bool> {
    let driver = try_driver()?;
    let mut can_access: c_int = 0;
    crate::error::check(unsafe {
        (driver.cu_device_can_access_peer)(&mut can_access, device.raw(), peer.raw())
    })?;
    Ok(can_access != 0)
}
900
901pub fn best_device() -> CudaResult<Option<Device>> {
921 let devices = list_devices()?;
922 if devices.is_empty() {
923 return Ok(None);
924 }
925 let mut best = devices[0];
926 let mut best_mem = best.total_memory()?;
927 for dev in devices.iter().skip(1) {
928 let mem = dev.total_memory()?;
929 if mem > best_mem {
930 best = *dev;
931 best_mem = mem;
932 }
933 }
934 Ok(Some(best))
935}