1#[cfg(all(feature = "cuda", cuda_runtime_available))]
4use anyhow::anyhow;
5use anyhow::Result;
6
/// Snapshot of one GPU's identity, limits and rough performance figures.
///
/// Instances describe either a real CUDA device or the fixed simulated
/// device that `GpuDevice::get_device_info` falls back to.
#[derive(Clone, Debug)]
pub struct GpuDevice {
    /// Device ordinal, as passed to the CUDA runtime.
    pub device_id: i32,
    /// Human-readable device name.
    pub name: String,
    /// Compute capability as `(major, minor)`.
    pub compute_capability: (i32, i32),
    /// Total device memory, in bytes.
    pub total_memory: usize,
    /// Device memory that was free when the snapshot was taken, in bytes.
    pub free_memory: usize,
    /// Upper bound on the number of threads in a single block.
    pub max_threads_per_block: i32,
    /// Upper bound on the number of blocks in a grid (first grid dimension
    /// for real devices).
    pub max_blocks_per_grid: i32,
    /// Number of threads in a warp.
    pub warp_size: i32,
    /// Estimated peak memory bandwidth.
    // NOTE(review): looks like GB/s given how `get_device_info` derives it — confirm.
    pub memory_bandwidth: f32,
    /// Rough estimate of peak compute throughput.
    // NOTE(review): the unit is not established by this file — confirm before relying on it.
    pub peak_flops: f64,
}
21
22impl GpuDevice {
23 fn simulated(device_id: i32) -> Self {
25 Self {
26 device_id,
27 name: format!("Simulated GPU {device_id}"),
28 compute_capability: (7, 5),
29 total_memory: 8 * 1024 * 1024 * 1024,
30 free_memory: 6 * 1024 * 1024 * 1024,
31 max_threads_per_block: 1024,
32 max_blocks_per_grid: 65535,
33 warp_size: 32,
34 memory_bandwidth: 900.0,
35 peak_flops: 14000.0,
36 }
37 }
38
39 pub fn get_device_info(device_id: i32) -> Result<Self> {
41 #[cfg(all(feature = "cuda", cuda_runtime_available))]
42 {
43 use cuda_runtime_sys::*;
44 unsafe {
45 let result = cudaSetDevice(device_id);
46 if result != cudaError_t::cudaSuccess {
47 tracing::warn!(
49 "CUDA device {} not available - using simulated GPU device",
50 device_id
51 );
52 return Ok(Self::simulated(device_id));
53 }
54
55 let mut props: cudaDeviceProp = std::mem::zeroed();
56 let result = cudaGetDeviceProperties(&mut props, device_id);
57 if result != cudaError_t::cudaSuccess {
58 tracing::warn!(
59 "Failed to get properties for CUDA device {} - using simulated GPU device",
60 device_id
61 );
62 return Ok(Self::simulated(device_id));
63 }
64
65 let mut free_mem: usize = 0;
66 let mut total_mem: usize = 0;
67 let result = cudaMemGetInfo(&mut free_mem, &mut total_mem);
68 if result != cudaError_t::cudaSuccess {
69 tracing::warn!(
70 "Failed to get memory info for CUDA device {} - using simulated GPU device",
71 device_id
72 );
73 return Ok(Self::simulated(device_id));
74 }
75
76 Ok(Self {
77 device_id,
78 name: std::ffi::CStr::from_ptr(props.name.as_ptr())
79 .to_string_lossy()
80 .to_string(),
81 compute_capability: (props.major, props.minor),
82 total_memory: total_mem,
83 free_memory: free_mem,
84 max_threads_per_block: props.maxThreadsPerBlock,
85 max_blocks_per_grid: props.maxGridSize[0],
86 warp_size: props.warpSize,
87 memory_bandwidth: props.memoryBusWidth as f32
88 * props.memoryClockRate as f32
89 * 2.0
90 / 8.0
91 / 1e6,
92 peak_flops: props.clockRate as f64
93 * props.multiProcessorCount as f64
94 * props.maxThreadsPerMultiProcessor as f64
95 / 1e6,
96 })
97 }
98 }
99
100 #[cfg(not(all(feature = "cuda", cuda_runtime_available)))]
101 {
102 tracing::warn!("CUDA not available - using simulated GPU device");
104 Ok(Self::simulated(device_id))
105 }
106 }
107
108 pub fn get_all_devices() -> Result<Vec<Self>> {
110 #[cfg(all(feature = "cuda", cuda_runtime_available))]
111 {
112 use cuda_runtime_sys::*;
113 unsafe {
114 let mut device_count: i32 = 0;
115 let result = cudaGetDeviceCount(&mut device_count);
116 if result != cudaError_t::cudaSuccess {
117 return Err(anyhow!("Failed to get device count"));
118 }
119
120 let mut devices = Vec::new();
121 for i in 0..device_count {
122 if let Ok(device) = Self::get_device_info(i) {
123 devices.push(device);
124 }
125 }
126 Ok(devices)
127 }
128 }
129
130 #[cfg(not(all(feature = "cuda", cuda_runtime_available)))]
131 {
132 tracing::warn!("CUDA not available - using simulated GPU devices");
134 Ok(vec![Self::get_device_info(0)?, Self::get_device_info(1)?])
135 }
136 }
137
138 pub fn supports_compute_capability(&self, major: i32, minor: i32) -> bool {
140 self.compute_capability.0 > major
141 || (self.compute_capability.0 == major && self.compute_capability.1 >= minor)
142 }
143
144 pub fn peak_memory_bandwidth(&self) -> f32 {
146 self.memory_bandwidth
147 }
148
149 pub fn peak_compute_performance(&self) -> f64 {
151 self.peak_flops
152 }
153
154 pub fn calculate_optimal_block_config(&self, problem_size: usize) -> (i32, i32) {
156 let optimal_threads = (self.max_threads_per_block as f32 * 0.75) as i32; let blocks_needed = ((problem_size as f32) / (optimal_threads as f32)).ceil() as i32;
158 let blocks = blocks_needed.min(self.max_blocks_per_grid);
159 (blocks, optimal_threads)
160 }
161}