1use crate::error::{CoreError, CoreResult};
7
/// Description of a single GPU device discovered on the host.
#[derive(Debug, Clone)]
pub struct GpuInfo {
    /// Human-readable device name (e.g. "NVIDIA GPU 0x1234").
    pub name: String,
    /// Hardware vendor.
    pub vendor: GpuVendor,
    /// Total device memory in bytes.
    pub memory_total: usize,
    /// Memory available for allocations, in bytes (heuristic estimate).
    pub memory_available: usize,
    /// Peak memory bandwidth in GB/s.
    // NOTE(review): name is missing an underscore (memory_bandwidth_gbps),
    // but it is a public field, so renaming would break callers.
    pub memorybandwidth_gbps: f64,
    /// Number of compute units (SMs / CUs / EUs depending on vendor).
    pub compute_units: usize,
    /// Base core clock in MHz.
    pub base_clock_mhz: usize,
    /// Memory clock in MHz.
    pub memory_clock_mhz: usize,
    /// Compute API / capability exposed by the device.
    pub compute_capability: ComputeCapability,
    /// Optional hardware feature flags.
    pub features: GpuFeatures,
    /// Estimated performance characteristics.
    pub performance: GpuPerformance,
}
34
35impl GpuInfo {
36 pub fn detect() -> CoreResult<Self> {
38 #[cfg(feature = "gpu")]
39 {
40 if let Ok(gpu) = Self::detect_cuda() {
42 return Ok(gpu);
43 }
44
45 if let Ok(gpu) = Self::detect_opencl() {
46 return Ok(gpu);
47 }
48
49 if let Ok(gpu) = Self::detect_vulkan() {
50 return Ok(gpu);
51 }
52 }
53
54 #[cfg(target_os = "linux")]
56 if let Ok(gpu) = Self::detect_linux() {
57 return Ok(gpu);
58 }
59
60 #[cfg(target_os = "windows")]
61 if let Ok(gpu) = Self::detect_windows() {
62 return Ok(gpu);
63 }
64
65 #[cfg(target_os = "macos")]
66 if let Ok(gpu) = Self::detect_macos() {
67 return Ok(gpu);
68 }
69
70 Err(CoreError::ComputationError(
71 crate::error::ErrorContext::new("No GPU detected"),
72 ))
73 }
74
75 #[cfg(feature = "gpu")]
77 fn detect_cuda() -> CoreResult<Self> {
78 Err(CoreError::ComputationError(
81 crate::error::ErrorContext::new("CUDA detection not implemented"),
82 ))
83 }
84
85 #[cfg(feature = "gpu")]
87 fn detect_opencl() -> CoreResult<Self> {
88 Err(CoreError::ComputationError(
90 crate::error::ErrorContext::new("OpenCL detection not implemented"),
91 ))
92 }
93
94 #[cfg(feature = "gpu")]
96 fn detect_vulkan() -> CoreResult<Self> {
97 Err(CoreError::ComputationError(
99 crate::error::ErrorContext::new("Vulkan detection not implemented"),
100 ))
101 }
102
103 #[cfg(target_os = "linux")]
105 fn detect_linux() -> CoreResult<Self> {
106 use std::fs;
107
108 if let Ok(entries) = fs::read_dir("/sys/class/drm") {
110 for entry in entries.flatten() {
111 let path = entry.path();
112 if let Some(name) = path.file_name() {
113 if name.to_string_lossy().starts_with("card") {
114 let device_path = path.join("device");
116 if let Ok(vendor) = fs::read_to_string(device_path.join("vendor")) {
117 if let Ok(device) = fs::read_to_string(device_path.join("device")) {
118 let vendor_id = vendor.trim();
119 let device_id = device.trim();
120
121 return Ok(Self::create_from_pci_ids(vendor_id, device_id));
122 }
123 }
124 }
125 }
126 }
127 }
128
129 Err(CoreError::ComputationError(
130 crate::error::ErrorContext::new("No GPU detected on Linux"),
131 ))
132 }
133
134 #[cfg(target_os = "windows")]
136 fn detect_windows() -> CoreResult<Self> {
137 Err(CoreError::ComputationError(
139 crate::error::ErrorContext::new("Windows GPU detection not implemented"),
140 ))
141 }
142
143 #[cfg(target_os = "macos")]
145 fn detect_macos() -> CoreResult<Self> {
146 #[cfg(target_arch = "aarch64")]
148 {
149 Ok(Self {
150 name: "Apple GPU".to_string(),
151 vendor: GpuVendor::Apple,
152 memory_total: 8 * 1024 * 1024 * 1024, memory_available: 6 * 1024 * 1024 * 1024,
154 memorybandwidth_gbps: 200.0,
155 compute_units: 8,
156 base_clock_mhz: 1000,
157 memory_clock_mhz: 2000,
158 compute_capability: ComputeCapability::Metal,
159 features: GpuFeatures {
160 unified_memory: true,
161 double_precision: true,
162 half_precision: true,
163 tensor_cores: false,
164 ray_tracing: false,
165 },
166 performance: GpuPerformance {
167 fp32_gflops: 2600.0,
168 fp16_gflops: 5200.0,
169 memorybandwidth_gbps: 200.0,
170 efficiency_score: 0.9,
171 },
172 })
173 }
174 #[cfg(not(target_arch = "aarch64"))]
175 {
176 Err(CoreError::ComputationError(
177 crate::error::ErrorContext::new("macOS GPU detection not implemented"),
178 ))
179 }
180 }
181
182 #[allow(dead_code)]
184 fn from_pci_ids(vendor_id: u16, _device_id: &str) -> Self {
185 let vendor = match vendor_id {
186 0x10de => GpuVendor::Nvidia,
187 0x1002 => GpuVendor::Amd,
188 0x8086 => GpuVendor::Intel,
189 _ => GpuVendor::Unknown,
190 };
191
192 let (name, memory_gb, compute_units) = match vendor_id {
195 0x10de => ("NVIDIA GPU".to_string(), 8, 2048),
196 0x1002 => ("AMD GPU".to_string(), 8, 64),
197 0x8086 => ("Intel GPU".to_string(), 4, 96),
198 _ => ("Unknown GPU".to_string(), 4, 32),
199 };
200
201 Self {
202 name,
203 vendor,
204 memory_total: memory_gb * 1024 * 1024 * 1024,
205 memory_available: (memory_gb * 1024 * 1024 * 1024 * 3) / 4, memorybandwidth_gbps: 500.0,
207 compute_units,
208 base_clock_mhz: 1500,
209 memory_clock_mhz: 7000,
210 compute_capability: ComputeCapability::Unknown,
211 features: GpuFeatures::default(),
212 performance: GpuPerformance::default(),
213 }
214 }
215
216 pub fn performance_score(&self) -> f64 {
218 let memory_score = (self.memory_total as f64 / (24.0 * 1024.0 * 1024.0 * 1024.0)).min(1.0); let compute_score = (self.compute_units as f64 / 4096.0).min(1.0); let bandwidth_score = (self.memorybandwidth_gbps / 1000.0).min(1.0); let efficiency_score = self.performance.efficiency_score;
222
223 (memory_score + compute_score + bandwidth_score + efficiency_score) / 4.0
224 }
225
226 pub fn optimal_workgroup_size(&self) -> usize {
228 match self.vendor {
229 GpuVendor::Nvidia => 256, GpuVendor::Amd => 64, GpuVendor::Intel => 128, GpuVendor::Apple => 32, GpuVendor::Unknown => 64,
234 }
235 }
236
237 pub fn is_compute_capable(&self) -> bool {
239 self.memory_total >= 2 * 1024 * 1024 * 1024 && self.compute_units >= 32 }
242
243 pub fn is_ml_capable(&self) -> bool {
245 self.is_compute_capable() && (self.features.tensor_cores || self.features.half_precision)
246 }
247
248 pub fn create_from_pci_ids(vendor_id: &str, device_id: &str) -> Self {
250 let vendor_id = vendor_id.strip_prefix("0x").unwrap_or(vendor_id);
252
253 let vendor = match vendor_id {
254 "10de" => GpuVendor::Nvidia,
255 "1002" => GpuVendor::Amd,
256 "8086" => GpuVendor::Intel,
257 _ => GpuVendor::Unknown,
258 };
259
260 let name = match vendor {
262 GpuVendor::Nvidia => format!("NVIDIA GPU {}", device_id),
263 GpuVendor::Amd => format!("AMD GPU {}", device_id),
264 GpuVendor::Intel => format!("Intel GPU {}", device_id),
265 GpuVendor::Apple => format!("Apple GPU {}", device_id),
266 GpuVendor::Unknown => format!("Unknown GPU {}", device_id),
267 };
268
269 Self {
272 name,
273 vendor,
274 memory_total: 8 * 1024 * 1024 * 1024, memory_available: 8 * 1024 * 1024 * 1024,
276 memorybandwidth_gbps: 400.0,
277 compute_capability: ComputeCapability::Cuda(7, 0), compute_units: 128,
279 base_clock_mhz: 1500,
280 memory_clock_mhz: 1750, features: GpuFeatures::default(),
282 performance: GpuPerformance::default(),
283 }
284 }
285}
286
/// GPU hardware vendor.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuVendor {
    /// NVIDIA (PCI vendor id 0x10de).
    Nvidia,
    /// AMD (PCI vendor id 0x1002).
    Amd,
    /// Intel (PCI vendor id 0x8086).
    Intel,
    /// Apple (integrated GPUs on Apple Silicon).
    Apple,
    /// Vendor could not be identified.
    Unknown,
}
301
/// Compute API (and, where versioned, its major/minor version) exposed by a
/// GPU device.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComputeCapability {
    /// NVIDIA CUDA compute capability (major, minor).
    Cuda(u32, u32),
    /// OpenCL version (major, minor).
    OpenCL(u32, u32),
    /// Vulkan compute version (major, minor).
    Vulkan(u32, u32),
    /// Apple Metal.
    Metal,
    /// Microsoft DirectCompute.
    DirectCompute,
    /// No known compute API.
    Unknown,
}
318
/// Optional hardware capability flags for a GPU.
#[derive(Debug, Clone)]
pub struct GpuFeatures {
    /// CPU and GPU share one address space (e.g. Apple Silicon).
    pub unified_memory: bool,
    /// FP64 arithmetic support.
    pub double_precision: bool,
    /// FP16 arithmetic support.
    pub half_precision: bool,
    /// Dedicated matrix-multiply units (NVIDIA tensor cores and similar).
    pub tensor_cores: bool,
    /// Hardware ray-tracing units.
    pub ray_tracing: bool,
}
333
334impl Default for GpuFeatures {
335 fn default() -> Self {
336 Self {
337 unified_memory: false,
338 double_precision: true,
339 half_precision: false,
340 tensor_cores: false,
341 ray_tracing: false,
342 }
343 }
344}
345
/// Estimated performance characteristics of a GPU.
#[derive(Debug, Clone)]
pub struct GpuPerformance {
    /// Single-precision throughput in GFLOPS.
    pub fp32_gflops: f64,
    /// Half-precision throughput in GFLOPS.
    pub fp16_gflops: f64,
    /// Memory bandwidth in GB/s.
    // NOTE(review): name is missing an underscore; kept for API stability.
    pub memorybandwidth_gbps: f64,
    /// Relative efficiency in [0, 1], used by `GpuInfo::performance_score`.
    pub efficiency_score: f64,
}
358
359impl Default for GpuPerformance {
360 fn default() -> Self {
361 Self {
362 fp32_gflops: 1000.0,
363 fp16_gflops: 2000.0,
364 memorybandwidth_gbps: 500.0,
365 efficiency_score: 0.7,
366 }
367 }
368}
369
/// Aggregate information about every GPU detected in the system.
#[derive(Debug, Clone)]
pub struct MultiGpuInfo {
    /// All detected GPUs (detection currently finds at most one).
    pub gpus: Vec<GpuInfo>,
    /// Sum of `memory_total` over all GPUs, in bytes.
    pub total_memory: usize,
    /// Whether peer-to-peer transfers between GPUs are possible.
    pub p2p_capable: bool,
    /// Multi-GPU interconnect topology.
    pub multi_gpuconfig: MultiGpuConfig,
}
382
383impl MultiGpuInfo {
384 pub fn detect() -> CoreResult<Self> {
386 let mut gpus = Vec::new();
387
388 if let Ok(gpu) = GpuInfo::detect() {
391 gpus.push(gpu);
392 }
393
394 let total_memory = gpus.iter().map(|gpu| gpu.memory_total).sum();
395
396 Ok(Self {
397 gpus,
398 total_memory,
399 p2p_capable: false,
400 multi_gpuconfig: MultiGpuConfig::Single,
401 })
402 }
403
404 pub fn best_compute_gpu(&self) -> Option<&GpuInfo> {
406 self.gpus
407 .iter()
408 .filter(|gpu| gpu.is_compute_capable())
409 .max_by(|a, b| {
410 a.performance_score()
411 .partial_cmp(&b.performance_score())
412 .unwrap()
413 })
414 }
415
416 pub fn total_compute_units(&self) -> usize {
418 self.gpus.iter().map(|gpu| gpu.compute_units).sum()
419 }
420}
421
/// How multiple GPUs are connected / configured.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MultiGpuConfig {
    /// A single GPU (or none).
    Single,
    /// NVIDIA SLI link.
    Sli,
    /// AMD CrossFire link.
    CrossFire,
    /// NVIDIA NVLink interconnect.
    NvLink,
    /// Multiple GPUs operating independently, with no interconnect.
    Independent,
}
436
437impl Default for GpuInfo {
438 fn default() -> Self {
439 Self {
440 name: "Default GPU".to_string(),
441 vendor: GpuVendor::Unknown,
442 memory_total: 4 * 1024 * 1024 * 1024, memory_available: 3 * 1024 * 1024 * 1024, memorybandwidth_gbps: 200.0,
445 compute_units: 512,
446 base_clock_mhz: 1000,
447 memory_clock_mhz: 4000,
448 compute_capability: ComputeCapability::Unknown,
449 features: GpuFeatures::default(),
450 performance: GpuPerformance::default(),
451 }
452 }
453}
454
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_gpu_vendor() {
        // Vendor equality is derived and purely structural.
        let vendor = GpuVendor::Nvidia;
        assert_eq!(vendor, GpuVendor::Nvidia);
        assert_ne!(vendor, GpuVendor::Amd);
    }

    #[test]
    fn test_compute_capability() {
        let cuda_cap = ComputeCapability::Cuda(7, 5);
        assert_eq!(cuda_cap, ComputeCapability::Cuda(7, 5));
        assert!(!matches!(cuda_cap, ComputeCapability::Metal));
    }

    #[test]
    fn test_gpu_features() {
        // Struct-update syntax fills the remaining flags from the default.
        let features = GpuFeatures {
            unified_memory: true,
            tensor_cores: true,
            ..GpuFeatures::default()
        };
        assert!(features.unified_memory && features.tensor_cores);
        assert!(!features.ray_tracing);
    }

    #[test]
    fn test_gpu_performance() {
        let perf = GpuPerformance::default();
        assert!(perf.fp32_gflops > 0.0);
        assert!((0.0..=1.0).contains(&perf.efficiency_score));
    }

    #[test]
    fn test_pci_id_parsing() {
        // The "0x" prefix must be tolerated on sysfs-style ids.
        let gpu = GpuInfo::create_from_pci_ids("0x10de", "0x1234");
        assert_eq!(gpu.vendor, GpuVendor::Nvidia);
        assert!(gpu.name.contains("NVIDIA"));
    }

    #[test]
    fn test_multi_gpu_config() {
        assert_eq!(MultiGpuConfig::Single, MultiGpuConfig::Single);
        assert_ne!(MultiGpuConfig::Single, MultiGpuConfig::Sli);
    }

    #[test]
    fn test_optimal_workgroup_size() {
        // Table-driven check of the per-vendor workgroup sizes.
        for (vendor, expected) in [(GpuVendor::Nvidia, 256), (GpuVendor::Amd, 64)] {
            let gpu = GpuInfo {
                vendor,
                ..GpuInfo::default()
            };
            assert_eq!(gpu.optimal_workgroup_size(), expected);
        }
    }
}