scirs2_core/resource/gpu.rs

//! # GPU Detection and Capabilities
//!
//! This module provides GPU detection and capability assessment for
//! accelerated computing workloads.
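//!
//! A minimal usage sketch (assuming the module is exported as
//! `scirs2_core::resource::gpu`; detection can fail on headless systems):
//!
//! ```no_run
//! use scirs2_core::resource::gpu::GpuInfo;
//!
//! match GpuInfo::detect() {
//!     Ok(gpu) => println!(
//!         "{} ({:?}): {} GiB",
//!         gpu.name,
//!         gpu.vendor,
//!         gpu.memory_total / (1024 * 1024 * 1024)
//!     ),
//!     Err(err) => eprintln!("no usable GPU: {err}"),
//! }
//! ```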

use crate::error::{CoreError, CoreResult};

/// GPU information and capabilities
#[derive(Debug, Clone)]
pub struct GpuInfo {
    /// GPU name/model
    pub name: String,
    /// GPU vendor
    pub vendor: GpuVendor,
    /// Total GPU memory in bytes
    pub memory_total: usize,
    /// Available GPU memory in bytes
    pub memory_available: usize,
    /// Memory bandwidth in GB/s
    pub memory_bandwidth_gbps: f64,
    /// Number of compute units (CUDA cores, stream processors, etc.)
    pub compute_units: usize,
    /// Base clock frequency in MHz
    pub base_clock_mhz: usize,
    /// Memory clock frequency in MHz
    pub memory_clock_mhz: usize,
    /// Compute capability/architecture
    pub compute_capability: ComputeCapability,
    /// Supported features
    pub features: GpuFeatures,
    /// Performance characteristics
    pub performance: GpuPerformance,
}

impl GpuInfo {
    /// Detect GPU information
    pub fn detect() -> CoreResult<Self> {
        #[cfg(feature = "gpu")]
        {
            // Try different GPU detection methods
            if let Ok(gpu) = Self::detect_cuda() {
                return Ok(gpu);
            }

            if let Ok(gpu) = Self::detect_opencl() {
                return Ok(gpu);
            }

            if let Ok(gpu) = Self::detect_vulkan() {
                return Ok(gpu);
            }
        }

        // Try platform-specific detection
        #[cfg(target_os = "linux")]
        if let Ok(gpu) = Self::detect_linux() {
            return Ok(gpu);
        }

        #[cfg(target_os = "windows")]
        if let Ok(gpu) = Self::detect_windows() {
            return Ok(gpu);
        }

        #[cfg(target_os = "macos")]
        if let Ok(gpu) = Self::detect_macos() {
            return Ok(gpu);
        }

        Err(CoreError::ComputationError(
            crate::error::ErrorContext::new("No GPU detected"),
        ))
    }

    /// Detect CUDA-capable GPU
    #[cfg(feature = "gpu")]
    fn detect_cuda() -> CoreResult<Self> {
        // In a real implementation, this would use the CUDA runtime API.
        // For now, return a placeholder error.
        Err(CoreError::ComputationError(
            crate::error::ErrorContext::new("CUDA detection not implemented"),
        ))
    }

    /// Detect OpenCL-capable GPU
    #[cfg(feature = "gpu")]
    fn detect_opencl() -> CoreResult<Self> {
        // In a real implementation, this would use the OpenCL API.
        Err(CoreError::ComputationError(
            crate::error::ErrorContext::new("OpenCL detection not implemented"),
        ))
    }

    /// Detect Vulkan-capable GPU
    #[cfg(feature = "gpu")]
    fn detect_vulkan() -> CoreResult<Self> {
        // In a real implementation, this would use the Vulkan API.
        Err(CoreError::ComputationError(
            crate::error::ErrorContext::new("Vulkan detection not implemented"),
        ))
    }

    /// Detect GPU on Linux via sysfs
    #[cfg(target_os = "linux")]
    fn detect_linux() -> CoreResult<Self> {
        use std::fs;

        // Try to detect via /sys/class/drm
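        // On Linux, each `cardN` entry exposes its PCI IDs as hex strings,
        // e.g. `/sys/class/drm/card0/device/vendor` contains "0x10de" for
        // an NVIDIA device.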
        if let Ok(entries) = fs::read_dir("/sys/class/drm") {
            for entry in entries.flatten() {
                let path = entry.path();
                if let Some(name) = path.file_name() {
                    if name.to_string_lossy().starts_with("card") {
                        // Try to read device information
                        let device_path = path.join("device");
                        if let Ok(vendor) = fs::read_to_string(device_path.join("vendor")) {
                            if let Ok(device) = fs::read_to_string(device_path.join("device")) {
                                let vendor_id = vendor.trim();
                                let device_id = device.trim();

                                return Ok(Self::create_from_pci_ids(vendor_id, device_id));
                            }
                        }
                    }
                }
            }
        }

        Err(CoreError::ComputationError(
            crate::error::ErrorContext::new("No GPU detected on Linux"),
        ))
    }

    /// Detect GPU on Windows
    #[cfg(target_os = "windows")]
    fn detect_windows() -> CoreResult<Self> {
        // In a real implementation, this would use DXGI or WMI.
        Err(CoreError::ComputationError(
            crate::error::ErrorContext::new("Windows GPU detection not implemented"),
        ))
    }

    /// Detect GPU on macOS
    #[cfg(target_os = "macos")]
    fn detect_macos() -> CoreResult<Self> {
        // Apple Silicon always ships with an integrated GPU, so report
        // conservative placeholder figures for it.
        #[cfg(target_arch = "aarch64")]
        {
            Ok(Self {
                name: "Apple GPU".to_string(),
                vendor: GpuVendor::Apple,
                memory_total: 8 * 1024 * 1024 * 1024, // Unified memory
                memory_available: 6 * 1024 * 1024 * 1024,
                memory_bandwidth_gbps: 200.0,
                compute_units: 8,
                base_clock_mhz: 1000,
                memory_clock_mhz: 2000,
                compute_capability: ComputeCapability::Metal,
                features: GpuFeatures {
                    unified_memory: true,
                    double_precision: false, // Metal has no FP64 support
                    half_precision: true,
                    tensor_cores: false,
                    ray_tracing: false,
                },
                performance: GpuPerformance {
                    fp32_gflops: 2600.0,
                    fp16_gflops: 5200.0,
                    memory_bandwidth_gbps: 200.0,
                    efficiency_score: 0.9,
                },
            })
        }
        #[cfg(not(target_arch = "aarch64"))]
        {
            Err(CoreError::ComputationError(
                crate::error::ErrorContext::new("macOS GPU detection not implemented"),
            ))
        }
    }

    /// Create GPU info from numeric PCI vendor/device IDs
    #[allow(dead_code)]
    fn from_pci_ids(vendor_id: u16, _device_id: &str) -> Self {
        let vendor = match vendor_id {
            0x10de => GpuVendor::Nvidia,
            0x1002 => GpuVendor::Amd,
            0x8086 => GpuVendor::Intel,
            _ => GpuVendor::Unknown,
        };

        // This is a simplified mapping; a real implementation would consult
        // a comprehensive device database.
        let (name, memory_gb, compute_units) = match vendor_id {
            0x10de => ("NVIDIA GPU".to_string(), 8, 2048),
            0x1002 => ("AMD GPU".to_string(), 8, 64),
            0x8086 => ("Intel GPU".to_string(), 4, 96),
            _ => ("Unknown GPU".to_string(), 4, 32),
        };

        Self {
            name,
            vendor,
            memory_total: memory_gb * 1024 * 1024 * 1024,
            memory_available: (memory_gb * 1024 * 1024 * 1024 * 3) / 4, // 75% available
            memory_bandwidth_gbps: 500.0,
            compute_units,
            base_clock_mhz: 1500,
            memory_clock_mhz: 7000,
            compute_capability: ComputeCapability::Unknown,
            features: GpuFeatures::default(),
            performance: GpuPerformance::default(),
        }
    }

    /// Calculate performance score (0.0 to 1.0)
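    ///
    /// A doctest sketch (assuming the crate is importable as `scirs2_core`;
    /// `GpuInfo::default()` supplies placeholder figures):
    ///
    /// ```
    /// use scirs2_core::resource::gpu::GpuInfo;
    ///
    /// let score = GpuInfo::default().performance_score();
    /// assert!((0.0..=1.0).contains(&score));
    /// ```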
    pub fn performance_score(&self) -> f64 {
        let memory_score = (self.memory_total as f64 / (24.0 * 1024.0 * 1024.0 * 1024.0)).min(1.0); // Normalize to 24GB
        let compute_score = (self.compute_units as f64 / 4096.0).min(1.0); // Normalize to 4096 units
        let bandwidth_score = (self.memory_bandwidth_gbps / 1000.0).min(1.0); // Normalize to 1000 GB/s
        let efficiency_score = self.performance.efficiency_score;

        (memory_score + compute_score + bandwidth_score + efficiency_score) / 4.0
    }

    /// Get optimal workgroup/block size
    pub fn optimal_workgroup_size(&self) -> usize {
        match self.vendor {
            GpuVendor::Nvidia => 256, // Typical for NVIDIA
            GpuVendor::Amd => 64,     // Typical for AMD
            GpuVendor::Intel => 128,  // Typical for Intel
            GpuVendor::Apple => 32,   // Typical for Apple
            GpuVendor::Unknown => 64,
        }
    }

    /// Check if suitable for compute workloads
    pub fn is_compute_capable(&self) -> bool {
        self.memory_total >= 2 * 1024 * 1024 * 1024 && // At least 2GB
        self.compute_units >= 32 // At least 32 compute units
    }

    /// Check if suitable for machine learning
    pub fn is_ml_capable(&self) -> bool {
        self.is_compute_capable() && (self.features.tensor_cores || self.features.half_precision)
    }

    /// Create `GpuInfo` from PCI vendor/device ID strings (with or without a `0x` prefix)
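    ///
    /// Sketch (crate path assumed as in the module docs; `"0xabcd"` is a
    /// made-up device ID):
    ///
    /// ```
    /// use scirs2_core::resource::gpu::{GpuInfo, GpuVendor};
    ///
    /// let gpu = GpuInfo::create_from_pci_ids("0x1002", "0xabcd");
    /// assert_eq!(gpu.vendor, GpuVendor::Amd);
    /// ```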
    pub fn create_from_pci_ids(vendor_id: &str, device_id: &str) -> Self {
        // Strip the 0x prefix if present
        let vendor_id = vendor_id.strip_prefix("0x").unwrap_or(vendor_id);

        let vendor = match vendor_id {
            "10de" => GpuVendor::Nvidia,
            "1002" => GpuVendor::Amd,
            "8086" => GpuVendor::Intel,
            _ => GpuVendor::Unknown,
        };

        // Create an appropriate name based on vendor
        let name = match vendor {
            GpuVendor::Nvidia => format!("NVIDIA GPU {}", device_id),
            GpuVendor::Amd => format!("AMD GPU {}", device_id),
            GpuVendor::Intel => format!("Intel GPU {}", device_id),
            GpuVendor::Apple => format!("Apple GPU {}", device_id),
            GpuVendor::Unknown => format!("Unknown GPU {}", device_id),
        };

        // Default GPU info based on vendor.
        // A real implementation would look up the specific device.
        Self {
            name,
            vendor,
            memory_total: 8 * 1024 * 1024 * 1024, // 8GB default
            memory_available: 8 * 1024 * 1024 * 1024,
            memory_bandwidth_gbps: 400.0,
            compute_capability: match vendor {
                // Only assume a CUDA capability for NVIDIA devices
                GpuVendor::Nvidia => ComputeCapability::Cuda(7, 0),
                _ => ComputeCapability::Unknown,
            },
            compute_units: 128,
            base_clock_mhz: 1500,
            memory_clock_mhz: 1750, // Default memory clock
            features: GpuFeatures::default(),
            performance: GpuPerformance::default(),
        }
    }
}

/// GPU vendor types
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuVendor {
    /// NVIDIA
    Nvidia,
    /// AMD
    Amd,
    /// Intel
    Intel,
    /// Apple
    Apple,
    /// Unknown vendor
    Unknown,
}

/// GPU compute capabilities
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComputeCapability {
    /// CUDA compute capability (major, minor)
    Cuda(u32, u32),
    /// OpenCL version (major, minor)
    OpenCL(u32, u32),
    /// Vulkan version (major, minor)
    Vulkan(u32, u32),
    /// Metal (Apple)
    Metal,
    /// DirectCompute (Microsoft)
    DirectCompute,
    /// Unknown capability
    Unknown,
}

/// GPU feature support
#[derive(Debug, Clone)]
pub struct GpuFeatures {
    /// Unified memory support
    pub unified_memory: bool,
    /// Double precision (FP64) support
    pub double_precision: bool,
    /// Half precision (FP16) support
    pub half_precision: bool,
    /// Tensor cores or equivalent
    pub tensor_cores: bool,
    /// Ray tracing support
    pub ray_tracing: bool,
}

impl Default for GpuFeatures {
    fn default() -> Self {
        Self {
            unified_memory: false,
            double_precision: true,
            half_precision: false,
            tensor_cores: false,
            ray_tracing: false,
        }
    }
}

/// GPU performance characteristics
#[derive(Debug, Clone)]
pub struct GpuPerformance {
    /// FP32 performance in GFLOPS
    pub fp32_gflops: f64,
    /// FP16 performance in GFLOPS
    pub fp16_gflops: f64,
    /// Memory bandwidth in GB/s
    pub memory_bandwidth_gbps: f64,
    /// Overall efficiency score (0.0 to 1.0)
    pub efficiency_score: f64,
}

impl Default for GpuPerformance {
    fn default() -> Self {
        Self {
            fp32_gflops: 1000.0,
            fp16_gflops: 2000.0,
            memory_bandwidth_gbps: 500.0,
            efficiency_score: 0.7,
        }
    }
}

/// Multi-GPU information
#[derive(Debug, Clone)]
pub struct MultiGpuInfo {
    /// List of detected GPUs
    pub gpus: Vec<GpuInfo>,
    /// Total combined memory
    pub total_memory: usize,
    /// Whether GPUs support peer-to-peer communication
    pub p2p_capable: bool,
    /// Multi-GPU link configuration (SLI, CrossFire, NVLink, etc.)
    pub multi_gpu_config: MultiGpuConfig,
}

impl MultiGpuInfo {
    /// Detect all available GPUs
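    ///
    /// Sketch (crate path assumed as in the module docs; the GPU list is
    /// simply empty when nothing is detected):
    ///
    /// ```no_run
    /// use scirs2_core::resource::gpu::MultiGpuInfo;
    ///
    /// let multi = MultiGpuInfo::detect().unwrap();
    /// println!(
    ///     "{} GPU(s), {} compute units in total",
    ///     multi.gpus.len(),
    ///     multi.total_compute_units()
    /// );
    /// ```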
    pub fn detect() -> CoreResult<Self> {
        let mut gpus = Vec::new();

        // Try to detect multiple GPUs.
        // This is simplified; a real implementation would enumerate all devices.
        if let Ok(gpu) = GpuInfo::detect() {
            gpus.push(gpu);
        }

        let total_memory = gpus.iter().map(|gpu| gpu.memory_total).sum();

        Ok(Self {
            gpus,
            total_memory,
            p2p_capable: false,
            multi_gpu_config: MultiGpuConfig::Single,
        })
    }

    /// Get the best GPU for compute workloads
    pub fn best_compute_gpu(&self) -> Option<&GpuInfo> {
        self.gpus
            .iter()
            .filter(|gpu| gpu.is_compute_capable())
            // total_cmp gives a total order on f64, so this cannot panic on NaN
            .max_by(|a, b| a.performance_score().total_cmp(&b.performance_score()))
    }

    /// Get total compute capability
    pub fn total_compute_units(&self) -> usize {
        self.gpus.iter().map(|gpu| gpu.compute_units).sum()
    }
}

/// Multi-GPU configuration types
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MultiGpuConfig {
    /// Single GPU
    Single,
    /// SLI (NVIDIA)
    Sli,
    /// CrossFire (AMD)
    CrossFire,
    /// NVLink (NVIDIA)
    NvLink,
    /// Independent GPUs
    Independent,
}

impl Default for GpuInfo {
    fn default() -> Self {
        Self {
            name: "Default GPU".to_string(),
            vendor: GpuVendor::Unknown,
            memory_total: 4 * 1024 * 1024 * 1024,     // 4GB
            memory_available: 3 * 1024 * 1024 * 1024, // 3GB
            memory_bandwidth_gbps: 200.0,
            compute_units: 512,
            base_clock_mhz: 1000,
            memory_clock_mhz: 4000,
            compute_capability: ComputeCapability::Unknown,
            features: GpuFeatures::default(),
            performance: GpuPerformance::default(),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_gpu_vendor() {
        assert_eq!(GpuVendor::Nvidia, GpuVendor::Nvidia);
        assert_ne!(GpuVendor::Nvidia, GpuVendor::Amd);
    }

    #[test]
    fn test_compute_capability() {
        let cuda_cap = ComputeCapability::Cuda(7, 5);
        assert_eq!(cuda_cap, ComputeCapability::Cuda(7, 5));
        assert_ne!(cuda_cap, ComputeCapability::Metal);
    }
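
    // Hedged addition: a sketch of branching on `ComputeCapability`, e.g.
    // to gate CUDA-specific code paths on the capability version.
    #[test]
    fn test_compute_capability_matching() {
        let cap = ComputeCapability::Cuda(8, 0);
        let supports_modern_cuda = matches!(cap, ComputeCapability::Cuda(major, _) if major >= 7);
        assert!(supports_modern_cuda);
    }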

    #[test]
    fn test_gpu_features() {
        let features = GpuFeatures {
            unified_memory: true,
            tensor_cores: true,
            ..Default::default()
        };

        assert!(features.unified_memory);
        assert!(features.tensor_cores);
        assert!(!features.ray_tracing);
    }
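
    // Hedged addition: `is_ml_capable` requires a compute-capable GPU plus
    // FP16 or tensor-core support, per the predicates defined above.
    #[test]
    fn test_is_ml_capable() {
        let mut gpu = GpuInfo::default();
        // Default features have neither FP16 nor tensor cores
        assert!(!gpu.is_ml_capable());

        gpu.features.half_precision = true;
        assert!(gpu.is_ml_capable());
    }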

    #[test]
    fn test_gpu_performance() {
        let perf = GpuPerformance::default();
        assert!(perf.fp32_gflops > 0.0);
        assert!(perf.efficiency_score >= 0.0 && perf.efficiency_score <= 1.0);
    }

    #[test]
    fn test_pci_id_parsing() {
        let gpu = GpuInfo::create_from_pci_ids("0x10de", "0x1234");
        assert_eq!(gpu.vendor, GpuVendor::Nvidia);
        assert!(gpu.name.contains("NVIDIA"));
    }
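
    // Hedged addition: IDs without the `0x` prefix should parse the same way.
    #[test]
    fn test_pci_id_parsing_without_prefix() {
        let gpu = GpuInfo::create_from_pci_ids("1002", "abcd");
        assert_eq!(gpu.vendor, GpuVendor::Amd);
        assert!(gpu.name.contains("AMD"));
    }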

    #[test]
    fn test_multi_gpu_config() {
        assert_eq!(MultiGpuConfig::Single, MultiGpuConfig::Single);
        assert_ne!(MultiGpuConfig::Single, MultiGpuConfig::Sli);
    }

    #[test]
    fn test_optimal_workgroup_size() {
        let nvidia_gpu = GpuInfo {
            vendor: GpuVendor::Nvidia,
            ..Default::default()
        };
        assert_eq!(nvidia_gpu.optimal_workgroup_size(), 256);

        let amd_gpu = GpuInfo {
            vendor: GpuVendor::Amd,
            ..Default::default()
        };
        assert_eq!(amd_gpu.optimal_workgroup_size(), 64);
    }
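
    // Hedged addition: `best_compute_gpu` should prefer the GPU with the
    // higher performance score among compute-capable devices (field values
    // here are arbitrary test fixtures).
    #[test]
    fn test_best_compute_gpu() {
        let weak = GpuInfo {
            compute_units: 8, // below the 32-unit compute threshold
            ..Default::default()
        };
        let strong = GpuInfo {
            compute_units: 2048,
            memory_total: 8 * 1024 * 1024 * 1024,
            ..Default::default()
        };
        let multi = MultiGpuInfo {
            gpus: vec![weak, strong],
            total_memory: 12 * 1024 * 1024 * 1024,
            p2p_capable: false,
            multi_gpu_config: MultiGpuConfig::Independent,
        };

        let best = multi.best_compute_gpu().expect("one GPU is compute-capable");
        assert_eq!(best.compute_units, 2048);
    }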
}