// oxirs_core/platform.rs

//! Platform capabilities detection for OxiRS
//!
//! This module provides unified platform detection across the OxiRS ecosystem.
//! All platform-specific code must use this module for capability detection.

use std::sync::OnceLock;

/// Platform capabilities detection result.
///
/// A plain value type: every field is a public flag, count or name, so a snapshot can be
/// cloned, printed with `{:?}` and compared with `==`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PlatformCapabilities {
    /// SIMD support available
    pub simd_available: bool,
    /// GPU support available
    pub gpu_available: bool,
    /// CUDA support available
    pub cuda_available: bool,
    /// OpenCL support available
    pub opencl_available: bool,
    /// Metal support available (macOS)
    pub metal_available: bool,
    /// AVX2 instructions available
    pub avx2_available: bool,
    /// AVX512 instructions available
    pub avx512_available: bool,
    /// ARM NEON instructions available
    pub neon_available: bool,
    /// Number of CPU cores
    pub cpu_cores: usize,
    /// CPU architecture
    pub arch: String,
    /// Operating system
    pub os: String,
}

35// Cache the detected capabilities
36static CAPABILITIES: OnceLock<PlatformCapabilities> = OnceLock::new();
37
38impl PlatformCapabilities {
39    /// Detect platform capabilities
40    pub fn detect() -> &'static PlatformCapabilities {
41        CAPABILITIES.get_or_init(|| {
42            let mut caps = PlatformCapabilities {
43                simd_available: false,
44                gpu_available: false,
45                cuda_available: false,
46                opencl_available: false,
47                metal_available: false,
48                avx2_available: false,
49                avx512_available: false,
50                neon_available: false,
51                cpu_cores: num_cpus::get(),
52                arch: std::env::consts::ARCH.to_string(),
53                os: std::env::consts::OS.to_string(),
54            };
55
56            // Detect SIMD capabilities
57            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
58            {
59                caps.simd_available = is_x86_feature_detected!("sse2");
60                caps.avx2_available = is_x86_feature_detected!("avx2");
61                caps.avx512_available = is_x86_feature_detected!("avx512f");
62            }
63
64            #[cfg(target_arch = "aarch64")]
65            {
66                caps.simd_available = true; // NEON is mandatory on aarch64
67                caps.neon_available = true;
68            }
69
70            // Detect GPU capabilities
71            caps.gpu_available = Self::detect_gpu();
72
73            // Detect CUDA
74            #[cfg(feature = "cuda")]
75            {
76                caps.cuda_available = Self::detect_cuda();
77            }
78
79            // Detect OpenCL
80            #[cfg(feature = "opencl")]
81            {
82                caps.opencl_available = Self::detect_opencl();
83            }
84
85            // Detect Metal (macOS only)
86            #[cfg(all(target_os = "macos", feature = "metal"))]
87            {
88                caps.metal_available = Self::detect_metal();
89            }
90
91            caps
92        })
93    }
94
95    /// Get a human-readable summary of capabilities
96    pub fn summary(&self) -> String {
97        let mut features = Vec::new();
98
99        if self.simd_available {
100            features.push("SIMD");
101
102            if self.avx2_available {
103                features.push("AVX2");
104            }
105            if self.avx512_available {
106                features.push("AVX512");
107            }
108            if self.neon_available {
109                features.push("NEON");
110            }
111        }
112
113        if self.gpu_available {
114            features.push("GPU");
115
116            if self.cuda_available {
117                features.push("CUDA");
118            }
119            if self.opencl_available {
120                features.push("OpenCL");
121            }
122            if self.metal_available {
123                features.push("Metal");
124            }
125        }
126
127        format!(
128            "{} ({} cores, {})",
129            features.join(", "),
130            self.cpu_cores,
131            self.arch
132        )
133    }
134
135    /// Check if any GPU is available
136    fn detect_gpu() -> bool {
137        // Simple heuristic - check for common GPU environment variables
138        std::env::var("CUDA_VISIBLE_DEVICES").is_ok()
139            || std::env::var("GPU_DEVICE_ORDINAL").is_ok()
140            || std::env::var("ROCR_VISIBLE_DEVICES").is_ok()
141    }
142
143    /// Check if CUDA is available
144    #[cfg(feature = "cuda")]
145    fn detect_cuda() -> bool {
146        // Check for CUDA runtime
147        std::env::var("CUDA_PATH").is_ok()
148            || std::path::Path::new("/usr/local/cuda").exists()
149            || std::path::Path::new("C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA")
150                .exists()
151    }
152
153    #[cfg(not(feature = "cuda"))]
154    #[allow(dead_code)]
155    fn detect_cuda() -> bool {
156        false
157    }
158
159    /// Check if OpenCL is available
160    #[cfg(feature = "opencl")]
161    #[allow(dead_code)]
162    fn detect_opencl() -> bool {
163        // Check for OpenCL libraries
164        #[cfg(target_os = "linux")]
165        {
166            std::path::Path::new("/usr/lib/libOpenCL.so").exists()
167                || std::path::Path::new("/usr/lib64/libOpenCL.so").exists()
168        }
169        #[cfg(target_os = "windows")]
170        {
171            std::path::Path::new("C:\\Windows\\System32\\OpenCL.dll").exists()
172        }
173        #[cfg(target_os = "macos")]
174        {
175            true // OpenCL is included in macOS
176        }
177        #[cfg(not(any(target_os = "linux", target_os = "windows", target_os = "macos")))]
178        {
179            false
180        }
181    }
182
183    #[cfg(not(feature = "opencl"))]
184    #[allow(dead_code)]
185    fn detect_opencl() -> bool {
186        false
187    }
188
189    /// Check if Metal is available
190    #[cfg(all(target_os = "macos", feature = "metal"))]
191    #[allow(dead_code)]
192    fn detect_metal() -> bool {
193        // Metal is available on all modern macOS systems
194        true
195    }
196
197    #[cfg(not(all(target_os = "macos", feature = "metal")))]
198    #[allow(dead_code)]
199    fn detect_metal() -> bool {
200        false
201    }
202}
203
204/// Auto-optimizer for selecting best implementation based on problem size
205pub struct AutoOptimizer {
206    capabilities: &'static PlatformCapabilities,
207}
208
209impl AutoOptimizer {
210    /// Create a new auto-optimizer
211    pub fn new() -> Self {
212        Self {
213            capabilities: PlatformCapabilities::detect(),
214        }
215    }
216
217    /// Determine if GPU should be used based on problem size
218    pub fn should_use_gpu(&self, problem_size: usize) -> bool {
219        // Use GPU for large problems when available
220        self.capabilities.gpu_available && problem_size > 100_000
221    }
222
223    /// Determine if SIMD should be used based on problem size
224    pub fn should_use_simd(&self, problem_size: usize) -> bool {
225        // Use SIMD for medium to large problems
226        self.capabilities.simd_available && problem_size > 1000
227    }
228
229    /// Determine if parallel processing should be used
230    pub fn should_use_parallel(&self, problem_size: usize) -> bool {
231        // Use parallel processing for large problems on multi-core systems
232        self.capabilities.cpu_cores > 1 && problem_size > 10_000
233    }
234
235    /// Get recommended chunk size for parallel processing
236    pub fn recommended_chunk_size(&self, total_size: usize) -> usize {
237        // Balance between parallelism overhead and work distribution
238        let ideal_chunks = self.capabilities.cpu_cores * 4;
239        let chunk_size = total_size / ideal_chunks;
240
241        // Ensure reasonable chunk size
242        chunk_size.clamp(1000, 100_000)
243    }
244}
245
246impl Default for AutoOptimizer {
247    fn default() -> Self {
248        Self::new()
249    }
250}
251
#[cfg(test)]
mod tests {
    use super::*;

    /// The detected snapshot must be populated and must be the cached instance on every call.
    #[test]
    fn test_platform_detection() {
        let caps = PlatformCapabilities::detect();

        // Should have at least 1 CPU core
        assert!(caps.cpu_cores >= 1);

        // Should have valid architecture
        assert!(!caps.arch.is_empty());

        // Should have valid OS
        assert!(!caps.os.is_empty());

        // Detection is cached: a second call hands back the very same instance
        assert!(std::ptr::eq(caps, PlatformCapabilities::detect()));

        let summary = caps.summary();
        assert!(summary.contains("cores"));
        println!("Platform capabilities: {}", summary);
    }

    /// Size-based recommendations must respect their thresholds and clamp range.
    #[test]
    fn test_auto_optimizer() {
        let optimizer = AutoOptimizer::new();

        // Small problem sizes should not use GPU or SIMD, whatever the hardware
        assert!(!optimizer.should_use_gpu(100));
        assert!(!optimizer.should_use_simd(100));

        // Medium problem sizes might use SIMD (depends on the host, so only exercised)
        let _ = optimizer.should_use_simd(5000);

        // Chunk size recommendation stays inside the documented clamp range
        let chunk_size = optimizer.recommended_chunk_size(1_000_000);
        assert!((1000..=100_000).contains(&chunk_size));
    }
}