1use std::sync::OnceLock;
7
/// Snapshot of the hardware and runtime features found on the current machine.
///
/// Values are normally obtained through `PlatformCapabilities::detect`, which
/// fills this structure once per process. The type is plain data, so it can be
/// cloned and compared freely.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PlatformCapabilities {
    /// Baseline SIMD support: SSE2 (checked at runtime) on x86/x86_64, always
    /// set on aarch64, `false` on every other architecture.
    pub simd_available: bool,
    /// Whether the environment indicates a GPU, based on the GPU
    /// device-selection variables (`CUDA_VISIBLE_DEVICES`,
    /// `GPU_DEVICE_ORDINAL`, `ROCR_VISIBLE_DEVICES`).
    pub gpu_available: bool,
    /// Whether a CUDA installation was found; only probed when the crate is
    /// built with the `cuda` feature, otherwise `false`.
    pub cuda_available: bool,
    /// Whether an OpenCL runtime was found; only probed when the crate is
    /// built with the `opencl` feature, otherwise `false`.
    pub opencl_available: bool,
    /// Whether Metal is usable; only set on macOS builds with the `metal`
    /// feature, otherwise `false`.
    pub metal_available: bool,
    /// Whether the CPU reports AVX2 (x86/x86_64 only).
    pub avx2_available: bool,
    /// Whether the CPU reports AVX-512 Foundation (x86/x86_64 only).
    pub avx512_available: bool,
    /// Whether NEON is available (set on aarch64 builds).
    pub neon_available: bool,
    /// Number of CPUs as reported by `num_cpus::get`.
    pub cpu_cores: usize,
    /// Target architecture name, taken from `std::env::consts::ARCH`.
    pub arch: String,
    /// Target operating system name, taken from `std::env::consts::OS`.
    pub os: String,
}
34
35static CAPABILITIES: OnceLock<PlatformCapabilities> = OnceLock::new();
37
38impl PlatformCapabilities {
39 pub fn detect() -> &'static PlatformCapabilities {
41 CAPABILITIES.get_or_init(|| {
42 let mut caps = PlatformCapabilities {
43 simd_available: false,
44 gpu_available: false,
45 cuda_available: false,
46 opencl_available: false,
47 metal_available: false,
48 avx2_available: false,
49 avx512_available: false,
50 neon_available: false,
51 cpu_cores: num_cpus::get(),
52 arch: std::env::consts::ARCH.to_string(),
53 os: std::env::consts::OS.to_string(),
54 };
55
56 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
58 {
59 caps.simd_available = is_x86_feature_detected!("sse2");
60 caps.avx2_available = is_x86_feature_detected!("avx2");
61 caps.avx512_available = is_x86_feature_detected!("avx512f");
62 }
63
64 #[cfg(target_arch = "aarch64")]
65 {
66 caps.simd_available = true; caps.neon_available = true;
68 }
69
70 caps.gpu_available = Self::detect_gpu();
72
73 #[cfg(feature = "cuda")]
75 {
76 caps.cuda_available = Self::detect_cuda();
77 }
78
79 #[cfg(feature = "opencl")]
81 {
82 caps.opencl_available = Self::detect_opencl();
83 }
84
85 #[cfg(all(target_os = "macos", feature = "metal"))]
87 {
88 caps.metal_available = Self::detect_metal();
89 }
90
91 caps
92 })
93 }
94
95 pub fn summary(&self) -> String {
97 let mut features = Vec::new();
98
99 if self.simd_available {
100 features.push("SIMD");
101
102 if self.avx2_available {
103 features.push("AVX2");
104 }
105 if self.avx512_available {
106 features.push("AVX512");
107 }
108 if self.neon_available {
109 features.push("NEON");
110 }
111 }
112
113 if self.gpu_available {
114 features.push("GPU");
115
116 if self.cuda_available {
117 features.push("CUDA");
118 }
119 if self.opencl_available {
120 features.push("OpenCL");
121 }
122 if self.metal_available {
123 features.push("Metal");
124 }
125 }
126
127 format!(
128 "{} ({} cores, {})",
129 features.join(", "),
130 self.cpu_cores,
131 self.arch
132 )
133 }
134
135 fn detect_gpu() -> bool {
137 std::env::var("CUDA_VISIBLE_DEVICES").is_ok()
139 || std::env::var("GPU_DEVICE_ORDINAL").is_ok()
140 || std::env::var("ROCR_VISIBLE_DEVICES").is_ok()
141 }
142
143 #[cfg(feature = "cuda")]
145 fn detect_cuda() -> bool {
146 std::env::var("CUDA_PATH").is_ok()
148 || std::path::Path::new("/usr/local/cuda").exists()
149 || std::path::Path::new("C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA")
150 .exists()
151 }
152
153 #[cfg(not(feature = "cuda"))]
154 #[allow(dead_code)]
155 fn detect_cuda() -> bool {
156 false
157 }
158
159 #[cfg(feature = "opencl")]
161 #[allow(dead_code)]
162 fn detect_opencl() -> bool {
163 #[cfg(target_os = "linux")]
165 {
166 std::path::Path::new("/usr/lib/libOpenCL.so").exists()
167 || std::path::Path::new("/usr/lib64/libOpenCL.so").exists()
168 }
169 #[cfg(target_os = "windows")]
170 {
171 std::path::Path::new("C:\\Windows\\System32\\OpenCL.dll").exists()
172 }
173 #[cfg(target_os = "macos")]
174 {
175 true }
177 #[cfg(not(any(target_os = "linux", target_os = "windows", target_os = "macos")))]
178 {
179 false
180 }
181 }
182
183 #[cfg(not(feature = "opencl"))]
184 #[allow(dead_code)]
185 fn detect_opencl() -> bool {
186 false
187 }
188
189 #[cfg(all(target_os = "macos", feature = "metal"))]
191 #[allow(dead_code)]
192 fn detect_metal() -> bool {
193 true
195 }
196
197 #[cfg(not(all(target_os = "macos", feature = "metal")))]
198 #[allow(dead_code)]
199 fn detect_metal() -> bool {
200 false
201 }
202}
203
204pub struct AutoOptimizer {
206 capabilities: &'static PlatformCapabilities,
207}
208
209impl AutoOptimizer {
210 pub fn new() -> Self {
212 Self {
213 capabilities: PlatformCapabilities::detect(),
214 }
215 }
216
217 pub fn should_use_gpu(&self, problem_size: usize) -> bool {
219 self.capabilities.gpu_available && problem_size > 100_000
221 }
222
223 pub fn should_use_simd(&self, problem_size: usize) -> bool {
225 self.capabilities.simd_available && problem_size > 1000
227 }
228
229 pub fn should_use_parallel(&self, problem_size: usize) -> bool {
231 self.capabilities.cpu_cores > 1 && problem_size > 10_000
233 }
234
235 pub fn recommended_chunk_size(&self, total_size: usize) -> usize {
237 let ideal_chunks = self.capabilities.cpu_cores * 4;
239 let chunk_size = total_size / ideal_chunks;
240
241 chunk_size.clamp(1000, 100_000)
243 }
244}
245
246impl Default for AutoOptimizer {
247 fn default() -> Self {
248 Self::new()
249 }
250}
251
#[cfg(test)]
mod tests {
    use super::*;

    /// Detection must always yield a usable, cached description of the host.
    #[test]
    fn test_platform_detection() {
        let caps = PlatformCapabilities::detect();

        assert!(caps.cpu_cores >= 1);
        assert!(!caps.arch.is_empty());
        assert!(!caps.os.is_empty());

        // Repeated calls hand out the same cached instance.
        assert!(std::ptr::eq(caps, PlatformCapabilities::detect()));

        // The summary always ends with the core count and architecture.
        let summary = caps.summary();
        assert!(summary.contains(caps.arch.as_str()));
        assert!(summary.contains("cores"));

        println!("Platform capabilities: {}", summary);
    }

    /// The optimizer's decisions must follow its size thresholds and the
    /// detected capabilities.
    #[test]
    fn test_auto_optimizer() {
        let caps = PlatformCapabilities::detect();
        let optimizer = AutoOptimizer::new();

        // Small problems never go to the GPU, whatever hardware is present.
        assert!(!optimizer.should_use_gpu(100));

        // Above the SIMD threshold the answer depends only on the hardware;
        // at the threshold it is always negative.
        assert_eq!(optimizer.should_use_simd(5000), caps.simd_available);
        assert!(!optimizer.should_use_simd(1000));

        // The parallel threshold is exclusive as well.
        assert!(!optimizer.should_use_parallel(10_000));

        // Chunk sizes are always kept within 1000..=100_000.
        let chunk_size = optimizer.recommended_chunk_size(1_000_000);
        assert!((1000..=100_000).contains(&chunk_size));
        assert_eq!(optimizer.recommended_chunk_size(0), 1000);
    }
}