1use crate::device::{CpuDevice, Device, DeviceCapabilities, DeviceType, SimdFeatures};
4use crate::error::Result;
5use std::collections::HashMap;
6
7#[derive(Debug, Clone)]
9pub struct BackendFeatureDetector {
10 pub available_devices: Vec<DeviceInfo>,
12 pub runtime_features: RuntimeFeatures,
14 pub backend_summary: BackendSummary,
16}
17
18#[derive(Debug, Clone)]
20pub struct DeviceInfo {
21 pub device_type: DeviceType,
23 pub capabilities: DeviceCapabilities,
25 pub is_available: bool,
27 pub priority: u32,
29 pub metadata: HashMap<String, String>,
31}
32
33#[derive(Debug, Clone, Default)]
35pub struct RuntimeFeatures {
36 pub cpu_features: CpuFeatures,
38 pub gpu_features: GpuFeatures,
40 pub system_features: SystemFeatures,
42 pub build_features: BuildFeatures,
44}
45
46#[derive(Debug, Clone, Default)]
48pub struct CpuFeatures {
49 pub simd: SimdFeatures,
51 pub physical_cores: usize,
53 pub logical_cores: usize,
55 pub architecture: String,
57 pub vendor: Option<String>,
59 pub model_name: Option<String>,
61 pub base_frequency: Option<u64>,
63 pub cache_sizes: CacheSizes,
65}
66
/// Sizes of the CPU caches in bytes; `None` where a level could not be read.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CacheSizes {
    /// Level-1 data cache size in bytes.
    pub l1_data: Option<usize>,
    /// Level-1 instruction cache size in bytes.
    pub l1_instruction: Option<usize>,
    /// Level-2 cache size in bytes.
    pub l2: Option<usize>,
    /// Level-3 cache size in bytes.
    pub l3: Option<usize>,
}
79
/// GPU runtime information.
///
/// The built-in detection currently returns the default value, i.e. no GPU
/// runtime is reported.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct GpuFeatures {
    /// CUDA version string, if CUDA was found.
    pub cuda_version: Option<String>,
    /// CUDA compute capability (presumably `(major, minor)` — confirm with the producer).
    pub cuda_compute_capability: Option<(u32, u32)>,
    /// OpenCL version string, if OpenCL was found.
    pub opencl_version: Option<String>,
    /// Vulkan version string, if Vulkan was found.
    pub vulkan_version: Option<String>,
    /// Metal version string, if Metal was found.
    pub metal_version: Option<String>,
    /// Whether WebGPU can be used.
    pub webgpu_available: bool,
    /// Number of GPUs found.
    pub gpu_count: usize,
}
98
99#[derive(Debug, Clone, Default)]
101pub struct SystemFeatures {
102 pub os_info: OsInfo,
104 pub total_memory: usize,
106 pub page_size: usize,
108 pub numa_available: bool,
110 pub numa_nodes: usize,
112 pub memory_bandwidth: Option<u64>,
114}
115
/// Identification of the operating system the process runs on.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct OsInfo {
    /// Operating system name taken from `std::env::consts::OS`.
    pub name: String,
    /// Operating system version, when it could be determined.
    pub version: Option<String>,
    /// Architecture name taken from `std::env::consts::ARCH`.
    pub arch: String,
    /// Kernel release string, when it could be determined.
    pub kernel_version: Option<String>,
}
128
/// Properties of the compiled binary.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct BuildFeatures {
    /// Target description in the form `<arch>-<os>`.
    pub target_triple: String,
    /// Optimisation level, when known.
    pub opt_level: Option<String>,
    /// Set from `cfg!(debug_assertions)` by the built-in detection.
    pub debug_info: bool,
    /// Names of the crate features that were enabled at compile time.
    pub compile_features: Vec<String>,
    /// Cargo feature names reported for the build.
    pub cargo_features: Vec<String>,
}
143
144#[derive(Debug, Clone, Default)]
146pub struct BackendSummary {
147 pub best_devices: HashMap<DeviceType, DeviceInfo>,
149 pub recommended_device: Option<DeviceInfo>,
151 pub performance_tier: PerformanceTier,
153 pub performance_notes: Vec<String>,
155 pub recommendations: Vec<String>,
157}
158
/// Coarse classification of a machine's compute capability.
///
/// Variants are declared from weakest to strongest, so the derived ordering
/// allows comparisons such as `tier >= PerformanceTier::High`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub enum PerformanceTier {
    /// Below the thresholds of every other tier.
    Low,
    /// Default tier.
    #[default]
    Medium,
    /// Well-equipped machine.
    High,
    /// Top tier.
    Extreme,
}
172
/// Kind of workload a caller wants to run, used to pick the most suitable device.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum WorkloadType {
    /// No special requirements; the recommended device is used.
    GeneralCompute,
    /// Needs double-precision support.
    HighPrecisionMath,
    /// Favours devices with the most memory, then the widest SIMD.
    LargeMatrices,
    /// Favours devices with the most compute units.
    ParallelWorkloads,
}
185
186impl BackendFeatureDetector {
187 pub fn new() -> Result<Self> {
189 let mut detector = Self {
190 available_devices: Vec::new(),
191 runtime_features: RuntimeFeatures::default(),
192 backend_summary: BackendSummary::default(),
193 };
194
195 detector.detect_all_features()?;
196 detector.discover_devices()?;
197 detector.analyze_capabilities()?;
198
199 Ok(detector)
200 }
201
202 fn detect_all_features(&mut self) -> Result<()> {
204 self.runtime_features.cpu_features = self.detect_cpu_features()?;
205 self.runtime_features.gpu_features = self.detect_gpu_features()?;
206 self.runtime_features.system_features = self.detect_system_features()?;
207 self.runtime_features.build_features = self.detect_build_features()?;
208 Ok(())
209 }
210
211 fn detect_cpu_features(&self) -> Result<CpuFeatures> {
213 let cpu_device = CpuDevice::new();
214 let cpu_capabilities = cpu_device.capabilities()?;
215
216 let features = CpuFeatures {
217 simd: cpu_capabilities.simd_features().clone(),
218 physical_cores: num_cpus::get_physical(),
219 logical_cores: num_cpus::get(),
220 architecture: std::env::consts::ARCH.to_string(),
221 vendor: self.detect_cpu_vendor(),
222 model_name: None, base_frequency: None, cache_sizes: self.detect_cache_sizes()?,
225 };
226
227 Ok(features)
228 }
229
230 fn detect_cpu_vendor(&self) -> Option<String> {
232 #[cfg(target_arch = "x86_64")]
233 {
234 self.detect_x86_cpu_vendor()
235 }
236 #[cfg(target_arch = "aarch64")]
237 {
238 self.detect_arm_cpu_vendor()
239 }
240 #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
241 {
242 None
243 }
244 }
245
246 #[cfg(target_arch = "x86_64")]
248 fn detect_x86_cpu_vendor(&self) -> Option<String> {
249 use std::arch::x86_64::__cpuid;
250
251 unsafe {
252 let cpuid_result = __cpuid(0);
253 let vendor_string = format!(
254 "{}{}{}",
255 std::str::from_utf8(&cpuid_result.ebx.to_le_bytes()).unwrap_or(""),
256 std::str::from_utf8(&cpuid_result.edx.to_le_bytes()).unwrap_or(""),
257 std::str::from_utf8(&cpuid_result.ecx.to_le_bytes()).unwrap_or("")
258 );
259
260 match vendor_string.as_str() {
261 "GenuineIntel" => Some("Intel".to_string()),
262 "AuthenticAMD" => Some("AMD".to_string()),
263 "VIA VIA VIA " => Some("VIA".to_string()),
264 "CyrixInstead" => Some("Cyrix".to_string()),
265 "CentaurHauls" => Some("Centaur".to_string()),
266 "NexGenDriven" => Some("NexGen".to_string()),
267 "HygonGenuine" => Some("Hygon".to_string()),
268 _ => Some(format!(
269 "Unknown ({})",
270 vendor_string.trim_end_matches('\0')
271 )),
272 }
273 }
274 }
275
/// Identifies the vendor of an aarch64 CPU.
///
/// On Linux the first matching line of `/proc/cpuinfo` decides: a
/// `CPU implementer` code is mapped to a vendor name (unknown codes are
/// reported as `"Unknown ARM implementer (<code>)"`), and a `Hardware` line
/// mentioning `BCM` or `Apple` maps to Broadcom or Apple respectively.
///
/// When nothing matches, or on other operating systems, the result is
/// `"Apple"` for Apple targets and `"ARM"` otherwise, so it is always `Some`.
#[cfg(target_arch = "aarch64")]
fn detect_arm_cpu_vendor(&self) -> Option<String> {
    #[cfg(target_os = "linux")]
    {
        if let Ok(cpuinfo) = std::fs::read_to_string("/proc/cpuinfo") {
            for line in cpuinfo.lines() {
                if line.starts_with("CPU implementer") {
                    if let Some(code) = line.split(':').nth(1).map(str::trim) {
                        let vendor = match code {
                            "0x41" => "ARM",
                            "0x42" => "Broadcom",
                            "0x43" => "Cavium",
                            "0x44" => "DEC",
                            "0x46" => "Fujitsu",
                            "0x48" => "HiSilicon",
                            "0x49" => "Infineon",
                            "0x4d" => "Motorola",
                            "0x4e" => "NVIDIA",
                            "0x50" => "Applied Micro",
                            "0x51" => "Qualcomm",
                            "0x56" => "Marvell",
                            "0x61" => "Apple",
                            _ => {
                                return Some(format!("Unknown ARM implementer ({})", code));
                            }
                        };
                        return Some(vendor.to_string());
                    }
                }
                if line.starts_with("Hardware") {
                    if line.contains("BCM") {
                        return Some("Broadcom".to_string());
                    }
                    if line.contains("Apple") {
                        return Some("Apple".to_string());
                    }
                }
            }
        }
    }

    // No per-CPU information was available. aarch64 targets with an Apple
    // vendor (macOS, iOS, ...) run on Apple-designed processors, so report
    // that instead of the generic name.
    let fallback = if cfg!(target_vendor = "apple") {
        "Apple"
    } else {
        "ARM"
    };
    Some(fallback.to_string())
}
315
316 fn detect_cache_sizes(&self) -> Result<CacheSizes> {
318 #[allow(unused_mut)] let mut cache_sizes = CacheSizes::default();
320
321 #[cfg(target_os = "linux")]
322 {
323 if let Ok(l1d) =
325 std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cache/index0/size")
326 {
327 cache_sizes.l1_data = self.parse_cache_size(&l1d);
328 }
329 if let Ok(l1i) =
330 std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cache/index1/size")
331 {
332 cache_sizes.l1_instruction = self.parse_cache_size(&l1i);
333 }
334 if let Ok(l2) =
335 std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cache/index2/size")
336 {
337 cache_sizes.l2 = self.parse_cache_size(&l2);
338 }
339 if let Ok(l3) =
340 std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cache/index3/size")
341 {
342 cache_sizes.l3 = self.parse_cache_size(&l3);
343 }
344 }
345
346 Ok(cache_sizes)
347 }
348
349 #[allow(dead_code)] fn parse_cache_size(&self, size_str: &str) -> Option<usize> {
352 let size_str = size_str.trim();
353 if size_str.is_empty() {
354 return None;
355 }
356
357 let (number_part, suffix) = if let Some(stripped) = size_str.strip_suffix('K') {
358 (stripped, 1024)
359 } else if let Some(stripped) = size_str.strip_suffix('M') {
360 (stripped, 1024 * 1024)
361 } else if let Some(stripped) = size_str.strip_suffix('G') {
362 (stripped, 1024 * 1024 * 1024)
363 } else {
364 (size_str, 1)
365 };
366
367 number_part.parse::<usize>().ok().map(|n| n * suffix)
368 }
369
370 fn detect_gpu_features(&self) -> Result<GpuFeatures> {
372 let features = GpuFeatures::default();
373 Ok(features)
375 }
376
377 fn detect_system_features(&self) -> Result<SystemFeatures> {
379 let cpu_device = CpuDevice::new();
380 let memory_info = cpu_device.memory_info()?;
381 let _cpu_capabilities = cpu_device.capabilities()?;
382
383 let features = SystemFeatures {
384 os_info: self.detect_os_info(),
385 total_memory: memory_info.total as usize,
386 page_size: self.detect_page_size(),
387 numa_available: self.detect_numa_support(),
388 numa_nodes: self.detect_numa_nodes(),
389 memory_bandwidth: None, };
391
392 Ok(features)
393 }
394
395 fn detect_os_info(&self) -> OsInfo {
397 OsInfo {
398 name: std::env::consts::OS.to_string(),
399 version: self.get_os_version(),
400 arch: std::env::consts::ARCH.to_string(),
401 kernel_version: self.get_kernel_version(),
402 }
403 }
404
405 fn get_os_version(&self) -> Option<String> {
407 #[cfg(target_os = "linux")]
408 {
409 std::fs::read_to_string("/proc/version")
410 .ok()
411 .and_then(|v| v.split_whitespace().nth(2).map(|s| s.to_string()))
412 }
413 #[cfg(not(target_os = "linux"))]
414 {
415 None
416 }
417 }
418
419 fn get_kernel_version(&self) -> Option<String> {
421 #[cfg(any(target_os = "linux", target_os = "macos"))]
422 {
423 use std::process::Command;
424 Command::new("uname")
425 .arg("-r")
426 .output()
427 .ok()
428 .and_then(|output| String::from_utf8(output.stdout).ok())
429 .map(|v| v.trim().to_string())
430 }
431 #[cfg(not(any(target_os = "linux", target_os = "macos")))]
432 {
433 None
434 }
435 }
436
437 fn detect_page_size(&self) -> usize {
439 4096 }
441
442 fn detect_numa_support(&self) -> bool {
444 #[cfg(target_os = "linux")]
445 {
446 std::path::Path::new("/sys/devices/system/node").exists()
447 }
448 #[cfg(not(target_os = "linux"))]
449 {
450 false
451 }
452 }
453
454 fn detect_numa_nodes(&self) -> usize {
456 #[cfg(target_os = "linux")]
457 {
458 if let Ok(entries) = std::fs::read_dir("/sys/devices/system/node") {
459 entries
460 .filter_map(|entry| {
461 entry.ok().and_then(|e| {
462 let name = e.file_name();
463 let name_str = name.to_string_lossy();
464 if name_str.starts_with("node")
465 && name_str[4..].chars().all(|c| c.is_ascii_digit())
466 {
467 Some(())
468 } else {
469 None
470 }
471 })
472 })
473 .count()
474 } else {
475 1
476 }
477 }
478 #[cfg(not(target_os = "linux"))]
479 {
480 1
481 }
482 }
483
484 fn detect_build_features(&self) -> Result<BuildFeatures> {
486 let features = BuildFeatures {
487 target_triple: format!("{}-{}", std::env::consts::ARCH, std::env::consts::OS),
488 opt_level: std::env::var("OPT_LEVEL").ok(),
489 debug_info: cfg!(debug_assertions),
490 compile_features: self.get_compile_features(),
491 cargo_features: self.get_cargo_features(),
492 };
493
494 Ok(features)
495 }
496
497 fn get_compile_features(&self) -> Vec<String> {
499 let mut features = Vec::new();
500
501 if cfg!(feature = "std") {
502 features.push("std".to_string());
503 }
504 if cfg!(feature = "no_std") {
505 features.push("no_std".to_string());
506 }
507 if cfg!(feature = "serialize") {
508 features.push("serialize".to_string());
509 }
510 if cfg!(feature = "half") {
511 features.push("half".to_string());
512 }
513 if cfg!(feature = "avx512") {
514 features.push("avx512".to_string());
515 }
516 if cfg!(feature = "simd") {
517 features.push("simd".to_string());
518 }
519 if cfg!(feature = "parallel") {
520 features.push("parallel".to_string());
521 }
522 if cfg!(feature = "fast-math") {
523 features.push("fast-math".to_string());
524 }
525
526 features
527 }
528
529 fn get_cargo_features(&self) -> Vec<String> {
531 vec!["default".to_string()]
532 }
533
534 fn discover_devices(&mut self) -> Result<()> {
536 let cpu_device = CpuDevice::new();
538 let cpu_capabilities = cpu_device.capabilities()?;
539
540 self.available_devices.push(DeviceInfo {
541 device_type: DeviceType::Cpu,
542 capabilities: cpu_capabilities,
543 is_available: cpu_device.is_available().unwrap_or(false),
544 priority: 10, metadata: HashMap::new(),
546 });
547
548 Ok(())
549 }
550
551 fn analyze_capabilities(&mut self) -> Result<()> {
553 for device in &self.available_devices {
555 self.backend_summary
556 .best_devices
557 .insert(device.device_type, device.clone());
558 }
559
560 self.backend_summary.recommended_device = self
562 .available_devices
563 .iter()
564 .filter(|d| d.is_available)
565 .max_by_key(|d| d.priority)
566 .cloned();
567
568 self.backend_summary.performance_tier = self.classify_performance_tier();
570
571 self.generate_performance_analysis();
573
574 Ok(())
575 }
576
577 fn classify_performance_tier(&self) -> PerformanceTier {
579 let cpu_features = &self.runtime_features.cpu_features;
580 let system_features = &self.runtime_features.system_features;
581
582 let memory_gb = system_features.total_memory / (1024 * 1024 * 1024);
583 let core_count = cpu_features.logical_cores;
584 let has_advanced_simd =
585 cpu_features.simd.avx2 || cpu_features.simd.avx512f || cpu_features.simd.neon;
586
587 if memory_gb >= 32 && core_count >= 16 && cpu_features.simd.avx512f {
588 PerformanceTier::Extreme
589 } else if memory_gb >= 16 && core_count >= 8 && has_advanced_simd {
590 PerformanceTier::High
591 } else if memory_gb >= 8 && core_count >= 4 {
592 PerformanceTier::Medium
593 } else {
594 PerformanceTier::Low
595 }
596 }
597
598 fn generate_performance_analysis(&mut self) {
600 let cpu_features = &self.runtime_features.cpu_features;
601 let system_features = &self.runtime_features.system_features;
602
603 if cpu_features.simd.avx512f {
605 self.backend_summary
606 .performance_notes
607 .push("AVX-512 support detected - excellent SIMD performance".to_string());
608 } else if cpu_features.simd.avx2 {
609 self.backend_summary
610 .performance_notes
611 .push("AVX2 support detected - good SIMD performance".to_string());
612 } else if cpu_features.simd.neon {
613 self.backend_summary
614 .performance_notes
615 .push("NEON support detected - good ARM SIMD performance".to_string());
616 }
617
618 if system_features.numa_available {
619 self.backend_summary.performance_notes.push(format!(
620 "NUMA topology available with {} nodes",
621 system_features.numa_nodes
622 ));
623 }
624
625 if cpu_features.logical_cores < 4 {
627 self.backend_summary.recommendations.push(
628 "Consider upgrading to a CPU with more cores for better parallel performance"
629 .to_string(),
630 );
631 }
632
633 if system_features.total_memory < 8 * 1024 * 1024 * 1024 {
634 self.backend_summary
635 .recommendations
636 .push("Consider adding more RAM (minimum 8GB recommended)".to_string());
637 }
638 }
639
640 pub fn best_device_for_workload(&self, workload: WorkloadType) -> Option<&DeviceInfo> {
642 match workload {
643 WorkloadType::GeneralCompute => self.backend_summary.recommended_device.as_ref(),
644 WorkloadType::HighPrecisionMath => {
645 self.available_devices
647 .iter()
648 .filter(|d| d.capabilities.supports_double_precision())
649 .max_by_key(|d| d.priority)
650 }
651 WorkloadType::LargeMatrices => {
652 self.available_devices.iter().max_by_key(|d| {
654 (
655 d.capabilities.total_memory(),
656 if d.capabilities.simd_features().avx512f {
657 8
658 } else if d.capabilities.simd_features().avx2 {
659 4
660 } else {
661 1
662 },
663 )
664 })
665 }
666 WorkloadType::ParallelWorkloads => {
667 self.available_devices
669 .iter()
670 .max_by_key(|d| d.capabilities.compute_units())
671 }
672 }
673 }
674
675 pub fn has_feature(&self, feature: &str) -> bool {
677 match feature {
678 "simd" => {
679 let simd = &self.runtime_features.cpu_features.simd;
680 simd.sse || simd.avx || simd.avx2 || simd.avx512f || simd.neon
681 }
682 "avx2" => self.runtime_features.cpu_features.simd.avx2,
683 "avx512" => self.runtime_features.cpu_features.simd.avx512f,
684 "neon" => self.runtime_features.cpu_features.simd.neon,
685 "numa" => self.runtime_features.system_features.numa_available,
686 "double_precision" => self
687 .available_devices
688 .iter()
689 .any(|d| d.capabilities.supports_double_precision()),
690 "half_precision" => self
691 .available_devices
692 .iter()
693 .any(|d| d.capabilities.supports_half_precision()),
694 _ => false,
695 }
696 }
697}
698
699impl Default for BackendFeatureDetector {
700 fn default() -> Self {
701 Self::new().unwrap_or_else(|_| Self {
702 available_devices: vec![],
703 runtime_features: RuntimeFeatures::default(),
704 backend_summary: BackendSummary::default(),
705 })
706 }
707}