torsh_core/
backend_detection.rs

1//! Backend feature detection system for runtime capability discovery
2
3use crate::device::{CpuDevice, Device, DeviceCapabilities, DeviceType, SimdFeatures};
4use crate::error::Result;
5use std::collections::HashMap;
6
/// Backend feature detection system for runtime capability discovery.
///
/// Holds three pieces of state: the devices that were discovered, the raw
/// feature snapshot (CPU, GPU, system, build), and a summary derived from
/// both. `BackendFeatureDetector::new` populates all three.
#[derive(Debug, Clone)]
pub struct BackendFeatureDetector {
    /// Available devices discovered at runtime
    pub available_devices: Vec<DeviceInfo>,
    /// Runtime feature flags
    pub runtime_features: RuntimeFeatures,
    /// Backend capabilities summary
    pub backend_summary: BackendSummary,
}
17
/// Information about a discovered device.
///
/// One entry is stored per device in
/// `BackendFeatureDetector::available_devices`.
#[derive(Debug, Clone)]
pub struct DeviceInfo {
    /// Device type and ID
    pub device_type: DeviceType,
    /// Device capabilities
    pub capabilities: DeviceCapabilities,
    /// Whether the device is currently available for use
    pub is_available: bool,
    /// Device priority for automatic selection (higher is better).
    /// The CPU device is registered with a base priority of 10.
    pub priority: u32,
    /// Additional device-specific metadata (free-form key/value pairs)
    pub metadata: HashMap<String, String>,
}
32
/// Runtime feature detection results.
///
/// Groups the four feature categories the detector fills in; the `Default`
/// value is an empty snapshot in which nothing has been detected yet.
#[derive(Debug, Clone, Default)]
pub struct RuntimeFeatures {
    /// CPU features
    pub cpu_features: CpuFeatures,
    /// GPU features (if available)
    pub gpu_features: GpuFeatures,
    /// System features
    pub system_features: SystemFeatures,
    /// Compiler and build features
    pub build_features: BuildFeatures,
}
45
/// CPU-specific runtime features.
#[derive(Debug, Clone, Default)]
pub struct CpuFeatures {
    /// Detected SIMD capabilities (copied from the CPU device's capabilities)
    pub simd: SimdFeatures,
    /// Number of physical CPU cores (as reported by `num_cpus::get_physical`)
    pub physical_cores: usize,
    /// Number of logical CPU cores, including hyperthreading (as reported by
    /// `num_cpus::get`)
    pub logical_cores: usize,
    /// CPU architecture string (taken from `std::env::consts::ARCH`)
    pub architecture: String,
    /// CPU vendor (Intel, AMD, ARM, etc.); `None` on architectures other
    /// than x86_64 and aarch64
    pub vendor: Option<String>,
    /// CPU model name; the detector currently always leaves this `None`
    pub model_name: Option<String>,
    /// CPU base frequency in Hz; the detector currently always leaves this
    /// `None`
    pub base_frequency: Option<u64>,
    /// Cache sizes (L1, L2, L3)
    pub cache_sizes: CacheSizes,
}
66
/// Cache size information.
///
/// Every field is `None` when the size could not be determined. The detector
/// only probes cache sizes on Linux (via sysfs), so on other operating
/// systems all fields stay `None`.
#[derive(Debug, Clone, Default)]
pub struct CacheSizes {
    /// L1 data cache size in bytes
    pub l1_data: Option<usize>,
    /// L1 instruction cache size in bytes
    pub l1_instruction: Option<usize>,
    /// L2 cache size in bytes
    pub l2: Option<usize>,
    /// L3 cache size in bytes
    pub l3: Option<usize>,
}
79
/// GPU-specific runtime features.
///
/// NOTE: GPU detection is not implemented yet; the detector currently returns
/// the `Default` value, i.e. no versions, `webgpu_available == false` and
/// `gpu_count == 0`.
#[derive(Debug, Clone, Default)]
pub struct GpuFeatures {
    /// CUDA support and version
    pub cuda_version: Option<String>,
    /// CUDA compute capability (presumably `(major, minor)` - TODO confirm)
    pub cuda_compute_capability: Option<(u32, u32)>,
    /// OpenCL support
    pub opencl_version: Option<String>,
    /// Vulkan support
    pub vulkan_version: Option<String>,
    /// Metal support (Apple)
    pub metal_version: Option<String>,
    /// WebGPU support
    pub webgpu_available: bool,
    /// Number of GPU devices detected
    pub gpu_count: usize,
}
98
/// System-level features.
#[derive(Debug, Clone, Default)]
pub struct SystemFeatures {
    /// Operating system information
    pub os_info: OsInfo,
    /// Total system memory in bytes (taken from the CPU device's memory info)
    pub total_memory: usize,
    /// Page size in bytes; the detector currently reports a fixed 4096
    /// rather than querying the operating system
    pub page_size: usize,
    /// NUMA topology available (only probed on Linux; `false` elsewhere)
    pub numa_available: bool,
    /// Number of NUMA nodes (1 when the topology cannot be read)
    pub numa_nodes: usize,
    /// Memory bandwidth estimate in bytes/sec; the detector currently always
    /// leaves this `None`
    pub memory_bandwidth: Option<u64>,
}
115
/// Operating system information.
#[derive(Debug, Clone, Default)]
pub struct OsInfo {
    /// OS name (Linux, Windows, macOS, etc.), taken from
    /// `std::env::consts::OS`
    pub name: String,
    /// OS version; on Linux this is the third whitespace-separated token of
    /// `/proc/version`, on every other OS it is `None`
    pub version: Option<String>,
    /// OS architecture, taken from `std::env::consts::ARCH`
    pub arch: String,
    /// Kernel version (for Linux/Unix systems): the output of `uname -r` on
    /// Linux and macOS, `None` elsewhere
    pub kernel_version: Option<String>,
}
128
/// Build and compiler features.
#[derive(Debug, Clone, Default)]
pub struct BuildFeatures {
    /// Target identifier for this build, formatted as `<arch>-<os>` from
    /// `std::env::consts` (shorter than a full Rust target triple)
    pub target_triple: String,
    /// Optimization level, read from the `OPT_LEVEL` environment variable at
    /// run time; `None` when that variable is not set
    pub opt_level: Option<String>,
    /// Debug info available (derived from `cfg!(debug_assertions)`)
    pub debug_info: bool,
    /// Feature flags enabled at compile time
    pub compile_features: Vec<String>,
    /// Cargo features enabled; the detector currently always reports just
    /// `"default"`
    pub cargo_features: Vec<String>,
}
143
/// Summary of available backend capabilities.
///
/// Derived from the discovered devices and the runtime feature snapshot
/// during capability analysis.
#[derive(Debug, Clone, Default)]
pub struct BackendSummary {
    /// Best available device for each type
    pub best_devices: HashMap<DeviceType, DeviceInfo>,
    /// Recommended device for general use: the highest-priority device that
    /// is marked available, or `None` when no device is available
    pub recommended_device: Option<DeviceInfo>,
    /// Overall system performance tier (Low, Medium, High, Extreme)
    pub performance_tier: PerformanceTier,
    /// Features that may impact performance
    pub performance_notes: Vec<String>,
    /// Missing features or recommendations
    pub recommendations: Vec<String>,
}
158
/// System performance classification.
///
/// Variants are declared from weakest to strongest, so the derived ordering
/// satisfies `Low < Medium < High < Extreme` and tiers can be compared
/// directly (for example `tier >= PerformanceTier::High`). The default tier
/// is [`PerformanceTier::Medium`].
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub enum PerformanceTier {
    /// Basic performance for simple tasks
    Low,
    /// Good performance for most workloads
    #[default]
    Medium,
    /// High performance for demanding tasks
    High,
    /// Extreme performance for HPC workloads
    Extreme,
}
172
/// Workload type for device selection.
///
/// Passed to `BackendFeatureDetector::best_device_for_workload` to choose
/// which device property the selection should favour.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WorkloadType {
    /// General purpose computing (uses the recommended device)
    GeneralCompute,
    /// High precision mathematical operations (needs double precision support)
    HighPrecisionMath,
    /// Large matrix operations (favours total memory, then SIMD width)
    LargeMatrices,
    /// Highly parallel workloads (favours the number of compute units)
    ParallelWorkloads,
}
185
186impl BackendFeatureDetector {
187    /// Create a new feature detector and perform comprehensive detection
188    pub fn new() -> Result<Self> {
189        let mut detector = Self {
190            available_devices: Vec::new(),
191            runtime_features: RuntimeFeatures::default(),
192            backend_summary: BackendSummary::default(),
193        };
194
195        detector.detect_all_features()?;
196        detector.discover_devices()?;
197        detector.analyze_capabilities()?;
198
199        Ok(detector)
200    }
201
202    /// Detect all runtime features
203    fn detect_all_features(&mut self) -> Result<()> {
204        self.runtime_features.cpu_features = self.detect_cpu_features()?;
205        self.runtime_features.gpu_features = self.detect_gpu_features()?;
206        self.runtime_features.system_features = self.detect_system_features()?;
207        self.runtime_features.build_features = self.detect_build_features()?;
208        Ok(())
209    }
210
211    /// Detect CPU features comprehensively
212    fn detect_cpu_features(&self) -> Result<CpuFeatures> {
213        let cpu_device = CpuDevice::new();
214        let cpu_capabilities = cpu_device.capabilities()?;
215
216        let features = CpuFeatures {
217            simd: cpu_capabilities.simd_features().clone(),
218            physical_cores: num_cpus::get_physical(),
219            logical_cores: num_cpus::get(),
220            architecture: std::env::consts::ARCH.to_string(),
221            vendor: self.detect_cpu_vendor(),
222            model_name: None,     // Custom properties access not available
223            base_frequency: None, // Clock frequency access not available
224            cache_sizes: self.detect_cache_sizes()?,
225        };
226
227        Ok(features)
228    }
229
    /// Detect the CPU vendor for the architecture this crate was compiled for.
    ///
    /// The choice is made at compile time: x86_64 builds call
    /// `detect_x86_cpu_vendor`, aarch64 builds call `detect_arm_cpu_vendor`,
    /// and every other target architecture yields `None`. Exactly one of the
    /// three `cfg` blocks survives compilation and becomes the return value.
    fn detect_cpu_vendor(&self) -> Option<String> {
        #[cfg(target_arch = "x86_64")]
        {
            self.detect_x86_cpu_vendor()
        }
        #[cfg(target_arch = "aarch64")]
        {
            self.detect_arm_cpu_vendor()
        }
        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
        {
            None
        }
    }
245
246    /// Detect x86/x86_64 CPU vendor using CPUID
247    #[cfg(target_arch = "x86_64")]
248    fn detect_x86_cpu_vendor(&self) -> Option<String> {
249        use std::arch::x86_64::__cpuid;
250
251        unsafe {
252            let cpuid_result = __cpuid(0);
253            let vendor_string = format!(
254                "{}{}{}",
255                std::str::from_utf8(&cpuid_result.ebx.to_le_bytes()).unwrap_or(""),
256                std::str::from_utf8(&cpuid_result.edx.to_le_bytes()).unwrap_or(""),
257                std::str::from_utf8(&cpuid_result.ecx.to_le_bytes()).unwrap_or("")
258            );
259
260            match vendor_string.as_str() {
261                "GenuineIntel" => Some("Intel".to_string()),
262                "AuthenticAMD" => Some("AMD".to_string()),
263                "VIA VIA VIA " => Some("VIA".to_string()),
264                "CyrixInstead" => Some("Cyrix".to_string()),
265                "CentaurHauls" => Some("Centaur".to_string()),
266                "NexGenDriven" => Some("NexGen".to_string()),
267                "HygonGenuine" => Some("Hygon".to_string()),
268                _ => Some(format!(
269                    "Unknown ({})",
270                    vendor_string.trim_end_matches('\0')
271                )),
272            }
273        }
274    }
275
276    /// Detect ARM CPU vendor from /proc/cpuinfo
277    #[cfg(target_arch = "aarch64")]
278    fn detect_arm_cpu_vendor(&self) -> Option<String> {
279        #[cfg(target_os = "linux")]
280        {
281            if let Ok(cpuinfo) = std::fs::read_to_string("/proc/cpuinfo") {
282                for line in cpuinfo.lines() {
283                    if line.starts_with("CPU implementer") {
284                        if let Some(implementer) = line.split(':').nth(1) {
285                            let implementer = implementer.trim();
286                            return match implementer {
287                                "0x41" => Some("ARM".to_string()),
288                                "0x42" => Some("Broadcom".to_string()),
289                                "0x43" => Some("Cavium".to_string()),
290                                "0x44" => Some("DEC".to_string()),
291                                "0x46" => Some("Fujitsu".to_string()),
292                                "0x48" => Some("HiSilicon".to_string()),
293                                "0x49" => Some("Infineon".to_string()),
294                                "0x4d" => Some("Motorola".to_string()),
295                                "0x4e" => Some("NVIDIA".to_string()),
296                                "0x50" => Some("Applied Micro".to_string()),
297                                "0x51" => Some("Qualcomm".to_string()),
298                                "0x56" => Some("Marvell".to_string()),
299                                "0x61" => Some("Apple".to_string()),
300                                _ => Some(format!("Unknown ARM implementer ({})", implementer)),
301                            };
302                        }
303                    }
304                    if line.starts_with("Hardware") && line.contains("BCM") {
305                        return Some("Broadcom".to_string());
306                    }
307                    if line.starts_with("Hardware") && line.contains("Apple") {
308                        return Some("Apple".to_string());
309                    }
310                }
311            }
312        }
313        Some("ARM".to_string())
314    }
315
316    /// Detect cache sizes
317    fn detect_cache_sizes(&self) -> Result<CacheSizes> {
318        #[allow(unused_mut)] // mut needed for conditional compilation features
319        let mut cache_sizes = CacheSizes::default();
320
321        #[cfg(target_os = "linux")]
322        {
323            // Try to read cache info from sysfs
324            if let Ok(l1d) =
325                std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cache/index0/size")
326            {
327                cache_sizes.l1_data = self.parse_cache_size(&l1d);
328            }
329            if let Ok(l1i) =
330                std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cache/index1/size")
331            {
332                cache_sizes.l1_instruction = self.parse_cache_size(&l1i);
333            }
334            if let Ok(l2) =
335                std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cache/index2/size")
336            {
337                cache_sizes.l2 = self.parse_cache_size(&l2);
338            }
339            if let Ok(l3) =
340                std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cache/index3/size")
341            {
342                cache_sizes.l3 = self.parse_cache_size(&l3);
343            }
344        }
345
346        Ok(cache_sizes)
347    }
348
349    /// Parse cache size string (e.g., "32K" -> 32768)
350    #[allow(dead_code)] // Used in platform-specific cache detection
351    fn parse_cache_size(&self, size_str: &str) -> Option<usize> {
352        let size_str = size_str.trim();
353        if size_str.is_empty() {
354            return None;
355        }
356
357        let (number_part, suffix) = if let Some(stripped) = size_str.strip_suffix('K') {
358            (stripped, 1024)
359        } else if let Some(stripped) = size_str.strip_suffix('M') {
360            (stripped, 1024 * 1024)
361        } else if let Some(stripped) = size_str.strip_suffix('G') {
362            (stripped, 1024 * 1024 * 1024)
363        } else {
364            (size_str, 1)
365        };
366
367        number_part.parse::<usize>().ok().map(|n| n * suffix)
368    }
369
370    /// Detect GPU features
371    fn detect_gpu_features(&self) -> Result<GpuFeatures> {
372        let features = GpuFeatures::default();
373        // GPU detection would be implemented here
374        Ok(features)
375    }
376
377    /// Detect system features
378    fn detect_system_features(&self) -> Result<SystemFeatures> {
379        let cpu_device = CpuDevice::new();
380        let memory_info = cpu_device.memory_info()?;
381        let _cpu_capabilities = cpu_device.capabilities()?;
382
383        let features = SystemFeatures {
384            os_info: self.detect_os_info(),
385            total_memory: memory_info.total as usize,
386            page_size: self.detect_page_size(),
387            numa_available: self.detect_numa_support(),
388            numa_nodes: self.detect_numa_nodes(),
389            memory_bandwidth: None, // Memory bandwidth not accessible
390        };
391
392        Ok(features)
393    }
394
395    /// Detect operating system information
396    fn detect_os_info(&self) -> OsInfo {
397        OsInfo {
398            name: std::env::consts::OS.to_string(),
399            version: self.get_os_version(),
400            arch: std::env::consts::ARCH.to_string(),
401            kernel_version: self.get_kernel_version(),
402        }
403    }
404
405    /// Get OS version
406    fn get_os_version(&self) -> Option<String> {
407        #[cfg(target_os = "linux")]
408        {
409            std::fs::read_to_string("/proc/version")
410                .ok()
411                .and_then(|v| v.split_whitespace().nth(2).map(|s| s.to_string()))
412        }
413        #[cfg(not(target_os = "linux"))]
414        {
415            None
416        }
417    }
418
419    /// Get kernel version
420    fn get_kernel_version(&self) -> Option<String> {
421        #[cfg(any(target_os = "linux", target_os = "macos"))]
422        {
423            use std::process::Command;
424            Command::new("uname")
425                .arg("-r")
426                .output()
427                .ok()
428                .and_then(|output| String::from_utf8(output.stdout).ok())
429                .map(|v| v.trim().to_string())
430        }
431        #[cfg(not(any(target_os = "linux", target_os = "macos")))]
432        {
433            None
434        }
435    }
436
437    /// Detect system page size
438    fn detect_page_size(&self) -> usize {
439        4096 // Default page size
440    }
441
442    /// Detect NUMA support
443    fn detect_numa_support(&self) -> bool {
444        #[cfg(target_os = "linux")]
445        {
446            std::path::Path::new("/sys/devices/system/node").exists()
447        }
448        #[cfg(not(target_os = "linux"))]
449        {
450            false
451        }
452    }
453
454    /// Detect number of NUMA nodes
455    fn detect_numa_nodes(&self) -> usize {
456        #[cfg(target_os = "linux")]
457        {
458            if let Ok(entries) = std::fs::read_dir("/sys/devices/system/node") {
459                entries
460                    .filter_map(|entry| {
461                        entry.ok().and_then(|e| {
462                            let name = e.file_name();
463                            let name_str = name.to_string_lossy();
464                            if name_str.starts_with("node")
465                                && name_str[4..].chars().all(|c| c.is_ascii_digit())
466                            {
467                                Some(())
468                            } else {
469                                None
470                            }
471                        })
472                    })
473                    .count()
474            } else {
475                1
476            }
477        }
478        #[cfg(not(target_os = "linux"))]
479        {
480            1
481        }
482    }
483
484    /// Detect build features
485    fn detect_build_features(&self) -> Result<BuildFeatures> {
486        let features = BuildFeatures {
487            target_triple: format!("{}-{}", std::env::consts::ARCH, std::env::consts::OS),
488            opt_level: std::env::var("OPT_LEVEL").ok(),
489            debug_info: cfg!(debug_assertions),
490            compile_features: self.get_compile_features(),
491            cargo_features: self.get_cargo_features(),
492        };
493
494        Ok(features)
495    }
496
497    /// Get compile-time features
498    fn get_compile_features(&self) -> Vec<String> {
499        let mut features = Vec::new();
500
501        if cfg!(feature = "std") {
502            features.push("std".to_string());
503        }
504        if cfg!(feature = "no_std") {
505            features.push("no_std".to_string());
506        }
507        if cfg!(feature = "serialize") {
508            features.push("serialize".to_string());
509        }
510        if cfg!(feature = "half") {
511            features.push("half".to_string());
512        }
513        if cfg!(feature = "avx512") {
514            features.push("avx512".to_string());
515        }
516        if cfg!(feature = "simd") {
517            features.push("simd".to_string());
518        }
519        if cfg!(feature = "parallel") {
520            features.push("parallel".to_string());
521        }
522        if cfg!(feature = "fast-math") {
523            features.push("fast-math".to_string());
524        }
525
526        features
527    }
528
529    /// Get cargo features
530    fn get_cargo_features(&self) -> Vec<String> {
531        vec!["default".to_string()]
532    }
533
534    /// Discover available devices
535    fn discover_devices(&mut self) -> Result<()> {
536        // Always add CPU device
537        let cpu_device = CpuDevice::new();
538        let cpu_capabilities = cpu_device.capabilities()?;
539
540        self.available_devices.push(DeviceInfo {
541            device_type: DeviceType::Cpu,
542            capabilities: cpu_capabilities,
543            is_available: cpu_device.is_available().unwrap_or(false),
544            priority: 10, // Base priority for CPU
545            metadata: HashMap::new(),
546        });
547
548        Ok(())
549    }
550
551    /// Analyze capabilities and generate summary
552    fn analyze_capabilities(&mut self) -> Result<()> {
553        // Find best device for each type
554        for device in &self.available_devices {
555            self.backend_summary
556                .best_devices
557                .insert(device.device_type, device.clone());
558        }
559
560        // Find recommended device (highest priority available device)
561        self.backend_summary.recommended_device = self
562            .available_devices
563            .iter()
564            .filter(|d| d.is_available)
565            .max_by_key(|d| d.priority)
566            .cloned();
567
568        // Determine performance tier
569        self.backend_summary.performance_tier = self.classify_performance_tier();
570
571        // Generate performance notes and recommendations
572        self.generate_performance_analysis();
573
574        Ok(())
575    }
576
577    /// Classify overall system performance tier
578    fn classify_performance_tier(&self) -> PerformanceTier {
579        let cpu_features = &self.runtime_features.cpu_features;
580        let system_features = &self.runtime_features.system_features;
581
582        let memory_gb = system_features.total_memory / (1024 * 1024 * 1024);
583        let core_count = cpu_features.logical_cores;
584        let has_advanced_simd =
585            cpu_features.simd.avx2 || cpu_features.simd.avx512f || cpu_features.simd.neon;
586
587        if memory_gb >= 32 && core_count >= 16 && cpu_features.simd.avx512f {
588            PerformanceTier::Extreme
589        } else if memory_gb >= 16 && core_count >= 8 && has_advanced_simd {
590            PerformanceTier::High
591        } else if memory_gb >= 8 && core_count >= 4 {
592            PerformanceTier::Medium
593        } else {
594            PerformanceTier::Low
595        }
596    }
597
598    /// Generate performance analysis and recommendations
599    fn generate_performance_analysis(&mut self) {
600        let cpu_features = &self.runtime_features.cpu_features;
601        let system_features = &self.runtime_features.system_features;
602
603        // Performance notes
604        if cpu_features.simd.avx512f {
605            self.backend_summary
606                .performance_notes
607                .push("AVX-512 support detected - excellent SIMD performance".to_string());
608        } else if cpu_features.simd.avx2 {
609            self.backend_summary
610                .performance_notes
611                .push("AVX2 support detected - good SIMD performance".to_string());
612        } else if cpu_features.simd.neon {
613            self.backend_summary
614                .performance_notes
615                .push("NEON support detected - good ARM SIMD performance".to_string());
616        }
617
618        if system_features.numa_available {
619            self.backend_summary.performance_notes.push(format!(
620                "NUMA topology available with {} nodes",
621                system_features.numa_nodes
622            ));
623        }
624
625        // Recommendations
626        if cpu_features.logical_cores < 4 {
627            self.backend_summary.recommendations.push(
628                "Consider upgrading to a CPU with more cores for better parallel performance"
629                    .to_string(),
630            );
631        }
632
633        if system_features.total_memory < 8 * 1024 * 1024 * 1024 {
634            self.backend_summary
635                .recommendations
636                .push("Consider adding more RAM (minimum 8GB recommended)".to_string());
637        }
638    }
639
640    /// Get the best available device for a specific workload type
641    pub fn best_device_for_workload(&self, workload: WorkloadType) -> Option<&DeviceInfo> {
642        match workload {
643            WorkloadType::GeneralCompute => self.backend_summary.recommended_device.as_ref(),
644            WorkloadType::HighPrecisionMath => {
645                // Prefer devices with double precision support
646                self.available_devices
647                    .iter()
648                    .filter(|d| d.capabilities.supports_double_precision())
649                    .max_by_key(|d| d.priority)
650            }
651            WorkloadType::LargeMatrices => {
652                // Prefer devices with lots of memory and good SIMD
653                self.available_devices.iter().max_by_key(|d| {
654                    (
655                        d.capabilities.total_memory(),
656                        if d.capabilities.simd_features().avx512f {
657                            8
658                        } else if d.capabilities.simd_features().avx2 {
659                            4
660                        } else {
661                            1
662                        },
663                    )
664                })
665            }
666            WorkloadType::ParallelWorkloads => {
667                // Prefer devices with many cores
668                self.available_devices
669                    .iter()
670                    .max_by_key(|d| d.capabilities.compute_units())
671            }
672        }
673    }
674
675    /// Check if a specific feature is available
676    pub fn has_feature(&self, feature: &str) -> bool {
677        match feature {
678            "simd" => {
679                let simd = &self.runtime_features.cpu_features.simd;
680                simd.sse || simd.avx || simd.avx2 || simd.avx512f || simd.neon
681            }
682            "avx2" => self.runtime_features.cpu_features.simd.avx2,
683            "avx512" => self.runtime_features.cpu_features.simd.avx512f,
684            "neon" => self.runtime_features.cpu_features.simd.neon,
685            "numa" => self.runtime_features.system_features.numa_available,
686            "double_precision" => self
687                .available_devices
688                .iter()
689                .any(|d| d.capabilities.supports_double_precision()),
690            "half_precision" => self
691                .available_devices
692                .iter()
693                .any(|d| d.capabilities.supports_half_precision()),
694            _ => false,
695        }
696    }
697}
698
699impl Default for BackendFeatureDetector {
700    fn default() -> Self {
701        Self::new().unwrap_or_else(|_| Self {
702            available_devices: vec![],
703            runtime_features: RuntimeFeatures::default(),
704            backend_summary: BackendSummary::default(),
705        })
706    }
707}