ruvector_memopt/accel/
cpu.rs

1//! CPU feature detection
2
3#[cfg(target_arch = "x86_64")]
4use std::arch::x86_64::*;
5
/// Snapshot of the host CPU's capabilities, used for optimization decisions.
///
/// Build one with [`CpuCapabilities::detect`]; all fields are plain data and
/// are never refreshed after construction.
#[derive(Debug, Clone)]
pub struct CpuCapabilities {
    /// Vendor string: the CPUID leaf 0 identifier on x86_64; on other
    /// architectures a value derived from `sysctl` on macOS, else "Unknown".
    pub vendor: String,
    /// CPU brand/model string: CPUID brand leaves on x86_64; on other
    /// architectures a `sysctl` value on macOS, else "Unknown".
    pub model: String,
    /// AVX reported by runtime feature detection (x86_64 only).
    pub has_avx: bool,
    /// AVX2 reported by runtime feature detection (x86_64 only).
    pub has_avx2: bool,
    /// AVX-512 Foundation ("avx512f") reported by runtime feature detection
    /// (x86_64 only).
    pub has_avx512: bool,
    /// AVX-VNNI as read from CPUID leaf 7, sub-leaf 1 (x86_64 only).
    pub has_avx_vnni: bool,
    /// Heuristic guess that an Intel NPU is present, inferred from the brand
    /// string rather than by probing the device (x86_64 only).
    pub has_npu: bool,
    /// NEON availability; set unconditionally on aarch64 builds.
    pub has_neon: bool,
    /// Core count as returned by `num_cpus::get()`.
    pub core_count: usize,
    /// Assumed cache line size in bytes: 64 by default, 128 on aarch64.
    /// This is a compile-time assumption, not a measured value.
    pub cache_line_size: usize,
}
20
21impl CpuCapabilities {
22    pub fn detect() -> Self {
23        let mut caps = Self {
24            vendor: String::new(),
25            model: String::new(),
26            has_avx: false,
27            has_avx2: false,
28            has_avx512: false,
29            has_avx_vnni: false,
30            has_npu: false,
31            has_neon: false,
32            core_count: num_cpus::get(),
33            cache_line_size: 64,
34        };
35
36        #[cfg(target_arch = "x86_64")]
37        {
38            if is_x86_feature_detected!("avx") { caps.has_avx = true; }
39            if is_x86_feature_detected!("avx2") { caps.has_avx2 = true; }
40            if is_x86_feature_detected!("avx512f") { caps.has_avx512 = true; }
41
42            // AVX-VNNI detection
43            caps.has_avx_vnni = Self::detect_avx_vnni();
44
45            // NPU detection (Intel Core Ultra)
46            caps.has_npu = Self::detect_intel_npu();
47        }
48
49        #[cfg(target_arch = "aarch64")]
50        {
51            // ARM64 always has NEON
52            caps.has_neon = true;
53            caps.cache_line_size = 128; // Apple Silicon uses 128-byte cache lines
54        }
55
56        // Always detect vendor/model (works on all platforms)
57        caps.vendor = Self::get_vendor();
58        caps.model = Self::get_model();
59
60        caps
61    }
62
63    #[cfg(target_arch = "x86_64")]
64    fn detect_avx_vnni() -> bool {
65        // CPUID leaf 7, subleaf 1, EAX bit 4
66        unsafe {
67            let result = __cpuid_count(7, 1);
68            (result.eax & (1 << 4)) != 0
69        }
70    }
71
72    #[cfg(not(target_arch = "x86_64"))]
73    fn detect_avx_vnni() -> bool { false }
74
75    #[cfg(target_arch = "x86_64")]
76    fn detect_intel_npu() -> bool {
77        // Intel NPU present in Core Ultra (Meteor Lake+)
78        let model = Self::get_model();
79        model.contains("Ultra") || model.contains("Meteor") || model.contains("Arrow")
80    }
81
82    #[cfg(not(target_arch = "x86_64"))]
83    fn detect_intel_npu() -> bool { false }
84
85    #[cfg(target_arch = "x86_64")]
86    fn get_vendor() -> String {
87        unsafe {
88            let result = __cpuid(0);
89            let vendor_bytes: [u8; 12] = std::mem::transmute([result.ebx, result.edx, result.ecx]);
90            String::from_utf8_lossy(&vendor_bytes).trim().to_string()
91        }
92    }
93
94    #[cfg(not(target_arch = "x86_64"))]
95    fn get_vendor() -> String {
96        #[cfg(target_os = "macos")]
97        {
98            if let Ok(output) = std::process::Command::new("sysctl")
99                .args(["-n", "machdep.cpu.brand_string"])
100                .output()
101            {
102                let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
103                if s.contains("Apple") { return "Apple".into(); }
104                if !s.is_empty() { return s.split_whitespace().next().unwrap_or("Unknown").into(); }
105            }
106        }
107        "Unknown".into()
108    }
109
110    #[cfg(target_arch = "x86_64")]
111    fn get_model() -> String {
112        let mut model = String::new();
113        unsafe {
114            for i in 0x80000002u32..=0x80000004u32 {
115                let result = __cpuid(i);
116                let bytes: [u8; 16] = std::mem::transmute([result.eax, result.ebx, result.ecx, result.edx]);
117                model.push_str(&String::from_utf8_lossy(&bytes));
118            }
119        }
120        model.trim().to_string()
121    }
122
123    #[cfg(not(target_arch = "x86_64"))]
124    fn get_model() -> String {
125        #[cfg(target_os = "macos")]
126        {
127            if let Ok(output) = std::process::Command::new("sysctl")
128                .args(["-n", "machdep.cpu.brand_string"])
129                .output()
130            {
131                let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
132                if !s.is_empty() { return s; }
133            }
134            // Fallback: hw.model for Apple Silicon
135            if let Ok(output) = std::process::Command::new("sysctl")
136                .args(["-n", "hw.model"])
137                .output()
138            {
139                let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
140                if !s.is_empty() { return format!("Apple {}", s); }
141            }
142        }
143        "Unknown".into()
144    }
145
146    /// Get recommended SIMD width for operations
147    pub fn recommended_simd_width(&self) -> usize {
148        if self.has_avx512 { 512 }
149        else if self.has_avx2 { 256 }
150        else if self.has_avx { 128 }
151        else { 64 }
152    }
153
154    /// Estimate speedup factor vs scalar
155    pub fn estimated_speedup(&self) -> f32 {
156        if self.has_avx512 { 16.0 }
157        else if self.has_avx2 { 8.0 }
158        else if self.has_avx { 4.0 }
159        else if self.has_neon { 4.0 }  // ARM NEON is 128-bit (4x f32)
160        else { 1.0 }
161    }
162
163    pub fn print_report(&self) {
164        println!("CPU Capabilities:");
165        println!("  Vendor:     {}", if self.vendor.is_empty() { "Unknown" } else { &self.vendor });
166        println!("  Model:      {}", if self.model.is_empty() { "Unknown" } else { &self.model });
167        println!("  Cores:      {}", self.core_count);
168        if cfg!(target_arch = "aarch64") {
169            println!("  NEON:       {}", if self.has_neon { "Yes (128-bit SIMD)" } else { "No" });
170            println!("  Cache Line: {} bytes", self.cache_line_size);
171        } else {
172            println!("  AVX:        {}", if self.has_avx { "Yes" } else { "No" });
173            println!("  AVX2:       {}", if self.has_avx2 { "Yes (8x SIMD)" } else { "No" });
174            println!("  AVX-512:    {}", if self.has_avx512 { "Yes (16x SIMD)" } else { "No" });
175            println!("  AVX-VNNI:   {}", if self.has_avx_vnni { "Yes (AI Accel)" } else { "No" });
176            println!("  Intel NPU:  {}", if self.has_npu { "Yes (Neural Proc)" } else { "No" });
177        }
178        println!("  Est Speedup: {:.0}x", self.estimated_speedup());
179    }
180}
181
182impl Default for CpuCapabilities {
183    fn default() -> Self { Self::detect() }
184}
ruvector_memopt/accel/cpu.rs

ruvector_memopt/accel/
cpu.rs