ruvector_memopt/accel/
cpu.rs

1//! CPU feature detection
2
3#[cfg(target_arch = "x86_64")]
4use std::arch::x86_64::*;
5
/// Snapshot of the host CPU's features, used to drive optimization decisions.
///
/// Build one with [`CpuCapabilities::detect`]. The type is plain data, so it
/// can be cloned and compared freely (for example to cache a probe result or
/// to assert on it in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CpuCapabilities {
    /// CPU vendor identification string.
    pub vendor: String,
    /// CPU model / brand string.
    pub model: String,
    /// `true` when AVX was detected.
    pub has_avx: bool,
    /// `true` when AVX2 was detected.
    pub has_avx2: bool,
    /// `true` when AVX-512 (detected via the `avx512f` feature) is available.
    pub has_avx512: bool,
    /// `true` when AVX-VNNI was detected.
    pub has_avx_vnni: bool,
    /// `true` when the model name suggests an Intel NPU is present.
    /// This is a name-based heuristic, not a hardware probe.
    pub has_npu: bool,
    /// Number of CPUs as reported by `num_cpus::get()`.
    pub core_count: usize,
    /// Cache line size; `detect` currently fills in a fixed 64 rather than
    /// probing it.
    pub cache_line_size: usize,
}
19
20impl CpuCapabilities {
21    pub fn detect() -> Self {
22        let mut caps = Self {
23            vendor: String::new(),
24            model: String::new(),
25            has_avx: false,
26            has_avx2: false,
27            has_avx512: false,
28            has_avx_vnni: false,
29            has_npu: false,
30            core_count: num_cpus::get(),
31            cache_line_size: 64,
32        };
33
34        #[cfg(target_arch = "x86_64")]
35        {
36            if is_x86_feature_detected!("avx") { caps.has_avx = true; }
37            if is_x86_feature_detected!("avx2") { caps.has_avx2 = true; }
38            if is_x86_feature_detected!("avx512f") { caps.has_avx512 = true; }
39
40            // AVX-VNNI detection
41            caps.has_avx_vnni = Self::detect_avx_vnni();
42
43            // NPU detection (Intel Core Ultra)
44            caps.has_npu = Self::detect_intel_npu();
45            caps.vendor = Self::get_vendor();
46            caps.model = Self::get_model();
47        }
48
49        caps
50    }
51
52    #[cfg(target_arch = "x86_64")]
53    fn detect_avx_vnni() -> bool {
54        // CPUID leaf 7, subleaf 1, EAX bit 4
55        unsafe {
56            let result = __cpuid_count(7, 1);
57            (result.eax & (1 << 4)) != 0
58        }
59    }
60
61    #[cfg(not(target_arch = "x86_64"))]
62    fn detect_avx_vnni() -> bool { false }
63
64    #[cfg(target_arch = "x86_64")]
65    fn detect_intel_npu() -> bool {
66        // Intel NPU present in Core Ultra (Meteor Lake+)
67        let model = Self::get_model();
68        model.contains("Ultra") || model.contains("Meteor") || model.contains("Arrow")
69    }
70
71    #[cfg(not(target_arch = "x86_64"))]
72    fn detect_intel_npu() -> bool { false }
73
74    #[cfg(target_arch = "x86_64")]
75    fn get_vendor() -> String {
76        unsafe {
77            let result = __cpuid(0);
78            let vendor_bytes: [u8; 12] = std::mem::transmute([result.ebx, result.edx, result.ecx]);
79            String::from_utf8_lossy(&vendor_bytes).trim().to_string()
80        }
81    }
82
83    #[cfg(not(target_arch = "x86_64"))]
84    fn get_vendor() -> String { "Unknown".into() }
85
86    #[cfg(target_arch = "x86_64")]
87    fn get_model() -> String {
88        let mut model = String::new();
89        unsafe {
90            for i in 0x80000002u32..=0x80000004u32 {
91                let result = __cpuid(i);
92                let bytes: [u8; 16] = std::mem::transmute([result.eax, result.ebx, result.ecx, result.edx]);
93                model.push_str(&String::from_utf8_lossy(&bytes));
94            }
95        }
96        model.trim().to_string()
97    }
98
99    #[cfg(not(target_arch = "x86_64"))]
100    fn get_model() -> String { "Unknown".into() }
101
102    /// Get recommended SIMD width for operations
103    pub fn recommended_simd_width(&self) -> usize {
104        if self.has_avx512 { 512 }
105        else if self.has_avx2 { 256 }
106        else if self.has_avx { 128 }
107        else { 64 }
108    }
109
110    /// Estimate speedup factor vs scalar
111    pub fn estimated_speedup(&self) -> f32 {
112        if self.has_avx512 { 16.0 }
113        else if self.has_avx2 { 8.0 }
114        else if self.has_avx { 4.0 }
115        else { 1.0 }
116    }
117
118    pub fn print_report(&self) {
119        println!("CPU Capabilities:");
120        println!("  Vendor:     {}", self.vendor);
121        println!("  Model:      {}", self.model);
122        println!("  Cores:      {}", self.core_count);
123        println!("  AVX:        {}", if self.has_avx { "Yes" } else { "No" });
124        println!("  AVX2:       {}", if self.has_avx2 { "Yes (8x SIMD)" } else { "No" });
125        println!("  AVX-512:    {}", if self.has_avx512 { "Yes (16x SIMD)" } else { "No" });
126        println!("  AVX-VNNI:   {}", if self.has_avx_vnni { "Yes (AI Accel)" } else { "No" });
127        println!("  Intel NPU:  {}", if self.has_npu { "Yes (Neural Proc)" } else { "No" });
128        println!("  Est Speedup: {:.0}x", self.estimated_speedup());
129    }
130}
131
132impl Default for CpuCapabilities {
133    fn default() -> Self { Self::detect() }
134}
ruvector_memopt/accel/cpu.rs

ruvector_memopt/accel/
cpu.rs