ruvector_memopt/accel/
cpu.rs1#[cfg(target_arch = "x86_64")]
4use std::arch::x86_64::*;
5
/// Snapshot of the SIMD / accelerator features reported for the host CPU.
///
/// Values are normally produced by `CpuCapabilities::detect`; every field is
/// public so callers can also build or adjust a snapshot by hand (for example
/// in tests). `PartialEq`/`Eq` are derived so two snapshots can be compared
/// directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CpuCapabilities {
    /// CPU vendor string (on x86_64 the 12-byte CPUID vendor id); may be
    /// empty or `"Unknown"` when it cannot be determined.
    pub vendor: String,
    /// Human-readable CPU model / brand string; may be empty or `"Unknown"`
    /// when it cannot be determined.
    pub model: String,
    /// `true` when AVX is usable (x86_64 only).
    pub has_avx: bool,
    /// `true` when AVX2 is usable (x86_64 only).
    pub has_avx2: bool,
    /// `true` when AVX-512 Foundation (`avx512f`) is usable (x86_64 only).
    pub has_avx512: bool,
    /// `true` when the CPU reports the AVX-VNNI extension (x86_64 only).
    pub has_avx_vnni: bool,
    /// `true` when the model name suggests an Intel NPU is present; this is a
    /// name-based heuristic, not a device probe.
    pub has_npu: bool,
    /// `true` on aarch64 builds, where NEON is assumed to be available.
    pub has_neon: bool,
    /// Number of CPUs as reported by `num_cpus::get()`.
    pub core_count: usize,
    /// Assumed cache line size in bytes (64 by default, 128 on aarch64 builds).
    pub cache_line_size: usize,
}
20
21impl CpuCapabilities {
22 pub fn detect() -> Self {
23 let mut caps = Self {
24 vendor: String::new(),
25 model: String::new(),
26 has_avx: false,
27 has_avx2: false,
28 has_avx512: false,
29 has_avx_vnni: false,
30 has_npu: false,
31 has_neon: false,
32 core_count: num_cpus::get(),
33 cache_line_size: 64,
34 };
35
36 #[cfg(target_arch = "x86_64")]
37 {
38 if is_x86_feature_detected!("avx") { caps.has_avx = true; }
39 if is_x86_feature_detected!("avx2") { caps.has_avx2 = true; }
40 if is_x86_feature_detected!("avx512f") { caps.has_avx512 = true; }
41
42 caps.has_avx_vnni = Self::detect_avx_vnni();
44
45 caps.has_npu = Self::detect_intel_npu();
47 }
48
49 #[cfg(target_arch = "aarch64")]
50 {
51 caps.has_neon = true;
53 caps.cache_line_size = 128; }
55
56 caps.vendor = Self::get_vendor();
58 caps.model = Self::get_model();
59
60 caps
61 }
62
63 #[cfg(target_arch = "x86_64")]
64 fn detect_avx_vnni() -> bool {
65 unsafe {
67 let result = __cpuid_count(7, 1);
68 (result.eax & (1 << 4)) != 0
69 }
70 }
71
72 #[cfg(not(target_arch = "x86_64"))]
73 fn detect_avx_vnni() -> bool { false }
74
75 #[cfg(target_arch = "x86_64")]
76 fn detect_intel_npu() -> bool {
77 let model = Self::get_model();
79 model.contains("Ultra") || model.contains("Meteor") || model.contains("Arrow")
80 }
81
82 #[cfg(not(target_arch = "x86_64"))]
83 fn detect_intel_npu() -> bool { false }
84
85 #[cfg(target_arch = "x86_64")]
86 fn get_vendor() -> String {
87 unsafe {
88 let result = __cpuid(0);
89 let vendor_bytes: [u8; 12] = std::mem::transmute([result.ebx, result.edx, result.ecx]);
90 String::from_utf8_lossy(&vendor_bytes).trim().to_string()
91 }
92 }
93
94 #[cfg(not(target_arch = "x86_64"))]
95 fn get_vendor() -> String {
96 #[cfg(target_os = "macos")]
97 {
98 if let Ok(output) = std::process::Command::new("sysctl")
99 .args(["-n", "machdep.cpu.brand_string"])
100 .output()
101 {
102 let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
103 if s.contains("Apple") { return "Apple".into(); }
104 if !s.is_empty() { return s.split_whitespace().next().unwrap_or("Unknown").into(); }
105 }
106 }
107 "Unknown".into()
108 }
109
110 #[cfg(target_arch = "x86_64")]
111 fn get_model() -> String {
112 let mut model = String::new();
113 unsafe {
114 for i in 0x80000002u32..=0x80000004u32 {
115 let result = __cpuid(i);
116 let bytes: [u8; 16] = std::mem::transmute([result.eax, result.ebx, result.ecx, result.edx]);
117 model.push_str(&String::from_utf8_lossy(&bytes));
118 }
119 }
120 model.trim().to_string()
121 }
122
123 #[cfg(not(target_arch = "x86_64"))]
124 fn get_model() -> String {
125 #[cfg(target_os = "macos")]
126 {
127 if let Ok(output) = std::process::Command::new("sysctl")
128 .args(["-n", "machdep.cpu.brand_string"])
129 .output()
130 {
131 let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
132 if !s.is_empty() { return s; }
133 }
134 if let Ok(output) = std::process::Command::new("sysctl")
136 .args(["-n", "hw.model"])
137 .output()
138 {
139 let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
140 if !s.is_empty() { return format!("Apple {}", s); }
141 }
142 }
143 "Unknown".into()
144 }
145
146 pub fn recommended_simd_width(&self) -> usize {
148 if self.has_avx512 { 512 }
149 else if self.has_avx2 { 256 }
150 else if self.has_avx { 128 }
151 else { 64 }
152 }
153
154 pub fn estimated_speedup(&self) -> f32 {
156 if self.has_avx512 { 16.0 }
157 else if self.has_avx2 { 8.0 }
158 else if self.has_avx { 4.0 }
159 else if self.has_neon { 4.0 } else { 1.0 }
161 }
162
163 pub fn print_report(&self) {
164 println!("CPU Capabilities:");
165 println!(" Vendor: {}", if self.vendor.is_empty() { "Unknown" } else { &self.vendor });
166 println!(" Model: {}", if self.model.is_empty() { "Unknown" } else { &self.model });
167 println!(" Cores: {}", self.core_count);
168 if cfg!(target_arch = "aarch64") {
169 println!(" NEON: {}", if self.has_neon { "Yes (128-bit SIMD)" } else { "No" });
170 println!(" Cache Line: {} bytes", self.cache_line_size);
171 } else {
172 println!(" AVX: {}", if self.has_avx { "Yes" } else { "No" });
173 println!(" AVX2: {}", if self.has_avx2 { "Yes (8x SIMD)" } else { "No" });
174 println!(" AVX-512: {}", if self.has_avx512 { "Yes (16x SIMD)" } else { "No" });
175 println!(" AVX-VNNI: {}", if self.has_avx_vnni { "Yes (AI Accel)" } else { "No" });
176 println!(" Intel NPU: {}", if self.has_npu { "Yes (Neural Proc)" } else { "No" });
177 }
178 println!(" Est Speedup: {:.0}x", self.estimated_speedup());
179 }
180}
181
182impl Default for CpuCapabilities {
183 fn default() -> Self { Self::detect() }
184}