// temporal_neural_solver/benchmarks/hardware_verification.rs

use std::collections::HashMap;
use std::time::{SystemTime, UNIX_EPOCH};

use serde::{Serialize, Deserialize};
10
11#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct HardwareCapabilities {
14 pub cpu_vendor: String,
15 pub cpu_brand: String,
16 pub cpu_cores: usize,
17 pub cpu_threads: usize,
18 pub cache_sizes: CacheSizes,
19 pub simd_features: SimdFeatures,
20 pub memory_info: MemoryInfo,
21 pub timestamp: u64,
22}
23
24#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct CacheSizes {
26 pub l1_data: Option<usize>,
27 pub l1_instruction: Option<usize>,
28 pub l2: Option<usize>,
29 pub l3: Option<usize>,
30}
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct SimdFeatures {
34 pub sse: bool,
35 pub sse2: bool,
36 pub sse3: bool,
37 pub ssse3: bool,
38 pub sse4_1: bool,
39 pub sse4_2: bool,
40 pub avx: bool,
41 pub avx2: bool,
42 pub avx512f: bool,
43 pub fma: bool,
44}
45
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct MemoryInfo {
48 pub total_memory: Option<usize>,
49 pub available_memory: Option<usize>,
50 pub page_size: Option<usize>,
51}
52
53#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct HardwareVerification {
56 pub capabilities: HardwareCapabilities,
57 pub feature_usage: FeatureUsage,
58 pub performance_baseline: PerformanceBaseline,
59 pub warnings: Vec<String>,
60 pub validation_passed: bool,
61}
62
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct FeatureUsage {
65 pub detected_simd_usage: Vec<String>,
66 pub memory_alignment_verified: bool,
67 pub cache_friendly_access: bool,
68 pub thread_affinity_set: bool,
69}
70
71#[derive(Debug, Clone, Serialize, Deserialize)]
72pub struct PerformanceBaseline {
73 pub memory_bandwidth_gb_s: f64,
74 pub cpu_frequency_ghz: f64,
75 pub cache_latency_ns: HashMap<String, f64>,
76 pub baseline_established: bool,
77}
78
79pub struct HardwareValidator {
81 baseline_measurements: Option<PerformanceBaseline>,
82}
83
84impl Default for HardwareValidator {
85 fn default() -> Self {
86 Self::new()
87 }
88}
89
90impl HardwareValidator {
91 pub fn new() -> Self {
92 Self {
93 baseline_measurements: None,
94 }
95 }
96
97 pub fn detect_capabilities(&self) -> HardwareCapabilities {
99 HardwareCapabilities {
100 cpu_vendor: self.detect_cpu_vendor(),
101 cpu_brand: self.detect_cpu_brand(),
102 cpu_cores: self.detect_cpu_cores(),
103 cpu_threads: self.detect_cpu_threads(),
104 cache_sizes: self.detect_cache_sizes(),
105 simd_features: self.detect_simd_features(),
106 memory_info: self.detect_memory_info(),
107 timestamp: SystemTime::now()
108 .duration_since(UNIX_EPOCH)
109 .unwrap()
110 .as_secs(),
111 }
112 }
113
114 pub fn verify_hardware(&mut self) -> HardwareVerification {
116 let capabilities = self.detect_capabilities();
117 let mut warnings = Vec::new();
118
119 if !capabilities.simd_features.avx2 {
121 warnings.push("AVX2 not available - performance may be suboptimal".to_string());
122 }
123
124 if capabilities.cpu_cores < 4 {
125 warnings.push("Less than 4 CPU cores detected - may affect performance".to_string());
126 }
127
128 if let Some(l3_size) = capabilities.cache_sizes.l3 {
129 if l3_size < 8 * 1024 * 1024 { warnings.push("Small L3 cache detected - may affect large workloads".to_string());
131 }
132 }
133
134 let feature_usage = self.verify_feature_usage(&capabilities);
136
137 let baseline = self.establish_baseline(&capabilities);
139
140 let validation_passed = warnings.is_empty() &&
141 feature_usage.detected_simd_usage.len() > 0 &&
142 baseline.baseline_established;
143
144 HardwareVerification {
145 capabilities,
146 feature_usage,
147 performance_baseline: baseline,
148 warnings,
149 validation_passed,
150 }
151 }
152
153 fn detect_cpu_vendor(&self) -> String {
154 #[cfg(target_arch = "x86_64")]
155 {
156 if is_x86_feature_detected!("avx2") {
157 "Unknown x86_64".to_string()
160 } else {
161 "x86_64 (limited features)".to_string()
162 }
163 }
164 #[cfg(not(target_arch = "x86_64"))]
165 {
166 std::env::consts::ARCH.to_string()
167 }
168 }
169
170 fn detect_cpu_brand(&self) -> String {
171 "Generic CPU".to_string()
173 }
174
175 fn detect_cpu_cores(&self) -> usize {
176 num_cpus::get_physical()
177 }
178
179 fn detect_cpu_threads(&self) -> usize {
180 num_cpus::get()
181 }
182
183 fn detect_cache_sizes(&self) -> CacheSizes {
184 CacheSizes {
186 l1_data: Some(32 * 1024), l1_instruction: Some(32 * 1024),
188 l2: Some(256 * 1024), l3: Some(8 * 1024 * 1024), }
191 }
192
193 fn detect_simd_features(&self) -> SimdFeatures {
194 #[cfg(target_arch = "x86_64")]
195 {
196 SimdFeatures {
197 sse: is_x86_feature_detected!("sse"),
198 sse2: is_x86_feature_detected!("sse2"),
199 sse3: is_x86_feature_detected!("sse3"),
200 ssse3: is_x86_feature_detected!("ssse3"),
201 sse4_1: is_x86_feature_detected!("sse4.1"),
202 sse4_2: is_x86_feature_detected!("sse4.2"),
203 avx: is_x86_feature_detected!("avx"),
204 avx2: is_x86_feature_detected!("avx2"),
205 avx512f: is_x86_feature_detected!("avx512f"),
206 fma: is_x86_feature_detected!("fma"),
207 }
208 }
209 #[cfg(not(target_arch = "x86_64"))]
210 {
211 SimdFeatures {
212 sse: false, sse2: false, sse3: false, ssse3: false,
213 sse4_1: false, sse4_2: false, avx: false, avx2: false,
214 avx512f: false, fma: false,
215 }
216 }
217 }
218
219 fn detect_memory_info(&self) -> MemoryInfo {
220 MemoryInfo {
221 total_memory: self.get_total_memory(),
222 available_memory: self.get_available_memory(),
223 page_size: Some(4096), }
225 }
226
227 fn get_total_memory(&self) -> Option<usize> {
228 #[cfg(target_os = "linux")]
230 {
231 std::fs::read_to_string("/proc/meminfo")
232 .ok()
233 .and_then(|contents| {
234 contents.lines()
235 .find(|line| line.starts_with("MemTotal:"))
236 .and_then(|line| {
237 line.split_whitespace()
238 .nth(1)
239 .and_then(|s| s.parse::<usize>().ok())
240 .map(|kb| kb * 1024) })
242 })
243 }
244 #[cfg(not(target_os = "linux"))]
245 {
246 None
247 }
248 }
249
250 fn get_available_memory(&self) -> Option<usize> {
251 #[cfg(target_os = "linux")]
252 {
253 std::fs::read_to_string("/proc/meminfo")
254 .ok()
255 .and_then(|contents| {
256 contents.lines()
257 .find(|line| line.starts_with("MemAvailable:"))
258 .and_then(|line| {
259 line.split_whitespace()
260 .nth(1)
261 .and_then(|s| s.parse::<usize>().ok())
262 .map(|kb| kb * 1024)
263 })
264 })
265 }
266 #[cfg(not(target_os = "linux"))]
267 {
268 None
269 }
270 }
271
272 fn verify_feature_usage(&self, capabilities: &HardwareCapabilities) -> FeatureUsage {
274 let mut detected_simd = Vec::new();
275
276 if capabilities.simd_features.avx512f {
278 detected_simd.push("AVX-512".to_string());
279 } else if capabilities.simd_features.avx2 {
280 detected_simd.push("AVX2".to_string());
281 } else if capabilities.simd_features.avx {
282 detected_simd.push("AVX".to_string());
283 } else if capabilities.simd_features.sse4_2 {
284 detected_simd.push("SSE4.2".to_string());
285 }
286
287 if capabilities.simd_features.fma {
288 detected_simd.push("FMA".to_string());
289 }
290
291 FeatureUsage {
292 detected_simd_usage: detected_simd,
293 memory_alignment_verified: self.verify_memory_alignment(),
294 cache_friendly_access: self.verify_cache_friendly_access(),
295 thread_affinity_set: self.verify_thread_affinity(),
296 }
297 }
298
299 fn verify_memory_alignment(&self) -> bool {
300 let test_data = vec![1.0f32; 32];
302 let ptr = test_data.as_ptr() as usize;
303
304 ptr % 32 == 0
306 }
307
308 fn verify_cache_friendly_access(&self) -> bool {
309 let size = 1024; let data = vec![1.0f32; size];
312
313 let start = std::time::Instant::now();
314
315 let mut sum = 0.0f32;
317 for &val in &data {
318 sum += val;
319 }
320
321 let sequential_time = start.elapsed();
322
323 let start = std::time::Instant::now();
325 let mut sum2 = 0.0f32;
326 for i in (0..size).step_by(64) { sum2 += data[i];
328 }
329 let random_time = start.elapsed();
330
331 sequential_time < random_time || random_time.as_nanos() < 1000
333 }
334
335 fn verify_thread_affinity(&self) -> bool {
336 core_affinity::get_core_ids().is_some()
338 }
339
340 fn establish_baseline(&mut self, capabilities: &HardwareCapabilities) -> PerformanceBaseline {
342 let memory_bandwidth = self.measure_memory_bandwidth();
343 let cpu_frequency = self.estimate_cpu_frequency();
344 let cache_latencies = self.measure_cache_latencies();
345
346 let baseline = PerformanceBaseline {
347 memory_bandwidth_gb_s: memory_bandwidth,
348 cpu_frequency_ghz: cpu_frequency,
349 cache_latency_ns: cache_latencies,
350 baseline_established: memory_bandwidth > 0.0 && cpu_frequency > 0.0,
351 };
352
353 self.baseline_measurements = Some(baseline.clone());
354 baseline
355 }
356
357 fn measure_memory_bandwidth(&self) -> f64 {
358 let size = 1024 * 1024; let data = vec![1u64; size];
361 let iterations = 100;
362
363 let start = std::time::Instant::now();
364
365 for _ in 0..iterations {
366 let sum: u64 = data.iter().sum();
367 std::hint::black_box(sum); }
369
370 let elapsed = start.elapsed();
371 let bytes_processed = size * iterations * 8; let seconds = elapsed.as_secs_f64();
373
374 if seconds > 0.0 {
375 (bytes_processed as f64) / seconds / 1e9 } else {
377 0.0
378 }
379 }
380
381 fn estimate_cpu_frequency(&self) -> f64 {
382 let iterations = 1_000_000;
384 let start = std::time::Instant::now();
385
386 let mut x = 1.0f64;
387 for _ in 0..iterations {
388 x = x.sin().cos(); }
390 std::hint::black_box(x);
391
392 let elapsed = start.elapsed();
393 let ops_per_second = iterations as f64 / elapsed.as_secs_f64();
394
395 ops_per_second / 1e9 }
398
399 fn measure_cache_latencies(&self) -> HashMap<String, f64> {
400 let mut latencies = HashMap::new();
401
402 let l1_latency = self.measure_latency_for_size(4 * 1024); latencies.insert("L1".to_string(), l1_latency);
405
406 let l2_latency = self.measure_latency_for_size(128 * 1024); latencies.insert("L2".to_string(), l2_latency);
409
410 let l3_latency = self.measure_latency_for_size(4 * 1024 * 1024); latencies.insert("L3".to_string(), l3_latency);
413
414 let mem_latency = self.measure_latency_for_size(64 * 1024 * 1024); latencies.insert("Memory".to_string(), mem_latency);
417
418 latencies
419 }
420
421 fn measure_latency_for_size(&self, size: usize) -> f64 {
422 let data = vec![0u8; size];
423 let iterations = 1000;
424 let stride = 64; let start = std::time::Instant::now();
427
428 let mut index = 0;
429 let mut sum = 0u8;
430 for _ in 0..iterations {
431 for _ in 0..(size / stride) {
432 sum = sum.wrapping_add(data[index]);
433 index = (index + stride) % size;
434 }
435 }
436 std::hint::black_box(sum);
437
438 let elapsed = start.elapsed();
439 elapsed.as_nanos() as f64 / (iterations * size / stride) as f64
440 }
441
442 pub fn generate_report(&self, verification: &HardwareVerification) -> String {
444 let mut report = String::new();
445
446 report.push_str(&format!("\n{}\n", "=".repeat(60)));
447 report.push_str("HARDWARE VERIFICATION REPORT\n");
448 report.push_str(&format!("{}\n", "=".repeat(60)));
449
450 let caps = &verification.capabilities;
451 report.push_str(&format!("CPU: {} ({})\n", caps.cpu_brand, caps.cpu_vendor));
452 report.push_str(&format!("Cores: {} physical, {} threads\n", caps.cpu_cores, caps.cpu_threads));
453
454 report.push_str("\nš¾ CACHE HIERARCHY:\n");
455 if let Some(l1) = caps.cache_sizes.l1_data {
456 report.push_str(&format!("⢠L1 Data: {} KB\n", l1 / 1024));
457 }
458 if let Some(l2) = caps.cache_sizes.l2 {
459 report.push_str(&format!("⢠L2: {} KB\n", l2 / 1024));
460 }
461 if let Some(l3) = caps.cache_sizes.l3 {
462 report.push_str(&format!("⢠L3: {} MB\n", l3 / (1024 * 1024)));
463 }
464
465 report.push_str("\nš SIMD FEATURES:\n");
466 let simd = &caps.simd_features;
467 if simd.avx512f { report.push_str("⢠ā
AVX-512\n"); }
468 if simd.avx2 { report.push_str("⢠ā
AVX2\n"); }
469 if simd.avx { report.push_str("⢠ā
AVX\n"); }
470 if simd.fma { report.push_str("⢠ā
FMA\n"); }
471 if simd.sse4_2 { report.push_str("⢠ā
SSE4.2\n"); }
472
473 report.push_str("\nš PERFORMANCE BASELINE:\n");
474 let baseline = &verification.performance_baseline;
475 report.push_str(&format!("⢠Memory Bandwidth: {:.2} GB/s\n", baseline.memory_bandwidth_gb_s));
476 report.push_str(&format!("⢠CPU Frequency: {:.2} GHz (estimated)\n", baseline.cpu_frequency_ghz));
477
478 report.push_str("\nā” FEATURE USAGE:\n");
479 let usage = &verification.feature_usage;
480 for feature in &usage.detected_simd_usage {
481 report.push_str(&format!("⢠ā
{} detected\n", feature));
482 }
483 if usage.memory_alignment_verified {
484 report.push_str("⢠ā
Memory alignment verified\n");
485 }
486 if usage.cache_friendly_access {
487 report.push_str("⢠ā
Cache-friendly access patterns\n");
488 }
489
490 if !verification.warnings.is_empty() {
491 report.push_str("\nā ļø WARNINGS:\n");
492 for warning in &verification.warnings {
493 report.push_str(&format!("⢠{}\n", warning));
494 }
495 }
496
497 report.push_str(&format!("\nšÆ HARDWARE VALIDATION: {}\n",
498 if verification.validation_passed { "ā
PASSED" } else { "ā FAILED" }));
499
500 report
501 }
502}
503
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end smoke test: a full verification run yields a report and
    /// plausible CPU information.
    #[test]
    fn test_hardware_detection() {
        let mut validator = HardwareValidator::new();
        let verification = validator.verify_hardware();

        let report = validator.generate_report(&verification);
        println!("{}", report);

        assert!(report.contains("HARDWARE VERIFICATION REPORT"));
        assert!(!verification.capabilities.cpu_vendor.is_empty());
        assert!(verification.capabilities.cpu_cores > 0);
    }

    /// The bandwidth probe must report a positive, finite throughput.
    #[test]
    fn test_memory_bandwidth() {
        let validator = HardwareValidator::new();
        let bandwidth = validator.measure_memory_bandwidth();

        println!("Memory bandwidth: {:.2} GB/s", bandwidth);
        assert!(bandwidth.is_finite());
        assert!(bandwidth > 0.0);
    }

    /// Every memory level must be present with a finite, non-negative latency.
    #[test]
    fn test_cache_latencies() {
        let validator = HardwareValidator::new();
        let latencies = validator.measure_cache_latencies();

        for level in ["L1", "L2", "L3", "Memory"].iter() {
            assert!(
                latencies.contains_key(*level),
                "missing latency entry for {}",
                level
            );
        }

        for (cache, latency) in &latencies {
            println!("{} latency: {:.2} ns", cache, latency);
            assert!(
                latency.is_finite() && *latency >= 0.0,
                "implausible latency for {}: {}",
                cache,
                latency
            );
        }
    }
}