temporal_neural_solver/benchmarks/
hardware_verification.rs

//! Hardware verification and capability detection
//!
//! This module ensures that performance measurements are consistent
//! across different hardware configurations and verifies the use
//! of specific CPU features.
6
7use std::collections::HashMap;
8use serde::{Serialize, Deserialize};
9use std::time::{SystemTime, UNIX_EPOCH};
10
11/// Hardware capabilities detected on the system
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct HardwareCapabilities {
14    pub cpu_vendor: String,
15    pub cpu_brand: String,
16    pub cpu_cores: usize,
17    pub cpu_threads: usize,
18    pub cache_sizes: CacheSizes,
19    pub simd_features: SimdFeatures,
20    pub memory_info: MemoryInfo,
21    pub timestamp: u64,
22}
23
24#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct CacheSizes {
26    pub l1_data: Option<usize>,
27    pub l1_instruction: Option<usize>,
28    pub l2: Option<usize>,
29    pub l3: Option<usize>,
30}
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct SimdFeatures {
34    pub sse: bool,
35    pub sse2: bool,
36    pub sse3: bool,
37    pub ssse3: bool,
38    pub sse4_1: bool,
39    pub sse4_2: bool,
40    pub avx: bool,
41    pub avx2: bool,
42    pub avx512f: bool,
43    pub fma: bool,
44}
45
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct MemoryInfo {
48    pub total_memory: Option<usize>,
49    pub available_memory: Option<usize>,
50    pub page_size: Option<usize>,
51}
52
53/// Hardware verification result
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct HardwareVerification {
56    pub capabilities: HardwareCapabilities,
57    pub feature_usage: FeatureUsage,
58    pub performance_baseline: PerformanceBaseline,
59    pub warnings: Vec<String>,
60    pub validation_passed: bool,
61}
62
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct FeatureUsage {
65    pub detected_simd_usage: Vec<String>,
66    pub memory_alignment_verified: bool,
67    pub cache_friendly_access: bool,
68    pub thread_affinity_set: bool,
69}
70
71#[derive(Debug, Clone, Serialize, Deserialize)]
72pub struct PerformanceBaseline {
73    pub memory_bandwidth_gb_s: f64,
74    pub cpu_frequency_ghz: f64,
75    pub cache_latency_ns: HashMap<String, f64>,
76    pub baseline_established: bool,
77}
78
79/// Hardware validator and verifier
80pub struct HardwareValidator {
81    baseline_measurements: Option<PerformanceBaseline>,
82}
83
84impl Default for HardwareValidator {
85    fn default() -> Self {
86        Self::new()
87    }
88}
89
90impl HardwareValidator {
91    pub fn new() -> Self {
92        Self {
93            baseline_measurements: None,
94        }
95    }
96
97    /// Detect all hardware capabilities
98    pub fn detect_capabilities(&self) -> HardwareCapabilities {
99        HardwareCapabilities {
100            cpu_vendor: self.detect_cpu_vendor(),
101            cpu_brand: self.detect_cpu_brand(),
102            cpu_cores: self.detect_cpu_cores(),
103            cpu_threads: self.detect_cpu_threads(),
104            cache_sizes: self.detect_cache_sizes(),
105            simd_features: self.detect_simd_features(),
106            memory_info: self.detect_memory_info(),
107            timestamp: SystemTime::now()
108                .duration_since(UNIX_EPOCH)
109                .unwrap()
110                .as_secs(),
111        }
112    }
113
114    /// Verify hardware configuration and measure baselines
115    pub fn verify_hardware(&mut self) -> HardwareVerification {
116        let capabilities = self.detect_capabilities();
117        let mut warnings = Vec::new();
118
119        // Check for minimum requirements
120        if !capabilities.simd_features.avx2 {
121            warnings.push("AVX2 not available - performance may be suboptimal".to_string());
122        }
123
124        if capabilities.cpu_cores < 4 {
125            warnings.push("Less than 4 CPU cores detected - may affect performance".to_string());
126        }
127
128        if let Some(l3_size) = capabilities.cache_sizes.l3 {
129            if l3_size < 8 * 1024 * 1024 { // 8MB
130                warnings.push("Small L3 cache detected - may affect large workloads".to_string());
131            }
132        }
133
134        // Verify feature usage
135        let feature_usage = self.verify_feature_usage(&capabilities);
136
137        // Establish performance baseline
138        let baseline = self.establish_baseline(&capabilities);
139
140        let validation_passed = warnings.is_empty() &&
141                               feature_usage.detected_simd_usage.len() > 0 &&
142                               baseline.baseline_established;
143
144        HardwareVerification {
145            capabilities,
146            feature_usage,
147            performance_baseline: baseline,
148            warnings,
149            validation_passed,
150        }
151    }
152
153    fn detect_cpu_vendor(&self) -> String {
154        #[cfg(target_arch = "x86_64")]
155        {
156            if is_x86_feature_detected!("avx2") {
157                // Try to detect vendor through CPUID
158                // This is a simplified detection
159                "Unknown x86_64".to_string()
160            } else {
161                "x86_64 (limited features)".to_string()
162            }
163        }
164        #[cfg(not(target_arch = "x86_64"))]
165        {
166            std::env::consts::ARCH.to_string()
167        }
168    }
169
170    fn detect_cpu_brand(&self) -> String {
171        // In a real implementation, this would use CPUID
172        "Generic CPU".to_string()
173    }
174
175    fn detect_cpu_cores(&self) -> usize {
176        num_cpus::get_physical()
177    }
178
179    fn detect_cpu_threads(&self) -> usize {
180        num_cpus::get()
181    }
182
183    fn detect_cache_sizes(&self) -> CacheSizes {
184        // In a real implementation, this would query CPU cache info
185        CacheSizes {
186            l1_data: Some(32 * 1024),    // 32KB typical
187            l1_instruction: Some(32 * 1024),
188            l2: Some(256 * 1024),        // 256KB typical
189            l3: Some(8 * 1024 * 1024),   // 8MB typical
190        }
191    }
192
193    fn detect_simd_features(&self) -> SimdFeatures {
194        #[cfg(target_arch = "x86_64")]
195        {
196            SimdFeatures {
197                sse: is_x86_feature_detected!("sse"),
198                sse2: is_x86_feature_detected!("sse2"),
199                sse3: is_x86_feature_detected!("sse3"),
200                ssse3: is_x86_feature_detected!("ssse3"),
201                sse4_1: is_x86_feature_detected!("sse4.1"),
202                sse4_2: is_x86_feature_detected!("sse4.2"),
203                avx: is_x86_feature_detected!("avx"),
204                avx2: is_x86_feature_detected!("avx2"),
205                avx512f: is_x86_feature_detected!("avx512f"),
206                fma: is_x86_feature_detected!("fma"),
207            }
208        }
209        #[cfg(not(target_arch = "x86_64"))]
210        {
211            SimdFeatures {
212                sse: false, sse2: false, sse3: false, ssse3: false,
213                sse4_1: false, sse4_2: false, avx: false, avx2: false,
214                avx512f: false, fma: false,
215            }
216        }
217    }
218
219    fn detect_memory_info(&self) -> MemoryInfo {
220        MemoryInfo {
221            total_memory: self.get_total_memory(),
222            available_memory: self.get_available_memory(),
223            page_size: Some(4096), // 4KB pages typical
224        }
225    }
226
227    fn get_total_memory(&self) -> Option<usize> {
228        // Platform-specific memory detection
229        #[cfg(target_os = "linux")]
230        {
231            std::fs::read_to_string("/proc/meminfo")
232                .ok()
233                .and_then(|contents| {
234                    contents.lines()
235                        .find(|line| line.starts_with("MemTotal:"))
236                        .and_then(|line| {
237                            line.split_whitespace()
238                                .nth(1)
239                                .and_then(|s| s.parse::<usize>().ok())
240                                .map(|kb| kb * 1024) // Convert KB to bytes
241                        })
242                })
243        }
244        #[cfg(not(target_os = "linux"))]
245        {
246            None
247        }
248    }
249
250    fn get_available_memory(&self) -> Option<usize> {
251        #[cfg(target_os = "linux")]
252        {
253            std::fs::read_to_string("/proc/meminfo")
254                .ok()
255                .and_then(|contents| {
256                    contents.lines()
257                        .find(|line| line.starts_with("MemAvailable:"))
258                        .and_then(|line| {
259                            line.split_whitespace()
260                                .nth(1)
261                                .and_then(|s| s.parse::<usize>().ok())
262                                .map(|kb| kb * 1024)
263                        })
264                })
265        }
266        #[cfg(not(target_os = "linux"))]
267        {
268            None
269        }
270    }
271
272    /// Verify that optimizations are actually being used
273    fn verify_feature_usage(&self, capabilities: &HardwareCapabilities) -> FeatureUsage {
274        let mut detected_simd = Vec::new();
275
276        // Check which SIMD features are available and likely being used
277        if capabilities.simd_features.avx512f {
278            detected_simd.push("AVX-512".to_string());
279        } else if capabilities.simd_features.avx2 {
280            detected_simd.push("AVX2".to_string());
281        } else if capabilities.simd_features.avx {
282            detected_simd.push("AVX".to_string());
283        } else if capabilities.simd_features.sse4_2 {
284            detected_simd.push("SSE4.2".to_string());
285        }
286
287        if capabilities.simd_features.fma {
288            detected_simd.push("FMA".to_string());
289        }
290
291        FeatureUsage {
292            detected_simd_usage: detected_simd,
293            memory_alignment_verified: self.verify_memory_alignment(),
294            cache_friendly_access: self.verify_cache_friendly_access(),
295            thread_affinity_set: self.verify_thread_affinity(),
296        }
297    }
298
299    fn verify_memory_alignment(&self) -> bool {
300        // Test memory alignment for SIMD operations
301        let test_data = vec![1.0f32; 32];
302        let ptr = test_data.as_ptr() as usize;
303
304        // Check if aligned to 32-byte boundary (AVX2 requirement)
305        ptr % 32 == 0
306    }
307
308    fn verify_cache_friendly_access(&self) -> bool {
309        // Simple cache-friendly access pattern test
310        let size = 1024; // 4KB = typical page size
311        let data = vec![1.0f32; size];
312
313        let start = std::time::Instant::now();
314
315        // Sequential access (cache-friendly)
316        let mut sum = 0.0f32;
317        for &val in &data {
318            sum += val;
319        }
320
321        let sequential_time = start.elapsed();
322
323        // Random access (cache-unfriendly)
324        let start = std::time::Instant::now();
325        let mut sum2 = 0.0f32;
326        for i in (0..size).step_by(64) { // Jump by cache line size
327            sum2 += data[i];
328        }
329        let random_time = start.elapsed();
330
331        // Cache-friendly should be significantly faster
332        sequential_time < random_time || random_time.as_nanos() < 1000
333    }
334
335    fn verify_thread_affinity(&self) -> bool {
336        // Check if thread affinity can be set (indicates scheduler control)
337        core_affinity::get_core_ids().is_some()
338    }
339
340    /// Establish performance baselines for the hardware
341    fn establish_baseline(&mut self, capabilities: &HardwareCapabilities) -> PerformanceBaseline {
342        let memory_bandwidth = self.measure_memory_bandwidth();
343        let cpu_frequency = self.estimate_cpu_frequency();
344        let cache_latencies = self.measure_cache_latencies();
345
346        let baseline = PerformanceBaseline {
347            memory_bandwidth_gb_s: memory_bandwidth,
348            cpu_frequency_ghz: cpu_frequency,
349            cache_latency_ns: cache_latencies,
350            baseline_established: memory_bandwidth > 0.0 && cpu_frequency > 0.0,
351        };
352
353        self.baseline_measurements = Some(baseline.clone());
354        baseline
355    }
356
357    fn measure_memory_bandwidth(&self) -> f64 {
358        // Simple memory bandwidth test
359        let size = 1024 * 1024; // 1MB
360        let data = vec![1u64; size];
361        let iterations = 100;
362
363        let start = std::time::Instant::now();
364
365        for _ in 0..iterations {
366            let sum: u64 = data.iter().sum();
367            std::hint::black_box(sum); // Prevent optimization
368        }
369
370        let elapsed = start.elapsed();
371        let bytes_processed = size * iterations * 8; // 8 bytes per u64
372        let seconds = elapsed.as_secs_f64();
373
374        if seconds > 0.0 {
375            (bytes_processed as f64) / seconds / 1e9 // GB/s
376        } else {
377            0.0
378        }
379    }
380
381    fn estimate_cpu_frequency(&self) -> f64 {
382        // Estimate CPU frequency using a compute-intensive loop
383        let iterations = 1_000_000;
384        let start = std::time::Instant::now();
385
386        let mut x = 1.0f64;
387        for _ in 0..iterations {
388            x = x.sin().cos(); // Floating point intensive
389        }
390        std::hint::black_box(x);
391
392        let elapsed = start.elapsed();
393        let ops_per_second = iterations as f64 / elapsed.as_secs_f64();
394
395        // Very rough estimate - assumes certain ops per clock
396        ops_per_second / 1e9 // Rough GHz estimate
397    }
398
399    fn measure_cache_latencies(&self) -> HashMap<String, f64> {
400        let mut latencies = HashMap::new();
401
402        // L1 cache test (should fit in L1)
403        let l1_latency = self.measure_latency_for_size(4 * 1024); // 4KB
404        latencies.insert("L1".to_string(), l1_latency);
405
406        // L2 cache test
407        let l2_latency = self.measure_latency_for_size(128 * 1024); // 128KB
408        latencies.insert("L2".to_string(), l2_latency);
409
410        // L3 cache test
411        let l3_latency = self.measure_latency_for_size(4 * 1024 * 1024); // 4MB
412        latencies.insert("L3".to_string(), l3_latency);
413
414        // Main memory test
415        let mem_latency = self.measure_latency_for_size(64 * 1024 * 1024); // 64MB
416        latencies.insert("Memory".to_string(), mem_latency);
417
418        latencies
419    }
420
421    fn measure_latency_for_size(&self, size: usize) -> f64 {
422        let data = vec![0u8; size];
423        let iterations = 1000;
424        let stride = 64; // Cache line size
425
426        let start = std::time::Instant::now();
427
428        let mut index = 0;
429        let mut sum = 0u8;
430        for _ in 0..iterations {
431            for _ in 0..(size / stride) {
432                sum = sum.wrapping_add(data[index]);
433                index = (index + stride) % size;
434            }
435        }
436        std::hint::black_box(sum);
437
438        let elapsed = start.elapsed();
439        elapsed.as_nanos() as f64 / (iterations * size / stride) as f64
440    }
441
442    /// Generate detailed hardware report
443    pub fn generate_report(&self, verification: &HardwareVerification) -> String {
444        let mut report = String::new();
445
446        report.push_str(&format!("\n{}\n", "=".repeat(60)));
447        report.push_str("HARDWARE VERIFICATION REPORT\n");
448        report.push_str(&format!("{}\n", "=".repeat(60)));
449
450        let caps = &verification.capabilities;
451        report.push_str(&format!("CPU: {} ({})\n", caps.cpu_brand, caps.cpu_vendor));
452        report.push_str(&format!("Cores: {} physical, {} threads\n", caps.cpu_cores, caps.cpu_threads));
453
454        report.push_str("\nšŸ’¾ CACHE HIERARCHY:\n");
455        if let Some(l1) = caps.cache_sizes.l1_data {
456            report.push_str(&format!("• L1 Data: {} KB\n", l1 / 1024));
457        }
458        if let Some(l2) = caps.cache_sizes.l2 {
459            report.push_str(&format!("• L2: {} KB\n", l2 / 1024));
460        }
461        if let Some(l3) = caps.cache_sizes.l3 {
462            report.push_str(&format!("• L3: {} MB\n", l3 / (1024 * 1024)));
463        }
464
465        report.push_str("\nšŸš€ SIMD FEATURES:\n");
466        let simd = &caps.simd_features;
467        if simd.avx512f { report.push_str("• āœ… AVX-512\n"); }
468        if simd.avx2 { report.push_str("• āœ… AVX2\n"); }
469        if simd.avx { report.push_str("• āœ… AVX\n"); }
470        if simd.fma { report.push_str("• āœ… FMA\n"); }
471        if simd.sse4_2 { report.push_str("• āœ… SSE4.2\n"); }
472
473        report.push_str("\nšŸ“Š PERFORMANCE BASELINE:\n");
474        let baseline = &verification.performance_baseline;
475        report.push_str(&format!("• Memory Bandwidth: {:.2} GB/s\n", baseline.memory_bandwidth_gb_s));
476        report.push_str(&format!("• CPU Frequency: {:.2} GHz (estimated)\n", baseline.cpu_frequency_ghz));
477
478        report.push_str("\n⚔ FEATURE USAGE:\n");
479        let usage = &verification.feature_usage;
480        for feature in &usage.detected_simd_usage {
481            report.push_str(&format!("• āœ… {} detected\n", feature));
482        }
483        if usage.memory_alignment_verified {
484            report.push_str("• āœ… Memory alignment verified\n");
485        }
486        if usage.cache_friendly_access {
487            report.push_str("• āœ… Cache-friendly access patterns\n");
488        }
489
490        if !verification.warnings.is_empty() {
491            report.push_str("\nāš ļø  WARNINGS:\n");
492            for warning in &verification.warnings {
493                report.push_str(&format!("• {}\n", warning));
494            }
495        }
496
497        report.push_str(&format!("\nšŸŽÆ HARDWARE VALIDATION: {}\n",
498            if verification.validation_passed { "āœ… PASSED" } else { "āŒ FAILED" }));
499
500        report
501    }
502}
503
#[cfg(test)]
mod tests {
    use super::*;

    /// Full verification run: prints the report and checks that basic CPU
    /// information was populated.
    #[test]
    fn test_hardware_detection() {
        let mut validator = HardwareValidator::new();
        let verification = validator.verify_hardware();

        let report = validator.generate_report(&verification);
        println!("{}", report);

        let caps = &verification.capabilities;
        assert!(!caps.cpu_vendor.is_empty());
        assert!(caps.cpu_cores > 0);
    }

    /// The bandwidth measurement must yield a positive figure.
    #[test]
    fn test_memory_bandwidth() {
        let bandwidth = HardwareValidator::new().measure_memory_bandwidth();

        println!("Memory bandwidth: {:.2} GB/s", bandwidth);
        assert!(bandwidth > 0.0);
    }

    /// Smoke test: the latency measurements run and are printed.
    #[test]
    fn test_cache_latencies() {
        let latencies = HardwareValidator::new().measure_cache_latencies();

        for (cache, latency) in &latencies {
            println!("{} latency: {:.2} ns", cache, latency);
        }
    }
}