use crate::TorshResult;
use std::collections::HashMap;
use std::time::Instant;
/// Runs hardware micro-benchmarks and remembers their results.
///
/// Benchmark scores and feature probes are memoised per instance, so asking
/// for the same score twice returns the first measurement instead of
/// re-running the benchmark.
#[derive(Debug)]
pub struct HardwareBenchmark {
    /// Host description captured when the instance was constructed.
    system_info: SystemInfo,
    /// Benchmark scores keyed by benchmark name (for example `"cpu_compute"`).
    benchmark_cache: HashMap<String, f64>,
    /// Feature-probe results keyed by feature name (for example `"simd_support"`).
    feature_support: HashMap<String, bool>,
}
/// Description of the host machine, assembled by `SystemInfo::detect`.
#[derive(Debug, Clone)]
pub struct SystemInfo {
    /// Processor description.
    pub cpu_info: CpuInfo,
    /// Main-memory description.
    pub memory_info: MemoryInfo,
    /// CPU cache hierarchy description.
    pub cache_info: CacheInfo,
    /// Names of the CPU feature flags that were detected (for example `"SSE2"`).
    pub hardware_features: Vec<String>,
    /// Operating-system name; `detect` fills this from `std::env::consts::OS`.
    pub os_info: String,
}
/// Processor description.
#[derive(Debug, Clone)]
pub struct CpuInfo {
    /// Number of physical cores.
    pub physical_cores: usize,
    /// Number of logical cores (hardware threads).
    pub logical_cores: usize,
    /// Base clock frequency in MHz.
    pub base_frequency_mhz: f64,
    /// Architecture name; `CpuInfo::detect` fills this from `std::env::consts::ARCH`.
    pub architecture: String,
    /// Names of the supported instruction sets.
    pub instruction_sets: Vec<String>,
}
/// Main-memory description.
#[derive(Debug, Clone)]
pub struct MemoryInfo {
    /// Total installed memory (presumably in bytes; confirm against callers).
    pub total_memory: usize,
    /// Memory currently available (presumably in bytes; confirm against callers).
    pub available_memory: usize,
    /// Memory bandwidth, in the GB/s-style unit implied by the field name.
    pub memory_bandwidth_gbps: f64,
}
/// CPU cache hierarchy description.
///
/// Sizes are presumably expressed in bytes (confirm against callers).
#[derive(Debug, Clone)]
pub struct CacheInfo {
    /// L1 cache size.
    pub l1_cache_size: usize,
    /// L2 cache size.
    pub l2_cache_size: usize,
    /// L3 cache size.
    pub l3_cache_size: usize,
    /// Cache line size.
    pub cache_line_size: usize,
}
/// Result of `HardwareBenchmark::analyze_system_capabilities`.
#[derive(Debug, Clone)]
pub struct SystemCapabilityReport {
    /// Copy of the host description the benchmarks ran against.
    pub system_info: SystemInfo,
    /// Benchmark scores keyed by score name (for example `"cpu_compute_score"`).
    pub capability_scores: HashMap<String, f64>,
    /// Human-readable tuning hints derived from the scores.
    pub recommendations: Vec<String>,
    /// Wall-clock time at which the report was assembled.
    pub benchmark_timestamp: std::time::SystemTime,
}
impl Default for HardwareBenchmark {
fn default() -> Self {
Self::new()
}
}
impl HardwareBenchmark {
    /// Builds a harness around a freshly detected [`SystemInfo`], starting
    /// with empty score and feature caches.
    pub fn new() -> Self {
        Self {
            system_info: SystemInfo::detect(),
            benchmark_cache: HashMap::new(),
            feature_support: HashMap::new(),
        }
    }

    /// Runs every applicable benchmark and bundles the results into a report.
    ///
    /// The CPU, memory-bandwidth and cache benchmarks always run. The SIMD and
    /// NUMA scores are only added when the matching feature probe reports
    /// support. Recommendations are derived from the collected scores.
    ///
    /// # Errors
    ///
    /// Propagates the first error returned by any individual benchmark.
    pub fn analyze_system_capabilities(&mut self) -> TorshResult<SystemCapabilityReport> {
        let mut capability_scores: HashMap<String, f64> = HashMap::new();

        let cpu_score = self.benchmark_cpu_compute()?;
        capability_scores.insert(String::from("cpu_compute_score"), cpu_score);

        let bandwidth_score = self.benchmark_memory_bandwidth()?;
        capability_scores.insert(String::from("memory_bandwidth_score"), bandwidth_score);

        let cache_score = self.benchmark_cache_efficiency()?;
        capability_scores.insert(String::from("cache_efficiency_score"), cache_score);

        if self.detect_simd_support() {
            let simd_score = self.benchmark_simd_performance()?;
            capability_scores.insert(String::from("simd_acceleration_score"), simd_score);
        }

        if self.detect_numa_support() {
            let numa_score = self.benchmark_numa_performance()?;
            capability_scores.insert(String::from("numa_efficiency_score"), numa_score);
        }

        let recommendations = self.generate_hardware_recommendations(&capability_scores);
        let system_info = self.system_info.clone();

        Ok(SystemCapabilityReport {
            system_info,
            capability_scores,
            recommendations,
            benchmark_timestamp: std::time::SystemTime::now(),
        })
    }

    /// Measures scalar floating-point throughput.
    ///
    /// Times one million `sqrt().sin().cos()` evaluations and returns the rate
    /// in millions of iterations per second. The first measurement is cached
    /// under `"cpu_compute"` and returned by every later call.
    pub fn benchmark_cpu_compute(&mut self) -> TorshResult<f64> {
        const CACHE_KEY: &str = "cpu_compute";
        const ITERATIONS: i32 = 1_000_000;

        if let Some(&hit) = self.benchmark_cache.get(CACHE_KEY) {
            return Ok(hit);
        }

        let timer = Instant::now();
        // The timed loop keeps its plain `for` shape so the measurement itself
        // is not perturbed by a different code shape.
        let mut acc = 0.0;
        for n in 0..ITERATIONS {
            acc += (n as f64).sqrt().sin().cos();
        }
        let elapsed = timer.elapsed();

        let iterations_per_second = ITERATIONS as f64 / elapsed.as_secs_f64();
        let score = iterations_per_second / 1_000_000.0;

        // Keep the accumulated value observable so the loop cannot be elided.
        std::hint::black_box(acc);

        self.benchmark_cache.insert(CACHE_KEY.to_string(), score);
        Ok(score)
    }

    /// Measures sequential read throughput over a ten-million-element `f64`
    /// buffer.
    ///
    /// The result is bytes per second divided by `1024^3`. The first
    /// measurement is cached under `"memory_bandwidth"`.
    pub fn benchmark_memory_bandwidth(&mut self) -> TorshResult<f64> {
        const CACHE_KEY: &str = "memory_bandwidth";
        const ELEMENTS: usize = 10_000_000;
        const BYTES_PER_GIB: f64 = 1024.0 * 1024.0 * 1024.0;

        if let Some(&hit) = self.benchmark_cache.get(CACHE_KEY) {
            return Ok(hit);
        }

        // The buffer is built before the timer starts; only the read pass is timed.
        let data: Vec<f64> = (0..ELEMENTS).map(|i| i as f64).collect();

        let timer = Instant::now();
        let total: f64 = data.iter().sum();
        let elapsed = timer.elapsed();

        let bytes_processed = ELEMENTS * std::mem::size_of::<f64>();
        let bandwidth_gbps = (bytes_processed as f64 / elapsed.as_secs_f64()) / BYTES_PER_GIB;

        std::hint::black_box(total);

        self.benchmark_cache
            .insert(CACHE_KEY.to_string(), bandwidth_gbps);
        Ok(bandwidth_gbps)
    }

    /// Compares a sequential pass with a strided pass over the same buffer.
    ///
    /// Returns `sequential_time / strided_time`. The strided pass visits
    /// elements seven slots (56 bytes) apart, wrapping modulo the buffer
    /// length. The first measurement is cached under `"cache_efficiency"`.
    pub fn benchmark_cache_efficiency(&mut self) -> TorshResult<f64> {
        const CACHE_KEY: &str = "cache_efficiency";

        if let Some(&hit) = self.benchmark_cache.get(CACHE_KEY) {
            return Ok(hit);
        }

        let size = 1_000_000;
        let data: Vec<f64> = vec![1.0; size];

        // Pass 1: in-order traversal.
        let sequential_timer = Instant::now();
        let mut sequential_total = 0.0;
        for &value in &data {
            sequential_total += value;
        }
        let sequential_elapsed = sequential_timer.elapsed();

        // Pass 2: stride-7 traversal.
        let strided_timer = Instant::now();
        let mut strided_total = 0.0;
        for i in 0..size {
            let index = (i * 7) % size;
            strided_total += data[index];
        }
        let strided_elapsed = strided_timer.elapsed();

        let efficiency_score = sequential_elapsed.as_secs_f64() / strided_elapsed.as_secs_f64();

        std::hint::black_box((sequential_total, strided_total));

        self.benchmark_cache
            .insert(CACHE_KEY.to_string(), efficiency_score);
        Ok(efficiency_score)
    }

    /// Reports whether the build targets SSE2, AVX or NEON.
    ///
    /// This is a compile-time check (`cfg!`), not a runtime CPU probe. The
    /// answer is cached under `"simd_support"`.
    fn detect_simd_support(&mut self) -> bool {
        const FEATURE: &str = "simd_support";

        if let Some(&known) = self.feature_support.get(FEATURE) {
            return known;
        }

        let supported = cfg!(any(
            target_feature = "sse2",
            target_feature = "avx",
            target_feature = "neon"
        ));
        self.feature_support.insert(FEATURE.to_string(), supported);
        supported
    }

    /// Heuristic NUMA probe: any machine reporting more than four physical
    /// cores counts as NUMA. The answer is cached under `"numa_support"`.
    fn detect_numa_support(&mut self) -> bool {
        const FEATURE: &str = "numa_support";

        if let Some(&known) = self.feature_support.get(FEATURE) {
            return known;
        }

        let supported = self.system_info.cpu_info.physical_cores > 4;
        self.feature_support.insert(FEATURE.to_string(), supported);
        supported
    }

    /// Returns a fixed SIMD score: `0.8` when SIMD support is detected and
    /// `0.2` otherwise. Cached under `"simd_performance"`.
    fn benchmark_simd_performance(&mut self) -> TorshResult<f64> {
        const CACHE_KEY: &str = "simd_performance";

        if let Some(&hit) = self.benchmark_cache.get(CACHE_KEY) {
            return Ok(hit);
        }

        let score = match self.detect_simd_support() {
            true => 0.8,
            false => 0.2,
        };
        self.benchmark_cache.insert(CACHE_KEY.to_string(), score);
        Ok(score)
    }

    /// Returns a fixed NUMA score: `0.7` when NUMA is detected and `0.9`
    /// otherwise. Cached under `"numa_performance"`.
    fn benchmark_numa_performance(&mut self) -> TorshResult<f64> {
        const CACHE_KEY: &str = "numa_performance";

        if let Some(&hit) = self.benchmark_cache.get(CACHE_KEY) {
            return Ok(hit);
        }

        let score = match self.detect_numa_support() {
            true => 0.7,
            false => 0.9,
        };
        self.benchmark_cache.insert(CACHE_KEY.to_string(), score);
        Ok(score)
    }

    /// Turns capability scores into human-readable tuning hints.
    ///
    /// Each of the three measured scores contributes at most one hint (high
    /// or low; mid-range values contribute nothing). The SIMD and NUMA hints
    /// are added whenever their score key is present, regardless of value.
    /// Hints are emitted in the order CPU, memory, cache, SIMD, NUMA.
    fn generate_hardware_recommendations(
        &self,
        capabilities: &HashMap<String, f64>,
    ) -> Vec<String> {
        let mut hints: Vec<String> = Vec::new();

        match capabilities.get("cpu_compute_score").copied() {
            Some(score) if score > 2.0 => hints.push(
                "High CPU performance detected - consider CPU-intensive algorithms".to_string(),
            ),
            Some(score) if score < 0.5 => hints
                .push("Limited CPU performance - prefer memory-efficient algorithms".to_string()),
            _ => {}
        }

        match capabilities.get("memory_bandwidth_score").copied() {
            Some(score) if score > 10.0 => hints.push(
                "High memory bandwidth available - streaming algorithms recommended".to_string(),
            ),
            Some(score) if score < 2.0 => {
                hints.push("Limited memory bandwidth - minimize memory access patterns".to_string())
            }
            _ => {}
        }

        match capabilities.get("cache_efficiency_score").copied() {
            Some(score) if score > 0.8 => {
                hints.push("Excellent cache performance - leverage block algorithms".to_string())
            }
            Some(score) if score < 0.3 => hints
                .push("Poor cache performance - consider cache-oblivious algorithms".to_string()),
            _ => {}
        }

        if capabilities.contains_key("simd_acceleration_score") {
            hints.push("SIMD support detected - enable vectorized operations".to_string());
        }

        if capabilities.contains_key("numa_efficiency_score") {
            hints.push("NUMA system detected - consider thread affinity optimization".to_string());
        }

        hints
    }
}
impl SystemInfo {
    /// Assembles a full host description from the individual detectors.
    pub fn detect() -> Self {
        Self {
            cpu_info: CpuInfo::detect(),
            memory_info: MemoryInfo::detect(),
            cache_info: CacheInfo::detect(),
            hardware_features: Self::detect_hardware_features(),
            os_info: Self::detect_os_info(),
        }
    }

    /// Lists the SIMD-related target features enabled for this build.
    ///
    /// Every check is a compile-time `cfg`, so the list reflects the features
    /// the binary was compiled for, not a runtime CPU probe. NEON is included
    /// so the list agrees with `HardwareBenchmark::detect_simd_support`,
    /// which already treats `neon` as SIMD support.
    fn detect_hardware_features() -> Vec<String> {
        // `mut` is unused when none of the cfg-gated pushes below are compiled in.
        #[allow(unused_mut)]
        let mut features = Vec::new();
        #[cfg(target_feature = "sse2")]
        features.push("SSE2".to_string());
        #[cfg(target_feature = "avx")]
        features.push("AVX".to_string());
        #[cfg(target_feature = "avx2")]
        features.push("AVX2".to_string());
        #[cfg(target_feature = "fma")]
        features.push("FMA".to_string());
        #[cfg(target_feature = "neon")]
        features.push("NEON".to_string());
        features
    }

    /// Returns the operating-system name from `std::env::consts::OS`.
    fn detect_os_info() -> String {
        std::env::consts::OS.to_string()
    }
}
impl CpuInfo {
    /// Returns a CPU description.
    ///
    /// Core counts and base frequency are fixed values (4 physical cores,
    /// 8 logical cores, 2400 MHz), not measurements of the running machine.
    /// The architecture comes from `std::env::consts::ARCH`, and the
    /// instruction-set list is derived from that same constant so the two
    /// fields cannot disagree on non-x86_64 targets.
    pub fn detect() -> Self {
        let architecture = std::env::consts::ARCH.to_string();
        Self {
            physical_cores: 4,
            logical_cores: 8,
            base_frequency_mhz: 2400.0,
            instruction_sets: vec![architecture.clone()],
            architecture,
        }
    }
}
impl MemoryInfo {
    /// Returns a memory description.
    ///
    /// The values are fixed (16 GiB total, 8 GiB available, 25.6 bandwidth),
    /// not measurements of the running machine.
    ///
    /// The sizes are built with saturating multiplication: a literal
    /// `16 * 1024 * 1024 * 1024` does not fit in a 32-bit `usize` and is
    /// rejected at compile time on such targets. There the values clamp to
    /// `usize::MAX`; on 64-bit targets they are exact.
    pub fn detect() -> Self {
        const GIB: usize = 1024 * 1024 * 1024;
        Self {
            total_memory: GIB.saturating_mul(16),
            available_memory: GIB.saturating_mul(8),
            memory_bandwidth_gbps: 25.6,
        }
    }
}
impl CacheInfo {
pub fn detect() -> Self {
Self {
l1_cache_size: 32 * 1024, l2_cache_size: 256 * 1024, l3_cache_size: 8 * 1024 * 1024, cache_line_size: 64, }
}
}