use crate::error::StatsResult;
use scirs2_core::ndarray::{ArrayView1, ArrayView2};
use scirs2_core::numeric::{Float, NumCast};
use scirs2_core::simd_ops::SimdUnifiedOps;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
/// Tuning knobs for [`AdaptiveSimdOptimizer`].
///
/// Only some of these fields are consulted by the optimizer code visible in
/// this file; the others are marked with a review note below.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdaptiveSimdConfig {
/// Defaults to `true`.
/// NOTE(review): not read by any code visible in this file.
pub auto_detect_hardware: bool,
/// When `false`, `update_performance_cache` returns without recording
/// anything. Defaults to `true`.
pub enable_profiling: bool,
/// Inputs with fewer elements than this have their strategy score halved
/// in `calculate_strategy_score`. Defaults to `64`.
pub min_simdsize: usize,
/// Upper bound on the number of entries kept in the strategy cache.
/// Defaults to `1000`.
pub cachesize: usize,
/// Defaults to `10`.
/// NOTE(review): not read by any code visible in this file.
pub benchmark_samples: usize,
/// Defaults to `false`.
/// NOTE(review): not read by any code visible in this file.
pub enable_hybrid_processing: bool,
/// Defaults to [`SimdAlignment::Optimal`].
/// NOTE(review): not read by any code visible in this file.
pub alignment_requirements: SimdAlignment,
/// `Aggressive` and `Extreme` make `generate_candidate_strategies` emit an
/// additional "aggressive" candidate per instruction set.
/// Defaults to [`OptimizationLevel::Aggressive`].
pub optimization_level: OptimizationLevel,
/// Defaults to `true`.
/// NOTE(review): not read by any code visible in this file.
pub adaptive_vectorization: bool,
/// Defaults to `true`.
/// NOTE(review): not read by any code visible in this file.
pub memory_bandwidth_optimization: bool,
}
impl Default for AdaptiveSimdConfig {
    /// Returns the stock configuration: profiling on, aggressive optimization,
    /// a 1000-entry strategy cache and a 64-element SIMD threshold.
    fn default() -> Self {
        Self {
            // Limits and thresholds.
            min_simdsize: 64,
            cachesize: 1000,
            benchmark_samples: 10,
            // Policy selections.
            optimization_level: OptimizationLevel::Aggressive,
            alignment_requirements: SimdAlignment::Optimal,
            // Feature switches.
            auto_detect_hardware: true,
            enable_profiling: true,
            adaptive_vectorization: true,
            memory_bandwidth_optimization: true,
            enable_hybrid_processing: false,
        }
    }
}
/// Alignment requirement selectable through
/// [`AdaptiveSimdConfig::alignment_requirements`].
///
/// NOTE(review): no code visible in this file interprets these variants, so
/// their exact meaning cannot be confirmed from here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SimdAlignment {
/// No alignment requirement.
None,
/// Basic alignment (exact boundary not defined in this file).
Basic,
/// Optimal alignment (exact boundary not defined in this file); the
/// default configuration uses this variant.
Optimal,
/// Caller-specified alignment — presumably in bytes; TODO confirm.
Custom(usize),
}
/// How hard the optimizer should push when generating candidate strategies.
///
/// In the code visible here only the distinction between
/// {`Conservative`, `Balanced`} and {`Aggressive`, `Extreme`} matters: the
/// latter pair adds an "aggressive" candidate per instruction set.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum OptimizationLevel {
/// Only conservative candidates are generated.
Conservative,
/// Treated like `Conservative` by the candidate generator in this file.
Balanced,
/// Conservative and aggressive candidates are generated (the default).
Aggressive,
/// Treated like `Aggressive` by the candidate generator in this file.
Extreme,
}
/// Description of the machine the optimizer believes it is running on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardwareCapabilities {
/// Instruction sets considered available; candidate strategies are
/// generated for each entry.
pub simd_instructions: Vec<SimdInstructionSet>,
/// Vector register width — presumably in bits (the built-in detection
/// reports 256); TODO confirm.
pub vector_width: usize,
/// Number of SIMD units.
/// NOTE(review): not read by any code visible in this file.
pub simd_units: usize,
/// Cache sizes and geometry.
pub cache_info: CacheHierarchy,
/// Peak memory bandwidth. Measured bandwidth is computed as
/// bytes / second / 1e9 and divided by this value, so it is in GB/s.
pub memory_bandwidth: f64,
/// CPU architecture family.
pub cpu_architecture: CpuArchitecture,
/// Whether a GPU is available.
pub gpu_available: bool,
/// GPU details, when known.
pub gpu_capabilities: Option<GpuCapabilities>,
}
/// SIMD instruction-set identifiers.
///
/// `generate_candidate_strategies` maps `AVX`/`AVX2` to a 256-bit vector
/// width, `AVX512` to 512 bits and every other variant to 128 bits.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum SimdInstructionSet {
SSE,
SSE2,
SSE3,
SSE41,
SSE42,
AVX,
AVX2,
AVX512,
NEON,
SVE,
}
/// Sizes and geometry of the CPU cache levels.
///
/// Units are not stated in this file; the built-in detection fills the sizes
/// with `32 * 1024`, `256 * 1024` and `8 * 1024 * 1024`, so they are
/// presumably bytes — TODO confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheHierarchy {
/// L1 cache size.
pub l1size: usize,
/// L2 cache size.
pub l2size: usize,
/// L3 cache size.
pub l3size: usize,
/// Cache line size (64 in the built-in detection).
pub cache_linesize: usize,
/// Associativity values — presumably one entry per level in L1, L2, L3
/// order (built-in detection uses `[8, 8, 16]`); TODO confirm.
pub associativity: Vec<usize>,
}
/// CPU architecture family recorded in [`HardwareCapabilities`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CpuArchitecture {
X86,
X86_64,
ARM,
ARM64,
RISCV,
/// Any architecture without a dedicated variant, identified by name.
Other(String),
}
/// GPU description attached to [`HardwareCapabilities`].
///
/// NOTE(review): no code visible in this file reads these fields, so units
/// are unconfirmed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuCapabilities {
/// Number of compute units.
pub compute_units: usize,
/// Amount of GPU memory — units not stated here; TODO confirm.
pub gpu_memory: usize,
/// GPU memory bandwidth — units not stated here; TODO confirm.
pub gpu_bandwidth: f64,
/// Names of the supported compute APIs.
pub compute_apis: Vec<String>,
}
/// A concrete plan handed to the user-supplied operation closure.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimdStrategy {
/// Human-readable identifier; also used as part of the performance-cache
/// key.
pub name: String,
/// Instruction set the strategy targets.
pub instruction_set: SimdInstructionSet,
/// Vector width in bits (the metrics code divides it by
/// `elementsize * 8` to obtain the number of lanes).
pub vector_width: usize,
/// Memory access pattern the operation should assume.
pub memory_pattern: MemoryAccessPattern,
/// How loads should deal with alignment.
pub alignment: AlignmentStrategy,
/// Loop unroll factor.
pub unroll_factor: usize,
/// Prefetching policy.
pub prefetch_strategy: PrefetchStrategy,
/// Speedup estimate used as the starting point for strategy scoring.
pub expected_speedup: f64,
}
/// How data is (or should be) traversed in memory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MemoryAccessPattern {
/// Contiguous traversal.
Sequential,
/// Traversal with a fixed stride; matrix analysis fills `stride` from the
/// first-axis stride of the view.
Strided { stride: usize },
/// No exploitable order.
Random,
/// Traversal in blocks of `blocksize` — units not stated here.
Blocked { blocksize: usize },
/// Traversal in two-dimensional tiles of the given size.
Tiled { tilesize: (usize, usize) },
}
/// Alignment handling requested from the operation.
///
/// The code in this file only selects among these variants; their effect is
/// up to the operation closure that receives the strategy.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AlignmentStrategy {
/// Chosen for conservative candidates when the data pointer is a multiple
/// of 32.
ForceAlign,
/// Chosen for conservative candidates otherwise, and for fallbacks.
UnalignedLoads,
/// Chosen for aggressive candidates.
DynamicAlign,
/// NOTE(review): never selected by code visible in this file.
CopyAlign,
}
/// Prefetching policy requested from the operation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PrefetchStrategy {
/// No prefetching.
None,
/// Software prefetching with the given look-ahead `distance` — units not
/// stated in this file; TODO confirm.
Software { distance: usize },
/// NOTE(review): never selected by code visible in this file.
Hardware,
/// NOTE(review): never selected by code visible in this file.
Adaptive,
}
/// Measurements and estimates recorded for one executed operation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimdPerformanceMetrics {
/// Wall-clock time spent inside the operation closure.
pub execution_time: Duration,
/// Elements processed per second.
pub throughput: f64,
/// Used memory bandwidth as a fraction of the hardware's
/// `memory_bandwidth`, capped at `1.0`.
pub bandwidth_utilization: f64,
/// Cache hit rate. Not measured: the code in this file fills in fixed
/// constants (0.9, or a value chosen by access pattern for matrices).
pub cache_hit_rate: f64,
/// SIMD lane utilisation estimate, capped at `1.0`.
pub simd_efficiency: f64,
/// Energy efficiency; always `None` in the code visible here.
pub energy_efficiency: Option<f64>,
}
/// Outcome of running an operation through the optimizer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimdOptimizationResult<T> {
/// Value returned by the operation closure.
pub result: T,
/// Strategy that produced `result` (the fallback strategy if the primary
/// one failed).
pub strategy_used: SimdStrategy,
/// Metrics computed for the successful run.
pub metrics: SimdPerformanceMetrics,
/// Set to `true` by every code path in this file that builds this struct;
/// failures are reported through `Err` instead.
pub success: bool,
/// `Some` when the result came from the fallback strategy, `None`
/// otherwise.
pub fallback_info: Option<FallbackInfo>,
}
/// Explains why and how a fallback strategy was used.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FallbackInfo {
/// Human-readable reason, naming the primary strategy that failed.
pub reason: String,
/// Label of the fallback that was applied.
pub fallback_strategy: String,
/// Fixed constant set by the fallback paths (0.5 for vectors, 0.6 for
/// matrices). NOTE(review): the scale and direction of this figure are
/// not defined anywhere in this file.
pub performance_impact: f64,
}
/// Summary of an input array used to pick and score strategies.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataCharacteristics {
/// Number of elements.
pub size: usize,
/// Size of one element in bytes (`std::mem::size_of`).
pub elementsize: usize,
/// Address of the first element modulo 32, i.e. the misalignment offset;
/// `0` means the data starts on a 32-byte boundary.
pub alignment: usize,
/// Detected traversal pattern.
pub access_pattern: MemoryAccessPattern,
/// Locality estimate; the analysers in this file use 1.0 for contiguous
/// data and 0.5 for non-standard-layout matrices.
pub locality_score: f64,
/// Fraction of elements equal to zero. Only computed for non-empty
/// matrices; `None` otherwise.
pub sparsity: Option<f64>,
/// Value statistics.
pub value_distribution: ValueDistribution,
}
/// Coarse statistics about the values in an input array.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValueDistribution {
/// `(min, max)` of the values. Computed over non-NaN, non-infinite
/// elements for vectors; the matrix analyser fills in a fixed `(0.0, 1.0)`.
pub value_range: (f64, f64),
/// Whether any NaN or infinite element was seen (vector analysis only; the
/// matrix analyser always reports `false`).
pub has_special_values: bool,
/// Clustering summary.
pub clustering: ClusteringInfo,
}
/// Clustering summary of the input values.
///
/// Every producer in this file fills this with the fixed values
/// `cluster_count = 1`, `density = 1.0`, `separation = 0.0`; no clustering
/// analysis is performed here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusteringInfo {
/// Number of clusters.
pub cluster_count: usize,
/// Cluster density — scale not defined in this file.
pub density: f64,
/// Cluster separation — scale not defined in this file.
pub separation: f64,
}
/// Chooses a [`SimdStrategy`] for an operation, runs the operation with it,
/// and records performance metrics.
pub struct AdaptiveSimdOptimizer {
/// User-supplied configuration.
config: AdaptiveSimdConfig,
/// Hardware description obtained once at construction time.
hardware_capabilities: HardwareCapabilities,
/// Previously selected strategies, keyed by a string derived from the
/// operation name and the data characteristics.
strategy_cache: Arc<Mutex<HashMap<String, SimdStrategy>>>,
/// Most recent metrics per `"{operation}_{strategy name}"` key.
performance_cache: Arc<Mutex<HashMap<String, SimdPerformanceMetrics>>>,
/// Every recorded metrics sample per `"{operation}_{strategy name}"` key.
benchmark_results: Arc<Mutex<HashMap<String, Vec<SimdPerformanceMetrics>>>>,
}
impl AdaptiveSimdOptimizer {
/// Builds an optimizer from `config` with empty caches.
///
/// # Errors
/// Propagates any error from `detect_hardware_capabilities`.
pub fn new(config: AdaptiveSimdConfig) -> StatsResult<Self> {
    Self::detect_hardware_capabilities().map(|hardware_capabilities| {
        // All three caches start out empty and are shared behind a mutex.
        let strategy_cache = Arc::new(Mutex::new(HashMap::new()));
        let performance_cache = Arc::new(Mutex::new(HashMap::new()));
        let benchmark_results = Arc::new(Mutex::new(HashMap::new()));
        Self {
            config,
            hardware_capabilities,
            strategy_cache,
            performance_cache,
            benchmark_results,
        }
    })
}
/// Creates an optimizer configured with [`AdaptiveSimdConfig::default`].
///
/// This is an inherent associated function, not an implementation of the
/// [`Default`] trait: it returns a `StatsResult` rather than `Self`.
///
/// # Errors
/// Propagates any error returned by [`AdaptiveSimdOptimizer::new`].
pub fn default() -> StatsResult<Self> {
Self::new(AdaptiveSimdConfig::default())
}
/// Runs `operation` on a 1-D view with the strategy selected for it.
///
/// The data is analysed, a strategy is chosen (possibly from the cache), and
/// `operation` is timed. On success the metrics are computed and recorded.
/// If `operation` returns an error, that error is discarded and the call is
/// retried once through `try_fallback_strategy`.
///
/// # Errors
/// Returns an error if analysis, strategy selection or metric calculation
/// fails, or if the fallback attempt fails as well.
pub fn optimize_vector_operation<F, T>(
    &self,
    operation_name: &str,
    data: ArrayView1<F>,
    operation: impl Fn(&ArrayView1<F>, &SimdStrategy) -> StatsResult<T> + Send + Sync,
) -> StatsResult<SimdOptimizationResult<T>>
where
    F: Float + NumCast + SimdUnifiedOps + Send + Sync + std::fmt::Display,
    T: Send + Sync + std::fmt::Display,
{
    let profile = self.analyzedata_characteristics(&data)?;
    let chosen = self.select_optimal_strategy(operation_name, &profile)?;

    // Time only the user operation itself.
    let timer = Instant::now();
    let outcome = operation(&data, &chosen);
    let elapsed = timer.elapsed();

    let value = match outcome {
        Ok(value) => value,
        Err(_primary_error) => {
            return self.try_fallback_strategy(operation_name, data, operation, &chosen);
        }
    };

    let metrics = self.calculate_performance_metrics(&profile, &chosen, elapsed)?;
    self.update_performance_cache(operation_name, &chosen, &metrics);
    Ok(SimdOptimizationResult {
        result: value,
        strategy_used: chosen,
        metrics,
        success: true,
        fallback_info: None,
    })
}
/// Runs `operation` on a 2-D view with the strategy selected for it.
///
/// Mirrors the vector entry point but uses the matrix-specific analysis,
/// strategy selection and metrics. If `operation` returns an error, that
/// error is discarded and the call is retried once through
/// `try_matrix_fallback_strategy`.
///
/// # Errors
/// Returns an error if analysis, strategy selection or metric calculation
/// fails, or if the fallback attempt fails as well.
pub fn optimize_matrix_operation<F, T>(
    &self,
    operation_name: &str,
    data: ArrayView2<F>,
    operation: impl Fn(&ArrayView2<F>, &SimdStrategy) -> StatsResult<T> + Send + Sync,
) -> StatsResult<SimdOptimizationResult<T>>
where
    F: Float + NumCast + SimdUnifiedOps + Send + Sync + std::fmt::Display,
    T: Send + Sync + std::fmt::Display,
{
    let profile = self.analyze_matrix_characteristics(&data)?;
    let chosen = self.select_optimal_matrix_strategy(operation_name, &profile)?;

    // Time only the user operation itself.
    let timer = Instant::now();
    let outcome = operation(&data, &chosen);
    let elapsed = timer.elapsed();

    let value = match outcome {
        Ok(value) => value,
        Err(_primary_error) => {
            return self.try_matrix_fallback_strategy(operation_name, data, operation, &chosen);
        }
    };

    let metrics = self.calculate_matrix_performance_metrics(&profile, &chosen, elapsed)?;
    self.update_performance_cache(operation_name, &chosen, &metrics);
    Ok(SimdOptimizationResult {
        result: value,
        strategy_used: chosen,
        metrics,
        success: true,
        fallback_info: None,
    })
}
/// Returns the hardware description used by the optimizer.
///
/// NOTE(review): despite the name, nothing is probed here — the function
/// always reports the same fixed x86-64 profile (SSE2/AVX/AVX2, 256-wide
/// vectors, no GPU) and never returns `Err`.
fn detect_hardware_capabilities() -> StatsResult<HardwareCapabilities> {
    const KIB: usize = 1024;
    const MIB: usize = 1024 * KIB;

    let cache_info = CacheHierarchy {
        l1size: 32 * KIB,
        l2size: 256 * KIB,
        l3size: 8 * MIB,
        cache_linesize: 64,
        associativity: vec![8, 8, 16],
    };
    let simd_instructions = vec![
        SimdInstructionSet::SSE2,
        SimdInstructionSet::AVX,
        SimdInstructionSet::AVX2,
    ];

    Ok(HardwareCapabilities {
        simd_instructions,
        vector_width: 256,
        simd_units: 2,
        cache_info,
        memory_bandwidth: 50.0,
        cpu_architecture: CpuArchitecture::X86_64,
        gpu_available: false,
        gpu_capabilities: None,
    })
}
/// Summarises a 1-D view: element count, element size, pointer misalignment
/// and the range of its finite values.
///
/// * `alignment` is the address of the first element modulo 32.
/// * `value_range` covers only elements that are neither NaN nor infinite.
///   When there are no such elements (empty input, or only NaN/infinite
///   values) the range is reported as `(0.0, 0.0)` instead of the
///   `(+inf, -inf)` sentinel pair, which is not a valid range and does not
///   survive JSON serialisation.
/// * `has_special_values` is `true` if any NaN or infinite element was seen.
/// * Access pattern, locality, sparsity and clustering are fixed
///   placeholders (sequential, 1.0, `None`, single cluster).
///
/// This function currently never returns `Err`.
fn analyzedata_characteristics<F>(
    &self,
    data: &ArrayView1<F>,
) -> StatsResult<DataCharacteristics>
where
    F: Float + NumCast + std::fmt::Display,
{
    let size = data.len();
    let elementsize = std::mem::size_of::<F>();
    let alignment = (data.as_ptr() as usize) % 32;

    // `None` until the first finite element is encountered.
    let mut finite_range: Option<(F, F)> = None;
    let mut has_special = false;
    for &value in data.iter() {
        if value.is_nan() || value.is_infinite() {
            has_special = true;
            continue;
        }
        finite_range = Some(match finite_range {
            None => (value, value),
            Some((lo, hi)) => (
                if value < lo { value } else { lo },
                if value > hi { value } else { hi },
            ),
        });
    }

    let value_range = match finite_range {
        Some((lo, hi)) => (lo.to_f64().unwrap_or(0.0), hi.to_f64().unwrap_or(0.0)),
        None => (0.0, 0.0),
    };

    Ok(DataCharacteristics {
        size,
        elementsize,
        alignment,
        access_pattern: MemoryAccessPattern::Sequential,
        locality_score: 1.0,
        sparsity: None,
        value_distribution: ValueDistribution {
            value_range,
            has_special_values: has_special,
            clustering: ClusteringInfo {
                cluster_count: 1,
                density: 1.0,
                separation: 0.0,
            },
        },
    })
}
/// Summarises a 2-D view: element count, element size, pointer misalignment,
/// layout-derived access pattern and the fraction of zero elements.
///
/// * Standard-layout views are reported as `Sequential` with locality 1.0;
///   anything else as `Strided` with locality 0.5.
/// * The reported stride is the magnitude of the first-axis stride. Strides
///   are signed (reversed views have negative strides), so the absolute
///   value is taken rather than casting, which would wrap a negative stride
///   to a huge `usize`.
/// * `sparsity` is the share of elements equal to zero, or `None` for an
///   empty view.
/// * The value distribution is a fixed placeholder (`(0.0, 1.0)`, no special
///   values, single cluster); the data is not scanned for it.
///
/// This function currently never returns `Err`.
fn analyze_matrix_characteristics<F>(
    &self,
    data: &ArrayView2<F>,
) -> StatsResult<DataCharacteristics>
where
    F: Float + NumCast + std::fmt::Display,
{
    let size = data.len();
    let elementsize = std::mem::size_of::<F>();
    let contiguous = data.is_standard_layout();

    let access_pattern = if contiguous {
        MemoryAccessPattern::Sequential
    } else {
        MemoryAccessPattern::Strided {
            stride: data.strides()[0].unsigned_abs(),
        }
    };

    let sparsity = if size > 0 {
        let zero_count = data.iter().filter(|&&x| x == F::zero()).count();
        Some(zero_count as f64 / size as f64)
    } else {
        None
    };

    Ok(DataCharacteristics {
        size,
        elementsize,
        alignment: (data.as_ptr() as usize) % 32,
        access_pattern,
        locality_score: if contiguous { 1.0 } else { 0.5 },
        sparsity,
        value_distribution: ValueDistribution {
            value_range: (0.0, 1.0),
            has_special_values: false,
            clustering: ClusteringInfo {
                cluster_count: 1,
                density: 1.0,
                separation: 0.0,
            },
        },
    })
}
/// Returns the best strategy for `operation_name` on data with the given
/// characteristics, consulting and updating the strategy cache.
///
/// The cache key covers every input that influences the generated strategy:
/// operation name, element count, element size, whether the data is 32-byte
/// aligned, and the access pattern. Leaving the last two out would let a
/// strategy built for, say, sequential data be handed back for strided data
/// of the same size.
///
/// The cache holds at most `config.cachesize` entries. When it is full, an
/// arbitrary existing entry is evicted *before* the new one is inserted, so
/// the entry just computed is never the one thrown away. (`HashMap` has no
/// insertion order, so the victim is not necessarily the oldest entry.) A
/// `cachesize` of 0 disables caching.
///
/// A poisoned cache mutex is tolerated: the cache is simply skipped.
///
/// # Errors
/// Propagates errors from candidate generation or evaluation.
fn select_optimal_strategy(
    &self,
    operation_name: &str,
    characteristics: &DataCharacteristics,
) -> StatsResult<SimdStrategy> {
    let cache_key = format!(
        "{}_{}_{}_{}_{:?}",
        operation_name,
        characteristics.size,
        characteristics.elementsize,
        characteristics.alignment == 0,
        characteristics.access_pattern
    );

    if let Ok(cache) = self.strategy_cache.lock() {
        if let Some(strategy) = cache.get(&cache_key) {
            return Ok(strategy.clone());
        }
    }

    let candidates = self.generate_candidate_strategies(characteristics)?;
    let best_strategy = self.evaluate_strategies(&candidates, characteristics)?;

    let capacity = self.config.cachesize;
    if capacity > 0 {
        if let Ok(mut cache) = self.strategy_cache.lock() {
            // Make room first; overwriting an existing key needs no eviction.
            if cache.len() >= capacity && !cache.contains_key(&cache_key) {
                if let Some(victim) = cache.keys().next().cloned() {
                    cache.remove(&victim);
                }
            }
            cache.insert(cache_key, best_strategy.clone());
        }
    }
    Ok(best_strategy)
}
fn select_optimal_matrix_strategy(
&self,
operation_name: &str,
characteristics: &DataCharacteristics,
) -> StatsResult<SimdStrategy> {
let mut strategy = self.select_optimal_strategy(operation_name, characteristics)?;
if characteristics.size > 1000000 {
strategy.memory_pattern = MemoryAccessPattern::Tiled { tilesize: (64, 64) };
strategy.prefetch_strategy = PrefetchStrategy::Software { distance: 8 };
} else if matches!(
characteristics.access_pattern,
MemoryAccessPattern::Strided { .. }
) {
strategy.memory_pattern = MemoryAccessPattern::Blocked { blocksize: 256 };
}
Ok(strategy)
}
/// Builds the list of strategies to be scored for the given data.
///
/// For every instruction set in the hardware description, in order, a
/// "conservative" candidate (unroll 2, no prefetch, expected speedup 2.0) is
/// emitted. When the optimization level is `Aggressive` or `Extreme`, it is
/// immediately followed by an "aggressive" candidate (unroll 4, dynamic
/// alignment, expected speedup 4.0, software prefetch for more than 10000
/// elements).
///
/// This function currently never returns `Err`.
fn generate_candidate_strategies(
    &self,
    characteristics: &DataCharacteristics,
) -> StatsResult<Vec<SimdStrategy>> {
    let include_aggressive = matches!(
        self.config.optimization_level,
        OptimizationLevel::Aggressive | OptimizationLevel::Extreme
    );
    let pointer_is_aligned = characteristics.alignment == 0;
    let wants_prefetch = characteristics.size > 10000;

    let mut out = Vec::new();
    for isa in self.hardware_capabilities.simd_instructions.iter() {
        // Vector width in bits; anything not listed is treated as 128-bit.
        let bits = match isa {
            SimdInstructionSet::AVX512 => 512,
            SimdInstructionSet::AVX | SimdInstructionSet::AVX2 => 256,
            _ => 128,
        };

        let conservative_alignment = if pointer_is_aligned {
            AlignmentStrategy::ForceAlign
        } else {
            AlignmentStrategy::UnalignedLoads
        };
        out.push(SimdStrategy {
            name: format!("{:?}_conservative", isa),
            instruction_set: isa.clone(),
            vector_width: bits,
            memory_pattern: characteristics.access_pattern.clone(),
            alignment: conservative_alignment,
            unroll_factor: 2,
            prefetch_strategy: PrefetchStrategy::None,
            expected_speedup: 2.0,
        });

        if !include_aggressive {
            continue;
        }
        let aggressive_prefetch = if wants_prefetch {
            PrefetchStrategy::Software { distance: 4 }
        } else {
            PrefetchStrategy::None
        };
        out.push(SimdStrategy {
            name: format!("{:?}_aggressive", isa),
            instruction_set: isa.clone(),
            vector_width: bits,
            memory_pattern: characteristics.access_pattern.clone(),
            alignment: AlignmentStrategy::DynamicAlign,
            unroll_factor: 4,
            prefetch_strategy: aggressive_prefetch,
            expected_speedup: 4.0,
        });
    }
    Ok(out)
}
/// Returns a clone of the highest-scoring candidate.
///
/// Ties are resolved in favour of the earliest candidate, and if no
/// candidate scores above 0.0 the first candidate is returned.
///
/// # Panics
/// Panics if `candidates` is empty.
fn evaluate_strategies(
    &self,
    candidates: &[SimdStrategy],
    characteristics: &DataCharacteristics,
) -> StatsResult<SimdStrategy> {
    let initial = (&candidates[0], 0.0);
    let (winner, _winning_score) =
        candidates
            .iter()
            .fold(initial, |(leader, leader_score), contender| {
                let contender_score = self.calculate_strategy_score(contender, characteristics);
                // Strictly greater: an equal score never displaces the leader.
                if contender_score > leader_score {
                    (contender, contender_score)
                } else {
                    (leader, leader_score)
                }
            });
    Ok(winner.clone())
}
/// Scores a strategy for the given data; higher is better.
///
/// Starts from `expected_speedup` and applies, in order:
/// * x0.5 if the input is smaller than `config.min_simdsize`;
/// * x1.2 if the data is 32-byte aligned and the strategy forces alignment;
/// * an access-pattern factor (sequential 1.0, strided 0.8, random 0.5,
///   anything else 0.7);
/// * x1.5 if the hardware lists the strategy's instruction set.
///
/// Note that the strategy's vector width does not enter the score.
fn calculate_strategy_score(
    &self,
    strategy: &SimdStrategy,
    characteristics: &DataCharacteristics,
) -> f64 {
    let too_small = characteristics.size < self.config.min_simdsize;
    let size_factor = if too_small { 0.5 } else { 1.0 };

    let aligned_and_forced = characteristics.alignment == 0
        && matches!(strategy.alignment, AlignmentStrategy::ForceAlign);
    let alignment_factor = if aligned_and_forced { 1.2 } else { 1.0 };

    let pattern_factor = match &characteristics.access_pattern {
        MemoryAccessPattern::Sequential => 1.0,
        MemoryAccessPattern::Strided { .. } => 0.8,
        MemoryAccessPattern::Random => 0.5,
        _ => 0.7,
    };

    let supported = self
        .hardware_capabilities
        .simd_instructions
        .contains(&strategy.instruction_set);
    let hardware_factor = if supported { 1.5 } else { 1.0 };

    // Left-to-right product; a factor of 1.0 leaves the running value unchanged.
    strategy.expected_speedup * size_factor * alignment_factor * pattern_factor * hardware_factor
}
/// Derives throughput, bandwidth utilisation and SIMD lane efficiency from a
/// timed run.
///
/// * `throughput` is elements per second.
/// * `bandwidth_utilization` is bytes / second / 1e9 divided by the
///   hardware's `memory_bandwidth`, capped at 1.0.
/// * `simd_efficiency` is the fraction of vector lanes doing useful work:
///   `size / (ceil(size / lanes) * lanes)`, where `lanes` is the vector width
///   in bits divided by the element width in bits. A length that is an exact
///   multiple of the lane count scores 1.0; a ragged tail lowers the score.
///   An empty input scores 0.0.
/// * `cache_hit_rate` is a fixed 0.9 and `energy_efficiency` is `None`;
///   neither is measured.
///
/// Degenerate inputs are handled without panicking or producing non-finite
/// numbers: an elapsed time of zero is treated as one nanosecond, and a lane
/// count that would compute to zero (element wider than the vector, or a
/// zero-sized element) is treated as one lane.
///
/// This function currently never returns `Err`.
fn calculate_performance_metrics(
    &self,
    characteristics: &DataCharacteristics,
    strategy: &SimdStrategy,
    execution_time: Duration,
) -> StatsResult<SimdPerformanceMetrics> {
    let size = characteristics.size;

    // Clamp to the timer resolution so the divisions below stay finite.
    let seconds = execution_time.as_secs_f64().max(1e-9);
    let throughput = size as f64 / seconds;

    // Multiply in f64 so very large inputs cannot overflow `usize`.
    let bytes_processed = size as f64 * characteristics.elementsize as f64;
    let bandwidth_used = bytes_processed / seconds / 1e9;
    let bandwidth_utilization = bandwidth_used / self.hardware_capabilities.memory_bandwidth;

    let element_bits = characteristics.elementsize.saturating_mul(8);
    let lanes = strategy
        .vector_width
        .checked_div(element_bits)
        .unwrap_or(0)
        .max(1);
    // Round up: a partially filled final vector still occupies a full vector.
    let vectors_needed = size.div_ceil(lanes);
    let simd_efficiency = if vectors_needed > 0 {
        size as f64 / (vectors_needed as f64 * lanes as f64)
    } else {
        0.0
    };

    Ok(SimdPerformanceMetrics {
        execution_time,
        throughput,
        bandwidth_utilization: bandwidth_utilization.min(1.0),
        cache_hit_rate: 0.9,
        simd_efficiency: simd_efficiency.min(1.0),
        energy_efficiency: None,
    })
}
fn calculate_matrix_performance_metrics(
&self,
characteristics: &DataCharacteristics,
strategy: &SimdStrategy,
execution_time: Duration,
) -> StatsResult<SimdPerformanceMetrics> {
let mut metrics =
self.calculate_performance_metrics(characteristics, strategy, execution_time)?;
metrics.cache_hit_rate = match &characteristics.access_pattern {
MemoryAccessPattern::Sequential => 0.95,
MemoryAccessPattern::Strided { .. } => 0.8,
MemoryAccessPattern::Tiled { .. } => 0.9,
_ => 0.7,
};
Ok(metrics)
}
/// Retries a vector operation with a minimal, fixed strategy after the
/// primary strategy failed.
///
/// The fallback strategy is SSE2, 128-bit, sequential, unaligned loads, no
/// unrolling and no prefetch. On success the result carries a
/// [`FallbackInfo`] naming the strategy that failed. The fallback run is not
/// recorded in the performance cache.
///
/// # Errors
/// Returns the error from `operation` if the fallback run fails too, or any
/// error from analysing the data or computing metrics.
fn try_fallback_strategy<F, T>(
    &self,
    _operation_name: &str,
    data: ArrayView1<F>,
    operation: impl Fn(&ArrayView1<F>, &SimdStrategy) -> StatsResult<T> + Send + Sync,
    failed_strategy: &SimdStrategy,
) -> StatsResult<SimdOptimizationResult<T>>
where
    F: Float + NumCast + SimdUnifiedOps + Send + Sync + std::fmt::Display,
    T: Send + Sync + std::fmt::Display,
{
    let fallback_strategy = SimdStrategy {
        name: "fallback_conservative".to_string(),
        instruction_set: SimdInstructionSet::SSE2,
        vector_width: 128,
        memory_pattern: MemoryAccessPattern::Sequential,
        alignment: AlignmentStrategy::UnalignedLoads,
        unroll_factor: 1,
        prefetch_strategy: PrefetchStrategy::None,
        expected_speedup: 1.0,
    };

    let start_time = Instant::now();
    let result = operation(&data, &fallback_strategy)?;
    let execution_time = start_time.elapsed();

    let characteristics = self.analyzedata_characteristics(&data)?;
    let metrics =
        self.calculate_performance_metrics(&characteristics, &fallback_strategy, execution_time)?;

    Ok(SimdOptimizationResult {
        result,
        strategy_used: fallback_strategy,
        metrics,
        success: true,
        fallback_info: Some(FallbackInfo {
            reason: format!("Primary strategy '{}' failed", failed_strategy.name),
            fallback_strategy: "conservative_sse2".to_string(),
            performance_impact: 0.5,
        }),
    })
}
/// Retries a matrix operation with a minimal, fixed strategy after the
/// primary strategy failed.
///
/// The fallback strategy is SSE2, 128-bit, sequential, unaligned loads, no
/// unrolling and no prefetch. On success the result carries a
/// [`FallbackInfo`] naming the strategy that failed. The fallback run is not
/// recorded in the performance cache.
///
/// # Errors
/// Returns the error from `operation` if the fallback run fails too, or any
/// error from analysing the data or computing metrics.
fn try_matrix_fallback_strategy<F, T>(
    &self,
    _operation_name: &str,
    data: ArrayView2<F>,
    operation: impl Fn(&ArrayView2<F>, &SimdStrategy) -> StatsResult<T> + Send + Sync,
    failed_strategy: &SimdStrategy,
) -> StatsResult<SimdOptimizationResult<T>>
where
    F: Float + NumCast + SimdUnifiedOps + Send + Sync + std::fmt::Display,
    T: Send + Sync + std::fmt::Display,
{
    let fallback_strategy = SimdStrategy {
        name: "matrix_fallback_conservative".to_string(),
        instruction_set: SimdInstructionSet::SSE2,
        vector_width: 128,
        memory_pattern: MemoryAccessPattern::Sequential,
        alignment: AlignmentStrategy::UnalignedLoads,
        unroll_factor: 1,
        prefetch_strategy: PrefetchStrategy::None,
        expected_speedup: 1.0,
    };

    let start_time = Instant::now();
    let result = operation(&data, &fallback_strategy)?;
    let execution_time = start_time.elapsed();

    let characteristics = self.analyze_matrix_characteristics(&data)?;
    let metrics = self.calculate_matrix_performance_metrics(
        &characteristics,
        &fallback_strategy,
        execution_time,
    )?;

    Ok(SimdOptimizationResult {
        result,
        strategy_used: fallback_strategy,
        metrics,
        success: true,
        fallback_info: Some(FallbackInfo {
            reason: format!(
                "Primary matrix strategy '{}' failed",
                failed_strategy.name
            ),
            fallback_strategy: "conservative_matrix_sse2".to_string(),
            performance_impact: 0.6,
        }),
    })
}
/// Records `metrics` for the `(operation, strategy)` pair.
///
/// Does nothing when profiling is disabled. Otherwise the latest metrics
/// replace the previous entry in the performance cache, and a copy is
/// appended to the per-key history in `benchmark_results`. Each lock is
/// taken and released separately; a poisoned mutex is silently skipped.
///
/// NOTE(review): the history vector grows by one entry per call and is never
/// trimmed in the code visible here.
fn update_performance_cache(
    &self,
    operation_name: &str,
    strategy: &SimdStrategy,
    metrics: &SimdPerformanceMetrics,
) {
    if self.config.enable_profiling {
        let key = format!("{}_{}", operation_name, strategy.name);

        if let Ok(mut latest) = self.performance_cache.lock() {
            latest.insert(key.clone(), metrics.clone());
        }

        if let Ok(mut history) = self.benchmark_results.lock() {
            history.entry(key).or_default().push(metrics.clone());
        }
    }
}
/// Summarises the metrics recorded so far.
///
/// * `total_operations` is the number of distinct
///   `"{operation}_{strategy}"` keys in the performance cache, not the number
///   of calls made.
/// * `average_speedup` is the mean `simd_efficiency` over those entries (0.0
///   when nothing has been recorded). Despite the field name, no speedup
///   ratio is measured.
/// * `best_strategies` lists every key with its `simd_efficiency`, in
///   unspecified order.
///
/// Only the performance cache is locked; the benchmark history is not needed
/// for this summary. A poisoned mutex does not cause a panic: the cache holds
/// plain data that stays readable, so its contents are used as they are,
/// consistent with the rest of this type tolerating poisoned locks.
pub fn get_performance_statistics(&self) -> PerformanceStatistics {
    let cache = self
        .performance_cache
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());

    let total_operations = cache.len();
    let avg_speedup = if cache.is_empty() {
        0.0
    } else {
        cache.values().map(|m| m.simd_efficiency).sum::<f64>() / cache.len() as f64
    };
    let best_strategies: Vec<(String, f64)> = cache
        .iter()
        .map(|(name, metrics)| (name.clone(), metrics.simd_efficiency))
        .collect();

    PerformanceStatistics {
        total_operations,
        average_speedup: avg_speedup,
        best_strategies,
        hardware_utilization: self.calculate_hardware_utilization(&cache),
    }
}
/// Averages bandwidth utilisation and cache hit rate over all cached metrics.
///
/// Both averages are 0.0 for an empty cache. `simd_utilization` is a fixed
/// 0.8 and `energy_efficiency` is `None`; neither is derived from the cache.
fn calculate_hardware_utilization(
    &self,
    cache: &HashMap<String, SimdPerformanceMetrics>,
) -> HardwareUtilization {
    // Mean of one metric across every cache entry, or 0.0 if there are none.
    let mean_of = |pick: fn(&SimdPerformanceMetrics) -> f64| -> f64 {
        if cache.is_empty() {
            0.0
        } else {
            cache.values().map(pick).sum::<f64>() / cache.len() as f64
        }
    };

    let memory_bandwidth_utilization = mean_of(|m| m.bandwidth_utilization);
    let cache_efficiency = mean_of(|m| m.cache_hit_rate);

    HardwareUtilization {
        simd_utilization: 0.8,
        memory_bandwidth_utilization,
        cache_efficiency,
        energy_efficiency: None,
    }
}
}
/// Aggregate view over the optimizer's performance cache, produced by
/// `AdaptiveSimdOptimizer::get_performance_statistics`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceStatistics {
/// Number of entries in the performance cache (one per
/// operation/strategy key).
pub total_operations: usize,
/// Mean `simd_efficiency` over the cached entries; 0.0 when the cache is
/// empty.
pub average_speedup: f64,
/// `(cache key, simd_efficiency)` for every cached entry.
pub best_strategies: Vec<(String, f64)>,
/// Averaged hardware utilisation figures.
pub hardware_utilization: HardwareUtilization,
}
/// Hardware utilisation figures averaged over the performance cache.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardwareUtilization {
/// SIMD utilisation; the producer in this file always reports 0.8.
pub simd_utilization: f64,
/// Mean `bandwidth_utilization` of the cached metrics.
pub memory_bandwidth_utilization: f64,
/// Mean `cache_hit_rate` of the cached metrics.
pub cache_efficiency: f64,
/// Energy efficiency; the producer in this file always reports `None`.
pub energy_efficiency: Option<f64>,
}
/// Convenience constructor: builds an [`AdaptiveSimdOptimizer`] with the
/// default configuration.
///
/// # Errors
/// Propagates any error from `AdaptiveSimdOptimizer::default`.
#[allow(dead_code)]
pub fn create_adaptive_simd_optimizer() -> StatsResult<AdaptiveSimdOptimizer> {
AdaptiveSimdOptimizer::default()
}
/// One-shot helper: runs `operation` on `data` through a freshly created
/// default optimizer.
///
/// A new optimizer is built on every call, so strategy and performance caches
/// do not carry over between calls; hold on to an [`AdaptiveSimdOptimizer`]
/// instead if that matters.
///
/// # Errors
/// Propagates errors from constructing the optimizer or from
/// `optimize_vector_operation`.
#[allow(dead_code)]
pub fn optimize_simd_operation<F, T>(
    operation_name: &str,
    data: ArrayView1<F>,
    operation: impl Fn(&ArrayView1<F>, &SimdStrategy) -> StatsResult<T> + Send + Sync,
) -> StatsResult<SimdOptimizationResult<T>>
where
    F: Float + NumCast + SimdUnifiedOps + Send + Sync + std::fmt::Display,
    T: Send + Sync + std::fmt::Display,
{
    AdaptiveSimdOptimizer::default().and_then(|single_use_optimizer| {
        single_use_optimizer.optimize_vector_operation(operation_name, data, operation)
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    /// Optimizer built with the default configuration.
    fn default_optimizer() -> AdaptiveSimdOptimizer {
        AdaptiveSimdOptimizer::default().expect("Operation failed")
    }

    /// Characteristics of 1000 aligned, sequential, 8-byte elements.
    fn sequential_f64_characteristics() -> DataCharacteristics {
        let clustering = ClusteringInfo {
            cluster_count: 1,
            density: 1.0,
            separation: 0.0,
        };
        let value_distribution = ValueDistribution {
            value_range: (0.0, 1.0),
            has_special_values: false,
            clustering,
        };
        DataCharacteristics {
            size: 1000,
            elementsize: 8,
            alignment: 0,
            access_pattern: MemoryAccessPattern::Sequential,
            locality_score: 1.0,
            sparsity: None,
            value_distribution,
        }
    }

    #[test]
    fn test_adaptive_simd_config() {
        let config = AdaptiveSimdConfig::default();
        assert!(config.auto_detect_hardware);
        assert!(config.enable_profiling);
        assert!(config.min_simdsize > 0);
    }

    #[test]
    fn test_hardware_detection() {
        let capabilities =
            AdaptiveSimdOptimizer::detect_hardware_capabilities().expect("Operation failed");
        assert!(!capabilities.simd_instructions.is_empty());
        assert!(capabilities.vector_width > 0);
    }

    #[test]
    fn testdata_characteristics_analysis() {
        let optimizer = default_optimizer();
        let data = array![1.0f64, 2.0, 3.0, 4.0, 5.0];
        let characteristics = optimizer
            .analyzedata_characteristics(&data.view())
            .expect("Operation failed");
        assert_eq!(characteristics.size, 5);
        assert_eq!(characteristics.elementsize, 8);
    }

    #[test]
    fn test_strategy_generation() {
        let optimizer = default_optimizer();
        let strategies = optimizer
            .generate_candidate_strategies(&sequential_f64_characteristics())
            .expect("Operation failed");
        assert!(!strategies.is_empty());
    }

    #[test]
    fn test_strategy_selection() {
        let optimizer = default_optimizer();
        let strategy = optimizer
            .select_optimal_strategy("test_op", &sequential_f64_characteristics())
            .expect("Operation failed");
        assert!(!strategy.name.is_empty());
        assert!(strategy.expected_speedup > 0.0);
    }

    #[test]
    fn test_performance_metrics_calculation() {
        let optimizer = default_optimizer();
        let characteristics = sequential_f64_characteristics();
        let strategy = SimdStrategy {
            name: "test_strategy".to_string(),
            instruction_set: SimdInstructionSet::AVX2,
            vector_width: 256,
            memory_pattern: MemoryAccessPattern::Sequential,
            alignment: AlignmentStrategy::ForceAlign,
            unroll_factor: 2,
            prefetch_strategy: PrefetchStrategy::None,
            expected_speedup: 2.0,
        };
        let metrics = optimizer
            .calculate_performance_metrics(&characteristics, &strategy, Duration::from_millis(10))
            .expect("Operation failed");
        assert!(metrics.throughput > 0.0);
        assert!(metrics.simd_efficiency >= 0.0 && metrics.simd_efficiency <= 1.0);
    }
}