quantrs2-sim 0.1.3

Quantum circuit simulators for the QuantRS2 framework
//! Memory prefetching and data locality optimizations for quantum simulations.
//!
//! This module implements advanced memory prefetching strategies, data locality
//! optimizations, and NUMA-aware memory management for high-performance quantum
//! circuit simulation with large state vectors.
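//!
//! # Example
//!
//! A minimal usage sketch. The `use` path is illustrative and assumes this
//! module is exposed from the crate root; adjust it to the actual location
//! (the fence is `ignore`d for that reason):
//!
//! ```ignore
//! use quantrs2_sim::memory_prefetching::{
//!     MemoryPrefetcher, NUMATopology, PrefetchConfig,
//! };
//!
//! let mut prefetcher =
//!     MemoryPrefetcher::new(PrefetchConfig::default(), NUMATopology::default())
//!         .expect("prefetcher construction");
//! prefetcher.start_prefetch_threads().expect("worker startup");
//!
//! // Feed the prefetcher observed state-vector accesses; it predicts the
//! // next addresses and enqueues them for the background workers.
//! for amplitude_index in 0..1024usize {
//!     prefetcher.record_access(amplitude_index * 16).expect("record");
//! }
//! println!("{:?}", prefetcher.get_stats());
//! ```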
use crate::error::Result;
use crate::memory_bandwidth_optimization::OptimizedStateVector;
use std::collections::{BTreeMap, HashMap, VecDeque};
use std::sync::{Arc, Mutex, RwLock};
use std::thread;
use std::time::{Duration, Instant};
/// Prefetching strategies for memory access optimization
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PrefetchStrategy {
    /// No prefetching
    None,
    /// Simple sequential prefetching
    Sequential,
    /// Stride-based prefetching
    Stride,
    /// Pattern-based prefetching
    Pattern,
    /// Machine learning guided prefetching
    MLGuided,
    /// Adaptive prefetching based on access patterns
    Adaptive,
    /// NUMA-aware prefetching
    NUMAAware,
}
/// Data locality optimization strategies
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LocalityStrategy {
    /// Temporal locality optimization
    Temporal,
    /// Spatial locality optimization
    Spatial,
    /// Loop-based locality optimization
    Loop,
    /// Cache-conscious data placement
    CacheConscious,
    /// NUMA topology aware placement
    NUMATopology,
    /// Hybrid temporal-spatial optimization
    Hybrid,
}
/// NUMA topology information
#[derive(Debug, Clone)]
pub struct NUMATopology {
    /// Number of NUMA nodes
    pub num_nodes: usize,
    /// Memory size per node in bytes
    pub memory_per_node: Vec<usize>,
    /// CPU cores per node
    pub cores_per_node: Vec<usize>,
    /// Inter-node latency matrix (cycles)
    pub latency_matrix: Vec<Vec<usize>>,
    /// Memory bandwidth per node (bytes/sec)
    pub bandwidth_per_node: Vec<f64>,
    /// Current thread to node mapping
    pub thread_node_mapping: HashMap<usize, usize>,
}
impl Default for NUMATopology {
    fn default() -> Self {
        Self {
            num_nodes: 1,
            memory_per_node: vec![64 * 1024 * 1024 * 1024],
            cores_per_node: vec![8],
            latency_matrix: vec![vec![0]],
            bandwidth_per_node: vec![100.0 * 1024.0 * 1024.0 * 1024.0],
            thread_node_mapping: HashMap::new(),
        }
    }
}
/// Prefetching configuration
#[derive(Debug, Clone)]
pub struct PrefetchConfig {
    /// Primary prefetching strategy
    pub strategy: PrefetchStrategy,
    /// Prefetch distance (cache lines ahead)
    pub distance: usize,
    /// Prefetch degree (number of streams)
    pub degree: usize,
    /// Enable hardware prefetcher hints
    pub hardware_hints: bool,
    /// Prefetch threshold (minimum confidence)
    pub threshold: f64,
    /// Maximum prefetch queue size
    pub max_queue_size: usize,
    /// Enable cross-page prefetching
    pub cross_page_prefetch: bool,
    /// Adaptive prefetch adjustment
    pub adaptive_adjustment: bool,
}
impl Default for PrefetchConfig {
    fn default() -> Self {
        Self {
            strategy: PrefetchStrategy::Adaptive,
            distance: 8,
            degree: 4,
            hardware_hints: true,
            threshold: 0.7,
            max_queue_size: 64,
            cross_page_prefetch: true,
            adaptive_adjustment: true,
        }
    }
}
/// Memory access pattern predictor
#[derive(Debug)]
pub struct AccessPatternPredictor {
    /// Recent access history
    access_history: VecDeque<usize>,
    /// Detected stride patterns
    stride_patterns: HashMap<isize, u64>,
    /// Pattern confidence scores
    pattern_confidence: HashMap<String, f64>,
    /// Machine learning model weights (simplified)
    ml_weights: Vec<f64>,
    /// Prediction cache
    prediction_cache: HashMap<usize, Vec<usize>>,
    /// Statistics
    correct_predictions: u64,
    total_predictions: u64,
}
impl Default for AccessPatternPredictor {
    fn default() -> Self {
        Self {
            access_history: VecDeque::with_capacity(1000),
            stride_patterns: HashMap::new(),
            pattern_confidence: HashMap::new(),
            ml_weights: vec![0.5; 16],
            prediction_cache: HashMap::new(),
            correct_predictions: 0,
            total_predictions: 0,
        }
    }
}
impl AccessPatternPredictor {
    /// Record a memory access
    pub fn record_access(&mut self, address: usize) {
        self.access_history.push_back(address);
        if self.access_history.len() > 1000 {
            self.access_history.pop_front();
        }
        if self.access_history.len() >= 2 {
            let prev_addr = self.access_history[self.access_history.len() - 2];
            let stride = address as isize - prev_addr as isize;
            *self.stride_patterns.entry(stride).or_insert(0) += 1;
        }
        self.update_pattern_confidence();
    }
    /// Predict next memory accesses
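    ///
    /// A small sketch: after a run of fixed-stride accesses, the predictor
    /// extrapolates that stride (`ignore`d since the example depends on the
    /// crate's module layout):
    ///
    /// ```ignore
    /// let mut predictor = AccessPatternPredictor::default();
    /// for i in 0..8 {
    ///     predictor.record_access(i * 64); // one cache line per step
    /// }
    /// assert_eq!(
    ///     predictor.predict_next_accesses(3),
    ///     vec![8 * 64, 9 * 64, 10 * 64]
    /// );
    /// ```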
    pub fn predict_next_accesses(&mut self, count: usize) -> Vec<usize> {
        if self.access_history.is_empty() {
            return Vec::new();
        }
        // Safety: We already checked access_history is not empty above
        let current_addr = *self
            .access_history
            .back()
            .expect("access_history is not empty (checked above)");
        if let Some(cached) = self.prediction_cache.get(&current_addr) {
            return cached.clone();
        }
        let predictions = match self.get_dominant_pattern() {
            PredictedPattern::Stride(stride) => {
                Self::predict_stride_pattern(current_addr, stride, count)
            }
            PredictedPattern::Sequential => Self::predict_sequential_pattern(current_addr, count),
            PredictedPattern::Random => Self::predict_random_pattern(current_addr, count),
            PredictedPattern::MLGuided => self.predict_ml_pattern(current_addr, count),
        };
        self.prediction_cache
            .insert(current_addr, predictions.clone());
        if self.prediction_cache.len() > 1000 {
            self.prediction_cache.clear();
        }
        self.total_predictions += 1;
        predictions
    }
    /// Update pattern confidence based on recent accuracy
    fn update_pattern_confidence(&mut self) {
        if self.total_predictions > 0 {
            let accuracy = self.correct_predictions as f64 / self.total_predictions as f64;
            self.pattern_confidence
                .insert("stride".to_string(), accuracy);
            self.pattern_confidence
                .insert("sequential".to_string(), accuracy * 0.9);
            self.pattern_confidence
                .insert("ml".to_string(), (accuracy * 1.1).min(1.0));
        }
    }
    /// Get the dominant access pattern
    fn get_dominant_pattern(&self) -> PredictedPattern {
        let dominant_stride = self
            .stride_patterns
            .iter()
            .max_by_key(|(_, &count)| count)
            .map(|(&stride, _)| stride);
        match dominant_stride {
            Some(stride) if stride == 1 => PredictedPattern::Sequential,
            Some(stride) if stride != 0 => PredictedPattern::Stride(stride),
            _ => {
                let ml_confidence = self.pattern_confidence.get("ml").unwrap_or(&0.0);
                if *ml_confidence > 0.8 {
                    PredictedPattern::MLGuided
                } else {
                    PredictedPattern::Random
                }
            }
        }
    }
    /// Predict stride-based pattern
    fn predict_stride_pattern(current_addr: usize, stride: isize, count: usize) -> Vec<usize> {
        let mut predictions = Vec::with_capacity(count);
        let mut addr = current_addr;
        for _ in 0..count {
            addr = (addr as isize + stride) as usize;
            predictions.push(addr);
        }
        predictions
    }
    /// Predict sequential pattern
    fn predict_sequential_pattern(current_addr: usize, count: usize) -> Vec<usize> {
        (1..=count).map(|i| current_addr + i).collect()
    }
    /// Predict random pattern (simplified)
    fn predict_random_pattern(current_addr: usize, count: usize) -> Vec<usize> {
        (1..=count).map(|i| current_addr + i * 64).collect()
    }
    /// Predict using machine learning model
    fn predict_ml_pattern(&self, current_addr: usize, count: usize) -> Vec<usize> {
        let mut predictions = Vec::with_capacity(count);
        let features = self.extract_features();
        for i in 0..count {
            let prediction = self.ml_predict(&features, i);
            predictions.push((current_addr as f64 + prediction) as usize);
        }
        predictions
    }
    /// Extract features for ML prediction
    fn extract_features(&self) -> Vec<f64> {
        let mut features = [0.0; 16];
        if self.access_history.len() >= 4 {
            let recent: Vec<_> = self.access_history.iter().rev().take(4).collect();
            for i in 0..3 {
                if i + 1 < recent.len() {
                    let stride = *recent[i] as f64 - *recent[i + 1] as f64;
                    features[i] = stride / 1000.0;
                }
            }
            features[3] = (*recent[0] % 1024) as f64 / 1024.0;
            features[4] = (*recent[0] / 1024) as f64;
            let dominant_stride = self
                .stride_patterns
                .iter()
                .max_by_key(|(_, &count)| count)
                .map_or(0, |(&stride, _)| stride);
            features[5] = dominant_stride as f64 / 1000.0;
        }
        features.to_vec()
    }
    /// Simple ML prediction
    fn ml_predict(&self, features: &[f64], step: usize) -> f64 {
        let mut prediction = 0.0;
        for (i, &feature) in features.iter().enumerate() {
            if i < self.ml_weights.len() {
                prediction += feature * self.ml_weights[i];
            }
        }
        prediction * (step + 1) as f64
    }
    /// Update ML weights based on prediction accuracy
    pub fn update_ml_weights(&mut self, predictions: &[usize], actual: &[usize]) {
        if predictions.len() != actual.len() || predictions.is_empty() {
            return;
        }
        let learning_rate = 0.01;
        let mut total_abs_error = 0.0;
        for (&pred, &act) in predictions.iter().zip(actual.iter()) {
            let error = act as f64 - pred as f64;
            total_abs_error += error.abs();
            // Crude update: every weight is nudged by the same scaled error;
            // a real model would weight by each feature's contribution.
            for weight in &mut self.ml_weights {
                *weight += learning_rate * error * 0.1;
            }
        }
        // Credit the batch as one correct prediction when the mean absolute
        // error stays within a cache line; otherwise `correct_predictions`
        // (and hence `get_accuracy`) would never advance.
        if total_abs_error / predictions.len() as f64 <= 64.0 {
            self.correct_predictions += 1;
        }
    }
    /// Get prediction accuracy
    #[must_use]
    pub fn get_accuracy(&self) -> f64 {
        if self.total_predictions > 0 {
            self.correct_predictions as f64 / self.total_predictions as f64
        } else {
            0.0
        }
    }
}
/// Predicted access pattern types
#[derive(Debug, Clone)]
enum PredictedPattern {
    Stride(isize),
    Sequential,
    Random,
    MLGuided,
}
/// Memory prefetching engine
#[derive(Debug)]
pub struct MemoryPrefetcher {
    /// Prefetch configuration
    config: PrefetchConfig,
    /// Access pattern predictor
    predictor: Arc<Mutex<AccessPatternPredictor>>,
    /// Prefetch queue
    prefetch_queue: Arc<Mutex<VecDeque<PrefetchRequest>>>,
    /// NUMA topology information
    numa_topology: NUMATopology,
    /// Prefetch statistics
    stats: Arc<RwLock<PrefetchStats>>,
    /// Active prefetch threads
    prefetch_threads: Vec<thread::JoinHandle<()>>,
}
/// Prefetch request
#[derive(Debug, Clone)]
pub struct PrefetchRequest {
    /// Memory address to prefetch
    pub address: usize,
    /// Prefetch priority (0.0 to 1.0)
    pub priority: f64,
    /// Prefetch hint type
    pub hint_type: PrefetchHint,
    /// Request timestamp
    pub timestamp: Instant,
}
/// Prefetch hint types
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PrefetchHint {
    /// Temporal hint - data will be reused soon
    Temporal,
    /// Non-temporal hint - data will not be reused
    NonTemporal,
    /// L1 cache hint
    L1,
    /// L2 cache hint
    L2,
    /// L3 cache hint
    L3,
    /// Write hint - data will be written
    Write,
}
/// Prefetch statistics
#[derive(Debug, Clone, Default)]
pub struct PrefetchStats {
    /// Total prefetch requests issued
    pub total_requests: u64,
    /// Successful prefetches (data was actually used)
    pub successful_prefetches: u64,
    /// Failed prefetches (data was not used)
    pub failed_prefetches: u64,
    /// Average prefetch latency
    pub average_latency: Duration,
    /// Memory bandwidth utilization
    pub bandwidth_utilization: f64,
    /// Cache hit rate improvement
    pub cache_hit_improvement: f64,
}
impl MemoryPrefetcher {
    /// Create a new memory prefetcher
    pub fn new(config: PrefetchConfig, numa_topology: NUMATopology) -> Result<Self> {
        let prefetcher = Self {
            config,
            predictor: Arc::new(Mutex::new(AccessPatternPredictor::default())),
            prefetch_queue: Arc::new(Mutex::new(VecDeque::new())),
            numa_topology,
            stats: Arc::new(RwLock::new(PrefetchStats::default())),
            prefetch_threads: Vec::new(),
        };
        Ok(prefetcher)
    }
    /// Start prefetching background threads
    pub fn start_prefetch_threads(&mut self) -> Result<()> {
        let num_threads = self.config.degree.min(4);
        for thread_id in 0..num_threads {
            let queue = Arc::clone(&self.prefetch_queue);
            let stats = Arc::clone(&self.stats);
            let config = self.config.clone();
            let handle = thread::spawn(move || {
                Self::prefetch_worker_thread(thread_id, queue, stats, config);
            });
            self.prefetch_threads.push(handle);
        }
        Ok(())
    }
    /// Worker thread for prefetching
    fn prefetch_worker_thread(
        _thread_id: usize,
        queue: Arc<Mutex<VecDeque<PrefetchRequest>>>,
        stats: Arc<RwLock<PrefetchStats>>,
        _config: PrefetchConfig,
    ) {
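        // Workers run for the lifetime of the process: there is no shutdown
        // signal, so the handles stored in `prefetch_threads` are never joined.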
        loop {
            let request = {
                let mut q = queue
                    .lock()
                    .unwrap_or_else(|poisoned| poisoned.into_inner());
                q.pop_front()
            };
            if let Some(req) = request {
                let start_time = Instant::now();
                Self::execute_prefetch(&req);
                let latency = start_time.elapsed();
                if let Ok(mut s) = stats.write() {
                    s.total_requests += 1;
                    s.average_latency = if s.total_requests == 1 {
                        latency
                    } else {
                        Duration::from_nanos(u128::midpoint(
                            s.average_latency.as_nanos(),
                            latency.as_nanos(),
                        ) as u64)
                    };
                }
            } else {
                thread::sleep(Duration::from_micros(100));
            }
        }
    }
    /// Execute a prefetch request
    fn execute_prefetch(request: &PrefetchRequest) {
        unsafe {
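            // Software-emulated prefetch: a volatile read touches the target
            // cache line so the hardware loads it. All hint types currently
            // fall through to the same read; the address must point into a
            // live, mapped allocation or this read is undefined behavior.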
            match request.hint_type {
                PrefetchHint::Temporal
                | PrefetchHint::L1
                | PrefetchHint::L2
                | PrefetchHint::L3
                | PrefetchHint::NonTemporal
                | PrefetchHint::Write => {
                    let _ = std::ptr::read_volatile(request.address as *const u8);
                }
            }
        }
    }
    /// Record a memory access and potentially trigger prefetching
    pub fn record_access(&self, address: usize) -> Result<()> {
        if let Ok(mut predictor) = self.predictor.lock() {
            predictor.record_access(address);
            let predictions = predictor.predict_next_accesses(self.config.distance);
            if let Ok(mut queue) = self.prefetch_queue.lock() {
                for (i, &pred_addr) in predictions.iter().enumerate() {
                    if queue.len() < self.config.max_queue_size {
                        let priority = 1.0 - (i as f64 / predictions.len() as f64);
                        let hint_type = Self::determine_prefetch_hint(pred_addr, i);
                        queue.push_back(PrefetchRequest {
                            address: pred_addr,
                            priority,
                            hint_type,
                            timestamp: Instant::now(),
                        });
                    }
                }
            }
        }
        Ok(())
    }
    /// Determine appropriate prefetch hint based on address and distance
    const fn determine_prefetch_hint(_address: usize, distance: usize) -> PrefetchHint {
        match distance {
            0..=2 => PrefetchHint::L1,
            3..=6 => PrefetchHint::L2,
            7..=12 => PrefetchHint::L3,
            _ => PrefetchHint::NonTemporal,
        }
    }
    /// Get prefetch statistics
    #[must_use]
    pub fn get_stats(&self) -> PrefetchStats {
        self.stats
            .read()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
            .clone()
    }
    /// Optimize prefetch strategy based on performance feedback
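    ///
    /// A sketch of the feedback loop, continuing from the module-level
    /// example (`ignore`d; `prefetcher` is assumed to be in scope):
    ///
    /// ```ignore
    /// let feedback = PerformanceFeedback {
    ///     cache_hit_rate: 0.72,        // below 0.8 -> grow prefetch distance
    ///     bandwidth_utilization: 0.55, // below 0.6 -> add a prefetch stream
    ///     memory_latency: Duration::from_nanos(120),
    ///     cpu_utilization: 0.6,
    /// };
    /// prefetcher.optimize_strategy(&feedback)?;
    /// ```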
    pub fn optimize_strategy(&mut self, performance_feedback: &PerformanceFeedback) -> Result<()> {
        if !self.config.adaptive_adjustment {
            return Ok(());
        }
        if performance_feedback.cache_hit_rate < 0.8 {
            self.config.distance = (self.config.distance + 2).min(16);
        } else if performance_feedback.cache_hit_rate > 0.95 {
            self.config.distance = (self.config.distance.saturating_sub(1)).max(2);
        }
        if performance_feedback.bandwidth_utilization < 0.6 {
            self.config.degree = (self.config.degree + 1).min(8);
        } else if performance_feedback.bandwidth_utilization > 0.9 {
            self.config.degree = (self.config.degree.saturating_sub(1)).max(1);
        }
        if self.config.strategy == PrefetchStrategy::MLGuided {
            if let Ok(mut predictor) = self.predictor.lock() {
                let accuracy_improvement = performance_feedback.cache_hit_rate - 0.8;
                predictor
                    .ml_weights
                    .iter_mut()
                    .for_each(|w| *w += accuracy_improvement * 0.01);
            }
        }
        Ok(())
    }
}
/// Performance feedback for prefetch optimization
#[derive(Debug, Clone)]
pub struct PerformanceFeedback {
    /// Current cache hit rate (0.0 to 1.0)
    pub cache_hit_rate: f64,
    /// Memory bandwidth utilization (0.0 to 1.0)
    pub bandwidth_utilization: f64,
    /// Average memory access latency
    pub memory_latency: Duration,
    /// CPU utilization (0.0 to 1.0)
    pub cpu_utilization: f64,
}
/// Data locality optimizer
#[derive(Debug)]
pub struct DataLocalityOptimizer {
    /// Optimization strategy
    strategy: LocalityStrategy,
    /// NUMA topology
    numa_topology: NUMATopology,
    /// Memory region tracking
    memory_regions: HashMap<usize, MemoryRegionInfo>,
    /// Access pattern analyzer
    access_analyzer: AccessPatternAnalyzer,
}
/// Memory region information
#[derive(Debug, Clone)]
pub struct MemoryRegionInfo {
    /// Start address of the region
    pub start_address: usize,
    /// Size of the region in bytes
    pub size: usize,
    /// NUMA node where data is located
    pub numa_node: usize,
    /// Access frequency
    pub access_frequency: u64,
    /// Last access time
    pub last_access: Instant,
    /// Access pattern type
    pub access_pattern: AccessPatternType,
}
/// Access pattern analyzer
#[derive(Debug)]
pub struct AccessPatternAnalyzer {
    /// Temporal access patterns
    temporal_patterns: BTreeMap<Instant, Vec<usize>>,
    /// Spatial access patterns
    spatial_patterns: HashMap<usize, Vec<usize>>,
    /// Loop detection state
    loop_detection: LoopDetectionState,
}
/// Loop detection state
#[derive(Debug)]
pub struct LoopDetectionState {
    /// Loop start candidates
    loop_starts: HashMap<usize, usize>,
    /// Current loop iteration
    current_iteration: Vec<usize>,
    /// Detected loops
    detected_loops: Vec<LoopPattern>,
}
/// Detected loop pattern
#[derive(Debug, Clone)]
pub struct LoopPattern {
    /// Loop start address
    pub start_address: usize,
    /// Loop stride
    pub stride: isize,
    /// Loop iterations
    pub iterations: usize,
    /// Loop confidence
    pub confidence: f64,
}
/// Access pattern types
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccessPatternType {
    Sequential,
    Random,
    Strided,
    Loop,
    Temporal,
    Hybrid,
}
impl DataLocalityOptimizer {
    /// Create a new data locality optimizer
    #[must_use]
    pub fn new(strategy: LocalityStrategy, numa_topology: NUMATopology) -> Self {
        Self {
            strategy,
            numa_topology,
            memory_regions: HashMap::new(),
            access_analyzer: AccessPatternAnalyzer {
                temporal_patterns: BTreeMap::new(),
                spatial_patterns: HashMap::new(),
                loop_detection: LoopDetectionState {
                    loop_starts: HashMap::new(),
                    current_iteration: Vec::new(),
                    detected_loops: Vec::new(),
                },
            },
        }
    }
    /// Optimize data placement for better locality
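    ///
    /// A hedged sketch, reusing the `OptimizedStateVector::new(num_qubits,
    /// config)` constructor exercised by this module's tests:
    ///
    /// ```ignore
    /// let mut optimizer = DataLocalityOptimizer::new(
    ///     LocalityStrategy::Hybrid,
    ///     NUMATopology::default(),
    /// );
    /// let config = MemoryOptimizationConfig::default();
    /// let mut state = OptimizedStateVector::new(4, config).expect("state vector");
    /// // A sequential sweep over the first 64 amplitudes.
    /// let accesses: Vec<usize> = (0..64).map(|i| i * 16).collect();
    /// let report = optimizer
    ///     .optimize_data_placement(&mut state, &accesses)
    ///     .expect("optimization");
    /// println!("locality improvement: {:.2}", report.locality_improvement);
    /// ```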
    pub fn optimize_data_placement(
        &mut self,
        state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<LocalityOptimizationResult> {
        let start_time = Instant::now();
        self.analyze_access_patterns(access_pattern)?;
        let optimization_result = match self.strategy {
            LocalityStrategy::Temporal => {
                Self::optimize_temporal_locality(state_vector, access_pattern)?
            }
            LocalityStrategy::Spatial => {
                Self::optimize_spatial_locality(state_vector, access_pattern)?
            }
            LocalityStrategy::Loop => self.optimize_loop_locality(state_vector, access_pattern)?,
            LocalityStrategy::CacheConscious => {
                Self::optimize_cache_conscious(state_vector, access_pattern)?
            }
            LocalityStrategy::NUMATopology => {
                self.optimize_numa_topology(state_vector, access_pattern)?
            }
            LocalityStrategy::Hybrid => {
                self.optimize_hybrid_locality(state_vector, access_pattern)?
            }
        };
        let optimization_time = start_time.elapsed();
        Ok(LocalityOptimizationResult {
            optimization_time,
            locality_improvement: optimization_result.locality_improvement,
            memory_movements: optimization_result.memory_movements,
            numa_migrations: optimization_result.numa_migrations,
            cache_efficiency_gain: optimization_result.cache_efficiency_gain,
            strategy_used: self.strategy,
        })
    }
    /// Analyze access patterns to understand locality characteristics
    fn analyze_access_patterns(&mut self, access_pattern: &[usize]) -> Result<()> {
        let now = Instant::now();
        self.access_analyzer
            .temporal_patterns
            .insert(now, access_pattern.to_vec());
        for &address in access_pattern {
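            // Group accesses by 4 KiB page to capture spatial structure.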
            let page = address / 4096;
            self.access_analyzer
                .spatial_patterns
                .entry(page)
                .or_default()
                .push(address);
        }
        self.detect_loop_patterns(access_pattern)?;
        while self.access_analyzer.temporal_patterns.len() > 1000 {
            self.access_analyzer.temporal_patterns.pop_first();
        }
        Ok(())
    }
    /// Detect loop patterns in access sequence
    fn detect_loop_patterns(&mut self, access_pattern: &[usize]) -> Result<()> {
        if access_pattern.len() < 3 {
            return Ok(());
        }
        for window in access_pattern.windows(3) {
            if let [start, middle, end] = window {
                let stride1 = *middle as isize - *start as isize;
                let stride2 = *end as isize - *middle as isize;
                if stride1 == stride2 && stride1 != 0 {
                    *self
                        .access_analyzer
                        .loop_detection
                        .loop_starts
                        .entry(*start)
                        .or_insert(0) += 1;
                    if self.access_analyzer.loop_detection.loop_starts[start] >= 3 {
                        let confidence =
                            self.access_analyzer.loop_detection.loop_starts[start] as f64 / 10.0;
                        let confidence = confidence.min(1.0);
                        self.access_analyzer
                            .loop_detection
                            .detected_loops
                            .push(LoopPattern {
                                start_address: *start,
                                stride: stride1,
                                iterations: self.access_analyzer.loop_detection.loop_starts[start],
                                confidence,
                            });
                    }
                }
            }
        }
        Ok(())
    }
    /// Optimize temporal locality
    fn optimize_temporal_locality(
        _state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let mut reuse_distances = HashMap::new();
        let mut last_access = HashMap::new();
        for (i, &address) in access_pattern.iter().enumerate() {
            if let Some(&last_pos) = last_access.get(&address) {
                let reuse_distance = i - last_pos;
                reuse_distances.insert(address, reuse_distance);
            }
            last_access.insert(address, i);
        }
        let avg_reuse_distance: f64 = reuse_distances.values().map(|&d| d as f64).sum::<f64>()
            / reuse_distances.len().max(1) as f64;
        let locality_improvement = (100.0 / (avg_reuse_distance + 1.0)).min(1.0);
        Ok(OptimizationResult {
            locality_improvement,
            memory_movements: 0,
            numa_migrations: 0,
            cache_efficiency_gain: locality_improvement * 0.5,
        })
    }
    /// Optimize spatial locality
    fn optimize_spatial_locality(
        _state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let mut spatial_clusters = HashMap::new();
        for &address in access_pattern {
            let cache_line = address / 64;
            *spatial_clusters.entry(cache_line).or_insert(0) += 1;
        }
        let total_accesses = access_pattern.len();
        let unique_cache_lines = spatial_clusters.len();
        let spatial_efficiency = if unique_cache_lines > 0 {
            total_accesses as f64 / unique_cache_lines as f64
        } else {
            1.0
        };
        let locality_improvement = (spatial_efficiency / 10.0).min(1.0);
        Ok(OptimizationResult {
            locality_improvement,
            memory_movements: spatial_clusters.len(),
            numa_migrations: 0,
            cache_efficiency_gain: locality_improvement * 0.7,
        })
    }
    /// Optimize loop locality
    fn optimize_loop_locality(
        &self,
        _state_vector: &mut OptimizedStateVector,
        _access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let total_loops = self.access_analyzer.loop_detection.detected_loops.len();
        let high_confidence_loops = self
            .access_analyzer
            .loop_detection
            .detected_loops
            .iter()
            .filter(|loop_pattern| loop_pattern.confidence > 0.8)
            .count();
        let loop_efficiency = if total_loops > 0 {
            high_confidence_loops as f64 / total_loops as f64
        } else {
            0.5
        };
        Ok(OptimizationResult {
            locality_improvement: loop_efficiency,
            memory_movements: total_loops,
            numa_migrations: 0,
            cache_efficiency_gain: loop_efficiency * 0.8,
        })
    }
    /// Optimize cache-conscious placement
    fn optimize_cache_conscious(
        _state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        // Model a simple 256 KiB direct-mapped cache with 64-byte lines.
        let cache_size = 256 * 1024;
        let cache_line_size = 64;
        let cache_lines = cache_size / cache_line_size;
        let mut cache_hits = 0;
        let mut cache_misses = 0;
        let mut cache_state = HashMap::new();
        for &address in access_pattern {
            let cache_line = address / cache_line_size;
            let cache_set = cache_line % cache_lines;
            // A hit requires the *same* line to be resident in the set; a
            // different line evicts it (conflict miss).
            match cache_state.entry(cache_set) {
                std::collections::hash_map::Entry::Occupied(mut e) => {
                    if *e.get() == cache_line {
                        cache_hits += 1;
                    } else {
                        cache_misses += 1;
                        e.insert(cache_line);
                    }
                }
                std::collections::hash_map::Entry::Vacant(e) => {
                    cache_misses += 1;
                    e.insert(cache_line);
                }
            }
        }
        let cache_hit_rate = if cache_hits + cache_misses > 0 {
            cache_hits as f64 / (cache_hits + cache_misses) as f64
        } else {
            0.0
        };
        Ok(OptimizationResult {
            locality_improvement: cache_hit_rate,
            memory_movements: cache_misses,
            numa_migrations: 0,
            cache_efficiency_gain: cache_hit_rate,
        })
    }
    /// Optimize NUMA topology awareness
    fn optimize_numa_topology(
        &self,
        _state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let mut numa_accesses = HashMap::new();
        for &address in access_pattern {
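            // Simplified placement model: memory is assumed to be distributed
            // across NUMA nodes in contiguous 1 GiB chunks.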
            let numa_node = (address / (1024 * 1024 * 1024)) % self.numa_topology.num_nodes;
            *numa_accesses.entry(numa_node).or_insert(0) += 1;
        }
        let dominant_node = numa_accesses.iter().max_by_key(|(_, &count)| count);
        let numa_efficiency = if let Some((_, &dominant_count)) = dominant_node {
            f64::from(dominant_count) / access_pattern.len() as f64
        } else {
            0.0
        };
        let numa_migrations = numa_accesses.len().saturating_sub(1);
        Ok(OptimizationResult {
            locality_improvement: numa_efficiency,
            memory_movements: 0,
            numa_migrations,
            cache_efficiency_gain: numa_efficiency * 0.6,
        })
    }
    /// Optimize with hybrid strategy
    fn optimize_hybrid_locality(
        &self,
        state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let temporal = Self::optimize_temporal_locality(state_vector, access_pattern)?;
        let spatial = Self::optimize_spatial_locality(state_vector, access_pattern)?;
        let numa = self.optimize_numa_topology(state_vector, access_pattern)?;
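        // Weighted blend: 40% temporal + 40% spatial + 20% NUMA, expressed
        // with `mul_add` to fuse the multiply-accumulate.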
        let locality_improvement = numa.locality_improvement.mul_add(
            0.2,
            temporal
                .locality_improvement
                .mul_add(0.4, spatial.locality_improvement * 0.4),
        );
        Ok(OptimizationResult {
            locality_improvement,
            memory_movements: temporal.memory_movements + spatial.memory_movements,
            numa_migrations: numa.numa_migrations,
            cache_efficiency_gain: temporal
                .cache_efficiency_gain
                .max(spatial.cache_efficiency_gain),
        })
    }
    /// Get detected loop patterns
    #[must_use]
    pub fn get_detected_loops(&self) -> &[LoopPattern] {
        &self.access_analyzer.loop_detection.detected_loops
    }
}
/// Optimization result
#[derive(Debug, Clone)]
pub struct OptimizationResult {
    /// Locality improvement score (0.0 to 1.0)
    pub locality_improvement: f64,
    /// Number of memory block movements
    pub memory_movements: usize,
    /// Number of NUMA migrations
    pub numa_migrations: usize,
    /// Cache efficiency gain (0.0 to 1.0)
    pub cache_efficiency_gain: f64,
}
/// Locality optimization result
#[derive(Debug, Clone)]
pub struct LocalityOptimizationResult {
    /// Time spent on optimization
    pub optimization_time: Duration,
    /// Locality improvement achieved
    pub locality_improvement: f64,
    /// Number of memory movements performed
    pub memory_movements: usize,
    /// Number of NUMA migrations
    pub numa_migrations: usize,
    /// Cache efficiency gain
    pub cache_efficiency_gain: f64,
    /// Strategy used for optimization
    pub strategy_used: LocalityStrategy,
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::memory_bandwidth_optimization::{MemoryOptimizationConfig, OptimizedStateVector};
    #[test]
    fn test_access_pattern_predictor() {
        let mut predictor = AccessPatternPredictor::default();
        for i in 0..10 {
            predictor.record_access(i * 64);
        }
        let predictions = predictor.predict_next_accesses(5);
        assert_eq!(predictions.len(), 5);
        for (i, &pred) in predictions.iter().enumerate() {
            assert_eq!(pred, (10 + i) * 64);
        }
    }
    #[test]
    fn test_memory_prefetcher_creation() {
        let config = PrefetchConfig::default();
        let numa = NUMATopology::default();
        let prefetcher = MemoryPrefetcher::new(config, numa)
            .expect("MemoryPrefetcher creation should succeed with default config");
        assert_eq!(prefetcher.config.strategy, PrefetchStrategy::Adaptive);
    }
    #[test]
    fn test_prefetch_request() {
        let request = PrefetchRequest {
            address: 0x1000,
            priority: 0.8,
            hint_type: PrefetchHint::L1,
            timestamp: Instant::now(),
        };
        assert_eq!(request.address, 0x1000);
        assert_eq!(request.priority, 0.8);
        assert_eq!(request.hint_type, PrefetchHint::L1);
    }
    #[test]
    fn test_data_locality_optimizer() {
        let numa = NUMATopology::default();
        let optimizer = DataLocalityOptimizer::new(LocalityStrategy::Spatial, numa);
        assert!(matches!(optimizer.strategy, LocalityStrategy::Spatial));
    }
    #[test]
    fn test_loop_pattern_detection() {
        let mut optimizer =
            DataLocalityOptimizer::new(LocalityStrategy::Loop, NUMATopology::default());
        let access_pattern = vec![100, 200, 300, 400, 500, 600];
        optimizer
            .detect_loop_patterns(&access_pattern)
            .expect("loop pattern detection should succeed");
        assert!(!optimizer
            .access_analyzer
            .loop_detection
            .loop_starts
            .is_empty());
    }
    #[test]
    fn test_spatial_locality_optimization() {
        let numa = NUMATopology::default();
        let optimizer = DataLocalityOptimizer::new(LocalityStrategy::Spatial, numa);
        let access_pattern = vec![0, 8, 16, 24, 32, 40];
        let config = MemoryOptimizationConfig::default();
        let mut state_vector = OptimizedStateVector::new(3, config)
            .expect("OptimizedStateVector creation should succeed");
        let result =
            DataLocalityOptimizer::optimize_spatial_locality(&mut state_vector, &access_pattern)
                .expect("spatial locality optimization should succeed");
        assert!(result.locality_improvement > 0.0);
        assert!(result.cache_efficiency_gain >= 0.0);
    }
    #[test]
    fn test_numa_topology_default() {
        let numa = NUMATopology::default();
        assert_eq!(numa.num_nodes, 1);
        assert_eq!(numa.cores_per_node.len(), 1);
        assert_eq!(numa.memory_per_node.len(), 1);
    }
    #[test]
    fn test_prefetch_hint_determination() {
        let config = PrefetchConfig::default();
        let numa = NUMATopology::default();
        let _prefetcher = MemoryPrefetcher::new(config, numa)
            .expect("MemoryPrefetcher creation should succeed for prefetch hint test");
        assert_eq!(
            MemoryPrefetcher::determine_prefetch_hint(0x1000, 0),
            PrefetchHint::L1
        );
        assert_eq!(
            MemoryPrefetcher::determine_prefetch_hint(0x1000, 5),
            PrefetchHint::L2
        );
        assert_eq!(
            MemoryPrefetcher::determine_prefetch_hint(0x1000, 10),
            PrefetchHint::L3
        );
        assert_eq!(
            MemoryPrefetcher::determine_prefetch_hint(0x1000, 15),
            PrefetchHint::NonTemporal
        );
    }
    #[test]
    fn test_ml_prediction() {
        let mut predictor = AccessPatternPredictor::default();
        for i in 0..20 {
            predictor.record_access(i * 8);
        }
        let features = predictor.extract_features();
        assert_eq!(features.len(), 16);
        let prediction = predictor.ml_predict(&features, 0);
        assert!(prediction.is_finite());
    }
    #[test]
    fn test_performance_feedback() {
        let feedback = PerformanceFeedback {
            cache_hit_rate: 0.85,
            bandwidth_utilization: 0.7,
            memory_latency: Duration::from_nanos(100),
            cpu_utilization: 0.6,
        };
        assert_eq!(feedback.cache_hit_rate, 0.85);
        assert_eq!(feedback.bandwidth_utilization, 0.7);
    }
}