use crate::error::Result;
use crate::memory_bandwidth_optimization::OptimizedStateVector;
use scirs2_core::parallel_ops::{IndexedParallelIterator, ParallelIterator};
use std::collections::{BTreeMap, HashMap, VecDeque};
use std::sync::{Arc, Mutex, RwLock};
use std::thread;
use std::time::{Duration, Instant};
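/// Strategy used to decide which addresses to prefetch ahead of demand accesses.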
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PrefetchStrategy {
    None,
    Sequential,
    Stride,
    Pattern,
    MLGuided,
    Adaptive,
    NUMAAware,
}
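/// Strategy used when reorganizing data placement for better locality.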
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LocalityStrategy {
    Temporal,
    Spatial,
    Loop,
    CacheConscious,
    NUMATopology,
    Hybrid,
}
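/// Description of the NUMA layout of the host machine.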
#[derive(Debug, Clone)]
pub struct NUMATopology {
    pub num_nodes: usize,
    pub memory_per_node: Vec<usize>,
    pub cores_per_node: Vec<usize>,
    pub latency_matrix: Vec<Vec<usize>>,
    pub bandwidth_per_node: Vec<f64>,
    pub thread_node_mapping: HashMap<usize, usize>,
}
impl Default for NUMATopology {
    fn default() -> Self {
        Self {
            num_nodes: 1,
            memory_per_node: vec![64 * 1024 * 1024 * 1024], // 64 GiB
            cores_per_node: vec![8],
            latency_matrix: vec![vec![0]],
            bandwidth_per_node: vec![100.0 * 1024.0 * 1024.0 * 1024.0], // ~100 GiB/s
            thread_node_mapping: HashMap::new(),
        }
    }
}
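/// Tunable parameters controlling prefetch behavior.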
#[derive(Debug, Clone)]
pub struct PrefetchConfig {
    pub strategy: PrefetchStrategy,
    pub distance: usize,
    pub degree: usize,
    pub hardware_hints: bool,
    pub threshold: f64,
    pub max_queue_size: usize,
    pub cross_page_prefetch: bool,
    pub adaptive_adjustment: bool,
}
impl Default for PrefetchConfig {
    fn default() -> Self {
        Self {
            strategy: PrefetchStrategy::Adaptive,
            distance: 8,
            degree: 4,
            hardware_hints: true,
            threshold: 0.7,
            max_queue_size: 64,
            cross_page_prefetch: true,
            adaptive_adjustment: true,
        }
    }
}
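/// Records recent memory accesses and predicts which addresses are likely to be touched next.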
#[derive(Debug)]
pub struct AccessPatternPredictor {
    access_history: VecDeque<usize>,
    stride_patterns: HashMap<isize, u64>,
    pattern_confidence: HashMap<String, f64>,
    ml_weights: Vec<f64>,
    prediction_cache: HashMap<usize, Vec<usize>>,
    correct_predictions: u64,
    total_predictions: u64,
}
impl Default for AccessPatternPredictor {
    fn default() -> Self {
        Self {
            access_history: VecDeque::with_capacity(1000),
            stride_patterns: HashMap::new(),
            pattern_confidence: HashMap::new(),
            ml_weights: vec![0.5; 16],
            prediction_cache: HashMap::new(),
            correct_predictions: 0,
            total_predictions: 0,
        }
    }
}
impl AccessPatternPredictor {
    /// Record a memory access and update stride statistics.
    pub fn record_access(&mut self, address: usize) {
        self.access_history.push_back(address);
        if self.access_history.len() > 1000 {
            self.access_history.pop_front();
        }
        if self.access_history.len() >= 2 {
            let prev_addr = self.access_history[self.access_history.len() - 2];
            let stride = address as isize - prev_addr as isize;
            *self.stride_patterns.entry(stride).or_insert(0) += 1;
        }
        self.update_pattern_confidence();
    }

    /// Predict the next `count` addresses based on the dominant access pattern.
    pub fn predict_next_accesses(&mut self, count: usize) -> Vec<usize> {
        if self.access_history.is_empty() {
            return Vec::new();
        }
        let current_addr = *self
            .access_history
            .back()
            .expect("access_history is not empty (checked above)");
        if let Some(cached) = self.prediction_cache.get(&current_addr) {
            return cached.clone();
        }
        let predictions = match self.get_dominant_pattern() {
            PredictedPattern::Stride(stride) => {
                Self::predict_stride_pattern(current_addr, stride, count)
            }
            PredictedPattern::Sequential => Self::predict_sequential_pattern(current_addr, count),
            PredictedPattern::Random => Self::predict_random_pattern(current_addr, count),
            PredictedPattern::MLGuided => self.predict_ml_pattern(current_addr, count),
        };
        self.prediction_cache
            .insert(current_addr, predictions.clone());
        if self.prediction_cache.len() > 1000 {
            self.prediction_cache.clear();
        }
        self.total_predictions += 1;
        predictions
    }

    fn update_pattern_confidence(&mut self) {
        if self.total_predictions > 0 {
            let accuracy = self.correct_predictions as f64 / self.total_predictions as f64;
            self.pattern_confidence
                .insert("stride".to_string(), accuracy);
            self.pattern_confidence
                .insert("sequential".to_string(), accuracy * 0.9);
            self.pattern_confidence
                .insert("ml".to_string(), accuracy * 1.1);
        }
    }

    fn get_dominant_pattern(&self) -> PredictedPattern {
        let dominant_stride = self
            .stride_patterns
            .iter()
            .max_by_key(|(_, &count)| count)
            .map(|(&stride, _)| stride);
        match dominant_stride {
            Some(stride) if stride == 1 => PredictedPattern::Sequential,
            Some(stride) if stride != 0 => PredictedPattern::Stride(stride),
            _ => {
                let ml_confidence = self.pattern_confidence.get("ml").unwrap_or(&0.0);
                if *ml_confidence > 0.8 {
                    PredictedPattern::MLGuided
                } else {
                    PredictedPattern::Random
                }
            }
        }
    }

    fn predict_stride_pattern(current_addr: usize, stride: isize, count: usize) -> Vec<usize> {
        let mut predictions = Vec::with_capacity(count);
        let mut addr = current_addr;
        for _ in 0..count {
            addr = (addr as isize + stride) as usize;
            predictions.push(addr);
        }
        predictions
    }

    fn predict_sequential_pattern(current_addr: usize, count: usize) -> Vec<usize> {
        (1..=count).map(|i| current_addr + i).collect()
    }

    fn predict_random_pattern(current_addr: usize, count: usize) -> Vec<usize> {
        // Fall back to touching successive 64-byte cache lines.
        (1..=count).map(|i| current_addr + i * 64).collect()
    }

    fn predict_ml_pattern(&self, current_addr: usize, count: usize) -> Vec<usize> {
        let mut predictions = Vec::with_capacity(count);
        let features = self.extract_features();
        for i in 0..count {
            let prediction = self.ml_predict(&features, i);
            predictions.push((current_addr as f64 + prediction) as usize);
        }
        predictions
    }

    fn extract_features(&self) -> Vec<f64> {
        let mut features = [0.0; 16];
        if self.access_history.len() >= 4 {
            let recent: Vec<_> = self.access_history.iter().rev().take(4).collect();
            for i in 0..3 {
                if i + 1 < recent.len() {
                    let stride = *recent[i] as f64 - *recent[i + 1] as f64;
                    features[i] = stride / 1000.0;
                }
            }
            features[3] = (*recent[0] % 1024) as f64 / 1024.0;
            features[4] = (*recent[0] / 1024) as f64;
            let dominant_stride = self
                .stride_patterns
                .iter()
                .max_by_key(|(_, &count)| count)
                .map_or(0, |(&stride, _)| stride);
            features[5] = dominant_stride as f64 / 1000.0;
        }
        features.to_vec()
    }

    fn ml_predict(&self, features: &[f64], step: usize) -> f64 {
        let mut prediction = 0.0;
        for (i, &feature) in features.iter().enumerate() {
            if i < self.ml_weights.len() {
                prediction += feature * self.ml_weights[i];
            }
        }
        prediction * (step + 1) as f64
    }

    /// Nudge the ML weights toward the observed addresses after a round of predictions.
    pub fn update_ml_weights(&mut self, predictions: &[usize], actual: &[usize]) {
        if predictions.len() != actual.len() || predictions.is_empty() {
            return;
        }
        let learning_rate = 0.01;
        for (pred, &act) in predictions.iter().zip(actual.iter()) {
            let error = act as f64 - *pred as f64;
            for weight in &mut self.ml_weights {
                *weight += learning_rate * error * 0.1;
            }
        }
    }

    #[must_use]
    pub fn get_accuracy(&self) -> f64 {
        if self.total_predictions > 0 {
            self.correct_predictions as f64 / self.total_predictions as f64
        } else {
            0.0
        }
    }
}
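/// Internal classification of the dominant access pattern seen by the predictor.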
#[derive(Debug, Clone)]
enum PredictedPattern {
    Stride(isize),
    Sequential,
    Random,
    MLGuided,
}
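/// Asynchronous memory prefetcher that predicts upcoming accesses and issues
/// prefetch requests from background worker threads.
///
/// A minimal usage sketch (not compiled here; `base_address` is a placeholder and
/// must point at memory that is valid to read):
///
/// ```ignore
/// let mut prefetcher =
///     MemoryPrefetcher::new(PrefetchConfig::default(), NUMATopology::default())?;
/// prefetcher.start_prefetch_threads()?;
/// prefetcher.record_access(base_address)?; // queues predicted follow-up addresses
/// let stats = prefetcher.get_stats();
/// ```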
#[derive(Debug)]
pub struct MemoryPrefetcher {
    config: PrefetchConfig,
    predictor: Arc<Mutex<AccessPatternPredictor>>,
    prefetch_queue: Arc<Mutex<VecDeque<PrefetchRequest>>>,
    numa_topology: NUMATopology,
    stats: Arc<RwLock<PrefetchStats>>,
    prefetch_threads: Vec<thread::JoinHandle<()>>,
}
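/// A single prefetch request queued for the worker threads.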
#[derive(Debug, Clone)]
pub struct PrefetchRequest {
    pub address: usize,
    pub priority: f64,
    pub hint_type: PrefetchHint,
    pub timestamp: Instant,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PrefetchHint {
    Temporal,
    NonTemporal,
    L1,
    L2,
    L3,
    Write,
}
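/// Aggregate statistics collected by the prefetch worker threads.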
#[derive(Debug, Clone, Default)]
pub struct PrefetchStats {
    pub total_requests: u64,
    pub successful_prefetches: u64,
    pub failed_prefetches: u64,
    pub average_latency: Duration,
    pub bandwidth_utilization: f64,
    pub cache_hit_improvement: f64,
}
impl MemoryPrefetcher {
    pub fn new(config: PrefetchConfig, numa_topology: NUMATopology) -> Result<Self> {
        let prefetcher = Self {
            config,
            predictor: Arc::new(Mutex::new(AccessPatternPredictor::default())),
            prefetch_queue: Arc::new(Mutex::new(VecDeque::new())),
            numa_topology,
            stats: Arc::new(RwLock::new(PrefetchStats::default())),
            prefetch_threads: Vec::new(),
        };
        Ok(prefetcher)
    }

    /// Spawn background worker threads that drain the prefetch queue.
    pub fn start_prefetch_threads(&mut self) -> Result<()> {
        let num_threads = self.config.degree.min(4);
        for thread_id in 0..num_threads {
            let queue = Arc::clone(&self.prefetch_queue);
            let stats = Arc::clone(&self.stats);
            let config = self.config.clone();
            let handle = thread::spawn(move || {
                Self::prefetch_worker_thread(thread_id, queue, stats, config);
            });
            self.prefetch_threads.push(handle);
        }
        Ok(())
    }

    fn prefetch_worker_thread(
        _thread_id: usize,
        queue: Arc<Mutex<VecDeque<PrefetchRequest>>>,
        stats: Arc<RwLock<PrefetchStats>>,
        _config: PrefetchConfig,
    ) {
        loop {
            let request = {
                let mut q = queue
                    .lock()
                    .unwrap_or_else(|poisoned| poisoned.into_inner());
                q.pop_front()
            };
            if let Some(req) = request {
                let start_time = Instant::now();
                Self::execute_prefetch(&req);
                let latency = start_time.elapsed();
                if let Ok(mut s) = stats.write() {
                    s.total_requests += 1;
                    s.average_latency = if s.total_requests == 1 {
                        latency
                    } else {
                        // Running average as the midpoint of the previous average and the
                        // latest sample.
                        Duration::from_nanos(u128::midpoint(
                            s.average_latency.as_nanos(),
                            latency.as_nanos(),
                        ) as u64)
                    };
                }
            } else {
                thread::sleep(Duration::from_micros(100));
            }
        }
    }

    fn execute_prefetch(request: &PrefetchRequest) {
        // Touch the requested address with a volatile read so the cache line is pulled in
        // regardless of the hint type. The caller is responsible for only queueing
        // addresses that are valid to read.
        unsafe {
            match request.hint_type {
                PrefetchHint::Temporal
                | PrefetchHint::L1
                | PrefetchHint::L2
                | PrefetchHint::L3
                | PrefetchHint::NonTemporal
                | PrefetchHint::Write => {
                    let _ = std::ptr::read_volatile(request.address as *const u8);
                }
            }
        }
    }

    /// Record an access, predict follow-up addresses, and enqueue prefetch requests.
    pub fn record_access(&self, address: usize) -> Result<()> {
        if let Ok(mut predictor) = self.predictor.lock() {
            predictor.record_access(address);
            let predictions = predictor.predict_next_accesses(self.config.distance);
            if let Ok(mut queue) = self.prefetch_queue.lock() {
                for (i, &pred_addr) in predictions.iter().enumerate() {
                    if queue.len() < self.config.max_queue_size {
                        let priority = 1.0 - (i as f64 / predictions.len() as f64);
                        let hint_type = Self::determine_prefetch_hint(pred_addr, i);
                        queue.push_back(PrefetchRequest {
                            address: pred_addr,
                            priority,
                            hint_type,
                            timestamp: Instant::now(),
                        });
                    }
                }
            }
        }
        Ok(())
    }

    const fn determine_prefetch_hint(_address: usize, distance: usize) -> PrefetchHint {
        match distance {
            0..=2 => PrefetchHint::L1,
            3..=6 => PrefetchHint::L2,
            7..=12 => PrefetchHint::L3,
            _ => PrefetchHint::NonTemporal,
        }
    }

    #[must_use]
    pub fn get_stats(&self) -> PrefetchStats {
        self.stats
            .read()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
            .clone()
    }

    /// Adjust prefetch distance and degree based on observed performance.
    pub fn optimize_strategy(&mut self, performance_feedback: &PerformanceFeedback) -> Result<()> {
        if !self.config.adaptive_adjustment {
            return Ok(());
        }
        if performance_feedback.cache_hit_rate < 0.8 {
            self.config.distance = (self.config.distance + 2).min(16);
        } else if performance_feedback.cache_hit_rate > 0.95 {
            self.config.distance = (self.config.distance.saturating_sub(1)).max(2);
        }
        if performance_feedback.bandwidth_utilization < 0.6 {
            self.config.degree = (self.config.degree + 1).min(8);
        } else if performance_feedback.bandwidth_utilization > 0.9 {
            self.config.degree = (self.config.degree.saturating_sub(1)).max(1);
        }
        if self.config.strategy == PrefetchStrategy::MLGuided {
            if let Ok(mut predictor) = self.predictor.lock() {
                let accuracy_improvement = performance_feedback.cache_hit_rate - 0.8;
                predictor
                    .ml_weights
                    .iter_mut()
                    .for_each(|w| *w += accuracy_improvement * 0.01);
            }
        }
        Ok(())
    }
}
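/// Runtime performance measurements fed back into the adaptive prefetcher.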
#[derive(Debug, Clone)]
pub struct PerformanceFeedback {
    pub cache_hit_rate: f64,
    pub bandwidth_utilization: f64,
    pub memory_latency: Duration,
    pub cpu_utilization: f64,
}
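/// Analyzes access patterns and reorganizes data placement for better locality.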
#[derive(Debug)]
pub struct DataLocalityOptimizer {
    strategy: LocalityStrategy,
    numa_topology: NUMATopology,
    memory_regions: HashMap<usize, MemoryRegionInfo>,
    access_analyzer: AccessPatternAnalyzer,
}
#[derive(Debug, Clone)]
pub struct MemoryRegionInfo {
    pub start_address: usize,
    pub size: usize,
    pub numa_node: usize,
    pub access_frequency: u64,
    pub last_access: Instant,
    pub access_pattern: AccessPatternType,
}

#[derive(Debug)]
pub struct AccessPatternAnalyzer {
    temporal_patterns: BTreeMap<Instant, Vec<usize>>,
    spatial_patterns: HashMap<usize, Vec<usize>>,
    loop_detection: LoopDetectionState,
}

#[derive(Debug)]
pub struct LoopDetectionState {
    loop_starts: HashMap<usize, usize>,
    current_iteration: Vec<usize>,
    detected_loops: Vec<LoopPattern>,
}

#[derive(Debug, Clone)]
pub struct LoopPattern {
    pub start_address: usize,
    pub stride: isize,
    pub iterations: usize,
    pub confidence: f64,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccessPatternType {
    Sequential,
    Random,
    Strided,
    Loop,
    Temporal,
    Hybrid,
}
impl DataLocalityOptimizer {
    #[must_use]
    pub fn new(strategy: LocalityStrategy, numa_topology: NUMATopology) -> Self {
        Self {
            strategy,
            numa_topology,
            memory_regions: HashMap::new(),
            access_analyzer: AccessPatternAnalyzer {
                temporal_patterns: BTreeMap::new(),
                spatial_patterns: HashMap::new(),
                loop_detection: LoopDetectionState {
                    loop_starts: HashMap::new(),
                    current_iteration: Vec::new(),
                    detected_loops: Vec::new(),
                },
            },
        }
    }

    /// Analyze the given access pattern and apply the configured locality strategy.
    pub fn optimize_data_placement(
        &mut self,
        state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<LocalityOptimizationResult> {
        let start_time = Instant::now();
        self.analyze_access_patterns(access_pattern)?;
        let optimization_result = match self.strategy {
            LocalityStrategy::Temporal => {
                Self::optimize_temporal_locality(state_vector, access_pattern)?
            }
            LocalityStrategy::Spatial => {
                Self::optimize_spatial_locality(state_vector, access_pattern)?
            }
            LocalityStrategy::Loop => self.optimize_loop_locality(state_vector, access_pattern)?,
            LocalityStrategy::CacheConscious => {
                Self::optimize_cache_conscious(state_vector, access_pattern)?
            }
            LocalityStrategy::NUMATopology => {
                self.optimize_numa_topology(state_vector, access_pattern)?
            }
            LocalityStrategy::Hybrid => {
                self.optimize_hybrid_locality(state_vector, access_pattern)?
            }
        };
        let optimization_time = start_time.elapsed();
        Ok(LocalityOptimizationResult {
            optimization_time,
            locality_improvement: optimization_result.locality_improvement,
            memory_movements: optimization_result.memory_movements,
            numa_migrations: optimization_result.numa_migrations,
            cache_efficiency_gain: optimization_result.cache_efficiency_gain,
            strategy_used: self.strategy,
        })
    }

    fn analyze_access_patterns(&mut self, access_pattern: &[usize]) -> Result<()> {
        let now = Instant::now();
        self.access_analyzer
            .temporal_patterns
            .insert(now, access_pattern.to_vec());
        for &address in access_pattern {
            // Group accesses by 4 KiB page for spatial analysis.
            let page = address / 4096;
            self.access_analyzer
                .spatial_patterns
                .entry(page)
                .or_default()
                .push(address);
        }
        self.detect_loop_patterns(access_pattern)?;
        while self.access_analyzer.temporal_patterns.len() > 1000 {
            self.access_analyzer.temporal_patterns.pop_first();
        }
        Ok(())
    }

    fn detect_loop_patterns(&mut self, access_pattern: &[usize]) -> Result<()> {
        if access_pattern.len() < 3 {
            return Ok(());
        }
        for window in access_pattern.windows(3) {
            if let [start, middle, end] = window {
                let stride1 = *middle as isize - *start as isize;
                let stride2 = *end as isize - *middle as isize;
                if stride1 == stride2 && stride1 != 0 {
                    *self
                        .access_analyzer
                        .loop_detection
                        .loop_starts
                        .entry(*start)
                        .or_insert(0) += 1;
                    if self.access_analyzer.loop_detection.loop_starts[start] >= 3 {
                        let confidence =
                            self.access_analyzer.loop_detection.loop_starts[start] as f64 / 10.0;
                        let confidence = confidence.min(1.0);
                        self.access_analyzer
                            .loop_detection
                            .detected_loops
                            .push(LoopPattern {
                                start_address: *start,
                                stride: stride1,
                                iterations: self.access_analyzer.loop_detection.loop_starts[start],
                                confidence,
                            });
                    }
                }
            }
        }
        Ok(())
    }

    fn optimize_temporal_locality(
        _state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let mut reuse_distances = HashMap::new();
        let mut last_access = HashMap::new();
        for (i, &address) in access_pattern.iter().enumerate() {
            if let Some(&last_pos) = last_access.get(&address) {
                let reuse_distance = i - last_pos;
                reuse_distances.insert(address, reuse_distance);
            }
            last_access.insert(address, i);
        }
        let avg_reuse_distance: f64 = reuse_distances.values().map(|&d| d as f64).sum::<f64>()
            / reuse_distances.len().max(1) as f64;
        let locality_improvement = (100.0 / (avg_reuse_distance + 1.0)).min(1.0);
        Ok(OptimizationResult {
            locality_improvement,
            memory_movements: 0,
            numa_migrations: 0,
            cache_efficiency_gain: locality_improvement * 0.5,
        })
    }

    fn optimize_spatial_locality(
        _state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let mut spatial_clusters = HashMap::new();
        for &address in access_pattern {
            // Cluster accesses by 64-byte cache line.
            let cache_line = address / 64;
            *spatial_clusters.entry(cache_line).or_insert(0) += 1;
        }
        let total_accesses = access_pattern.len();
        let unique_cache_lines = spatial_clusters.len();
        let spatial_efficiency = if unique_cache_lines > 0 {
            total_accesses as f64 / unique_cache_lines as f64
        } else {
            1.0
        };
        let locality_improvement = (spatial_efficiency / 10.0).min(1.0);
        Ok(OptimizationResult {
            locality_improvement,
            memory_movements: spatial_clusters.len(),
            numa_migrations: 0,
            cache_efficiency_gain: locality_improvement * 0.7,
        })
    }

    fn optimize_loop_locality(
        &self,
        _state_vector: &mut OptimizedStateVector,
        _access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let total_loops = self.access_analyzer.loop_detection.detected_loops.len();
        let high_confidence_loops = self
            .access_analyzer
            .loop_detection
            .detected_loops
            .iter()
            .filter(|loop_pattern| loop_pattern.confidence > 0.8)
            .count();
        let loop_efficiency = if total_loops > 0 {
            high_confidence_loops as f64 / total_loops as f64
        } else {
            0.5
        };
        Ok(OptimizationResult {
            locality_improvement: loop_efficiency,
            memory_movements: total_loops,
            numa_migrations: 0,
            cache_efficiency_gain: loop_efficiency * 0.8,
        })
    }

    fn optimize_cache_conscious(
        _state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        // Approximate a direct-mapped 256 KiB cache with 64-byte lines (set occupancy
        // only; tags are not compared).
        let cache_size = 256 * 1024;
        let cache_line_size = 64;
        let cache_lines = cache_size / cache_line_size;
        let mut cache_hits = 0;
        let mut cache_misses = 0;
        let mut cache_state = HashMap::new();
        for &address in access_pattern {
            let cache_line = address / cache_line_size;
            let cache_set = cache_line % cache_lines;
            if let std::collections::hash_map::Entry::Vacant(e) = cache_state.entry(cache_set) {
                cache_misses += 1;
                e.insert(cache_line);
            } else {
                cache_hits += 1;
            }
        }
        let cache_hit_rate = if cache_hits + cache_misses > 0 {
            cache_hits as f64 / (cache_hits + cache_misses) as f64
        } else {
            0.0
        };
        Ok(OptimizationResult {
            locality_improvement: cache_hit_rate,
            memory_movements: cache_misses,
            numa_migrations: 0,
            cache_efficiency_gain: cache_hit_rate,
        })
    }

    fn optimize_numa_topology(
        &self,
        _state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let mut numa_accesses = HashMap::new();
        for &address in access_pattern {
            // Map each address to a NUMA node assuming 1 GiB interleaving granularity.
            let numa_node = (address / (1024 * 1024 * 1024)) % self.numa_topology.num_nodes;
            *numa_accesses.entry(numa_node).or_insert(0) += 1;
        }
        let dominant_node = numa_accesses.iter().max_by_key(|(_, &count)| count);
        let numa_efficiency = if let Some((_, &dominant_count)) = dominant_node {
            f64::from(dominant_count) / access_pattern.len() as f64
        } else {
            0.0
        };
        let numa_migrations = numa_accesses.len().saturating_sub(1);
        Ok(OptimizationResult {
            locality_improvement: numa_efficiency,
            memory_movements: 0,
            numa_migrations,
            cache_efficiency_gain: numa_efficiency * 0.6,
        })
    }

    fn optimize_hybrid_locality(
        &self,
        state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        // Weighted blend: 40% temporal, 40% spatial, 20% NUMA.
        let temporal = Self::optimize_temporal_locality(state_vector, access_pattern)?;
        let spatial = Self::optimize_spatial_locality(state_vector, access_pattern)?;
        let numa = self.optimize_numa_topology(state_vector, access_pattern)?;
        let locality_improvement = numa.locality_improvement.mul_add(
            0.2,
            temporal
                .locality_improvement
                .mul_add(0.4, spatial.locality_improvement * 0.4),
        );
        Ok(OptimizationResult {
            locality_improvement,
            memory_movements: temporal.memory_movements + spatial.memory_movements,
            numa_migrations: numa.numa_migrations,
            cache_efficiency_gain: temporal
                .cache_efficiency_gain
                .max(spatial.cache_efficiency_gain),
        })
    }

    #[must_use]
    pub fn get_detected_loops(&self) -> &[LoopPattern] {
        &self.access_analyzer.loop_detection.detected_loops
    }
}
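/// Raw metrics produced by an individual locality-optimization pass.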
#[derive(Debug, Clone)]
pub struct OptimizationResult {
    pub locality_improvement: f64,
    pub memory_movements: usize,
    pub numa_migrations: usize,
    pub cache_efficiency_gain: f64,
}

#[derive(Debug, Clone)]
pub struct LocalityOptimizationResult {
    pub optimization_time: Duration,
    pub locality_improvement: f64,
    pub memory_movements: usize,
    pub numa_migrations: usize,
    pub cache_efficiency_gain: f64,
    pub strategy_used: LocalityStrategy,
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::memory_bandwidth_optimization::{MemoryOptimizationConfig, OptimizedStateVector};

    #[test]
    fn test_access_pattern_predictor() {
        let mut predictor = AccessPatternPredictor::default();
        for i in 0..10 {
            predictor.record_access(i * 64);
        }
        let predictions = predictor.predict_next_accesses(5);
        assert_eq!(predictions.len(), 5);
        for (i, &pred) in predictions.iter().enumerate() {
            assert_eq!(pred, (10 + i) * 64);
        }
    }

    #[test]
    fn test_memory_prefetcher_creation() {
        let config = PrefetchConfig::default();
        let numa = NUMATopology::default();
        let prefetcher = MemoryPrefetcher::new(config, numa)
            .expect("MemoryPrefetcher creation should succeed with default config");
        assert_eq!(prefetcher.config.strategy, PrefetchStrategy::Adaptive);
    }

    #[test]
    fn test_prefetch_request() {
        let request = PrefetchRequest {
            address: 0x1000,
            priority: 0.8,
            hint_type: PrefetchHint::L1,
            timestamp: Instant::now(),
        };
        assert_eq!(request.address, 0x1000);
        assert_eq!(request.priority, 0.8);
        assert_eq!(request.hint_type, PrefetchHint::L1);
    }

    #[test]
    fn test_data_locality_optimizer() {
        let numa = NUMATopology::default();
        let optimizer = DataLocalityOptimizer::new(LocalityStrategy::Spatial, numa);
        assert!(matches!(optimizer.strategy, LocalityStrategy::Spatial));
    }

    #[test]
    fn test_loop_pattern_detection() {
        let mut optimizer =
            DataLocalityOptimizer::new(LocalityStrategy::Loop, NUMATopology::default());
        let access_pattern = vec![100, 200, 300, 400, 500, 600];
        optimizer
            .detect_loop_patterns(&access_pattern)
            .expect("loop pattern detection should succeed");
        assert!(!optimizer
            .access_analyzer
            .loop_detection
            .loop_starts
            .is_empty());
    }

    #[test]
    fn test_spatial_locality_optimization() {
        let numa = NUMATopology::default();
        let optimizer = DataLocalityOptimizer::new(LocalityStrategy::Spatial, numa);
        let access_pattern = vec![0, 8, 16, 24, 32, 40];
        let config = MemoryOptimizationConfig::default();
        let mut state_vector = OptimizedStateVector::new(3, config)
            .expect("OptimizedStateVector creation should succeed");
        let result =
            DataLocalityOptimizer::optimize_spatial_locality(&mut state_vector, &access_pattern)
                .expect("spatial locality optimization should succeed");
        assert!(result.locality_improvement > 0.0);
        assert!(result.cache_efficiency_gain >= 0.0);
    }

    #[test]
    fn test_numa_topology_default() {
        let numa = NUMATopology::default();
        assert_eq!(numa.num_nodes, 1);
        assert_eq!(numa.cores_per_node.len(), 1);
        assert_eq!(numa.memory_per_node.len(), 1);
    }

    #[test]
    fn test_prefetch_hint_determination() {
        let config = PrefetchConfig::default();
        let numa = NUMATopology::default();
        let _prefetcher = MemoryPrefetcher::new(config, numa)
            .expect("MemoryPrefetcher creation should succeed for prefetch hint test");
        assert_eq!(
            MemoryPrefetcher::determine_prefetch_hint(0x1000, 0),
            PrefetchHint::L1
        );
        assert_eq!(
            MemoryPrefetcher::determine_prefetch_hint(0x1000, 5),
            PrefetchHint::L2
        );
        assert_eq!(
            MemoryPrefetcher::determine_prefetch_hint(0x1000, 10),
            PrefetchHint::L3
        );
        assert_eq!(
            MemoryPrefetcher::determine_prefetch_hint(0x1000, 15),
            PrefetchHint::NonTemporal
        );
    }

    #[test]
    fn test_ml_prediction() {
        let mut predictor = AccessPatternPredictor::default();
        for i in 0..20 {
            predictor.record_access(i * 8);
        }
        let features = predictor.extract_features();
        assert_eq!(features.len(), 16);
        let prediction = predictor.ml_predict(&features, 0);
        assert!(prediction.is_finite());
    }

    #[test]
    fn test_performance_feedback() {
        let feedback = PerformanceFeedback {
            cache_hit_rate: 0.85,
            bandwidth_utilization: 0.7,
            memory_latency: Duration::from_nanos(100),
            cpu_utilization: 0.6,
        };
        assert_eq!(feedback.cache_hit_rate, 0.85);
        assert_eq!(feedback.bandwidth_utilization, 0.7);
    }
}