use scirs2_core::parallel_ops::*;
use std::collections::{BTreeMap, HashMap, VecDeque};
use std::sync::{Arc, Mutex, RwLock};
use std::thread;
use std::time::{Duration, Instant};

use crate::error::Result;
use crate::memory_bandwidth_optimization::OptimizedStateVector;

/// Strategies for prefetching memory ahead of access.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PrefetchStrategy {
    None,
    Sequential,
    Stride,
    Pattern,
    MLGuided,
    Adaptive,
    NUMAAware,
}

/// Strategies for improving data locality.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LocalityStrategy {
    Temporal,
    Spatial,
    Loop,
    CacheConscious,
    NUMATopology,
    Hybrid,
}

/// Description of the host's NUMA topology.
#[derive(Debug, Clone)]
pub struct NUMATopology {
    pub num_nodes: usize,
    pub memory_per_node: Vec<usize>,
    pub cores_per_node: Vec<usize>,
    pub latency_matrix: Vec<Vec<usize>>,
    pub bandwidth_per_node: Vec<f64>,
    pub thread_node_mapping: HashMap<usize, usize>,
}

impl Default for NUMATopology {
    fn default() -> Self {
        Self {
            num_nodes: 1,
            memory_per_node: vec![64 * 1024 * 1024 * 1024], // 64 GiB
            cores_per_node: vec![8],
            latency_matrix: vec![vec![0]],
            bandwidth_per_node: vec![100.0 * 1024.0 * 1024.0 * 1024.0], // 100 GiB/s
            thread_node_mapping: HashMap::new(),
        }
    }
}

/// Configuration for the memory prefetcher.
#[derive(Debug, Clone)]
pub struct PrefetchConfig {
    pub strategy: PrefetchStrategy,
    pub distance: usize,
    pub degree: usize,
    pub hardware_hints: bool,
    pub threshold: f64,
    pub max_queue_size: usize,
    pub cross_page_prefetch: bool,
    pub adaptive_adjustment: bool,
}

impl Default for PrefetchConfig {
    fn default() -> Self {
        Self {
            strategy: PrefetchStrategy::Adaptive,
            distance: 8,
            degree: 4,
            hardware_hints: true,
            threshold: 0.7,
            max_queue_size: 64,
            cross_page_prefetch: true,
            adaptive_adjustment: true,
        }
    }
}

/// Learns access patterns and predicts upcoming memory accesses.
#[derive(Debug)]
pub struct AccessPatternPredictor {
    access_history: VecDeque<usize>,
    stride_patterns: HashMap<isize, u64>,
    pattern_confidence: HashMap<String, f64>,
    ml_weights: Vec<f64>,
    prediction_cache: HashMap<usize, Vec<usize>>,
    correct_predictions: u64,
    total_predictions: u64,
}

impl Default for AccessPatternPredictor {
    fn default() -> Self {
        Self {
            access_history: VecDeque::with_capacity(1000),
            stride_patterns: HashMap::new(),
            pattern_confidence: HashMap::new(),
            ml_weights: vec![0.5; 16],
            prediction_cache: HashMap::new(),
            correct_predictions: 0,
            total_predictions: 0,
        }
    }
}

impl AccessPatternPredictor {
    /// Records a memory access and updates the learned stride statistics.
    pub fn record_access(&mut self, address: usize) {
        self.access_history.push_back(address);

        if self.access_history.len() > 1000 {
            self.access_history.pop_front();
        }

        if self.access_history.len() >= 2 {
            let prev_addr = self.access_history[self.access_history.len() - 2];
            let stride = address as isize - prev_addr as isize;
            *self.stride_patterns.entry(stride).or_insert(0) += 1;
        }

        self.update_pattern_confidence();
    }

    /// Predicts the next `count` addresses based on the dominant access pattern.
    pub fn predict_next_accesses(&mut self, count: usize) -> Vec<usize> {
        if self.access_history.is_empty() {
            return Vec::new();
        }

        let current_addr = *self.access_history.back().unwrap();

        if let Some(cached) = self.prediction_cache.get(&current_addr) {
            return cached.clone();
        }

        let predictions = match self.get_dominant_pattern() {
            PredictedPattern::Stride(stride) => {
                self.predict_stride_pattern(current_addr, stride, count)
            }
            PredictedPattern::Sequential => self.predict_sequential_pattern(current_addr, count),
            PredictedPattern::Random => self.predict_random_pattern(current_addr, count),
            PredictedPattern::MLGuided => self.predict_ml_pattern(current_addr, count),
        };

        self.prediction_cache
            .insert(current_addr, predictions.clone());

        if self.prediction_cache.len() > 1000 {
            self.prediction_cache.clear();
        }

        self.total_predictions += 1;
        predictions
    }

    fn update_pattern_confidence(&mut self) {
        if self.total_predictions > 0 {
            let accuracy = self.correct_predictions as f64 / self.total_predictions as f64;

            self.pattern_confidence
                .insert("stride".to_string(), accuracy);
            self.pattern_confidence
                .insert("sequential".to_string(), accuracy * 0.9);
            self.pattern_confidence
                .insert("ml".to_string(), accuracy * 1.1);
        }
    }

    fn get_dominant_pattern(&self) -> PredictedPattern {
        let dominant_stride = self
            .stride_patterns
            .iter()
            .max_by_key(|(_, &count)| count)
            .map(|(&stride, _)| stride);

        match dominant_stride {
            Some(stride) if stride == 1 => PredictedPattern::Sequential,
            Some(stride) if stride != 0 => PredictedPattern::Stride(stride),
            _ => {
                let ml_confidence = self.pattern_confidence.get("ml").unwrap_or(&0.0);
                if *ml_confidence > 0.8 {
                    PredictedPattern::MLGuided
                } else {
                    PredictedPattern::Random
                }
            }
        }
    }

    fn predict_stride_pattern(
        &self,
        current_addr: usize,
        stride: isize,
        count: usize,
    ) -> Vec<usize> {
        let mut predictions = Vec::with_capacity(count);
        let mut addr = current_addr;

        for _ in 0..count {
            addr = (addr as isize + stride) as usize;
            predictions.push(addr);
        }

        predictions
    }

    fn predict_sequential_pattern(&self, current_addr: usize, count: usize) -> Vec<usize> {
        (1..=count).map(|i| current_addr + i).collect()
    }

    fn predict_random_pattern(&self, current_addr: usize, count: usize) -> Vec<usize> {
        // Fall back to cache-line-sized (64-byte) steps when no clear pattern is present.
        (1..=count).map(|i| current_addr + i * 64).collect()
    }

    fn predict_ml_pattern(&self, current_addr: usize, count: usize) -> Vec<usize> {
        let mut predictions = Vec::with_capacity(count);

        let features = self.extract_features();

        for i in 0..count {
            let prediction = self.ml_predict(&features, i);
            predictions.push((current_addr as f64 + prediction) as usize);
        }

        predictions
    }

    fn extract_features(&self) -> Vec<f64> {
        let mut features = [0.0; 16];

        if self.access_history.len() >= 4 {
            let recent: Vec<_> = self.access_history.iter().rev().take(4).collect();

            for i in 0..3 {
                if i + 1 < recent.len() {
                    let stride = *recent[i] as f64 - *recent[i + 1] as f64;
                    features[i] = stride / 1000.0;
                }
            }

            features[3] = (*recent[0] % 1024) as f64 / 1024.0;
            features[4] = (*recent[0] / 1024) as f64;

            let dominant_stride = self
                .stride_patterns
                .iter()
                .max_by_key(|(_, &count)| count)
                .map(|(&stride, _)| stride)
                .unwrap_or(0);
            features[5] = dominant_stride as f64 / 1000.0;
        }

        features.to_vec()
    }

    fn ml_predict(&self, features: &[f64], step: usize) -> f64 {
        let mut prediction = 0.0;

        for (i, &feature) in features.iter().enumerate() {
            if i < self.ml_weights.len() {
                prediction += feature * self.ml_weights[i];
            }
        }

        prediction * (step + 1) as f64
    }

    /// Nudges the ML weights toward the observed accesses (simple online update).
    pub fn update_ml_weights(&mut self, predictions: &[usize], actual: &[usize]) {
        if predictions.len() != actual.len() || predictions.is_empty() {
            return;
        }

        let learning_rate = 0.01;

        for (pred, &act) in predictions.iter().zip(actual.iter()) {
            let error = act as f64 - *pred as f64;

            for weight in &mut self.ml_weights {
                *weight += learning_rate * error * 0.1;
            }
        }
    }

    /// Returns the fraction of predictions recorded as correct.
    pub fn get_accuracy(&self) -> f64 {
        if self.total_predictions > 0 {
            self.correct_predictions as f64 / self.total_predictions as f64
        } else {
            0.0
        }
    }
}

#[derive(Debug, Clone)]
enum PredictedPattern {
    Stride(isize),
    Sequential,
    Random,
    MLGuided,
}

/// Software prefetcher that predicts upcoming accesses and issues prefetches for them.
#[derive(Debug)]
pub struct MemoryPrefetcher {
    config: PrefetchConfig,
    predictor: Arc<Mutex<AccessPatternPredictor>>,
    prefetch_queue: Arc<Mutex<VecDeque<PrefetchRequest>>>,
    numa_topology: NUMATopology,
    stats: Arc<RwLock<PrefetchStats>>,
    prefetch_threads: Vec<thread::JoinHandle<()>>,
}

/// A single queued prefetch operation.
#[derive(Debug, Clone)]
pub struct PrefetchRequest {
    pub address: usize,
    pub priority: f64,
    pub hint_type: PrefetchHint,
    pub timestamp: Instant,
}

/// Cache-level hint attached to a prefetch request.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PrefetchHint {
    Temporal,
    NonTemporal,
    L1,
    L2,
    L3,
    Write,
}

/// Aggregate statistics collected by the prefetch worker threads.
#[derive(Debug, Clone, Default)]
pub struct PrefetchStats {
    pub total_requests: u64,
    pub successful_prefetches: u64,
    pub failed_prefetches: u64,
    pub average_latency: Duration,
    pub bandwidth_utilization: f64,
    pub cache_hit_improvement: f64,
}

impl MemoryPrefetcher {
    pub fn new(config: PrefetchConfig, numa_topology: NUMATopology) -> Result<Self> {
        let prefetcher = Self {
            config,
            predictor: Arc::new(Mutex::new(AccessPatternPredictor::default())),
            prefetch_queue: Arc::new(Mutex::new(VecDeque::new())),
            numa_topology,
            stats: Arc::new(RwLock::new(PrefetchStats::default())),
            prefetch_threads: Vec::new(),
        };

        Ok(prefetcher)
    }

    /// Spawns background worker threads that drain the prefetch queue.
    pub fn start_prefetch_threads(&mut self) -> Result<()> {
        let num_threads = self.config.degree.min(4);

        for thread_id in 0..num_threads {
            let queue = Arc::clone(&self.prefetch_queue);
            let stats = Arc::clone(&self.stats);
            let config = self.config.clone();

            let handle = thread::spawn(move || {
                Self::prefetch_worker_thread(thread_id, queue, stats, config);
            });

            self.prefetch_threads.push(handle);
        }

        Ok(())
    }

    fn prefetch_worker_thread(
        _thread_id: usize,
        queue: Arc<Mutex<VecDeque<PrefetchRequest>>>,
        stats: Arc<RwLock<PrefetchStats>>,
        _config: PrefetchConfig,
    ) {
        loop {
            let request = {
                let mut q = queue.lock().unwrap();
                q.pop_front()
            };

            if let Some(req) = request {
                let start_time = Instant::now();

                Self::execute_prefetch(&req);

                let latency = start_time.elapsed();
                if let Ok(mut s) = stats.write() {
                    s.total_requests += 1;
                    s.average_latency = if s.total_requests == 1 {
                        latency
                    } else {
                        Duration::from_nanos(
                            ((s.average_latency.as_nanos() + latency.as_nanos()) / 2) as u64,
                        )
                    };
                }
            } else {
                thread::sleep(Duration::from_micros(100));
            }
        }
    }

    fn execute_prefetch(request: &PrefetchRequest) {
        // Touch the target address so the hardware pulls its cache line in; the
        // hint type does not change the operation here.
        unsafe {
            match request.hint_type {
                PrefetchHint::Temporal
                | PrefetchHint::L1
                | PrefetchHint::L2
                | PrefetchHint::L3
                | PrefetchHint::NonTemporal
                | PrefetchHint::Write => {
                    let _ = std::ptr::read_volatile(request.address as *const u8);
                }
            }
        }
    }

    /// Records an access, asks the predictor for likely next addresses, and
    /// enqueues prefetch requests for them.
    pub fn record_access(&self, address: usize) -> Result<()> {
        if let Ok(mut predictor) = self.predictor.lock() {
            predictor.record_access(address);

            let predictions = predictor.predict_next_accesses(self.config.distance);

            if let Ok(mut queue) = self.prefetch_queue.lock() {
                for (i, &pred_addr) in predictions.iter().enumerate() {
                    if queue.len() < self.config.max_queue_size {
                        let priority = 1.0 - (i as f64 / predictions.len() as f64);
                        let hint_type = self.determine_prefetch_hint(pred_addr, i);

                        queue.push_back(PrefetchRequest {
                            address: pred_addr,
                            priority,
                            hint_type,
                            timestamp: Instant::now(),
                        });
                    }
                }
            }
        }

        Ok(())
    }

    fn determine_prefetch_hint(&self, _address: usize, distance: usize) -> PrefetchHint {
        match distance {
            0..=2 => PrefetchHint::L1,
            3..=6 => PrefetchHint::L2,
            7..=12 => PrefetchHint::L3,
            _ => PrefetchHint::NonTemporal,
        }
    }

    pub fn get_stats(&self) -> PrefetchStats {
        self.stats.read().unwrap().clone()
    }

    /// Adapts prefetch distance and degree based on runtime feedback.
    pub fn optimize_strategy(&mut self, performance_feedback: &PerformanceFeedback) -> Result<()> {
        if !self.config.adaptive_adjustment {
            return Ok(());
        }

        if performance_feedback.cache_hit_rate < 0.8 {
            self.config.distance = (self.config.distance + 2).min(16);
        } else if performance_feedback.cache_hit_rate > 0.95 {
            self.config.distance = (self.config.distance.saturating_sub(1)).max(2);
        }

        if performance_feedback.bandwidth_utilization < 0.6 {
            self.config.degree = (self.config.degree + 1).min(8);
        } else if performance_feedback.bandwidth_utilization > 0.9 {
            self.config.degree = (self.config.degree.saturating_sub(1)).max(1);
        }

        if self.config.strategy == PrefetchStrategy::MLGuided {
            if let Ok(mut predictor) = self.predictor.lock() {
                let accuracy_improvement = performance_feedback.cache_hit_rate - 0.8;
                predictor
                    .ml_weights
                    .iter_mut()
                    .for_each(|w| *w += accuracy_improvement * 0.01);
            }
        }

        Ok(())
    }
}

/// Runtime measurements fed back into the adaptive prefetcher.
#[derive(Debug, Clone)]
pub struct PerformanceFeedback {
    pub cache_hit_rate: f64,
    pub bandwidth_utilization: f64,
    pub memory_latency: Duration,
    pub cpu_utilization: f64,
}

/// Optimizes how data is placed and accessed to improve locality.
#[derive(Debug)]
pub struct DataLocalityOptimizer {
    strategy: LocalityStrategy,
    numa_topology: NUMATopology,
    memory_regions: HashMap<usize, MemoryRegionInfo>,
    access_analyzer: AccessPatternAnalyzer,
}

/// Metadata tracked for a contiguous memory region.
#[derive(Debug, Clone)]
pub struct MemoryRegionInfo {
    pub start_address: usize,
    pub size: usize,
    pub numa_node: usize,
    pub access_frequency: u64,
    pub last_access: Instant,
    pub access_pattern: AccessPatternType,
}

/// Collects temporal, spatial, and loop access statistics.
#[derive(Debug)]
pub struct AccessPatternAnalyzer {
    temporal_patterns: BTreeMap<Instant, Vec<usize>>,
    spatial_patterns: HashMap<usize, Vec<usize>>,
    loop_detection: LoopDetectionState,
}

/// State used while detecting loop-like access patterns.
#[derive(Debug)]
pub struct LoopDetectionState {
    loop_starts: HashMap<usize, usize>,
    current_iteration: Vec<usize>,
    detected_loops: Vec<LoopPattern>,
}

/// A detected loop over memory with a fixed stride.
#[derive(Debug, Clone)]
pub struct LoopPattern {
    pub start_address: usize,
    pub stride: isize,
    pub iterations: usize,
    pub confidence: f64,
}

/// Classification of an observed access pattern.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccessPatternType {
    Sequential,
    Random,
    Strided,
    Loop,
    Temporal,
    Hybrid,
}

impl DataLocalityOptimizer {
    pub fn new(strategy: LocalityStrategy, numa_topology: NUMATopology) -> Self {
        Self {
            strategy,
            numa_topology,
            memory_regions: HashMap::new(),
            access_analyzer: AccessPatternAnalyzer {
                temporal_patterns: BTreeMap::new(),
                spatial_patterns: HashMap::new(),
                loop_detection: LoopDetectionState {
                    loop_starts: HashMap::new(),
                    current_iteration: Vec::new(),
                    detected_loops: Vec::new(),
                },
            },
        }
    }

    /// Analyzes the access pattern and applies the configured locality strategy.
    pub fn optimize_data_placement(
        &mut self,
        state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<LocalityOptimizationResult> {
        let start_time = Instant::now();

        self.analyze_access_patterns(access_pattern)?;

        let optimization_result = match self.strategy {
            LocalityStrategy::Temporal => {
                self.optimize_temporal_locality(state_vector, access_pattern)?
            }
            LocalityStrategy::Spatial => {
                self.optimize_spatial_locality(state_vector, access_pattern)?
            }
            LocalityStrategy::Loop => self.optimize_loop_locality(state_vector, access_pattern)?,
            LocalityStrategy::CacheConscious => {
                self.optimize_cache_conscious(state_vector, access_pattern)?
            }
            LocalityStrategy::NUMATopology => {
                self.optimize_numa_topology(state_vector, access_pattern)?
            }
            LocalityStrategy::Hybrid => {
                self.optimize_hybrid_locality(state_vector, access_pattern)?
            }
        };

        let optimization_time = start_time.elapsed();

        Ok(LocalityOptimizationResult {
            optimization_time,
            locality_improvement: optimization_result.locality_improvement,
            memory_movements: optimization_result.memory_movements,
            numa_migrations: optimization_result.numa_migrations,
            cache_efficiency_gain: optimization_result.cache_efficiency_gain,
            strategy_used: self.strategy,
        })
    }

    fn analyze_access_patterns(&mut self, access_pattern: &[usize]) -> Result<()> {
        let now = Instant::now();

        self.access_analyzer
            .temporal_patterns
            .insert(now, access_pattern.to_vec());

        for &address in access_pattern {
            let page = address / 4096; // 4 KiB pages
            self.access_analyzer
                .spatial_patterns
                .entry(page)
                .or_insert_with(Vec::new)
                .push(address);
        }

        self.detect_loop_patterns(access_pattern)?;

        while self.access_analyzer.temporal_patterns.len() > 1000 {
            self.access_analyzer.temporal_patterns.pop_first();
        }

        Ok(())
    }

    fn detect_loop_patterns(&mut self, access_pattern: &[usize]) -> Result<()> {
        if access_pattern.len() < 3 {
            return Ok(());
        }

        for window in access_pattern.windows(3) {
            if let [start, middle, end] = window {
                let stride1 = *middle as isize - *start as isize;
                let stride2 = *end as isize - *middle as isize;

                if stride1 == stride2 && stride1 != 0 {
                    *self
                        .access_analyzer
                        .loop_detection
                        .loop_starts
                        .entry(*start)
                        .or_insert(0) += 1;

                    if self.access_analyzer.loop_detection.loop_starts[start] >= 3 {
                        let confidence =
                            self.access_analyzer.loop_detection.loop_starts[start] as f64 / 10.0;
                        let confidence = confidence.min(1.0);

                        self.access_analyzer
                            .loop_detection
                            .detected_loops
                            .push(LoopPattern {
                                start_address: *start,
                                stride: stride1,
                                iterations: self.access_analyzer.loop_detection.loop_starts[start],
                                confidence,
                            });
                    }
                }
            }
        }

        Ok(())
    }

    fn optimize_temporal_locality(
        &self,
        _state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let mut reuse_distances = HashMap::new();
        let mut last_access = HashMap::new();

        for (i, &address) in access_pattern.iter().enumerate() {
            if let Some(&last_pos) = last_access.get(&address) {
                let reuse_distance = i - last_pos;
                reuse_distances.insert(address, reuse_distance);
            }
            last_access.insert(address, i);
        }

        let avg_reuse_distance: f64 = reuse_distances.values().map(|&d| d as f64).sum::<f64>()
            / reuse_distances.len().max(1) as f64;

        let locality_improvement = (100.0 / (avg_reuse_distance + 1.0)).min(1.0);

        Ok(OptimizationResult {
            locality_improvement,
            memory_movements: 0,
            numa_migrations: 0,
            cache_efficiency_gain: locality_improvement * 0.5,
        })
    }

    fn optimize_spatial_locality(
        &self,
        _state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let mut spatial_clusters = HashMap::new();

        for &address in access_pattern {
            let cache_line = address / 64; // 64-byte cache lines
            *spatial_clusters.entry(cache_line).or_insert(0) += 1;
        }

        let total_accesses = access_pattern.len();
        let unique_cache_lines = spatial_clusters.len();

        let spatial_efficiency = if unique_cache_lines > 0 {
            total_accesses as f64 / unique_cache_lines as f64
        } else {
            1.0
        };

        let locality_improvement = (spatial_efficiency / 10.0).min(1.0);

        Ok(OptimizationResult {
            locality_improvement,
            memory_movements: spatial_clusters.len(),
            numa_migrations: 0,
            cache_efficiency_gain: locality_improvement * 0.7,
        })
    }

    fn optimize_loop_locality(
        &self,
        _state_vector: &mut OptimizedStateVector,
        _access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let total_loops = self.access_analyzer.loop_detection.detected_loops.len();
        let high_confidence_loops = self
            .access_analyzer
            .loop_detection
            .detected_loops
            .iter()
            .filter(|loop_pattern| loop_pattern.confidence > 0.8)
            .count();

        let loop_efficiency = if total_loops > 0 {
            high_confidence_loops as f64 / total_loops as f64
        } else {
            0.5
        };

        Ok(OptimizationResult {
            locality_improvement: loop_efficiency,
            memory_movements: total_loops,
            numa_migrations: 0,
            cache_efficiency_gain: loop_efficiency * 0.8,
        })
    }

    fn optimize_cache_conscious(
        &self,
        _state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        // Approximate cache model: 256 KiB capacity with 64-byte lines.
        let cache_size = 256 * 1024;
        let cache_line_size = 64;
        let cache_lines = cache_size / cache_line_size;

        let mut cache_hits = 0;
        let mut cache_misses = 0;
        let mut cache_state = HashMap::new();

        for &address in access_pattern {
            let cache_line = address / cache_line_size;
            let cache_set = cache_line % cache_lines;

            if cache_state.contains_key(&cache_set) {
                cache_hits += 1;
            } else {
                cache_misses += 1;
                cache_state.insert(cache_set, cache_line);
            }
        }

        let cache_hit_rate = if cache_hits + cache_misses > 0 {
            cache_hits as f64 / (cache_hits + cache_misses) as f64
        } else {
            0.0
        };

        Ok(OptimizationResult {
            locality_improvement: cache_hit_rate,
            memory_movements: cache_misses,
            numa_migrations: 0,
            cache_efficiency_gain: cache_hit_rate,
        })
    }

    fn optimize_numa_topology(
        &self,
        _state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let mut numa_accesses = HashMap::new();

        for &address in access_pattern {
            // Map addresses to NUMA nodes in 1 GiB chunks.
            let numa_node = (address / (1024 * 1024 * 1024)) % self.numa_topology.num_nodes;
            *numa_accesses.entry(numa_node).or_insert(0) += 1;
        }

        let dominant_node = numa_accesses.iter().max_by_key(|(_, &count)| count);
        let numa_efficiency = if let Some((_, &dominant_count)) = dominant_node {
            dominant_count as f64 / access_pattern.len() as f64
        } else {
            0.0
        };

        let numa_migrations = numa_accesses.len().saturating_sub(1);

        Ok(OptimizationResult {
            locality_improvement: numa_efficiency,
            memory_movements: 0,
            numa_migrations,
            cache_efficiency_gain: numa_efficiency * 0.6,
        })
    }

    fn optimize_hybrid_locality(
        &self,
        state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let temporal = self.optimize_temporal_locality(state_vector, access_pattern)?;
        let spatial = self.optimize_spatial_locality(state_vector, access_pattern)?;
        let numa = self.optimize_numa_topology(state_vector, access_pattern)?;

        // Weighted blend: temporal and spatial dominate, NUMA contributes less.
        let locality_improvement = temporal.locality_improvement * 0.4
            + spatial.locality_improvement * 0.4
            + numa.locality_improvement * 0.2;

        Ok(OptimizationResult {
            locality_improvement,
            memory_movements: temporal.memory_movements + spatial.memory_movements,
            numa_migrations: numa.numa_migrations,
            cache_efficiency_gain: temporal
                .cache_efficiency_gain
                .max(spatial.cache_efficiency_gain),
        })
    }

    /// Returns the loop patterns detected so far.
    pub fn get_detected_loops(&self) -> &[LoopPattern] {
        &self.access_analyzer.loop_detection.detected_loops
    }
}

/// Result of a single locality-optimization pass (internal form).
#[derive(Debug, Clone)]
pub struct OptimizationResult {
    pub locality_improvement: f64,
    pub memory_movements: usize,
    pub numa_migrations: usize,
    pub cache_efficiency_gain: f64,
}

/// Result returned by `optimize_data_placement`, including timing and strategy.
#[derive(Debug, Clone)]
pub struct LocalityOptimizationResult {
    pub optimization_time: Duration,
    pub locality_improvement: f64,
    pub memory_movements: usize,
    pub numa_migrations: usize,
    pub cache_efficiency_gain: f64,
    pub strategy_used: LocalityStrategy,
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::memory_bandwidth_optimization::{MemoryOptimizationConfig, OptimizedStateVector};

    #[test]
    fn test_access_pattern_predictor() {
        let mut predictor = AccessPatternPredictor::default();

        for i in 0..10 {
            predictor.record_access(i * 64);
        }

        let predictions = predictor.predict_next_accesses(5);
        assert_eq!(predictions.len(), 5);

        for (i, &pred) in predictions.iter().enumerate() {
            assert_eq!(pred, (10 + i) * 64);
        }
    }
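
    // Illustrative sketch, not part of the original suite: with no recorded
    // prediction outcomes the accuracy starts at zero, and a weight update on
    // matching slices should leave the ML weights finite.
    #[test]
    fn test_predictor_accuracy_and_weight_update() {
        let mut predictor = AccessPatternPredictor::default();
        assert_eq!(predictor.get_accuracy(), 0.0);

        let predictions = vec![100, 200, 300];
        let actual = vec![110, 210, 310];
        predictor.update_ml_weights(&predictions, &actual);

        assert!(predictor.ml_weights.iter().all(|w| w.is_finite()));
    }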

    #[test]
    fn test_memory_prefetcher_creation() {
        let config = PrefetchConfig::default();
        let numa = NUMATopology::default();

        let prefetcher = MemoryPrefetcher::new(config, numa).unwrap();
        assert_eq!(prefetcher.config.strategy, PrefetchStrategy::Adaptive);
    }

    #[test]
    fn test_prefetch_request() {
        let request = PrefetchRequest {
            address: 0x1000,
            priority: 0.8,
            hint_type: PrefetchHint::L1,
            timestamp: Instant::now(),
        };

        assert_eq!(request.address, 0x1000);
        assert_eq!(request.priority, 0.8);
        assert_eq!(request.hint_type, PrefetchHint::L1);
    }
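
    // Hedged sketch, not in the original tests: recording a single access with
    // the default config (distance 8, queue limit 64) should enqueue one
    // prefetch request per predicted address.
    #[test]
    fn test_record_access_enqueues_prefetches() {
        let config = PrefetchConfig::default();
        let numa = NUMATopology::default();
        let prefetcher = MemoryPrefetcher::new(config, numa).unwrap();

        prefetcher.record_access(0x2000).unwrap();

        let queue_len = prefetcher.prefetch_queue.lock().unwrap().len();
        assert_eq!(queue_len, prefetcher.config.distance);
    }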

    #[test]
    fn test_data_locality_optimizer() {
        let numa = NUMATopology::default();
        let optimizer = DataLocalityOptimizer::new(LocalityStrategy::Spatial, numa);

        assert!(matches!(optimizer.strategy, LocalityStrategy::Spatial));
    }

    #[test]
    fn test_loop_pattern_detection() {
        let mut optimizer =
            DataLocalityOptimizer::new(LocalityStrategy::Loop, NUMATopology::default());

        let access_pattern = vec![100, 200, 300, 400, 500, 600];
        optimizer.detect_loop_patterns(&access_pattern).unwrap();

        assert!(!optimizer
            .access_analyzer
            .loop_detection
            .loop_starts
            .is_empty());
    }
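
    // Illustrative follow-up check (an assumption about intended behavior, not
    // part of the original suite): once the same window start recurs with an
    // identical stride at least three times, detect_loop_patterns promotes it
    // to a LoopPattern that get_detected_loops reports.
    #[test]
    fn test_detected_loops_reporting() {
        let mut optimizer =
            DataLocalityOptimizer::new(LocalityStrategy::Loop, NUMATopology::default());

        let access_pattern = vec![0, 8, 16, 0, 8, 16, 0, 8, 16];
        optimizer.detect_loop_patterns(&access_pattern).unwrap();

        let loops = optimizer.get_detected_loops();
        assert!(!loops.is_empty());
        assert_eq!(loops[0].stride, 8);
    }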

    #[test]
    fn test_spatial_locality_optimization() {
        let numa = NUMATopology::default();
        let optimizer = DataLocalityOptimizer::new(LocalityStrategy::Spatial, numa);

        let access_pattern = vec![0, 8, 16, 24, 32, 40];
        let config = MemoryOptimizationConfig::default();
        let mut state_vector = OptimizedStateVector::new(3, config).unwrap();

        let result = optimizer
            .optimize_spatial_locality(&mut state_vector, &access_pattern)
            .unwrap();

        assert!(result.locality_improvement > 0.0);
        assert!(result.cache_efficiency_gain >= 0.0);
    }
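
    // A hedged sketch of an end-to-end run through optimize_data_placement with
    // the Hybrid strategy (not in the original tests): the result should carry
    // the strategy that was used and a blended score in [0, 1].
    #[test]
    fn test_hybrid_locality_optimization() {
        let numa = NUMATopology::default();
        let mut optimizer = DataLocalityOptimizer::new(LocalityStrategy::Hybrid, numa);

        let access_pattern = vec![0, 8, 16, 24, 0, 8, 16, 24];
        let config = MemoryOptimizationConfig::default();
        let mut state_vector = OptimizedStateVector::new(3, config).unwrap();

        let result = optimizer
            .optimize_data_placement(&mut state_vector, &access_pattern)
            .unwrap();

        assert_eq!(result.strategy_used, LocalityStrategy::Hybrid);
        assert!(result.locality_improvement >= 0.0 && result.locality_improvement <= 1.0);
    }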

    #[test]
    fn test_numa_topology_default() {
        let numa = NUMATopology::default();

        assert_eq!(numa.num_nodes, 1);
        assert_eq!(numa.cores_per_node.len(), 1);
        assert_eq!(numa.memory_per_node.len(), 1);
    }

    #[test]
    fn test_prefetch_hint_determination() {
        let config = PrefetchConfig::default();
        let numa = NUMATopology::default();
        let prefetcher = MemoryPrefetcher::new(config, numa).unwrap();

        assert_eq!(
            prefetcher.determine_prefetch_hint(0x1000, 0),
            PrefetchHint::L1
        );
        assert_eq!(
            prefetcher.determine_prefetch_hint(0x1000, 5),
            PrefetchHint::L2
        );
        assert_eq!(
            prefetcher.determine_prefetch_hint(0x1000, 10),
            PrefetchHint::L3
        );
        assert_eq!(
            prefetcher.determine_prefetch_hint(0x1000, 15),
            PrefetchHint::NonTemporal
        );
    }
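
    // Hedged sketch of the adaptive feedback path (not in the original tests):
    // with the default config (distance 8, degree 4), a low cache hit rate and
    // low bandwidth utilization should widen the prefetch distance to 10 and
    // raise the degree to 5, per the clamping rules in optimize_strategy.
    #[test]
    fn test_adaptive_strategy_adjustment() {
        let config = PrefetchConfig::default();
        let numa = NUMATopology::default();
        let mut prefetcher = MemoryPrefetcher::new(config, numa).unwrap();

        let feedback = PerformanceFeedback {
            cache_hit_rate: 0.5,
            bandwidth_utilization: 0.5,
            memory_latency: Duration::from_nanos(200),
            cpu_utilization: 0.4,
        };

        prefetcher.optimize_strategy(&feedback).unwrap();

        assert_eq!(prefetcher.config.distance, 10);
        assert_eq!(prefetcher.config.degree, 5);
    }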

    #[test]
    fn test_ml_prediction() {
        let mut predictor = AccessPatternPredictor::default();

        for i in 0..20 {
            predictor.record_access(i * 8);
        }

        let features = predictor.extract_features();
        assert_eq!(features.len(), 16);

        let prediction = predictor.ml_predict(&features, 0);
        assert!(prediction.is_finite());
    }

    #[test]
    fn test_performance_feedback() {
        let feedback = PerformanceFeedback {
            cache_hit_rate: 0.85,
            bandwidth_utilization: 0.7,
            memory_latency: Duration::from_nanos(100),
            cpu_utilization: 0.6,
        };

        assert_eq!(feedback.cache_hit_rate, 0.85);
        assert_eq!(feedback.bandwidth_utilization, 0.7);
    }
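
    // Illustrative sketch of the cache-conscious path (not in the original
    // tests): a pattern that touches two cache lines, each twice, should score
    // a 50% hit rate under the simple set-occupancy model in
    // optimize_cache_conscious.
    #[test]
    fn test_cache_conscious_optimization() {
        let numa = NUMATopology::default();
        let optimizer = DataLocalityOptimizer::new(LocalityStrategy::CacheConscious, numa);

        let access_pattern = vec![0, 0, 64, 64];
        let config = MemoryOptimizationConfig::default();
        let mut state_vector = OptimizedStateVector::new(3, config).unwrap();

        let result = optimizer
            .optimize_cache_conscious(&mut state_vector, &access_pattern)
            .unwrap();

        assert!((result.locality_improvement - 0.5).abs() < 1e-9);
        assert_eq!(result.memory_movements, 2);
    }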
}