//! Memory prefetching and data-locality optimization for the optimized
//! state-vector simulator: access-pattern prediction, software prefetch
//! scheduling, and NUMA-aware placement analysis.

use scirs2_core::parallel_ops::*;
use std::collections::{BTreeMap, HashMap, VecDeque};
use std::sync::{Arc, Mutex, RwLock};
use std::thread;
use std::time::{Duration, Instant};

use crate::error::Result;
use crate::memory_bandwidth_optimization::OptimizedStateVector;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PrefetchStrategy {
    None,
    Sequential,
    Stride,
    Pattern,
    MLGuided,
    Adaptive,
    NUMAAware,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LocalityStrategy {
    Temporal,
    Spatial,
    Loop,
    CacheConscious,
    NUMATopology,
    Hybrid,
}

#[derive(Debug, Clone)]
pub struct NUMATopology {
    pub num_nodes: usize,
    pub memory_per_node: Vec<usize>,
    pub cores_per_node: Vec<usize>,
    pub latency_matrix: Vec<Vec<usize>>,
    pub bandwidth_per_node: Vec<f64>,
    pub thread_node_mapping: HashMap<usize, usize>,
}

impl Default for NUMATopology {
    fn default() -> Self {
        Self {
            num_nodes: 1,
            memory_per_node: vec![64 * 1024 * 1024 * 1024], // 64 GiB
            cores_per_node: vec![8],
            latency_matrix: vec![vec![0]],
            bandwidth_per_node: vec![100.0 * 1024.0 * 1024.0 * 1024.0], // 100 GiB/s
            thread_node_mapping: HashMap::new(),
        }
    }
}

#[derive(Debug, Clone)]
pub struct PrefetchConfig {
    pub strategy: PrefetchStrategy,
    pub distance: usize,
    pub degree: usize,
    pub hardware_hints: bool,
    pub threshold: f64,
    pub max_queue_size: usize,
    pub cross_page_prefetch: bool,
    pub adaptive_adjustment: bool,
}

impl Default for PrefetchConfig {
    fn default() -> Self {
        Self {
            strategy: PrefetchStrategy::Adaptive,
            distance: 8,
            degree: 4,
            hardware_hints: true,
            threshold: 0.7,
            max_queue_size: 64,
            cross_page_prefetch: true,
            adaptive_adjustment: true,
        }
    }
}

#[derive(Debug)]
pub struct AccessPatternPredictor {
    access_history: VecDeque<usize>,
    stride_patterns: HashMap<isize, u64>,
    pattern_confidence: HashMap<String, f64>,
    ml_weights: Vec<f64>,
    prediction_cache: HashMap<usize, Vec<usize>>,
    correct_predictions: u64,
    total_predictions: u64,
}

impl Default for AccessPatternPredictor {
    fn default() -> Self {
        Self {
            access_history: VecDeque::with_capacity(1000),
            stride_patterns: HashMap::new(),
            pattern_confidence: HashMap::new(),
            ml_weights: vec![0.5; 16],
            prediction_cache: HashMap::new(),
            correct_predictions: 0,
            total_predictions: 0,
        }
    }
}

impl AccessPatternPredictor {
    pub fn record_access(&mut self, address: usize) {
        self.access_history.push_back(address);

        if self.access_history.len() > 1000 {
            self.access_history.pop_front();
        }

        if self.access_history.len() >= 2 {
            let prev_addr = self.access_history[self.access_history.len() - 2];
            let stride = address as isize - prev_addr as isize;
            *self.stride_patterns.entry(stride).or_insert(0) += 1;
        }

        self.update_pattern_confidence();
    }

    pub fn predict_next_accesses(&mut self, count: usize) -> Vec<usize> {
        if self.access_history.is_empty() {
            return Vec::new();
        }

        let current_addr = *self.access_history.back().unwrap();

        if let Some(cached) = self.prediction_cache.get(&current_addr) {
            return cached.clone();
        }

        let predictions = match self.get_dominant_pattern() {
            PredictedPattern::Stride(stride) => {
                self.predict_stride_pattern(current_addr, stride, count)
            }
            PredictedPattern::Sequential => self.predict_sequential_pattern(current_addr, count),
            PredictedPattern::Random => self.predict_random_pattern(current_addr, count),
            PredictedPattern::MLGuided => self.predict_ml_pattern(current_addr, count),
        };

        self.prediction_cache
            .insert(current_addr, predictions.clone());

        if self.prediction_cache.len() > 1000 {
            self.prediction_cache.clear();
        }

        self.total_predictions += 1;
        predictions
    }

    fn update_pattern_confidence(&mut self) {
        if self.total_predictions > 0 {
            let accuracy = self.correct_predictions as f64 / self.total_predictions as f64;

            self.pattern_confidence
                .insert("stride".to_string(), accuracy);
            self.pattern_confidence
                .insert("sequential".to_string(), accuracy * 0.9);
            self.pattern_confidence
                .insert("ml".to_string(), accuracy * 1.1);
        }
    }

    fn get_dominant_pattern(&self) -> PredictedPattern {
        let dominant_stride = self
            .stride_patterns
            .iter()
            .max_by_key(|(_, &count)| count)
            .map(|(&stride, _)| stride);

        match dominant_stride {
            Some(stride) if stride == 1 => PredictedPattern::Sequential,
            Some(stride) if stride != 0 => PredictedPattern::Stride(stride),
            _ => {
                let ml_confidence = self.pattern_confidence.get("ml").unwrap_or(&0.0);
                if *ml_confidence > 0.8 {
                    PredictedPattern::MLGuided
                } else {
                    PredictedPattern::Random
                }
            }
        }
    }

    fn predict_stride_pattern(
        &self,
        current_addr: usize,
        stride: isize,
        count: usize,
    ) -> Vec<usize> {
        let mut predictions = Vec::with_capacity(count);
        let mut addr = current_addr;

        for _ in 0..count {
            addr = (addr as isize + stride) as usize;
            predictions.push(addr);
        }

        predictions
    }

    fn predict_sequential_pattern(&self, current_addr: usize, count: usize) -> Vec<usize> {
        (1..=count).map(|i| current_addr + i).collect()
    }

    fn predict_random_pattern(&self, current_addr: usize, count: usize) -> Vec<usize> {
        (1..=count).map(|i| current_addr + i * 64).collect()
    }

    fn predict_ml_pattern(&self, current_addr: usize, count: usize) -> Vec<usize> {
        let mut predictions = Vec::with_capacity(count);

        let features = self.extract_features();

        for i in 0..count {
            let prediction = self.ml_predict(&features, i);
            predictions.push((current_addr as f64 + prediction) as usize);
        }

        predictions
    }

    fn extract_features(&self) -> Vec<f64> {
        let mut features = [0.0; 16];

        if self.access_history.len() >= 4 {
            let recent: Vec<_> = self.access_history.iter().rev().take(4).collect();

            for i in 0..3 {
                if i + 1 < recent.len() {
                    let stride = *recent[i] as f64 - *recent[i + 1] as f64;
                    features[i] = stride / 1000.0;
                }
            }

            features[3] = (*recent[0] % 1024) as f64 / 1024.0;
            features[4] = (*recent[0] / 1024) as f64;

            let dominant_stride = self
                .stride_patterns
                .iter()
                .max_by_key(|(_, &count)| count)
                .map_or(0, |(&stride, _)| stride);
            features[5] = dominant_stride as f64 / 1000.0;
        }

        features.to_vec()
    }

    fn ml_predict(&self, features: &[f64], step: usize) -> f64 {
        let mut prediction = 0.0;

        for (i, &feature) in features.iter().enumerate() {
            if i < self.ml_weights.len() {
                prediction += feature * self.ml_weights[i];
            }
        }

        prediction * (step + 1) as f64
    }

    pub fn update_ml_weights(&mut self, predictions: &[usize], actual: &[usize]) {
        if predictions.len() != actual.len() || predictions.is_empty() {
            return;
        }

        let learning_rate = 0.01;

        for (pred, &act) in predictions.iter().zip(actual.iter()) {
            let error = act as f64 - *pred as f64;

            for weight in &mut self.ml_weights {
                *weight += learning_rate * error * 0.1;
            }
        }
    }

    pub fn get_accuracy(&self) -> f64 {
        if self.total_predictions > 0 {
            self.correct_predictions as f64 / self.total_predictions as f64
        } else {
            0.0
        }
    }
}

#[derive(Debug, Clone)]
enum PredictedPattern {
    Stride(isize),
    Sequential,
    Random,
    MLGuided,
}

#[derive(Debug)]
pub struct MemoryPrefetcher {
    config: PrefetchConfig,
    predictor: Arc<Mutex<AccessPatternPredictor>>,
    prefetch_queue: Arc<Mutex<VecDeque<PrefetchRequest>>>,
    numa_topology: NUMATopology,
    stats: Arc<RwLock<PrefetchStats>>,
    prefetch_threads: Vec<thread::JoinHandle<()>>,
}

#[derive(Debug, Clone)]
pub struct PrefetchRequest {
    pub address: usize,
    pub priority: f64,
    pub hint_type: PrefetchHint,
    pub timestamp: Instant,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PrefetchHint {
    Temporal,
    NonTemporal,
    L1,
    L2,
    L3,
    Write,
}

#[derive(Debug, Clone, Default)]
pub struct PrefetchStats {
    pub total_requests: u64,
    pub successful_prefetches: u64,
    pub failed_prefetches: u64,
    pub average_latency: Duration,
    pub bandwidth_utilization: f64,
    pub cache_hit_improvement: f64,
}

impl MemoryPrefetcher {
    pub fn new(config: PrefetchConfig, numa_topology: NUMATopology) -> Result<Self> {
        let prefetcher = Self {
            config,
            predictor: Arc::new(Mutex::new(AccessPatternPredictor::default())),
            prefetch_queue: Arc::new(Mutex::new(VecDeque::new())),
            numa_topology,
            stats: Arc::new(RwLock::new(PrefetchStats::default())),
            prefetch_threads: Vec::new(),
        };

        Ok(prefetcher)
    }

    pub fn start_prefetch_threads(&mut self) -> Result<()> {
        let num_threads = self.config.degree.min(4);

        for thread_id in 0..num_threads {
            let queue = Arc::clone(&self.prefetch_queue);
            let stats = Arc::clone(&self.stats);
            let config = self.config.clone();

            let handle = thread::spawn(move || {
                Self::prefetch_worker_thread(thread_id, queue, stats, config);
            });

            self.prefetch_threads.push(handle);
        }

        Ok(())
    }

    fn prefetch_worker_thread(
        _thread_id: usize,
        queue: Arc<Mutex<VecDeque<PrefetchRequest>>>,
        stats: Arc<RwLock<PrefetchStats>>,
        _config: PrefetchConfig,
    ) {
        loop {
            let request = {
                let mut q = queue.lock().unwrap();
                q.pop_front()
            };

            if let Some(req) = request {
                let start_time = Instant::now();

                Self::execute_prefetch(&req);

                let latency = start_time.elapsed();
                if let Ok(mut s) = stats.write() {
                    s.total_requests += 1;
                    s.average_latency = if s.total_requests == 1 {
                        latency
                    } else {
                        Duration::from_nanos(u128::midpoint(
                            s.average_latency.as_nanos(),
                            latency.as_nanos(),
                        ) as u64)
                    };
                }
            } else {
                // No pending requests; back off briefly.
                thread::sleep(Duration::from_micros(100));
            }
        }
    }

    fn execute_prefetch(request: &PrefetchRequest) {
        // Touch the predicted address with a volatile read so the hardware
        // pulls the corresponding cache line closer to the core.
        unsafe {
            match request.hint_type {
                PrefetchHint::Temporal
                | PrefetchHint::L1
                | PrefetchHint::L2
                | PrefetchHint::L3
                | PrefetchHint::NonTemporal
                | PrefetchHint::Write => {
                    let _ = std::ptr::read_volatile(request.address as *const u8);
                }
            }
        }
    }

    pub fn record_access(&self, address: usize) -> Result<()> {
        if let Ok(mut predictor) = self.predictor.lock() {
            predictor.record_access(address);

            let predictions = predictor.predict_next_accesses(self.config.distance);

            if let Ok(mut queue) = self.prefetch_queue.lock() {
                for (i, &pred_addr) in predictions.iter().enumerate() {
                    if queue.len() < self.config.max_queue_size {
                        let priority = 1.0 - (i as f64 / predictions.len() as f64);
                        let hint_type = self.determine_prefetch_hint(pred_addr, i);

                        queue.push_back(PrefetchRequest {
                            address: pred_addr,
                            priority,
                            hint_type,
                            timestamp: Instant::now(),
                        });
                    }
                }
            }
        }

        Ok(())
    }

    const fn determine_prefetch_hint(&self, _address: usize, distance: usize) -> PrefetchHint {
        match distance {
            0..=2 => PrefetchHint::L1,
            3..=6 => PrefetchHint::L2,
            7..=12 => PrefetchHint::L3,
            _ => PrefetchHint::NonTemporal,
        }
    }

    pub fn get_stats(&self) -> PrefetchStats {
        self.stats.read().unwrap().clone()
    }

    pub fn optimize_strategy(&mut self, performance_feedback: &PerformanceFeedback) -> Result<()> {
        if !self.config.adaptive_adjustment {
            return Ok(());
        }

        if performance_feedback.cache_hit_rate < 0.8 {
            self.config.distance = (self.config.distance + 2).min(16);
        } else if performance_feedback.cache_hit_rate > 0.95 {
            self.config.distance = (self.config.distance.saturating_sub(1)).max(2);
        }

        if performance_feedback.bandwidth_utilization < 0.6 {
            self.config.degree = (self.config.degree + 1).min(8);
        } else if performance_feedback.bandwidth_utilization > 0.9 {
            self.config.degree = (self.config.degree.saturating_sub(1)).max(1);
        }

        if self.config.strategy == PrefetchStrategy::MLGuided {
            if let Ok(mut predictor) = self.predictor.lock() {
                let accuracy_improvement = performance_feedback.cache_hit_rate - 0.8;
                predictor
                    .ml_weights
                    .iter_mut()
                    .for_each(|w| *w += accuracy_improvement * 0.01);
            }
        }

        Ok(())
    }
}

#[derive(Debug, Clone)]
pub struct PerformanceFeedback {
    pub cache_hit_rate: f64,
    pub bandwidth_utilization: f64,
    pub memory_latency: Duration,
    pub cpu_utilization: f64,
}

#[derive(Debug)]
pub struct DataLocalityOptimizer {
    strategy: LocalityStrategy,
    numa_topology: NUMATopology,
    memory_regions: HashMap<usize, MemoryRegionInfo>,
    access_analyzer: AccessPatternAnalyzer,
}

#[derive(Debug, Clone)]
pub struct MemoryRegionInfo {
    pub start_address: usize,
    pub size: usize,
    pub numa_node: usize,
    pub access_frequency: u64,
    pub last_access: Instant,
    pub access_pattern: AccessPatternType,
}

#[derive(Debug)]
pub struct AccessPatternAnalyzer {
    temporal_patterns: BTreeMap<Instant, Vec<usize>>,
    spatial_patterns: HashMap<usize, Vec<usize>>,
    loop_detection: LoopDetectionState,
}

#[derive(Debug)]
pub struct LoopDetectionState {
    loop_starts: HashMap<usize, usize>,
    current_iteration: Vec<usize>,
    detected_loops: Vec<LoopPattern>,
}

#[derive(Debug, Clone)]
pub struct LoopPattern {
    pub start_address: usize,
    pub stride: isize,
    pub iterations: usize,
    pub confidence: f64,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccessPatternType {
    Sequential,
    Random,
    Strided,
    Loop,
    Temporal,
    Hybrid,
}

impl DataLocalityOptimizer {
    pub fn new(strategy: LocalityStrategy, numa_topology: NUMATopology) -> Self {
        Self {
            strategy,
            numa_topology,
            memory_regions: HashMap::new(),
            access_analyzer: AccessPatternAnalyzer {
                temporal_patterns: BTreeMap::new(),
                spatial_patterns: HashMap::new(),
                loop_detection: LoopDetectionState {
                    loop_starts: HashMap::new(),
                    current_iteration: Vec::new(),
                    detected_loops: Vec::new(),
                },
            },
        }
    }

    pub fn optimize_data_placement(
        &mut self,
        state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<LocalityOptimizationResult> {
        let start_time = Instant::now();

        self.analyze_access_patterns(access_pattern)?;

        let optimization_result = match self.strategy {
            LocalityStrategy::Temporal => {
                self.optimize_temporal_locality(state_vector, access_pattern)?
            }
            LocalityStrategy::Spatial => {
                self.optimize_spatial_locality(state_vector, access_pattern)?
            }
            LocalityStrategy::Loop => self.optimize_loop_locality(state_vector, access_pattern)?,
            LocalityStrategy::CacheConscious => {
                self.optimize_cache_conscious(state_vector, access_pattern)?
            }
            LocalityStrategy::NUMATopology => {
                self.optimize_numa_topology(state_vector, access_pattern)?
            }
            LocalityStrategy::Hybrid => {
                self.optimize_hybrid_locality(state_vector, access_pattern)?
            }
        };

        let optimization_time = start_time.elapsed();

        Ok(LocalityOptimizationResult {
            optimization_time,
            locality_improvement: optimization_result.locality_improvement,
            memory_movements: optimization_result.memory_movements,
            numa_migrations: optimization_result.numa_migrations,
            cache_efficiency_gain: optimization_result.cache_efficiency_gain,
            strategy_used: self.strategy,
        })
    }

    fn analyze_access_patterns(&mut self, access_pattern: &[usize]) -> Result<()> {
        let now = Instant::now();

        self.access_analyzer
            .temporal_patterns
            .insert(now, access_pattern.to_vec());

        for &address in access_pattern {
            let page = address / 4096; // 4 KiB pages
            self.access_analyzer
                .spatial_patterns
                .entry(page)
                .or_default()
                .push(address);
        }

        self.detect_loop_patterns(access_pattern)?;

        while self.access_analyzer.temporal_patterns.len() > 1000 {
            self.access_analyzer.temporal_patterns.pop_first();
        }

        Ok(())
    }

    fn detect_loop_patterns(&mut self, access_pattern: &[usize]) -> Result<()> {
        if access_pattern.len() < 3 {
            return Ok(());
        }

        for window in access_pattern.windows(3) {
            if let [start, middle, end] = window {
                let stride1 = *middle as isize - *start as isize;
                let stride2 = *end as isize - *middle as isize;

                if stride1 == stride2 && stride1 != 0 {
                    *self
                        .access_analyzer
                        .loop_detection
                        .loop_starts
                        .entry(*start)
                        .or_insert(0) += 1;

                    if self.access_analyzer.loop_detection.loop_starts[start] >= 3 {
                        let confidence =
                            self.access_analyzer.loop_detection.loop_starts[start] as f64 / 10.0;
                        let confidence = confidence.min(1.0);

                        self.access_analyzer
                            .loop_detection
                            .detected_loops
                            .push(LoopPattern {
                                start_address: *start,
                                stride: stride1,
                                iterations: self.access_analyzer.loop_detection.loop_starts[start],
                                confidence,
                            });
                    }
                }
            }
        }

        Ok(())
    }

    fn optimize_temporal_locality(
        &self,
        _state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let mut reuse_distances = HashMap::new();
        let mut last_access = HashMap::new();

        for (i, &address) in access_pattern.iter().enumerate() {
            if let Some(&last_pos) = last_access.get(&address) {
                let reuse_distance = i - last_pos;
                reuse_distances.insert(address, reuse_distance);
            }
            last_access.insert(address, i);
        }

        let avg_reuse_distance: f64 = reuse_distances.values().map(|&d| d as f64).sum::<f64>()
            / reuse_distances.len().max(1) as f64;

        let locality_improvement = (100.0 / (avg_reuse_distance + 1.0)).min(1.0);

        Ok(OptimizationResult {
            locality_improvement,
            memory_movements: 0,
            numa_migrations: 0,
            cache_efficiency_gain: locality_improvement * 0.5,
        })
    }

    fn optimize_spatial_locality(
        &self,
        _state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let mut spatial_clusters = HashMap::new();

        for &address in access_pattern {
            let cache_line = address / 64; // 64-byte cache lines
            *spatial_clusters.entry(cache_line).or_insert(0) += 1;
        }

        let total_accesses = access_pattern.len();
        let unique_cache_lines = spatial_clusters.len();

        let spatial_efficiency = if unique_cache_lines > 0 {
            total_accesses as f64 / unique_cache_lines as f64
        } else {
            1.0
        };

        let locality_improvement = (spatial_efficiency / 10.0).min(1.0);

        Ok(OptimizationResult {
            locality_improvement,
            memory_movements: spatial_clusters.len(),
            numa_migrations: 0,
            cache_efficiency_gain: locality_improvement * 0.7,
        })
    }

    fn optimize_loop_locality(
        &self,
        _state_vector: &mut OptimizedStateVector,
        _access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let total_loops = self.access_analyzer.loop_detection.detected_loops.len();
        let high_confidence_loops = self
            .access_analyzer
            .loop_detection
            .detected_loops
            .iter()
            .filter(|loop_pattern| loop_pattern.confidence > 0.8)
            .count();

        let loop_efficiency = if total_loops > 0 {
            high_confidence_loops as f64 / total_loops as f64
        } else {
            0.5
        };

        Ok(OptimizationResult {
            locality_improvement: loop_efficiency,
            memory_movements: total_loops,
            numa_migrations: 0,
            cache_efficiency_gain: loop_efficiency * 0.8,
        })
    }

    fn optimize_cache_conscious(
        &self,
        _state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let cache_size = 256 * 1024; // assume a 256 KiB cache
        let cache_line_size = 64;
        let cache_lines = cache_size / cache_line_size;

        let mut cache_hits = 0;
        let mut cache_misses = 0;
        let mut cache_state = HashMap::new();

        for &address in access_pattern {
            let cache_line = address / cache_line_size;
            let cache_set = cache_line % cache_lines;

            if let std::collections::hash_map::Entry::Vacant(e) = cache_state.entry(cache_set) {
                cache_misses += 1;
                e.insert(cache_line);
            } else {
                cache_hits += 1;
            }
        }

        let cache_hit_rate = if cache_hits + cache_misses > 0 {
            cache_hits as f64 / (cache_hits + cache_misses) as f64
        } else {
            0.0
        };

        Ok(OptimizationResult {
            locality_improvement: cache_hit_rate,
            memory_movements: cache_misses,
            numa_migrations: 0,
            cache_efficiency_gain: cache_hit_rate,
        })
    }

    fn optimize_numa_topology(
        &self,
        _state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let mut numa_accesses = HashMap::new();

        for &address in access_pattern {
            // Map addresses to NUMA nodes in 1 GiB chunks.
            let numa_node = (address / (1024 * 1024 * 1024)) % self.numa_topology.num_nodes;
            *numa_accesses.entry(numa_node).or_insert(0) += 1;
        }

        let dominant_node = numa_accesses.iter().max_by_key(|(_, &count)| count);
        let numa_efficiency = if let Some((_, &dominant_count)) = dominant_node {
            dominant_count as f64 / access_pattern.len() as f64
        } else {
            0.0
        };

        let numa_migrations = numa_accesses.len().saturating_sub(1);

        Ok(OptimizationResult {
            locality_improvement: numa_efficiency,
            memory_movements: 0,
            numa_migrations,
            cache_efficiency_gain: numa_efficiency * 0.6,
        })
    }

    fn optimize_hybrid_locality(
        &self,
        state_vector: &mut OptimizedStateVector,
        access_pattern: &[usize],
    ) -> Result<OptimizationResult> {
        let temporal = self.optimize_temporal_locality(state_vector, access_pattern)?;
        let spatial = self.optimize_spatial_locality(state_vector, access_pattern)?;
        let numa = self.optimize_numa_topology(state_vector, access_pattern)?;

        let locality_improvement = numa.locality_improvement.mul_add(
            0.2,
            temporal
                .locality_improvement
                .mul_add(0.4, spatial.locality_improvement * 0.4),
        );

        Ok(OptimizationResult {
            locality_improvement,
            memory_movements: temporal.memory_movements + spatial.memory_movements,
            numa_migrations: numa.numa_migrations,
            cache_efficiency_gain: temporal
                .cache_efficiency_gain
                .max(spatial.cache_efficiency_gain),
        })
    }

    pub fn get_detected_loops(&self) -> &[LoopPattern] {
        &self.access_analyzer.loop_detection.detected_loops
    }
}

#[derive(Debug, Clone)]
pub struct OptimizationResult {
    pub locality_improvement: f64,
    pub memory_movements: usize,
    pub numa_migrations: usize,
    pub cache_efficiency_gain: f64,
}

#[derive(Debug, Clone)]
pub struct LocalityOptimizationResult {
    pub optimization_time: Duration,
    pub locality_improvement: f64,
    pub memory_movements: usize,
    pub numa_migrations: usize,
    pub cache_efficiency_gain: f64,
    pub strategy_used: LocalityStrategy,
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::memory_bandwidth_optimization::{MemoryOptimizationConfig, OptimizedStateVector};

    #[test]
    fn test_access_pattern_predictor() {
        let mut predictor = AccessPatternPredictor::default();

        for i in 0..10 {
            predictor.record_access(i * 64);
        }

        let predictions = predictor.predict_next_accesses(5);
        assert_eq!(predictions.len(), 5);

        for (i, &pred) in predictions.iter().enumerate() {
            assert_eq!(pred, (10 + i) * 64);
        }
    }

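    // Illustrative sketch (not part of the original suite): the online update in
    // `update_ml_weights` nudges every weight by learning_rate (0.01) * error * 0.1,
    // so a single pair with error 100 moves each weight from its 0.5 default to 0.6.
    #[test]
    fn test_ml_weight_update() {
        let mut predictor = AccessPatternPredictor::default();

        predictor.update_ml_weights(&[100], &[200]);

        for &weight in &predictor.ml_weights {
            assert!((weight - 0.6).abs() < 1e-9);
        }

        // No predictions have been scored yet, so accuracy stays at 0.0.
        assert_eq!(predictor.get_accuracy(), 0.0);
    }
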
    #[test]
    fn test_memory_prefetcher_creation() {
        let config = PrefetchConfig::default();
        let numa = NUMATopology::default();

        let prefetcher = MemoryPrefetcher::new(config, numa).unwrap();
        assert_eq!(prefetcher.config.strategy, PrefetchStrategy::Adaptive);
    }

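    // Illustrative sketch: recording one access predicts `config.distance` follow-up
    // addresses and enqueues one prefetch request per prediction (bounded by
    // `max_queue_size`), so a single access should populate the queue.
    #[test]
    fn test_record_access_fills_queue() {
        let config = PrefetchConfig::default();
        let numa = NUMATopology::default();
        let prefetcher = MemoryPrefetcher::new(config, numa).unwrap();

        prefetcher.record_access(0x2000).unwrap();

        let queue_len = prefetcher.prefetch_queue.lock().unwrap().len();
        assert_eq!(queue_len, prefetcher.config.distance);
        assert!(queue_len <= prefetcher.config.max_queue_size);
    }
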
    #[test]
    fn test_prefetch_request() {
        let request = PrefetchRequest {
            address: 0x1000,
            priority: 0.8,
            hint_type: PrefetchHint::L1,
            timestamp: Instant::now(),
        };

        assert_eq!(request.address, 0x1000);
        assert_eq!(request.priority, 0.8);
        assert_eq!(request.hint_type, PrefetchHint::L1);
    }

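    // Illustrative sketch: with adaptive adjustment enabled (the default), poor
    // cache-hit and bandwidth feedback widens the prefetch distance (8 -> 10,
    // capped at 16) and raises the prefetch degree (4 -> 5, capped at 8).
    #[test]
    fn test_adaptive_strategy_adjustment() {
        let config = PrefetchConfig::default();
        let numa = NUMATopology::default();
        let mut prefetcher = MemoryPrefetcher::new(config, numa).unwrap();

        let feedback = PerformanceFeedback {
            cache_hit_rate: 0.5,
            bandwidth_utilization: 0.5,
            memory_latency: Duration::from_nanos(200),
            cpu_utilization: 0.5,
        };

        prefetcher.optimize_strategy(&feedback).unwrap();

        assert_eq!(prefetcher.config.distance, 10);
        assert_eq!(prefetcher.config.degree, 5);
    }
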
    #[test]
    fn test_data_locality_optimizer() {
        let numa = NUMATopology::default();
        let optimizer = DataLocalityOptimizer::new(LocalityStrategy::Spatial, numa);

        assert!(matches!(optimizer.strategy, LocalityStrategy::Spatial));
    }

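    // Illustrative sketch: repeated addresses give a short average reuse distance,
    // which `optimize_temporal_locality` maps into a locality improvement in (0, 1].
    #[test]
    fn test_temporal_locality_optimization() {
        let numa = NUMATopology::default();
        let optimizer = DataLocalityOptimizer::new(LocalityStrategy::Temporal, numa);

        let access_pattern = vec![0, 64, 0, 64, 0, 64];
        let config = MemoryOptimizationConfig::default();
        let mut state_vector = OptimizedStateVector::new(3, config).unwrap();

        let result = optimizer
            .optimize_temporal_locality(&mut state_vector, &access_pattern)
            .unwrap();

        assert!(result.locality_improvement > 0.0);
        assert!(result.locality_improvement <= 1.0);
    }
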
    #[test]
    fn test_loop_pattern_detection() {
        let mut optimizer =
            DataLocalityOptimizer::new(LocalityStrategy::Loop, NUMATopology::default());

        // Constant-stride accesses that should register loop starts.
        let access_pattern = vec![100, 200, 300, 400, 500, 600];
        optimizer.detect_loop_patterns(&access_pattern).unwrap();

        assert!(!optimizer
            .access_analyzer
            .loop_detection
            .loop_starts
            .is_empty());
    }

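    // Illustrative sketch: once the same constant-stride window is seen at least
    // three times, `detect_loop_patterns` promotes it into `detected_loops`, which
    // `get_detected_loops` exposes.
    #[test]
    fn test_detected_loops_accessor() {
        let mut optimizer =
            DataLocalityOptimizer::new(LocalityStrategy::Loop, NUMATopology::default());

        let access_pattern = vec![100, 200, 300];
        for _ in 0..3 {
            optimizer.detect_loop_patterns(&access_pattern).unwrap();
        }

        assert!(!optimizer.get_detected_loops().is_empty());
        assert_eq!(optimizer.get_detected_loops()[0].stride, 100);
    }
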
    #[test]
    fn test_spatial_locality_optimization() {
        let numa = NUMATopology::default();
        let optimizer = DataLocalityOptimizer::new(LocalityStrategy::Spatial, numa);

        // Accesses confined to a single 64-byte cache line.
        let access_pattern = vec![0, 8, 16, 24, 32, 40];
        let config = MemoryOptimizationConfig::default();
        let mut state_vector = OptimizedStateVector::new(3, config).unwrap();

        let result = optimizer
            .optimize_spatial_locality(&mut state_vector, &access_pattern)
            .unwrap();

        assert!(result.locality_improvement > 0.0);
        assert!(result.cache_efficiency_gain >= 0.0);
    }

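    // Illustrative sketch: `optimize_cache_conscious` tracks which cache sets have
    // been touched, so repeating the first half of the pattern counts as hits and
    // yields a 50% modeled hit rate with three misses.
    #[test]
    fn test_cache_conscious_optimization() {
        let numa = NUMATopology::default();
        let optimizer = DataLocalityOptimizer::new(LocalityStrategy::CacheConscious, numa);

        let access_pattern = vec![0, 64, 128, 0, 64, 128];
        let config = MemoryOptimizationConfig::default();
        let mut state_vector = OptimizedStateVector::new(3, config).unwrap();

        let result = optimizer
            .optimize_cache_conscious(&mut state_vector, &access_pattern)
            .unwrap();

        assert!((result.locality_improvement - 0.5).abs() < 1e-9);
        assert_eq!(result.memory_movements, 3);
    }
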
    #[test]
    fn test_numa_topology_default() {
        let numa = NUMATopology::default();

        assert_eq!(numa.num_nodes, 1);
        assert_eq!(numa.cores_per_node.len(), 1);
        assert_eq!(numa.memory_per_node.len(), 1);
    }

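    // Illustrative sketch: `get_stats` returns a snapshot of the shared statistics,
    // which start out zeroed via `PrefetchStats::default()`.
    #[test]
    fn test_prefetch_stats_snapshot() {
        let prefetcher =
            MemoryPrefetcher::new(PrefetchConfig::default(), NUMATopology::default()).unwrap();

        let stats = prefetcher.get_stats();
        assert_eq!(stats.total_requests, 0);
        assert_eq!(stats.average_latency, Duration::default());
    }
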
    #[test]
    fn test_prefetch_hint_determination() {
        let config = PrefetchConfig::default();
        let numa = NUMATopology::default();
        let prefetcher = MemoryPrefetcher::new(config, numa).unwrap();

        assert_eq!(
            prefetcher.determine_prefetch_hint(0x1000, 0),
            PrefetchHint::L1
        );
        assert_eq!(
            prefetcher.determine_prefetch_hint(0x1000, 5),
            PrefetchHint::L2
        );
        assert_eq!(
            prefetcher.determine_prefetch_hint(0x1000, 10),
            PrefetchHint::L3
        );
        assert_eq!(
            prefetcher.determine_prefetch_hint(0x1000, 15),
            PrefetchHint::NonTemporal
        );
    }

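    // Illustrative sketch: with the default single-node topology every address maps
    // to node 0, so NUMA locality is perfect and no migrations are suggested.
    #[test]
    fn test_numa_topology_optimization() {
        let optimizer =
            DataLocalityOptimizer::new(LocalityStrategy::NUMATopology, NUMATopology::default());

        let access_pattern = vec![0, 4096, 8192, 12288];
        let config = MemoryOptimizationConfig::default();
        let mut state_vector = OptimizedStateVector::new(3, config).unwrap();

        let result = optimizer
            .optimize_numa_topology(&mut state_vector, &access_pattern)
            .unwrap();

        assert_eq!(result.numa_migrations, 0);
        assert!((result.locality_improvement - 1.0).abs() < 1e-9);
    }
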
    #[test]
    fn test_ml_prediction() {
        let mut predictor = AccessPatternPredictor::default();

        for i in 0..20 {
            predictor.record_access(i * 8);
        }

        let features = predictor.extract_features();
        assert_eq!(features.len(), 16);

        let prediction = predictor.ml_predict(&features, 0);
        assert!(prediction.is_finite());
    }

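    // Illustrative sketch: the hybrid strategy blends temporal (0.4), spatial (0.4)
    // and NUMA (0.2) scores, so the combined improvement stays within [0, 1].
    #[test]
    fn test_hybrid_locality_optimization() {
        let optimizer =
            DataLocalityOptimizer::new(LocalityStrategy::Hybrid, NUMATopology::default());

        let access_pattern = vec![0, 8, 16, 0, 8, 16];
        let config = MemoryOptimizationConfig::default();
        let mut state_vector = OptimizedStateVector::new(3, config).unwrap();

        let result = optimizer
            .optimize_hybrid_locality(&mut state_vector, &access_pattern)
            .unwrap();

        assert!(result.locality_improvement >= 0.0);
        assert!(result.locality_improvement <= 1.0);
    }
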
    #[test]
    fn test_performance_feedback() {
        let feedback = PerformanceFeedback {
            cache_hit_rate: 0.85,
            bandwidth_utilization: 0.7,
            memory_latency: Duration::from_nanos(100),
            cpu_utilization: 0.6,
        };

        assert_eq!(feedback.cache_hit_rate, 0.85);
        assert_eq!(feedback.bandwidth_utilization, 0.7);
    }
}