Skip to main content

scirs2_metrics/optimization/
advanced_memory_optimization.rs

1//! Advanced memory optimization for GPU acceleration
2//!
3//! This module provides sophisticated memory management techniques for GPU-accelerated
4//! metrics computation, including memory pooling, prefetching, and adaptive allocation.
5
6#![allow(clippy::too_many_arguments)]
7#![allow(dead_code)]
8
9use crate::error::{MetricsError, Result};
10use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
11use scirs2_core::numeric::Float;
12use std::collections::{HashMap, VecDeque};
13use std::sync::{Arc, Mutex, RwLock};
14use std::time::{Duration, Instant};
15
16/// Advanced GPU memory pool with intelligent allocation strategies
17#[derive(Debug)]
18pub struct AdvancedMemoryPool {
19    /// Free memory blocks categorized by size
20    free_blocks: Arc<Mutex<HashMap<usize, VecDeque<MemoryBlock>>>>,
21    /// Allocated blocks for tracking
22    allocated_blocks: Arc<RwLock<HashMap<usize, AllocatedBlock>>>,
23    /// Memory usage statistics
24    stats: Arc<Mutex<MemoryStats>>,
25    /// Pool configuration
26    config: MemoryPoolConfig,
27    /// Allocation strategy
28    strategy: AllocationStrategy,
29    /// Memory prefetcher for predictive allocation
30    prefetcher: MemoryPrefetcher,
31}
32
33/// Memory block representation
34#[derive(Debug, Clone)]
35pub struct MemoryBlock {
36    /// Block identifier
37    pub id: usize,
38    /// Size in bytes
39    pub size: usize,
40    /// GPU device pointer (simulated as usize)
41    pub device_ptr: usize,
42    /// Last access time for LRU
43    pub last_accessed: Instant,
44    /// Block type and purpose
45    pub blocktype: BlockType,
46    /// Reference count for shared usage
47    pub ref_count: usize,
48}
49
50/// Allocated block tracking
51#[derive(Debug, Clone)]
52pub struct AllocatedBlock {
53    /// Original block
54    pub block: MemoryBlock,
55    /// Allocation timestamp
56    pub allocated_at: Instant,
57    /// Expected lifetime
58    pub expected_lifetime: Option<Duration>,
59    /// Usage pattern
60    pub usage_pattern: UsagePattern,
61}
62
/// Snapshot of pool usage counters and derived ratios.
///
/// All fields start at zero via [`Default`].
#[derive(Clone, Debug, Default)]
pub struct MemoryStats {
    /// Bytes currently allocated.
    pub total_allocated: usize,
    /// Highest value `total_allocated` has reached.
    pub peak_usage: usize,
    /// How many allocations have been recorded.
    pub allocation_count: u64,
    /// How many deallocations have been recorded.
    pub deallocation_count: u64,
    /// Cache hit rate.
    pub cache_hit_rate: f64,
    /// Fragmentation ratio of the pool.
    pub fragmentation_ratio: f64,
    /// Mean allocation size in bytes.
    pub avg_allocation_size: f64,
    /// Memory efficiency score.
    pub efficiency_score: f64,
}
83
/// Tunable settings of an [`AdvancedMemoryPool`].
#[derive(Clone, Debug)]
pub struct MemoryPoolConfig {
    /// Upper bound on the pool size, in bytes.
    pub max_pool_size: usize,
    /// Smallest block size, in bytes.
    pub min_block_size: usize,
    /// Alignment, in bytes, that block sizes are rounded up to.
    pub alignment: usize,
    /// Whether released blocks go through the coalescing path.
    pub enable_coalescing: bool,
    /// Usage ratio (allocated / max pool size) above which GC is triggered.
    pub gc_threshold: f64,
    /// Look-ahead window used for prefetching.
    pub prefetch_window: usize,
    /// Whether zero-copy optimizations are enabled.
    pub enable_zero_copy: bool,
}
102
/// Purpose a memory block is used for.
#[derive(Clone, Debug, PartialEq)]
pub enum BlockType {
    /// Arrays holding input data.
    InputData,
    /// Arrays holding computed results.
    OutputData,
    /// Scratch buffers for intermediate computation.
    IntermediateBuffer,
    /// Parameters passed to kernels.
    KernelParams,
    /// Blocks of shared memory.
    SharedMemory,
    /// Texture memory used for cached reads.
    TextureMemory,
}
119
120/// Memory allocation strategy
121#[derive(Debug, Clone)]
122pub enum AllocationStrategy {
123    /// First-fit allocation
124    FirstFit,
125    /// Best-fit allocation (minimize fragmentation)
126    BestFit,
127    /// Worst-fit allocation (keep large blocks)
128    WorstFit,
129    /// Buddy system allocation
130    BuddySystem,
131    /// Adaptive strategy based on usage patterns
132    Adaptive(AdaptiveStrategy),
133}
134
135/// Adaptive allocation strategy configuration
136#[derive(Debug, Clone)]
137pub struct AdaptiveStrategy {
138    /// Strategy switching threshold
139    pub switch_threshold: f64,
140    /// Historical window size for analysis
141    pub history_window: usize,
142    /// Performance weight factors
143    pub weights: StrategyWeights,
144}
145
/// Relative importance of the factors used to score a strategy.
#[derive(Clone, Debug)]
pub struct StrategyWeights {
    /// Importance of allocation speed.
    pub speed_weight: f64,
    /// Importance of memory efficiency.
    pub efficiency_weight: f64,
    /// Importance of avoiding fragmentation.
    pub fragmentation_weight: f64,
}
156
/// How an allocated block is expected to be accessed.
#[derive(Clone, Debug)]
pub enum UsagePattern {
    /// Accessed sequentially.
    Sequential,
    /// Accessed at random positions.
    Random,
    /// Streaming use: written once, read many times.
    Streaming,
    /// Short-lived buffer for a computation.
    Temporary,
    /// Persistent data with a long lifetime.
    Persistent,
}
171
172/// Memory prefetcher for predictive allocation
173#[derive(Debug)]
174pub struct MemoryPrefetcher {
175    /// Allocation history for pattern analysis (interior-mutable for &self access)
176    allocation_history: Arc<Mutex<VecDeque<AllocationRecord>>>,
177    /// Predicted future allocations
178    predictions: Vec<PredictedAllocation>,
179    /// Pattern recognition engine
180    pattern_engine: PatternEngine,
181    /// Prefetch configuration
182    config: PrefetchConfig,
183}
184
185/// Allocation record for pattern analysis
186#[derive(Debug, Clone)]
187pub struct AllocationRecord {
188    /// Allocation size
189    pub size: usize,
190    /// Block type
191    pub blocktype: BlockType,
192    /// Timestamp
193    pub timestamp: Instant,
194    /// Duration until deallocation
195    pub lifetime: Option<Duration>,
196}
197
198/// Predicted allocation
199#[derive(Debug, Clone)]
200pub struct PredictedAllocation {
201    /// Predicted size
202    pub size: usize,
203    /// Predicted type
204    pub blocktype: BlockType,
205    /// Confidence score (0.0 to 1.0)
206    pub confidence: f64,
207    /// Expected time until allocation
208    pub time_until: Duration,
209}
210
211/// Pattern recognition engine
212#[derive(Debug)]
213pub struct PatternEngine {
214    /// Learned patterns
215    patterns: Vec<AllocationPattern>,
216    /// Model accuracy metrics
217    accuracy: f64,
218    /// Training data size
219    training_samples: usize,
220}
221
222/// Allocation pattern
223#[derive(Debug, Clone)]
224pub struct AllocationPattern {
225    /// Pattern signature
226    pub signature: Vec<usize>,
227    /// Frequency of occurrence
228    pub frequency: u32,
229    /// Prediction accuracy
230    pub accuracy: f64,
231    /// Associated block types
232    pub block_types: Vec<BlockType>,
233}
234
/// Settings controlling the memory prefetcher.
#[derive(Clone, Debug)]
pub struct PrefetchConfig {
    /// Turns predictive prefetching on or off.
    pub enable_prediction: bool,
    /// Lowest confidence at which a prediction is prefetched.
    pub confidence_threshold: f64,
    /// Furthest look-ahead allowed for prefetching.
    pub max_lookahead: Duration,
    /// Upper limit on the prefetch buffer size.
    pub buffer_size_limit: usize,
}
247
248impl Default for MemoryPoolConfig {
249    fn default() -> Self {
250        Self {
251            max_pool_size: 1024 * 1024 * 1024, // 1GB default
252            min_block_size: 1024,              // 1KB minimum
253            alignment: 256,                    // 256-byte alignment for GPU
254            enable_coalescing: true,
255            gc_threshold: 0.8, // Trigger GC at 80% usage
256            prefetch_window: 10,
257            enable_zero_copy: true,
258        }
259    }
260}
261
262impl Default for AdaptiveStrategy {
263    fn default() -> Self {
264        Self {
265            switch_threshold: 0.1,
266            history_window: 1000,
267            weights: StrategyWeights {
268                speed_weight: 0.4,
269                efficiency_weight: 0.4,
270                fragmentation_weight: 0.2,
271            },
272        }
273    }
274}
275
276impl Default for PrefetchConfig {
277    fn default() -> Self {
278        Self {
279            enable_prediction: true,
280            confidence_threshold: 0.75,
281            max_lookahead: Duration::from_millis(100),
282            buffer_size_limit: 64 * 1024 * 1024, // 64MB prefetch buffer
283        }
284    }
285}
286
287impl AdvancedMemoryPool {
288    /// Create new advanced memory pool
289    pub fn new(config: MemoryPoolConfig) -> Self {
290        Self {
291            free_blocks: Arc::new(Mutex::new(HashMap::new())),
292            allocated_blocks: Arc::new(RwLock::new(HashMap::new())),
293            stats: Arc::new(Mutex::new(MemoryStats::default())),
294            strategy: AllocationStrategy::Adaptive(AdaptiveStrategy::default()),
295            prefetcher: MemoryPrefetcher::new(PrefetchConfig::default()),
296            config,
297        }
298    }
299
300    /// Allocate memory block with intelligent sizing
301    pub fn allocate(&self, size: usize, blocktype: BlockType) -> Result<MemoryBlock> {
302        let aligned_size = self.align_size(size);
303
304        // Check if prefetcher has a suitable block ready
305        if let Some(block) = self
306            .prefetcher
307            .get_predicted_block(aligned_size, &blocktype)?
308        {
309            self.record_allocation(&block)?;
310            return Ok(block);
311        }
312
313        // Perform allocation based on strategy
314        let block = match &self.strategy {
315            AllocationStrategy::FirstFit => self.allocate_first_fit(aligned_size, blocktype)?,
316            AllocationStrategy::BestFit => self.allocate_best_fit(aligned_size, blocktype)?,
317            AllocationStrategy::WorstFit => self.allocate_worst_fit(aligned_size, blocktype)?,
318            AllocationStrategy::BuddySystem => {
319                self.allocate_buddy_system(aligned_size, blocktype)?
320            }
321            AllocationStrategy::Adaptive(strategy) => {
322                self.allocate_adaptive(aligned_size, blocktype, strategy)?
323            }
324        };
325
326        self.record_allocation(&block)?;
327        self.update_prefetcher(&block);
328
329        Ok(block)
330    }
331
332    /// Deallocate memory block
333    pub fn deallocate(&self, block: MemoryBlock) -> Result<()> {
334        // Record deallocation for statistics
335        {
336            let mut stats = self.stats.lock().expect("Operation failed");
337            stats.deallocation_count += 1;
338            stats.total_allocated = stats.total_allocated.saturating_sub(block.size);
339        }
340
341        // Remove from allocated blocks
342        {
343            let mut allocated = self.allocated_blocks.write().expect("Operation failed");
344            allocated.remove(&block.id);
345        }
346
347        // Return to free pool or coalesce with adjacent blocks
348        if self.config.enable_coalescing {
349            self.coalesce_and_return(block)?;
350        } else {
351            self.return_to_pool(block)?;
352        }
353
354        // Trigger garbage collection if needed
355        if self.should_run_gc()? {
356            self.run_garbage_collection()?;
357        }
358
359        Ok(())
360    }
361
362    /// Get current memory statistics
363    pub fn get_stats(&self) -> MemoryStats {
364        let stats = self.stats.lock().expect("Operation failed");
365        stats.clone()
366    }
367
368    /// Optimize memory layout for better performance
369    pub fn optimize_layout(&self) -> Result<()> {
370        // Analyze current allocation patterns
371        let patterns = self.analyze_allocation_patterns()?;
372
373        // Suggest layout optimizations
374        let optimizations = self.suggest_optimizations(&patterns)?;
375
376        // Apply optimizations if beneficial
377        for optimization in optimizations {
378            self.apply_optimization(optimization)?;
379        }
380
381        Ok(())
382    }
383
384    /// Benchmark different allocation strategies
385    pub fn benchmark_strategies(
386        &self,
387        workload: &[AllocationRequest],
388    ) -> Result<StrategyBenchmark> {
389        let mut results = HashMap::new();
390
391        for strategy in &[
392            AllocationStrategy::FirstFit,
393            AllocationStrategy::BestFit,
394            AllocationStrategy::WorstFit,
395            AllocationStrategy::BuddySystem,
396        ] {
397            let metrics = self.benchmark_strategy(strategy, workload)?;
398            results.insert(format!("{:?}", strategy), metrics);
399        }
400
401        Ok(StrategyBenchmark { results })
402    }
403
404    // Private implementation methods
405
406    fn align_size(&self, size: usize) -> usize {
407        ((size + self.config.alignment - 1) / self.config.alignment) * self.config.alignment
408    }
409
410    fn allocate_first_fit(&self, size: usize, blocktype: BlockType) -> Result<MemoryBlock> {
411        let mut free_blocks = self.free_blocks.lock().expect("Operation failed");
412
413        // Find first suitable block
414        for (block_size, blocks) in free_blocks.iter_mut() {
415            if *block_size >= size {
416                if let Some(mut block) = blocks.pop_front() {
417                    block.blocktype = blocktype;
418                    block.last_accessed = Instant::now();
419
420                    // Split block if significantly larger
421                    if *block_size > size * 2 {
422                        let remaining = MemoryBlock {
423                            id: self.generate_block_id(),
424                            size: *block_size - size,
425                            device_ptr: block.device_ptr + size,
426                            last_accessed: Instant::now(),
427                            blocktype: BlockType::IntermediateBuffer,
428                            ref_count: 0,
429                        };
430
431                        blocks.push_front(remaining);
432                    }
433
434                    block.size = size;
435                    return Ok(block);
436                }
437            }
438        }
439
440        // No suitable block found, allocate new
441        self.allocate_new_block(size, blocktype)
442    }
443
444    fn allocate_best_fit(&self, size: usize, blocktype: BlockType) -> Result<MemoryBlock> {
445        let mut free_blocks = self.free_blocks.lock().expect("Operation failed");
446        let mut best_fit: Option<(usize, usize)> = None; // (block_size, index)
447        let mut best_waste = usize::MAX;
448
449        // Find block with minimum waste
450        for (block_size, blocks) in free_blocks.iter() {
451            if *block_size >= size {
452                let waste = *block_size - size;
453                if waste < best_waste {
454                    best_waste = waste;
455                    best_fit = Some((*block_size, 0)); // Simplified - would need proper indexing
456                }
457            }
458        }
459
460        if let Some((block_size, _)) = best_fit {
461            if let Some(blocks) = free_blocks.get_mut(&block_size) {
462                if let Some(mut block) = blocks.pop_front() {
463                    block.blocktype = blocktype;
464                    block.last_accessed = Instant::now();
465
466                    // Handle block splitting for best fit
467                    if block_size > size {
468                        let remaining_size = block_size - size;
469                        if remaining_size >= self.config.min_block_size {
470                            let remaining = MemoryBlock {
471                                id: self.generate_block_id(),
472                                size: remaining_size,
473                                device_ptr: block.device_ptr + size,
474                                last_accessed: Instant::now(),
475                                blocktype: BlockType::IntermediateBuffer,
476                                ref_count: 0,
477                            };
478
479                            free_blocks
480                                .entry(remaining_size)
481                                .or_insert_with(VecDeque::new)
482                                .push_back(remaining);
483                        }
484                    }
485
486                    block.size = size;
487                    return Ok(block);
488                }
489            }
490        }
491
492        self.allocate_new_block(size, blocktype)
493    }
494
495    fn allocate_worst_fit(&self, size: usize, blocktype: BlockType) -> Result<MemoryBlock> {
496        // Simplified implementation - find largest available block
497        self.allocate_new_block(size, blocktype)
498    }
499
500    fn allocate_buddy_system(&self, size: usize, blocktype: BlockType) -> Result<MemoryBlock> {
501        // Find next power of 2 >= size for buddy system
502        let buddy_size = size.next_power_of_two();
503        self.allocate_new_block(buddy_size, blocktype)
504    }
505
506    fn allocate_adaptive(
507        &self,
508        size: usize,
509        blocktype: BlockType,
510        _strategy: &AdaptiveStrategy,
511    ) -> Result<MemoryBlock> {
512        // Analyze current performance metrics
513        let stats = self.stats.lock().expect("Operation failed");
514        let fragmentation = stats.fragmentation_ratio;
515        let efficiency = stats.efficiency_score;
516
517        // Choose _strategy based on current conditions
518        let chosen_strategy = if fragmentation > 0.3 {
519            AllocationStrategy::BestFit
520        } else if efficiency < 0.7 {
521            AllocationStrategy::FirstFit
522        } else {
523            AllocationStrategy::BuddySystem
524        };
525
526        drop(stats);
527
528        match chosen_strategy {
529            AllocationStrategy::FirstFit => self.allocate_first_fit(size, blocktype),
530            AllocationStrategy::BestFit => self.allocate_best_fit(size, blocktype),
531            AllocationStrategy::BuddySystem => self.allocate_buddy_system(size, blocktype),
532            _ => self.allocate_new_block(size, blocktype),
533        }
534    }
535
536    fn allocate_new_block(&self, size: usize, blocktype: BlockType) -> Result<MemoryBlock> {
537        // Simulate GPU memory allocation
538        let device_ptr = self.simulate_gpu_malloc(size)?;
539
540        let block = MemoryBlock {
541            id: self.generate_block_id(),
542            size,
543            device_ptr,
544            last_accessed: Instant::now(),
545            blocktype,
546            ref_count: 1,
547        };
548
549        Ok(block)
550    }
551
552    fn simulate_gpu_malloc(&self, size: usize) -> Result<usize> {
553        // Simulate GPU memory allocation - in real implementation would use CUDA/OpenCL
554        static mut NEXT_PTR: usize = 0x1000_0000; // Simulate GPU memory space
555
556        unsafe {
557            let ptr = NEXT_PTR;
558            NEXT_PTR += size;
559
560            // Check if we exceed simulated GPU memory
561            if NEXT_PTR > 0x1000_0000 + self.config.max_pool_size {
562                return Err(MetricsError::ComputationError(
563                    "GPU memory exhausted".to_string(),
564                ));
565            }
566
567            Ok(ptr)
568        }
569    }
570
571    fn generate_block_id(&self) -> usize {
572        use std::sync::atomic::{AtomicUsize, Ordering};
573        static NEXT_ID: AtomicUsize = AtomicUsize::new(1);
574        NEXT_ID.fetch_add(1, Ordering::Relaxed)
575    }
576
577    fn record_allocation(&self, block: &MemoryBlock) -> Result<()> {
578        // Record in allocated blocks
579        {
580            let mut allocated = self.allocated_blocks.write().expect("Operation failed");
581            allocated.insert(
582                block.id,
583                AllocatedBlock {
584                    block: block.clone(),
585                    allocated_at: Instant::now(),
586                    expected_lifetime: None,
587                    usage_pattern: UsagePattern::Sequential, // Could be analyzed
588                },
589            );
590        }
591
592        // Update statistics
593        {
594            let mut stats = self.stats.lock().expect("Operation failed");
595            stats.allocation_count += 1;
596            stats.total_allocated += block.size;
597            if stats.total_allocated > stats.peak_usage {
598                stats.peak_usage = stats.total_allocated;
599            }
600
601            // Update average allocation size
602            stats.avg_allocation_size =
603                stats.total_allocated as f64 / stats.allocation_count as f64;
604        }
605
606        Ok(())
607    }
608
609    fn update_prefetcher(&self, block: &MemoryBlock) {
610        // Record this allocation in the prefetcher's history so that pattern
611        // analysis has data to work with.
612        self.prefetcher.record_allocation(AllocationRecord {
613            size: block.size,
614            blocktype: block.blocktype.clone(),
615            timestamp: Instant::now(),
616            lifetime: None,
617        });
618    }
619
620    fn coalesce_and_return(&self, block: MemoryBlock) -> Result<()> {
621        // Try to coalesce with adjacent free blocks
622        // Simplified implementation - in practice would need more sophisticated buddy tracking
623        self.return_to_pool(block)
624    }
625
626    fn return_to_pool(&self, block: MemoryBlock) -> Result<()> {
627        let mut free_blocks = self.free_blocks.lock().expect("Operation failed");
628        free_blocks
629            .entry(block.size)
630            .or_insert_with(VecDeque::new)
631            .push_back(block);
632        Ok(())
633    }
634
635    fn should_run_gc(&self) -> Result<bool> {
636        let stats = self.stats.lock().expect("Operation failed");
637        let usage_ratio = stats.total_allocated as f64 / self.config.max_pool_size as f64;
638        Ok(usage_ratio > self.config.gc_threshold)
639    }
640
641    fn run_garbage_collection(&self) -> Result<()> {
642        // Implement garbage collection logic
643        // - Remove unused blocks
644        // - Coalesce adjacent free blocks
645        // - Update fragmentation statistics
646
647        let mut stats = self.stats.lock().expect("Operation failed");
648        stats.fragmentation_ratio = self.calculate_fragmentation()?;
649        stats.efficiency_score = self.calculate_efficiency()?;
650
651        Ok(())
652    }
653
654    fn calculate_fragmentation(&self) -> Result<f64> {
655        // Calculate memory fragmentation ratio
656        // Simplified calculation - real implementation would be more sophisticated
657        Ok(0.1) // 10% fragmentation as example
658    }
659
660    fn calculate_efficiency(&self) -> Result<f64> {
661        // Calculate memory utilization efficiency
662        let stats = self.stats.lock().expect("Operation failed");
663        if stats.peak_usage == 0 {
664            Ok(1.0)
665        } else {
666            Ok(stats.total_allocated as f64 / stats.peak_usage as f64)
667        }
668    }
669
670    fn analyze_allocation_patterns(&self) -> Result<Vec<AllocationPattern>> {
671        let history = self.prefetcher.allocation_history.lock().map_err(|_| {
672            MetricsError::ComputationError("failed to acquire prefetcher history lock".to_string())
673        })?;
674
675        if history.is_empty() {
676            return Ok(vec![]);
677        }
678
679        // Group allocations by size bucket (nearest power-of-two kilobyte).
680        // Each bucket becomes one AllocationPattern whose `signature` is the
681        // sorted list of observed sizes within that bucket.
682        let mut buckets: HashMap<usize, Vec<AllocationRecord>> = HashMap::new();
683        for record in history.iter() {
684            // Bucket key = next power-of-two of (size / 1024) rounded up.
685            let bucket_kib = (record.size.max(1) + 1023) / 1024;
686            let bucket_key = bucket_kib.next_power_of_two().max(1);
687            buckets
688                .entry(bucket_key)
689                .or_insert_with(Vec::new)
690                .push(record.clone());
691        }
692
693        let total = history.len() as f64;
694        let mut patterns: Vec<AllocationPattern> = buckets
695            .into_iter()
696            .map(|(bucket_key, records)| {
697                let frequency = records.len() as u32;
698                // Collect distinct sizes in this bucket for the signature.
699                let mut sizes: Vec<usize> = records.iter().map(|r| r.size).collect();
700                sizes.sort_unstable();
701                sizes.dedup();
702
703                // Collect distinct block types.
704                let mut block_types: Vec<BlockType> =
705                    records.iter().map(|r| r.blocktype.clone()).collect();
706                block_types.sort_by_key(|bt| format!("{:?}", bt));
707                block_types.dedup_by_key(|bt| format!("{:?}", bt));
708
709                // Accuracy: fraction of all allocations that fall in this bucket
710                // (i.e. how well this single pattern describes the workload).
711                let accuracy = frequency as f64 / total;
712
713                AllocationPattern {
714                    // Signature = [bucket_key_kib] ++ sorted unique sizes
715                    signature: std::iter::once(bucket_key).chain(sizes).collect(),
716                    frequency,
717                    accuracy,
718                    block_types,
719                }
720            })
721            .collect();
722
723        // Most frequent patterns first.
724        patterns.sort_by(|a, b| b.frequency.cmp(&a.frequency));
725
726        Ok(patterns)
727    }
728
729    fn suggest_optimizations(
730        &self,
731        patterns: &[AllocationPattern],
732    ) -> Result<Vec<OptimizationType>> {
733        if patterns.is_empty() {
734            return Ok(vec![]);
735        }
736
737        let mut suggestions: Vec<OptimizationType> = Vec::new();
738
739        // Heuristic 1: many small allocations (bucket ≤ 4 KiB, high frequency)
740        // → pooling / coalescing will reduce per-allocation overhead.
741        let small_alloc_count: u32 = patterns
742            .iter()
743            .filter(|p| p.signature.first().copied().unwrap_or(0) <= 4)
744            .map(|p| p.frequency)
745            .sum();
746        let total_count: u32 = patterns.iter().map(|p| p.frequency).sum();
747        if total_count > 0 && small_alloc_count * 2 > total_count {
748            suggestions.push(OptimizationType::MemoryCoalescing);
749        }
750
751        // Heuristic 2: many distinct size classes (> 4 patterns) → fragmentation
752        // risk; reordering blocks by size can improve contiguity.
753        if patterns.len() > 4 {
754            suggestions.push(OptimizationType::BlockReordering);
755        }
756
757        // Heuristic 3: single dominant pattern with high accuracy (> 60 %) →
758        // prefetch is likely to be profitable.
759        if patterns.first().map(|p| p.accuracy > 0.6).unwrap_or(false) {
760            suggestions.push(OptimizationType::PrefetchOptimization);
761        }
762
763        // Heuristic 4: high diversity (accuracy of best pattern < 25 %) →
764        // current fixed strategy is not well-matched; switch to Adaptive.
765        if patterns.first().map(|p| p.accuracy < 0.25).unwrap_or(false) {
766            suggestions.push(OptimizationType::AllocationStrategyChange);
767        }
768
769        Ok(suggestions)
770    }
771
772    fn apply_optimization(&self, optimization: OptimizationType) -> Result<()> {
773        // Apply specific optimization
774        Ok(())
775    }
776
777    fn benchmark_strategy(
778        &self,
779        strategy: &AllocationStrategy,
780        workload: &[AllocationRequest],
781    ) -> Result<StrategyMetrics> {
782        // Benchmark specific allocation strategy
783        Ok(StrategyMetrics::default())
784    }
785}
786
787impl MemoryPrefetcher {
788    fn new(config: PrefetchConfig) -> Self {
789        Self {
790            allocation_history: Arc::new(Mutex::new(VecDeque::new())),
791            predictions: Vec::new(),
792            pattern_engine: PatternEngine {
793                patterns: Vec::new(),
794                accuracy: 0.0,
795                training_samples: 0,
796            },
797            config,
798        }
799    }
800
801    /// Record an allocation event in the history ring-buffer.
802    ///
803    /// The history is capped at the configured `history_window` size so that
804    /// memory usage remains bounded even under long-running workloads.
805    fn record_allocation(&self, record: AllocationRecord) {
806        let window = self.config.buffer_size_limit.max(1);
807        if let Ok(mut hist) = self.allocation_history.lock() {
808            if hist.len() >= window {
809                hist.pop_front();
810            }
811            hist.push_back(record);
812        }
813    }
814
815    fn get_predicted_block(
816        &self,
817        size: usize,
818        blocktype: &BlockType,
819    ) -> Result<Option<MemoryBlock>> {
820        // Check if we have a predicted block ready
821        // Implementation would check predictions and return suitable block
822        let _ = (size, blocktype);
823        Ok(None)
824    }
825}
826
/// Kinds of optimization that can be suggested for the pool.
#[derive(Clone, Debug)]
pub enum OptimizationType {
    /// Coalesce memory to cut per-allocation overhead.
    MemoryCoalescing,
    /// Reorder blocks to improve contiguity.
    BlockReordering,
    /// Tune prefetching.
    PrefetchOptimization,
    /// Switch to a different allocation strategy.
    AllocationStrategyChange,
}
835
836/// Allocation request for benchmarking
837#[derive(Debug, Clone)]
838pub struct AllocationRequest {
839    pub size: usize,
840    pub blocktype: BlockType,
841    pub lifetime: Duration,
842}
843
844/// Strategy benchmark results
845#[derive(Debug)]
846pub struct StrategyBenchmark {
847    pub results: HashMap<String, StrategyMetrics>,
848}
849
/// Performance figures measured for one allocation strategy.
///
/// All fields are zero in the [`Default`] value.
#[derive(Default, Debug)]
pub struct StrategyMetrics {
    /// Allocation speed.
    pub allocation_speed: f64,
    /// Fragmentation ratio.
    pub fragmentation_ratio: f64,
    /// Memory efficiency.
    pub memory_efficiency: f64,
    /// Cache hit rate.
    pub cache_hit_rate: f64,
}
858
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed pool must report zeroed allocation statistics.
    #[test]
    fn test_memory_pool_creation() {
        let pool = AdvancedMemoryPool::new(MemoryPoolConfig::default());

        let stats = pool.get_stats();
        assert_eq!(stats.total_allocated, 0);
        assert_eq!(stats.allocation_count, 0);
    }

    /// A single allocation returns a block of the requested size and type and
    /// is reflected in the pool statistics.
    #[test]
    fn test_basic_allocation() {
        let pool = AdvancedMemoryPool::new(MemoryPoolConfig::default());

        let block = pool
            .allocate(1024, BlockType::InputData)
            .expect("allocating 1024 from a default pool must succeed");
        assert_eq!(block.size, 1024);
        assert_eq!(block.blocktype, BlockType::InputData);

        let stats = pool.get_stats();
        assert_eq!(stats.allocation_count, 1);
        assert!(stats.total_allocated >= 1024);
    }

    /// Returning an allocated block to the pool is counted as one deallocation.
    #[test]
    fn test_allocation_deallocation_cycle() {
        let pool = AdvancedMemoryPool::new(MemoryPoolConfig::default());

        let block = pool
            .allocate(2048, BlockType::OutputData)
            .expect("allocating 2048 from a default pool must succeed");

        pool.deallocate(block)
            .expect("deallocating a freshly allocated block must succeed");

        let stats = pool.get_stats();
        assert_eq!(stats.deallocation_count, 1);
    }

    /// With `alignment: 512`, the block returned for a request of 100 must have
    /// a size that is a multiple of 512 and still covers the request.
    #[test]
    fn test_memory_alignment() {
        let config = MemoryPoolConfig {
            alignment: 512,
            ..Default::default()
        };
        let pool = AdvancedMemoryPool::new(config);

        let block = pool
            .allocate(100, BlockType::IntermediateBuffer)
            .expect("allocating 100 from a 512-aligned pool must succeed");
        assert_eq!(
            block.size % 512,
            0,
            "block size must be a multiple of the configured alignment"
        );
        // Guards against a vacuous pass: a zero-sized block is also a multiple
        // of 512, but it would not satisfy the request.
        assert!(
            block.size >= 100,
            "aligned block must be at least as large as the requested size"
        );
    }

    /// Benchmarking a small two-request workload must yield at least one
    /// strategy result.
    #[test]
    fn test_strategy_benchmarking() {
        let pool = AdvancedMemoryPool::new(MemoryPoolConfig::default());

        let workload = vec![
            AllocationRequest {
                size: 1024,
                blocktype: BlockType::InputData,
                lifetime: Duration::from_millis(100),
            },
            AllocationRequest {
                size: 2048,
                blocktype: BlockType::OutputData,
                lifetime: Duration::from_millis(200),
            },
        ];

        let benchmark = pool
            .benchmark_strategies(&workload)
            .expect("benchmarking a non-empty workload must succeed");
        assert!(!benchmark.results.is_empty());
    }

    // --- pattern analysis and optimization suggestion tests ---

    /// Allocate several blocks then verify `analyze_allocation_patterns` returns
    /// at least one pattern whose first entry has a non-zero frequency.
    #[test]
    fn test_analyze_allocation_patterns_non_empty_after_allocations() {
        let pool = AdvancedMemoryPool::new(MemoryPoolConfig::default());

        // Ten allocations of one and the same size.
        for _ in 0..10 {
            pool.allocate(512, BlockType::IntermediateBuffer)
                .expect("allocation must succeed");
        }

        let patterns = pool
            .analyze_allocation_patterns()
            .expect("analyze_allocation_patterns must not fail");

        assert!(
            !patterns.is_empty(),
            "must return at least one pattern after allocations"
        );
        assert!(
            patterns[0].frequency > 0,
            "dominant pattern must have non-zero frequency"
        );
    }

    /// After twenty uniform-size allocations `suggest_optimizations` must
    /// return at least one suggestion.
    ///
    /// NOTE(review): this test does not inspect the accuracy of the detected
    /// pattern; only the presence of patterns and suggestions is asserted.
    #[test]
    fn test_suggest_optimizations_after_uniform_allocations() {
        let pool = AdvancedMemoryPool::new(MemoryPoolConfig::default());

        for _ in 0..20 {
            pool.allocate(256, BlockType::InputData)
                .expect("allocation must succeed");
        }

        let patterns = pool
            .analyze_allocation_patterns()
            .expect("analyze_allocation_patterns must not fail");
        assert!(!patterns.is_empty(), "patterns must be non-empty");

        let suggestions = pool
            .suggest_optimizations(&patterns)
            .expect("suggest_optimizations must not fail");
        assert!(
            !suggestions.is_empty(),
            "at least one optimization should be suggested for a uniform workload"
        );
    }

    /// With a mixed workload (seven different sizes, three allocations each)
    /// the analysis should produce more than four patterns and the
    /// `BlockReordering` suggestion should appear.
    #[test]
    fn test_suggest_optimizations_block_reordering_for_diverse_sizes() {
        let pool = AdvancedMemoryPool::new(MemoryPoolConfig::default());

        // Allocate across > 4 distinct sizes.
        for size in [128usize, 512, 1024, 4096, 8192, 16384, 32768] {
            for _ in 0..3 {
                pool.allocate(size, BlockType::IntermediateBuffer)
                    .expect("allocation must succeed");
            }
        }

        let patterns = pool
            .analyze_allocation_patterns()
            .expect("analyze_allocation_patterns must not fail");
        assert!(
            patterns.len() > 4,
            "diverse workload must produce > 4 patterns"
        );

        let suggestions = pool
            .suggest_optimizations(&patterns)
            .expect("suggest_optimizations must not fail");
        let has_reordering = suggestions
            .iter()
            .any(|s| matches!(s, OptimizationType::BlockReordering));
        assert!(
            has_reordering,
            "BlockReordering should be suggested for a highly diverse workload"
        );
    }
}