// oxirs_vec/gpu/memory_pool.rs

1//! GPU memory pool management for efficient allocation and reuse
2//!
3//! Enhanced with leak detection, metrics tracking, and adaptive sizing
4
5use super::{GpuBuffer, GpuConfig};
6use anyhow::{anyhow, Result};
7use std::collections::{HashMap, VecDeque};
8use std::sync::{Arc, Mutex};
9use std::time::{Duration, Instant};
10
/// GPU memory pool for efficient buffer management with advanced tracking
#[derive(Debug)]
pub struct GpuMemoryPool {
    /// GPU device id that buffers are allocated on (from `GpuConfig`).
    device_id: i32,
    /// Buffers created by the pool and currently free for reuse (FIFO queue).
    available_buffers: Arc<Mutex<VecDeque<GpuBuffer>>>,
    /// Buffers currently handed out to callers.
    /// NOTE(review): no method in this file ever inserts into this Vec, so it
    /// stays empty in practice — confirm the intended bookkeeping.
    allocated_buffers: Arc<Mutex<Vec<GpuBuffer>>>,
    /// Total pool budget in bytes (`GpuConfig::memory_pool_size`).
    total_memory: usize,
    /// Bytes currently held by the pool (available + outstanding buffers).
    used_memory: usize,
    /// Size of each buffer in f32 elements (byte size is `buffer_size * 4`).
    buffer_size: usize,
    /// Maximum number of buffers the pool may ever create.
    max_buffers: usize,
    /// Allocation tracking for leak detection
    /// (one `(device pointer, acquisition time)` entry per outstanding buffer).
    allocation_times: Arc<Mutex<Vec<(usize, Instant)>>>,
    /// Performance operation timings
    /// (operation name -> history of recorded durations).
    operation_timings: Arc<Mutex<HashMap<String, Vec<Duration>>>>,
    /// Performance metrics
    /// Number of new GPU allocations performed (reuse is not counted).
    allocation_count: usize,
    /// Number of buffers handed back via `return_buffer`.
    deallocation_count: usize,
    /// High-water mark of `used_memory`.
    peak_memory_usage: usize,
}
30
31impl GpuMemoryPool {
32    /// Create a new GPU memory pool with advanced metrics and leak detection
33    pub fn new(config: &GpuConfig, buffer_size: usize) -> Result<Self> {
34        let max_buffers = config.memory_pool_size / (buffer_size * std::mem::size_of::<f32>());
35
36        Ok(Self {
37            device_id: config.device_id,
38            available_buffers: Arc::new(Mutex::new(VecDeque::new())),
39            allocated_buffers: Arc::new(Mutex::new(Vec::new())),
40            total_memory: config.memory_pool_size,
41            used_memory: 0,
42            buffer_size,
43            max_buffers,
44            allocation_times: Arc::new(Mutex::new(Vec::new())),
45            operation_timings: Arc::new(Mutex::new(HashMap::new())),
46            allocation_count: 0,
47            deallocation_count: 0,
48            peak_memory_usage: 0,
49        })
50    }
51
52    /// Get a buffer from the pool or allocate a new one (with performance tracking)
53    pub fn get_buffer(&mut self) -> Result<GpuBuffer> {
54        let start_time = Instant::now();
55
56        // Try to get a buffer from the available pool
57        {
58            let mut available = self
59                .available_buffers
60                .lock()
61                .map_err(|e| anyhow!("Failed to lock available buffers: {}", e))?;
62
63            if let Some(buffer) = available.pop_front() {
64                // Track timing
65                let elapsed = start_time.elapsed();
66                self.record_operation_time("buffer_acquire_reuse", elapsed);
67
68                // Track allocation for leak detection
69                let ptr_value = buffer.ptr() as usize;
70                self.allocation_times
71                    .lock()
72                    .expect("lock poisoned")
73                    .push((ptr_value, Instant::now()));
74
75                return Ok(buffer);
76            }
77        }
78
79        // No available buffers, check if we can allocate a new one
80        if self.allocated_buffers.lock().expect("lock poisoned").len() >= self.max_buffers {
81            let elapsed = start_time.elapsed();
82            self.record_operation_time("buffer_acquire_failed", elapsed);
83            return Err(anyhow!("Memory pool exhausted"));
84        }
85
86        // Allocate a new buffer
87        let alloc_start = Instant::now();
88        let buffer = GpuBuffer::new(self.buffer_size, self.device_id)?;
89        let alloc_elapsed = alloc_start.elapsed();
90        self.record_operation_time("buffer_alloc", alloc_elapsed);
91
92        // Update metrics
93        self.used_memory += self.buffer_size * std::mem::size_of::<f32>();
94        self.allocation_count += 1;
95        if self.used_memory > self.peak_memory_usage {
96            self.peak_memory_usage = self.used_memory;
97        }
98
99        // Track allocation for leak detection
100        let ptr_value = buffer.ptr() as usize;
101        self.allocation_times
102            .lock()
103            .expect("lock poisoned")
104            .push((ptr_value, Instant::now()));
105
106        // Record total acquisition time
107        let total_elapsed = start_time.elapsed();
108        self.record_operation_time("buffer_acquire_new", total_elapsed);
109
110        Ok(buffer)
111    }
112
113    /// Record timing for an operation
114    fn record_operation_time(&self, operation: &str, duration: Duration) {
115        if let Ok(mut timings) = self.operation_timings.lock() {
116            timings
117                .entry(operation.to_string())
118                .or_insert_with(Vec::new)
119                .push(duration);
120        }
121    }
122
123    /// Return a buffer to the pool (with performance tracking)
124    pub fn return_buffer(&mut self, buffer: GpuBuffer) -> Result<()> {
125        let start_time = Instant::now();
126
127        let ptr_value = buffer.ptr() as usize;
128
129        // Remove from allocated buffers
130        {
131            let mut allocated = self
132                .allocated_buffers
133                .lock()
134                .map_err(|e| anyhow!("Failed to lock allocated buffers: {}", e))?;
135
136            // Find and remove the buffer
137            allocated.retain(|b| b.ptr() != buffer.ptr());
138        }
139
140        // Remove from allocation tracking
141        {
142            let mut alloc_times = self.allocation_times.lock().expect("lock poisoned");
143            alloc_times.retain(|(ptr, _)| *ptr != ptr_value);
144        }
145
146        // Update metrics
147        self.deallocation_count += 1;
148
149        // Add to available buffers
150        self.available_buffers
151            .lock()
152            .map_err(|e| anyhow!("Failed to lock available buffers: {}", e))?
153            .push_back(buffer);
154
155        // Record timing
156        let elapsed = start_time.elapsed();
157        self.record_operation_time("buffer_return", elapsed);
158
159        Ok(())
160    }
161
162    /// Get pool statistics
163    pub fn stats(&self) -> MemoryPoolStats {
164        let allocated_count = self.allocated_buffers.lock().expect("lock poisoned").len();
165        let available_count = self.available_buffers.lock().expect("lock poisoned").len();
166
167        MemoryPoolStats {
168            total_buffers: allocated_count + available_count,
169            allocated_buffers: allocated_count,
170            available_buffers: available_count,
171            total_memory: self.total_memory,
172            used_memory: self.used_memory,
173            buffer_size: self.buffer_size,
174            utilization: if self.total_memory > 0 {
175                self.used_memory as f64 / self.total_memory as f64
176            } else {
177                0.0
178            },
179        }
180    }
181
182    /// Preallocate buffers to warm up the pool
183    pub fn preallocate(&mut self, count: usize) -> Result<()> {
184        let effective_count = count.min(self.max_buffers);
185
186        for _ in 0..effective_count {
187            let buffer = GpuBuffer::new(self.buffer_size, self.device_id)?;
188            self.used_memory += self.buffer_size * std::mem::size_of::<f32>();
189
190            self.available_buffers
191                .lock()
192                .map_err(|e| anyhow!("Failed to lock available buffers: {}", e))?
193                .push_back(buffer);
194        }
195
196        Ok(())
197    }
198
199    /// Clear all buffers and reset the pool
200    pub fn clear(&mut self) {
201        // Clear all buffers (Drop will handle GPU memory deallocation)
202        self.available_buffers
203            .lock()
204            .expect("lock poisoned")
205            .clear();
206        self.allocated_buffers
207            .lock()
208            .expect("lock poisoned")
209            .clear();
210        self.used_memory = 0;
211    }
212
213    /// Check if pool has available capacity
214    pub fn has_capacity(&self) -> bool {
215        let total_buffers = self.available_buffers.lock().expect("lock poisoned").len()
216            + self.allocated_buffers.lock().expect("lock poisoned").len();
217        total_buffers < self.max_buffers
218    }
219
220    /// Get current memory usage
221    pub fn memory_usage(&self) -> usize {
222        self.used_memory
223    }
224
225    /// Get memory utilization percentage
226    pub fn utilization(&self) -> f64 {
227        if self.total_memory > 0 {
228            self.used_memory as f64 / self.total_memory as f64
229        } else {
230            0.0
231        }
232    }
233
234    /// Defragment the pool by compacting available buffers
235    pub fn defragment(&mut self) -> Result<()> {
236        let start_time = Instant::now();
237
238        // In a real implementation, this might involve more sophisticated memory management
239        // For now, we'll just ensure all available buffers are contiguous in the queue
240        let mut available = self
241            .available_buffers
242            .lock()
243            .map_err(|e| anyhow!("Failed to lock available buffers: {}", e))?;
244
245        // Sort available buffers by memory address for better locality
246        let mut buffers: Vec<GpuBuffer> = available.drain(..).collect();
247        buffers.sort_by_key(|b| b.ptr() as usize);
248
249        for buffer in buffers {
250            available.push_back(buffer);
251        }
252
253        // Record timing
254        let elapsed = start_time.elapsed();
255        self.record_operation_time("pool_defrag", elapsed);
256
257        Ok(())
258    }
259
260    /// Detect memory leaks (buffers held for too long)
261    pub fn detect_leaks(&self, threshold_secs: u64) -> Vec<MemoryLeak> {
262        let mut leaks = Vec::new();
263        let now = Instant::now();
264        let alloc_times = self.allocation_times.lock().expect("lock poisoned");
265
266        for (ptr, alloc_time) in alloc_times.iter() {
267            let duration = now.duration_since(*alloc_time);
268            if duration.as_secs() > threshold_secs {
269                leaks.push(MemoryLeak {
270                    ptr_address: *ptr,
271                    allocated_for_secs: duration.as_secs(),
272                    buffer_size: self.buffer_size,
273                });
274            }
275        }
276
277        leaks
278    }
279
280    /// Get profiling report for memory operations
281    pub fn profiling_report(&self) -> String {
282        let timings = self.operation_timings.lock().expect("lock poisoned");
283        let mut report = String::from("GPU Memory Pool Performance Report:\n");
284
285        for (operation, durations) in timings.iter() {
286            if !durations.is_empty() {
287                let total: Duration = durations.iter().sum();
288                let avg = total / durations.len() as u32;
289                let min = durations.iter().min().expect("non-empty durations");
290                let max = durations.iter().max().expect("non-empty durations");
291
292                report.push_str(&format!(
293                    "  {}: {} calls, avg={:.2}µs, min={:.2}µs, max={:.2}µs\n",
294                    operation,
295                    durations.len(),
296                    avg.as_micros(),
297                    min.as_micros(),
298                    max.as_micros()
299                ));
300            }
301        }
302
303        report
304    }
305
306    /// Get comprehensive metrics
307    pub fn get_metrics(&self) -> PoolMetrics {
308        PoolMetrics {
309            allocation_count: self.allocation_count,
310            deallocation_count: self.deallocation_count,
311            peak_memory_usage: self.peak_memory_usage,
312            current_memory_usage: self.used_memory,
313            memory_efficiency: if self.allocation_count > 0 {
314                self.deallocation_count as f64 / self.allocation_count as f64
315            } else {
316                0.0
317            },
318            active_allocations: self.allocation_times.lock().expect("lock poisoned").len(),
319        }
320    }
321
322    /// Adaptive buffer sizing based on usage patterns
323    pub fn suggest_optimal_buffer_size(&self) -> usize {
324        let metrics = self.get_metrics();
325
326        // If we're frequently allocating/deallocating, suggest smaller buffers
327        if metrics.memory_efficiency > 0.95 && self.utilization() < 0.5 {
328            self.buffer_size / 2
329        }
330        // If we're holding memory for long periods, suggest larger buffers
331        else if metrics.memory_efficiency < 0.7 && self.utilization() > 0.8 {
332            self.buffer_size * 2
333        } else {
334            self.buffer_size
335        }
336    }
337
338    /// Reset profiling statistics
339    pub fn reset_profiling(&mut self) {
340        if let Ok(mut timings) = self.operation_timings.lock() {
341            timings.clear();
342        }
343    }
344
345    /// Get average operation time for specific operation (in microseconds)
346    pub fn get_avg_operation_time(&self, operation: &str) -> Option<f64> {
347        let timings = self.operation_timings.lock().ok()?;
348        let durations = timings.get(operation)?;
349
350        if durations.is_empty() {
351            return None;
352        }
353
354        let total: Duration = durations.iter().sum();
355        let avg = total / durations.len() as u32;
356        Some(avg.as_micros() as f64)
357    }
358}
359
/// Memory leak detection result
#[derive(Debug, Clone)]
pub struct MemoryLeak {
    /// Pointer address of the leaked buffer
    pub ptr_address: usize,
    /// How long the buffer has been allocated (seconds)
    pub allocated_for_secs: u64,
    /// Size of the leaked buffer
    pub buffer_size: usize,
}

impl MemoryLeak {
    /// Get formatted description of the leak
    ///
    /// The address is rendered in hexadecimal.
    pub fn description(&self) -> String {
        let (addr, bytes, secs) = (self.ptr_address, self.buffer_size, self.allocated_for_secs);
        format!(
            "Memory leak at 0x{:x}: {} bytes held for {} seconds",
            addr, bytes, secs
        )
    }
}
380
/// Comprehensive pool metrics for performance analysis
#[derive(Debug, Clone)]
pub struct PoolMetrics {
    /// Total number of allocations performed
    pub allocation_count: usize,
    /// Total number of deallocations performed
    pub deallocation_count: usize,
    /// Peak memory usage reached
    pub peak_memory_usage: usize,
    /// Current memory usage
    pub current_memory_usage: usize,
    /// Memory efficiency (deallocations / allocations)
    pub memory_efficiency: f64,
    /// Number of currently active allocations
    pub active_allocations: usize,
}

impl PoolMetrics {
    /// Check if there might be a memory leak
    ///
    /// Flags the combination of low release rate and a large number of
    /// outstanding allocations.
    pub fn has_potential_leak(&self) -> bool {
        let low_efficiency = self.memory_efficiency < 0.5;
        let many_active = self.active_allocations > 100;
        low_efficiency && many_active
    }

    /// Get formatted metrics report
    ///
    /// Memory figures are reported in megabytes, efficiency as a percentage.
    pub fn report(&self) -> String {
        const MB: f64 = 1024.0 * 1024.0;
        let peak_mb = self.peak_memory_usage as f64 / MB;
        let current_mb = self.current_memory_usage as f64 / MB;

        format!(
            "Pool Metrics:\n\
             - Allocations: {}\n\
             - Deallocations: {}\n\
             - Active: {}\n\
             - Peak memory: {:.2} MB\n\
             - Current memory: {:.2} MB\n\
             - Efficiency: {:.1}%",
            self.allocation_count,
            self.deallocation_count,
            self.active_allocations,
            peak_mb,
            current_mb,
            self.memory_efficiency * 100.0
        )
    }
}
423
/// Statistics about memory pool usage
#[derive(Debug, Clone)]
pub struct MemoryPoolStats {
    /// Total buffers owned by the pool (allocated + available).
    pub total_buffers: usize,
    /// Buffers currently handed out to callers.
    pub allocated_buffers: usize,
    /// Buffers free for reuse.
    pub available_buffers: usize,
    /// Pool budget in bytes.
    pub total_memory: usize,
    /// Bytes currently held by the pool.
    pub used_memory: usize,
    /// Per-buffer size in f32 elements (4 bytes each).
    pub buffer_size: usize,
    /// used_memory / total_memory, in [0, 1].
    pub utilization: f64,
}

impl MemoryPoolStats {
    /// Check if the pool is under memory pressure
    ///
    /// Pressure means high utilization or almost no free buffers left.
    pub fn is_under_pressure(&self) -> bool {
        self.utilization > 0.8 || self.available_buffers < 2
    }

    /// Get the number of buffers that can still be allocated
    ///
    /// Based on the remaining byte budget divided by the per-buffer byte
    /// size. Returns 0 when the pool is over budget or the buffer size is
    /// degenerate (zero).
    pub fn remaining_capacity(&self) -> usize {
        let bytes_per_buffer = self.buffer_size * std::mem::size_of::<f32>();
        // BUG FIX: a zero buffer_size previously caused a divide-by-zero panic.
        if bytes_per_buffer == 0 {
            return 0;
        }
        self.total_memory.saturating_sub(self.used_memory) / bytes_per_buffer
    }

    /// Print pool statistics
    ///
    /// Human-readable dump to stdout; warns when the pool is under pressure.
    pub fn print(&self) {
        println!("GPU Memory Pool Statistics:");
        println!("  Total buffers: {}", self.total_buffers);
        println!(
            "  Allocated: {}, Available: {}",
            self.allocated_buffers, self.available_buffers
        );
        println!(
            "  Memory usage: {:.2} MB / {:.2} MB ({:.1}%)",
            self.used_memory as f64 / 1024.0 / 1024.0,
            self.total_memory as f64 / 1024.0 / 1024.0,
            self.utilization * 100.0
        );
        println!(
            "  Buffer size: {:.2} KB",
            // buffer_size counts f32 elements, hence the * 4.0 bytes.
            self.buffer_size as f64 * 4.0 / 1024.0
        );
        println!(
            "  Remaining capacity: {} buffers",
            self.remaining_capacity()
        );

        if self.is_under_pressure() {
            println!("  ⚠️  Memory pool is under pressure!");
        }
    }
}
480
/// Advanced memory pool with multiple buffer sizes
#[derive(Debug)]
pub struct AdvancedGpuMemoryPool {
    /// One sub-pool per entry in `buffer_sizes` (parallel vectors: the pool
    /// at index i serves buffers of `buffer_sizes[i]` elements).
    pools: Vec<GpuMemoryPool>,
    /// Buffer sizes (f32 elements) served by the corresponding sub-pools.
    buffer_sizes: Vec<usize>,
    /// GPU device id shared by all sub-pools (from `GpuConfig`).
    device_id: i32,
}
488
489impl AdvancedGpuMemoryPool {
490    /// Create an advanced memory pool with multiple buffer sizes
491    pub fn new(config: &GpuConfig, buffer_sizes: Vec<usize>) -> Result<Self> {
492        let mut pools = Vec::new();
493
494        for &size in &buffer_sizes {
495            let pool = GpuMemoryPool::new(config, size)?;
496            pools.push(pool);
497        }
498
499        Ok(Self {
500            pools,
501            buffer_sizes: buffer_sizes.clone(),
502            device_id: config.device_id,
503        })
504    }
505
506    /// Get a buffer of the best fitting size
507    pub fn get_buffer(&mut self, required_size: usize) -> Result<GpuBuffer> {
508        // Find the smallest buffer size that can accommodate the request
509        let pool_index = self
510            .buffer_sizes
511            .iter()
512            .position(|&size| size >= required_size)
513            .ok_or_else(|| anyhow!("No buffer size large enough for request"))?;
514
515        self.pools[pool_index].get_buffer()
516    }
517
518    /// Return a buffer to the appropriate pool
519    pub fn return_buffer(&mut self, buffer: GpuBuffer) -> Result<()> {
520        let buffer_size = buffer.size();
521
522        // Find the pool this buffer belongs to
523        let pool_index = self
524            .buffer_sizes
525            .iter()
526            .position(|&size| size == buffer_size)
527            .ok_or_else(|| anyhow!("Buffer size does not match any pool"))?;
528
529        self.pools[pool_index].return_buffer(buffer)
530    }
531
532    /// Get combined statistics for all pools
533    pub fn combined_stats(&self) -> AdvancedMemoryPoolStats {
534        let mut total_buffers = 0;
535        let mut total_allocated = 0;
536        let mut total_available = 0;
537        let mut total_memory = 0;
538        let mut total_used = 0;
539        let mut pool_stats = Vec::new();
540
541        for pool in &self.pools {
542            let stats = pool.stats();
543            total_buffers += stats.total_buffers;
544            total_allocated += stats.allocated_buffers;
545            total_available += stats.available_buffers;
546            total_memory += stats.total_memory;
547            total_used += stats.used_memory;
548            pool_stats.push(stats);
549        }
550
551        AdvancedMemoryPoolStats {
552            pool_stats,
553            total_buffers,
554            total_allocated,
555            total_available,
556            total_memory,
557            total_used,
558            utilization: if total_memory > 0 {
559                total_used as f64 / total_memory as f64
560            } else {
561                0.0
562            },
563        }
564    }
565
566    /// Preallocate buffers in all pools
567    pub fn preallocate_all(&mut self, buffers_per_pool: usize) -> Result<()> {
568        for pool in &mut self.pools {
569            pool.preallocate(buffers_per_pool)?;
570        }
571        Ok(())
572    }
573}
574
575/// Statistics for advanced memory pool
576#[derive(Debug, Clone)]
577pub struct AdvancedMemoryPoolStats {
578    pub pool_stats: Vec<MemoryPoolStats>,
579    pub total_buffers: usize,
580    pub total_allocated: usize,
581    pub total_available: usize,
582    pub total_memory: usize,
583    pub total_used: usize,
584    pub utilization: f64,
585}
586
587impl AdvancedMemoryPoolStats {
588    /// Print detailed statistics for all pools
589    pub fn print_detailed(&self) {
590        println!("Advanced GPU Memory Pool Statistics:");
591        println!(
592            "  Overall: {} buffers, {:.1}% utilization",
593            self.total_buffers,
594            self.utilization * 100.0
595        );
596        println!(
597            "  Total memory: {:.2} MB",
598            self.total_memory as f64 / 1024.0 / 1024.0
599        );
600
601        for (i, stats) in self.pool_stats.iter().enumerate() {
602            println!(
603                "  Pool {}: {:.2} KB buffers, {} total, {:.1}% util",
604                i,
605                stats.buffer_size as f64 * 4.0 / 1024.0,
606                stats.total_buffers,
607                stats.utilization * 100.0
608            );
609        }
610    }
611}