// oxirs_vec/gpu/memory_pool.rs

1//! GPU memory pool management for efficient allocation and reuse
2//!
3//! Enhanced with leak detection, metrics tracking, and adaptive sizing
4
5use super::{GpuBuffer, GpuConfig};
6use anyhow::{anyhow, Result};
7use std::collections::{HashMap, VecDeque};
8use std::sync::{Arc, Mutex};
9use std::time::{Duration, Instant};
10
/// GPU memory pool for efficient buffer management with advanced tracking
///
/// Buffer sizes are counted in elements; all byte accounting multiplies by
/// `size_of::<f32>()`, so buffers presumably hold `f32` data — confirm with
/// `GpuBuffer`. Shared state sits behind `Arc<Mutex<..>>` so it can be
/// observed from clones of the handles.
#[derive(Debug)]
pub struct GpuMemoryPool {
    // GPU device ordinal passed to `GpuBuffer::new` (presumably a CUDA-style
    // device id — confirm against `GpuConfig`).
    device_id: i32,
    // Idle buffers parked for reuse by `get_buffer`.
    available_buffers: Arc<Mutex<VecDeque<GpuBuffer>>>,
    // Owned list of handed-out buffers. NOTE(review): never pushed to by the
    // impl below (buffers are moved out to callers), so any length check on
    // it always sees 0 — verify intended use.
    allocated_buffers: Arc<Mutex<Vec<GpuBuffer>>>,
    // Total pool budget in bytes (from `GpuConfig::memory_pool_size`).
    total_memory: usize,
    // Bytes currently backed by live GPU allocations; grows on allocation,
    // reset only by `clear`.
    used_memory: usize,
    // Per-buffer element count.
    buffer_size: usize,
    // Maximum buffer count derived from the byte budget.
    max_buffers: usize,
    /// Allocation tracking for leak detection
    allocation_times: Arc<Mutex<Vec<(usize, Instant)>>>,
    /// Performance operation timings
    operation_timings: Arc<Mutex<HashMap<String, Vec<Duration>>>>,
    /// Performance metrics
    allocation_count: usize,
    deallocation_count: usize,
    peak_memory_usage: usize,
}
30
31impl GpuMemoryPool {
32    /// Create a new GPU memory pool with advanced metrics and leak detection
33    pub fn new(config: &GpuConfig, buffer_size: usize) -> Result<Self> {
34        let max_buffers = config.memory_pool_size / (buffer_size * std::mem::size_of::<f32>());
35
36        Ok(Self {
37            device_id: config.device_id,
38            available_buffers: Arc::new(Mutex::new(VecDeque::new())),
39            allocated_buffers: Arc::new(Mutex::new(Vec::new())),
40            total_memory: config.memory_pool_size,
41            used_memory: 0,
42            buffer_size,
43            max_buffers,
44            allocation_times: Arc::new(Mutex::new(Vec::new())),
45            operation_timings: Arc::new(Mutex::new(HashMap::new())),
46            allocation_count: 0,
47            deallocation_count: 0,
48            peak_memory_usage: 0,
49        })
50    }
51
52    /// Get a buffer from the pool or allocate a new one (with performance tracking)
53    pub fn get_buffer(&mut self) -> Result<GpuBuffer> {
54        let start_time = Instant::now();
55
56        // Try to get a buffer from the available pool
57        {
58            let mut available = self
59                .available_buffers
60                .lock()
61                .map_err(|e| anyhow!("Failed to lock available buffers: {}", e))?;
62
63            if let Some(buffer) = available.pop_front() {
64                // Track timing
65                let elapsed = start_time.elapsed();
66                self.record_operation_time("buffer_acquire_reuse", elapsed);
67
68                // Track allocation for leak detection
69                let ptr_value = buffer.ptr() as usize;
70                self.allocation_times
71                    .lock()
72                    .unwrap()
73                    .push((ptr_value, Instant::now()));
74
75                return Ok(buffer);
76            }
77        }
78
79        // No available buffers, check if we can allocate a new one
80        if self.allocated_buffers.lock().unwrap().len() >= self.max_buffers {
81            let elapsed = start_time.elapsed();
82            self.record_operation_time("buffer_acquire_failed", elapsed);
83            return Err(anyhow!("Memory pool exhausted"));
84        }
85
86        // Allocate a new buffer
87        let alloc_start = Instant::now();
88        let buffer = GpuBuffer::new(self.buffer_size, self.device_id)?;
89        let alloc_elapsed = alloc_start.elapsed();
90        self.record_operation_time("buffer_alloc", alloc_elapsed);
91
92        // Update metrics
93        self.used_memory += self.buffer_size * std::mem::size_of::<f32>();
94        self.allocation_count += 1;
95        if self.used_memory > self.peak_memory_usage {
96            self.peak_memory_usage = self.used_memory;
97        }
98
99        // Track allocation for leak detection
100        let ptr_value = buffer.ptr() as usize;
101        self.allocation_times
102            .lock()
103            .unwrap()
104            .push((ptr_value, Instant::now()));
105
106        // Record total acquisition time
107        let total_elapsed = start_time.elapsed();
108        self.record_operation_time("buffer_acquire_new", total_elapsed);
109
110        Ok(buffer)
111    }
112
113    /// Record timing for an operation
114    fn record_operation_time(&self, operation: &str, duration: Duration) {
115        if let Ok(mut timings) = self.operation_timings.lock() {
116            timings
117                .entry(operation.to_string())
118                .or_insert_with(Vec::new)
119                .push(duration);
120        }
121    }
122
123    /// Return a buffer to the pool (with performance tracking)
124    pub fn return_buffer(&mut self, buffer: GpuBuffer) -> Result<()> {
125        let start_time = Instant::now();
126
127        let ptr_value = buffer.ptr() as usize;
128
129        // Remove from allocated buffers
130        {
131            let mut allocated = self
132                .allocated_buffers
133                .lock()
134                .map_err(|e| anyhow!("Failed to lock allocated buffers: {}", e))?;
135
136            // Find and remove the buffer
137            allocated.retain(|b| b.ptr() != buffer.ptr());
138        }
139
140        // Remove from allocation tracking
141        {
142            let mut alloc_times = self.allocation_times.lock().unwrap();
143            alloc_times.retain(|(ptr, _)| *ptr != ptr_value);
144        }
145
146        // Update metrics
147        self.deallocation_count += 1;
148
149        // Add to available buffers
150        self.available_buffers
151            .lock()
152            .map_err(|e| anyhow!("Failed to lock available buffers: {}", e))?
153            .push_back(buffer);
154
155        // Record timing
156        let elapsed = start_time.elapsed();
157        self.record_operation_time("buffer_return", elapsed);
158
159        Ok(())
160    }
161
162    /// Get pool statistics
163    pub fn stats(&self) -> MemoryPoolStats {
164        let allocated_count = self.allocated_buffers.lock().unwrap().len();
165        let available_count = self.available_buffers.lock().unwrap().len();
166
167        MemoryPoolStats {
168            total_buffers: allocated_count + available_count,
169            allocated_buffers: allocated_count,
170            available_buffers: available_count,
171            total_memory: self.total_memory,
172            used_memory: self.used_memory,
173            buffer_size: self.buffer_size,
174            utilization: if self.total_memory > 0 {
175                self.used_memory as f64 / self.total_memory as f64
176            } else {
177                0.0
178            },
179        }
180    }
181
182    /// Preallocate buffers to warm up the pool
183    pub fn preallocate(&mut self, count: usize) -> Result<()> {
184        let effective_count = count.min(self.max_buffers);
185
186        for _ in 0..effective_count {
187            let buffer = GpuBuffer::new(self.buffer_size, self.device_id)?;
188            self.used_memory += self.buffer_size * std::mem::size_of::<f32>();
189
190            self.available_buffers
191                .lock()
192                .map_err(|e| anyhow!("Failed to lock available buffers: {}", e))?
193                .push_back(buffer);
194        }
195
196        Ok(())
197    }
198
199    /// Clear all buffers and reset the pool
200    pub fn clear(&mut self) {
201        // Clear all buffers (Drop will handle GPU memory deallocation)
202        self.available_buffers.lock().unwrap().clear();
203        self.allocated_buffers.lock().unwrap().clear();
204        self.used_memory = 0;
205    }
206
207    /// Check if pool has available capacity
208    pub fn has_capacity(&self) -> bool {
209        let total_buffers = self.available_buffers.lock().unwrap().len()
210            + self.allocated_buffers.lock().unwrap().len();
211        total_buffers < self.max_buffers
212    }
213
214    /// Get current memory usage
215    pub fn memory_usage(&self) -> usize {
216        self.used_memory
217    }
218
219    /// Get memory utilization percentage
220    pub fn utilization(&self) -> f64 {
221        if self.total_memory > 0 {
222            self.used_memory as f64 / self.total_memory as f64
223        } else {
224            0.0
225        }
226    }
227
228    /// Defragment the pool by compacting available buffers
229    pub fn defragment(&mut self) -> Result<()> {
230        let start_time = Instant::now();
231
232        // In a real implementation, this might involve more sophisticated memory management
233        // For now, we'll just ensure all available buffers are contiguous in the queue
234        let mut available = self
235            .available_buffers
236            .lock()
237            .map_err(|e| anyhow!("Failed to lock available buffers: {}", e))?;
238
239        // Sort available buffers by memory address for better locality
240        let mut buffers: Vec<GpuBuffer> = available.drain(..).collect();
241        buffers.sort_by_key(|b| b.ptr() as usize);
242
243        for buffer in buffers {
244            available.push_back(buffer);
245        }
246
247        // Record timing
248        let elapsed = start_time.elapsed();
249        self.record_operation_time("pool_defrag", elapsed);
250
251        Ok(())
252    }
253
254    /// Detect memory leaks (buffers held for too long)
255    pub fn detect_leaks(&self, threshold_secs: u64) -> Vec<MemoryLeak> {
256        let mut leaks = Vec::new();
257        let now = Instant::now();
258        let alloc_times = self.allocation_times.lock().unwrap();
259
260        for (ptr, alloc_time) in alloc_times.iter() {
261            let duration = now.duration_since(*alloc_time);
262            if duration.as_secs() > threshold_secs {
263                leaks.push(MemoryLeak {
264                    ptr_address: *ptr,
265                    allocated_for_secs: duration.as_secs(),
266                    buffer_size: self.buffer_size,
267                });
268            }
269        }
270
271        leaks
272    }
273
274    /// Get profiling report for memory operations
275    pub fn profiling_report(&self) -> String {
276        let timings = self.operation_timings.lock().unwrap();
277        let mut report = String::from("GPU Memory Pool Performance Report:\n");
278
279        for (operation, durations) in timings.iter() {
280            if !durations.is_empty() {
281                let total: Duration = durations.iter().sum();
282                let avg = total / durations.len() as u32;
283                let min = durations.iter().min().unwrap();
284                let max = durations.iter().max().unwrap();
285
286                report.push_str(&format!(
287                    "  {}: {} calls, avg={:.2}µs, min={:.2}µs, max={:.2}µs\n",
288                    operation,
289                    durations.len(),
290                    avg.as_micros(),
291                    min.as_micros(),
292                    max.as_micros()
293                ));
294            }
295        }
296
297        report
298    }
299
300    /// Get comprehensive metrics
301    pub fn get_metrics(&self) -> PoolMetrics {
302        PoolMetrics {
303            allocation_count: self.allocation_count,
304            deallocation_count: self.deallocation_count,
305            peak_memory_usage: self.peak_memory_usage,
306            current_memory_usage: self.used_memory,
307            memory_efficiency: if self.allocation_count > 0 {
308                self.deallocation_count as f64 / self.allocation_count as f64
309            } else {
310                0.0
311            },
312            active_allocations: self.allocation_times.lock().unwrap().len(),
313        }
314    }
315
316    /// Adaptive buffer sizing based on usage patterns
317    pub fn suggest_optimal_buffer_size(&self) -> usize {
318        let metrics = self.get_metrics();
319
320        // If we're frequently allocating/deallocating, suggest smaller buffers
321        if metrics.memory_efficiency > 0.95 && self.utilization() < 0.5 {
322            self.buffer_size / 2
323        }
324        // If we're holding memory for long periods, suggest larger buffers
325        else if metrics.memory_efficiency < 0.7 && self.utilization() > 0.8 {
326            self.buffer_size * 2
327        } else {
328            self.buffer_size
329        }
330    }
331
332    /// Reset profiling statistics
333    pub fn reset_profiling(&mut self) {
334        if let Ok(mut timings) = self.operation_timings.lock() {
335            timings.clear();
336        }
337    }
338
339    /// Get average operation time for specific operation (in microseconds)
340    pub fn get_avg_operation_time(&self, operation: &str) -> Option<f64> {
341        let timings = self.operation_timings.lock().ok()?;
342        let durations = timings.get(operation)?;
343
344        if durations.is_empty() {
345            return None;
346        }
347
348        let total: Duration = durations.iter().sum();
349        let avg = total / durations.len() as u32;
350        Some(avg.as_micros() as f64)
351    }
352}
353
/// Memory leak detection result
///
/// Produced by `GpuMemoryPool::detect_leaks` for each buffer held past the
/// caller-supplied threshold.
#[derive(Debug, Clone)]
pub struct MemoryLeak {
    /// Pointer address of the leaked buffer (the raw `GpuBuffer::ptr` value
    /// cast to `usize`)
    pub ptr_address: usize,
    /// How long the buffer has been allocated (seconds)
    pub allocated_for_secs: u64,
    /// Size of the leaked buffer (element count, matching the owning pool's
    /// `buffer_size`)
    pub buffer_size: usize,
}
364
365impl MemoryLeak {
366    /// Get formatted description of the leak
367    pub fn description(&self) -> String {
368        format!(
369            "Memory leak at 0x{:x}: {} bytes held for {} seconds",
370            self.ptr_address, self.buffer_size, self.allocated_for_secs
371        )
372    }
373}
374
/// Comprehensive pool metrics for performance analysis
///
/// Snapshot produced by `GpuMemoryPool::get_metrics`.
#[derive(Debug, Clone)]
pub struct PoolMetrics {
    /// Total number of allocations performed
    pub allocation_count: usize,
    /// Total number of deallocations performed
    pub deallocation_count: usize,
    /// Peak memory usage reached, in bytes
    pub peak_memory_usage: usize,
    /// Current memory usage, in bytes
    pub current_memory_usage: usize,
    /// Memory efficiency (deallocations / allocations); 1.0 means every
    /// allocation has been returned, 0.0 when nothing was ever allocated
    pub memory_efficiency: f64,
    /// Number of currently active allocations
    pub active_allocations: usize,
}
391
392impl PoolMetrics {
393    /// Check if there might be a memory leak
394    pub fn has_potential_leak(&self) -> bool {
395        self.memory_efficiency < 0.5 && self.active_allocations > 100
396    }
397
398    /// Get formatted metrics report
399    pub fn report(&self) -> String {
400        format!(
401            "Pool Metrics:\n\
402             - Allocations: {}\n\
403             - Deallocations: {}\n\
404             - Active: {}\n\
405             - Peak memory: {:.2} MB\n\
406             - Current memory: {:.2} MB\n\
407             - Efficiency: {:.1}%",
408            self.allocation_count,
409            self.deallocation_count,
410            self.active_allocations,
411            self.peak_memory_usage as f64 / 1024.0 / 1024.0,
412            self.current_memory_usage as f64 / 1024.0 / 1024.0,
413            self.memory_efficiency * 100.0
414        )
415    }
416}
417
/// Statistics about memory pool usage
#[derive(Debug, Clone)]
pub struct MemoryPoolStats {
    /// Total buffers in existence (allocated + available)
    pub total_buffers: usize,
    /// Buffers currently handed out to callers
    pub allocated_buffers: usize,
    /// Idle buffers parked in the pool for reuse
    pub available_buffers: usize,
    /// Total pool budget in bytes
    pub total_memory: usize,
    /// Bytes currently backed by live GPU allocations
    pub used_memory: usize,
    /// Per-buffer element count
    pub buffer_size: usize,
    /// used_memory / total_memory as a fraction (0.0 when budget is zero)
    pub utilization: f64,
}
429
430impl MemoryPoolStats {
431    /// Check if the pool is under memory pressure
432    pub fn is_under_pressure(&self) -> bool {
433        self.utilization > 0.8 || self.available_buffers < 2
434    }
435
436    /// Get the number of buffers that can still be allocated
437    pub fn remaining_capacity(&self) -> usize {
438        if self.total_memory > self.used_memory {
439            let remaining_memory = self.total_memory - self.used_memory;
440            remaining_memory / (self.buffer_size * std::mem::size_of::<f32>())
441        } else {
442            0
443        }
444    }
445
446    /// Print pool statistics
447    pub fn print(&self) {
448        println!("GPU Memory Pool Statistics:");
449        println!("  Total buffers: {}", self.total_buffers);
450        println!(
451            "  Allocated: {}, Available: {}",
452            self.allocated_buffers, self.available_buffers
453        );
454        println!(
455            "  Memory usage: {:.2} MB / {:.2} MB ({:.1}%)",
456            self.used_memory as f64 / 1024.0 / 1024.0,
457            self.total_memory as f64 / 1024.0 / 1024.0,
458            self.utilization * 100.0
459        );
460        println!(
461            "  Buffer size: {:.2} KB",
462            self.buffer_size as f64 * 4.0 / 1024.0
463        );
464        println!(
465            "  Remaining capacity: {} buffers",
466            self.remaining_capacity()
467        );
468
469        if self.is_under_pressure() {
470            println!("  ⚠️  Memory pool is under pressure!");
471        }
472    }
473}
474
/// Advanced memory pool with multiple buffer sizes
///
/// Maintains one `GpuMemoryPool` per entry in `buffer_sizes`; requests are
/// routed to the pool whose buffer size fits.
#[derive(Debug)]
pub struct AdvancedGpuMemoryPool {
    // One sub-pool per configured buffer size, index-aligned with
    // `buffer_sizes`.
    pools: Vec<GpuMemoryPool>,
    // Element counts of each sub-pool's buffers.
    buffer_sizes: Vec<usize>,
    // Device ordinal from the config. NOTE(review): stored but not read by
    // the visible impl — confirm whether it is still needed.
    device_id: i32,
}
482
483impl AdvancedGpuMemoryPool {
484    /// Create an advanced memory pool with multiple buffer sizes
485    pub fn new(config: &GpuConfig, buffer_sizes: Vec<usize>) -> Result<Self> {
486        let mut pools = Vec::new();
487
488        for &size in &buffer_sizes {
489            let pool = GpuMemoryPool::new(config, size)?;
490            pools.push(pool);
491        }
492
493        Ok(Self {
494            pools,
495            buffer_sizes: buffer_sizes.clone(),
496            device_id: config.device_id,
497        })
498    }
499
500    /// Get a buffer of the best fitting size
501    pub fn get_buffer(&mut self, required_size: usize) -> Result<GpuBuffer> {
502        // Find the smallest buffer size that can accommodate the request
503        let pool_index = self
504            .buffer_sizes
505            .iter()
506            .position(|&size| size >= required_size)
507            .ok_or_else(|| anyhow!("No buffer size large enough for request"))?;
508
509        self.pools[pool_index].get_buffer()
510    }
511
512    /// Return a buffer to the appropriate pool
513    pub fn return_buffer(&mut self, buffer: GpuBuffer) -> Result<()> {
514        let buffer_size = buffer.size();
515
516        // Find the pool this buffer belongs to
517        let pool_index = self
518            .buffer_sizes
519            .iter()
520            .position(|&size| size == buffer_size)
521            .ok_or_else(|| anyhow!("Buffer size does not match any pool"))?;
522
523        self.pools[pool_index].return_buffer(buffer)
524    }
525
526    /// Get combined statistics for all pools
527    pub fn combined_stats(&self) -> AdvancedMemoryPoolStats {
528        let mut total_buffers = 0;
529        let mut total_allocated = 0;
530        let mut total_available = 0;
531        let mut total_memory = 0;
532        let mut total_used = 0;
533        let mut pool_stats = Vec::new();
534
535        for pool in &self.pools {
536            let stats = pool.stats();
537            total_buffers += stats.total_buffers;
538            total_allocated += stats.allocated_buffers;
539            total_available += stats.available_buffers;
540            total_memory += stats.total_memory;
541            total_used += stats.used_memory;
542            pool_stats.push(stats);
543        }
544
545        AdvancedMemoryPoolStats {
546            pool_stats,
547            total_buffers,
548            total_allocated,
549            total_available,
550            total_memory,
551            total_used,
552            utilization: if total_memory > 0 {
553                total_used as f64 / total_memory as f64
554            } else {
555                0.0
556            },
557        }
558    }
559
560    /// Preallocate buffers in all pools
561    pub fn preallocate_all(&mut self, buffers_per_pool: usize) -> Result<()> {
562        for pool in &mut self.pools {
563            pool.preallocate(buffers_per_pool)?;
564        }
565        Ok(())
566    }
567}
568
/// Statistics for advanced memory pool
///
/// Aggregated by `AdvancedGpuMemoryPool::combined_stats`.
#[derive(Debug, Clone)]
pub struct AdvancedMemoryPoolStats {
    /// Per-sub-pool snapshots, in pool order
    pub pool_stats: Vec<MemoryPoolStats>,
    /// Sum of buffers across all sub-pools
    pub total_buffers: usize,
    /// Sum of allocated (handed-out) buffers across all sub-pools
    pub total_allocated: usize,
    /// Sum of idle buffers across all sub-pools
    pub total_available: usize,
    /// Sum of byte budgets across all sub-pools
    pub total_memory: usize,
    /// Sum of used bytes across all sub-pools
    pub total_used: usize,
    /// total_used / total_memory as a fraction (0.0 when budget is zero)
    pub utilization: f64,
}
580
581impl AdvancedMemoryPoolStats {
582    /// Print detailed statistics for all pools
583    pub fn print_detailed(&self) {
584        println!("Advanced GPU Memory Pool Statistics:");
585        println!(
586            "  Overall: {} buffers, {:.1}% utilization",
587            self.total_buffers,
588            self.utilization * 100.0
589        );
590        println!(
591            "  Total memory: {:.2} MB",
592            self.total_memory as f64 / 1024.0 / 1024.0
593        );
594
595        for (i, stats) in self.pool_stats.iter().enumerate() {
596            println!(
597                "  Pool {}: {:.2} KB buffers, {} total, {:.1}% util",
598                i,
599                stats.buffer_size as f64 * 4.0 / 1024.0,
600                stats.total_buffers,
601                stats.utilization * 100.0
602            );
603        }
604    }
605}