1use super::{GpuBuffer, GpuConfig};
6use anyhow::{anyhow, Result};
7use std::collections::{HashMap, VecDeque};
8use std::sync::{Arc, Mutex};
9use std::time::{Duration, Instant};
10
/// Pool of reusable, fixed-size GPU buffers bound to one device.
///
/// Buffers cycle between the idle queue (`available_buffers`) and callers;
/// per-operation timing samples and per-buffer allocation timestamps are
/// kept for profiling and leak detection.
#[derive(Debug)]
pub struct GpuMemoryPool {
    /// Device ordinal passed through to `GpuBuffer::new`.
    device_id: i32,
    /// Idle buffers ready for reuse (FIFO).
    available_buffers: Arc<Mutex<VecDeque<GpuBuffer>>>,
    /// Buffers considered checked out.
    /// NOTE(review): nothing in this file ever pushes into this Vec, so it
    /// stays empty in practice — see `get_buffer`/`return_buffer`.
    allocated_buffers: Arc<Mutex<Vec<GpuBuffer>>>,
    /// Configured pool budget in bytes (`GpuConfig::memory_pool_size`).
    total_memory: usize,
    /// Bytes accounted as allocated so far (grows on alloc; reset by `clear`).
    used_memory: usize,
    /// Elements per buffer; byte size is `buffer_size * size_of::<f32>()`.
    buffer_size: usize,
    /// Cap on the number of buffers, derived from the byte budget in `new`.
    max_buffers: usize,
    /// `(device pointer, hand-out time)` for each outstanding buffer.
    allocation_times: Arc<Mutex<Vec<(usize, Instant)>>>,
    /// Timing samples keyed by operation name.
    operation_timings: Arc<Mutex<HashMap<String, Vec<Duration>>>>,
    /// Total fresh device allocations performed.
    allocation_count: usize,
    /// Total buffers handed back via `return_buffer`.
    deallocation_count: usize,
    /// High-water mark of `used_memory`.
    peak_memory_usage: usize,
}
30
31impl GpuMemoryPool {
32 pub fn new(config: &GpuConfig, buffer_size: usize) -> Result<Self> {
34 let max_buffers = config.memory_pool_size / (buffer_size * std::mem::size_of::<f32>());
35
36 Ok(Self {
37 device_id: config.device_id,
38 available_buffers: Arc::new(Mutex::new(VecDeque::new())),
39 allocated_buffers: Arc::new(Mutex::new(Vec::new())),
40 total_memory: config.memory_pool_size,
41 used_memory: 0,
42 buffer_size,
43 max_buffers,
44 allocation_times: Arc::new(Mutex::new(Vec::new())),
45 operation_timings: Arc::new(Mutex::new(HashMap::new())),
46 allocation_count: 0,
47 deallocation_count: 0,
48 peak_memory_usage: 0,
49 })
50 }
51
52 pub fn get_buffer(&mut self) -> Result<GpuBuffer> {
54 let start_time = Instant::now();
55
56 {
58 let mut available = self
59 .available_buffers
60 .lock()
61 .map_err(|e| anyhow!("Failed to lock available buffers: {}", e))?;
62
63 if let Some(buffer) = available.pop_front() {
64 let elapsed = start_time.elapsed();
66 self.record_operation_time("buffer_acquire_reuse", elapsed);
67
68 let ptr_value = buffer.ptr() as usize;
70 self.allocation_times
71 .lock()
72 .unwrap()
73 .push((ptr_value, Instant::now()));
74
75 return Ok(buffer);
76 }
77 }
78
79 if self.allocated_buffers.lock().unwrap().len() >= self.max_buffers {
81 let elapsed = start_time.elapsed();
82 self.record_operation_time("buffer_acquire_failed", elapsed);
83 return Err(anyhow!("Memory pool exhausted"));
84 }
85
86 let alloc_start = Instant::now();
88 let buffer = GpuBuffer::new(self.buffer_size, self.device_id)?;
89 let alloc_elapsed = alloc_start.elapsed();
90 self.record_operation_time("buffer_alloc", alloc_elapsed);
91
92 self.used_memory += self.buffer_size * std::mem::size_of::<f32>();
94 self.allocation_count += 1;
95 if self.used_memory > self.peak_memory_usage {
96 self.peak_memory_usage = self.used_memory;
97 }
98
99 let ptr_value = buffer.ptr() as usize;
101 self.allocation_times
102 .lock()
103 .unwrap()
104 .push((ptr_value, Instant::now()));
105
106 let total_elapsed = start_time.elapsed();
108 self.record_operation_time("buffer_acquire_new", total_elapsed);
109
110 Ok(buffer)
111 }
112
113 fn record_operation_time(&self, operation: &str, duration: Duration) {
115 if let Ok(mut timings) = self.operation_timings.lock() {
116 timings
117 .entry(operation.to_string())
118 .or_insert_with(Vec::new)
119 .push(duration);
120 }
121 }
122
123 pub fn return_buffer(&mut self, buffer: GpuBuffer) -> Result<()> {
125 let start_time = Instant::now();
126
127 let ptr_value = buffer.ptr() as usize;
128
129 {
131 let mut allocated = self
132 .allocated_buffers
133 .lock()
134 .map_err(|e| anyhow!("Failed to lock allocated buffers: {}", e))?;
135
136 allocated.retain(|b| b.ptr() != buffer.ptr());
138 }
139
140 {
142 let mut alloc_times = self.allocation_times.lock().unwrap();
143 alloc_times.retain(|(ptr, _)| *ptr != ptr_value);
144 }
145
146 self.deallocation_count += 1;
148
149 self.available_buffers
151 .lock()
152 .map_err(|e| anyhow!("Failed to lock available buffers: {}", e))?
153 .push_back(buffer);
154
155 let elapsed = start_time.elapsed();
157 self.record_operation_time("buffer_return", elapsed);
158
159 Ok(())
160 }
161
162 pub fn stats(&self) -> MemoryPoolStats {
164 let allocated_count = self.allocated_buffers.lock().unwrap().len();
165 let available_count = self.available_buffers.lock().unwrap().len();
166
167 MemoryPoolStats {
168 total_buffers: allocated_count + available_count,
169 allocated_buffers: allocated_count,
170 available_buffers: available_count,
171 total_memory: self.total_memory,
172 used_memory: self.used_memory,
173 buffer_size: self.buffer_size,
174 utilization: if self.total_memory > 0 {
175 self.used_memory as f64 / self.total_memory as f64
176 } else {
177 0.0
178 },
179 }
180 }
181
182 pub fn preallocate(&mut self, count: usize) -> Result<()> {
184 let effective_count = count.min(self.max_buffers);
185
186 for _ in 0..effective_count {
187 let buffer = GpuBuffer::new(self.buffer_size, self.device_id)?;
188 self.used_memory += self.buffer_size * std::mem::size_of::<f32>();
189
190 self.available_buffers
191 .lock()
192 .map_err(|e| anyhow!("Failed to lock available buffers: {}", e))?
193 .push_back(buffer);
194 }
195
196 Ok(())
197 }
198
199 pub fn clear(&mut self) {
201 self.available_buffers.lock().unwrap().clear();
203 self.allocated_buffers.lock().unwrap().clear();
204 self.used_memory = 0;
205 }
206
207 pub fn has_capacity(&self) -> bool {
209 let total_buffers = self.available_buffers.lock().unwrap().len()
210 + self.allocated_buffers.lock().unwrap().len();
211 total_buffers < self.max_buffers
212 }
213
    /// Bytes of device memory currently accounted to this pool.
    pub fn memory_usage(&self) -> usize {
        self.used_memory
    }
218
219 pub fn utilization(&self) -> f64 {
221 if self.total_memory > 0 {
222 self.used_memory as f64 / self.total_memory as f64
223 } else {
224 0.0
225 }
226 }
227
228 pub fn defragment(&mut self) -> Result<()> {
230 let start_time = Instant::now();
231
232 let mut available = self
235 .available_buffers
236 .lock()
237 .map_err(|e| anyhow!("Failed to lock available buffers: {}", e))?;
238
239 let mut buffers: Vec<GpuBuffer> = available.drain(..).collect();
241 buffers.sort_by_key(|b| b.ptr() as usize);
242
243 for buffer in buffers {
244 available.push_back(buffer);
245 }
246
247 let elapsed = start_time.elapsed();
249 self.record_operation_time("pool_defrag", elapsed);
250
251 Ok(())
252 }
253
254 pub fn detect_leaks(&self, threshold_secs: u64) -> Vec<MemoryLeak> {
256 let mut leaks = Vec::new();
257 let now = Instant::now();
258 let alloc_times = self.allocation_times.lock().unwrap();
259
260 for (ptr, alloc_time) in alloc_times.iter() {
261 let duration = now.duration_since(*alloc_time);
262 if duration.as_secs() > threshold_secs {
263 leaks.push(MemoryLeak {
264 ptr_address: *ptr,
265 allocated_for_secs: duration.as_secs(),
266 buffer_size: self.buffer_size,
267 });
268 }
269 }
270
271 leaks
272 }
273
274 pub fn profiling_report(&self) -> String {
276 let timings = self.operation_timings.lock().unwrap();
277 let mut report = String::from("GPU Memory Pool Performance Report:\n");
278
279 for (operation, durations) in timings.iter() {
280 if !durations.is_empty() {
281 let total: Duration = durations.iter().sum();
282 let avg = total / durations.len() as u32;
283 let min = durations.iter().min().unwrap();
284 let max = durations.iter().max().unwrap();
285
286 report.push_str(&format!(
287 " {}: {} calls, avg={:.2}µs, min={:.2}µs, max={:.2}µs\n",
288 operation,
289 durations.len(),
290 avg.as_micros(),
291 min.as_micros(),
292 max.as_micros()
293 ));
294 }
295 }
296
297 report
298 }
299
300 pub fn get_metrics(&self) -> PoolMetrics {
302 PoolMetrics {
303 allocation_count: self.allocation_count,
304 deallocation_count: self.deallocation_count,
305 peak_memory_usage: self.peak_memory_usage,
306 current_memory_usage: self.used_memory,
307 memory_efficiency: if self.allocation_count > 0 {
308 self.deallocation_count as f64 / self.allocation_count as f64
309 } else {
310 0.0
311 },
312 active_allocations: self.allocation_times.lock().unwrap().len(),
313 }
314 }
315
316 pub fn suggest_optimal_buffer_size(&self) -> usize {
318 let metrics = self.get_metrics();
319
320 if metrics.memory_efficiency > 0.95 && self.utilization() < 0.5 {
322 self.buffer_size / 2
323 }
324 else if metrics.memory_efficiency < 0.7 && self.utilization() > 0.8 {
326 self.buffer_size * 2
327 } else {
328 self.buffer_size
329 }
330 }
331
332 pub fn reset_profiling(&mut self) {
334 if let Ok(mut timings) = self.operation_timings.lock() {
335 timings.clear();
336 }
337 }
338
339 pub fn get_avg_operation_time(&self, operation: &str) -> Option<f64> {
341 let timings = self.operation_timings.lock().ok()?;
342 let durations = timings.get(operation)?;
343
344 if durations.is_empty() {
345 return None;
346 }
347
348 let total: Duration = durations.iter().sum();
349 let avg = total / durations.len() as u32;
350 Some(avg.as_micros() as f64)
351 }
352}
353
/// A buffer allocation that has outlived the caller's leak threshold.
#[derive(Debug, Clone)]
pub struct MemoryLeak {
    /// Raw device pointer of the suspect buffer.
    pub ptr_address: usize,
    /// How long the buffer has been held, in whole seconds.
    pub allocated_for_secs: u64,
    /// The pool's configured buffer size for this allocation.
    pub buffer_size: usize,
}

impl MemoryLeak {
    /// One-line human-readable summary for logs and diagnostics.
    ///
    /// NOTE(review): the message labels `buffer_size` as bytes, but the pool
    /// treats `buffer_size` as an f32 element count elsewhere — confirm units.
    pub fn description(&self) -> String {
        format!(
            "Memory leak at 0x{addr:x}: {size} bytes held for {secs} seconds",
            addr = self.ptr_address,
            size = self.buffer_size,
            secs = self.allocated_for_secs
        )
    }
}
374
/// Lifetime counters describing a pool's allocation behaviour.
#[derive(Debug, Clone)]
pub struct PoolMetrics {
    /// Total fresh device allocations performed.
    pub allocation_count: usize,
    /// Total buffers returned to the pool.
    pub deallocation_count: usize,
    /// High-water mark of accounted memory, in bytes.
    pub peak_memory_usage: usize,
    /// Currently accounted memory, in bytes.
    pub current_memory_usage: usize,
    /// Returns-to-allocations ratio (0.0 before any allocation).
    pub memory_efficiency: f64,
    /// Buffers currently outstanding.
    pub active_allocations: usize,
}

impl PoolMetrics {
    /// Heuristic: many live allocations combined with a low return ratio
    /// suggests buffers are not being handed back.
    pub fn has_potential_leak(&self) -> bool {
        let low_return_ratio = self.memory_efficiency < 0.5;
        let many_outstanding = self.active_allocations > 100;
        low_return_ratio && many_outstanding
    }

    /// Multi-line textual summary of the counters (sizes in MB).
    pub fn report(&self) -> String {
        let peak_mb = self.peak_memory_usage as f64 / 1024.0 / 1024.0;
        let current_mb = self.current_memory_usage as f64 / 1024.0 / 1024.0;
        format!(
            "Pool Metrics:\n\
             - Allocations: {}\n\
             - Deallocations: {}\n\
             - Active: {}\n\
             - Peak memory: {:.2} MB\n\
             - Current memory: {:.2} MB\n\
             - Efficiency: {:.1}%",
            self.allocation_count,
            self.deallocation_count,
            self.active_allocations,
            peak_mb,
            current_mb,
            self.memory_efficiency * 100.0
        )
    }
}
417
/// Point-in-time snapshot of a single pool's occupancy and memory use.
#[derive(Debug, Clone)]
pub struct MemoryPoolStats {
    /// Allocated + available buffers.
    pub total_buffers: usize,
    /// Buffers currently checked out.
    pub allocated_buffers: usize,
    /// Idle buffers in the reuse queue.
    pub available_buffers: usize,
    /// Configured byte budget.
    pub total_memory: usize,
    /// Bytes currently accounted as used.
    pub used_memory: usize,
    /// Elements per buffer (f32 elements; 4 bytes each).
    pub buffer_size: usize,
    /// `used_memory / total_memory`, or 0.0 for a zero budget.
    pub utilization: f64,
}

impl MemoryPoolStats {
    /// True when the pool is nearly full or almost out of idle buffers.
    pub fn is_under_pressure(&self) -> bool {
        self.utilization > 0.8 || self.available_buffers < 2
    }

    /// Number of additional buffers that fit in the remaining byte budget.
    pub fn remaining_capacity(&self) -> usize {
        let bytes_per_buffer = self.buffer_size * std::mem::size_of::<f32>();
        // Guard the division: a zero-sized buffer (or an exhausted budget)
        // yields zero capacity instead of a divide-by-zero panic.
        if bytes_per_buffer == 0 || self.total_memory <= self.used_memory {
            return 0;
        }
        (self.total_memory - self.used_memory) / bytes_per_buffer
    }

    /// Print a multi-line summary to stdout, with a warning when the pool
    /// is under pressure.
    pub fn print(&self) {
        println!("GPU Memory Pool Statistics:");
        println!(" Total buffers: {}", self.total_buffers);
        println!(
            " Allocated: {}, Available: {}",
            self.allocated_buffers, self.available_buffers
        );
        println!(
            " Memory usage: {:.2} MB / {:.2} MB ({:.1}%)",
            self.used_memory as f64 / 1024.0 / 1024.0,
            self.total_memory as f64 / 1024.0 / 1024.0,
            self.utilization * 100.0
        );
        println!(
            " Buffer size: {:.2} KB",
            // Consistency: use the element size instead of a literal 4.0.
            self.buffer_size as f64 * std::mem::size_of::<f32>() as f64 / 1024.0
        );
        println!(
            " Remaining capacity: {} buffers",
            self.remaining_capacity()
        );

        if self.is_under_pressure() {
            println!(" ⚠️ Memory pool is under pressure!");
        }
    }
}
474
/// Multi-size pool: one `GpuMemoryPool` per configured buffer size.
#[derive(Debug)]
pub struct AdvancedGpuMemoryPool {
    /// One pool per entry of `buffer_sizes`, index-aligned.
    pools: Vec<GpuMemoryPool>,
    /// Buffer sizes (f32 elements) backing `pools`, same order.
    buffer_sizes: Vec<usize>,
    /// Device all pools allocate on.
    device_id: i32,
}
482
483impl AdvancedGpuMemoryPool {
484 pub fn new(config: &GpuConfig, buffer_sizes: Vec<usize>) -> Result<Self> {
486 let mut pools = Vec::new();
487
488 for &size in &buffer_sizes {
489 let pool = GpuMemoryPool::new(config, size)?;
490 pools.push(pool);
491 }
492
493 Ok(Self {
494 pools,
495 buffer_sizes: buffer_sizes.clone(),
496 device_id: config.device_id,
497 })
498 }
499
500 pub fn get_buffer(&mut self, required_size: usize) -> Result<GpuBuffer> {
502 let pool_index = self
504 .buffer_sizes
505 .iter()
506 .position(|&size| size >= required_size)
507 .ok_or_else(|| anyhow!("No buffer size large enough for request"))?;
508
509 self.pools[pool_index].get_buffer()
510 }
511
512 pub fn return_buffer(&mut self, buffer: GpuBuffer) -> Result<()> {
514 let buffer_size = buffer.size();
515
516 let pool_index = self
518 .buffer_sizes
519 .iter()
520 .position(|&size| size == buffer_size)
521 .ok_or_else(|| anyhow!("Buffer size does not match any pool"))?;
522
523 self.pools[pool_index].return_buffer(buffer)
524 }
525
526 pub fn combined_stats(&self) -> AdvancedMemoryPoolStats {
528 let mut total_buffers = 0;
529 let mut total_allocated = 0;
530 let mut total_available = 0;
531 let mut total_memory = 0;
532 let mut total_used = 0;
533 let mut pool_stats = Vec::new();
534
535 for pool in &self.pools {
536 let stats = pool.stats();
537 total_buffers += stats.total_buffers;
538 total_allocated += stats.allocated_buffers;
539 total_available += stats.available_buffers;
540 total_memory += stats.total_memory;
541 total_used += stats.used_memory;
542 pool_stats.push(stats);
543 }
544
545 AdvancedMemoryPoolStats {
546 pool_stats,
547 total_buffers,
548 total_allocated,
549 total_available,
550 total_memory,
551 total_used,
552 utilization: if total_memory > 0 {
553 total_used as f64 / total_memory as f64
554 } else {
555 0.0
556 },
557 }
558 }
559
560 pub fn preallocate_all(&mut self, buffers_per_pool: usize) -> Result<()> {
562 for pool in &mut self.pools {
563 pool.preallocate(buffers_per_pool)?;
564 }
565 Ok(())
566 }
567}
568
/// Aggregated statistics across all pools of an `AdvancedGpuMemoryPool`.
#[derive(Debug, Clone)]
pub struct AdvancedMemoryPoolStats {
    /// Per-pool snapshots, in pool order.
    pub pool_stats: Vec<MemoryPoolStats>,
    /// Sum of buffers (allocated + available) across pools.
    pub total_buffers: usize,
    /// Sum of checked-out buffers across pools.
    pub total_allocated: usize,
    /// Sum of idle buffers across pools.
    pub total_available: usize,
    /// Sum of configured byte budgets.
    pub total_memory: usize,
    /// Sum of used bytes.
    pub total_used: usize,
    /// `total_used / total_memory`, or 0.0 when the budget sum is 0.
    pub utilization: f64,
}
580
581impl AdvancedMemoryPoolStats {
582 pub fn print_detailed(&self) {
584 println!("Advanced GPU Memory Pool Statistics:");
585 println!(
586 " Overall: {} buffers, {:.1}% utilization",
587 self.total_buffers,
588 self.utilization * 100.0
589 );
590 println!(
591 " Total memory: {:.2} MB",
592 self.total_memory as f64 / 1024.0 / 1024.0
593 );
594
595 for (i, stats) in self.pool_stats.iter().enumerate() {
596 println!(
597 " Pool {}: {:.2} KB buffers, {} total, {:.1}% util",
598 i,
599 stats.buffer_size as f64 * 4.0 / 1024.0,
600 stats.total_buffers,
601 stats.utilization * 100.0
602 );
603 }
604 }
605}