1use std::collections::HashMap;
8use std::sync::{Arc, Mutex, RwLock};
9use std::time::{Duration, Instant};
10
11use scirs2_core::ndarray::{Array, ArrayView, Dimension};
12use scirs2_core::numeric::{Float, FromPrimitive};
13
14use crate::backend::Backend;
15use crate::error::NdimageResult;
16
/// Pool of GPU buffer descriptors handed out by `allocate` and returned through `deallocate`.
#[derive(Debug)]
pub struct GpuMemoryPool {
    /// Buffers tracked by the pool; each entry carries its own `in_use` flag.
    buffers: Arc<Mutex<Vec<GpuBuffer>>>,
    /// Running total of allocated sizes: increased by `allocate_new_buffer`, decreased by
    /// `deallocate_immediate`.
    total_allocated: Arc<Mutex<usize>>,
    /// Largest value `total_allocated` has reached.
    peak_usage: Arc<Mutex<usize>>,
    /// Settings supplied to `new`.
    config: MemoryPoolConfig,
}
29
/// Tunable settings for [`GpuMemoryPool`].
#[derive(Debug, Clone)]
pub struct MemoryPoolConfig {
    /// Upper bound checked against the pool's running allocation total before a newly created
    /// buffer is retained for reuse.
    pub max_pool_size: usize,
    /// Sizes passed to the pool's pre-allocation hook at construction time.
    pub initial_buffer_sizes: Vec<usize>,
    /// When `false`, every request is allocated and released directly, bypassing the pool.
    pub enable_pooling: bool,
    /// Requests smaller than this are allocated directly instead of going through the pool.
    pub min_buffer_size: usize,
}

impl Default for MemoryPoolConfig {
    /// Defaults: pooling enabled, a cap of `2 * 1024^3`, a minimum pooled size of `1024`, and
    /// four initial sizes of 1, 16, 64 and 256 times `1024^2`.
    fn default() -> Self {
        const KIB: usize = 1024;
        const MIB: usize = 1024 * KIB;
        const GIB: usize = 1024 * MIB;

        Self {
            max_pool_size: 2 * GIB,
            initial_buffer_sizes: [1, 16, 64, 256]
                .iter()
                .map(|&multiple| multiple * MIB)
                .collect(),
            enable_pooling: true,
            min_buffer_size: KIB,
        }
    }
}
/// Descriptor of one GPU buffer as tracked by [`GpuMemoryPool`].
///
/// The pool hands out clones of its entries, so a value held by a caller is a snapshot; the pool
/// matches it back to its own entry by `id` when it is deallocated.
#[derive(Debug, Clone)]
pub struct GpuBuffer {
    /// Identifier taken from a process-wide atomic counter when the buffer is created.
    pub id: u64,
    /// Size requested when the buffer was created.
    pub size: usize,
    /// Backend-specific handle for the underlying allocation.
    pub handle: GpuBufferHandle,
    /// Whether the buffer is currently handed out.
    pub in_use: bool,
    /// Time at which this descriptor was created.
    pub created_at: Instant,
    /// Time at which the buffer was created or last handed out by the pool.
    pub last_used: Instant,
}
74
/// Backend-specific handle stored in a [`GpuBuffer`].
///
/// The backend variants exist only when the corresponding Cargo feature (and, for Metal, the
/// macOS target) is enabled; `Placeholder` is always available.
#[derive(Debug, Clone)]
pub enum GpuBufferHandle {
    /// Handle for a buffer created for the CUDA backend.
    #[cfg(feature = "cuda")]
    Cuda(CudaBufferHandle),
    /// Handle for a buffer created for the OpenCL backend.
    #[cfg(feature = "opencl")]
    OpenCL(OpenCLBufferHandle),
    /// Handle for a buffer created for the Metal backend.
    #[cfg(all(target_os = "macos", feature = "metal"))]
    Metal(MetalBufferHandle),
    /// Used for every backend that has no dedicated variant compiled in; releasing it is a no-op.
    Placeholder,
}
86
/// Raw identifiers describing a CUDA buffer.
// NOTE(review): the fields look like raw driver pointers/ids stored as integers — confirm
// against the code that will eventually fill them in (the constructor here writes zeros).
#[cfg(feature = "cuda")]
#[derive(Debug, Clone)]
pub struct CudaBufferHandle {
    /// Device pointer, stored as an integer.
    pub device_ptr: usize,
    /// Identifier of the device the buffer belongs to.
    pub device_id: i32,
    /// Optional stream associated with the buffer, stored as an integer.
    pub stream: Option<usize>,
}
94
/// Raw identifiers describing an OpenCL buffer.
// NOTE(review): presumably integer-encoded OpenCL object handles — confirm; the constructor in
// this file writes zeros.
#[cfg(feature = "opencl")]
#[derive(Debug, Clone)]
pub struct OpenCLBufferHandle {
    /// Buffer object, stored as an integer.
    pub buffer: usize,
    /// Context the buffer belongs to, stored as an integer.
    pub context: usize,
    /// Command queue associated with the buffer, stored as an integer.
    pub queue: usize,
}
102
/// Raw identifiers describing a Metal buffer.
// NOTE(review): presumably integer-encoded Metal object handles — confirm; the constructor in
// this file writes zeros.
#[cfg(all(target_os = "macos", feature = "metal"))]
#[derive(Debug, Clone)]
pub struct MetalBufferHandle {
    /// Buffer object, stored as an integer.
    pub buffer: usize,
    /// Device the buffer belongs to, stored as an integer.
    pub device: usize,
}
109
110impl GpuMemoryPool {
111 pub fn new(config: MemoryPoolConfig) -> Self {
113 let pool = Self {
114 buffers: Arc::new(Mutex::new(Vec::new())),
115 total_allocated: Arc::new(Mutex::new(0)),
116 peak_usage: Arc::new(Mutex::new(0)),
117 config,
118 };
119
120 if pool.config.enable_pooling {
122 for &size in &pool.config.initial_buffer_sizes {
123 if let Err(e) = pool.pre_allocate_buffer(size) {
124 eprintln!(
125 "Warning: Failed to pre-allocate buffer of size {}: {:?}",
126 size, e
127 );
128 }
129 }
130 }
131
132 pool
133 }
134
135 pub fn allocate(&self, size: usize, backend: Backend) -> NdimageResult<GpuBuffer> {
137 if !self.config.enable_pooling || size < self.config.min_buffer_size {
138 return self.allocate_new_buffer(size, backend);
139 }
140
141 let mut buffers = self.buffers.lock().expect("Operation failed");
142
143 for buffer in buffers.iter_mut() {
145 if !buffer.in_use && buffer.size >= size {
146 buffer.in_use = true;
147 buffer.last_used = Instant::now();
148 return Ok(buffer.clone());
149 }
150 }
151
152 drop(buffers);
154 let new_buffer = self.allocate_new_buffer(size, backend)?;
155
156 let mut buffers = self.buffers.lock().expect("Operation failed");
158 let current_total = *self.total_allocated.lock().expect("Operation failed");
159 if current_total + size <= self.config.max_pool_size {
160 buffers.push(new_buffer.clone());
161 }
162
163 Ok(new_buffer)
164 }
165
166 pub fn deallocate(&self, buffer: &GpuBuffer) -> NdimageResult<()> {
168 if !self.config.enable_pooling {
169 return self.deallocate_immediate(buffer);
170 }
171
172 let mut buffers = self.buffers.lock().expect("Operation failed");
173 for pool_buffer in buffers.iter_mut() {
174 if pool_buffer.id == buffer.id {
175 pool_buffer.in_use = false;
176 return Ok(());
177 }
178 }
179
180 self.deallocate_immediate(buffer)
182 }
183
184 pub fn get_statistics(&self) -> MemoryPoolStatistics {
186 let buffers = self.buffers.lock().expect("Operation failed");
187 let total_allocated = *self.total_allocated.lock().expect("Operation failed");
188 let peak_usage = *self.peak_usage.lock().expect("Operation failed");
189
190 let active_buffers = buffers.iter().filter(|b| b.in_use).count();
191 let total_buffers = buffers.len();
192 let total_pool_memory: usize = buffers.iter().map(|b| b.size).sum();
193
194 MemoryPoolStatistics {
195 total_allocated,
196 peak_usage,
197 active_buffers,
198 total_buffers,
199 total_pool_memory,
200 fragmentation_ratio: Self::calculate_fragmentation(&buffers),
201 }
202 }
203
204 fn pre_allocate_buffer(&self, size: usize) -> NdimageResult<()> {
205 Ok(())
208 }
209
210 fn allocate_new_buffer(&self, size: usize, backend: Backend) -> NdimageResult<GpuBuffer> {
211 let buffer_id = self.generate_buffer_id();
212 let handle = self.create_buffer_handle(size, backend)?;
213
214 let mut total_allocated = self.total_allocated.lock().expect("Operation failed");
215 *total_allocated += size;
216
217 let mut peak_usage = self.peak_usage.lock().expect("Operation failed");
218 *peak_usage = (*peak_usage).max(*total_allocated);
219
220 Ok(GpuBuffer {
221 id: buffer_id,
222 size,
223 handle,
224 in_use: true,
225 created_at: Instant::now(),
226 last_used: Instant::now(),
227 })
228 }
229
230 fn deallocate_immediate(&self, buffer: &GpuBuffer) -> NdimageResult<()> {
231 match &buffer.handle {
233 #[cfg(feature = "cuda")]
234 GpuBufferHandle::Cuda(handle) => {
235 self.deallocate_cuda_buffer(handle)?;
236 }
237 #[cfg(feature = "opencl")]
238 GpuBufferHandle::OpenCL(handle) => {
239 self.deallocate_opencl_buffer(handle)?;
240 }
241 #[cfg(all(target_os = "macos", feature = "metal"))]
242 GpuBufferHandle::Metal(handle) => {
243 self.deallocate_metal_buffer(handle)?;
244 }
245 GpuBufferHandle::Placeholder => {}
246 }
247
248 let mut total_allocated = self.total_allocated.lock().expect("Operation failed");
249 *total_allocated = total_allocated.saturating_sub(buffer.size);
250
251 Ok(())
252 }
253
254 fn create_buffer_handle(
255 &self,
256 size: usize,
257 backend: Backend,
258 ) -> NdimageResult<GpuBufferHandle> {
259 match backend {
260 #[cfg(feature = "cuda")]
261 Backend::Cuda => {
262 let handle = self.create_cuda_buffer(size)?;
263 Ok(GpuBufferHandle::Cuda(handle))
264 }
265 #[cfg(feature = "opencl")]
266 Backend::OpenCL => {
267 let handle = self.create_opencl_buffer(size)?;
268 Ok(GpuBufferHandle::OpenCL(handle))
269 }
270 #[cfg(all(target_os = "macos", feature = "metal"))]
271 Backend::Metal => {
272 let handle = self.create_metal_buffer(size)?;
273 Ok(GpuBufferHandle::Metal(handle))
274 }
275 _ => Ok(GpuBufferHandle::Placeholder),
276 }
277 }
278
279 #[cfg(feature = "cuda")]
280 fn create_cuda_buffer(&self, size: usize) -> NdimageResult<CudaBufferHandle> {
281 Ok(CudaBufferHandle {
284 device_ptr: 0,
285 device_id: 0,
286 stream: None,
287 })
288 }
289
290 #[cfg(feature = "cuda")]
291 fn deallocate_cuda_buffer(&self, handle: &CudaBufferHandle) -> NdimageResult<()> {
292 Ok(())
294 }
295
296 #[cfg(feature = "opencl")]
297 fn create_opencl_buffer(&self, size: usize) -> NdimageResult<OpenCLBufferHandle> {
298 Ok(OpenCLBufferHandle {
300 buffer: 0,
301 context: 0,
302 queue: 0,
303 })
304 }
305
306 #[cfg(feature = "opencl")]
307 fn deallocate_opencl_buffer(&self, handle: &OpenCLBufferHandle) -> NdimageResult<()> {
308 Ok(())
310 }
311
312 #[cfg(all(target_os = "macos", feature = "metal"))]
313 fn create_metal_buffer(&self, size: usize) -> NdimageResult<MetalBufferHandle> {
314 Ok(MetalBufferHandle {
316 buffer: 0,
317 device: 0,
318 })
319 }
320
321 #[cfg(all(target_os = "macos", feature = "metal"))]
322 fn deallocate_metal_buffer(&self, handle: &MetalBufferHandle) -> NdimageResult<()> {
323 Ok(())
325 }
326
327 fn generate_buffer_id(&self) -> u64 {
328 use std::sync::atomic::{AtomicU64, Ordering};
329 static BUFFER_ID_COUNTER: AtomicU64 = AtomicU64::new(1);
330 BUFFER_ID_COUNTER.fetch_add(1, Ordering::Relaxed)
331 }
332
333 fn calculate_fragmentation(buffers: &[GpuBuffer]) -> f64 {
334 if buffers.is_empty() {
335 return 0.0;
336 }
337
338 let total_size: usize = buffers.iter().map(|b| b.size).sum();
339 let used_size: usize = buffers.iter().filter(|b| b.in_use).map(|b| b.size).sum();
340
341 if total_size == 0 {
342 0.0
343 } else {
344 1.0 - (used_size as f64 / total_size as f64)
345 }
346 }
347}
348
/// Snapshot of a [`GpuMemoryPool`] produced by `GpuMemoryPool::get_statistics`.
#[derive(Debug, Clone)]
pub struct MemoryPoolStatistics {
    /// Value of the pool's running allocation total at snapshot time.
    pub total_allocated: usize,
    /// Largest value the running allocation total has reached.
    pub peak_usage: usize,
    /// Number of pooled buffers marked as in use.
    pub active_buffers: usize,
    /// Number of buffers tracked by the pool.
    pub total_buffers: usize,
    /// Sum of the sizes of all buffers tracked by the pool.
    pub total_pool_memory: usize,
    /// `1.0` minus the in-use share of the pooled sizes; `0.0` for an empty pool.
    pub fragmentation_ratio: f64,
}
365
/// Cache of compiled kernels keyed by kernel id, with hit/miss bookkeeping.
#[derive(Debug)]
pub struct GpuKernelCache {
    /// Compiled kernels indexed by the id they were requested under.
    kernels: Arc<RwLock<HashMap<String, CompiledKernel>>>,
    /// Hit, miss and compilation counters.
    stats: Arc<Mutex<KernelCacheStats>>,
}
374
/// A compiled kernel together with its usage bookkeeping, as stored in [`GpuKernelCache`].
#[derive(Debug, Clone)]
pub struct CompiledKernel {
    /// Id the kernel was requested under; also its key in the cache.
    pub id: String,
    /// Backend-specific handle of the compiled kernel.
    pub handle: KernelHandle,
    /// Time at which the kernel was compiled.
    pub compiled_at: Instant,
    /// Time of the most recent cache lookup recorded for this kernel.
    pub last_used: Instant,
    /// Usage counter; starts at 1 when the kernel is compiled.
    pub use_count: usize,
    /// Execution statistics maintained by `GpuKernelCache::update_kernel_stats`.
    pub performance_stats: KernelPerformanceStats,
}
390
/// Backend-specific handle of a compiled kernel.
///
/// The backend variants exist only when the corresponding Cargo feature (and, for Metal, the
/// macOS target) is enabled; `Placeholder` is always available.
#[derive(Debug, Clone)]
pub enum KernelHandle {
    /// Kernel compiled for the CUDA backend.
    #[cfg(feature = "cuda")]
    Cuda(CudaKernelHandle),
    /// Kernel compiled for the OpenCL backend.
    #[cfg(feature = "opencl")]
    OpenCL(OpenCLKernelHandle),
    /// Kernel compiled for the Metal backend.
    #[cfg(all(target_os = "macos", feature = "metal"))]
    Metal(MetalKernelHandle),
    /// Used for every backend that has no dedicated variant compiled in.
    Placeholder,
}
401
/// Raw identifiers describing a compiled CUDA kernel.
// NOTE(review): presumably integer-encoded driver handles — confirm; the compile stub in this
// file writes zeros.
#[cfg(feature = "cuda")]
#[derive(Debug, Clone)]
pub struct CudaKernelHandle {
    /// Kernel function, stored as an integer.
    pub function: usize,
    /// Module the function belongs to, stored as an integer.
    pub module: usize,
}
408
/// Raw identifiers describing a compiled OpenCL kernel.
// NOTE(review): presumably integer-encoded OpenCL object handles — confirm; the compile stub in
// this file writes zeros.
#[cfg(feature = "opencl")]
#[derive(Debug, Clone)]
pub struct OpenCLKernelHandle {
    /// Kernel object, stored as an integer.
    pub kernel: usize,
    /// Program the kernel was built from, stored as an integer.
    pub program: usize,
}
415
/// Raw identifiers describing a compiled Metal kernel.
// NOTE(review): presumably integer-encoded Metal object handles — confirm; the compile stub in
// this file writes zeros.
#[cfg(all(target_os = "macos", feature = "metal"))]
#[derive(Debug, Clone)]
pub struct MetalKernelHandle {
    /// Kernel function, stored as an integer.
    pub function: usize,
    /// Library the function belongs to, stored as an integer.
    pub library: usize,
}
422
/// Execution statistics kept for one compiled kernel.
#[derive(Debug, Clone)]
pub struct KernelPerformanceStats {
    /// Mean execution time over the recorded samples.
    pub avg_execution_time: Duration,
    /// Shortest execution time recorded so far.
    pub min_execution_time: Duration,
    /// Longest execution time recorded so far.
    pub max_execution_time: Duration,
    /// Sum of all recorded execution times.
    pub total_execution_time: Duration,
    /// Smoothed memory-bandwidth figure reported by callers.
    pub memory_bandwidth: f64,
    /// Smoothed compute-utilization figure reported by callers.
    pub compute_utilization: f64,
}

impl Default for KernelPerformanceStats {
    /// Starts every accumulator at zero, except the minimum, which starts at `Duration::MAX`
    /// so that taking `min` with any sample yields that sample.
    fn default() -> Self {
        let zero = Duration::ZERO;
        let no_measurement = 0.0;

        Self {
            avg_execution_time: zero,
            min_execution_time: Duration::MAX,
            max_execution_time: zero,
            total_execution_time: zero,
            memory_bandwidth: no_measurement,
            compute_utilization: no_measurement,
        }
    }
}
451
/// Counters describing how a [`GpuKernelCache`] has been used.
///
/// `Default` is derived: every counter starts at zero and the accumulated compilation time at
/// `Duration::ZERO`, exactly as the former hand-written implementation did.
#[derive(Debug, Clone, Default)]
pub struct KernelCacheStats {
    /// Number of lookups answered from the cache.
    pub cache_hits: usize,
    /// Number of lookups that required a compilation attempt.
    pub cache_misses: usize,
    /// Number of kernels compiled successfully.
    pub kernels_compiled: usize,
    /// Total time spent in successful compilations.
    pub total_compilation_time: Duration,
}
474
475impl GpuKernelCache {
476 pub fn new() -> Self {
478 Self {
479 kernels: Arc::new(RwLock::new(HashMap::new())),
480 stats: Arc::new(Mutex::new(KernelCacheStats::default())),
481 }
482 }
483
484 pub fn get_or_compile_kernel(
486 &self,
487 kernel_id: &str,
488 kernel_source: &str,
489 backend: Backend,
490 compile_options: &[String],
491 ) -> NdimageResult<CompiledKernel> {
492 {
494 let kernels = self.kernels.read().expect("Operation failed");
495 if let Some(kernel) = kernels.get(kernel_id) {
496 let mut stats = self.stats.lock().expect("Operation failed");
497 stats.cache_hits += 1;
498
499 let mut updated_kernel = kernel.clone();
501 updated_kernel.last_used = Instant::now();
502 updated_kernel.use_count += 1;
503
504 return Ok(updated_kernel);
505 }
506 }
507
508 let mut stats = self.stats.lock().expect("Operation failed");
510 stats.cache_misses += 1;
511 let compilation_start = Instant::now();
512
513 let kernel_handle = self.compile_kernel(kernel_source, backend, compile_options)?;
514
515 let compilation_time = compilation_start.elapsed();
516 stats.kernels_compiled += 1;
517 stats.total_compilation_time += compilation_time;
518 drop(stats);
519
520 let compiled_kernel = CompiledKernel {
521 id: kernel_id.to_string(),
522 handle: kernel_handle,
523 compiled_at: Instant::now(),
524 last_used: Instant::now(),
525 use_count: 1,
526 performance_stats: KernelPerformanceStats::default(),
527 };
528
529 {
531 let mut kernels = self.kernels.write().expect("Operation failed");
532 kernels.insert(kernel_id.to_string(), compiled_kernel.clone());
533 }
534
535 Ok(compiled_kernel)
536 }
537
538 pub fn update_kernel_stats(
540 &self,
541 kernel_id: &str,
542 execution_time: Duration,
543 memory_bandwidth: f64,
544 compute_utilization: f64,
545 ) -> NdimageResult<()> {
546 let mut kernels = self.kernels.write().expect("Operation failed");
547 if let Some(kernel) = kernels.get_mut(kernel_id) {
548 let stats = &mut kernel.performance_stats;
549
550 stats.total_execution_time += execution_time;
552 stats.min_execution_time = stats.min_execution_time.min(execution_time);
553 stats.max_execution_time = stats.max_execution_time.max(execution_time);
554 stats.avg_execution_time = stats.total_execution_time / kernel.use_count as u32;
555
556 let alpha = 0.1; stats.memory_bandwidth =
559 alpha * memory_bandwidth + (1.0 - alpha) * stats.memory_bandwidth;
560 stats.compute_utilization =
561 alpha * compute_utilization + (1.0 - alpha) * stats.compute_utilization;
562 }
563
564 Ok(())
565 }
566
567 pub fn get_cache_stats(&self) -> KernelCacheStats {
569 self.stats.lock().expect("Operation failed").clone()
570 }
571
572 pub fn clear_cache(&self) {
574 let mut kernels = self.kernels.write().expect("Operation failed");
575 kernels.clear();
576
577 let mut stats = self.stats.lock().expect("Operation failed");
578 *stats = KernelCacheStats::default();
579 }
580
581 fn compile_kernel(
582 &self,
583 source: &str,
584 backend: Backend,
585 options: &[String],
586 ) -> NdimageResult<KernelHandle> {
587 match backend {
588 #[cfg(feature = "cuda")]
589 Backend::Cuda => {
590 let handle = self.compile_cuda_kernel(source, options)?;
591 Ok(KernelHandle::Cuda(handle))
592 }
593 #[cfg(feature = "opencl")]
594 Backend::OpenCL => {
595 let handle = self.compile_opencl_kernel(source, options)?;
596 Ok(KernelHandle::OpenCL(handle))
597 }
598 #[cfg(all(target_os = "macos", feature = "metal"))]
599 Backend::Metal => {
600 let handle = self.compile_metal_kernel(source, options)?;
601 Ok(KernelHandle::Metal(handle))
602 }
603 _ => Ok(KernelHandle::Placeholder),
604 }
605 }
606
607 #[cfg(feature = "cuda")]
608 fn compile_cuda_kernel(
609 &self,
610 source: &str,
611 options: &[String],
612 ) -> NdimageResult<CudaKernelHandle> {
613 Ok(CudaKernelHandle {
615 function: 0,
616 module: 0,
617 })
618 }
619
620 #[cfg(feature = "opencl")]
621 fn compile_opencl_kernel(
622 &self,
623 source: &str,
624 options: &[String],
625 ) -> NdimageResult<OpenCLKernelHandle> {
626 Ok(OpenCLKernelHandle {
628 kernel: 0,
629 program: 0,
630 })
631 }
632
633 #[cfg(all(target_os = "macos", feature = "metal"))]
634 fn compile_metal_kernel(
635 &self,
636 source: &str,
637 options: &[String],
638 ) -> NdimageResult<MetalKernelHandle> {
639 Ok(MetalKernelHandle {
641 function: 0,
642 library: 0,
643 })
644 }
645}
646
/// Ties together the buffer pool, the kernel cache, the device manager and a profiler behind a
/// single entry point for running GPU operations.
pub struct GpuAccelerationManager {
    /// Pool from which input and output buffers are taken.
    memory_pool: GpuMemoryPool,
    /// Cache of compiled kernels, keyed by operation name.
    kernel_cache: GpuKernelCache,
    /// Device manager created at construction time.
    device_manager: crate::backend::DeviceManager,
    /// Timing and memory history plus aggregate metrics.
    profiler: Arc<Mutex<GpuProfiler>>,
}
658
/// Profiling data collected by [`GpuAccelerationManager`].
#[derive(Debug)]
pub struct GpuProfiler {
    /// One `(operation name, elapsed time)` entry per profiled operation.
    timinghistory: Vec<(String, Duration)>,
    /// One `(timestamp, memory used)` entry per profiled operation.
    memoryhistory: Vec<(Instant, usize)>,
    /// Aggregate metrics over all profiled operations.
    metrics: GpuPerformanceMetrics,
}
668
/// Aggregate performance figures kept by the GPU profiler.
///
/// `Default` is derived: counters start at zero, the accumulated time at `Duration::ZERO` and
/// every ratio at `0.0`, exactly as the former hand-written implementation did.
#[derive(Debug, Clone, Default)]
pub struct GpuPerformanceMetrics {
    /// Number of operations recorded.
    pub total_operations: usize,
    /// Sum of the recorded execution times.
    pub total_gpu_time: Duration,
    /// Average memory bandwidth.
    pub avg_memory_bandwidth: f64,
    /// GPU utilization as a fraction; it is multiplied by 100 when displayed.
    pub gpu_utilization: f64,
    /// Memory efficiency figure.
    pub memory_efficiency: f64,
}
694
695impl GpuAccelerationManager {
696 pub fn new(config: MemoryPoolConfig) -> NdimageResult<Self> {
698 Ok(Self {
699 memory_pool: GpuMemoryPool::new(config),
700 kernel_cache: GpuKernelCache::new(),
701 device_manager: crate::backend::DeviceManager::new()?,
702 profiler: Arc::new(Mutex::new(GpuProfiler {
703 timinghistory: Vec::new(),
704 memoryhistory: Vec::new(),
705 metrics: GpuPerformanceMetrics::default(),
706 })),
707 })
708 }
709
710 pub fn execute_operation<T, D>(
712 &self,
713 operation_name: &str,
714 input: ArrayView<T, D>,
715 kernel_source: &str,
716 backend: Backend,
717 ) -> NdimageResult<Array<T, D>>
718 where
719 T: Float + FromPrimitive + Clone + Send + Sync,
720 D: Dimension,
721 {
722 let start_time = Instant::now();
723
724 let input_size = input.len() * std::mem::size_of::<T>();
726 let output_size = input_size; let total_memory_needed = input_size + output_size;
728
729 let input_buffer = self.memory_pool.allocate(input_size, backend)?;
731 let output_buffer = self.memory_pool.allocate(output_size, backend)?;
732
733 let kernel = self.kernel_cache.get_or_compile_kernel(
735 operation_name,
736 kernel_source,
737 backend,
738 &[], )?;
740
741 let result =
743 self.execute_kernel_operation(&kernel, &input, &input_buffer, &output_buffer)?;
744
745 self.memory_pool.deallocate(&input_buffer)?;
747 self.memory_pool.deallocate(&output_buffer)?;
748
749 let execution_time = start_time.elapsed();
751 self.update_profiling_stats(operation_name, execution_time, total_memory_needed)?;
752
753 Ok(result)
754 }
755
756 pub fn get_performance_report(&self) -> GpuPerformanceReport {
758 let memory_stats = self.memory_pool.get_statistics();
759 let cache_stats = self.kernel_cache.get_cache_stats();
760 let profiler = self.profiler.lock().expect("Operation failed");
761
762 GpuPerformanceReport {
763 memory_statistics: memory_stats,
764 cache_statistics: cache_stats,
765 performancemetrics: profiler.metrics.clone(),
766 recommendations: self.generate_performance_recommendations(),
767 }
768 }
769
770 fn execute_kernel_operation<T, D>(
771 &self,
772 kernel: &CompiledKernel,
773 input: &ArrayView<T, D>,
774 input_buffer: &GpuBuffer,
775 output_buffer: &GpuBuffer,
776 ) -> NdimageResult<Array<T, D>>
777 where
778 T: Float + FromPrimitive + Clone,
779 D: Dimension,
780 {
781 Ok(Array::zeros(input.raw_dim()))
784 }
785
786 fn update_profiling_stats(
787 &self,
788 operation_name: &str,
789 execution_time: Duration,
790 memory_used: usize,
791 ) -> NdimageResult<()> {
792 let mut profiler = self.profiler.lock().expect("Operation failed");
793
794 profiler
795 .timinghistory
796 .push((operation_name.to_string(), execution_time));
797 profiler.memoryhistory.push((Instant::now(), memory_used));
798
799 profiler.metrics.total_operations += 1;
801 profiler.metrics.total_gpu_time += execution_time;
802
803 if profiler.timinghistory.len() > 1 {
805 let avg_time =
806 profiler.metrics.total_gpu_time / profiler.metrics.total_operations as u32;
807 }
809
810 Ok(())
811 }
812
813 fn generate_performance_recommendations(&self) -> Vec<String> {
814 let mut recommendations = Vec::new();
815
816 let memory_stats = self.memory_pool.get_statistics();
817 let cache_stats = self.kernel_cache.get_cache_stats();
818
819 if memory_stats.fragmentation_ratio > 0.3 {
821 recommendations.push(
822 "High memory fragmentation detected. Consider defragmenting GPU memory pool."
823 .to_string(),
824 );
825 }
826
827 if memory_stats.peak_usage > memory_stats.total_pool_memory {
828 recommendations.push(
829 "Memory usage exceeded pool size. Consider increasing pool size.".to_string(),
830 );
831 }
832
833 let cache_hit_ratio = cache_stats.cache_hits as f64
835 / (cache_stats.cache_hits + cache_stats.cache_misses) as f64;
836 if cache_hit_ratio < 0.7 {
837 recommendations.push(
838 "Low kernel cache hit ratio. Consider pre-compiling frequently used kernels."
839 .to_string(),
840 );
841 }
842
843 if recommendations.is_empty() {
845 recommendations.push("GPU acceleration is performing optimally.".to_string());
846 }
847
848 recommendations
849 }
850}
851
/// Report assembled by `GpuAccelerationManager::get_performance_report`.
#[derive(Debug, Clone)]
pub struct GpuPerformanceReport {
    /// Snapshot of the memory pool.
    pub memory_statistics: MemoryPoolStatistics,
    /// Snapshot of the kernel cache counters.
    pub cache_statistics: KernelCacheStats,
    /// Copy of the profiler's aggregate metrics.
    pub performancemetrics: GpuPerformanceMetrics,
    /// Human-readable tuning suggestions.
    pub recommendations: Vec<String>,
}
864
865impl GpuPerformanceReport {
866 pub fn display(&self) {
868 println!("\n=== GPU Performance Report ===\n");
869
870 println!("Memory Statistics:");
871 println!(
872 " Total Allocated: {} MB",
873 self.memory_statistics.total_allocated / (1024 * 1024)
874 );
875 println!(
876 " Peak Usage: {} MB",
877 self.memory_statistics.peak_usage / (1024 * 1024)
878 );
879 println!(
880 " Active Buffers: {}",
881 self.memory_statistics.active_buffers
882 );
883 println!(
884 " Fragmentation: {:.2}%",
885 self.memory_statistics.fragmentation_ratio * 100.0
886 );
887
888 println!("\nKernel Cache Statistics:");
889 println!(" Cache Hits: {}", self.cache_statistics.cache_hits);
890 println!(" Cache Misses: {}", self.cache_statistics.cache_misses);
891 println!(
892 " Hit Ratio: {:.2}%",
893 (self.cache_statistics.cache_hits as f64
894 / (self.cache_statistics.cache_hits + self.cache_statistics.cache_misses).max(1)
895 as f64)
896 * 100.0
897 );
898
899 println!("\nPerformance Metrics:");
900 println!(
901 " Total Operations: {}",
902 self.performancemetrics.total_operations
903 );
904 println!(
905 " Total GPU Time: {:.3}ms",
906 self.performancemetrics.total_gpu_time.as_secs_f64() * 1000.0
907 );
908 println!(
909 " GPU Utilization: {:.2}%",
910 self.performancemetrics.gpu_utilization * 100.0
911 );
912
913 if !self.recommendations.is_empty() {
914 println!("\nRecommendations:");
915 for (i, rec) in self.recommendations.iter().enumerate() {
916 println!(" {}. {}", i + 1, rec);
917 }
918 }
919 }
920}
921
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created pool reports no buffer as in use.
    #[test]
    fn test_memory_pool_creation() {
        let pool = GpuMemoryPool::new(MemoryPoolConfig::default());

        assert_eq!(pool.get_statistics().active_buffers, 0);
    }

    /// A freshly created cache has recorded neither hits nor misses.
    #[test]
    fn test_kernel_cache_creation() {
        let KernelCacheStats {
            cache_hits,
            cache_misses,
            ..
        } = GpuKernelCache::new().get_cache_stats();

        assert_eq!(cache_hits, 0);
        assert_eq!(cache_misses, 0);
    }

    /// Smoke test: construction must return without panicking. Both `Ok` and `Err` are
    /// accepted.
    #[test]
    fn test_gpu_acceleration_manager_creation() {
        let outcome = GpuAccelerationManager::new(MemoryPoolConfig::default());

        assert!(outcome.is_err() || outcome.is_ok());
    }
}