scirs2_metrics/optimization/gpu_kernels/
mod.rs

1//! Advanced GPU kernels for high-performance metrics computation
2//!
3//! This module provides production-ready GPU kernels using CUDA, OpenCL, Metal, and Vulkan
4//! for large-scale metrics computation with optimal memory management.
5
6#![allow(clippy::too_many_arguments)]
7#![allow(clippy::uninlined_format_args)]
8#![allow(dead_code)]
9
10// Module declarations
11pub mod computer;
12pub mod config;
13pub mod contexts;
14pub mod kernels;
15pub mod runtime;
16
17// Re-export core types and functions for backward compatibility
18pub use computer::AdvancedGpuComputer;
19pub use config::{
20    BatchSettings, ComputeStrategy, ErrorHandling, GpuApi, GpuComputeConfig, GpuComputeResults,
21    GpuPerformanceStats, KernelConfig, KernelMetrics, KernelOptimization, MemoryStrategy,
22    TransferMetrics, VectorizationLevel,
23};
24pub use contexts::{
25    CudaContext, CudaDeviceProperties, CudaMemoryBlock, CudaMemoryPool, CudaMemoryStats,
26    OpenClContext, OpenClDeviceInfo,
27};
28pub use kernels::{cuda_kernels, metal_kernels, opencl_kernels, vulkan_kernels};
29pub use runtime::{
30    CudaRuntime, GpuBuffer, GpuBufferHandle, GpuBufferType, GpuKernelArg, GpuMemoryStats,
31    GpuPerformanceStats as RuntimeGpuPerformanceStats, GpuRuntime, GpuScalar, MetalRuntime,
32    OpenClRuntime, VulkanRuntime,
33};
34
35// Legacy type aliases for backward compatibility
36pub use computer::AdvancedGpuComputer as GpuComputer;
37pub use config::GpuComputeConfig as GpuConfig;
38
/// Smoke tests for the GPU kernel API surface re-exported by this module.
///
/// Every test goes through the public re-exports (`use super::*`), so a broken
/// or missing re-export shows up here as a compile error.
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    /// Building an `AdvancedGpuComputer` from the default configuration must succeed.
    #[test]
    fn test_advanced_gpu_computer_creation() {
        let config = GpuComputeConfig::default();
        let computer = AdvancedGpuComputer::new(config);
        assert!(computer.is_ok());
    }

    /// The CUDA availability probe must be callable; its answer is only reported,
    /// never asserted, so the test passes on machines with or without CUDA.
    #[test]
    fn test_cuda_availability_detection() {
        let available = AdvancedGpuComputer::is_cuda_available();
        // Should work regardless of actual CUDA availability
        println!("CUDA available: {}", available);
    }

    /// The OpenCL availability probe must be callable; its answer is only reported.
    #[test]
    fn test_opencl_availability_detection() {
        let available = AdvancedGpuComputer::is_opencl_available();
        println!("OpenCL available: {}", available);
    }

    /// Computes three metrics over a two-row batch and checks the shape of the
    /// result plus the recorded timing and memory figures.
    ///
    /// Ignored by default (reason: "timeout").
    #[test]
    #[ignore = "timeout"]
    fn test_batch_metrics_computation() {
        let computer = AdvancedGpuComputer::default();

        let y_true_batch = array![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]];
        let y_pred_batch = array![[1.1, 2.1, 2.9], [4.1, 4.9, 6.1]];

        let results = computer.compute_batch_metrics(
            &y_true_batch.view(),
            &y_pred_batch.view(),
            &["mse", "mae", "r2_score"],
        );

        // Fail loudly on an error instead of skipping the checks below.
        let Ok(gpu_results) = results else {
            panic!("compute_batch_metrics returned an error for a valid batch");
        };

        // One result entry per batch row.
        assert_eq!(gpu_results.results.len(), 2);
        assert!(gpu_results.execution_time.as_nanos() > 0);
        assert!(gpu_results.memory_used > 0);
    }

    /// After one batch computation the performance counters must report at
    /// least one operation.
    ///
    /// Ignored by default (reason: "timeout").
    #[test]
    #[ignore = "timeout"]
    fn test_performance_stats_tracking() {
        let computer = AdvancedGpuComputer::default();

        // Simulate some operations
        let y_true_batch = array![[1.0, 2.0], [3.0, 4.0]];
        let y_pred_batch = array![[1.1, 2.1], [2.9, 4.1]];

        // Only the side effect on the statistics matters here, so the
        // computation result itself is deliberately discarded.
        let _ =
            computer.compute_batch_metrics(&y_true_batch.view(), &y_pred_batch.view(), &["mse"]);

        let stats = computer.get_performance_stats();
        assert!(stats.total_operations > 0);
    }

    /// Pins the default launch geometry and async flag of `KernelConfig`.
    #[test]
    fn test_kernel_config_defaults() {
        let config = KernelConfig::default();
        assert_eq!(config.block_size, (256, 1, 1));
        assert_eq!(config.grid_size, (1, 1, 1));
        assert!(config.async_execution);
    }

    /// Pins the defaults of `GpuComputeConfig`: automatic API selection,
    /// fast-math kernels and multi-stream batching.
    #[test]
    fn test_gpu_compute_config_defaults() {
        let config = GpuComputeConfig::default();
        // `matches!` only yields a bool; it has to be wrapped in `assert!`
        // for the check to actually run.
        assert!(matches!(config.preferred_api, GpuApi::Auto));
        assert!(config.kernel_optimization.fast_math);
        assert!(config.batch_settings.multi_stream);
    }

    /// Each backend runtime must report success from `initialize()`.
    #[test]
    fn test_gpu_runtime_initialization() {
        let mut cuda_runtime = CudaRuntime::new(0);
        assert!(cuda_runtime.initialize().is_ok());

        let mut opencl_runtime = OpenClRuntime::new(1, 1);
        assert!(opencl_runtime.initialize().is_ok());

        let mut metal_runtime = MetalRuntime::new();
        assert!(metal_runtime.initialize().is_ok());

        let mut vulkan_runtime = VulkanRuntime::new();
        assert!(vulkan_runtime.initialize().is_ok());
    }

    /// A `GpuBuffer` built from a struct literal must expose the values it was
    /// constructed with.
    #[test]
    fn test_gpu_buffer_creation() {
        let buffer = GpuBuffer {
            id: 12345,
            size: 1024,
            buffer_type: GpuBufferType::InputOutput,
            handle: GpuBufferHandle::Cuda(0x11111111),
        };

        assert_eq!(buffer.id, 12345);
        assert_eq!(buffer.size, 1024);
        // Wrapped in `assert!` so the variant check is actually enforced.
        assert!(matches!(buffer.buffer_type, GpuBufferType::InputOutput));
    }

    /// Allocates and frees one block from a `CudaMemoryPool`, then checks that
    /// the pool still reports the limit it was created with.
    #[test]
    fn test_cuda_memory_pool() {
        let mut pool = CudaMemoryPool::new(1024 * 1024); // 1MB limit

        // Allocate a block
        let block = pool
            .allocate(512)
            .expect("a 512-byte allocation must fit within the 1 MiB pool limit");
        assert_eq!(block.size, 512);

        // Free the block
        let freed = pool.free(block.ptr);
        assert!(freed);

        // Check stats
        let stats = pool.get_stats();
        assert_eq!(stats.memory_limit, 1024 * 1024);
    }

    /// Every backend must ship non-empty kernel source for the metrics checked below.
    // The lint is silenced because these checks call `is_empty()` on what are
    // presumably compile-time constants, which is the pattern it flags.
    #[test]
    #[allow(clippy::const_is_empty)]
    fn test_kernel_source_availability() {
        // Test that kernel sources are available
        assert!(!cuda_kernels::MSE_KERNEL.is_empty());
        assert!(!cuda_kernels::MAE_KERNEL.is_empty());
        assert!(!cuda_kernels::R2_KERNEL.is_empty());

        assert!(!opencl_kernels::MSE_KERNEL.is_empty());
        assert!(!opencl_kernels::MAE_KERNEL.is_empty());

        assert!(!metal_kernels::MSE_KERNEL.is_empty());
        assert!(!metal_kernels::MAE_KERNEL.is_empty());

        assert!(!vulkan_kernels::MSE_GLSL_SOURCE.is_empty());
        assert!(!vulkan_kernels::MAE_GLSL_SOURCE.is_empty());
    }
}