scirs2_metrics/optimization/gpu_kernels/
mod.rs1#![allow(clippy::too_many_arguments)]
7#![allow(clippy::uninlined_format_args)]
8#![allow(dead_code)]
9
10pub mod computer;
12pub mod config;
13pub mod contexts;
14pub mod kernels;
15pub mod runtime;
16
17pub use computer::AdvancedGpuComputer;
19pub use config::{
20 BatchSettings, ComputeStrategy, ErrorHandling, GpuApi, GpuComputeConfig, GpuComputeResults,
21 GpuPerformanceStats, KernelConfig, KernelMetrics, KernelOptimization, MemoryStrategy,
22 TransferMetrics, VectorizationLevel,
23};
24pub use contexts::{
25 CudaContext, CudaDeviceProperties, CudaMemoryBlock, CudaMemoryPool, CudaMemoryStats,
26 OpenClContext, OpenClDeviceInfo,
27};
28pub use kernels::{cuda_kernels, metal_kernels, opencl_kernels, vulkan_kernels};
29pub use runtime::{
30 CudaRuntime, GpuBuffer, GpuBufferHandle, GpuBufferType, GpuKernelArg, GpuMemoryStats,
31 GpuPerformanceStats as RuntimeGpuPerformanceStats, GpuRuntime, GpuScalar, MetalRuntime,
32 OpenClRuntime, VulkanRuntime,
33};
34
35pub use computer::AdvancedGpuComputer as GpuComputer;
37pub use config::GpuComputeConfig as GpuConfig;
38
#[cfg(test)]
mod tests {
    //! Tests for the items re-exported by this module: the GPU computer,
    //! configuration defaults, runtime back-ends, buffers, the CUDA memory
    //! pool and the embedded kernel sources.

    use super::*;
    use scirs2_core::ndarray::array;

    /// Building a computer from the default configuration must succeed.
    #[test]
    fn test_advanced_gpu_computer_creation() {
        let config = GpuComputeConfig::default();
        let computer = AdvancedGpuComputer::new(config);
        assert!(computer.is_ok());
    }

    /// Smoke test: the CUDA probe must return without panicking.
    ///
    /// The result is only printed, not asserted (presumably because it
    /// depends on the host machine).
    #[test]
    fn test_cuda_availability_detection() {
        let available = AdvancedGpuComputer::is_cuda_available();
        println!("CUDA available: {}", available);
    }

    /// Smoke test: the OpenCL probe must return without panicking.
    ///
    /// The result is only printed, not asserted (presumably because it
    /// depends on the host machine).
    #[test]
    fn test_opencl_availability_detection() {
        let available = AdvancedGpuComputer::is_opencl_available();
        println!("OpenCL available: {}", available);
    }

    /// Runs three metrics over a batch of two samples and checks the shape of
    /// the returned results plus the reported timing and memory figures.
    #[test]
    #[ignore = "timeout"]
    fn test_batch_metrics_computation() {
        let computer = AdvancedGpuComputer::default();

        let y_true_batch = array![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]];
        let y_pred_batch = array![[1.1, 2.1, 2.9], [4.1, 4.9, 6.1]];

        // `expect` fails the test on `Err` exactly like the former
        // `assert!(is_ok())` + `if let Ok(..)` pair, without the dead branch.
        let gpu_results = computer
            .compute_batch_metrics(
                &y_true_batch.view(),
                &y_pred_batch.view(),
                &["mse", "mae", "r2_score"],
            )
            .expect("batch metrics computation should succeed");

        // One result entry per row of the input batch.
        assert_eq!(gpu_results.results.len(), 2);
        assert!(gpu_results.execution_time.as_nanos() > 0);
        assert!(gpu_results.memory_used > 0);
    }

    /// After one batch computation the operation counter must be non-zero.
    #[test]
    #[ignore = "timeout"]
    fn test_performance_stats_tracking() {
        let computer = AdvancedGpuComputer::default();

        let y_true_batch = array![[1.0, 2.0], [3.0, 4.0]];
        let y_pred_batch = array![[1.1, 2.1], [2.9, 4.1]];

        // The outcome of the computation is deliberately ignored here; only
        // the statistics recorded afterwards are checked.
        let _ =
            computer.compute_batch_metrics(&y_true_batch.view(), &y_pred_batch.view(), &["mse"]);

        let stats = computer.get_performance_stats();
        assert!(stats.total_operations > 0);
    }

    /// Pins the default launch geometry and async flag of `KernelConfig`.
    #[test]
    fn test_kernel_config_defaults() {
        let config = KernelConfig::default();
        assert_eq!(config.block_size, (256, 1, 1));
        assert_eq!(config.grid_size, (1, 1, 1));
        assert!(config.async_execution);
    }

    /// Pins selected defaults of `GpuComputeConfig`.
    #[test]
    fn test_gpu_compute_config_defaults() {
        let config = GpuComputeConfig::default();
        // `matches!` only yields a bool; it must be wrapped in `assert!`,
        // otherwise the check is silently discarded.
        assert!(matches!(config.preferred_api, GpuApi::Auto));
        assert!(config.kernel_optimization.fast_math);
        assert!(config.batch_settings.multi_stream);
    }

    /// Every runtime back-end must report success from `initialize`.
    #[test]
    fn test_gpu_runtime_initialization() {
        let mut cuda_runtime = CudaRuntime::new(0);
        assert!(cuda_runtime.initialize().is_ok());

        let mut opencl_runtime = OpenClRuntime::new(1, 1);
        assert!(opencl_runtime.initialize().is_ok());

        let mut metal_runtime = MetalRuntime::new();
        assert!(metal_runtime.initialize().is_ok());

        let mut vulkan_runtime = VulkanRuntime::new();
        assert!(vulkan_runtime.initialize().is_ok());
    }

    /// A `GpuBuffer` built from a struct literal exposes the values it was given.
    #[test]
    fn test_gpu_buffer_creation() {
        let buffer = GpuBuffer {
            id: 12345,
            size: 1024,
            buffer_type: GpuBufferType::InputOutput,
            handle: GpuBufferHandle::Cuda(0x11111111),
        };

        assert_eq!(buffer.id, 12345);
        assert_eq!(buffer.size, 1024);
        // Wrapped in `assert!` so the variant check is actually enforced.
        assert!(matches!(buffer.buffer_type, GpuBufferType::InputOutput));
    }

    /// Allocates and frees one block, then checks the limit reported by the pool.
    #[test]
    fn test_cuda_memory_pool() {
        // The constructor argument is compared against `memory_limit` below.
        let mut pool = CudaMemoryPool::new(1024 * 1024);

        let block = pool
            .allocate(512)
            .expect("allocating 512 from a fresh pool should succeed");
        assert_eq!(block.size, 512);

        let freed = pool.free(block.ptr);
        assert!(freed);

        let stats = pool.get_stats();
        assert_eq!(stats.memory_limit, 1024 * 1024);
    }

    /// Every embedded kernel source referenced here must be non-empty.
    #[test]
    #[allow(clippy::const_is_empty)]
    fn test_kernel_source_availability() {
        assert!(!cuda_kernels::MSE_KERNEL.is_empty());
        assert!(!cuda_kernels::MAE_KERNEL.is_empty());
        assert!(!cuda_kernels::R2_KERNEL.is_empty());

        assert!(!opencl_kernels::MSE_KERNEL.is_empty());
        assert!(!opencl_kernels::MAE_KERNEL.is_empty());

        assert!(!metal_kernels::MSE_KERNEL.is_empty());
        assert!(!metal_kernels::MAE_KERNEL.is_empty());

        assert!(!vulkan_kernels::MSE_GLSL_SOURCE.is_empty());
        assert!(!vulkan_kernels::MAE_GLSL_SOURCE.is_empty());
    }
}