scirs2_sparse/gpu/
mod.rs

//! GPU acceleration for sparse matrix operations
//!
//! This module provides GPU acceleration support for sparse matrix operations
//! across multiple backends including CUDA, OpenCL, Metal, ROCm, and WGPU.
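//!
//! # Example
//!
//! A minimal usage sketch (marked `ignore`, so it is not compiled as a doctest).
//! It assumes the crate is consumed as `scirs2_sparse`, that a `CsrArray<f64>`
//! and an `Array1<f64>` have been built elsewhere, and that `f64` implements
//! `GpuDataType`; when no GPU backend is available the call falls back to the
//! CPU path.
//!
//! ```ignore
//! use scirs2_sparse::gpu::convenience::gpu_spmv;
//!
//! // `matrix: CsrArray<f64>` and `x: Array1<f64>` are constructed elsewhere.
//! let y = gpu_spmv(&matrix, &x.view())?;
//! ```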

pub mod cuda;
pub mod metal;
pub mod opencl;

// Re-export common types and traits
#[cfg(feature = "gpu")]
pub use scirs2_core::gpu::{
    GpuBackend, GpuBuffer, GpuContext, GpuDataType, GpuDevice, GpuError, GpuKernelHandle,
};

// Fallback types when GPU feature is not enabled
#[cfg(not(feature = "gpu"))]
pub use crate::gpu_ops::{
    GpuBackend, GpuBuffer, GpuDataType, GpuDevice, GpuError, GpuKernelHandle,
};

// Re-export backend-specific modules
pub use cuda::{CudaMemoryManager, CudaOptimizationLevel, CudaSpMatVec};
pub use metal::{MetalDeviceInfo, MetalMemoryManager, MetalOptimizationLevel, MetalSpMatVec};
pub use opencl::{
    OpenCLMemoryManager, OpenCLOptimizationLevel, OpenCLPlatformInfo, OpenCLSpMatVec,
};

use crate::csr_array::CsrArray;
use crate::error::{SparseError, SparseResult};
use crate::sparray::SparseArray;
use scirs2_core::ndarray::{Array1, ArrayView1};
use scirs2_core::numeric::Float;
use std::fmt::Debug;

// GpuDataType is already available from the pub use statements above

/// Unified GPU sparse matrix operations interface
pub struct GpuSpMatVec {
    backend: GpuBackend,
    cuda_handler: Option<CudaSpMatVec>,
    opencl_handler: Option<OpenCLSpMatVec>,
    metal_handler: Option<MetalSpMatVec>,
}

impl GpuSpMatVec {
    /// Create a new GPU sparse matrix handler with automatic backend detection
    pub fn new() -> SparseResult<Self> {
        let backend = Self::detect_best_backend();

        let mut handler = Self {
            backend,
            cuda_handler: None,
            opencl_handler: None,
            metal_handler: None,
        };

        // Initialize the appropriate backend
        handler.initialize_backend()?;

        Ok(handler)
    }

    /// Create a new GPU sparse matrix handler with specified backend
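    ///
    /// # Example
    ///
    /// A sketch of forcing a specific backend instead of relying on
    /// auto-detection (`ignore`d, since it requires the matching runtime to be
    /// present when executed):
    ///
    /// ```ignore
    /// let handler = GpuSpMatVec::with_backend(GpuBackend::OpenCL)?;
    /// assert_eq!(handler.backend(), GpuBackend::OpenCL);
    /// ```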
    pub fn with_backend(backend: GpuBackend) -> SparseResult<Self> {
        let mut handler = Self {
            backend,
            cuda_handler: None,
            opencl_handler: None,
            metal_handler: None,
        };

        handler.initialize_backend()?;

        Ok(handler)
    }

    /// Initialize the selected backend
    fn initialize_backend(&mut self) -> SparseResult<()> {
        match self.backend {
            GpuBackend::Cuda => {
                self.cuda_handler = Some(CudaSpMatVec::new()?);
            }
            GpuBackend::OpenCL => {
                self.opencl_handler = Some(OpenCLSpMatVec::new()?);
            }
            GpuBackend::Metal => {
                self.metal_handler = Some(MetalSpMatVec::new()?);
            }
            GpuBackend::Cpu => {
                // CPU fallback - no initialization needed
            }
            _ => {
                // For other backends (ROCm, WGPU), fall back to CPU for now
                self.backend = GpuBackend::Cpu;
            }
        }

        Ok(())
    }

    /// Detect the best available GPU backend
    fn detect_best_backend() -> GpuBackend {
        // Priority order: Metal (on macOS), CUDA, OpenCL, CPU
        #[cfg(target_os = "macos")]
        {
            if Self::is_metal_available() {
                return GpuBackend::Metal;
            }
        }

        if Self::is_cuda_available() {
            return GpuBackend::Cuda;
        }

        if Self::is_opencl_available() {
            return GpuBackend::OpenCL;
        }

        GpuBackend::Cpu
    }

    /// Check if CUDA is available
    fn is_cuda_available() -> bool {
        // In a real implementation, this would check for the CUDA runtime
        #[cfg(feature = "gpu")]
        {
            // Simplified detection
            std::env::var("CUDA_PATH").is_ok() || std::path::Path::new("/usr/local/cuda").exists()
        }
        #[cfg(not(feature = "gpu"))]
        false
    }

    /// Check if OpenCL is available
    fn is_opencl_available() -> bool {
        // In a real implementation, this would check for the OpenCL runtime
        #[cfg(feature = "gpu")]
        {
            // Simplified detection - assume available on most systems
            true
        }
        #[cfg(not(feature = "gpu"))]
        false
    }

    /// Check if Metal is available (macOS only)
    fn is_metal_available() -> bool {
        #[cfg(target_os = "macos")]
        {
            // Metal is available on all modern macOS systems
            true
        }
        #[cfg(not(target_os = "macos"))]
        false
    }

    /// Execute sparse matrix-vector multiplication on GPU
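    ///
    /// # Example
    ///
    /// A sketch of a single `y = A * x` product. Passing `None` for the device
    /// is only meaningful for the CPU fallback; GPU backends built with the
    /// `gpu` feature require an explicit [`GpuDevice`].
    ///
    /// ```ignore
    /// let handler = GpuSpMatVec::new()?;
    /// // `matrix: CsrArray<f64>` and `x: Array1<f64>` are constructed elsewhere.
    /// let y = handler.spmv(&matrix, &x.view(), None)?;
    /// ```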
    pub fn spmv<T>(
        &self,
        matrix: &CsrArray<T>,
        vector: &ArrayView1<T>,
        device: Option<&GpuDevice>,
    ) -> SparseResult<Array1<T>>
    where
        T: Float + Debug + Copy + GpuDataType + std::iter::Sum,
    {
        match self.backend {
            GpuBackend::Cuda => {
                if let Some(ref handler) = self.cuda_handler {
                    #[cfg(feature = "gpu")]
                    {
                        if let Some(device) = device {
                            handler.execute_spmv(matrix, vector, device)
                        } else {
                            return Err(SparseError::ComputationError(
                                "GPU device required for CUDA operations".to_string(),
                            ));
                        }
                    }
                    #[cfg(not(feature = "gpu"))]
                    handler.execute_spmv_cpu(matrix, vector)
                } else {
                    Err(SparseError::ComputationError(
                        "CUDA handler not initialized".to_string(),
                    ))
                }
            }
            GpuBackend::OpenCL => {
                if let Some(ref handler) = self.opencl_handler {
                    #[cfg(feature = "gpu")]
                    {
                        if let Some(device) = device {
                            handler.execute_spmv(matrix, vector, device)
                        } else {
                            return Err(SparseError::ComputationError(
                                "GPU device required for OpenCL operations".to_string(),
                            ));
                        }
                    }
                    #[cfg(not(feature = "gpu"))]
                    handler.execute_spmv_cpu(matrix, vector)
                } else {
                    Err(SparseError::ComputationError(
                        "OpenCL handler not initialized".to_string(),
                    ))
                }
            }
            GpuBackend::Metal => {
                if let Some(ref handler) = self.metal_handler {
                    #[cfg(feature = "gpu")]
                    {
                        if let Some(device) = device {
                            handler.execute_spmv(matrix, vector, device)
                        } else {
                            return Err(SparseError::ComputationError(
                                "GPU device required for Metal operations".to_string(),
                            ));
                        }
                    }
                    #[cfg(not(feature = "gpu"))]
                    handler.execute_spmv_cpu(matrix, vector)
                } else {
                    Err(SparseError::ComputationError(
                        "Metal handler not initialized".to_string(),
                    ))
                }
            }
            GpuBackend::Cpu => {
                // CPU fallback
                matrix.dot_vector(vector)
            }
            _ => {
                // Unsupported backend, fall back to CPU
                matrix.dot_vector(vector)
            }
        }
    }

    /// Execute optimized sparse matrix-vector multiplication
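    ///
    /// # Example
    ///
    /// A sketch selecting the most aggressive optimization level; the hint is
    /// translated to the backend-specific level internally (see
    /// [`OptimizationHint`]).
    ///
    /// ```ignore
    /// let handler = GpuSpMatVec::new()?;
    /// let y = handler.spmv_optimized(&matrix, &x.view(), None, OptimizationHint::Maximum)?;
    /// ```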
    pub fn spmv_optimized<T>(
        &self,
        matrix: &CsrArray<T>,
        vector: &ArrayView1<T>,
        device: Option<&GpuDevice>,
        optimization_hint: OptimizationHint,
    ) -> SparseResult<Array1<T>>
    where
        T: Float + Debug + Copy + GpuDataType + std::iter::Sum,
    {
        match self.backend {
            GpuBackend::Cuda => {
                if let Some(ref handler) = self.cuda_handler {
                    let cuda_level = optimization_hint.to_cuda_level();
                    #[cfg(feature = "gpu")]
                    {
                        if let Some(device) = device {
                            handler.execute_optimized_spmv(matrix, vector, device, cuda_level)
                        } else {
                            return Err(SparseError::ComputationError(
                                "GPU device required for CUDA operations".to_string(),
                            ));
                        }
                    }
                    #[cfg(not(feature = "gpu"))]
                    handler.execute_spmv_cpu(matrix, vector)
                } else {
                    Err(SparseError::ComputationError(
                        "CUDA handler not initialized".to_string(),
                    ))
                }
            }
            GpuBackend::OpenCL => {
                if let Some(ref handler) = self.opencl_handler {
                    let opencl_level = optimization_hint.to_opencl_level();
                    #[cfg(feature = "gpu")]
                    {
                        if let Some(device) = device {
                            handler.execute_optimized_spmv(matrix, vector, device, opencl_level)
                        } else {
                            return Err(SparseError::ComputationError(
                                "GPU device required for OpenCL operations".to_string(),
                            ));
                        }
                    }
                    #[cfg(not(feature = "gpu"))]
                    handler.execute_spmv_cpu(matrix, vector)
                } else {
                    Err(SparseError::ComputationError(
                        "OpenCL handler not initialized".to_string(),
                    ))
                }
            }
            GpuBackend::Metal => {
                if let Some(ref handler) = self.metal_handler {
                    let metal_level = optimization_hint.to_metal_level();
                    #[cfg(feature = "gpu")]
                    {
                        if let Some(device) = device {
                            handler.execute_optimized_spmv(matrix, vector, device, metal_level)
                        } else {
                            return Err(SparseError::ComputationError(
                                "GPU device required for Metal operations".to_string(),
                            ));
                        }
                    }
                    #[cfg(not(feature = "gpu"))]
                    handler.execute_spmv_cpu(matrix, vector)
                } else {
                    Err(SparseError::ComputationError(
                        "Metal handler not initialized".to_string(),
                    ))
                }
            }
            _ => {
                // Fall back to basic implementation
                self.spmv(matrix, vector, device)
            }
        }
    }

    /// Get the current backend
    pub fn backend(&self) -> GpuBackend {
        self.backend
    }

    /// Check if GPU acceleration is available
    pub fn is_gpu_available(&self) -> bool {
        !matches!(self.backend, GpuBackend::Cpu)
    }

    /// Get backend-specific information
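    ///
    /// # Example
    ///
    /// A sketch of inspecting the selected backend. Note that the reported
    /// values are static defaults rather than figures queried from the driver.
    ///
    /// ```ignore
    /// let handler = GpuSpMatVec::new()?;
    /// let info = handler.get_backend_info();
    /// println!("{} ({} MB max)", info.name, info.max_memory_mb);
    /// ```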
    pub fn get_backend_info(&self) -> BackendInfo {
        match self.backend {
            GpuBackend::Cuda => BackendInfo {
                name: "CUDA".to_string(),
                version: "Unknown".to_string(),
                device_count: 1, // Simplified
                supports_double_precision: true,
                max_memory_mb: 8192, // 8GB default
            },
            GpuBackend::OpenCL => BackendInfo {
                name: "OpenCL".to_string(),
                version: "Unknown".to_string(),
                device_count: 1,
                supports_double_precision: true,
                max_memory_mb: 4096, // 4GB default
            },
            GpuBackend::Metal => BackendInfo {
                name: "Metal".to_string(),
                version: "Unknown".to_string(),
                device_count: 1,
                supports_double_precision: false, // Metal has limited f64 support
                max_memory_mb: if MetalDeviceInfo::detect().is_apple_silicon {
                    16384
                } else {
                    8192
                },
            },
            _ => BackendInfo {
                name: "CPU".to_string(),
                version: "Fallback".to_string(),
                device_count: 0,
                supports_double_precision: true,
                max_memory_mb: 0,
            },
        }
    }
}

impl Default for GpuSpMatVec {
    fn default() -> Self {
        Self::new().unwrap_or_else(|_| Self {
            backend: GpuBackend::Cpu,
            cuda_handler: None,
            opencl_handler: None,
            metal_handler: None,
        })
    }
}

/// Cross-platform optimization hints
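///
/// # Example
///
/// Each hint maps to a backend-specific level; for instance `Maximum` selects
/// warp-level CUDA kernels and the Apple Silicon path on Metal:
///
/// ```ignore
/// let hint = OptimizationHint::Maximum;
/// assert_eq!(hint.to_cuda_level(), CudaOptimizationLevel::WarpLevel);
/// assert_eq!(hint.to_metal_level(), MetalOptimizationLevel::AppleSilicon);
/// ```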
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OptimizationHint {
    /// Basic optimization level
    Basic,
    /// Balanced optimization (default)
    Balanced,
    /// Maximum performance optimization
    Maximum,
    /// Memory-optimized implementation
    MemoryOptimized,
}

impl OptimizationHint {
    /// Convert to CUDA optimization level
    pub fn to_cuda_level(self) -> CudaOptimizationLevel {
        match self {
            OptimizationHint::Basic => CudaOptimizationLevel::Basic,
            OptimizationHint::Balanced => CudaOptimizationLevel::Vectorized,
            OptimizationHint::Maximum => CudaOptimizationLevel::WarpLevel,
            OptimizationHint::MemoryOptimized => CudaOptimizationLevel::Basic,
        }
    }

    /// Convert to OpenCL optimization level
    pub fn to_opencl_level(self) -> OpenCLOptimizationLevel {
        match self {
            OptimizationHint::Basic => OpenCLOptimizationLevel::Basic,
            OptimizationHint::Balanced => OpenCLOptimizationLevel::Workgroup,
            OptimizationHint::Maximum => OpenCLOptimizationLevel::Vectorized,
            OptimizationHint::MemoryOptimized => OpenCLOptimizationLevel::Workgroup,
        }
    }

    /// Convert to Metal optimization level
    pub fn to_metal_level(self) -> MetalOptimizationLevel {
        match self {
            OptimizationHint::Basic => MetalOptimizationLevel::Basic,
            OptimizationHint::Balanced => MetalOptimizationLevel::SimdGroup,
            OptimizationHint::Maximum => MetalOptimizationLevel::AppleSilicon,
            OptimizationHint::MemoryOptimized => MetalOptimizationLevel::AppleSilicon,
        }
    }
}

impl Default for OptimizationHint {
    fn default() -> Self {
        Self::Balanced
    }
}

/// GPU backend information
#[derive(Debug, Clone)]
pub struct BackendInfo {
    pub name: String,
    pub version: String,
    pub device_count: usize,
    pub supports_double_precision: bool,
    pub max_memory_mb: usize,
}

/// Convenience functions for common operations
pub mod convenience {
    use super::*;

    /// Execute sparse matrix-vector multiplication with automatic GPU detection
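    ///
    /// # Example
    ///
    /// A one-call sketch; note that a fresh [`GpuSpMatVec`] is created on every
    /// call, so prefer the struct API when performing many products.
    ///
    /// ```ignore
    /// let y = gpu_spmv(&matrix, &x.view())?;
    /// ```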
    pub fn gpu_spmv<T>(matrix: &CsrArray<T>, vector: &ArrayView1<T>) -> SparseResult<Array1<T>>
    where
        T: Float + Debug + Copy + GpuDataType + std::iter::Sum,
    {
        let gpu_handler = GpuSpMatVec::new()?;
        gpu_handler.spmv(matrix, vector, None)
    }

    /// Execute optimized sparse matrix-vector multiplication
    pub fn gpu_spmv_optimized<T>(
        matrix: &CsrArray<T>,
        vector: &ArrayView1<T>,
        optimization: OptimizationHint,
    ) -> SparseResult<Array1<T>>
    where
        T: Float + Debug + Copy + GpuDataType + std::iter::Sum,
    {
        let gpu_handler = GpuSpMatVec::new()?;
        gpu_handler.spmv_optimized(matrix, vector, None, optimization)
    }

    /// Get information about available GPU backends
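    ///
    /// # Example
    ///
    /// The CPU backend is always included, so the returned list is never empty:
    ///
    /// ```ignore
    /// let backends = available_backends();
    /// assert!(backends.contains(&GpuBackend::Cpu));
    /// ```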
    pub fn available_backends() -> Vec<GpuBackend> {
        let mut backends = Vec::new();

        if GpuSpMatVec::is_cuda_available() {
            backends.push(GpuBackend::Cuda);
        }

        if GpuSpMatVec::is_opencl_available() {
            backends.push(GpuBackend::OpenCL);
        }

        if GpuSpMatVec::is_metal_available() {
            backends.push(GpuBackend::Metal);
        }

        backends.push(GpuBackend::Cpu); // Always available

        backends
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array1;

    #[test]
    fn test_gpu_spmv_creation() {
        let gpu_spmv = GpuSpMatVec::new();
        assert!(gpu_spmv.is_ok());
    }

    #[test]
    fn test_backend_detection() {
        let backend = GpuSpMatVec::detect_best_backend();

        // Should return a valid backend
        match backend {
            GpuBackend::Cuda | GpuBackend::OpenCL | GpuBackend::Metal | GpuBackend::Cpu => (),
            _ => panic!("Unexpected backend detected"),
        }
    }

    #[test]
    fn test_optimization_hint_conversions() {
        let hint = OptimizationHint::Maximum;

        let cuda_level = hint.to_cuda_level();
        let opencl_level = hint.to_opencl_level();
        let metal_level = hint.to_metal_level();

        assert_eq!(cuda_level, CudaOptimizationLevel::WarpLevel);
        assert_eq!(opencl_level, OpenCLOptimizationLevel::Vectorized);
        assert_eq!(metal_level, MetalOptimizationLevel::AppleSilicon);
    }

    #[test]
    fn test_backend_info() {
        let gpu_spmv = GpuSpMatVec::new().unwrap();
        let info = gpu_spmv.get_backend_info();

        assert!(!info.name.is_empty());
        assert!(!info.version.is_empty());
    }

    #[test]
    fn test_convenience_functions() {
        let backends = convenience::available_backends();
        assert!(!backends.is_empty());
        assert!(backends.contains(&GpuBackend::Cpu)); // CPU should always be available
    }

    #[test]
    fn test_is_gpu_available() {
        let gpu_spmv = GpuSpMatVec::new().unwrap();

        // Should not panic - either true or false is valid
        let _available = gpu_spmv.is_gpu_available();
    }

    #[test]
    fn test_optimization_hint_default() {
        assert_eq!(OptimizationHint::default(), OptimizationHint::Balanced);
    }
}