scirs2_sparse/gpu/
mod.rs

//! GPU acceleration for sparse matrix operations
//!
//! This module provides GPU acceleration support for sparse matrix operations.
//! Dedicated handlers exist for the CUDA, OpenCL, and Metal backends; any other
//! backend (e.g. ROCm or WGPU) currently falls back to the CPU implementation.
5
6pub mod cuda;
7pub mod metal;
8pub mod opencl;
9
10// Re-export common types and traits
11#[cfg(feature = "gpu")]
12pub use scirs2_core::gpu::{
13    GpuBackend, GpuBuffer, GpuContext, GpuDataType, GpuDevice, GpuError, GpuKernelHandle,
14};
15
16// Fallback types when GPU feature is not enabled
17#[cfg(not(feature = "gpu"))]
18pub use crate::gpu_ops::{
19    GpuBackend, GpuBuffer, GpuDataType, GpuDevice, GpuError, GpuKernelHandle,
20};
21
22// Re-export backend-specific modules
23pub use cuda::{CudaMemoryManager, CudaOptimizationLevel, CudaSpMatVec};
24pub use metal::{MetalDeviceInfo, MetalMemoryManager, MetalOptimizationLevel, MetalSpMatVec};
25pub use opencl::{
26    OpenCLMemoryManager, OpenCLOptimizationLevel, OpenCLPlatformInfo, OpenCLSpMatVec,
27};
28
29use crate::csr_array::CsrArray;
30use crate::error::{SparseError, SparseResult};
31use crate::sparray::SparseArray;
32use scirs2_core::ndarray::{Array1, ArrayView1};
33use scirs2_core::numeric::{Float, SparseElement};
34use std::fmt::Debug;
35
36// GpuDataType is already available from the pub use statements above
37
/// Unified GPU sparse matrix operations interface
///
/// Pairs the selected [`GpuBackend`] with at most one backend-specific handler.
/// Handlers are populated by `initialize_backend`, which only creates the one
/// matching `backend`; the others stay `None`.
pub struct GpuSpMatVec {
    // Backend used for dispatch. `initialize_backend` resets it to `Cpu` when
    // the requested backend has no dedicated handler.
    backend: GpuBackend,
    // Populated only when `backend` is `GpuBackend::Cuda`.
    cuda_handler: Option<CudaSpMatVec>,
    // Populated only when `backend` is `GpuBackend::OpenCL`.
    opencl_handler: Option<OpenCLSpMatVec>,
    // Populated only when `backend` is `GpuBackend::Metal`.
    metal_handler: Option<MetalSpMatVec>,
}
45
46impl GpuSpMatVec {
47    /// Create a new GPU sparse matrix handler with automatic backend detection
48    pub fn new() -> SparseResult<Self> {
49        let backend = Self::detect_best_backend();
50
51        let mut handler = Self {
52            backend,
53            cuda_handler: None,
54            opencl_handler: None,
55            metal_handler: None,
56        };
57
58        // Initialize the appropriate backend
59        handler.initialize_backend()?;
60
61        Ok(handler)
62    }
63
64    /// Create a new GPU sparse matrix handler with specified backend
65    pub fn with_backend(backend: GpuBackend) -> SparseResult<Self> {
66        let mut handler = Self {
67            backend,
68            cuda_handler: None,
69            opencl_handler: None,
70            metal_handler: None,
71        };
72
73        handler.initialize_backend()?;
74
75        Ok(handler)
76    }
77
78    /// Initialize the selected backend
79    fn initialize_backend(&mut self) -> SparseResult<()> {
80        match self.backend {
81            GpuBackend::Cuda => {
82                self.cuda_handler = Some(CudaSpMatVec::new()?);
83            }
84            GpuBackend::OpenCL => {
85                self.opencl_handler = Some(OpenCLSpMatVec::new()?);
86            }
87            GpuBackend::Metal => {
88                self.metal_handler = Some(MetalSpMatVec::new()?);
89            }
90            GpuBackend::Cpu => {
91                // CPU fallback - no initialization needed
92            }
93            _ => {
94                // For other backends (ROCm, WGPU), fall back to CPU for now
95                self.backend = GpuBackend::Cpu;
96            }
97        }
98
99        Ok(())
100    }
101
102    /// Detect the best available GPU backend
103    fn detect_best_backend() -> GpuBackend {
104        // Priority order: Metal (on macOS), CUDA, OpenCL, CPU
105        #[cfg(target_os = "macos")]
106        {
107            if Self::is_metal_available() {
108                return GpuBackend::Metal;
109            }
110        }
111
112        if Self::is_cuda_available() {
113            return GpuBackend::Cuda;
114        }
115
116        if Self::is_opencl_available() {
117            return GpuBackend::OpenCL;
118        }
119
120        GpuBackend::Cpu
121    }
122
123    /// Check if CUDA is available
124    fn is_cuda_available() -> bool {
125        // In a real implementation, this would check for CUDA runtime
126        #[cfg(feature = "gpu")]
127        {
128            // Simplified detection
129            std::env::var("CUDA_PATH").is_ok() || std::path::Path::new("/usr/local/cuda").exists()
130        }
131        #[cfg(not(feature = "gpu"))]
132        false
133    }
134
135    /// Check if OpenCL is available
136    fn is_opencl_available() -> bool {
137        // In a real implementation, this would check for OpenCL runtime
138        #[cfg(feature = "gpu")]
139        {
140            // Simplified detection - assume available on most systems
141            true
142        }
143        #[cfg(not(feature = "gpu"))]
144        false
145    }
146
147    /// Check if Metal is available (macOS only)
148    fn is_metal_available() -> bool {
149        #[cfg(target_os = "macos")]
150        {
151            // Metal is available on all modern macOS systems
152            true
153        }
154        #[cfg(not(target_os = "macos"))]
155        false
156    }
157
158    /// Execute sparse matrix-vector multiplication on GPU
159    pub fn spmv<T>(
160        &self,
161        matrix: &CsrArray<T>,
162        vector: &ArrayView1<T>,
163        device: Option<&GpuDevice>,
164    ) -> SparseResult<Array1<T>>
165    where
166        T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
167    {
168        match self.backend {
169            GpuBackend::Cuda => {
170                if let Some(ref handler) = self.cuda_handler {
171                    #[cfg(feature = "gpu")]
172                    {
173                        if let Some(device) = device {
174                            handler.execute_spmv(matrix, vector, device)
175                        } else {
176                            return Err(SparseError::ComputationError(
177                                "GPU device required for CUDA operations".to_string(),
178                            ));
179                        }
180                    }
181                    #[cfg(not(feature = "gpu"))]
182                    handler.execute_spmv_cpu(matrix, vector)
183                } else {
184                    Err(SparseError::ComputationError(
185                        "CUDA handler not initialized".to_string(),
186                    ))
187                }
188            }
189            GpuBackend::OpenCL => {
190                if let Some(ref handler) = self.opencl_handler {
191                    #[cfg(feature = "gpu")]
192                    {
193                        if let Some(device) = device {
194                            handler.execute_spmv(matrix, vector, device)
195                        } else {
196                            return Err(SparseError::ComputationError(
197                                "GPU device required for OpenCL operations".to_string(),
198                            ));
199                        }
200                    }
201                    #[cfg(not(feature = "gpu"))]
202                    handler.execute_spmv_cpu(matrix, vector)
203                } else {
204                    Err(SparseError::ComputationError(
205                        "OpenCL handler not initialized".to_string(),
206                    ))
207                }
208            }
209            GpuBackend::Metal => {
210                if let Some(ref handler) = self.metal_handler {
211                    #[cfg(feature = "gpu")]
212                    {
213                        if let Some(device) = device {
214                            handler.execute_spmv(matrix, vector, device)
215                        } else {
216                            return Err(SparseError::ComputationError(
217                                "GPU device required for Metal operations".to_string(),
218                            ));
219                        }
220                    }
221                    #[cfg(not(feature = "gpu"))]
222                    handler.execute_spmv_cpu(matrix, vector)
223                } else {
224                    Err(SparseError::ComputationError(
225                        "Metal handler not initialized".to_string(),
226                    ))
227                }
228            }
229            GpuBackend::Cpu => {
230                // CPU fallback
231                matrix.dot_vector(vector)
232            }
233            _ => {
234                // Unsupported backend, fall back to CPU
235                matrix.dot_vector(vector)
236            }
237        }
238    }
239
240    /// Execute optimized sparse matrix-vector multiplication
241    pub fn spmv_optimized<T>(
242        &self,
243        matrix: &CsrArray<T>,
244        vector: &ArrayView1<T>,
245        device: Option<&GpuDevice>,
246        optimization_hint: OptimizationHint,
247    ) -> SparseResult<Array1<T>>
248    where
249        T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
250    {
251        match self.backend {
252            GpuBackend::Cuda => {
253                if let Some(ref handler) = self.cuda_handler {
254                    let cuda_level = optimization_hint.to_cuda_level();
255                    #[cfg(feature = "gpu")]
256                    {
257                        if let Some(device) = device {
258                            handler.execute_optimized_spmv(matrix, vector, device, cuda_level)
259                        } else {
260                            return Err(SparseError::ComputationError(
261                                "GPU device required for CUDA operations".to_string(),
262                            ));
263                        }
264                    }
265                    #[cfg(not(feature = "gpu"))]
266                    handler.execute_spmv_cpu(matrix, vector)
267                } else {
268                    Err(SparseError::ComputationError(
269                        "CUDA handler not initialized".to_string(),
270                    ))
271                }
272            }
273            GpuBackend::OpenCL => {
274                if let Some(ref handler) = self.opencl_handler {
275                    let opencl_level = optimization_hint.to_opencl_level();
276                    #[cfg(feature = "gpu")]
277                    {
278                        if let Some(device) = device {
279                            handler.execute_optimized_spmv(matrix, vector, device, opencl_level)
280                        } else {
281                            return Err(SparseError::ComputationError(
282                                "GPU device required for OpenCL operations".to_string(),
283                            ));
284                        }
285                    }
286                    #[cfg(not(feature = "gpu"))]
287                    handler.execute_spmv_cpu(matrix, vector)
288                } else {
289                    Err(SparseError::ComputationError(
290                        "OpenCL handler not initialized".to_string(),
291                    ))
292                }
293            }
294            GpuBackend::Metal => {
295                if let Some(ref handler) = self.metal_handler {
296                    let metal_level = optimization_hint.to_metal_level();
297                    #[cfg(feature = "gpu")]
298                    {
299                        if let Some(device) = device {
300                            handler.execute_optimized_spmv(matrix, vector, device, metal_level)
301                        } else {
302                            return Err(SparseError::ComputationError(
303                                "GPU device required for Metal operations".to_string(),
304                            ));
305                        }
306                    }
307                    #[cfg(not(feature = "gpu"))]
308                    handler.execute_spmv_cpu(matrix, vector)
309                } else {
310                    Err(SparseError::ComputationError(
311                        "Metal handler not initialized".to_string(),
312                    ))
313                }
314            }
315            _ => {
316                // Fall back to basic implementation
317                self.spmv(matrix, vector, device)
318            }
319        }
320    }
321
    /// Get the current backend
    ///
    /// Returns the backend stored on this handler. It can differ from the one
    /// requested at construction, because `initialize_backend` replaces
    /// backends without a dedicated handler by `GpuBackend::Cpu`.
    pub fn backend(&self) -> GpuBackend {
        self.backend
    }
326
327    /// Check if GPU acceleration is available
328    pub fn is_gpu_available(&self) -> bool {
329        !matches!(self.backend, GpuBackend::Cpu)
330    }
331
332    /// Get backend-specific information
333    pub fn get_backend_info(&self) -> BackendInfo {
334        match self.backend {
335            GpuBackend::Cuda => BackendInfo {
336                name: "CUDA".to_string(),
337                version: "Unknown".to_string(),
338                device_count: 1, // Simplified
339                supports_double_precision: true,
340                max_memory_mb: 8192, // 8GB default
341            },
342            GpuBackend::OpenCL => BackendInfo {
343                name: "OpenCL".to_string(),
344                version: "Unknown".to_string(),
345                device_count: 1,
346                supports_double_precision: true,
347                max_memory_mb: 4096, // 4GB default
348            },
349            GpuBackend::Metal => BackendInfo {
350                name: "Metal".to_string(),
351                version: "Unknown".to_string(),
352                device_count: 1,
353                supports_double_precision: false, // Metal has limited f64 support
354                max_memory_mb: if MetalDeviceInfo::detect().is_apple_silicon {
355                    16384
356                } else {
357                    8192
358                },
359            },
360            _ => BackendInfo {
361                name: "CPU".to_string(),
362                version: "Fallback".to_string(),
363                device_count: 0,
364                supports_double_precision: true,
365                max_memory_mb: 0,
366            },
367        }
368    }
369}
370
371impl Default for GpuSpMatVec {
372    fn default() -> Self {
373        Self::new().unwrap_or_else(|_| Self {
374            backend: GpuBackend::Cpu,
375            cuda_handler: None,
376            opencl_handler: None,
377            metal_handler: None,
378        })
379    }
380}
381
/// Cross-platform optimization hints
///
/// A backend-independent hint that `to_cuda_level`, `to_opencl_level` and
/// `to_metal_level` translate into the corresponding backend-specific level.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum OptimizationHint {
    /// Basic optimization level
    ///
    /// Maps to the `Basic` level of every backend.
    Basic,
    /// Balanced optimization (default)
    ///
    /// Maps to `Vectorized` (CUDA), `Workgroup` (OpenCL) and `SimdGroup` (Metal).
    #[default]
    Balanced,
    /// Maximum performance optimization
    ///
    /// Maps to `WarpLevel` (CUDA), `Vectorized` (OpenCL) and `AppleSilicon` (Metal).
    Maximum,
    /// Memory-optimized implementation
    ///
    /// Maps to `Basic` (CUDA), `Workgroup` (OpenCL) and `AppleSilicon` (Metal).
    MemoryOptimized,
}
395
396impl OptimizationHint {
397    /// Convert to CUDA optimization level
398    pub fn to_cuda_level(self) -> CudaOptimizationLevel {
399        match self {
400            OptimizationHint::Basic => CudaOptimizationLevel::Basic,
401            OptimizationHint::Balanced => CudaOptimizationLevel::Vectorized,
402            OptimizationHint::Maximum => CudaOptimizationLevel::WarpLevel,
403            OptimizationHint::MemoryOptimized => CudaOptimizationLevel::Basic,
404        }
405    }
406
407    /// Convert to OpenCL optimization level
408    pub fn to_opencl_level(self) -> OpenCLOptimizationLevel {
409        match self {
410            OptimizationHint::Basic => OpenCLOptimizationLevel::Basic,
411            OptimizationHint::Balanced => OpenCLOptimizationLevel::Workgroup,
412            OptimizationHint::Maximum => OpenCLOptimizationLevel::Vectorized,
413            OptimizationHint::MemoryOptimized => OpenCLOptimizationLevel::Workgroup,
414        }
415    }
416
417    /// Convert to Metal optimization level
418    pub fn to_metal_level(self) -> MetalOptimizationLevel {
419        match self {
420            OptimizationHint::Basic => MetalOptimizationLevel::Basic,
421            OptimizationHint::Balanced => MetalOptimizationLevel::SimdGroup,
422            OptimizationHint::Maximum => MetalOptimizationLevel::AppleSilicon,
423            OptimizationHint::MemoryOptimized => MetalOptimizationLevel::AppleSilicon,
424        }
425    }
426}
427
/// GPU backend information
///
/// Summary produced by `GpuSpMatVec::get_backend_info`. The values filled in
/// there are mostly hard-coded defaults rather than queried device properties.
#[derive(Debug, Clone)]
pub struct BackendInfo {
    /// Backend name: `"CUDA"`, `"OpenCL"`, `"Metal"` or `"CPU"`.
    pub name: String,
    /// Backend version; currently `"Unknown"` for GPU backends and
    /// `"Fallback"` for the CPU.
    pub version: String,
    /// Number of devices; currently 1 for GPU backends and 0 for the CPU.
    pub device_count: usize,
    /// Whether double precision is reported as supported.
    pub supports_double_precision: bool,
    /// Reported device memory in megabytes (0 for the CPU fallback).
    pub max_memory_mb: usize,
}
437
438/// Convenient functions for common operations
439pub mod convenience {
440    use super::*;
441
442    /// Execute sparse matrix-vector multiplication with automatic GPU detection
443    pub fn gpu_spmv<T>(matrix: &CsrArray<T>, vector: &ArrayView1<T>) -> SparseResult<Array1<T>>
444    where
445        T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
446    {
447        let gpu_handler = GpuSpMatVec::new()?;
448        gpu_handler.spmv(matrix, vector, None)
449    }
450
451    /// Execute optimized sparse matrix-vector multiplication
452    pub fn gpu_spmv_optimized<T>(
453        matrix: &CsrArray<T>,
454        vector: &ArrayView1<T>,
455        optimization: OptimizationHint,
456    ) -> SparseResult<Array1<T>>
457    where
458        T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
459    {
460        let gpu_handler = GpuSpMatVec::new()?;
461        gpu_handler.spmv_optimized(matrix, vector, None, optimization)
462    }
463
464    /// Get information about available GPU backends
465    pub fn available_backends() -> Vec<GpuBackend> {
466        let mut backends = Vec::new();
467
468        if GpuSpMatVec::is_cuda_available() {
469            backends.push(GpuBackend::Cuda);
470        }
471
472        if GpuSpMatVec::is_opencl_available() {
473            backends.push(GpuBackend::OpenCL);
474        }
475
476        if GpuSpMatVec::is_metal_available() {
477            backends.push(GpuBackend::Metal);
478        }
479
480        backends.push(GpuBackend::Cpu); // Always available
481
482        backends
483    }
484}
485
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array1;

    /// Automatic construction is expected to succeed.
    #[test]
    fn test_gpu_spmv_creation() {
        assert!(GpuSpMatVec::new().is_ok());
    }

    /// Detection may only ever yield one of the four handled backends.
    #[test]
    fn test_backend_detection() {
        let detected = GpuSpMatVec::detect_best_backend();

        assert!(
            matches!(
                detected,
                GpuBackend::Cuda | GpuBackend::OpenCL | GpuBackend::Metal | GpuBackend::Cpu
            ),
            "Unexpected backend detected"
        );
    }

    /// `Maximum` maps to the most aggressive level of each backend.
    #[test]
    fn test_optimization_hint_conversions() {
        let hint = OptimizationHint::Maximum;

        assert_eq!(hint.to_cuda_level(), CudaOptimizationLevel::WarpLevel);
        assert_eq!(hint.to_opencl_level(), OpenCLOptimizationLevel::Vectorized);
        assert_eq!(hint.to_metal_level(), MetalOptimizationLevel::AppleSilicon);
    }

    /// Backend info always carries a name and a version string.
    #[test]
    fn test_backend_info() {
        let info = GpuSpMatVec::new()
            .expect("Operation failed")
            .get_backend_info();

        assert!(!info.name.is_empty());
        assert!(!info.version.is_empty());
    }

    /// The CPU backend is always part of the available list.
    #[test]
    fn test_convenience_functions() {
        let listed = convenience::available_backends();

        assert!(!listed.is_empty());
        assert!(listed.contains(&GpuBackend::Cpu));
    }

    /// Querying GPU availability must not panic; either answer is valid.
    #[test]
    fn test_is_gpu_available() {
        let handler = GpuSpMatVec::new().expect("Operation failed");

        let _ = handler.is_gpu_available();
    }

    /// `Balanced` is the default hint.
    #[test]
    fn test_optimization_hint_default() {
        assert_eq!(OptimizationHint::default(), OptimizationHint::Balanced);
    }
}
scirs2_sparse/gpu/mod.rs

scirs2_sparse/gpu/
mod.rs