Skip to main content

scirs2_sparse/gpu/
mod.rs

//! GPU acceleration for sparse matrix operations
//!
//! This module provides GPU acceleration support for sparse matrix operations
//! across multiple backends: CUDA, OpenCL, Metal, and Vulkan (which also serves
//! the WGPU backend). ROCm currently falls back to the CPU implementation.
5
6pub mod cuda;
7pub mod metal;
8pub mod opencl;
9pub mod vulkan;
10
11// Re-export common types and traits
12#[cfg(feature = "gpu")]
13pub use scirs2_core::gpu::{
14    GpuBackend, GpuBuffer, GpuContext, GpuDataType, GpuDevice, GpuError, GpuKernelHandle,
15};
16
17// Fallback types when GPU feature is not enabled
18#[cfg(not(feature = "gpu"))]
19pub use crate::gpu_ops::{
20    GpuBackend, GpuBuffer, GpuDataType, GpuDevice, GpuError, GpuKernelHandle,
21};
22
23// Re-export backend-specific modules
24pub use cuda::{CudaMemoryManager, CudaOptimizationLevel, CudaSpMatVec};
25pub use metal::{MetalDeviceInfo, MetalMemoryManager, MetalOptimizationLevel, MetalSpMatVec};
26pub use opencl::{
27    OpenCLMemoryManager, OpenCLOptimizationLevel, OpenCLPlatformInfo, OpenCLSpMatVec,
28};
29pub use vulkan::{
30    VulkanDeviceInfo, VulkanDeviceType, VulkanMemoryManager, VulkanOptimizationLevel,
31    VulkanSpMatVec,
32};
33
34use crate::csr_array::CsrArray;
35use crate::error::{SparseError, SparseResult};
36use crate::sparray::SparseArray;
37use scirs2_core::ndarray::{Array1, ArrayView1};
38use scirs2_core::numeric::{Float, SparseElement};
39use std::fmt::Debug;
40
41// GpuDataType is already available from the pub use statements above
42
43/// Unified GPU sparse matrix operations interface
44pub struct GpuSpMatVec {
45    backend: GpuBackend,
46    cuda_handler: Option<CudaSpMatVec>,
47    opencl_handler: Option<OpenCLSpMatVec>,
48    metal_handler: Option<MetalSpMatVec>,
49    vulkan_handler: Option<VulkanSpMatVec>,
50}
51
52impl GpuSpMatVec {
53    /// Create a new GPU sparse matrix handler with automatic backend detection
54    pub fn new() -> SparseResult<Self> {
55        let backend = Self::detect_best_backend();
56
57        let mut handler = Self {
58            backend,
59            cuda_handler: None,
60            opencl_handler: None,
61            metal_handler: None,
62            vulkan_handler: None,
63        };
64
65        // Initialize the appropriate backend
66        handler.initialize_backend()?;
67
68        Ok(handler)
69    }
70
71    /// Create a new GPU sparse matrix handler with specified backend
72    pub fn with_backend(backend: GpuBackend) -> SparseResult<Self> {
73        let mut handler = Self {
74            backend,
75            cuda_handler: None,
76            opencl_handler: None,
77            metal_handler: None,
78            vulkan_handler: None,
79        };
80
81        handler.initialize_backend()?;
82
83        Ok(handler)
84    }
85
86    /// Initialize the selected backend
87    fn initialize_backend(&mut self) -> SparseResult<()> {
88        match self.backend {
89            GpuBackend::Cuda => {
90                self.cuda_handler = Some(CudaSpMatVec::new()?);
91            }
92            GpuBackend::OpenCL => {
93                self.opencl_handler = Some(OpenCLSpMatVec::new()?);
94            }
95            GpuBackend::Metal => {
96                self.metal_handler = Some(MetalSpMatVec::new()?);
97            }
98            GpuBackend::Wgpu => {
99                self.vulkan_handler = Some(VulkanSpMatVec::new()?);
100            }
101            #[cfg(not(feature = "gpu"))]
102            GpuBackend::Vulkan => {
103                self.vulkan_handler = Some(VulkanSpMatVec::new()?);
104            }
105            GpuBackend::Cpu => {
106                // CPU fallback - no initialization needed
107            }
108            GpuBackend::Rocm => {
109                // For ROCm, fall back to CPU for now
110                self.backend = GpuBackend::Cpu;
111            }
112        }
113
114        Ok(())
115    }
116
117    /// Detect the best available GPU backend
118    fn detect_best_backend() -> GpuBackend {
119        // Priority order: Metal (on macOS), CUDA, Vulkan, OpenCL, CPU
120        #[cfg(target_os = "macos")]
121        {
122            if Self::is_metal_available() {
123                return GpuBackend::Metal;
124            }
125        }
126
127        if Self::is_cuda_available() {
128            return GpuBackend::Cuda;
129        }
130
131        if Self::is_vulkan_available() {
132            return GpuBackend::Wgpu;
133        }
134
135        if Self::is_opencl_available() {
136            return GpuBackend::OpenCL;
137        }
138
139        GpuBackend::Cpu
140    }
141
142    /// Check if CUDA is available
143    fn is_cuda_available() -> bool {
144        // In a real implementation, this would check for CUDA runtime
145        #[cfg(feature = "gpu")]
146        {
147            // Simplified detection
148            std::env::var("CUDA_PATH").is_ok() || std::path::Path::new("/usr/local/cuda").exists()
149        }
150        #[cfg(not(feature = "gpu"))]
151        false
152    }
153
154    /// Check if OpenCL is available
155    fn is_opencl_available() -> bool {
156        // In a real implementation, this would check for OpenCL runtime
157        #[cfg(feature = "gpu")]
158        {
159            // Simplified detection - assume available on most systems
160            true
161        }
162        #[cfg(not(feature = "gpu"))]
163        false
164    }
165
166    /// Check if Metal is available (macOS only)
167    fn is_metal_available() -> bool {
168        #[cfg(target_os = "macos")]
169        {
170            // Metal is available on all modern macOS systems
171            true
172        }
173        #[cfg(not(target_os = "macos"))]
174        false
175    }
176
177    /// Check if Vulkan is available
178    fn is_vulkan_available() -> bool {
179        #[cfg(feature = "gpu")]
180        {
181            // Check for Vulkan SDK or runtime
182            std::env::var("VULKAN_SDK").is_ok()
183                || std::path::Path::new("/usr/share/vulkan").exists()
184                || std::path::Path::new("/usr/local/share/vulkan").exists()
185        }
186        #[cfg(not(feature = "gpu"))]
187        false
188    }
189
190    /// Execute sparse matrix-vector multiplication on GPU
191    pub fn spmv<T>(
192        &self,
193        matrix: &CsrArray<T>,
194        vector: &ArrayView1<T>,
195        device: Option<&GpuDevice>,
196    ) -> SparseResult<Array1<T>>
197    where
198        T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
199    {
200        match self.backend {
201            GpuBackend::Cuda => {
202                if let Some(ref handler) = self.cuda_handler {
203                    #[cfg(feature = "gpu")]
204                    {
205                        if let Some(device) = device {
206                            handler.execute_spmv(matrix, vector, device)
207                        } else {
208                            // Fall back to CPU when no device is provided
209                            matrix.dot_vector(vector)
210                        }
211                    }
212                    #[cfg(not(feature = "gpu"))]
213                    handler.execute_spmv_cpu(matrix, vector)
214                } else {
215                    Err(SparseError::ComputationError(
216                        "CUDA handler not initialized".to_string(),
217                    ))
218                }
219            }
220            GpuBackend::OpenCL => {
221                if let Some(ref handler) = self.opencl_handler {
222                    #[cfg(feature = "gpu")]
223                    {
224                        if let Some(device) = device {
225                            handler.execute_spmv(matrix, vector, device)
226                        } else {
227                            // Fall back to CPU when no device is provided
228                            matrix.dot_vector(vector)
229                        }
230                    }
231                    #[cfg(not(feature = "gpu"))]
232                    handler.execute_spmv_cpu(matrix, vector)
233                } else {
234                    Err(SparseError::ComputationError(
235                        "OpenCL handler not initialized".to_string(),
236                    ))
237                }
238            }
239            GpuBackend::Metal => {
240                if let Some(ref handler) = self.metal_handler {
241                    #[cfg(feature = "gpu")]
242                    {
243                        if let Some(device) = device {
244                            handler.execute_spmv(matrix, vector, device)
245                        } else {
246                            // Fall back to CPU when no device is provided
247                            matrix.dot_vector(vector)
248                        }
249                    }
250                    #[cfg(not(feature = "gpu"))]
251                    handler.execute_spmv_cpu(matrix, vector)
252                } else {
253                    Err(SparseError::ComputationError(
254                        "Metal handler not initialized".to_string(),
255                    ))
256                }
257            }
258            GpuBackend::Wgpu => {
259                if let Some(ref handler) = self.vulkan_handler {
260                    #[cfg(feature = "gpu")]
261                    {
262                        if let Some(device) = device {
263                            handler.execute_spmv(matrix, vector, device)
264                        } else {
265                            // Fall back to CPU when no device is provided
266                            matrix.dot_vector(vector)
267                        }
268                    }
269                    #[cfg(not(feature = "gpu"))]
270                    handler.execute_spmv_cpu(matrix, vector)
271                } else {
272                    Err(SparseError::ComputationError(
273                        "Vulkan handler not initialized".to_string(),
274                    ))
275                }
276            }
277            #[cfg(not(feature = "gpu"))]
278            GpuBackend::Vulkan => {
279                if let Some(ref handler) = self.vulkan_handler {
280                    handler.execute_spmv_cpu(matrix, vector)
281                } else {
282                    Err(SparseError::ComputationError(
283                        "Vulkan handler not initialized".to_string(),
284                    ))
285                }
286            }
287            GpuBackend::Cpu => {
288                // CPU fallback
289                matrix.dot_vector(vector)
290            }
291            GpuBackend::Rocm => {
292                // Unsupported backend, fall back to CPU
293                matrix.dot_vector(vector)
294            }
295        }
296    }
297
298    /// Execute optimized sparse matrix-vector multiplication
299    pub fn spmv_optimized<T>(
300        &self,
301        matrix: &CsrArray<T>,
302        vector: &ArrayView1<T>,
303        device: Option<&GpuDevice>,
304        optimization_hint: OptimizationHint,
305    ) -> SparseResult<Array1<T>>
306    where
307        T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
308    {
309        match self.backend {
310            GpuBackend::Cuda => {
311                if let Some(ref handler) = self.cuda_handler {
312                    let cuda_level = optimization_hint.to_cuda_level();
313                    #[cfg(feature = "gpu")]
314                    {
315                        if let Some(device) = device {
316                            handler.execute_optimized_spmv(matrix, vector, device, cuda_level)
317                        } else {
318                            return Err(SparseError::ComputationError(
319                                "GPU device required for CUDA operations".to_string(),
320                            ));
321                        }
322                    }
323                    #[cfg(not(feature = "gpu"))]
324                    handler.execute_spmv_cpu(matrix, vector)
325                } else {
326                    Err(SparseError::ComputationError(
327                        "CUDA handler not initialized".to_string(),
328                    ))
329                }
330            }
331            GpuBackend::OpenCL => {
332                if let Some(ref handler) = self.opencl_handler {
333                    let opencl_level = optimization_hint.to_opencl_level();
334                    #[cfg(feature = "gpu")]
335                    {
336                        if let Some(device) = device {
337                            handler.execute_optimized_spmv(matrix, vector, device, opencl_level)
338                        } else {
339                            return Err(SparseError::ComputationError(
340                                "GPU device required for OpenCL operations".to_string(),
341                            ));
342                        }
343                    }
344                    #[cfg(not(feature = "gpu"))]
345                    handler.execute_spmv_cpu(matrix, vector)
346                } else {
347                    Err(SparseError::ComputationError(
348                        "OpenCL handler not initialized".to_string(),
349                    ))
350                }
351            }
352            GpuBackend::Metal => {
353                if let Some(ref handler) = self.metal_handler {
354                    let metal_level = optimization_hint.to_metal_level();
355                    #[cfg(feature = "gpu")]
356                    {
357                        if let Some(device) = device {
358                            handler.execute_optimized_spmv(matrix, vector, device, metal_level)
359                        } else {
360                            return Err(SparseError::ComputationError(
361                                "GPU device required for Metal operations".to_string(),
362                            ));
363                        }
364                    }
365                    #[cfg(not(feature = "gpu"))]
366                    handler.execute_spmv_cpu(matrix, vector)
367                } else {
368                    Err(SparseError::ComputationError(
369                        "Metal handler not initialized".to_string(),
370                    ))
371                }
372            }
373            GpuBackend::Wgpu => {
374                if let Some(ref handler) = self.vulkan_handler {
375                    let vulkan_level = optimization_hint.to_vulkan_level();
376                    #[cfg(feature = "gpu")]
377                    {
378                        if let Some(device) = device {
379                            handler.execute_optimized_spmv(matrix, vector, device, vulkan_level)
380                        } else {
381                            return Err(SparseError::ComputationError(
382                                "GPU device required for Vulkan operations".to_string(),
383                            ));
384                        }
385                    }
386                    #[cfg(not(feature = "gpu"))]
387                    handler.execute_spmv_cpu(matrix, vector)
388                } else {
389                    Err(SparseError::ComputationError(
390                        "Vulkan handler not initialized".to_string(),
391                    ))
392                }
393            }
394            #[cfg(not(feature = "gpu"))]
395            GpuBackend::Vulkan => {
396                if let Some(ref handler) = self.vulkan_handler {
397                    let vulkan_level = optimization_hint.to_vulkan_level();
398                    handler.execute_spmv_cpu(matrix, vector)
399                } else {
400                    Err(SparseError::ComputationError(
401                        "Vulkan handler not initialized".to_string(),
402                    ))
403                }
404            }
405            GpuBackend::Cpu | GpuBackend::Rocm => {
406                // Fall back to basic implementation
407                self.spmv(matrix, vector, device)
408            }
409        }
410    }
411
412    /// Get the current backend
413    pub fn backend(&self) -> GpuBackend {
414        self.backend
415    }
416
417    /// Check if GPU acceleration is available
418    pub fn is_gpu_available(&self) -> bool {
419        !matches!(self.backend, GpuBackend::Cpu)
420    }
421
422    /// Get backend-specific information
423    pub fn get_backend_info(&self) -> BackendInfo {
424        match self.backend {
425            GpuBackend::Cuda => BackendInfo {
426                name: "CUDA".to_string(),
427                version: "Unknown".to_string(),
428                device_count: 1, // Simplified
429                supports_double_precision: true,
430                max_memory_mb: 8192, // 8GB default
431            },
432            GpuBackend::OpenCL => BackendInfo {
433                name: "OpenCL".to_string(),
434                version: "Unknown".to_string(),
435                device_count: 1,
436                supports_double_precision: true,
437                max_memory_mb: 4096, // 4GB default
438            },
439            GpuBackend::Metal => BackendInfo {
440                name: "Metal".to_string(),
441                version: "Unknown".to_string(),
442                device_count: 1,
443                supports_double_precision: false, // Metal has limited f64 support
444                max_memory_mb: if MetalDeviceInfo::detect().is_apple_silicon {
445                    16384
446                } else {
447                    8192
448                },
449            },
450            GpuBackend::Wgpu => BackendInfo {
451                name: "Vulkan".to_string(),
452                version: "Unknown".to_string(),
453                device_count: 1,
454                supports_double_precision: true,
455                max_memory_mb: 8192, // 8GB default
456            },
457            #[cfg(not(feature = "gpu"))]
458            GpuBackend::Vulkan => BackendInfo {
459                name: "Vulkan".to_string(),
460                version: "Unknown".to_string(),
461                device_count: 1,
462                supports_double_precision: true,
463                max_memory_mb: 8192, // 8GB default
464            },
465            GpuBackend::Cpu | GpuBackend::Rocm => BackendInfo {
466                name: "CPU".to_string(),
467                version: "Fallback".to_string(),
468                device_count: 0,
469                supports_double_precision: true,
470                max_memory_mb: 0,
471            },
472        }
473    }
474}
475
476impl Default for GpuSpMatVec {
477    fn default() -> Self {
478        Self::new().unwrap_or_else(|_| Self {
479            backend: GpuBackend::Cpu,
480            cuda_handler: None,
481            opencl_handler: None,
482            metal_handler: None,
483            vulkan_handler: None,
484        })
485    }
486}
487
/// Backend-independent optimization preference.
///
/// Each backend maps the hint onto its own optimization level through the
/// `to_*_level` conversions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum OptimizationHint {
    /// Basic optimization level.
    Basic,
    /// Balanced optimization; this is the `Default` value.
    #[default]
    Balanced,
    /// Optimize for maximum performance.
    Maximum,
    /// Prefer a memory-optimized implementation.
    MemoryOptimized,
}
501
502impl OptimizationHint {
503    /// Convert to CUDA optimization level
504    pub fn to_cuda_level(self) -> CudaOptimizationLevel {
505        match self {
506            OptimizationHint::Basic => CudaOptimizationLevel::Basic,
507            OptimizationHint::Balanced => CudaOptimizationLevel::Vectorized,
508            OptimizationHint::Maximum => CudaOptimizationLevel::WarpLevel,
509            OptimizationHint::MemoryOptimized => CudaOptimizationLevel::Basic,
510        }
511    }
512
513    /// Convert to OpenCL optimization level
514    pub fn to_opencl_level(self) -> OpenCLOptimizationLevel {
515        match self {
516            OptimizationHint::Basic => OpenCLOptimizationLevel::Basic,
517            OptimizationHint::Balanced => OpenCLOptimizationLevel::Workgroup,
518            OptimizationHint::Maximum => OpenCLOptimizationLevel::Vectorized,
519            OptimizationHint::MemoryOptimized => OpenCLOptimizationLevel::Workgroup,
520        }
521    }
522
523    /// Convert to Metal optimization level
524    pub fn to_metal_level(self) -> MetalOptimizationLevel {
525        match self {
526            OptimizationHint::Basic => MetalOptimizationLevel::Basic,
527            OptimizationHint::Balanced => MetalOptimizationLevel::SimdGroup,
528            OptimizationHint::Maximum => MetalOptimizationLevel::AppleSilicon,
529            OptimizationHint::MemoryOptimized => MetalOptimizationLevel::AppleSilicon,
530        }
531    }
532
533    /// Convert to Vulkan optimization level
534    pub fn to_vulkan_level(self) -> VulkanOptimizationLevel {
535        match self {
536            OptimizationHint::Basic => VulkanOptimizationLevel::Basic,
537            OptimizationHint::Balanced => VulkanOptimizationLevel::ComputeShader,
538            OptimizationHint::Maximum => VulkanOptimizationLevel::Maximum,
539            OptimizationHint::MemoryOptimized => VulkanOptimizationLevel::Subgroup,
540        }
541    }
542}
543
/// Summary of a compute backend, as returned by `GpuSpMatVec::get_backend_info`.
#[derive(Debug, Clone)]
pub struct BackendInfo {
    /// Backend name, e.g. `"CUDA"` or `"CPU"`.
    pub name: String,
    /// Backend version string.
    pub version: String,
    /// Number of devices reported for the backend.
    pub device_count: usize,
    /// Whether the backend is reported to support double precision.
    pub supports_double_precision: bool,
    /// Memory figure reported for the backend, in megabytes.
    pub max_memory_mb: usize,
}
553
554/// Convenient functions for common operations
555pub mod convenience {
556    use super::*;
557
558    /// Execute sparse matrix-vector multiplication with automatic GPU detection
559    pub fn gpu_spmv<T>(matrix: &CsrArray<T>, vector: &ArrayView1<T>) -> SparseResult<Array1<T>>
560    where
561        T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
562    {
563        let gpu_handler = GpuSpMatVec::new()?;
564        gpu_handler.spmv(matrix, vector, None)
565    }
566
567    /// Execute optimized sparse matrix-vector multiplication
568    pub fn gpu_spmv_optimized<T>(
569        matrix: &CsrArray<T>,
570        vector: &ArrayView1<T>,
571        optimization: OptimizationHint,
572    ) -> SparseResult<Array1<T>>
573    where
574        T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
575    {
576        let gpu_handler = GpuSpMatVec::new()?;
577        gpu_handler.spmv_optimized(matrix, vector, None, optimization)
578    }
579
580    /// Get information about available GPU backends
581    pub fn available_backends() -> Vec<GpuBackend> {
582        let mut backends = Vec::new();
583
584        if GpuSpMatVec::is_cuda_available() {
585            backends.push(GpuBackend::Cuda);
586        }
587
588        if GpuSpMatVec::is_vulkan_available() {
589            backends.push(GpuBackend::Wgpu);
590        }
591
592        if GpuSpMatVec::is_opencl_available() {
593            backends.push(GpuBackend::OpenCL);
594        }
595
596        if GpuSpMatVec::is_metal_available() {
597            backends.push(GpuBackend::Metal);
598        }
599
600        backends.push(GpuBackend::Cpu); // Always available
601
602        backends
603    }
604}
605
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array1;

    /// Auto-detected construction must succeed.
    #[test]
    fn test_gpu_spmv_creation() {
        assert!(GpuSpMatVec::new().is_ok());
    }

    /// Detection must yield one of the known backend variants.
    #[test]
    fn test_backend_detection() {
        // The exhaustive match doubles as a compile-time check of the variant list.
        let is_known = match GpuSpMatVec::detect_best_backend() {
            GpuBackend::Cuda
            | GpuBackend::OpenCL
            | GpuBackend::Metal
            | GpuBackend::Cpu
            | GpuBackend::Wgpu
            | GpuBackend::Rocm => true,
            #[cfg(not(feature = "gpu"))]
            GpuBackend::Vulkan => true,
        };
        assert!(is_known);
    }

    /// `Maximum` maps to the expected level of each checked backend.
    #[test]
    fn test_optimization_hint_conversions() {
        let hint = OptimizationHint::Maximum;

        assert_eq!(hint.to_cuda_level(), CudaOptimizationLevel::WarpLevel);
        assert_eq!(hint.to_opencl_level(), OpenCLOptimizationLevel::Vectorized);
        assert_eq!(hint.to_metal_level(), MetalOptimizationLevel::AppleSilicon);
    }

    /// Backend info always carries a name and a version.
    #[test]
    fn test_backend_info() {
        let info = GpuSpMatVec::new()
            .expect("Operation failed")
            .get_backend_info();

        assert!(!info.name.is_empty());
        assert!(!info.version.is_empty());
    }

    /// The backend listing is never empty and always includes the CPU.
    #[test]
    fn test_convenience_functions() {
        let listed = convenience::available_backends();

        assert!(!listed.is_empty());
        assert!(listed.contains(&GpuBackend::Cpu)); // CPU should always be available
    }

    /// Querying GPU availability must not panic, whatever the answer is.
    #[test]
    fn test_is_gpu_available() {
        let handler = GpuSpMatVec::new().expect("Operation failed");

        // Either true or false is valid
        let _available = handler.is_gpu_available();
    }

    /// `Balanced` is the default hint.
    #[test]
    fn test_optimization_hint_default() {
        assert_eq!(OptimizationHint::default(), OptimizationHint::Balanced);
    }
}