// scirs2_sparse/gpu/mod.rs

1//! GPU acceleration for sparse matrix operations
2//!
3//! This module provides GPU acceleration support for sparse matrix operations
4//! across multiple backends including CUDA, OpenCL, Metal, ROCm, and WGPU.
5
6pub mod construction;
7pub mod cuda;
8pub mod metal;
9pub mod opencl;
10pub mod solvers;
11pub mod spmv;
12pub mod vulkan;
13
14// Re-export common types and traits
15#[cfg(feature = "gpu")]
16pub use scirs2_core::gpu::{
17    GpuBackend, GpuBuffer, GpuContext, GpuDataType, GpuDevice, GpuError, GpuKernelHandle,
18};
19
20// Fallback types when GPU feature is not enabled
21#[cfg(not(feature = "gpu"))]
22pub use crate::gpu_ops::{
23    GpuBackend, GpuBuffer, GpuDataType, GpuDevice, GpuError, GpuKernelHandle,
24};
25
26// Re-export backend-specific modules
27pub use cuda::{CudaMemoryManager, CudaOptimizationLevel, CudaSpMatVec};
28pub use metal::{MetalDeviceInfo, MetalMemoryManager, MetalOptimizationLevel, MetalSpMatVec};
29pub use opencl::{
30    OpenCLMemoryManager, OpenCLOptimizationLevel, OpenCLPlatformInfo, OpenCLSpMatVec,
31};
32pub use vulkan::{
33    VulkanDeviceInfo, VulkanDeviceType, VulkanMemoryManager, VulkanOptimizationLevel,
34    VulkanSpMatVec,
35};
36
37use crate::csr_array::CsrArray;
38use crate::error::{SparseError, SparseResult};
39use crate::sparray::SparseArray;
40use scirs2_core::ndarray::{Array1, ArrayView1};
41use scirs2_core::numeric::{Float, SparseElement};
42use std::fmt::Debug;
43
44// GpuDataType is already available from the pub use statements above
45
/// Unified GPU sparse matrix operations interface
///
/// Wraps one handler per supported backend; after construction only the
/// handler matching the selected `backend` is populated (see
/// `initialize_backend`), and none are populated for the CPU fallback.
pub struct GpuSpMatVec {
    // Backend selected at construction time; ROCm is downgraded to Cpu
    // during initialization because no ROCm handler exists yet.
    backend: GpuBackend,
    // Backend-specific handlers; at most one is Some at any time.
    cuda_handler: Option<CudaSpMatVec>,
    opencl_handler: Option<OpenCLSpMatVec>,
    metal_handler: Option<MetalSpMatVec>,
    // Also serves the Wgpu backend variant.
    vulkan_handler: Option<VulkanSpMatVec>,
}
54
55impl GpuSpMatVec {
56    /// Create a new GPU sparse matrix handler with automatic backend detection
57    pub fn new() -> SparseResult<Self> {
58        let backend = Self::detect_best_backend();
59
60        let mut handler = Self {
61            backend,
62            cuda_handler: None,
63            opencl_handler: None,
64            metal_handler: None,
65            vulkan_handler: None,
66        };
67
68        // Initialize the appropriate backend
69        handler.initialize_backend()?;
70
71        Ok(handler)
72    }
73
74    /// Create a new GPU sparse matrix handler with specified backend
75    pub fn with_backend(backend: GpuBackend) -> SparseResult<Self> {
76        let mut handler = Self {
77            backend,
78            cuda_handler: None,
79            opencl_handler: None,
80            metal_handler: None,
81            vulkan_handler: None,
82        };
83
84        handler.initialize_backend()?;
85
86        Ok(handler)
87    }
88
89    /// Initialize the selected backend
90    fn initialize_backend(&mut self) -> SparseResult<()> {
91        match self.backend {
92            GpuBackend::Cuda => {
93                self.cuda_handler = Some(CudaSpMatVec::new()?);
94            }
95            GpuBackend::OpenCL => {
96                self.opencl_handler = Some(OpenCLSpMatVec::new()?);
97            }
98            GpuBackend::Metal => {
99                self.metal_handler = Some(MetalSpMatVec::new()?);
100            }
101            GpuBackend::Wgpu => {
102                self.vulkan_handler = Some(VulkanSpMatVec::new()?);
103            }
104            #[cfg(not(feature = "gpu"))]
105            GpuBackend::Vulkan => {
106                self.vulkan_handler = Some(VulkanSpMatVec::new()?);
107            }
108            GpuBackend::Cpu => {
109                // CPU fallback - no initialization needed
110            }
111            GpuBackend::Rocm => {
112                // For ROCm, fall back to CPU for now
113                self.backend = GpuBackend::Cpu;
114            }
115        }
116
117        Ok(())
118    }
119
120    /// Detect the best available GPU backend
121    fn detect_best_backend() -> GpuBackend {
122        // Priority order: Metal (on macOS), CUDA, Vulkan, OpenCL, CPU
123        #[cfg(target_os = "macos")]
124        {
125            if Self::is_metal_available() {
126                return GpuBackend::Metal;
127            }
128        }
129
130        if Self::is_cuda_available() {
131            return GpuBackend::Cuda;
132        }
133
134        if Self::is_vulkan_available() {
135            return GpuBackend::Wgpu;
136        }
137
138        if Self::is_opencl_available() {
139            return GpuBackend::OpenCL;
140        }
141
142        GpuBackend::Cpu
143    }
144
145    /// Check if CUDA is available
146    fn is_cuda_available() -> bool {
147        // In a real implementation, this would check for CUDA runtime
148        #[cfg(feature = "gpu")]
149        {
150            // Simplified detection
151            std::env::var("CUDA_PATH").is_ok() || std::path::Path::new("/usr/local/cuda").exists()
152        }
153        #[cfg(not(feature = "gpu"))]
154        false
155    }
156
157    /// Check if OpenCL is available
158    fn is_opencl_available() -> bool {
159        // In a real implementation, this would check for OpenCL runtime
160        #[cfg(feature = "gpu")]
161        {
162            // Simplified detection - assume available on most systems
163            true
164        }
165        #[cfg(not(feature = "gpu"))]
166        false
167    }
168
169    /// Check if Metal is available (macOS only)
170    fn is_metal_available() -> bool {
171        #[cfg(target_os = "macos")]
172        {
173            // Metal is available on all modern macOS systems
174            true
175        }
176        #[cfg(not(target_os = "macos"))]
177        false
178    }
179
180    /// Check if Vulkan is available
181    fn is_vulkan_available() -> bool {
182        #[cfg(feature = "gpu")]
183        {
184            // Check for Vulkan SDK or runtime
185            std::env::var("VULKAN_SDK").is_ok()
186                || std::path::Path::new("/usr/share/vulkan").exists()
187                || std::path::Path::new("/usr/local/share/vulkan").exists()
188        }
189        #[cfg(not(feature = "gpu"))]
190        false
191    }
192
193    /// Execute sparse matrix-vector multiplication on GPU
194    pub fn spmv<T>(
195        &self,
196        matrix: &CsrArray<T>,
197        vector: &ArrayView1<T>,
198        device: Option<&GpuDevice>,
199    ) -> SparseResult<Array1<T>>
200    where
201        T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
202    {
203        match self.backend {
204            GpuBackend::Cuda => {
205                if let Some(ref handler) = self.cuda_handler {
206                    #[cfg(feature = "gpu")]
207                    {
208                        if let Some(device) = device {
209                            handler.execute_spmv(matrix, vector, device)
210                        } else {
211                            // Fall back to CPU when no device is provided
212                            matrix.dot_vector(vector)
213                        }
214                    }
215                    #[cfg(not(feature = "gpu"))]
216                    handler.execute_spmv_cpu(matrix, vector)
217                } else {
218                    Err(SparseError::ComputationError(
219                        "CUDA handler not initialized".to_string(),
220                    ))
221                }
222            }
223            GpuBackend::OpenCL => {
224                if let Some(ref handler) = self.opencl_handler {
225                    #[cfg(feature = "gpu")]
226                    {
227                        if let Some(device) = device {
228                            handler.execute_spmv(matrix, vector, device)
229                        } else {
230                            // Fall back to CPU when no device is provided
231                            matrix.dot_vector(vector)
232                        }
233                    }
234                    #[cfg(not(feature = "gpu"))]
235                    handler.execute_spmv_cpu(matrix, vector)
236                } else {
237                    Err(SparseError::ComputationError(
238                        "OpenCL handler not initialized".to_string(),
239                    ))
240                }
241            }
242            GpuBackend::Metal => {
243                if let Some(ref handler) = self.metal_handler {
244                    #[cfg(feature = "gpu")]
245                    {
246                        if let Some(device) = device {
247                            handler.execute_spmv(matrix, vector, device)
248                        } else {
249                            // Fall back to CPU when no device is provided
250                            matrix.dot_vector(vector)
251                        }
252                    }
253                    #[cfg(not(feature = "gpu"))]
254                    handler.execute_spmv_cpu(matrix, vector)
255                } else {
256                    Err(SparseError::ComputationError(
257                        "Metal handler not initialized".to_string(),
258                    ))
259                }
260            }
261            GpuBackend::Wgpu => {
262                if let Some(ref handler) = self.vulkan_handler {
263                    #[cfg(feature = "gpu")]
264                    {
265                        if let Some(device) = device {
266                            handler.execute_spmv(matrix, vector, device)
267                        } else {
268                            // Fall back to CPU when no device is provided
269                            matrix.dot_vector(vector)
270                        }
271                    }
272                    #[cfg(not(feature = "gpu"))]
273                    handler.execute_spmv_cpu(matrix, vector)
274                } else {
275                    Err(SparseError::ComputationError(
276                        "Vulkan handler not initialized".to_string(),
277                    ))
278                }
279            }
280            #[cfg(not(feature = "gpu"))]
281            GpuBackend::Vulkan => {
282                if let Some(ref handler) = self.vulkan_handler {
283                    handler.execute_spmv_cpu(matrix, vector)
284                } else {
285                    Err(SparseError::ComputationError(
286                        "Vulkan handler not initialized".to_string(),
287                    ))
288                }
289            }
290            GpuBackend::Cpu => {
291                // CPU fallback
292                matrix.dot_vector(vector)
293            }
294            GpuBackend::Rocm => {
295                // Unsupported backend, fall back to CPU
296                matrix.dot_vector(vector)
297            }
298        }
299    }
300
301    /// Execute optimized sparse matrix-vector multiplication
302    pub fn spmv_optimized<T>(
303        &self,
304        matrix: &CsrArray<T>,
305        vector: &ArrayView1<T>,
306        device: Option<&GpuDevice>,
307        optimization_hint: OptimizationHint,
308    ) -> SparseResult<Array1<T>>
309    where
310        T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
311    {
312        match self.backend {
313            GpuBackend::Cuda => {
314                if let Some(ref handler) = self.cuda_handler {
315                    let cuda_level = optimization_hint.to_cuda_level();
316                    #[cfg(feature = "gpu")]
317                    {
318                        if let Some(device) = device {
319                            handler.execute_optimized_spmv(matrix, vector, device, cuda_level)
320                        } else {
321                            return Err(SparseError::ComputationError(
322                                "GPU device required for CUDA operations".to_string(),
323                            ));
324                        }
325                    }
326                    #[cfg(not(feature = "gpu"))]
327                    handler.execute_spmv_cpu(matrix, vector)
328                } else {
329                    Err(SparseError::ComputationError(
330                        "CUDA handler not initialized".to_string(),
331                    ))
332                }
333            }
334            GpuBackend::OpenCL => {
335                if let Some(ref handler) = self.opencl_handler {
336                    let opencl_level = optimization_hint.to_opencl_level();
337                    #[cfg(feature = "gpu")]
338                    {
339                        if let Some(device) = device {
340                            handler.execute_optimized_spmv(matrix, vector, device, opencl_level)
341                        } else {
342                            return Err(SparseError::ComputationError(
343                                "GPU device required for OpenCL operations".to_string(),
344                            ));
345                        }
346                    }
347                    #[cfg(not(feature = "gpu"))]
348                    handler.execute_spmv_cpu(matrix, vector)
349                } else {
350                    Err(SparseError::ComputationError(
351                        "OpenCL handler not initialized".to_string(),
352                    ))
353                }
354            }
355            GpuBackend::Metal => {
356                if let Some(ref handler) = self.metal_handler {
357                    let metal_level = optimization_hint.to_metal_level();
358                    #[cfg(feature = "gpu")]
359                    {
360                        if let Some(device) = device {
361                            handler.execute_optimized_spmv(matrix, vector, device, metal_level)
362                        } else {
363                            return Err(SparseError::ComputationError(
364                                "GPU device required for Metal operations".to_string(),
365                            ));
366                        }
367                    }
368                    #[cfg(not(feature = "gpu"))]
369                    handler.execute_spmv_cpu(matrix, vector)
370                } else {
371                    Err(SparseError::ComputationError(
372                        "Metal handler not initialized".to_string(),
373                    ))
374                }
375            }
376            GpuBackend::Wgpu => {
377                if let Some(ref handler) = self.vulkan_handler {
378                    let vulkan_level = optimization_hint.to_vulkan_level();
379                    #[cfg(feature = "gpu")]
380                    {
381                        if let Some(device) = device {
382                            handler.execute_optimized_spmv(matrix, vector, device, vulkan_level)
383                        } else {
384                            return Err(SparseError::ComputationError(
385                                "GPU device required for Vulkan operations".to_string(),
386                            ));
387                        }
388                    }
389                    #[cfg(not(feature = "gpu"))]
390                    handler.execute_spmv_cpu(matrix, vector)
391                } else {
392                    Err(SparseError::ComputationError(
393                        "Vulkan handler not initialized".to_string(),
394                    ))
395                }
396            }
397            #[cfg(not(feature = "gpu"))]
398            GpuBackend::Vulkan => {
399                if let Some(ref handler) = self.vulkan_handler {
400                    let vulkan_level = optimization_hint.to_vulkan_level();
401                    handler.execute_spmv_cpu(matrix, vector)
402                } else {
403                    Err(SparseError::ComputationError(
404                        "Vulkan handler not initialized".to_string(),
405                    ))
406                }
407            }
408            GpuBackend::Cpu | GpuBackend::Rocm => {
409                // Fall back to basic implementation
410                self.spmv(matrix, vector, device)
411            }
412        }
413    }
414
415    /// Get the current backend
416    pub fn backend(&self) -> GpuBackend {
417        self.backend
418    }
419
420    /// Check if GPU acceleration is available
421    pub fn is_gpu_available(&self) -> bool {
422        !matches!(self.backend, GpuBackend::Cpu)
423    }
424
425    /// Get backend-specific information
426    pub fn get_backend_info(&self) -> BackendInfo {
427        match self.backend {
428            GpuBackend::Cuda => BackendInfo {
429                name: "CUDA".to_string(),
430                version: "Unknown".to_string(),
431                device_count: 1, // Simplified
432                supports_double_precision: true,
433                max_memory_mb: 8192, // 8GB default
434            },
435            GpuBackend::OpenCL => BackendInfo {
436                name: "OpenCL".to_string(),
437                version: "Unknown".to_string(),
438                device_count: 1,
439                supports_double_precision: true,
440                max_memory_mb: 4096, // 4GB default
441            },
442            GpuBackend::Metal => BackendInfo {
443                name: "Metal".to_string(),
444                version: "Unknown".to_string(),
445                device_count: 1,
446                supports_double_precision: false, // Metal has limited f64 support
447                max_memory_mb: if MetalDeviceInfo::detect().is_apple_silicon {
448                    16384
449                } else {
450                    8192
451                },
452            },
453            GpuBackend::Wgpu => BackendInfo {
454                name: "Vulkan".to_string(),
455                version: "Unknown".to_string(),
456                device_count: 1,
457                supports_double_precision: true,
458                max_memory_mb: 8192, // 8GB default
459            },
460            #[cfg(not(feature = "gpu"))]
461            GpuBackend::Vulkan => BackendInfo {
462                name: "Vulkan".to_string(),
463                version: "Unknown".to_string(),
464                device_count: 1,
465                supports_double_precision: true,
466                max_memory_mb: 8192, // 8GB default
467            },
468            GpuBackend::Cpu | GpuBackend::Rocm => BackendInfo {
469                name: "CPU".to_string(),
470                version: "Fallback".to_string(),
471                device_count: 0,
472                supports_double_precision: true,
473                max_memory_mb: 0,
474            },
475        }
476    }
477}
478
479impl Default for GpuSpMatVec {
480    fn default() -> Self {
481        Self::new().unwrap_or_else(|_| Self {
482            backend: GpuBackend::Cpu,
483            cuda_handler: None,
484            opencl_handler: None,
485            metal_handler: None,
486            vulkan_handler: None,
487        })
488    }
489}
490
/// Cross-platform optimization hints
///
/// Backend-agnostic performance preferences; each backend translates a hint
/// into its own optimization level through the `to_*_level` methods.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum OptimizationHint {
    /// Basic optimization level
    Basic,
    /// Balanced optimization (default)
    #[default]
    Balanced,
    /// Maximum performance optimization
    Maximum,
    /// Memory-optimized implementation
    MemoryOptimized,
}
504
505impl OptimizationHint {
506    /// Convert to CUDA optimization level
507    pub fn to_cuda_level(self) -> CudaOptimizationLevel {
508        match self {
509            OptimizationHint::Basic => CudaOptimizationLevel::Basic,
510            OptimizationHint::Balanced => CudaOptimizationLevel::Vectorized,
511            OptimizationHint::Maximum => CudaOptimizationLevel::WarpLevel,
512            OptimizationHint::MemoryOptimized => CudaOptimizationLevel::Basic,
513        }
514    }
515
516    /// Convert to OpenCL optimization level
517    pub fn to_opencl_level(self) -> OpenCLOptimizationLevel {
518        match self {
519            OptimizationHint::Basic => OpenCLOptimizationLevel::Basic,
520            OptimizationHint::Balanced => OpenCLOptimizationLevel::Workgroup,
521            OptimizationHint::Maximum => OpenCLOptimizationLevel::Vectorized,
522            OptimizationHint::MemoryOptimized => OpenCLOptimizationLevel::Workgroup,
523        }
524    }
525
526    /// Convert to Metal optimization level
527    pub fn to_metal_level(self) -> MetalOptimizationLevel {
528        match self {
529            OptimizationHint::Basic => MetalOptimizationLevel::Basic,
530            OptimizationHint::Balanced => MetalOptimizationLevel::SimdGroup,
531            OptimizationHint::Maximum => MetalOptimizationLevel::AppleSilicon,
532            OptimizationHint::MemoryOptimized => MetalOptimizationLevel::AppleSilicon,
533        }
534    }
535
536    /// Convert to Vulkan optimization level
537    pub fn to_vulkan_level(self) -> VulkanOptimizationLevel {
538        match self {
539            OptimizationHint::Basic => VulkanOptimizationLevel::Basic,
540            OptimizationHint::Balanced => VulkanOptimizationLevel::ComputeShader,
541            OptimizationHint::Maximum => VulkanOptimizationLevel::Maximum,
542            OptimizationHint::MemoryOptimized => VulkanOptimizationLevel::Subgroup,
543        }
544    }
545}
546
/// GPU backend information
///
/// Summary returned by `GpuSpMatVec::get_backend_info`; values are
/// simplified defaults, not live hardware queries.
#[derive(Debug, Clone)]
pub struct BackendInfo {
    /// Human-readable backend name (e.g. "CUDA", "Metal", "CPU").
    pub name: String,
    /// Runtime version string ("Unknown"/"Fallback" when not queried).
    pub version: String,
    /// Reported device count (0 for the CPU fallback).
    pub device_count: usize,
    /// Whether the backend supports f64 arithmetic.
    pub supports_double_precision: bool,
    /// Approximate device memory budget in MB (0 for the CPU fallback).
    pub max_memory_mb: usize,
}
556
557/// Convenient functions for common operations
558pub mod convenience {
559    use super::*;
560
561    /// Execute sparse matrix-vector multiplication with automatic GPU detection
562    pub fn gpu_spmv<T>(matrix: &CsrArray<T>, vector: &ArrayView1<T>) -> SparseResult<Array1<T>>
563    where
564        T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
565    {
566        let gpu_handler = GpuSpMatVec::new()?;
567        gpu_handler.spmv(matrix, vector, None)
568    }
569
570    /// Execute optimized sparse matrix-vector multiplication
571    pub fn gpu_spmv_optimized<T>(
572        matrix: &CsrArray<T>,
573        vector: &ArrayView1<T>,
574        optimization: OptimizationHint,
575    ) -> SparseResult<Array1<T>>
576    where
577        T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
578    {
579        let gpu_handler = GpuSpMatVec::new()?;
580        gpu_handler.spmv_optimized(matrix, vector, None, optimization)
581    }
582
583    /// Get information about available GPU backends
584    pub fn available_backends() -> Vec<GpuBackend> {
585        let mut backends = Vec::new();
586
587        if GpuSpMatVec::is_cuda_available() {
588            backends.push(GpuBackend::Cuda);
589        }
590
591        if GpuSpMatVec::is_vulkan_available() {
592            backends.push(GpuBackend::Wgpu);
593        }
594
595        if GpuSpMatVec::is_opencl_available() {
596            backends.push(GpuBackend::OpenCL);
597        }
598
599        if GpuSpMatVec::is_metal_available() {
600            backends.push(GpuBackend::Metal);
601        }
602
603        backends.push(GpuBackend::Cpu); // Always available
604
605        backends
606    }
607}
608
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array1;

    // Construction must succeed on any machine: detection falls back to the
    // CPU backend when no GPU runtime is present.
    #[test]
    fn test_gpu_spmv_creation() {
        let gpu_spmv = GpuSpMatVec::new();
        assert!(gpu_spmv.is_ok());
    }

    #[test]
    fn test_backend_detection() {
        let backend = GpuSpMatVec::detect_best_backend();

        // Should return a valid backend
        match backend {
            GpuBackend::Cuda
            | GpuBackend::OpenCL
            | GpuBackend::Metal
            | GpuBackend::Cpu
            | GpuBackend::Wgpu
            | GpuBackend::Rocm => (),
            // The Vulkan variant only exists on the fallback enum.
            #[cfg(not(feature = "gpu"))]
            GpuBackend::Vulkan => (),
        }
    }

    // Pins the hint-to-level mapping for the Maximum hint across backends.
    #[test]
    fn test_optimization_hint_conversions() {
        let hint = OptimizationHint::Maximum;

        let cuda_level = hint.to_cuda_level();
        let opencl_level = hint.to_opencl_level();
        let metal_level = hint.to_metal_level();

        assert_eq!(cuda_level, CudaOptimizationLevel::WarpLevel);
        assert_eq!(opencl_level, OpenCLOptimizationLevel::Vectorized);
        assert_eq!(metal_level, MetalOptimizationLevel::AppleSilicon);
    }

    #[test]
    fn test_backend_info() {
        let gpu_spmv = GpuSpMatVec::new().expect("Operation failed");
        let info = gpu_spmv.get_backend_info();

        // Every backend (including the CPU fallback) reports non-empty strings.
        assert!(!info.name.is_empty());
        assert!(!info.version.is_empty());
    }

    #[test]
    fn test_convenience_functions() {
        let backends = convenience::available_backends();
        assert!(!backends.is_empty());
        assert!(backends.contains(&GpuBackend::Cpu)); // CPU should always be available
    }

    #[test]
    fn test_is_gpu_available() {
        let gpu_spmv = GpuSpMatVec::new().expect("Operation failed");

        // Should not panic - either true or false is valid
        let _available = gpu_spmv.is_gpu_available();
    }

    #[test]
    fn test_optimization_hint_default() {
        assert_eq!(OptimizationHint::default(), OptimizationHint::Balanced);
    }
}