scirs2_sparse/
gpu_ops.rs

1//! GPU-accelerated operations for sparse matrices
2//!
3//! This module provides GPU acceleration for sparse matrix operations
4//! using the scirs2-core GPU backend system. The implementation has been
5//! modularized for better maintainability and vendor-specific optimizations.
6
7// Re-export all GPU operations from the modular structure
8pub use crate::gpu::*;
9
10// For backward compatibility, re-export the main functions and types
11
12// Common GPU types and traits
13#[cfg(feature = "gpu")]
14pub use scirs2_core::gpu::{GpuBackend, GpuBuffer, GpuContext, GpuDataType, GpuKernelHandle};
15
16#[cfg(feature = "gpu")]
17pub use scirs2_core::GpuError;
18
19// Fallback types when GPU feature is not enabled
20
/// Fallback marker trait standing in for `scirs2_core::gpu::GpuDataType`
/// when the `gpu` feature is disabled, so generic bounds such as
/// `T: GpuDataType` keep compiling in CPU-only builds.
#[cfg(not(feature = "gpu"))]
pub trait GpuDataType: Copy + Send + Sync + 'static {}
24
25// Implement GpuDataType for common numeric types
26#[cfg(not(feature = "gpu"))]
27impl GpuDataType for f32 {}
28#[cfg(not(feature = "gpu"))]
29impl GpuDataType for f64 {}
30#[cfg(not(feature = "gpu"))]
31impl GpuDataType for i32 {}
32#[cfg(not(feature = "gpu"))]
33impl GpuDataType for i64 {}
34#[cfg(not(feature = "gpu"))]
35impl GpuDataType for u32 {}
36#[cfg(not(feature = "gpu"))]
37impl GpuDataType for u64 {}
38
/// Fallback backend identifiers used when the `gpu` feature is disabled.
///
/// Mirrors `scirs2_core::gpu::GpuBackend` so code matching on backends still
/// compiles; in CPU-only builds the fallback `GpuDevice` always reports `Cpu`
/// regardless of the variant requested.
#[cfg(not(feature = "gpu"))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum GpuBackend {
    /// Pure-CPU execution (the default, and the only functional fallback).
    #[default]
    Cpu,
    /// NVIDIA CUDA.
    Cuda,
    /// OpenCL.
    OpenCL,
    /// Apple Metal.
    Metal,
    /// AMD ROCm.
    Rocm,
    /// WebGPU (wgpu).
    Wgpu,
}
50
/// Fallback error type mirroring `scirs2_core::GpuError` when the `gpu`
/// feature is disabled. It simply wraps a message string.
#[cfg(not(feature = "gpu"))]
#[derive(Debug, Clone)]
pub struct GpuError(String);

#[cfg(not(feature = "gpu"))]
impl GpuError {
    /// Creates an error from any string-like message.
    ///
    /// Generalized from `&str` to `impl Into<String>` so the constructors all
    /// share one signature style and accept both `&str` and `String` without
    /// conversion at the call site; existing callers are unaffected.
    pub fn new(msg: impl Into<String>) -> Self {
        Self(msg.into())
    }

    /// Error for an invalid GPU buffer (fallback carries only the message).
    pub fn invalid_buffer(msg: impl Into<String>) -> Self {
        Self(msg.into())
    }

    /// Error for an invalid parameter (fallback carries only the message).
    pub fn invalid_parameter(msg: impl Into<String>) -> Self {
        Self(msg.into())
    }

    /// Error for a failed kernel compilation (fallback carries only the message).
    pub fn kernel_compilation_error(msg: impl Into<String>) -> Self {
        Self(msg.into())
    }

    /// Catch-all error constructor (fallback carries only the message).
    pub fn other(msg: impl Into<String>) -> Self {
        Self(msg.into())
    }
}

#[cfg(not(feature = "gpu"))]
impl std::fmt::Display for GpuError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.0)
    }
}

#[cfg(not(feature = "gpu"))]
impl std::error::Error for GpuError {}
87
88#[cfg(not(feature = "gpu"))]
89pub struct GpuBuffer<T> {
90    data: Vec<T>,
91}
92
93#[cfg(not(feature = "gpu"))]
94impl<T: Clone + Copy> GpuBuffer<T> {
95    pub fn from_vec(data: Vec<T>) -> Self {
96        Self { data }
97    }
98
99    pub fn as_slice(&self) -> &[T] {
100        &self.data
101    }
102
103    pub fn as_mut_slice(&mut self) -> &mut [T] {
104        &mut self.data
105    }
106
107    pub fn to_vec(&self) -> Vec<T> {
108        self.data.clone()
109    }
110
111    pub fn to_host(&self) -> Result<Vec<T>, GpuError> {
112        Ok(self.data.clone())
113    }
114
115    pub fn len(&self) -> usize {
116        self.data.len()
117    }
118
119    pub fn is_empty(&self) -> bool {
120        self.data.is_empty()
121    }
122}
123
/// Fallback opaque handle standing in for a compiled GPU kernel when the
/// `gpu` feature is disabled; it carries no state.
#[cfg(not(feature = "gpu"))]
#[derive(Debug, Clone)]
pub struct GpuKernelHandle;
127
128#[cfg(not(feature = "gpu"))]
129pub struct GpuDevice {
130    backend: GpuBackend,
131}
132
133#[cfg(not(feature = "gpu"))]
134impl GpuDevice {
135    pub fn new(_backend: GpuBackend) -> Result<Self, GpuError> {
136        Ok(Self {
137            backend: GpuBackend::Cpu,
138        })
139    }
140
141    pub fn get_default(_backend: GpuBackend) -> Result<Self, GpuError> {
142        Self::new(_backend)
143    }
144
145    pub fn backend(&self) -> GpuBackend {
146        self.backend
147    }
148
149    pub fn create_buffer<T>(&self, data: &[T]) -> Result<GpuBuffer<T>, GpuError>
150    where
151        T: Clone + Copy,
152    {
153        Ok(GpuBuffer {
154            data: data.to_vec(),
155        })
156    }
157
158    pub fn create_buffer_zeros<T>(&self, size: usize) -> Result<GpuBuffer<T>, GpuError>
159    where
160        T: Clone + Copy + Default,
161    {
162        Ok(GpuBuffer {
163            data: vec![T::default(); size],
164        })
165    }
166}
167
168// GPU data type implementations for compatibility with scirs2-core
169// Note: GpuDataType trait is provided by scirs2-core
170
171// Re-export unified GPU interface
172pub use crate::gpu::{BackendInfo, GpuSpMatVec, OptimizationHint};
173
174// Re-export convenience functions for backward compatibility
175pub use crate::gpu::convenience::{available_backends, gpu_spmv, gpu_spmv_optimized};
176
// Legacy types for backward compatibility
/// Legacy wrapper around the unified [`GpuSpMatVec`] handler.
/// NOTE(review): the field is unused by the code visible here; the type is
/// kept so old call sites still compile — confirm before removing.
pub struct AdvancedGpuOps {
    gpu_handler: GpuSpMatVec,
}

/// Legacy kernel-scheduler placeholder; records only the target backend.
#[derive(Debug, Clone)]
pub struct GpuKernelScheduler {
    backend: GpuBackend,
}

/// Legacy memory-manager placeholder; records only the target backend.
#[derive(Debug, Clone)]
pub struct GpuMemoryManager {
    backend: GpuBackend,
}

/// Legacy bundle of GPU execution options.
#[derive(Debug, Clone)]
pub struct GpuOptions {
    /// Backend the operation should run on.
    pub backend: GpuBackend,
    /// Optimization strategy hint for the unified interface.
    pub optimization: OptimizationHint,
}

/// Legacy profiler placeholder; only tracks whether profiling is enabled.
#[derive(Debug, Clone)]
pub struct GpuProfiler {
    enabled: bool,
}

/// Legacy wrapper around the unified [`GpuSpMatVec`] handler.
/// NOTE(review): the field is unused by the code visible here; the type is
/// kept so old call sites still compile — confirm before removing.
pub struct OptimizedGpuOps {
    gpu_handler: GpuSpMatVec,
}
206
207// Legacy function names for backward compatibility
208use crate::csr_array::CsrArray;
209use crate::error::SparseResult;
210use scirs2_core::ndarray::{Array1, ArrayView1};
211use scirs2_core::numeric::Float;
212use std::fmt::Debug;
213
214// GpuDataType is already defined above in this module
215
216/// GPU sparse matrix-vector multiplication (legacy interface)
217///
218/// This function provides backward compatibility with the original API.
219/// For new code, consider using the unified `GpuSpMatVec` interface.
220#[allow(dead_code)]
221pub fn gpu_sparse_matvec<T>(
222    matrix: &CsrArray<T>,
223    vector: &ArrayView1<T>,
224    backend: Option<GpuBackend>,
225) -> SparseResult<Array1<T>>
226where
227    T: Float + Debug + Copy + GpuDataType + std::iter::Sum,
228{
229    let gpu_handler = if let Some(backend) = backend {
230        GpuSpMatVec::with_backend(backend)?
231    } else {
232        GpuSpMatVec::new()?
233    };
234
235    // For Metal backend on macOS, create a dummy device
236    #[cfg(all(target_os = "macos", feature = "gpu"))]
237    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
238        Some(GpuDevice::new(GpuBackend::Metal, 0))
239    } else {
240        None
241    };
242    #[cfg(all(target_os = "macos", not(feature = "gpu")))]
243    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
244        GpuDevice::new(GpuBackend::Metal).ok()
245    } else {
246        None
247    };
248    #[cfg(not(target_os = "macos"))]
249    let device = None;
250
251    gpu_handler.spmv(matrix, vector, device.as_ref())
252}
253
254/// GPU symmetric sparse matrix-vector multiplication (legacy interface)
255#[allow(dead_code)]
256pub fn gpu_sym_sparse_matvec<T>(
257    matrix: &CsrArray<T>,
258    vector: &ArrayView1<T>,
259    backend: Option<GpuBackend>,
260) -> SparseResult<Array1<T>>
261where
262    T: Float + Debug + Copy + GpuDataType + std::iter::Sum,
263{
264    let gpu_handler = if let Some(backend) = backend {
265        GpuSpMatVec::with_backend(backend)?
266    } else {
267        GpuSpMatVec::new()?
268    };
269
270    // For Metal backend on macOS, create a dummy device
271    #[cfg(all(target_os = "macos", feature = "gpu"))]
272    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
273        Some(GpuDevice::new(GpuBackend::Metal, 0))
274    } else {
275        None
276    };
277    #[cfg(all(target_os = "macos", not(feature = "gpu")))]
278    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
279        GpuDevice::new(GpuBackend::Metal).ok()
280    } else {
281        None
282    };
283    #[cfg(not(target_os = "macos"))]
284    let device = None;
285
286    gpu_handler.spmv(matrix, vector, device.as_ref())
287}
288
289/// Advanced GPU sparse matrix-vector multiplication with optimization hints
290#[allow(dead_code)]
291pub fn gpu_advanced_spmv<T>(
292    matrix: &CsrArray<T>,
293    vector: &ArrayView1<T>,
294    backend: Option<GpuBackend>,
295    optimization: OptimizationHint,
296) -> SparseResult<Array1<T>>
297where
298    T: Float + Debug + Copy + GpuDataType + std::iter::Sum,
299{
300    let gpu_handler = if let Some(backend) = backend {
301        GpuSpMatVec::with_backend(backend)?
302    } else {
303        GpuSpMatVec::new()?
304    };
305
306    // For Metal backend on macOS, create a dummy device
307    #[cfg(all(target_os = "macos", feature = "gpu"))]
308    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
309        Some(GpuDevice::new(GpuBackend::Metal, 0))
310    } else {
311        None
312    };
313    #[cfg(all(target_os = "macos", not(feature = "gpu")))]
314    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
315        GpuDevice::new(GpuBackend::Metal).ok()
316    } else {
317        None
318    };
319    #[cfg(not(target_os = "macos"))]
320    let device = None;
321
322    gpu_handler.spmv_optimized(matrix, vector, device.as_ref(), optimization)
323}
324
// Legacy kernel and device management structures
/// Legacy handle for a sparse matrix-vector (SpMV) kernel; wraps the unified
/// [`GpuSpMatVec`] implementation for old call sites.
#[allow(dead_code)]
pub struct SpMVKernel {
    gpu_handler: GpuSpMatVec,
}

impl SpMVKernel {
    /// Creates a kernel handle.
    ///
    /// `_device` and `_workgroupsize` are accepted for API compatibility but
    /// ignored: the unified handler manages its own device and launch
    /// configuration.
    pub fn new(_device: &GpuDevice, _workgroupsize: [u32; 3]) -> Result<Self, GpuError> {
        let gpu_handler = GpuSpMatVec::new().map_err(|e| {
            // Map the sparse-crate error onto the feature-appropriate error
            // form: the core crate's `Other` enum variant under `gpu`, the
            // fallback `other` constructor otherwise. Exactly one `return`
            // survives cfg expansion, so the closure is well-formed.
            #[cfg(feature = "gpu")]
            return GpuError::Other(format!("{:?}", e));
            #[cfg(not(feature = "gpu"))]
            return GpuError::other(format!("{:?}", e));
        })?;
        Ok(Self { gpu_handler })
    }

    /// Executes `matrix * vector` on `device`, translating any sparse-crate
    /// error into a `GpuError`.
    pub fn execute<T>(
        &self,
        matrix: &CsrArray<T>,
        vector: &ArrayView1<T>,
        device: &GpuDevice,
    ) -> Result<Array1<T>, GpuError>
    where
        T: Float + Debug + Copy + GpuDataType + std::iter::Sum,
    {
        self.gpu_handler
            .spmv(matrix, vector, Some(device))
            .map_err(|e| {
                // Same cfg-gated error translation as in `new`.
                #[cfg(feature = "gpu")]
                return GpuError::Other(format!("{:?}", e));
                #[cfg(not(feature = "gpu"))]
                return GpuError::other(format!("{:?}", e));
            })
    }
}
361
// GPU buffer extension trait for compatibility
/// Extension trait adding host-transfer helpers to [`GpuBuffer`], giving the
/// real GPU buffer and the CPU fallback a common download API.
pub trait GpuBufferExt<T: GpuDataType> {
    /// Copies the entire buffer to host memory.
    fn to_host(&self) -> Result<Vec<T>, GpuError>;
    /// Copies the elements at `range` (element indices) to host memory.
    fn to_host_range(&self, range: std::ops::Range<usize>) -> Result<Vec<T>, GpuError>;
}
367
368impl<T: GpuDataType> GpuBufferExt<T> for GpuBuffer<T> {
369    fn to_host(&self) -> Result<Vec<T>, GpuError> {
370        Ok(self.to_vec())
371    }
372
373    fn to_host_range(&self, range: std::ops::Range<usize>) -> Result<Vec<T>, GpuError> {
374        let full_data = self.to_vec();
375        if range.end <= full_data.len() {
376            Ok(full_data[range].to_vec())
377        } else {
378            #[cfg(feature = "gpu")]
379            return Err(GpuError::InvalidParameter(
380                "Range out of bounds".to_string(),
381            ));
382            #[cfg(not(feature = "gpu"))]
383            return Err(GpuError::invalid_parameter(
384                "Range out of bounds".to_string(),
385            ));
386        }
387    }
388}
389
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array1;

    /// The legacy wrapper must either compute the product or fail with a
    /// recognized "no GPU available"-style error; the CPU backend must work.
    #[test]
    fn test_backward_compatibility_gpu_sparse_matvec() {
        // Create a simple CSR matrix for testing
        let data = vec![1.0, 2.0, 3.0, 4.0];
        let indices = vec![0, 1, 0, 1];
        let indptr = vec![0, 2, 4];
        let matrix = CsrArray::new(data.into(), indices.into(), indptr.into(), (2, 2)).unwrap();

        let vector = Array1::from_vec(vec![1.0, 2.0]);

        // Test with automatic backend selection
        // This may fail if GPU hardware is not available, which is expected in CI
        let result = gpu_sparse_matvec(&matrix, &vector.view(), None);
        if let Err(e) = &result {
            eprintln!("Error from gpu_sparse_matvec: {:?}", e);
            // If GPU hardware is not available, we should get a specific error
            // and that's acceptable for testing purposes
            let error_msg = format!("{:?}", e);
            assert!(
                error_msg.contains("GPU device required")
                    || error_msg.contains("not initialized")
                    || error_msg.contains("not available"),
                "Unexpected error: {:?}",
                e
            );
        } else {
            // If it succeeds, that's also fine
            assert!(result.is_ok());
        }

        // Test with specific CPU backend - this should always work
        let result = gpu_sparse_matvec(&matrix, &vector.view(), Some(GpuBackend::Cpu));
        if let Err(e) = &result {
            eprintln!("Error from gpu_sparse_matvec with CPU backend: {:?}", e);
        }
        assert!(result.is_ok(), "CPU backend should always work");
    }

    /// Kernel construction must succeed against both the real GPU device
    /// (with the `gpu` feature) and the CPU fallback device (without it).
    #[test]
    fn test_gpu_spmv_kernel() {
        #[cfg(feature = "gpu")]
        let device = scirs2_core::gpu::GpuDevice::new(GpuBackend::Cpu, 0);
        #[cfg(not(feature = "gpu"))]
        let device = GpuDevice::new(GpuBackend::Cpu).unwrap();

        let kernel = SpMVKernel::new(&device, [1, 1, 1]);
        assert!(kernel.is_ok());
    }

    /// Exercises the host-transfer extension trait against the fallback
    /// buffer (the struct-literal construction only exists without `gpu`).
    #[test]
    fn test_gpu_buffer_ext() {
        #[cfg(not(feature = "gpu"))]
        {
            let buffer = GpuBuffer {
                data: vec![1.0, 2.0, 3.0, 4.0],
            };
            let host_data = buffer.to_host().unwrap();
            assert_eq!(host_data, vec![1.0, 2.0, 3.0, 4.0]);

            let range_data = buffer.to_host_range(1..3).unwrap();
            assert_eq!(range_data, vec![2.0, 3.0]);
        }
    }

    /// Compile-time check that `GpuDataType` covers the expected numeric types.
    #[test]
    fn test_gpu_data_types() {
        // Test that the trait is implemented for expected types
        fn is_gpu_data_type<T: GpuDataType>() {}

        is_gpu_data_type::<f32>();
        is_gpu_data_type::<f64>();
        is_gpu_data_type::<u32>();
        is_gpu_data_type::<u64>();
        is_gpu_data_type::<i32>();
        is_gpu_data_type::<i64>();
    }

    /// Exhaustive match over every backend variant; fails to compile if a
    /// variant is added without updating this list.
    #[test]
    fn test_gpu_backend_enum() {
        let backends = [
            GpuBackend::Cpu,
            GpuBackend::Cuda,
            GpuBackend::OpenCL,
            GpuBackend::Metal,
            GpuBackend::Rocm,
            GpuBackend::Wgpu,
        ];

        for backend in &backends {
            match backend {
                GpuBackend::Cpu => (),
                GpuBackend::Cuda => (),
                GpuBackend::OpenCL => (),
                GpuBackend::Metal => (),
                GpuBackend::Rocm => (),
                GpuBackend::Wgpu => (),
            }
        }
    }

    /// Backend discovery must always report at least the CPU backend.
    #[test]
    fn test_available_backends() {
        let backends = available_backends();
        assert!(!backends.is_empty());
        assert!(backends.contains(&GpuBackend::Cpu)); // CPU should always be available
    }
}
501}