// scirs2_sparse/gpu_ops.rs

1//! GPU-accelerated operations for sparse matrices
2//!
3//! This module provides GPU acceleration for sparse matrix operations
4//! using the scirs2-core GPU backend system. The implementation has been
5//! modularized for better maintainability and vendor-specific optimizations.
6
7// Re-export all GPU operations from the modular structure
8pub use crate::gpu::*;
9
10// For backward compatibility, re-export the main functions and types
11
12// Common GPU types and traits
13#[cfg(feature = "gpu")]
14pub use scirs2_core::gpu::{GpuBackend, GpuBuffer, GpuContext, GpuDataType, GpuKernelHandle};
15
16#[cfg(feature = "gpu")]
17pub use scirs2_core::GpuError;
18
19// Fallback types when GPU feature is not enabled
20
// Fallback trait standing in for `scirs2_core::gpu::GpuDataType` when the
// `gpu` feature is disabled, so generic bounds keep compiling on CPU-only builds.
#[cfg(not(feature = "gpu"))]
pub trait GpuDataType: Copy + Send + Sync + 'static {}

// Implement the fallback trait for the common numeric types via a small
// macro instead of spelling out each `impl`/`cfg` pair by hand.
#[cfg(not(feature = "gpu"))]
macro_rules! impl_gpu_data_type {
    ($($ty:ty),* $(,)?) => {
        $(impl GpuDataType for $ty {})*
    };
}

#[cfg(not(feature = "gpu"))]
impl_gpu_data_type!(f32, f64, i32, i64, u32, u64);
38
/// Fallback backend enum used when the `gpu` feature is disabled.
///
/// Mirrors the variant set downstream code expects from
/// `scirs2_core::gpu::GpuBackend` so callers compile unchanged; only `Cpu`
/// is ever actually executed on this code path.
#[cfg(not(feature = "gpu"))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum GpuBackend {
    /// Plain CPU execution — the default and the only real backend here.
    #[default]
    Cpu,
    /// NVIDIA CUDA (placeholder in the fallback build).
    Cuda,
    /// OpenCL (placeholder).
    OpenCL,
    /// Apple Metal (placeholder).
    Metal,
    /// AMD ROCm (placeholder).
    Rocm,
    /// WebGPU / wgpu (placeholder).
    Wgpu,
    /// Vulkan (placeholder; NOTE(review): this variant appears to exist only
    /// in the fallback enum, not the core one — see `test_gpu_backend_enum`).
    Vulkan,
}
51
/// Fallback error type used when the `gpu` feature is disabled; wraps a
/// plain message string so this module's `Result` signatures stay identical
/// across feature configurations.
#[cfg(not(feature = "gpu"))]
#[derive(Debug, Clone)]
pub struct GpuError(String);

#[cfg(not(feature = "gpu"))]
impl GpuError {
    /// Create an error from any string-like message.
    ///
    /// Generalized from `&str` to `impl Into<String>`: existing `&str`
    /// callers are unaffected, and `String` callers avoid an extra
    /// allocation. The other constructors are generalized the same way for
    /// a consistent API (they previously required an owned `String`).
    pub fn new(msg: impl Into<String>) -> Self {
        Self(msg.into())
    }

    /// Error for an invalid or mis-sized buffer.
    pub fn invalid_buffer(msg: impl Into<String>) -> Self {
        Self(msg.into())
    }

    /// Error for an invalid parameter (e.g. an out-of-bounds range).
    pub fn invalid_parameter(msg: impl Into<String>) -> Self {
        Self(msg.into())
    }

    /// Error raised when kernel compilation fails.
    pub fn kernel_compilation_error(msg: impl Into<String>) -> Self {
        Self(msg.into())
    }

    /// Catch-all constructor for errors with no more specific category.
    pub fn other(msg: impl Into<String>) -> Self {
        Self(msg.into())
    }
}

// The user-facing form is simply the wrapped message.
#[cfg(not(feature = "gpu"))]
impl std::fmt::Display for GpuError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.0)
    }
}

#[cfg(not(feature = "gpu"))]
impl std::error::Error for GpuError {}
88
89#[cfg(not(feature = "gpu"))]
90pub struct GpuBuffer<T> {
91    data: Vec<T>,
92}
93
94#[cfg(not(feature = "gpu"))]
95impl<T: Clone + Copy> GpuBuffer<T> {
96    pub fn from_vec(data: Vec<T>) -> Self {
97        Self { data }
98    }
99
100    pub fn as_slice(&self) -> &[T] {
101        &self.data
102    }
103
104    pub fn as_mut_slice(&mut self) -> &mut [T] {
105        &mut self.data
106    }
107
108    pub fn to_vec(&self) -> Vec<T> {
109        self.data.clone()
110    }
111
112    pub fn to_host(&self) -> Result<Vec<T>, GpuError> {
113        Ok(self.data.clone())
114    }
115
116    pub fn len(&self) -> usize {
117        self.data.len()
118    }
119
120    pub fn is_empty(&self) -> bool {
121        self.data.is_empty()
122    }
123}
124
/// Fallback stand-in for a compiled-kernel handle when the `gpu` feature is
/// off; carries no state because no real kernels are compiled on the CPU path.
#[cfg(not(feature = "gpu"))]
#[derive(Debug, Clone)]
pub struct GpuKernelHandle;
128
129#[cfg(not(feature = "gpu"))]
130pub struct GpuDevice {
131    backend: GpuBackend,
132}
133
134#[cfg(not(feature = "gpu"))]
135impl GpuDevice {
136    pub fn new(_backend: GpuBackend) -> Result<Self, GpuError> {
137        Ok(Self {
138            backend: GpuBackend::Cpu,
139        })
140    }
141
142    pub fn get_default(_backend: GpuBackend) -> Result<Self, GpuError> {
143        Self::new(_backend)
144    }
145
146    pub fn backend(&self) -> GpuBackend {
147        self.backend
148    }
149
150    pub fn create_buffer<T>(&self, data: &[T]) -> Result<GpuBuffer<T>, GpuError>
151    where
152        T: Clone + Copy,
153    {
154        Ok(GpuBuffer {
155            data: data.to_vec(),
156        })
157    }
158
159    pub fn create_buffer_zeros<T>(&self, size: usize) -> Result<GpuBuffer<T>, GpuError>
160    where
161        T: Clone + Copy + Default,
162    {
163        Ok(GpuBuffer {
164            data: vec![T::default(); size],
165        })
166    }
167}
168
169// GPU data type implementations for compatibility with scirs2-core
170// Note: GpuDataType trait is provided by scirs2-core
171
172// Re-export unified GPU interface
173pub use crate::gpu::{BackendInfo, GpuSpMatVec, OptimizationHint};
174
175// Re-export convenience functions for backward compatibility
176pub use crate::gpu::convenience::{available_backends, gpu_spmv, gpu_spmv_optimized};
177
// Legacy types for backward compatibility
// NOTE(review): no impls for these types are visible in this file; they
// appear to exist only so older downstream code that names them keeps
// compiling. Confirm against callers before removing any of them.
pub struct AdvancedGpuOps {
    /// Unified handler that would perform the actual work.
    gpu_handler: GpuSpMatVec,
}

#[derive(Debug, Clone)]
pub struct GpuKernelScheduler {
    /// Backend the scheduler targets.
    backend: GpuBackend,
}

#[derive(Debug, Clone)]
pub struct GpuMemoryManager {
    /// Backend whose memory would be managed.
    backend: GpuBackend,
}

/// Options bundle for GPU execution (legacy shape).
#[derive(Debug, Clone)]
pub struct GpuOptions {
    /// Backend to run on.
    pub backend: GpuBackend,
    /// Optimization strategy hint for the kernel selection.
    pub optimization: OptimizationHint,
}

#[derive(Debug, Clone)]
pub struct GpuProfiler {
    /// Whether profiling is switched on.
    enabled: bool,
}

pub struct OptimizedGpuOps {
    /// Unified handler that would perform the actual work.
    gpu_handler: GpuSpMatVec,
}
207
208// Legacy function names for backward compatibility
209use crate::csr_array::CsrArray;
210use crate::error::SparseResult;
211use scirs2_core::ndarray::{Array1, ArrayView1};
212use scirs2_core::numeric::{Float, SparseElement};
213use std::fmt::Debug;
214
215// GpuDataType is already defined above in this module
216
/// GPU sparse matrix-vector multiplication (legacy interface)
///
/// Computes `matrix * vector` by delegating to the unified [`GpuSpMatVec`]
/// handler. This function provides backward compatibility with the original
/// API; for new code, consider using the unified `GpuSpMatVec` interface.
///
/// # Arguments
/// * `matrix` - CSR matrix to multiply.
/// * `vector` - dense input vector.
/// * `backend` - explicit backend choice; `None` delegates selection to
///   `GpuSpMatVec::new()`.
///
/// # Errors
/// Propagates any error from constructing the handler or running `spmv`.
#[allow(dead_code)]
pub fn gpu_sparse_matvec<T>(
    matrix: &CsrArray<T>,
    vector: &ArrayView1<T>,
    backend: Option<GpuBackend>,
) -> SparseResult<Array1<T>>
where
    T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
{
    // Build the handler for an explicitly requested backend, or let the
    // handler pick a default.
    let gpu_handler = if let Some(backend) = backend {
        GpuSpMatVec::with_backend(backend)?
    } else {
        GpuSpMatVec::new()?
    };

    // For Metal backend on macOS, create a dummy device
    // NOTE(review): on the gpu+macOS path `GpuDevice::new` is called with two
    // arguments and wrapped directly in `Some(...)`, so it must be the
    // scirs2_core constructor returning a device (not a Result) — confirm.
    #[cfg(all(target_os = "macos", feature = "gpu"))]
    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
        Some(GpuDevice::new(GpuBackend::Metal, 0))
    } else {
        None
    };
    // Without the `gpu` feature the fallback constructor returns a Result,
    // so failure is quietly discarded with `.ok()`.
    #[cfg(all(target_os = "macos", not(feature = "gpu")))]
    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
        GpuDevice::new(GpuBackend::Metal).ok()
    } else {
        None
    };
    // On non-macOS platforms no device hint is passed; the handler decides.
    #[cfg(not(target_os = "macos"))]
    let device = None;

    gpu_handler.spmv(matrix, vector, device.as_ref())
}
254
/// GPU symmetric sparse matrix-vector multiplication (legacy interface)
///
/// NOTE(review): despite the name, this currently delegates to the same
/// general `spmv` path as `gpu_sparse_matvec` — no symmetry-specialized
/// kernel is invoked here. Confirm whether `GpuSpMatVec` exposes a symmetric
/// variant that should be called instead.
///
/// # Errors
/// Propagates any error from constructing the handler or running `spmv`.
#[allow(dead_code)]
pub fn gpu_sym_sparse_matvec<T>(
    matrix: &CsrArray<T>,
    vector: &ArrayView1<T>,
    backend: Option<GpuBackend>,
) -> SparseResult<Array1<T>>
where
    T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
{
    // Build the handler for an explicit backend, or auto-select.
    let gpu_handler = if let Some(backend) = backend {
        GpuSpMatVec::with_backend(backend)?
    } else {
        GpuSpMatVec::new()?
    };

    // For Metal backend on macOS, create a dummy device
    // (same cfg-dependent device selection as `gpu_sparse_matvec`).
    #[cfg(all(target_os = "macos", feature = "gpu"))]
    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
        Some(GpuDevice::new(GpuBackend::Metal, 0))
    } else {
        None
    };
    #[cfg(all(target_os = "macos", not(feature = "gpu")))]
    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
        GpuDevice::new(GpuBackend::Metal).ok()
    } else {
        None
    };
    #[cfg(not(target_os = "macos"))]
    let device = None;

    gpu_handler.spmv(matrix, vector, device.as_ref())
}
289
/// Advanced GPU sparse matrix-vector multiplication with optimization hints
///
/// Same contract as `gpu_sparse_matvec`, but forwards an
/// [`OptimizationHint`] to the handler's `spmv_optimized` entry point.
///
/// # Errors
/// Propagates any error from constructing the handler or running the
/// optimized SpMV.
#[allow(dead_code)]
pub fn gpu_advanced_spmv<T>(
    matrix: &CsrArray<T>,
    vector: &ArrayView1<T>,
    backend: Option<GpuBackend>,
    optimization: OptimizationHint,
) -> SparseResult<Array1<T>>
where
    T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
{
    // Build the handler for an explicit backend, or auto-select.
    let gpu_handler = if let Some(backend) = backend {
        GpuSpMatVec::with_backend(backend)?
    } else {
        GpuSpMatVec::new()?
    };

    // For Metal backend on macOS, create a dummy device
    // (same cfg-dependent device selection as `gpu_sparse_matvec`).
    #[cfg(all(target_os = "macos", feature = "gpu"))]
    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
        Some(GpuDevice::new(GpuBackend::Metal, 0))
    } else {
        None
    };
    #[cfg(all(target_os = "macos", not(feature = "gpu")))]
    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
        GpuDevice::new(GpuBackend::Metal).ok()
    } else {
        None
    };
    #[cfg(not(target_os = "macos"))]
    let device = None;

    gpu_handler.spmv_optimized(matrix, vector, device.as_ref(), optimization)
}
325
// Legacy kernel and device management structures
/// Legacy wrapper presenting the unified `GpuSpMatVec` handler as a
/// "compiled kernel" object for older callers.
#[allow(dead_code)]
pub struct SpMVKernel {
    /// Unified handler that performs the actual SpMV work.
    gpu_handler: GpuSpMatVec,
}

impl SpMVKernel {
    /// Build a kernel wrapper around the unified `GpuSpMatVec` handler.
    ///
    /// `_device` and `_workgroupsize` are accepted for API compatibility
    /// with the original interface but are not used here.
    ///
    /// # Errors
    /// Returns a `GpuError` if the underlying handler cannot be created.
    pub fn new(_device: &GpuDevice, _workgroupsize: [u32; 3]) -> Result<Self, GpuError> {
        let gpu_handler = GpuSpMatVec::new().map_err(|e| {
            // Exactly one of these `return`s survives cfg expansion: the
            // GPU build converts to the core error enum variant, the
            // fallback build uses this module's constructor function.
            #[cfg(feature = "gpu")]
            return GpuError::Other(format!("{:?}", e));
            #[cfg(not(feature = "gpu"))]
            return GpuError::other(format!("{:?}", e));
        })?;
        Ok(Self { gpu_handler })
    }

    /// Execute sparse matrix-vector multiplication on the given device.
    ///
    /// # Errors
    /// Any handler error is re-wrapped as a `GpuError` (same cfg split as
    /// in [`SpMVKernel::new`]).
    pub fn execute<T>(
        &self,
        matrix: &CsrArray<T>,
        vector: &ArrayView1<T>,
        device: &GpuDevice,
    ) -> Result<Array1<T>, GpuError>
    where
        T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
    {
        self.gpu_handler
            .spmv(matrix, vector, Some(device))
            .map_err(|e| {
                #[cfg(feature = "gpu")]
                return GpuError::Other(format!("{:?}", e));
                #[cfg(not(feature = "gpu"))]
                return GpuError::other(format!("{:?}", e));
            })
    }
}
362
// GPU buffer extension trait for compatibility
/// Extension methods for transferring buffer contents back to host memory.
pub trait GpuBufferExt<T: GpuDataType> {
    /// Copy the entire buffer to a host-side `Vec`.
    fn to_host(&self) -> Result<Vec<T>, GpuError>;
    /// Copy the elements in `range` to a host-side `Vec`.
    fn to_host_range(&self, range: std::ops::Range<usize>) -> Result<Vec<T>, GpuError>;
}
368
369impl<T: GpuDataType> GpuBufferExt<T> for GpuBuffer<T> {
370    fn to_host(&self) -> Result<Vec<T>, GpuError> {
371        Ok(self.to_vec())
372    }
373
374    fn to_host_range(&self, range: std::ops::Range<usize>) -> Result<Vec<T>, GpuError> {
375        let full_data = self.to_vec();
376        if range.end <= full_data.len() {
377            Ok(full_data[range].to_vec())
378        } else {
379            #[cfg(feature = "gpu")]
380            return Err(GpuError::InvalidParameter(
381                "Range out of bounds".to_string(),
382            ));
383            #[cfg(not(feature = "gpu"))]
384            return Err(GpuError::invalid_parameter(
385                "Range out of bounds".to_string(),
386            ));
387        }
388    }
389}
390
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array1;

    // End-to-end check of the legacy free-function API. GPU hardware may be
    // absent (e.g. in CI), so specific "no device" errors are tolerated for
    // automatic backend selection, while the CPU backend must always work.
    #[test]
    fn test_backward_compatibility_gpu_sparse_matvec() {
        // Create a simple CSR matrix for testing
        // (dense equivalent: [[1, 2], [3, 4]])
        let data = vec![1.0, 2.0, 3.0, 4.0];
        let indices = vec![0, 1, 0, 1];
        let indptr = vec![0, 2, 4];
        let matrix = CsrArray::new(data.into(), indices.into(), indptr.into(), (2, 2))
            .expect("Operation failed");

        let vector = Array1::from_vec(vec![1.0, 2.0]);

        // Test with automatic backend selection
        // This may fail if GPU hardware is not available, which is expected in CI
        let result = gpu_sparse_matvec(&matrix, &vector.view(), None);
        if let Err(e) = &result {
            eprintln!("Error from gpu_sparse_matvec: {:?}", e);
            // If GPU hardware is not available, we should get a specific error
            // and that's acceptable for testing purposes
            let error_msg = format!("{:?}", e);
            assert!(
                error_msg.contains("GPU device required")
                    || error_msg.contains("not initialized")
                    || error_msg.contains("not available"),
                "Unexpected error: {:?}",
                e
            );
        } else {
            // If it succeeds, that's also fine
            assert!(result.is_ok());
        }

        // Test with specific CPU backend - this should always work
        let result = gpu_sparse_matvec(&matrix, &vector.view(), Some(GpuBackend::Cpu));
        if let Err(e) = &result {
            eprintln!("Error from gpu_sparse_matvec with CPU backend: {:?}", e);
        }
        assert!(result.is_ok(), "CPU backend should always work");
    }

    // SpMVKernel construction must succeed with whichever GpuDevice type
    // (core or fallback) the feature flags select.
    #[test]
    fn test_gpu_spmv_kernel() {
        #[cfg(feature = "gpu")]
        let device = scirs2_core::gpu::GpuDevice::new(GpuBackend::Cpu, 0);
        #[cfg(not(feature = "gpu"))]
        let device = GpuDevice::new(GpuBackend::Cpu).expect("Operation failed");

        let kernel = SpMVKernel::new(&device, [1, 1, 1]);
        assert!(kernel.is_ok());
    }

    // Exercises the fallback buffer's host-transfer helpers. Only compiled
    // without the `gpu` feature, where the struct literal is accessible.
    #[test]
    fn test_gpu_buffer_ext() {
        #[cfg(not(feature = "gpu"))]
        {
            let buffer = GpuBuffer {
                data: vec![1.0, 2.0, 3.0, 4.0],
            };
            let host_data = buffer.to_host().expect("Operation failed");
            assert_eq!(host_data, vec![1.0, 2.0, 3.0, 4.0]);

            let range_data = buffer.to_host_range(1..3).expect("Operation failed");
            assert_eq!(range_data, vec![2.0, 3.0]);
        }
    }

    // Compile-time check: GpuDataType is implemented for the numeric types
    // the sparse kernels are expected to accept.
    #[test]
    fn test_gpu_data_types() {
        // Test that the trait is implemented for expected types
        fn is_gpu_data_type<T: GpuDataType>() {}

        is_gpu_data_type::<f32>();
        is_gpu_data_type::<f64>();
        is_gpu_data_type::<u32>();
        is_gpu_data_type::<u64>();
        is_gpu_data_type::<i32>();
        is_gpu_data_type::<i64>();
    }

    // The variant lists differ per feature: the fallback enum defines
    // `Vulkan`, while the core enum (gpu feature) does not.
    #[test]
    fn test_gpu_backend_enum() {
        #[cfg(not(feature = "gpu"))]
        let backends = [
            GpuBackend::Cpu,
            GpuBackend::Cuda,
            GpuBackend::OpenCL,
            GpuBackend::Metal,
            GpuBackend::Rocm,
            GpuBackend::Wgpu,
            GpuBackend::Vulkan,
        ];
        #[cfg(feature = "gpu")]
        let backends = [
            GpuBackend::Cpu,
            GpuBackend::Cuda,
            GpuBackend::OpenCL,
            GpuBackend::Metal,
            GpuBackend::Rocm,
            GpuBackend::Wgpu,
        ];

        for backend in &backends {
            match backend {
                GpuBackend::Cpu => (),
                GpuBackend::Cuda => (),
                GpuBackend::OpenCL => (),
                GpuBackend::Metal => (),
                GpuBackend::Rocm => (),
                GpuBackend::Wgpu => (),
                // Only the fallback enum has this variant.
                #[cfg(not(feature = "gpu"))]
                GpuBackend::Vulkan => (),
            }
        }
    }

    // The convenience backend enumeration must always report at least CPU.
    #[test]
    fn test_available_backends() {
        let backends = available_backends();
        assert!(!backends.is_empty());
        assert!(backends.contains(&GpuBackend::Cpu)); // CPU should always be available
    }
}
516}